Copyright (c) Microsoft Corporation. All rights reserved.

Licensed under the MIT License.

# AutoML 10: Multi-output

This notebook shows how to use AutoML to train models for multi-output problems. It leverages the correlation between the outputs by appending indicator vectors to the inputs.

Make sure you have executed the [00.configuration](00.configuration.ipynb) notebook before running this notebook.

In [None]:
import logging
import os
import random

from matplotlib import pyplot as plt
from matplotlib.pyplot import imshow
import numpy as np
import pandas as pd
from sklearn import datasets

import azureml.core
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.train.automl import AutoMLConfig
from azureml.train.automl.run import AutoMLRun

## Diagnostics

Opt-in diagnostics for better experience, quality, and security of future releases.

In [None]:
from azureml.telemetry import set_diagnostics_collection
# Opt in to diagnostics collection (see the "Diagnostics" section text above this cell).
set_diagnostics_collection(send_diagnostics = True)

## Transformer Functions
The inputs `X` and `y` are transformed as follows. For example, if `y = {y_1, y_2}`, then `X` becomes
    
`X 1 0`
     
`X 0 1`

and `y` becomes,

`y_1`

`y_2`

In [None]:
from scipy import sparse
from scipy import linalg

#Transformer functions
def multi_output_transform_x_y(X, y):
    """Transform a multi-output problem into a single-output one.

    ``X`` is stacked once per column of ``y`` with indicator columns appended
    (see ``multi_output_transformer_x``), and ``y`` is flattened column by
    column (see ``multi_output_transform_y``).

    Parameters
    ----------
    X : feature matrix accepted by ``multi_output_transformer_x``.
    y : 2-D target array; ``y.shape[1]`` is the number of outputs.

    Returns
    -------
    tuple
        ``(X_new, y_new)``: the transformed features and flattened targets.
    """
    output_count = y.shape[1]
    return multi_output_transformer_x(X, output_count), multi_output_transform_y(y)

def multi_output_transformer_x(X, number_of_columns_y):
    """Stack ``X`` once per output and append output indicator columns.

    The result has ``X.shape[0] * number_of_columns_y`` rows. Copy ``i`` of
    ``X`` (rows ``i * n`` through ``(i + 1) * n - 1``, with ``n = X.shape[0]``)
    carries a 1 in appended indicator column ``i`` and 0 in the other
    indicator columns.

    Parameters
    ----------
    X : numpy.ndarray or scipy.sparse matrix
        2-D feature matrix with one row per sample.
    number_of_columns_y : int
        Number of outputs (columns of the target matrix); must be >= 1.

    Returns
    -------
    numpy.ndarray or scipy.sparse matrix
        A dense array for dense input; for sparse input, the sparse matrix
        returned by ``scipy.sparse.hstack``.

    Raises
    ------
    ValueError
        If ``number_of_columns_y`` is smaller than 1.
    """
    if number_of_columns_y < 1:
        raise ValueError(
            "number_of_columns_y must be at least 1, got {}".format(number_of_columns_y))

    n_samples = X.shape[0]
    # range() accepts any integer type (including NumPy integers), whereas
    # repeating a list with a NumPy integer is dispatched to NumPy
    # multiplication and does not repeat the list.
    indicator_blocks = [np.ones((n_samples, 1)) for _ in range(number_of_columns_y)]
    # Block-diagonal layout: one column of ones per output, zeros elsewhere.
    indicator_vecs = linalg.block_diag(*indicator_blocks)

    if sparse.issparse(X):
        # np.tile is not defined for sparse matrices; hand sparse.vstack an
        # explicit list of blocks instead.
        stacked = sparse.vstack([X for _ in range(number_of_columns_y)])
        return sparse.hstack((stacked, sparse.coo_matrix(indicator_vecs)))

    stacked = np.tile(X, (number_of_columns_y, 1))
    return np.hstack((stacked, indicator_vecs))

def multi_output_transform_y(y):
    """Flatten the target array column by column (Fortran order).

    For a 2-D ``y`` all values of the first column come first, followed by
    all values of the second column, and so on.
    """
    flat_shape = (-1,)
    return y.reshape(flat_shape, order="F")

def multi_output_inverse_transform_y(y, number_of_columns_y):
    """Undo ``multi_output_transform_y``.

    Reshapes the flat array ``y`` into ``number_of_columns_y`` columns using
    Fortran order, so consecutive runs of values become consecutive columns.
    """
    restored = y.reshape(-1, number_of_columns_y, order="F")
    return restored

## AutoML Experiment Setup

In [None]:
ws = Workspace.from_config()

# Choose a name for the experiment and specify the project folder.
experiment_name = 'automl-local-multi-output'
project_folder = './sample_projects/automl-local-multi-output'

experiment = Experiment(ws, experiment_name)

# Collect the workspace and experiment details into a one-column summary table.
output = {}
output['SDK version'] = azureml.core.VERSION
output['Subscription ID'] = ws.subscription_id
output['Workspace'] = ws.name
output['Resource Group'] = ws.resource_group
output['Location'] = ws.location
output['Project Directory'] = project_folder
output['Experiment Name'] = experiment.name

# Show full cell contents without truncation. `None` is the supported way to
# disable the column-width limit; `-1` was deprecated in pandas 1.0 and is
# rejected by later releases. Older pandas only accepts integers, so fall
# back to `-1` there.
try:
    pd.set_option('display.max_colwidth', None)
except ValueError:
    pd.set_option('display.max_colwidth', -1)
pd.DataFrame(data = output, index = ['']).T

## Create a Random Dataset for Test Purposes

In [None]:
# Seeded generator so the synthetic data set is the same on every run.
rng = np.random.RandomState(1)

# 600 sample points drawn from [-100, 100) and sorted in ascending order.
X_train = np.sort(200 * rng.rand(600, 1) - 100, axis=0)

# Two targets per sample: pi*sin(x) and pi*cos(x), one column per output.
sin_target = np.pi * np.sin(X_train).ravel()
cos_target = np.pi * np.cos(X_train).ravel()
y_train = np.array([sin_target, cos_target]).T

# Perturb every target value with noise in (-0.5, 0.5].
y_train += 0.5 - rng.rand(*y_train.shape)

Perform X and y transformation using the transformer function.

In [None]:
# Stack X_train once per column of y_train (with indicator columns appended)
# and flatten y_train column by column, giving a single-output problem.
X_train_transformed, y_train_transformed = multi_output_transform_x_y(X_train, y_train)

Configure AutoML using the transformed results.

In [None]:
# Settings for the AutoML regression run on the transformed (stacked) data.
automl_settings = {
    'task': 'regression',
    'debug_log': 'automl_errors_multi.log',
    'primary_metric': 'r2_score',
    'iterations': 10,
    'n_cross_validations': 2,
    'verbosity': logging.INFO,
    'X': X_train_transformed,
    'y': y_train_transformed,
    'path': project_folder,
}
automl_config = AutoMLConfig(**automl_settings)

## Fit the Transformed Data

In [None]:
# Submit the AutoML configuration to the experiment, with show_output enabled.
local_run = experiment.submit(automl_config, show_output = True)

In [None]:
# Get the best fit model: the run's output is unpacked into the best run
# and the fitted model that is used for prediction below.
best_run, fitted_model = local_run.get_output()

In [None]:
# Generate random data set for predicting: 200 sorted points in [-100, 100),
# drawn from the same seeded `rng` used for the training data.
X_test = np.sort(200 * rng.rand(200, 1) - 100, axis = 0)

In [None]:
# The test inputs must be stacked with the same number of indicator columns
# as the training data, i.e. one per column of y_train.
n_outputs = y_train.shape[1]
X_test_transformed = multi_output_transformer_x(X_test, n_outputs)

# The model predicts one flat vector for the stacked inputs; reshape it back
# so that each output gets its own column.
y_predict_stacked = fitted_model.predict(X_test_transformed)
y_predict = multi_output_inverse_transform_y(y_predict_stacked, n_outputs)

In [None]:
# Show the predictions; after the inverse transform there is one column per output.
print(y_predict)