Copyright (c) Microsoft Corporation. All rights reserved.

Licensed under the MIT License.

# AutoML 10: Multi output Example for AutoML

This notebook shows how to use AutoML to train multi-output problems by leveraging the correlation between the outputs, using indicator vectors to tell the outputs apart.

In [None]:
import logging
import os
import random

from matplotlib import pyplot as plt
from matplotlib.pyplot import imshow
import numpy as np
import pandas as pd
from sklearn import datasets

import azureml.core
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.train.automl import AutoMLConfig
from azureml.train.automl.run import AutoMLRun

## Diagnostics

Opt-in diagnostics for better experience, quality, and security of future releases

In [None]:
# Opt in to diagnostics collection by passing send_diagnostics=True.
from azureml.telemetry import set_diagnostics_collection
set_diagnostics_collection(send_diagnostics=True)

## Transformer functions
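The inputs X and Y are transformed as follows. For example, if Y = {y_1, y_2}, X is stacked once per output column and a one-hot indicator vector identifying that output is appended, so X becomes

$$\begin{bmatrix} X & \mathbf{1} & \mathbf{0} \\ X & \mathbf{0} & \mathbf{1} \end{bmatrix}$$

and Y becomes

$$\begin{bmatrix} y_1 \\ y_2 \end{bmatrix}$$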

In [None]:
from scipy import sparse
from scipy import linalg

#Transformer functions
def multi_output_transform_x_y(X, Y):
    """Transform a multi-output dataset into a single-output one.

    X is expanded with one indicator column per column of Y (see
    ``multi_output_transformer_x``) and Y is flattened column by column
    (see ``multi_output_transform_y``).

    Args:
        X: Feature matrix with one row per sample.
        Y: Two-dimensional target array with one column per output.

    Returns:
        A ``(X_new, y_new)`` tuple holding the transformed features and the
        flattened targets.
    """
    n_outputs = Y.shape[1]
    return multi_output_transformer_x(X, n_outputs), multi_output_transform_y(Y)

def multi_output_transformer_x(X, number_of_columns_Y):
    """Stack X once per output and append one-hot indicator columns.

    For ``k = number_of_columns_Y`` and an X with ``n`` rows and ``d``
    columns, the result has ``n * k`` rows and ``d + k`` columns: the i-th
    vertical copy of X is followed by an indicator block that is 1 in
    column i and 0 elsewhere.

    Args:
        X: Dense array or scipy sparse matrix with one row per sample.
        number_of_columns_Y: Number of output columns in the target.

    Returns:
        A scipy sparse matrix when X is sparse, otherwise a dense
        ``numpy.ndarray``.
    """
    n_rows = X.shape[0]

    if sparse.issparse(X):
        # Stack the sparse matrix directly; np.tile is only defined for dense
        # arrays and handles a sparse matrix via object-array coercion.
        stacked = sparse.vstack([X] * number_of_columns_Y)

        # Build the indicator block in COO form so no dense
        # (n * k, k) array is materialised on the sparse path.
        total_rows = n_rows * number_of_columns_Y
        row_idx = np.arange(total_rows)
        col_idx = np.repeat(np.arange(number_of_columns_Y), n_rows)
        indicator_vecs = sparse.coo_matrix(
            (np.ones(total_rows), (row_idx, col_idx)),
            shape=(total_rows, number_of_columns_Y),
        )
        return sparse.hstack((stacked, indicator_vecs))

    # Dense path: a block-diagonal matrix of all-ones column vectors gives the
    # one-hot indicator columns for each stacked copy of X.
    indicator_vecs = linalg.block_diag(
        *([np.ones((n_rows, 1))] * number_of_columns_Y)
    )
    X_new = np.tile(X, (number_of_columns_Y, 1))
    return np.hstack((X_new, indicator_vecs))

def multi_output_transform_y(Y):
    """Flatten Y column by column into a one-dimensional array.

    Column-major (Fortran) order is used, so all values of the first output
    come first, followed by all values of the second output, and so on.
    """
    flattened = Y.reshape((-1,), order="F")
    return flattened
 
def multi_output_inverse_transform_y(y, number_of_columns_y):
    """Reshape a flattened target vector back into one column per output.

    This undoes the column-major flattening done by
    ``multi_output_transform_y``: consecutive runs of ``y`` become the
    columns of the returned two-dimensional array.
    """
    target_shape = (-1, number_of_columns_y)
    return y.reshape(target_shape, order="F")

## AutoML experiment set up

In [None]:
# Load the workspace via Workspace.from_config().
ws = Workspace.from_config()

# choose a name for experiment
experiment_name = 'automl-local-multi-output'
# project folder
project_folder = './sample_projects/automl-local-multi-output'

experiment = Experiment(ws, experiment_name)

# Summary of the environment, shown as a one-column table below.
output = {
    'SDK version': azureml.core.VERSION,
    'Subscription ID': ws.subscription_id,
    'Workspace': ws.name,
    'Resource Group': ws.resource_group,
    'Location': ws.location,
    'Project Directory': project_folder,
    'Experiment Name': experiment.name,
}

# Show full cell contents without truncation. None is the supported value on
# pandas >= 1.0 (negative widths are deprecated there and rejected by
# pandas 2.x); older pandas only accepts an int, so fall back to -1.
try:
    pd.set_option('display.max_colwidth', None)
except ValueError:
    pd.set_option('display.max_colwidth', -1)
pd.DataFrame(data=output, index=['']).T

## Create a random dataset for testing

In [None]:
# Fixed seed so the generated data is reproducible.
rng = np.random.RandomState(1)

# 600 sample points drawn uniformly from [-100, 100), sorted ascending.
X_train = np.sort(rng.rand(600, 1) * 200 - 100, axis=0)

# Two correlated targets: pi * sin(x) and pi * cos(x), one per column.
Y_train = np.pi * np.hstack((np.sin(X_train), np.cos(X_train)))

# Add uniform noise in (-0.5, 0.5] to both targets.
Y_train = Y_train + (0.5 - rng.rand(*Y_train.shape))

Transform X and Y using the transformer functions.

In [None]:
# Expand X with per-output indicator columns and flatten Y column by column,
# turning the two-output problem into a single-output regression dataset.
X_train_transformed, y_train_transformed = multi_output_transform_x_y(X_train, Y_train)

In [None]:
# Regression run on the transformed (single-output) data, scored by R^2.
automl_config = AutoMLConfig(
    task='regression',
    primary_metric='r2_score',
    iterations=10,
    n_cross_validations=2,
    X=X_train_transformed,
    y=y_train_transformed,
    path=project_folder,
    debug_log='automl_errors_multi.log',
    verbosity=logging.INFO,
)

## Fit the transformed data 

In [None]:
# Submit the AutoML configuration to the experiment. show_output=True is
# passed, presumably so progress is printed in the notebook while it runs.
local_run = experiment.submit(automl_config, show_output=True)

In [None]:
# Get the best fit model: get_output() returns a pair that is unpacked into
# the best run and its fitted model.
best_run, fitted_model = local_run.get_output()

In [None]:
# Generate a random data set for predicting: 200 points drawn uniformly from
# [-100, 100), sorted in ascending order.
X_predict = rng.rand(200, 1) * 200 - 100
X_predict.sort(axis=0)

In [None]:
# The number of outputs is taken from the training targets.
n_outputs = Y_train.shape[1]

# Transform the prediction data the same way as the training features.
X_predict_transformed = multi_output_transformer_x(X_predict, n_outputs)

# Predict on the stacked data, then fold the flat prediction vector back
# into one column per output.
y_predict = fitted_model.predict(X_predict_transformed)
Y_predict = multi_output_inverse_transform_y(y_predict, n_outputs)

In [None]:
# Display the predictions; after the inverse transform Y_predict has one
# column per output.
print(Y_predict)