mirror of
https://github.com/Azure/MachineLearningNotebooks.git
synced 2025-12-20 01:27:06 -05:00
295 lines
7.6 KiB
Plaintext
295 lines
7.6 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
|
"\n",
|
|
"Licensed under the MIT License."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# AutoML 10: Multi-output\n",
|
|
"\n",
|
|
"This notebook shows how to use AutoML to train a model for multi-output problems, leveraging the correlation between the outputs by adding indicator vectors to the inputs.\n",
|
|
"\n",
|
|
"Make sure you have executed the [00.configuration](00.configuration.ipynb) notebook before running this notebook."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import logging\n",
|
|
"import os\n",
|
|
"import random\n",
|
|
"\n",
|
|
"from matplotlib import pyplot as plt\n",
|
|
"from matplotlib.pyplot import imshow\n",
|
|
"import numpy as np\n",
|
|
"import pandas as pd\n",
|
|
"from sklearn import datasets\n",
|
|
"\n",
|
|
"import azureml.core\n",
|
|
"from azureml.core.experiment import Experiment\n",
|
|
"from azureml.core.workspace import Workspace\n",
|
|
"from azureml.train.automl import AutoMLConfig\n",
|
|
"from azureml.train.automl.run import AutoMLRun"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Diagnostics\n",
|
|
"\n",
|
|
"Opt-in diagnostics for better experience, quality, and security of future releases."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from azureml.telemetry import set_diagnostics_collection\n",
|
|
"set_diagnostics_collection(send_diagnostics = True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Transformer Functions\n",
|
|
"The inputs `X` and `y` are transformed as follows. For example, if `y = {y_1, y_2}`, then `X` becomes\n",
|
|
" \n",
|
|
"`X 1 0`\n",
|
|
" \n",
|
|
"`X 0 1`\n",
|
|
"\n",
|
|
"and `y` becomes,\n",
|
|
"\n",
|
|
"`y_1`\n",
|
|
"\n",
|
|
"`y_2`"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from scipy import sparse\n",
|
|
"from scipy import linalg\n",
|
|
"\n",
|
|
"#Transformer functions\n",
|
|
"def multi_output_transform_x_y(X, y):\n",
"    \"\"\"Transform `X` and `y` together so a single-output learner can fit them.\n",
"\n",
"    `X` is expanded by `multi_output_transformer_x` using the number of\n",
"    columns of `y` (`y.shape[1]`), and `y` is flattened by\n",
"    `multi_output_transform_y`. Returns the pair `(X_new, y_new)`.\n",
"    \"\"\"\n",
"    number_of_outputs = y.shape[1]\n",
"    return (\n",
"        multi_output_transformer_x(X, number_of_outputs),\n",
"        multi_output_transform_y(y),\n",
"    )\n",
|
|
"\n",
|
|
"def multi_output_transformer_x(X, number_of_columns_y):\n",
"    \"\"\"Repeat `X` once per output column and append indicator columns.\n",
"\n",
"    The rows of `X` are stacked `number_of_columns_y` times. One extra column\n",
"    per output is appended: copy `k` of `X` has a 1 in indicator column `k`\n",
"    and 0 in the others, e.g. for two outputs the result is\n",
"    `[[X, 1, 0], [X, 0, 1]]`.\n",
"\n",
"    Parameters:\n",
"        X: dense array or scipy sparse matrix; expected to be 2-D, since\n",
"            `X.shape[0]` is read and columns are appended with `hstack`.\n",
"        number_of_columns_y: number of output columns to make room for.\n",
"\n",
"    Returns:\n",
"        Matrix with `X.shape[0] * number_of_columns_y` rows and\n",
"        `X.shape[1] + number_of_columns_y` columns: the result of\n",
"        `sparse.hstack` for sparse input, a numpy array otherwise.\n",
"    \"\"\"\n",
"    # Block-diagonal arrangement of all-ones columns: one column per output.\n",
"    indicator_vecs = linalg.block_diag(*([np.ones((X.shape[0], 1))] * number_of_columns_y))\n",
"    if sparse.issparse(X):\n",
"        # Hand sparse.vstack an explicit list of blocks. np.tile is a dense-array\n",
"        # routine and only handles a sparse matrix by wrapping it in an object array.\n",
"        X_new = sparse.vstack([X] * number_of_columns_y)\n",
"        indicator_vecs = sparse.coo_matrix(indicator_vecs)\n",
"        X_new = sparse.hstack((X_new, indicator_vecs))\n",
"    else:\n",
"        X_new = np.tile(X, (number_of_columns_y, 1))\n",
"        X_new = np.hstack((X_new, indicator_vecs))\n",
"    return X_new\n",
|
|
"\n",
|
|
"def multi_output_transform_y(y):\n",
"    \"\"\"Flatten `y` column by column (Fortran order) into one dimension.\n",
"\n",
"    For `y` with columns `y_1, y_2, ...` the result lists every value of\n",
"    `y_1` first, then every value of `y_2`, and so on.\n",
"    \"\"\"\n",
"    flat_shape = (-1,)\n",
"    return y.reshape(flat_shape, order=\"F\")\n",
|
|
"\n",
|
|
"def multi_output_inverse_transform_y(y, number_of_columns_y):\n",
"    \"\"\"Reshape flat `y` into `number_of_columns_y` columns, filled column by column.\n",
"\n",
"    The values are read in Fortran order, so the first block of `y` becomes\n",
"    the first column, the next block the second column, and so on. The row\n",
"    count is inferred from the length of `y`.\n",
"    \"\"\"\n",
"    target_shape = (-1, number_of_columns_y)\n",
"    return y.reshape(target_shape, order=\"F\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## AutoML Experiment Setup"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"ws = Workspace.from_config()\n",
"\n",
"# Choose a name for the experiment and specify the project folder.\n",
"experiment_name = 'automl-local-multi-output'\n",
"project_folder = './sample_projects/automl-local-multi-output'\n",
"\n",
"experiment = Experiment(ws, experiment_name)\n",
"\n",
"# Collect the workspace and experiment settings for display as a one-column table.\n",
"output = {\n",
"    'SDK version': azureml.core.VERSION,\n",
"    'Subscription ID': ws.subscription_id,\n",
"    'Workspace': ws.name,\n",
"    'Resource Group': ws.resource_group,\n",
"    'Location': ws.location,\n",
"    'Project Directory': project_folder,\n",
"    'Experiment Name': experiment.name,\n",
"}\n",
"# NOTE(review): -1 is the older pandas spelling for \"do not truncate\"; pandas >= 1.0\n",
"# documents None for this option -- confirm against the installed pandas version.\n",
"pd.set_option('display.max_colwidth', -1)\n",
"pd.DataFrame(data = output, index = ['']).T"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Create a Random Dataset for Test Purposes"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Fixed seed so the synthetic data is the same on every run.\n",
"rng = np.random.RandomState(1)\n",
"\n",
"# 600 sample points drawn from [-100, 100), sorted, kept as a single column.\n",
"X_train = np.sort(200 * rng.rand(600, 1) - 100, axis = 0)\n",
"\n",
"# Two targets per sample, one per column: pi * sin(x) and pi * cos(x).\n",
"y_train = np.pi * np.hstack((np.sin(X_train), np.cos(X_train)))\n",
"\n",
"# Perturb every target with uniform noise from (-0.5, 0.5].\n",
"y_train += (0.5 - rng.rand(*y_train.shape))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Perform X and y transformation using the transformer function."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"X_train_transformed, y_train_transformed = multi_output_transform_x_y(X_train, y_train)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Configure AutoML using the transformed results."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"automl_config = AutoMLConfig(task = 'regression',\n",
|
|
" debug_log = 'automl_errors_multi.log',\n",
|
|
" primary_metric = 'r2_score',\n",
|
|
" iterations = 10,\n",
|
|
" n_cross_validations = 2,\n",
|
|
" verbosity = logging.INFO,\n",
|
|
" X = X_train_transformed,\n",
|
|
" y = y_train_transformed,\n",
|
|
" path = project_folder)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Fit the Transformed Data"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"local_run = experiment.submit(automl_config, show_output = True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Get the best fit model.\n",
|
|
"best_run, fitted_model = local_run.get_output()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Generate random data set for predicting.\n",
|
|
"X_test = np.sort(200 * rng.rand(200, 1) - 100, axis = 0)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Transform predict data.\n",
|
|
"X_test_transformed = multi_output_transformer_x(X_test, y_train.shape[1])\n",
|
|
"\n",
|
|
"# Predict and inverse transform the prediction.\n",
|
|
"y_predict = fitted_model.predict(X_test_transformed)\n",
|
|
"y_predict = multi_output_inverse_transform_y(y_predict, y_train.shape[1])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"print(y_predict)"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"authors": [
|
|
{
|
|
"name": "savitam"
|
|
}
|
|
],
|
|
"kernelspec": {
|
|
"display_name": "Python 3.6",
|
|
"language": "python",
|
|
"name": "python36"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.6.6"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|