MachineLearningNotebooks/automl/10.auto-ml-multi-output-example.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Copyright (c) Microsoft Corporation. All rights reserved.\n",
    "\n",
    "Licensed under the MIT License."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# AutoML 10: Multi-output\n",
    "\n",
    "This notebook shows how to use AutoML to train multi-output problems by leveraging the correlation between the outputs using indicator vectors.\n",
    "\n",
    "Make sure you have executed the [00.configuration](00.configuration.ipynb) before running this notebook."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import logging\n",
    "import os\n",
    "import random\n",
    "\n",
    "from matplotlib import pyplot as plt\n",
    "from matplotlib.pyplot import imshow\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from sklearn import datasets\n",
    "\n",
    "import azureml.core\n",
    "from azureml.core.experiment import Experiment\n",
    "from azureml.core.workspace import Workspace\n",
    "from azureml.train.automl import AutoMLConfig\n",
    "from azureml.train.automl.run import AutoMLRun"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Diagnostics\n",
    "\n",
    "Opt-in diagnostics for better experience, quality, and security of future releases."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from azureml.telemetry import set_diagnostics_collection\n",
    "set_diagnostics_collection(send_diagnostics = True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Transformer Functions\n",
    "The transformations of inputs `X` and `y` are happening as follows, e.g. `y = {y_1, y_2}`, then `X` becomes\n",
    "    \n",
    "`X 1 0`\n",
    "     \n",
    "`X 0 1`\n",
    "\n",
    "and `y` becomes,\n",
    "\n",
    "`y_1`\n",
    "\n",
    "`y_2`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from scipy import sparse\n",
    "from scipy import linalg\n",
    "\n",
    "#Transformer functions\n",
    "def multi_output_transform_x_y(X, y):\n",
    "    X_new = multi_output_transformer_x(X, y.shape[1])\n",
    "    y_new = multi_output_transform_y(y)\n",
    "    return X_new, y_new\n",
    "\n",
    "def multi_output_transformer_x(X, number_of_columns_y):\n",
    "    indicator_vecs = linalg.block_diag(*([np.ones((X.shape[0], 1))] * number_of_columns_y))\n",
    "    if sparse.issparse(X):\n",
    "        X_new = sparse.vstack(np.tile(X, number_of_columns_y))\n",
    "        indicator_vecs = sparse.coo_matrix(indicator_vecs)\n",
    "        X_new = sparse.hstack((X_new, indicator_vecs))\n",
    "    else:\n",
    "        X_new = np.tile(X, (number_of_columns_y, 1))\n",
    "        X_new = np.hstack((X_new, indicator_vecs))\n",
    "    return X_new\n",
    "\n",
    "def multi_output_transform_y(y):\n",
    "    return y.reshape(-1, order=\"F\")\n",
    "\n",
    "def multi_output_inverse_transform_y(y, number_of_columns_y):\n",
    "    return y.reshape((-1, number_of_columns_y), order = \"F\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## AutoML Experiment Setup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ws = Workspace.from_config()\n",
    "\n",
    "# Choose a name for the experiment and specify the project folder.\n",
    "experiment_name = 'automl-local-multi-output'\n",
    "project_folder = './sample_projects/automl-local-multi-output'\n",
    "\n",
    "experiment = Experiment(ws, experiment_name)\n",
    "\n",
    "output = {}\n",
    "output['SDK version'] = azureml.core.VERSION\n",
    "output['Subscription ID'] = ws.subscription_id\n",
    "output['Workspace'] = ws.name\n",
    "output['Resource Group'] = ws.resource_group\n",
    "output['Location'] = ws.location\n",
    "output['Project Directory'] = project_folder\n",
    "output['Experiment Name'] = experiment.name\n",
    "pd.set_option('display.max_colwidth', -1)\n",
    "pd.DataFrame(data = output, index = ['']).T"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Create a Random Dataset for Test Purposes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "rng = np.random.RandomState(1)\n",
    "X_train = np.sort(200 * rng.rand(600, 1) - 100, axis = 0)\n",
    "y_train = np.array([np.pi * np.sin(X_train).ravel(), np.pi * np.cos(X_train).ravel()]).T\n",
    "y_train += (0.5 - rng.rand(*y_train.shape))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Perform X and y transformation using the transformer function."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train_transformed, y_train_transformed = multi_output_transform_x_y(X_train, y_train)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Configure AutoML using the transformed results."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "automl_config = AutoMLConfig(task = 'regression',\n",
    "                             debug_log = 'automl_errors_multi.log',\n",
    "                             primary_metric = 'r2_score',\n",
    "                             iterations = 10,\n",
    "                             n_cross_validations = 2,\n",
    "                             verbosity = logging.INFO,\n",
    "                             X = X_train_transformed,\n",
    "                             y = y_train_transformed,\n",
    "                             path = project_folder)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Fit the Transformed Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "local_run = experiment.submit(automl_config, show_output = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Get the best fit model.\n",
    "best_run, fitted_model = local_run.get_output()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Generate random data set for predicting.\n",
    "X_test = np.sort(200 * rng.rand(200, 1) - 100, axis = 0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Transform predict data.\n",
    "X_test_transformed = multi_output_transformer_x(X_test, y_train.shape[1])\n",
    "\n",
    "# Predict and inverse transform the prediction.\n",
    "y_predict = fitted_model.predict(X_test_transformed)\n",
    "y_predict = multi_output_inverse_transform_y(y_predict, y_train.shape[1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(y_predict)"
   ]
  }
 ],
 "metadata": {
  "authors": [
   {
    "name": "savitam"
   }
  ],
  "kernelspec": {
   "display_name": "Python 3.6",
   "language": "python",
   "name": "python36"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}