mirror of
https://github.com/Azure/MachineLearningNotebooks.git
synced 2025-12-22 18:42:41 -05:00
Compare commits
8 Commits
azureml-sd
...
release_up
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
553fa43e17 | ||
|
|
e98131729e | ||
|
|
fd2b09e2c2 | ||
|
|
7970209069 | ||
|
|
24f8651bb5 | ||
|
|
b881f78e46 | ||
|
|
057e22b253 | ||
|
|
c520bd1d41 |
@@ -103,7 +103,7 @@
|
||||
"source": [
|
||||
"import azureml.core\n",
|
||||
"\n",
|
||||
"print(\"This notebook was created using version 1.2.0 of the Azure ML SDK\")\n",
|
||||
"print(\"This notebook was created using version 1.5.0 of the Azure ML SDK\")\n",
|
||||
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
# Table of Contents
|
||||
1. [Automated ML Introduction](#introduction)
|
||||
1. [Setup using Azure Notebooks](#jupyter)
|
||||
1. [Setup using Azure Databricks](#databricks)
|
||||
1. [Setup using Compute Instances](#jupyter)
|
||||
1. [Setup using a Local Conda environment](#localconda)
|
||||
1. [Setup using Azure Databricks](#databricks)
|
||||
1. [Automated ML SDK Sample Notebooks](#samples)
|
||||
1. [Documentation](#documentation)
|
||||
1. [Running using python command](#pythoncommand)
|
||||
@@ -21,13 +21,13 @@ Below are the three execution environments supported by automated ML.
|
||||
|
||||
|
||||
<a name="jupyter"></a>
|
||||
## Setup using Notebook VMs - Jupyter based notebooks from a Azure VM
|
||||
## Setup using Compute Instances - Jupyter-based notebooks from an Azure Virtual Machine
|
||||
|
||||
1. Open the [ML Azure portal](https://ml.azure.com)
|
||||
1. Select Compute
|
||||
1. Select Notebook VMs
|
||||
1. Select Compute Instances
|
||||
1. Click New
|
||||
1. Type a name for the Vm and select a VM type
|
||||
1. Type a Compute Name, select a Virtual Machine type and select a Virtual Machine size
|
||||
1. Click Create
|
||||
|
||||
<a name="localconda"></a>
|
||||
@@ -117,7 +117,7 @@ jupyter notebook
|
||||
- Simple example of using automated ML for regression
|
||||
- Uses azure compute for training
|
||||
|
||||
- [auto-ml-regression-hardware-performance-explanation-and-featurization.ipynb](regression-hardware-performance-explanation-and-featurization/auto-ml-regression-hardware-performance-explanation-and-featurization.ipynb)
|
||||
- [auto-ml-regression-explanation-featurization.ipynb](regression-explanation-featurization/auto-ml-regression-explanation-featurization.ipynb)
|
||||
- Dataset: Hardware Performance Dataset
|
||||
- Shows featurization and explanation
|
||||
- Uses azure compute for training
|
||||
@@ -152,7 +152,7 @@ jupyter notebook
|
||||
- Beer Production Forecasting
|
||||
|
||||
- [auto-ml-continuous-retraining.ipynb](continuous-retraining/auto-ml-continuous-retraining.ipynb)
|
||||
- Continous retraining using Pipelines and Time-Series TabularDataset
|
||||
- Continuous retraining using Pipelines and Time-Series TabularDataset
|
||||
|
||||
- [auto-ml-classification-text-dnn.ipynb](classification-text-dnn/auto-ml-classification-text-dnn.ipynb)
|
||||
- Classification with text data using deep learning in AutoML
|
||||
|
||||
@@ -5,19 +5,18 @@ dependencies:
|
||||
- pip<=19.3.1
|
||||
- nomkl
|
||||
- python>=3.5.2,<3.6.8
|
||||
- wheel==0.30.0
|
||||
- nb_conda
|
||||
- matplotlib==2.1.0
|
||||
- numpy>=1.16.0,<=1.16.2
|
||||
- cython
|
||||
- urllib3<1.24
|
||||
- scipy>=1.0.0,<=1.1.0
|
||||
- scipy==1.4.1
|
||||
- scikit-learn>=0.19.0,<=0.20.3
|
||||
- pandas>=0.22.0,<0.23.0
|
||||
- py-xgboost<=0.80
|
||||
- fbprophet==0.5
|
||||
- pytorch=1.1.0
|
||||
- cudatoolkit=9.0
|
||||
- conda-forge::fbprophet==0.5
|
||||
- pytorch::pytorch=1.4.0
|
||||
- cudatoolkit=10.1.243
|
||||
|
||||
- pip:
|
||||
# Required packages for AzureML execution, history, and data preparation.
|
||||
@@ -27,13 +26,6 @@ dependencies:
|
||||
- azureml-train
|
||||
- azureml-widgets
|
||||
- azureml-pipeline
|
||||
- azureml-contrib-interpret
|
||||
- pytorch-transformers==1.0.0
|
||||
- spacy==2.1.8
|
||||
- onnxruntime==1.0.0
|
||||
- https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
|
||||
|
||||
channels:
|
||||
- anaconda
|
||||
- conda-forge
|
||||
- pytorch
|
||||
@@ -1,38 +1,33 @@
|
||||
name: azure_automl
|
||||
name: automl_env_master
|
||||
dependencies:
|
||||
# The python interpreter version.
|
||||
# Currently Azure ML only supports 3.5.2 and later.
|
||||
- pip<=19.3.1
|
||||
- python>=3.5.2,<3.6.8
|
||||
- wheel==0.30.0
|
||||
- nb_conda
|
||||
- matplotlib==2.1.0
|
||||
- numpy>=1.16.0,<=1.16.2
|
||||
- cython
|
||||
- urllib3<1.24
|
||||
- scipy>=1.0.0,<=1.1.0
|
||||
- scipy==1.4.1
|
||||
- scikit-learn>=0.19.0,<=0.20.3
|
||||
- pandas>=0.22.0,<=0.23.4
|
||||
- testpath=0.3.1
|
||||
- py-xgboost<=0.90
|
||||
- fbprophet==0.5
|
||||
- pytorch=1.1.0
|
||||
- cudatoolkit=9.0
|
||||
- conda-forge::fbprophet==0.5
|
||||
- pytorch::pytorch=1.4.0
|
||||
- cudatoolkit=10.1.243
|
||||
|
||||
- pip:
|
||||
# Required packages for AzureML execution, history, and data preparation.
|
||||
- azureml-defaults
|
||||
- --extra-index-url https://azuremlsdktestpypi.azureedge.net/sdk-release/master/588E708E0DF342C4A80BD954289657CF
|
||||
- --extra-index-url https://dataprepdownloads.azureedge.net/pypi/weekly-rc-932B96D048E011E8B56608/latest/
|
||||
- azureml-defaults<0.1.50
|
||||
- azureml-dataprep[pandas]
|
||||
- azureml-train-automl
|
||||
- azureml-train
|
||||
- azureml-widgets
|
||||
- azureml-pipeline
|
||||
- azureml-contrib-interpret
|
||||
- azureml-train-automl<0.1.50
|
||||
- azureml-train<0.1.50
|
||||
- azureml-widgets<0.1.50
|
||||
- azureml-pipeline<0.1.50
|
||||
- pytorch-transformers==1.0.0
|
||||
- spacy==2.1.8
|
||||
- onnxruntime==1.0.0
|
||||
- https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
|
||||
|
||||
channels:
|
||||
- anaconda
|
||||
- conda-forge
|
||||
- pytorch
|
||||
@@ -41,7 +41,7 @@
|
||||
"\n",
|
||||
"In this example we use the UCI Bank Marketing dataset to showcase how you can use AutoML for a classification problem and deploy it to an Azure Container Instance (ACI). The classification goal is to predict if the client will subscribe to a term deposit with the bank.\n",
|
||||
"\n",
|
||||
"If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [configuration](../../../configuration.ipynb) notebook first if you haven't already to establish your connection to the AzureML Workspace. \n",
|
||||
"If you are using an Azure Machine Learning Compute Instance, you are all set. Otherwise, go through the [configuration](../../../configuration.ipynb) notebook first if you haven't already to establish your connection to the AzureML Workspace. \n",
|
||||
"\n",
|
||||
"Please find the ONNX-related documentation [here](https://github.com/onnx/onnx).\n",
|
||||
"\n",
|
||||
@@ -92,6 +92,23 @@
|
||||
"from azureml.explain.model._internal.explanation_client import ExplanationClient"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This sample notebook may use features that are not available in previous versions of the Azure ML SDK."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(\"This notebook was created using version 1.5.0 of the Azure ML SDK\")\n",
|
||||
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -132,7 +149,6 @@
|
||||
"experiment=Experiment(ws, experiment_name)\n",
|
||||
"\n",
|
||||
"output = {}\n",
|
||||
"output['SDK version'] = azureml.core.VERSION\n",
|
||||
"output['Subscription ID'] = ws.subscription_id\n",
|
||||
"output['Workspace'] = ws.name\n",
|
||||
"output['Resource Group'] = ws.resource_group\n",
|
||||
@@ -160,35 +176,22 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.compute import AmlCompute\n",
|
||||
"from azureml.core.compute import ComputeTarget\n",
|
||||
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
|
||||
"from azureml.core.compute_target import ComputeTargetException\n",
|
||||
"\n",
|
||||
"# Choose a name for your cluster.\n",
|
||||
"amlcompute_cluster_name = \"cpu-cluster-4\"\n",
|
||||
"# Choose a name for your CPU cluster\n",
|
||||
"cpu_cluster_name = \"cpu-cluster-4\"\n",
|
||||
"\n",
|
||||
"found = False\n",
|
||||
"# Check if this compute target already exists in the workspace.\n",
|
||||
"cts = ws.compute_targets\n",
|
||||
"if amlcompute_cluster_name in cts and cts[amlcompute_cluster_name].type == 'AmlCompute':\n",
|
||||
" found = True\n",
|
||||
" print('Found existing compute target.')\n",
|
||||
" compute_target = cts[amlcompute_cluster_name]\n",
|
||||
" \n",
|
||||
"if not found:\n",
|
||||
" print('Creating a new compute target...')\n",
|
||||
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\", # for GPU, use \"STANDARD_NC6\"\n",
|
||||
" #vm_priority = 'lowpriority', # optional\n",
|
||||
" max_nodes = 6)\n",
|
||||
"# Verify that cluster does not exist already\n",
|
||||
"try:\n",
|
||||
" compute_target = ComputeTarget(workspace=ws, name=cpu_cluster_name)\n",
|
||||
" print('Found existing cluster, use it.')\n",
|
||||
"except ComputeTargetException:\n",
|
||||
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',\n",
|
||||
" max_nodes=6)\n",
|
||||
" compute_target = ComputeTarget.create(ws, cpu_cluster_name, compute_config)\n",
|
||||
"\n",
|
||||
" # Create the cluster.\n",
|
||||
" compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)\n",
|
||||
" \n",
|
||||
"print('Checking cluster status...')\n",
|
||||
"# Can poll for a minimum number of nodes and for a specific timeout.\n",
|
||||
"# If no min_node_count is provided, it will use the scale settings for the cluster.\n",
|
||||
"compute_target.wait_for_completion(show_output = True, min_node_count = None, timeout_in_minutes = 20)\n",
|
||||
" \n",
|
||||
"# For a more detailed view of current AmlCompute status, use get_status()."
|
||||
"compute_target.wait_for_completion(show_output=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -394,8 +397,6 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#from azureml.train.automl.run import AutoMLRun\n",
|
||||
"#experiment_name = 'automl-classification-bmarketing'\n",
|
||||
"#experiment = Experiment(ws, experiment_name)\n",
|
||||
"#remote_run = AutoMLRun(experiment=experiment, run_id='<run_ID_goes_here')\n",
|
||||
"#remote_run"
|
||||
]
|
||||
@@ -642,7 +643,7 @@
|
||||
"\n",
|
||||
"### Retrieve the Best Model\n",
|
||||
"\n",
|
||||
"Below we select the best pipeline from our iterations. The `get_output` method on `automl_classifier` returns the best run and the fitted model for the last invocation. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*."
|
||||
"Below we select the best pipeline from our iterations. The `get_output` method returns the best run and the fitted model. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -6,5 +6,3 @@ dependencies:
|
||||
- azureml-widgets
|
||||
- matplotlib
|
||||
- onnxruntime==1.0.0
|
||||
- azureml-explain-model
|
||||
- azureml-contrib-interpret
|
||||
|
||||
@@ -42,7 +42,7 @@
|
||||
"\n",
|
||||
"This notebook is using remote compute to train the model.\n",
|
||||
"\n",
|
||||
"If you are using an Azure Machine Learning [Notebook VM](https://docs.microsoft.com/en-us/azure/machine-learning/service/tutorial-1st-experiment-sdk-setup), you are all set. Otherwise, go through the [configuration](../../../configuration.ipynb) notebook first if you haven't already to establish your connection to the AzureML Workspace. \n",
|
||||
"If you are using an Azure Machine Learning Compute Instance, you are all set. Otherwise, go through the [configuration](../../../configuration.ipynb) notebook first if you haven't already to establish your connection to the AzureML Workspace. \n",
|
||||
"\n",
|
||||
"In this notebook you will learn how to:\n",
|
||||
"1. Create an experiment using an existing workspace.\n",
|
||||
@@ -80,6 +80,23 @@
|
||||
"from azureml.train.automl import AutoMLConfig"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This sample notebook may use features that are not available in previous versions of the Azure ML SDK."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(\"This notebook was created using version 1.5.0 of the Azure ML SDK\")\n",
|
||||
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@@ -94,7 +111,6 @@
|
||||
"experiment=Experiment(ws, experiment_name)\n",
|
||||
"\n",
|
||||
"output = {}\n",
|
||||
"output['SDK version'] = azureml.core.VERSION\n",
|
||||
"output['Subscription ID'] = ws.subscription_id\n",
|
||||
"output['Workspace'] = ws.name\n",
|
||||
"output['Resource Group'] = ws.resource_group\n",
|
||||
@@ -306,7 +322,7 @@
|
||||
"\n",
|
||||
"### Retrieve the Best Model\n",
|
||||
"\n",
|
||||
"Below we select the best pipeline from our iterations. The `get_output` method on `automl_classifier` returns the best run and the fitted model for the last invocation. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*."
|
||||
"Below we select the best pipeline from our iterations. The `get_output` method returns the best run and the fitted model. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -5,4 +5,3 @@ dependencies:
|
||||
- azureml-train-automl
|
||||
- azureml-widgets
|
||||
- matplotlib
|
||||
- azureml-explain-model
|
||||
|
||||
@@ -47,8 +47,8 @@
|
||||
"Notebook synopsis:\n",
|
||||
"1. Creating an Experiment in an existing Workspace\n",
|
||||
"2. Configuration and remote run of AutoML for a text dataset (20 Newsgroups dataset from scikit-learn) for classification\n",
|
||||
"3. Evaluating the final model on a test set\n",
|
||||
"4. Deploying the model on ACI"
|
||||
"3. Registering the best model for future use\n",
|
||||
"4. Evaluating the final model on a test set"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -84,6 +84,23 @@
|
||||
"from sklearn.datasets import fetch_20newsgroups"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This sample notebook may use features that are not available in previous versions of the Azure ML SDK."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(\"This notebook was created using version 1.5.0 of the Azure ML SDK\")\n",
|
||||
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -105,7 +122,6 @@
|
||||
"experiment = Experiment(ws, experiment_name)\n",
|
||||
"\n",
|
||||
"output = {}\n",
|
||||
"output['SDK version'] = azureml.core.VERSION\n",
|
||||
"output['Subscription ID'] = ws.subscription_id\n",
|
||||
"output['Workspace Name'] = ws.name\n",
|
||||
"output['Resource Group'] = ws.resource_group\n",
|
||||
@@ -132,34 +148,25 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
|
||||
"from azureml.core.compute_target import ComputeTargetException\n",
|
||||
"\n",
|
||||
"# Choose a name for your cluster.\n",
|
||||
"amlcompute_cluster_name = \"dnntext-cluster\"\n",
|
||||
"\n",
|
||||
"found = False\n",
|
||||
"# Check if this compute target already exists in the workspace.\n",
|
||||
"cts = ws.compute_targets\n",
|
||||
"if amlcompute_cluster_name in cts and cts[amlcompute_cluster_name].type == 'AmlCompute':\n",
|
||||
" found = True\n",
|
||||
" print('Found existing compute target.')\n",
|
||||
" compute_target = cts[amlcompute_cluster_name]\n",
|
||||
"\n",
|
||||
"if not found:\n",
|
||||
" print('Creating a new compute target...')\n",
|
||||
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_NC6\", # CPU for BiLSTM, such as \"STANDARD_D2_V2\" \n",
|
||||
"# Verify that cluster does not exist already\n",
|
||||
"try:\n",
|
||||
" compute_target = ComputeTarget(workspace=ws, name=amlcompute_cluster_name)\n",
|
||||
" print('Found existing cluster, use it.')\n",
|
||||
"except ComputeTargetException:\n",
|
||||
" compute_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_NC6\", # CPU for BiLSTM, such as \"STANDARD_D2_V2\" \n",
|
||||
" # To use BERT (this is recommended for best performance), select a GPU such as \"STANDARD_NC6\" \n",
|
||||
" # or similar GPU option\n",
|
||||
" # available in your workspace\n",
|
||||
" max_nodes = 1)\n",
|
||||
" compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, compute_config)\n",
|
||||
"\n",
|
||||
" # Create the cluster\n",
|
||||
" compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)\n",
|
||||
"\n",
|
||||
"print('Checking cluster status...')\n",
|
||||
"# Can poll for a minimum number of nodes and for a specific timeout.\n",
|
||||
"# If no min_node_count is provided, it will use the scale settings for the cluster.\n",
|
||||
"compute_target.wait_for_completion(show_output = True, min_node_count = None, timeout_in_minutes = 20)\n",
|
||||
"\n",
|
||||
"# For a more detailed view of current AmlCompute status, use get_status()."
|
||||
"compute_target.wait_for_completion(show_output=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -187,8 +194,8 @@
|
||||
" '''\n",
|
||||
" remove = ('headers', 'footers', 'quotes')\n",
|
||||
" categories = [\n",
|
||||
" 'alt.atheism',\n",
|
||||
" 'talk.religion.misc',\n",
|
||||
" 'rec.sport.baseball',\n",
|
||||
" 'rec.sport.hockey',\n",
|
||||
" 'comp.graphics',\n",
|
||||
" 'sci.space',\n",
|
||||
" ]\n",
|
||||
@@ -338,7 +345,8 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can test the model locally to get a feel of the input/output. This step may require additional package installations such as pytorch."
|
||||
"You can test the model locally to get a feel of the input/output. When the model contains BERT, this step will require pytorch and pytorch-transformers installed in your local environment. The exact versions of these packages can be found in the **automl_env.yml** file located in the local copy of your MachineLearningNotebooks folder here:\n",
|
||||
"MachineLearningNotebooks/how-to-use-azureml/automated-machine-learning/automl_env.yml"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -373,8 +381,8 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Deploying the model\n",
|
||||
"We now use the best fitted model from the AutoML Run to make predictions on the test set. "
|
||||
"### Registering the best model\n",
|
||||
"We now register the best fitted model from the AutoML Run for use in future deployments. "
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -474,7 +482,7 @@
|
||||
"source": [
|
||||
"script_folder = os.path.join(os.getcwd(), 'inference')\n",
|
||||
"os.makedirs(script_folder, exist_ok=True)\n",
|
||||
"shutil.copy2('infer.py', script_folder)"
|
||||
"shutil.copy('infer.py', script_folder)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -5,7 +5,6 @@ dependencies:
|
||||
- azureml-train-automl
|
||||
- azureml-widgets
|
||||
- matplotlib
|
||||
- azurmel-train
|
||||
- https://download.pytorch.org/whl/cpu/torch-1.1.0-cp35-cp35m-win_amd64.whl
|
||||
- sentencepiece==0.1.82
|
||||
- pytorch-transformers==1.0
|
||||
|
||||
@@ -2,8 +2,7 @@ import numpy as np
|
||||
import argparse
|
||||
from azureml.core import Run
|
||||
from sklearn.externals import joblib
|
||||
from azureml.automl.core._vendor.automl.client.core.common import metrics
|
||||
from automl.client.core.common import constants
|
||||
from azureml.automl.core.shared import constants, metrics
|
||||
from azureml.core.model import Model
|
||||
|
||||
|
||||
|
||||
@@ -20,7 +20,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Automated Machine Learning \n",
|
||||
"**Continous retraining using Pipelines and Time-Series TabularDataset**\n",
|
||||
"**Continuous retraining using Pipelines and Time-Series TabularDataset**\n",
|
||||
"## Contents\n",
|
||||
"1. [Introduction](#Introduction)\n",
|
||||
"2. [Setup](#Setup)\n",
|
||||
@@ -75,6 +75,23 @@
|
||||
"from azureml.train.automl import AutoMLConfig"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This sample notebook may use features that are not available in previous versions of the Azure ML SDK."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(\"This notebook was created using version 1.5.0 of the Azure ML SDK\")\n",
|
||||
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -112,7 +129,6 @@
|
||||
"experiment = Experiment(ws, experiment_name)\n",
|
||||
"\n",
|
||||
"output = {}\n",
|
||||
"output['SDK version'] = azureml.core.VERSION\n",
|
||||
"output['Subscription ID'] = ws.subscription_id\n",
|
||||
"output['Workspace'] = ws.name\n",
|
||||
"output['Resource Group'] = ws.resource_group\n",
|
||||
@@ -143,33 +159,22 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.compute import AmlCompute, ComputeTarget\n",
|
||||
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
|
||||
"from azureml.core.compute_target import ComputeTargetException\n",
|
||||
"\n",
|
||||
"# Choose a name for your cluster.\n",
|
||||
"amlcompute_cluster_name = \"cpu-cluster-42\"\n",
|
||||
"# Choose a name for your CPU cluster\n",
|
||||
"amlcompute_cluster_name = \"cont-cluster\"\n",
|
||||
"\n",
|
||||
"found = False\n",
|
||||
"# Check if this compute target already exists in the workspace.\n",
|
||||
"cts = ws.compute_targets\n",
|
||||
"if amlcompute_cluster_name in cts and cts[amlcompute_cluster_name].type == 'AmlCompute':\n",
|
||||
" found = True\n",
|
||||
" print('Found existing compute target.')\n",
|
||||
" compute_target = cts[amlcompute_cluster_name]\n",
|
||||
" \n",
|
||||
"if not found:\n",
|
||||
" print('Creating a new compute target...')\n",
|
||||
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\", # for GPU, use \"STANDARD_NC6\"\n",
|
||||
" #vm_priority = 'lowpriority', # optional\n",
|
||||
" max_nodes = 4)\n",
|
||||
"# Verify that cluster does not exist already\n",
|
||||
"try:\n",
|
||||
" compute_target = ComputeTarget(workspace=ws, name=amlcompute_cluster_name)\n",
|
||||
" print('Found existing cluster, use it.')\n",
|
||||
"except ComputeTargetException:\n",
|
||||
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',\n",
|
||||
" max_nodes=4)\n",
|
||||
" compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, compute_config)\n",
|
||||
"\n",
|
||||
" # Create the cluster.\n",
|
||||
" compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)\n",
|
||||
" \n",
|
||||
" # Can poll for a minimum number of nodes and for a specific timeout.\n",
|
||||
" # If no min_node_count is provided, it will use the scale settings for the cluster.\n",
|
||||
" compute_target.wait_for_completion(show_output = True, min_node_count = 0, timeout_in_minutes = 10)\n",
|
||||
" \n",
|
||||
" # For a more detailed view of current AmlCompute status, use get_status()."
|
||||
"compute_target.wait_for_completion(show_output=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -101,6 +101,23 @@
|
||||
"from azureml.train.estimator import Estimator"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This sample notebook may use features that are not available in previous versions of the Azure ML SDK."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(\"This notebook was created using version 1.5.0 of the Azure ML SDK\")\n",
|
||||
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
@@ -128,7 +145,6 @@
|
||||
"experiment = Experiment(ws, experiment_name)\n",
|
||||
"\n",
|
||||
"output = {}\n",
|
||||
"output['SDK version'] = azureml.core.VERSION\n",
|
||||
"output['Subscription ID'] = ws.subscription_id\n",
|
||||
"output['Workspace'] = ws.name\n",
|
||||
"output['Resource Group'] = ws.resource_group\n",
|
||||
@@ -163,7 +179,7 @@
|
||||
"from azureml.core.compute_target import ComputeTargetException\n",
|
||||
"\n",
|
||||
"# Choose a name for your CPU cluster\n",
|
||||
"cpu_cluster_name = \"cpu-cluster\"\n",
|
||||
"cpu_cluster_name = \"beer-cluster\"\n",
|
||||
"\n",
|
||||
"# Verify that cluster does not exist already\n",
|
||||
"try:\n",
|
||||
@@ -218,19 +234,18 @@
|
||||
"import pandas as pd\n",
|
||||
"from pandas import DataFrame\n",
|
||||
"from pandas import Grouper\n",
|
||||
"from matplotlib import pyplot\n",
|
||||
"from pandas import concat\n",
|
||||
"from matplotlib import pyplot\n",
|
||||
"from pandas.plotting import register_matplotlib_converters\n",
|
||||
"\n",
|
||||
"register_matplotlib_converters()\n",
|
||||
"plt.tight_layout()\n",
|
||||
"plt.figure(figsize=(20, 10))\n",
|
||||
"plt.tight_layout()\n",
|
||||
"\n",
|
||||
"plt.subplot(2, 1, 1)\n",
|
||||
"plt.title('Beer Production By Year')\n",
|
||||
"df = pd.read_csv(\"Beer_no_valid_split_train.csv\", parse_dates=True, index_col= 'DATE').drop(columns='grain')\n",
|
||||
"test_df = pd.read_csv(\"Beer_no_valid_split_test.csv\", parse_dates=True, index_col= 'DATE').drop(columns='grain')\n",
|
||||
"pyplot.plot(df)\n",
|
||||
"plt.plot(df)\n",
|
||||
"\n",
|
||||
"plt.subplot(2, 1, 2)\n",
|
||||
"plt.title('Beer Production By Month')\n",
|
||||
@@ -239,7 +254,8 @@
|
||||
"months = DataFrame(months)\n",
|
||||
"months.columns = range(1,13)\n",
|
||||
"months.boxplot()\n",
|
||||
"pyplot.show()\n"
|
||||
"\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -538,7 +554,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"compute_target = ws.compute_targets['cpu-cluster']\n",
|
||||
"compute_target = ws.compute_targets['beer-cluster']\n",
|
||||
"test_experiment = Experiment(ws, experiment_name + \"_test\")"
|
||||
]
|
||||
},
|
||||
@@ -556,7 +572,7 @@
|
||||
"\n",
|
||||
"script_folder = os.path.join(os.getcwd(), 'inference')\n",
|
||||
"os.makedirs(script_folder, exist_ok=True)\n",
|
||||
"shutil.copy2('infer.py', script_folder)"
|
||||
"shutil.copy('infer.py', script_folder)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -4,6 +4,7 @@ dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- numpy==1.16.2
|
||||
- pandas==0.23.4
|
||||
- azureml-train-automl
|
||||
- azureml-widgets
|
||||
- matplotlib
|
||||
|
||||
@@ -4,8 +4,7 @@ import argparse
|
||||
from azureml.core import Run
|
||||
from sklearn.externals import joblib
|
||||
from sklearn.metrics import mean_absolute_error, mean_squared_error
|
||||
from azureml.automl.core._vendor.automl.client.core.common import metrics
|
||||
from automl.client.core.common import constants
|
||||
from azureml.automl.core.shared import constants, metrics
|
||||
from pandas.tseries.frequencies import to_offset
|
||||
|
||||
|
||||
|
||||
@@ -74,6 +74,23 @@
|
||||
"from datetime import datetime"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This sample notebook may use features that are not available in previous versions of the Azure ML SDK."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(\"This notebook was created using version 1.5.0 of the Azure ML SDK\")\n",
|
||||
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -95,7 +112,6 @@
|
||||
"experiment = Experiment(ws, experiment_name)\n",
|
||||
"\n",
|
||||
"output = {}\n",
|
||||
"output['SDK version'] = azureml.core.VERSION\n",
|
||||
"output['Subscription ID'] = ws.subscription_id\n",
|
||||
"output['Workspace'] = ws.name\n",
|
||||
"output['SKU'] = ws.sku\n",
|
||||
@@ -124,35 +140,22 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.compute import AmlCompute\n",
|
||||
"from azureml.core.compute import ComputeTarget\n",
|
||||
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
|
||||
"from azureml.core.compute_target import ComputeTargetException\n",
|
||||
"\n",
|
||||
"# Choose a name for your cluster.\n",
|
||||
"amlcompute_cluster_name = \"cpu-cluster-bike\"\n",
|
||||
"amlcompute_cluster_name = \"bike-cluster\"\n",
|
||||
"\n",
|
||||
"found = False\n",
|
||||
"# Check if this compute target already exists in the workspace.\n",
|
||||
"cts = ws.compute_targets\n",
|
||||
"if amlcompute_cluster_name in cts and cts[amlcompute_cluster_name].type == 'AmlCompute':\n",
|
||||
" found = True\n",
|
||||
" print('Found existing compute target.')\n",
|
||||
" compute_target = cts[amlcompute_cluster_name]\n",
|
||||
" \n",
|
||||
"if not found:\n",
|
||||
" print('Creating a new compute target...')\n",
|
||||
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\", # for GPU, use \"STANDARD_NC6\"\n",
|
||||
" #vm_priority = 'lowpriority', # optional\n",
|
||||
" max_nodes = 4)\n",
|
||||
"# Verify that cluster does not exist already\n",
|
||||
"try:\n",
|
||||
" compute_target = ComputeTarget(workspace=ws, name=amlcompute_cluster_name)\n",
|
||||
" print('Found existing cluster, use it.')\n",
|
||||
"except ComputeTargetException:\n",
|
||||
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',\n",
|
||||
" max_nodes=4)\n",
|
||||
" compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, compute_config)\n",
|
||||
"\n",
|
||||
" # Create the cluster.\n",
|
||||
" compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)\n",
|
||||
" \n",
|
||||
"print('Checking cluster status...')\n",
|
||||
"# Can poll for a minimum number of nodes and for a specific timeout.\n",
|
||||
"# If no min_node_count is provided, it will use the scale settings for the cluster.\n",
|
||||
"compute_target.wait_for_completion(show_output = True, min_node_count = None, timeout_in_minutes = 20)\n",
|
||||
" \n",
|
||||
"# For a more detailed view of current AmlCompute status, use get_status()."
|
||||
"compute_target.wait_for_completion(show_output=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -450,8 +453,8 @@
|
||||
"\n",
|
||||
"script_folder = os.path.join(os.getcwd(), 'forecast')\n",
|
||||
"os.makedirs(script_folder, exist_ok=True)\n",
|
||||
"shutil.copy2('forecasting_script.py', script_folder)\n",
|
||||
"shutil.copy2('forecasting_helper.py', script_folder)"
|
||||
"shutil.copy('forecasting_script.py', script_folder)\n",
|
||||
"shutil.copy('forecasting_helper.py', script_folder)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -507,10 +510,9 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.automl.core._vendor.automl.client.core.common import metrics\n",
|
||||
"from azureml.automl.core.shared import constants, metrics\n",
|
||||
"from sklearn.metrics import mean_absolute_error, mean_squared_error\n",
|
||||
"from matplotlib import pyplot as plt\n",
|
||||
"from automl.client.core.common import constants\n",
|
||||
"\n",
|
||||
"# use automl metrics module\n",
|
||||
"scores = metrics.compute_metrics_regression(\n",
|
||||
|
||||
@@ -4,6 +4,7 @@ dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- numpy==1.16.2
|
||||
- pandas==0.23.4
|
||||
- azureml-train-automl
|
||||
- azureml-widgets
|
||||
- matplotlib
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import argparse
|
||||
import azureml.train.automl
|
||||
from azureml.automl.runtime._vendor.automl.client.core.runtime import forecasting_models
|
||||
from azureml.automl.runtime.shared import forecasting_models
|
||||
from azureml.core import Run
|
||||
from sklearn.externals import joblib
|
||||
import forecasting_helper
|
||||
|
||||
@@ -28,7 +28,6 @@
|
||||
"1. [Setup](#Setup)\n",
|
||||
"1. [Data and Forecasting Configurations](#Data)\n",
|
||||
"1. [Train](#Train)\n",
|
||||
"1. [Results](#Results)\n",
|
||||
"\n",
|
||||
"Advanced Forecasting\n",
|
||||
"1. [Advanced Training](#advanced_training)\n",
|
||||
@@ -43,7 +42,7 @@
|
||||
"\n",
|
||||
"In this example we use the associated New York City energy demand dataset to showcase how you can use AutoML for a simple forecasting problem and explore the results. The goal is predict the energy demand for the next 48 hours based on historic time-series data.\n",
|
||||
"\n",
|
||||
"If you are using an Azure Machine Learning [Notebook VM](https://docs.microsoft.com/en-us/azure/machine-learning/service/tutorial-1st-experiment-sdk-setup), you are all set. Otherwise, go through the [configuration notebook](../../../configuration.ipynb) first, if you haven't already, to establish your connection to the AzureML Workspace.\n",
|
||||
"If you are using an Azure Machine Learning Compute Instance, you are all set. Otherwise, go through the [configuration notebook](../../../configuration.ipynb) first, if you haven't already, to establish your connection to the AzureML Workspace.\n",
|
||||
"\n",
|
||||
"In this notebook you will learn how to:\n",
|
||||
"1. Creating an Experiment using an existing Workspace\n",
|
||||
@@ -85,6 +84,23 @@
|
||||
"from datetime import datetime"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This sample notebook may use features that are not available in previous versions of the Azure ML SDK."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(\"This notebook was created using version 1.5.0 of the Azure ML SDK\")\n",
|
||||
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -109,7 +125,6 @@
|
||||
"experiment = Experiment(ws, experiment_name)\n",
|
||||
"\n",
|
||||
"output = {}\n",
|
||||
"output['SDK version'] = azureml.core.VERSION\n",
|
||||
"output['Subscription ID'] = ws.subscription_id\n",
|
||||
"output['Workspace'] = ws.name\n",
|
||||
"output['Resource Group'] = ws.resource_group\n",
|
||||
@@ -140,35 +155,22 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.compute import AmlCompute\n",
|
||||
"from azureml.core.compute import ComputeTarget\n",
|
||||
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
|
||||
"from azureml.core.compute_target import ComputeTargetException\n",
|
||||
"\n",
|
||||
"# Choose a name for your cluster.\n",
|
||||
"amlcompute_cluster_name = \"aml-compute\"\n",
|
||||
"amlcompute_cluster_name = \"energy-cluster\"\n",
|
||||
"\n",
|
||||
"found = False\n",
|
||||
"# Check if this compute target already exists in the workspace.\n",
|
||||
"cts = ws.compute_targets\n",
|
||||
"if amlcompute_cluster_name in cts and cts[amlcompute_cluster_name].type == 'AmlCompute':\n",
|
||||
" found = True\n",
|
||||
" print('Found existing compute target.')\n",
|
||||
" compute_target = cts[amlcompute_cluster_name]\n",
|
||||
"# Verify that cluster does not exist already\n",
|
||||
"try:\n",
|
||||
" compute_target = ComputeTarget(workspace=ws, name=amlcompute_cluster_name)\n",
|
||||
" print('Found existing cluster, use it.')\n",
|
||||
"except ComputeTargetException:\n",
|
||||
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_DS12_V2',\n",
|
||||
" max_nodes=6)\n",
|
||||
" compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, compute_config)\n",
|
||||
"\n",
|
||||
"if not found:\n",
|
||||
" print('Creating a new compute target...')\n",
|
||||
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_DS12_V2\", # for GPU, use \"STANDARD_NC6\"\n",
|
||||
" #vm_priority = 'lowpriority', # optional\n",
|
||||
" max_nodes = 6)\n",
|
||||
"\n",
|
||||
" # Create the cluster.\\n\",\n",
|
||||
" compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)\n",
|
||||
"\n",
|
||||
"print('Checking cluster status...')\n",
|
||||
"# Can poll for a minimum number of nodes and for a specific timeout.\n",
|
||||
"# If no min_node_count is provided, it will use the scale settings for the cluster.\n",
|
||||
"compute_target.wait_for_completion(show_output = True, min_node_count = None, timeout_in_minutes = 20)\n",
|
||||
"\n",
|
||||
"# For a more detailed view of current AmlCompute status, use get_status()."
|
||||
"compute_target.wait_for_completion(show_output=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -463,7 +465,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Forecast Function\n",
|
||||
"For forecasting, we will use the forecast function instead of the predict function. Using the predict method would result in getting predictions for EVERY horizon the forecaster can predict at. This is useful when training and evaluating the performance of the forecaster at various horizons, but the level of detail is excessive for normal use. Forecast function also can handle more complicated scenarios, see notebook on [high frequency forecasting](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/forecasting-high-frequency/automl-forecasting-function.ipynb)."
|
||||
"For forecasting, we will use the forecast function instead of the predict function. Using the predict method would result in getting predictions for EVERY horizon the forecaster can predict at. This is useful when training and evaluating the performance of the forecaster at various horizons, but the level of detail is excessive for normal use. Forecast function also can handle more complicated scenarios, see notebook on [high frequency forecasting](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/forecasting-high-frequency/auto-ml-forecasting-function.ipynb)."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -505,9 +507,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.automl.core._vendor.automl.client.core.common import metrics\n",
|
||||
"from azureml.automl.core.shared import constants, metrics\n",
|
||||
"from matplotlib import pyplot as plt\n",
|
||||
"from automl.client.core.common import constants\n",
|
||||
"\n",
|
||||
"# use automl metrics module\n",
|
||||
"scores = metrics.compute_metrics_regression(\n",
|
||||
@@ -666,9 +667,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.automl.core._vendor.automl.client.core.common import metrics\n",
|
||||
"from azureml.automl.core.shared import constants, metrics\n",
|
||||
"from matplotlib import pyplot as plt\n",
|
||||
"from automl.client.core.common import constants\n",
|
||||
"\n",
|
||||
"# use automl metrics module\n",
|
||||
"scores = metrics.compute_metrics_regression(\n",
|
||||
|
||||
@@ -3,8 +3,7 @@ dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- numpy==1.16.2
|
||||
- pandas==0.23.4
|
||||
- azureml-train-automl
|
||||
- azureml-widgets
|
||||
- matplotlib
|
||||
- azureml-explain-model
|
||||
- azureml-contrib-interpret
|
||||
|
||||
@@ -68,6 +68,7 @@
|
||||
"import logging\n",
|
||||
"import warnings\n",
|
||||
"\n",
|
||||
"import azureml.core\n",
|
||||
"from azureml.core.dataset import Dataset\n",
|
||||
"from pandas.tseries.frequencies import to_offset\n",
|
||||
"from azureml.core.compute import AmlCompute\n",
|
||||
@@ -81,13 +82,29 @@
|
||||
"np.set_printoptions(precision=4, suppress=True, linewidth=120)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This sample notebook may use features that are not available in previous versions of the Azure ML SDK."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(\"This notebook was created using version 1.5.0 of the Azure ML SDK\")\n",
|
||||
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import azureml.core\n",
|
||||
"from azureml.core.workspace import Workspace\n",
|
||||
"from azureml.core.experiment import Experiment\n",
|
||||
"from azureml.train.automl import AutoMLConfig\n",
|
||||
@@ -100,7 +117,6 @@
|
||||
"experiment = Experiment(ws, experiment_name)\n",
|
||||
"\n",
|
||||
"output = {}\n",
|
||||
"output['SDK version'] = azureml.core.VERSION\n",
|
||||
"output['Subscription ID'] = ws.subscription_id\n",
|
||||
"output['Workspace'] = ws.name\n",
|
||||
"output['SKU'] = ws.sku\n",
|
||||
@@ -258,29 +274,22 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"amlcompute_cluster_name = \"cpu-cluster-fcfn\"\n",
|
||||
" \n",
|
||||
"found = False\n",
|
||||
"# Check if this compute target already exists in the workspace.\n",
|
||||
"cts = ws.compute_targets\n",
|
||||
"if amlcompute_cluster_name in cts and cts[amlcompute_cluster_name].type == 'AmlCompute':\n",
|
||||
" found = True\n",
|
||||
" print('Found existing compute target.')\n",
|
||||
" compute_target = cts[amlcompute_cluster_name]\n",
|
||||
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
|
||||
"from azureml.core.compute_target import ComputeTargetException\n",
|
||||
"\n",
|
||||
"if not found:\n",
|
||||
" print('Creating a new compute target...')\n",
|
||||
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\", # for GPU, use \"STANDARD_NC6\"\n",
|
||||
" #vm_priority = 'lowpriority', # optional\n",
|
||||
" max_nodes = 6)\n",
|
||||
"# Choose a name for your CPU cluster\n",
|
||||
"amlcompute_cluster_name = \"fcfn-cluster\"\n",
|
||||
"\n",
|
||||
" # Create the cluster.\\n\",\n",
|
||||
" compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)\n",
|
||||
"# Verify that cluster does not exist already\n",
|
||||
"try:\n",
|
||||
" compute_target = ComputeTarget(workspace=ws, name=amlcompute_cluster_name)\n",
|
||||
" print('Found existing cluster, use it.')\n",
|
||||
"except ComputeTargetException:\n",
|
||||
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',\n",
|
||||
" max_nodes=6)\n",
|
||||
" compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, compute_config)\n",
|
||||
"\n",
|
||||
"print('Checking cluster status...')\n",
|
||||
"# Can poll for a minimum number of nodes and for a specific timeout.\n",
|
||||
"# If no min_node_count is provided, it will use the scale settings for the cluster.\n",
|
||||
"compute_target.wait_for_completion(show_output = True, min_node_count = None, timeout_in_minutes = 20)"
|
||||
"compute_target.wait_for_completion(show_output=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -346,9 +355,24 @@
|
||||
" label_column_name=target_label,\n",
|
||||
" **time_series_settings)\n",
|
||||
"\n",
|
||||
"remote_run = experiment.submit(automl_config, show_output=False)\n",
|
||||
"remote_run.wait_for_completion()\n",
|
||||
"\n",
|
||||
"remote_run = experiment.submit(automl_config, show_output=False)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"remote_run.wait_for_completion()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Retrieve the best model to use it further.\n",
|
||||
"_, fitted_model = remote_run.get_output()"
|
||||
]
|
||||
|
||||
@@ -4,6 +4,7 @@ dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- numpy==1.16.2
|
||||
- pandas==0.23.4
|
||||
- azureml-train-automl
|
||||
- azureml-widgets
|
||||
- matplotlib
|
||||
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 24 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 24 KiB |
@@ -65,7 +65,25 @@
|
||||
"\n",
|
||||
"from azureml.core.workspace import Workspace\n",
|
||||
"from azureml.core.experiment import Experiment\n",
|
||||
"from azureml.train.automl import AutoMLConfig"
|
||||
"from azureml.train.automl import AutoMLConfig\n",
|
||||
"from azureml.automl.core.featurization import FeaturizationConfig"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This sample notebook may use features that are not available in previous versions of the Azure ML SDK."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(\"This notebook was created using version 1.5.0 of the Azure ML SDK\")\n",
|
||||
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -89,7 +107,6 @@
|
||||
"experiment = Experiment(ws, experiment_name)\n",
|
||||
"\n",
|
||||
"output = {}\n",
|
||||
"output['SDK version'] = azureml.core.VERSION\n",
|
||||
"output['Subscription ID'] = ws.subscription_id\n",
|
||||
"output['Workspace'] = ws.name\n",
|
||||
"output['SKU'] = ws.sku\n",
|
||||
@@ -118,35 +135,22 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.compute import AmlCompute\n",
|
||||
"from azureml.core.compute import ComputeTarget\n",
|
||||
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
|
||||
"from azureml.core.compute_target import ComputeTargetException\n",
|
||||
"\n",
|
||||
"# Choose a name for your cluster.\n",
|
||||
"amlcompute_cluster_name = \"cpu-cluster-oj\"\n",
|
||||
"# Choose a name for your CPU cluster\n",
|
||||
"amlcompute_cluster_name = \"oj-cluster\"\n",
|
||||
"\n",
|
||||
"found = False\n",
|
||||
"# Check if this compute target already exists in the workspace.\n",
|
||||
"cts = ws.compute_targets\n",
|
||||
"if amlcompute_cluster_name in cts and cts[amlcompute_cluster_name].type == 'AmlCompute':\n",
|
||||
" found = True\n",
|
||||
" print('Found existing compute target.')\n",
|
||||
" compute_target = cts[amlcompute_cluster_name]\n",
|
||||
" \n",
|
||||
"if not found:\n",
|
||||
" print('Creating a new compute target...')\n",
|
||||
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\", # for GPU, use \"STANDARD_NC6\"\n",
|
||||
" #vm_priority = 'lowpriority', # optional\n",
|
||||
" max_nodes = 6)\n",
|
||||
"# Verify that cluster does not exist already\n",
|
||||
"try:\n",
|
||||
" compute_target = ComputeTarget(workspace=ws, name=amlcompute_cluster_name)\n",
|
||||
" print('Found existing cluster, use it.')\n",
|
||||
"except ComputeTargetException:\n",
|
||||
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',\n",
|
||||
" max_nodes=6)\n",
|
||||
" compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, compute_config)\n",
|
||||
"\n",
|
||||
" # Create the cluster.\n",
|
||||
" compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)\n",
|
||||
" \n",
|
||||
"print('Checking cluster status...')\n",
|
||||
"# Can poll for a minimum number of nodes and for a specific timeout.\n",
|
||||
"# If no min_node_count is provided, it will use the scale settings for the cluster.\n",
|
||||
"compute_target.wait_for_completion(show_output = True, min_node_count = None, timeout_in_minutes = 20)\n",
|
||||
" \n",
|
||||
"# For a more detailed view of current AmlCompute status, use get_status()."
|
||||
"compute_target.wait_for_completion(show_output=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -315,17 +319,54 @@
|
||||
"target_column_name = 'Quantity'"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Customization\n",
|
||||
"\n",
|
||||
"The featurization customization in forecasting is an advanced feature in AutoML which allows our customers to change the default forecasting featurization behaviors and column types through `FeaturizationConfig`. The supported scenarios include,\n",
|
||||
"1. Column purposes update: Override feature type for the specified column. Currently supports DateTime, Categorical and Numeric. This customization can be used in the scenario that the type of the column cannot correctly reflect its purpose. Some numerical columns, for instance, can be treated as Categorical columns which need to be converted to categorical while some can be treated as epoch timestamp which need to be converted to datetime. To tell our SDK to correctly preprocess these columns, a configuration need to be add with the columns and their desired types.\n",
|
||||
"2. Transformer parameters update: Currently supports parameter change for Imputer only. User can customize imputation methods, the supported methods are constant for target data and mean, median, most frequent and constant for training data. This customization can be used for the scenario that our customers know which imputation methods fit best to the input data. For instance, some datasets use NaN to represent 0 which the correct behavior should impute all the missing value with 0. To achieve this behavior, these columns need to be configured as constant imputation with `fill_value` 0.\n",
|
||||
"3. Drop columns: Columns to drop from being featurized. These usually are the columns which are leaky or the columns contain no useful data.\n",
|
||||
"\n",
|
||||
"This step requires an Enterprise workspace to gain access to this feature. To learn more about creating an Enterprise workspace or upgrading to an Enterprise workspace from the Azure portal, please visit our [Workspace page.](https://docs.microsoft.com/azure/machine-learning/service/concept-workspace#upgrade)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"featurization_config = FeaturizationConfig()\n",
|
||||
"featurization_config.drop_columns = ['logQuantity'] # 'logQuantity' is a leaky feature, so we remove it.\n",
|
||||
"# Force the CPWVOL5 feature to be numeric type.\n",
|
||||
"featurization_config.add_column_purpose('CPWVOL5', 'Numeric')\n",
|
||||
"# Fill missing values in the target column, Quantity, with zeros.\n",
|
||||
"featurization_config.add_transformer_params('Imputer', ['Quantity'], {\"strategy\": \"constant\", \"fill_value\": 0})\n",
|
||||
"# Fill missing values in the INCOME column with median value.\n",
|
||||
"featurization_config.add_transformer_params('Imputer', ['INCOME'], {\"strategy\": \"median\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Train\n",
|
||||
"\n",
|
||||
"The AutoMLConfig object defines the settings and data for an AutoML training job. Here, we set necessary inputs like the task type, the number of AutoML iterations to try, the training data, and cross-validation parameters. \n",
|
||||
"The [AutoMLConfig](https://docs.microsoft.com/en-us/python/api/azureml-train-automl-client/azureml.train.automl.automlconfig.automlconfig?view=azure-ml-py) object defines the settings and data for an AutoML training job. Here, we set necessary inputs like the task type, the number of AutoML iterations to try, the training data, and cross-validation parameters.\n",
|
||||
"\n",
|
||||
"For forecasting tasks, there are some additional parameters that can be set: the name of the column holding the date/time, the grain column names, and the maximum forecast horizon. A time column is required for forecasting, while the grain is optional. If a grain is not given, AutoML assumes that the whole dataset is a single time-series. We also pass a list of columns to drop prior to modeling. The _logQuantity_ column is completely correlated with the target quantity, so it must be removed to prevent a target leak.\n",
|
||||
"For forecasting tasks, there are some additional parameters that can be set: the name of the column holding the date/time, the grain column names, and the maximum forecast horizon. A time column is required for forecasting, while the grain is optional. If grain columns are not given, AutoML assumes that the whole dataset is a single time-series. We also pass a list of columns to drop prior to modeling. The _logQuantity_ column is completely correlated with the target quantity, so it must be removed to prevent a target leak.\n",
|
||||
"\n",
|
||||
"The forecast horizon is given in units of the time-series frequency; for instance, the OJ series frequency is weekly, so a horizon of 20 means that a trained model will estimate sales up to 20 weeks beyond the latest date in the training data for each series. In this example, we set the maximum horizon to the number of samples per series in the test set (n_test_periods). Generally, the value of this parameter will be dictated by business needs. For example, a demand planning application that estimates the next month of sales should set the horizon according to suitable planning time-scales. Please see the [energy_demand notebook](https://github.com/Azure/MachineLearningNotebooks/tree/master/how-to-use-azureml/automated-machine-learning/forecasting-energy-demand) for more discussion of forecast horizon.\n",
|
||||
"\n",
|
||||
"We note here that AutoML can sweep over two types of time-series models:\n",
|
||||
"* Models that are trained for each series such as ARIMA and Facebook's Prophet. Note that these models are only available for [Enterprise Edition Workspaces](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-manage-workspace#upgrade).\n",
|
||||
"* Models trained across multiple time-series using a regression approach.\n",
|
||||
"\n",
|
||||
"In the first case, AutoML loops over all time-series in your dataset and trains one model (e.g. AutoArima or Prophet, as the case may be) for each series. This can result in long runtimes to train these models if there are a lot of series in the data. One way to mitigate this problem is to fit models for different series in parallel if you have multiple compute cores available. To enable this behavior, set the `max_cores_per_iteration` parameter in your AutoMLConfig as shown in the example in the next cell. \n",
|
||||
"\n",
|
||||
"The forecast horizon is given in units of the time-series frequency; for instance, the OJ series frequency is weekly, so a horizon of 20 means that a trained model will estimate sales up to 20 weeks beyond the latest date in the training data for each series. In this example, we set the maximum horizon to the number of samples per series in the test set (n_test_periods). Generally, the value of this parameter will be dictated by business needs. For example, a demand planning organizaion that needs to estimate the next month of sales would set the horizon accordingly. Please see the [energy_demand notebook](https://github.com/Azure/MachineLearningNotebooks/tree/master/how-to-use-azureml/automated-machine-learning/forecasting-energy-demand) for more discussion of forecast horizon.\n",
|
||||
"\n",
|
||||
"Finally, a note about the cross-validation (CV) procedure for time-series data. AutoML uses out-of-sample error estimates to select a best pipeline/model, so it is important that the CV fold splitting is done correctly. Time-series can violate the basic statistical assumptions of the canonical K-Fold CV strategy, so AutoML implements a [rolling origin validation](https://robjhyndman.com/hyndsight/tscv/) procedure to create CV folds for time-series data. To use this procedure, you just need to specify the desired number of CV folds in the AutoMLConfig object. It is also possible to bypass CV and use your own validation set by setting the *validation_data* parameter of AutoMLConfig.\n",
|
||||
"\n",
|
||||
@@ -346,8 +387,9 @@
|
||||
"|**debug_log**|Log file path for writing debugging information|\n",
|
||||
"|**time_column_name**|Name of the datetime column in the input data|\n",
|
||||
"|**grain_column_names**|Name(s) of the columns defining individual series in the input data|\n",
|
||||
"|**drop_column_names**|Name(s) of columns to drop prior to modeling|\n",
|
||||
"|**max_horizon**|Maximum desired forecast horizon in units of time-series frequency|"
|
||||
"|**max_horizon**|Maximum desired forecast horizon in units of time-series frequency|\n",
|
||||
"|**featurization**| 'auto' / 'off' / FeaturizationConfig Indicator for whether featurization step should be done automatically or not, or whether customized featurization should be used. Setting this enables AutoML to perform featurization on the input to handle *missing data*, and to perform some common *feature extraction*.|\n",
|
||||
"|**max_cores_per_iteration**|Maximum number of cores to utilize per iteration. A value of -1 indicates all available cores should be used.|"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -359,7 +401,6 @@
|
||||
"time_series_settings = {\n",
|
||||
" 'time_column_name': time_column_name,\n",
|
||||
" 'grain_column_names': grain_column_names,\n",
|
||||
" 'drop_column_names': ['logQuantity'], # 'logQuantity' is a leaky feature, so we remove it.\n",
|
||||
" 'max_horizon': n_test_periods\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
@@ -371,8 +412,10 @@
|
||||
" label_column_name=target_column_name,\n",
|
||||
" compute_target=compute_target,\n",
|
||||
" enable_early_stopping=True,\n",
|
||||
" featurization=featurization_config,\n",
|
||||
" n_cross_validations=3,\n",
|
||||
" verbosity=logging.INFO,\n",
|
||||
" max_cores_per_iteration=-1,\n",
|
||||
" **time_series_settings)"
|
||||
]
|
||||
},
|
||||
@@ -422,6 +465,33 @@
|
||||
"model_name = best_run.properties['model_name']"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Transparency\n",
|
||||
"\n",
|
||||
"View updated featurization summary"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"custom_featurizer = fitted_model.named_steps['timeseriestransformer']"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"custom_featurizer.get_featurization_summary()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -475,7 +545,7 @@
|
||||
"source": [
|
||||
"If you are used to scikit pipelines, perhaps you expected `predict(X_test)`. However, forecasting requires a more general interface that also supplies the past target `y` values. Please use `forecast(X,y)` as `predict(X)` is reserved for internal purposes on forecasting models.\n",
|
||||
"\n",
|
||||
"The [energy demand forecasting notebook](https://github.com/Azure/MachineLearningNotebooks/tree/master/how-to-use-azureml/automated-machine-learning/forecasting-energy-demand) demonstrates the use of the forecast function in more detail in the context of using lags and rolling window features. "
|
||||
"The [forecast function notebook](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/forecasting-high-frequency/auto-ml-forecasting-function.ipynb) demonstrates the use of the forecast function for a variety of use cases. Also, please see the [API documentation for the forecast function](https://docs.microsoft.com/en-us/python/api/azureml-automl-runtime/azureml.automl.runtime.shared.model_wrappers.forecastingpipelinewrapper?view=azure-ml-py#forecast-x-pred--typing-union-pandas-core-frame-dataframe--nonetype----none--y-pred--typing-union-pandas-core-frame-dataframe--numpy-ndarray--nonetype----none--forecast-destination--typing-union-pandas--libs-tslibs-timestamps-timestamp--nonetype----none--ignore-data-errors--bool---false-----typing-tuple-numpy-ndarray--pandas-core-frame-dataframe-)."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -506,9 +576,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.automl.core._vendor.automl.client.core.common import metrics\n",
|
||||
"from azureml.automl.core.shared import constants, metrics\n",
|
||||
"from matplotlib import pyplot as plt\n",
|
||||
"from automl.client.core.common import constants\n",
|
||||
"\n",
|
||||
"# use automl metrics module\n",
|
||||
"scores = metrics.compute_metrics_regression(\n",
|
||||
|
||||
@@ -42,7 +42,7 @@
|
||||
"\n",
|
||||
"This notebook is using the local machine compute to train the model.\n",
|
||||
"\n",
|
||||
"If you are using an Azure Machine Learning [Notebook VM](https://docs.microsoft.com/en-us/azure/machine-learning/service/tutorial-1st-experiment-sdk-setup), you are all set. Otherwise, go through the [configuration](../../../configuration.ipynb) notebook first if you haven't already to establish your connection to the AzureML Workspace. \n",
|
||||
"If you are using an Azure Machine Learning Compute Instance, you are all set. Otherwise, go through the [configuration](../../../configuration.ipynb) notebook first if you haven't already to establish your connection to the AzureML Workspace. \n",
|
||||
"\n",
|
||||
"In this notebook you will learn how to:\n",
|
||||
"1. Create an experiment using an existing workspace.\n",
|
||||
@@ -82,6 +82,23 @@
|
||||
"from azureml.explain.model._internal.explanation_client import ExplanationClient"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This sample notebook may use features that are not available in previous versions of the Azure ML SDK."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(\"This notebook was created using version 1.5.0 of the Azure ML SDK\")\n",
|
||||
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@@ -96,7 +113,6 @@
|
||||
"experiment=Experiment(ws, experiment_name)\n",
|
||||
"\n",
|
||||
"output = {}\n",
|
||||
"output['SDK version'] = azureml.core.VERSION\n",
|
||||
"output['Subscription ID'] = ws.subscription_id\n",
|
||||
"output['Workspace'] = ws.name\n",
|
||||
"output['Resource Group'] = ws.resource_group\n",
|
||||
@@ -354,7 +370,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Initialize the Mimic Explainer for feature importance\n",
|
||||
"For explaining the AutoML models, use the MimicWrapper from azureml.explain.model package. The MimicWrapper can be initialized with fields in automl_explainer_setup_obj, your workspace and a LightGBM model which acts as a surrogate model to explain the AutoML model (fitted_model here). The MimicWrapper also takes the automl_run object where engineered explanations will be uploaded."
|
||||
"For explaining the AutoML models, use the MimicWrapper from azureml.explain.model package. The MimicWrapper can be initialized with fields in automl_explainer_setup_obj, your workspace and a surrogate model to explain the AutoML model (fitted_model here). The MimicWrapper also takes the automl_run object where engineered explanations will be uploaded."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -363,13 +379,14 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.explain.model.mimic.models.lightgbm_model import LGBMExplainableModel\n",
|
||||
"from azureml.explain.model.mimic_wrapper import MimicWrapper\n",
|
||||
"explainer = MimicWrapper(ws, automl_explainer_setup_obj.automl_estimator, LGBMExplainableModel, \n",
|
||||
"explainer = MimicWrapper(ws, automl_explainer_setup_obj.automl_estimator,\n",
|
||||
" explainable_model=automl_explainer_setup_obj.surrogate_model, \n",
|
||||
" init_dataset=automl_explainer_setup_obj.X_transform, run=automl_run,\n",
|
||||
" features=automl_explainer_setup_obj.engineered_feature_names, \n",
|
||||
" feature_maps=[automl_explainer_setup_obj.feature_map],\n",
|
||||
" classes=automl_explainer_setup_obj.classes)"
|
||||
" classes=automl_explainer_setup_obj.classes,\n",
|
||||
" explainer_kwargs=automl_explainer_setup_obj.surrogate_model_params)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -5,4 +5,3 @@ dependencies:
|
||||
- azureml-train-automl
|
||||
- azureml-widgets
|
||||
- matplotlib
|
||||
- azureml-explain-model
|
||||
|
||||
@@ -40,7 +40,7 @@
|
||||
"In this example we use the Hardware Performance Dataset to showcase how you can use AutoML for a simple regression problem. The Regression goal is to predict the performance of certain combinations of hardware parts.\n",
|
||||
"After training AutoML models for this regression data set, we show how you can compute model explanations on your remote compute using a sample explainer script.\n",
|
||||
"\n",
|
||||
"If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [configuration](../../../configuration.ipynb) notebook first if you haven't already to establish your connection to the AzureML Workspace. \n",
|
||||
"If you are using an Azure Machine Learning Compute Instance, you are all set. Otherwise, go through the [configuration](../../../configuration.ipynb) notebook first if you haven't already to establish your connection to the AzureML Workspace. \n",
|
||||
"\n",
|
||||
"An Enterprise workspace is required for this notebook. To learn more about creating an Enterprise workspace or upgrading to an Enterprise workspace from the Azure portal, please visit our [Workspace page.](https://docs.microsoft.com/azure/machine-learning/service/concept-workspace#upgrade) \n",
|
||||
"\n",
|
||||
@@ -85,6 +85,23 @@
|
||||
"from azureml.core.dataset import Dataset"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This sample notebook may use features that are not available in previous versions of the Azure ML SDK."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(\"This notebook was created using version 1.5.0 of the Azure ML SDK\")\n",
|
||||
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@@ -98,7 +115,6 @@
|
||||
"experiment = Experiment(ws, experiment_name)\n",
|
||||
"\n",
|
||||
"output = {}\n",
|
||||
"output['SDK version'] = azureml.core.VERSION\n",
|
||||
"output['Subscription ID'] = ws.subscription_id\n",
|
||||
"output['Workspace Name'] = ws.name\n",
|
||||
"output['Resource Group'] = ws.resource_group\n",
|
||||
@@ -127,35 +143,22 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.compute import AmlCompute\n",
|
||||
"from azureml.core.compute import ComputeTarget\n",
|
||||
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
|
||||
"from azureml.core.compute_target import ComputeTargetException\n",
|
||||
"\n",
|
||||
"# Choose a name for your cluster.\n",
|
||||
"amlcompute_cluster_name = \"cpu-cluster-5\"\n",
|
||||
"amlcompute_cluster_name = \"hardware-cluster\"\n",
|
||||
"\n",
|
||||
"found = False\n",
|
||||
"# Check if this compute target already exists in the workspace.\n",
|
||||
"cts = ws.compute_targets\n",
|
||||
"if amlcompute_cluster_name in cts and cts[amlcompute_cluster_name].type == 'AmlCompute':\n",
|
||||
" found = True\n",
|
||||
" print('Found existing compute target.')\n",
|
||||
" compute_target = cts[amlcompute_cluster_name]\n",
|
||||
"# Verify that cluster does not exist already\n",
|
||||
"try:\n",
|
||||
" compute_target = ComputeTarget(workspace=ws, name=amlcompute_cluster_name)\n",
|
||||
" print('Found existing cluster, use it.')\n",
|
||||
"except ComputeTargetException:\n",
|
||||
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',\n",
|
||||
" max_nodes=4)\n",
|
||||
" compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, compute_config)\n",
|
||||
"\n",
|
||||
"if not found:\n",
|
||||
" print('Creating a new compute target...')\n",
|
||||
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\", # for GPU, use \"STANDARD_NC6\"\n",
|
||||
" #vm_priority = 'lowpriority', # optional\n",
|
||||
" max_nodes = 4)\n",
|
||||
"\n",
|
||||
" # Create the cluster.\\n\",\n",
|
||||
" compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)\n",
|
||||
"\n",
|
||||
"print('Checking cluster status...')\n",
|
||||
"# Can poll for a minimum number of nodes and for a specific timeout.\n",
|
||||
"# If no min_node_count is provided, it will use the scale settings for the cluster.\n",
|
||||
"compute_target.wait_for_completion(show_output = True, min_node_count = None, timeout_in_minutes = 20)\n",
|
||||
"\n",
|
||||
"# For a more detailed view of current AmlCompute status, use get_status()."
|
||||
"compute_target.wait_for_completion(show_output=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -320,8 +323,6 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#from azureml.train.automl.run import AutoMLRun\n",
|
||||
"#experiment_name = 'automl-regression-hardware'\n",
|
||||
"#experiment = Experiment(ws, experiment_name)\n",
|
||||
"#remote_run = AutoMLRun(experiment=experiment, run_id='<run_ID_goes_here')\n",
|
||||
"#remote_run"
|
||||
]
|
||||
@@ -618,7 +619,7 @@
|
||||
"source": [
|
||||
"from azureml.explain.model._internal.explanation_client import ExplanationClient\n",
|
||||
"client = ExplanationClient.from_run(automl_run)\n",
|
||||
"engineered_explanations = client.download_model_explanation(raw=False)\n",
|
||||
"engineered_explanations = client.download_model_explanation(raw=False, comment='engineered explanations')\n",
|
||||
"print(engineered_explanations.get_feature_importance_dict())\n",
|
||||
"print(\"You can visualize the engineered explanations under the 'Explanations (preview)' tab in the AutoML run at:-\\n\" + automl_run.get_portal_url())"
|
||||
]
|
||||
@@ -637,7 +638,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"raw_explanations = client.download_model_explanation(raw=True)\n",
|
||||
"raw_explanations = client.download_model_explanation(raw=True, comment='raw explanations')\n",
|
||||
"print(raw_explanations.get_feature_importance_dict())\n",
|
||||
"print(\"You can visualize the raw explanations under the 'Explanations (preview)' tab in the AutoML run at:-\\n\" + automl_run.get_portal_url())"
|
||||
]
|
||||
@@ -0,0 +1,7 @@
|
||||
name: auto-ml-regression-explanation-featurization
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-train-automl
|
||||
- azureml-widgets
|
||||
- matplotlib
|
||||
@@ -10,7 +10,7 @@ from azureml.train.automl.runtime.automl_explain_utilities import AutoMLExplaine
|
||||
automl_setup_model_explanations, automl_check_model_if_explainable
|
||||
from azureml.explain.model.mimic.models.lightgbm_model import LGBMExplainableModel
|
||||
from azureml.explain.model.mimic_wrapper import MimicWrapper
|
||||
from automl.client.core.common.constants import MODEL_PATH
|
||||
from azureml.automl.core.shared.constants import MODEL_PATH
|
||||
from azureml.explain.model.scoring.scoring_explainer import TreeScoringExplainer, save
|
||||
|
||||
|
||||
@@ -60,17 +60,16 @@ explainer = MimicWrapper(ws, automl_explainer_setup_obj.automl_estimator, LGBMEx
|
||||
classes=automl_explainer_setup_obj.classes)
|
||||
|
||||
# Compute the engineered explanations
|
||||
engineered_explanations = explainer.explain(['local', 'global'],
|
||||
engineered_explanations = explainer.explain(['local', 'global'], tag='engineered explanations',
|
||||
eval_dataset=automl_explainer_setup_obj.X_test_transform)
|
||||
|
||||
# Compute the raw explanations
|
||||
raw_explanations = explainer.explain(['local', 'global'], get_raw=True,
|
||||
raw_explanations = explainer.explain(['local', 'global'], get_raw=True, tag='raw explanations',
|
||||
raw_feature_names=automl_explainer_setup_obj.raw_feature_names,
|
||||
eval_dataset=automl_explainer_setup_obj.X_test_transform)
|
||||
|
||||
print("Engineered and raw explanations computed successfully")
|
||||
|
||||
|
||||
# Initialize the ScoringExplainer
|
||||
scoring_explainer = TreeScoringExplainer(explainer.explainer, feature_maps=[automl_explainer_setup_obj.feature_map])
|
||||
|
||||
@@ -1,10 +0,0 @@
|
||||
name: auto-ml-regression-hardware-performance-explanation-and-featurization
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-train-automl
|
||||
- azureml-widgets
|
||||
- matplotlib
|
||||
- azureml-explain-model
|
||||
- azureml-explain-model
|
||||
- azureml-contrib-interpret
|
||||
@@ -40,7 +40,7 @@
|
||||
"## Introduction\n",
|
||||
"In this example we use the Hardware Performance Dataset to showcase how you can use AutoML for a simple regression problem. The Regression goal is to predict the performance of certain combinations of hardware parts.\n",
|
||||
"\n",
|
||||
"If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [configuration](../../../configuration.ipynb) notebook first if you haven't already to establish your connection to the AzureML Workspace. \n",
|
||||
"If you are using an Azure Machine Learning Compute Instance, you are all set. Otherwise, go through the [configuration](../../../configuration.ipynb) notebook first if you haven't already to establish your connection to the AzureML Workspace. \n",
|
||||
"\n",
|
||||
"In this notebook you will learn how to:\n",
|
||||
"1. Create an `Experiment` in an existing `Workspace`.\n",
|
||||
@@ -79,6 +79,23 @@
|
||||
"from azureml.train.automl import AutoMLConfig"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This sample notebook may use features that are not available in previous versions of the Azure ML SDK."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(\"This notebook was created using version 1.5.0 of the Azure ML SDK\")\n",
|
||||
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@@ -93,7 +110,6 @@
|
||||
"experiment = Experiment(ws, experiment_name)\n",
|
||||
"\n",
|
||||
"output = {}\n",
|
||||
"output['SDK version'] = azureml.core.VERSION\n",
|
||||
"output['Subscription ID'] = ws.subscription_id\n",
|
||||
"output['Workspace'] = ws.name\n",
|
||||
"output['Resource Group'] = ws.resource_group\n",
|
||||
@@ -122,7 +138,7 @@
|
||||
"from azureml.core.compute_target import ComputeTargetException\n",
|
||||
"\n",
|
||||
"# Choose a name for your CPU cluster\n",
|
||||
"cpu_cluster_name = \"cpu-cluster-2\"\n",
|
||||
"cpu_cluster_name = \"reg-cluster\"\n",
|
||||
"\n",
|
||||
"# Verify that cluster does not exist already\n",
|
||||
"try:\n",
|
||||
|
||||
@@ -512,9 +512,11 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Retrieve the Best Model after the above run is complete \n",
|
||||
"## Deploy\n",
|
||||
"\n",
|
||||
"Below we select the best pipeline from our iterations. The `get_output` method returns the best run and the fitted model. The Model includes the pipeline and any pre-processing. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*."
|
||||
"### Retrieve the Best Model\n",
|
||||
"\n",
|
||||
"Below we select the best pipeline from our iterations. The `get_output` method on `local_run` returns the best run and the fitted model for the last invocation. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -523,17 +525,15 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"best_run, fitted_model = local_run.get_output()\n",
|
||||
"print(best_run)\n",
|
||||
"print(fitted_model)"
|
||||
"best_run, fitted_model = local_run.get_output()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Best Model Based on Any Other Metric after the above run is complete based on the child run\n",
|
||||
"Show the run and the model that has the smallest `log_loss` value:"
|
||||
"### Download the conda environment file\n",
|
||||
"From the *best_run* download the conda environment file that was used to train the AutoML model."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -542,10 +542,34 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"lookup_metric = \"log_loss\"\n",
|
||||
"best_run, fitted_model = local_run.get_output(metric = lookup_metric)\n",
|
||||
"print(best_run)\n",
|
||||
"print(fitted_model)"
|
||||
"from azureml.automl.core.shared import constants\n",
|
||||
"conda_env_file_name = 'conda_env.yml'\n",
|
||||
"best_run.download_file(name=\"outputs/conda_env_v_1_0_0.yml\", output_file_path=conda_env_file_name)\n",
|
||||
"with open(conda_env_file_name, \"r\") as conda_file:\n",
|
||||
" conda_file_contents = conda_file.read()\n",
|
||||
" print(conda_file_contents)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Download the model scoring file\n",
|
||||
"From the *best_run* download the scoring file to get the predictions from the AutoML model."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.automl.core.shared import constants\n",
|
||||
"script_file_name = 'scoring_file.py'\n",
|
||||
"best_run.download_file(name=\"outputs/scoring_file_v_1_0_0.py\", output_file_path=script_file_name)\n",
|
||||
"with open(script_file_name, \"r\") as scoring_file:\n",
|
||||
" scoring_file_contents = scoring_file.read()\n",
|
||||
" print(scoring_file_contents)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -572,8 +596,9 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create Scoring Script\n",
|
||||
"Replace model_id with name of model from output of above register cell"
|
||||
"### Deploy the model as a Web Service on Azure Container Instance\n",
|
||||
"\n",
|
||||
"Create the configuration needed for deploying the model as a web service."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -582,123 +607,17 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%writefile score.py\n",
|
||||
"import pickle\n",
|
||||
"import json\n",
|
||||
"import numpy as np\n",
|
||||
"import azureml.train.automl\n",
|
||||
"from sklearn.externals import joblib\n",
|
||||
"from azureml.core.model import Model\n",
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"def init():\n",
|
||||
" global model\n",
|
||||
" model_path = Model.get_model_path(model_name = '<<model_id>>') # this name is model.id of model that we want to deploy\n",
|
||||
" # deserialize the model file back into a sklearn model\n",
|
||||
" model = joblib.load(model_path)\n",
|
||||
"\n",
|
||||
"def run(raw_data):\n",
|
||||
" try:\n",
|
||||
" data = (pd.DataFrame(np.array(json.loads(raw_data)['data']), columns=[str(i) for i in range(0,64)]))\n",
|
||||
" result = model.predict(data)\n",
|
||||
" except Exception as e:\n",
|
||||
" result = str(e)\n",
|
||||
" return json.dumps({\"error\": result})\n",
|
||||
" return json.dumps({\"result\":result.tolist()})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#Replace <<model_id>>\n",
|
||||
"content = \"\"\n",
|
||||
"with open(\"score.py\", \"r\") as fo:\n",
|
||||
" content = fo.read()\n",
|
||||
"\n",
|
||||
"new_content = content.replace(\"<<model_id>>\", local_run.model_id)\n",
|
||||
"with open(\"score.py\", \"w\") as fw:\n",
|
||||
" fw.write(new_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Create a YAML File for the Environment"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||
"\n",
|
||||
"myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn'], pip_packages=['azureml-defaults', 'azureml-sdk[automl]'])\n",
|
||||
"\n",
|
||||
"conda_env_file_name = 'myenv.yml'\n",
|
||||
"myenv.save_to_file('.', conda_env_file_name)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Deploy the model as a Web Service on Azure Container Instance\n",
|
||||
"Replace servicename with any meaningful name of service"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# this will take 10-15 minutes to finish\n",
|
||||
"\n",
|
||||
"from azureml.core.webservice import AciWebservice, Webservice\n",
|
||||
"from azureml.exceptions import WebserviceException\n",
|
||||
"from azureml.core.model import InferenceConfig\n",
|
||||
"from azureml.core.model import Model\n",
|
||||
"from azureml.core.webservice import AciWebservice\n",
|
||||
"from azureml.core.environment import Environment\n",
|
||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||
"import uuid\n",
|
||||
"\n",
|
||||
"myenv = Environment.from_conda_specification(name=\"myenv\", file_path=conda_env_file_name)\n",
|
||||
"inference_config = InferenceConfig(entry_script=script_file_name, environment=myenv)\n",
|
||||
"\n",
|
||||
"myaci_config = AciWebservice.deploy_configuration(\n",
|
||||
" cpu_cores = 2, \n",
|
||||
" memory_gb = 2, \n",
|
||||
" tags = {'name':'Databricks Azure ML ACI'}, \n",
|
||||
" description = 'This is for ADB and AutoML example.')\n",
|
||||
"\n",
|
||||
"myenv = Environment.get(ws, name='AzureML-PySpark-MmlSpark-0.15')\n",
|
||||
"# we need to add extra packages to procured environment\n",
|
||||
"# in order to deploy amended environment we need to rename it\n",
|
||||
"myenv.name = 'myenv'\n",
|
||||
"model_dependencies = CondaDependencies('myenv.yml')\n",
|
||||
"for pip_dep in model_dependencies.pip_packages:\n",
|
||||
" myenv.python.conda_dependencies.add_pip_package(pip_dep)\n",
|
||||
"for conda_dep in model_dependencies.conda_packages:\n",
|
||||
" myenv.python.conda_dependencies.add_conda_package(conda_dep)\n",
|
||||
"inference_config = InferenceConfig(entry_script='score_sparkml.py', environment=myenv)\n",
|
||||
"\n",
|
||||
"guid = str(uuid.uuid4()).split(\"-\")[0]\n",
|
||||
"service_name = \"myservice-{}\".format(guid)\n",
|
||||
"\n",
|
||||
"# Remove any existing service under the same name.\n",
|
||||
"try:\n",
|
||||
" Webservice(ws, service_name).delete()\n",
|
||||
"except WebserviceException:\n",
|
||||
" pass\n",
|
||||
"\n",
|
||||
"print(\"Creating service with name: {}\".format(service_name))\n",
|
||||
"\n",
|
||||
"myservice = Model.deploy(ws, service_name, [model], inference_config, myaci_config)\n",
|
||||
"myservice.wait_for_deployment(show_output=True)"
|
||||
"aciconfig = AciWebservice.deploy_configuration(cpu_cores = 1, \n",
|
||||
" memory_gb = 1, \n",
|
||||
" tags = {'area': \"digits\", 'type': \"automl_classification\"}, \n",
|
||||
" description = 'sample service for Automl Classification')"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -707,8 +626,14 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#for using the Web HTTP API \n",
|
||||
"print(myservice.scoring_uri)"
|
||||
"from azureml.core.webservice import Webservice\n",
|
||||
"from azureml.core.model import Model\n",
|
||||
"\n",
|
||||
"aci_service_name = 'automl-databricks-local'\n",
|
||||
"print(aci_service_name)\n",
|
||||
"aci_service = Model.deploy(ws, aci_service_name, [model], inference_config, aciconfig)\n",
|
||||
"aci_service.wait_for_deployment(True)\n",
|
||||
"print(aci_service.state)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -752,7 +677,7 @@
|
||||
"for index in np.random.choice(len(y_test), 2, replace = False):\n",
|
||||
" print(index)\n",
|
||||
" test_sample = json.dumps({'data':X_test[index:index + 1].values.tolist()})\n",
|
||||
" predicted = myservice.run(input_data = test_sample)\n",
|
||||
" predicted = aci_service.run(input_data = test_sample)\n",
|
||||
" label = y_test.values[index]\n",
|
||||
" predictedDict = json.loads(predicted)\n",
|
||||
" title = \"Label value = %d Predicted value = %s \" % ( label,predictedDict['result'][0]) \n",
|
||||
|
||||
@@ -285,7 +285,7 @@
|
||||
"from azureml.exceptions import WebserviceException\n",
|
||||
"\n",
|
||||
"deployment_config = AciWebservice.deploy_configuration(cpu_cores=1, memory_gb=1)\n",
|
||||
"aci_service_name = 'aciservice1'\n",
|
||||
"aci_service_name = 'aciservice-multimodel'\n",
|
||||
"\n",
|
||||
"try:\n",
|
||||
" # if you want to get existing service below is the command\n",
|
||||
|
||||
@@ -383,11 +383,21 @@
|
||||
"- an inference configuration\n",
|
||||
"- a single column tabular dataset, where each row contains a string representing sample request data sent to the service.\n",
|
||||
"\n",
|
||||
"Please note that profiling is a long-running operation and can take up to 25 minutes depending on the size of the dataset.\n",
|
||||
"\n",
|
||||
"At this point we only support profiling of services that expect their request data to be a string, for example: string serialized json, text, string serialized image, etc. The content of each row of the dataset (string) will be put into the body of the HTTP request and sent to the service encapsulating the model for scoring.\n",
|
||||
"\n",
|
||||
"Below is an example of how you can construct an input dataset to profile a service which expects its incoming requests to contain serialized json. In this case we created a dataset based on one hundred instances of the same request data. In real-world scenarios, however, we suggest that you use larger datasets with various inputs, especially if your model resource usage/behavior is input dependent."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You may want to register datasets using the register() method to your workspace so they can be shared with others, reused and referred to by name in your script.\n",
|
||||
"You can try to get the dataset first to see if it's already registered."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@@ -398,35 +408,44 @@
|
||||
"from azureml.core.dataset import Dataset\n",
|
||||
"from azureml.data import dataset_type_definitions\n",
|
||||
"\n",
|
||||
"dataset_name='diabetes_sample_request_data'\n",
|
||||
"\n",
|
||||
"# create a string that can be utf-8 encoded and\n",
|
||||
"# put in the body of the request\n",
|
||||
"serialized_input_json = json.dumps({\n",
|
||||
"dataset_registered = False\n",
|
||||
"try:\n",
|
||||
" sample_request_data = Dataset.get_by_name(workspace = ws, name = dataset_name)\n",
|
||||
" dataset_registered = True\n",
|
||||
"except:\n",
|
||||
" print(\"The dataset {} is not registered in workspace yet.\".format(dataset_name))\n",
|
||||
"\n",
|
||||
"if not dataset_registered:\n",
|
||||
" # create a string that can be utf-8 encoded and\n",
|
||||
" # put in the body of the request\n",
|
||||
" serialized_input_json = json.dumps({\n",
|
||||
" 'data': [\n",
|
||||
" [ 0.03807591, 0.05068012, 0.06169621, 0.02187235, -0.0442235,\n",
|
||||
" -0.03482076, -0.04340085, -0.00259226, 0.01990842, -0.01764613]\n",
|
||||
" ]\n",
|
||||
"})\n",
|
||||
"dataset_content = []\n",
|
||||
"for i in range(100):\n",
|
||||
" })\n",
|
||||
" dataset_content = []\n",
|
||||
" for i in range(100):\n",
|
||||
" dataset_content.append(serialized_input_json)\n",
|
||||
"dataset_content = '\\n'.join(dataset_content)\n",
|
||||
"file_name = 'sample_request_data.txt'\n",
|
||||
"f = open(file_name, 'w')\n",
|
||||
"f.write(dataset_content)\n",
|
||||
"f.close()\n",
|
||||
" dataset_content = '\\n'.join(dataset_content)\n",
|
||||
" file_name = \"{}.txt\".format(dataset_name)\n",
|
||||
" f = open(file_name, 'w')\n",
|
||||
" f.write(dataset_content)\n",
|
||||
" f.close()\n",
|
||||
"\n",
|
||||
"# upload the txt file created above to the Datastore and create a dataset from it\n",
|
||||
"data_store = Datastore.get_default(ws)\n",
|
||||
"data_store.upload_files(['./' + file_name], target_path='sample_request_data')\n",
|
||||
"datastore_path = [(data_store, 'sample_request_data' +'/' + file_name)]\n",
|
||||
"sample_request_data = Dataset.Tabular.from_delimited_files(\n",
|
||||
" # upload the txt file created above to the Datastore and create a dataset from it\n",
|
||||
" data_store = Datastore.get_default(ws)\n",
|
||||
" data_store.upload_files(['./' + file_name], target_path='sample_request_data')\n",
|
||||
" datastore_path = [(data_store, 'sample_request_data' +'/' + file_name)]\n",
|
||||
" sample_request_data = Dataset.Tabular.from_delimited_files(\n",
|
||||
" datastore_path,\n",
|
||||
" separator='\\n',\n",
|
||||
" infer_column_types=True,\n",
|
||||
" header=dataset_type_definitions.PromoteHeadersBehavior.NO_HEADERS)\n",
|
||||
"sample_request_data = sample_request_data.register(workspace=ws,\n",
|
||||
" name='diabetes_sample_request_data',\n",
|
||||
" sample_request_data = sample_request_data.register(workspace=ws,\n",
|
||||
" name=dataset_name,\n",
|
||||
" create_new_version=True)"
|
||||
]
|
||||
},
|
||||
@@ -466,6 +485,7 @@
|
||||
" cpu=1.0,\n",
|
||||
" memory_in_gb=0.5)\n",
|
||||
"\n",
|
||||
"# profiling is a long running operation and may take up to 25 min\n",
|
||||
"profile.wait_for_completion(True)\n",
|
||||
"details = profile.get_details()"
|
||||
]
|
||||
@@ -512,7 +532,7 @@
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "aashishb"
|
||||
"name": "vaidyas"
|
||||
}
|
||||
],
|
||||
"category": "deployment",
|
||||
|
||||
@@ -86,7 +86,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can add tags and descriptions to your models. we are using `sklearn_regression_model.pkl` file in the current directory as a model with the name `sklearn_regression_model_local_adv` in the workspace.\n",
|
||||
"You can add tags and descriptions to your models. We are using the `sklearn_regression_model.pkl` file in the current directory as a model with the name `sklearn_regression_model` in the workspace.\n",
|
||||
"\n",
|
||||
"Using tags, you can track useful information such as the name and version of the machine learning library used to train the model, framework, category, target customer etc. Note that tags must be alphanumeric."
|
||||
]
|
||||
@@ -105,7 +105,7 @@
|
||||
"from azureml.core.model import Model\n",
|
||||
"\n",
|
||||
"model = Model.register(model_path=\"sklearn_regression_model.pkl\",\n",
|
||||
" model_name=\"sklearn_regression_model_local_adv\",\n",
|
||||
" model_name=\"sklearn_regression_model\",\n",
|
||||
" tags={'area': \"diabetes\", 'type': \"regression\"},\n",
|
||||
" description=\"Ridge regression model to predict diabetes\",\n",
|
||||
" workspace=ws)"
|
||||
@@ -126,12 +126,12 @@
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"source_directory = \"C:/abc\"\n",
|
||||
"source_directory = \"source_directory\"\n",
|
||||
"\n",
|
||||
"os.makedirs(source_directory, exist_ok=True)\n",
|
||||
"os.makedirs(\"C:/abc/x/y\", exist_ok=True)\n",
|
||||
"os.makedirs(\"C:/abc/env\", exist_ok=True)\n",
|
||||
"os.makedirs(\"C:/abc/dockerstep\", exist_ok=True)"
|
||||
"os.makedirs(os.path.join(source_directory, \"x/y\"), exist_ok=True)\n",
|
||||
"os.makedirs(os.path.join(source_directory, \"env\"), exist_ok=True)\n",
|
||||
"os.makedirs(os.path.join(source_directory, \"dockerstep\"), exist_ok=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -147,7 +147,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%writefile C:/abc/x/y/score.py\n",
|
||||
"%%writefile source_directory/x/y/score.py\n",
|
||||
"import os\n",
|
||||
"import pickle\n",
|
||||
"import json\n",
|
||||
@@ -170,7 +170,7 @@
|
||||
" global name\n",
|
||||
" # note here, entire source directory on inference config gets added into image\n",
|
||||
" # below is an example of how you can use any extra files in the image\n",
|
||||
" with open('./abc/extradata.json') as json_file: \n",
|
||||
" with open('./source_directory/extradata.json') as json_file:\n",
|
||||
" data = json.load(json_file)\n",
|
||||
" name = data[\"people\"][0][\"name\"]\n",
|
||||
"\n",
|
||||
@@ -191,9 +191,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"Please note that you must indicate azureml-defaults with version >= 1.0.45 as a pip dependency for your environment. This package contains the functionality needed to host the model as a web service."
|
||||
]
|
||||
@@ -204,7 +202,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%writefile C:/abc/env/myenv.yml\n",
|
||||
"%%writefile source_directory/env/myenv.yml\n",
|
||||
"name: project_environment\n",
|
||||
"dependencies:\n",
|
||||
" - python=3.6.2\n",
|
||||
@@ -221,7 +219,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%writefile C:/abc/extradata.json\n",
|
||||
"%%writefile source_directory/extradata.json\n",
|
||||
"{\n",
|
||||
" \"people\": [\n",
|
||||
" {\n",
|
||||
@@ -255,13 +253,14 @@
|
||||
"from azureml.core.model import InferenceConfig\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"myenv = Environment.from_conda_specification(name='myenv', file_path='env/myenv.yml')\n",
|
||||
"myenv = Environment.from_conda_specification(name='myenv', file_path='myenv.yml')\n",
|
||||
"\n",
|
||||
"# explicitly set base_image to None when setting base_dockerfile\n",
|
||||
"myenv.docker.base_image = None\n",
|
||||
"myenv.docker.base_dockerfile = \"RUN echo \\\"this is test\\\"\"\n",
|
||||
"myenv.docker.base_dockerfile = \"FROM mcr.microsoft.com/azureml/base:intelmpi2018.3-ubuntu16.04\\nRUN echo \\\"this is test\\\"\"\n",
|
||||
"myenv.inferencing_stack_version = \"latest\"\n",
|
||||
"\n",
|
||||
"inference_config = InferenceConfig(source_directory=\"C:/abc\",\n",
|
||||
"inference_config = InferenceConfig(source_directory=source_directory,\n",
|
||||
" entry_script=\"x/y/score.py\",\n",
|
||||
" environment=myenv)\n"
|
||||
]
|
||||
@@ -379,7 +378,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%writefile C:/abc/x/y/score.py\n",
|
||||
"%%writefile source_directory/x/y/score.py\n",
|
||||
"import os\n",
|
||||
"import pickle\n",
|
||||
"import json\n",
|
||||
@@ -401,7 +400,7 @@
|
||||
" global name, from_location\n",
|
||||
" # note here, entire source directory on inference config gets added into image\n",
|
||||
" # bellow is the example how you can use any extra files in image\n",
|
||||
" with open('./abc/extradata.json') as json_file: \n",
|
||||
" with open('source_directory/extradata.json') as json_file: \n",
|
||||
" data = json.load(json_file)\n",
|
||||
" name = data[\"people\"][0][\"name\"]\n",
|
||||
" from_location = data[\"people\"][0][\"from\"]\n",
|
||||
|
||||
@@ -82,7 +82,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can add tags and descriptions to your models. we are using `sklearn_regression_model.pkl` file in the current directory as a model with the name `sklearn_regression_model_local` in the workspace.\n",
|
||||
"You can add tags and descriptions to your models. we are using `sklearn_regression_model.pkl` file in the current directory as a model with the name `sklearn_regression_model` in the workspace.\n",
|
||||
"\n",
|
||||
"Using tags, you can track useful information such as the name and version of the machine learning library used to train the model, framework, category, target customer etc. Note that tags must be alphanumeric."
|
||||
]
|
||||
@@ -100,7 +100,7 @@
|
||||
"from azureml.core.model import Model\n",
|
||||
"\n",
|
||||
"model = Model.register(model_path=\"sklearn_regression_model.pkl\",\n",
|
||||
" model_name=\"sklearn_regression_model_local\",\n",
|
||||
" model_name=\"sklearn_regression_model\",\n",
|
||||
" tags={'area': \"diabetes\", 'type': \"regression\"},\n",
|
||||
" description=\"Ridge regression model to predict diabetes\",\n",
|
||||
" workspace=ws)"
|
||||
@@ -159,6 +159,8 @@
|
||||
"- an inference configuration\n",
|
||||
"- a single column tabular dataset, where each row contains a string representing sample request data sent to the service.\n",
|
||||
"\n",
|
||||
"Please, note that profiling is a long running operation and can take up to 25 minutes depending on the size of the dataset.\n",
|
||||
"\n",
|
||||
"At this point we only support profiling of services that expect their request data to be a string, for example: string serialized json, text, string serialized image, etc. The content of each row of the dataset (string) will be put into the body of the HTTP request and sent to the service encapsulating the model for scoring.\n",
|
||||
"\n",
|
||||
"Below is an example of how you can construct an input dataset to profile a service which expects its incoming requests to contain serialized json. In this case we created a dataset based one hundred instances of the same request data. In real world scenarios however, we suggest that you use larger datasets with various inputs, especially if your model resource usage/behavior is input dependent."
|
||||
@@ -245,6 +247,7 @@
|
||||
" cpu=1.0,\n",
|
||||
" memory_in_gb=0.5)\n",
|
||||
"\n",
|
||||
"# profiling is a long running operation and may take up to 25 min\n",
|
||||
"profile.wait_for_completion(True)\n",
|
||||
"details = profile.get_details()"
|
||||
]
|
||||
|
||||
@@ -4,4 +4,4 @@ dependencies:
|
||||
- azureml-sdk
|
||||
- numpy
|
||||
- git+https://github.com/apple/coremltools@v2.1
|
||||
- onnxmltools==1.3.1
|
||||
- onnxmltools
|
||||
|
||||
@@ -6,4 +6,4 @@ dependencies:
|
||||
- matplotlib
|
||||
- numpy
|
||||
- onnx
|
||||
- opencv-python
|
||||
- opencv-python-headless
|
||||
|
||||
@@ -6,4 +6,4 @@ dependencies:
|
||||
- matplotlib
|
||||
- numpy
|
||||
- onnx
|
||||
- opencv-python
|
||||
- opencv-python-headless
|
||||
|
||||
@@ -202,7 +202,7 @@
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "aashishb"
|
||||
"name": "vaidyas"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
|
||||
@@ -59,8 +59,44 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Register the model\n",
|
||||
"Register an existing trained model, add descirption and tags. Prior to registering the model, you should have a TensorFlow [Saved Model](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/README.md) in the `resnet50` directory. You can download a [pretrained resnet50](http://download.tensorflow.org/models/official/20181001_resnet/savedmodels/resnet_v1_fp32_savedmodel_NCHW_jpg.tar.gz) and unpack it to that directory."
|
||||
"# Download the model\n",
|
||||
"\n",
|
||||
"Prior to registering the model, you should have a TensorFlow [Saved Model](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/README.md) in the `resnet50` directory. This cell will download a [pretrained resnet50](http://download.tensorflow.org/models/official/20181001_resnet/savedmodels/resnet_v1_fp32_savedmodel_NCHW_jpg.tar.gz) and unpack it to that directory."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import requests\n",
|
||||
"import shutil\n",
|
||||
"import tarfile\n",
|
||||
"import tempfile\n",
|
||||
"\n",
|
||||
"from io import BytesIO\n",
|
||||
"\n",
|
||||
"model_url = \"http://download.tensorflow.org/models/official/20181001_resnet/savedmodels/resnet_v1_fp32_savedmodel_NCHW_jpg.tar.gz\"\n",
|
||||
"\n",
|
||||
"archive_prefix = \"./resnet_v1_fp32_savedmodel_NCHW_jpg/1538686758/\"\n",
|
||||
"target_folder = \"resnet50\"\n",
|
||||
"\n",
|
||||
"if not os.path.exists(target_folder):\n",
|
||||
" response = requests.get(model_url)\n",
|
||||
" archive = tarfile.open(fileobj=BytesIO(response.content))\n",
|
||||
" with tempfile.TemporaryDirectory() as temp_folder:\n",
|
||||
" archive.extractall(temp_folder)\n",
|
||||
" shutil.copytree(os.path.join(temp_folder, archive_prefix), target_folder)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Register the model\n",
|
||||
"Register an existing trained model, add description and tags."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -69,13 +105,13 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#Register the model\n",
|
||||
"from azureml.core.model import Model\n",
|
||||
"model = Model.register(model_path = \"resnet50\", # this points to a local file\n",
|
||||
" model_name = \"resnet50\", # this is the name the model is registered as\n",
|
||||
" tags = {'area': \"Image classification\", 'type': \"classification\"},\n",
|
||||
" description = \"Image classification trained on Imagenet Dataset\",\n",
|
||||
" workspace = ws)\n",
|
||||
"\n",
|
||||
"model = Model.register(model_path=\"resnet50\", # This points to the local directory to upload.\n",
|
||||
" model_name=\"resnet50\", # This is the name the model is registered as.\n",
|
||||
" tags={'area': \"Image classification\", 'type': \"classification\"},\n",
|
||||
" description=\"Image classification trained on Imagenet Dataset\",\n",
|
||||
" workspace=ws)\n",
|
||||
"\n",
|
||||
"print(model.name, model.description, model.version)"
|
||||
]
|
||||
@@ -288,7 +324,7 @@
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "aashishb"
|
||||
"name": "vaidyas"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
|
||||
@@ -212,11 +212,21 @@
|
||||
"- an inference configuration\n",
|
||||
"- a single column tabular dataset, where each row contains a string representing sample request data sent to the service.\n",
|
||||
"\n",
|
||||
"Please, note that profiling is a long running operation and can take up to 25 minutes depending on the size of the dataset.\n",
|
||||
"\n",
|
||||
"At this point we only support profiling of services that expect their request data to be a string, for example: string serialized json, text, string serialized image, etc. The content of each row of the dataset (string) will be put into the body of the HTTP request and sent to the service encapsulating the model for scoring.\n",
|
||||
"\n",
|
||||
"Below is an example of how you can construct an input dataset to profile a service which expects its incoming requests to contain serialized json. In this case we created a dataset based one hundred instances of the same request data. In real world scenarios however, we suggest that you use larger datasets with various inputs, especially if your model resource usage/behavior is input dependent."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You may want to register datasets using the register() method to your workspace so they can be shared with others, reused and referred to by name in your script.\n",
|
||||
"You can try get the dataset first to see if it's already registered."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@@ -228,30 +238,40 @@
|
||||
"from azureml.core.dataset import Dataset\n",
|
||||
"from azureml.data import dataset_type_definitions\n",
|
||||
"\n",
|
||||
"input_json = {'data': [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],\n",
|
||||
" [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]]}\n",
|
||||
"# create a string that can be put in the body of the request\n",
|
||||
"serialized_input_json = json.dumps(input_json)\n",
|
||||
"dataset_content = []\n",
|
||||
"for i in range(100):\n",
|
||||
" dataset_content.append(serialized_input_json)\n",
|
||||
"sample_request_data = '\\n'.join(dataset_content)\n",
|
||||
"file_name = 'sample_request_data.txt'\n",
|
||||
"f = open(file_name, 'w')\n",
|
||||
"f.write(sample_request_data)\n",
|
||||
"f.close()\n",
|
||||
"dataset_name='sample_request_data'\n",
|
||||
"\n",
|
||||
"# upload the txt file created above to the Datastore and create a dataset from it\n",
|
||||
"data_store = Datastore.get_default(ws)\n",
|
||||
"data_store.upload_files(['./' + file_name], target_path='sample_request_data')\n",
|
||||
"datastore_path = [(data_store, 'sample_request_data' +'/' + file_name)]\n",
|
||||
"sample_request_data = Dataset.Tabular.from_delimited_files(\n",
|
||||
"dataset_registered = False\n",
|
||||
"try:\n",
|
||||
" sample_request_data = Dataset.get_by_name(workspace = ws, name = dataset_name)\n",
|
||||
" dataset_registered = True\n",
|
||||
"except:\n",
|
||||
" print(\"The dataset {} is not registered in workspace yet.\".format(dataset_name))\n",
|
||||
"\n",
|
||||
"if not dataset_registered:\n",
|
||||
" input_json = {'data': [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],\n",
|
||||
" [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]]}\n",
|
||||
" # create a string that can be put in the body of the request\n",
|
||||
" serialized_input_json = json.dumps(input_json)\n",
|
||||
" dataset_content = []\n",
|
||||
" for i in range(100):\n",
|
||||
" dataset_content.append(serialized_input_json)\n",
|
||||
" sample_request_data = '\\n'.join(dataset_content)\n",
|
||||
" file_name = \"{}.txt\".format(dataset_name)\n",
|
||||
" f = open(file_name, 'w')\n",
|
||||
" f.write(sample_request_data)\n",
|
||||
" f.close()\n",
|
||||
"\n",
|
||||
" # upload the txt file created above to the Datastore and create a dataset from it\n",
|
||||
" data_store = Datastore.get_default(ws)\n",
|
||||
" data_store.upload_files(['./' + file_name], target_path='sample_request_data')\n",
|
||||
" datastore_path = [(data_store, 'sample_request_data' +'/' + file_name)]\n",
|
||||
" sample_request_data = Dataset.Tabular.from_delimited_files(\n",
|
||||
" datastore_path,\n",
|
||||
" separator='\\n',\n",
|
||||
" infer_column_types=True,\n",
|
||||
" header=dataset_type_definitions.PromoteHeadersBehavior.NO_HEADERS)\n",
|
||||
"sample_request_data = sample_request_data.register(workspace=ws,\n",
|
||||
" name='sample_request_data',\n",
|
||||
" sample_request_data = sample_request_data.register(workspace=ws,\n",
|
||||
" name=dataset_name,\n",
|
||||
" create_new_version=True)"
|
||||
]
|
||||
},
|
||||
@@ -294,6 +314,7 @@
|
||||
" cpu=1.0,\n",
|
||||
" memory_in_gb=0.5)\n",
|
||||
"\n",
|
||||
"# profiling is a long running operation and may take up to 25 min\n",
|
||||
"profile.wait_for_completion(True)\n",
|
||||
"details = profile.get_details()"
|
||||
]
|
||||
@@ -560,7 +581,7 @@
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "aashishb"
|
||||
"name": "vaidyas"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
|
||||
@@ -302,7 +302,7 @@
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "aashishb"
|
||||
"name": "vaidyas"
|
||||
}
|
||||
],
|
||||
"category": "deployment",
|
||||
|
||||
@@ -234,7 +234,7 @@
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "aashishb"
|
||||
"name": "vaidyas"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
|
||||
@@ -243,8 +243,25 @@
|
||||
" 'azureml-interpret', 'sklearn-pandas', 'azureml-dataprep'\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"# Note: this is to pin the scikit-learn and pandas versions to be same as notebook.\n",
|
||||
"# In production scenario user would choose their dependencies\n",
|
||||
"import pkg_resources\n",
|
||||
"available_packages = pkg_resources.working_set\n",
|
||||
"sklearn_ver = None\n",
|
||||
"pandas_ver = None\n",
|
||||
"for dist in available_packages:\n",
|
||||
" if dist.key == 'scikit-learn':\n",
|
||||
" sklearn_ver = dist.version\n",
|
||||
" elif dist.key == 'pandas':\n",
|
||||
" pandas_ver = dist.version\n",
|
||||
"sklearn_dep = 'scikit-learn'\n",
|
||||
"pandas_dep = 'pandas'\n",
|
||||
"if sklearn_ver:\n",
|
||||
" sklearn_dep = 'scikit-learn=={}'.format(sklearn_ver)\n",
|
||||
"if pandas_ver:\n",
|
||||
" pandas_dep = 'pandas=={}'.format(pandas_ver)\n",
|
||||
"# specify CondaDependencies obj\n",
|
||||
"run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'],\n",
|
||||
"run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=[sklearn_dep, pandas_dep],\n",
|
||||
" pip_packages=azureml_pip_packages)\n",
|
||||
"\n",
|
||||
"# Now submit a run on AmlCompute\n",
|
||||
@@ -344,8 +361,25 @@
|
||||
" 'azureml-interpret', 'azureml-dataprep'\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"# Note: this is to pin the scikit-learn and pandas versions to be same as notebook.\n",
|
||||
"# In production scenario user would choose their dependencies\n",
|
||||
"import pkg_resources\n",
|
||||
"available_packages = pkg_resources.working_set\n",
|
||||
"sklearn_ver = None\n",
|
||||
"pandas_ver = None\n",
|
||||
"for dist in available_packages:\n",
|
||||
" if dist.key == 'scikit-learn':\n",
|
||||
" sklearn_ver = dist.version\n",
|
||||
" elif dist.key == 'pandas':\n",
|
||||
" pandas_ver = dist.version\n",
|
||||
"sklearn_dep = 'scikit-learn'\n",
|
||||
"pandas_dep = 'pandas'\n",
|
||||
"if sklearn_ver:\n",
|
||||
" sklearn_dep = 'scikit-learn=={}'.format(sklearn_ver)\n",
|
||||
"if pandas_ver:\n",
|
||||
" pandas_dep = 'pandas=={}'.format(pandas_ver)\n",
|
||||
"# specify CondaDependencies obj\n",
|
||||
"run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'],\n",
|
||||
"run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=[sklearn_dep, pandas_dep],\n",
|
||||
" pip_packages=azureml_pip_packages)\n",
|
||||
"\n",
|
||||
"from azureml.core import Run\n",
|
||||
@@ -457,8 +491,25 @@
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Note: this is to pin the scikit-learn and pandas versions to be same as notebook.\n",
|
||||
"# In production scenario user would choose their dependencies\n",
|
||||
"import pkg_resources\n",
|
||||
"available_packages = pkg_resources.working_set\n",
|
||||
"sklearn_ver = None\n",
|
||||
"pandas_ver = None\n",
|
||||
"for dist in available_packages:\n",
|
||||
" if dist.key == 'scikit-learn':\n",
|
||||
" sklearn_ver = dist.version\n",
|
||||
" elif dist.key == 'pandas':\n",
|
||||
" pandas_ver = dist.version\n",
|
||||
"sklearn_dep = 'scikit-learn'\n",
|
||||
"pandas_dep = 'pandas'\n",
|
||||
"if sklearn_ver:\n",
|
||||
" sklearn_dep = 'scikit-learn=={}'.format(sklearn_ver)\n",
|
||||
"if pandas_ver:\n",
|
||||
" pandas_dep = 'pandas=={}'.format(pandas_ver)\n",
|
||||
"# specify CondaDependencies obj\n",
|
||||
"run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'],\n",
|
||||
"run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=[sklearn_dep, pandas_dep],\n",
|
||||
" pip_packages=azureml_pip_packages)\n",
|
||||
"\n",
|
||||
"from azureml.core import Run\n",
|
||||
@@ -687,15 +738,16 @@
|
||||
"source": [
|
||||
"## Next\n",
|
||||
"Learn about other use cases of the explain package on a:\n",
|
||||
"1. [Training time: regression problem](../../tabular-data/explain-binary-classification-local.ipynb) \n",
|
||||
"1. [Training time: binary classification problem](../../tabular-data/explain-binary-classification-local.ipynb)\n",
|
||||
"1. [Training time: multiclass classification problem](../../tabular-data/explain-multiclass-classification-local.ipynb)\n",
|
||||
"1. [Training time: regression problem](https://github.com/interpretml/interpret-community/blob/master/notebooks/explain-regression-local.ipynb) \n",
|
||||
"1. [Training time: binary classification problem](https://github.com/interpretml/interpret-community/blob/master/notebooks/explain-binary-classification-local.ipynb)\n",
|
||||
"1. [Training time: multiclass classification problem](https://github.com/interpretml/interpret-community/blob/master/notebooks/explain-multiclass-classification-local.ipynb)\n",
|
||||
"1. Explain models with engineered features:\n",
|
||||
" 1. [Simple feature transformations](../../tabular-data/simple-feature-transformations-explain-local.ipynb)\n",
|
||||
" 1. [Advanced feature transformations](../../tabular-data/advanced-feature-transformations-explain-local.ipynb)\n",
|
||||
" 1. [Simple feature transformations](https://github.com/interpretml/interpret-community/blob/master/notebooks/simple-feature-transformations-explain-local.ipynb)\n",
|
||||
" 1. [Advanced feature transformations](https://github.com/interpretml/interpret-community/blob/master/notebooks/advanced-feature-transformations-explain-local.ipynb)\n",
|
||||
"1. [Save model explanations via Azure Machine Learning Run History](../run-history/save-retrieve-explanations-run-history.ipynb)\n",
|
||||
"1. Inferencing time: deploy a classification model and explainer:\n",
|
||||
" 1. [Deploy a locally-trained model and explainer](../scoring-time/train-explain-model-locally-and-deploy.ipynb)\n",
|
||||
" 1. [Deploy a locally-trained keras model and explainer](../scoring-time/train-explain-model-keras-locally-and-deploy.ipynb)\n",
|
||||
" 1. [Deploy a remotely-trained model and explainer](../scoring-time/train-explain-model-on-amlcompute-and-deploy.ipynb)"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -3,6 +3,8 @@ dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-interpret
|
||||
- interpret-community[visualization]
|
||||
- matplotlib
|
||||
- azureml-contrib-interpret
|
||||
- sklearn-pandas
|
||||
- azureml-dataprep
|
||||
|
||||
@@ -582,15 +582,16 @@
|
||||
"source": [
|
||||
"## Next\n",
|
||||
"Learn about other use cases of the explain package on a:\n",
|
||||
"1. [Training time: regression problem](../../tabular-data/explain-binary-classification-local.ipynb) \n",
|
||||
"1. [Training time: binary classification problem](../../tabular-data/explain-binary-classification-local.ipynb)\n",
|
||||
"1. [Training time: multiclass classification problem](../../tabular-data/explain-multiclass-classification-local.ipynb)\n",
|
||||
"1. [Training time: regression problem](https://github.com/interpretml/interpret-community/blob/master/notebooks/explain-regression-local.ipynb) \n",
|
||||
"1. [Training time: binary classification problem](https://github.com/interpretml/interpret-community/blob/master/notebooks/explain-binary-classification-local.ipynb)\n",
|
||||
"1. [Training time: multiclass classification problem](https://github.com/interpretml/interpret-community/blob/master/notebooks/explain-multiclass-classification-local.ipynb)\n",
|
||||
"1. Explain models with engineered features:\n",
|
||||
" 1. [Simple feature transformations](../../tabular-data/simple-feature-transformations-explain-local.ipynb)\n",
|
||||
" 1. [Advanced feature transformations](../../tabular-data/advanced-feature-transformations-explain-local.ipynb)\n",
|
||||
" 1. [Simple feature transformations](https://github.com/interpretml/interpret-community/blob/master/notebooks/simple-feature-transformations-explain-local.ipynb)\n",
|
||||
" 1. [Advanced feature transformations](https://github.com/interpretml/interpret-community/blob/master/notebooks/advanced-feature-transformations-explain-local.ipynb)\n",
|
||||
"1. [Run explainers remotely on Azure Machine Learning Compute (AMLCompute)](../remote-explanation/explain-model-on-amlcompute.ipynb)\n",
|
||||
"1. Inferencing time: deploy a classification model and explainer:\n",
|
||||
" 1. [Deploy a locally-trained model and explainer](../scoring-time/train-explain-model-locally-and-deploy.ipynb)\n",
|
||||
" 1. [Deploy a locally-trained keras model and explainer](../scoring-time/train-explain-model-keras-locally-and-deploy.ipynb)\n",
|
||||
" 1. [Deploy a remotely-trained model and explainer](../scoring-time/train-explain-model-on-amlcompute-and-deploy.ipynb)"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -3,5 +3,7 @@ dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-interpret
|
||||
- interpret-community[visualization]
|
||||
- matplotlib
|
||||
- azureml-contrib-interpret
|
||||
- ipywidgets
|
||||
|
||||
@@ -0,0 +1,33 @@
|
||||
import json
|
||||
import pandas as pd
|
||||
from sklearn.externals import joblib
|
||||
from azureml.core.model import Model
|
||||
import tensorflow as tf
|
||||
|
||||
|
||||
def init():
|
||||
global preprocess
|
||||
global network
|
||||
global scoring_explainer
|
||||
|
||||
# Retrieve the path to the model file using the model name
|
||||
# Assume original model is named original_prediction_model
|
||||
featurize_path = Model.get_model_path('featurize')
|
||||
keras_model_path = Model.get_model_path('keras_model')
|
||||
scoring_explainer_path = Model.get_model_path('IBM_attrition_explainer')
|
||||
|
||||
preprocess = joblib.load(featurize_path)
|
||||
network = tf.keras.models.load_model(keras_model_path)
|
||||
scoring_explainer = joblib.load(scoring_explainer_path)
|
||||
|
||||
|
||||
def run(raw_data):
|
||||
# Get predictions and explanations for each data point
|
||||
data = pd.read_json(raw_data)
|
||||
preprocessed_data = preprocess.transform(data)
|
||||
# Make prediction
|
||||
predictions = network.predict(preprocessed_data)
|
||||
# Retrieve model explanations
|
||||
local_importance_values = scoring_explainer.explain(data)
|
||||
# You can return any data type as long as it is JSON-serializable
|
||||
return {'predictions': predictions.tolist(), 'local_importance_values': local_importance_values}
|
||||
@@ -0,0 +1,612 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||
"\n",
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Train and explain keras model locally and deploy model with scoring explainer\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"_**This notebook illustrates how to use the Azure Machine Learning Interpretability SDK to deploy a locally-trained keras model and its corresponding deep scoring explainer to Azure Container Instances (ACI) as a web service.**_\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Problem: IBM employee attrition classification with keras (train and explain a model locally and use Azure Container Instances (ACI) for deploying your model and its corresponding deep scoring explainer as a web service.)\n",
|
||||
"\n",
|
||||
"---\n",
|
||||
"\n",
|
||||
"## Table of Contents\n",
|
||||
"\n",
|
||||
"1. [Introduction](#Introduction)\n",
|
||||
"1. [Setup](#Setup)\n",
|
||||
"1. [Run model explainer locally at training time](#Explain)\n",
|
||||
" 1. Apply feature transformations\n",
|
||||
" 1. Train a binary classification keras model\n",
|
||||
" 1. Explain the model on raw features\n",
|
||||
" 1. Generate global explanations\n",
|
||||
" 1. Generate local explanations\n",
|
||||
"1. [Visualize explanations](#Visualize)\n",
|
||||
"1. [Deploy keras model and scoring explainer](#Deploy)\n",
|
||||
"1. [Next steps](#Next)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Introduction\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"This notebook showcases how to train and explain a keras classification model locally, and deploy the trained model and its corresponding DeepExplainer to Azure Container Instances (ACI).\n",
|
||||
"It demonstrates the API calls that you need to make to submit a run for training and explaining a keras model to AMLCompute, download the compute explanations remotely, and visualizing the global and local explanations via a visualization dashboard that provides an interactive way of discovering patterns in model predictions and downloaded explanations. It also demonstrates how to use Azure Machine Learning MLOps capabilities to deploy your keras model and its corresponding DeepExplainer.\n",
|
||||
"\n",
|
||||
"We will showcase one of the tabular data explainers, DeepExplainer (SHAP), following these steps:\n",
|
||||
"1.\tDevelop a machine learning script in Python which involves the training script and the explanation script.\n",
|
||||
"2.\tRun the script locally.\n",
|
||||
"3.\tUse the interpretability toolkit\u00e2\u20ac\u2122s visualization dashboard to visualize predictions and their explanation. If the metrics and explanations don't indicate a desired outcome, loop back to step 1 and iterate on your scripts.\n",
|
||||
"5.\tAfter a satisfactory run is found, create a Deep Scoring Explainer and register the persisted model and its corresponding DeepExplainer in the model registry.\n",
|
||||
"6.\tDevelop a scoring script.\n",
|
||||
"7.\tCreate an image and register it in the image registry.\n",
|
||||
"8.\tDeploy the image as a web service in Azure.\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Setup\n",
|
||||
"Make sure you go through the [configuration notebook](../../../../configuration.ipynb) first if you haven't."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Check core SDK version number\n",
|
||||
"import azureml.core\n",
|
||||
"\n",
|
||||
"print(\"SDK version:\", azureml.core.VERSION)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Initialize a Workspace\n",
|
||||
"\n",
|
||||
"Initialize a workspace object from persisted configuration"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"create workspace"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core import Workspace\n",
|
||||
"\n",
|
||||
"ws = Workspace.from_config()\n",
|
||||
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep='\\n')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Explain\n",
|
||||
"Create An Experiment: **Experiment** is a logical container in an Azure ML Workspace. It hosts run records which can include run metrics and output artifacts from your experiments."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core import Experiment\n",
|
||||
"experiment_name = 'explain_model_at_scoring_time'\n",
|
||||
"experiment = Experiment(workspace=ws, name=experiment_name)\n",
|
||||
"run = experiment.start_logging()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# get IBM attrition data\n",
|
||||
"import os\n",
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"outdirname = 'dataset.6.21.19'\n",
|
||||
"try:\n",
|
||||
" from urllib import urlretrieve\n",
|
||||
"except ImportError:\n",
|
||||
" from urllib.request import urlretrieve\n",
|
||||
"import zipfile\n",
|
||||
"zipfilename = outdirname + '.zip'\n",
|
||||
"urlretrieve('https://publictestdatasets.blob.core.windows.net/data/' + zipfilename, zipfilename)\n",
|
||||
"with zipfile.ZipFile(zipfilename, 'r') as unzip:\n",
|
||||
" unzip.extractall('.')\n",
|
||||
"attritionData = pd.read_csv('./WA_Fn-UseC_-HR-Employee-Attrition.csv')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"from sklearn.externals import joblib\n",
|
||||
"from sklearn.preprocessing import StandardScaler, OneHotEncoder\n",
|
||||
"from sklearn.impute import SimpleImputer\n",
|
||||
"from sklearn.pipeline import Pipeline\n",
|
||||
"from sklearn_pandas import DataFrameMapper\n",
|
||||
"\n",
|
||||
"os.makedirs('./outputs', exist_ok=True)\n",
|
||||
"\n",
|
||||
"# Dropping Employee count as all values are 1 and hence attrition is independent of this feature\n",
|
||||
"attritionData = attritionData.drop(['EmployeeCount'], axis=1)\n",
|
||||
"# Dropping Employee Number since it is merely an identifier\n",
|
||||
"attritionData = attritionData.drop(['EmployeeNumber'], axis=1)\n",
|
||||
"attritionData = attritionData.drop(['Over18'], axis=1)\n",
|
||||
"# Since all values are 80\n",
|
||||
"attritionData = attritionData.drop(['StandardHours'], axis=1)\n",
|
||||
"\n",
|
||||
"# Converting target variables from string to numerical values\n",
|
||||
"target_map = {'Yes': 1, 'No': 0}\n",
|
||||
"attritionData[\"Attrition_numerical\"] = attritionData[\"Attrition\"].apply(lambda x: target_map[x])\n",
|
||||
"target = attritionData[\"Attrition_numerical\"]\n",
|
||||
"\n",
|
||||
"attritionXData = attritionData.drop(['Attrition_numerical', 'Attrition'], axis=1)\n",
|
||||
"\n",
|
||||
"# Creating dummy columns for each categorical feature\n",
|
||||
"categorical = []\n",
|
||||
"for col, value in attritionXData.iteritems():\n",
|
||||
" if value.dtype == 'object':\n",
|
||||
" categorical.append(col)\n",
|
||||
"\n",
|
||||
"# Store the numerical columns in a list numerical\n",
|
||||
"numerical = attritionXData.columns.difference(categorical)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.compose import ColumnTransformer\n",
|
||||
"\n",
|
||||
"# We create the preprocessing pipelines for both numeric and categorical data.\n",
|
||||
"numeric_transformer = Pipeline(steps=[\n",
|
||||
" ('imputer', SimpleImputer(strategy='median')),\n",
|
||||
" ('scaler', StandardScaler())])\n",
|
||||
"\n",
|
||||
"categorical_transformer = Pipeline(steps=[\n",
|
||||
" ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),\n",
|
||||
" ('onehot', OneHotEncoder(handle_unknown='ignore'))])\n",
|
||||
"\n",
|
||||
"preprocess = ColumnTransformer(\n",
|
||||
" transformers=[\n",
|
||||
" ('num', numeric_transformer, numerical),\n",
|
||||
" ('cat', categorical_transformer, categorical)])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.pipeline import make_pipeline\n",
|
||||
"pipeline = make_pipeline(preprocess)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"\n",
|
||||
"X_train, X_test, y_train, y_test = train_test_split(attritionXData, \n",
|
||||
" target, \n",
|
||||
" test_size=0.2,\n",
|
||||
" random_state=0,\n",
|
||||
" stratify=target)\n",
|
||||
"\n",
|
||||
"X_train_t = pipeline.fit_transform(X_train)\n",
|
||||
"X_test_t = pipeline.transform(X_test)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# check tensorflow version\n",
|
||||
"import tensorflow as tf\n",
|
||||
"from distutils.version import StrictVersion\n",
|
||||
"\n",
|
||||
"print(tf.__version__)\n",
|
||||
"# Append classifier to preprocessing pipeline.\n",
|
||||
"# Now we have a full prediction pipeline.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"network = tf.keras.models.Sequential()\n",
|
||||
"network.add(tf.keras.layers.Dense(units=16, activation='relu', input_shape=(X_train_t.shape[1],)))\n",
|
||||
"network.add(tf.keras.layers.Dense(units=16, activation='relu'))\n",
|
||||
"network.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))\n",
|
||||
"\n",
|
||||
"# Compile neural network\n",
|
||||
"network.compile(loss='binary_crossentropy', # Cross-entropy\n",
|
||||
" optimizer='rmsprop', # Root Mean Square Propagation\n",
|
||||
" metrics=['accuracy']) # Accuracy performance metric\n",
|
||||
"\n",
|
||||
"# Train neural network\n",
|
||||
"history = network.fit(X_train_t, # Features\n",
|
||||
" y_train, # Target vector\n",
|
||||
" epochs=20, # Number of epochs\n",
|
||||
" verbose=1, # Print description after each epoch\n",
|
||||
" batch_size=100, # Number of observations per batch\n",
|
||||
" validation_data=(X_test_t, y_test)) # Data for evaluation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# You can run the DeepExplainer directly, or run the TabularExplainer which will choose the most appropriate explainer\n",
|
||||
"from interpret.ext.greybox import DeepExplainer\n",
|
||||
"explainer = DeepExplainer(network,\n",
|
||||
" X_train,\n",
|
||||
" features=X_train.columns,\n",
|
||||
" classes=[\"STAYING\", \"LEAVING\"], \n",
|
||||
" transformations=preprocess,\n",
|
||||
" model_task=\"classification\",\n",
|
||||
" is_classifier=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Save featurization prior to keras model in the outputs folder so it automatically gets uploaded\n",
|
||||
"# We cannot save Keras with the pipeline due to known issues with pickling Keras models\n",
|
||||
"featurize_file_name = 'featurize.pkl'\n",
|
||||
"\n",
|
||||
"with open(featurize_file_name, 'wb') as file:\n",
|
||||
" joblib.dump(value=preprocess, filename=os.path.join('./outputs/', featurize_file_name))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Save keras model to disk\n",
|
||||
"keras_model_file_name = 'keras_model.pkl'\n",
|
||||
"network.save(os.path.join('./outputs/', keras_model_file_name))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Explain overall model predictions (global explanation)\n",
|
||||
"# Passing in test dataset for evaluation examples - note it must be a representative sample of the original data\n",
|
||||
"# X_train can be passed as well, but with more examples the explanations will\n",
|
||||
"# take longer although they may be more accurate\n",
|
||||
"global_explanation = explainer.explain_global(X_test)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.interpret.scoring.scoring_explainer import DeepScoringExplainer, save\n",
|
||||
"from azureml.interpret.model.serialize import KerasSerializer\n",
|
||||
"# ScoringExplainer with custom keras serializer\n",
|
||||
"scoring_explainer = DeepScoringExplainer(explainer, serializer=KerasSerializer())\n",
|
||||
"# Pickle scoring explainer locally\n",
|
||||
"save(scoring_explainer, exist_ok=True)\n",
|
||||
"\n",
|
||||
"# Register featurization\n",
|
||||
"run.upload_file(featurize_file_name, os.path.join('./outputs/', featurize_file_name))\n",
|
||||
"featurize_model = run.register_model(model_name='featurize',\n",
|
||||
" model_path=featurize_file_name)\n",
|
||||
"\n",
|
||||
"# Register keras model\n",
|
||||
"run.upload_file(keras_model_file_name, os.path.join('./outputs/', keras_model_file_name))\n",
|
||||
"keras_model = run.register_model(model_name='keras_model',\n",
|
||||
" model_path=keras_model_file_name)\n",
|
||||
"\n",
|
||||
"# Register scoring explainer\n",
|
||||
"run.upload_file('IBM_attrition_explainer.pkl', 'scoring_explainer.pkl')\n",
|
||||
"scoring_explainer_model = run.register_model(model_name='IBM_attrition_explainer', model_path='IBM_attrition_explainer.pkl')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Use helper utility to wrap keras model in scikit-learn style API for visualization dashboard\n",
|
||||
"from interpret_community.common.model_wrapper import wrap_model\n",
|
||||
"from interpret_community.dataset.dataset_wrapper import DatasetWrapper\n",
|
||||
"wrapped_model, ml_domain = wrap_model(network, DatasetWrapper(X_test_t), \"classification\")\n",
|
||||
"wrapped_model.fit = network.fit\n",
|
||||
"from sklearn.pipeline import Pipeline\n",
|
||||
"dashboard_pipeline = Pipeline(steps=[('preprocess', preprocess), ('network', wrapped_model)])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Visualize\n",
|
||||
"Visualize the explanations"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from interpret_community.widget import ExplanationDashboard"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ExplanationDashboard(global_explanation, dashboard_pipeline, datasetX=X_test)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Deploy \n",
|
||||
"\n",
|
||||
"Deploy Model and ScoringExplainer.\n",
|
||||
"\n",
|
||||
"Please note that you must indicate azureml-defaults with version >= 1.0.45 as a pip dependency, because it contains the functionality needed to host the model as a web service."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.conda_dependencies import CondaDependencies \n",
|
||||
"\n",
|
||||
"# azureml-defaults is required to host the model as a web service.\n",
|
||||
"azureml_pip_packages = [\n",
|
||||
" 'azureml-defaults', 'azureml-contrib-interpret', 'azureml-core', 'azureml-telemetry',\n",
|
||||
" 'azureml-interpret'\n",
|
||||
"]\n",
|
||||
"# Note: this is to pin the scikit-learn and pandas versions to be same as notebook.\n",
|
||||
"# In production scenario user would choose their dependencies\n",
|
||||
"import pkg_resources\n",
|
||||
"available_packages = pkg_resources.working_set\n",
|
||||
"sklearn_ver = None\n",
|
||||
"pandas_ver = None\n",
|
||||
"for dist in available_packages:\n",
|
||||
" if dist.key == 'scikit-learn':\n",
|
||||
" sklearn_ver = dist.version\n",
|
||||
" elif dist.key == 'pandas':\n",
|
||||
" pandas_ver = dist.version\n",
|
||||
"sklearn_dep = 'scikit-learn'\n",
|
||||
"pandas_dep = 'pandas'\n",
|
||||
"if sklearn_ver:\n",
|
||||
" sklearn_dep = 'scikit-learn=={}'.format(sklearn_ver)\n",
|
||||
"if pandas_ver:\n",
|
||||
" pandas_dep = 'pandas=={}'.format(pandas_ver)\n",
|
||||
"# specify CondaDependencies obj\n",
|
||||
"myenv = CondaDependencies.create(conda_packages=[sklearn_dep, pandas_dep],\n",
|
||||
" pip_packages=['sklearn-pandas', 'pyyaml', 'tensorflow<2.0', 'keras==2.3.1'] + azureml_pip_packages)\n",
|
||||
"\n",
|
||||
"with open(\"myenv.yml\",\"w\") as f:\n",
|
||||
" f.write(myenv.serialize_to_string())\n",
|
||||
"\n",
|
||||
"with open(\"myenv.yml\",\"r\") as f:\n",
|
||||
" print(f.read())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.model import Model\n",
|
||||
"# retrieve scoring explainer for deployment\n",
|
||||
"scoring_explainer_model = Model(ws, 'IBM_attrition_explainer')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.webservice import Webservice\n",
|
||||
"from azureml.core.model import InferenceConfig\n",
|
||||
"from azureml.core.webservice import AciWebservice\n",
|
||||
"from azureml.core.model import Model\n",
|
||||
"from azureml.core.environment import Environment\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"aciconfig = AciWebservice.deploy_configuration(cpu_cores=1,\n",
|
||||
" memory_gb=1,\n",
|
||||
" tags={\"data\": \"IBM_Attrition\",\n",
|
||||
" \"method\" : \"local_explanation\"},\n",
|
||||
" description='Get local explanations for IBM Employee Attrition data')\n",
|
||||
"\n",
|
||||
"myenv = Environment.from_conda_specification(name=\"myenv\", file_path=\"myenv.yml\")\n",
|
||||
"inference_config = InferenceConfig(entry_script=\"score_local_explain_keras.py\", environment=myenv)\n",
|
||||
"\n",
|
||||
"# Use configs and models generated above\n",
|
||||
"service = Model.deploy(ws, 'model-scoring-keras-deploy-local', [scoring_explainer_model, featurize_model, keras_model], inference_config, aciconfig)\n",
|
||||
"service.wait_for_deployment(show_output=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(service.get_logs())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import requests\n",
|
||||
"import json\n",
|
||||
"\n",
|
||||
"# Create data to test service with\n",
|
||||
"sample_data = '{\"Age\":{\"899\":49},\"BusinessTravel\":{\"899\":\"Travel_Rarely\"},\"DailyRate\":{\"899\":1098},\"Department\":{\"899\":\"Research & Development\"},\"DistanceFromHome\":{\"899\":4},\"Education\":{\"899\":2},\"EducationField\":{\"899\":\"Medical\"},\"EnvironmentSatisfaction\":{\"899\":1},\"Gender\":{\"899\":\"Male\"},\"HourlyRate\":{\"899\":85},\"JobInvolvement\":{\"899\":2},\"JobLevel\":{\"899\":5},\"JobRole\":{\"899\":\"Manager\"},\"JobSatisfaction\":{\"899\":3},\"MaritalStatus\":{\"899\":\"Married\"},\"MonthlyIncome\":{\"899\":18711},\"MonthlyRate\":{\"899\":12124},\"NumCompaniesWorked\":{\"899\":2},\"OverTime\":{\"899\":\"No\"},\"PercentSalaryHike\":{\"899\":13},\"PerformanceRating\":{\"899\":3},\"RelationshipSatisfaction\":{\"899\":3},\"StockOptionLevel\":{\"899\":1},\"TotalWorkingYears\":{\"899\":23},\"TrainingTimesLastYear\":{\"899\":2},\"WorkLifeBalance\":{\"899\":4},\"YearsAtCompany\":{\"899\":1},\"YearsInCurrentRole\":{\"899\":0},\"YearsSinceLastPromotion\":{\"899\":0},\"YearsWithCurrManager\":{\"899\":0}}'\n",
|
||||
"\n",
|
||||
"headers = {'Content-Type':'application/json'}\n",
|
||||
"\n",
|
||||
"# send request to service\n",
|
||||
"resp = requests.post(service.scoring_uri, sample_data, headers=headers)\n",
|
||||
"\n",
|
||||
"print(\"POST to url\", service.scoring_uri)\n",
|
||||
"# can convert back to Python objects from json string if desired\n",
|
||||
"print(\"prediction:\", resp.text)\n",
|
||||
"result = json.loads(resp.text)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#plot the feature importance for the prediction\n",
|
||||
"import numpy as np\n",
|
||||
"import matplotlib.pyplot as plt; plt.rcdefaults()\n",
|
||||
"\n",
|
||||
"labels = json.loads(sample_data)\n",
|
||||
"labels = labels.keys()\n",
|
||||
"objects = labels\n",
|
||||
"y_pos = np.arange(len(objects))\n",
|
||||
"performance = result[\"local_importance_values\"][0][0]\n",
|
||||
"\n",
|
||||
"plt.bar(y_pos, performance, align='center', alpha=0.5)\n",
|
||||
"plt.xticks(y_pos, objects)\n",
|
||||
"locs, labels = plt.xticks()\n",
|
||||
"plt.setp(labels, rotation=90)\n",
|
||||
"plt.ylabel('Feature impact - leaving vs not leaving')\n",
|
||||
"plt.title('Local feature importance for prediction')\n",
|
||||
"\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"service.delete()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Next\n",
|
||||
"Learn about other use cases of the explain package on a:\n",
|
||||
"1. [Training time: regression problem](https://github.com/interpretml/interpret-community/blob/master/notebooks/explain-regression-local.ipynb) \n",
|
||||
"1. [Training time: binary classification problem](https://github.com/interpretml/interpret-community/blob/master/notebooks/explain-binary-classification-local.ipynb)\n",
|
||||
"1. [Training time: multiclass classification problem](https://github.com/interpretml/interpret-community/blob/master/notebooks/explain-multiclass-classification-local.ipynb)\n",
|
||||
"1. Explain models with engineered features:\n",
|
||||
" 1. [Simple feature transformations](https://github.com/interpretml/interpret-community/blob/master/notebooks/simple-feature-transformations-explain-local.ipynb)\n",
|
||||
" 1. [Advanced feature transformations](https://github.com/interpretml/interpret-community/blob/master/notebooks/advanced-feature-transformations-explain-local.ipynb)\n",
|
||||
"1. [Save model explanations via Azure Machine Learning Run History](../run-history/save-retrieve-explanations-run-history.ipynb)\n",
|
||||
"1. [Run explainers remotely on Azure Machine Learning Compute (AMLCompute)](../remote-explanation/explain-model-on-amlcompute.ipynb)\n",
|
||||
"1. [Inferencing time: deploy a remotely-trained model and explainer](./train-explain-model-on-amlcompute-and-deploy.ipynb)\n",
|
||||
"1. [Inferencing time: deploy a locally-trained model and explainer](./train-explain-model-locally-and-deploy.ipynb)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "mesameki"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
"name": "python36"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -0,0 +1,12 @@
|
||||
name: train-explain-model-keras-locally-and-deploy
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-interpret
|
||||
- interpret-community[visualization]
|
||||
- matplotlib
|
||||
- azureml-contrib-interpret
|
||||
- sklearn-pandas
|
||||
- ipywidgets
|
||||
- tensorflow<2.0
|
||||
- keras
|
||||
@@ -328,8 +328,25 @@
|
||||
"]\n",
|
||||
" \n",
|
||||
"\n",
|
||||
"# Note: this is to pin the scikit-learn and pandas versions to be same as notebook.\n",
|
||||
"# In production scenario user would choose their dependencies\n",
|
||||
"import pkg_resources\n",
|
||||
"available_packages = pkg_resources.working_set\n",
|
||||
"sklearn_ver = None\n",
|
||||
"pandas_ver = None\n",
|
||||
"for dist in available_packages:\n",
|
||||
" if dist.key == 'scikit-learn':\n",
|
||||
" sklearn_ver = dist.version\n",
|
||||
" elif dist.key == 'pandas':\n",
|
||||
" pandas_ver = dist.version\n",
|
||||
"sklearn_dep = 'scikit-learn'\n",
|
||||
"pandas_dep = 'pandas'\n",
|
||||
"if sklearn_ver:\n",
|
||||
" sklearn_dep = 'scikit-learn=={}'.format(sklearn_ver)\n",
|
||||
"if pandas_ver:\n",
|
||||
" pandas_dep = 'pandas=={}'.format(pandas_ver)\n",
|
||||
"# specify CondaDependencies obj\n",
|
||||
"myenv = CondaDependencies.create(conda_packages=['scikit-learn', 'pandas'],\n",
|
||||
"myenv = CondaDependencies.create(conda_packages=[sklearn_dep, pandas_dep],\n",
|
||||
" pip_packages=['sklearn-pandas', 'pyyaml'] + azureml_pip_packages,\n",
|
||||
" pin_sdk_version=False)\n",
|
||||
"\n",
|
||||
@@ -445,15 +462,16 @@
|
||||
"source": [
|
||||
"## Next\n",
|
||||
"Learn about other use cases of the explain package on a:\n",
|
||||
"1. [Training time: regression problem](../../tabular-data/explain-binary-classification-local.ipynb) \n",
|
||||
"1. [Training time: binary classification problem](../../tabular-data/explain-binary-classification-local.ipynb)\n",
|
||||
"1. [Training time: multiclass classification problem](../../tabular-data/explain-multiclass-classification-local.ipynb)\n",
|
||||
"1. [Training time: regression problem](https://github.com/interpretml/interpret-community/blob/master/notebooks/explain-regression-local.ipynb) \n",
|
||||
"1. [Training time: binary classification problem](https://github.com/interpretml/interpret-community/blob/master/notebooks/explain-binary-classification-local.ipynb)\n",
|
||||
"1. [Training time: multiclass classification problem](https://github.com/interpretml/interpret-community/blob/master/notebooks/explain-multiclass-classification-local.ipynb)\n",
|
||||
"1. Explain models with engineered features:\n",
|
||||
" 1. [Simple feature transformations](../../tabular-data/simple-feature-transformations-explain-local.ipynb)\n",
|
||||
" 1. [Advanced feature transformations](../../tabular-data/advanced-feature-transformations-explain-local.ipynb)\n",
|
||||
" 1. [Simple feature transformations](https://github.com/interpretml/interpret-community/blob/master/notebooks/simple-feature-transformations-explain-local.ipynb)\n",
|
||||
" 1. [Advanced feature transformations](https://github.com/interpretml/interpret-community/blob/master/notebooks/advanced-feature-transformations-explain-local.ipynb)\n",
|
||||
"1. [Save model explanations via Azure Machine Learning Run History](../run-history/save-retrieve-explanations-run-history.ipynb)\n",
|
||||
"1. [Run explainers remotely on Azure Machine Learning Compute (AMLCompute)](../remote-explanation/explain-model-on-amlcompute.ipynb)\n",
|
||||
"1. [Inferencing time: deploy a remotely-trained model and explainer](./train-explain-model-on-amlcompute-and-deploy.ipynb)"
|
||||
"1. [Inferencing time: deploy a remotely-trained model and explainer](./train-explain-model-on-amlcompute-and-deploy.ipynb)\n",
|
||||
"1. [Inferencing time: deploy a locally-trained keras model and explainer](./train-explain-model-keras-locally-and-deploy.ipynb)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -3,6 +3,8 @@ dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-interpret
|
||||
- interpret-community[visualization]
|
||||
- matplotlib
|
||||
- azureml-contrib-interpret
|
||||
- sklearn-pandas
|
||||
- ipywidgets
|
||||
|
||||
@@ -246,8 +246,25 @@
|
||||
" \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Note: this is to pin the scikit-learn and pandas versions to be same as notebook.\n",
|
||||
"# In production scenario user would choose their dependencies\n",
|
||||
"import pkg_resources\n",
|
||||
"available_packages = pkg_resources.working_set\n",
|
||||
"sklearn_ver = None\n",
|
||||
"pandas_ver = None\n",
|
||||
"for dist in available_packages:\n",
|
||||
" if dist.key == 'scikit-learn':\n",
|
||||
" sklearn_ver = dist.version\n",
|
||||
" elif dist.key == 'pandas':\n",
|
||||
" pandas_ver = dist.version\n",
|
||||
"sklearn_dep = 'scikit-learn'\n",
|
||||
"pandas_dep = 'pandas'\n",
|
||||
"if sklearn_ver:\n",
|
||||
" sklearn_dep = 'scikit-learn=={}'.format(sklearn_ver)\n",
|
||||
"if pandas_ver:\n",
|
||||
" pandas_dep = 'pandas=={}'.format(pandas_ver)\n",
|
||||
"# specify CondaDependencies obj\n",
|
||||
"run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'],\n",
|
||||
"run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=[sklearn_dep, pandas_dep],\n",
|
||||
" pip_packages=['sklearn_pandas', 'pyyaml'] + azureml_pip_packages,\n",
|
||||
" pin_sdk_version=False)\n",
|
||||
"# Now submit a run on AmlCompute\n",
|
||||
@@ -397,8 +414,25 @@
|
||||
"]\n",
|
||||
" \n",
|
||||
"\n",
|
||||
"# Note: this is to pin the scikit-learn and pandas versions to be same as notebook.\n",
|
||||
"# In production scenario user would choose their dependencies\n",
|
||||
"import pkg_resources\n",
|
||||
"available_packages = pkg_resources.working_set\n",
|
||||
"sklearn_ver = None\n",
|
||||
"pandas_ver = None\n",
|
||||
"for dist in available_packages:\n",
|
||||
" if dist.key == 'scikit-learn':\n",
|
||||
" sklearn_ver = dist.version\n",
|
||||
" elif dist.key == 'pandas':\n",
|
||||
" pandas_ver = dist.version\n",
|
||||
"sklearn_dep = 'scikit-learn'\n",
|
||||
"pandas_dep = 'pandas'\n",
|
||||
"if sklearn_ver:\n",
|
||||
" sklearn_dep = 'scikit-learn=={}'.format(sklearn_ver)\n",
|
||||
"if pandas_ver:\n",
|
||||
" pandas_dep = 'pandas=={}'.format(pandas_ver)\n",
|
||||
"# specify CondaDependencies obj\n",
|
||||
"myenv = CondaDependencies.create(conda_packages=['scikit-learn', 'pandas'],\n",
|
||||
"myenv = CondaDependencies.create(conda_packages=[sklearn_dep, pandas_dep],\n",
|
||||
" pip_packages=['sklearn-pandas', 'pyyaml'] + azureml_pip_packages,\n",
|
||||
" pin_sdk_version=False)\n",
|
||||
"\n",
|
||||
@@ -483,16 +517,16 @@
|
||||
"source": [
|
||||
"## Next\n",
|
||||
"Learn about other use cases of the explain package on a:\n",
|
||||
"1. [Training time: regression problem](../../tabular-data/explain-binary-classification-local.ipynb) \n",
|
||||
"1. [Training time: binary classification problem](../../tabular-data/explain-binary-classification-local.ipynb)\n",
|
||||
"1. [Training time: multiclass classification problem](../../tabular-data/explain-multiclass-classification-local.ipynb)\n",
|
||||
"1. [Training time: regression problem](https://github.com/interpretml/interpret-community/blob/master/notebooks/explain-regression-local.ipynb) \n",
|
||||
"1. [Training time: binary classification problem](https://github.com/interpretml/interpret-community/blob/master/notebooks/explain-binary-classification-local.ipynb)\n",
|
||||
"1. [Training time: multiclass classification problem](https://github.com/interpretml/interpret-community/blob/master/notebooks/explain-multiclass-classification-local.ipynb)\n",
|
||||
"1. Explain models with engineered features:\n",
|
||||
" 1. [Simple feature transformations](../../tabular-data/simple-feature-transformations-explain-local.ipynb)\n",
|
||||
" 1. [Advanced feature transformations](../../tabular-data/advanced-feature-transformations-explain-local.ipynb)\n",
|
||||
" 1. [Simple feature transformations](https://github.com/interpretml/interpret-community/blob/master/notebooks/simple-feature-transformations-explain-local.ipynb)\n",
|
||||
" 1. [Advanced feature transformations](https://github.com/interpretml/interpret-community/blob/master/notebooks/advanced-feature-transformations-explain-local.ipynb)\n",
|
||||
"1. [Save model explanations via Azure Machine Learning Run History](../run-history/save-retrieve-explanations-run-history.ipynb)\n",
|
||||
"1. [Run explainers remotely on Azure Machine Learning Compute (AMLCompute)](../remote-explanation/explain-model-on-amlcompute.ipynb)\n",
|
||||
"1. [Inferencing time: deploy a locally-trained model and explainer](./train-explain-model-locally-and-deploy.ipynb)\n",
|
||||
" "
|
||||
"1. [Inferencing time: deploy a locally-trained keras model and explainer](./train-explain-model-keras-locally-and-deploy.ipynb)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -3,6 +3,8 @@ dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-interpret
|
||||
- interpret-community[visualization]
|
||||
- matplotlib
|
||||
- azureml-contrib-interpret
|
||||
- sklearn-pandas
|
||||
- azureml-dataprep
|
||||
|
||||
@@ -537,259 +537,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Deploy the model in ACI\n",
|
||||
"Now we are ready to deploy the model as a web service running in Azure Container Instance [ACI](https://azure.microsoft.com/en-us/services/container-instances/). \n",
|
||||
"### Create score.py\n",
|
||||
"First, we will create a scoring script that will be invoked by the web service call. \n",
|
||||
"\n",
|
||||
"* Note that the scoring script must have two required functions, `init()` and `run(input_data)`. \n",
|
||||
" * In `init()` function, you typically load the model into a global object. This function is executed only once when the Docker container is started. \n",
|
||||
" * In `run(input_data)` function, the model is used to predict a value based on the input data. The input and output to `run` typically use JSON as serialization and de-serialization format but you are not limited to that."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%writefile score.py\n",
|
||||
"import json\n",
|
||||
"import numpy as np\n",
|
||||
"import os\n",
|
||||
"import tensorflow as tf\n",
|
||||
"\n",
|
||||
"def init():\n",
|
||||
" global X, output, sess\n",
|
||||
" tf.reset_default_graph()\n",
|
||||
" # AZUREML_MODEL_DIR is an environment variable created during deployment.\n",
|
||||
" # It is the path to the model folder (./azureml-models/$MODEL_NAME/$VERSION)\n",
|
||||
" # For multiple models, it points to the folder containing all deployed models (./azureml-models)\n",
|
||||
" model_root = os.path.join(os.getenv('AZUREML_MODEL_DIR'), 'model')\n",
|
||||
" saver = tf.train.import_meta_graph(os.path.join(model_root, 'mnist-tf.model.meta'))\n",
|
||||
" X = tf.get_default_graph().get_tensor_by_name(\"network/X:0\")\n",
|
||||
" output = tf.get_default_graph().get_tensor_by_name(\"network/output/MatMul:0\")\n",
|
||||
" \n",
|
||||
" sess = tf.Session()\n",
|
||||
" saver.restore(sess, os.path.join(model_root, 'mnist-tf.model'))\n",
|
||||
"\n",
|
||||
"def run(raw_data):\n",
|
||||
" data = np.array(json.loads(raw_data)['data'])\n",
|
||||
" # make prediction\n",
|
||||
" out = output.eval(session=sess, feed_dict={X: data})\n",
|
||||
" y_hat = np.argmax(out, axis=1)\n",
|
||||
" return y_hat.tolist()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create myenv.yml\n",
|
||||
"We also need to create an environment file so that Azure Machine Learning can install the necessary packages in the Docker image which are required by your scoring script. In this case, we need to specify packages `numpy`, `tensorflow`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.runconfig import CondaDependencies\n",
|
||||
"\n",
|
||||
"cd = CondaDependencies.create()\n",
|
||||
"cd.add_conda_package('numpy')\n",
|
||||
"cd.add_tensorflow_conda_package()\n",
|
||||
"cd.save_to_file(base_directory='./', conda_file_path='myenv.yml')\n",
|
||||
"\n",
|
||||
"print(cd.serialize_to_string())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Deploy to ACI\n",
|
||||
"Now we can deploy. **This cell will run for about 7-8 minutes**. Behind the scenes, AzureML will build a Docker container image with the given configuration, if one is not already available. This image will be deployed to the ACI infrastructure and the scoring script and model will be mounted on the container. The model will then be available as a web service with an HTTP endpoint to accept REST client calls."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"from azureml.core.environment import Environment\n",
|
||||
"from azureml.core.model import Model, InferenceConfig\n",
|
||||
"from azureml.core.webservice import AciWebservice\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"myenv = Environment.from_conda_specification(name=\"env\", file_path=\"myenv.yml\")\n",
|
||||
"inference_config = InferenceConfig(entry_script=\"score.py\", environment=myenv)\n",
|
||||
"\n",
|
||||
"aciconfig = AciWebservice.deploy_configuration(cpu_cores=1, \n",
|
||||
" memory_gb=1, \n",
|
||||
" tags={'name':'mnist', 'framework': 'TensorFlow DNN'},\n",
|
||||
" description='Tensorflow DNN on MNIST')\n",
|
||||
"\n",
|
||||
"service = Model.deploy(ws, 'tf-mnist-svc', [model], inference_config, aciconfig)\n",
|
||||
"service.wait_for_deployment(show_output=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Tip: If something goes wrong with the deployment, the first thing to look at is the logs from the service by running the following command:**"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(service.get_logs())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This is the scoring web service endpoint:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(service.scoring_uri)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Test the deployed model\n",
|
||||
"Let's test the deployed model. Pick 30 random samples from the test set, and send it to the web service hosted in ACI. Note here we are using the `run` API in the SDK to invoke the service. You can also make raw HTTP calls using any HTTP tool such as curl.\n",
|
||||
"\n",
|
||||
"After the invocation, we print the returned predictions and plot them along with the input images. Use red font color and inversed image (white on black) to highlight the misclassified samples. Note since the model accuracy is pretty high, you might have to run the below cell a few times before you can see a misclassified sample."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import json\n",
|
||||
"\n",
|
||||
"# find 30 random samples from test set\n",
|
||||
"n = 30\n",
|
||||
"sample_indices = np.random.permutation(X_test.shape[0])[0:n]\n",
|
||||
"\n",
|
||||
"test_samples = json.dumps({\"data\": X_test[sample_indices].tolist()})\n",
|
||||
"test_samples = bytes(test_samples, encoding='utf8')\n",
|
||||
"\n",
|
||||
"# predict using the deployed model\n",
|
||||
"result = service.run(input_data=test_samples)\n",
|
||||
"\n",
|
||||
"# compare actual value vs. the predicted values:\n",
|
||||
"i = 0\n",
|
||||
"plt.figure(figsize = (20, 1))\n",
|
||||
"\n",
|
||||
"for s in sample_indices:\n",
|
||||
" plt.subplot(1, n, i + 1)\n",
|
||||
" plt.axhline('')\n",
|
||||
" plt.axvline('')\n",
|
||||
" \n",
|
||||
" # use different color for misclassified sample\n",
|
||||
" font_color = 'red' if y_test[s] != result[i] else 'black'\n",
|
||||
" clr_map = plt.cm.gray if y_test[s] != result[i] else plt.cm.Greys\n",
|
||||
" \n",
|
||||
" plt.text(x=10, y=-10, s=y_hat[s], fontsize=18, color=font_color)\n",
|
||||
" plt.imshow(X_test[s].reshape(28, 28), cmap=clr_map)\n",
|
||||
" \n",
|
||||
" i = i + 1\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We can also send raw HTTP request to the service."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import requests\n",
|
||||
"\n",
|
||||
"# send a random row from the test set to score\n",
|
||||
"random_index = np.random.randint(0, len(X_test)-1)\n",
|
||||
"input_data = \"{\\\"data\\\": [\" + str(list(X_test[random_index])) + \"]}\"\n",
|
||||
"\n",
|
||||
"headers = {'Content-Type':'application/json'}\n",
|
||||
"\n",
|
||||
"resp = requests.post(service.scoring_uri, input_data, headers=headers)\n",
|
||||
"\n",
|
||||
"print(\"POST to url\", service.scoring_uri)\n",
|
||||
"print(\"input data:\", input_data)\n",
|
||||
"print(\"label:\", y_test[random_index])\n",
|
||||
"print(\"prediction:\", resp.text)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's look at the workspace after the web service was deployed. You should see \n",
|
||||
"* a registered model named 'model' and with the id 'model:1'\n",
|
||||
"* an image called 'tf-mnist' and with a docker image location pointing to your workspace's Azure Container Registry (ACR) \n",
|
||||
"* a webservice called 'tf-mnist' with some scoring URL"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"models = ws.models\n",
|
||||
"for name, model in models.items():\n",
|
||||
" print(\"Model: {}, ID: {}\".format(name, model.id))\n",
|
||||
" \n",
|
||||
"images = ws.images\n",
|
||||
"for name, image in images.items():\n",
|
||||
" print(\"Image: {}, location: {}\".format(name, image.image_location))\n",
|
||||
" \n",
|
||||
"webservices = ws.webservices\n",
|
||||
"for name, webservice in webservices.items():\n",
|
||||
" print(\"Webservice: {}, scoring URI: {}\".format(name, webservice.scoring_uri))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Clean up\n",
|
||||
"You can delete the ACI deployment with a simple delete API call."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"service.delete()"
|
||||
"For model deployment, please refer to [Training, hyperparameter tune, and deploy with TensorFlow](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/ml-frameworks/tensorflow/deployment/train-hyperparameter-tune-deploy-with-tensorflow/train-hyperparameter-tune-deploy-with-tensorflow.ipynb)."
|
||||
]
|
||||
}
|
||||
],
|
||||
|
||||
@@ -70,11 +70,7 @@
|
||||
"from azureml.core.experiment import Experiment\n",
|
||||
"from azureml.core.workspace import Workspace\n",
|
||||
"from azureml.train.automl import AutoMLConfig\n",
|
||||
"from azureml.core.compute import AmlCompute\n",
|
||||
"from azureml.core.compute import ComputeTarget\n",
|
||||
"from azureml.core.dataset import Dataset\n",
|
||||
"from azureml.core.runconfig import RunConfiguration\n",
|
||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||
"\n",
|
||||
"from azureml.pipeline.steps import AutoMLStep\n",
|
||||
"\n",
|
||||
@@ -105,7 +101,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create an Azure ML experiment\n",
|
||||
"Let's create an experiment named \"automl-classification\" and a folder to hold the training scripts. The script runs will be recorded under the experiment in Azure.\n",
|
||||
"Let's create an experiment named \"automlstep-classification\" and a folder to hold the training scripts. The script runs will be recorded under the experiment in Azure.\n",
|
||||
"\n",
|
||||
"The best practice is to use separate folders for scripts and its dependent files for each step and specify that folder as the `source_directory` for the step. This helps reduce the size of the snapshot created for the step (only the specific folder is snapshotted). Since changes in any files in the `source_directory` would trigger a re-upload of the snapshot, this helps keep the reuse of the step when there are no changes in the `source_directory` of the step."
|
||||
]
|
||||
@@ -138,45 +134,25 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Choose a name for your cluster.\n",
|
||||
"from azureml.core.compute import AmlCompute\n",
|
||||
"from azureml.core.compute import ComputeTarget\n",
|
||||
"from azureml.core.compute_target import ComputeTargetException\n",
|
||||
"\n",
|
||||
"# Choose a name for your CPU cluster\n",
|
||||
"amlcompute_cluster_name = \"cpu-cluster\"\n",
|
||||
"\n",
|
||||
"found = False\n",
|
||||
"# Check if this compute target already exists in the workspace.\n",
|
||||
"cts = ws.compute_targets\n",
|
||||
"if amlcompute_cluster_name in cts and cts[amlcompute_cluster_name].type == 'AmlCompute':\n",
|
||||
" found = True\n",
|
||||
" print('Found existing compute target.')\n",
|
||||
" compute_target = cts[amlcompute_cluster_name]\n",
|
||||
" \n",
|
||||
"if not found:\n",
|
||||
" print('Creating a new compute target...')\n",
|
||||
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\", # for GPU, use \"STANDARD_NC6\"\n",
|
||||
"# Verify that cluster does not exist already\n",
|
||||
"try:\n",
|
||||
" compute_target = ComputeTarget(workspace=ws, name=amlcompute_cluster_name)\n",
|
||||
" print('Found existing cluster, use it.')\n",
|
||||
"except ComputeTargetException:\n",
|
||||
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',# for GPU, use \"STANDARD_NC6\"\n",
|
||||
" #vm_priority = 'lowpriority', # optional\n",
|
||||
" max_nodes = 4)\n",
|
||||
" max_nodes=4)\n",
|
||||
" compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, compute_config)\n",
|
||||
"\n",
|
||||
" # Create the cluster.\n",
|
||||
" compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)\n",
|
||||
" \n",
|
||||
" # Can poll for a minimum number of nodes and for a specific timeout.\n",
|
||||
" # If no min_node_count is provided, it will use the scale settings for the cluster.\n",
|
||||
" compute_target.wait_for_completion(show_output = True, min_node_count = 1, timeout_in_minutes = 10)\n",
|
||||
" \n",
|
||||
" # For a more detailed view of current AmlCompute status, use get_status()."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# create a new RunConfig object\n",
|
||||
"conda_run_config = RunConfiguration(framework=\"python\")\n",
|
||||
"cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]'])\n",
|
||||
"conda_run_config.environment.python.conda_dependencies = cd\n",
|
||||
"\n",
|
||||
"print('run config is ready')"
|
||||
"compute_target.wait_for_completion(show_output=True, min_node_count = 1, timeout_in_minutes = 10)\n",
|
||||
"# For a more detailed view of current AmlCompute status, use get_status()."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -192,19 +168,30 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# The data referenced here was a 1MB simple random sample of the Chicago Crime data into a local temporary directory.\n",
|
||||
"example_data = 'https://dprepdata.blob.core.windows.net/demo/crime0-random.csv'\n",
|
||||
"dataset = Dataset.Tabular.from_delimited_files(example_data)\n",
|
||||
"dataset.to_pandas_dataframe().describe()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dataset.take(5).to_pandas_dataframe()"
|
||||
"# Try to load the dataset from the Workspace. Otherwise, create it from the file\n",
|
||||
"found = False\n",
|
||||
"key = \"Crime-Dataset\"\n",
|
||||
"description_text = \"Crime Dataset (used in the the aml-pipelines-with-automated-machine-learning-step.ipynb notebook)\"\n",
|
||||
"\n",
|
||||
"if key in ws.datasets.keys(): \n",
|
||||
" found = True\n",
|
||||
" dataset = ws.datasets[key] \n",
|
||||
"\n",
|
||||
"if not found:\n",
|
||||
" # Create AML Dataset and register it into Workspace\n",
|
||||
" # The data referenced here was a 1MB simple random sample of the Chicago Crime data into a local temporary directory.\n",
|
||||
" example_data = 'https://dprepdata.blob.core.windows.net/demo/crime0-random.csv'\n",
|
||||
" dataset = Dataset.Tabular.from_delimited_files(example_data)\n",
|
||||
" dataset = dataset.drop_columns(['FBI Code'])\n",
|
||||
" \n",
|
||||
" #Register Dataset in Workspace\n",
|
||||
" dataset = dataset.register(workspace=ws,\n",
|
||||
" name=key,\n",
|
||||
" description=description_text)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"df = dataset.to_pandas_dataframe()\n",
|
||||
"df.describe()"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -224,9 +211,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"X = dataset.drop_columns(columns=['Primary Type', 'FBI Code'])\n",
|
||||
"y = dataset.keep_columns(columns=['Primary Type'], validate=True)\n",
|
||||
"print('X and y are ready!')"
|
||||
"dataset.take(5).to_pandas_dataframe()"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -244,19 +229,18 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"automl_settings = {\n",
|
||||
" \"iteration_timeout_minutes\" : 5,\n",
|
||||
" \"iterations\" : 2,\n",
|
||||
" \"primary_metric\" : 'AUC_weighted',\n",
|
||||
" \"preprocess\" : True,\n",
|
||||
" \"verbosity\" : logging.INFO\n",
|
||||
" \"experiment_timeout_minutes\": 20,\n",
|
||||
" \"max_concurrent_iterations\": 4,\n",
|
||||
" \"primary_metric\" : 'AUC_weighted'\n",
|
||||
"}\n",
|
||||
"automl_config = AutoMLConfig(task = 'classification',\n",
|
||||
" debug_log = 'automl_errors.log',\n",
|
||||
"automl_config = AutoMLConfig(compute_target=compute_target,\n",
|
||||
" task = \"classification\",\n",
|
||||
" training_data=dataset,\n",
|
||||
" label_column_name=\"Primary Type\", \n",
|
||||
" path = project_folder,\n",
|
||||
" compute_target=compute_target,\n",
|
||||
" run_configuration=conda_run_config,\n",
|
||||
" X = X,\n",
|
||||
" y = y,\n",
|
||||
" enable_early_stopping= True,\n",
|
||||
" featurization= 'auto',\n",
|
||||
" debug_log = \"automl_errors.log\",\n",
|
||||
" **automl_settings\n",
|
||||
" )"
|
||||
]
|
||||
@@ -265,6 +249,8 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Create Pipeline and AutoMLStep\n",
|
||||
"\n",
|
||||
"You can define outputs for the AutoMLStep using TrainingOutput."
|
||||
]
|
||||
},
|
||||
@@ -300,7 +286,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"automlstep-remarks-sample1"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"automl_step = AutoMLStep(\n",
|
||||
@@ -313,7 +303,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"automlstep-remarks-sample2"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.pipeline.core import Pipeline\n",
|
||||
@@ -378,7 +372,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import json\n",
|
||||
"with open(metrics_output._path_on_datastore) as f: \n",
|
||||
"with open(metrics_output._path_on_datastore) as f:\n",
|
||||
" metrics_output_result = f.read()\n",
|
||||
" \n",
|
||||
"deserialized_metrics_output = json.loads(metrics_output_result)\n",
|
||||
@@ -399,6 +393,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Retrieve best model from Pipeline Run\n",
|
||||
"best_model_output = pipeline_run.get_pipeline_output(best_model_output_name)\n",
|
||||
"num_file_downloaded = best_model_output.download('.', show_progress=True)"
|
||||
]
|
||||
@@ -416,6 +411,15 @@
|
||||
"best_model"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"best_model.steps"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -431,11 +435,11 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dataset = Dataset.Tabular.from_delimited_files(path='https://dprepdata.blob.core.windows.net/demo/crime0-test.csv')\n",
|
||||
"dataset_test = Dataset.Tabular.from_delimited_files(path='https://dprepdata.blob.core.windows.net/demo/crime0-test.csv')\n",
|
||||
"df_test = dataset_test.to_pandas_dataframe()\n",
|
||||
"df_test = df_test[pd.notnull(df['Primary Type'])]\n",
|
||||
"df_test = df_test[pd.notnull(df_test['Primary Type'])]\n",
|
||||
"\n",
|
||||
"y_test = df_test[['Primary Type']]\n",
|
||||
"y_test = df_test['Primary Type']\n",
|
||||
"X_test = df_test.drop(['Primary Type', 'FBI Code'], axis=1)"
|
||||
]
|
||||
},
|
||||
@@ -454,15 +458,19 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from pandas_ml import ConfusionMatrix\n",
|
||||
"\n",
|
||||
"from sklearn.metrics import confusion_matrix\n",
|
||||
"ypred = best_model.predict(X_test)\n",
|
||||
"\n",
|
||||
"cm = ConfusionMatrix(y_test['Primary Type'], ypred)\n",
|
||||
"\n",
|
||||
"print(cm)\n",
|
||||
"\n",
|
||||
"cm.plot()"
|
||||
"cm = confusion_matrix(y_test, ypred)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Visualize the confusion matrix\n",
|
||||
"pd.DataFrame(cm).style.background_gradient(cmap='Blues', low=0, high=0.9)"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
||||
@@ -16,16 +16,12 @@
|
||||
"\n",
|
||||
"You can combine the two part tutorial into one using AzureML Pipelines as Pipelines provide a way to stitch together various steps involved (like data preparation and training in this case) in a machine learning workflow.\n",
|
||||
"\n",
|
||||
"In this notebook, you learn how to prepare data for regression modeling by using the [Azure Machine Learning Data Prep SDK](https://aka.ms/data-prep-sdk) for Python. You run various transformations to filter and combine two different NYC taxi data sets. Once you prepare the NYC taxi data for regression modeling, then you will use [AutoMLStep](https://docs.microsoft.com/en-us/python/api/azureml-train-automl/azureml.train.automl.automlstep?view=azure-ml-py) available with [Azure Machine Learning Pipelines](https://aka.ms/aml-pipelines) to define your machine learning goals and constraints as well as to launch the automated machine learning process. The automated machine learning technique iterates over many combinations of algorithms and hyperparameters until it finds the best model based on your criterion.\n",
|
||||
"In this notebook, you learn how to prepare data for regression modeling by using open source library [pandas](https://pandas.pydata.org/). You run various transformations to filter and combine two different NYC taxi datasets. Once you prepare the NYC taxi data for regression modeling, then you will use [AutoMLStep](https://docs.microsoft.com/python/api/azureml-train-automl-runtime/azureml.train.automl.runtime.automl_step.automlstep?view=azure-ml-py) available with [Azure Machine Learning Pipelines](https://aka.ms/aml-pipelines) to define your machine learning goals and constraints as well as to launch the automated machine learning process. The automated machine learning technique iterates over many combinations of algorithms and hyperparameters until it finds the best model based on your criterion.\n",
|
||||
"\n",
|
||||
"After you complete building the model, you can predict the cost of a taxi trip by training a model on data features. These features include the pickup day and time, the number of passengers, and the pickup location.\n",
|
||||
"\n",
|
||||
"## Prerequisite\n",
|
||||
"If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, make sure you go through the configuration Notebook located at https://github.com/Azure/MachineLearningNotebooks first if you haven't. This sets you up with a working config file that has information on your workspace, subscription id, etc.\n",
|
||||
"\n",
|
||||
"We will run various transformations to filter and combine two different NYC taxi data sets. We will use DataPrep SDK for this preparing data. \n",
|
||||
"\n",
|
||||
"Perform `pip install azureml-dataprep` if you have't already done so."
|
||||
"If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, make sure you go through the configuration Notebook located at https://github.com/Azure/MachineLearningNotebooks first if you haven't. This sets you up with a working config file that has information on your workspace, subscription id, etc."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -108,7 +104,6 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import azureml.dataprep as dprep\n",
|
||||
"from IPython.display import display\n",
|
||||
"\n",
|
||||
"display(green_df_raw.head(5))\n",
|
||||
@@ -144,8 +139,8 @@
|
||||
"if not os.path.exists(yelloDir):\n",
|
||||
" os.mkdir(yelloDir)\n",
|
||||
" \n",
|
||||
"greenTaxiData = greenDir + \"/part-00000\"\n",
|
||||
"yellowTaxiData = yelloDir + \"/part-00000\"\n",
|
||||
"greenTaxiData = greenDir + \"/unprepared.parquet\"\n",
|
||||
"yellowTaxiData = yelloDir + \"/unprepared.parquet\"\n",
|
||||
"\n",
|
||||
"green_df_raw.to_csv(greenTaxiData, index=False)\n",
|
||||
"yellow_df_raw.to_csv(yellowTaxiData, index=False)\n",
|
||||
@@ -169,17 +164,54 @@
|
||||
"\n",
|
||||
"default_store.upload_files([greenTaxiData], \n",
|
||||
" target_path = 'green', \n",
|
||||
" overwrite = False, \n",
|
||||
" overwrite = True, \n",
|
||||
" show_progress = True)\n",
|
||||
"\n",
|
||||
"default_store.upload_files([yellowTaxiData], \n",
|
||||
" target_path = 'yellow', \n",
|
||||
" overwrite = False, \n",
|
||||
" overwrite = True, \n",
|
||||
" show_progress = True)\n",
|
||||
"\n",
|
||||
"print(\"Upload calls completed.\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create and register datasets\n",
|
||||
"\n",
|
||||
"By creating a dataset, you create a reference to the data source location. If you applied any subsetting transformations to the dataset, they will be stored in the dataset as well. You can learn more about the what subsetting capabilities are supported by referring to [our documentation](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.data.tabular_dataset.tabulardataset?view=azure-ml-py#remarks). The data remains in its existing location, so no extra storage cost is incurred."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core import Dataset\n",
|
||||
"green_taxi_data = Dataset.Tabular.from_delimited_files(default_store.path('green/unprepared.parquet'))\n",
|
||||
"yellow_taxi_data = Dataset.Tabular.from_delimited_files(default_store.path('yellow/unprepared.parquet'))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Register the taxi datasets with the workspace so that you can reuse them in other experiments or share with your colleagues who have access to your workspace."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"green_taxi_data = green_taxi_data.register(ws, 'green_taxi_data')\n",
|
||||
"yellow_taxi_data = yellow_taxi_data.register(ws, 'yellow_taxi_data')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -194,20 +226,22 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.compute import AmlCompute\n",
|
||||
"from azureml.core.compute import ComputeTarget\n",
|
||||
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
|
||||
"from azureml.core.compute_target import ComputeTargetException\n",
|
||||
"\n",
|
||||
"aml_compute = ws.get_default_compute_target(\"CPU\")\n",
|
||||
"# Choose a name for your CPU cluster\n",
|
||||
"amlcompute_cluster_name = \"cpu-cluster\"\n",
|
||||
"\n",
|
||||
"if aml_compute is None:\n",
|
||||
" amlcompute_cluster_name = \"cpu-cluster\"\n",
|
||||
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\",\n",
|
||||
" max_nodes = 4)\n",
|
||||
"# Verify that cluster does not exist already\n",
|
||||
"try:\n",
|
||||
" aml_compute = ComputeTarget(workspace=ws, name=amlcompute_cluster_name)\n",
|
||||
" print('Found existing cluster, use it.')\n",
|
||||
"except ComputeTargetException:\n",
|
||||
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',\n",
|
||||
" max_nodes=4)\n",
|
||||
" aml_compute = ComputeTarget.create(ws, amlcompute_cluster_name, compute_config)\n",
|
||||
"\n",
|
||||
" aml_compute = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)\n",
|
||||
" aml_compute.wait_for_completion(show_output = True, min_node_count = None, timeout_in_minutes = 20)\n",
|
||||
"\n",
|
||||
"aml_compute"
|
||||
"aml_compute.wait_for_completion(show_output=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -215,7 +249,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Define RunConfig for the compute\n",
|
||||
"We need `azureml-dataprep` SDK for all the steps below. We will also use `pandas`, `scikit-learn` and `automl` for the training step. Defining the `runconfig` for that."
|
||||
"We will also use `pandas`, `scikit-learn` and `automl`, `pyarrow` for the pipeline steps. Defining the `runconfig` for that."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -242,13 +276,10 @@
|
||||
"# Use conda_dependencies.yml to create a conda environment in the Docker image for execution\n",
|
||||
"aml_run_config.environment.python.user_managed_dependencies = False\n",
|
||||
"\n",
|
||||
"# Auto-prepare the Docker image when used for execution (if it is not already prepared)\n",
|
||||
"aml_run_config.auto_prepare_environment = True\n",
|
||||
"\n",
|
||||
"# Specify CondaDependencies obj, add necessary packages\n",
|
||||
"aml_run_config.environment.python.conda_dependencies = CondaDependencies.create(\n",
|
||||
" conda_packages=['pandas','scikit-learn'], \n",
|
||||
" pip_packages=['azureml-sdk', 'azureml-dataprep', 'azureml-train-automl'], \n",
|
||||
" pip_packages=['azureml-sdk[automl,explain]', 'pyarrow'], \n",
|
||||
" pin_sdk_version=False)\n",
|
||||
"\n",
|
||||
"print (\"Run configuration created.\")"
|
||||
@@ -259,7 +290,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Prepare data\n",
|
||||
"Now we will prepare for regression modeling by using the `Azure Machine Learning Data Prep SDK for Python`. We run various transformations to filter and combine two different NYC taxi data sets.\n",
|
||||
"Now we will prepare for regression modeling by using `pandas`. We run various transformations to filter and combine two different NYC taxi datasets.\n",
|
||||
"\n",
|
||||
"We achieve this by creating a separate step for each transformation as this allows us to reuse the steps and saves us from running all over again in case of any change. We will keep data preparation scripts in one subfolder and training scripts in another.\n",
|
||||
"\n",
|
||||
@@ -270,7 +301,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Define Useful Colums\n",
|
||||
"#### Define Useful Columns\n",
|
||||
"Here we are defining a set of \"useful\" columns for both Green and Yellow taxi data."
|
||||
]
|
||||
},
|
||||
@@ -304,18 +335,12 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.data.data_reference import DataReference \n",
|
||||
"from azureml.pipeline.core import PipelineData\n",
|
||||
"from azureml.pipeline.steps import PythonScriptStep\n",
|
||||
"\n",
|
||||
"# python scripts folder\n",
|
||||
"prepare_data_folder = './scripts/prepdata'\n",
|
||||
"\n",
|
||||
"blob_green_data = DataReference(\n",
|
||||
" datastore=default_store,\n",
|
||||
" data_reference_name=\"green_taxi_data\",\n",
|
||||
" path_on_datastore=\"green/part-00000\")\n",
|
||||
"\n",
|
||||
"# rename columns as per Azure Machine Learning NYC Taxi tutorial\n",
|
||||
"green_columns = str({ \n",
|
||||
" \"vendorID\": \"vendor\",\n",
|
||||
@@ -332,7 +357,7 @@
|
||||
"}).replace(\",\", \";\")\n",
|
||||
"\n",
|
||||
"# Define output after cleansing step\n",
|
||||
"cleansed_green_data = PipelineData(\"green_taxi_data\", datastore=default_store)\n",
|
||||
"cleansed_green_data = PipelineData(\"cleansed_green_data\", datastore=default_store).as_dataset()\n",
|
||||
"\n",
|
||||
"print('Cleanse script is in {}.'.format(os.path.realpath(prepare_data_folder)))\n",
|
||||
"\n",
|
||||
@@ -341,11 +366,10 @@
|
||||
"cleansingStepGreen = PythonScriptStep(\n",
|
||||
" name=\"Cleanse Green Taxi Data\",\n",
|
||||
" script_name=\"cleanse.py\", \n",
|
||||
" arguments=[\"--input_cleanse\", blob_green_data, \n",
|
||||
" \"--useful_columns\", useful_columns,\n",
|
||||
" arguments=[\"--useful_columns\", useful_columns,\n",
|
||||
" \"--columns\", green_columns,\n",
|
||||
" \"--output_cleanse\", cleansed_green_data],\n",
|
||||
" inputs=[blob_green_data],\n",
|
||||
" inputs=[green_taxi_data.as_named_input('raw_data')],\n",
|
||||
" outputs=[cleansed_green_data],\n",
|
||||
" compute_target=aml_compute,\n",
|
||||
" runconfig=aml_run_config,\n",
|
||||
@@ -369,11 +393,6 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"blob_yellow_data = DataReference(\n",
|
||||
" datastore=default_store,\n",
|
||||
" data_reference_name=\"yellow_taxi_data\",\n",
|
||||
" path_on_datastore=\"yellow/part-00000\")\n",
|
||||
"\n",
|
||||
"yellow_columns = str({\n",
|
||||
" \"vendorID\": \"vendor\",\n",
|
||||
" \"tpepPickupDateTime\": \"pickup_datetime\",\n",
|
||||
@@ -389,7 +408,7 @@
|
||||
"}).replace(\",\", \";\")\n",
|
||||
"\n",
|
||||
"# Define output after cleansing step\n",
|
||||
"cleansed_yellow_data = PipelineData(\"yellow_taxi_data\", datastore=default_store)\n",
|
||||
"cleansed_yellow_data = PipelineData(\"cleansed_yellow_data\", datastore=default_store).as_dataset()\n",
|
||||
"\n",
|
||||
"print('Cleanse script is in {}.'.format(os.path.realpath(prepare_data_folder)))\n",
|
||||
"\n",
|
||||
@@ -398,11 +417,10 @@
|
||||
"cleansingStepYellow = PythonScriptStep(\n",
|
||||
" name=\"Cleanse Yellow Taxi Data\",\n",
|
||||
" script_name=\"cleanse.py\", \n",
|
||||
" arguments=[\"--input_cleanse\", blob_yellow_data, \n",
|
||||
" \"--useful_columns\", useful_columns,\n",
|
||||
" arguments=[\"--useful_columns\", useful_columns,\n",
|
||||
" \"--columns\", yellow_columns,\n",
|
||||
" \"--output_cleanse\", cleansed_yellow_data],\n",
|
||||
" inputs=[blob_yellow_data],\n",
|
||||
" inputs=[yellow_taxi_data.as_named_input('raw_data')],\n",
|
||||
" outputs=[cleansed_yellow_data],\n",
|
||||
" compute_target=aml_compute,\n",
|
||||
" runconfig=aml_run_config,\n",
|
||||
@@ -428,7 +446,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Define output after merging step\n",
|
||||
"merged_data = PipelineData(\"merged_data\", datastore=default_store)\n",
|
||||
"merged_data = PipelineData(\"merged_data\", datastore=default_store).as_dataset()\n",
|
||||
"\n",
|
||||
"print('Merge script is in {}.'.format(os.path.realpath(prepare_data_folder)))\n",
|
||||
"\n",
|
||||
@@ -437,10 +455,9 @@
|
||||
"mergingStep = PythonScriptStep(\n",
|
||||
" name=\"Merge Taxi Data\",\n",
|
||||
" script_name=\"merge.py\", \n",
|
||||
" arguments=[\"--input_green_merge\", cleansed_green_data, \n",
|
||||
" \"--input_yellow_merge\", cleansed_yellow_data,\n",
|
||||
" \"--output_merge\", merged_data],\n",
|
||||
" inputs=[cleansed_green_data, cleansed_yellow_data],\n",
|
||||
" arguments=[\"--output_merge\", merged_data],\n",
|
||||
" inputs=[cleansed_green_data.parse_parquet_files(file_extension=None),\n",
|
||||
" cleansed_yellow_data.parse_parquet_files(file_extension=None)],\n",
|
||||
" outputs=[merged_data],\n",
|
||||
" compute_target=aml_compute,\n",
|
||||
" runconfig=aml_run_config,\n",
|
||||
@@ -466,7 +483,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Define output after merging step\n",
|
||||
"filtered_data = PipelineData(\"filtered_data\", datastore=default_store)\n",
|
||||
"filtered_data = PipelineData(\"filtered_data\", datastore=default_store).as_dataset()\n",
|
||||
"\n",
|
||||
"print('Filter script is in {}.'.format(os.path.realpath(prepare_data_folder)))\n",
|
||||
"\n",
|
||||
@@ -475,9 +492,8 @@
|
||||
"filterStep = PythonScriptStep(\n",
|
||||
" name=\"Filter Taxi Data\",\n",
|
||||
" script_name=\"filter.py\", \n",
|
||||
" arguments=[\"--input_filter\", merged_data, \n",
|
||||
" \"--output_filter\", filtered_data],\n",
|
||||
" inputs=[merged_data],\n",
|
||||
" arguments=[\"--output_filter\", filtered_data],\n",
|
||||
" inputs=[merged_data.parse_parquet_files(file_extension=None)],\n",
|
||||
" outputs=[filtered_data],\n",
|
||||
" compute_target=aml_compute,\n",
|
||||
" runconfig = aml_run_config,\n",
|
||||
@@ -503,7 +519,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Define output after normalize step\n",
|
||||
"normalized_data = PipelineData(\"normalized_data\", datastore=default_store)\n",
|
||||
"normalized_data = PipelineData(\"normalized_data\", datastore=default_store).as_dataset()\n",
|
||||
"\n",
|
||||
"print('Normalize script is in {}.'.format(os.path.realpath(prepare_data_folder)))\n",
|
||||
"\n",
|
||||
@@ -512,9 +528,8 @@
|
||||
"normalizeStep = PythonScriptStep(\n",
|
||||
" name=\"Normalize Taxi Data\",\n",
|
||||
" script_name=\"normalize.py\", \n",
|
||||
" arguments=[\"--input_normalize\", filtered_data, \n",
|
||||
" \"--output_normalize\", normalized_data],\n",
|
||||
" inputs=[filtered_data],\n",
|
||||
" arguments=[\"--output_normalize\", normalized_data],\n",
|
||||
" inputs=[filtered_data.parse_parquet_files(file_extension=None)],\n",
|
||||
" outputs=[normalized_data],\n",
|
||||
" compute_target=aml_compute,\n",
|
||||
" runconfig = aml_run_config,\n",
|
||||
@@ -544,8 +559,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Define output after transforme step\n",
|
||||
"transformed_data = PipelineData(\"transformed_data\", datastore=default_store)\n",
|
||||
"# Define output after transform step\n",
|
||||
"transformed_data = PipelineData(\"transformed_data\", datastore=default_store).as_dataset()\n",
|
||||
"\n",
|
||||
"print('Transform script is in {}.'.format(os.path.realpath(prepare_data_folder)))\n",
|
||||
"\n",
|
||||
@@ -554,9 +569,8 @@
|
||||
"transformStep = PythonScriptStep(\n",
|
||||
" name=\"Transform Taxi Data\",\n",
|
||||
" script_name=\"transform.py\", \n",
|
||||
" arguments=[\"--input_transform\", normalized_data,\n",
|
||||
" \"--output_transform\", transformed_data],\n",
|
||||
" inputs=[normalized_data],\n",
|
||||
" arguments=[\"--output_transform\", transformed_data],\n",
|
||||
" inputs=[normalized_data.parse_parquet_files(file_extension=None)],\n",
|
||||
" outputs=[transformed_data],\n",
|
||||
" compute_target=aml_compute,\n",
|
||||
" runconfig = aml_run_config,\n",
|
||||
@@ -571,8 +585,8 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Extract features\n",
|
||||
"Add the following columns to be features for our model creation. The prediction value will be *cost*."
|
||||
"### Split the data into train and test sets\n",
|
||||
"This function segregates the data into a dataset for model training and a dataset for testing."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -581,92 +595,11 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"feature_columns = str(['pickup_weekday','pickup_hour', 'distance','passengers', 'vendor']).replace(\",\", \";\")\n",
|
||||
"\n",
|
||||
"train_model_folder = './scripts/trainmodel'\n",
|
||||
"\n",
|
||||
"print('Extract script is in {}.'.format(os.path.realpath(train_model_folder)))\n",
|
||||
"\n",
|
||||
"# features data after transform step\n",
|
||||
"features_data = PipelineData(\"features_data\", datastore=default_store)\n",
|
||||
"\n",
|
||||
"# featurization step creation\n",
|
||||
"# See the featurization.py for details about input and output\n",
|
||||
"featurizationStep = PythonScriptStep(\n",
|
||||
" name=\"Extract Features\",\n",
|
||||
" script_name=\"featurization.py\", \n",
|
||||
" arguments=[\"--input_featurization\", transformed_data, \n",
|
||||
" \"--useful_columns\", feature_columns,\n",
|
||||
" \"--output_featurization\", features_data],\n",
|
||||
" inputs=[transformed_data],\n",
|
||||
" outputs=[features_data],\n",
|
||||
" compute_target=aml_compute,\n",
|
||||
" runconfig = aml_run_config,\n",
|
||||
" source_directory=train_model_folder,\n",
|
||||
" allow_reuse=True\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print(\"featurizationStep created.\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Extract label"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"label_columns = str(['cost']).replace(\",\", \";\")\n",
|
||||
"\n",
|
||||
"# label data after transform step\n",
|
||||
"label_data = PipelineData(\"label_data\", datastore=default_store)\n",
|
||||
"\n",
|
||||
"print('Extract script is in {}.'.format(os.path.realpath(train_model_folder)))\n",
|
||||
"\n",
|
||||
"# label step creation\n",
|
||||
"# See the featurization.py for details about input and output\n",
|
||||
"labelStep = PythonScriptStep(\n",
|
||||
" name=\"Extract Labels\",\n",
|
||||
" script_name=\"featurization.py\", \n",
|
||||
" arguments=[\"--input_featurization\", transformed_data, \n",
|
||||
" \"--useful_columns\", label_columns,\n",
|
||||
" \"--output_featurization\", label_data],\n",
|
||||
" inputs=[transformed_data],\n",
|
||||
" outputs=[label_data],\n",
|
||||
" compute_target=aml_compute,\n",
|
||||
" runconfig = aml_run_config,\n",
|
||||
" source_directory=train_model_folder,\n",
|
||||
" allow_reuse=True\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print(\"labelStep created.\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Split the data into train and test sets\n",
|
||||
"This function segregates the data into the **x**, features, dataset for model training and **y**, values to predict, dataset for testing."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# train and test splits output\n",
|
||||
"output_split_train_x = PipelineData(\"output_split_train_x\", datastore=default_store)\n",
|
||||
"output_split_train_y = PipelineData(\"output_split_train_y\", datastore=default_store)\n",
|
||||
"output_split_test_x = PipelineData(\"output_split_test_x\", datastore=default_store)\n",
|
||||
"output_split_test_y = PipelineData(\"output_split_test_y\", datastore=default_store)\n",
|
||||
"output_split_train = PipelineData(\"output_split_train\", datastore=default_store).as_dataset()\n",
|
||||
"output_split_test = PipelineData(\"output_split_test\", datastore=default_store).as_dataset()\n",
|
||||
"\n",
|
||||
"print('Data spilt script is in {}.'.format(os.path.realpath(train_model_folder)))\n",
|
||||
"\n",
|
||||
@@ -675,14 +608,10 @@
|
||||
"testTrainSplitStep = PythonScriptStep(\n",
|
||||
" name=\"Train Test Data Split\",\n",
|
||||
" script_name=\"train_test_split.py\", \n",
|
||||
" arguments=[\"--input_split_features\", features_data, \n",
|
||||
" \"--input_split_labels\", label_data,\n",
|
||||
" \"--output_split_train_x\", output_split_train_x,\n",
|
||||
" \"--output_split_train_y\", output_split_train_y,\n",
|
||||
" \"--output_split_test_x\", output_split_test_x,\n",
|
||||
" \"--output_split_test_y\", output_split_test_y],\n",
|
||||
" inputs=[features_data, label_data],\n",
|
||||
" outputs=[output_split_train_x, output_split_train_y, output_split_test_x, output_split_test_y],\n",
|
||||
" arguments=[\"--output_split_train\", output_split_train,\n",
|
||||
" \"--output_split_test\", output_split_test],\n",
|
||||
" inputs=[transformed_data.parse_parquet_files(file_extension=None)],\n",
|
||||
" outputs=[output_split_train, output_split_test],\n",
|
||||
" compute_target=aml_compute,\n",
|
||||
" runconfig = aml_run_config,\n",
|
||||
" source_directory=train_model_folder,\n",
|
||||
@@ -697,7 +626,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Use automated machine learning to build regression model\n",
|
||||
"Now we will use **automated machine learning** to build the regression model. We will use [AutoMLStep](https://docs.microsoft.com/en-us/python/api/azureml-train-automl/azureml.train.automl.automlstep?view=azure-ml-py) in AML Pipelines for this part. These functions use various features from the data set and allow an automated model to build relationships between the features and the price of a taxi trip."
|
||||
"Now we will use **automated machine learning** to build the regression model. We will use [AutoMLStep](https://docs.microsoft.com/python/api/azureml-train-automl-runtime/azureml.train.automl.runtime.automl_step.automlstep?view=azure-ml-py) in AML Pipelines for this part. Run `pip install azureml-sdk[automl]` to get the automated machine learning package. These functions use various features from the data set and allow an automated model to build relationships between the features and the price of a taxi trip."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -727,52 +656,13 @@
|
||||
"print(\"Experiment created\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Create get_data script\n",
|
||||
"\n",
|
||||
"A script with `get_data()` function is necessary to fetch training features(X) and labels(Y) on remote compute, from input data. Here we use mounted path of `train_test_split` step to get the x and y train values. They are added as environment variable on compute machine by default\n",
|
||||
"\n",
|
||||
"Note: Every DataReference are added as environment variable on compute machine since the defualt mode is mount"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print('get_data.py will be written to {}.'.format(os.path.realpath(train_model_folder)))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%writefile $train_model_folder/get_data.py\n",
|
||||
"import os\n",
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"def get_data():\n",
|
||||
" print(\"In get_data\")\n",
|
||||
" print(os.environ['AZUREML_DATAREFERENCE_output_split_train_x'])\n",
|
||||
" X_train = pd.read_csv(os.environ['AZUREML_DATAREFERENCE_output_split_train_x'] + \"/part-00000\", header=0)\n",
|
||||
" y_train = pd.read_csv(os.environ['AZUREML_DATAREFERENCE_output_split_train_y'] + \"/part-00000\", header=0)\n",
|
||||
" \n",
|
||||
" return { \"X\" : X_train.values, \"y\" : y_train.values.flatten() }"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Define settings for autogeneration and tuning\n",
|
||||
"\n",
|
||||
"Here we define the experiment parameter and model settings for autogeneration and tuning. We can specify automl_settings as **kwargs as well. Also note that we have to use a get_data() function for remote excutions. See get_data script for more details.\n",
|
||||
"Here we define the experiment parameter and model settings for autogeneration and tuning. We can specify automl_settings as **kwargs as well.\n",
|
||||
"\n",
|
||||
"Use your defined training settings as a parameter to an `AutoMLConfig` object. Additionally, specify your training data and the type of model, which is `regression` in this case.\n",
|
||||
"\n",
|
||||
@@ -793,17 +683,19 @@
|
||||
" \"iteration_timeout_minutes\" : 10,\n",
|
||||
" \"iterations\" : 2,\n",
|
||||
" \"primary_metric\" : 'spearman_correlation',\n",
|
||||
" \"preprocess\" : True,\n",
|
||||
" \"verbosity\" : logging.INFO,\n",
|
||||
" \"n_cross_validations\": 5\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"training_dataset = output_split_train.parse_parquet_files(file_extension=None).keep_columns(['pickup_weekday','pickup_hour', 'distance','passengers', 'vendor', 'cost'])\n",
|
||||
"\n",
|
||||
"automl_config = AutoMLConfig(task = 'regression',\n",
|
||||
" debug_log = 'automated_ml_errors.log',\n",
|
||||
" path = train_model_folder,\n",
|
||||
" compute_target=aml_compute,\n",
|
||||
" run_configuration=aml_run_config,\n",
|
||||
" data_script = train_model_folder + \"/get_data.py\",\n",
|
||||
" compute_target = aml_compute,\n",
|
||||
" run_configuration = aml_run_config,\n",
|
||||
" featurization = 'auto',\n",
|
||||
" training_data = training_dataset,\n",
|
||||
" label_column_name = 'cost',\n",
|
||||
" **automl_settings)\n",
|
||||
" \n",
|
||||
"print(\"AutoML config created.\")"
|
||||
@@ -822,15 +714,12 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.train.automl.runtime import AutoMLStep\n",
|
||||
"from azureml.pipeline.steps import AutoMLStep\n",
|
||||
"\n",
|
||||
"trainWithAutomlStep = AutoMLStep(\n",
|
||||
" name='AutoML_Regression',\n",
|
||||
"trainWithAutomlStep = AutoMLStep(name='AutoML_Regression',\n",
|
||||
" automl_config=automl_config,\n",
|
||||
" inputs=[output_split_train_x, output_split_train_y],\n",
|
||||
" allow_reuse=True,\n",
|
||||
" hash_paths=[os.path.realpath(train_model_folder)])\n",
|
||||
"\n",
|
||||
" passthru_automl_config=False,\n",
|
||||
" allow_reuse=True)\n",
|
||||
"print(\"trainWithAutomlStep created.\")"
|
||||
]
|
||||
},
|
||||
@@ -892,12 +781,11 @@
|
||||
" return path\n",
|
||||
"\n",
|
||||
"def fetch_df(step, output_name):\n",
|
||||
" output_data = step.get_output_data(output_name)\n",
|
||||
" \n",
|
||||
" output_data = step.get_output_data(output_name) \n",
|
||||
" download_path = './outputs/' + output_name\n",
|
||||
" output_data.download(download_path)\n",
|
||||
" df_path = get_download_path(download_path, output_name) + '/part-00000'\n",
|
||||
" return dprep.auto_read_file(path=df_path)"
|
||||
" output_data.download(download_path, overwrite=True)\n",
|
||||
" df_path = get_download_path(download_path, output_name) + '/processed.parquet'\n",
|
||||
" return pd.read_parquet(df_path)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -939,7 +827,7 @@
|
||||
"merge_step = pipeline_run.find_step_run(mergingStep.name)[0]\n",
|
||||
"combined_df = fetch_df(merge_step, merged_data.name)\n",
|
||||
"\n",
|
||||
"display(combined_df.get_profile())"
|
||||
"display(combined_df.describe())"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -958,7 +846,7 @@
|
||||
"filter_step = pipeline_run.find_step_run(filterStep.name)[0]\n",
|
||||
"filtered_df = fetch_df(filter_step, filtered_data.name)\n",
|
||||
"\n",
|
||||
"display(filtered_df.get_profile())"
|
||||
"display(filtered_df.describe())"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -996,7 +884,7 @@
|
||||
"transform_step = pipeline_run.find_step_run(transformStep.name)[0]\n",
|
||||
"transformed_df = fetch_df(transform_step, transformed_data.name)\n",
|
||||
"\n",
|
||||
"display(transformed_df.get_profile())\n",
|
||||
"display(transformed_df.describe())\n",
|
||||
"display(transformed_df.head(5))"
|
||||
]
|
||||
},
|
||||
@@ -1014,16 +902,10 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"split_step = pipeline_run.find_step_run(testTrainSplitStep.name)[0]\n",
|
||||
"train_split_x = fetch_df(split_step, output_split_train_x.name)\n",
|
||||
"train_split_y = fetch_df(split_step, output_split_train_y.name)\n",
|
||||
"train_split = fetch_df(split_step, output_split_train.name)\n",
|
||||
"\n",
|
||||
"display_x_train = train_split_x.keep_columns(columns=[\"vendor\", \"pickup_weekday\", \"pickup_hour\", \"passengers\", \"distance\"])\n",
|
||||
"display_y_train = train_split_y.rename_columns(column_pairs={\"Column1\": \"cost\"})\n",
|
||||
"\n",
|
||||
"display(display_x_train.get_profile())\n",
|
||||
"display(display_x_train.head(5))\n",
|
||||
"display(display_y_train.get_profile())\n",
|
||||
"display(display_y_train.head(5))"
|
||||
"display(train_split.describe())\n",
|
||||
"display(train_split.head(5))"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1125,14 +1007,11 @@
|
||||
"source": [
|
||||
"# split_step = pipeline_run.find_step_run(testTrainSplitStep.name)[0]\n",
|
||||
"\n",
|
||||
"# x_test = fetch_df(split_step, output_split_test_x.name)\n",
|
||||
"# y_test = fetch_df(split_step, output_split_test_y.name)\n",
|
||||
"# x_test = fetch_df(split_step, output_split_test.name)[['distance','passengers', 'vendor','pickup_weekday','pickup_hour']]\n",
|
||||
"# y_test = fetch_df(split_step, output_split_test.name)[['cost']]\n",
|
||||
"\n",
|
||||
"# display(x_test.keep_columns(columns=[\"vendor\", \"pickup_weekday\", \"pickup_hour\", \"passengers\", \"distance\"]).head(5))\n",
|
||||
"# display(y_test.rename_columns(column_pairs={\"Column1\": \"cost\"}).head(5))\n",
|
||||
"\n",
|
||||
"# x_test = x_test.to_pandas_dataframe()\n",
|
||||
"# y_test = y_test.to_pandas_dataframe()"
|
||||
"# display(x_test.head(5))\n",
|
||||
"# display(y_test.head(5))"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -1150,9 +1029,9 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# y_predict = fitted_model.predict(x_test.values)\n",
|
||||
"# y_predict = fitted_model.predict(x_test)\n",
|
||||
"\n",
|
||||
"# y_actual = y_test.iloc[:,0].values.tolist()\n",
|
||||
"# y_actual = y_test.values.tolist()\n",
|
||||
"\n",
|
||||
"# display(pd.DataFrame({'Actual':y_actual, 'Predicted':y_predict}).head(5))"
|
||||
]
|
||||
@@ -1168,7 +1047,7 @@
|
||||
"# fig = plt.figure(figsize=(14, 10))\n",
|
||||
"# ax1 = fig.add_subplot(111)\n",
|
||||
"\n",
|
||||
"# distance_vals = [x[4] for x in x_test.values]\n",
|
||||
"# distance_vals = [x[0] for x in x_test.values]\n",
|
||||
"\n",
|
||||
"# ax1.scatter(distance_vals[:100], y_predict[:100], s=18, c='b', marker=\"s\", label='Predicted')\n",
|
||||
"# ax1.scatter(distance_vals[:100], y_actual[:100], s=18, c='r', marker=\"o\", label='Actual')\n",
|
||||
@@ -1204,7 +1083,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.7"
|
||||
"version": "3.6.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -4,6 +4,7 @@ dependencies:
|
||||
- azureml-sdk
|
||||
- azureml-widgets
|
||||
- azureml-opendatasets
|
||||
- azureml-dataprep
|
||||
- azureml-train-automl
|
||||
- matplotlib
|
||||
- pandas
|
||||
- pyarrow
|
||||
|
||||
@@ -3,15 +3,14 @@
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import pandas as pd
|
||||
import azureml.dataprep as dprep
|
||||
from azureml.core import Run
|
||||
|
||||
|
||||
def get_dict(dict_str):
|
||||
pairs = dict_str.strip("{}").split("\;")
|
||||
new_dict = {}
|
||||
for pair in pairs:
|
||||
key, value = pair.strip('\\').split(":")
|
||||
key, value = pair.strip().split(":")
|
||||
new_dict[key.strip().strip("'")] = value.strip().strip("'")
|
||||
|
||||
return new_dict
|
||||
@@ -19,40 +18,37 @@ def get_dict(dict_str):
|
||||
|
||||
print("Cleans the input data")
|
||||
|
||||
# Get the input raw taxi data (the "raw_data" input dataset). To learn more about how to access datasets in your script, please
|
||||
# see https://docs.microsoft.com/en-us/azure/machine-learning/how-to-train-with-datasets.
|
||||
run = Run.get_context()
|
||||
raw_data = run.input_datasets["raw_data"]
|
||||
|
||||
|
||||
parser = argparse.ArgumentParser("cleanse")
|
||||
parser.add_argument("--input_cleanse", type=str, help="raw taxi data")
|
||||
parser.add_argument("--output_cleanse", type=str, help="cleaned taxi data directory")
|
||||
parser.add_argument("--useful_columns", type=str, help="useful columns to keep")
|
||||
parser.add_argument("--columns", type=str, help="rename column pattern")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
print("Argument 1(input taxi data path): %s" % args.input_cleanse)
|
||||
print("Argument 2(columns to keep): %s" % str(args.useful_columns.strip("[]").split("\;")))
|
||||
print("Argument 3(columns renaming mapping): %s" % str(args.columns.strip("{}").split("\;")))
|
||||
print("Argument 4(output cleansed taxi data path): %s" % args.output_cleanse)
|
||||
print("Argument 1(columns to keep): %s" % str(args.useful_columns.strip("[]").split("\;")))
|
||||
print("Argument 2(columns renaming mapping): %s" % str(args.columns.strip("{}").split("\;")))
|
||||
print("Argument 3(output cleansed taxi data path): %s" % args.output_cleanse)
|
||||
|
||||
raw_df = dprep.read_csv(path=args.input_cleanse, header=dprep.PromoteHeadersMode.GROUPED)
|
||||
|
||||
# These functions ensure that null data is removed from the data set,
|
||||
# These functions ensure that null data is removed from the dataset,
|
||||
# which will help increase machine learning model accuracy.
|
||||
# Visit https://docs.microsoft.com/en-us/azure/machine-learning/service/tutorial-data-prep
|
||||
# for more details
|
||||
|
||||
useful_columns = [s.strip().strip("'") for s in args.useful_columns.strip("[]").split("\;")]
|
||||
columns = get_dict(args.columns)
|
||||
|
||||
all_columns = dprep.ColumnSelector(term=".*", use_regex=True)
|
||||
drop_if_all_null = [all_columns, dprep.ColumnRelationship(dprep.ColumnRelationship.ALL)]
|
||||
new_df = (raw_data.to_pandas_dataframe()
|
||||
.dropna(how='all')
|
||||
.rename(columns=columns))[useful_columns]
|
||||
|
||||
new_df = (raw_df
|
||||
.replace_na(columns=all_columns)
|
||||
.drop_nulls(*drop_if_all_null)
|
||||
.rename_columns(column_pairs=columns)
|
||||
.keep_columns(columns=useful_columns))
|
||||
new_df.reset_index(inplace=True, drop=True)
|
||||
|
||||
if not (args.output_cleanse is None):
|
||||
os.makedirs(args.output_cleanse, exist_ok=True)
|
||||
print("%s created" % args.output_cleanse)
|
||||
write_df = new_df.write_to_csv(directory_path=dprep.LocalFileOutput(args.output_cleanse))
|
||||
write_df.run_local()
|
||||
path = args.output_cleanse + "/processed.parquet"
|
||||
write_df = new_df.to_parquet(path)
|
||||
|
||||
@@ -1,55 +1,47 @@
|
||||
import argparse
|
||||
import os
|
||||
import azureml.dataprep as dprep
|
||||
from azureml.core import Run
|
||||
|
||||
print("Filters out coordinates for locations that are outside the city border.",
|
||||
"Chain the column filter commands within the filter() function",
|
||||
"and define the minimum and maximum bounds for each field.")
|
||||
|
||||
run = Run.get_context()
|
||||
|
||||
# To learn more about how to access dataset in your script, please
|
||||
# see https://docs.microsoft.com/en-us/azure/machine-learning/how-to-train-with-datasets.
|
||||
merged_data = run.input_datasets["merged_data"]
|
||||
combined_df = merged_data.to_pandas_dataframe()
|
||||
|
||||
parser = argparse.ArgumentParser("filter")
|
||||
parser.add_argument("--input_filter", type=str, help="merged taxi data directory")
|
||||
parser.add_argument("--output_filter", type=str, help="filter out out of city locations")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
print("Argument 1(input taxi data path): %s" % args.input_filter)
|
||||
print("Argument 2(output filtered taxi data path): %s" % args.output_filter)
|
||||
|
||||
combined_df = dprep.read_csv(args.input_filter + '/part-*')
|
||||
print("Argument (output filtered taxi data path): %s" % args.output_filter)
|
||||
|
||||
# These functions filter out coordinates for locations that are outside the city border.
|
||||
# Visit https://docs.microsoft.com/en-us/azure/machine-learning/service/tutorial-data-prep for more details
|
||||
|
||||
# Create a condensed view of the dataflow to just show the lat/long fields,
|
||||
# which makes it easier to evaluate missing or out-of-scope coordinates
|
||||
decimal_type = dprep.TypeConverter(data_type=dprep.FieldType.DECIMAL)
|
||||
combined_df = combined_df.set_column_types(type_conversions={
|
||||
"pickup_longitude": decimal_type,
|
||||
"pickup_latitude": decimal_type,
|
||||
"dropoff_longitude": decimal_type,
|
||||
"dropoff_latitude": decimal_type
|
||||
})
|
||||
|
||||
# Filter out coordinates for locations that are outside the city border.
|
||||
# Chain the column filter commands within the filter() function
|
||||
# and define the minimum and maximum bounds for each field
|
||||
latlong_filtered_df = (combined_df
|
||||
.drop_nulls(columns=["pickup_longitude",
|
||||
"pickup_latitude",
|
||||
"dropoff_longitude",
|
||||
"dropoff_latitude"],
|
||||
column_relationship=dprep.ColumnRelationship(dprep.ColumnRelationship.ANY))
|
||||
.filter(dprep.f_and(dprep.col("pickup_longitude") <= -73.72,
|
||||
dprep.col("pickup_longitude") >= -74.09,
|
||||
dprep.col("pickup_latitude") <= 40.88,
|
||||
dprep.col("pickup_latitude") >= 40.53,
|
||||
dprep.col("dropoff_longitude") <= -73.72,
|
||||
dprep.col("dropoff_longitude") >= -74.09,
|
||||
dprep.col("dropoff_latitude") <= 40.88,
|
||||
dprep.col("dropoff_latitude") >= 40.53)))
|
||||
|
||||
combined_df = combined_df.astype({"pickup_longitude": 'float64', "pickup_latitude": 'float64',
|
||||
"dropoff_longitude": 'float64', "dropoff_latitude": 'float64'})
|
||||
|
||||
latlong_filtered_df = combined_df[(combined_df.pickup_longitude <= -73.72) &
|
||||
(combined_df.pickup_longitude >= -74.09) &
|
||||
(combined_df.pickup_latitude <= 40.88) &
|
||||
(combined_df.pickup_latitude >= 40.53) &
|
||||
(combined_df.dropoff_longitude <= -73.72) &
|
||||
(combined_df.dropoff_longitude >= -74.72) &
|
||||
(combined_df.dropoff_latitude <= 40.88) &
|
||||
(combined_df.dropoff_latitude >= 40.53)]
|
||||
|
||||
latlong_filtered_df.reset_index(inplace=True, drop=True)
|
||||
|
||||
if not (args.output_filter is None):
|
||||
os.makedirs(args.output_filter, exist_ok=True)
|
||||
print("%s created" % args.output_filter)
|
||||
write_df = latlong_filtered_df.write_to_csv(directory_path=dprep.LocalFileOutput(args.output_filter))
|
||||
write_df.run_local()
|
||||
path = args.output_filter + "/processed.parquet"
|
||||
write_df = latlong_filtered_df.to_parquet(path)
|
||||
|
||||
@@ -1,29 +1,30 @@
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import azureml.dataprep as dprep
|
||||
from azureml.core import Run
|
||||
|
||||
print("Merge Green and Yellow taxi data")
|
||||
|
||||
run = Run.get_context()
|
||||
|
||||
# To learn more about how to access dataset in your script, please
|
||||
# see https://docs.microsoft.com/en-us/azure/machine-learning/how-to-train-with-datasets.
|
||||
cleansed_green_data = run.input_datasets["cleansed_green_data"]
|
||||
cleansed_yellow_data = run.input_datasets["cleansed_yellow_data"]
|
||||
green_df = cleansed_green_data.to_pandas_dataframe()
|
||||
yellow_df = cleansed_yellow_data.to_pandas_dataframe()
|
||||
|
||||
parser = argparse.ArgumentParser("merge")
|
||||
parser.add_argument("--input_green_merge", type=str, help="cleaned green taxi data directory")
|
||||
parser.add_argument("--input_yellow_merge", type=str, help="cleaned yellow taxi data directory")
|
||||
parser.add_argument("--output_merge", type=str, help="green and yellow taxi data merged")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
print("Argument 1(input green taxi data path): %s" % args.input_green_merge)
|
||||
print("Argument 2(input yellow taxi data path): %s" % args.input_yellow_merge)
|
||||
print("Argument 3(output merge taxi data path): %s" % args.output_merge)
|
||||
|
||||
green_df = dprep.read_csv(args.input_green_merge + '/part-*')
|
||||
yellow_df = dprep.read_csv(args.input_yellow_merge + '/part-*')
|
||||
print("Argument (output merge taxi data path): %s" % args.output_merge)
|
||||
|
||||
# Appending yellow data to green data
|
||||
combined_df = green_df.append_rows([yellow_df])
|
||||
combined_df = green_df.append(yellow_df, ignore_index=True)
|
||||
combined_df.reset_index(inplace=True, drop=True)
|
||||
|
||||
if not (args.output_merge is None):
|
||||
os.makedirs(args.output_merge, exist_ok=True)
|
||||
print("%s created" % args.output_merge)
|
||||
write_df = combined_df.write_to_csv(directory_path=dprep.LocalFileOutput(args.output_merge))
|
||||
write_df.run_local()
|
||||
path = args.output_merge + "/processed.parquet"
|
||||
write_df = combined_df.to_parquet(path)
|
||||
|
||||
@@ -1,47 +1,48 @@
|
||||
import argparse
|
||||
import os
|
||||
import azureml.dataprep as dprep
|
||||
import pandas as pd
|
||||
from azureml.core import Run
|
||||
|
||||
print("Replace undefined values to relavant values and rename columns to meaningful names")
|
||||
|
||||
run = Run.get_context()
|
||||
|
||||
# To learn more about how to access dataset in your script, please
|
||||
# see https://docs.microsoft.com/en-us/azure/machine-learning/how-to-train-with-datasets.
|
||||
filtered_data = run.input_datasets['filtered_data']
|
||||
combined_converted_df = filtered_data.to_pandas_dataframe()
|
||||
|
||||
parser = argparse.ArgumentParser("normalize")
|
||||
parser.add_argument("--input_normalize", type=str, help="combined and converted taxi data")
|
||||
parser.add_argument("--output_normalize", type=str, help="replaced undefined values and renamed columns")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
print("Argument 1(input taxi data path): %s" % args.input_normalize)
|
||||
print("Argument 2(output normalized taxi data path): %s" % args.output_normalize)
|
||||
|
||||
combined_converted_df = dprep.read_csv(args.input_normalize + '/part-*')
|
||||
print("Argument (output normalized taxi data path): %s" % args.output_normalize)
|
||||
|
||||
# These functions replace undefined values and rename to use meaningful names.
|
||||
# Visit https://docs.microsoft.com/en-us/azure/machine-learning/service/tutorial-data-prep for more details
|
||||
replaced_stfor_vals_df = (combined_converted_df.replace({"store_forward": "0"}, {"store_forward": "N"})
|
||||
.fillna({"store_forward": "N"}))
|
||||
|
||||
replaced_stfor_vals_df = combined_converted_df.replace(columns="store_forward",
|
||||
find="0",
|
||||
replace_with="N").fill_nulls("store_forward", "N")
|
||||
replaced_distance_vals_df = (replaced_stfor_vals_df.replace({"distance": ".00"}, {"distance": 0})
|
||||
.fillna({"distance": 0}))
|
||||
|
||||
replaced_distance_vals_df = replaced_stfor_vals_df.replace(columns="distance",
|
||||
find=".00",
|
||||
replace_with=0).fill_nulls("distance", 0)
|
||||
normalized_df = replaced_distance_vals_df.astype({"distance": 'float64'})
|
||||
|
||||
replaced_distance_vals_df = replaced_distance_vals_df.to_number(["distance"])
|
||||
temp = pd.DatetimeIndex(normalized_df["pickup_datetime"])
|
||||
normalized_df["pickup_date"] = temp.date
|
||||
normalized_df["pickup_time"] = temp.time
|
||||
|
||||
time_split_df = (replaced_distance_vals_df
|
||||
.split_column_by_example(source_column="pickup_datetime")
|
||||
.split_column_by_example(source_column="dropoff_datetime"))
|
||||
temp = pd.DatetimeIndex(normalized_df["dropoff_datetime"])
|
||||
normalized_df["dropoff_date"] = temp.date
|
||||
normalized_df["dropoff_time"] = temp.time
|
||||
|
||||
# Split the pickup and dropoff datetime values into the respective date and time columns
|
||||
renamed_col_df = (time_split_df
|
||||
.rename_columns(column_pairs={
|
||||
"pickup_datetime_1": "pickup_date",
|
||||
"pickup_datetime_2": "pickup_time",
|
||||
"dropoff_datetime_1": "dropoff_date",
|
||||
"dropoff_datetime_2": "dropoff_time"}))
|
||||
del normalized_df["pickup_datetime"]
|
||||
del normalized_df["dropoff_datetime"]
|
||||
|
||||
normalized_df.reset_index(inplace=True, drop=True)
|
||||
|
||||
if not (args.output_normalize is None):
|
||||
os.makedirs(args.output_normalize, exist_ok=True)
|
||||
print("%s created" % args.output_normalize)
|
||||
write_df = renamed_col_df.write_to_csv(directory_path=dprep.LocalFileOutput(args.output_normalize))
|
||||
write_df.run_local()
|
||||
path = args.output_normalize + "/processed.parquet"
|
||||
write_df = normalized_df.to_parquet(path)
|
||||
|
||||
@@ -1,22 +1,24 @@
|
||||
import argparse
|
||||
import os
|
||||
import azureml.dataprep as dprep
|
||||
from azureml.core import Run
|
||||
|
||||
print("Transforms the renamed taxi data to the required format")
|
||||
|
||||
run = Run.get_context()
|
||||
|
||||
# To learn more about how to access dataset in your script, please
|
||||
# see https://docs.microsoft.com/en-us/azure/machine-learning/how-to-train-with-datasets.
|
||||
normalized_data = run.input_datasets['normalized_data']
|
||||
normalized_df = normalized_data.to_pandas_dataframe()
|
||||
|
||||
parser = argparse.ArgumentParser("transform")
|
||||
parser.add_argument("--input_transform", type=str, help="renamed taxi data")
|
||||
parser.add_argument("--output_transform", type=str, help="transformed taxi data")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
print("Argument 1(input taxi data path): %s" % args.input_transform)
|
||||
print("Argument 2(output final transformed taxi data): %s" % args.output_transform)
|
||||
|
||||
renamed_df = dprep.read_csv(args.input_transform + '/part-*')
|
||||
|
||||
# These functions transform the renamed data to be used finally for training.
|
||||
# Visit https://docs.microsoft.com/en-us/azure/machine-learning/service/tutorial-data-prep for more details
|
||||
|
||||
# Split the pickup and dropoff date further into the day of the week, day of the month, and month values.
|
||||
# To get the day of the week value, use the derive_column_by_example() function.
|
||||
@@ -27,62 +29,46 @@ renamed_df = dprep.read_csv(args.input_transform + '/part-*')
|
||||
# use the drop_columns() function to delete the original fields as the newly generated features are preferred.
|
||||
# Rename the rest of the fields to use meaningful descriptions.
|
||||
|
||||
transformed_features_df = (renamed_df
|
||||
.derive_column_by_example(
|
||||
source_columns="pickup_date",
|
||||
new_column_name="pickup_weekday",
|
||||
example_data=[("2009-01-04", "Sunday"), ("2013-08-22", "Thursday")])
|
||||
.derive_column_by_example(
|
||||
source_columns="dropoff_date",
|
||||
new_column_name="dropoff_weekday",
|
||||
example_data=[("2013-08-22", "Thursday"), ("2013-11-03", "Sunday")])
|
||||
normalized_df = normalized_df.astype({"pickup_date": 'datetime64', "dropoff_date": 'datetime64',
|
||||
"pickup_time": 'datetime64', "dropoff_time": 'datetime64',
|
||||
"distance": 'float64', "cost": 'float64'})
|
||||
|
||||
.split_column_by_example(source_column="pickup_time")
|
||||
.split_column_by_example(source_column="dropoff_time")
|
||||
normalized_df["pickup_weekday"] = normalized_df["pickup_date"].dt.dayofweek
|
||||
normalized_df["pickup_month"] = normalized_df["pickup_date"].dt.month
|
||||
normalized_df["pickup_monthday"] = normalized_df["pickup_date"].dt.day
|
||||
|
||||
.split_column_by_example(source_column="pickup_time_1")
|
||||
.split_column_by_example(source_column="dropoff_time_1")
|
||||
.drop_columns(columns=[
|
||||
"pickup_date", "pickup_time", "dropoff_date", "dropoff_time",
|
||||
"pickup_date_1", "dropoff_date_1", "pickup_time_1", "dropoff_time_1"])
|
||||
normalized_df["dropoff_weekday"] = normalized_df["dropoff_date"].dt.dayofweek
|
||||
normalized_df["dropoff_month"] = normalized_df["dropoff_date"].dt.month
|
||||
normalized_df["dropoff_monthday"] = normalized_df["dropoff_date"].dt.day
|
||||
|
||||
.rename_columns(column_pairs={
|
||||
"pickup_date_2": "pickup_month",
|
||||
"pickup_date_3": "pickup_monthday",
|
||||
"pickup_time_1_1": "pickup_hour",
|
||||
"pickup_time_1_2": "pickup_minute",
|
||||
"pickup_time_2": "pickup_second",
|
||||
"dropoff_date_2": "dropoff_month",
|
||||
"dropoff_date_3": "dropoff_monthday",
|
||||
"dropoff_time_1_1": "dropoff_hour",
|
||||
"dropoff_time_1_2": "dropoff_minute",
|
||||
"dropoff_time_2": "dropoff_second"}))
|
||||
normalized_df["pickup_hour"] = normalized_df["pickup_time"].dt.hour
|
||||
normalized_df["pickup_minute"] = normalized_df["pickup_time"].dt.minute
|
||||
normalized_df["pickup_second"] = normalized_df["pickup_time"].dt.second
|
||||
|
||||
# Drop the pickup_datetime and dropoff_datetime columns because they're
|
||||
normalized_df["dropoff_hour"] = normalized_df["dropoff_time"].dt.hour
|
||||
normalized_df["dropoff_minute"] = normalized_df["dropoff_time"].dt.minute
|
||||
normalized_df["dropoff_second"] = normalized_df["dropoff_time"].dt.second
|
||||
|
||||
# Drop the pickup_date, dropoff_date, pickup_time, dropoff_time columns because they're
|
||||
# no longer needed (granular time features like hour,
|
||||
# minute and second are more useful for model training).
|
||||
processed_df = transformed_features_df.drop_columns(columns=["pickup_datetime", "dropoff_datetime"])
|
||||
del normalized_df["pickup_date"]
|
||||
del normalized_df["dropoff_date"]
|
||||
del normalized_df["pickup_time"]
|
||||
del normalized_df["dropoff_time"]
|
||||
|
||||
# Use the type inference functionality to automatically check the data type of each field,
|
||||
# and display the inference results.
|
||||
type_infer = processed_df.builders.set_column_types()
|
||||
type_infer.learn()
|
||||
|
||||
# The inference results look correct based on the data. Now apply the type conversions to the dataflow.
|
||||
type_converted_df = type_infer.to_dataflow()
|
||||
|
||||
# Before you package the dataflow, run two final filters on the data set.
|
||||
# Before you package the dataset, run two final filters on the dataset.
|
||||
# To eliminate incorrectly captured data points,
|
||||
# filter the dataflow on records where both the cost and distance variable values are greater than zero.
|
||||
# filter the dataset on records where both the cost and distance variable values are greater than zero.
|
||||
# This step will significantly improve machine learning model accuracy,
|
||||
# because data points with a zero cost or distance represent major outliers that throw off prediction accuracy.
|
||||
|
||||
final_df = type_converted_df.filter(dprep.col("distance") > 0)
|
||||
final_df = final_df.filter(dprep.col("cost") > 0)
|
||||
final_df = normalized_df[(normalized_df.distance > 0) & (normalized_df.cost > 0)]
|
||||
final_df.reset_index(inplace=True, drop=True)
|
||||
|
||||
# Writing the final dataframe to use for training in the following steps
|
||||
if not (args.output_transform is None):
|
||||
os.makedirs(args.output_transform, exist_ok=True)
|
||||
print("%s created" % args.output_transform)
|
||||
write_df = final_df.write_to_csv(directory_path=dprep.LocalFileOutput(args.output_transform))
|
||||
write_df.run_local()
|
||||
path = args.output_transform + "/processed.parquet"
|
||||
write_df = final_df.to_parquet(path)
|
||||
|
||||
@@ -1,31 +0,0 @@
|
||||
import argparse
|
||||
import os
|
||||
import azureml.dataprep as dprep
|
||||
import azureml.core
|
||||
|
||||
# Featurization step: keep only the requested columns of the prepared data
# and write the result as CSV into the output directory.
print("Extracts important features from prepared data")

parser = argparse.ArgumentParser("featurization")
parser.add_argument("--input_featurization", type=str, help="input featurization")
parser.add_argument("--useful_columns", type=str, help="columns to use")
parser.add_argument("--output_featurization", type=str, help="output featurization")

args = parser.parse_args()

# The column list arrives as one string: surrounding brackets are stripped and
# the remainder is split on the literal two-character separator backslash +
# semicolon.  A raw string spells that separator without relying on an
# invalid escape sequence ("\;"), which newer Python versions warn about.
raw_columns = args.useful_columns.strip("[]").split(r"\;")

print("Argument 1(input training data path): %s" % args.input_featurization)
print("Argument 2(column features to use): %s" % str(raw_columns))
print("Argument 3:(output featurized training data path) %s" % args.output_featurization)

# Read every part file produced by the previous step.
dflow_prepared = dprep.read_csv(args.input_featurization + '/part-*')

# These functions extract useful features for training
# Visit https://docs.microsoft.com/en-us/azure/machine-learning/service/tutorial-auto-train-models for more detail

# Each entry may still carry surrounding whitespace and single quotes.
useful_columns = [s.strip().strip("'") for s in raw_columns]
dflow = dflow_prepared.keep_columns(useful_columns)

if not (args.output_featurization is None):
    os.makedirs(args.output_featurization, exist_ok=True)
    print("%s created" % args.output_featurization)
    write_df = dflow.write_to_csv(directory_path=dprep.LocalFileOutput(args.output_featurization))
    write_df.run_local()
|
||||
@@ -1,12 +0,0 @@
|
||||
|
||||
import os
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def get_data():
    """Load the training split referenced by the AZUREML_DATAREFERENCE_* variables.

    Reads ``part-00000`` (CSV, first row is the header) from the directories
    named by ``AZUREML_DATAREFERENCE_output_split_train_x`` and
    ``AZUREML_DATAREFERENCE_output_split_train_y``.

    :return: dict with ``"X"`` (2-D values array of the features file) and
        ``"y"`` (flattened values array of the labels file)
    :raises KeyError: if one of the environment variables is not set
    """
    print("In get_data")
    features_dir = os.environ['AZUREML_DATAREFERENCE_output_split_train_x']
    print(features_dir)
    X_train = pd.read_csv(features_dir + "/part-00000", header=0)

    labels_dir = os.environ['AZUREML_DATAREFERENCE_output_split_train_y']
    y_train = pd.read_csv(labels_dir + "/part-00000", header=0)

    return {"X": X_train.values, "y": y_train.values.flatten()}
|
||||
@@ -1,48 +1,38 @@
|
||||
import argparse
|
||||
import os
|
||||
import azureml.dataprep as dprep
|
||||
import azureml.core
|
||||
from azureml.core import Run
|
||||
from sklearn.model_selection import train_test_split
|
||||
|
||||
|
||||
def write_output(df, path):
|
||||
os.makedirs(path, exist_ok=True)
|
||||
print("%s created" % path)
|
||||
df.to_csv(path + "/part-00000", index=False)
|
||||
df.to_parquet(path + "/processed.parquet")
|
||||
|
||||
|
||||
print("Split the data into train and test")
|
||||
run = Run.get_context()
|
||||
transformed_data = run.input_datasets['transformed_data']
|
||||
transformed_df = transformed_data.to_pandas_dataframe()
|
||||
|
||||
parser = argparse.ArgumentParser("split")
|
||||
parser.add_argument("--input_split_features", type=str, help="input split features")
|
||||
parser.add_argument("--input_split_labels", type=str, help="input split labels")
|
||||
parser.add_argument("--output_split_train_x", type=str, help="output split train features")
|
||||
parser.add_argument("--output_split_train_y", type=str, help="output split train labels")
|
||||
parser.add_argument("--output_split_test_x", type=str, help="output split test features")
|
||||
parser.add_argument("--output_split_test_y", type=str, help="output split test labels")
|
||||
parser.add_argument("--output_split_train", type=str, help="output split train data")
|
||||
parser.add_argument("--output_split_test", type=str, help="output split test data")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
print("Argument 1(input taxi data features path): %s" % args.input_split_features)
|
||||
print("Argument 2(input taxi data labels path): %s" % args.input_split_labels)
|
||||
print("Argument 3(output training features split path): %s" % args.output_split_train_x)
|
||||
print("Argument 4(output training labels split path): %s" % args.output_split_train_y)
|
||||
print("Argument 5(output test features split path): %s" % args.output_split_test_x)
|
||||
print("Argument 6(output test labels split path): %s" % args.output_split_test_y)
|
||||
|
||||
x_df = dprep.read_csv(path=args.input_split_features, header=dprep.PromoteHeadersMode.GROUPED).to_pandas_dataframe()
|
||||
y_df = dprep.read_csv(path=args.input_split_labels, header=dprep.PromoteHeadersMode.GROUPED).to_pandas_dataframe()
|
||||
print("Argument 1(output training data split path): %s" % args.output_split_train)
|
||||
print("Argument 2(output test data split path): %s" % args.output_split_test)
|
||||
|
||||
# These functions split the input features and labels into test and train data
|
||||
# Visit https://docs.microsoft.com/en-us/azure/machine-learning/service/tutorial-auto-train-models for more detail
|
||||
|
||||
x_train, x_test, y_train, y_test = train_test_split(x_df, y_df, test_size=0.2, random_state=223)
|
||||
output_split_train, output_split_test = train_test_split(transformed_df, test_size=0.2, random_state=223)
|
||||
output_split_train.reset_index(inplace=True, drop=True)
|
||||
output_split_test.reset_index(inplace=True, drop=True)
|
||||
|
||||
if not (args.output_split_train_x is None and
|
||||
args.output_split_test_x is None and
|
||||
args.output_split_train_y is None and
|
||||
args.output_split_test_y is None):
|
||||
write_output(x_train, args.output_split_train_x)
|
||||
write_output(y_train, args.output_split_train_y)
|
||||
write_output(x_test, args.output_split_test_x)
|
||||
write_output(y_test, args.output_split_test_y)
|
||||
if not (args.output_split_train is None and
|
||||
args.output_split_test is None):
|
||||
write_output(output_split_train, args.output_split_train)
|
||||
write_output(output_split_test, args.output_split_test)
|
||||
|
||||
@@ -341,7 +341,7 @@
|
||||
"from azureml.core import Environment\n",
|
||||
"from azureml.core.runconfig import CondaDependencies, DEFAULT_CPU_IMAGE\n",
|
||||
"\n",
|
||||
"batch_conda_deps = CondaDependencies.create(pip_packages=[\"tensorflow==1.13.1\", \"pillow\"])\n",
|
||||
"batch_conda_deps = CondaDependencies.create(pip_packages=[\"tensorflow==1.15.2\", \"pillow\"])\n",
|
||||
"\n",
|
||||
"batch_env = Environment(name=\"batch_environment\")\n",
|
||||
"batch_env.python.conda_dependencies = batch_conda_deps\n",
|
||||
|
||||
@@ -314,7 +314,7 @@
|
||||
"cd = CondaDependencies()\n",
|
||||
"\n",
|
||||
"cd.add_channel(\"conda-forge\")\n",
|
||||
"cd.add_conda_package(\"ffmpeg\")\n",
|
||||
"cd.add_conda_package(\"ffmpeg==4.0.2\")\n",
|
||||
"\n",
|
||||
"# Runconfig\n",
|
||||
"amlcompute_run_config = RunConfiguration(conda_dependencies=cd)\n",
|
||||
@@ -334,8 +334,7 @@
|
||||
"\n",
|
||||
"ffmpeg_images_ds_name = \"ffmpeg_images_data\"\n",
|
||||
"ffmpeg_images = PipelineData(name=\"ffmpeg_images\", datastore=default_datastore)\n",
|
||||
"ffmpeg_images_file_dataset = ffmpeg_images.as_dataset()\n",
|
||||
"ffmpeg_images_named_file_dataset = ffmpeg_images_file_dataset.as_named_input(ffmpeg_images_ds_name)"
|
||||
"ffmpeg_images_file_dataset = ffmpeg_images.as_dataset()"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -371,11 +370,11 @@
|
||||
" script_name=\"process_video.py\",\n",
|
||||
" arguments=[\"--input_video\", orangutan_video,\n",
|
||||
" \"--output_audio\", ffmpeg_audio,\n",
|
||||
" \"--output_images\", ffmpeg_images,\n",
|
||||
" \"--output_images\", ffmpeg_images_file_dataset,\n",
|
||||
" ],\n",
|
||||
" compute_target=cpu_cluster,\n",
|
||||
" inputs=[orangutan_video],\n",
|
||||
" outputs=[ffmpeg_images, ffmpeg_audio],\n",
|
||||
" outputs=[ffmpeg_images_file_dataset, ffmpeg_audio],\n",
|
||||
" runconfig=amlcompute_run_config,\n",
|
||||
" source_directory=scripts_folder\n",
|
||||
")\n",
|
||||
@@ -415,6 +414,7 @@
|
||||
"parallel_cd.add_channel(\"pytorch\")\n",
|
||||
"parallel_cd.add_conda_package(\"pytorch\")\n",
|
||||
"parallel_cd.add_conda_package(\"torchvision\")\n",
|
||||
"parallel_cd.add_conda_package(\"pillow<7\") # needed for torchvision==0.4.0\n",
|
||||
"\n",
|
||||
"styleenvironment = Environment(name=\"styleenvironment\")\n",
|
||||
"styleenvironment.python.conda_dependencies=parallel_cd\n",
|
||||
@@ -453,7 +453,7 @@
|
||||
"\n",
|
||||
"distributed_style_transfer_step = ParallelRunStep(\n",
|
||||
" name=parallel_step_name,\n",
|
||||
" inputs=[ffmpeg_images_named_file_dataset], # Input file share/blob container/file dataset\n",
|
||||
" inputs=[ffmpeg_images_file_dataset], # Input file share/blob container/file dataset\n",
|
||||
" output=processed_images, # Output file share/blob container\n",
|
||||
" models=[mosaic_model, candy_model],\n",
|
||||
" tags = {'scenario': \"batch inference\", 'type': \"demo\"},\n",
|
||||
|
||||
@@ -3,5 +3,6 @@ dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-contrib-pipeline-steps
|
||||
- azureml-pipeline-steps
|
||||
- azureml-widgets
|
||||
- requests
|
||||
|
||||
@@ -418,6 +418,15 @@
|
||||
"hyperdrive_run.wait_for_completion(show_output=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"assert(hyperdrive_run.get_status() == \"Completed\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
||||
@@ -440,6 +440,15 @@
|
||||
"hyperdrive_run.wait_for_completion(show_output=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"assert(hyperdrive_run.get_status() == \"Completed\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
||||
@@ -0,0 +1,350 @@
|
||||
import json
|
||||
import tempfile
|
||||
|
||||
import numpy as np
|
||||
import copy
|
||||
import time
|
||||
import torch
|
||||
import torch._six
|
||||
|
||||
from pycocotools.cocoeval import COCOeval
|
||||
from pycocotools.coco import COCO
|
||||
import pycocotools.mask as mask_util
|
||||
|
||||
from collections import defaultdict
|
||||
|
||||
import utils
|
||||
|
||||
|
||||
class CocoEvaluator(object):
    """Collects per-image COCO evaluation results for one or more IoU types.

    One ``COCOeval`` object is kept per requested IoU type ("bbox", "segm" or
    "keypoints").  Predictions are fed in batches through ``update``; the
    results are then merged, accumulated and summarized.
    """

    def __init__(self, coco_gt, iou_types):
        """
        :param coco_gt: ground-truth COCO object; a deep copy is stored
        :param iou_types: list or tuple of IoU type names
        """
        assert isinstance(iou_types, (list, tuple))
        coco_gt = copy.deepcopy(coco_gt)
        self.coco_gt = coco_gt

        self.iou_types = iou_types
        self.coco_eval = {
            iou_type: COCOeval(coco_gt, iouType=iou_type) for iou_type in iou_types
        }

        self.img_ids = []
        self.eval_imgs = {iou_type: [] for iou_type in iou_types}

    def update(self, predictions):
        """Evaluate one batch of predictions and store the per-image results.

        :param predictions: dict mapping image id to that image's prediction dict
        """
        img_ids = list(np.unique(list(predictions.keys())))
        self.img_ids.extend(img_ids)

        for iou_type in self.iou_types:
            results = self.prepare(predictions, iou_type)
            # No results for this batch: evaluate against an empty COCO object.
            if results:
                coco_dt = loadRes(self.coco_gt, results)
            else:
                coco_dt = COCO()

            evaluator = self.coco_eval[iou_type]
            evaluator.cocoDt = coco_dt
            evaluator.params.imgIds = list(img_ids)
            # evaluate() hands back the (unique) image ids it actually used;
            # they are carried over to the next IoU type, as before.
            img_ids, per_image = evaluate(evaluator)

            self.eval_imgs[iou_type].append(per_image)

    def synchronize_between_processes(self):
        """Concatenate the stored batches along axis 2 and merge them into each COCOeval."""
        for iou_type in self.iou_types:
            stacked = np.concatenate(self.eval_imgs[iou_type], 2)
            self.eval_imgs[iou_type] = stacked
            create_common_coco_eval(self.coco_eval[iou_type], self.img_ids, stacked)

    def accumulate(self):
        """Call ``accumulate()`` on every COCOeval object."""
        for evaluator in self.coco_eval.values():
            evaluator.accumulate()

    def summarize(self):
        """Print the IoU type followed by the COCOeval summary, for every IoU type."""
        for iou_type, evaluator in self.coco_eval.items():
            print("IoU metric: {}".format(iou_type))
            evaluator.summarize()

    def prepare(self, predictions, iou_type):
        """Convert predictions into COCO result dicts for the given IoU type.

        :raises ValueError: if ``iou_type`` is not "bbox", "segm" or "keypoints"
        """
        if iou_type == "bbox":
            return self.prepare_for_coco_detection(predictions)
        if iou_type == "segm":
            return self.prepare_for_coco_segmentation(predictions)
        if iou_type == "keypoints":
            return self.prepare_for_coco_keypoint(predictions)
        raise ValueError("Unknown iou type {}".format(iou_type))

    def prepare_for_coco_detection(self, predictions):
        """Build one result dict (image_id, category_id, bbox, score) per predicted box.

        Images whose prediction is empty are skipped.  Boxes are passed
        through ``convert_to_xywh`` before being stored.
        """
        coco_results = []
        for original_id, prediction in predictions.items():
            if len(prediction) == 0:
                continue

            boxes = convert_to_xywh(prediction["boxes"]).tolist()
            scores = prediction["scores"].tolist()
            labels = prediction["labels"].tolist()

            for k, box in enumerate(boxes):
                coco_results.append(
                    {
                        "image_id": original_id,
                        "category_id": labels[k],
                        "bbox": box,
                        "score": scores[k],
                    }
                )
        return coco_results

    def prepare_for_coco_segmentation(self, predictions):
        """Build one result dict (image_id, category_id, segmentation, score) per mask.

        Masks are thresholded at 0.5 and RLE-encoded; the RLE ``counts``
        bytes are decoded to text.  Images whose prediction is empty are skipped.
        """
        coco_results = []
        for original_id, prediction in predictions.items():
            if len(prediction) == 0:
                continue

            scores = prediction["scores"].tolist()
            labels = prediction["labels"].tolist()
            masks = prediction["masks"] > 0.5

            rles = []
            for mask in masks:
                # mask[0] is encoded as a Fortran-ordered H x W x 1 uint8 array.
                encoded = mask_util.encode(np.array(mask[0, :, :, np.newaxis], dtype=np.uint8, order="F"))[0]
                encoded["counts"] = encoded["counts"].decode("utf-8")
                rles.append(encoded)

            for k, rle in enumerate(rles):
                coco_results.append(
                    {
                        "image_id": original_id,
                        "category_id": labels[k],
                        "segmentation": rle,
                        "score": scores[k],
                    }
                )
        return coco_results

    def prepare_for_coco_keypoint(self, predictions):
        """Build one result dict (image_id, category_id, keypoints, score) per detection.

        Each detection's keypoints are flattened into a single list.  Images
        whose prediction is empty are skipped.
        """
        coco_results = []
        for original_id, prediction in predictions.items():
            if len(prediction) == 0:
                continue

            # Converted as in the detection path; the boxes themselves are
            # not part of the keypoint result dicts.
            boxes = convert_to_xywh(prediction["boxes"]).tolist()
            scores = prediction["scores"].tolist()
            labels = prediction["labels"].tolist()
            keypoints = prediction["keypoints"].flatten(start_dim=1).tolist()

            for k, keypoint in enumerate(keypoints):
                coco_results.append(
                    {
                        "image_id": original_id,
                        "category_id": labels[k],
                        'keypoints': keypoint,
                        "score": scores[k],
                    }
                )
        return coco_results
|
||||
|
||||
|
||||
def convert_to_xywh(boxes):
    """Convert boxes from (xmin, ymin, xmax, ymax) to (xmin, ymin, width, height).

    :param boxes: tensor whose dimension 1 holds the four box coordinates
    :return: new tensor with the same layout, stacked along dimension 1
    """
    left, top, right, bottom = boxes.unbind(1)
    width = right - left
    height = bottom - top
    return torch.stack((left, top, width, height), dim=1)
|
||||
|
||||
|
||||
def merge(img_ids, eval_imgs):
    """Gather image ids and evaluation arrays via ``utils.all_gather`` and de-duplicate them.

    :param img_ids: image ids evaluated locally
    :param eval_imgs: evaluation array whose axis 2 runs over images
    :return: (sorted unique image ids, evaluation array restricted to the
        first occurrence of each id along the last axis)
    """
    gathered_ids = utils.all_gather(img_ids)
    gathered_evals = utils.all_gather(eval_imgs)

    flat_ids = [img_id for chunk in gathered_ids for img_id in chunk]
    eval_chunks = list(gathered_evals)

    flat_ids = np.array(flat_ids)
    stacked = np.concatenate(eval_chunks, 2)

    # keep only unique (and in sorted order) images
    unique_ids, first_idx = np.unique(flat_ids, return_index=True)
    stacked = stacked[..., first_idx]

    return unique_ids, stacked
|
||||
|
||||
|
||||
def create_common_coco_eval(coco_eval, img_ids, eval_imgs):
    """Store the merged image ids and evaluation results on ``coco_eval``.

    Sets ``evalImgs`` (flattened list), ``params.imgIds`` and a deep copy of
    the parameters in ``_paramsEval``.
    """
    merged_ids, merged_evals = merge(img_ids, eval_imgs)

    coco_eval.evalImgs = list(merged_evals.flatten())
    coco_eval.params.imgIds = list(merged_ids)
    coco_eval._paramsEval = copy.deepcopy(coco_eval.params)
|
||||
|
||||
|
||||
#################################################################
|
||||
# From pycocotools, just removed the prints and fixed
|
||||
# a Python3 bug about unicode not defined
|
||||
#################################################################
|
||||
|
||||
# Ideally, pycocotools wouldn't have hard-coded prints
|
||||
# so that we could avoid copy-pasting those two functions
|
||||
|
||||
def createIndex(self):
    """Build the lookup tables of a COCO-style object from ``self.dataset``.

    Sets ``anns`` (annotation id -> annotation), ``imgToAnns`` (image id ->
    list of annotations), ``catToImgs`` (category id -> list of image ids,
    only filled when both annotations and categories are present), ``imgs``
    (image id -> image) and ``cats`` (category id -> category).
    """
    dataset = self.dataset
    has_annotations = 'annotations' in dataset
    has_categories = 'categories' in dataset

    ann_by_id = {}
    anns_by_image = defaultdict(list)
    if has_annotations:
        for ann in dataset['annotations']:
            anns_by_image[ann['image_id']].append(ann)
            ann_by_id[ann['id']] = ann

    img_by_id = {}
    if 'images' in dataset:
        img_by_id = {img['id']: img for img in dataset['images']}

    cat_by_id = {}
    if has_categories:
        cat_by_id = {cat['id']: cat for cat in dataset['categories']}

    images_by_cat = defaultdict(list)
    if has_annotations and has_categories:
        for ann in dataset['annotations']:
            images_by_cat[ann['category_id']].append(ann['image_id'])

    # create class members
    self.anns = ann_by_id
    self.imgToAnns = anns_by_image
    self.catToImgs = images_by_cat
    self.imgs = img_by_id
    self.cats = cat_by_id
|
||||
|
||||
|
||||
maskUtils = mask_util
|
||||
|
||||
|
||||
def loadRes(self, resFile):
    """
    Load results and return a result api object.

    Adapted from pycocotools' ``COCO.loadRes`` with the progress prints removed.

    :param resFile: path of a JSON result file (str or bytes), a numpy array
        (converted through ``self.loadNumpyAnnotations``), or an
        already-loaded list of annotation dicts.  The dicts are updated in
        place with ``id`` and, depending on the result type, ``area``,
        ``bbox``, ``segmentation`` and ``iscrowd``.
    :return: res (obj) : result api object
    :raises AssertionError: if the results are not a list, or refer to image
        ids that ``self.getImgIds()`` does not return
    """
    res = COCO()
    res.dataset['images'] = [img for img in self.dataset['images']]

    # A plain (str, bytes) check replaces the private torch._six.string_classes
    # alias, so this function no longer depends on that PyTorch-internal module.
    if isinstance(resFile, (str, bytes)):
        # Context manager so the file handle is closed as soon as it is parsed.
        with open(resFile) as res_fp:
            anns = json.load(res_fp)
    elif type(resFile) == np.ndarray:
        anns = self.loadNumpyAnnotations(resFile)
    else:
        anns = resFile
    assert type(anns) == list, 'results in not an array of objects'
    annsImgIds = [ann['image_id'] for ann in anns]
    assert set(annsImgIds) == (set(annsImgIds) & set(self.getImgIds())), \
        'Results do not correspond to current coco set'

    # The first annotation decides the result type.  An empty result list has
    # nothing to post-process and simply yields an empty index instead of
    # failing on anns[0].
    first = anns[0] if anns else {}
    if 'caption' in first:
        imgIds = set([img['id'] for img in res.dataset['images']]) & set(annsImgIds)
        res.dataset['images'] = [img for img in res.dataset['images'] if img['id'] in imgIds]
        for ann_id, ann in enumerate(anns, start=1):
            ann['id'] = ann_id
    elif 'bbox' in first and not first['bbox'] == []:
        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
        for ann_id, ann in enumerate(anns, start=1):
            bb = ann['bbox']
            x1, x2, y1, y2 = [bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]]
            if 'segmentation' not in ann:
                # Fall back to the box outline as a polygon.
                ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]]
            ann['area'] = bb[2] * bb[3]
            ann['id'] = ann_id
            ann['iscrowd'] = 0
    elif 'segmentation' in first:
        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
        for ann_id, ann in enumerate(anns, start=1):
            # now only support compressed RLE format as segmentation results
            ann['area'] = maskUtils.area(ann['segmentation'])
            if 'bbox' not in ann:
                ann['bbox'] = maskUtils.toBbox(ann['segmentation'])
            ann['id'] = ann_id
            ann['iscrowd'] = 0
    elif 'keypoints' in first:
        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
        for ann_id, ann in enumerate(anns, start=1):
            s = ann['keypoints']
            # Keypoints are stored as flat triples; every third value starting
            # at index 0 / 1 is an x / y coordinate.
            x = s[0::3]
            y = s[1::3]
            x1, x2, y1, y2 = np.min(x), np.max(x), np.min(y), np.max(y)
            ann['area'] = (x2 - x1) * (y2 - y1)
            ann['id'] = ann_id
            ann['bbox'] = [x1, y1, x2 - x1, y2 - y1]

    res.dataset['annotations'] = anns
    createIndex(res)
    return res
|
||||
|
||||
|
||||
def evaluate(self):
    '''
    Run per image evaluation on the images listed in self.params.

    Adapted from pycocotools' ``COCOeval.evaluate`` with the progress prints
    removed.  Unlike the original, the per-image results are returned rather
    than stored in self.evalImgs.

    :return: (image ids, evaluation results reshaped to
        (number of categories, number of area ranges, number of images))
    '''
    params = self.params
    # add backward compatibility if useSegm is specified in params
    if params.useSegm is not None:
        params.iouType = 'segm' if params.useSegm == 1 else 'bbox'
        print('useSegm (deprecated) is not None. Running {} evaluation'.format(params.iouType))
    params.imgIds = list(np.unique(params.imgIds))
    if params.useCats:
        params.catIds = list(np.unique(params.catIds))
    params.maxDets = sorted(params.maxDets)
    self.params = params

    self._prepare()
    # A single placeholder category is used when categories are ignored.
    cat_ids = params.catIds if params.useCats else [-1]

    if params.iouType in ('segm', 'bbox'):
        compute_iou = self.computeIoU
    elif params.iouType == 'keypoints':
        compute_iou = self.computeOks
    self.ious = {
        (img_id, cat_id): compute_iou(img_id, cat_id)
        for img_id in params.imgIds
        for cat_id in cat_ids}

    evaluate_img = self.evaluateImg
    largest_max_det = params.maxDets[-1]
    # Iteration order (category, area range, image) must match the reshape below.
    per_image = [
        evaluate_img(img_id, cat_id, area_rng, largest_max_det)
        for cat_id in cat_ids
        for area_rng in params.areaRng
        for img_id in params.imgIds
    ]
    # this is NOT in the pycocotools code, but could be done outside
    per_image = np.asarray(per_image).reshape(
        len(cat_ids), len(params.areaRng), len(params.imgIds))
    self._paramsEval = copy.deepcopy(self.params)
    return params.imgIds, per_image
|
||||
|
||||
#################################################################
|
||||
# end of straight copy from pycocotools, just removing the prints
|
||||
#################################################################
|
||||
@@ -0,0 +1,252 @@
|
||||
import copy
|
||||
import os
|
||||
from PIL import Image
|
||||
|
||||
import torch
|
||||
import torch.utils.data
|
||||
import torchvision
|
||||
|
||||
from pycocotools import mask as coco_mask
|
||||
from pycocotools.coco import COCO
|
||||
|
||||
import transforms as T
|
||||
|
||||
|
||||
class FilterAndRemapCocoCategories(object):
    """Transform that keeps only annotations of selected categories.

    With ``remap`` enabled, each kept annotation is deep-copied and its
    ``category_id`` is replaced by the position of the original id in
    ``categories``.
    """

    def __init__(self, categories, remap=True):
        """
        :param categories: list of category ids to keep
        :param remap: whether to rewrite category ids to list positions
        """
        self.categories = categories
        self.remap = remap

    def __call__(self, image, target):
        """Filter (and optionally remap) ``target["annotations"]``; returns (image, target)."""
        kept = [obj for obj in target["annotations"] if obj["category_id"] in self.categories]
        if self.remap:
            # Copy first so the caller's annotation dicts are not modified.
            kept = copy.deepcopy(kept)
            for obj in kept:
                obj["category_id"] = self.categories.index(obj["category_id"])
        target["annotations"] = kept
        return image, target
|
||||
|
||||
|
||||
def convert_coco_poly_to_mask(segmentations, height, width):
    """Rasterize COCO polygon segmentations into one mask per object.

    :param segmentations: one polygon collection per object
    :param height: mask height
    :param width: mask width
    :return: the per-object masks stacked along dimension 0; a
        (0, height, width) uint8 tensor when ``segmentations`` is empty
    """
    per_object = []
    for polygons in segmentations:
        rles = coco_mask.frPyObjects(polygons, height, width)
        decoded = coco_mask.decode(rles)
        # Make sure there is a trailing axis to reduce over.
        if len(decoded.shape) < 3:
            decoded = decoded[..., None]
        decoded = torch.as_tensor(decoded, dtype=torch.uint8)
        # Merge along the last axis: a pixel is set if any entry there is set.
        per_object.append(decoded.any(dim=2))

    if not per_object:
        return torch.zeros((0, height, width), dtype=torch.uint8)
    return torch.stack(per_object, dim=0)
|
||||
|
||||
|
||||
class ConvertCocoPolysToMask(object):
    """Transform that turns raw COCO annotations into tensor targets."""

    def __call__(self, image, target):
        """Build the tensor target dict for one image.

        :param image: object exposing ``size`` as (width, height)
        :param target: dict with ``image_id`` and ``annotations``
        :return: (image, new target dict with boxes, labels, masks, image_id,
            optional keypoints, area and iscrowd)
        """
        w, h = image.size

        image_id = torch.tensor([target["image_id"]])

        # Crowd annotations are dropped up front.
        anno = [obj for obj in target["annotations"] if obj['iscrowd'] == 0]

        # guard against no boxes via resizing
        boxes = torch.as_tensor([obj["bbox"] for obj in anno], dtype=torch.float32).reshape(-1, 4)
        # (x, y, w, h) -> (x1, y1, x2, y2), then clip to the image.
        boxes[:, 2:] += boxes[:, :2]
        boxes[:, 0::2].clamp_(min=0, max=w)
        boxes[:, 1::2].clamp_(min=0, max=h)

        classes = torch.tensor([obj["category_id"] for obj in anno], dtype=torch.int64)

        masks = convert_coco_poly_to_mask([obj["segmentation"] for obj in anno], h, w)

        keypoints = None
        if anno and "keypoints" in anno[0]:
            keypoints = torch.as_tensor([obj["keypoints"] for obj in anno], dtype=torch.float32)
            num_keypoints = keypoints.shape[0]
            if num_keypoints:
                # One row of (x, y, visibility)-style triples per keypoint.
                keypoints = keypoints.view(num_keypoints, -1, 3)

        # Keep only boxes with positive width and height after clipping.
        keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0])

        target = {
            "boxes": boxes[keep],
            "labels": classes[keep],
            "masks": masks[keep],
            "image_id": image_id,
        }
        if keypoints is not None:
            target["keypoints"] = keypoints[keep]

        # for conversion to coco api
        # NOTE(review): area and iscrowd are built from every non-crowd
        # annotation and are not filtered by `keep`, so they can be longer
        # than boxes/labels when a degenerate box was dropped - confirm intended.
        target["area"] = torch.tensor([obj["area"] for obj in anno])
        target["iscrowd"] = torch.tensor([obj["iscrowd"] for obj in anno])

        return image, target
|
||||
|
||||
|
||||
def _coco_remove_images_without_annotations(dataset, cat_list=None):
    """Return a ``Subset`` of ``dataset`` containing only usefully annotated images.

    :param dataset: a ``torchvision.datasets.CocoDetection`` instance
    :param cat_list: optional collection of category ids; when given, only
        annotations of these categories are considered
    :return: ``torch.utils.data.Subset`` over the indices of the kept images
    """
    min_keypoints_per_image = 10

    def _has_only_empty_bbox(anno):
        # A box counts as empty when its width or its height is <= 1.
        return all(any(o <= 1 for o in obj["bbox"][2:]) for obj in anno)

    def _count_visible_keypoints(anno):
        # Every third value, starting at index 2, is a keypoint's visibility flag.
        return sum(sum(1 for v in ann["keypoints"][2::3] if v > 0) for ann in anno)

    def _has_valid_annotation(anno):
        # if it's empty, there is no annotation
        if len(anno) == 0:
            return False
        # if all boxes have close to zero area, there is no annotation
        if _has_only_empty_bbox(anno):
            return False
        # the keypoints task has slightly different criteria for considering
        # whether an annotation is valid
        if "keypoints" not in anno[0]:
            return True
        # for keypoint detection tasks, only images containing at least
        # min_keypoints_per_image visible keypoints are considered valid
        return _count_visible_keypoints(anno) >= min_keypoints_per_image

    assert isinstance(dataset, torchvision.datasets.CocoDetection)
    ids = []
    for ds_idx, img_id in enumerate(dataset.ids):
        anno = dataset.coco.loadAnns(dataset.coco.getAnnIds(imgIds=img_id, iscrowd=None))
        if cat_list:
            anno = [obj for obj in anno if obj["category_id"] in cat_list]
        if _has_valid_annotation(anno):
            ids.append(ds_idx)

    return torch.utils.data.Subset(dataset, ids)
|
||||
|
||||
|
||||
def convert_to_coco_api(ds):
    """Build an in-memory ``COCO`` object from a detection dataset.

    Every item of ``ds`` is read as ``(img, targets)``. ``targets`` must carry
    ``image_id``, ``boxes``, ``labels``, ``area`` and ``iscrowd``; ``masks``
    and ``keypoints`` are optional and are exported when present.

    Args:
        ds: an indexable dataset with ``__len__`` whose items are
            ``(image tensor, target dict)`` pairs.

    Returns:
        A ``COCO`` instance whose ``dataset`` holds the collected images,
        categories and annotations, with its index already created.
    """
    coco_ds = COCO()
    # annotation IDs need to start at 1, not 0, see torchvision issue #1530
    ann_id = 1
    dataset = {'images': [], 'categories': [], 'annotations': []}
    categories = set()
    for img_idx in range(len(ds)):
        # find better way to get target
        # targets = ds.get_annotations(img_idx)
        img, targets = ds[img_idx]
        image_id = targets["image_id"].item()
        img_dict = {}
        img_dict['id'] = image_id
        img_dict['height'] = img.shape[-2]
        img_dict['width'] = img.shape[-1]
        dataset['images'].append(img_dict)
        # Work on a copy: the subtraction below is in place and must not
        # modify the tensor owned by the dataset's target dict.
        bboxes = targets["boxes"].clone()
        # columns 2: become extents relative to columns :2
        bboxes[:, 2:] -= bboxes[:, :2]
        bboxes = bboxes.tolist()
        labels = targets['labels'].tolist()
        areas = targets['area'].tolist()
        iscrowd = targets['iscrowd'].tolist()
        if 'masks' in targets:
            masks = targets['masks']
            # make masks Fortran contiguous for coco_mask
            masks = masks.permute(0, 2, 1).contiguous().permute(0, 2, 1)
        if 'keypoints' in targets:
            keypoints = targets['keypoints']
            # flatten each object's keypoints into one flat list
            keypoints = keypoints.reshape(keypoints.shape[0], -1).tolist()
        num_objs = len(bboxes)
        for i in range(num_objs):
            ann = {}
            ann['image_id'] = image_id
            ann['bbox'] = bboxes[i]
            ann['category_id'] = labels[i]
            categories.add(labels[i])
            ann['area'] = areas[i]
            ann['iscrowd'] = iscrowd[i]
            ann['id'] = ann_id
            if 'masks' in targets:
                ann["segmentation"] = coco_mask.encode(masks[i].numpy())
            if 'keypoints' in targets:
                ann['keypoints'] = keypoints[i]
                # count the non-zero entries among every third value from index 2
                ann['num_keypoints'] = sum(k != 0 for k in keypoints[i][2::3])
            dataset['annotations'].append(ann)
            ann_id += 1
    dataset['categories'] = [{'id': i} for i in sorted(categories)]
    coco_ds.dataset = dataset
    coco_ds.createIndex()
    return coco_ds
|
||||
|
||||
|
||||
def get_coco_api_from_dataset(dataset):
    """Return a COCO API object for ``dataset``.

    Up to ten layers of ``torch.utils.data.Subset`` wrapping are peeled off
    while looking for a ``torchvision.datasets.CocoDetection``. If one is
    found its ``coco`` attribute is returned; otherwise the (possibly
    unwrapped) dataset is converted with ``convert_to_coco_api``.
    """
    attempts_left = 10
    while attempts_left > 0 and not isinstance(dataset, torchvision.datasets.CocoDetection):
        if isinstance(dataset, torch.utils.data.Subset):
            dataset = dataset.dataset
        attempts_left -= 1

    if not isinstance(dataset, torchvision.datasets.CocoDetection):
        return convert_to_coco_api(dataset)
    return dataset.coco
|
||||
|
||||
|
||||
class CocoDetection(torchvision.datasets.CocoDetection):
    """``torchvision`` COCO detection dataset that returns a dict target.

    Each item is ``(img, target)`` where ``target`` has the keys ``image_id``
    and ``annotations``; an optional joint transform is applied to the pair.
    """

    def __init__(self, img_folder, ann_file, transforms):
        """Forward the folder and annotation file to the base class and keep
        ``transforms`` (a callable taking ``(img, target)``, or ``None``)."""
        super().__init__(img_folder, ann_file)
        self._transforms = transforms

    def __getitem__(self, idx):
        """Load item ``idx`` and package its annotations with the image id."""
        img, annotations = super().__getitem__(idx)
        target = {"image_id": self.ids[idx], "annotations": annotations}
        if self._transforms is None:
            return img, target
        img, target = self._transforms(img, target)
        return img, target
|
||||
|
||||
|
||||
def get_coco(root, image_set, transforms, mode='instances'):
    """Create the COCO 2017 dataset for one split.

    Args:
        root: directory containing the image folders and ``annotations``.
        image_set: ``"train"`` or ``"val"``; any other value raises ``KeyError``.
        transforms: optional extra transform appended after
            ``ConvertCocoPolysToMask``.
        mode: annotation file prefix, e.g. ``'instances'``.

    Returns:
        A ``CocoDetection`` dataset; for ``"train"`` it is additionally
        filtered by ``_coco_remove_images_without_annotations``.
    """
    anno_file_template = "{}_{}2017.json"
    split_locations = {
        "train": ("train2017", os.path.join("annotations", anno_file_template.format(mode, "train"))),
        "val": ("val2017", os.path.join("annotations", anno_file_template.format(mode, "val"))),
        # debugging hint: point "train" at the val2017 folder/annotations
    }

    # polygon-to-mask conversion always runs first, caller transforms after it
    pipeline = [ConvertCocoPolysToMask()]
    if transforms is not None:
        pipeline.append(transforms)
    composed = T.Compose(pipeline)

    image_subdir, annotation_relpath = split_locations[image_set]
    dataset = CocoDetection(
        os.path.join(root, image_subdir),
        os.path.join(root, annotation_relpath),
        transforms=composed,
    )

    if image_set == "train":
        dataset = _coco_remove_images_without_annotations(dataset)

    # debugging hint: wrap in torch.utils.data.Subset to work on a few images

    return dataset
|
||||
|
||||
|
||||
def get_coco_kp(root, image_set, transforms):
    """Shortcut for ``get_coco`` using the ``person_keypoints`` annotation files."""
    keypoint_mode = "person_keypoints"
    return get_coco(root, image_set, transforms, mode=keypoint_mode)
|
||||
@@ -0,0 +1,77 @@
|
||||
import numpy as np
|
||||
import os
|
||||
import torch.utils.data
|
||||
|
||||
from azureml.core import Run
|
||||
from PIL import Image
|
||||
|
||||
|
||||
class PennFudanDataset(torch.utils.data.Dataset):
    """Dataset over a directory that contains ``PNGImages`` and ``PedMasks``.

    Item ``idx`` pairs the ``idx``-th image with the ``idx``-th mask after
    sorting both directory listings, and returns ``(img, target)`` where
    ``target`` holds ``boxes``, ``labels``, ``masks``, ``image_id``, ``area``
    and ``iscrowd`` tensors.
    """

    def __init__(self, root, transforms=None):
        """
        Args:
            root: directory containing the ``PNGImages`` and ``PedMasks`` folders.
            transforms: optional callable applied as ``transforms(img, target)``.
        """
        self.root = root
        self.transforms = transforms

        # load all image files, sorting them to ensure that they are aligned
        self.img_dir = os.path.join(root, "PNGImages")
        self.mask_dir = os.path.join(root, "PedMasks")

        self.imgs = list(sorted(os.listdir(self.img_dir)))
        self.masks = list(sorted(os.listdir(self.mask_dir)))

    def __getitem__(self, idx):
        """Return the ``(img, target)`` pair for position ``idx``."""
        # load images and masks
        img_path = os.path.join(self.img_dir, self.imgs[idx])
        mask_path = os.path.join(self.mask_dir, self.masks[idx])

        # Context managers close the underlying files as soon as the pixel
        # data has been copied out (convert/np.array both read the data).
        with Image.open(img_path) as img_file:
            img = img_file.convert("RGB")
        # note that we haven't converted the mask to RGB,
        # because each color corresponds to a different instance
        # with 0 being background
        with Image.open(mask_path) as mask_file:
            mask = np.array(mask_file)

        # instances are encoded as different colors
        obj_ids = np.unique(mask)
        # first id is the background, so remove it
        # (np.unique sorts, so this drops the smallest value in the mask)
        obj_ids = obj_ids[1:]

        # split the color-encoded mask into a set
        # of binary masks
        masks = mask == obj_ids[:, None, None]

        # get bounding box coordinates for each mask
        num_objs = len(obj_ids)
        boxes = []
        for i in range(num_objs):
            pos = np.where(masks[i])
            xmin = np.min(pos[1])
            xmax = np.max(pos[1])
            ymin = np.min(pos[0])
            ymax = np.max(pos[0])
            boxes.append([xmin, ymin, xmax, ymax])

        # reshape keeps the (N, 4) layout even when there are no objects,
        # so the column indexing below does not fail on an empty list
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        # there is only one class
        labels = torch.ones((num_objs,), dtype=torch.int64)
        masks = torch.as_tensor(masks, dtype=torch.uint8)

        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # suppose all instances are not crowd
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["masks"] = masks
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        """Number of image files found in ``PNGImages``."""
        return len(self.imgs)
|
||||
@@ -0,0 +1,16 @@
|
||||
# From https://github.com/microsoft/AzureML-BERT/blob/master/finetune/PyTorch/dockerfile
|
||||
|
||||
FROM mcr.microsoft.com/azureml/base-gpu:openmpi3.1.2-cuda10.1-cudnn7-ubuntu18.04

# git is needed for the cocoapi clone below; apt-get is the script-stable
# front end, and the package lists are removed to keep the layer small
RUN apt-get update && apt-get install -y git && rm -rf /var/lib/apt/lists/*

# -y keeps every conda step non-interactive during the image build
RUN /opt/miniconda/bin/conda update -y -n base -c defaults conda
RUN /opt/miniconda/bin/conda install -y cython=0.29.15 numpy=1.18.1
RUN /opt/miniconda/bin/conda install -y pytorch=1.4 torchvision=0.5.0 -c pytorch

# Install cocoapi, required for drawing bounding boxes
RUN git clone https://github.com/cocodataset/cocoapi.git && cd cocoapi/PythonAPI && python setup.py build_ext install

RUN pip install azureml-defaults
RUN pip install "azureml-dataprep[fuse]"
RUN pip install pandas pyarrow
|
||||
@@ -0,0 +1,108 @@
|
||||
import math
|
||||
import sys
|
||||
import time
|
||||
import torch
|
||||
|
||||
import torchvision.models.detection.mask_rcnn
|
||||
|
||||
from coco_utils import get_coco_api_from_dataset
|
||||
from coco_eval import CocoEvaluator
|
||||
import utils
|
||||
|
||||
|
||||
def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq):
    """Run one training pass over ``data_loader``.

    During epoch 0 a warm-up learning-rate scheduler from
    ``utils.warmup_lr_scheduler`` is stepped after every optimizer step.
    The process exits with status 1 if the reduced loss is not finite.

    Args:
        model: called as ``model(images, targets)``; must return a dict of losses.
        optimizer: optimizer whose first param group's ``lr`` is logged.
        data_loader: iterable of ``(images, targets)`` batches.
        device: device the images and target tensors are moved to.
        epoch: current epoch index (used in the log header and for warm-up).
        print_freq: logging frequency handed to ``MetricLogger.log_every``.
    """
    model.train()
    metric_logger = utils.MetricLogger(delimiter=" ")
    metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)

    # warm-up only applies to the very first epoch
    lr_scheduler = None
    if epoch == 0:
        lr_scheduler = utils.warmup_lr_scheduler(
            optimizer,
            min(1000, len(data_loader) - 1),
            1. / 1000,
        )

    for images, targets in metric_logger.log_every(data_loader, print_freq, header):
        images = [image.to(device) for image in images]
        targets = [{key: value.to(device) for key, value in t.items()} for t in targets]

        loss_dict = model(images, targets)
        # total loss used for the backward pass
        losses = sum(loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        losses_reduced = sum(loss_dict_reduced.values())
        loss_value = losses_reduced.item()

        if not math.isfinite(loss_value):
            print("Loss is {}, stopping training".format(loss_value))
            print(loss_dict_reduced)
            sys.exit(1)

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        if lr_scheduler is not None:
            lr_scheduler.step()

        metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])
|
||||
|
||||
|
||||
def _get_iou_types(model):
    """List the IoU types to evaluate for ``model``.

    ``"bbox"`` is always included; ``"segm"`` is added for a ``MaskRCNN`` and
    ``"keypoints"`` for a ``KeypointRCNN``. A ``DistributedDataParallel``
    wrapper is looked through via its ``module`` attribute.
    """
    if isinstance(model, torch.nn.parallel.DistributedDataParallel):
        core_model = model.module
    else:
        core_model = model

    iou_types = ["bbox"]
    if isinstance(core_model, torchvision.models.detection.MaskRCNN):
        iou_types += ["segm"]
    if isinstance(core_model, torchvision.models.detection.KeypointRCNN):
        iou_types += ["keypoints"]
    return iou_types
|
||||
|
||||
|
||||
@torch.no_grad()
def evaluate(model, data_loader, device):
    """Evaluate ``model`` on ``data_loader`` with a ``CocoEvaluator``.

    Args:
        model: detection model; called as ``model(images)`` in eval mode and
            expected to return one dict of tensors per image.
        data_loader: iterable of ``(images, targets)`` batches whose
            ``dataset`` attribute can be turned into a COCO API object.
        device: device the inputs are moved to before inference.

    Returns:
        The ``CocoEvaluator`` after ``accumulate()`` and ``summarize()``.
    """
    n_threads = torch.get_num_threads()
    # FIXME remove this and make paste_masks_in_image run on the GPU
    torch.set_num_threads(1)
    try:
        cpu_device = torch.device("cpu")
        model.eval()
        metric_logger = utils.MetricLogger(delimiter=" ")
        header = 'Test:'

        coco = get_coco_api_from_dataset(data_loader.dataset)
        iou_types = _get_iou_types(model)
        coco_evaluator = CocoEvaluator(coco, iou_types)

        for image, targets in metric_logger.log_every(data_loader, 100, header):
            image = list(img.to(device) for img in image)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            # Only synchronize when CUDA exists; calling it unconditionally
            # fails on CPU-only machines (e.g. a local run).
            if torch.cuda.is_available():
                torch.cuda.synchronize()
            model_time = time.time()
            outputs = model(image)

            outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
            model_time = time.time() - model_time

            # key each prediction by the image id of its target
            res = {target["image_id"].item(): output for target, output in zip(targets, outputs)}
            evaluator_time = time.time()
            coco_evaluator.update(res)
            evaluator_time = time.time() - evaluator_time
            metric_logger.update(model_time=model_time, evaluator_time=evaluator_time)

        # gather the stats from all processes
        metric_logger.synchronize_between_processes()
        print("Averaged stats:", metric_logger)
        coco_evaluator.synchronize_between_processes()

        # accumulate predictions from all images
        coco_evaluator.accumulate()
        coco_evaluator.summarize()
    finally:
        # restore the caller's thread setting even if evaluation raised
        torch.set_num_threads(n_threads)
    return coco_evaluator
|
||||
@@ -0,0 +1,23 @@
|
||||
import torchvision
|
||||
|
||||
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
|
||||
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
|
||||
|
||||
|
||||
def get_instance_segmentation_model(num_classes):
    """Create a Mask R-CNN (ResNet-50 FPN) with heads sized for ``num_classes``.

    The model is created with ``pretrained=True``; its box predictor and mask
    predictor are then replaced by freshly constructed ones.

    Args:
        num_classes: number of output classes for both new heads.

    Returns:
        The modified torchvision model.
    """
    # start from the pre-trained instance segmentation model
    model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
    heads = model.roi_heads

    # new box head, sized from the existing classifier's input features
    box_in_features = heads.box_predictor.cls_score.in_features
    heads.box_predictor = FastRCNNPredictor(box_in_features, num_classes)

    # new mask head, sized from the existing mask classifier's input channels
    mask_in_channels = heads.mask_predictor.conv5_mask.in_channels
    mask_hidden_channels = 256
    heads.mask_predictor = MaskRCNNPredictor(mask_in_channels, mask_hidden_channels, num_classes)

    return model
|
||||
@@ -0,0 +1,544 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||
"\n",
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Object detection with PyTorch, Mask R-CNN, and a custom Dockerfile\n",
|
||||
"\n",
|
||||
"In this tutorial, you will finetune a pre-trained [Mask R-CNN](https://arxiv.org/abs/1703.06870) model on images from the [Penn-Fudan Database for Pedestrian Detection and Segmentation](https://www.cis.upenn.edu/~jshi/ped_html/). The dataset has 170 images with 345 instances of pedestrians. After running this tutorial, you will have a model that can outline the silhouettes of all pedestrians within an image.\n",
|
||||
"\n",
|
||||
"You\u00e2\u20ac\u2122ll use Azure Machine Learning to: \n",
|
||||
"\n",
|
||||
"- Initialize a workspace \n",
|
||||
"- Create a compute cluster\n",
|
||||
"- Define a training environment\n",
|
||||
"- Train a model remotely\n",
|
||||
"- Register your model\n",
|
||||
"- Generate predictions locally\n",
|
||||
"\n",
|
||||
"## Prerequisities\n",
|
||||
"\n",
|
||||
"- If you are using an Azure Machine Learning Notebook VM, your environment already meets these prerequisites. Otherwise, go through the [configuration notebook](../../../../../configuration.ipynb) to install the Azure Machine Learning Python SDK and [create an Azure ML Workspace](https://docs.microsoft.com/azure/machine-learning/how-to-manage-workspace#create-a-workspace). You also need matplotlib 3.2, pycocotools-2.0.0, torchvision >= 0.5.0 and torch >= 1.4.0.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Check core SDK version number, check other dependencies\n",
|
||||
"import azureml.core\n",
|
||||
"import matplotlib\n",
|
||||
"import pycocotools\n",
|
||||
"import torch\n",
|
||||
"import torchvision\n",
|
||||
"\n",
|
||||
"print(\"SDK version:\", azureml.core.VERSION)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Diagnostics\n",
|
||||
"\n",
|
||||
"Opt-in diagnostics for better experience, quality, and security in future releases."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.telemetry import set_diagnostics_collection\n",
|
||||
"\n",
|
||||
"set_diagnostics_collection(send_diagnostics=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Initialize a workspace\n",
|
||||
"\n",
|
||||
"Initialize a [workspace](https://docs.microsoft.com/en-us/azure/machine-learning/concept-workspace) object from the existing workspace you created in the Prerequisites step. `Workspace.from_config()` creates a workspace object from the details stored in `config.json`, using the [from_config()](https://docs.microsoft.com/python/api/azureml-core/azureml.core.workspace(class)?view=azure-ml-py#from-config-path-none--auth-none---logger-none---file-name-none-) method."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.workspace import Workspace\n",
|
||||
"\n",
|
||||
"ws = Workspace.from_config()\n",
|
||||
"print('Workspace name: ' + ws.name, \n",
|
||||
" 'Azure region: ' + ws.location, \n",
|
||||
" 'Subscription id: ' + ws.subscription_id, \n",
|
||||
" 'Resource group: ' + ws.resource_group, sep='\\n')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create or attach existing Azure ML Managed Compute\n",
|
||||
"\n",
|
||||
"You will need to create a [compute target](https://docs.microsoft.com/azure/machine-learning/concept-compute-target) for training your model. In this tutorial, we use [Azure ML managed compute](https://docs.microsoft.com/azure/machine-learning/how-to-set-up-training-targets#amlcompute) for our remote training compute resource. Specifically, the below code creates a `STANDARD_NC6` GPU cluster that autoscales from 0 to 4 nodes.\n",
|
||||
"\n",
|
||||
"**Creation of Compute takes approximately 5 minutes.** If the Aauzre ML Compute with that name is already in your workspace, this code will skip the creation process. \n",
|
||||
"\n",
|
||||
"As with other Azure servies, there are limits on certain resources associated with the Azure Machine Learning service. Please read [this article](https://docs.microsoft.com/azure/machine-learning/how-to-manage-quotas) on the default limits and how to request more quota.\n",
|
||||
"\n",
|
||||
"> Note that the below code creates GPU compute. If you instead want to create CPU compute, provide a different VM size to the `vm_size` parameter, such as `STANDARD_D2_V2`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
|
||||
"from azureml.core.compute_target import ComputeTargetException\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# choose a name for your cluster\n",
|
||||
"cluster_name = 'gpu-cluster'\n",
|
||||
"\n",
|
||||
"try:\n",
|
||||
" compute_target = ComputeTarget(workspace=ws, name=cluster_name)\n",
|
||||
" print('Found existing compute target.')\n",
|
||||
"except ComputeTargetException:\n",
|
||||
" print('Creating a new compute target...')\n",
|
||||
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6', \n",
|
||||
" max_nodes=4)\n",
|
||||
"\n",
|
||||
" # create the cluster\n",
|
||||
" compute_target = ComputeTarget.create(ws, cluster_name, compute_config)\n",
|
||||
"\n",
|
||||
" compute_target.wait_for_completion(show_output=True)\n",
|
||||
"\n",
|
||||
"# use get_status() to get a detailed status for the current cluster. \n",
|
||||
"print(compute_target.get_status().serialize())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Define a training environment\n",
|
||||
"\n",
|
||||
"### Create a project directory\n",
|
||||
"Create a directory that will contain all the code from your local machine that you will need access to on the remote resource. This includes the training script an any additional files your training script depends on."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"project_folder = './pytorch-peds'\n",
|
||||
"\n",
|
||||
"try:\n",
|
||||
" os.makedirs(project_folder, exist_ok=False)\n",
|
||||
"except FileExistsError:\n",
|
||||
" print('project folder {} exists, moving on...'.format(project_folder))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Copy training script and dependencies into project directory"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import shutil\n",
|
||||
"\n",
|
||||
"files_to_copy = ['data', 'model', 'script', 'utils', 'transforms', 'coco_eval', 'engine', 'coco_utils']\n",
|
||||
"for file in files_to_copy:\n",
|
||||
" shutil.copy(os.path.join(os.getcwd(), (file + '.py')), project_folder)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create an experiment"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core import Experiment\n",
|
||||
"\n",
|
||||
"experiment_name = 'pytorch-peds'\n",
|
||||
"experiment = Experiment(ws, name=experiment_name)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Specify dependencies with a custom Dockerfile\n",
|
||||
"\n",
|
||||
"There are a number of ways to [use environments](https://docs.microsoft.com/azure/machine-learning/how-to-use-environments) for specifying dependencies during model training. In this case, we use a custom Dockerfile."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core import Environment\n",
|
||||
"\n",
|
||||
"my_env = Environment(name='maskr-docker')\n",
|
||||
"my_env.docker.enabled = True\n",
|
||||
"with open(\"dockerfiles/Dockerfile\", \"r\") as f:\n",
|
||||
" dockerfile_contents=f.read()\n",
|
||||
"my_env.docker.base_dockerfile=dockerfile_contents\n",
|
||||
"my_env.docker.base_image = None\n",
|
||||
"my_env.python.interpreter_path = '/opt/miniconda/bin/python'\n",
|
||||
"my_env.python.user_managed_dependencies = True\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create a ScriptRunConfig\n",
|
||||
"\n",
|
||||
"Use the [ScriptRunConfig](https://docs.microsoft.com/python/api/azureml-core/azureml.core.scriptrunconfig?view=azure-ml-py) class to define your run. Specify the source directory, compute target, and environment."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.train.dnn import PyTorch\n",
|
||||
"from azureml.core import ScriptRunConfig\n",
|
||||
"\n",
|
||||
"model_name = 'pytorch-peds'\n",
|
||||
"output_dir = './outputs/'\n",
|
||||
"n_epochs = 2\n",
|
||||
"\n",
|
||||
"script_args = [\n",
|
||||
" '--model_name', model_name,\n",
|
||||
" '--output_dir', output_dir,\n",
|
||||
" '--n_epochs', n_epochs,\n",
|
||||
"]\n",
|
||||
"# Add training script to run config\n",
|
||||
"runconfig = ScriptRunConfig(\n",
|
||||
" source_directory=project_folder,\n",
|
||||
" script=\"script.py\",\n",
|
||||
" arguments=script_args)\n",
|
||||
"\n",
|
||||
"# Attach compute target to run config\n",
|
||||
"runconfig.run_config.target = cluster_name\n",
|
||||
"\n",
|
||||
"# Uncomment the line below if you want to try this locally first\n",
|
||||
"#runconfig.run_config.target = \"local\"\n",
|
||||
"\n",
|
||||
"# Attach environment to run config\n",
|
||||
"runconfig.run_config.environment = my_env"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Train remotely\n",
|
||||
"\n",
|
||||
"### Submit your run"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Submit run \n",
|
||||
"run = experiment.submit(runconfig)\n",
|
||||
"\n",
|
||||
"# to get more details of your run\n",
|
||||
"print(run.get_details())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Monitor your run\n",
|
||||
"\n",
|
||||
"Use a widget to keep track of your run. You can also view the status of the run within the [Azure Machine Learning service portal](https://ml.azure.com)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.widgets import RunDetails\n",
|
||||
"\n",
|
||||
"RunDetails(run).show()\n",
|
||||
"run.wait_for_completion(show_output=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Test your model\n",
|
||||
"\n",
|
||||
"Now that we are done training, let's see how well this model actually performs.\n",
|
||||
"\n",
|
||||
"### Get your latest run\n",
|
||||
"First, pull the latest run using `experiment.get_runs()`, which lists runs from `experiment` in reverse chronological order."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core import Run\n",
|
||||
"\n",
|
||||
"last_run = next(experiment.get_runs())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Register your model\n",
|
||||
"Next, [register the model](https://docs.microsoft.com/azure/machine-learning/concept-model-management-and-deployment#register-package-and-deploy-models-from-anywhere) from your run. Registering your model assigns it a version and helps you with auditability."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"last_run.register_model(model_name=model_name, model_path=os.path.join(output_dir, model_name))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Download your model\n",
|
||||
"Next, download this registered model. Notice how we can initialize the `Model` object with the name of the registered model, rather than a path to the file itself."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core import Model\n",
|
||||
"\n",
|
||||
"model = Model(workspace=ws, name=model_name)\n",
|
||||
"path = model.download(target_dir='model', exist_ok=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Use your model to make a prediction\n",
|
||||
"\n",
|
||||
"Run inferencing on a single test image and display the results."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import torch\n",
|
||||
"from azureml.core import Dataset\n",
|
||||
"from data import PennFudanDataset\n",
|
||||
"from script import get_transform, download_data, NUM_CLASSES\n",
|
||||
"from model import get_instance_segmentation_model\n",
|
||||
"\n",
|
||||
"if torch.cuda.is_available():\n",
|
||||
" device = torch.device('cuda')\n",
|
||||
"else:\n",
|
||||
" device = torch.device('cpu')\n",
|
||||
"\n",
|
||||
"# Instantiate model with correct weights, cast to correct device, place in evaluation mode\n",
|
||||
"predict_model = get_instance_segmentation_model(NUM_CLASSES)\n",
|
||||
"predict_model.to(device)\n",
|
||||
"predict_model.load_state_dict(torch.load(path, map_location=device))\n",
|
||||
"predict_model.eval()\n",
|
||||
"\n",
|
||||
"# Load dataset\n",
|
||||
"root_dir=download_data()\n",
|
||||
"dataset_test = PennFudanDataset(root=root_dir, transforms=get_transform(train=False))\n",
|
||||
"\n",
|
||||
"# pick one image from the test set\n",
|
||||
"img, _ = dataset_test[0]\n",
|
||||
"\n",
|
||||
"with torch.no_grad():\n",
|
||||
" prediction = predict_model([img.to(device)])\n",
|
||||
"\n",
|
||||
"# model = torch.load(path)\n",
|
||||
"#torch.load(model.get_model_path(model_name='outputs/model.pt'))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Display the input image\n",
|
||||
"\n",
|
||||
"While tensors are great for computers, a tensor of RGB values doesn't mean much to a human. Let's display the input image in a way that a human could understand."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from PIL import Image\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Image.fromarray(img.mul(255).permute(1, 2, 0).byte().numpy())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Display the predicted masks\n",
|
||||
"\n",
|
||||
"The prediction consists of masks, displaying the outline of pedestrians in the image. Let's take a look at the first two masks, below."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"Image.fromarray(prediction[0]['masks'][0, 0].mul(255).byte().cpu().numpy())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"Image.fromarray(prediction[0]['masks'][1, 0].mul(255).byte().cpu().numpy())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Next steps\n",
|
||||
"\n",
|
||||
"Congratulations! You just trained a Mask R-CNN model with PyTorch in Azure Machine Learning. As next steps, consider:\n",
|
||||
"1. Learn more about using PyTorch in Azure Machine Learning service by checking out the [README](./README.md]\n",
|
||||
"2. Try exporting your model to [ONNX](https://docs.microsoft.com/azure/machine-learning/concept-onnx) for accelerated inferencing."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "gopalv"
|
||||
}
|
||||
],
|
||||
"category": "training",
|
||||
"compute": [
|
||||
"AML Compute"
|
||||
],
|
||||
"datasets": [
|
||||
"Custom"
|
||||
],
|
||||
"deployment": [
|
||||
"None"
|
||||
],
|
||||
"exclude_from_index": false,
|
||||
"framework": [
|
||||
"PyTorch"
|
||||
],
|
||||
"friendly_name": "PyTorch object detection",
|
||||
"index_order": 1,
|
||||
"kernel_info": {
|
||||
"name": "python3"
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
"name": "python36"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.5-final"
|
||||
},
|
||||
"nteract": {
|
||||
"version": "nteract-front-end@1.0.0"
|
||||
},
|
||||
"tags": [
|
||||
"remote run",
|
||||
"docker"
|
||||
],
|
||||
"task": "Fine-tune PyTorch object detection model with a custom dockerfile"
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -0,0 +1,14 @@
|
||||
name: pytorch-mask-rcnn
|
||||
dependencies:
|
||||
- cython
|
||||
- pytorch==1.4.0 -c pytorch
|
||||
- torchvision -c pytorch
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-widgets
|
||||
- azureml-dataprep
|
||||
- fuse
|
||||
- pandas
|
||||
- matplotlib
|
||||
- pillow==7.0.0
|
||||
- git+https://github.com/philferriere/cocoapi.git#subdirectory=PythonAPI
|
||||
@@ -0,0 +1,117 @@
|
||||
import argparse
|
||||
import os
|
||||
import torch
|
||||
import torchvision
|
||||
import transforms as T
|
||||
import urllib.request
|
||||
import utils
|
||||
|
||||
from azureml.core import Dataset, Run
|
||||
from data import PennFudanDataset
|
||||
from engine import train_one_epoch, evaluate
|
||||
from model import get_instance_segmentation_model
|
||||
from zipfile import ZipFile
|
||||
|
||||
# Number of classes passed to the model builder; main() notes that the
# dataset has two classes only - background and person.
NUM_CLASSES = 2
|
||||
|
||||
|
||||
def download_data():
    """Download and unpack the Penn-Fudan pedestrian archive.

    The zip file is fetched into the current working directory as
    ``PennFudanPed.zip`` and extracted below ``PennFudanPed/``.

    Returns:
        str: ``PennFudanPed/`` joined with the first entry name listed in the
        archive.
    """
    data_file = 'PennFudanPed.zip'
    ds_path = 'PennFudanPed/'
    urllib.request.urlretrieve('https://www.cis.upenn.edu/~jshi/ped_html/PennFudanPed.zip', data_file)
    # The context manager closes the archive handle even if extraction fails;
    # the local name also no longer shadows the builtin ``zip``.
    with ZipFile(file=data_file) as archive:
        archive.extractall(path=ds_path)
        first_entry = archive.namelist()[0]
    return os.path.join(ds_path, first_entry)
|
||||
|
||||
|
||||
def get_transform(train):
    """Build the transform pipeline applied to each (image, target) pair.

    Args:
        train: when truthy, a random horizontal flip (probability 0.5) is
            appended after the tensor conversion.

    Returns:
        A ``T.Compose`` wrapping the selected transforms.
    """
    # The PIL image is always converted to a PyTorch tensor first.
    pipeline = [T.ToTensor()]
    if train:
        # Augmentation: flip images and their ground truth at random.
        pipeline.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(pipeline)
|
||||
|
||||
|
||||
def main():
    """Train the instance segmentation model and save its weights.

    Command-line arguments:
        --model_name: file name used for the saved state dict
            (default ``pytorch-peds.pt``).
        --output_dir: directory the state dict is written to
            (default ``local-outputs``).
        --n_epochs: number of training epochs (default 10).
    """
    print("Torch version:", torch.__version__)
    # get command-line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--model_name', type=str, default="pytorch-peds.pt",
                        help='name with which to register your model')
    parser.add_argument('--output_dir', default="local-outputs",
                        type=str, help='output directory')
    parser.add_argument('--n_epochs', type=int,
                        default=10, help='number of epochs')
    args = parser.parse_args()

    # In case user inputs a nested output directory
    os.makedirs(name=args.output_dir, exist_ok=True)

    # Download and extract the dataset; root_dir is the path download_data() returns
    root_dir = download_data()

    # use our dataset and defined transformations
    # (two views of the same data: one with training augmentation, one without)
    dataset = PennFudanDataset(root=root_dir, transforms=get_transform(train=True))
    dataset_test = PennFudanDataset(root=root_dir, transforms=get_transform(train=False))

    # split the dataset in train and test set
    # fixed seed so the permutation (and therefore the split) is reproducible
    torch.manual_seed(1)
    indices = torch.randperm(len(dataset)).tolist()
    # the last 50 shuffled indices form the test set, everything else is training data
    dataset = torch.utils.data.Subset(dataset, indices[:-50])
    dataset_test = torch.utils.data.Subset(dataset_test, indices[-50:])

    # define training and validation data loaders
    data_loader = torch.utils.data.DataLoader(
        dataset, batch_size=2, shuffle=True, num_workers=4,
        collate_fn=utils.collate_fn)

    data_loader_test = torch.utils.data.DataLoader(
        dataset_test, batch_size=1, shuffle=False, num_workers=4,
        collate_fn=utils.collate_fn)

    # prefer the GPU when one is visible to torch, otherwise fall back to CPU
    if torch.cuda.is_available():
        print('Using GPU')
        device = torch.device('cuda')
    else:
        print('Using CPU')
        device = torch.device('cpu')

    # our dataset has two classes only - background and person
    num_classes = NUM_CLASSES

    # get the model using our helper function
    model = get_instance_segmentation_model(num_classes)

    # move model to the right device
    model.to(device)

    # construct an optimizer
    # only parameters that require gradients are handed to the optimizer
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.005,
                                momentum=0.9, weight_decay=0.0005)

    # and a learning rate scheduler which decreases the learning rate by
    # 10x every 3 epochs
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=3,
                                                   gamma=0.1)

    for epoch in range(args.n_epochs):
        # train for one epoch, printing every 10 iterations
        train_one_epoch(
            model, optimizer, data_loader, device, epoch, print_freq=10)
        # update the learning rate
        lr_scheduler.step()
        # evaluate on the test dataset
        evaluate(model, data_loader_test, device=device)

    # Saving the state dict is the recommended method, per
    # https://pytorch.org/tutorials/beginner/saving_loading_models.html
    torch.save(model.state_dict(), os.path.join(args.output_dir, args.model_name))
|
||||
|
||||
|
||||
# Run training only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
|
||||
@@ -0,0 +1,50 @@
|
||||
import random
|
||||
import torch
|
||||
|
||||
from torchvision.transforms import functional as F
|
||||
|
||||
|
||||
def _flip_coco_person_keypoints(kps, width):
|
||||
flip_inds = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
|
||||
flipped_data = kps[:, flip_inds]
|
||||
flipped_data[..., 0] = width - flipped_data[..., 0]
|
||||
# Maintain COCO convention that if visibility == 0, then x, y = 0
|
||||
inds = flipped_data[..., 2] == 0
|
||||
flipped_data[inds] = 0
|
||||
return flipped_data
|
||||
|
||||
|
||||
class Compose:
    """Chain several (image, target) transforms into one callable."""

    def __init__(self, transforms):
        # Sequence of callables, each taking and returning (image, target).
        self.transforms = transforms

    def __call__(self, image, target):
        """Apply every transform in order, feeding each the previous result."""
        for step in self.transforms:
            result = step(image, target)
            image, target = result
        return image, target
|
||||
|
||||
|
||||
class RandomHorizontalFlip:
    """Flip an image and its annotations left-to-right with a given probability."""

    def __init__(self, prob):
        # Probability threshold compared against random.random() on each call.
        self.prob = prob

    def __call__(self, image, target):
        """Return (image, target), mirrored when the random draw falls below prob.

        The "boxes" tensor in ``target`` is updated in place; "masks" and
        "keypoints" entries are mirrored only when present.
        """
        if random.random() >= self.prob:
            # No flip this time: hand the inputs back untouched.
            return image, target

        _, width = image.shape[-2:]
        image = image.flip(-1)

        # Mirroring swaps the roles of the two x coordinates of every box.
        boxes = target["boxes"]
        boxes[:, [0, 2]] = width - boxes[:, [2, 0]]
        target["boxes"] = boxes

        if "masks" in target:
            target["masks"] = target["masks"].flip(-1)
        if "keypoints" in target:
            target["keypoints"] = _flip_coco_person_keypoints(target["keypoints"], width)
        return image, target
|
||||
|
||||
|
||||
class ToTensor:
    """Convert the image with ``F.to_tensor``; the target passes through unchanged."""

    def __call__(self, image, target):
        return F.to_tensor(image), target
|
||||
@@ -0,0 +1,326 @@
|
||||
from __future__ import print_function
|
||||
|
||||
from collections import defaultdict, deque
|
||||
import datetime
|
||||
import pickle
|
||||
import time
|
||||
|
||||
import torch
|
||||
import torch.distributed as dist
|
||||
|
||||
import errno
|
||||
import os
|
||||
|
||||
|
||||
class SmoothedValue:
    """Running statistics over a stream of numbers.

    The most recent ``window_size`` values are kept for the windowed
    statistics (median, avg, max, value); a running count and total back the
    average over everything seen so far (global_avg).
    """

    def __init__(self, window_size=20, fmt=None):
        self.deque = deque(maxlen=window_size)
        self.total = 0.0
        self.count = 0
        # Default rendering: windowed median followed by the global average.
        self.fmt = "{median:.4f} ({global_avg:.4f})" if fmt is None else fmt

    def update(self, value, n=1):
        """Record ``value`` in the window and add it ``n`` times to the totals."""
        self.deque.append(value)
        self.total += value * n
        self.count += n

    def synchronize_between_processes(self):
        """
        Sum count and total across processes.

        Warning: does not synchronize the deque!
        """
        if is_dist_avail_and_initialized():
            packed = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda')
            dist.barrier()
            dist.all_reduce(packed)
            reduced_count, reduced_total = packed.tolist()
            self.count = int(reduced_count)
            self.total = reduced_total

    @property
    def median(self):
        """Median of the values currently in the window."""
        return torch.tensor(list(self.deque)).median().item()

    @property
    def avg(self):
        """Mean of the values currently in the window."""
        return torch.tensor(list(self.deque), dtype=torch.float32).mean().item()

    @property
    def global_avg(self):
        """Total divided by count over every update so far."""
        return self.total / self.count

    @property
    def max(self):
        """Largest value currently in the window."""
        return max(self.deque)

    @property
    def value(self):
        """Most recently recorded value."""
        return self.deque[-1]

    def __str__(self):
        stats = dict(
            median=self.median,
            avg=self.avg,
            global_avg=self.global_avg,
            max=self.max,
            value=self.value,
        )
        return self.fmt.format(**stats)
|
||||
|
||||
|
||||
def all_gather(data):
    """
    Run all_gather on arbitrary picklable data (not necessarily tensors)
    Args:
        data: any picklable object
    Returns:
        list[data]: list of data gathered from each rank
    """
    world_size = get_world_size()
    # Single-process shortcut: nothing to exchange.
    if world_size == 1:
        return [data]

    # serialized to a Tensor
    buffer = pickle.dumps(data)
    storage = torch.ByteStorage.from_buffer(buffer)
    tensor = torch.ByteTensor(storage).to("cuda")

    # obtain Tensor size of each rank
    local_size = torch.tensor([tensor.numel()], device="cuda")
    size_list = [torch.tensor([0], device="cuda") for _ in range(world_size)]
    dist.all_gather(size_list, local_size)
    size_list = [int(size.item()) for size in size_list]
    max_size = max(size_list)

    # receiving Tensor from all ranks
    # we pad the tensor because torch all_gather does not support
    # gathering tensors of different shapes
    tensor_list = []
    for _ in size_list:
        tensor_list.append(torch.empty((max_size,), dtype=torch.uint8, device="cuda"))
    if local_size != max_size:
        # padding content is uninitialised; it is sliced away again below
        padding = torch.empty(size=(max_size - local_size,), dtype=torch.uint8, device="cuda")
        tensor = torch.cat((tensor, padding), dim=0)
    dist.all_gather(tensor_list, tensor)

    data_list = []
    for size, tensor in zip(size_list, tensor_list):
        # keep only the first `size` bytes, i.e. drop the padding for that rank
        buffer = tensor.cpu().numpy().tobytes()[:size]
        # NOTE(review): this unpickles bytes received from other ranks;
        # only appropriate when every participating process is trusted.
        data_list.append(pickle.loads(buffer))

    return data_list
|
||||
|
||||
|
||||
def reduce_dict(input_dict, average=True):
    """
    Reduce the values of a dictionary across all processes.

    Args:
        input_dict (dict): all the values will be reduced
        average (bool): whether to do average or sum

    Returns:
        A dict with the same keys as input_dict holding the reduced values.
        With fewer than two processes input_dict itself is returned.
    """
    n_procs = get_world_size()
    if n_procs < 2:
        return input_dict

    with torch.no_grad():
        # sorted keys give every process the same stacking order
        names = sorted(input_dict.keys())
        stacked = torch.stack([input_dict[name] for name in names], dim=0)
        dist.all_reduce(stacked)
        if average:
            stacked /= n_procs
        reduced_dict = dict(zip(names, stacked))
    return reduced_dict
|
||||
|
||||
|
||||
class MetricLogger(object):
    """Collect named SmoothedValue meters and print progress lines while iterating.

    Meters are created on first use and are also reachable as attributes
    (``logger.loss`` returns ``logger.meters['loss']``).
    """

    def __init__(self, delimiter="\t"):
        # Meters are created lazily the first time a name is updated.
        self.meters = defaultdict(SmoothedValue)
        # String placed between the fields of a log line.
        self.delimiter = delimiter

    def update(self, **kwargs):
        """Feed one new value per keyword into the meter of the same name.

        Tensor values are converted with ``.item()``; anything that is not a
        float or int afterwards fails the assertion.
        """
        for k, v in kwargs.items():
            if isinstance(v, torch.Tensor):
                v = v.item()
            assert isinstance(v, (float, int))
            self.meters[k].update(v)

    def __getattr__(self, attr):
        """Resolve unknown attributes as meter names.

        Raises:
            AttributeError: if ``attr`` is neither a meter nor an instance
                attribute.
        """
        # __getattr__ only runs after normal lookup failed. ``meters`` is read
        # through __dict__ so that an instance without it (e.g. one created
        # via __new__ during copying or unpickling) raises AttributeError
        # instead of recursing into __getattr__ forever.
        meters = self.__dict__.get('meters', {})
        if attr in meters:
            return meters[attr]
        if attr in self.__dict__:
            return self.__dict__[attr]
        raise AttributeError("'{}' object has no attribute '{}'".format(
            type(self).__name__, attr))

    def __str__(self):
        loss_str = []
        for name, meter in self.meters.items():
            loss_str.append(
                "{}: {}".format(name, str(meter))
            )
        return self.delimiter.join(loss_str)

    def synchronize_between_processes(self):
        """Ask every meter to synchronize its totals across processes."""
        for meter in self.meters.values():
            meter.synchronize_between_processes()

    def add_meter(self, name, meter):
        """Register (or replace) the meter stored under ``name``."""
        self.meters[name] = meter

    def log_every(self, iterable, print_freq, header=None):
        """Yield the items of ``iterable`` while printing timing and meter lines.

        A line is printed every ``print_freq`` items and for the last item,
        followed by a total-time summary once the iterable is exhausted.

        Args:
            iterable: sized iterable to walk through (``len()`` is required).
            print_freq: number of items between progress lines.
            header: optional prefix for every printed line.
        """
        i = 0
        if not header:
            header = ''
        total_items = len(iterable)
        start_time = time.time()
        end = time.time()
        iter_time = SmoothedValue(fmt='{avg:.4f}')
        data_time = SmoothedValue(fmt='{avg:.4f}')
        space_fmt = ':' + str(len(str(total_items))) + 'd'
        # The CUDA and CPU log lines differ only by the trailing memory field,
        # so build the shared part once and append the extra field on demand.
        use_cuda = torch.cuda.is_available()
        fields = [
            header,
            '[{0' + space_fmt + '}/{1}]',
            'eta: {eta}',
            '{meters}',
            'time: {time}',
            'data: {data}'
        ]
        if use_cuda:
            fields.append('max mem: {memory:.0f}')
        log_msg = self.delimiter.join(fields)
        MB = 1024.0 * 1024.0
        for obj in iterable:
            # time spent waiting for the next item
            data_time.update(time.time() - end)
            yield obj
            # time for the whole iteration, including the consumer's work
            iter_time.update(time.time() - end)
            if i % print_freq == 0 or i == total_items - 1:
                eta_seconds = iter_time.global_avg * (total_items - i)
                eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
                extra = {}
                if use_cuda:
                    extra['memory'] = torch.cuda.max_memory_allocated() / MB
                print(log_msg.format(
                    i, total_items, eta=eta_string,
                    meters=str(self),
                    time=str(iter_time), data=str(data_time),
                    **extra))
            i += 1
            end = time.time()
        total_time = time.time() - start_time
        total_time_str = str(datetime.timedelta(seconds=int(total_time)))
        # max(..., 1) keeps the summary from dividing by zero on an empty iterable.
        print('{} Total time: {} ({:.4f} s / it)'.format(
            header, total_time_str, total_time / max(total_items, 1)))
|
||||
|
||||
|
||||
def collate_fn(batch):
    """Transpose a batch of samples into one tuple per sample field."""
    columns = zip(*batch)
    return tuple(columns)
|
||||
|
||||
|
||||
def warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor):
    """Create a LambdaLR that ramps the learning-rate multiplier up linearly.

    The multiplier starts at ``warmup_factor`` for step 0 and is 1 from step
    ``warmup_iters`` onwards.
    """

    def _multiplier(step):
        if step < warmup_iters:
            progress = float(step) / warmup_iters
            # Linear blend between warmup_factor (progress 0) and 1 (progress 1).
            return warmup_factor * (1 - progress) + progress
        return 1

    return torch.optim.lr_scheduler.LambdaLR(optimizer, _multiplier)
|
||||
|
||||
|
||||
def mkdir(path):
    """Create ``path`` (including parents), ignoring an "already exists" error.

    Any other OSError is re-raised.
    """
    try:
        os.makedirs(path)
    except OSError as err:
        if err.errno == errno.EEXIST:
            return
        raise
|
||||
|
||||
|
||||
def setup_for_distributed(is_master):
    """
    Replace the builtin print so that it stays silent when not in the master
    process, unless the call passes force=True.
    """
    import builtins as __builtin__
    original_print = __builtin__.print

    def print(*args, **kwargs):
        # 'force' is consumed here and never forwarded to the real print.
        forced = kwargs.pop('force', False)
        if not (is_master or forced):
            return
        original_print(*args, **kwargs)

    __builtin__.print = print
|
||||
|
||||
|
||||
def is_dist_avail_and_initialized():
    """Return True only when torch.distributed is available and initialized."""
    return dist.is_available() and dist.is_initialized()
|
||||
|
||||
|
||||
def get_world_size():
    """Number of participating processes; 1 when not running distributed."""
    return dist.get_world_size() if is_dist_avail_and_initialized() else 1
|
||||
|
||||
|
||||
def get_rank():
    """Rank of the current process; 0 when not running distributed."""
    return dist.get_rank() if is_dist_avail_and_initialized() else 0
|
||||
|
||||
|
||||
def is_main_process():
    """True when the current process has rank 0."""
    rank = get_rank()
    return rank == 0
|
||||
|
||||
|
||||
def save_on_master(*args, **kwargs):
    """Forward the arguments to torch.save, but only in the main process."""
    if not is_main_process():
        return
    torch.save(*args, **kwargs)
|
||||
|
||||
|
||||
def init_distributed_mode(args):
    """Configure torch.distributed from environment variables.

    Sets ``args.distributed`` in every case. In the distributed case it also
    sets ``args.rank``, ``args.gpu`` and ``args.dist_backend``, selects the
    CUDA device, initializes the process group and silences printing in
    non-master processes.

    Args:
        args: mutable namespace-like object that is read and written via
            attributes.
    """
    if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ:
        args.rank = int(os.environ["RANK"])
        args.world_size = int(os.environ['WORLD_SIZE'])
        args.gpu = int(os.environ['LOCAL_RANK'])
    elif 'SLURM_PROCID' in os.environ:
        # NOTE(review): this branch does not set args.world_size, which is read
        # below - it has to be present on args already. Confirm against callers.
        args.rank = int(os.environ['SLURM_PROCID'])
        args.gpu = args.rank % torch.cuda.device_count()
    else:
        print('Not using distributed mode')
        args.distributed = False
        return

    args.distributed = True

    torch.cuda.set_device(args.gpu)
    args.dist_backend = 'nccl'
    # args.dist_url is never assigned in this function; the caller must provide it.
    print('| distributed init (rank {}): {}'.format(
        args.rank, args.dist_url), flush=True)
    torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                         world_size=args.world_size, rank=args.rank)
    torch.distributed.barrier()
    # only rank 0 keeps printing from here on
    setup_for_distributed(args.rank == 0)
|
||||
@@ -487,6 +487,15 @@
|
||||
"hyperdrive_run.wait_for_completion(show_output=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"assert(hyperdrive_run.get_status() == \"Completed\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
||||
@@ -4,33 +4,100 @@
|
||||
import numpy as np
|
||||
import argparse
|
||||
import os
|
||||
import re
|
||||
import tensorflow as tf
|
||||
import time
|
||||
import glob
|
||||
|
||||
from azureml.core import Run
|
||||
from utils import load_data
|
||||
from tensorflow.keras import Model, layers
|
||||
|
||||
|
||||
# Create TF Model.
|
||||
class NeuralNet(Model):
    """Network with two ReLU hidden layers and a dense output layer.

    Layer widths come from the module-level n_h1, n_h2 and n_outputs values.
    """

    def __init__(self):
        super().__init__()
        # Hidden layers, both with ReLU activation.
        self.h1 = layers.Dense(n_h1, activation=tf.nn.relu)
        self.h2 = layers.Dense(n_h2, activation=tf.nn.relu)
        # Output layer without activation.
        self.out = layers.Dense(n_outputs)

    def call(self, x, is_training=False):
        """Forward pass; softmax is applied to the output only when not training."""
        logits = self.out(self.h2(self.h1(x)))
        return logits if is_training else tf.nn.softmax(logits)
|
||||
|
||||
|
||||
def cross_entropy_loss(y, logits):
    """Mean sparse softmax cross-entropy between labels ``y`` and ``logits``."""
    # Labels are cast to int64 for the TF cross-entropy function.
    per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=tf.cast(y, tf.int64), logits=logits)
    # Average loss across the batch.
    return tf.reduce_mean(per_example)
|
||||
|
||||
|
||||
# Accuracy metric.
|
||||
def accuracy(y_pred, y_true):
    """Fraction of rows whose highest-scoring index equals the true label."""
    # Predicted class is the argmax along axis 1 of the prediction scores.
    predicted = tf.argmax(y_pred, 1)
    expected = tf.cast(y_true, tf.int64)
    hits = tf.cast(tf.equal(predicted, expected), tf.float32)
    return tf.reduce_mean(hits, axis=-1)
|
||||
|
||||
|
||||
# Optimization process.
|
||||
def run_optimization(x, y):
    """Run one optimizer step of the module-level neural_net on batch (x, y)."""
    # Record the forward pass so gradients can be derived from it.
    with tf.GradientTape() as tape:
        batch_loss = cross_entropy_loss(y, neural_net(x, is_training=True))

    # Differentiate the loss w.r.t. the trainable variables and apply the update.
    weights = neural_net.trainable_variables
    grads = tape.gradient(batch_loss, weights)
    optimizer.apply_gradients(zip(grads, weights))
|
||||
|
||||
|
||||
print("TensorFlow version:", tf.__version__)
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('--data-folder', type=str, dest='data_folder', help='data folder mounting point')
|
||||
parser.add_argument('--batch-size', type=int, dest='batch_size', default=50, help='mini batch size for training')
|
||||
parser.add_argument('--first-layer-neurons', type=int, dest='n_hidden_1', default=100,
|
||||
parser.add_argument('--data-folder', type=str, dest='data_folder', default='data', help='data folder mounting point')
|
||||
parser.add_argument('--batch-size', type=int, dest='batch_size', default=128, help='mini batch size for training')
|
||||
parser.add_argument('--first-layer-neurons', type=int, dest='n_hidden_1', default=128,
|
||||
help='# of neurons in the first layer')
|
||||
parser.add_argument('--second-layer-neurons', type=int, dest='n_hidden_2', default=100,
|
||||
parser.add_argument('--second-layer-neurons', type=int, dest='n_hidden_2', default=128,
|
||||
help='# of neurons in the second layer')
|
||||
parser.add_argument('--learning-rate', type=float, dest='learning_rate', default=0.01, help='learning rate')
|
||||
parser.add_argument('--resume-from', type=str, default=None,
|
||||
help='location of the model or checkpoint files from where to resume the training')
|
||||
args = parser.parse_args()
|
||||
|
||||
previous_model_location = args.resume_from
|
||||
# You can also use environment variable to get the model/checkpoint files location
|
||||
# previous_model_location = os.path.expandvars(os.getenv("AZUREML_DATAREFERENCE_MODEL_LOCATION", None))
|
||||
|
||||
data_folder = args.data_folder
|
||||
print('Data folder:', data_folder)
|
||||
|
||||
# load train and test set into numpy arrays
|
||||
# note we scale the pixel intensity values to 0-1 (by dividing it with 255.0) so the model can converge faster.
|
||||
X_train = load_data(glob.glob(os.path.join(data_folder, '**/train-images-idx3-ubyte.gz'),
|
||||
recursive=True)[0], False) / 255.0
|
||||
recursive=True)[0], False) / np.float32(255.0)
|
||||
X_test = load_data(glob.glob(os.path.join(data_folder, '**/t10k-images-idx3-ubyte.gz'),
|
||||
recursive=True)[0], False) / 255.0
|
||||
recursive=True)[0], False) / np.float32(255.0)
|
||||
y_train = load_data(glob.glob(os.path.join(data_folder, '**/train-labels-idx1-ubyte.gz'),
|
||||
recursive=True)[0], True).reshape(-1)
|
||||
y_test = load_data(glob.glob(os.path.join(data_folder, '**/t10k-labels-idx1-ubyte.gz'),
|
||||
@@ -48,33 +115,28 @@ learning_rate = args.learning_rate
|
||||
n_epochs = 20
|
||||
batch_size = args.batch_size
|
||||
|
||||
with tf.name_scope('network'):
|
||||
# construct the DNN
|
||||
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name='X')
|
||||
y = tf.placeholder(tf.int64, shape=(None), name='y')
|
||||
h1 = tf.layers.dense(X, n_h1, activation=tf.nn.relu, name='h1')
|
||||
h2 = tf.layers.dense(h1, n_h2, activation=tf.nn.relu, name='h2')
|
||||
output = tf.layers.dense(h2, n_outputs, name='output')
|
||||
# Build neural network model.
|
||||
neural_net = NeuralNet()
|
||||
|
||||
with tf.name_scope('train'):
|
||||
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=output)
|
||||
loss = tf.reduce_mean(cross_entropy, name='loss')
|
||||
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
|
||||
train_op = optimizer.minimize(loss)
|
||||
|
||||
with tf.name_scope('eval'):
|
||||
correct = tf.nn.in_top_k(output, y, 1)
|
||||
acc_op = tf.reduce_mean(tf.cast(correct, tf.float32))
|
||||
|
||||
init = tf.global_variables_initializer()
|
||||
saver = tf.train.Saver()
|
||||
# Stochastic gradient descent optimizer.
|
||||
optimizer = tf.optimizers.SGD(learning_rate)
|
||||
|
||||
# start an Azure ML run
|
||||
run = Run.get_context()
|
||||
|
||||
with tf.Session() as sess:
|
||||
init.run()
|
||||
for epoch in range(n_epochs):
|
||||
if previous_model_location:
|
||||
# Restore variables from latest checkpoint.
|
||||
checkpoint = tf.train.Checkpoint(model=neural_net, optimizer=optimizer)
|
||||
checkpoint_file_path = tf.train.latest_checkpoint(previous_model_location)
|
||||
checkpoint.restore(checkpoint_file_path)
|
||||
checkpoint_filename = os.path.basename(checkpoint_file_path)
|
||||
num_found = re.search(r'\d+', checkpoint_filename)
|
||||
if num_found:
|
||||
start_epoch = int(num_found.group(0))
|
||||
print("Resuming from epoch {}".format(str(start_epoch)))
|
||||
|
||||
start_time = time.perf_counter()
|
||||
for epoch in range(0, n_epochs):
|
||||
|
||||
# randomly shuffle training set
|
||||
indices = np.random.permutation(training_set_size)
|
||||
@@ -93,20 +155,36 @@ with tf.Session() as sess:
|
||||
b_end = min(b_start + batch_size, training_set_size)
|
||||
|
||||
# train
|
||||
sess.run(train_op, feed_dict={X: X_batch, y: y_batch})
|
||||
run_optimization(X_batch, y_batch)
|
||||
|
||||
# evaluate training set
|
||||
acc_train = acc_op.eval(feed_dict={X: X_batch, y: y_batch})
|
||||
pred = neural_net(X_batch, is_training=False)
|
||||
acc_train = accuracy(pred, y_batch)
|
||||
|
||||
# evaluate validation set
|
||||
acc_val = acc_op.eval(feed_dict={X: X_test, y: y_test})
|
||||
pred = neural_net(X_test, is_training=False)
|
||||
acc_val = accuracy(pred, y_test)
|
||||
|
||||
# log accuracies
|
||||
run.log('training_acc', np.float(acc_train))
|
||||
run.log('validation_acc', np.float(acc_val))
|
||||
print(epoch, '-- Training accuracy:', acc_train, '\b Validation accuracy:', acc_val)
|
||||
y_hat = np.argmax(output.eval(feed_dict={X: X_test}), axis=1)
|
||||
|
||||
run.log('final_acc', np.float(acc_val))
|
||||
# Save checkpoints in the "./outputs" folder so that they are automatically uploaded into run history.
|
||||
checkpoint_dir = './outputs/'
|
||||
checkpoint = tf.train.Checkpoint(model=neural_net, optimizer=optimizer)
|
||||
|
||||
os.makedirs('./outputs/model', exist_ok=True)
|
||||
# files saved in the "./outputs" folder are automatically uploaded into run history
|
||||
saver.save(sess, './outputs/model/mnist-tf.model')
|
||||
if epoch % 2 == 0:
|
||||
checkpoint.save(checkpoint_dir)
|
||||
|
||||
run.log('final_acc', np.float(acc_val))
|
||||
os.makedirs('./outputs/model', exist_ok=True)
|
||||
|
||||
# files saved in the "./outputs" folder are automatically uploaded into run history
|
||||
# this is workaround for https://github.com/tensorflow/tensorflow/issues/33913 and will be fixed once we move to >tf2.1
|
||||
neural_net._set_inputs(X_train)
|
||||
tf.saved_model.save(neural_net, './outputs/model/')
|
||||
|
||||
stop_time = time.perf_counter()
|
||||
training_time = (stop_time - start_time) * 1000
|
||||
print("Total time in milliseconds for training: {}".format(str(training_time)))
|
||||
|
||||
@@ -170,18 +170,19 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import urllib\n",
|
||||
"data_folder = 'data'\n",
|
||||
"import urllib.request\n",
|
||||
"\n",
|
||||
"data_folder = os.path.join(os.getcwd(), 'data')\n",
|
||||
"os.makedirs(data_folder, exist_ok=True)\n",
|
||||
"\n",
|
||||
"urllib.request.urlretrieve('https://azureopendatastorage.blob.core.windows.net/mnist/train-images-idx3-ubyte.gz',\n",
|
||||
" filename=os.path.join(data_folder, 'train-images.gz'))\n",
|
||||
" filename=os.path.join(data_folder, 'train-images-idx3-ubyte.gz'))\n",
|
||||
"urllib.request.urlretrieve('https://azureopendatastorage.blob.core.windows.net/mnist/train-labels-idx1-ubyte.gz',\n",
|
||||
" filename=os.path.join(data_folder, 'train-labels.gz'))\n",
|
||||
" filename=os.path.join(data_folder, 'train-labels-idx1-ubyte.gz'))\n",
|
||||
"urllib.request.urlretrieve('https://azureopendatastorage.blob.core.windows.net/mnist/t10k-images-idx3-ubyte.gz',\n",
|
||||
" filename=os.path.join(data_folder, 'test-images.gz'))\n",
|
||||
" filename=os.path.join(data_folder, 't10k-images-idx3-ubyte.gz'))\n",
|
||||
"urllib.request.urlretrieve('https://azureopendatastorage.blob.core.windows.net/mnist/t10k-labels-idx1-ubyte.gz',\n",
|
||||
" filename=os.path.join(data_folder, 'test-labels.gz'))"
|
||||
" filename=os.path.join(data_folder, 't10k-labels-idx1-ubyte.gz'))"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -209,11 +210,10 @@
|
||||
"from utils import load_data\n",
|
||||
"\n",
|
||||
"# note we also shrink the intensity values (X) from 0-255 to 0-1. This helps the neural network converge faster.\n",
|
||||
"X_train = load_data(os.path.join(data_folder, 'train-images.gz'), False) / 255.0\n",
|
||||
"y_train = load_data(os.path.join(data_folder, 'train-labels.gz'), True).reshape(-1)\n",
|
||||
"\n",
|
||||
"X_test = load_data(os.path.join(data_folder, 'test-images.gz'), False) / 255.0\n",
|
||||
"y_test = load_data(os.path.join(data_folder, 'test-labels.gz'), True).reshape(-1)\n",
|
||||
"X_train = load_data(os.path.join(data_folder, 'train-images-idx3-ubyte.gz'), False) / np.float32(255.0)\n",
|
||||
"X_test = load_data(os.path.join(data_folder, 't10k-images-idx3-ubyte.gz'), False) / np.float32(255.0)\n",
|
||||
"y_train = load_data(os.path.join(data_folder, 'train-labels-idx1-ubyte.gz'), True).reshape(-1)\n",
|
||||
"y_test = load_data(os.path.join(data_folder, 't10k-labels-idx1-ubyte.gz'), True).reshape(-1)\n",
|
||||
"\n",
|
||||
"count = 0\n",
|
||||
"sample_size = 30\n",
|
||||
@@ -255,7 +255,8 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You may want to regiester datasets using the register() method to your workspace so they can be shared with others, reused across various experiments, and referred to by name in your training script."
|
||||
"You may want to register datasets using the register() method to your workspace so they can be shared with others, reused across various experiments, and referred to by name in your training script.\n",
|
||||
"You can first try to get the dataset to see whether it is already registered."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -264,8 +265,16 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dataset = dataset.register(workspace = ws,\n",
|
||||
" name = 'mnist dataset',\n",
|
||||
"dataset_registered = False\n",
|
||||
"try:\n",
|
||||
" temp = Dataset.get_by_name(workspace = ws, name = 'mnist-dataset')\n",
|
||||
" dataset_registered = True\n",
|
||||
"except:\n",
|
||||
" print(\"The dataset mnist-dataset is not registered in workspace yet.\")\n",
|
||||
"\n",
|
||||
"if not dataset_registered:\n",
|
||||
" dataset = dataset.register(workspace = ws,\n",
|
||||
" name = 'mnist-dataset',\n",
|
||||
" description='training and test dataset',\n",
|
||||
" create_new_version=True)\n",
|
||||
"# list the files referenced by dataset\n",
|
||||
@@ -438,9 +447,9 @@
|
||||
"\n",
|
||||
"script_params = {\n",
|
||||
" '--data-folder': dataset.as_named_input('mnist').as_mount(),\n",
|
||||
" '--batch-size': 50,\n",
|
||||
" '--first-layer-neurons': 300,\n",
|
||||
" '--second-layer-neurons': 100,\n",
|
||||
" '--batch-size': 64,\n",
|
||||
" '--first-layer-neurons': 256,\n",
|
||||
" '--second-layer-neurons': 128,\n",
|
||||
" '--learning-rate': 0.01\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
@@ -449,6 +458,7 @@
|
||||
" compute_target=compute_target,\n",
|
||||
" entry_script='tf_mnist.py',\n",
|
||||
" use_gpu=True,\n",
|
||||
" framework_version='2.0',\n",
|
||||
" pip_packages=['azureml-dataprep[pandas,fuse]'])"
|
||||
]
|
||||
},
|
||||
@@ -613,14 +623,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# create a model folder in the current directory\n",
|
||||
"os.makedirs('./model', exist_ok=True)\n",
|
||||
"\n",
|
||||
"for f in run.get_file_names():\n",
|
||||
" if f.startswith('outputs/model'):\n",
|
||||
" output_file_path = os.path.join('./model', f.split('/')[-1])\n",
|
||||
" print('Downloading from {} to {} ...'.format(f, output_file_path))\n",
|
||||
" run.download_file(name=f, output_file_path=output_file_path)"
|
||||
"run.download_files(prefix='outputs/model', output_directory='./model', append_prefix=False)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -640,22 +643,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import tensorflow as tf\n",
|
||||
"\n",
|
||||
"tf.reset_default_graph()\n",
|
||||
"\n",
|
||||
"saver = tf.train.import_meta_graph(\"./model/mnist-tf.model.meta\")\n",
|
||||
"graph = tf.get_default_graph()\n",
|
||||
"\n",
|
||||
"for op in graph.get_operations():\n",
|
||||
" if op.name.startswith('network'):\n",
|
||||
" print(op.name)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Feed test dataset to the persisted model to get predictions."
|
||||
"imported_model = tf.saved_model.load('./model')"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -664,16 +652,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# input tensor. this is an array of 784 elements, each representing the intensity of a pixel in the digit image.\n",
|
||||
"X = tf.get_default_graph().get_tensor_by_name(\"network/X:0\")\n",
|
||||
"# output tensor. this is an array of 10 elements, each representing the probability of predicted value of the digit.\n",
|
||||
"output = tf.get_default_graph().get_tensor_by_name(\"network/output/MatMul:0\")\n",
|
||||
"\n",
|
||||
"with tf.Session() as sess:\n",
|
||||
" saver.restore(sess, './model/mnist-tf.model')\n",
|
||||
" k = output.eval(feed_dict={X : X_test})\n",
|
||||
"# get the prediction, which is the index of the element that has the largest probability value.\n",
|
||||
"y_hat = np.argmax(k, axis=1)\n",
|
||||
"pred =imported_model(X_test)\n",
|
||||
"y_hat = np.argmax(pred, axis=1)\n",
|
||||
"\n",
|
||||
"# print the first 30 labels and predictions\n",
|
||||
"print('labels: \\t', y_test[:30])\n",
|
||||
@@ -681,10 +661,12 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"Calculate the overall accuracy by comparing the predicted value against the test set."
|
||||
"print(\"Accuracy on the test set:\", np.average(y_hat == y_test))"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -715,9 +697,9 @@
|
||||
"\n",
|
||||
"ps = RandomParameterSampling(\n",
|
||||
" {\n",
|
||||
" '--batch-size': choice(25, 50, 100),\n",
|
||||
" '--first-layer-neurons': choice(10, 50, 200, 300, 500),\n",
|
||||
" '--second-layer-neurons': choice(10, 50, 200, 500),\n",
|
||||
" '--batch-size': choice(32, 64, 128),\n",
|
||||
" '--first-layer-neurons': choice(16, 64, 128, 256, 512),\n",
|
||||
" '--second-layer-neurons': choice(16, 64, 256, 512),\n",
|
||||
" '--learning-rate': loguniform(-6, -1)\n",
|
||||
" }\n",
|
||||
")"
|
||||
@@ -739,7 +721,8 @@
|
||||
"est = TensorFlow(source_directory=script_folder,\n",
|
||||
" script_params={'--data-folder': dataset.as_named_input('mnist').as_mount()},\n",
|
||||
" compute_target=compute_target,\n",
|
||||
" entry_script='tf_mnist.py', \n",
|
||||
" entry_script='tf_mnist.py',\n",
|
||||
" framework_version='2.0',\n",
|
||||
" use_gpu=True,\n",
|
||||
" pip_packages=['azureml-dataprep[pandas,fuse]'])"
|
||||
]
|
||||
@@ -823,6 +806,15 @@
|
||||
"htr.wait_for_completion(show_output=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"assert(htr.get_status() == \"Completed\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -910,24 +902,20 @@
|
||||
"from azureml.core.model import Model\n",
|
||||
"\n",
|
||||
"def init():\n",
|
||||
" global X, output, sess\n",
|
||||
" tf.reset_default_graph()\n",
|
||||
" global tf_model\n",
|
||||
" model_root = os.getenv('AZUREML_MODEL_DIR')\n",
|
||||
" # the name of the folder in which to look for tensorflow model files\n",
|
||||
" tf_model_folder = 'model'\n",
|
||||
" saver = tf.train.import_meta_graph(\n",
|
||||
" os.path.join(model_root, tf_model_folder, 'mnist-tf.model.meta'))\n",
|
||||
" X = tf.get_default_graph().get_tensor_by_name(\"network/X:0\")\n",
|
||||
" output = tf.get_default_graph().get_tensor_by_name(\"network/output/MatMul:0\")\n",
|
||||
"\n",
|
||||
" sess = tf.Session()\n",
|
||||
" saver.restore(sess, os.path.join(model_root, tf_model_folder, 'mnist-tf.model'))\n",
|
||||
" \n",
|
||||
" tf_model = tf.saved_model.load(os.path.join(model_root, tf_model_folder))\n",
|
||||
"\n",
|
||||
"def run(raw_data):\n",
|
||||
" data = np.array(json.loads(raw_data)['data'])\n",
|
||||
" data = np.array(json.loads(raw_data)['data'], dtype=np.float32)\n",
|
||||
" \n",
|
||||
" # make prediction\n",
|
||||
" out = output.eval(session=sess, feed_dict={X: data})\n",
|
||||
" out = tf_model(data)\n",
|
||||
" y_hat = np.argmax(out, axis=1)\n",
|
||||
"\n",
|
||||
" return y_hat.tolist()"
|
||||
]
|
||||
},
|
||||
@@ -949,7 +937,7 @@
|
||||
"\n",
|
||||
"cd = CondaDependencies.create()\n",
|
||||
"cd.add_conda_package('numpy')\n",
|
||||
"cd.add_pip_package('tensorflow==1.13.1')\n",
|
||||
"cd.add_pip_package('tensorflow==2.0.0')\n",
|
||||
"cd.add_pip_package(\"azureml-defaults\")\n",
|
||||
"cd.save_to_file(base_directory='./', conda_file_path='myenv.yml')\n",
|
||||
"\n",
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
name: train-hyperparameter-tune-deploy-with-tensorflow
|
||||
dependencies:
|
||||
- numpy
|
||||
- tensorflow==1.10.0
|
||||
- matplotlib
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-widgets
|
||||
- pandas
|
||||
- keras
|
||||
- tensorflow==2.0.0
|
||||
- matplotlib
|
||||
- azureml-dataprep
|
||||
- fuse
|
||||
|
||||
@@ -175,13 +175,13 @@
|
||||
"os.makedirs(data_folder, exist_ok=True)\n",
|
||||
"\n",
|
||||
"urllib.request.urlretrieve('https://azureopendatastorage.blob.core.windows.net/mnist/train-images-idx3-ubyte.gz',\n",
|
||||
" filename=os.path.join(data_folder, 'train-images.gz'))\n",
|
||||
" filename=os.path.join(data_folder, 'train-images-idx3-ubyte.gz'))\n",
|
||||
"urllib.request.urlretrieve('https://azureopendatastorage.blob.core.windows.net/mnist/train-labels-idx1-ubyte.gz',\n",
|
||||
" filename=os.path.join(data_folder, 'train-labels.gz'))\n",
|
||||
" filename=os.path.join(data_folder, 'train-labels-idx1-ubyte.gz'))\n",
|
||||
"urllib.request.urlretrieve('https://azureopendatastorage.blob.core.windows.net/mnist/t10k-images-idx3-ubyte.gz',\n",
|
||||
" filename=os.path.join(data_folder, 'test-images.gz'))\n",
|
||||
" filename=os.path.join(data_folder, 't10k-images-idx3-ubyte.gz'))\n",
|
||||
"urllib.request.urlretrieve('https://azureopendatastorage.blob.core.windows.net/mnist/t10k-labels-idx1-ubyte.gz',\n",
|
||||
" filename=os.path.join(data_folder, 'test-labels.gz'))"
|
||||
" filename=os.path.join(data_folder, 't10k-labels-idx1-ubyte.gz'))"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -209,10 +209,10 @@
|
||||
"from utils import load_data\n",
|
||||
"\n",
|
||||
"# note we also shrink the intensity values (X) from 0-255 to 0-1. This helps the model converge faster.\n",
|
||||
"X_train = load_data(os.path.join(data_folder, 'train-images.gz'), False) / 255.0\n",
|
||||
"X_test = load_data(os.path.join(data_folder, 'test-images.gz'), False) / 255.0\n",
|
||||
"y_train = load_data(os.path.join(data_folder, 'train-labels.gz'), True).reshape(-1)\n",
|
||||
"y_test = load_data(os.path.join(data_folder, 'test-labels.gz'), True).reshape(-1)\n",
|
||||
"X_train = load_data(os.path.join(data_folder, 'train-images-idx3-ubyte.gz'), False) / 255.0\n",
|
||||
"X_test = load_data(os.path.join(data_folder, 't10k-images-idx3-ubyte.gz'), False) / 255.0\n",
|
||||
"y_train = load_data(os.path.join(data_folder, 'train-labels-idx1-ubyte.gz'), True).reshape(-1)\n",
|
||||
"y_test = load_data(os.path.join(data_folder, 't10k-labels-idx1-ubyte.gz'), True).reshape(-1)\n",
|
||||
"\n",
|
||||
"# now let's show some randomly chosen images from the training set.\n",
|
||||
"count = 0\n",
|
||||
@@ -243,10 +243,10 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.dataset import Dataset\n",
|
||||
"web_paths = ['http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz',\n",
|
||||
" 'http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz',\n",
|
||||
" 'http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz',\n",
|
||||
" 'http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz'\n",
|
||||
"web_paths = ['https://azureopendatastorage.blob.core.windows.net/mnist/train-images-idx3-ubyte.gz',\n",
|
||||
" 'https://azureopendatastorage.blob.core.windows.net/mnist/train-labels-idx1-ubyte.gz',\n",
|
||||
" 'https://azureopendatastorage.blob.core.windows.net/mnist/t10k-images-idx3-ubyte.gz',\n",
|
||||
" 'https://azureopendatastorage.blob.core.windows.net/mnist/t10k-labels-idx1-ubyte.gz'\n",
|
||||
" ]\n",
|
||||
"dataset = Dataset.File.from_files(path = web_paths)"
|
||||
]
|
||||
@@ -255,7 +255,8 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Use the register() method to register datasets to your workspace so they can be shared with others, reused across various experiments, and referred to by name in your training script."
|
||||
"Use the register() method to register datasets to your workspace so they can be shared with others, reused across various experiments, and referred to by name in your training script.\n",
|
||||
"You can try get the dataset first to see if it's already registered."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -264,8 +265,16 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dataset = dataset.register(workspace = ws,\n",
|
||||
" name = 'mnist dataset',\n",
|
||||
"dataset_registered = False\n",
|
||||
"try:\n",
|
||||
" temp = Dataset.get_by_name(workspace = ws, name = 'mnist-dataset')\n",
|
||||
" dataset_registered = True\n",
|
||||
"except:\n",
|
||||
" print(\"The dataset mnist-dataset is not registered in workspace yet.\")\n",
|
||||
"\n",
|
||||
"if not dataset_registered:\n",
|
||||
" dataset = dataset.register(workspace = ws,\n",
|
||||
" name = 'mnist-dataset',\n",
|
||||
" description='training and test dataset',\n",
|
||||
" create_new_version=True)"
|
||||
]
|
||||
@@ -436,9 +445,9 @@
|
||||
"# ensure latest azureml-dataprep and other required packages installed in the environment\n",
|
||||
"cd = CondaDependencies.create(pip_packages=['keras',\n",
|
||||
" 'azureml-sdk',\n",
|
||||
" 'tensorflow==1.14.0',\n",
|
||||
" 'tensorflow==2.0.0',\n",
|
||||
" 'matplotlib',\n",
|
||||
" 'azureml-dataprep[pandas,fuse]>=1.1.14'])\n",
|
||||
" 'azureml-dataprep[pandas,fuse]'])\n",
|
||||
"\n",
|
||||
"env.python.conda_dependencies = cd"
|
||||
]
|
||||
@@ -457,9 +466,9 @@
|
||||
"\n",
|
||||
"script_params = {\n",
|
||||
" '--data-folder': dataset.as_named_input('mnist').as_mount(),\n",
|
||||
" '--batch-size': 50,\n",
|
||||
" '--first-layer-neurons': 300,\n",
|
||||
" '--second-layer-neurons': 100,\n",
|
||||
" '--batch-size': 64,\n",
|
||||
" '--first-layer-neurons': 256,\n",
|
||||
" '--second-layer-neurons': 128,\n",
|
||||
" '--learning-rate': 0.01\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
@@ -467,7 +476,7 @@
|
||||
" script_params=script_params,\n",
|
||||
" compute_target=compute_target,\n",
|
||||
" entry_script='tf_mnist.py', \n",
|
||||
" framework_version='1.13',\n",
|
||||
" framework_version='2.0',\n",
|
||||
" environment_definition= env)"
|
||||
]
|
||||
},
|
||||
@@ -525,9 +534,9 @@
|
||||
"\n",
|
||||
"ps = RandomParameterSampling(\n",
|
||||
" {\n",
|
||||
" '--batch-size': choice(25, 50, 100),\n",
|
||||
" '--first-layer-neurons': choice(10, 50, 200, 300, 500),\n",
|
||||
" '--second-layer-neurons': choice(10, 50, 200, 500),\n",
|
||||
" '--batch-size': choice(32, 64, 128),\n",
|
||||
" '--first-layer-neurons': choice(16, 64, 128, 256, 512),\n",
|
||||
" '--second-layer-neurons': choice(16, 64, 256, 512),\n",
|
||||
" '--learning-rate': loguniform(-6, -1)\n",
|
||||
" }\n",
|
||||
")"
|
||||
@@ -549,7 +558,8 @@
|
||||
"est = TensorFlow(source_directory=script_folder,\n",
|
||||
" script_params={'--data-folder': dataset.as_named_input('mnist').as_mount()},\n",
|
||||
" compute_target=compute_target,\n",
|
||||
" entry_script='tf_mnist.py', \n",
|
||||
" entry_script='tf_mnist.py',\n",
|
||||
" framework_version='2.0',\n",
|
||||
" environment_definition = env)"
|
||||
]
|
||||
},
|
||||
@@ -557,7 +567,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Next we will define an early termnination policy. This will terminate poorly performing runs automatically, reducing wastage of resources and instead efficiently using these resources for exploring other parameter configurations. In this example, we will use the `TruncationSelectionPolicy`, truncating the bottom performing 10% runs. It states to check the job every 2 iterations. If the primary metric (defined later) falls in the bottom 25% range, Azure ML terminate the job. This saves us from continuing to explore hyperparameters that don't show promise of helping reach our target metric."
|
||||
"Next we will define an early termnination policy. This will terminate poorly performing runs automatically, reducing wastage of resources and instead efficiently using these resources for exploring other parameter configurations. In this example, we will use the `TruncationSelectionPolicy`, truncating the bottom performing 25% runs. It states to check the job every 2 iterations. If the primary metric (defined later) falls in the bottom 25% range, Azure ML terminate the job. This saves us from continuing to explore hyperparameters that don't show promise of helping reach our target metric."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -634,6 +644,15 @@
|
||||
"htr.wait_for_completion(show_output=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"assert(htr.get_status() == \"Completed\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
||||
@@ -7,7 +7,7 @@ dependencies:
|
||||
- azureml-widgets
|
||||
- pandas
|
||||
- keras
|
||||
- tensorflow==1.14.0
|
||||
- tensorflow
|
||||
- matplotlib
|
||||
- azureml-dataprep
|
||||
- fuse
|
||||
|
||||
@@ -11,15 +11,74 @@ import glob
|
||||
|
||||
from azureml.core import Run
|
||||
from utils import load_data
|
||||
from tensorflow.keras import Model, layers
|
||||
|
||||
|
||||
# Create TF Model.
|
||||
class NeuralNet(Model):
|
||||
# Set layers.
|
||||
def __init__(self):
|
||||
super(NeuralNet, self).__init__()
|
||||
# First hidden layer.
|
||||
self.h1 = layers.Dense(n_h1, activation=tf.nn.relu)
|
||||
# Second hidden layer.
|
||||
self.h2 = layers.Dense(n_h2, activation=tf.nn.relu)
|
||||
self.out = layers.Dense(n_outputs)
|
||||
|
||||
# Set forward pass.
|
||||
def call(self, x, is_training=False):
|
||||
x = self.h1(x)
|
||||
x = self.h2(x)
|
||||
x = self.out(x)
|
||||
if not is_training:
|
||||
# Apply softmax when not training.
|
||||
x = tf.nn.softmax(x)
|
||||
return x
|
||||
|
||||
|
||||
def cross_entropy_loss(y, logits):
|
||||
# Convert labels to int 64 for tf cross-entropy function.
|
||||
y = tf.cast(y, tf.int64)
|
||||
# Apply softmax to logits and compute cross-entropy.
|
||||
loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
|
||||
# Average loss across the batch.
|
||||
return tf.reduce_mean(loss)
|
||||
|
||||
|
||||
# Accuracy metric.
|
||||
def accuracy(y_pred, y_true):
|
||||
# Predicted class is the index of highest score in prediction vector (i.e. argmax).
|
||||
correct_prediction = tf.equal(tf.argmax(y_pred, 1), tf.cast(y_true, tf.int64))
|
||||
return tf.reduce_mean(tf.cast(correct_prediction, tf.float32), axis=-1)
|
||||
|
||||
|
||||
# Optimization process.
|
||||
def run_optimization(x, y):
|
||||
# Wrap computation inside a GradientTape for automatic differentiation.
|
||||
with tf.GradientTape() as g:
|
||||
# Forward pass.
|
||||
logits = neural_net(x, is_training=True)
|
||||
# Compute loss.
|
||||
loss = cross_entropy_loss(y, logits)
|
||||
|
||||
# Variables to update, i.e. trainable variables.
|
||||
trainable_variables = neural_net.trainable_variables
|
||||
|
||||
# Compute gradients.
|
||||
gradients = g.gradient(loss, trainable_variables)
|
||||
|
||||
# Update W and b following gradients.
|
||||
optimizer.apply_gradients(zip(gradients, trainable_variables))
|
||||
|
||||
|
||||
print("TensorFlow version:", tf.__version__)
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('--data-folder', type=str, dest='data_folder', help='data folder mounting point')
|
||||
parser.add_argument('--batch-size', type=int, dest='batch_size', default=50, help='mini batch size for training')
|
||||
parser.add_argument('--first-layer-neurons', type=int, dest='n_hidden_1', default=100,
|
||||
parser.add_argument('--data-folder', type=str, dest='data_folder', default='data', help='data folder mounting point')
|
||||
parser.add_argument('--batch-size', type=int, dest='batch_size', default=128, help='mini batch size for training')
|
||||
parser.add_argument('--first-layer-neurons', type=int, dest='n_hidden_1', default=128,
|
||||
help='# of neurons in the first layer')
|
||||
parser.add_argument('--second-layer-neurons', type=int, dest='n_hidden_2', default=100,
|
||||
parser.add_argument('--second-layer-neurons', type=int, dest='n_hidden_2', default=128,
|
||||
help='# of neurons in the second layer')
|
||||
parser.add_argument('--learning-rate', type=float, dest='learning_rate', default=0.01, help='learning rate')
|
||||
parser.add_argument('--resume-from', type=str, default=None,
|
||||
@@ -36,9 +95,9 @@ print('Data folder:', data_folder)
|
||||
# load train and test set into numpy arrays
|
||||
# note we scale the pixel intensity values to 0-1 (by dividing it with 255.0) so the model can converge faster.
|
||||
X_train = load_data(glob.glob(os.path.join(data_folder, '**/train-images-idx3-ubyte.gz'),
|
||||
recursive=True)[0], False) / 255.0
|
||||
recursive=True)[0], False) / np.float32(255.0)
|
||||
X_test = load_data(glob.glob(os.path.join(data_folder, '**/t10k-images-idx3-ubyte.gz'),
|
||||
recursive=True)[0], False) / 255.0
|
||||
recursive=True)[0], False) / np.float32(255.0)
|
||||
y_train = load_data(glob.glob(os.path.join(data_folder, '**/train-labels-idx1-ubyte.gz'),
|
||||
recursive=True)[0], True).reshape(-1)
|
||||
y_test = load_data(glob.glob(os.path.join(data_folder, '**/t10k-labels-idx1-ubyte.gz'),
|
||||
@@ -56,46 +115,28 @@ learning_rate = args.learning_rate
|
||||
n_epochs = 20
|
||||
batch_size = args.batch_size
|
||||
|
||||
with tf.name_scope('network'):
|
||||
# construct the DNN
|
||||
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name='X')
|
||||
y = tf.placeholder(tf.int64, shape=(None), name='y')
|
||||
h1 = tf.layers.dense(X, n_h1, activation=tf.nn.relu, name='h1')
|
||||
h2 = tf.layers.dense(h1, n_h2, activation=tf.nn.relu, name='h2')
|
||||
output = tf.layers.dense(h2, n_outputs, name='output')
|
||||
# Build neural network model.
|
||||
neural_net = NeuralNet()
|
||||
|
||||
with tf.name_scope('train'):
|
||||
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=output)
|
||||
loss = tf.reduce_mean(cross_entropy, name='loss')
|
||||
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
|
||||
train_op = optimizer.minimize(loss)
|
||||
|
||||
with tf.name_scope('eval'):
|
||||
correct = tf.nn.in_top_k(output, y, 1)
|
||||
acc_op = tf.reduce_mean(tf.cast(correct, tf.float32))
|
||||
|
||||
init = tf.global_variables_initializer()
|
||||
saver = tf.train.Saver()
|
||||
# Stochastic gradient descent optimizer.
|
||||
optimizer = tf.optimizers.SGD(learning_rate)
|
||||
|
||||
# start an Azure ML run
|
||||
run = Run.get_context()
|
||||
|
||||
with tf.Session() as sess:
|
||||
start_time = time.perf_counter()
|
||||
|
||||
start_epoch = 0
|
||||
if previous_model_location:
|
||||
if previous_model_location:
|
||||
# Restore variables from latest checkpoint.
|
||||
checkpoint = tf.train.Checkpoint(model=neural_net, optimizer=optimizer)
|
||||
checkpoint_file_path = tf.train.latest_checkpoint(previous_model_location)
|
||||
saver.restore(sess, checkpoint_file_path)
|
||||
checkpoint.restore(checkpoint_file_path)
|
||||
checkpoint_filename = os.path.basename(checkpoint_file_path)
|
||||
num_found = re.search(r'\d+', checkpoint_filename)
|
||||
if num_found:
|
||||
start_epoch = int(num_found.group(0))
|
||||
print("Resuming from epoch {}".format(str(start_epoch)))
|
||||
else:
|
||||
init.run()
|
||||
|
||||
for epoch in range(start_epoch, n_epochs):
|
||||
start_time = time.perf_counter()
|
||||
for epoch in range(0, n_epochs):
|
||||
|
||||
# randomly shuffle training set
|
||||
indices = np.random.permutation(training_set_size)
|
||||
@@ -114,30 +155,37 @@ with tf.Session() as sess:
|
||||
b_end = min(b_start + batch_size, training_set_size)
|
||||
|
||||
# train
|
||||
sess.run(train_op, feed_dict={X: X_batch, y: y_batch})
|
||||
# evaluate training set
|
||||
acc_train = acc_op.eval(feed_dict={X: X_batch, y: y_batch})
|
||||
# evaluate validation set
|
||||
acc_val = acc_op.eval(feed_dict={X: X_test, y: y_test})
|
||||
run_optimization(X_batch, y_batch)
|
||||
|
||||
time.sleep(10)
|
||||
# evaluate training set
|
||||
pred = neural_net(X_batch, is_training=False)
|
||||
acc_train = accuracy(pred, y_batch)
|
||||
|
||||
# evaluate validation set
|
||||
pred = neural_net(X_test, is_training=False)
|
||||
acc_val = accuracy(pred, y_test)
|
||||
|
||||
# log accuracies
|
||||
run.log('training_acc', np.float(acc_train))
|
||||
run.log('validation_acc', np.float(acc_val))
|
||||
print(epoch, '-- Training accuracy:', acc_train, '\b Validation accuracy:', acc_val)
|
||||
y_hat = np.argmax(output.eval(feed_dict={X: X_test}), axis=1)
|
||||
|
||||
# Save checkpoints in the "./outputs" folder so that they are automatically uploaded into run history.
|
||||
checkpoint_dir = './outputs/'
|
||||
checkpoint = tf.train.Checkpoint(model=neural_net, optimizer=optimizer)
|
||||
|
||||
if epoch % 2 == 0:
|
||||
saver.save(sess, './outputs/', global_step=epoch)
|
||||
checkpoint.save(checkpoint_dir)
|
||||
time.sleep(3)
|
||||
|
||||
run.log('final_acc', np.float(acc_val))
|
||||
run.log('final_acc', np.float(acc_val))
|
||||
os.makedirs('./outputs/model', exist_ok=True)
|
||||
|
||||
os.makedirs('./outputs/model', exist_ok=True)
|
||||
# files saved in the "./outputs" folder are automatically uploaded into run history
|
||||
saver.save(sess, './outputs/model/mnist-tf.model')
|
||||
# files saved in the "./outputs" folder are automatically uploaded into run history
|
||||
# this is workaround for https://github.com/tensorflow/tensorflow/issues/33913 and will be fixed once we move to >tf2.1
|
||||
neural_net._set_inputs(X_train)
|
||||
tf.saved_model.save(neural_net, './outputs/model/')
|
||||
|
||||
stop_time = time.perf_counter()
|
||||
training_time = (stop_time - start_time) * 1000
|
||||
print("Total time in milliseconds for training: {}".format(str(training_time)))
|
||||
stop_time = time.perf_counter()
|
||||
training_time = (stop_time - start_time) * 1000
|
||||
print("Total time in milliseconds for training: {}".format(str(training_time)))
|
||||
|
||||
@@ -170,7 +170,8 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"you may want to register datasets using the register() method to your workspace so they can be shared with others, reused across various experiments, and referred to by name in your training script."
|
||||
"you may want to register datasets using the register() method to your workspace so they can be shared with others, reused across various experiments, and referred to by name in your training script.\n",
|
||||
"You can try get the dataset first to see if it's already registered."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -179,9 +180,17 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#register dataset to workspace\n",
|
||||
"dataset = dataset.register(workspace = ws,\n",
|
||||
" name = 'mnist dataset',\n",
|
||||
"dataset_registered = False\n",
|
||||
"try:\n",
|
||||
" temp = Dataset.get_by_name(workspace = ws, name = 'mnist-dataset')\n",
|
||||
" dataset_registered = True\n",
|
||||
"except:\n",
|
||||
" print(\"The dataset mnist-dataset is not registered in workspace yet.\")\n",
|
||||
"\n",
|
||||
"if not dataset_registered:\n",
|
||||
" #register dataset to workspace\n",
|
||||
" dataset = dataset.register(workspace = ws,\n",
|
||||
" name = 'mnist-dataset',\n",
|
||||
" description='training and test dataset',\n",
|
||||
" create_new_version=True)"
|
||||
]
|
||||
|
||||
@@ -184,11 +184,10 @@
|
||||
"prov_config = AksCompute.provisioning_configuration()\n",
|
||||
"\n",
|
||||
"aks_name = 'drift-aks'\n",
|
||||
"aks_target = ws.compute_targets.get(aks_name)\n",
|
||||
"\n",
|
||||
"# Create the cluster\n",
|
||||
"try:\n",
|
||||
" aks_target = ws.compute_targets[aks_name]\n",
|
||||
"except KeyError:\n",
|
||||
"if not aks_target:\n",
|
||||
" aks_target = ComputeTarget.create(workspace = ws,\n",
|
||||
" name = aks_name,\n",
|
||||
" provisioning_configuration = prov_config)\n",
|
||||
|
||||
118
how-to-use-azureml/reinforcement-learning/README.md
Normal file
118
how-to-use-azureml/reinforcement-learning/README.md
Normal file
@@ -0,0 +1,118 @@
|
||||
|
||||
# Azure Machine Learning - Reinforcement Learning (Public Preview)
|
||||
|
||||
<!--
|
||||
Guidelines on README format: https://review.docs.microsoft.com/help/onboard/admin/samples/concepts/readme-template?branch=master
|
||||
|
||||
Guidance on onboarding samples to docs.microsoft.com/samples: https://review.docs.microsoft.com/help/onboard/admin/samples/process/onboarding?branch=master
|
||||
|
||||
Taxonomies for products and languages: https://review.docs.microsoft.com/new-hope/information-architecture/metadata/taxonomies?branch=master
|
||||
-->
|
||||
|
||||
This is an introduction to the [Azure Machine Learning](https://docs.microsoft.com/en-us/azure/machine-learning/service/) Reinforcement Learning (Public Preview) using the [Ray](https://github.com/ray-project/ray/) framework.
|
||||
|
||||
Using these samples, you will be able to do the following.
|
||||
|
||||
1. Use an Azure Machine Learning workspace, set up a virtual network, and create compute clusters for running Ray.
|
||||
2. Run some experiments to train a reinforcement learning agent using Ray and RLlib.
|
||||
|
||||
## Contents
|
||||
|
||||
| File/folder | Description |
|
||||
|-------------------|--------------------------------------------|
|
||||
| [README.md](README.md) | This README file. |
|
||||
| [devenv_setup.ipynb](setup/devenv_setup.ipynb) | Notebook to set up the development environment for Azure ML RL |
|
||||
| [cartpole_ci.ipynb](cartpole-on-compute-instance/cartpole_ci.ipynb) | Notebook to train a Cartpole playing agent on an Azure ML Compute Instance |
|
||||
| [cartpole_cc.ipynb](cartpole-on-single-compute/cartpole_cc.ipynb) | Notebook to train a Cartpole playing agent on an Azure ML Compute Cluster (single node) |
|
||||
| [pong_rllib.ipynb](atari-on-distributed-compute/pong_rllib.ipynb) | Notebook to train Pong agent using RLlib on multiple compute targets |
|
||||
|
||||
## Prerequisites
|
||||
|
||||
To make use of these samples, you need the following.
|
||||
|
||||
* A Microsoft Azure subscription.
|
||||
* A Microsoft Azure resource group.
|
||||
* An Azure Machine Learning Workspace in the resource group. Please make sure that the VM sizes `STANDARD_NC6` and `STANDARD_D2_V2` are supported in the workspace's region.
|
||||
* A virtual network set up in the resource group.
|
||||
* A virtual network is needed for the examples training on multiple compute targets.
|
||||
* The [devenv_setup.ipynb](setup/devenv_setup.ipynb) notebook shows you how to create a virtual network. Alternatively, you can use an existing virtual network; make sure it is in the same region as the workspace.
|
||||
* Any network security group defined on the virtual network must allow network traffic on ports used by Azure infrastructure services. This is described in more detail in the [devenv_setup.ipynb](setup/devenv_setup.ipynb) notebook.
|
||||
|
||||
|
||||
## Setup
|
||||
|
||||
You can run these samples in the following ways.
|
||||
|
||||
* On an Azure ML Compute Instance or Notebook VM.
|
||||
* On a workstation with Python and the Azure ML Python SDK installed.
|
||||
|
||||
### Azure ML Compute Instance or Notebook VM
|
||||
#### Update packages
|
||||
|
||||
|
||||
We recommend that you update the required Python packages before you proceed. The following commands are meant to be entered in a Python interpreter such as a notebook.
|
||||
|
||||
```shell
|
||||
# We recommend updating pip to the latest version.
|
||||
!pip install --upgrade pip
|
||||
# Update matplotlib for plotting charts
|
||||
!pip install --upgrade matplotlib
|
||||
# Update Azure Machine Learning SDK to the latest version
|
||||
!pip install --upgrade azureml-sdk
|
||||
# For Jupyter notebook widget used in samples
|
||||
!pip install --upgrade azureml-widgets
|
||||
# For Tensorboard used in samples
|
||||
!pip install --upgrade azureml-tensorboard
|
||||
# Install Azure Machine Learning Reinforcement Learning SDK
|
||||
!pip install --upgrade azureml-contrib-reinforcementlearning
|
||||
```
|
||||
|
||||
### Your own workstation
|
||||
#### Install/update packages
|
||||
|
||||
For a local workstation, create a Python environment and install [Azure Machine Learning SDK](https://docs.microsoft.com/en-us/python/api/overview/azure/ml/install?view=azure-ml-py) and the RL SDK. We recommend Python 3.6 and higher.
|
||||
|
||||
```shell
|
||||
# Activate your environment first.
|
||||
# e.g.,
|
||||
# conda activate amlrl
|
||||
# We recommend updating pip to the latest version.
|
||||
pip install --upgrade pip
|
||||
# Install/upgrade matplotlib for plotting charts
|
||||
pip install --upgrade matplotlib
|
||||
# Install/upgrade tensorboard used in samples
|
||||
pip install --upgrade tensorboard
|
||||
# Install/upgrade Azure ML SDK to the latest version
|
||||
pip install --upgrade azureml-sdk
|
||||
# For Jupyter notebook widget used in samples
|
||||
pip install --upgrade azureml-widgets
|
||||
# For Tensorboard used in samples
|
||||
pip install --upgrade azureml-tensorboard
|
||||
# Install Azure Machine Learning Reinforcement Learning SDK
|
||||
pip install --upgrade azureml-contrib-reinforcementlearning
|
||||
# To use the notebook widget, you may need to register and enable the Azure ML extensions first.
|
||||
jupyter nbextension install --py --user azureml.widgets
|
||||
jupyter nbextension enable --py --user azureml.widgets
|
||||
```
|
||||
|
||||
## Contributing
|
||||
|
||||
This project welcomes contributions and suggestions. Most contributions require you to agree to a
|
||||
Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us
|
||||
the rights to use your contribution. For details, visit https://cla.opensource.microsoft.com.
|
||||
|
||||
When you submit a pull request, a CLA bot will automatically determine whether you need to provide
|
||||
a CLA and decorate the PR appropriately (e.g., status check, comment). Simply follow the instructions
|
||||
provided by the bot. You will only need to do this once across all repos using our CLA.
|
||||
|
||||
This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
|
||||
For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or
|
||||
contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.
|
||||
|
||||
For more on SDK concepts, please refer to [notebooks](https://github.com/Azure/MachineLearningNotebooks).
|
||||
|
||||
**Please let us know your feedback.**
|
||||
|
||||
|
||||
|
||||

|
||||
@@ -0,0 +1,39 @@
|
||||
import ray
|
||||
import ray.tune as tune
|
||||
from ray.rllib import train
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
from azureml.core import Run
|
||||
from utils import callbacks
|
||||
|
||||
DEFAULT_RAY_ADDRESS = 'localhost:6379'
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
# Parse arguments
|
||||
train_parser = train.create_parser()
|
||||
|
||||
args = train_parser.parse_args()
|
||||
print("Algorithm config:", args.config)
|
||||
|
||||
if args.ray_address is None:
|
||||
args.ray_address = DEFAULT_RAY_ADDRESS
|
||||
|
||||
ray.init(address=args.ray_address)
|
||||
|
||||
tune.run(run_or_experiment=args.run,
|
||||
config={
|
||||
"env": args.env,
|
||||
"num_gpus": args.config["num_gpus"],
|
||||
"num_workers": args.config["num_workers"],
|
||||
"callbacks": {"on_train_result": callbacks.on_train_result},
|
||||
"sample_batch_size": 50,
|
||||
"train_batch_size": 1000,
|
||||
"num_sgd_iter": 2,
|
||||
"num_data_loader_buffers": 2,
|
||||
"model": {"dim": 42},
|
||||
},
|
||||
stop=args.stop,
|
||||
local_dir='./logs')
|
||||
@@ -0,0 +1,17 @@
|
||||
'''RLlib callbacks module:
|
||||
Common callback methods to be passed to RLlib trainer.
|
||||
'''
|
||||
|
||||
from azureml.core import Run
|
||||
|
||||
|
||||
def on_train_result(info):
|
||||
'''Callback on train result to record metrics returned by trainer.
|
||||
'''
|
||||
run = Run.get_context()
|
||||
run.log(
|
||||
name='episode_reward_mean',
|
||||
value=info["result"]["episode_reward_mean"])
|
||||
run.log(
|
||||
name='episodes_total',
|
||||
value=info["result"]["episodes_total"])
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 340 KiB |
@@ -0,0 +1,604 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||
"\n",
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Azure ML Reinforcement Learning Sample - Pong problem\n",
|
||||
"Azure ML Reinforcement Learning (Azure ML RL) is a managed service for running distributed RL (reinforcement learning) simulation and training using the Ray framework.\n",
|
||||
"This example uses Ray RLlib to train a Pong playing agent on a multi-node cluster.\n",
|
||||
"\n",
|
||||
"## Pong problem\n",
|
||||
"[Pong](https://en.wikipedia.org/wiki/Pong) is a two-dimensional sports game that simulates table tennis. The player controls an in-game paddle by moving it vertically across the left or right side of the screen. They can compete against another player controlling a second paddle on the opposing side. Players use the paddles to hit a ball back and forth."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<table style=\"width:50%\">\n",
|
||||
" <tr>\n",
|
||||
" <th style=\"text-align: center;\"><img src=\"./images/pong.gif\" alt=\"Pong image\" align=\"middle\" margin-left=\"auto\" margin-right=\"auto\"/></th>\n",
|
||||
" </tr>\n",
|
||||
" <tr style=\"text-align: center;\">\n",
|
||||
" <th>Fig 1. Pong game animation (from <a href=\"https://towardsdatascience.com/intro-to-reinforcement-learning-pong-92a94aa0f84d\">towardsdatascience.com</a>).</th>\n",
|
||||
" </tr>\n",
|
||||
"</table>"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The goal here is to train an agent to win an episode of Pong game against opponent with the score of at least 18 points. An episode in Pong runs until one of the players reaches a score of 21. Episodes are a terminology that is used across all the [OpenAI gym](https://gym.openai.com/envs/Pong-v0/) environments that contains a strictly defined task.\n",
|
||||
"\n",
|
||||
"Training a Pong agent is a CPU intensive task and this example demonstrates the use of Azure ML RL service to train an agent faster in a distributed, parallel environment. You'll learn more about using the head and the worker compute targets to train an agent in this notebook below."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Prerequisite\n",
|
||||
"\n",
|
||||
"The user should have completed the [Azure ML Reinforcement Learning Sample - Setting Up Development Environment](../setup/devenv_setup.ipynb) to setup a virtual network. This virtual network will be used here for head and worker compute targets. It is highly recommended that the user should go through the [Azure ML Reinforcement Learning Sample - Cartpole Problem](../cartpole-on-single-compute/cartpole_cc.ipynb) to understand the basics of Azure ML RL and Ray RLlib used in this notebook."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Set up Development Environment\n",
|
||||
"The following subsections show typical steps to setup your development environment. Setup includes:\n",
|
||||
"\n",
|
||||
"* Connecting to a workspace to enable communication between your local machine and remote resources\n",
|
||||
"* Creating an experiment to track all your runs\n",
|
||||
"* Creating a remote head and worker compute target on a vnet to use for training"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Azure Machine Learning SDK\n",
|
||||
"Display the Azure Machine Learning SDK version."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%matplotlib inline\n",
|
||||
"\n",
|
||||
"# Azure ML core imports\n",
|
||||
"import azureml.core\n",
|
||||
"\n",
|
||||
"# Check core SDK version number\n",
|
||||
"print(\"Azure ML SDK Version: \", azureml.core.VERSION)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Get Azure ML workspace\n",
|
||||
"Get a reference to an existing Azure ML workspace."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core import Workspace\n",
|
||||
"\n",
|
||||
"ws = Workspace.from_config()\n",
|
||||
"print(ws.name, ws.location, ws.resource_group, sep = ' | ')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create Azure ML experiment\n",
|
||||
"Create an experiment to track the runs in your workspace."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.experiment import Experiment\n",
|
||||
"\n",
|
||||
"# Experiment name\n",
|
||||
"experiment_name = 'rllib-pong-multi-node'\n",
|
||||
"exp = Experiment(workspace=ws, name=experiment_name)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Specify the name of your vnet\n",
|
||||
"\n",
|
||||
"The resource group you use must contain a vnet. Specify the name of the vnet here created in the [Azure ML Reinforcement Learning Sample - Setting Up Development Environment](../setup/devenv_setup.ipynb)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Virtual network name\n",
|
||||
"vnet_name = 'your_vnet'"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create head computing cluster\n",
|
||||
"\n",
|
||||
"In this example, we show how to set up separate compute clusters for the Ray head and Ray worker nodes. First we define the head cluster with GPU for the Ray head node. One CPU of the head node will be used for the Ray head process and the rest of the CPUs will be used by the Ray worker processes."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.compute import AmlCompute, ComputeTarget\n",
|
||||
"\n",
|
||||
"# Choose a name for the Ray head cluster\n",
|
||||
"head_compute_name = 'head-gpu'\n",
|
||||
"head_compute_min_nodes = 0\n",
|
||||
"head_compute_max_nodes = 2\n",
|
||||
"\n",
|
||||
"# This example uses GPU VM. For using CPU VM, set SKU to STANDARD_D2_V2\n",
|
||||
"head_vm_size = 'STANDARD_NC6'\n",
|
||||
"\n",
|
||||
"if head_compute_name in ws.compute_targets:\n",
|
||||
" head_compute_target = ws.compute_targets[head_compute_name]\n",
|
||||
" if head_compute_target and type(head_compute_target) is AmlCompute:\n",
|
||||
" if head_compute_target.provisioning_state == 'Succeeded':\n",
|
||||
" print('found head compute target. just use it', head_compute_name)\n",
|
||||
" else: \n",
|
||||
" raise Exception('found head compute target but it is in state', head_compute_target.provisioning_state)\n",
|
||||
"else:\n",
|
||||
" print('creating a new head compute target...')\n",
|
||||
" provisioning_config = AmlCompute.provisioning_configuration(vm_size=head_vm_size,\n",
|
||||
" min_nodes=head_compute_min_nodes, \n",
|
||||
" max_nodes=head_compute_max_nodes,\n",
|
||||
" vnet_resourcegroup_name=ws.resource_group,\n",
|
||||
" vnet_name=vnet_name,\n",
|
||||
" subnet_name='default')\n",
|
||||
"\n",
|
||||
" # Create the cluster\n",
|
||||
" head_compute_target = ComputeTarget.create(ws, head_compute_name, provisioning_config)\n",
|
||||
" \n",
|
||||
" # Can poll for a minimum number of nodes and for a specific timeout. \n",
|
||||
" # If no min node count is provided it will use the scale settings for the cluster\n",
|
||||
" head_compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)\n",
|
||||
" \n",
|
||||
" # For a more detailed view of current AmlCompute status, use get_status()\n",
|
||||
" print(head_compute_target.get_status().serialize())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create worker computing cluster\n",
|
||||
"\n",
|
||||
"Now we create a compute cluster with CPUs for the additional Ray worker nodes. CPUs in these worker nodes are used by Ray worker processes. Each Ray worker node may have multiple Ray worker processes depending on CPUs on the worker node. Ray can distribute multiple worker tasks on each worker node."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Choose a name for your Ray worker cluster\n",
|
||||
"worker_compute_name = 'worker-cpu'\n",
|
||||
"worker_compute_min_nodes = 0 \n",
|
||||
"worker_compute_max_nodes = 4\n",
|
||||
"\n",
|
||||
"# This example uses CPU VM. For using GPU VM, set SKU to STANDARD_NC6\n",
|
||||
"worker_vm_size = 'STANDARD_D2_V2'\n",
|
||||
"\n",
|
||||
"# Create the compute target if it hasn't been created already\n",
|
||||
"if worker_compute_name in ws.compute_targets:\n",
|
||||
" worker_compute_target = ws.compute_targets[worker_compute_name]\n",
|
||||
" if worker_compute_target and type(worker_compute_target) is AmlCompute:\n",
|
||||
" if worker_compute_target.provisioning_state == 'Succeeded':\n",
|
||||
" print('found worker compute target. just use it', worker_compute_name)\n",
|
||||
" else: \n",
|
||||
" raise Exception('found worker compute target but it is in state', head_compute_target.provisioning_state)\n",
|
||||
"else:\n",
|
||||
" print('creating a new worker compute target...')\n",
|
||||
" provisioning_config = AmlCompute.provisioning_configuration(vm_size=worker_vm_size,\n",
|
||||
" min_nodes=worker_compute_min_nodes, \n",
|
||||
" max_nodes=worker_compute_max_nodes,\n",
|
||||
" vnet_resourcegroup_name=ws.resource_group,\n",
|
||||
" vnet_name=vnet_name,\n",
|
||||
" subnet_name='default')\n",
|
||||
"\n",
|
||||
" # Create the cluster\n",
|
||||
" worker_compute_target = ComputeTarget.create(ws, worker_compute_name, provisioning_config)\n",
|
||||
" \n",
|
||||
" # Can poll for a minimum number of nodes and for a specific timeout. \n",
|
||||
" # If no min node count is provided it will use the scale settings for the cluster\n",
|
||||
" worker_compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)\n",
|
||||
" \n",
|
||||
" # For a more detailed view of current AmlCompute status, use get_status()\n",
|
||||
" print(worker_compute_target.get_status().serialize())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Train Pong Agent Using Azure ML RL\n",
|
||||
"To facilitate reinforcement learning, Azure Machine Learning Python SDK provides a high level abstraction, the _ReinforcementLearningEstimator_ class, which allows users to easily construct RL run configurations for the underlying RL framework. Azure ML RL initially supports the [Ray framework](https://ray.io/) and its highly customizable [RLLib](https://ray.readthedocs.io/en/latest/rllib.html#rllib-scalable-reinforcement-learning). In this section we show how to use _ReinforcementLearningEstimator_ and Ray/RLLib framework to train a Pong playing agent.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"### Define worker configuration\n",
|
||||
"Define a `WorkerConfiguration` using your worker compute target. We also specify the number of nodes in the worker compute target to be used for training and additional PIP packages to install on those nodes as a part of setup.\n",
|
||||
"In this case, we define the PIP packages as dependencies for both head and worker nodes. With this setup, the game simulations will run directly on the worker compute nodes."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.contrib.train.rl import WorkerConfiguration\n",
|
||||
"\n",
|
||||
"# Pip packages we will use for both head and worker\n",
|
||||
"pip_packages=[\"ray[rllib]==0.8.3\"] # Latest version of Ray has fixes for isses related to object transfers\n",
|
||||
"\n",
|
||||
"# Specify the Ray worker configuration\n",
|
||||
"worker_conf = WorkerConfiguration(\n",
|
||||
" \n",
|
||||
" # Azure ML compute cluster to run Ray workers\n",
|
||||
" compute_target=worker_compute_target, \n",
|
||||
" \n",
|
||||
" # Number of worker nodes\n",
|
||||
" node_count=4,\n",
|
||||
" \n",
|
||||
" # GPU\n",
|
||||
" use_gpu=False, \n",
|
||||
" \n",
|
||||
" # PIP packages to use\n",
|
||||
" pip_packages=pip_packages\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create reinforcement learning estimator\n",
|
||||
"\n",
|
||||
"The `ReinforcementLearningEstimator` is used to submit a job to Azure Machine Learning to start the Ray experiment run. We define the training script parameters here that will be passed to estimator. \n",
|
||||
"\n",
|
||||
"We specify `episode_reward_mean` to 18 as we want to stop the training as soon as the trained agent reaches an average win margin of at least 18 point over opponent over all episodes in the training epoch.\n",
|
||||
"Number of Ray worker processes are defined by parameter `num_workers`. We set it to 13 as we have 13 CPUs available in our compute targets. Multiple Ray worker processes parallelizes agent training and helps in achieving our goal faster. \n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"Number of CPUs in head_compute_target = 6 CPUs in 1 node = 6\n",
|
||||
"Number of CPUs in worker_compute_target = 2 CPUs in each of 4 nodes = 8\n",
|
||||
"Number of CPUs available = (Number of CPUs in head_compute_target) + (Number of CPUs in worker_compute_target) - (1 CPU for head node) = 6 + 8 - 1 = 13\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.contrib.train.rl import ReinforcementLearningEstimator, Ray\n",
|
||||
"\n",
|
||||
"training_algorithm = \"IMPALA\"\n",
|
||||
"rl_environment = \"PongNoFrameskip-v4\"\n",
|
||||
"\n",
|
||||
"# Training script parameters\n",
|
||||
"script_params = {\n",
|
||||
" \n",
|
||||
" # Training algorithm, IMPALA in this case\n",
|
||||
" \"--run\": training_algorithm,\n",
|
||||
" \n",
|
||||
" # Environment, Pong in this case\n",
|
||||
" \"--env\": rl_environment,\n",
|
||||
" \n",
|
||||
" # Add additional single quotes at the both ends of string values as we have spaces in the \n",
|
||||
" # string parameters, outermost quotes are not passed to scripts as they are not actually part of string\n",
|
||||
" # Number of GPUs\n",
|
||||
" # Number of ray workers\n",
|
||||
" \"--config\": '\\'{\"num_gpus\": 1, \"num_workers\": 13}\\'',\n",
|
||||
" \n",
|
||||
" # Target episode reward mean to stop the training\n",
|
||||
" # Total training time in seconds\n",
|
||||
" \"--stop\": '\\'{\"episode_reward_mean\": 18, \"time_total_s\": 3600}\\'',\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"# RL estimator\n",
|
||||
"rl_estimator = ReinforcementLearningEstimator(\n",
|
||||
" \n",
|
||||
" # Location of source files\n",
|
||||
" source_directory='files',\n",
|
||||
" \n",
|
||||
" # Python script file\n",
|
||||
" entry_script=\"pong_rllib.py\",\n",
|
||||
" \n",
|
||||
" # Parameters to pass to the script file\n",
|
||||
" # Defined above.\n",
|
||||
" script_params=script_params,\n",
|
||||
" \n",
|
||||
" # The Azure ML compute target set up for Ray head nodes\n",
|
||||
" compute_target=head_compute_target,\n",
|
||||
" \n",
|
||||
" # Pip packages\n",
|
||||
" pip_packages=pip_packages,\n",
|
||||
" \n",
|
||||
" # GPU usage\n",
|
||||
" use_gpu=True,\n",
|
||||
" \n",
|
||||
" # RL framework. Currently must be Ray.\n",
|
||||
" rl_framework=Ray(),\n",
|
||||
" \n",
|
||||
" # Ray worker configuration defined above.\n",
|
||||
" worker_configuration=worker_conf,\n",
|
||||
" \n",
|
||||
" # How long to wait for whole cluster to start\n",
|
||||
" cluster_coordination_timeout_seconds=3600,\n",
|
||||
" \n",
|
||||
" # Maximum time for the whole Ray job to run\n",
|
||||
" # This will cut off the run after an hour\n",
|
||||
" max_run_duration_seconds=3600,\n",
|
||||
" \n",
|
||||
" # Allow the docker container Ray runs in to make full use\n",
|
||||
" # of the shared memory available from the host OS.\n",
|
||||
" shm_size=24*1024*1024*1024\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Training script\n",
|
||||
"As recommended in [RLLib](https://ray.readthedocs.io/en/latest/rllib.html) documentations, we use Ray [Tune](https://ray.readthedocs.io/en/latest/tune.html) API to run training algorithm. All the RLLib built-in trainers are compatible with the Tune API. Here we use tune.run() to execute a built-in training algorithm. For convenience, down below you can see part of the entry script where we make this call.\n",
|
||||
"\n",
|
||||
"```python\n",
|
||||
" tune.run(run_or_experiment=args.run,\n",
|
||||
" config={\n",
|
||||
" \"env\": args.env,\n",
|
||||
" \"num_gpus\": args.config[\"num_gpus\"],\n",
|
||||
" \"num_workers\": args.config[\"num_workers\"],\n",
|
||||
" \"callbacks\": {\"on_train_result\": callbacks.on_train_result},\n",
|
||||
" \"sample_batch_size\": 50,\n",
|
||||
" \"train_batch_size\": 1000,\n",
|
||||
" \"num_sgd_iter\": 2,\n",
|
||||
" \"num_data_loader_buffers\": 2,\n",
|
||||
" \"model\": {\"dim\": 42},\n",
|
||||
" },\n",
|
||||
" stop=args.stop,\n",
|
||||
" local_dir='./logs')\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Submit the estimator to start a run\n",
|
||||
"Now we use the rl_estimator configured above to submit a run."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run = exp.submit(config=rl_estimator)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Monitor the run\n",
|
||||
"\n",
|
||||
"Azure ML provides a Jupyter widget to show the real-time status of an experiment run. You could use this widget to monitor the status of runs. The widget shows the list of two child runs, one for head compute target run and one for worker compute target run, as well. You can click on the link under Status to see the details of the child run."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.widgets import RunDetails\n",
|
||||
"\n",
|
||||
"RunDetails(run).show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Wait for the run to complete before proceeding. If you want to stop the run, you may skip this and move to next section below. \n",
|
||||
"\n",
|
||||
"**Note: the run may take anywhere from 30 minutes to 45 minutes to complete.**"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run.wait_for_completion()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Stop the run\n",
|
||||
"\n",
|
||||
"To cancel the run, call run.cancel()."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# run.cancel()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Performance of the agent during training\n",
|
||||
"\n",
|
||||
"Let's get the reward metrics for the training run agent and observe how the agent's rewards improved over the training iterations and how the agent learns to win the Pong game. \n",
|
||||
"\n",
|
||||
"Collect the episode reward metrics from the worker run's metrics. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Get all child runs\n",
|
||||
"child_runs = list(run.get_children(_rehydrate_runs=False))\n",
|
||||
"\n",
|
||||
"# Get the reward metrics from worker run\n",
|
||||
"if child_runs[0].id.endswith(\"_worker\"):\n",
|
||||
" episode_reward_mean = child_runs[0].get_metrics(name='episode_reward_mean')\n",
|
||||
"else:\n",
|
||||
" episode_reward_mean = child_runs[1].get_metrics(name='episode_reward_mean')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Plot the reward metrics. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"\n",
|
||||
"plt.plot(episode_reward_mean['episode_reward_mean'])\n",
|
||||
"plt.xlabel('training_iteration')\n",
|
||||
"plt.ylabel('episode_reward_mean')\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We observe that during the training over multiple episodes, the agent learn to win the Pong game against opponent with our target of 18 points in each episode of 21 points.\n",
|
||||
"**Congratulations!! You have trained your Pong agent to win a game marvelously.**"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Cleaning up\n",
|
||||
"For your convenience, below you can find code snippets to clean up any resources created as part of this tutorial that you don't wish to retain."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# To archive the created experiment:\n",
|
||||
"#experiment.archive()\n",
|
||||
"\n",
|
||||
"# To delete the compute targets:\n",
|
||||
"#head_compute_target.delete()\n",
|
||||
"#worker_compute_target.delete()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Next\n",
|
||||
"In this example, you learnt how to solve distributed RL training problems using head and worker compute targets. This is currently the last introductory tutorial for Azure Machine Learning service's Reinforcement Learning offering. We would love to hear your feedback to build the features you need!"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "vineetg"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
"name": "python36"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.4"
|
||||
},
|
||||
"notice": "Copyright (c) Microsoft Corporation. All rights reserved.\u00e2\u20ac\u00afLicensed under the MIT License.\u00e2\u20ac\u00af "
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -0,0 +1,7 @@
|
||||
name: pong_rllib
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-contrib-reinforcementlearning
|
||||
- azureml-widgets
|
||||
- matplotlib
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user