mirror of https://github.com/Azure/MachineLearningNotebooks.git
synced 2025-12-20 01:27:06 -05:00

Compare commits
19 Commits: jeffshep/s ... azureml-sd

| Author | SHA1 | Date |
|---|---|---|
|  | 3fd1ce8993 |  |
|  | aa93588190 |  |
|  | 12520400e5 |  |
|  | 35614e83fa |  |
|  | ff22ac01cc |  |
|  | e7dd826f34 |  |
|  | fcc882174b |  |
|  | 6872d8a3bb |  |
|  | a2cb4c3589 |  |
|  | 15008962b2 |  |
|  | 9414b51fac |  |
|  | 80ac414582 |  |
|  | cbc151660b |  |
|  | 0024abc6e3 |  |
|  | fa13385860 |  |
|  | 0c5f6daf52 |  |
|  | c11e9fc1da |  |
|  | 280150713e |  |
|  | bb11c80b1b |  |
@@ -1,8 +1,6 @@
 # Azure Machine Learning Python SDK notebooks
 
-** **With the introduction of AzureML SDK v2, this samples repository for the v1 SDK is now deprecated and will not be monitored or updated. Users are encouraged to visit the [v2 SDK samples repository](https://github.com/Azure/azureml-examples) instead for up-to-date and enhanced examples of how to build, train, and deploy machine learning models with AzureML's newest features.** **
+### **With the introduction of AzureML SDK v2, this samples repository for the v1 SDK is now deprecated and will not be monitored or updated. Users are encouraged to visit the [v2 SDK samples repository](https://github.com/Azure/azureml-examples) instead for up-to-date and enhanced examples of how to build, train, and deploy machine learning models with AzureML's newest features.**
-
-
 
 Welcome to the Azure Machine Learning Python SDK notebooks repository!
 
@@ -103,7 +103,7 @@
    "source": [
     "import azureml.core\n",
     "\n",
-    "print(\"This notebook was created using version 1.51.0 of the Azure ML SDK\")\n",
+    "print(\"This notebook was created using version 1.55.0 of the Azure ML SDK\")\n",
     "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
    ]
   },
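The hunk above only bumps the banner string. A small check like the following (illustrative, not part of the diff; the pinned version string is taken from the updated notebooks) turns that banner into an actionable warning:

```python
# Hypothetical helper: compare the installed SDK against the version the
# notebooks were authored with and suggest an upgrade when it is older.
from packaging.version import Version

import azureml.core

AUTHORED_WITH = "1.55.0"  # version string used by the updated notebooks

installed = Version(azureml.core.VERSION)
if installed < Version(AUTHORED_WITH):
    print(f"azureml-core {installed} is older than {AUTHORED_WITH}; "
          "consider: pip install --upgrade azureml-sdk")
else:
    print(f"azureml-core {installed} is up to date for these notebooks")
```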
@@ -329,7 +329,7 @@
    "    print(\"Creating new gpu-cluster\")\n",
    "    \n",
    "    # Specify the configuration for the new cluster\n",
-   "    compute_config = AmlCompute.provisioning_configuration(vm_size=\"STANDARD_NC6\",\n",
+   "    compute_config = AmlCompute.provisioning_configuration(vm_size=\"Standard_NC6s_v3\",\n",
    "                                                            min_nodes=0,\n",
    "                                                            max_nodes=4)\n",
    "    # Create the cluster with the specified name and configuration\n",
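For reference, the full provisioning pattern that this one-line SKU change sits inside looks roughly like the sketch below; the workspace config and the cluster name are assumptions, not taken from the diff:

```python
# Sketch of the cluster-creation flow the notebook uses, with the updated SKU.
from azureml.core import Workspace
from azureml.core.compute import AmlCompute, ComputeTarget
from azureml.core.compute_target import ComputeTargetException

ws = Workspace.from_config()
gpu_cluster_name = "gpu-cluster"  # illustrative name

try:
    gpu_cluster = ComputeTarget(workspace=ws, name=gpu_cluster_name)
    print("Found existing gpu-cluster")
except ComputeTargetException:
    print("Creating new gpu-cluster")
    # Specify the configuration for the new cluster
    compute_config = AmlCompute.provisioning_configuration(
        vm_size="Standard_NC6s_v3", min_nodes=0, max_nodes=4
    )
    # Create the cluster with the specified name and configuration
    gpu_cluster = ComputeTarget.create(ws, gpu_cluster_name, compute_config)
    gpu_cluster.wait_for_completion(show_output=True)
```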
@@ -174,7 +174,7 @@
    "else:\n",
    "    print(\"creating new cluster\")\n",
    "    # vm_size parameter below could be modified to one of the RAPIDS-supported VM types\n",
-   "    provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"Standard_NC6s_v2\", min_nodes=1, max_nodes = 1)\n",
+   "    provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"Standard_NC6s_v3\", min_nodes=1, max_nodes = 1)\n",
    "\n",
    "    # create the cluster\n",
    "    gpu_cluster = ComputeTarget.create(ws, gpu_cluster_name, provisioning_config)\n",
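Both SKU changes above retire older NC-series sizes. A quick, workspace-specific availability check (illustrative, not part of the diff) can confirm the replacement SKU before provisioning:

```python
# List the VM sizes AmlCompute supports in this workspace's region and check
# that the GPU SKU used by the updated notebooks is among them.
from azureml.core import Workspace
from azureml.core.compute import AmlCompute

ws = Workspace.from_config()
sizes = AmlCompute.supported_vmsizes(workspace=ws)
gpu_skus = {s["name"] for s in sizes if s.get("gpus", 0) > 0}
print("Standard_NC6s_v3 available:", "Standard_NC6s_v3" in gpu_skus)
```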
@@ -3,10 +3,10 @@ dependencies:
   - pip:
       - azureml-sdk
       - azureml-contrib-fairness
-      - fairlearn>=0.6.2
+      - fairlearn>=0.6.2,<=0.7.0
       - joblib
       - liac-arff
-      - raiwidgets~=0.26.0
+      - raiwidgets~=0.33.0
       - itsdangerous==2.0.1
       - markupsafe<2.1.0
       - protobuf==3.20.0
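The two fairness environment files receive the same pair of pins. A small runtime check like the one below (an illustrative sketch, not part of either file) verifies an already-installed environment against them:

```python
# Check installed fairlearn/raiwidgets versions against the tightened pins.
from importlib.metadata import version

from packaging.specifiers import SpecifierSet

pins = {
    "fairlearn": SpecifierSet(">=0.6.2,<=0.7.0"),
    "raiwidgets": SpecifierSet("~=0.33.0"),
}

for package, spec in pins.items():
    installed = version(package)
    status = "ok" if installed in spec else "outside pin"
    print(f"{package}=={installed}: {status}")
```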
@@ -3,10 +3,10 @@ dependencies:
   - pip:
       - azureml-sdk
       - azureml-contrib-fairness
-      - fairlearn>=0.6.2
+      - fairlearn>=0.6.2,<=0.7.0
       - joblib
       - liac-arff
-      - raiwidgets~=0.26.0
+      - raiwidgets~=0.33.0
       - itsdangerous==2.0.1
       - markupsafe<2.1.0
       - protobuf==3.20.0
@@ -5,21 +5,21 @@ channels:
   - main
 dependencies:
   # The python interpreter version.
-  # Azure ML only supports 3.7.0 and later.
+  # Azure ML only supports 3.8 and later.
   - pip==22.3.1
-  - python>=3.7,<3.9
-  - conda-forge::fbprophet==0.7.1
-  - pandas==1.1.5
-  - scipy==1.5.3
-  - Cython==0.29.14
-  - tqdm==4.65.0
+  - python>=3.9,<3.10
+  - holidays==0.29
+  - scipy==1.10.1
+  - tqdm==4.66.1
 
   - pip:
     # Required packages for AzureML execution, history, and data preparation.
-    - azureml-widgets~=1.51.0
-    - azureml-defaults~=1.51.0
-    - -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/1.51.0/validated_win32_requirements.txt [--no-deps]
-    - matplotlib==3.6.2
+    - azureml-widgets~=1.55.0
+    - azureml-defaults~=1.55.0
+    - -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/1.55.0/validated_win32_requirements.txt [--no-deps]
+    - matplotlib==3.7.1
     - xgboost==1.3.3
-    - cmdstanpy==0.9.5
+    - prophet==1.1.4
+    - pandas==1.3.5
+    - cmdstanpy==1.1.0
     - setuptools-git==1.2
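One way (illustrative; the environment name and file path are assumptions) to consume an updated conda specification like this from the SDK is to wrap it in an AzureML Environment:

```python
# Build a reusable AzureML Environment object from the updated conda YAML.
from azureml.core import Environment, Workspace

ws = Workspace.from_config()
env = Environment.from_conda_specification(
    name="azure_automl",         # assumed environment name
    file_path="automl_env.yml",  # assumed path to the updated file
)
env.register(workspace=ws)  # optional: register so runs can reuse it by name
```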
@@ -7,26 +7,24 @@ dependencies:
   # The python interpreter version.
   # Azure ML only supports 3.7 and later.
   - pip==22.3.1
-  - python>=3.7,<3.9
-  - matplotlib==3.2.1
-  - numpy>=1.21.6,<=1.22.3
-  - cython==0.29.14
+  - python>=3.9,<3.10
+  - matplotlib==3.7.1
+  - numpy>=1.21.6,<=1.23.5
   - urllib3==1.26.7
-  - scipy>=1.4.1,<=1.5.3
-  - scikit-learn==0.22.1
+  - scipy==1.10.1
+  - scikit-learn=1.1.3
   - py-xgboost<=1.3.3
-  - holidays==0.10.3
-  - conda-forge::fbprophet==0.7.1
+  - holidays==0.29
   - pytorch::pytorch=1.11.0
   - cudatoolkit=10.1.243
   - notebook
 
   - pip:
     # Required packages for AzureML execution, history, and data preparation.
-    - azureml-widgets~=1.51.0
-    - azureml-defaults~=1.51.0
+    - azureml-widgets~=1.55.0
+    - azureml-defaults~=1.55.0
     - pytorch-transformers==1.0.0
-    - spacy==2.2.4
-    - pystan==2.19.1.1
-    - https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
-    - -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/1.51.0/validated_linux_requirements.txt [--no-deps]
+    - spacy==2.3.9
+    - prophet==1.1.4
+    - https://aka.ms/automl-resources/packages/en_core_web_sm-2.3.1.tar.gz
+    - -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/1.55.0/validated_linux_requirements.txt [--no-deps]
@@ -7,26 +7,20 @@ dependencies:
   # The python interpreter version.
   # Currently Azure ML only supports 3.7 and later.
   - pip==22.3.1
-  - python>=3.7,<3.9
-  - matplotlib==3.2.1
-  - numpy>=1.21.6,<=1.22.3
-  - cython==0.29.14
-  - urllib3==1.26.7
-  - scipy>=1.4.1,<=1.5.3
-  - scikit-learn==0.22.1
-  - py-xgboost<=1.3.3
-  - holidays==0.10.3
-  - pytorch::pytorch=1.11.0
-  - cudatoolkit=9.0
-  - notebook
+  - python>=3.9,<3.10
+  - numpy>=1.21.6,<=1.23.5
+  - scipy==1.10.1
+  - scikit-learn==1.1.3
+  - holidays==0.29
 
   - pip:
     # Required packages for AzureML execution, history, and data preparation.
-    - azureml-widgets~=1.51.0
-    - azureml-defaults~=1.51.0
+    - azureml-widgets~=1.55.0
+    - azureml-defaults~=1.55.0
     - pytorch-transformers==1.0.0
-    - spacy==2.2.4
-    - pystan==2.19.1.1
-    - fbprophet==0.7.1
-    - https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
-    - -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/1.51.0/validated_darwin_requirements.txt [--no-deps]
+    - prophet==1.1.4
+    - xgboost==1.3.3
+    - spacy==2.3.9
+    - matplotlib==3.7.1
+    - https://aka.ms/automl-resources/packages/en_core_web_sm-2.3.1.tar.gz
+    - -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/1.55.0/validated_darwin_requirements.txt [--no-deps]
@@ -1,5 +1,21 @@
 {
  "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Copyright (c) Microsoft Corporation. All rights reserved.\n",
+    "\n",
+    "Licensed under the MIT License."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    ""
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -830,9 +846,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "scrolled": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "%matplotlib notebook\n",
@@ -1,5 +1,21 @@
 {
  "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Copyright (c) Microsoft Corporation. All rights reserved.\n",
+    "\n",
+    "Licensed under the MIT License."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    ""
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -1,593 +0,0 @@
(Whole file removed: the "Automated Machine Learning - Text Classification Using Deep Learning" notebook. The 593 deleted lines are the notebook JSON covering: an introduction to text classification with DNN featurization in AutoML, where BERT is tried on GPU compute and BiLSTM on CPU compute; setup of a Workspace and an "automl-classification-text-dnn" Experiment; creation of a "dnntext-cluster" AmlCompute target (STANDARD_NC6, 2 nodes, idle scale-down after 60 seconds); fetching four categories of the 20 Newsgroups dataset, stripping blank rows, and uploading train/test CSVs to the default datastore; an AutoMLConfig for classification with enable_dnn=True, validation_size=0.3, blocked_models=["LightGBM", "XGBoostClassifier"] and a 30-minute experiment timeout; submitting the run, retrieving the best child run and downloading its featurization summary; registering the best model as "textDNN-20News"; and scoring the test set through a remote inference run using the run_inference and get_result_df helpers, RunDetails, and test_run.get_metrics(). The notebook metadata listed author "anshirga", compute "AML Compute", kernel "Python 3.8 - AzureML", friendly name "DNN Text Featurization", and the task "Text featurization using DNNs for classification".)
@@ -1,4 +0,0 @@
-name: auto-ml-classification-text-dnn
-dependencies:
-- pip:
-  - azureml-sdk
@@ -1,70 +0,0 @@
(Whole file removed: the notebook's helper module. Its 70 lines defined two functions. run_inference(test_experiment, compute_target, script_folder, train_run, test_dataset, target_column_name, model_name) built a ScriptRunConfig around infer.py, reused the training run's environment, passed the target column name, model name and test dataset (as_named_input("data")) as arguments, submitted it to the test experiment with tags copied from the training run (training_run_id, run_algorithm, valid_score, primary_metric), logged run_algorithm on the new run, and returned it. get_result_df(remote_run) walked the AutoML run's children recursively, collected run_id, run_algorithm, primary_metric and score for each child, respected the run's minimize/maximize goal when sorting by score, dropped duplicate algorithms, and returned a DataFrame indexed by run_algorithm.)
@@ -1,70 +0,0 @@
(Whole file removed: the remote inference script. Its 70 lines parsed --target_column_name, --model_name and --input-data with argparse, loaded the registered model via Model.get_model_path and sklearn.externals.joblib, fetched the test TabularDataset by id from the run context, split it into feature and label DataFrames, computed predict_proba on the features, scored the predictions with azureml.automl.runtime.shared.score.scoring.score_classification over the CLASSIFICATION_SCALAR_SET metrics using the union of train and test class labels, printed the scores, and logged each metric to the run.)
@@ -1,5 +1,21 @@
 {
  "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Copyright (c) Microsoft Corporation. All rights reserved.\n",
+    "\n",
+    "Licensed under the MIT License."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    ""
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -31,7 +31,7 @@ try:
     model = Model(ws, args.model_name)
     last_train_time = model.created_time
     print("Model was last trained on {0}.".format(last_train_time))
-except Exception as e:
+except Exception:
     print("Could not get last model train time.")
     last_train_time = datetime.min.replace(tzinfo=pytz.UTC)
 
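In isolation, the pattern this hunk tidies up (dropping the unused exception binding) looks like the sketch below; the workspace lookup and the model name are placeholders, not taken from the script:

```python
# Fall back to an epoch-like timestamp when the model has never been trained.
from datetime import datetime

import pytz
from azureml.core import Workspace
from azureml.core.model import Model

ws = Workspace.from_config()
model_name = "my-model"  # placeholder

try:
    model = Model(ws, model_name)
    last_train_time = model.created_time
    print("Model was last trained on {0}.".format(last_train_time))
except Exception:  # the unused "as e" binding is gone, matching the hunk
    print("Could not get last model train time.")
    last_train_time = datetime.min.replace(tzinfo=pytz.UTC)
```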
@@ -97,7 +97,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "print(\"This notebook was created using version 1.51.0 of the Azure ML SDK\")\n",
+    "print(\"This notebook was created using version 1.55.0 of the Azure ML SDK\")\n",
     "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
    ]
   },
@@ -97,7 +97,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "print(\"This notebook was created using version 1.51.0 of the Azure ML SDK\")\n",
+    "print(\"This notebook was created using version 1.55.0 of the Azure ML SDK\")\n",
     "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
    ]
   },
@@ -1,21 +1,9 @@
 name: azure_automl_experimental
 dependencies:
   # The python interpreter version.
-<<<<<<< HEAD
-  # Currently Azure ML only supports 3.6.0 and later.
-  - pip<=20.2.4
-  - python>=3.6.0,<3.10
-  - cython==0.29.14
-  - urllib3==1.26.7
-  - PyJWT < 2.0.0
-  - numpy==1.22.3
-  - pywin32==227
-  - cryptography<37.0.0
-=======
   # Currently Azure ML only supports 3.7.0 and later.
   - pip<=22.3.1
   - python>=3.7.0,<3.11
->>>>>>> 4671acd451ce979c3cebcd3917804861a333b710
 
   - pip:
     # Required packages for AzureML execution, history, and data preparation.
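This hunk resolves merge-conflict markers that had been committed into the environment file. A tiny guard like the following (illustrative only, with a placeholder path) catches that class of mistake before it ships:

```python
# Fail fast if unresolved conflict markers are still present in a YAML file.
from pathlib import Path

text = Path("environment.yml").read_text()  # placeholder path
conflict_lines = [line for line in text.splitlines()
                  if line.startswith(("<<<<<<<", "=======", ">>>>>>>"))]
if conflict_lines:
    raise SystemExit(f"Unresolved merge-conflict markers found: {conflict_lines}")
```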
@@ -25,4 +13,3 @@ dependencies:
     - azureml-mlflow
     - pandas
     - mlflow
-    - docker<6.0.0
@@ -92,7 +92,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "print(\"This notebook was created using version 1.51.0 of the Azure ML SDK\")\n",
+    "print(\"This notebook was created using version 1.55.0 of the Azure ML SDK\")\n",
     "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
    ]
   },
@@ -91,7 +91,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "print(\"This notebook was created using version 1.51.0 of the Azure ML SDK\")\n",
+    "print(\"This notebook was created using version 1.55.0 of the Azure ML SDK\")\n",
     "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
    ]
   },
@@ -13,7 +13,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    ""
+    ""
    ]
   },
   {
@@ -7,7 +7,7 @@
    "Copyright (c) Microsoft Corporation. All rights reserved.\n",
    "\n",
    "Licensed under the MIT License.\n",
-    ""
+    ""
    ]
   },
   {
@@ -1,7 +1,6 @@
 {
  "cells": [
   {
-   "attachments": {},
    "cell_type": "markdown",
    "metadata": {},
    "source": [
@@ -11,7 +10,6 @@
    ]
   },
   {
-   "attachments": {},
    "cell_type": "markdown",
    "metadata": {},
    "source": [
@@ -19,7 +17,13 @@
    ]
   },
   {
-   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "<font color=\"red\" size=\"5\"><strong>!Important!</strong> </br>This notebook is outdated and is not supported by the AutoML Team. Please use the supported version ([link](https://github.com/Azure/azureml-examples/tree/main/sdk/python/jobs/automl-standalone-jobs/automl-forecasting-task-bike-share)).</font>"
+   ]
+  },
+  {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
(The remaining 28 hunks in this file, from @@ -37,7 +41,6 @@ through @@ -747,7 +724,6 @@, each remove a single "attachments": {}, line from a markdown cell and leave the surrounding context lines unchanged.)
@@ -822,7 +798,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.9"
+   "version": "3.8.10"
   },
   "microsoft": {
    "ms_spell_check": {
@@ -1,6 +1,6 @@
 import argparse
 from azureml.core import Dataset, Run
-from sklearn.externals import joblib
+import joblib
 
 parser = argparse.ArgumentParser()
 parser.add_argument(
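The sklearn.externals.joblib module was removed from newer scikit-learn releases, so the script now imports the standalone package directly. For environments that must still run against very old scikit-learn, a compatibility shim could look like this (purely illustrative):

```python
# Prefer the standalone joblib package; fall back only on very old environments.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib  # pre-0.23 scikit-learn only

# Existing calls such as joblib.load("outputs/model.pkl") keep working unchanged.
```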
@@ -2,22 +2,30 @@
  "cells": [
   {
    "cell_type": "markdown",
+   "metadata": {},
    "source": [
     "Copyright (c) Microsoft Corporation. All rights reserved.\n",
     "\n",
     "Licensed under the MIT License."
-   ],
-   "metadata": {}
+   ]
   },
   {
    "cell_type": "markdown",
+   "metadata": {},
    "source": [
     ""
-   ],
-   "metadata": {}
+   ]
   },
   {
    "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "<font color=\"red\" size=\"5\"><strong>!Important!</strong> </br>This notebook is outdated and is not supported by the AutoML Team. Please use the supported version ([link](https://github.com/Azure/azureml-examples/blob/main/sdk/python/jobs/automl-standalone-jobs/automl-forecasting-task-energy-demand/automl-forecasting-task-energy-demand-advanced-mlflow.ipynb)).</font>"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
    "source": [
     "# Automated Machine Learning\n",
     "_**Forecasting using the Energy Demand Dataset**_\n",
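The churn in this and the following hunks (moving "metadata" ahead of "source" and adding explicit "execution_count"/"outputs" keys) is what the cells look like after being re-serialized with canonical, alphabetical key order. A sketch of producing that normalization with nbformat (the file name is an assumption):

```python
# Round-trip a notebook through nbformat so cell keys are written in the
# canonical order (cell_type, execution_count, metadata, outputs, source).
import nbformat

path = "auto-ml-forecasting-energy-demand.ipynb"  # assumed file name
nb = nbformat.read(path, as_version=4)
nbformat.validate(nb)     # raises if required cell keys are missing
nbformat.write(nb, path)  # serializes with sorted keys
```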
@@ -32,11 +40,11 @@
    "Advanced Forecasting\n",
    "1. [Advanced Training](#advanced_training)\n",
    "1. [Advanced Results](#advanced_results)"
-   ],
-   "metadata": {}
+   ]
  },
  {
   "cell_type": "markdown",
+  "metadata": {},
   "source": [
    "# Introduction<a id=\"introduction\"></a>\n",
    "\n",
@@ -52,18 +60,20 @@
    "1. Generate the forecast and compute the out-of-sample accuracy metrics\n",
    "1. Configuration and remote run of AutoML for a time-series model with lag and rolling window features\n",
    "1. Run and explore the forecast with lagging features"
-   ],
-   "metadata": {}
+   ]
  },
  {
   "cell_type": "markdown",
+  "metadata": {},
   "source": [
    "# Setup<a id=\"setup\"></a>"
-  ],
-  "metadata": {}
+  ]
  },
  {
   "cell_type": "code",
+  "execution_count": null,
+  "metadata": {},
+  "outputs": [],
   "source": [
    "import json\n",
    "import logging\n",
@@ -82,36 +92,36 @@
    "from azureml.core import Experiment, Workspace, Dataset\n",
    "from azureml.train.automl import AutoMLConfig\n",
    "from datetime import datetime"
-   ],
-   "outputs": [],
-   "execution_count": null,
-   "metadata": {}
+   ]
  },
  {
   "cell_type": "markdown",
+  "metadata": {},
   "source": [
    "This notebook is compatible with Azure ML SDK version 1.35.0 or later."
-  ],
-  "metadata": {}
+  ]
  },
  {
   "cell_type": "code",
+  "execution_count": null,
+  "metadata": {},
+  "outputs": [],
   "source": [
    "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
-  ],
-  "outputs": [],
-  "execution_count": null,
-  "metadata": {}
+  ]
  },
  {
   "cell_type": "markdown",
+  "metadata": {},
   "source": [
    "As part of the setup you have already created an Azure ML `Workspace` object. For Automated ML you will need to create an `Experiment` object, which is a named object in a `Workspace` used to run experiments."
-  ],
-  "metadata": {}
+  ]
  },
  {
   "cell_type": "code",
+  "execution_count": null,
+  "metadata": {},
+  "outputs": [],
   "source": [
    "ws = Workspace.from_config()\n",
    "\n",
@@ -133,13 +143,11 @@
    "pd.set_option(\"display.max_colwidth\", None)\n",
    "outputDf = pd.DataFrame(data=output, index=[\"\"])\n",
    "outputDf.T"

    "## Create or Attach existing AmlCompute\n",
    "A compute target is required to execute a remote Automated ML run. \n",
@@ -149,11 +157,13 @@
    "#### Creation of AmlCompute takes approximately 5 minutes. \n",
    "If the AmlCompute with that name is already in your workspace this code will skip the creation process.\n",
    "As with other Azure services, there are limits on certain resources (e.g. AmlCompute) associated with the Azure Machine Learning service. Please read [this article](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota."

    "from azureml.core.compute import ComputeTarget, AmlCompute\n",
    "from azureml.core.compute_target import ComputeTargetException\n",
@@ -172,24 +182,22 @@
    " compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, compute_config)\n",
    "\n",
    "compute_target.wait_for_completion(show_output=True)"

    "# Data<a id=\"data\"></a>\n",
    "\n",
    "We will use energy consumption [data from New York City](http://mis.nyiso.com/public/P-58Blist.htm) for model training. The data is stored in a tabular format and includes energy demand and basic weather data at an hourly frequency. \n",
    "\n",
    "With Azure Machine Learning datasets you can keep a single copy of data in your storage, easily access data during model training, share data and collaborate with other users. Below, we will upload the dataset and create a [tabular dataset](https://docs.microsoft.com/bs-latn-ba/azure/machine-learning/service/how-to-create-register-datasets#dataset-types) to be used for training and prediction."

    "Let's set up what we know about the dataset.\n",
    "\n",
@@ -197,64 +205,66 @@
    "<b>Time column</b> is the time axis along which to predict.\n",
    "\n",
    "The other columns, \"temp\" and \"precip\", are implicitly designated as features."

    "target_column_name = \"demand\"\n",
    "time_column_name = \"timeStamp\""

    "dataset = Dataset.Tabular.from_delimited_files(\n",
    " path=\"https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/nyc_energy.csv\"\n",
    ").with_timestamp_columns(fine_grain_timestamp=time_column_name)\n",
    "dataset.take(5).to_pandas_dataframe().reset_index(drop=True)"

    "The NYC Energy dataset is missing energy demand values for all datetimes later than August 10th, 2017 5AM. Below, we trim the rows containing these missing values from the end of the dataset."

    "# Cut off the end of the dataset due to large number of nan values\n",
    "dataset = dataset.time_before(datetime(2017, 10, 10, 5))"

    "## Split the data into train and test sets"

    "The first split we make is into train and test sets. Note that we are splitting on time. Data before and including August 8th, 2017 5AM will be used for training, and data after will be used for testing."
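A quick way to see what this time-based split does, sketched with plain pandas on a hypothetical frame that has the same `timeStamp` and `demand` columns (the notebook itself uses the `TabularDataset` time filters shown in the surrounding cells, so this is only an illustration):

```python
import pandas as pd

# Hypothetical frame standing in for the NYC energy data.
df = pd.DataFrame({
    "timeStamp": pd.date_range("2017-08-07", periods=72, freq="H"),
    "demand": range(72),
})

cutoff = pd.Timestamp("2017-08-08 05:00:00")
train = df[df["timeStamp"] <= cutoff]   # everything up to and including the cutoff
test = df[df["timeStamp"] > cutoff]     # everything after the cutoff is held out
```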
    "# split into train based on time\n",
    "train = (\n",
@@ -263,13 +273,13 @@
    " .reset_index(drop=True)\n",
    ")\n",
    "train.sort_values(time_column_name).tail(5)"

    "# split into test based on time\n",
    "test = (\n",
@@ -278,13 +288,22 @@
    " .reset_index(drop=True)\n",
    ")\n",
    "test.head(5)"

    "# register the split train and test data in workspace storage\n",
    "from azureml.data.dataset_factory import TabularDatasetFactory\n",
@@ -296,23 +315,11 @@
    "test_dataset = TabularDatasetFactory.register_pandas_dataframe(\n",
    " test, target=(datastore, \"dataset/\"), name=\"nyc_energy_test\"\n",
    ")"

    "### Setting the maximum forecast horizon\n",
    "\n",
@@ -321,20 +328,20 @@
    "Learn more about forecast horizons in our [Auto-train a time-series forecast model](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-auto-train-forecast#configure-and-run-experiment) guide.\n",
    "\n",
    "In this example, we set the horizon to 48 hours."

    "forecast_horizon = 48"

    "## Forecasting Parameters\n",
    "To define forecasting parameters for your experiment training, you can leverage the ForecastingParameters class. The table below details the forecasting parameters we will be passing into our experiment.\n",
@@ -345,11 +352,11 @@
    "|**forecast_horizon**|The forecast horizon is how many periods forward you would like to forecast. This integer horizon is in units of the timeseries frequency (e.g. daily, weekly).|\n",
    "|**freq**|Forecast frequency. This optional parameter represents the period with which the forecast is desired, for example, daily, weekly, yearly, etc. Use this parameter for the correction of time series containing irregular data points or for padding of short time series. The frequency needs to be a pandas offset alias. Please refer to [pandas documentation](https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects) for more information.|\n",
    "|**cv_step_size**|Number of periods between two consecutive cross-validation folds. The default value is \"auto\", in which case AutoML determines the cross-validation step size automatically, if a validation set is not provided. Users can also specify an integer value.|"
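A minimal sketch of how the parameters in the table above map onto the v1 SDK `ForecastingParameters` class, assuming the `time_column_name` and `forecast_horizon` variables defined earlier in this notebook; `"H"` is the pandas offset alias for an hourly series, and `cv_step_size` is left at its `"auto"` default:

```python
from azureml.automl.core.forecasting_parameters import ForecastingParameters

forecasting_parameters = ForecastingParameters(
    time_column_name=time_column_name,   # "timeStamp", defined above
    forecast_horizon=forecast_horizon,   # 48 hourly periods, defined above
    freq="H",                            # hourly pandas offset alias
)
```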
    "# Train<a id=\"train\"></a>\n",
    "\n",
@@ -367,18 +374,20 @@
    "|**n_cross_validations**|Number of cross-validation folds to use for model/pipeline selection. The default value is \"auto\", in which case AutoML determines the number of cross-validations automatically, if a validation set is not provided. Users can also specify an integer value.|\n",
    "|**enable_early_stopping**|Flag to enable early termination if the score is not improving in the short term.|\n",
    "|**forecasting_parameters**|A class that holds all the forecasting-related parameters.|\n"

    "This notebook uses the blocked_models parameter to exclude some models that take a longer time to train on this dataset. You can choose to remove models from the blocked_models list but you may need to increase the experiment_timeout_hours parameter value to get results."
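The notebook's own configuration cell is only partially visible in this diff, so the following is a representative sketch of an `AutoMLConfig` for a forecasting task rather than the exact cell: the timeout and blocked model name below are illustrative placeholders, while `train_dataset`, `target_column_name`, `compute_target` and `forecasting_parameters` refer to objects created in earlier cells.

```python
import logging
from azureml.train.automl import AutoMLConfig

automl_config = AutoMLConfig(
    task="forecasting",
    primary_metric="normalized_root_mean_squared_error",
    experiment_timeout_hours=0.3,            # illustrative value only
    training_data=train_dataset,
    label_column_name=target_column_name,
    compute_target=compute_target,
    enable_early_stopping=True,
    n_cross_validations="auto",
    blocked_models=["ExtremeRandomTrees"],   # illustrative entry
    verbosity=logging.INFO,
    forecasting_parameters=forecasting_parameters,
)
```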
    "from azureml.automl.core.forecasting_parameters import ForecastingParameters\n",
    "\n",
@@ -402,65 +411,65 @@
    " verbosity=logging.INFO,\n",
    " forecasting_parameters=forecasting_parameters,\n",
    ")"

    "Call the `submit` method on the experiment object and pass the run configuration. Depending on the data and the number of iterations this can run for a while.\n",
    "One may specify `show_output = True` to print currently running iterations to the console."

    "remote_run = experiment.submit(automl_config, show_output=False)"

    "remote_run.wait_for_completion()"

    "## Retrieve the Best Run details\n",
    "Below we retrieve the best Run object from among all the runs in the experiment."

    "best_run = remote_run.get_best_child()\n",
    "best_run"

    "## Featurization\n",
    "We can look at the engineered feature names generated in time-series featurization via the JSON file named 'engineered_feature_names.json' under the run outputs."

    "# Download the JSON file locally\n",
    "best_run.download_file(\n",
@@ -470,13 +479,11 @@
    " records = json.load(f)\n",
    "\n",
    "records"

    "### View featurization summary\n",
    "You can also see what featurization steps were performed on different raw features in the user data. For each raw feature in the user data, the following information is displayed:\n",
@@ -486,11 +493,13 @@
    "+ Type detected\n",
    "+ If feature was dropped\n",
    "+ List of feature transformations for the raw feature"

    "# Download the featurization summary JSON file locally\n",
    "best_run.download_file(\n",
@@ -512,41 +521,41 @@
    " \"Transformations\",\n",
    " ]\n",
    "]"

    "# Forecasting<a id=\"forecast\"></a>\n",
    "\n",
    "Now that we have retrieved the best pipeline/model, it can be used to make predictions on test data. We will do batch scoring on the test dataset, which should have the same schema as the training dataset.\n",
    "\n",
    "The inference will run on a remote compute. In this example, it will re-use the training compute."

    "test_experiment = Experiment(ws, experiment_name + \"_inference\")"

    "### Retrieving forecasts from the model\n",
    "We have created a function called `run_forecast` that submits the test data to the best model determined during the training run and retrieves forecasts. This function uses a helper script `forecasting_script` which is uploaded and executed on the remote compute."

    "from run_forecast import run_remote_inference\n",
    "\n",
@@ -561,32 +570,32 @@
    "\n",
    "# download the inference output file to the local machine\n",
    "remote_run_infer.download_file(\"outputs/predictions.csv\", \"predictions.csv\")"

    "### Evaluate\n",
    "To evaluate the accuracy of the forecast, we'll compare against the actual values for some select metrics, including the mean absolute percentage error (MAPE). For more metrics that can be used for evaluation after training, please see [supported metrics](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-understand-automated-ml#regressionforecasting-metrics), and [how to calculate residuals](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-understand-automated-ml#residuals)."
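The notebook computes its metrics with the AzureML `scoring` module shown in the next cells; for intuition, a plain-numpy sketch of MAPE looks like this (the `"predicted"` column name used in the usage comment is an assumption about the predictions file layout):

```python
import numpy as np

def mape(actual, predicted):
    """Mean absolute percentage error, skipping zero actuals to avoid division by zero."""
    actual = np.asarray(actual, dtype=float)
    predicted = np.asarray(predicted, dtype=float)
    mask = actual != 0
    return np.mean(np.abs((actual[mask] - predicted[mask]) / actual[mask])) * 100

# e.g. mape(test[target_column_name], fcst_df["predicted"])  # column name assumed
```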
    "# load forecast data frame\n",
    "fcst_df = pd.read_csv(\"predictions.csv\", parse_dates=[time_column_name])\n",
    "fcst_df.head()"

    "from azureml.automl.core.shared import constants\n",
    "from azureml.automl.runtime.shared.score import scoring\n",
@@ -613,31 +622,31 @@
    " (test_pred, test_test), (\"prediction\", \"truth\"), loc=\"upper left\", fontsize=8\n",
    ")\n",
    "plt.show()"

    "# Advanced Training <a id=\"advanced_training\"></a>\n",
    "We did not use lags in the previous model specification. In effect, the prediction was the result of a simple regression on date, time series identifier columns and any additional features. This is often a very good prediction as common time series patterns like seasonality and trends can be captured in this manner. Such simple regression is horizon-less: it doesn't matter how far into the future we are predicting, because we are not using past data. In the previous example, the horizon was only used to split the data for cross-validation."

    "### Using lags and rolling window features\n",
    "Now we will configure the target lags, that is, the previous values of the target variable, meaning the prediction is no longer horizon-less. We therefore must still specify the `forecast_horizon` that the model will learn to forecast. The `target_lags` keyword specifies how far back we will construct the lags of the target variable, and the `target_rolling_window_size` specifies the size of the rolling window over which we will generate the `max`, `min` and `sum` features.\n",
    "\n",
    "This notebook uses the blocked_models parameter to exclude some models that take a longer time to train on this dataset. You can choose to remove models from the blocked_models list but you may need to increase the iteration_timeout_minutes parameter value to get results."
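A minimal sketch of the advanced configuration this paragraph describes; the lag list and window size below are assumed illustrative values, not necessarily the ones the notebook's own cell uses:

```python
from azureml.automl.core.forecasting_parameters import ForecastingParameters

advanced_forecasting_parameters = ForecastingParameters(
    time_column_name=time_column_name,
    forecast_horizon=forecast_horizon,
    target_lags=[12],                 # lag the target 12 periods back (assumed value)
    target_rolling_window_size=4,     # rolling max/min/sum over a 4-period window (assumed value)
)
```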
    "advanced_forecasting_parameters = ForecastingParameters(\n",
    " time_column_name=time_column_name,\n",
@@ -668,63 +677,63 @@
    " verbosity=logging.INFO,\n",
    " forecasting_parameters=advanced_forecasting_parameters,\n",
    ")"

    "We now start a new remote run, this time with lag and rolling window featurization. AutoML applies featurizations in the setup stage, prior to iterating over ML models. The full training set is featurized first, followed by featurization of each of the CV splits. Lag and rolling window features introduce additional complexity, so the run will take longer than in the previous example that lacked these featurizations."

    "advanced_remote_run = experiment.submit(automl_config, show_output=False)"

    "advanced_remote_run.wait_for_completion()"

    "### Retrieve the Best Run details"

    "best_run_lags = remote_run.get_best_child()\n",
    "best_run_lags"

    "# Advanced Results<a id=\"advanced_results\"></a>\n",
    "We did not use lags in the previous model specification. In effect, the prediction was the result of a simple regression on date, time series identifier columns and any additional features. This is often a very good prediction as common time series patterns like seasonality and trends can be captured in this manner. Such simple regression is horizon-less: it doesn't matter how far into the future we are predicting, because we are not using past data. In the previous example, the horizon was only used to split the data for cross-validation."

    "test_experiment_advanced = Experiment(ws, experiment_name + \"_inference_advanced\")\n",
    "advanced_remote_run_infer = run_remote_inference(\n",
@@ -741,23 +750,23 @@
    "advanced_remote_run_infer.download_file(\n",
    " \"outputs/predictions.csv\", \"predictions_advanced.csv\"\n",
    ")"

    "fcst_adv_df = pd.read_csv(\"predictions_advanced.csv\", parse_dates=[time_column_name])\n",
    "fcst_adv_df.head()"

    "from azureml.automl.core.shared import constants\n",
    "from azureml.automl.runtime.shared.score import scoring\n",
@@ -786,10 +795,7 @@
    " (test_pred, test_test), (\"prediction\", \"truth\"), loc=\"upper left\", fontsize=8\n",
    ")\n",
    "plt.show()"
  }
 ],
 "metadata": {
@@ -802,40 +808,40 @@
  "how-to-use-azureml",
  "automated-machine-learning"
 ],
 "kernel_info": {
  "name": "python3"
 },
 "kernelspec": {
  "display_name": "Python 3.8 - AzureML",
  "language": "python",
  "name": "python38-azureml"
 },
 "language_info": {
  "codemirror_mode": {
   "name": "ipython",
   "version": 3
  },
  "file_extension": ".py",
  "mimetype": "text/x-python",
  "name": "python",
  "nbconvert_exporter": "python",
  "pygments_lexer": "ipython3",
- "version": "3.8.5"
+ "version": "3.8.10"
 },
 "microsoft": {
  "ms_spell_check": {
   "ms_spell_check_language": "en"
  }
 },
 "nteract": {
  "version": "nteract-front-end@1.0.0"
 },
 "vscode": {
  "interpreter": {
   "hash": "6bd77c88278e012ef31757c15997a7bea8c943977c43d6909403c00ae11d43ca"
  }
 }
},
"nbformat": 4,
-"nbformat_minor": 2
+"nbformat_minor": 4
}
@@ -6,7 +6,7 @@ compute instance.

 import argparse
 from azureml.core import Dataset, Run
-from sklearn.externals import joblib
+import joblib
 from pandas.tseries.frequencies import to_offset

 parser = argparse.ArgumentParser()
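The change above swaps the long-deprecated `sklearn.externals.joblib` (removed from scikit-learn in 0.23) for the standalone `joblib` package. A minimal sketch of the loading pattern with the new import, where the model path and the test frame are assumptions (the real script downloads the serialized model from the AutoML run outputs):

```python
import joblib

fitted_model = joblib.load("model.pkl")   # path assumed; the script fetches this file itself
y_pred = fitted_model.predict(X_test)     # X_test: an assumed test feature frame
```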
@@ -19,7 +19,14 @@
   "hidePrompt": false
  },
  "source": [
   ""
+  ]
+ },
+ {
+  "cell_type": "markdown",
+  "metadata": {},
+  "source": [
+   "<font color=\"red\" size=\"5\"><strong>!Important!</strong> </br>This notebook is outdated and is not supported by the AutoML Team. Please use the supported version ([link](https://github.com/Azure/azureml-examples/tree/main/sdk/python/jobs/automl-standalone-jobs/automl-forecasting-github-dau)).</font>"
  ]
 },
 {
@@ -382,7 +389,7 @@
 "automl_config = AutoMLConfig(\n",
 " task=\"forecasting\",\n",
 " primary_metric=\"normalized_root_mean_squared_error\",\n",
-" experiment_timeout_hours=1,\n",
+" experiment_timeout_hours=1.5,\n",
 " training_data=train_dataset,\n",
 " label_column_name=target_column_name,\n",
 " validation_data=valid_dataset,\n",
@@ -695,7 +702,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.6.9"
+"version": "3.8.10"
 }
},
"nbformat": 4,
@@ -4,7 +4,7 @@ import os
 import numpy as np
 import pandas as pd

-from sklearn.externals import joblib
+import joblib
 from sklearn.metrics import mean_absolute_error, mean_squared_error

 from azureml.automl.runtime.shared.score import scoring, constants
@@ -16,6 +16,13 @@
   ""
  ]
 },
+{
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+  "<font color=\"red\" size=\"5\"><strong>!Important!</strong> </br>This notebook is outdated and is not supported by the AutoML Team. Please use the supported version ([link](https://github.com/Azure/azureml-examples/tree/main/sdk/python/jobs/pipelines/1k_demand_forecasting_with_pipeline_components/automl-forecasting-demand-hierarchical-timeseries-in-pipeline)).</font>"
+ ]
+},
 {
  "cell_type": "markdown",
  "metadata": {},
@@ -666,7 +673,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.6.8"
+"version": "3.8.10"
 }
},
"nbformat": 4,
@@ -16,6 +16,13 @@
   ""
  ]
 },
+{
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+  "<font color=\"red\" size=\"5\"><strong>!Important!</strong> </br>This notebook is outdated and is not supported by the AutoML Team. Please use the supported version ([link](https://github.com/Azure/azureml-examples/tree/main/sdk/python/jobs/pipelines/1k_demand_forecasting_with_pipeline_components/automl-forecasting-demand-many-models-in-pipeline)).</font>"
+ ]
+},
 {
  "cell_type": "markdown",
  "metadata": {},
@@ -306,7 +313,7 @@
 "from azureml.core.compute import ComputeTarget, AmlCompute\n",
 "\n",
 "# Name your cluster\n",
-"compute_name = \"mm-compute\"\n",
+"compute_name = \"mm-compute-v1\"\n",
 "\n",
 "\n",
 "if compute_name in ws.compute_targets:\n",
@@ -316,7 +323,7 @@
 "else:\n",
 " print(\"Creating a new compute target...\")\n",
 " provisioning_config = AmlCompute.provisioning_configuration(\n",
-" vm_size=\"STANDARD_D16S_V3\", max_nodes=20\n",
+" vm_size=\"STANDARD_D14_V2\", max_nodes=20\n",
 " )\n",
 " # Create the compute target\n",
 " compute_target = ComputeTarget.create(ws, compute_name, provisioning_config)\n",
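A condensed sketch of the get-or-create pattern the cell above follows, using the values visible in the hunk and assuming `ws` is the notebook's `Workspace` object; the try/except variant here is an equivalent alternative to the `if compute_name in ws.compute_targets` check in the original cell:

```python
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

compute_name = "mm-compute-v1"

try:
    compute_target = ComputeTarget(workspace=ws, name=compute_name)
    print("Found existing compute target.")
except ComputeTargetException:
    print("Creating a new compute target...")
    provisioning_config = AmlCompute.provisioning_configuration(
        vm_size="STANDARD_D14_V2", max_nodes=20
    )
    compute_target = ComputeTarget.create(ws, compute_name, provisioning_config)
    compute_target.wait_for_completion(show_output=True)
```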
@@ -878,7 +885,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.8.5"
+"version": "3.8.10"
},
"vscode": {
 "interpreter": {
@@ -1,7 +1,6 @@
 {
 "cells": [
 {
-"attachments": {},
 "cell_type": "markdown",
 "metadata": {},
 "source": [
@@ -11,7 +10,6 @@
 ]
 },
 {
-"attachments": {},
 "cell_type": "markdown",
 "metadata": {},
 "source": [
@@ -19,7 +17,13 @@
 ]
 },
 {
-"attachments": {},
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+ "<font color=\"red\" size=\"5\"><strong>!Important!</strong> </br>This notebook is outdated and is not supported by the AutoML Team. Please use the supported version ([link](https://github.com/Azure/azureml-examples/tree/main/sdk/python/jobs/automl-standalone-jobs/automl-forecasting-orange-juice-sales)).</font>"
+]
+},
+{
 "cell_type": "markdown",
 "metadata": {},
 "source": [
@@ -37,7 +41,6 @@
-"attachments": {},
@@ -50,7 +53,6 @@
-"attachments": {},
@@ -75,7 +77,6 @@
-"attachments": {},
@@ -92,7 +93,6 @@
-"attachments": {},
@@ -126,7 +126,6 @@
-"attachments": {},
@@ -166,7 +165,6 @@
-"attachments": {},
@@ -190,7 +188,6 @@
-"attachments": {},
@@ -211,7 +208,6 @@
-"attachments": {},
@@ -231,7 +227,6 @@
-"attachments": {},
@@ -254,7 +249,9 @@
 " time_series_id_column_names, group_keys=False\n",
 " )\n",
 " df_head = df_grouped.apply(lambda dfg: dfg.iloc[:-n])\n",
+" df_head.reset_index(inplace=True, drop=True)\n",
 " df_tail = df_grouped.apply(lambda dfg: dfg.iloc[-n:])\n",
+" df_tail.reset_index(inplace=True, drop=True)\n",
 " return df_head, df_tail\n",
 "\n",
 "\n",
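For context, a hedged sketch of what the split helper modified in the hunk above looks like once the two `reset_index` calls are added; the function name, argument order and the explicit sort are assumptions reconstructed from the visible lines:

```python
import pandas as pd

def split_last_n_by_series_id(df, time_series_id_column_names, time_column_name, n):
    """For each time series, keep the last n rows as the tail (test) and the rest as the head (train)."""
    df_grouped = df.sort_values(time_column_name).groupby(
        time_series_id_column_names, group_keys=False
    )
    df_head = df_grouped.apply(lambda dfg: dfg.iloc[:-n])
    df_head.reset_index(inplace=True, drop=True)   # the added line: flatten the group index
    df_tail = df_grouped.apply(lambda dfg: dfg.iloc[-n:])
    df_tail.reset_index(inplace=True, drop=True)   # likewise for the held-out tail
    return df_head, df_tail
```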
@@ -262,7 +259,6 @@
-"attachments": {},
@@ -288,7 +284,6 @@
-"attachments": {},
@@ -305,7 +300,6 @@
-"attachments": {},
@@ -333,7 +327,6 @@
-"attachments": {},
@@ -372,7 +365,6 @@
-"attachments": {},
@@ -390,7 +382,6 @@
-"attachments": {},
@@ -464,7 +455,6 @@
-"attachments": {},
@@ -491,7 +481,6 @@
-"attachments": {},
@@ -511,7 +500,6 @@
-"attachments": {},
@@ -549,7 +537,6 @@
-"attachments": {},
@@ -570,7 +557,6 @@
-"attachments": {},
@@ -579,7 +565,6 @@
-"attachments": {},
@@ -608,7 +593,6 @@
-"attachments": {},
@@ -664,7 +648,6 @@
-"attachments": {},
@@ -672,7 +655,6 @@
-"attachments": {},
@@ -695,7 +677,6 @@
-"attachments": {},
@@ -715,7 +696,6 @@
-"attachments": {},
@@ -761,7 +741,6 @@
-"attachments": {},
@@ -810,7 +789,6 @@
-"attachments": {},
@@ -866,7 +844,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.8.5"
+"version": "3.8.10"
},
"tags": [
 "None"
@@ -6,7 +6,7 @@ compute instance.
|
|||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
from azureml.core import Dataset, Run
|
from azureml.core import Dataset, Run
|
||||||
from sklearn.externals import joblib
|
import joblib
|
||||||
from pandas.tseries.frequencies import to_offset
|
from pandas.tseries.frequencies import to_offset
|
||||||
|
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
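The scoring script now imports `joblib` directly because recent scikit-learn releases removed the vendored `sklearn.externals.joblib`. A minimal sketch of the equivalent save/load calls with the standalone `joblib` package (the model and file name below are illustrative, not taken from the diff):

```python
import joblib
from sklearn.linear_model import LinearRegression

# Fit a trivial model, persist it with the standalone joblib package,
# and load it back; sklearn.externals.joblib is no longer available in
# recent scikit-learn releases.
model = LinearRegression().fit([[0.0], [1.0], [2.0]], [0.0, 1.0, 2.0])
joblib.dump(model, "model.pkl")  # illustrative file name
restored = joblib.load("model.pkl")
print(restored.predict([[3.0]]))
```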
|
|||||||
@@ -1,5 +1,37 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||||
|
"\n",
|
||||||
|
"Licensed under the MIT License."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
""
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"<font color=\"red\" size=\"5\"><strong>!Important!</strong> </br>This notebook is outdated and is not supported by the AutoML Team. Please use the supported version ([link](https://github.com/Azure/azureml-examples/tree/main/sdk/python/jobs/pipelines/1h_automl_in_pipeline/automl-forecasting-in-pipeline)).</font>\n",
|
||||||
|
"</br>\n",
|
||||||
|
"</br>\n",
|
||||||
|
"<font color=\"red\" size=\"5\">\n",
|
||||||
|
"For examples illustrating how to build pipelines with components, please use the following links:</font>\n",
|
||||||
|
"<ul>\n",
|
||||||
|
" <li><a href=\"https://github.com/Azure/azureml-examples/tree/main/sdk/python/jobs/pipelines/1k_demand_forecasting_with_pipeline_components/automl-forecasting-demand-many-models-in-pipeline\">Many Models</a></li>\n",
|
||||||
|
" <li><a href=\"https://github.com/Azure/azureml-examples/tree/main/sdk/python/jobs/pipelines/1k_demand_forecasting_with_pipeline_components/automl-forecasting-demand-hierarchical-timeseries-in-pipeline\">Hierarchical Time Series</a></li>\n",
|
||||||
|
" <li><a href=\"https://github.com/Azure/azureml-examples/tree/main/sdk/python/jobs/automl-standalone-jobs/automl-forecasting-distributed-tcn\">Distributed TCN</a></li>\n",
|
||||||
|
"</ul>"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
@@ -39,7 +71,6 @@
|
|||||||
"import logging\n",
|
"import logging\n",
|
||||||
"import os\n",
|
"import os\n",
|
||||||
"\n",
|
"\n",
|
||||||
"from matplotlib import pyplot as plt\n",
|
|
||||||
"import pandas as pd\n",
|
"import pandas as pd\n",
|
||||||
"\n",
|
"\n",
|
||||||
"import azureml.core\n",
|
"import azureml.core\n",
|
||||||
@@ -425,8 +456,6 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from azureml.pipeline.core import PipelineData\n",
|
|
||||||
"\n",
|
|
||||||
"# The model name with which to register the trained model in the workspace.\n",
|
"# The model name with which to register the trained model in the workspace.\n",
|
||||||
"model_name_str = \"ojmodel\"\n",
|
"model_name_str = \"ojmodel\"\n",
|
||||||
"model_name = PipelineParameter(\"model_name\", default_value=model_name_str)\n",
|
"model_name = PipelineParameter(\"model_name\", default_value=model_name_str)\n",
|
||||||
@@ -555,40 +584,15 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from azureml.core import Model\n",
|
"from azureml.train.automl.run import AutoMLRun\n",
|
||||||
"\n",
|
"\n",
|
||||||
"model = Model(ws, model_name_str)\n",
|
"for step in training_pipeline_run.get_steps():\n",
|
||||||
"download_path = model.download(model_name_str, exist_ok=True)"
|
" if step.properties.get(\"StepType\") == \"AutoMLStep\":\n",
|
||||||
]
|
" automl_run = AutoMLRun(experiment, step.id)\n",
|
||||||
},
|
" break\n",
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"After all the files are downloaded, we can generate the run config for inference runs."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core import Environment, RunConfiguration\n",
|
|
||||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"env_file = os.path.join(download_path, \"conda_env_v_1_0_0.yml\")\n",
|
"best_run = automl_run.get_best_child()\n",
|
||||||
"inference_env = Environment(\"oj-inference-env\")\n",
|
"inference_env = best_run.get_environment()"
|
||||||
"inference_env.python.conda_dependencies = CondaDependencies(\n",
|
|
||||||
" conda_dependencies_file_path=env_file\n",
|
|
||||||
")"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"[Optional] The enviroment can also be assessed from the training run using `get_environment()` API."
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
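The updated cells stop downloading the registered model's conda file and instead locate the AutoML step inside the training pipeline run and reuse its best child's environment for inference. A minimal sketch of that pattern, assuming `training_pipeline_run` and `experiment` are already defined as in the notebook:

```python
from azureml.train.automl.run import AutoMLRun

# Find the AutoML step inside the completed training pipeline run.
automl_run = None
for step in training_pipeline_run.get_steps():
    if step.properties.get("StepType") == "AutoMLStep":
        automl_run = AutoMLRun(experiment, step.id)
        break

# Reuse the environment of the best child run for remote inference.
best_run = automl_run.get_best_child()
inference_env = best_run.get_environment()
```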
|
|||||||
@@ -6,7 +6,7 @@ import numpy as np
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
from pandas.tseries.frequencies import to_offset
|
from pandas.tseries.frequencies import to_offset
|
||||||
from sklearn.externals import joblib
|
import joblib
|
||||||
from sklearn.metrics import mean_absolute_error, mean_squared_error
|
from sklearn.metrics import mean_absolute_error, mean_squared_error
|
||||||
|
|
||||||
from azureml.data.dataset_factory import TabularDatasetFactory
|
from azureml.data.dataset_factory import TabularDatasetFactory
|
||||||
@@ -30,7 +30,7 @@ def infer_forecasting_dataset_tcn(
|
|||||||
|
|
||||||
run = Run.get_context()
|
run = Run.get_context()
|
||||||
|
|
||||||
registered_train = TabularDatasetFactory.register_pandas_dataframe(
|
TabularDatasetFactory.register_pandas_dataframe(
|
||||||
df_all,
|
df_all,
|
||||||
target=(
|
target=(
|
||||||
run.experiment.workspace.get_default_datastore(),
|
run.experiment.workspace.get_default_datastore(),
|
||||||
|
|||||||
@@ -2,22 +2,30 @@
|
|||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Licensed under the MIT License."
|
"Licensed under the MIT License."
|
||||||
],
|
]
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
""
|
""
|
||||||
],
|
]
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"<font color=\"red\" size=\"5\"><strong>!Important!</strong> </br>This notebook is outdated and is not supported by the AutoML Team. Please use the supported version ([link](https://github.com/Azure/azureml-examples/tree/main/sdk/python/jobs/automl-standalone-jobs/automl-forecasting-recipes-univariate)).</font>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"In this notebook we will explore the univariate time-series data to determine the settings for an automated ML experiment. We will follow the thought process depicted in the following diagram:<br/>\n",
|
"In this notebook we will explore the univariate time-series data to determine the settings for an automated ML experiment. We will follow the thought process depicted in the following diagram:<br/>\n",
|
||||||
"\n",
|
"\n",
|
||||||
@@ -40,11 +48,13 @@
|
|||||||
"</ol>\n",
|
"</ol>\n",
|
||||||
"\n",
|
"\n",
|
||||||
"The answers to these questions will help determine the appropriate settings for the automated ML experiment.\n"
|
"The answers to these questions will help determine the appropriate settings for the automated ML experiment.\n"
|
||||||
],
|
]
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"import os\n",
|
"import os\n",
|
||||||
"import warnings\n",
|
"import warnings\n",
|
||||||
@@ -65,13 +75,13 @@
|
|||||||
"# set printing options\n",
|
"# set printing options\n",
|
||||||
"pd.set_option(\"display.max_columns\", 500)\n",
|
"pd.set_option(\"display.max_columns\", 500)\n",
|
||||||
"pd.set_option(\"display.width\", 1000)"
|
"pd.set_option(\"display.width\", 1000)"
|
||||||
],
|
]
|
||||||
"outputs": [],
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# load data\n",
|
"# load data\n",
|
||||||
"main_data_loc = \"data\"\n",
|
"main_data_loc = \"data\"\n",
|
||||||
@@ -86,13 +96,13 @@
|
|||||||
"df.sort_values(by=TIME_COLNAME, inplace=True)\n",
|
"df.sort_values(by=TIME_COLNAME, inplace=True)\n",
|
||||||
"df.set_index(TIME_COLNAME, inplace=True)\n",
|
"df.set_index(TIME_COLNAME, inplace=True)\n",
|
||||||
"df.head(2)"
|
"df.head(2)"
|
||||||
],
|
]
|
||||||
"outputs": [],
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# plot the entire dataset\n",
|
"# plot the entire dataset\n",
|
||||||
"fig, ax = plt.subplots(figsize=(6, 2), dpi=180)\n",
|
"fig, ax = plt.subplots(figsize=(6, 2), dpi=180)\n",
|
||||||
@@ -100,20 +110,20 @@
|
|||||||
"ax.title.set_text(\"Original Data Series\")\n",
|
"ax.title.set_text(\"Original Data Series\")\n",
|
||||||
"locs, labels = plt.xticks()\n",
|
"locs, labels = plt.xticks()\n",
|
||||||
"plt.xticks(rotation=45)"
|
"plt.xticks(rotation=45)"
|
||||||
],
|
]
|
||||||
"outputs": [],
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"The graph plots the alcohol sales in the United States. Because the data is trending, it can be difficult to see cycles, seasonality or other interesting behaviors due to the scaling issues. For example, if there is a seasonal pattern, which we will discuss later, we cannot see them on the trending data. In such case, it is worth plotting the same data in first differences."
|
"The graph plots the alcohol sales in the United States. Because the data is trending, it can be difficult to see cycles, seasonality or other interesting behaviors due to the scaling issues. For example, if there is a seasonal pattern, which we will discuss later, we cannot see them on the trending data. In such case, it is worth plotting the same data in first differences."
|
||||||
],
|
]
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# plot the entire dataset in first differences\n",
|
"# plot the entire dataset in first differences\n",
|
||||||
"fig, ax = plt.subplots(figsize=(6, 2), dpi=180)\n",
|
"fig, ax = plt.subplots(figsize=(6, 2), dpi=180)\n",
|
||||||
@@ -121,20 +131,18 @@
|
|||||||
"ax.title.set_text(\"Data in first differences\")\n",
|
"ax.title.set_text(\"Data in first differences\")\n",
|
||||||
"locs, labels = plt.xticks()\n",
|
"locs, labels = plt.xticks()\n",
|
||||||
"plt.xticks(rotation=45)"
|
"plt.xticks(rotation=45)"
|
||||||
],
|
]
|
||||||
"outputs": [],
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"In the previous plot we observe that the data is more volatile towards the end of the series. This period coincides with the Covid-19 period, so we will exclude it from our experiment. Since in this example there are no user-provided features it is hard to make an argument that a model trained on the less volatile pre-covid data will be able to accurately predict the covid period."
|
"In the previous plot we observe that the data is more volatile towards the end of the series. This period coincides with the Covid-19 period, so we will exclude it from our experiment. Since in this example there are no user-provided features it is hard to make an argument that a model trained on the less volatile pre-covid data will be able to accurately predict the covid period."
|
||||||
],
|
]
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"# 1. Seasonality\n",
|
"# 1. Seasonality\n",
|
||||||
"\n",
|
"\n",
|
||||||
@@ -143,11 +151,13 @@
|
|||||||
"2. If it's seasonal, does the data exhibit a trend (up or down)?\n",
|
"2. If it's seasonal, does the data exhibit a trend (up or down)?\n",
|
||||||
"\n",
|
"\n",
|
||||||
"It is hard to visually detect seasonality when the data is trending. The reason being is scale of seasonal fluctuations is dwarfed by the range of the trend in the data. One way to deal with this is to de-trend the data by taking the first differences. We will discuss this in more detail in the next section."
|
"It is hard to visually detect seasonality when the data is trending. The reason being is scale of seasonal fluctuations is dwarfed by the range of the trend in the data. One way to deal with this is to de-trend the data by taking the first differences. We will discuss this in more detail in the next section."
|
||||||
],
|
]
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# plot the entire dataset in first differences\n",
|
"# plot the entire dataset in first differences\n",
|
||||||
"fig, ax = plt.subplots(figsize=(6, 2), dpi=180)\n",
|
"fig, ax = plt.subplots(figsize=(6, 2), dpi=180)\n",
|
||||||
@@ -155,20 +165,20 @@
|
|||||||
"ax.title.set_text(\"Data in first differences\")\n",
|
"ax.title.set_text(\"Data in first differences\")\n",
|
||||||
"locs, labels = plt.xticks()\n",
|
"locs, labels = plt.xticks()\n",
|
||||||
"plt.xticks(rotation=45)"
|
"plt.xticks(rotation=45)"
|
||||||
],
|
]
|
||||||
"outputs": [],
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"For the next plot, we will exclude the Covid period again. We will also shorten the length of data because plotting a very long time series may prevent us from seeing seasonal patterns, if there are any, because the plot may look like a random walk."
|
"For the next plot, we will exclude the Covid period again. We will also shorten the length of data because plotting a very long time series may prevent us from seeing seasonal patterns, if there are any, because the plot may look like a random walk."
|
||||||
],
|
]
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# remove COVID period\n",
|
"# remove COVID period\n",
|
||||||
"df = df[:COVID_PERIOD_START]\n",
|
"df = df[:COVID_PERIOD_START]\n",
|
||||||
@@ -179,13 +189,11 @@
|
|||||||
"ax.title.set_text(\"Data in first differences\")\n",
|
"ax.title.set_text(\"Data in first differences\")\n",
|
||||||
"locs, labels = plt.xticks()\n",
|
"locs, labels = plt.xticks()\n",
|
||||||
"plt.xticks(rotation=45)"
|
"plt.xticks(rotation=45)"
|
||||||
],
|
]
|
||||||
"outputs": [],
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"<p style=\"font-size:150%; color:blue\"> Conclusion </p>\n",
|
"<p style=\"font-size:150%; color:blue\"> Conclusion </p>\n",
|
||||||
"\n",
|
"\n",
|
||||||
@@ -204,11 +212,11 @@
|
|||||||
" <li> In the first case, by taking first differences we are removing stochastic trend, but we do not remove seasonal patterns. In the second case, we do not remove the stochastic trend and it can be captured by the trend component of the STL decomposition. It is hard to say which option will work best in your case, hence you will need to run both options to see which one results in more accurate forecasts. </li>\n",
|
" <li> In the first case, by taking first differences we are removing stochastic trend, but we do not remove seasonal patterns. In the second case, we do not remove the stochastic trend and it can be captured by the trend component of the STL decomposition. It is hard to say which option will work best in your case, hence you will need to run both options to see which one results in more accurate forecasts. </li>\n",
|
||||||
" </ul>\n",
|
" </ul>\n",
|
||||||
"</ol>"
|
"</ol>"
|
||||||
],
|
]
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"# 2. Stationarity\n",
|
"# 2. Stationarity\n",
|
||||||
"If the data does not exhibit seasonal patterns, we would like to see if the data is non-stationary. Particularly, we want to see if there is a clear trending behavior. If such behavior is observed, we would like to first difference the data and examine the plot of an auto-correlation function (ACF) known as correlogram. If the data is seasonal, differencing it will not get rid off the seasonality and this will be shown on the correlogram as well.\n",
|
"If the data does not exhibit seasonal patterns, we would like to see if the data is non-stationary. Particularly, we want to see if there is a clear trending behavior. If such behavior is observed, we would like to first difference the data and examine the plot of an auto-correlation function (ACF) known as correlogram. If the data is seasonal, differencing it will not get rid off the seasonality and this will be shown on the correlogram as well.\n",
|
||||||
@@ -236,11 +244,13 @@
|
|||||||
"</ol>\n",
|
"</ol>\n",
|
||||||
"\n",
|
"\n",
|
||||||
"To answer the first question, we run a series of tests (we call them unit root tests)."
|
"To answer the first question, we run a series of tests (we call them unit root tests)."
|
||||||
],
|
]
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# unit root tests\n",
|
"# unit root tests\n",
|
||||||
"test = unit_root_test_wrapper(df[TARGET_COLNAME])\n",
|
"test = unit_root_test_wrapper(df[TARGET_COLNAME])\n",
|
||||||
@@ -248,13 +258,11 @@
|
|||||||
"print(\"Summary table\", \"\\n\", test[\"summary\"], \"\\n\")\n",
|
"print(\"Summary table\", \"\\n\", test[\"summary\"], \"\\n\")\n",
|
||||||
"print(\"Is the {} series stationary?: {}\".format(TARGET_COLNAME, test[\"stationary\"]))\n",
|
"print(\"Is the {} series stationary?: {}\".format(TARGET_COLNAME, test[\"stationary\"]))\n",
|
||||||
"print(\"---------------\", \"\\n\")"
|
"print(\"---------------\", \"\\n\")"
|
||||||
],
|
]
|
||||||
"outputs": [],
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"In the previous cell, we ran a series of unit root tests. The summary table contains the following columns:\n",
|
"In the previous cell, we ran a series of unit root tests. The summary table contains the following columns:\n",
|
||||||
"<ul> \n",
|
"<ul> \n",
|
||||||
@@ -276,11 +284,13 @@
|
|||||||
"Each of the tests shows that the original time series is non-stationary. The final decision is based on the majority rule. If, there is a split decision, the algorithm will claim it is stationary. We run a series of tests because each test by itself may not be accurate. In many cases when there are conflicting test results, the user needs to make determination if the series is stationary or not.\n",
|
"Each of the tests shows that the original time series is non-stationary. The final decision is based on the majority rule. If, there is a split decision, the algorithm will claim it is stationary. We run a series of tests because each test by itself may not be accurate. In many cases when there are conflicting test results, the user needs to make determination if the series is stationary or not.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Since we found the series to be non-stationary, we will difference it and then test if the differenced series is stationary."
|
"Since we found the series to be non-stationary, we will difference it and then test if the differenced series is stationary."
|
||||||
],
|
]
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
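The `unit_root_test_wrapper` helper used in these cells is not part of this diff. A minimal sketch of one of the tests such a wrapper typically aggregates, using the Augmented Dickey-Fuller test from statsmodels (the random-walk series below is illustrative; in the notebook it would be `df[TARGET_COLNAME]`):

```python
import numpy as np
import pandas as pd
from statsmodels.tsa.stattools import adfuller

# Illustrative random-walk series standing in for df[TARGET_COLNAME].
rng = np.random.default_rng(0)
series = pd.Series(rng.normal(size=200).cumsum())

# ADF null hypothesis: the series has a unit root (is non-stationary).
adf_stat, p_value, *_ = adfuller(series.dropna())
print(f"ADF statistic: {adf_stat:.3f}, p-value: {p_value:.3f}")

# Differencing the series once should make it stationary.
adf_stat_diff, p_value_diff, *_ = adfuller(series.diff().dropna())
print(f"After differencing: statistic {adf_stat_diff:.3f}, p-value {p_value_diff:.3f}")
```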
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# unit root tests\n",
|
"# unit root tests\n",
|
||||||
"test = unit_root_test_wrapper(df[TARGET_COLNAME].diff().dropna())\n",
|
"test = unit_root_test_wrapper(df[TARGET_COLNAME].diff().dropna())\n",
|
||||||
@@ -288,20 +298,20 @@
|
|||||||
"print(\"Summary table\", \"\\n\", test[\"summary\"], \"\\n\")\n",
|
"print(\"Summary table\", \"\\n\", test[\"summary\"], \"\\n\")\n",
|
||||||
"print(\"Is the {} series stationary?: {}\".format(TARGET_COLNAME, test[\"stationary\"]))\n",
|
"print(\"Is the {} series stationary?: {}\".format(TARGET_COLNAME, test[\"stationary\"]))\n",
|
||||||
"print(\"---------------\", \"\\n\")"
|
"print(\"---------------\", \"\\n\")"
|
||||||
],
|
]
|
||||||
"outputs": [],
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"Four out of five tests show that the series in first differences is stationary. Notice that this decision is not unanimous. Next, let's plot the original series in first-differences to illustrate the difference between non-stationary (unit root) process vs the stationary one."
|
"Four out of five tests show that the series in first differences is stationary. Notice that this decision is not unanimous. Next, let's plot the original series in first-differences to illustrate the difference between non-stationary (unit root) process vs the stationary one."
|
||||||
],
|
]
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# plot original and stationary data\n",
|
"# plot original and stationary data\n",
|
||||||
"fig = plt.figure(figsize=(10, 10))\n",
|
"fig = plt.figure(figsize=(10, 10))\n",
|
||||||
@@ -311,28 +321,26 @@
|
|||||||
"ax2.plot(df[TARGET_COLNAME].diff().dropna(), \"-b\")\n",
|
"ax2.plot(df[TARGET_COLNAME].diff().dropna(), \"-b\")\n",
|
||||||
"ax1.title.set_text(\"Original data\")\n",
|
"ax1.title.set_text(\"Original data\")\n",
|
||||||
"ax2.title.set_text(\"Data in first differences\")"
|
"ax2.title.set_text(\"Data in first differences\")"
|
||||||
],
|
]
|
||||||
"outputs": [],
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"If you were asked a question \"What is the mean of the series before and after 2008?\", for the series titled \"Original data\" the mean values will be significantly different. This implies that the first moment of the series (in this case, it is the mean) is time dependent, i.e., mean changes depending on the interval one is looking at. Thus, the series is deemed to be non-stationary. On the other hand, for the series titled \"Data in first differences\" the means for both periods are roughly the same. Hence, the first moment is time invariant; meaning it does not depend on the interval of time one is looking at. In this example it is easy to visually distinguish between stationary and non-stationary data. Often this distinction is not easy to make, therefore we rely on the statistical tests described above to help us make an informed decision. "
|
"If you were asked a question \"What is the mean of the series before and after 2008?\", for the series titled \"Original data\" the mean values will be significantly different. This implies that the first moment of the series (in this case, it is the mean) is time dependent, i.e., mean changes depending on the interval one is looking at. Thus, the series is deemed to be non-stationary. On the other hand, for the series titled \"Data in first differences\" the means for both periods are roughly the same. Hence, the first moment is time invariant; meaning it does not depend on the interval of time one is looking at. In this example it is easy to visually distinguish between stationary and non-stationary data. Often this distinction is not easy to make, therefore we rely on the statistical tests described above to help us make an informed decision. "
|
||||||
],
|
]
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"<p style=\"font-size:150%; color:blue\"> Conclusion </p>\n",
|
"<p style=\"font-size:150%; color:blue\"> Conclusion </p>\n",
|
||||||
"Since we found the original process to be non-stationary (contains unit root), we will have to model the data in first differences. As a result, we will set the DIFFERENCE_SERIES parameter to True."
|
"Since we found the original process to be non-stationary (contains unit root), we will have to model the data in first differences. As a result, we will set the DIFFERENCE_SERIES parameter to True."
|
||||||
],
|
]
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"# 3 Check if there is a clear auto-regressive pattern\n",
|
"# 3 Check if there is a clear auto-regressive pattern\n",
|
||||||
"We need to determine if we should include lags of the target variable as features in order to improve forecast accuracy. To do this, we will examine the ACF and partial ACF (PACF) plots of the stationary series. In our case, it is a series in first differences.\n",
|
"We need to determine if we should include lags of the target variable as features in order to improve forecast accuracy. To do this, we will examine the ACF and partial ACF (PACF) plots of the stationary series. In our case, it is a series in first differences.\n",
|
||||||
@@ -346,11 +354,11 @@
|
|||||||
" The lag order is on the x-axis while the auto- and partial-correlation coefficients are on the y-axis. Vertical lines that are outside the shaded area represent statistically significant lags. Notice, the ACF function decays to zero and the PACF shows 2 significant spikes (we ignore the first spike for lag 0 in both plots since the linear relationship of any series with itself is always 1). <li/>\n",
|
" The lag order is on the x-axis while the auto- and partial-correlation coefficients are on the y-axis. Vertical lines that are outside the shaded area represent statistically significant lags. Notice, the ACF function decays to zero and the PACF shows 2 significant spikes (we ignore the first spike for lag 0 in both plots since the linear relationship of any series with itself is always 1). <li/>\n",
|
||||||
" </ul>\n",
|
" </ul>\n",
|
||||||
"<ul/>"
|
"<ul/>"
|
||||||
],
|
]
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"<ul>\n",
|
"<ul>\n",
|
||||||
" <li> Question: What do I do if I observe an auto-regressive behavior? </li>\n",
|
" <li> Question: What do I do if I observe an auto-regressive behavior? </li>\n",
|
||||||
@@ -364,32 +372,32 @@
|
|||||||
" <br/>\n",
|
" <br/>\n",
|
||||||
" <li> Next, let's examine the ACF and PACF plots of the stationary target variable (depicted below). Here, we do not see a decay in the ACF, instead we see a decay in PACF. It is hard to make an argument the the target variable exhibits auto-regressive behavior. </li>\n",
|
" <li> Next, let's examine the ACF and PACF plots of the stationary target variable (depicted below). Here, we do not see a decay in the ACF, instead we see a decay in PACF. It is hard to make an argument the the target variable exhibits auto-regressive behavior. </li>\n",
|
||||||
" </ul>"
|
" </ul>"
|
||||||
],
|
]
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# Plot the ACF/PACF for the series in differences\n",
|
"# Plot the ACF/PACF for the series in differences\n",
|
||||||
"fig, ax = plt.subplots(1, 2, figsize=(10, 5))\n",
|
"fig, ax = plt.subplots(1, 2, figsize=(10, 5))\n",
|
||||||
"plot_acf(df[TARGET_COLNAME].diff().dropna().values.squeeze(), ax=ax[0])\n",
|
"plot_acf(df[TARGET_COLNAME].diff().dropna().values.squeeze(), ax=ax[0])\n",
|
||||||
"plot_pacf(df[TARGET_COLNAME].diff().dropna().values.squeeze(), ax=ax[1])\n",
|
"plot_pacf(df[TARGET_COLNAME].diff().dropna().values.squeeze(), ax=ax[1])\n",
|
||||||
"plt.show()"
|
"plt.show()"
|
||||||
],
|
]
|
||||||
"outputs": [],
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"<p style=\"font-size:150%; color:blue\"> Conclusion </p>\n",
|
"<p style=\"font-size:150%; color:blue\"> Conclusion </p>\n",
|
||||||
"Since we do not see a clear indication of an AR(p) process, we will not be using target lags and will set the TARGET_LAGS parameter to None."
|
"Since we do not see a clear indication of an AR(p) process, we will not be using target lags and will set the TARGET_LAGS parameter to None."
|
||||||
],
|
]
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"<p style=\"font-size:150%; color:blue; font-weight: bold\"> AutoML Experiment Settings </p>\n",
|
"<p style=\"font-size:150%; color:blue; font-weight: bold\"> AutoML Experiment Settings </p>\n",
|
||||||
"Based on the analysis performed, we should try the following settings for the AutoML experiment and use them in the \"2_run_experiment\" notebook.\n",
|
"Based on the analysis performed, we should try the following settings for the AutoML experiment and use them in the \"2_run_experiment\" notebook.\n",
|
||||||
@@ -398,11 +406,11 @@
|
|||||||
" <li> DIFFERENCE_SERIES=True </li>\n",
|
" <li> DIFFERENCE_SERIES=True </li>\n",
|
||||||
" <li> TARGET_LAGS=None </li>\n",
|
" <li> TARGET_LAGS=None </li>\n",
|
||||||
"</ul>"
|
"</ul>"
|
||||||
],
|
]
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"# Appendix: ACF, PACF and Lag Selection\n",
|
"# Appendix: ACF, PACF and Lag Selection\n",
|
||||||
"To do this, we will examine the ACF and partial ACF (PACF) plots of the differenced series. \n",
|
"To do this, we will examine the ACF and partial ACF (PACF) plots of the differenced series. \n",
|
||||||
@@ -424,11 +432,11 @@
|
|||||||
" <li> In the interpretation posted above we need to be careful not to confuse the word \"leads\" with \"causes\" since these are not the same thing. We do not know the lagged value of the variable causes it to change. After all, there are probably many other features that may explain the movement in $Y_t$. All we are trying to do in this section is to identify situations when the variable contains the strong auto-regressive components that needs to be included in the model to improve forecast accuracy. </li>\n",
|
" <li> In the interpretation posted above we need to be careful not to confuse the word \"leads\" with \"causes\" since these are not the same thing. We do not know the lagged value of the variable causes it to change. After all, there are probably many other features that may explain the movement in $Y_t$. All we are trying to do in this section is to identify situations when the variable contains the strong auto-regressive components that needs to be included in the model to improve forecast accuracy. </li>\n",
|
||||||
" </ul>\n",
|
" </ul>\n",
|
||||||
"</ul>"
|
"</ul>"
|
||||||
],
|
]
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"<ul>\n",
|
"<ul>\n",
|
||||||
" <li> Question: What is the PACF? </li>\n",
|
" <li> Question: What is the PACF? </li>\n",
|
||||||
@@ -445,11 +453,11 @@
|
|||||||
" </li>\n",
|
" </li>\n",
|
||||||
" </ul>\n",
|
" </ul>\n",
|
||||||
"</ul>"
|
"</ul>"
|
||||||
],
|
]
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"<ul>\n",
|
"<ul>\n",
|
||||||
" <li> Question: Auto-regressive pattern? What are we looking for? </li>\n",
|
" <li> Question: Auto-regressive pattern? What are we looking for? </li>\n",
|
||||||
@@ -461,8 +469,7 @@
|
|||||||
" <li> This is why it is important to examine both the ACF and the PACF plots when trying to determine the auto regressive order for the variable in question. <li/>\n",
|
" <li> This is why it is important to examine both the ACF and the PACF plots when trying to determine the auto regressive order for the variable in question. <li/>\n",
|
||||||
" </ul>\n",
|
" </ul>\n",
|
||||||
"</ul> "
|
"</ul> "
|
||||||
],
|
]
|
||||||
"metadata": {}
|
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
@@ -471,31 +478,31 @@
|
|||||||
"name": "vlbejan"
|
"name": "vlbejan"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
|
"kernel_info": {
|
||||||
|
"name": "python38-azureml"
|
||||||
|
},
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"name": "python38-azureml",
|
"display_name": "Python 3.8 - AzureML",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
"display_name": "Python 3.8 - AzureML"
|
"name": "python38-azureml"
|
||||||
},
|
},
|
||||||
"language_info": {
|
"language_info": {
|
||||||
"name": "python",
|
|
||||||
"version": "3.8.10",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"codemirror_mode": {
|
"codemirror_mode": {
|
||||||
"name": "ipython",
|
"name": "ipython",
|
||||||
"version": 3
|
"version": 3
|
||||||
},
|
},
|
||||||
"pygments_lexer": "ipython3",
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"file_extension": ".py"
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.8.10"
|
||||||
},
|
},
|
||||||
"microsoft": {
|
"microsoft": {
|
||||||
"ms_spell_check": {
|
"ms_spell_check": {
|
||||||
"ms_spell_check_language": "en"
|
"ms_spell_check_language": "en"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"kernel_info": {
|
|
||||||
"name": "python38-azureml"
|
|
||||||
},
|
|
||||||
"nteract": {
|
"nteract": {
|
||||||
"version": "nteract-front-end@1.0.0"
|
"version": "nteract-front-end@1.0.0"
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,22 +2,30 @@
|
|||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Licensed under the MIT License."
|
"Licensed under the MIT License."
|
||||||
],
|
]
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
""
|
""
|
||||||
],
|
]
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"<font color=\"red\" size=\"5\"><strong>!Important!</strong> </br>This notebook is outdated and is not supported by the AutoML Team. Please use the supported version ([link](https://github.com/Azure/azureml-examples/tree/main/sdk/python/jobs/automl-standalone-jobs/automl-forecasting-recipes-univariate)).</font>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"# Running AutoML experiments\n",
|
"# Running AutoML experiments\n",
|
||||||
"\n",
|
"\n",
|
||||||
@@ -26,18 +34,20 @@
|
|||||||
"<br/>\n",
|
"<br/>\n",
|
||||||
"\n",
|
"\n",
|
||||||
"The output generated by this notebook is saved in the `experiment_output`folder."
|
"The output generated by this notebook is saved in the `experiment_output`folder."
|
||||||
],
|
]
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### Setup"
|
"### Setup"
|
||||||
],
|
]
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"import os\n",
|
"import os\n",
|
||||||
"import logging\n",
|
"import logging\n",
|
||||||
@@ -60,21 +70,21 @@
|
|||||||
"np.set_printoptions(precision=4, suppress=True, linewidth=100)\n",
|
"np.set_printoptions(precision=4, suppress=True, linewidth=100)\n",
|
||||||
"pd.set_option(\"display.max_columns\", 500)\n",
|
"pd.set_option(\"display.max_columns\", 500)\n",
|
||||||
"pd.set_option(\"display.width\", 1000)"
|
"pd.set_option(\"display.width\", 1000)"
|
||||||
],
|
]
|
||||||
"outputs": [],
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"As part of the setup you have already created a **Workspace**. You will also need to create a [compute target](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-set-up-training-targets#amlcompute) for your AutoML run. In this tutorial, you create AmlCompute as your training compute resource.\n",
|
"As part of the setup you have already created a **Workspace**. You will also need to create a [compute target](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-set-up-training-targets#amlcompute) for your AutoML run. In this tutorial, you create AmlCompute as your training compute resource.\n",
|
||||||
"> Note that if you have an AzureML Data Scientist role, you will not have permission to create compute resources. Talk to your workspace or IT admin to create the compute targets described in this section, if they do not already exist."
|
"> Note that if you have an AzureML Data Scientist role, you will not have permission to create compute resources. Talk to your workspace or IT admin to create the compute targets described in this section, if they do not already exist."
|
||||||
],
|
]
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
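The hunk below elides the cluster-provisioning lines between `amlcompute_cluster_name = "recipe-cluster"` and the `wait_for_completion` call. A minimal sketch of the usual v1 SDK pattern for that step, assuming the workspace object `ws` from the notebook; the VM size is an illustrative assumption, not taken from the diff:

```python
from azureml.core.compute import AmlCompute, ComputeTarget
from azureml.core.compute_target import ComputeTargetException

amlcompute_cluster_name = "recipe-cluster"
try:
    # Reuse the cluster if it already exists in the workspace.
    compute_target = ComputeTarget(workspace=ws, name=amlcompute_cluster_name)
except ComputeTargetException:
    # VM size below is an illustrative choice, not taken from the diff.
    config = AmlCompute.provisioning_configuration(vm_size="STANDARD_DS12_V2", max_nodes=4)
    compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, config)

compute_target.wait_for_completion(
    show_output=True, min_node_count=None, timeout_in_minutes=20
)
```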
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"ws = Workspace.from_config()\n",
|
"ws = Workspace.from_config()\n",
|
||||||
"amlcompute_cluster_name = \"recipe-cluster\"\n",
|
"amlcompute_cluster_name = \"recipe-cluster\"\n",
|
||||||
@@ -104,22 +114,22 @@
|
|||||||
"compute_target.wait_for_completion(\n",
|
"compute_target.wait_for_completion(\n",
|
||||||
" show_output=True, min_node_count=None, timeout_in_minutes=20\n",
|
" show_output=True, min_node_count=None, timeout_in_minutes=20\n",
|
||||||
")"
|
")"
|
||||||
],
|
]
|
||||||
"outputs": [],
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### Data\n",
|
"### Data\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Here, we will load the data from the csv file and drop the Covid period."
|
"Here, we will load the data from the csv file and drop the Covid period."
|
||||||
],
|
]
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"main_data_loc = \"data\"\n",
|
"main_data_loc = \"data\"\n",
|
||||||
"train_file_name = \"S4248SM144SCEN.csv\"\n",
|
"train_file_name = \"S4248SM144SCEN.csv\"\n",
|
||||||
@@ -137,34 +147,32 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"# remove the Covid period\n",
|
"# remove the Covid period\n",
|
||||||
"df = df.query('{} <= \"{}\"'.format(TIME_COLNAME, COVID_PERIOD_START))"
|
"df = df.query('{} <= \"{}\"'.format(TIME_COLNAME, COVID_PERIOD_START))"
|
||||||
],
|
]
|
||||||
"outputs": [],
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### Set parameters\n",
|
"### Set parameters\n",
|
||||||
"\n",
|
"\n",
|
||||||
"The first set of parameters is based on the analysis performed in the `auto-ml-forecasting-univariate-recipe-experiment-settings` notebook. "
|
"The first set of parameters is based on the analysis performed in the `auto-ml-forecasting-univariate-recipe-experiment-settings` notebook. "
|
||||||
],
|
]
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# set parameters based on the settings notebook analysis\n",
|
"# set parameters based on the settings notebook analysis\n",
|
||||||
"DIFFERENCE_SERIES = True\n",
|
"DIFFERENCE_SERIES = True\n",
|
||||||
"TARGET_LAGS = None\n",
|
"TARGET_LAGS = None\n",
|
||||||
"STL_TYPE = None"
|
"STL_TYPE = None"
|
||||||
],
|
]
|
||||||
"outputs": [],
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"Next, define additional parameters to be used in the <a href=\"https://docs.microsoft.com/en-us/python/api/azureml-train-automl-client/azureml.train.automl.automlconfig?view=azure-ml-py\"> AutoML config </a> class.\n",
|
"Next, define additional parameters to be used in the <a href=\"https://docs.microsoft.com/en-us/python/api/azureml-train-automl-client/azureml.train.automl.automlconfig?view=azure-ml-py\"> AutoML config </a> class.\n",
|
||||||
"\n",
|
"\n",
|
||||||
@@ -179,30 +187,32 @@
|
|||||||
" </ul>\n",
|
" </ul>\n",
|
||||||
" </li>\n",
|
" </li>\n",
|
||||||
"</ul>\n"
|
"</ul>\n"
|
||||||
],
|
]
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# set other parameters\n",
|
"# set other parameters\n",
|
||||||
"FORECAST_HORIZON = 12\n",
|
"FORECAST_HORIZON = 12\n",
|
||||||
"TIME_SERIES_ID_COLNAMES = []\n",
|
"TIME_SERIES_ID_COLNAMES = []\n",
|
||||||
"BLOCKED_MODELS = []"
|
"BLOCKED_MODELS = []"
|
||||||
],
|
]
|
||||||
"outputs": [],
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"To run AutoML, you also need to create an **Experiment**. An Experiment corresponds to a prediction problem you are trying to solve, while a Run corresponds to a specific approach to the problem."
|
"To run AutoML, you also need to create an **Experiment**. An Experiment corresponds to a prediction problem you are trying to solve, while a Run corresponds to a specific approach to the problem."
|
||||||
],
|
]
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# choose a name for the run history container in the workspace\n",
|
"# choose a name for the run history container in the workspace\n",
|
||||||
"if isinstance(TARGET_LAGS, list):\n",
|
"if isinstance(TARGET_LAGS, list):\n",
|
||||||
@@ -229,38 +239,38 @@
|
|||||||
"pd.set_option(\"display.max_colwidth\", None)\n",
|
"pd.set_option(\"display.max_colwidth\", None)\n",
|
||||||
"outputDf = pd.DataFrame(data=output, index=[\"\"])\n",
|
"outputDf = pd.DataFrame(data=output, index=[\"\"])\n",
|
||||||
"print(outputDf.T)"
|
"print(outputDf.T)"
|
||||||
],
|
]
|
||||||
"outputs": [],
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# create output directory\n",
|
"# create output directory\n",
|
||||||
"output_dir = \"experiment_output/{}\".format(experiment_desc)\n",
|
"output_dir = \"experiment_output/{}\".format(experiment_desc)\n",
|
||||||
"if not os.path.exists(output_dir):\n",
|
"if not os.path.exists(output_dir):\n",
|
||||||
" os.makedirs(output_dir)"
|
" os.makedirs(output_dir)"
|
||||||
],
|
]
|
||||||
"outputs": [],
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# difference data and test for unit root\n",
|
"# difference data and test for unit root\n",
|
||||||
"if DIFFERENCE_SERIES:\n",
|
"if DIFFERENCE_SERIES:\n",
|
||||||
" df_delta = df.copy()\n",
|
" df_delta = df.copy()\n",
|
||||||
" df_delta[TARGET_COLNAME] = df[TARGET_COLNAME].diff()\n",
|
" df_delta[TARGET_COLNAME] = df[TARGET_COLNAME].diff()\n",
|
||||||
" df_delta.dropna(axis=0, inplace=True)"
|
" df_delta.dropna(axis=0, inplace=True)"
|
||||||
],
|
]
|
||||||
"outputs": [],
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# split the data into train and test set\n",
|
"# split the data into train and test set\n",
|
||||||
"if DIFFERENCE_SERIES:\n",
|
"if DIFFERENCE_SERIES:\n",
|
||||||
@@ -278,21 +288,21 @@
|
|||||||
" time_colname=TIME_COLNAME,\n",
|
" time_colname=TIME_COLNAME,\n",
|
||||||
" ts_id_colnames=TIME_SERIES_ID_COLNAMES,\n",
|
" ts_id_colnames=TIME_SERIES_ID_COLNAMES,\n",
|
||||||
" )"
|
" )"
|
||||||
],
|
]
|
||||||
"outputs": [],
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### Upload files to the Datastore\n",
|
"### Upload files to the Datastore\n",
|
||||||
"The [Machine Learning service workspace](https://docs.microsoft.com/en-us/azure/machine-learning/service/concept-workspace) is paired with the storage account, which contains the default data store. We will use it to upload the bike share data and create [tabular dataset](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.data.tabulardataset?view=azure-ml-py) for training. A tabular dataset defines a series of lazily-evaluated, immutable operations to load data from the data source into tabular representation."
|
"The [Machine Learning service workspace](https://docs.microsoft.com/en-us/azure/machine-learning/service/concept-workspace) is paired with the storage account, which contains the default data store. We will use it to upload the bike share data and create [tabular dataset](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.data.tabulardataset?view=azure-ml-py) for training. A tabular dataset defines a series of lazily-evaluated, immutable operations to load data from the data source into tabular representation."
|
||||||
],
|
]
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
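A minimal sketch of the upload-and-register pattern described above, assuming the `train.csv` file written by the next cell and the workspace object `ws` from the notebook; the target path in the datastore is illustrative:

```python
from azureml.core import Dataset

# Upload the local CSV to the workspace's default datastore ...
datastore = ws.get_default_datastore()
datastore.upload_files(
    files=["train.csv"],
    target_path="univariate-recipe/",  # illustrative path, not taken from the diff
    overwrite=True,
)

# ... and create a lazily evaluated tabular dataset on top of it.
train_dataset = Dataset.Tabular.from_delimited_files(
    path=(datastore, "univariate-recipe/train.csv")
)
print(train_dataset.to_pandas_dataframe().head())
```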
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"df_train.to_csv(\"train.csv\", index=False)\n",
|
"df_train.to_csv(\"train.csv\", index=False)\n",
|
||||||
"df_test.to_csv(\"test.csv\", index=False)\n",
|
"df_test.to_csv(\"test.csv\", index=False)\n",
|
||||||
@@ -309,20 +319,20 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"# print the first 5 rows of the Dataset\n",
|
"# print the first 5 rows of the Dataset\n",
|
||||||
"train_dataset.to_pandas_dataframe().reset_index(drop=True).head(5)"
|
"train_dataset.to_pandas_dataframe().reset_index(drop=True).head(5)"
|
||||||
],
|
]
|
||||||
"outputs": [],
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### Config AutoML"
|
"### Config AutoML"
|
||||||
],
|
]
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"time_series_settings = {\n",
|
"time_series_settings = {\n",
|
||||||
" \"time_column_name\": TIME_COLNAME,\n",
|
" \"time_column_name\": TIME_COLNAME,\n",
|
||||||
@@ -349,76 +359,76 @@
|
|||||||
" compute_target=compute_target,\n",
|
" compute_target=compute_target,\n",
|
||||||
" **time_series_settings,\n",
|
" **time_series_settings,\n",
|
||||||
")"
|
")"
|
||||||
],
|
]
|
||||||
"outputs": [],
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"We will now run the experiment, you can go to Azure ML portal to view the run details."
|
"We will now run the experiment, you can go to Azure ML portal to view the run details."
|
||||||
],
|
]
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"remote_run = experiment.submit(automl_config, show_output=False)\n",
|
"remote_run = experiment.submit(automl_config, show_output=False)\n",
|
||||||
"remote_run.wait_for_completion()"
|
"remote_run.wait_for_completion()"
|
||||||
],
|
]
|
||||||
"outputs": [],
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### Retrieve the Best Run details\n",
|
"### Retrieve the Best Run details\n",
|
||||||
"Below we retrieve the best Run object from among all the runs in the experiment."
|
"Below we retrieve the best Run object from among all the runs in the experiment."
|
||||||
],
|
]
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"best_run = remote_run.get_best_child()\n",
|
"best_run = remote_run.get_best_child()\n",
|
||||||
"best_run"
|
"best_run"
|
||||||
],
|
]
|
||||||
"outputs": [],
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### Inference\n",
|
"### Inference\n",
|
||||||
"\n",
|
"\n",
|
||||||
"We now use the best fitted model from the AutoML Run to make forecasts for the test set. We will do batch scoring on the test dataset which should have the same schema as training dataset.\n",
|
"We now use the best fitted model from the AutoML Run to make forecasts for the test set. We will do batch scoring on the test dataset which should have the same schema as training dataset.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"The inference will run on a remote compute. In this example, it will re-use the training compute."
|
"The inference will run on a remote compute. In this example, it will re-use the training compute."
|
||||||
],
|
]
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"test_experiment = Experiment(ws, experiment_name + \"_inference\")"
|
"test_experiment = Experiment(ws, experiment_name + \"_inference\")"
|
||||||
],
|
]
|
||||||
"outputs": [],
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## Retreiving forecasts from the model\n",
|
"## Retreiving forecasts from the model\n",
|
||||||
"We have created a function called `run_forecast` that submits the test data to the best model determined during the training run and retrieves forecasts. This function uses a helper script `forecasting_script` which is uploaded and expecuted on the remote compute."
|
"We have created a function called `run_forecast` that submits the test data to the best model determined during the training run and retrieves forecasts. This function uses a helper script `forecasting_script` which is uploaded and expecuted on the remote compute."
|
||||||
],
|
]
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
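The next code cell imports `run_remote_inference` from the local `run_forecast.py` helper (the helper itself is not shown in this diff). As a rough sketch of what such a helper typically does, the snippet below wraps `forecasting_script.py` in a `ScriptRunConfig` and submits it to the training compute; the function and argument names are illustrative assumptions, not the actual helper's API.

```python
# Sketch only: what a helper like run_remote_inference typically does.
# The helper name, arguments, and folder layout below are assumptions.
from azureml.core import ScriptRunConfig


def run_remote_inference_sketch(test_experiment, compute_target, train_run,
                                test_dataset, target_column_name,
                                inference_folder="./forecast"):
    # Reuse the environment of the best training run so dependencies match.
    inference_env = train_run.get_environment()

    config = ScriptRunConfig(
        source_directory=inference_folder,
        script="forecasting_script.py",
        arguments=[
            "--target_column_name", target_column_name,
            "--test_dataset", test_dataset.as_named_input("test_data"),
        ],
        compute_target=compute_target,
        environment=inference_env,
    )
    # The returned Run writes outputs/predictions.csv, which is downloaded below.
    return test_experiment.submit(config)
```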
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from run_forecast import run_remote_inference\n",
|
"from run_forecast import run_remote_inference\n",
|
||||||
"\n",
|
"\n",
|
||||||
@@ -432,31 +442,31 @@
|
|||||||
"remote_run.wait_for_completion(show_output=False)\n",
|
"remote_run.wait_for_completion(show_output=False)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"remote_run.download_file(\"outputs/predictions.csv\", f\"{output_dir}/predictions.csv\")"
|
"remote_run.download_file(\"outputs/predictions.csv\", f\"{output_dir}/predictions.csv\")"
|
||||||
],
|
]
|
||||||
"outputs": [],
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### Download the prediction result for metrics calcuation\n",
|
"### Download the prediction result for metrics calcuation\n",
|
||||||
"The test data with predictions are saved in artifact `outputs/predictions.csv`. We will use it to calculate accuracy metrics and vizualize predictions versus actuals."
|
"The test data with predictions are saved in artifact `outputs/predictions.csv`. We will use it to calculate accuracy metrics and vizualize predictions versus actuals."
|
||||||
],
|
]
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"X_trans = pd.read_csv(f\"{output_dir}/predictions.csv\", parse_dates=[TIME_COLNAME])\n",
|
"X_trans = pd.read_csv(f\"{output_dir}/predictions.csv\", parse_dates=[TIME_COLNAME])\n",
|
||||||
"X_trans.head()"
|
"X_trans.head()"
|
||||||
],
|
]
|
||||||
"outputs": [],
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# convert forecast in differences to levels\n",
|
"# convert forecast in differences to levels\n",
|
||||||
"def convert_fcst_diff_to_levels(fcst, yt, df_orig):\n",
|
"def convert_fcst_diff_to_levels(fcst, yt, df_orig):\n",
|
||||||
@@ -470,13 +480,13 @@
|
|||||||
" )\n",
|
" )\n",
|
||||||
" out.rename(columns={TARGET_COLNAME: \"actual_level\"}, inplace=True)\n",
|
" out.rename(columns={TARGET_COLNAME: \"actual_level\"}, inplace=True)\n",
|
||||||
" return out"
|
" return out"
|
||||||
],
|
]
|
||||||
"outputs": [],
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"if DIFFERENCE_SERIES:\n",
|
"if DIFFERENCE_SERIES:\n",
|
||||||
" # convert forecast in differences to the levels\n",
|
" # convert forecast in differences to the levels\n",
|
||||||
@@ -490,20 +500,20 @@
|
|||||||
" fcst_df[\"predicted_level\"] = y_predictions\n",
|
" fcst_df[\"predicted_level\"] = y_predictions\n",
|
||||||
"\n",
|
"\n",
|
||||||
"del X_trans"
|
"del X_trans"
|
||||||
],
|
]
|
||||||
"outputs": [],
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### Calculate metrics and save output"
|
"### Calculate metrics and save output"
|
||||||
],
|
]
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# compute metrics\n",
|
"# compute metrics\n",
|
||||||
"metrics_df = compute_metrics(fcst_df=fcst_df, metric_name=None, ts_id_colnames=None)\n",
|
"metrics_df = compute_metrics(fcst_df=fcst_df, metric_name=None, ts_id_colnames=None)\n",
|
||||||
@@ -514,20 +524,20 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"metrics_df.to_csv(os.path.join(output_dir, metrics_file_name), index=True)\n",
|
"metrics_df.to_csv(os.path.join(output_dir, metrics_file_name), index=True)\n",
|
||||||
"fcst_df.to_csv(os.path.join(output_dir, fcst_file_name), index=True)"
|
"fcst_df.to_csv(os.path.join(output_dir, fcst_file_name), index=True)"
|
||||||
],
|
]
|
||||||
"outputs": [],
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### Generate and save visuals"
|
"### Generate and save visuals"
|
||||||
],
|
]
|
||||||
"metadata": {}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"plot_df = df.query('{} > \"2010-01-01\"'.format(TIME_COLNAME))\n",
|
"plot_df = df.query('{} > \"2010-01-01\"'.format(TIME_COLNAME))\n",
|
||||||
"plot_df.set_index(TIME_COLNAME, inplace=True)\n",
|
"plot_df.set_index(TIME_COLNAME, inplace=True)\n",
|
||||||
@@ -546,10 +556,7 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"plt.setp(labels, rotation=45)\n",
|
"plt.setp(labels, rotation=45)\n",
|
||||||
"plt.savefig(os.path.join(output_dir, plot_file_name))"
|
"plt.savefig(os.path.join(output_dir, plot_file_name))"
|
||||||
],
|
]
|
||||||
"outputs": [],
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {}
|
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
@@ -558,38 +565,38 @@
|
|||||||
"name": "vlbejan"
|
"name": "vlbejan"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
|
"kernel_info": {
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"name": "python38-azureml",
|
"display_name": "Python 3.8 - AzureML",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
"display_name": "Python 3.8 - AzureML"
|
"name": "python38-azureml"
|
||||||
},
|
},
|
||||||
"language_info": {
|
"language_info": {
|
||||||
"name": "python",
|
|
||||||
"version": "3.8.5",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"codemirror_mode": {
|
"codemirror_mode": {
|
||||||
"name": "ipython",
|
"name": "ipython",
|
||||||
"version": 3
|
"version": 3
|
||||||
},
|
},
|
||||||
"pygments_lexer": "ipython3",
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"file_extension": ".py"
|
"pygments_lexer": "ipython3",
|
||||||
},
|
"version": "3.8.10"
|
||||||
"vscode": {
|
|
||||||
"interpreter": {
|
|
||||||
"hash": "6bd77c88278e012ef31757c15997a7bea8c943977c43d6909403c00ae11d43ca"
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
"microsoft": {
|
"microsoft": {
|
||||||
"ms_spell_check": {
|
"ms_spell_check": {
|
||||||
"ms_spell_check_language": "en"
|
"ms_spell_check_language": "en"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"kernel_info": {
|
|
||||||
"name": "python3"
|
|
||||||
},
|
|
||||||
"nteract": {
|
"nteract": {
|
||||||
"version": "nteract-front-end@1.0.0"
|
"version": "nteract-front-end@1.0.0"
|
||||||
|
},
|
||||||
|
"vscode": {
|
||||||
|
"interpreter": {
|
||||||
|
"hash": "6bd77c88278e012ef31757c15997a7bea8c943977c43d6909403c00ae11d43ca"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ compute instance.
|
|||||||
import argparse
|
import argparse
|
||||||
from azureml.core import Dataset, Run
|
from azureml.core import Dataset, Run
|
||||||
from azureml.automl.core.shared.constants import TimeSeriesInternal
|
from azureml.automl.core.shared.constants import TimeSeriesInternal
|
||||||
from sklearn.externals import joblib
|
import joblib
|
||||||
|
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
|
|||||||
@@ -1,5 +1,21 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||||
|
"\n",
|
||||||
|
"Licensed under the MIT License."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
""
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
|
|||||||
@@ -1,5 +1,27 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"hideCode": false,
|
||||||
|
"hidePrompt": false
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||||
|
"\n",
|
||||||
|
"Licensed under the MIT License."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"hideCode": false,
|
||||||
|
"hidePrompt": false
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
""
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
@@ -495,6 +517,30 @@
|
|||||||
"#### Create conda configuration for model explanations experiment from automl_run object"
|
"#### Create conda configuration for model explanations experiment from automl_run object"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import json\n",
|
||||||
|
"from azureml.core import Environment\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"def get_environment_safe(parent_run):\n",
|
||||||
|
" \"\"\"Get the environment from parent run\"\"\"\n",
|
||||||
|
" try:\n",
|
||||||
|
" return parent_run.get_environment()\n",
|
||||||
|
" except BaseException:\n",
|
||||||
|
" run_details = parent_run.get_details()\n",
|
||||||
|
" run_def = run_details.get(\"runDefinition\")\n",
|
||||||
|
" env = run_def.get(\"environment\")\n",
|
||||||
|
" if env is None:\n",
|
||||||
|
" raise\n",
|
||||||
|
" json.dump(env, open(\"azureml_environment.json\", \"w\"))\n",
|
||||||
|
" return Environment.load_from_directory(\".\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
@@ -502,8 +548,6 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from azureml.core.runconfig import RunConfiguration\n",
|
"from azureml.core.runconfig import RunConfiguration\n",
|
||||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
|
||||||
"import pkg_resources\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"# create a new RunConfig object\n",
|
"# create a new RunConfig object\n",
|
||||||
"conda_run_config = RunConfiguration(framework=\"python\")\n",
|
"conda_run_config = RunConfiguration(framework=\"python\")\n",
|
||||||
@@ -513,7 +557,7 @@
|
|||||||
"conda_run_config.environment.docker.enabled = True\n",
|
"conda_run_config.environment.docker.enabled = True\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# specify CondaDependencies obj\n",
|
"# specify CondaDependencies obj\n",
|
||||||
"conda_run_config.environment = automl_run.get_environment()"
|
"conda_run_config.environment = get_environment_safe(automl_run)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -686,7 +730,7 @@
|
|||||||
" description=\"Get local explanations for Machine test data\",\n",
|
" description=\"Get local explanations for Machine test data\",\n",
|
||||||
")\n",
|
")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"myenv = automl_run.get_environment()\n",
|
"myenv = get_environment_safe(automl_run)\n",
|
||||||
"inference_config = InferenceConfig(entry_script=\"score_explain.py\", environment=myenv)\n",
|
"inference_config = InferenceConfig(entry_script=\"score_explain.py\", environment=myenv)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Use configs and models generated above\n",
|
"# Use configs and models generated above\n",
|
||||||
@@ -909,7 +953,7 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.6.7"
|
"version": "3.8.7"
|
||||||
},
|
},
|
||||||
"tags": [
|
"tags": [
|
||||||
"featurization",
|
"featurization",
|
||||||
|
|||||||
@@ -1,5 +1,21 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||||
|
"\n",
|
||||||
|
"Licensed under the MIT License."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
""
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
|
|||||||
@@ -1,12 +0,0 @@
|
|||||||
# Model Deployment with Azure ML service
|
|
||||||
You can use Azure Machine Learning to package, debug, validate and deploy inference containers to a variety of compute targets. This process is known as "MLOps" (ML operationalization).
|
|
||||||
For more information please check out this article: https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-deploy-and-where
|
|
||||||
|
|
||||||
## Get Started
|
|
||||||
To begin, you will need an ML workspace.
|
|
||||||
For more information please check out this article: https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-workspace
|
|
||||||
|
|
||||||
## Deploy to the cloud
|
|
||||||
You can deploy to the cloud using the Azure ML CLI or the Azure ML SDK.
|
|
||||||
- CLI example: https://aka.ms/azmlcli
|
|
||||||
- Notebook example: [model-register-and-deploy](./model-register-and-deploy.ipynb).
|
|
||||||
@@ -1,599 +0,0 @@
|
|||||||
{
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
|
||||||
"\n",
|
|
||||||
"Licensed under the MIT License."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
""
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# Register model and deploy as webservice in ACI\n",
|
|
||||||
"\n",
|
|
||||||
"Following this notebook, you will:\n",
|
|
||||||
"\n",
|
|
||||||
" - Learn how to register a model in your Azure Machine Learning Workspace.\n",
|
|
||||||
" - Deploy your model as a web service in an Azure Container Instance."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Prerequisites\n",
|
|
||||||
"\n",
|
|
||||||
"If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, make sure you go through the [configuration notebook](../../../configuration.ipynb) to install the Azure Machine Learning Python SDK and create a workspace."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import azureml.core\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"# Check core SDK version number.\n",
|
|
||||||
"print('SDK version:', azureml.core.VERSION)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Initialize workspace\n",
|
|
||||||
"\n",
|
|
||||||
"Create a [Workspace](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.workspace%28class%29?view=azure-ml-py) object from your persisted configuration."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"tags": [
|
|
||||||
"create workspace"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core import Workspace\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"ws = Workspace.from_config()\n",
|
|
||||||
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep='\\n')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Create trained model\n",
|
|
||||||
"\n",
|
|
||||||
"For this example, we will train a small model on scikit-learn's [diabetes dataset](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html). "
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import dill\n",
|
|
||||||
"\n",
|
|
||||||
"from sklearn.datasets import load_diabetes\n",
|
|
||||||
"from sklearn.linear_model import Ridge\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"dataset_x, dataset_y = load_diabetes(return_X_y=True)\n",
|
|
||||||
"\n",
|
|
||||||
"model = Ridge().fit(dataset_x, dataset_y)\n",
|
|
||||||
"\n",
|
|
||||||
"dill.dump(model, open('sklearn_regression_model.pkl', 'wb'))"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Register input and output datasets\n",
|
|
||||||
"\n",
|
|
||||||
"Here, you will register the data used to create the model in your workspace."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import numpy as np\n",
|
|
||||||
"\n",
|
|
||||||
"from azureml.core import Dataset\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"np.savetxt('features.csv', dataset_x, delimiter=',')\n",
|
|
||||||
"np.savetxt('labels.csv', dataset_y, delimiter=',')\n",
|
|
||||||
"\n",
|
|
||||||
"datastore = ws.get_default_datastore()\n",
|
|
||||||
"datastore.upload_files(files=['./features.csv', './labels.csv'],\n",
|
|
||||||
" target_path='sklearn_regression/',\n",
|
|
||||||
" overwrite=True)\n",
|
|
||||||
"\n",
|
|
||||||
"input_dataset = Dataset.Tabular.from_delimited_files(path=[(datastore, 'sklearn_regression/features.csv')])\n",
|
|
||||||
"output_dataset = Dataset.Tabular.from_delimited_files(path=[(datastore, 'sklearn_regression/labels.csv')])"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Register model\n",
|
|
||||||
"\n",
|
|
||||||
"Register a file or folder as a model by calling [Model.register()](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.model.model?view=azure-ml-py#register-workspace--model-path--model-name--tags-none--properties-none--description-none--datasets-none--model-framework-none--model-framework-version-none--child-paths-none-).\n",
|
|
||||||
"\n",
|
|
||||||
"In addition to the content of the model file itself, your registered model will also store model metadata -- model description, tags, and framework information -- that will be useful when managing and deploying models in your workspace. Using tags, for instance, you can categorize your models and apply filters when listing models in your workspace. Also, marking this model with the scikit-learn framework will simplify deploying it as a web service, as we'll see later."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"tags": [
|
|
||||||
"register model from file",
|
|
||||||
"sample-model-register"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import sklearn\n",
|
|
||||||
"\n",
|
|
||||||
"from azureml.core import Model\n",
|
|
||||||
"from azureml.core.resource_configuration import ResourceConfiguration\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"model = Model.register(workspace=ws,\n",
|
|
||||||
" model_name='my-sklearn-model', # Name of the registered model in your workspace.\n",
|
|
||||||
" model_path='./sklearn_regression_model.pkl', # Local file to upload and register as a model.\n",
|
|
||||||
" model_framework=Model.Framework.SCIKITLEARN, # Framework used to create the model.\n",
|
|
||||||
" model_framework_version=sklearn.__version__, # Version of scikit-learn used to create the model.\n",
|
|
||||||
" sample_input_dataset=input_dataset,\n",
|
|
||||||
" sample_output_dataset=output_dataset,\n",
|
|
||||||
" resource_configuration=ResourceConfiguration(cpu=1, memory_in_gb=0.5),\n",
|
|
||||||
" description='Ridge regression model to predict diabetes progression.',\n",
|
|
||||||
" tags={'area': 'diabetes', 'type': 'regression'})\n",
|
|
||||||
"\n",
|
|
||||||
"print('Name:', model.name)\n",
|
|
||||||
"print('Version:', model.version)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Deploy model\n",
|
|
||||||
"\n",
|
|
||||||
"Deploy your model as a web service using [Model.deploy()](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.model.model?view=azure-ml-py#deploy-workspace--name--models--inference-config--deployment-config-none--deployment-target-none-). Web services take one or more models, load them in an environment, and run them on one of several supported deployment targets. For more information on all your options when deploying models, see the [next steps](#Next-steps) section at the end of this notebook.\n",
|
|
||||||
"\n",
|
|
||||||
"For this example, we will deploy your scikit-learn model to an Azure Container Instance (ACI)."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Use a default environment (for supported models)\n",
|
|
||||||
"\n",
|
|
||||||
"The Azure Machine Learning service provides a default environment for supported model frameworks, including scikit-learn, based on the metadata you provided when registering your model. This is the easiest way to deploy your model.\n",
|
|
||||||
"\n",
|
|
||||||
"Even when you deploy your model to ACI with a default environment you can still customize the deploy configuration (i.e. the number of cores and amount of memory made available for the deployment) using the [AciWebservice.deploy_configuration()](https://docs.microsoft.com/python/api/azureml-core/azureml.core.webservice.aci.aciwebservice#deploy-configuration-cpu-cores-none--memory-gb-none--tags-none--properties-none--description-none--location-none--auth-enabled-none--ssl-enabled-none--enable-app-insights-none--ssl-cert-pem-file-none--ssl-key-pem-file-none--ssl-cname-none--dns-name-label-none--). Look at the \"Use a custom environment\" section of this notebook for more information on deploy configuration.\n",
|
|
||||||
"\n",
|
|
||||||
"**Note**: This step can take several minutes."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
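The no-code deployment in the next cell accepts the service defaults. If you need more cores or memory while still using the default environment, you can pass an ACI deploy configuration; a minimal sketch, assuming the `ws` and `model` objects created above:

```python
# Sketch: customize cores/memory for the default-environment (no-code) deployment.
from azureml.core.model import Model
from azureml.core.webservice import AciWebservice

aci_config = AciWebservice.deploy_configuration(cpu_cores=1, memory_gb=1)

service = Model.deploy(ws, 'my-sklearn-service', [model],
                       deployment_config=aci_config, overwrite=True)
service.wait_for_deployment(show_output=True)
```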
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"service_name = 'my-sklearn-service'\n",
|
|
||||||
"\n",
|
|
||||||
"service = Model.deploy(ws, service_name, [model], overwrite=True)\n",
|
|
||||||
"service.wait_for_deployment(show_output=True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"After your model is deployed, perform a call to the web service using [service.run()](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.webservice%28class%29?view=azure-ml-py#run-input-)."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import json\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"input_payload = json.dumps({\n",
|
|
||||||
" 'data': dataset_x[0:2].tolist(),\n",
|
|
||||||
" 'method': 'predict' # If you have a classification model, you can get probabilities by changing this to 'predict_proba'.\n",
|
|
||||||
"})\n",
|
|
||||||
"\n",
|
|
||||||
"output = service.run(input_payload)\n",
|
|
||||||
"\n",
|
|
||||||
"print(output)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"When you are finished testing your service, clean up the deployment with [service.delete()](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.webservice%28class%29?view=azure-ml-py#delete--)."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"service.delete()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Use a custom environment\n",
|
|
||||||
"\n",
|
|
||||||
"If you want more control over how your model is run, if it uses another framework, or if it has special runtime requirements, you can instead specify your own environment and scoring method. Custom environments can be used for any model you want to deploy.\n",
|
|
||||||
"\n",
|
|
||||||
"Specify the model's runtime environment by creating an [Environment](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.environment%28class%29?view=azure-ml-py) object and providing the [CondaDependencies](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.conda_dependencies.condadependencies?view=azure-ml-py) needed by your model."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core import Environment\n",
|
|
||||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"environment = Environment('my-sklearn-environment')\n",
|
|
||||||
"environment.python.conda_dependencies = CondaDependencies.create(conda_packages=[\n",
|
|
||||||
" 'pip==20.2.4'],\n",
|
|
||||||
" pip_packages=[\n",
|
|
||||||
" 'azureml-defaults',\n",
|
|
||||||
" 'inference-schema[numpy-support]',\n",
|
|
||||||
" 'joblib',\n",
|
|
||||||
" 'dill==0.3.6',\n",
|
|
||||||
" 'numpy==1.23',\n",
|
|
||||||
" 'scikit-learn=={}'.format(sklearn.__version__)\n",
|
|
||||||
"])"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"When using a custom environment, you must also provide Python code for initializing and running your model. An example script is included with this notebook."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"with open('score.py') as f:\n",
|
|
||||||
" print(f.read())"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Deploy your model in the custom environment by providing an [InferenceConfig](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.model.inferenceconfig?view=azure-ml-py) object to [Model.deploy()](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.model.model?view=azure-ml-py#deploy-workspace--name--models--inference-config--deployment-config-none--deployment-target-none-). In this case we are also using the [AciWebservice.deploy_configuration()](https://docs.microsoft.com/python/api/azureml-core/azureml.core.webservice.aci.aciwebservice#deploy-configuration-cpu-cores-none--memory-gb-none--tags-none--properties-none--description-none--location-none--auth-enabled-none--ssl-enabled-none--enable-app-insights-none--ssl-cert-pem-file-none--ssl-key-pem-file-none--ssl-cname-none--dns-name-label-none--) method to generate a custom deploy configuration.\n",
|
|
||||||
"\n",
|
|
||||||
"**Note**: This step can take several minutes."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"tags": [
|
|
||||||
"azuremlexception-remarks-sample",
|
|
||||||
"sample-aciwebservice-deploy-config"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.model import InferenceConfig\n",
|
|
||||||
"from azureml.core.webservice import AciWebservice\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"service_name = 'my-custom-env-service'\n",
|
|
||||||
"\n",
|
|
||||||
"inference_config = InferenceConfig(entry_script='score.py', environment=environment)\n",
|
|
||||||
"aci_config = AciWebservice.deploy_configuration(cpu_cores=1, memory_gb=1)\n",
|
|
||||||
"\n",
|
|
||||||
"service = Model.deploy(workspace=ws,\n",
|
|
||||||
" name=service_name,\n",
|
|
||||||
" models=[model],\n",
|
|
||||||
" inference_config=inference_config,\n",
|
|
||||||
" deployment_config=aci_config,\n",
|
|
||||||
" overwrite=True)\n",
|
|
||||||
"service.wait_for_deployment(show_output=True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"After your model is deployed, make a call to the web service using [service.run()](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.webservice%28class%29?view=azure-ml-py#run-input-)."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"input_payload = json.dumps({\n",
|
|
||||||
" 'data': dataset_x[0:2].tolist()\n",
|
|
||||||
"})\n",
|
|
||||||
"\n",
|
|
||||||
"output = service.run(input_payload)\n",
|
|
||||||
"\n",
|
|
||||||
"print(output)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"When you are finished testing your service, clean up the deployment with [service.delete()](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.webservice%28class%29?view=azure-ml-py#delete--)."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"service.delete()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Model Profiling\n",
|
|
||||||
"\n",
|
|
||||||
"Profile your model to understand how much CPU and memory the service, created as a result of its deployment, will need. Profiling returns information such as CPU usage, memory usage, and response latency. It also provides a CPU and memory recommendation based on the resource usage. You can profile your model (or more precisely the service built based on your model) on any CPU and/or memory combination where 0.1 <= CPU <= 3.5 and 0.1GB <= memory <= 15GB. If you do not provide a CPU and/or memory requirement, we will test it on the default configuration of 3.5 CPU and 15GB memory.\n",
|
|
||||||
"\n",
|
|
||||||
"In order to profile your model you will need:\n",
|
|
||||||
"- a registered model\n",
|
|
||||||
"- an entry script\n",
|
|
||||||
"- an inference configuration\n",
|
|
||||||
"- a single column tabular dataset, where each row contains a string representing sample request data sent to the service.\n",
|
|
||||||
"\n",
|
|
||||||
"Please, note that profiling is a long running operation and can take up to 25 minutes depending on the size of the dataset.\n",
|
|
||||||
"\n",
|
|
||||||
"At this point we only support profiling of services that expect their request data to be a string, for example: string serialized json, text, string serialized image, etc. The content of each row of the dataset (string) will be put into the body of the HTTP request and sent to the service encapsulating the model for scoring.\n",
|
|
||||||
"\n",
|
|
||||||
"Below is an example of how you can construct an input dataset to profile a service which expects its incoming requests to contain serialized json. In this case we created a dataset based one hundred instances of the same request data. In real world scenarios however, we suggest that you use larger datasets with various inputs, especially if your model resource usage/behavior is input dependent."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"You may want to register datasets using the register() method to your workspace so they can be shared with others, reused and referred to by name in your script.\n",
|
|
||||||
"You can try get the dataset first to see if it's already registered."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core import Datastore\n",
|
|
||||||
"from azureml.core.dataset import Dataset\n",
|
|
||||||
"from azureml.data import dataset_type_definitions\n",
|
|
||||||
"\n",
|
|
||||||
"dataset_name='diabetes_sample_request_data'\n",
|
|
||||||
"\n",
|
|
||||||
"dataset_registered = False\n",
|
|
||||||
"try:\n",
|
|
||||||
" sample_request_data = Dataset.get_by_name(workspace = ws, name = dataset_name)\n",
|
|
||||||
" dataset_registered = True\n",
|
|
||||||
"except:\n",
|
|
||||||
" print(\"The dataset {} is not registered in workspace yet.\".format(dataset_name))\n",
|
|
||||||
"\n",
|
|
||||||
"if not dataset_registered:\n",
|
|
||||||
" # create a string that can be utf-8 encoded and\n",
|
|
||||||
" # put in the body of the request\n",
|
|
||||||
" serialized_input_json = json.dumps({\n",
|
|
||||||
" 'data': [\n",
|
|
||||||
" [ 0.03807591, 0.05068012, 0.06169621, 0.02187235, -0.0442235,\n",
|
|
||||||
" -0.03482076, -0.04340085, -0.00259226, 0.01990842, -0.01764613]\n",
|
|
||||||
" ]\n",
|
|
||||||
" })\n",
|
|
||||||
" dataset_content = []\n",
|
|
||||||
" for i in range(100):\n",
|
|
||||||
" dataset_content.append(serialized_input_json)\n",
|
|
||||||
" dataset_content = '\\n'.join(dataset_content)\n",
|
|
||||||
" file_name = \"{}.txt\".format(dataset_name)\n",
|
|
||||||
" f = open(file_name, 'w')\n",
|
|
||||||
" f.write(dataset_content)\n",
|
|
||||||
" f.close()\n",
|
|
||||||
"\n",
|
|
||||||
" # upload the txt file created above to the Datastore and create a dataset from it\n",
|
|
||||||
" data_store = Datastore.get_default(ws)\n",
|
|
||||||
" data_store.upload_files(['./' + file_name], target_path='sample_request_data')\n",
|
|
||||||
" datastore_path = [(data_store, 'sample_request_data' +'/' + file_name)]\n",
|
|
||||||
" sample_request_data = Dataset.Tabular.from_delimited_files(\n",
|
|
||||||
" datastore_path,\n",
|
|
||||||
" separator='\\n',\n",
|
|
||||||
" infer_column_types=True,\n",
|
|
||||||
" header=dataset_type_definitions.PromoteHeadersBehavior.NO_HEADERS)\n",
|
|
||||||
" sample_request_data = sample_request_data.register(workspace=ws,\n",
|
|
||||||
" name=dataset_name,\n",
|
|
||||||
" create_new_version=True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Now that we have an input dataset we are ready to go ahead with profiling. In this case we are testing the previously introduced sklearn regression model on 1 CPU and 0.5 GB memory. The memory usage and recommendation presented in the result is measured in Gigabytes. The CPU usage and recommendation is measured in CPU cores."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from datetime import datetime\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"environment = Environment('my-sklearn-environment')\n",
|
|
||||||
"environment.python.conda_dependencies = CondaDependencies.create(conda_packages=[\n",
|
|
||||||
" 'pip==20.2.4'],\n",
|
|
||||||
" pip_packages=[\n",
|
|
||||||
" 'azureml-defaults',\n",
|
|
||||||
" 'inference-schema[numpy-support]',\n",
|
|
||||||
" 'joblib',\n",
|
|
||||||
" 'dill==0.3.6',\n",
|
|
||||||
" 'numpy==1.23',\n",
|
|
||||||
" 'scikit-learn=={}'.format(sklearn.__version__)\n",
|
|
||||||
"])\n",
|
|
||||||
"inference_config = InferenceConfig(entry_script='score.py', environment=environment)\n",
|
|
||||||
"# if cpu and memory_in_gb parameters are not provided\n",
|
|
||||||
"# the model will be profiled on default configuration of\n",
|
|
||||||
"# 3.5CPU and 15GB memory\n",
|
|
||||||
"profile = Model.profile(ws,\n",
|
|
||||||
" 'rgrsn-%s' % datetime.now().strftime('%m%d%Y-%H%M%S'),\n",
|
|
||||||
" [model],\n",
|
|
||||||
" inference_config,\n",
|
|
||||||
" input_dataset=sample_request_data,\n",
|
|
||||||
" cpu=1.0,\n",
|
|
||||||
" memory_in_gb=0.5)\n",
|
|
||||||
"\n",
|
|
||||||
"# profiling is a long running operation and may take up to 25 min\n",
|
|
||||||
"profile.wait_for_completion(True)\n",
|
|
||||||
"details = profile.get_details()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Model packaging\n",
|
|
||||||
"\n",
|
|
||||||
"If you want to build a Docker image that encapsulates your model and its dependencies, you can use the model packaging option. The output image will be pushed to your workspace's ACR.\n",
|
|
||||||
"\n",
|
|
||||||
"You must include an Environment object in your inference configuration to use `Model.package()`.\n",
|
|
||||||
"\n",
|
|
||||||
"```python\n",
|
|
||||||
"package = Model.package(ws, [model], inference_config)\n",
|
|
||||||
"package.wait_for_creation(show_output=True) # Or show_output=False to hide the Docker build logs.\n",
|
|
||||||
"package.pull()\n",
|
|
||||||
"```\n",
|
|
||||||
"\n",
|
|
||||||
"Instead of a fully-built image, you can also generate a Dockerfile and download all the assets needed to build an image on top of your Environment.\n",
|
|
||||||
"\n",
|
|
||||||
"```python\n",
|
|
||||||
"package = Model.package(ws, [model], inference_config, generate_dockerfile=True)\n",
|
|
||||||
"package.wait_for_creation(show_output=True)\n",
|
|
||||||
"package.save(\"./local_context_dir\")\n",
|
|
||||||
"```"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Next steps\n",
|
|
||||||
"\n",
|
|
||||||
" - To run a production-ready web service, see the [notebook on deployment to Azure Kubernetes Service](../production-deploy-to-aks/production-deploy-to-aks.ipynb).\n",
|
|
||||||
" - To run a local web service, see the [notebook on deployment to a local Docker container](../deploy-to-local/register-model-deploy-local.ipynb).\n",
|
|
||||||
" - For more information on datasets, see the [notebook on training with datasets](../../work-with-data/datasets-tutorial/train-with-datasets/train-with-datasets.ipynb).\n",
|
|
||||||
" - For more information on environments, see the [notebook on using environments](../../training/using-environments/using-environments.ipynb).\n",
|
|
||||||
" - For information on all the available deployment targets, see [“How and where to deploy models”](https://docs.microsoft.com/azure/machine-learning/v1/how-to-deploy-and-where#choose-a-compute-target)."
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"authors": [
|
|
||||||
{
|
|
||||||
"name": "vaidyas"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"category": "deployment",
|
|
||||||
"compute": [
|
|
||||||
"None"
|
|
||||||
],
|
|
||||||
"datasets": [
|
|
||||||
"Diabetes"
|
|
||||||
],
|
|
||||||
"deployment": [
|
|
||||||
"Azure Container Instance"
|
|
||||||
],
|
|
||||||
"exclude_from_index": false,
|
|
||||||
"framework": [
|
|
||||||
"Scikit-learn"
|
|
||||||
],
|
|
||||||
"friendly_name": "Register model and deploy as webservice",
|
|
||||||
"index_order": 3,
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3.8 - AzureML",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python38-azureml"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.7.0"
|
|
||||||
},
|
|
||||||
"star_tag": [
|
|
||||||
"featured"
|
|
||||||
],
|
|
||||||
"tags": [
|
|
||||||
"None"
|
|
||||||
],
|
|
||||||
"task": "Deploy a model with Azure Machine Learning"
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 2
|
|
||||||
}
|
|
||||||
@@ -1,6 +0,0 @@
|
|||||||
name: model-register-and-deploy
|
|
||||||
dependencies:
|
|
||||||
- pip:
|
|
||||||
- azureml-sdk
|
|
||||||
- numpy
|
|
||||||
- scikit-learn
|
|
||||||
@@ -1,38 +0,0 @@
|
|||||||
import joblib
|
|
||||||
import numpy as np
|
|
||||||
import os
|
|
||||||
|
|
||||||
from inference_schema.schema_decorators import input_schema, output_schema
|
|
||||||
from inference_schema.parameter_types.numpy_parameter_type import NumpyParameterType
|
|
||||||
|
|
||||||
|
|
||||||
# The init() method is called once, when the web service starts up.
|
|
||||||
#
|
|
||||||
# Typically you would deserialize the model file, as shown here using joblib,
|
|
||||||
# and store it in a global variable so your run() method can access it later.
|
|
||||||
def init():
|
|
||||||
global model
|
|
||||||
|
|
||||||
# The AZUREML_MODEL_DIR environment variable indicates
|
|
||||||
# a directory containing the model file you registered.
|
|
||||||
model_filename = 'sklearn_regression_model.pkl'
|
|
||||||
model_path = os.path.join(os.environ['AZUREML_MODEL_DIR'], model_filename)
|
|
||||||
|
|
||||||
model = joblib.load(model_path)
|
|
||||||
|
|
||||||
|
|
||||||
# The run() method is called each time a request is made to the scoring API.
|
|
||||||
#
|
|
||||||
# Shown here are the optional input_schema and output_schema decorators
|
|
||||||
# from the inference-schema pip package. Using these decorators on your
|
|
||||||
# run() method parses and validates the incoming payload against
|
|
||||||
# the example input you provide here. This will also generate a Swagger
|
|
||||||
# API document for your web service.
|
|
||||||
@input_schema('data', NumpyParameterType(np.array([[0.1, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8, 8.9, 9.0]])))
|
|
||||||
@output_schema(NumpyParameterType(np.array([4429.929236457418])))
|
|
||||||
def run(data):
|
|
||||||
# Use the model object loaded by init().
|
|
||||||
result = model.predict(data)
|
|
||||||
|
|
||||||
# You can return any JSON-serializable object.
|
|
||||||
return result.tolist()
|
|
||||||
@@ -123,7 +123,7 @@
|
|||||||
"import pickle\n",
|
"import pickle\n",
|
||||||
"import json\n",
|
"import json\n",
|
||||||
"import numpy\n",
|
"import numpy\n",
|
||||||
"from sklearn.externals import joblib\n",
|
"import joblib\n",
|
||||||
"from sklearn.linear_model import Ridge\n",
|
"from sklearn.linear_model import Ridge\n",
|
||||||
"import time\n",
|
"import time\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
|||||||
@@ -260,7 +260,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"from azureml.core.conda_dependencies import CondaDependencies \n",
|
"from azureml.core.conda_dependencies import CondaDependencies \n",
|
||||||
"\n",
|
"\n",
|
||||||
"myenv = CondaDependencies.create(pip_packages=[\"numpy\", \"onnxruntime\", \"azureml-core\", \"azureml-defaults\"])\n",
|
"myenv = CondaDependencies.create(pip_packages=[\"numpy\", \"onnxruntime==1.15.1\", \"azureml-core\", \"azureml-defaults\"])\n",
|
||||||
"\n",
|
"\n",
|
||||||
"with open(\"myenv.yml\",\"w\") as f:\n",
|
"with open(\"myenv.yml\",\"w\") as f:\n",
|
||||||
" f.write(myenv.serialize_to_string())"
|
" f.write(myenv.serialize_to_string())"
|
||||||
|
|||||||
@@ -1,801 +0,0 @@
|
|||||||
{
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Copyright (c) Microsoft Corporation. All rights reserved. \n",
|
|
||||||
"Licensed under the MIT License."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
""
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# Facial Expression Recognition (FER+) using ONNX Runtime on Azure ML\n",
|
|
||||||
"\n",
|
|
||||||
"This example shows how to deploy an image classification neural network using the Facial Expression Recognition ([FER](https://www.kaggle.com/c/challenges-in-representation-learning-facial-expression-recognition-challenge/data)) dataset and Open Neural Network eXchange format ([ONNX](http://aka.ms/onnxdocarticle)) on the Azure Machine Learning platform. This tutorial will show you how to deploy a FER+ model from the [ONNX model zoo](https://github.com/onnx/models), use it to make predictions using ONNX Runtime Inference, and deploy it as a web service in Azure.\n",
|
|
||||||
"\n",
|
|
||||||
"Throughout this tutorial, we will be referring to ONNX, a neural network exchange format used to represent deep learning models. With ONNX, AI developers can more easily move models between state-of-the-art tools (CNTK, PyTorch, Caffe, MXNet, TensorFlow) and choose the combination that is best for them. ONNX is developed and supported by a community of partners including Microsoft AI, Facebook, and Amazon. For more information, explore the [ONNX website](http://onnx.ai) and [open source files](https://github.com/onnx).\n",
|
|
||||||
"\n",
|
|
||||||
"[ONNX Runtime](https://aka.ms/onnxruntime-python) is the runtime engine that enables evaluation of trained machine learning (Traditional ML and Deep Learning) models with high performance and low resource utilization. We use the CPU version of ONNX Runtime in this tutorial, but will soon be releasing an additional tutorial for deploying this model using ONNX Runtime GPU.\n",
|
|
||||||
"\n",
|
|
||||||
"#### Tutorial Objectives:\n",
|
|
||||||
"\n",
|
|
||||||
"1. Describe the FER+ dataset and pretrained Convolutional Neural Net ONNX model for Emotion Recognition, stored in the ONNX model zoo.\n",
|
|
||||||
"2. Deploy and run the pretrained FER+ ONNX model on an Azure Machine Learning instance\n",
|
|
||||||
"3. Predict labels for test set data points in the cloud using ONNX Runtime and Azure ML"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Prerequisites\n",
|
|
||||||
"\n",
|
|
||||||
"### 1. Install Azure ML SDK and create a new workspace\n",
|
|
||||||
"If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, please follow [Azure ML configuration notebook](../../../configuration.ipynb) to set up your environment.\n",
|
|
||||||
"\n",
|
|
||||||
"### 2. Install additional packages needed for this Notebook\n",
|
|
||||||
"You need to install the popular plotting library `matplotlib`, the image manipulation library `opencv`, and the `onnx` library in the conda environment where Azure Maching Learning SDK is installed.\n",
|
|
||||||
"\n",
|
|
||||||
"```sh\n",
|
|
||||||
"(myenv) $ pip install matplotlib onnx opencv-python\n",
|
|
||||||
"```\n",
|
|
||||||
"\n",
|
|
||||||
"**Debugging tip**: Make sure that to activate your virtual environment (myenv) before you re-launch this notebook using the `jupyter notebook` comand. Choose the respective Python kernel for your new virtual environment using the `Kernel > Change Kernel` menu above. If you have completed the steps correctly, the upper right corner of your screen should state `Python [conda env:myenv]` instead of `Python [default]`.\n",
|
|
||||||
"\n",
|
|
||||||
"### 3. Download sample data and pre-trained ONNX model from ONNX Model Zoo.\n",
|
|
||||||
"\n",
|
|
||||||
"In the following lines of code, we download [the trained ONNX Emotion FER+ model and corresponding test data](https://github.com/onnx/models/tree/master/vision/body_analysis/emotion_ferplus) and place them in the same folder as this tutorial notebook. For more information about the FER+ dataset, please visit Microsoft Researcher Emad Barsoum's [FER+ source data repository](https://github.com/ebarsoum/FERPlus)."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# urllib is a built-in Python library to download files from URLs\n",
|
|
||||||
"\n",
|
|
||||||
"# Objective: retrieve the latest version of the ONNX Emotion FER+ model files from the\n",
|
|
||||||
"# ONNX Model Zoo and save it in the same folder as this tutorial\n",
|
|
||||||
"\n",
|
|
||||||
"import urllib.request\n",
|
|
||||||
"import os\n",
|
|
||||||
"\n",
|
|
||||||
"onnx_model_url = \"https://github.com/onnx/models/blob/main/vision/body_analysis/emotion_ferplus/model/emotion-ferplus-7.tar.gz?raw=true\"\n",
|
|
||||||
"\n",
|
|
||||||
"urllib.request.urlretrieve(onnx_model_url, filename=\"emotion-ferplus-7.tar.gz\")\n",
|
|
||||||
"os.mkdir(\"emotion_ferplus\")\n",
|
|
||||||
"\n",
|
|
||||||
"# the ! magic command tells our jupyter notebook kernel to run the following line of \n",
|
|
||||||
"# code from the command line instead of the notebook kernel\n",
|
|
||||||
"\n",
|
|
||||||
"# We use tar and xvcf to unzip the files we just retrieved from the ONNX model zoo\n",
|
|
||||||
"\n",
|
|
||||||
"!tar xvzf emotion-ferplus-7.tar.gz -C emotion_ferplus"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Deploy a VM with your ONNX model in the Cloud\n",
|
|
||||||
"\n",
|
|
||||||
"### Load Azure ML workspace\n",
|
|
||||||
"\n",
|
|
||||||
"We begin by instantiating a workspace object from the existing workspace created earlier in the configuration notebook."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Check core SDK version number\n",
|
|
||||||
"import azureml.core\n",
|
|
||||||
"\n",
|
|
||||||
"print(\"SDK version:\", azureml.core.VERSION)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core import Workspace\n",
|
|
||||||
"\n",
|
|
||||||
"ws = Workspace.from_config()\n",
|
|
||||||
"print(ws.name, ws.location, ws.resource_group, sep = '\\n')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Registering your model with Azure ML"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"model_dir = \"emotion_ferplus/model\" # replace this with the location of your model files\n",
|
|
||||||
"\n",
|
|
||||||
"# leave as is if it's in the same folder as this notebook"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.model import Model\n",
|
|
||||||
"\n",
|
|
||||||
"model = Model.register(model_path = model_dir + \"/\" + \"model.onnx\",\n",
|
|
||||||
" model_name = \"onnx_emotion\",\n",
|
|
||||||
" tags = {\"onnx\": \"demo\"},\n",
|
|
||||||
" description = \"FER+ emotion recognition CNN from ONNX Model Zoo\",\n",
|
|
||||||
" workspace = ws)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Optional: Displaying your registered models\n",
|
|
||||||
"\n",
|
|
||||||
"This step is not required, so feel free to skip it."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"models = ws.models\n",
|
|
||||||
"for name, m in models.items():\n",
|
|
||||||
" print(\"Name:\", name,\"\\tVersion:\", m.version, \"\\tDescription:\", m.description, m.tags)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### ONNX FER+ Model Methodology\n",
|
|
||||||
"\n",
|
|
||||||
"The image classification model we are using is pre-trained using Microsoft's deep learning cognitive toolkit, [CNTK](https://github.com/Microsoft/CNTK), from the [ONNX model zoo](http://github.com/onnx/models). The model zoo has many other models that can be deployed on cloud providers like AzureML without any additional training. To ensure that our cloud deployed model works, we use testing data from the well-known FER+ data set, provided as part of the [trained Emotion Recognition model](https://github.com/onnx/models/tree/master/vision/body_analysis/emotion_ferplus) in the ONNX model zoo.\n",
|
|
||||||
"\n",
|
|
||||||
"The original Facial Emotion Recognition (FER) Dataset was released in 2013 by Pierre-Luc Carrier and Aaron Courville as part of a [Kaggle Competition](https://www.kaggle.com/c/challenges-in-representation-learning-facial-expression-recognition-challenge/data), but some of the labels are not entirely appropriate for the expression. In the FER+ Dataset, each photo was evaluated by at least 10 croud sourced reviewers, creating a more accurate basis for ground truth. \n",
|
|
||||||
"\n",
|
|
||||||
"You can see the difference of label quality in the sample model input below. The FER labels are the first word below each image, and the FER+ labels are the second word below each image.\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"***Input: Photos of cropped faces from FER+ Dataset***\n",
|
|
||||||
"\n",
|
|
||||||
"***Task: Classify each facial image into its appropriate emotions in the emotion table***\n",
|
|
||||||
"\n",
|
|
||||||
"``` emotion_table = {'neutral':0, 'happiness':1, 'surprise':2, 'sadness':3, 'anger':4, 'disgust':5, 'fear':6, 'contempt':7} ```\n",
|
|
||||||
"\n",
|
|
||||||
"***Output: Emotion prediction for input image***\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"Remember, once the application is deployed in Azure ML, you can use your own images as input for the model to classify."
|
|
||||||
]
|
|
||||||
},
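{
"cell_type": "markdown",
"metadata": {},
"source": [
"Assuming the FER+ model returns one raw score per entry in `emotion_table`, turning those scores into a predicted emotion is just an argmax over the scores. The following is a minimal sketch (the `raw_scores` values are made up for illustration); the `softmax`/`postprocess` helpers used later in this notebook do the same thing more carefully:\n",
"\n",
"```python\n",
"import numpy as np\n",
"\n",
"emotion_table = {'neutral':0, 'happiness':1, 'surprise':2, 'sadness':3,\n",
"                 'anger':4, 'disgust':5, 'fear':6, 'contempt':7}\n",
"\n",
"# hypothetical raw scores, one per emotion, in the order defined by emotion_table\n",
"raw_scores = np.array([0.1, 3.2, 0.4, 0.0, -1.0, -0.5, 0.2, 0.1])\n",
"\n",
"predicted = list(emotion_table.keys())[int(np.argmax(raw_scores))]\n",
"print(predicted)  # -> 'happiness'\n",
"```"
]
},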
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# for images and plots in this notebook\n",
|
|
||||||
"import matplotlib.pyplot as plt \n",
|
|
||||||
"\n",
|
|
||||||
"# display images inline\n",
|
|
||||||
"%matplotlib inline"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Model Description\n",
|
|
||||||
"\n",
|
|
||||||
"The FER+ model from the ONNX Model Zoo is summarized by the graphic below. You can see the entire workflow of our pre-trained model in the following image from Barsoum et. al's paper [\"Training Deep Networks for Facial Expression Recognition\n",
|
|
||||||
"with Crowd-Sourced Label Distribution\"](https://arxiv.org/pdf/1608.01041.pdf), with our (64 x 64) input images and our output probabilities for each of the labels."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
""
|
|
||||||
]
|
|
||||||
},
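{
"cell_type": "markdown",
"metadata": {},
"source": [
"Before deploying, you can optionally sanity-check the downloaded model locally with `onnxruntime`. The sketch below assumes `model_dir` is set as above and that `onnxruntime` is installed in your local environment; it feeds a random (1, 1, 64, 64) tensor through the network just to confirm the input and output shapes:\n",
"\n",
"```python\n",
"import os\n",
"import numpy as np\n",
"import onnxruntime\n",
"\n",
"sess = onnxruntime.InferenceSession(os.path.join(model_dir, 'model.onnx'), None)\n",
"print('input :', sess.get_inputs()[0].name, sess.get_inputs()[0].shape)\n",
"print('output:', sess.get_outputs()[0].name, sess.get_outputs()[0].shape)\n",
"\n",
"# run a dummy grayscale image through the model; expect one raw score per emotion\n",
"dummy = np.random.rand(1, 1, 64, 64).astype('float32')\n",
"scores = sess.run([sess.get_outputs()[0].name], {sess.get_inputs()[0].name: dummy})[0]\n",
"print('raw scores shape:', scores.shape)\n",
"```"
]
},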
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Specify our Score and Environment Files"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"We are now going to deploy our ONNX Model on AML with inference in ONNX Runtime. We begin by writing a score.py file, which will help us run the model in our Azure ML virtual machine (VM), and then specify our environment by writing a yml file. You will also notice that we import the onnxruntime library to do runtime inference on our ONNX models (passing in input and evaluating out model's predicted output). More information on the API and commands can be found in the [ONNX Runtime documentation](https://aka.ms/onnxruntime).\n",
|
|
||||||
"\n",
|
|
||||||
"### Write Score File\n",
|
|
||||||
"\n",
|
|
||||||
"A score file is what tells our Azure cloud service what to do. After initializing our model using azureml.core.model, we start an ONNX Runtime inference session to evaluate the data passed in on our function calls."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"%%writefile score.py\n",
|
|
||||||
"import json\n",
|
|
||||||
"import numpy as np\n",
|
|
||||||
"import onnxruntime\n",
|
|
||||||
"import sys\n",
|
|
||||||
"import os\n",
|
|
||||||
"import time\n",
|
|
||||||
"\n",
|
|
||||||
"def init():\n",
|
|
||||||
" global session, input_name, output_name\n",
|
|
||||||
" model = os.path.join(os.getenv('AZUREML_MODEL_DIR'), 'model.onnx')\n",
|
|
||||||
" session = onnxruntime.InferenceSession(model, None)\n",
|
|
||||||
" input_name = session.get_inputs()[0].name\n",
|
|
||||||
" output_name = session.get_outputs()[0].name \n",
|
|
||||||
" \n",
|
|
||||||
"def run(input_data):\n",
|
|
||||||
" '''Purpose: evaluate test input in Azure Cloud using onnxruntime.\n",
|
|
||||||
" We will call the run function later from our Jupyter Notebook \n",
|
|
||||||
" so our azure service can evaluate our model input in the cloud. '''\n",
|
|
||||||
"\n",
|
|
||||||
" try:\n",
|
|
||||||
" # load in our data, convert to readable format\n",
|
|
||||||
" data = np.array(json.loads(input_data)['data']).astype('float32')\n",
|
|
||||||
" \n",
|
|
||||||
" start = time.time()\n",
|
|
||||||
" r = session.run([output_name], {input_name : data})\n",
|
|
||||||
" end = time.time()\n",
|
|
||||||
" \n",
|
|
||||||
" result = emotion_map(postprocess(r[0]))\n",
|
|
||||||
" \n",
|
|
||||||
" result_dict = {\"result\": result,\n",
|
|
||||||
" \"time_in_sec\": [end - start]}\n",
|
|
||||||
" except Exception as e:\n",
|
|
||||||
" result_dict = {\"error\": str(e)}\n",
|
|
||||||
" \n",
|
|
||||||
" return json.dumps(result_dict)\n",
|
|
||||||
"\n",
|
|
||||||
"def emotion_map(classes, N=1):\n",
|
|
||||||
" \"\"\"Take the most probable labels (output of postprocess) and returns the \n",
|
|
||||||
" top N emotional labels that fit the picture.\"\"\"\n",
|
|
||||||
" \n",
|
|
||||||
" emotion_table = {'neutral':0, 'happiness':1, 'surprise':2, 'sadness':3, \n",
|
|
||||||
" 'anger':4, 'disgust':5, 'fear':6, 'contempt':7}\n",
|
|
||||||
" \n",
|
|
||||||
" emotion_keys = list(emotion_table.keys())\n",
|
|
||||||
" emotions = []\n",
|
|
||||||
" for i in range(N):\n",
|
|
||||||
" emotions.append(emotion_keys[classes[i]])\n",
|
|
||||||
" return emotions\n",
|
|
||||||
"\n",
|
|
||||||
"def softmax(x):\n",
|
|
||||||
" \"\"\"Compute softmax values (probabilities from 0 to 1) for each possible label.\"\"\"\n",
|
|
||||||
" x = x.reshape(-1)\n",
|
|
||||||
" e_x = np.exp(x - np.max(x))\n",
|
|
||||||
" return e_x / e_x.sum(axis=0)\n",
|
|
||||||
"\n",
|
|
||||||
"def postprocess(scores):\n",
|
|
||||||
" \"\"\"This function takes the scores generated by the network and \n",
|
|
||||||
" returns the class IDs in decreasing order of probability.\"\"\"\n",
|
|
||||||
" prob = softmax(scores)\n",
|
|
||||||
" prob = np.squeeze(prob)\n",
|
|
||||||
" classes = np.argsort(prob)[::-1]\n",
|
|
||||||
" return classes"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Write Environment File\n",
|
|
||||||
"Please note that you must indicate azureml-defaults with verion >= 1.0.45 as a pip dependency, because it contains the functionality needed to host the model as a web service."
|
|
||||||
]
|
|
||||||
},
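{
"cell_type": "markdown",
"metadata": {},
"source": [
"If you prefer to make that minimum version explicit, one option (a sketch, equivalent to the next cell apart from the pinned pip requirement) is to pass a version specifier directly to `CondaDependencies.create`:\n",
"\n",
"```python\n",
"from azureml.core.conda_dependencies import CondaDependencies\n",
"\n",
"# pin the minimum azureml-defaults version needed for web-service hosting\n",
"myenv = CondaDependencies.create(\n",
"    pip_packages=[\"numpy\", \"onnxruntime\", \"azureml-core\", \"azureml-defaults>=1.0.45\"])\n",
"\n",
"with open(\"myenv.yml\", \"w\") as f:\n",
"    f.write(myenv.serialize_to_string())\n",
"```"
]
},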
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.conda_dependencies import CondaDependencies \n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"myenv = CondaDependencies.create(pip_packages=[\"numpy\", \"onnxruntime\", \"azureml-core\", \"azureml-defaults\"])\n",
|
|
||||||
"\n",
|
|
||||||
"with open(\"myenv.yml\",\"w\") as f:\n",
|
|
||||||
" f.write(myenv.serialize_to_string())"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Setup inference configuration"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.model import InferenceConfig\n",
|
|
||||||
"from azureml.core.environment import Environment\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"myenv = Environment.from_conda_specification(name=\"myenv\", file_path=\"myenv.yml\")\n",
|
|
||||||
"inference_config = InferenceConfig(entry_script=\"score.py\", environment=myenv)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Deploy the model"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.webservice import AciWebservice\n",
|
|
||||||
"\n",
|
|
||||||
"aciconfig = AciWebservice.deploy_configuration(cpu_cores = 1, \n",
|
|
||||||
" memory_gb = 1, \n",
|
|
||||||
" tags = {'demo': 'onnx'}, \n",
|
|
||||||
" description = 'ONNX for emotion recognition model')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"The following cell will likely take a few minutes to run as well."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"aci_service_name = 'onnx-demo-emotion'\n",
|
|
||||||
"print(\"Service\", aci_service_name)\n",
|
|
||||||
"aci_service = Model.deploy(ws, aci_service_name, [model], inference_config, aciconfig)\n",
|
|
||||||
"aci_service.wait_for_deployment(True)\n",
|
|
||||||
"print(aci_service.state)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"if aci_service.state != 'Healthy':\n",
|
|
||||||
" # run this command for debugging.\n",
|
|
||||||
" print(aci_service.get_logs())\n",
|
|
||||||
"\n",
|
|
||||||
" # If your deployment fails, make sure to delete your aci_service before trying again!\n",
|
|
||||||
" # aci_service.delete()"
|
|
||||||
]
|
|
||||||
},
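{
"cell_type": "markdown",
"metadata": {},
"source": [
"Besides `aci_service.run()`, the ACI deployment also exposes a plain HTTP scoring endpoint. The sketch below assumes the service is healthy and that the `requests` package is available; the payload shape matches what `score.py` expects, and a random (1, 1, 64, 64) tensor is used purely as a placeholder input:\n",
"\n",
"```python\n",
"import json\n",
"import numpy as np\n",
"import requests\n",
"\n",
"payload = json.dumps({'data': np.random.rand(1, 1, 64, 64).astype('float32').tolist()})\n",
"headers = {'Content-Type': 'application/json'}\n",
"\n",
"response = requests.post(aci_service.scoring_uri, data=payload, headers=headers)\n",
"print(response.status_code)\n",
"print(response.text)  # the JSON string produced by run() in score.py\n",
"```"
]
},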
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Success!\n",
|
|
||||||
"\n",
|
|
||||||
"If you've made it this far, you've deployed a working VM with a facial emotion recognition model running in the cloud using Azure ML. Congratulations!\n",
|
|
||||||
"\n",
|
|
||||||
"Let's see how well our model deals with our test images."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Testing and Evaluation\n",
|
|
||||||
"\n",
|
|
||||||
"### Useful Helper Functions\n",
|
|
||||||
"\n",
|
|
||||||
"We preprocess and postprocess our data (see score.py file) using the helper functions specified in the [ONNX FER+ Model page in the Model Zoo repository](https://github.com/onnx/models/tree/master/vision/body_analysis/emotion_ferplus)."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"def emotion_map(classes, N=1):\n",
|
|
||||||
" \"\"\"Take the most probable labels (output of postprocess) and returns the \n",
|
|
||||||
" top N emotional labels that fit the picture.\"\"\"\n",
|
|
||||||
" \n",
|
|
||||||
" emotion_table = {'neutral':0, 'happiness':1, 'surprise':2, 'sadness':3, \n",
|
|
||||||
" 'anger':4, 'disgust':5, 'fear':6, 'contempt':7}\n",
|
|
||||||
" \n",
|
|
||||||
" emotion_keys = list(emotion_table.keys())\n",
|
|
||||||
" emotions = []\n",
|
|
||||||
" for c in range(N):\n",
|
|
||||||
" emotions.append(emotion_keys[classes[c]])\n",
|
|
||||||
" return emotions\n",
|
|
||||||
"\n",
|
|
||||||
"def softmax(x):\n",
|
|
||||||
" \"\"\"Compute softmax values (probabilities from 0 to 1) for each possible label.\"\"\"\n",
|
|
||||||
" x = x.reshape(-1)\n",
|
|
||||||
" e_x = np.exp(x - np.max(x))\n",
|
|
||||||
" return e_x / e_x.sum(axis=0)\n",
|
|
||||||
"\n",
|
|
||||||
"def postprocess(scores):\n",
|
|
||||||
" \"\"\"This function takes the scores generated by the network and \n",
|
|
||||||
" returns the class IDs in decreasing order of probability.\"\"\"\n",
|
|
||||||
" prob = softmax(scores)\n",
|
|
||||||
" prob = np.squeeze(prob)\n",
|
|
||||||
" classes = np.argsort(prob)[::-1]\n",
|
|
||||||
" return classes"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Load Test Data\n",
|
|
||||||
"\n",
|
|
||||||
"These are already in your directory from your ONNX model download (from the model zoo).\n",
|
|
||||||
"\n",
|
|
||||||
"Notice that our Model Zoo files have a .pb extension. This is because they are [protobuf files (Protocol Buffers)](https://developers.google.com/protocol-buffers/docs/pythontutorial), so we need to read in our data through our ONNX TensorProto reader into a format we can work with, like numerical arrays."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# to manipulate our arrays\n",
|
|
||||||
"import numpy as np \n",
|
|
||||||
"\n",
|
|
||||||
"# read in test data protobuf files included with the model\n",
|
|
||||||
"import onnx\n",
|
|
||||||
"from onnx import numpy_helper\n",
|
|
||||||
"\n",
|
|
||||||
"# to use parsers to read in our model/data\n",
|
|
||||||
"import json\n",
|
|
||||||
"\n",
|
|
||||||
"test_inputs = []\n",
|
|
||||||
"test_outputs = []\n",
|
|
||||||
"\n",
|
|
||||||
"# read in 1 testing images from .pb files\n",
|
|
||||||
"test_data_size = 1\n",
|
|
||||||
"\n",
|
|
||||||
"for num in np.arange(test_data_size):\n",
|
|
||||||
" input_test_data = os.path.join(model_dir, 'test_data_set_{0}'.format(num), 'input_0.pb')\n",
|
|
||||||
" output_test_data = os.path.join(model_dir, 'test_data_set_{0}'.format(num), 'output_0.pb')\n",
|
|
||||||
" \n",
|
|
||||||
" # convert protobuf tensors to np arrays using the TensorProto reader from ONNX\n",
|
|
||||||
" tensor = onnx.TensorProto()\n",
|
|
||||||
" with open(input_test_data, 'rb') as f:\n",
|
|
||||||
" tensor.ParseFromString(f.read())\n",
|
|
||||||
" \n",
|
|
||||||
" input_data = numpy_helper.to_array(tensor)\n",
|
|
||||||
" test_inputs.append(input_data)\n",
|
|
||||||
" \n",
|
|
||||||
" with open(output_test_data, 'rb') as f:\n",
|
|
||||||
" tensor.ParseFromString(f.read())\n",
|
|
||||||
" \n",
|
|
||||||
" output_data = numpy_helper.to_array(tensor)\n",
|
|
||||||
" output_processed = emotion_map(postprocess(output_data[0]))[0]\n",
|
|
||||||
" test_outputs.append(output_processed)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {
|
|
||||||
"nbpresent": {
|
|
||||||
"id": "c3f2f57c-7454-4d3e-b38d-b0946cf066ea"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"source": [
|
|
||||||
"### Show some sample images\n",
|
|
||||||
"We use `matplotlib` to plot 1 test images from the dataset."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"nbpresent": {
|
|
||||||
"id": "396d478b-34aa-4afa-9898-cdce8222a516"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"plt.figure(figsize = (20, 20))\n",
|
|
||||||
"for test_image in np.arange(test_data_size):\n",
|
|
||||||
" test_inputs[test_image].reshape(1, 64, 64)\n",
|
|
||||||
" plt.subplot(1, 8, test_image+1)\n",
|
|
||||||
" plt.axhline('')\n",
|
|
||||||
" plt.axvline('')\n",
|
|
||||||
" plt.text(x = 10, y = -10, s = test_outputs[test_image], fontsize = 18)\n",
|
|
||||||
" plt.imshow(test_inputs[test_image].reshape(64, 64), cmap = plt.cm.gray)\n",
|
|
||||||
"plt.show()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Run evaluation / prediction"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"plt.figure(figsize = (16, 6))\n",
|
|
||||||
"plt.subplot(1, 8, 1)\n",
|
|
||||||
"\n",
|
|
||||||
"plt.text(x = 0, y = -30, s = \"True Label: \", fontsize = 13, color = 'black')\n",
|
|
||||||
"plt.text(x = 0, y = -20, s = \"Result: \", fontsize = 13, color = 'black')\n",
|
|
||||||
"plt.text(x = 0, y = -10, s = \"Inference Time: \", fontsize = 13, color = 'black')\n",
|
|
||||||
"plt.text(x = 3, y = 14, s = \"Model Input\", fontsize = 12, color = 'black')\n",
|
|
||||||
"plt.text(x = 6, y = 18, s = \"(64 x 64)\", fontsize = 12, color = 'black')\n",
|
|
||||||
"plt.imshow(np.ones((28,28)), cmap=plt.cm.Greys) \n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"for i in np.arange(test_data_size):\n",
|
|
||||||
" \n",
|
|
||||||
" input_data = json.dumps({'data': test_inputs[i].tolist()})\n",
|
|
||||||
"\n",
|
|
||||||
" # predict using the deployed model\n",
|
|
||||||
" r = json.loads(aci_service.run(input_data))\n",
|
|
||||||
" \n",
|
|
||||||
" if \"error\" in r:\n",
|
|
||||||
" print(r['error'])\n",
|
|
||||||
" break\n",
|
|
||||||
" \n",
|
|
||||||
" result = r['result'][0]\n",
|
|
||||||
" time_ms = np.round(r['time_in_sec'][0] * 1000, 2)\n",
|
|
||||||
" \n",
|
|
||||||
" ground_truth = test_outputs[i]\n",
|
|
||||||
" \n",
|
|
||||||
" # compare actual value vs. the predicted values:\n",
|
|
||||||
" plt.subplot(1, 8, i+2)\n",
|
|
||||||
" plt.axhline('')\n",
|
|
||||||
" plt.axvline('')\n",
|
|
||||||
"\n",
|
|
||||||
" # use different color for misclassified sample\n",
|
|
||||||
" font_color = 'red' if ground_truth != result else 'black'\n",
|
|
||||||
" clr_map = plt.cm.Greys if ground_truth != result else plt.cm.gray\n",
|
|
||||||
"\n",
|
|
||||||
" # ground truth labels are in blue\n",
|
|
||||||
" plt.text(x = 10, y = -70, s = ground_truth, fontsize = 18, color = 'blue')\n",
|
|
||||||
" \n",
|
|
||||||
" # predictions are in black if correct, red if incorrect\n",
|
|
||||||
" plt.text(x = 10, y = -45, s = result, fontsize = 18, color = font_color)\n",
|
|
||||||
" plt.text(x = 5, y = -22, s = str(time_ms) + ' ms', fontsize = 14, color = font_color)\n",
|
|
||||||
"\n",
|
|
||||||
" \n",
|
|
||||||
" plt.imshow(test_inputs[i].reshape(64, 64), cmap = clr_map)\n",
|
|
||||||
"\n",
|
|
||||||
"plt.show()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Try classifying your own images!"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Preprocessing functions take your image and format it so it can be passed\n",
|
|
||||||
"# as input into our ONNX model\n",
|
|
||||||
"\n",
|
|
||||||
"import cv2\n",
|
|
||||||
"\n",
|
|
||||||
"def rgb2gray(rgb):\n",
|
|
||||||
" \"\"\"Convert the input image into grayscale\"\"\"\n",
|
|
||||||
" return np.dot(rgb[...,:3], [0.299, 0.587, 0.114])\n",
|
|
||||||
"\n",
|
|
||||||
"def resize_img(img_to_resize):\n",
|
|
||||||
" \"\"\"Resize image to FER+ model input dimensions\"\"\"\n",
|
|
||||||
" r_img = cv2.resize(img_to_resize, dsize=(64, 64), interpolation=cv2.INTER_AREA)\n",
|
|
||||||
" r_img.resize((1, 1, 64, 64))\n",
|
|
||||||
" return r_img\n",
|
|
||||||
"\n",
|
|
||||||
"def preprocess(img_to_preprocess):\n",
|
|
||||||
" \"\"\"Resize input images and convert them to grayscale.\"\"\"\n",
|
|
||||||
" if img_to_preprocess.shape == (64, 64):\n",
|
|
||||||
" img_to_preprocess.resize((1, 1, 64, 64))\n",
|
|
||||||
" return img_to_preprocess\n",
|
|
||||||
" \n",
|
|
||||||
" grayscale = rgb2gray(img_to_preprocess)\n",
|
|
||||||
" processed_img = resize_img(grayscale)\n",
|
|
||||||
" return processed_img"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Replace the following string with your own path/test image\n",
|
|
||||||
"# Make sure your image is square and the dimensions are equal (i.e. 100 * 100 pixels or 28 * 28 pixels)\n",
|
|
||||||
"\n",
|
|
||||||
"# Any PNG or JPG image file should work\n",
|
|
||||||
"# Make sure to include the entire path with // instead of /\n",
|
|
||||||
"\n",
|
|
||||||
"# e.g. your_test_image = \"C:/Users/vinitra.swamy/Pictures/face.png\"\n",
|
|
||||||
"\n",
|
|
||||||
"your_test_image = \"<path to file>\"\n",
|
|
||||||
"\n",
|
|
||||||
"import matplotlib.image as mpimg\n",
|
|
||||||
"\n",
|
|
||||||
"if your_test_image != \"<path to file>\":\n",
|
|
||||||
" img = mpimg.imread(your_test_image)\n",
|
|
||||||
" plt.subplot(1,3,1)\n",
|
|
||||||
" plt.imshow(img, cmap = plt.cm.Greys)\n",
|
|
||||||
" print(\"Old Dimensions: \", img.shape)\n",
|
|
||||||
" img = preprocess(img)\n",
|
|
||||||
" print(\"New Dimensions: \", img.shape)\n",
|
|
||||||
"else:\n",
|
|
||||||
" img = None"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"if img is None:\n",
|
|
||||||
" print(\"Add the path for your image data.\")\n",
|
|
||||||
"else:\n",
|
|
||||||
" input_data = json.dumps({'data': img.tolist()})\n",
|
|
||||||
"\n",
|
|
||||||
" try:\n",
|
|
||||||
" r = json.loads(aci_service.run(input_data))\n",
|
|
||||||
" result = r['result'][0]\n",
|
|
||||||
" time_ms = np.round(r['time_in_sec'][0] * 1000, 2)\n",
|
|
||||||
" except KeyError as e:\n",
|
|
||||||
" print(str(e))\n",
|
|
||||||
"\n",
|
|
||||||
" plt.figure(figsize = (16, 6))\n",
|
|
||||||
" plt.subplot(1,8,1)\n",
|
|
||||||
" plt.axhline('')\n",
|
|
||||||
" plt.axvline('')\n",
|
|
||||||
" plt.text(x = -10, y = -40, s = \"Model prediction: \", fontsize = 14)\n",
|
|
||||||
" plt.text(x = -10, y = -25, s = \"Inference time: \", fontsize = 14)\n",
|
|
||||||
" plt.text(x = 100, y = -40, s = str(result), fontsize = 14)\n",
|
|
||||||
" plt.text(x = 100, y = -25, s = str(time_ms) + \" ms\", fontsize = 14)\n",
|
|
||||||
" plt.text(x = -10, y = -10, s = \"Model Input image: \", fontsize = 14)\n",
|
|
||||||
" plt.imshow(img.reshape((64, 64)), cmap = plt.cm.gray) \n",
|
|
||||||
" "
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# remember to delete your service after you are done using it!\n",
|
|
||||||
"\n",
|
|
||||||
"aci_service.delete()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Conclusion\n",
|
|
||||||
"\n",
|
|
||||||
"Congratulations!\n",
|
|
||||||
"\n",
|
|
||||||
"In this tutorial, you have:\n",
|
|
||||||
"- familiarized yourself with ONNX Runtime inference and the pretrained models in the ONNX model zoo\n",
|
|
||||||
"- understood a state-of-the-art convolutional neural net image classification model (FER+ in ONNX) and deployed it in the Azure ML cloud\n",
|
|
||||||
"- ensured that your deep learning model is working perfectly (in the cloud) on test data, and checked it against some of your own!\n",
|
|
||||||
"\n",
|
|
||||||
"Next steps:\n",
|
|
||||||
"- If you have not already, check out another interesting ONNX/AML application that lets you set up a state-of-the-art [handwritten image classification model (MNIST)](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/deployment/onnx/onnx-inference-mnist-deploy.ipynb) in the cloud! This tutorial deploys a pre-trained ONNX Computer Vision model for handwritten digit classification in an Azure ML virtual machine.\n",
|
|
||||||
"- Keep an eye out for an updated version of this tutorial that uses ONNX Runtime GPU.\n",
|
|
||||||
"- Contribute to our [open source ONNX repository on github](http://github.com/onnx/onnx) and/or add to our [ONNX model zoo](http://github.com/onnx/models)"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"authors": [
|
|
||||||
{
|
|
||||||
"name": "viswamy"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"category": "deployment",
|
|
||||||
"compute": [
|
|
||||||
"Local"
|
|
||||||
],
|
|
||||||
"datasets": [
|
|
||||||
"Emotion FER"
|
|
||||||
],
|
|
||||||
"deployment": [
|
|
||||||
"Azure Container Instance"
|
|
||||||
],
|
|
||||||
"exclude_from_index": false,
|
|
||||||
"framework": [
|
|
||||||
"ONNX"
|
|
||||||
],
|
|
||||||
"friendly_name": "Deploy Facial Expression Recognition (FER+) with ONNX Runtime",
|
|
||||||
"index_order": 2,
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3.8 - AzureML",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python38-azureml"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.6.5"
|
|
||||||
},
|
|
||||||
"msauthor": "vinitra.swamy",
|
|
||||||
"star_tag": [],
|
|
||||||
"tags": [
|
|
||||||
"ONNX Model Zoo"
|
|
||||||
],
|
|
||||||
"task": "Facial Expression Recognition"
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 2
|
|
||||||
}
|
|
||||||
@@ -1,9 +0,0 @@
|
|||||||
name: onnx-inference-facial-expression-recognition-deploy
|
|
||||||
dependencies:
|
|
||||||
- pip:
|
|
||||||
- azureml-sdk
|
|
||||||
- azureml-widgets
|
|
||||||
- matplotlib
|
|
||||||
- numpy
|
|
||||||
- onnx<1.7.0
|
|
||||||
- opencv-python-headless
|
|
||||||
@@ -1,778 +0,0 @@
|
|||||||
{
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Copyright (c) Microsoft Corporation. All rights reserved. \n",
|
|
||||||
"Licensed under the MIT License."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
""
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# Handwritten Digit Classification (MNIST) using ONNX Runtime on Azure ML\n",
|
|
||||||
"\n",
|
|
||||||
"This example shows how to deploy an image classification neural network using the Modified National Institute of Standards and Technology ([MNIST](http://yann.lecun.com/exdb/mnist/)) dataset and Open Neural Network eXchange format ([ONNX](http://aka.ms/onnxdocarticle)) on the Azure Machine Learning platform. MNIST is a popular dataset consisting of 70,000 grayscale images. Each image is a handwritten digit of 28x28 pixels, representing number from 0 to 9. This tutorial will show you how to deploy a MNIST model from the [ONNX model zoo](https://github.com/onnx/models), use it to make predictions using ONNX Runtime Inference, and deploy it as a web service in Azure.\n",
|
|
||||||
"\n",
|
|
||||||
"Throughout this tutorial, we will be referring to ONNX, a neural network exchange format used to represent deep learning models. With ONNX, AI developers can more easily move models between state-of-the-art tools (CNTK, PyTorch, Caffe, MXNet, TensorFlow) and choose the combination that is best for them. ONNX is developed and supported by a community of partners including Microsoft AI, Facebook, and Amazon. For more information, explore the [ONNX website](http://onnx.ai) and [open source files](https://github.com/onnx).\n",
|
|
||||||
"\n",
|
|
||||||
"[ONNX Runtime](https://aka.ms/onnxruntime-python) is the runtime engine that enables evaluation of trained machine learning (Traditional ML and Deep Learning) models with high performance and low resource utilization.\n",
|
|
||||||
"\n",
|
|
||||||
"#### Tutorial Objectives:\n",
|
|
||||||
"\n",
|
|
||||||
"- Describe the MNIST dataset and pretrained Convolutional Neural Net ONNX model, stored in the ONNX model zoo.\n",
|
|
||||||
"- Deploy and run the pretrained MNIST ONNX model on an Azure Machine Learning instance\n",
|
|
||||||
"- Predict labels for test set data points in the cloud using ONNX Runtime and Azure ML"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Prerequisites\n",
|
|
||||||
"\n",
|
|
||||||
"### 1. Install Azure ML SDK and create a new workspace\n",
|
|
||||||
"If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, please follow [Azure ML configuration notebook](../../../configuration.ipynb) to set up your environment.\n",
|
|
||||||
"\n",
|
|
||||||
"### 2. Install additional packages needed for this tutorial notebook\n",
|
|
||||||
"You need to install the popular plotting library `matplotlib`, the image manipulation library `opencv`, and the `onnx` library in the conda environment where Azure Maching Learning SDK is installed. \n",
|
|
||||||
"\n",
|
|
||||||
"```sh\n",
|
|
||||||
"(myenv) $ pip install matplotlib onnx opencv-python\n",
|
|
||||||
"```\n",
|
|
||||||
"\n",
|
|
||||||
"**Debugging tip**: Make sure that you run the \"jupyter notebook\" command to launch this notebook after activating your virtual environment. Choose the respective Python kernel for your new virtual environment using the `Kernel > Change Kernel` menu above. If you have completed the steps correctly, the upper right corner of your screen should state `Python [conda env:myenv]` instead of `Python [default]`.\n",
|
|
||||||
"\n",
|
|
||||||
"### 3. Download sample data and pre-trained ONNX model from ONNX Model Zoo.\n",
|
|
||||||
"\n",
|
|
||||||
"In the following lines of code, we download [the trained ONNX MNIST model and corresponding test data](https://github.com/onnx/models/tree/master/vision/classification/mnist) and place them in the same folder as this tutorial notebook. For more information about the MNIST dataset, please visit [Yan LeCun's website](http://yann.lecun.com/exdb/mnist/)."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# urllib is a built-in Python library to download files from URLs\n",
|
|
||||||
"\n",
|
|
||||||
"# Objective: retrieve the latest version of the ONNX MNIST model files from the\n",
|
|
||||||
"# ONNX Model Zoo and save it in the same folder as this tutorial\n",
|
|
||||||
"\n",
|
|
||||||
"import urllib.request\n",
|
|
||||||
"import os\n",
|
|
||||||
"\n",
|
|
||||||
"onnx_model_url = \"https://github.com/onnx/models/blob/main/vision/classification/mnist/model/mnist-7.tar.gz?raw=true\"\n",
|
|
||||||
"\n",
|
|
||||||
"urllib.request.urlretrieve(onnx_model_url, filename=\"mnist-7.tar.gz\")\n",
|
|
||||||
"os.mkdir(\"mnist\")"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# the ! magic command tells our jupyter notebook kernel to run the following line of \n",
|
|
||||||
"# code from the command line instead of the notebook kernel\n",
|
|
||||||
"\n",
|
|
||||||
"# We use tar and xvcf to unzip the files we just retrieved from the ONNX model zoo\n",
|
|
||||||
"\n",
|
|
||||||
"!tar xvzf mnist-7.tar.gz -C mnist"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Deploy a VM with your ONNX model in the Cloud\n",
|
|
||||||
"\n",
|
|
||||||
"### Load Azure ML workspace\n",
|
|
||||||
"\n",
|
|
||||||
"We begin by instantiating a workspace object from the existing workspace created earlier in the configuration notebook."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Check core SDK version number\n",
|
|
||||||
"import azureml.core\n",
|
|
||||||
"\n",
|
|
||||||
"print(\"SDK version:\", azureml.core.VERSION)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core import Workspace\n",
|
|
||||||
"\n",
|
|
||||||
"ws = Workspace.from_config()\n",
|
|
||||||
"print(ws.name, ws.resource_group, ws.location, sep = '\\n')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Registering your model with Azure ML"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"model_dir = \"mnist/model\" # replace this with the location of your model files\n",
|
|
||||||
"\n",
|
|
||||||
"# leave as is if it's in the same folder as this notebook"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.model import Model\n",
|
|
||||||
"\n",
|
|
||||||
"model = Model.register(workspace = ws,\n",
|
|
||||||
" model_path = model_dir + \"/\" + \"model.onnx\",\n",
|
|
||||||
" model_name = \"mnist_1\",\n",
|
|
||||||
" tags = {\"onnx\": \"demo\"},\n",
|
|
||||||
" description = \"MNIST image classification CNN from ONNX Model Zoo\",)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Optional: Displaying your registered models\n",
|
|
||||||
"\n",
|
|
||||||
"This step is not required, so feel free to skip it."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"models = ws.models\n",
|
|
||||||
"for name, m in models.items():\n",
|
|
||||||
" print(\"Name:\", name,\"\\tVersion:\", m.version, \"\\tDescription:\", m.description, m.tags)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {
|
|
||||||
"nbpresent": {
|
|
||||||
"id": "c3f2f57c-7454-4d3e-b38d-b0946cf066ea"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"source": [
|
|
||||||
"### ONNX MNIST Model Methodology\n",
|
|
||||||
"\n",
|
|
||||||
"The image classification model we are using is pre-trained using Microsoft's deep learning cognitive toolkit, [CNTK](https://github.com/Microsoft/CNTK), from the [ONNX model zoo](http://github.com/onnx/models). The model zoo has many other models that can be deployed on cloud providers like AzureML without any additional training. To ensure that our cloud deployed model works, we use testing data from the famous MNIST data set, provided as part of the [trained MNIST model](https://github.com/onnx/models/tree/master/vision/classification/mnist) in the ONNX model zoo.\n",
|
|
||||||
"\n",
|
|
||||||
"***Input: Handwritten Images from MNIST Dataset***\n",
|
|
||||||
"\n",
|
|
||||||
"***Task: Classify each MNIST image into an appropriate digit***\n",
|
|
||||||
"\n",
|
|
||||||
"***Output: Digit prediction for input image***\n",
|
|
||||||
"\n",
|
|
||||||
"Run the cell below to look at some of the sample images from the MNIST dataset that we used to train this ONNX model. Remember, once the application is deployed in Azure ML, you can use your own images as input for the model to classify!"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# for images and plots in this notebook\n",
|
|
||||||
"import matplotlib.pyplot as plt \n",
|
|
||||||
"from IPython.display import Image\n",
|
|
||||||
"\n",
|
|
||||||
"# display images inline\n",
|
|
||||||
"%matplotlib inline"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"Image(url=\"http://3.bp.blogspot.com/_UpN7DfJA0j4/TJtUBWPk0SI/AAAAAAAAABY/oWPMtmqJn3k/s1600/mnist_originals.png\", width=200, height=200)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Specify our Score and Environment Files"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"We are now going to deploy our ONNX Model on AML with inference in ONNX Runtime. We begin by writing a score.py file, which will help us run the model in our Azure ML virtual machine (VM), and then specify our environment by writing a yml file. You will also notice that we import the onnxruntime library to do runtime inference on our ONNX models (passing in input and evaluating out model's predicted output). More information on the API and commands can be found in the [ONNX Runtime documentation](https://aka.ms/onnxruntime).\n",
|
|
||||||
"\n",
|
|
||||||
"### Write Score File\n",
|
|
||||||
"\n",
|
|
||||||
"A score file is what tells our Azure cloud service what to do. After initializing our model using azureml.core.model, we start an ONNX Runtime inference session to evaluate the data passed in on our function calls."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"%%writefile score.py\n",
|
|
||||||
"import json\n",
|
|
||||||
"import numpy as np\n",
|
|
||||||
"import onnxruntime\n",
|
|
||||||
"import sys\n",
|
|
||||||
"import os\n",
|
|
||||||
"import time\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"def init():\n",
|
|
||||||
" global session, input_name, output_name\n",
|
|
||||||
" # AZUREML_MODEL_DIR is an environment variable created during deployment.\n",
|
|
||||||
" # It is the path to the model folder (./azureml-models/$MODEL_NAME/$VERSION)\n",
|
|
||||||
" # For multiple models, it points to the folder containing all deployed models (./azureml-models)\n",
|
|
||||||
" model = os.path.join(os.getenv('AZUREML_MODEL_DIR'), 'model.onnx')\n",
|
|
||||||
" session = onnxruntime.InferenceSession(model, None)\n",
|
|
||||||
" input_name = session.get_inputs()[0].name\n",
|
|
||||||
" output_name = session.get_outputs()[0].name \n",
|
|
||||||
" \n",
|
|
||||||
"\n",
|
|
||||||
"def preprocess(input_data_json):\n",
|
|
||||||
" # convert the JSON data into the tensor input\n",
|
|
||||||
" return np.array(json.loads(input_data_json)['data']).astype('float32')\n",
|
|
||||||
"\n",
|
|
||||||
"def postprocess(result):\n",
|
|
||||||
" # We use argmax to pick the highest confidence label\n",
|
|
||||||
" return int(np.argmax(np.array(result).squeeze(), axis=0))\n",
|
|
||||||
" \n",
|
|
||||||
"def run(input_data):\n",
|
|
||||||
"\n",
|
|
||||||
" try:\n",
|
|
||||||
" # load in our data, convert to readable format\n",
|
|
||||||
" data = preprocess(input_data)\n",
|
|
||||||
" \n",
|
|
||||||
" # start timer\n",
|
|
||||||
" start = time.time()\n",
|
|
||||||
" \n",
|
|
||||||
" r = session.run([output_name], {input_name: data})\n",
|
|
||||||
" \n",
|
|
||||||
" #end timer\n",
|
|
||||||
" end = time.time()\n",
|
|
||||||
" \n",
|
|
||||||
" result = postprocess(r)\n",
|
|
||||||
" result_dict = {\"result\": result,\n",
|
|
||||||
" \"time_in_sec\": end - start}\n",
|
|
||||||
" except Exception as e:\n",
|
|
||||||
" result_dict = {\"error\": str(e)}\n",
|
|
||||||
" \n",
|
|
||||||
" return result_dict\n",
|
|
||||||
"\n",
|
|
||||||
"def choose_class(result_prob):\n",
|
|
||||||
" \"\"\"We use argmax to determine the right label to choose from our output\"\"\"\n",
|
|
||||||
" return int(np.argmax(result_prob, axis=0))"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Write Environment File\n",
|
|
||||||
"\n",
|
|
||||||
"This step creates a YAML environment file that specifies which dependencies we would like to see in our Linux Virtual Machine. Please note that you must indicate azureml-defaults with verion >= 1.0.45 as a pip dependency, because it contains the functionality needed to host the model as a web service."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.conda_dependencies import CondaDependencies \n",
|
|
||||||
"\n",
|
|
||||||
"myenv = CondaDependencies.create(pip_packages=[\"numpy\", \"onnxruntime\", \"azureml-core\", \"azureml-defaults\"])\n",
|
|
||||||
"\n",
|
|
||||||
"with open(\"myenv.yml\",\"w\") as f:\n",
|
|
||||||
" f.write(myenv.serialize_to_string())"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Create Inference Configuration"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.model import InferenceConfig\n",
|
|
||||||
"from azureml.core.environment import Environment\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"myenv = Environment.from_conda_specification(name=\"myenv\", file_path=\"myenv.yml\")\n",
|
|
||||||
"inference_config = InferenceConfig(entry_script=\"score.py\", environment=myenv)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Deploy the model"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.webservice import AciWebservice\n",
|
|
||||||
"\n",
|
|
||||||
"aciconfig = AciWebservice.deploy_configuration(cpu_cores = 1, \n",
|
|
||||||
" memory_gb = 1, \n",
|
|
||||||
" tags = {'demo': 'onnx'}, \n",
|
|
||||||
" description = 'ONNX for mnist model')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"The following cell will likely take a few minutes to run."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"aci_service_name = 'onnx-demo-mnist'\n",
|
|
||||||
"print(\"Service\", aci_service_name)\n",
|
|
||||||
"aci_service = Model.deploy(ws, aci_service_name, [model], inference_config, aciconfig)\n",
|
|
||||||
"aci_service.wait_for_deployment(True)\n",
|
|
||||||
"print(aci_service.state)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"if aci_service.state != 'Healthy':\n",
|
|
||||||
" # run this command for debugging.\n",
|
|
||||||
" print(aci_service.get_logs())\n",
|
|
||||||
"\n",
|
|
||||||
" # If your deployment fails, make sure to delete your aci_service or rename your service before trying again!\n",
|
|
||||||
" # aci_service.delete()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Success!\n",
|
|
||||||
"\n",
|
|
||||||
"If you've made it this far, you've deployed a working VM with a handwritten digit classifier running in the cloud using Azure ML. Congratulations!\n",
|
|
||||||
"\n",
|
|
||||||
"You can get the URL for the webservice with the code below. Let's now see how well our model deals with our test images."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"print(aci_service.scoring_uri)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Testing and Evaluation\n",
|
|
||||||
"\n",
|
|
||||||
"### Load Test Data\n",
|
|
||||||
"\n",
|
|
||||||
"These are already in your directory from your ONNX model download (from the model zoo).\n",
|
|
||||||
"\n",
|
|
||||||
"Notice that our Model Zoo files have a .pb extension. This is because they are [protobuf files (Protocol Buffers)](https://developers.google.com/protocol-buffers/docs/pythontutorial), so we need to read in our data through our ONNX TensorProto reader into a format we can work with, like numerical arrays."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# to manipulate our arrays\n",
|
|
||||||
"import numpy as np \n",
|
|
||||||
"\n",
|
|
||||||
"# read in test data protobuf files included with the model\n",
|
|
||||||
"import onnx\n",
|
|
||||||
"from onnx import numpy_helper\n",
|
|
||||||
"\n",
|
|
||||||
"# to use parsers to read in our model/data\n",
|
|
||||||
"import json\n",
|
|
||||||
"\n",
|
|
||||||
"test_inputs = []\n",
|
|
||||||
"test_outputs = []\n",
|
|
||||||
"\n",
|
|
||||||
"# read in 1 testing images from .pb files\n",
|
|
||||||
"test_data_size = 1\n",
|
|
||||||
"\n",
|
|
||||||
"for i in np.arange(test_data_size):\n",
|
|
||||||
" input_test_data = os.path.join(model_dir, 'test_data_set_{0}'.format(i), 'input_0.pb')\n",
|
|
||||||
" output_test_data = os.path.join(model_dir, 'test_data_set_{0}'.format(i), 'output_0.pb')\n",
|
|
||||||
" \n",
|
|
||||||
" # convert protobuf tensors to np arrays using the TensorProto reader from ONNX\n",
|
|
||||||
" tensor = onnx.TensorProto()\n",
|
|
||||||
" with open(input_test_data, 'rb') as f:\n",
|
|
||||||
" tensor.ParseFromString(f.read())\n",
|
|
||||||
" \n",
|
|
||||||
" input_data = numpy_helper.to_array(tensor)\n",
|
|
||||||
" test_inputs.append(input_data)\n",
|
|
||||||
" \n",
|
|
||||||
" with open(output_test_data, 'rb') as f:\n",
|
|
||||||
" tensor.ParseFromString(f.read())\n",
|
|
||||||
" \n",
|
|
||||||
" output_data = numpy_helper.to_array(tensor)\n",
|
|
||||||
" test_outputs.append(output_data)\n",
|
|
||||||
" \n",
|
|
||||||
"if len(test_inputs) == test_data_size:\n",
|
|
||||||
" print('Test data loaded successfully.')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {
|
|
||||||
"nbpresent": {
|
|
||||||
"id": "c3f2f57c-7454-4d3e-b38d-b0946cf066ea"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"source": [
|
|
||||||
"### Show some sample images\n",
|
|
||||||
"We use `matplotlib` to plot 1 test images from the dataset."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"nbpresent": {
|
|
||||||
"id": "396d478b-34aa-4afa-9898-cdce8222a516"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"plt.figure(figsize = (16, 6))\n",
|
|
||||||
"for test_image in np.arange(test_data_size):\n",
|
|
||||||
" plt.subplot(1, 15, test_image+1)\n",
|
|
||||||
" plt.axhline('')\n",
|
|
||||||
" plt.axvline('')\n",
|
|
||||||
" plt.imshow(test_inputs[test_image].reshape(28, 28), cmap = plt.cm.Greys)\n",
|
|
||||||
"plt.show()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Run evaluation / prediction"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"plt.figure(figsize = (16, 6))\n",
|
|
||||||
"plt.subplot(1, 8, 1)\n",
|
|
||||||
"\n",
|
|
||||||
"plt.text(x = 0, y = -30, s = \"True Label: \", fontsize = 13, color = 'black')\n",
|
|
||||||
"plt.text(x = 0, y = -20, s = \"Result: \", fontsize = 13, color = 'black')\n",
|
|
||||||
"plt.text(x = 0, y = -10, s = \"Inference Time: \", fontsize = 13, color = 'black')\n",
|
|
||||||
"plt.text(x = 3, y = 14, s = \"Model Input\", fontsize = 12, color = 'black')\n",
|
|
||||||
"plt.text(x = 6, y = 18, s = \"(28 x 28)\", fontsize = 12, color = 'black')\n",
|
|
||||||
"plt.imshow(np.ones((28,28)), cmap=plt.cm.Greys) \n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"for i in np.arange(test_data_size):\n",
|
|
||||||
" \n",
|
|
||||||
" input_data = json.dumps({'data': test_inputs[i].tolist()})\n",
|
|
||||||
" \n",
|
|
||||||
" # predict using the deployed model\n",
|
|
||||||
" r = aci_service.run(input_data)\n",
|
|
||||||
" \n",
|
|
||||||
" if \"error\" in r:\n",
|
|
||||||
" print(r['error'])\n",
|
|
||||||
" break\n",
|
|
||||||
" \n",
|
|
||||||
" result = r['result']\n",
|
|
||||||
" time_ms = np.round(r['time_in_sec'] * 1000, 2)\n",
|
|
||||||
" \n",
|
|
||||||
" ground_truth = int(np.argmax(test_outputs[i]))\n",
|
|
||||||
" \n",
|
|
||||||
" # compare actual value vs. the predicted values:\n",
|
|
||||||
" plt.subplot(1, 8, i+2)\n",
|
|
||||||
" plt.axhline('')\n",
|
|
||||||
" plt.axvline('')\n",
|
|
||||||
"\n",
|
|
||||||
" # use different color for misclassified sample\n",
|
|
||||||
" font_color = 'red' if ground_truth != result else 'black'\n",
|
|
||||||
" clr_map = plt.cm.gray if ground_truth != result else plt.cm.Greys\n",
|
|
||||||
"\n",
|
|
||||||
" # ground truth labels are in blue\n",
|
|
||||||
" plt.text(x = 10, y = -30, s = ground_truth, fontsize = 18, color = 'blue')\n",
|
|
||||||
" \n",
|
|
||||||
" # predictions are in black if correct, red if incorrect\n",
|
|
||||||
" plt.text(x = 10, y = -20, s = result, fontsize = 18, color = font_color)\n",
|
|
||||||
" plt.text(x = 5, y = -10, s = str(time_ms) + ' ms', fontsize = 14, color = font_color)\n",
|
|
||||||
"\n",
|
|
||||||
" \n",
|
|
||||||
" plt.imshow(test_inputs[i].reshape(28, 28), cmap = clr_map)\n",
|
|
||||||
"\n",
|
|
||||||
"plt.show()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Try classifying your own images!\n",
|
|
||||||
"\n",
|
|
||||||
"Create your own handwritten image and pass it into the model."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Preprocessing functions take your image and format it so it can be passed\n",
|
|
||||||
"# as input into our ONNX model\n",
|
|
||||||
"\n",
|
|
||||||
"import cv2\n",
|
|
||||||
"\n",
|
|
||||||
"def rgb2gray(rgb):\n",
|
|
||||||
" \"\"\"Convert the input image into grayscale\"\"\"\n",
|
|
||||||
" return np.dot(rgb[...,:3], [0.299, 0.587, 0.114])\n",
|
|
||||||
"\n",
|
|
||||||
"def resize_img(img_to_resize):\n",
|
|
||||||
" \"\"\"Resize image to MNIST model input dimensions\"\"\"\n",
|
|
||||||
" r_img = cv2.resize(img_to_resize, dsize=(28, 28), interpolation=cv2.INTER_AREA)\n",
|
|
||||||
" r_img.resize((1, 1, 28, 28))\n",
|
|
||||||
" return r_img\n",
|
|
||||||
"\n",
|
|
||||||
"def preprocess(img_to_preprocess):\n",
|
|
||||||
" \"\"\"Resize input images and convert them to grayscale.\"\"\"\n",
|
|
||||||
" if img_to_preprocess.shape == (28, 28):\n",
|
|
||||||
" img_to_preprocess.resize((1, 1, 28, 28))\n",
|
|
||||||
" return img_to_preprocess\n",
|
|
||||||
" \n",
|
|
||||||
" grayscale = rgb2gray(img_to_preprocess)\n",
|
|
||||||
" processed_img = resize_img(grayscale)\n",
|
|
||||||
" return processed_img"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Replace this string with your own path/test image\n",
|
|
||||||
"# Make sure your image is square and the dimensions are equal (i.e. 100 * 100 pixels or 28 * 28 pixels)\n",
|
|
||||||
"\n",
|
|
||||||
"# Any PNG or JPG image file should work\n",
|
|
||||||
"\n",
|
|
||||||
"your_test_image = \"<path to file>\"\n",
|
|
||||||
"\n",
|
|
||||||
"# e.g. your_test_image = \"C:/Users/vinitra.swamy/Pictures/handwritten_digit.png\"\n",
|
|
||||||
"\n",
|
|
||||||
"import matplotlib.image as mpimg\n",
|
|
||||||
"\n",
|
|
||||||
"if your_test_image != \"<path to file>\":\n",
|
|
||||||
" img = mpimg.imread(your_test_image)\n",
|
|
||||||
" plt.subplot(1,3,1)\n",
|
|
||||||
" plt.imshow(img, cmap = plt.cm.Greys)\n",
|
|
||||||
" print(\"Old Dimensions: \", img.shape)\n",
|
|
||||||
" img = preprocess(img)\n",
|
|
||||||
" print(\"New Dimensions: \", img.shape)\n",
|
|
||||||
"else:\n",
|
|
||||||
" img = None"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"if img is None:\n",
|
|
||||||
" print(\"Add the path for your image data.\")\n",
|
|
||||||
"else:\n",
|
|
||||||
" input_data = json.dumps({'data': img.tolist()})\n",
|
|
||||||
"\n",
|
|
||||||
" try:\n",
|
|
||||||
" r = aci_service.run(input_data)\n",
|
|
||||||
" result = r['result']\n",
|
|
||||||
" time_ms = np.round(r['time_in_sec'] * 1000, 2)\n",
|
|
||||||
" except KeyError as e:\n",
|
|
||||||
" print(str(e))\n",
|
|
||||||
"\n",
|
|
||||||
" plt.figure(figsize = (16, 6))\n",
|
|
||||||
" plt.subplot(1, 15,1)\n",
|
|
||||||
" plt.axhline('')\n",
|
|
||||||
" plt.axvline('')\n",
|
|
||||||
" plt.text(x = -100, y = -20, s = \"Model prediction: \", fontsize = 14)\n",
|
|
||||||
" plt.text(x = -100, y = -10, s = \"Inference time: \", fontsize = 14)\n",
|
|
||||||
" plt.text(x = 0, y = -20, s = str(result), fontsize = 14)\n",
|
|
||||||
" plt.text(x = 0, y = -10, s = str(time_ms) + \" ms\", fontsize = 14)\n",
|
|
||||||
" plt.text(x = -100, y = 14, s = \"Input image: \", fontsize = 14)\n",
|
|
||||||
" plt.imshow(img.reshape(28, 28), cmap = plt.cm.gray) "
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Optional: How does our ONNX MNIST model work? \n",
|
|
||||||
"#### A brief explanation of Convolutional Neural Networks\n",
|
|
||||||
"\n",
|
|
||||||
"A [convolutional neural network](https://en.wikipedia.org/wiki/Convolutional_neural_network) (CNN, or ConvNet) is a type of [feed-forward](https://en.wikipedia.org/wiki/Feedforward_neural_network) artificial neural network made up of neurons that have learnable weights and biases. The CNNs take advantage of the spatial nature of the data. In nature, we perceive different objects by their shapes, size and colors. For example, objects in a natural scene are typically edges, corners/vertices (defined by two of more edges), color patches etc. These primitives are often identified using different detectors (e.g., edge detection, color detector) or combination of detectors interacting to facilitate image interpretation (object classification, region of interest detection, scene description etc.) in real world vision related tasks. These detectors are also known as filters. Convolution is a mathematical operator that takes an image and a filter as input and produces a filtered output (representing say edges, corners, or colors in the input image). \n",
|
|
||||||
"\n",
|
|
||||||
"Historically, these filters are a set of weights that were often hand crafted or modeled with mathematical functions (e.g., [Gaussian](https://en.wikipedia.org/wiki/Gaussian_filter) / [Laplacian](http://homepages.inf.ed.ac.uk/rbf/HIPR2/log.htm) / [Canny](https://en.wikipedia.org/wiki/Canny_edge_detector) filter). The filter outputs are mapped through non-linear activation functions mimicking human brain cells called [neurons](https://en.wikipedia.org/wiki/Neuron). Popular deep CNNs or ConvNets (such as [AlexNet](https://en.wikipedia.org/wiki/AlexNet), [VGG](https://arxiv.org/abs/1409.1556), [Inception](http://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/Szegedy_Going_Deeper_With_2015_CVPR_paper.pdf), [ResNet](https://arxiv.org/pdf/1512.03385v1.pdf)) that are used for various [computer vision](https://en.wikipedia.org/wiki/Computer_vision) tasks have many of these architectural primitives (inspired from biology). \n",
|
|
||||||
"\n",
|
|
||||||
"### Convolution Layer\n",
|
|
||||||
"\n",
|
|
||||||
"A convolution layer is a set of filters. Each filter is defined by a weight (**W**) matrix, and bias ($b$).\n",
|
|
||||||
"\n",
|
|
||||||
"These filters are scanned across the image performing the dot product between the weights and corresponding input value ($x$). The bias value is added to the output of the dot product and the resulting sum is optionally mapped through an activation function."
|
|
||||||
]
|
|
||||||
},
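To make the weight, bias, and dot-product description above concrete, here is a minimal NumPy sketch of a single convolution filter scanned across an MNIST-sized input; the filter values are illustrative, not taken from the deployed model.

```python
import numpy as np

def conv2d_single_filter(x, W, b):
    """Slide filter W over x, take the dot product at each position,
    add the bias b, and apply a ReLU activation."""
    kh, kw = W.shape
    out_h, out_w = x.shape[0] - kh + 1, x.shape[1] - kw + 1
    out = np.zeros((out_h, out_w))
    for i in range(out_h):
        for j in range(out_w):
            patch = x[i:i + kh, j:j + kw]
            out[i, j] = np.sum(patch * W) + b
    return np.maximum(out, 0)  # ReLU activation

x = np.random.rand(28, 28)            # a 28x28 input, like one MNIST image
W = np.array([[1., 0., -1.],          # a hand-crafted vertical-edge filter
              [1., 0., -1.],
              [1., 0., -1.]])
feature_map = conv2d_single_filter(x, W, b=0.0)
print(feature_map.shape)              # (26, 26)
```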
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Model Description\n",
|
|
||||||
"\n",
|
|
||||||
"The MNIST model from the ONNX Model Zoo uses maxpooling to update the weights in its convolutions, summarized by the graphic below. You can see the entire workflow of our pre-trained model in the following image, with our input images and our output probabilities of each of our 10 labels. If you're interested in exploring the logic behind creating a Deep Learning model further, please look at the [training tutorial for our ONNX MNIST Convolutional Neural Network](https://github.com/Microsoft/CNTK/blob/master/Tutorials/CNTK_103D_MNIST_ConvolutionalNeuralNetwork.ipynb). "
|
|
||||||
]
|
|
||||||
},
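If you want to sanity-check the pre-trained model locally before or after deploying it, a minimal ONNX Runtime sketch looks like the following. The model path and input shape are assumptions based on the MNIST model from the ONNX Model Zoo and may differ for your download.

```python
import numpy as np
import onnxruntime as ort

# Assumed local path to the MNIST model downloaded from the ONNX Model Zoo.
session = ort.InferenceSession("mnist/model.onnx")

input_name = session.get_inputs()[0].name
# The Model Zoo MNIST model expects a float32 tensor of shape (1, 1, 28, 28).
dummy_image = np.random.rand(1, 1, 28, 28).astype(np.float32)

outputs = session.run(None, {input_name: dummy_image})
print("Predicted digit:", int(np.argmax(outputs[0])))
```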
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# remember to delete your service after you are done using it!\n",
|
|
||||||
"\n",
|
|
||||||
"aci_service.delete()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Conclusion\n",
|
|
||||||
"\n",
|
|
||||||
"Congratulations!\n",
|
|
||||||
"\n",
|
|
||||||
"In this tutorial, you have:\n",
|
|
||||||
"- familiarized yourself with ONNX Runtime inference and the pretrained models in the ONNX model zoo\n",
|
|
||||||
"- understood a state-of-the-art convolutional neural net image classification model (MNIST in ONNX) and deployed it in Azure ML cloud\n",
|
|
||||||
"- ensured that your deep learning model is working perfectly (in the cloud) on test data, and checked it against some of your own!\n",
|
|
||||||
"\n",
|
|
||||||
"Next steps:\n",
|
|
||||||
"- Check out another interesting application based on a Microsoft Research computer vision paper that lets you set up a [facial emotion recognition model](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/deployment/onnx/onnx-inference-facial-expression-recognition-deploy.ipynb) in the cloud! This tutorial deploys a pre-trained ONNX Computer Vision model in an Azure ML virtual machine.\n",
|
|
||||||
"- Contribute to our [open source ONNX repository on github](http://github.com/onnx/onnx) and/or add to our [ONNX model zoo](http://github.com/onnx/models)"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"authors": [
|
|
||||||
{
|
|
||||||
"name": "viswamy"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"category": "deployment",
|
|
||||||
"compute": [
|
|
||||||
"Local"
|
|
||||||
],
|
|
||||||
"datasets": [
|
|
||||||
"MNIST"
|
|
||||||
],
|
|
||||||
"deployment": [
|
|
||||||
"Azure Container Instance"
|
|
||||||
],
|
|
||||||
"exclude_from_index": false,
|
|
||||||
"framework": [
|
|
||||||
"ONNX"
|
|
||||||
],
|
|
||||||
"friendly_name": "Deploy MNIST digit recognition with ONNX Runtime",
|
|
||||||
"index_order": 1,
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3.8 - AzureML",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python38-azureml"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.6.5"
|
|
||||||
},
|
|
||||||
"msauthor": "vinitra.swamy",
|
|
||||||
"star_tag": [],
|
|
||||||
"tags": [
|
|
||||||
"ONNX Model Zoo"
|
|
||||||
],
|
|
||||||
"task": "Image Classification"
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 2
|
|
||||||
}
|
|
||||||
@@ -1,9 +0,0 @@
|
|||||||
name: onnx-inference-mnist-deploy
|
|
||||||
dependencies:
|
|
||||||
- pip:
|
|
||||||
- azureml-sdk
|
|
||||||
- azureml-widgets
|
|
||||||
- matplotlib
|
|
||||||
- numpy
|
|
||||||
- onnx<1.7.0
|
|
||||||
- opencv-python-headless
|
|
||||||
@@ -242,7 +242,7 @@
|
|||||||
"from azureml.core.conda_dependencies import CondaDependencies \n",
|
"from azureml.core.conda_dependencies import CondaDependencies \n",
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"myenv = CondaDependencies.create(pip_packages=[\"numpy\", \"onnxruntime\", \"azureml-core\", \"azureml-defaults\"])\n",
|
"myenv = CondaDependencies.create(pip_packages=[\"numpy\", \"onnxruntime==1.15.1\", \"azureml-core\", \"azureml-defaults\"])\n",
|
||||||
"\n",
|
"\n",
|
||||||
"with open(\"myenv.yml\",\"w\") as f:\n",
|
"with open(\"myenv.yml\",\"w\") as f:\n",
|
||||||
" f.write(myenv.serialize_to_string())"
|
" f.write(myenv.serialize_to_string())"
|
||||||
|
|||||||
@@ -105,7 +105,7 @@
|
|||||||
" print('Found existing compute target.')\n",
|
" print('Found existing compute target.')\n",
|
||||||
"except ComputeTargetException:\n",
|
"except ComputeTargetException:\n",
|
||||||
" print('Creating a new compute target...')\n",
|
" print('Creating a new compute target...')\n",
|
||||||
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6', \n",
|
" compute_config = AmlCompute.provisioning_configuration(vm_size='Standard_NC6s_v3', \n",
|
||||||
" max_nodes=6)\n",
|
" max_nodes=6)\n",
|
||||||
"\n",
|
"\n",
|
||||||
" # create the cluster\n",
|
" # create the cluster\n",
|
||||||
@@ -416,7 +416,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"from azureml.core.conda_dependencies import CondaDependencies \n",
|
"from azureml.core.conda_dependencies import CondaDependencies \n",
|
||||||
"\n",
|
"\n",
|
||||||
"myenv = CondaDependencies.create(pip_packages=[\"numpy\",\"onnxruntime\",\"azureml-core\", \"azureml-defaults\"])\n",
|
"myenv = CondaDependencies.create(pip_packages=[\"numpy\",\"onnxruntime==1.15.1\",\"azureml-core\", \"azureml-defaults\"])\n",
|
||||||
"\n",
|
"\n",
|
||||||
"with open(\"myenv.yml\",\"w\") as f:\n",
|
"with open(\"myenv.yml\",\"w\") as f:\n",
|
||||||
" f.write(myenv.serialize_to_string())"
|
" f.write(myenv.serialize_to_string())"
|
||||||
@@ -620,7 +620,7 @@
|
|||||||
},
|
},
|
||||||
"manual": null
|
"manual": null
|
||||||
},
|
},
|
||||||
"vm_size": "STANDARD_NC6"
|
"vm_size": "Standard_NC6s_v3"
|
||||||
},
|
},
|
||||||
"error": "",
|
"error": "",
|
||||||
"layout": "IPY_MODEL_c899ddfc2b134ca9b89a4f278ac7c997",
|
"layout": "IPY_MODEL_c899ddfc2b134ca9b89a4f278ac7c997",
|
||||||
|
|||||||
@@ -136,6 +136,9 @@
|
|||||||
"# Choose a name for your GPU cluster\n",
|
"# Choose a name for your GPU cluster\n",
|
||||||
"gpu_cluster_name = \"aks-gpu-cluster\"\n",
|
"gpu_cluster_name = \"aks-gpu-cluster\"\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"# Choose a location for your GPU cluster\n",
|
||||||
|
"gpu_cluster_location = \"eastus\"\n",
|
||||||
|
"\n",
|
||||||
"# Verify that cluster does not exist already\n",
|
"# Verify that cluster does not exist already\n",
|
||||||
"try:\n",
|
"try:\n",
|
||||||
" gpu_cluster = ComputeTarget(workspace=ws, name=gpu_cluster_name)\n",
|
" gpu_cluster = ComputeTarget(workspace=ws, name=gpu_cluster_name)\n",
|
||||||
@@ -146,7 +149,8 @@
|
|||||||
" # Specify the configuration for the new cluster\n",
|
" # Specify the configuration for the new cluster\n",
|
||||||
" compute_config = AksCompute.provisioning_configuration(cluster_purpose=AksCompute.ClusterPurpose.DEV_TEST,\n",
|
" compute_config = AksCompute.provisioning_configuration(cluster_purpose=AksCompute.ClusterPurpose.DEV_TEST,\n",
|
||||||
" agent_count=1,\n",
|
" agent_count=1,\n",
|
||||||
" vm_size=\"Standard_NV6\")\n",
|
" vm_size=\"Standard_NC6s_v3\",\n",
|
||||||
|
" location=gpu_cluster_location)\n",
|
||||||
" # Create the cluster with the specified name and configuration\n",
|
" # Create the cluster with the specified name and configuration\n",
|
||||||
" gpu_cluster = ComputeTarget.create(ws, gpu_cluster_name, compute_config)\n",
|
" gpu_cluster = ComputeTarget.create(ws, gpu_cluster_name, compute_config)\n",
|
||||||
"\n",
|
"\n",
|
||||||
@@ -170,7 +174,7 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"%%writefile score.py\n",
|
"%%writefile score.py\n",
|
||||||
"import tensorflow as tf\n",
|
"import tensorflow.compat.v1 as tf\n",
|
||||||
"import numpy as np\n",
|
"import numpy as np\n",
|
||||||
"import json\n",
|
"import json\n",
|
||||||
"import os\n",
|
"import os\n",
|
||||||
@@ -240,9 +244,9 @@
|
|||||||
"# Please see [Azure ML Containers repository](https://github.com/Azure/AzureML-Containers#featured-tags)\n",
|
"# Please see [Azure ML Containers repository](https://github.com/Azure/AzureML-Containers#featured-tags)\n",
|
||||||
"# for open-sourced GPU base images.\n",
|
"# for open-sourced GPU base images.\n",
|
||||||
"env.docker.base_image = DEFAULT_GPU_IMAGE\n",
|
"env.docker.base_image = DEFAULT_GPU_IMAGE\n",
|
||||||
"env.python.conda_dependencies = CondaDependencies.create(python_version=\"3.6.2\", pin_sdk_version=False,\n",
|
"env.python.conda_dependencies = CondaDependencies.create(python_version=\"3.8\", pin_sdk_version=False,\n",
|
||||||
" conda_packages=['tensorflow-gpu==1.12.0','numpy'],\n",
|
" conda_packages=['tensorflow-gpu','numpy'],\n",
|
||||||
" pip_packages=['azureml-contrib-services==1.47.0', 'azureml-defaults==1.47.0'])\n",
|
" pip_packages=['azureml-contrib-services', 'azureml-defaults'])\n",
|
||||||
"\n",
|
"\n",
|
||||||
"inference_config = InferenceConfig(entry_script=\"score.py\", environment=env)\n",
|
"inference_config = InferenceConfig(entry_script=\"score.py\", environment=env)\n",
|
||||||
"aks_config = AksWebservice.deploy_configuration()\n",
|
"aks_config = AksWebservice.deploy_configuration()\n",
|
||||||
|
|||||||
@@ -2,4 +2,3 @@ name: production-deploy-to-aks-gpu
|
|||||||
dependencies:
|
dependencies:
|
||||||
- pip:
|
- pip:
|
||||||
- azureml-sdk
|
- azureml-sdk
|
||||||
- tensorflow
|
|
||||||
|
|||||||
@@ -154,7 +154,7 @@
|
|||||||
"import pickle\n",
|
"import pickle\n",
|
||||||
"import json\n",
|
"import json\n",
|
||||||
"import numpy\n",
|
"import numpy\n",
|
||||||
"from sklearn.externals import joblib\n",
|
"import joblib\n",
|
||||||
"from sklearn.linear_model import Ridge\n",
|
"from sklearn.linear_model import Ridge\n",
|
||||||
"from inference_schema.schema_decorators import input_schema, output_schema\n",
|
"from inference_schema.schema_decorators import input_schema, output_schema\n",
|
||||||
"from inference_schema.parameter_types.standard_py_parameter_type import StandardPythonParameterType\n",
|
"from inference_schema.parameter_types.standard_py_parameter_type import StandardPythonParameterType\n",
|
||||||
|
|||||||
@@ -154,7 +154,7 @@
|
|||||||
"import pickle\n",
|
"import pickle\n",
|
||||||
"import json\n",
|
"import json\n",
|
||||||
"import numpy\n",
|
"import numpy\n",
|
||||||
"from sklearn.externals import joblib\n",
|
"import joblib\n",
|
||||||
"from sklearn.linear_model import Ridge\n",
|
"from sklearn.linear_model import Ridge\n",
|
||||||
"\n",
|
"\n",
|
||||||
"def init():\n",
|
"def init():\n",
|
||||||
|
|||||||
Binary file not shown.
@@ -1 +0,0 @@
|
|||||||
{"class":"org.apache.spark.ml.classification.LogisticRegressionModel","timestamp":1570147252329,"sparkVersion":"2.4.0","uid":"LogisticRegression_5df3978caaf3","paramMap":{"regParam":0.01},"defaultParamMap":{"aggregationDepth":2,"threshold":0.5,"rawPredictionCol":"rawPrediction","featuresCol":"features","labelCol":"label","predictionCol":"prediction","family":"auto","regParam":0.0,"tol":1.0E-6,"probabilityCol":"probability","standardization":true,"elasticNetParam":0.0,"maxIter":100,"fitIntercept":true}}
|
|
||||||
@@ -1,349 +0,0 @@
|
|||||||
{
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
|
||||||
"\n",
|
|
||||||
"Licensed under the MIT License."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
""
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# Register Spark Model and deploy as Webservice\n",
|
|
||||||
"\n",
|
|
||||||
"This example shows how to deploy a Webservice in step-by-step fashion:\n",
|
|
||||||
"\n",
|
|
||||||
" 1. Register Spark Model\n",
|
|
||||||
" 2. Deploy Spark Model as Webservice"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Prerequisites\n",
|
|
||||||
"If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, make sure you go through the [configuration](../../../configuration.ipynb) Notebook first if you haven't."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Check core SDK version number\n",
|
|
||||||
"import azureml.core\n",
|
|
||||||
"\n",
|
|
||||||
"print(\"SDK version:\", azureml.core.VERSION)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Initialize Workspace\n",
|
|
||||||
"\n",
|
|
||||||
"Initialize a workspace object from persisted configuration."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"tags": [
|
|
||||||
"create workspace"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core import Workspace\n",
|
|
||||||
"\n",
|
|
||||||
"ws = Workspace.from_config()\n",
|
|
||||||
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep='\\n')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Register Model"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"You can add tags and descriptions to your Models. Note you need to have a `iris.model` file in the current directory. This model file is generated using [train in spark](../training/train-in-spark/train-in-spark.ipynb) notebook. The below call registers that file as a Model with the same name `iris.model` in the workspace.\n",
|
|
||||||
"\n",
|
|
||||||
"Using tags, you can track useful information such as the name and version of the machine learning library used to train the model. Note that tags must be alphanumeric."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"tags": [
|
|
||||||
"register model from file"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.model import Model\n",
|
|
||||||
"\n",
|
|
||||||
"model = Model.register(model_path=\"iris.model\",\n",
|
|
||||||
" model_name=\"iris.model\",\n",
|
|
||||||
" tags={'type': \"regression\"},\n",
|
|
||||||
" description=\"Logistic regression model to predict iris species\",\n",
|
|
||||||
" workspace=ws)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Fetch Environment"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"You can now create and/or use an Environment object when deploying a Webservice. The Environment can have been previously registered with your Workspace, or it will be registered with it as a part of the Webservice deployment.\n",
|
|
||||||
"\n",
|
|
||||||
"More information can be found in our [using environments notebook](../training/using-environments/using-environments.ipynb)."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core import Environment\r\n",
|
|
||||||
"from azureml.core.environment import SparkPackage\r\n",
|
|
||||||
"from azureml.core.conda_dependencies import CondaDependencies\r\n",
|
|
||||||
"\r\n",
|
|
||||||
"myenv = Environment('my-pyspark-environment')\r\n",
|
|
||||||
"myenv.docker.base_image = \"mcr.microsoft.com/mmlspark/release:0.15\"\r\n",
|
|
||||||
"myenv.inferencing_stack_version = \"latest\"\r\n",
|
|
||||||
"myenv.python.conda_dependencies = CondaDependencies.create(pip_packages=[\"azureml-core\",\"azureml-defaults\",\"azureml-telemetry\",\"azureml-train-restclients-hyperdrive\",\"azureml-train-core\"], python_version=\"3.7.0\")\r\n",
|
|
||||||
"myenv.python.conda_dependencies.add_channel(\"conda-forge\")\r\n",
|
|
||||||
"myenv.spark.packages = [SparkPackage(\"com.microsoft.ml.spark\", \"mmlspark_2.11\", \"0.15\"), SparkPackage(\"com.microsoft.azure\", \"azure-storage\", \"2.0.0\"), SparkPackage(\"org.apache.hadoop\", \"hadoop-azure\", \"2.7.0\")]\r\n",
|
|
||||||
"myenv.spark.repositories = [\"https://mmlspark.azureedge.net/maven\"]\r\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Create Inference Configuration\n",
|
|
||||||
"\n",
|
|
||||||
"There is now support for a source directory, you can upload an entire folder from your local machine as dependencies for the Webservice.\n",
|
|
||||||
"Note: in that case, your entry_script is relative path to the source_directory path.\n",
|
|
||||||
"\n",
|
|
||||||
"Sample code for using a source directory:\n",
|
|
||||||
"\n",
|
|
||||||
"```python\n",
|
|
||||||
"inference_config = InferenceConfig(source_directory=\"C:/abc\",\n",
|
|
||||||
" entry_script=\"x/y/score.py\",\n",
|
|
||||||
" environment=environment)\n",
|
|
||||||
"```\n",
|
|
||||||
"\n",
|
|
||||||
" - source_directory = holds source path as string, this entire folder gets added in image so its really easy to access any files within this folder or subfolder\n",
|
|
||||||
" - entry_script = contains logic specific to initializing your model and running predictions\n",
|
|
||||||
" - environment = An environment object to use for the deployment. Doesn't have to be registered"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"tags": [
|
|
||||||
"create image"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.model import InferenceConfig\n",
|
|
||||||
"\n",
|
|
||||||
"inference_config = InferenceConfig(entry_script=\"score.py\", environment=myenv)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Deploy Model as Webservice on Azure Container Instance\n",
|
|
||||||
"\n",
|
|
||||||
"Note that the service creation can take few minutes."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"tags": [
|
|
||||||
"azuremlexception-remarks-sample"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.webservice import AciWebservice, Webservice\n",
|
|
||||||
"from azureml.exceptions import WebserviceException\n",
|
|
||||||
"\n",
|
|
||||||
"deployment_config = AciWebservice.deploy_configuration(cpu_cores=1, memory_gb=1)\n",
|
|
||||||
"aci_service_name = 'aciservice1'\n",
|
|
||||||
"\n",
|
|
||||||
"try:\n",
|
|
||||||
" # if you want to get existing service below is the command\n",
|
|
||||||
" # since aci name needs to be unique in subscription deleting existing aci if any\n",
|
|
||||||
" # we use aci_service_name to create azure aci\n",
|
|
||||||
" service = Webservice(ws, name=aci_service_name)\n",
|
|
||||||
" if service:\n",
|
|
||||||
" service.delete()\n",
|
|
||||||
"except WebserviceException as e:\n",
|
|
||||||
" print()\n",
|
|
||||||
"\n",
|
|
||||||
"service = Model.deploy(ws, aci_service_name, [model], inference_config, deployment_config)\n",
|
|
||||||
"\n",
|
|
||||||
"service.wait_for_deployment(True)\n",
|
|
||||||
"print(service.state)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"#### Test web service"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import json\n",
|
|
||||||
"test_sample = json.dumps({'features':{'type':1,'values':[4.3,3.0,1.1,0.1]},'label':2.0})\n",
|
|
||||||
"\n",
|
|
||||||
"test_sample_encoded = bytes(test_sample, encoding='utf8')\n",
|
|
||||||
"prediction = service.run(input_data=test_sample_encoded)\n",
|
|
||||||
"print(prediction)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"#### Delete ACI to clean up"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"tags": [
|
|
||||||
"deploy service",
|
|
||||||
"aci"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"service.delete()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Model Profiling\n",
|
|
||||||
"\n",
|
|
||||||
"You can also take advantage of the profiling feature to estimate CPU and memory requirements for models.\n",
|
|
||||||
"\n",
|
|
||||||
"```python\n",
|
|
||||||
"profile = Model.profile(ws, \"profilename\", [model], inference_config, test_sample)\n",
|
|
||||||
"profile.wait_for_profiling(True)\n",
|
|
||||||
"profiling_results = profile.get_results()\n",
|
|
||||||
"print(profiling_results)\n",
|
|
||||||
"```"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Model Packaging\n",
|
|
||||||
"\n",
|
|
||||||
"If you want to build a Docker image that encapsulates your model and its dependencies, you can use the model packaging option. The output image will be pushed to your workspace's ACR.\n",
|
|
||||||
"\n",
|
|
||||||
"You must include an Environment object in your inference configuration to use `Model.package()`.\n",
|
|
||||||
"\n",
|
|
||||||
"```python\n",
|
|
||||||
"package = Model.package(ws, [model], inference_config)\n",
|
|
||||||
"package.wait_for_creation(show_output=True) # Or show_output=False to hide the Docker build logs.\n",
|
|
||||||
"package.pull()\n",
|
|
||||||
"```\n",
|
|
||||||
"\n",
|
|
||||||
"Instead of a fully-built image, you can also generate a Dockerfile and download all the assets needed to build an image on top of your Environment.\n",
|
|
||||||
"\n",
|
|
||||||
"```python\n",
|
|
||||||
"package = Model.package(ws, [model], inference_config, generate_dockerfile=True)\n",
|
|
||||||
"package.wait_for_creation(show_output=True)\n",
|
|
||||||
"package.save(\"./local_context_dir\")\n",
|
|
||||||
"```"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"authors": [
|
|
||||||
{
|
|
||||||
"name": "vaidyas"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"category": "deployment",
|
|
||||||
"compute": [
|
|
||||||
"None"
|
|
||||||
],
|
|
||||||
"datasets": [
|
|
||||||
"Iris"
|
|
||||||
],
|
|
||||||
"deployment": [
|
|
||||||
"Azure Container Instance"
|
|
||||||
],
|
|
||||||
"exclude_from_index": false,
|
|
||||||
"framework": [
|
|
||||||
"PySpark"
|
|
||||||
],
|
|
||||||
"friendly_name": "Register Spark model and deploy as webservice",
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3.8 - AzureML",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python38-azureml"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.7.0"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 2
|
|
||||||
}
|
|
||||||
@@ -1,4 +0,0 @@
|
|||||||
name: model-register-and-deploy-spark
|
|
||||||
dependencies:
|
|
||||||
- pip:
|
|
||||||
- azureml-sdk
|
|
||||||
@@ -1,37 +0,0 @@
|
|||||||
import traceback
|
|
||||||
from pyspark.ml.linalg import VectorUDT
|
|
||||||
from azureml.core.model import Model
|
|
||||||
from pyspark.ml.classification import LogisticRegressionModel
|
|
||||||
from pyspark.sql.types import StructType, StructField
|
|
||||||
from pyspark.sql.types import DoubleType
|
|
||||||
from pyspark.sql import SQLContext
|
|
||||||
from pyspark import SparkContext
|
|
||||||
|
|
||||||
sc = SparkContext.getOrCreate()
|
|
||||||
sqlContext = SQLContext(sc)
|
|
||||||
spark = sqlContext.sparkSession
|
|
||||||
|
|
||||||
input_schema = StructType([StructField("features", VectorUDT()), StructField("label", DoubleType())])
|
|
||||||
reader = spark.read
|
|
||||||
reader.schema(input_schema)
|
|
||||||
|
|
||||||
|
|
||||||
def init():
|
|
||||||
global model
|
|
||||||
# note here "iris.model" is the name of the model registered under the workspace
|
|
||||||
# this call should return the path to the model.pkl file on the local disk.
|
|
||||||
model_path = Model.get_model_path('iris.model')
|
|
||||||
# Load the model file back into a LogisticRegression model
|
|
||||||
model = LogisticRegressionModel.load(model_path)
|
|
||||||
|
|
||||||
|
|
||||||
def run(data):
|
|
||||||
try:
|
|
||||||
input_df = reader.json(sc.parallelize([data]))
|
|
||||||
result = model.transform(input_df)
|
|
||||||
# you can return any datatype as long as it is JSON-serializable
|
|
||||||
return result.collect()[0]['prediction']
|
|
||||||
except Exception as e:
|
|
||||||
traceback.print_exc()
|
|
||||||
error = str(e)
|
|
||||||
return error
|
|
||||||
@@ -1,59 +0,0 @@
|
|||||||
# Copyright (c) Microsoft. All rights reserved.
|
|
||||||
# Licensed under the MIT license.
|
|
||||||
|
|
||||||
from azureml.core.run import Run
|
|
||||||
from azureml.interpret import ExplanationClient
|
|
||||||
from interpret_community.adapter import ExplanationAdapter
|
|
||||||
import joblib
|
|
||||||
import os
|
|
||||||
import shap
|
|
||||||
import xgboost
|
|
||||||
|
|
||||||
OUTPUT_DIR = './outputs/'
|
|
||||||
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
|
||||||
|
|
||||||
run = Run.get_context()
|
|
||||||
client = ExplanationClient.from_run(run)
|
|
||||||
|
|
||||||
# get a dataset on income prediction
|
|
||||||
X, y = shap.datasets.adult()
|
|
||||||
features = X.columns.values
|
|
||||||
|
|
||||||
# train an XGBoost model (but any other tree model type should work)
|
|
||||||
model = xgboost.XGBClassifier()
|
|
||||||
model.fit(X, y)
|
|
||||||
|
|
||||||
explainer = shap.explainers.GPUTree(model, X)
|
|
||||||
X_shap = X[:100]
|
|
||||||
shap_values = explainer(X_shap)
|
|
||||||
|
|
||||||
print("computed shap values:")
|
|
||||||
print(shap_values)
|
|
||||||
|
|
||||||
# Use the explanation adapter to convert the importances into an interpret-community
|
|
||||||
# style explanation which can be uploaded to AzureML or visualized in the
|
|
||||||
# ExplanationDashboard widget
|
|
||||||
adapter = ExplanationAdapter(features, classification=True)
|
|
||||||
global_explanation = adapter.create_global(shap_values.values, X_shap, expected_values=shap_values.base_values)
|
|
||||||
|
|
||||||
# write X_shap out as a pickle file for later visualization
|
|
||||||
x_shap_pkl = 'x_shap.pkl'
|
|
||||||
with open(x_shap_pkl, 'wb') as file:
|
|
||||||
joblib.dump(value=X_shap, filename=os.path.join(OUTPUT_DIR, x_shap_pkl))
|
|
||||||
run.upload_file('x_shap_adult_census.pkl', os.path.join(OUTPUT_DIR, x_shap_pkl))
|
|
||||||
|
|
||||||
model_file_name = 'xgboost_.pkl'
|
|
||||||
# save model in the outputs folder so it automatically gets uploaded
|
|
||||||
with open(model_file_name, 'wb') as file:
|
|
||||||
joblib.dump(value=model, filename=os.path.join(OUTPUT_DIR,
|
|
||||||
model_file_name))
|
|
||||||
|
|
||||||
# register the model
|
|
||||||
run.upload_file('xgboost_model.pkl', os.path.join('./outputs/', model_file_name))
|
|
||||||
original_model = run.register_model(model_name='xgboost_with_gpu_tree_explainer',
|
|
||||||
model_path='xgboost_model.pkl')
|
|
||||||
|
|
||||||
# Uploading model explanation data for storage or visualization in webUX
|
|
||||||
# The explanation can then be downloaded on any compute
|
|
||||||
comment = 'Global explanation on classification model trained on adult census income dataset'
|
|
||||||
client.upload_model_explanation(global_explanation, comment=comment, model_id=original_model.id)
|
|
||||||
@@ -1,517 +0,0 @@
|
|||||||
{
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
|
||||||
"\n",
|
|
||||||
"Licensed under the MIT License."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
""
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# Explain tree-based models on GPU using GPUTreeExplainer\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"_**This notebook illustrates how to use shap's GPUTreeExplainer on an Azure GPU machine.**_\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"Problem: Train a tree-based model and explain the model on an Azure GPU machine using the GPUTreeExplainer.\n",
|
|
||||||
"\n",
|
|
||||||
"---\n",
|
|
||||||
"\n",
|
|
||||||
"## Table of Contents\n",
|
|
||||||
"\n",
|
|
||||||
"1. [Introduction](#Introduction)\n",
|
|
||||||
"1. [Setup](#Setup)\n",
|
|
||||||
"1. [Run model explainer locally at training time](#Explain)\n",
|
|
||||||
" 1. Apply feature transformations\n",
|
|
||||||
" 1. Train a binary classification model\n",
|
|
||||||
" 1. Explain the model on raw features\n",
|
|
||||||
" 1. Generate global explanations\n",
|
|
||||||
" 1. Generate local explanations\n",
|
|
||||||
"1. [Visualize explanations](#Visualize)\n",
|
|
||||||
"1. [Deploy model and scoring explainer](#Deploy)\n",
|
|
||||||
"1. [Next steps](#Next)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Introduction\n",
|
|
||||||
"This notebook demonstrates how to use the GPUTreeExplainer on some simple datasets. Like the TreeExplainer, the GPUTreeExplainer is specifically designed for tree-based machine learning models, but it is designed to accelerate the computations using NVIDIA GPUs.\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"Make sure you have executed the [configuration](../../../configuration.ipynb) before running this notebook.\n",
|
|
||||||
"\n",
|
|
||||||
"Notebook synopsis:\n",
|
|
||||||
"\n",
|
|
||||||
"1. Creating an Experiment in an existing Workspace\n",
|
|
||||||
"2. Configuration and remote run with a GPU machine"
|
|
||||||
]
|
|
||||||
},
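As a local illustration of the technique this notebook submits to the remote GPU machine, here is a condensed sketch of the training script used in this sample. It assumes a shap build with CUDA support (as in the custom Docker image defined below) and an NVIDIA GPU on the machine running it.

```python
import shap
import xgboost

# Train a tree-based model on the adult census dataset and explain it with
# the GPU-accelerated Tree explainer, mirroring gpu_tree_explainer.py.
X, y = shap.datasets.adult()
model = xgboost.XGBClassifier()
model.fit(X, y)

explainer = shap.explainers.GPUTree(model, X)
shap_values = explainer(X[:100])   # explain the first 100 rows
print(shap_values.values.shape)
```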
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Setup"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import logging\n",
|
|
||||||
"import os\n",
|
|
||||||
"import shutil\n",
|
|
||||||
"\n",
|
|
||||||
"import pandas as pd\n",
|
|
||||||
"\n",
|
|
||||||
"import azureml.core\n",
|
|
||||||
"from azureml.core.experiment import Experiment\n",
|
|
||||||
"from azureml.core.workspace import Workspace\n",
|
|
||||||
"from azureml.core.dataset import Dataset\n",
|
|
||||||
"from azureml.core.compute import AmlCompute\n",
|
|
||||||
"from azureml.core.compute import ComputeTarget\n",
|
|
||||||
"from azureml.core.run import Run\n",
|
|
||||||
"from azureml.core.model import Model"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"This sample notebook may use features that are not available in previous versions of the Azure ML SDK."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"print(\"This notebook was created using version 1.51.0 of the Azure ML SDK\")\n",
|
|
||||||
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"As part of the setup you have already created a <b>Workspace</b>. To run the script, you also need to create an <b>Experiment</b>. An Experiment corresponds to a prediction problem you are trying to solve, while a Run corresponds to a specific approach to the problem."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"ws = Workspace.from_config()\n",
|
|
||||||
"\n",
|
|
||||||
"# Choose an experiment name.\n",
|
|
||||||
"experiment_name = 'gpu-tree-explainer'\n",
|
|
||||||
"\n",
|
|
||||||
"experiment = Experiment(ws, experiment_name)\n",
|
|
||||||
"\n",
|
|
||||||
"output = {}\n",
|
|
||||||
"output['Subscription ID'] = ws.subscription_id\n",
|
|
||||||
"output['Workspace Name'] = ws.name\n",
|
|
||||||
"output['Resource Group'] = ws.resource_group\n",
|
|
||||||
"output['Location'] = ws.location\n",
|
|
||||||
"output['Experiment Name'] = experiment.name\n",
|
|
||||||
"pd.set_option('display.max_colwidth', -1)\n",
|
|
||||||
"outputDf = pd.DataFrame(data = output, index = [''])\n",
|
|
||||||
"outputDf.T"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Create project directory\n",
|
|
||||||
"\n",
|
|
||||||
"Create a directory that will contain all the necessary code from your local machine that you will need access to on the remote resource. This includes the training script, and any additional files your training script depends on"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import os\n",
|
|
||||||
"import shutil\n",
|
|
||||||
"\n",
|
|
||||||
"project_folder = './azureml-shap-gpu-tree-explainer'\n",
|
|
||||||
"os.makedirs(project_folder, exist_ok=True)\n",
|
|
||||||
"shutil.copy('gpu_tree_explainer.py', project_folder)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Set up a compute cluster\n",
|
|
||||||
"This section uses a user-provided compute cluster (named \"gpu-shap-cluster\" in this example). If a cluster with this name does not exist in the user's workspace, the below code will create a new cluster. You can choose the parameters of the cluster as mentioned in the comments."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
|
|
||||||
"from azureml.core.compute_target import ComputeTargetException\n",
|
|
||||||
"\n",
|
|
||||||
"num_nodes = 1\n",
|
|
||||||
"\n",
|
|
||||||
"# Choose a name for your cluster.\n",
|
|
||||||
"amlcompute_cluster_name = \"gpu-shap-cluster\"\n",
|
|
||||||
"\n",
|
|
||||||
"# Verify that cluster does not exist already\n",
|
|
||||||
"try:\n",
|
|
||||||
" compute_target = ComputeTarget(workspace=ws, name=amlcompute_cluster_name)\n",
|
|
||||||
" print('Found existing cluster, use it.')\n",
|
|
||||||
"except ComputeTargetException:\n",
|
|
||||||
" compute_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_NC6\",\n",
|
|
||||||
" # To use GPUTreeExplainer, select a GPU such as \"STANDARD_NC6\" \n",
|
|
||||||
" # or similar GPU option\n",
|
|
||||||
" # available in your workspace\n",
|
|
||||||
" max_nodes = num_nodes)\n",
|
|
||||||
" compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, compute_config)\n",
|
|
||||||
"\n",
|
|
||||||
"compute_target.wait_for_completion(show_output=True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Configure & Run"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.runconfig import RunConfiguration\n",
|
|
||||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
|
||||||
"\n",
|
|
||||||
"# Create a new RunConfig object\n",
|
|
||||||
"run_config = RunConfiguration(framework=\"python\")\n",
|
|
||||||
"\n",
|
|
||||||
"# Set compute target to AmlCompute target created in previous step\n",
|
|
||||||
"run_config.target = amlcompute_cluster_name\n",
|
|
||||||
"\n",
|
|
||||||
"from azureml.core import Environment\n",
|
|
||||||
"\n",
|
|
||||||
"environment_name = \"shapgpu\"\n",
|
|
||||||
"env = Environment(environment_name)\n",
|
|
||||||
"\n",
|
|
||||||
"env.docker.enabled = True\n",
|
|
||||||
"env.docker.base_image = None\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"# Note: this is to pin the pandas and xgboost versions to be same as notebook.\n",
|
|
||||||
"# In production scenario user would choose their dependencies\n",
|
|
||||||
"import pkg_resources\n",
|
|
||||||
"from distutils.version import LooseVersion\n",
|
|
||||||
"available_packages = pkg_resources.working_set\n",
|
|
||||||
"pandas_ver = None\n",
|
|
||||||
"numpy_ver = None\n",
|
|
||||||
"sklearn_ver = None\n",
|
|
||||||
"for dist in list(available_packages):\n",
|
|
||||||
" if dist.key == 'pandas':\n",
|
|
||||||
" pandas_ver = dist.version\n",
|
|
||||||
" if dist.key == 'numpy':\n",
|
|
||||||
" if LooseVersion(dist.version) >= LooseVersion('1.20.0'):\n",
|
|
||||||
" numpy_ver = dist.version\n",
|
|
||||||
" else:\n",
|
|
||||||
" numpy_ver = '1.21.6'\n",
|
|
||||||
" if dist.key == 'scikit-learn':\n",
|
|
||||||
" sklearn_ver = dist.version\n",
|
|
||||||
"pandas_dep = 'pandas'\n",
|
|
||||||
"numpy_dep = 'numpy'\n",
|
|
||||||
"sklearn_dep = 'scikit-learn'\n",
|
|
||||||
"if pandas_ver:\n",
|
|
||||||
" pandas_dep = 'pandas=={}'.format(pandas_ver)\n",
|
|
||||||
"if numpy_ver:\n",
|
|
||||||
" numpy_dep = 'numpy=={}'.format(numpy_ver)\n",
|
|
||||||
"if sklearn_ver:\n",
|
|
||||||
" sklearn_dep = 'scikit-learn=={}'.format(sklearn_ver)\n",
|
|
||||||
"\n",
|
|
||||||
"# Note: we build shap at commit 690245 for Tesla K80 GPUs\n",
|
|
||||||
"env.docker.base_dockerfile = f\"\"\"\n",
|
|
||||||
"FROM nvidia/cuda:10.2-devel-ubuntu18.04\n",
|
|
||||||
"ENV PATH=\"/root/miniconda3/bin:${{PATH}}\"\n",
|
|
||||||
"ARG PATH=\"/root/miniconda3/bin:${{PATH}}\"\n",
|
|
||||||
"RUN apt-get update && \\\n",
|
|
||||||
"apt-get install -y fuse && \\\n",
|
|
||||||
"apt-get install -y build-essential && \\\n",
|
|
||||||
"apt-get install -y python3-dev && \\\n",
|
|
||||||
"apt-get install -y wget && \\\n",
|
|
||||||
"apt-get install -y git && \\\n",
|
|
||||||
"rm -rf /var/lib/apt/lists/* && \\\n",
|
|
||||||
"wget \\\n",
|
|
||||||
"https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \\\n",
|
|
||||||
"mkdir /root/.conda && \\\n",
|
|
||||||
"bash Miniconda3-latest-Linux-x86_64.sh -b && \\\n",
|
|
||||||
"rm -f Miniconda3-latest-Linux-x86_64.sh && \\\n",
|
|
||||||
"conda init bash && \\\n",
|
|
||||||
". ~/.bashrc && \\\n",
|
|
||||||
"conda create -n shapgpu python=3.8 && \\\n",
|
|
||||||
"conda activate shapgpu && \\\n",
|
|
||||||
"apt-get install -y g++ && \\\n",
|
|
||||||
"printenv && \\\n",
|
|
||||||
"echo \"which nvcc: \" && \\\n",
|
|
||||||
"which nvcc && \\\n",
|
|
||||||
"pip install azureml-defaults && \\\n",
|
|
||||||
"pip install azureml-telemetry && \\\n",
|
|
||||||
"pip install azureml-interpret && \\\n",
|
|
||||||
"pip install {pandas_dep} && \\\n",
|
|
||||||
"cd /usr/local/src && \\\n",
|
|
||||||
"git clone https://github.com/slundberg/shap.git --single-branch && \\\n",
|
|
||||||
"cd shap && \\\n",
|
|
||||||
"git reset --hard 690245c6ab043edf40cfce3d8438a62e29ab599f && \\\n",
|
|
||||||
"mkdir build && \\\n",
|
|
||||||
"python setup.py install --user && \\\n",
|
|
||||||
"pip uninstall -y xgboost && \\\n",
|
|
||||||
"conda install py-xgboost==1.3.3 && \\\n",
|
|
||||||
"pip uninstall -y numpy && \\\n",
|
|
||||||
"pip install {numpy_dep} && \\\n",
|
|
||||||
"pip install {sklearn_dep} && \\\n",
|
|
||||||
"pip install chardet \\\n",
|
|
||||||
"\"\"\"\n",
|
|
||||||
"\n",
|
|
||||||
"env.python.user_managed_dependencies = True\n",
|
|
||||||
"env.python.interpreter_path = '/root/miniconda3/envs/shapgpu/bin/python'\n",
|
|
||||||
"\n",
|
|
||||||
"from azureml.core import Run\n",
|
|
||||||
"from azureml.core import ScriptRunConfig\n",
|
|
||||||
"\n",
|
|
||||||
"src = ScriptRunConfig(source_directory=project_folder, \n",
|
|
||||||
" script='gpu_tree_explainer.py', \n",
|
|
||||||
" compute_target=amlcompute_cluster_name,\n",
|
|
||||||
" environment=env) \n",
|
|
||||||
"run = experiment.submit(config=src)\n",
|
|
||||||
"run"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Note: if you need to cancel a run, you can follow [these instructions](https://aka.ms/aml-docs-cancel-run)."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"%%time\n",
|
|
||||||
"# Shows output of the run on stdout.\n",
|
|
||||||
"run.wait_for_completion(show_output=True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"run.get_metrics()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Download \n",
|
|
||||||
"1. Download model explanation data."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.interpret import ExplanationClient\n",
|
|
||||||
"\n",
|
|
||||||
"# Get model explanation data\n",
|
|
||||||
"client = ExplanationClient.from_run(run)\n",
|
|
||||||
"global_explanation = client.download_model_explanation()\n",
|
|
||||||
"local_importance_values = global_explanation.local_importance_values\n",
|
|
||||||
"expected_values = global_explanation.expected_values"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Get the top k (e.g., 4) most important features with their importance values\n",
|
|
||||||
"global_explanation_topk = client.download_model_explanation(top_k=4)\n",
|
|
||||||
"global_importance_values = global_explanation_topk.get_ranked_global_values()\n",
|
|
||||||
"global_importance_names = global_explanation_topk.get_ranked_global_names()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"print('global importance values: {}'.format(global_importance_values))\n",
|
|
||||||
"print('global importance names: {}'.format(global_importance_names))"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"2. Download model file."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Retrieve model for visualization and deployment\n",
|
|
||||||
"from azureml.core.model import Model\n",
|
|
||||||
"import joblib\n",
|
|
||||||
"original_model = Model(ws, 'xgboost_with_gpu_tree_explainer')\n",
|
|
||||||
"model_path = original_model.download(exist_ok=True)\n",
|
|
||||||
"original_model = joblib.load(model_path)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"3. Download test dataset."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Retrieve x_test for visualization\n",
|
|
||||||
"x_test_path = './x_shap_adult_census.pkl'\n",
|
|
||||||
"run.download_file('x_shap_adult_census.pkl', output_file_path=x_test_path)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"x_test = joblib.load('x_shap_adult_census.pkl')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Visualize\n",
|
|
||||||
"Load the visualization dashboard"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from raiwidgets import ExplanationDashboard"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from interpret_community.common.model_wrapper import wrap_model\n",
|
|
||||||
"from interpret_community.dataset.dataset_wrapper import DatasetWrapper\n",
|
|
||||||
"# note we need to wrap the XGBoost model to output predictions and probabilities in the scikit-learn format\n",
|
|
||||||
"class WrappedXGBoostModel(object):\n",
|
|
||||||
" \"\"\"A class for wrapping an XGBoost model to output integer predicted classes.\"\"\"\n",
|
|
||||||
"\n",
|
|
||||||
" def __init__(self, model):\n",
|
|
||||||
" self.model = model\n",
|
|
||||||
"\n",
|
|
||||||
" def predict(self, dataset):\n",
|
|
||||||
" return self.model.predict(dataset).astype(int)\n",
|
|
||||||
"\n",
|
|
||||||
" def predict_proba(self, dataset):\n",
|
|
||||||
" return self.model.predict_proba(dataset)\n",
|
|
||||||
"\n",
|
|
||||||
"wrapped_model = WrappedXGBoostModel(wrap_model(original_model, DatasetWrapper(x_test), model_task='classification'))"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"ExplanationDashboard(global_explanation, wrapped_model, dataset=x_test)"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"authors": [
|
|
||||||
{
|
|
||||||
"name": "ilmat"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3.8 - AzureML",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python38-azureml"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.6.8"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 2
|
|
||||||
}
|
|
||||||
@@ -1,18 +0,0 @@
|
|||||||
name: train-explain-model-gpu-tree-explainer
|
|
||||||
dependencies:
|
|
||||||
- py-xgboost==1.3.3
|
|
||||||
- pip:
|
|
||||||
- azureml-sdk
|
|
||||||
- azureml-interpret
|
|
||||||
- flask
|
|
||||||
- flask-cors
|
|
||||||
- gevent>=1.3.6
|
|
||||||
- ipython
|
|
||||||
- matplotlib
|
|
||||||
- ipywidgets
|
|
||||||
- raiwidgets~=0.26.0
|
|
||||||
- itsdangerous==2.0.1
|
|
||||||
- markupsafe<2.1.0
|
|
||||||
- scipy>=1.5.3
|
|
||||||
- protobuf==3.20.0
|
|
||||||
- jinja2==3.0.3
|
|
||||||
@@ -270,6 +270,7 @@
|
|||||||
"sklearn_ver = None\n",
|
"sklearn_ver = None\n",
|
||||||
"pandas_ver = None\n",
|
"pandas_ver = None\n",
|
||||||
"joblib_ver = None\n",
|
"joblib_ver = None\n",
|
||||||
|
"scipy_ver = None\n",
|
||||||
"for dist in list(available_packages):\n",
|
"for dist in list(available_packages):\n",
|
||||||
" if dist.key == 'scikit-learn':\n",
|
" if dist.key == 'scikit-learn':\n",
|
||||||
" sklearn_ver = dist.version\n",
|
" sklearn_ver = dist.version\n",
|
||||||
@@ -277,21 +278,26 @@
|
|||||||
" pandas_ver = dist.version\n",
|
" pandas_ver = dist.version\n",
|
||||||
" elif dist.key == 'joblib':\n",
|
" elif dist.key == 'joblib':\n",
|
||||||
" joblib_ver = dist.version\n",
|
" joblib_ver = dist.version\n",
|
||||||
|
" elif dist.key == 'scipy':\n",
|
||||||
|
" scipy_ver = dist.version\n",
|
||||||
"sklearn_dep = 'scikit-learn'\n",
|
"sklearn_dep = 'scikit-learn'\n",
|
||||||
"pandas_dep = 'pandas'\n",
|
"pandas_dep = 'pandas'\n",
|
||||||
"joblib_dep = 'joblib'\n",
|
"joblib_dep = 'joblib'\n",
|
||||||
|
"scipy_dep = 'scipy'\n",
|
||||||
"if sklearn_ver:\n",
|
"if sklearn_ver:\n",
|
||||||
" sklearn_dep = 'scikit-learn=={}'.format(sklearn_ver)\n",
|
" sklearn_dep = 'scikit-learn=={}'.format(sklearn_ver)\n",
|
||||||
"if pandas_ver:\n",
|
"if pandas_ver:\n",
|
||||||
" pandas_dep = 'pandas=={}'.format(pandas_ver)\n",
|
" pandas_dep = 'pandas=={}'.format(pandas_ver)\n",
|
||||||
"if joblib_ver:\n",
|
"if joblib_ver:\n",
|
||||||
" joblib_dep = 'joblib=={}'.format(joblib_ver)\n",
|
" joblib_dep = 'joblib=={}'.format(joblib_ver)\n",
|
||||||
|
"if scipy_ver:\n",
|
||||||
|
" scipy_dep = 'scipy=={}'.format(scipy_ver)\n",
|
||||||
"# Specify CondaDependencies obj\n",
|
"# Specify CondaDependencies obj\n",
|
||||||
"# The CondaDependencies specifies the conda and pip packages that are installed in the environment\n",
|
"# The CondaDependencies specifies the conda and pip packages that are installed in the environment\n",
|
||||||
"# the submitted job is run in. Note the remote environment(s) needs to be similar to the local\n",
|
"# the submitted job is run in. Note the remote environment(s) needs to be similar to the local\n",
|
||||||
"# environment, otherwise if a model is trained or deployed in a different environment this can\n",
|
"# environment, otherwise if a model is trained or deployed in a different environment this can\n",
|
||||||
"# cause errors. Please take extra care when specifying your dependencies in a production environment.\n",
|
"# cause errors. Please take extra care when specifying your dependencies in a production environment.\n",
|
||||||
"azureml_pip_packages.extend([sklearn_dep, pandas_dep, joblib_dep])\n",
|
"azureml_pip_packages.extend([sklearn_dep, pandas_dep, joblib_dep, scipy_dep])\n",
|
||||||
"run_config.environment.python.conda_dependencies = CondaDependencies.create(pip_packages=azureml_pip_packages, python_version=python_version)\n",
|
"run_config.environment.python.conda_dependencies = CondaDependencies.create(pip_packages=azureml_pip_packages, python_version=python_version)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"from azureml.core import ScriptRunConfig\n",
|
"from azureml.core import ScriptRunConfig\n",
|
||||||
|
|||||||
@@ -8,9 +8,8 @@ dependencies:
|
|||||||
- gevent>=1.3.6
|
- gevent>=1.3.6
|
||||||
- ipython
|
- ipython
|
||||||
- matplotlib
|
- matplotlib
|
||||||
- azureml-dataset-runtime
|
|
||||||
- ipywidgets
|
- ipywidgets
|
||||||
- raiwidgets~=0.26.0
|
- raiwidgets~=0.33.0
|
||||||
- itsdangerous==2.0.1
|
- itsdangerous==2.0.1
|
||||||
- markupsafe<2.1.0
|
- markupsafe<2.1.0
|
||||||
- scipy>=1.5.3
|
- scipy>=1.5.3
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ dependencies:
|
|||||||
- ipython
|
- ipython
|
||||||
- matplotlib
|
- matplotlib
|
||||||
- ipywidgets
|
- ipywidgets
|
||||||
- raiwidgets~=0.26.0
|
- raiwidgets~=0.33.0
|
||||||
- packaging>=20.9
|
- packaging>=20.9
|
||||||
- itsdangerous==2.0.1
|
- itsdangerous==2.0.1
|
||||||
- markupsafe<2.1.0
|
- markupsafe<2.1.0
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ dependencies:
|
|||||||
- ipython
|
- ipython
|
||||||
- matplotlib
|
- matplotlib
|
||||||
- ipywidgets
|
- ipywidgets
|
||||||
- raiwidgets~=0.26.0
|
- raiwidgets~=0.33.0
|
||||||
- packaging>=20.9
|
- packaging>=20.9
|
||||||
- itsdangerous==2.0.1
|
- itsdangerous==2.0.1
|
||||||
- markupsafe<2.1.0
|
- markupsafe<2.1.0
|
||||||
|
|||||||
@@ -8,10 +8,9 @@ dependencies:
|
|||||||
- gevent>=1.3.6
|
- gevent>=1.3.6
|
||||||
- ipython
|
- ipython
|
||||||
- matplotlib
|
- matplotlib
|
||||||
- azureml-dataset-runtime
|
|
||||||
- azureml-core
|
- azureml-core
|
||||||
- ipywidgets
|
- ipywidgets
|
||||||
- raiwidgets~=0.26.0
|
- raiwidgets~=0.33.0
|
||||||
- itsdangerous==2.0.1
|
- itsdangerous==2.0.1
|
||||||
- markupsafe<2.1.0
|
- markupsafe<2.1.0
|
||||||
- scipy>=1.5.3
|
- scipy>=1.5.3
|
||||||
|
|||||||
@@ -233,7 +233,7 @@
|
|||||||
" print('Found existing compute target {}.'.format(cluster_name))\n",
|
" print('Found existing compute target {}.'.format(cluster_name))\n",
|
||||||
"except ComputeTargetException:\n",
|
"except ComputeTargetException:\n",
|
||||||
" print('Creating a new compute target...')\n",
|
" print('Creating a new compute target...')\n",
|
||||||
" compute_config = AmlCompute.provisioning_configuration(vm_size=\"STANDARD_NC6\",\n",
|
" compute_config = AmlCompute.provisioning_configuration(vm_size=\"Standard_NC6s_v3\",\n",
|
||||||
" max_nodes=4)\n",
|
" max_nodes=4)\n",
|
||||||
"\n",
|
"\n",
|
||||||
" compute_target = ComputeTarget.create(ws, cluster_name, compute_config)\n",
|
" compute_target = ComputeTarget.create(ws, cluster_name, compute_config)\n",
|
||||||
|
|||||||
@@ -133,7 +133,7 @@
|
|||||||
" \n",
|
" \n",
|
||||||
"if not found:\n",
|
"if not found:\n",
|
||||||
" print('Creating a new compute target...')\n",
|
" print('Creating a new compute target...')\n",
|
||||||
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\", # for GPU, use \"STANDARD_NC6\"\n",
|
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\", # for GPU, use \"Standard_NC6s_v3\"\n",
|
||||||
" #vm_priority = 'lowpriority', # optional\n",
|
" #vm_priority = 'lowpriority', # optional\n",
|
||||||
" max_nodes = 4)\n",
|
" max_nodes = 4)\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
|||||||
@@ -136,7 +136,7 @@
|
|||||||
" \n",
|
" \n",
|
||||||
"if not found:\n",
|
"if not found:\n",
|
||||||
" print('Creating a new compute target...')\n",
|
" print('Creating a new compute target...')\n",
|
||||||
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\", # for GPU, use \"STANDARD_NC6\"\n",
|
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\", # for GPU, use \"Standard_NC6s_v3\"\n",
|
||||||
" #vm_priority = 'lowpriority', # optional\n",
|
" #vm_priority = 'lowpriority', # optional\n",
|
||||||
" max_nodes = 4)\n",
|
" max_nodes = 4)\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
|||||||
@@ -148,7 +148,7 @@
|
|||||||
" compute_target = ComputeTarget(workspace=ws, name=amlcompute_cluster_name)\n",
|
" compute_target = ComputeTarget(workspace=ws, name=amlcompute_cluster_name)\n",
|
||||||
" print('Found existing cluster, use it.')\n",
|
" print('Found existing cluster, use it.')\n",
|
||||||
"except ComputeTargetException:\n",
|
"except ComputeTargetException:\n",
|
||||||
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_DS12_V2',# for GPU, use \"STANDARD_NC6\"\n",
|
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_DS12_V2',# for GPU, use \"Standard_NC6s_v3\"\n",
|
||||||
" #vm_priority = 'lowpriority', # optional\n",
|
" #vm_priority = 'lowpriority', # optional\n",
|
||||||
" max_nodes=4)\n",
|
" max_nodes=4)\n",
|
||||||
" compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, compute_config)\n",
|
" compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, compute_config)\n",
|
||||||
|
|||||||
@@ -86,7 +86,7 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"If we could not find the cluster with the given name, then we will create a new cluster here. We will create an `AmlCompute` cluster of `STANDARD_NC6` GPU VMs. This process is broken down into 3 steps:\n",
|
"If we could not find the cluster with the given name, then we will create a new cluster here. We will create an `AmlCompute` cluster of `Standard_NC6s_v3` GPU VMs. This process is broken down into 3 steps:\n",
|
||||||
"1. create the configuration (this step is local and only takes a second)\n",
|
"1. create the configuration (this step is local and only takes a second)\n",
|
||||||
"2. create the cluster (this step will take about **20 seconds**)\n",
|
"2. create the cluster (this step will take about **20 seconds**)\n",
|
||||||
"3. provision the VMs to bring the cluster to the initial size (of 1 in this case). This step will take about **3-5 minutes** and is providing only sparse output in the process. Please make sure to wait until the call returns before moving to the next cell"
|
"3. provision the VMs to bring the cluster to the initial size (of 1 in this case). This step will take about **3-5 minutes** and is providing only sparse output in the process. Please make sure to wait until the call returns before moving to the next cell"
|
||||||
@@ -109,7 +109,7 @@
|
|||||||
" print('Found existing compute target')\n",
|
" print('Found existing compute target')\n",
|
||||||
"except ComputeTargetException:\n",
|
"except ComputeTargetException:\n",
|
||||||
" print('Creating a new compute target...')\n",
|
" print('Creating a new compute target...')\n",
|
||||||
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6', max_nodes=4)\n",
|
" compute_config = AmlCompute.provisioning_configuration(vm_size='Standard_NC6s_v3', max_nodes=4)\n",
|
||||||
"\n",
|
"\n",
|
||||||
" # create the cluster\n",
|
" # create the cluster\n",
|
||||||
" gpu_cluster = ComputeTarget.create(ws, cluster_name, compute_config)\n",
|
" gpu_cluster = ComputeTarget.create(ws, cluster_name, compute_config)\n",
|
||||||
|
|||||||
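The hunks above all make the same substitution: GPU provisioning moves from the older `STANDARD_NC6` SKU to `Standard_NC6s_v3`. A minimal sketch of the resulting provisioning pattern, assuming an existing `Workspace` object `ws` and a hypothetical cluster name:

```python
from azureml.core.compute import AmlCompute, ComputeTarget
from azureml.core.compute_target import ComputeTargetException

cluster_name = "gpu-cluster"  # hypothetical name, pick whatever your workspace uses

try:
    # Reuse the cluster if it already exists in the workspace
    gpu_cluster = ComputeTarget(workspace=ws, name=cluster_name)
    print("Found existing compute target")
except ComputeTargetException:
    print("Creating a new compute target...")
    # Standard_NC6s_v3 is the GPU SKU the updated notebooks standardize on
    compute_config = AmlCompute.provisioning_configuration(vm_size="Standard_NC6s_v3",
                                                           min_nodes=0,
                                                           max_nodes=4)
    gpu_cluster = ComputeTarget.create(ws, cluster_name, compute_config)
    # Block until provisioning finishes before submitting runs
    gpu_cluster.wait_for_completion(show_output=True)
```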
@@ -176,7 +176,7 @@
|
|||||||
" \n",
|
" \n",
|
||||||
"if not found:\n",
|
"if not found:\n",
|
||||||
" print('Creating a new compute target...')\n",
|
" print('Creating a new compute target...')\n",
|
||||||
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\", # for GPU, use \"STANDARD_NC6\"\n",
|
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\", # for GPU, use \"Standard_NC6s_v3\"\n",
|
||||||
" #vm_priority = 'lowpriority', # optional\n",
|
" #vm_priority = 'lowpriority', # optional\n",
|
||||||
" max_nodes = 4)\n",
|
" max_nodes = 4)\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
|||||||
@@ -68,7 +68,13 @@
|
|||||||
"from datetime import datetime\n",
|
"from datetime import datetime\n",
|
||||||
"from dateutil.relativedelta import relativedelta\n",
|
"from dateutil.relativedelta import relativedelta\n",
|
||||||
"\n",
|
"\n",
|
||||||
"green_df_raw = pd.DataFrame([])\n",
|
"green_df_raw = pd.DataFrame([\n",
|
||||||
|
" [2,\"2016-01-03 21:02:35\",\"2016-01-03 21:05:52\",1,0.83,\"\",\"\",-73.98726654052734,40.6938362121582,-73.97611236572266,40.69454574584961,1,\"N\",1,4.5,0.5,0.5,0.3,0.0,0.0,\"\",5.8,1.0],\n",
|
||||||
|
" [2,\"2016-01-19 21:49:17\",\"2016-01-19 21:54:37\",2,1.27,\"\",\"\",-73.94845581054688,40.80146789550781,-73.95975494384766,40.81214904785156,1,\"N\",1,6.0,0.5,0.5,0.3,1.0,0.0,\"\",8.3,1.0],\n",
|
||||||
|
" [2,\"2016-01-05 09:46:18\",\"2016-01-05 09:57:28\",1,1.8,\"\",\"\",-73.9554443359375,40.6797981262207,-73.98030853271484,40.678741455078125,1,\"N\",1,9.5,0.0,0.5,0.3,2.06,0.0,\"\",12.36,1.0],\n",
|
||||||
|
" [1,\"2016-01-08 17:49:12\",\"2016-01-08 17:52:20\",1,0.5,\"\",\"\",-73.92293548583984,40.76081848144531,-73.92549896240234,40.75471496582031,1,\"N\",1,4.0,1.0,0.5,0.3,1.15,0.0,\"\",6.95,1.0],\n",
|
||||||
|
" [1,\"2016-01-29 10:28:21\",\"2016-01-29 10:34:59\",1,0.9,\"\",\"\",-73.92304229736328,40.664939880371094,-73.91104125976562,40.66966247558594,1,\"N\",2,6.0,0.0,0.5,0.3,0.0,0.0,\"\",6.8,1.0],\n",
|
||||||
|
" ] * 50, columns=[\"vendorID\",\"lpepPickupDatetime\",\"lpepDropoffDatetime\",\"passengerCount\",\"tripDistance\",\"puLocationId\",\"doLocationId\",\"pickupLongitude\",\"pickupLatitude\",\"dropoffLongitude\",\"dropoffLatitude\",\"rateCodeID\",\"storeAndFwdFlag\",\"paymentType\",\"fareAmount\",\"extra\",\"mtaTax\",\"improvementSurcharge\",\"tipAmount\",\"tollsAmount\",\"ehailFee\",\"totalAmount\",\"tripType\"])\n",
|
||||||
"start = datetime.strptime(\"1/1/2016\",\"%m/%d/%Y\")\n",
|
"start = datetime.strptime(\"1/1/2016\",\"%m/%d/%Y\")\n",
|
||||||
"end = datetime.strptime(\"1/31/2016\",\"%m/%d/%Y\")\n",
|
"end = datetime.strptime(\"1/31/2016\",\"%m/%d/%Y\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
@@ -78,6 +84,8 @@
|
|||||||
"for sample_month in range(number_of_months):\n",
|
"for sample_month in range(number_of_months):\n",
|
||||||
" temp_df_green = NycTlcGreen(start + relativedelta(months=sample_month), end + relativedelta(months=sample_month)) \\\n",
|
" temp_df_green = NycTlcGreen(start + relativedelta(months=sample_month), end + relativedelta(months=sample_month)) \\\n",
|
||||||
" .to_pandas_dataframe()\n",
|
" .to_pandas_dataframe()\n",
|
||||||
|
" if temp_df_green is None:\n",
|
||||||
|
" continue\n",
|
||||||
" green_df_raw = green_df_raw.append(temp_df_green.sample(sample_size))"
|
" green_df_raw = green_df_raw.append(temp_df_green.sample(sample_size))"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -87,7 +95,13 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"yellow_df_raw = pd.DataFrame([])\n",
|
"yellow_df_raw = pd.DataFrame([\n",
|
||||||
|
" [2,\"2016-01-06 12:09:13\",\"2016-01-06 12:22:14\",1,2.09,\"\",\"\",-73.98207092285156,40.74605941772461,-74.00462341308594,40.730628967285156,1,\"N\",1,10.5,0.0,0.5,0.3,2.26,0.0,13.56],\n",
|
||||||
|
" [1,\"2016-01-03 17:57:48\",\"2016-01-03 18:08:18\",3,1.5,\"\",\"\",-73.96627044677734,40.764835357666016,-73.98455047607422,40.75786209106445,1,\"N\",2,8.5,1.0,0.5,0.3,0.0,0.0,10.3],\n",
|
||||||
|
" [1,\"2016-01-18 07:37:51\",\"2016-01-18 07:47:01\",1,1.8,\"\",\"\",0.0,0.0,0.0,0.0,1,\"N\",1,8.5,0.0,0.5,0.3,1.85,0.0,11.15],\n",
|
||||||
|
" [2,\"2016-01-26 00:31:36\",\"2016-01-26 00:38:47\",1,1.96,\"\",\"\",-73.9906234741211,40.7553596496582,-73.97895812988281,40.78070831298828,1,\"N\",1,8.0,0.5,0.5,0.3,1.0,0.0,10.3],\n",
|
||||||
|
" [2,\"2016-01-20 23:37:22\",\"2016-01-20 23:51:09\",1,3.6,\"\",\"\",-73.98528289794922,40.76026153564453,-74.01127624511719,40.7148323059082,1,\"N\",1,13.5,0.5,0.5,0.3,2.5,0.0,17.3]\n",
|
||||||
|
" ] * 50, columns=[\"vendorID\",\"tpepPickupDateTime\",\"tpepDropoffDateTime\",\"passengerCount\",\"tripDistance\",\"puLocationId\",\"doLocationId\",\"startLon\",\"startLat\",\"endLon\",\"endLat\",\"rateCodeID\",\"storeAndFwdFlag\",\"paymentType\",\"fareAmount\",\"extra\",\"mtaTax\",\"improvementSurcharge\",\"tipAmount\",\"tollsAmount\",\"totalAmount\"])\n",
|
||||||
"start = datetime.strptime(\"1/1/2016\",\"%m/%d/%Y\")\n",
|
"start = datetime.strptime(\"1/1/2016\",\"%m/%d/%Y\")\n",
|
||||||
"end = datetime.strptime(\"1/31/2016\",\"%m/%d/%Y\")\n",
|
"end = datetime.strptime(\"1/31/2016\",\"%m/%d/%Y\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
@@ -96,6 +110,8 @@
|
|||||||
"for sample_month in range(number_of_months):\n",
|
"for sample_month in range(number_of_months):\n",
|
||||||
" temp_df_yellow = NycTlcYellow(start + relativedelta(months=sample_month), end + relativedelta(months=sample_month)) \\\n",
|
" temp_df_yellow = NycTlcYellow(start + relativedelta(months=sample_month), end + relativedelta(months=sample_month)) \\\n",
|
||||||
" .to_pandas_dataframe()\n",
|
" .to_pandas_dataframe()\n",
|
||||||
|
" if temp_df_yellow is None:\n",
|
||||||
|
" continue\n",
|
||||||
" yellow_df_raw = yellow_df_raw.append(temp_df_yellow.sample(sample_size))"
|
" yellow_df_raw = yellow_df_raw.append(temp_df_yellow.sample(sample_size))"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -286,7 +302,7 @@
|
|||||||
"# Specify CondaDependencies obj, add necessary packages\n",
|
"# Specify CondaDependencies obj, add necessary packages\n",
|
||||||
"aml_run_config.environment.python.conda_dependencies = CondaDependencies.create(\n",
|
"aml_run_config.environment.python.conda_dependencies = CondaDependencies.create(\n",
|
||||||
" conda_packages=['pandas','scikit-learn'], \n",
|
" conda_packages=['pandas','scikit-learn'], \n",
|
||||||
" pip_packages=['azureml-sdk[automl]', 'pyarrow'])\n",
|
" pip_packages=['azureml-sdk[automl]', 'pyarrow==14.0.2'])\n",
|
||||||
"\n",
|
"\n",
|
||||||
"print (\"Run configuration created.\")"
|
"print (\"Run configuration created.\")"
|
||||||
]
|
]
|
||||||
@@ -554,7 +570,6 @@
|
|||||||
"Transform the normalized taxi data to final required format. This steps does the following:\n",
|
"Transform the normalized taxi data to final required format. This steps does the following:\n",
|
||||||
"\n",
|
"\n",
|
||||||
"- Split the pickup and dropoff date further into the day of the week, day of the month, and month values. \n",
|
"- Split the pickup and dropoff date further into the day of the week, day of the month, and month values. \n",
|
||||||
"- To get the day of the week value, uses the derive_column_by_example() function. The function takes an array parameter of example objects that define the input data, and the preferred output. The function automatically determines the preferred transformation. For the pickup and dropoff time columns, split the time into the hour, minute, and second by using the split_column_by_example() function with no example parameter.\n",
|
|
||||||
"- After new features are generated, use the drop_columns() function to delete the original fields as the newly generated features are preferred. \n",
|
"- After new features are generated, use the drop_columns() function to delete the original fields as the newly generated features are preferred. \n",
|
||||||
"- Rename the rest of the fields to use meaningful descriptions."
|
"- Rename the rest of the fields to use meaningful descriptions."
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -21,11 +21,9 @@ print("Argument 2(output final transformed taxi data): %s" % args.output_transfo
|
|||||||
# These functions transform the renamed data to be used finally for training.
|
# These functions transform the renamed data to be used finally for training.
|
||||||
|
|
||||||
# Split the pickup and dropoff date further into the day of the week, day of the month, and month values.
|
# Split the pickup and dropoff date further into the day of the week, day of the month, and month values.
|
||||||
# To get the day of the week value, use the derive_column_by_example() function.
|
|
||||||
# The function takes an array parameter of example objects that define the input data,
|
# The function takes an array parameter of example objects that define the input data,
|
||||||
# and the preferred output. The function automatically determines your preferred transformation.
|
# and the preferred output. The function automatically determines your preferred transformation.
|
||||||
# For the pickup and dropoff time columns, split the time into the hour, minute, and second by using
|
# For the pickup and dropoff time columns, split the time into the hour, minute, and second by using
|
||||||
# the split_column_by_example() function with no example parameter. After you generate the new features,
|
|
||||||
# use the drop_columns() function to delete the original fields as the newly generated features are preferred.
|
# use the drop_columns() function to delete the original fields as the newly generated features are preferred.
|
||||||
# Rename the rest of the fields to use meaningful descriptions.
|
# Rename the rest of the fields to use meaningful descriptions.
|
||||||
|
|
||||||
|
|||||||
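The feature-engineering step described above relies on the azureml-dataprep API (`derive_column_by_example()`, `split_column_by_example()`, `drop_columns()`). As a rough approximation of the same idea in plain pandas — splitting the pickup and dropoff timestamps into weekday, month, day, hour, minute and second, then dropping the originals — something like the following could be used (the column names are assumptions taken from the sample taxi data earlier in the diff):

```python
import pandas as pd

def add_datetime_features(df: pd.DataFrame, column: str, prefix: str) -> pd.DataFrame:
    # Split one datetime column into the parts described above, then drop the original.
    ts = pd.to_datetime(df[column])
    df[f"{prefix}_weekday"] = ts.dt.dayofweek
    df[f"{prefix}_month_num"] = ts.dt.month
    df[f"{prefix}_monthday"] = ts.dt.day
    df[f"{prefix}_hour"] = ts.dt.hour
    df[f"{prefix}_minute"] = ts.dt.minute
    df[f"{prefix}_second"] = ts.dt.second
    return df.drop(columns=[column])

# Usage sketch: green_df_raw is the dataframe built earlier in the notebook
green_df = add_datetime_features(green_df_raw.copy(), "lpepPickupDatetime", "pickup")
green_df = add_datetime_features(green_df, "lpepDropoffDatetime", "dropoff")
```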
@@ -12,13 +12,16 @@ from azureml.core import Model
|
|||||||
def init():
|
def init():
|
||||||
global g_tf_sess
|
global g_tf_sess
|
||||||
|
|
||||||
|
# Disable eager execution
|
||||||
|
tf.compat.v1.disable_eager_execution()
|
||||||
|
|
||||||
# pull down model from workspace
|
# pull down model from workspace
|
||||||
model_path = Model.get_model_path("mnist-prs")
|
model_path = Model.get_model_path("mnist-prs")
|
||||||
|
|
||||||
    # construct graph to execute
|
    # construct graph to execute
|
||||||
tf.reset_default_graph()
|
tf.compat.v1.reset_default_graph()
|
||||||
saver = tf.train.import_meta_graph(os.path.join(model_path, 'mnist-tf.model.meta'))
|
saver = tf.compat.v1.train.import_meta_graph(os.path.join(model_path, 'mnist-tf.model.meta'))
|
||||||
g_tf_sess = tf.Session(config=tf.ConfigProto(device_count={'GPU': 0}))
|
g_tf_sess = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(device_count={'GPU': 0}))
|
||||||
saver.restore(g_tf_sess, os.path.join(model_path, 'mnist-tf.model'))
|
saver.restore(g_tf_sess, os.path.join(model_path, 'mnist-tf.model'))
|
||||||
|
|
||||||
|
|
||||||
@@ -33,7 +36,7 @@ def run(mini_batch):
|
|||||||
data = Image.open(image)
|
data = Image.open(image)
|
||||||
np_im = np.array(data).reshape((1, 784))
|
np_im = np.array(data).reshape((1, 784))
|
||||||
# perform inference
|
# perform inference
|
||||||
inference_result = output.eval(feed_dict={in_tensor: np_im}, session=g_tf_sess)
|
inference_result = g_tf_sess.run(output, feed_dict={in_tensor: np_im})
|
||||||
# find best probability, and add to result list
|
# find best probability, and add to result list
|
||||||
best_result = np.argmax(inference_result)
|
best_result = np.argmax(inference_result)
|
||||||
resultList.append("{}: {}".format(os.path.basename(image), best_result))
|
resultList.append("{}: {}".format(os.path.basename(image), best_result))
|
||||||
|
|||||||
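The scoring-script hunks above follow the standard recipe for keeping TensorFlow 1.x graph code alive on a TensorFlow 2.x runtime: disable eager execution, route the old graph/session API through `tf.compat.v1`, and drive inference with `sess.run(...)` instead of `tensor.eval(...)`. A self-contained sketch of that pattern (the toy graph here is only illustrative, not the notebook's MNIST model):

```python
import numpy as np
import tensorflow as tf

# TF1-style graph/session code only works under TF2 once eager execution is off
tf.compat.v1.disable_eager_execution()

graph = tf.compat.v1.Graph()
with graph.as_default():
    inputs = tf.compat.v1.placeholder(tf.float32, shape=(None, 784), name="inputs")
    weights = tf.compat.v1.get_variable("weights", shape=(784, 10))
    logits = tf.matmul(inputs, weights, name="logits")
    init_op = tf.compat.v1.global_variables_initializer()

# ConfigProto and Session also move under tf.compat.v1
sess = tf.compat.v1.Session(graph=graph,
                            config=tf.compat.v1.ConfigProto(device_count={"GPU": 0}))
sess.run(init_op)

# Prefer sess.run(...) so the session is explicit rather than implied by eval()
scores = sess.run(logits, feed_dict={inputs: np.zeros((1, 784), dtype=np.float32)})
print(scores.shape)  # (1, 10)
```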
@@ -1,10 +1,7 @@
|
|||||||
import io
|
|
||||||
import pickle
|
import pickle
|
||||||
import argparse
|
import argparse
|
||||||
import numpy as np
|
|
||||||
|
|
||||||
from azureml.core.model import Model
|
from azureml.core.model import Model
|
||||||
from sklearn.linear_model import LogisticRegression
|
|
||||||
|
|
||||||
from azureml_user.parallel_run import EntryScript
|
from azureml_user.parallel_run import EntryScript
|
||||||
|
|
||||||
|
|||||||
@@ -105,7 +105,7 @@
|
|||||||
"compute_min_nodes = os.environ.get(\"AML_COMPUTE_CLUSTER_MIN_NODES\", 0)\n",
|
"compute_min_nodes = os.environ.get(\"AML_COMPUTE_CLUSTER_MIN_NODES\", 0)\n",
|
||||||
"compute_max_nodes = os.environ.get(\"AML_COMPUTE_CLUSTER_MAX_NODES\", 4)\n",
|
"compute_max_nodes = os.environ.get(\"AML_COMPUTE_CLUSTER_MAX_NODES\", 4)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# This example uses CPU VM. For using GPU VM, set SKU to STANDARD_NC6\n",
|
"# This example uses CPU VM. For using GPU VM, set SKU to Standard_NC6s_v3\n",
|
||||||
"vm_size = os.environ.get(\"AML_COMPUTE_CLUSTER_SKU\", \"STANDARD_D2_V2\")\n",
|
"vm_size = os.environ.get(\"AML_COMPUTE_CLUSTER_SKU\", \"STANDARD_D2_V2\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
@@ -306,7 +306,7 @@
|
|||||||
"#### An entry script\n",
|
"#### An entry script\n",
|
||||||
"This script accepts requests, scores the requests by using the model, and returns the results.\n",
|
"This script accepts requests, scores the requests by using the model, and returns the results.\n",
|
||||||
"- __init()__ - Typically this function loads the model into a global object. This function is run only once at the start of batch processing per worker node/process. Init method can make use of following environment variables (ParallelRunStep input):\n",
|
"- __init()__ - Typically this function loads the model into a global object. This function is run only once at the start of batch processing per worker node/process. Init method can make use of following environment variables (ParallelRunStep input):\n",
|
||||||
" 1.\tAZUREML_BI_OUTPUT_PATH \u00e2\u20ac\u201c output folder path\n",
|
" 1.\tAZUREML_BI_OUTPUT_PATH - output folder path\n",
|
||||||
"- __run(mini_batch)__ - The method to be parallelized. Each invocation will have one minibatch.<BR>\n",
|
"- __run(mini_batch)__ - The method to be parallelized. Each invocation will have one minibatch.<BR>\n",
|
||||||
"__mini_batch__: Batch inference will invoke run method and pass either a list or Pandas DataFrame as an argument to the method. Each entry in min_batch will be - a filepath if input is a FileDataset, a Pandas DataFrame if input is a TabularDataset.<BR>\n",
|
"__mini_batch__: Batch inference will invoke run method and pass either a list or Pandas DataFrame as an argument to the method. Each entry in min_batch will be - a filepath if input is a FileDataset, a Pandas DataFrame if input is a TabularDataset.<BR>\n",
|
||||||
"__run__ method response: run() method should return a Pandas DataFrame or an array. For append_row output_action, these returned elements are appended into the common output file. For summary_only, the contents of the elements are ignored. For all output actions, each returned output element indicates one successful inference of input element in the input mini-batch.\n",
|
"__run__ method response: run() method should return a Pandas DataFrame or an array. For append_row output_action, these returned elements are appended into the common output file. For summary_only, the contents of the elements are ignored. For all output actions, each returned output element indicates one successful inference of input element in the input mini-batch.\n",
|
||||||
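A minimal entry-script skeleton matching the `init()`/`run(mini_batch)` contract described above; the registered model name, the use of joblib/scikit-learn, and the CSV assumption for file inputs are all placeholders, not the notebook's actual model:

```python
import os

import joblib
import pandas as pd
from azureml.core.model import Model


def init():
    # Runs once per worker process: load the registered model into a global.
    global model
    model_path = Model.get_model_path("my-sklearn-model")  # hypothetical model name
    model = joblib.load(model_path)


def run(mini_batch):
    # Called once per mini-batch; return one element per successful inference.
    if isinstance(mini_batch, pd.DataFrame):
        # TabularDataset input: score the whole frame at once.
        return pd.DataFrame({"prediction": model.predict(mini_batch)})
    # FileDataset input: mini_batch is a list of file paths.
    results = []
    for file_path in mini_batch:
        frame = pd.read_csv(file_path)  # assumes CSV inputs, purely for illustration
        results.append("{}: {}".format(os.path.basename(file_path), model.predict(frame)[0]))
    return results
```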
@@ -359,9 +359,9 @@
|
|||||||
"from azureml.core import Environment\n",
|
"from azureml.core import Environment\n",
|
||||||
"from azureml.core.runconfig import CondaDependencies, DEFAULT_CPU_IMAGE\n",
|
"from azureml.core.runconfig import CondaDependencies, DEFAULT_CPU_IMAGE\n",
|
||||||
"\n",
|
"\n",
|
||||||
"batch_conda_deps = CondaDependencies.create(python_version=\"3.7\",\n",
|
"batch_conda_deps = CondaDependencies.create(python_version=\"3.8\",\n",
|
||||||
" conda_packages=['pip==20.2.4'],\n",
|
" conda_packages=['pip==20.2.4'],\n",
|
||||||
" pip_packages=[\"tensorflow==1.15.2\", \"pillow\", \"protobuf==3.20.1\",\n",
|
" pip_packages=[\"tensorflow==2.13.0\", \"pillow\", \"protobuf==4.23.3\",\n",
|
||||||
" \"azureml-core\", \"azureml-dataset-runtime[fuse]\"])\n",
|
" \"azureml-core\", \"azureml-dataset-runtime[fuse]\"])\n",
|
||||||
"batch_env = Environment(name=\"batch_environment\")\n",
|
"batch_env = Environment(name=\"batch_environment\")\n",
|
||||||
"batch_env.python.conda_dependencies = batch_conda_deps\n",
|
"batch_env.python.conda_dependencies = batch_conda_deps\n",
|
||||||
@@ -615,7 +615,7 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.6.9"
|
"version": "3.8.16"
|
||||||
},
|
},
|
||||||
"tags": [
|
"tags": [
|
||||||
"Batch Inferencing",
|
"Batch Inferencing",
|
||||||
|
|||||||
@@ -143,7 +143,7 @@
|
|||||||
"compute_min_nodes = os.environ.get(\"AML_COMPUTE_CLUSTER_MIN_NODES\", 0)\n",
|
"compute_min_nodes = os.environ.get(\"AML_COMPUTE_CLUSTER_MIN_NODES\", 0)\n",
|
||||||
"compute_max_nodes = os.environ.get(\"AML_COMPUTE_CLUSTER_MAX_NODES\", 2)\n",
|
"compute_max_nodes = os.environ.get(\"AML_COMPUTE_CLUSTER_MAX_NODES\", 2)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# This example uses CPU VM. For using GPU VM, set SKU to STANDARD_NC6\n",
|
"# This example uses CPU VM. For using GPU VM, set SKU to Standard_NC6s_v3\n",
|
||||||
"vm_size = os.environ.get(\"AML_COMPUTE_CLUSTER_SKU\", \"STANDARD_D2_V2\")\n",
|
"vm_size = os.environ.get(\"AML_COMPUTE_CLUSTER_SKU\", \"STANDARD_D2_V2\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
@@ -390,7 +390,7 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.6.9"
|
"version": "3.8.16"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
|
|||||||
@@ -103,7 +103,7 @@
|
|||||||
"compute_min_nodes = os.environ.get(\"AML_COMPUTE_CLUSTER_MIN_NODES\", 0)\n",
|
"compute_min_nodes = os.environ.get(\"AML_COMPUTE_CLUSTER_MIN_NODES\", 0)\n",
|
||||||
"compute_max_nodes = os.environ.get(\"AML_COMPUTE_CLUSTER_MAX_NODES\", 4)\n",
|
"compute_max_nodes = os.environ.get(\"AML_COMPUTE_CLUSTER_MAX_NODES\", 4)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# This example uses CPU VM. For using GPU VM, set SKU to STANDARD_NC6\n",
|
"# This example uses CPU VM. For using GPU VM, set SKU to Standard_NC6s_v3\n",
|
||||||
"vm_size = os.environ.get(\"AML_COMPUTE_CLUSTER_SKU\", \"STANDARD_D2_V2\")\n",
|
"vm_size = os.environ.get(\"AML_COMPUTE_CLUSTER_SKU\", \"STANDARD_D2_V2\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
@@ -252,7 +252,7 @@
|
|||||||
"#### An entry script\n",
|
"#### An entry script\n",
|
||||||
"This script accepts requests, scores the requests by using the model, and returns the results.\n",
|
"This script accepts requests, scores the requests by using the model, and returns the results.\n",
|
||||||
"- __init()__ - Typically this function loads the model into a global object. This function is run only once at the start of batch processing per worker node/process. init method can make use of following environment variables (ParallelRunStep input):\n",
|
"- __init()__ - Typically this function loads the model into a global object. This function is run only once at the start of batch processing per worker node/process. init method can make use of following environment variables (ParallelRunStep input):\n",
|
||||||
" 1.\tAZUREML_BI_OUTPUT_PATH \u00e2\u20ac\u201c output folder path\n",
|
" 1.\tAZUREML_BI_OUTPUT_PATH - output folder path\n",
|
||||||
"- __run(mini_batch)__ - The method to be parallelized. Each invocation will have one minibatch.<BR>\n",
|
"- __run(mini_batch)__ - The method to be parallelized. Each invocation will have one minibatch.<BR>\n",
|
||||||
"__mini_batch__: Batch inference will invoke run method and pass either a list or Pandas DataFrame as an argument to the method. Each entry in min_batch will be - a filepath if input is a FileDataset, a Pandas DataFrame if input is a TabularDataset.<BR>\n",
|
"__mini_batch__: Batch inference will invoke run method and pass either a list or Pandas DataFrame as an argument to the method. Each entry in min_batch will be - a filepath if input is a FileDataset, a Pandas DataFrame if input is a TabularDataset.<BR>\n",
|
||||||
"__run__ method response: run() method should return a Pandas DataFrame or an array. For append_row output_action, these returned elements are appended into the common output file. For summary_only, the contents of the elements are ignored. For all output actions, each returned output element indicates one successful inference of input element in the input mini-batch.\n",
|
"__run__ method response: run() method should return a Pandas DataFrame or an array. For append_row output_action, these returned elements are appended into the common output file. For summary_only, the contents of the elements are ignored. For all output actions, each returned output element indicates one successful inference of input element in the input mini-batch.\n",
|
||||||
@@ -308,10 +308,11 @@
|
|||||||
"from azureml.core import Environment\n",
|
"from azureml.core import Environment\n",
|
||||||
"from azureml.core.runconfig import CondaDependencies\n",
|
"from azureml.core.runconfig import CondaDependencies\n",
|
||||||
"\n",
|
"\n",
|
||||||
"predict_conda_deps = CondaDependencies.create(python_version=\"3.7\", \n",
|
"predict_conda_deps = CondaDependencies.create(python_version=\"3.8\", \n",
|
||||||
" conda_packages=['pip==20.2.4'],\n",
|
" conda_packages=['pip==20.2.4'],\n",
|
||||||
" pip_packages=[\"scikit-learn==0.20.3\",\n",
|
" pin_sdk_version=False,\n",
|
||||||
" \"azureml-core\", \"azureml-dataset-runtime[pandas,fuse]\"])\n",
|
" pip_packages=[\"numpy==1.19.5\", \"pandas==1.4.4\", \"scikit-learn==0.22.2\",\n",
|
||||||
|
" \"azureml-core==1.54.0\", \"azureml-dataset-runtime[fuse]==1.54.0\"])\n",
|
||||||
"\n",
|
"\n",
|
||||||
"predict_env = Environment(name=\"predict_environment\")\n",
|
"predict_env = Environment(name=\"predict_environment\")\n",
|
||||||
"predict_env.python.conda_dependencies = predict_conda_deps\n",
|
"predict_env.python.conda_dependencies = predict_conda_deps\n",
|
||||||
@@ -521,7 +522,7 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.6.2"
|
"version": "3.8.16"
|
||||||
},
|
},
|
||||||
"tags": [
|
"tags": [
|
||||||
"Batch Inferencing",
|
"Batch Inferencing",
|
||||||
|
|||||||
@@ -165,7 +165,7 @@
|
|||||||
"compute_min_nodes = os.environ.get(\"AML_COMPUTE_CLUSTER_MIN_NODES\", 0)\n",
|
"compute_min_nodes = os.environ.get(\"AML_COMPUTE_CLUSTER_MIN_NODES\", 0)\n",
|
||||||
"compute_max_nodes = os.environ.get(\"AML_COMPUTE_CLUSTER_MAX_NODES\", 2)\n",
|
"compute_max_nodes = os.environ.get(\"AML_COMPUTE_CLUSTER_MAX_NODES\", 2)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# This example uses CPU VM. For using GPU VM, set SKU to STANDARD_NC6\n",
|
"# This example uses CPU VM. For using GPU VM, set SKU to Standard_NC6s_v3\n",
|
||||||
"vm_size = os.environ.get(\"AML_COMPUTE_CLUSTER_SKU\", \"STANDARD_D2_V2\")\n",
|
"vm_size = os.environ.get(\"AML_COMPUTE_CLUSTER_SKU\", \"STANDARD_D2_V2\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
@@ -413,7 +413,7 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.8.13"
|
"version": "3.8.16"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
|
|||||||
@@ -210,7 +210,7 @@
|
|||||||
" print(\"found existing cluster.\")\n",
|
" print(\"found existing cluster.\")\n",
|
||||||
"except ComputeTargetException:\n",
|
"except ComputeTargetException:\n",
|
||||||
" print(\"creating new cluster\")\n",
|
" print(\"creating new cluster\")\n",
|
||||||
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_NC6\",\n",
|
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"Standard_NC6s_v3\",\n",
|
||||||
" max_nodes = 3)\n",
|
" max_nodes = 3)\n",
|
||||||
"\n",
|
"\n",
|
||||||
" # create the cluster\n",
|
" # create the cluster\n",
|
||||||
@@ -308,7 +308,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"cd = CondaDependencies.create(python_version=\"3.7\", conda_packages=['pip==20.2.4'])\n",
|
"cd = CondaDependencies.create(python_version=\"3.8\", conda_packages=['pip==20.2.4'])\n",
|
||||||
"\n",
|
"\n",
|
||||||
"cd.add_channel(\"conda-forge\")\n",
|
"cd.add_channel(\"conda-forge\")\n",
|
||||||
"cd.add_conda_package(\"ffmpeg==4.0.2\")\n",
|
"cd.add_conda_package(\"ffmpeg==4.0.2\")\n",
|
||||||
@@ -401,7 +401,7 @@
|
|||||||
"from azureml.core import Environment\n",
|
"from azureml.core import Environment\n",
|
||||||
"from azureml.core.runconfig import DEFAULT_GPU_IMAGE\n",
|
"from azureml.core.runconfig import DEFAULT_GPU_IMAGE\n",
|
||||||
"\n",
|
"\n",
|
||||||
"parallel_cd = CondaDependencies.create(python_version=\"3.7\", conda_packages=['pip==20.2.4', 'numpy==1.19'])\n",
|
"parallel_cd = CondaDependencies.create(python_version=\"3.8\", conda_packages=['pip==20.2.4', 'numpy==1.19'])\n",
|
||||||
"\n",
|
"\n",
|
||||||
"parallel_cd.add_channel(\"pytorch\")\n",
|
"parallel_cd.add_channel(\"pytorch\")\n",
|
||||||
"parallel_cd.add_conda_package(\"pytorch\")\n",
|
"parallel_cd.add_conda_package(\"pytorch\")\n",
|
||||||
|
|||||||
@@ -246,7 +246,7 @@
|
|||||||
" print('Found existing compute target.')\n",
|
" print('Found existing compute target.')\n",
|
||||||
"except ComputeTargetException:\n",
|
"except ComputeTargetException:\n",
|
||||||
" print('Creating a new compute target...')\n",
|
" print('Creating a new compute target...')\n",
|
||||||
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6',\n",
|
" compute_config = AmlCompute.provisioning_configuration(vm_size='Standard_NC6s_v3',\n",
|
||||||
" max_nodes=4)\n",
|
" max_nodes=4)\n",
|
||||||
"\n",
|
"\n",
|
||||||
" # create the cluster\n",
|
" # create the cluster\n",
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
@@ -10,6 +11,7 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
@@ -17,6 +19,7 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"nbpresent": {
|
"nbpresent": {
|
||||||
@@ -40,6 +43,7 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
@@ -80,6 +84,7 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
@@ -101,6 +106,7 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"nbpresent": {
|
"nbpresent": {
|
||||||
@@ -131,6 +137,7 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
@@ -168,6 +175,7 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
@@ -206,6 +214,7 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
@@ -213,6 +222,7 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"nbpresent": {
|
"nbpresent": {
|
||||||
@@ -240,6 +250,7 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
@@ -269,6 +280,7 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
@@ -279,10 +291,11 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"If we could not find the cluster with the given name, then we will create a new cluster here. We will create an `AmlCompute` cluster of `STANDARD_NC6` GPU VMs. This process is broken down into 3 steps:\n",
|
"If we could not find the cluster with the given name, then we will create a new cluster here. We will create an `AmlCompute` cluster of `Standard_NC6s_v3` GPU VMs. This process is broken down into 3 steps:\n",
|
||||||
"1. create the configuration (this step is local and only takes a second)\n",
|
"1. create the configuration (this step is local and only takes a second)\n",
|
||||||
"2. create the cluster (this step will take about **20 seconds**)\n",
|
"2. create the cluster (this step will take about **20 seconds**)\n",
|
||||||
"3. provision the VMs to bring the cluster to the initial size (of 1 in this case). This step will take about **3-5 minutes** and is providing only sparse output in the process. Please make sure to wait until the call returns before moving to the next cell"
|
"3. provision the VMs to bring the cluster to the initial size (of 1 in this case). This step will take about **3-5 minutes** and is providing only sparse output in the process. Please make sure to wait until the call returns before moving to the next cell"
|
||||||
@@ -305,7 +318,7 @@
|
|||||||
" print('Found existing compute target')\n",
|
" print('Found existing compute target')\n",
|
||||||
"except ComputeTargetException:\n",
|
"except ComputeTargetException:\n",
|
||||||
" print('Creating a new compute target...')\n",
|
" print('Creating a new compute target...')\n",
|
||||||
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6', \n",
|
" compute_config = AmlCompute.provisioning_configuration(vm_size='Standard_NC6s_v3', \n",
|
||||||
" max_nodes=4)\n",
|
" max_nodes=4)\n",
|
||||||
"\n",
|
"\n",
|
||||||
" # create the cluster\n",
|
" # create the cluster\n",
|
||||||
@@ -320,6 +333,7 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
@@ -338,6 +352,7 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
@@ -361,6 +376,7 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"nbpresent": {
|
"nbpresent": {
|
||||||
@@ -375,6 +391,7 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
@@ -394,6 +411,7 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
@@ -411,6 +429,7 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
@@ -430,12 +449,12 @@
|
|||||||
"channels:\n",
|
"channels:\n",
|
||||||
"- conda-forge\n",
|
"- conda-forge\n",
|
||||||
"dependencies:\n",
|
"dependencies:\n",
|
||||||
"- python=3.7\n",
|
"- python=3.8\n",
|
||||||
"- pip=21.3.1\n",
|
"- pip=23.1.2\n",
|
||||||
"- pip:\n",
|
"- pip:\n",
|
||||||
" - h5py<=2.10.0\n",
|
" - h5py<=2.10.0\n",
|
||||||
" - azureml-defaults\n",
|
" - azureml-defaults\n",
|
||||||
" - tensorflow-gpu==2.0.0\n",
|
" - tensorflow-gpu==2.2.0\n",
|
||||||
" - keras<=2.3.1\n",
|
" - keras<=2.3.1\n",
|
||||||
" - matplotlib\n",
|
" - matplotlib\n",
|
||||||
" - protobuf==3.20.1"
|
" - protobuf==3.20.1"
|
||||||
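The conda specification above (bumped here to Python 3.8 and `tensorflow-gpu==2.2.0`) is typically written to disk by the notebook and then turned into an AzureML environment. A sketch of that step, assuming the YAML was saved as `conda_dependencies.yml`; the environment name is illustrative:

```python
from azureml.core import Environment

# Build an environment from the conda YAML shown in the hunk above
keras_env = Environment.from_conda_specification(
    name="keras-gpu-env",                      # hypothetical environment name
    file_path="./conda_dependencies.yml")

# GPU training also needs a CUDA-enabled base image
keras_env.docker.base_image = "mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.1-cudnn8-ubuntu20.04"
```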
@@ -457,6 +476,7 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
@@ -501,6 +521,7 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
@@ -518,6 +539,7 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
@@ -547,6 +569,7 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
@@ -572,6 +595,7 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
@@ -579,6 +603,7 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
@@ -619,6 +644,7 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
@@ -626,6 +652,7 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
@@ -649,6 +676,7 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
@@ -657,6 +685,7 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
@@ -668,6 +697,7 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
@@ -675,6 +705,7 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
@@ -691,6 +722,7 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
@@ -698,6 +730,7 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
@@ -712,6 +745,7 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
@@ -719,6 +753,7 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
@@ -726,6 +761,7 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
@@ -753,6 +789,7 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
@@ -775,6 +812,7 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
@@ -791,6 +829,7 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
@@ -813,6 +852,7 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
@@ -829,6 +869,7 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
@@ -863,6 +904,7 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
@@ -890,6 +932,7 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
@@ -906,6 +949,7 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
@@ -922,6 +966,7 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
@@ -970,6 +1015,7 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
@@ -997,6 +1043,7 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
@@ -1035,6 +1082,7 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
@@ -1051,6 +1099,7 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
@@ -1067,6 +1116,7 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
@@ -1115,6 +1165,7 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
@@ -1133,6 +1184,7 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
@@ -1162,6 +1214,7 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
@@ -1184,6 +1237,7 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
|
|||||||
@@ -3,6 +3,4 @@ dependencies:
|
|||||||
- pip:
|
- pip:
|
||||||
- azureml-sdk
|
- azureml-sdk
|
||||||
- azureml-widgets
|
- azureml-widgets
|
||||||
- tensorflow
|
|
||||||
- keras<=2.3.1
|
|
||||||
- matplotlib
|
- matplotlib
|
||||||
|
|||||||
@@ -97,7 +97,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## Create or attach existing AmlCompute\n",
|
"## Create or attach existing AmlCompute\n",
|
||||||
"You will need to create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for training your model. In this tutorial, we use Azure ML managed compute ([AmlCompute](https://docs.microsoft.com/azure/machine-learning/service/how-to-set-up-training-targets#amlcompute)) for our remote training compute resource. Specifically, the below code creates an `STANDARD_NC6` GPU cluster that autoscales from `0` to `4` nodes.\n",
|
"You will need to create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for training your model. In this tutorial, we use Azure ML managed compute ([AmlCompute](https://docs.microsoft.com/azure/machine-learning/service/how-to-set-up-training-targets#amlcompute)) for our remote training compute resource. Specifically, the below code creates an `Standard_NC6s_v3` GPU cluster that autoscales from `0` to `4` nodes.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"> Note that if you have an AzureML Data Scientist role, you will not have permission to create compute resources. Talk to your workspace or IT admin to create the compute targets described in this section, if they do not already exist.\n",
|
"> Note that if you have an AzureML Data Scientist role, you will not have permission to create compute resources. Talk to your workspace or IT admin to create the compute targets described in this section, if they do not already exist.\n",
|
||||||
"\n",
|
"\n",
|
||||||
@@ -123,7 +123,7 @@
|
|||||||
" print('Found existing compute target.')\n",
|
" print('Found existing compute target.')\n",
|
||||||
"except ComputeTargetException:\n",
|
"except ComputeTargetException:\n",
|
||||||
" print('Creating a new compute target...')\n",
|
" print('Creating a new compute target...')\n",
|
||||||
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6',\n",
|
" compute_config = AmlCompute.provisioning_configuration(vm_size='Standard_NC6s_v3',\n",
|
||||||
" max_nodes=4)\n",
|
" max_nodes=4)\n",
|
||||||
"\n",
|
"\n",
|
||||||
" # create the cluster\n",
|
" # create the cluster\n",
|
||||||
@@ -282,7 +282,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"### Create an environment\n",
|
"### Create an environment\n",
|
||||||
"\n",
|
"\n",
|
||||||
"In this tutorial, we will use one of Azure ML's curated PyTorch environments for training. [Curated environments](https://docs.microsoft.com/azure/machine-learning/how-to-use-environments#use-a-curated-environment) are available in your workspace by default. Specifically, we will use the PyTorch 1.6 GPU curated environment."
|
"In this tutorial, we will use one of Azure ML's curated PyTorch environments for training. [Curated environments](https://docs.microsoft.com/azure/machine-learning/how-to-use-environments#use-a-curated-environment) are available in your workspace by default. Specifically, we will use the PyTorch 2.0 GPU curated environment."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -293,7 +293,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"from azureml.core import Environment\n",
|
"from azureml.core import Environment\n",
|
||||||
"\n",
|
"\n",
|
||||||
"pytorch_env = Environment.get(ws, name='AzureML-PyTorch-1.6-GPU')"
|
"pytorch_env = Environment.get(ws, name='azureml-acpt-pytorch-1.13-cuda11.7')"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -323,7 +323,7 @@
|
|||||||
"To use the per-process launch option in which Azure ML will handle launching each of the processes to run your training script,\n",
|
"To use the per-process launch option in which Azure ML will handle launching each of the processes to run your training script,\n",
|
||||||
"\n",
|
"\n",
|
||||||
"1. Specify the training script and arguments\n",
|
"1. Specify the training script and arguments\n",
|
||||||
"2. Create a `PyTorchConfiguration` and specify `node_count` and `process_count`. The `process_count` is the total number of processes you want to run for the job; this should typically equal the # of GPUs available on each node multiplied by the # of nodes. Since this tutorial uses the `STANDARD_NC6` SKU, which has one GPU, the total process count for a 2-node job is `2`. If you are using a SKU with >1 GPUs, adjust the `process_count` accordingly.\n",
|
"2. Create a `PyTorchConfiguration` and specify `node_count` and `process_count`. The `process_count` is the total number of processes you want to run for the job; this should typically equal the # of GPUs available on each node multiplied by the # of nodes. Since this tutorial uses the `Standard_NC6s_v3` SKU, which has one GPU, the total process count for a 2-node job is `2`. If you are using a SKU with >1 GPUs, adjust the `process_count` accordingly.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Azure ML will set the `MASTER_ADDR`, `MASTER_PORT`, `NODE_RANK`, `WORLD_SIZE` environment variables on each node, in addition to the process-level `RANK` and `LOCAL_RANK` environment variables, that are needed for distributed PyTorch training."
|
"Azure ML will set the `MASTER_ADDR`, `MASTER_PORT`, `NODE_RANK`, `WORLD_SIZE` environment variables on each node, in addition to the process-level `RANK` and `LOCAL_RANK` environment variables, that are needed for distributed PyTorch training."
|
||||||
]
|
]
|
||||||
|
|||||||
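Putting the pieces above together — the `Standard_NC6s_v3` cluster, the curated PyTorch environment, and a `PyTorchConfiguration` sized for one GPU per node across two nodes — a submission sketch could look like this (the source directory, script name, and experiment name are hypothetical):

```python
from azureml.core import Experiment, ScriptRunConfig
from azureml.core.runconfig import PyTorchConfiguration

# One GPU per Standard_NC6s_v3 node, two nodes => two processes in total
distr_config = PyTorchConfiguration(process_count=2, node_count=2)

src = ScriptRunConfig(source_directory="./src",         # hypothetical folder with the training script
                      script="train.py",                # hypothetical script name
                      arguments=["--epochs", 10],
                      compute_target=compute_target,    # the AmlCompute cluster created earlier
                      environment=pytorch_env,          # the curated PyTorch environment
                      distributed_job_config=distr_config)

run = Experiment(ws, "distributed-pytorch").submit(src)
run.wait_for_completion(show_output=True)
```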
@@ -97,7 +97,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## Create or attach existing AmlCompute\n",
|
"## Create or attach existing AmlCompute\n",
|
||||||
"You will need to create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for training your model. In this tutorial, we use Azure ML managed compute ([AmlCompute](https://docs.microsoft.com/azure/machine-learning/service/how-to-set-up-training-targets#amlcompute)) for our remote training compute resource. Specifically, the below code creates an `STANDARD_NC6` GPU cluster that autoscales from `0` to `4` nodes.\n",
|
"You will need to create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for training your model. In this tutorial, we use Azure ML managed compute ([AmlCompute](https://docs.microsoft.com/azure/machine-learning/service/how-to-set-up-training-targets#amlcompute)) for our remote training compute resource. Specifically, the below code creates an `Standard_NC6s_v3` GPU cluster that autoscales from `0` to `4` nodes.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"> Note that if you have an AzureML Data Scientist role, you will not have permission to create compute resources. Talk to your workspace or IT admin to create the compute targets described in this section, if they do not already exist.\n",
|
"> Note that if you have an AzureML Data Scientist role, you will not have permission to create compute resources. Talk to your workspace or IT admin to create the compute targets described in this section, if they do not already exist.\n",
|
||||||
"\n",
|
"\n",
|
||||||
@@ -123,7 +123,7 @@
|
|||||||
" print('Found existing compute target.')\n",
|
" print('Found existing compute target.')\n",
|
||||||
"except ComputeTargetException:\n",
|
"except ComputeTargetException:\n",
|
||||||
" print('Creating a new compute target...')\n",
|
" print('Creating a new compute target...')\n",
|
||||||
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6',\n",
|
" compute_config = AmlCompute.provisioning_configuration(vm_size='Standard_NC6s_v3',\n",
|
||||||
" max_nodes=4)\n",
|
" max_nodes=4)\n",
|
||||||
"\n",
|
"\n",
|
||||||
" # create the cluster\n",
|
" # create the cluster\n",
|
||||||
|
|||||||
[next file in the diff: a PyTorch training notebook (JSON); filename not shown]
@@ -1,6 +1,7 @@
  {
  "cells": [
  {
+ "attachments": {},
  "cell_type": "markdown",
  "metadata": {},
  "source": [
[further hunks that only add "attachments": {}, to the remaining markdown cells omitted]
@@ -124,7 +131,7 @@
  " print('Found existing compute target.')\n",
  "except ComputeTargetException:\n",
  " print('Creating a new compute target...')\n",
- " compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6', \n",
+ " compute_config = AmlCompute.provisioning_configuration(vm_size='Standard_NC6s_v3', \n",
  " max_nodes=4)\n",
  "\n",
  " # create the cluster\n",
@@ -242,32 +256,13 @@
  "### Create an environment\n",
  "\n",
- "Define a conda environment YAML file with your training script dependencies and create an Azure ML environment."
+ "Create an Azure ML environment."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "%%writefile conda_dependencies.yml\n",
- "\n",
- "channels:\n",
- "- conda-forge\n",
- "- pytorch\n",
- "dependencies:\n",
- "- python=3.8.12\n",
- "- pip=21.3.1\n",
- "- pytorch::pytorch==1.8.1\n",
- "- pytorch::torchvision==0.9.1\n",
- "- pip:\n",
- " - azureml-defaults"
  ]
  },
@@ -278,14 +273,11 @@
  "source": [
  "from azureml.core import Environment\n",
  "\n",
- "pytorch_env = Environment.from_conda_specification(name = 'pytorch-1.6-gpu', file_path = './conda_dependencies.yml')\n",
- "\n",
- "# Specify a GPU base image\n",
- "pytorch_env.docker.enabled = True\n",
- "pytorch_env.docker.base_image = 'mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.1-cudnn8-ubuntu20.04'"
+ "pytorch_env = Environment.get(ws, name='azureml-acpt-pytorch-1.11-cuda11.3')"
  ]
  },
[remaining hunks add "attachments": {}, to markdown cells only]
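The environment cell above now pulls a curated environment instead of building one from a conda YAML file. A sketch of how such a curated environment is typically combined with a ScriptRunConfig in SDK v1 follows; the entry script `train.py`, the experiment name, and the compute target name are assumptions for illustration only.

    # Sketch only: train.py, the experiment name, and the compute target name are assumed.
    from azureml.core import Workspace, Environment, Experiment, ScriptRunConfig

    ws = Workspace.from_config()
    pytorch_env = Environment.get(ws, name='azureml-acpt-pytorch-1.11-cuda11.3')

    src = ScriptRunConfig(source_directory='.',
                          script='train.py',             # assumed entry script
                          compute_target='gpu-cluster',  # assumed cluster name
                          environment=pytorch_env)

    run = Experiment(ws, 'pytorch-train').submit(src)
    run.wait_for_completion(show_output=True)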
[next file in the diff: a scikit-learn training notebook (JSON); filename not shown]
[hunks that only add "attachments": {}, to markdown cells omitted]
@@ -284,27 +305,13 @@
  "### Create an environment\n",
  "\n",
- "Define a conda environment YAML file with your training script dependencies and create an Azure ML environment."
+ "Create an Azure ML environment."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "%%writefile conda_dependencies.yml\n",
- "\n",
- "dependencies:\n",
- "- python=3.6.2\n",
- "- scikit-learn\n",
- "- pip:\n",
- " - azureml-defaults"
  ]
  },
@@ -315,10 +322,11 @@
  "source": [
  "from azureml.core import Environment\n",
  "\n",
- "sklearn_env = Environment.from_conda_specification(name = 'sklearn-env', file_path = './conda_dependencies.yml')"
+ "sklearn_env = Environment.get(ws, name='azureml-sklearn-1.0')"
  ]
  },
[remaining hunks add "attachments": {}, to markdown cells only]
[next file in the diff: a TensorFlow training notebook (JSON); filename not shown]
[hunks that only add "attachments": {}, to markdown cells omitted]
@@ -125,7 +132,7 @@
  " print('Found existing compute target')\n",
  "except ComputeTargetException:\n",
  " print('Creating a new compute target...')\n",
- " compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6', \n",
+ " compute_config = AmlCompute.provisioning_configuration(vm_size='Standard_NC6s_v3', \n",
  " max_nodes=4)\n",
  "\n",
  " # create the cluster\n",
@@ -195,10 +207,11 @@
  "source": [
  "from azureml.core import Environment\n",
  "\n",
- "tf_env = Environment.get(ws, name='AzureML-tensorflow-2.7-ubuntu20.04-py38-cuda11-gpu')"
+ "tf_env = Environment.get(ws, name='azureml-tensorflow-2.11-cuda11')"
  ]
  },
[remaining hunks add "attachments": {}, to markdown cells only]
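The curated-environment names referenced in these hunks change over time. A small sketch for listing the environments a workspace exposes, to confirm that a name such as 'azureml-tensorflow-2.11-cuda11' exists before using it, is shown below; the substring filter is illustrative only.

    # Sketch only: list environment names visible to the workspace and filter for TensorFlow ones.
    from azureml.core import Workspace, Environment

    ws = Workspace.from_config()
    for name in sorted(Environment.list(workspace=ws)):
        if 'tensorflow' in name.lower():
            print(name)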
[next file in the diff: another TensorFlow training notebook (JSON); filename not shown]
[hunks that only add "attachments": {}, to markdown cells omitted]
@@ -276,10 +288,11 @@
  "source": [
- "If we could not find the cluster with the given name, then we will create a new cluster here. We will create an `AmlCompute` cluster of `STANDARD_NC6` GPU VMs. This process is broken down into 3 steps:\n",
+ "If we could not find the cluster with the given name, then we will create a new cluster here. We will create an `AmlCompute` cluster of `Standard_NC6s_v3` GPU VMs. This process is broken down into 3 steps:\n",
  "1. create the configuration (this step is local and only takes a second)\n",
  "2. create the cluster (this step will take about **20 seconds**)\n",
  "3. provision the VMs to bring the cluster to the initial size (of 1 in this case). This step will take about **3-5 minutes** and is providing only sparse output in the process. Please make sure to wait until the call returns before moving to the next cell"
@@ -302,7 +315,7 @@
  " print('Found existing compute target')\n",
  "except ComputeTargetException:\n",
  " print('Creating a new compute target...')\n",
- " compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6',\n",
+ " compute_config = AmlCompute.provisioning_configuration(vm_size='Standard_NC6s_v3',\n",
  " max_nodes=4)\n",
  "\n",
  " # create the cluster\n",
@@ -424,10 +443,11 @@
  "source": [
  "from azureml.core import Environment\n",
  "\n",
- "tf_env = Environment.get(ws, name='AzureML-tensorflow-2.6-ubuntu20.04-py38-cuda11-gpu')"
+ "tf_env = Environment.get(ws, name='azureml-tensorflow-2.11-cuda11')"
  ]
  },
[remaining hunks add "attachments": {}, to markdown cells only]
[next file in the diff: another TensorFlow training notebook (JSON); filename not shown]
[hunks that only add "attachments": {}, to markdown cells omitted]
@@ -293,10 +305,11 @@
  "source": [
- "If we could not find the cluster with the given name, then we will create a new cluster here. We will create an `AmlCompute` cluster of `STANDARD_NC6` GPU VMs. This process is broken down into 3 steps:\n",
+ "If we could not find the cluster with the given name, then we will create a new cluster here. We will create an `AmlCompute` cluster of `Standard_NC6s_v3` GPU VMs. This process is broken down into 3 steps:\n",
  "1. create the configuration (this step is local and only takes a second)\n",
  "2. create the cluster (this step will take about **20 seconds**)\n",
  "3. provision the VMs to bring the cluster to the initial size (of 1 in this case). This step will take about **3-5 minutes** and is providing only sparse output in the process. Please make sure to wait until the call returns before moving to the next cell"
@@ -319,7 +332,7 @@
  " print('Found existing compute target')\n",
  "except ComputeTargetException:\n",
  " print('Creating a new compute target...')\n",
- " compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6', \n",
+ " compute_config = AmlCompute.provisioning_configuration(vm_size='Standard_NC6s_v3', \n",
  " max_nodes=4)\n",
  "\n",
  " # create the cluster\n",
@@ -441,10 +460,11 @@
  "source": [
  "from azureml.core import Environment\n",
  "\n",
- "tf_env = Environment.get(ws, name='AzureML-tensorflow-2.6-ubuntu20.04-py38-cuda11-gpu')"
+ "tf_env = Environment.get(ws, name='azureml-tensorflow-2.11-cuda11')"
  ]
  },
[remaining hunks add "attachments": {}, to markdown cells only]
[next file in the diff: a conda environment file]
@@ -6,7 +6,5 @@ dependencies:
  - azureml-sdk
  - azureml-widgets
  - pandas
- - keras
- - tensorflow==2.0.0
  - matplotlib
  - fuse
[next file in the diff: a TensorFlow MNIST training script]
@@ -5,7 +5,7 @@ import numpy as np
  import argparse
  import os
  import re
- import tensorflow as tf
+ import tensorflow.compat.v1 as tf
  import glob

  from azureml.core import Run
@@ -41,8 +41,8 @@ y_test = load_data(glob.glob(os.path.join(data_folder, '**/t10k-labels-idx1-ubyt
  recursive=True)[0], True).reshape(-1)

  print(X_train.shape, y_train.shape, X_test.shape, y_test.shape, sep='\n')

  training_set_size = X_train.shape[0]
+ tf.disable_v2_behavior()

  n_inputs = 28 * 28
  n_h1 = 100
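The two script hunks above move a TensorFlow 1.x graph-style script onto the compat.v1 shim so it keeps running on a TensorFlow 2.x installation. A minimal standalone illustration of that pattern follows; it is not the repository's actual script, and the shapes and values are placeholders.

    # Minimal illustration of the tf.compat.v1 shim: TF1-style placeholders/sessions on a TF2 install.
    import numpy as np
    import tensorflow.compat.v1 as tf

    tf.disable_v2_behavior()  # turn off eager execution so the graph/session API behaves like TF1

    x = tf.placeholder(tf.float32, shape=(None, 784))  # 28 * 28 inputs, as in the MNIST script
    w = tf.Variable(tf.zeros([784, 10]))
    b = tf.Variable(tf.zeros([10]))
    logits = tf.matmul(x, w) + b

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        out = sess.run(logits, feed_dict={x: np.zeros((1, 784), dtype=np.float32)})
        print(out.shape)  # (1, 10)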
[next file in the diff: a TensorFlow training notebook (JSON); filename not shown]
[hunks that only add "attachments": {}, to markdown cells omitted]
@@ -125,7 +132,7 @@
  " print('Found existing compute target.')\n",
  "except ComputeTargetException:\n",
  " print('Creating a new compute target...')\n",
- " compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6', \n",
+ " compute_config = AmlCompute.provisioning_configuration(vm_size='Standard_NC6s_v3', \n",
  " max_nodes=4)\n",
  "\n",
  " # create the cluster\n",
@@ -292,10 +307,11 @@
  "source": [
  "from azureml.core import Environment\n",
  "\n",
- "tf_env = Environment.get(ws, name='AzureML-TensorFlow-1.13-GPU')"
+ "tf_env = Environment.get(ws, name='azureml-tensorflow-2.11-cuda11')"
  ]
  },
[remaining hunks add "attachments": {}, to markdown cells only]
Some files were not shown because too many files have changed in this diff.