Compare commits
155 Commits
sdgilley/u
...
akshaya-a-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
42fd4598cb | ||
|
|
476d945439 | ||
|
|
e96bb9bef2 | ||
|
|
2be4a5e54d | ||
|
|
247a25f280 | ||
|
|
5d9d8eade6 | ||
|
|
dba978e42a | ||
|
|
7f4101c33e | ||
|
|
62b0d5df69 | ||
|
|
f10b55a1bc | ||
|
|
da9e86635e | ||
|
|
9ca6388996 | ||
|
|
3ce779063b | ||
|
|
ce635ce4fe | ||
|
|
f08e68c8e9 | ||
|
|
93a1d232db | ||
|
|
923483528c | ||
|
|
cbeacb2ab2 | ||
|
|
c928c50707 | ||
|
|
efb42bacf9 | ||
|
|
d8f349a1ae | ||
|
|
96a61fdc78 | ||
|
|
ff8128f023 | ||
|
|
8260302a68 | ||
|
|
fbd7f4a55b | ||
|
|
d4e4206179 | ||
|
|
a98b918feb | ||
|
|
890490ec70 | ||
|
|
c068c9b979 | ||
|
|
f334a3516f | ||
|
|
96248d8dff | ||
|
|
c42e865700 | ||
|
|
9233ce089a | ||
|
|
6bb1e2a3e3 | ||
|
|
e1724c8a89 | ||
|
|
446e0768cc | ||
|
|
8a2f114a16 | ||
|
|
80c0d4d30f | ||
|
|
e8f4708a5a | ||
|
|
fbaeb84204 | ||
|
|
da1fab0a77 | ||
|
|
94d2890bb5 | ||
|
|
4d1ec4f7d4 | ||
|
|
ace3153831 | ||
|
|
58bbfe57b2 | ||
|
|
11ea00b1d9 | ||
|
|
b81efca3e5 | ||
|
|
d7ceb9bca2 | ||
|
|
17730dc69a | ||
|
|
3a029d48a2 | ||
|
|
06d43956f3 | ||
|
|
a1cb9b33a5 | ||
|
|
fdc3fe2a53 | ||
|
|
628b35912c | ||
|
|
3f4cc22e94 | ||
|
|
18d7afb707 | ||
|
|
cd35ca30d4 | ||
|
|
30eae0b46c | ||
|
|
f16951387f | ||
|
|
0d8de29147 | ||
|
|
836354640c | ||
|
|
6162e80972 | ||
|
|
fe9fe3392d | ||
|
|
5ec6d8861b | ||
|
|
ae188f324e | ||
|
|
4c30c2bdb9 | ||
|
|
b891440e2d | ||
|
|
784827cdd2 | ||
|
|
0957af04ca | ||
|
|
a3bdd193d1 | ||
|
|
dff09970ac | ||
|
|
abc7d21711 | ||
|
|
ec12ef635f | ||
|
|
81b3e6f09f | ||
|
|
cc167dceda | ||
|
|
bc52a6d8ee | ||
|
|
5bbbdbe73c | ||
|
|
fd4de05ddd | ||
|
|
9eaab2189d | ||
|
|
12147754b2 | ||
|
|
90ef263823 | ||
|
|
143590cfb4 | ||
|
|
40379014ad | ||
|
|
f7b0e99fa1 | ||
|
|
7a7ac48411 | ||
|
|
50107c5b1e | ||
|
|
e41d7e6819 | ||
|
|
691e038e84 | ||
|
|
426e79d635 | ||
|
|
326677e87f | ||
|
|
44988e30ae | ||
|
|
646ae37384 | ||
|
|
457e29a663 | ||
|
|
2771edfb2c | ||
|
|
f0001ec322 | ||
|
|
d3e02a017d | ||
|
|
a0ebed6876 | ||
|
|
dc0ab6db47 | ||
|
|
ea7900f82c | ||
|
|
0cb3fd180d | ||
|
|
b05c3e46bb | ||
|
|
a1b7d298d3 | ||
|
|
cc5516c3b3 | ||
|
|
4fb6070b89 | ||
|
|
1b926cdf53 | ||
|
|
72fc00fb65 | ||
|
|
ddc6b57253 | ||
|
|
e8b3b98338 | ||
|
|
66325a1405 | ||
|
|
0efbeaf4b8 | ||
|
|
11d487fb28 | ||
|
|
073e319ef9 | ||
|
|
3ed75f28d1 | ||
|
|
bfc0367f54 | ||
|
|
075eeb583f | ||
|
|
b7531d3b9e | ||
|
|
41dc3bd1cf | ||
|
|
b790b385a4 | ||
|
|
8700328fe9 | ||
|
|
adbd2c8200 | ||
|
|
7d552effb0 | ||
|
|
bc81d2a5a7 | ||
|
|
7620de2d91 | ||
|
|
07a43a0444 | ||
|
|
f4d5874e09 | ||
|
|
8a0b4d24bd | ||
|
|
636f19be1f | ||
|
|
0fd7f7d9b2 | ||
|
|
ab6c66534f | ||
|
|
faccf13759 | ||
|
|
4c6a28e4ed | ||
|
|
64ad88e2cb | ||
|
|
969ac90d39 | ||
|
|
fb977c1e95 | ||
|
|
d5ba3916f7 | ||
|
|
f7f1087337 | ||
|
|
47ea2dbc03 | ||
|
|
bd2cf534e5 | ||
|
|
65f1668d69 | ||
|
|
e0fb7df0aa | ||
|
|
7047f76299 | ||
|
|
c39f2d5eb6 | ||
|
|
5fda69a388 | ||
|
|
87ce954eef | ||
|
|
ebbeac413a | ||
|
|
a68bbaaab4 | ||
|
|
8784dc979f | ||
|
|
f8047544fc | ||
|
|
eeb2a05e4f | ||
|
|
6db9d7bd8b | ||
|
|
80e2fde734 | ||
|
|
ae4f5d40ee | ||
|
|
5516edadfd | ||
|
|
475afbf44b | ||
|
|
197eaf1aab |
@@ -2,7 +2,8 @@
|
||||
|
||||
This repository contains example notebooks demonstrating the [Azure Machine Learning](https://azure.microsoft.com/en-us/services/machine-learning-service/) Python SDK which allows you to build, train, deploy and manage machine learning solutions using Azure. The AML SDK allows you the choice of using local or cloud compute resources, while managing and maintaining the complete data science workflow from the cloud.
|
||||
|
||||

|
||||

|
||||
|
||||
|
||||
## Quick installation
|
||||
```sh
|
||||
@@ -49,10 +50,13 @@ The [How to use Azure ML](./how-to-use-azureml) folder contains specific example
|
||||
|
||||
---
|
||||
|
||||
|
||||
## Community Repository
|
||||
Visit this [community repository](https://github.com/microsoft/MLOps/tree/master/examples) to find useful end-to-end sample notebooks. Also, please follow these [contribution guidelines](https://github.com/microsoft/MLOps/blob/master/contributing.md) when contributing to this repository.
|
||||
|
||||
## Projects using Azure Machine Learning
|
||||
|
||||
Visit following repos to see projects contributed by Azure ML users:
|
||||
|
||||
- [AMLSamples](https://github.com/Azure/AMLSamples) Number of end-to-end examples, including face recognition, predictive maintenance, customer churn and sentiment analysis.
|
||||
- [Fine tune natural language processing models using Azure Machine Learning service](https://github.com/Microsoft/AzureML-BERT)
|
||||
- [Fashion MNIST with Azure ML SDK](https://github.com/amynic/azureml-sdk-fashion)
|
||||
|
||||
@@ -103,7 +103,7 @@
|
||||
"source": [
|
||||
"import azureml.core\n",
|
||||
"\n",
|
||||
"print(\"This notebook was created using version 1.0.57 of the Azure ML SDK\")\n",
|
||||
"print(\"This notebook was created using version 1.0.62 of the Azure ML SDK\")\n",
|
||||
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -1,723 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Track Data Drift between Training and Inference Data in Production \n",
|
||||
"\n",
|
||||
"With this notebook, you will learn how to enable the DataDrift service to automatically track and determine whether your inference data is drifting from the data your model was initially trained on. The DataDrift service provides metrics and visualizations to help stakeholders identify which specific features cause the concept drift to occur.\n",
|
||||
"\n",
|
||||
"Please email driftfeedback@microsoft.com with any issues. A member from the DataDrift team will respond shortly. \n",
|
||||
"\n",
|
||||
"The DataDrift Public Preview API can be found [here](https://docs.microsoft.com/en-us/python/api/azureml-contrib-datadrift/?view=azure-ml-py). "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Prerequisites and Setup"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Install the DataDrift package\n",
|
||||
"\n",
|
||||
"Install the azureml-contrib-datadrift, azureml-opendatasets and lightgbm packages before running this notebook.\n",
|
||||
"```\n",
|
||||
"pip install azureml-contrib-datadrift\n",
|
||||
"pip install lightgbm\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Import Dependencies"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import json\n",
|
||||
"import os\n",
|
||||
"import time\n",
|
||||
"from datetime import datetime, timedelta\n",
|
||||
"\n",
|
||||
"import numpy as np\n",
|
||||
"import pandas as pd\n",
|
||||
"import requests\n",
|
||||
"from azureml.contrib.datadrift import DataDriftDetector, AlertConfiguration\n",
|
||||
"from azureml.opendatasets import NoaaIsdWeather\n",
|
||||
"from azureml.core import Dataset, Workspace, Run\n",
|
||||
"from azureml.core.compute import AksCompute, ComputeTarget\n",
|
||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||
"from azureml.core.experiment import Experiment\n",
|
||||
"from azureml.core.image import ContainerImage\n",
|
||||
"from azureml.core.model import Model\n",
|
||||
"from azureml.core.webservice import Webservice, AksWebservice\n",
|
||||
"from azureml.widgets import RunDetails\n",
|
||||
"from sklearn.externals import joblib\n",
|
||||
"from sklearn.model_selection import train_test_split\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Set up Configuraton and Create Azure ML Workspace\n",
|
||||
"\n",
|
||||
"If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [configuration notebook](../../../configuration.ipynb) first if you haven't already to establish your connection to the AzureML Workspace."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Please type in your initials/alias. The prefix is prepended to the names of resources created by this notebook. \n",
|
||||
"prefix = \"dd\"\n",
|
||||
"\n",
|
||||
"# NOTE: Please do not change the model_name, as it's required by the score.py file\n",
|
||||
"model_name = \"driftmodel\"\n",
|
||||
"image_name = \"{}driftimage\".format(prefix)\n",
|
||||
"service_name = \"{}driftservice\".format(prefix)\n",
|
||||
"\n",
|
||||
"# optionally, set email address to receive an email alert for DataDrift\n",
|
||||
"email_address = \"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ws = Workspace.from_config()\n",
|
||||
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Generate Train/Testing Data\n",
|
||||
"\n",
|
||||
"For this demo, we will use NOAA weather data from [Azure Open Datasets](https://azure.microsoft.com/services/open-datasets/). You may replace this step with your own dataset. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"usaf_list = ['725724', '722149', '723090', '722159', '723910', '720279',\n",
|
||||
" '725513', '725254', '726430', '720381', '723074', '726682',\n",
|
||||
" '725486', '727883', '723177', '722075', '723086', '724053',\n",
|
||||
" '725070', '722073', '726060', '725224', '725260', '724520',\n",
|
||||
" '720305', '724020', '726510', '725126', '722523', '703333',\n",
|
||||
" '722249', '722728', '725483', '722972', '724975', '742079',\n",
|
||||
" '727468', '722193', '725624', '722030', '726380', '720309',\n",
|
||||
" '722071', '720326', '725415', '724504', '725665', '725424',\n",
|
||||
" '725066']\n",
|
||||
"\n",
|
||||
"columns = ['usaf', 'wban', 'datetime', 'latitude', 'longitude', 'elevation', 'windAngle', 'windSpeed', 'temperature', 'stationName', 'p_k']\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def enrich_weather_noaa_data(noaa_df):\n",
|
||||
" hours_in_day = 23\n",
|
||||
" week_in_year = 52\n",
|
||||
" \n",
|
||||
" noaa_df[\"hour\"] = noaa_df[\"datetime\"].dt.hour\n",
|
||||
" noaa_df[\"weekofyear\"] = noaa_df[\"datetime\"].dt.week\n",
|
||||
" \n",
|
||||
" noaa_df[\"sine_weekofyear\"] = noaa_df['datetime'].transform(lambda x: np.sin((2*np.pi*x.dt.week-1)/week_in_year))\n",
|
||||
" noaa_df[\"cosine_weekofyear\"] = noaa_df['datetime'].transform(lambda x: np.cos((2*np.pi*x.dt.week-1)/week_in_year))\n",
|
||||
"\n",
|
||||
" noaa_df[\"sine_hourofday\"] = noaa_df['datetime'].transform(lambda x: np.sin(2*np.pi*x.dt.hour/hours_in_day))\n",
|
||||
" noaa_df[\"cosine_hourofday\"] = noaa_df['datetime'].transform(lambda x: np.cos(2*np.pi*x.dt.hour/hours_in_day))\n",
|
||||
" \n",
|
||||
" return noaa_df\n",
|
||||
"\n",
|
||||
"def add_window_col(input_df):\n",
|
||||
" shift_interval = pd.Timedelta('-7 days') # your X days interval\n",
|
||||
" df_shifted = input_df.copy()\n",
|
||||
" df_shifted['datetime'] = df_shifted['datetime'] - shift_interval\n",
|
||||
" df_shifted.drop(list(input_df.columns.difference(['datetime', 'usaf', 'wban', 'sine_hourofday', 'temperature'])), axis=1, inplace=True)\n",
|
||||
"\n",
|
||||
" # merge, keeping only observations where -1 lag is present\n",
|
||||
" df2 = pd.merge(input_df,\n",
|
||||
" df_shifted,\n",
|
||||
" on=['datetime', 'usaf', 'wban', 'sine_hourofday'],\n",
|
||||
" how='inner', # use 'left' to keep observations without lags\n",
|
||||
" suffixes=['', '-7'])\n",
|
||||
" return df2\n",
|
||||
"\n",
|
||||
"def get_noaa_data(start_time, end_time, cols, station_list):\n",
|
||||
" isd = NoaaIsdWeather(start_time, end_time, cols=cols)\n",
|
||||
" # Read into Pandas data frame.\n",
|
||||
" noaa_df = isd.to_pandas_dataframe()\n",
|
||||
" noaa_df = noaa_df.rename(columns={\"stationName\": \"station_name\"})\n",
|
||||
" \n",
|
||||
" df_filtered = noaa_df[noaa_df[\"usaf\"].isin(station_list)]\n",
|
||||
" df_filtered.reset_index(drop=True)\n",
|
||||
" \n",
|
||||
" # Enrich with time features\n",
|
||||
" df_enriched = enrich_weather_noaa_data(df_filtered)\n",
|
||||
" \n",
|
||||
" return df_enriched\n",
|
||||
"\n",
|
||||
"def get_featurized_noaa_df(start_time, end_time, cols, station_list):\n",
|
||||
" df_1 = get_noaa_data(start_time - timedelta(days=7), start_time - timedelta(seconds=1), cols, station_list)\n",
|
||||
" df_2 = get_noaa_data(start_time, end_time, cols, station_list)\n",
|
||||
" noaa_df = pd.concat([df_1, df_2])\n",
|
||||
" \n",
|
||||
" print(\"Adding window feature\")\n",
|
||||
" df_window = add_window_col(noaa_df)\n",
|
||||
" \n",
|
||||
" cat_columns = df_window.dtypes == object\n",
|
||||
" cat_columns = cat_columns[cat_columns == True]\n",
|
||||
" \n",
|
||||
" print(\"Encoding categorical columns\")\n",
|
||||
" df_encoded = pd.get_dummies(df_window, columns=cat_columns.keys().tolist())\n",
|
||||
" \n",
|
||||
" print(\"Dropping unnecessary columns\")\n",
|
||||
" df_featurized = df_encoded.drop(['windAngle', 'windSpeed', 'datetime', 'elevation'], axis=1).dropna().drop_duplicates()\n",
|
||||
" \n",
|
||||
" return df_featurized"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Train model on Jan 1 - 14, 2009 data\n",
|
||||
"df = get_featurized_noaa_df(datetime(2009, 1, 1), datetime(2009, 1, 14, 23, 59, 59), columns, usaf_list)\n",
|
||||
"df.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"label = \"temperature\"\n",
|
||||
"x_df = df.drop(label, axis=1)\n",
|
||||
"y_df = df[[label]]\n",
|
||||
"x_train, x_test, y_train, y_test = train_test_split(df, y_df, test_size=0.2, random_state=223)\n",
|
||||
"print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)\n",
|
||||
"\n",
|
||||
"training_dir = 'outputs/training'\n",
|
||||
"training_file = \"training.csv\"\n",
|
||||
"\n",
|
||||
"# Generate training dataframe to register as Training Dataset\n",
|
||||
"os.makedirs(training_dir, exist_ok=True)\n",
|
||||
"training_df = pd.merge(x_train.drop(label, axis=1), y_train, left_index=True, right_index=True)\n",
|
||||
"training_df.to_csv(training_dir + \"/\" + training_file)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create/Register Training Dataset"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dataset_name = \"dataset\"\n",
|
||||
"name_suffix = datetime.utcnow().strftime(\"%Y-%m-%d-%H-%M-%S\")\n",
|
||||
"snapshot_name = \"snapshot-{}\".format(name_suffix)\n",
|
||||
"\n",
|
||||
"dstore = ws.get_default_datastore()\n",
|
||||
"dstore.upload(training_dir, \"data/training\", show_progress=True)\n",
|
||||
"dpath = dstore.path(\"data/training/training.csv\")\n",
|
||||
"trainingDataset = Dataset.auto_read_files(dpath, include_path=True)\n",
|
||||
"trainingDataset = trainingDataset.register(workspace=ws, name=dataset_name, description=\"dset\", exist_ok=True)\n",
|
||||
"\n",
|
||||
"datasets = [(Dataset.Scenario.TRAINING, trainingDataset)]\n",
|
||||
"print(\"dataset registration done.\\n\")\n",
|
||||
"datasets"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Train and Save Model"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import lightgbm as lgb\n",
|
||||
"\n",
|
||||
"train = lgb.Dataset(data=x_train, \n",
|
||||
" label=y_train)\n",
|
||||
"\n",
|
||||
"test = lgb.Dataset(data=x_test, \n",
|
||||
" label=y_test,\n",
|
||||
" reference=train)\n",
|
||||
"\n",
|
||||
"params = {'learning_rate' : 0.1,\n",
|
||||
" 'boosting' : 'gbdt',\n",
|
||||
" 'metric' : 'rmse',\n",
|
||||
" 'feature_fraction' : 1,\n",
|
||||
" 'bagging_fraction' : 1,\n",
|
||||
" 'max_depth': 6,\n",
|
||||
" 'num_leaves' : 31,\n",
|
||||
" 'objective' : 'regression',\n",
|
||||
" 'bagging_freq' : 1,\n",
|
||||
" \"verbose\": -1,\n",
|
||||
" 'min_data_per_leaf': 100}\n",
|
||||
"\n",
|
||||
"model = lgb.train(params, \n",
|
||||
" num_boost_round=500,\n",
|
||||
" train_set=train,\n",
|
||||
" valid_sets=[train, test],\n",
|
||||
" verbose_eval=50,\n",
|
||||
" early_stopping_rounds=25)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model_file = 'outputs/{}.pkl'.format(model_name)\n",
|
||||
"\n",
|
||||
"os.makedirs('outputs', exist_ok=True)\n",
|
||||
"joblib.dump(model, model_file)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Register Model"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model = Model.register(model_path=model_file,\n",
|
||||
" model_name=model_name,\n",
|
||||
" workspace=ws,\n",
|
||||
" datasets=datasets)\n",
|
||||
"\n",
|
||||
"print(model_name, image_name, service_name, model)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Deploy Model To AKS"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Prepare Environment"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn', 'joblib', 'lightgbm', 'pandas'],\n",
|
||||
" pip_packages=['azureml-monitoring', 'azureml-sdk[automl]'])\n",
|
||||
"\n",
|
||||
"with open(\"myenv.yml\",\"w\") as f:\n",
|
||||
" f.write(myenv.serialize_to_string())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create Image"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Image creation may take up to 15 minutes.\n",
|
||||
"\n",
|
||||
"image_name = image_name + str(model.version)\n",
|
||||
"\n",
|
||||
"if not image_name in ws.images:\n",
|
||||
" # Use the score.py defined in this directory as the execution script\n",
|
||||
" # NOTE: The Model Data Collector must be enabled in the execution script for DataDrift to run correctly\n",
|
||||
" image_config = ContainerImage.image_configuration(execution_script=\"score.py\",\n",
|
||||
" runtime=\"python\",\n",
|
||||
" conda_file=\"myenv.yml\",\n",
|
||||
" description=\"Image with weather dataset model\")\n",
|
||||
" image = ContainerImage.create(name=image_name,\n",
|
||||
" models=[model],\n",
|
||||
" image_config=image_config,\n",
|
||||
" workspace=ws)\n",
|
||||
"\n",
|
||||
" image.wait_for_creation(show_output=True)\n",
|
||||
"else:\n",
|
||||
" image = ws.images[image_name]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create Compute Target"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"aks_name = 'dd-demo-e2e'\n",
|
||||
"prov_config = AksCompute.provisioning_configuration()\n",
|
||||
"\n",
|
||||
"if not aks_name in ws.compute_targets:\n",
|
||||
" aks_target = ComputeTarget.create(workspace=ws,\n",
|
||||
" name=aks_name,\n",
|
||||
" provisioning_configuration=prov_config)\n",
|
||||
"\n",
|
||||
" aks_target.wait_for_completion(show_output=True)\n",
|
||||
" print(aks_target.provisioning_state)\n",
|
||||
" print(aks_target.provisioning_errors)\n",
|
||||
"else:\n",
|
||||
" aks_target=ws.compute_targets[aks_name]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Deploy Service"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"aks_service_name = service_name\n",
|
||||
"\n",
|
||||
"if not aks_service_name in ws.webservices:\n",
|
||||
" aks_config = AksWebservice.deploy_configuration(collect_model_data=True, enable_app_insights=True)\n",
|
||||
" aks_service = Webservice.deploy_from_image(workspace=ws,\n",
|
||||
" name=aks_service_name,\n",
|
||||
" image=image,\n",
|
||||
" deployment_config=aks_config,\n",
|
||||
" deployment_target=aks_target)\n",
|
||||
" aks_service.wait_for_deployment(show_output=True)\n",
|
||||
" print(aks_service.state)\n",
|
||||
"else:\n",
|
||||
" aks_service = ws.webservices[aks_service_name]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Run DataDrift Analysis"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Send Scoring Data to Service"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Download Scoring Data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Score Model on March 15, 2016 data\n",
|
||||
"scoring_df = get_noaa_data(datetime(2016, 3, 15) - timedelta(days=7), datetime(2016, 3, 16), columns, usaf_list)\n",
|
||||
"# Add the window feature column\n",
|
||||
"scoring_df = add_window_col(scoring_df)\n",
|
||||
"\n",
|
||||
"# Drop features not used by the model\n",
|
||||
"print(\"Dropping unnecessary columns\")\n",
|
||||
"scoring_df = scoring_df.drop(['windAngle', 'windSpeed', 'datetime', 'elevation'], axis=1).dropna()\n",
|
||||
"scoring_df.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# One Hot Encode the scoring dataset to match the training dataset schema\n",
|
||||
"columns_dict = model.datasets[\"training\"][0].get_profile().columns\n",
|
||||
"extra_cols = ('Path', 'Column1')\n",
|
||||
"for k in extra_cols:\n",
|
||||
" columns_dict.pop(k, None)\n",
|
||||
"training_columns = list(columns_dict.keys())\n",
|
||||
"\n",
|
||||
"categorical_columns = scoring_df.dtypes == object\n",
|
||||
"categorical_columns = categorical_columns[categorical_columns == True]\n",
|
||||
"\n",
|
||||
"test_df = pd.get_dummies(scoring_df[categorical_columns.keys().tolist()])\n",
|
||||
"encoded_df = scoring_df.join(test_df)\n",
|
||||
"\n",
|
||||
"# Populate missing OHE columns with 0 values to match traning dataset schema\n",
|
||||
"difference = list(set(training_columns) - set(encoded_df.columns.tolist()))\n",
|
||||
"for col in difference:\n",
|
||||
" encoded_df[col] = 0\n",
|
||||
"encoded_df.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Serialize dataframe to list of row dictionaries\n",
|
||||
"encoded_dict = encoded_df.to_dict('records')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Submit Scoring Data to Service"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"\n",
|
||||
"# retreive the API keys. AML generates two keys.\n",
|
||||
"key1, key2 = aks_service.get_keys()\n",
|
||||
"\n",
|
||||
"total_count = len(scoring_df)\n",
|
||||
"i = 0\n",
|
||||
"load = []\n",
|
||||
"for row in encoded_dict:\n",
|
||||
" load.append(row)\n",
|
||||
" i = i + 1\n",
|
||||
" if i % 100 == 0:\n",
|
||||
" payload = json.dumps({\"data\": load})\n",
|
||||
" \n",
|
||||
" # construct raw HTTP request and send to the service\n",
|
||||
" payload_binary = bytes(payload,encoding = 'utf8')\n",
|
||||
" headers = {'Content-Type':'application/json', 'Authorization': 'Bearer ' + key1}\n",
|
||||
" resp = requests.post(aks_service.scoring_uri, payload_binary, headers=headers)\n",
|
||||
" \n",
|
||||
" print(\"prediction:\", resp.content, \"Progress: {}/{}\".format(i, total_count)) \n",
|
||||
"\n",
|
||||
" load = []\n",
|
||||
" time.sleep(3)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We need to wait up to 10 minutes for the Model Data Collector to dump the model input and inference data to storage in the Workspace, where it's used by the DataDriftDetector job."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"time.sleep(600)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Configure DataDrift"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"services = [service_name]\n",
|
||||
"start = datetime.now() - timedelta(days=2)\n",
|
||||
"end = datetime(year=2020, month=1, day=22, hour=15, minute=16)\n",
|
||||
"feature_list = ['usaf', 'wban', 'latitude', 'longitude', 'station_name', 'p_k', 'sine_hourofday', 'cosine_hourofday', 'temperature-7']\n",
|
||||
"alert_config = AlertConfiguration([email_address]) if email_address else None\n",
|
||||
"\n",
|
||||
"# there will be an exception indicating using get() method if DataDrift object already exist\n",
|
||||
"try:\n",
|
||||
" datadrift = DataDriftDetector.create(ws, model.name, model.version, services, frequency=\"Day\", alert_config=alert_config)\n",
|
||||
"except KeyError:\n",
|
||||
" datadrift = DataDriftDetector.get(ws, model.name, model.version)\n",
|
||||
" \n",
|
||||
"print(\"Details of DataDrift Object:\\n{}\".format(datadrift))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Run an Adhoc DataDriftDetector Run"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"target_date = datetime.today()\n",
|
||||
"run = datadrift.run(target_date, services, feature_list=feature_list, create_compute_target=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"exp = Experiment(ws, datadrift._id)\n",
|
||||
"dd_run = Run(experiment=exp, run_id=run)\n",
|
||||
"RunDetails(dd_run).show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Get Drift Analysis Results"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"children = list(dd_run.get_children())\n",
|
||||
"for child in children:\n",
|
||||
" child.wait_for_completion()\n",
|
||||
"\n",
|
||||
"drift_metrics = datadrift.get_output(start_time=start, end_time=end)\n",
|
||||
"drift_metrics"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Show all drift figures, one per serivice.\n",
|
||||
"# If setting with_details is False (by default), only drift will be shown; if it's True, all details will be shown.\n",
|
||||
"\n",
|
||||
"drift_figures = datadrift.show(with_details=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Enable DataDrift Schedule"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"datadrift.enable_schedule()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "rafarmah"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
"name": "python36"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -1,8 +0,0 @@
|
||||
name: azure-ml-datadrift
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-contrib-datadrift
|
||||
- azureml-opendatasets
|
||||
- lightgbm
|
||||
- azureml-widgets
|
||||
@@ -1,58 +0,0 @@
|
||||
import pickle
|
||||
import json
|
||||
import numpy
|
||||
import azureml.train.automl
|
||||
from sklearn.externals import joblib
|
||||
from sklearn.linear_model import Ridge
|
||||
from azureml.core.model import Model
|
||||
from azureml.core.run import Run
|
||||
from azureml.monitoring import ModelDataCollector
|
||||
import time
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def init():
|
||||
global model, inputs_dc, prediction_dc, feature_names, categorical_features
|
||||
|
||||
print("Model is initialized" + time.strftime("%H:%M:%S"))
|
||||
model_path = Model.get_model_path(model_name="driftmodel")
|
||||
model = joblib.load(model_path)
|
||||
|
||||
feature_names = ["usaf", "wban", "latitude", "longitude", "station_name", "p_k",
|
||||
"sine_weekofyear", "cosine_weekofyear", "sine_hourofday", "cosine_hourofday",
|
||||
"temperature-7"]
|
||||
|
||||
categorical_features = ["usaf", "wban", "p_k", "station_name"]
|
||||
|
||||
inputs_dc = ModelDataCollector(model_name="driftmodel",
|
||||
identifier="inputs",
|
||||
feature_names=feature_names)
|
||||
|
||||
prediction_dc = ModelDataCollector("driftmodel",
|
||||
identifier="predictions",
|
||||
feature_names=["temperature"])
|
||||
|
||||
|
||||
def run(raw_data):
|
||||
global inputs_dc, prediction_dc
|
||||
|
||||
try:
|
||||
data = json.loads(raw_data)["data"]
|
||||
data = pd.DataFrame(data)
|
||||
|
||||
# Remove the categorical features as the model expects OHE values
|
||||
input_data = data.drop(categorical_features, axis=1)
|
||||
|
||||
result = model.predict(input_data)
|
||||
|
||||
# Collect the non-OHE dataframe
|
||||
collected_df = data[feature_names]
|
||||
|
||||
inputs_dc.collect(collected_df.values)
|
||||
prediction_dc.collect(result)
|
||||
return result.tolist()
|
||||
except Exception as e:
|
||||
error = str(e)
|
||||
|
||||
print(error + time.strftime("%H:%M:%S"))
|
||||
return error
|
||||
@@ -21,5 +21,6 @@ dependencies:
|
||||
- azureml-train-automl
|
||||
- azureml-widgets
|
||||
- azureml-explain-model
|
||||
- azureml-contrib-explain-model
|
||||
- pandas_ml
|
||||
|
||||
|
||||
@@ -22,5 +22,6 @@ dependencies:
|
||||
- azureml-train-automl
|
||||
- azureml-widgets
|
||||
- azureml-explain-model
|
||||
- azureml-contrib-explain-model
|
||||
- pandas_ml
|
||||
|
||||
|
||||
@@ -92,8 +92,6 @@
|
||||
"\n",
|
||||
"# choose a name for experiment\n",
|
||||
"experiment_name = 'automl-classification-bmarketing'\n",
|
||||
"# project folder\n",
|
||||
"project_folder = './sample_projects/automl-classification-bankmarketing'\n",
|
||||
"\n",
|
||||
"experiment=Experiment(ws, experiment_name)\n",
|
||||
"\n",
|
||||
@@ -103,7 +101,6 @@
|
||||
"output['Workspace'] = ws.name\n",
|
||||
"output['Resource Group'] = ws.resource_group\n",
|
||||
"output['Location'] = ws.location\n",
|
||||
"output['Project Directory'] = project_folder\n",
|
||||
"output['Experiment Name'] = experiment.name\n",
|
||||
"pd.set_option('display.max_colwidth', -1)\n",
|
||||
"outputDf = pd.DataFrame(data = output, index = [''])\n",
|
||||
@@ -164,20 +161,7 @@
|
||||
"source": [
|
||||
"# Data\n",
|
||||
"\n",
|
||||
"Here load the data in the get_data() script to be utilized in azure compute. To do this first load all the necessary libraries and dependencies to set up paths for the data and to create the conda_Run_config."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"if not os.path.isdir('data'):\n",
|
||||
" os.mkdir('data')\n",
|
||||
" \n",
|
||||
"if not os.path.exists(project_folder):\n",
|
||||
" os.makedirs(project_folder)"
|
||||
"Create a run configuration for the remote run."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -207,7 +191,7 @@
|
||||
"source": [
|
||||
"### Load Data\n",
|
||||
"\n",
|
||||
"Here we create the script to be run in azure comput for loading the data, we load the bank marketing dataset into X_train and y_train. Next X_train and y_train is returned for training the model."
|
||||
"Load the bank marketing dataset into X_train and y_train. X_train contains the training features, which are inputs to the model. y_train contains the training labels, which are the expected output of the model."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -240,7 +224,6 @@
|
||||
"|**n_cross_validations**|Number of cross validation splits.|\n",
|
||||
"|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n",
|
||||
"|**y**|(sparse) array-like, shape = [n_samples, ], Multi-class targets.|\n",
|
||||
"|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder.|\n",
|
||||
"\n",
|
||||
"**_You can find more information about primary metrics_** [here](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-configure-auto-train#primary-metric)"
|
||||
]
|
||||
@@ -263,7 +246,6 @@
|
||||
"\n",
|
||||
"automl_config = AutoMLConfig(task = 'classification',\n",
|
||||
" debug_log = 'automl_errors.log',\n",
|
||||
" path = project_folder,\n",
|
||||
" run_configuration=conda_run_config,\n",
|
||||
" X = X_train,\n",
|
||||
" y = y_train,\n",
|
||||
|
||||
@@ -92,8 +92,6 @@
|
||||
"\n",
|
||||
"# choose a name for experiment\n",
|
||||
"experiment_name = 'automl-classification-ccard'\n",
|
||||
"# project folder\n",
|
||||
"project_folder = './sample_projects/automl-classification-creditcard'\n",
|
||||
"\n",
|
||||
"experiment=Experiment(ws, experiment_name)\n",
|
||||
"\n",
|
||||
@@ -103,7 +101,6 @@
|
||||
"output['Workspace'] = ws.name\n",
|
||||
"output['Resource Group'] = ws.resource_group\n",
|
||||
"output['Location'] = ws.location\n",
|
||||
"output['Project Directory'] = project_folder\n",
|
||||
"output['Experiment Name'] = experiment.name\n",
|
||||
"pd.set_option('display.max_colwidth', -1)\n",
|
||||
"outputDf = pd.DataFrame(data = output, index = [''])\n",
|
||||
@@ -164,20 +161,7 @@
|
||||
"source": [
|
||||
"# Data\n",
|
||||
"\n",
|
||||
"Here load the data in the get_data script to be utilized in azure compute. To do this, first load all the necessary libraries and dependencies to set up paths for the data and to create the conda_run_config."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"if not os.path.isdir('data'):\n",
|
||||
" os.mkdir('data')\n",
|
||||
" \n",
|
||||
"if not os.path.exists(project_folder):\n",
|
||||
" os.makedirs(project_folder)"
|
||||
"Create a run configuration for the remote run."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -207,7 +191,7 @@
|
||||
"source": [
|
||||
"### Load Data\n",
|
||||
"\n",
|
||||
"Here create the script to be run in azure compute for loading the data, load the credit card dataset into cards and store the Class column (y) in the y variable and store the remaining data in the x variable. Next split the data using random_split and return X_train and y_train for training the model."
|
||||
"Load the credit card dataset into X and y. X contains the features, which are inputs to the model. y contains the labels, which are the expected output of the model. Next split the data using random_split and return X_train and y_train for training the model."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -241,7 +225,6 @@
|
||||
"|**n_cross_validations**|Number of cross validation splits.|\n",
|
||||
"|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n",
|
||||
"|**y**|(sparse) array-like, shape = [n_samples, ], Multi-class targets.|\n",
|
||||
"|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder.|\n",
|
||||
"\n",
|
||||
"**_You can find more information about primary metrics_** [here](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-configure-auto-train#primary-metric)"
|
||||
]
|
||||
@@ -270,8 +253,7 @@
|
||||
"}\n",
|
||||
"\n",
|
||||
"automl_config = AutoMLConfig(task = 'classification',\n",
|
||||
" debug_log = 'automl_errors_20190417.log',\n",
|
||||
" path = project_folder,\n",
|
||||
" debug_log = 'automl_errors.log',\n",
|
||||
" run_configuration=conda_run_config,\n",
|
||||
" X = X_train,\n",
|
||||
" y = y_train,\n",
|
||||
|
||||
@@ -92,8 +92,6 @@
|
||||
"\n",
|
||||
"# choose a name for experiment\n",
|
||||
"experiment_name = 'automl-classification-deployment'\n",
|
||||
"# project folder\n",
|
||||
"project_folder = './sample_projects/automl-classification-deployment'\n",
|
||||
"\n",
|
||||
"experiment=Experiment(ws, experiment_name)\n",
|
||||
"\n",
|
||||
@@ -103,7 +101,6 @@
|
||||
"output['Workspace'] = ws.name\n",
|
||||
"output['Resource Group'] = ws.resource_group\n",
|
||||
"output['Location'] = ws.location\n",
|
||||
"output['Project Directory'] = project_folder\n",
|
||||
"output['Experiment Name'] = experiment.name\n",
|
||||
"pd.set_option('display.max_colwidth', -1)\n",
|
||||
"outputDf = pd.DataFrame(data = output, index = [''])\n",
|
||||
@@ -126,8 +123,7 @@
|
||||
"|**iterations**|Number of iterations. In each iteration AutoML trains a specific pipeline with the data.|\n",
|
||||
"|**n_cross_validations**|Number of cross validation splits.|\n",
|
||||
"|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n",
|
||||
"|**y**|(sparse) array-like, shape = [n_samples, ], Multi-class targets.|\n",
|
||||
"|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder.|"
|
||||
"|**y**|(sparse) array-like, shape = [n_samples, ], Multi-class targets.|"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -148,8 +144,7 @@
|
||||
" iterations = 10,\n",
|
||||
" verbosity = logging.INFO,\n",
|
||||
" X = X_train, \n",
|
||||
" y = y_train,\n",
|
||||
" path = project_folder)"
|
||||
" y = y_train)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -89,9 +89,8 @@
|
||||
"source": [
|
||||
"ws = Workspace.from_config()\n",
|
||||
"\n",
|
||||
"# Choose a name for the experiment and specify the project folder.\n",
|
||||
"# Choose a name for the experiment.\n",
|
||||
"experiment_name = 'automl-classification-onnx'\n",
|
||||
"project_folder = './sample_projects/automl-classification-onnx'\n",
|
||||
"\n",
|
||||
"experiment = Experiment(ws, experiment_name)\n",
|
||||
"\n",
|
||||
@@ -101,7 +100,6 @@
|
||||
"output['Workspace Name'] = ws.name\n",
|
||||
"output['Resource Group'] = ws.resource_group\n",
|
||||
"output['Location'] = ws.location\n",
|
||||
"output['Project Directory'] = project_folder\n",
|
||||
"output['Experiment Name'] = experiment.name\n",
|
||||
"pd.set_option('display.max_colwidth', -1)\n",
|
||||
"outputDf = pd.DataFrame(data = output, index = [''])\n",
|
||||
@@ -127,9 +125,7 @@
|
||||
"X_train, X_test, y_train, y_test = train_test_split(iris.data, \n",
|
||||
" iris.target, \n",
|
||||
" test_size=0.2, \n",
|
||||
" random_state=0)\n",
|
||||
"\n",
|
||||
"\n"
|
||||
" random_state=0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -170,8 +166,7 @@
|
||||
"|**iterations**|Number of iterations. In each iteration AutoML trains a specific pipeline with the data.|\n",
|
||||
"|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n",
|
||||
"|**y**|(sparse) array-like, shape = [n_samples, ], Multi-class targets.|\n",
|
||||
"|**enable_onnx_compatible_models**|Enable the ONNX compatible models in the experiment.|\n",
|
||||
"|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder.|"
|
||||
"|**enable_onnx_compatible_models**|Enable the ONNX compatible models in the experiment.|"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -196,8 +191,7 @@
|
||||
" X = X_train, \n",
|
||||
" y = y_train,\n",
|
||||
" preprocess=True,\n",
|
||||
" enable_onnx_compatible_models=True,\n",
|
||||
" path = project_folder)"
|
||||
" enable_onnx_compatible_models=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -100,9 +100,8 @@
|
||||
"source": [
|
||||
"ws = Workspace.from_config()\n",
|
||||
"\n",
|
||||
"# Choose a name for the experiment and specify the project folder.\n",
|
||||
"# Choose a name for the experiment.\n",
|
||||
"experiment_name = 'automl-local-whitelist'\n",
|
||||
"project_folder = './sample_projects/automl-local-whitelist'\n",
|
||||
"\n",
|
||||
"experiment = Experiment(ws, experiment_name)\n",
|
||||
"\n",
|
||||
@@ -112,7 +111,6 @@
|
||||
"output['Workspace Name'] = ws.name\n",
|
||||
"output['Resource Group'] = ws.resource_group\n",
|
||||
"output['Location'] = ws.location\n",
|
||||
"output['Project Directory'] = project_folder\n",
|
||||
"output['Experiment Name'] = experiment.name\n",
|
||||
"pd.set_option('display.max_colwidth', -1)\n",
|
||||
"outputDf = pd.DataFrame(data = output, index = [''])\n",
|
||||
@@ -158,7 +156,6 @@
|
||||
"|**n_cross_validations**|Number of cross validation splits.|\n",
|
||||
"|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n",
|
||||
"|**y**|(sparse) array-like, shape = [n_samples, ], Multi-class targets.|\n",
|
||||
"|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder.|\n",
|
||||
"|**whitelist_models**|List of models that AutoML should use. The possible values are listed [here](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-configure-auto-train#configure-your-experiment-settings).|"
|
||||
]
|
||||
},
|
||||
@@ -177,8 +174,7 @@
|
||||
" X = X_train, \n",
|
||||
" y = y_train,\n",
|
||||
" enable_tf=True,\n",
|
||||
" whitelist_models=whitelist_models,\n",
|
||||
" path = project_folder)"
|
||||
" whitelist_models=whitelist_models)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -113,9 +113,8 @@
|
||||
"source": [
|
||||
"ws = Workspace.from_config()\n",
|
||||
"\n",
|
||||
"# Choose a name for the experiment and specify the project folder.\n",
|
||||
"# Choose a name for the experiment.\n",
|
||||
"experiment_name = 'automl-classification'\n",
|
||||
"project_folder = './sample_projects/automl-classification'\n",
|
||||
"\n",
|
||||
"experiment = Experiment(ws, experiment_name)\n",
|
||||
"\n",
|
||||
@@ -125,7 +124,6 @@
|
||||
"output['Workspace Name'] = ws.name\n",
|
||||
"output['Resource Group'] = ws.resource_group\n",
|
||||
"output['Location'] = ws.location\n",
|
||||
"output['Project Directory'] = project_folder\n",
|
||||
"output['Experiment Name'] = experiment.name\n",
|
||||
"pd.set_option('display.max_colwidth', -1)\n",
|
||||
"outputDf = pd.DataFrame(data = output, index = [''])\n",
|
||||
|
||||
@@ -1,529 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||
"\n",
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Automated Machine Learning\n",
|
||||
"_**Prepare Data using `azureml.dataprep` for Remote Execution (AmlCompute)**_\n",
|
||||
"\n",
|
||||
"## Contents\n",
|
||||
"1. [Introduction](#Introduction)\n",
|
||||
"1. [Setup](#Setup)\n",
|
||||
"1. [Data](#Data)\n",
|
||||
"1. [Train](#Train)\n",
|
||||
"1. [Results](#Results)\n",
|
||||
"1. [Test](#Test)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Introduction\n",
|
||||
"In this example we showcase how you can use the `azureml.dataprep` SDK to load and prepare data for AutoML. `azureml.dataprep` can also be used standalone; full documentation can be found [here](https://github.com/Microsoft/PendletonDocs).\n",
|
||||
"\n",
|
||||
"Make sure you have executed the [configuration](../../../configuration.ipynb) before running this notebook.\n",
|
||||
"\n",
|
||||
"In this notebook you will learn how to:\n",
|
||||
"1. Define data loading and preparation steps in a `Dataflow` using `azureml.dataprep`.\n",
|
||||
"2. Pass the `Dataflow` to AutoML for a local run.\n",
|
||||
"3. Pass the `Dataflow` to AutoML for a remote run."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"Currently, Data Prep only supports __Ubuntu 16__ and __Red Hat Enterprise Linux 7__. We are working on supporting more linux distros."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"As part of the setup you have already created an Azure ML `Workspace` object. For AutoML you will need to create an `Experiment` object, which is a named object in a `Workspace` used to run experiments."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import logging\n",
|
||||
"import time\n",
|
||||
"\n",
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"import azureml.core\n",
|
||||
"from azureml.core.compute import DsvmCompute\n",
|
||||
"from azureml.core.experiment import Experiment\n",
|
||||
"from azureml.core.workspace import Workspace\n",
|
||||
"import azureml.dataprep as dprep\n",
|
||||
"from azureml.train.automl import AutoMLConfig"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ws = Workspace.from_config()\n",
|
||||
" \n",
|
||||
"# choose a name for experiment\n",
|
||||
"experiment_name = 'automl-dataprep-remote-dsvm'\n",
|
||||
"# project folder\n",
|
||||
"project_folder = './sample_projects/automl-dataprep-remote-dsvm'\n",
|
||||
" \n",
|
||||
"experiment = Experiment(ws, experiment_name)\n",
|
||||
" \n",
|
||||
"output = {}\n",
|
||||
"output['SDK version'] = azureml.core.VERSION\n",
|
||||
"output['Subscription ID'] = ws.subscription_id\n",
|
||||
"output['Workspace Name'] = ws.name\n",
|
||||
"output['Resource Group'] = ws.resource_group\n",
|
||||
"output['Location'] = ws.location\n",
|
||||
"output['Project Directory'] = project_folder\n",
|
||||
"output['Experiment Name'] = experiment.name\n",
|
||||
"pd.set_option('display.max_colwidth', -1)\n",
|
||||
"outputDf = pd.DataFrame(data = output, index = [''])\n",
|
||||
"outputDf.T"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# You can use `auto_read_file` which intelligently figures out delimiters and datatypes of a file.\n",
|
||||
"# The data referenced here was a 1MB simple random sample of the Chicago Crime data into a local temporary directory.\n",
|
||||
"# You can also use `read_csv` and `to_*` transformations to read (with overridable delimiter)\n",
|
||||
"# and convert column types manually.\n",
|
||||
"example_data = 'https://dprepdata.blob.core.windows.net/demo/crime0-random.csv'\n",
|
||||
"dflow = dprep.read_csv(example_data, infer_column_types=True)\n",
|
||||
"dflow.get_profile()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# As `Primary Type` is our y data, we need to drop the values those are null in this column.\n",
|
||||
"dflow = dflow.drop_nulls('Primary Type')\n",
|
||||
"dflow.head(5)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Review the Data Preparation Result\n",
|
||||
"\n",
|
||||
"You can peek the result of a Dataflow at any range using `skip(i)` and `head(j)`. Doing so evaluates only `j` records for all the steps in the Dataflow, which makes it fast even against large datasets.\n",
|
||||
"\n",
|
||||
"`Dataflow` objects are immutable and are composed of a list of data preparation steps. A `Dataflow` object can be branched at any point for further usage."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"X = dflow.drop_columns(columns=['Primary Type', 'FBI Code'])\n",
|
||||
"y = dflow.keep_columns(columns=['Primary Type'], validate_column_exists=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Train\n",
|
||||
"\n",
|
||||
"This creates a general AutoML settings object applicable for both local and remote runs."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"automl_settings = {\n",
|
||||
" \"iteration_timeout_minutes\" : 10,\n",
|
||||
" \"iterations\" : 2,\n",
|
||||
" \"primary_metric\" : 'AUC_weighted',\n",
|
||||
" \"preprocess\" : True,\n",
|
||||
" \"verbosity\" : logging.INFO\n",
|
||||
"}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create or Attach an AmlCompute cluster"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.compute import AmlCompute\n",
|
||||
"from azureml.core.compute import ComputeTarget\n",
|
||||
"\n",
|
||||
"# Choose a name for your cluster.\n",
|
||||
"amlcompute_cluster_name = \"cpu-cluster\"\n",
|
||||
"\n",
|
||||
"found = False\n",
|
||||
"\n",
|
||||
"# Check if this compute target already exists in the workspace.\n",
|
||||
"\n",
|
||||
"cts = ws.compute_targets\n",
|
||||
"if amlcompute_cluster_name in cts and cts[amlcompute_cluster_name].type == 'AmlCompute':\n",
|
||||
" found = True\n",
|
||||
" print('Found existing compute target.')\n",
|
||||
" compute_target = cts[amlcompute_cluster_name]\n",
|
||||
"\n",
|
||||
"if not found:\n",
|
||||
" print('Creating a new compute target...')\n",
|
||||
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\", # for GPU, use \"STANDARD_NC6\"\n",
|
||||
" #vm_priority = 'lowpriority', # optional\n",
|
||||
" max_nodes = 6)\n",
|
||||
"\n",
|
||||
" # Create the cluster.\\n\",\n",
|
||||
" compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)\n",
|
||||
"\n",
|
||||
" # Can poll for a minimum number of nodes and for a specific timeout.\n",
|
||||
" # If no min_node_count is provided, it will use the scale settings for the cluster.\n",
|
||||
" compute_target.wait_for_completion(show_output = True, min_node_count = None, timeout_in_minutes = 20)\n",
|
||||
"\n",
|
||||
" # For a more detailed view of current AmlCompute status, use get_status()."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.runconfig import RunConfiguration\n",
|
||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||
"import pkg_resources\n",
|
||||
"\n",
|
||||
"# create a new RunConfig object\n",
|
||||
"conda_run_config = RunConfiguration(framework=\"python\")\n",
|
||||
"\n",
|
||||
"# Set compute target to AmlCompute\n",
|
||||
"conda_run_config.target = compute_target\n",
|
||||
"conda_run_config.environment.docker.enabled = True\n",
|
||||
"conda_run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n",
|
||||
"\n",
|
||||
"dprep_dependency = 'azureml-dataprep==' + pkg_resources.get_distribution(\"azureml-dataprep\").version\n",
|
||||
"\n",
|
||||
"cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]', dprep_dependency], conda_packages=['numpy','py-xgboost<=0.80'])\n",
|
||||
"conda_run_config.environment.python.conda_dependencies = cd"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Pass Data with `Dataflow` Objects\n",
|
||||
"\n",
|
||||
"The `Dataflow` objects captured above can also be passed to the `submit` method for a remote run. AutoML will serialize the `Dataflow` object and send it to the remote compute target. The `Dataflow` will not be evaluated locally."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"automl_config = AutoMLConfig(task = 'classification',\n",
|
||||
" debug_log = 'automl_errors.log',\n",
|
||||
" path = project_folder,\n",
|
||||
" run_configuration=conda_run_config,\n",
|
||||
" X = X,\n",
|
||||
" y = y,\n",
|
||||
" **automl_settings)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"remote_run = experiment.submit(automl_config, show_output = True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"remote_run"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Pre-process cache cleanup\n",
|
||||
"The preprocess data gets cache at user default file store. When the run is completed the cache can be cleaned by running below cell"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"remote_run.clean_preprocessor_cache()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Cancelling Runs\n",
|
||||
"You can cancel ongoing remote runs using the `cancel` and `cancel_iteration` functions."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Cancel the ongoing experiment and stop scheduling new iterations.\n",
|
||||
"# remote_run.cancel()\n",
|
||||
"\n",
|
||||
"# Cancel iteration 1 and move onto iteration 2.\n",
|
||||
"# remote_run.cancel_iteration(1)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Results"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Widget for Monitoring Runs\n",
|
||||
"\n",
|
||||
"The widget will first report a \"loading\" status while running the first iteration. After completing the first iteration, an auto-updating graph and table will be shown. The widget will refresh once per minute, so you should see the graph update as child runs complete.\n",
|
||||
"\n",
|
||||
"**Note:** The widget displays a link at the bottom. Use this link to open a web interface to explore the individual run details."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.widgets import RunDetails\n",
|
||||
"RunDetails(remote_run).show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Retrieve All Child Runs\n",
|
||||
"You can also use SDK methods to fetch all the child runs and see individual metrics that we log."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"children = list(remote_run.get_children())\n",
|
||||
"metricslist = {}\n",
|
||||
"for run in children:\n",
|
||||
" properties = run.get_properties()\n",
|
||||
" metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)}\n",
|
||||
" metricslist[int(properties['iteration'])] = metrics\n",
|
||||
" \n",
|
||||
"rundata = pd.DataFrame(metricslist).sort_index(1)\n",
|
||||
"rundata"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Retrieve the Best Model\n",
|
||||
"\n",
|
||||
"Below we select the best pipeline from our iterations. The `get_output` method returns the best run and the fitted model. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"best_run, fitted_model = remote_run.get_output()\n",
|
||||
"print(best_run)\n",
|
||||
"print(fitted_model)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Best Model Based on Any Other Metric\n",
|
||||
"Show the run and the model that has the smallest `log_loss` value:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"lookup_metric = \"log_loss\"\n",
|
||||
"best_run, fitted_model = remote_run.get_output(metric = lookup_metric)\n",
|
||||
"print(best_run)\n",
|
||||
"print(fitted_model)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Model from a Specific Iteration\n",
|
||||
"Show the run and the model from the first iteration:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"iteration = 0\n",
|
||||
"best_run, fitted_model = remote_run.get_output(iteration = iteration)\n",
|
||||
"print(best_run)\n",
|
||||
"print(fitted_model)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Test\n",
|
||||
"\n",
|
||||
"#### Load Test Data\n",
|
||||
"For the test data, it should have the same preparation step as the train data. Otherwise it might get failed at the preprocessing step."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dflow_test = dprep.auto_read_file(path='https://dprepdata.blob.core.windows.net/demo/crime0-test.csv').skip(1)\n",
|
||||
"dflow_test = dflow_test.drop_nulls('Primary Type')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Testing Our Best Fitted Model\n",
|
||||
"We will use confusion matrix to see how our model works."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from pandas_ml import ConfusionMatrix\n",
|
||||
"\n",
|
||||
"y_test = dflow_test.keep_columns(columns=['Primary Type']).to_pandas_dataframe()\n",
|
||||
"X_test = dflow_test.drop_columns(columns=['Primary Type', 'FBI Code']).to_pandas_dataframe()\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"ypred = fitted_model.predict(X_test)\n",
|
||||
"\n",
|
||||
"cm = ConfusionMatrix(y_test['Primary Type'], ypred)\n",
|
||||
"\n",
|
||||
"print(cm)\n",
|
||||
"\n",
|
||||
"cm.plot()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "savitam"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
"name": "python36"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -1,8 +0,0 @@
|
||||
name: auto-ml-dataprep-remote-execution
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-train-automl
|
||||
- azureml-widgets
|
||||
- matplotlib
|
||||
- pandas_ml
|
||||
@@ -1,417 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||
"\n",
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Automated Machine Learning\n",
|
||||
"_**Prepare Data using `azureml.dataprep` for Local Execution**_\n",
|
||||
"\n",
|
||||
"## Contents\n",
|
||||
"1. [Introduction](#Introduction)\n",
|
||||
"1. [Setup](#Setup)\n",
|
||||
"1. [Data](#Data)\n",
|
||||
"1. [Train](#Train)\n",
|
||||
"1. [Results](#Results)\n",
|
||||
"1. [Test](#Test)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Introduction\n",
|
||||
"In this example we showcase how you can use the `azureml.dataprep` SDK to load and prepare data for AutoML. `azureml.dataprep` can also be used standalone; full documentation can be found [here](https://github.com/Microsoft/PendletonDocs).\n",
|
||||
"\n",
|
||||
"Make sure you have executed the [configuration](../../../configuration.ipynb) before running this notebook.\n",
|
||||
"\n",
|
||||
"In this notebook you will learn how to:\n",
|
||||
"1. Define data loading and preparation steps in a `Dataflow` using `azureml.dataprep`.\n",
|
||||
"2. Pass the `Dataflow` to AutoML for a local run.\n",
|
||||
"3. Pass the `Dataflow` to AutoML for a remote run."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"Currently, Data Prep only supports __Ubuntu 16__ and __Red Hat Enterprise Linux 7__. We are working on supporting more linux distros."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"As part of the setup you have already created an Azure ML `Workspace` object. For AutoML you will need to create an `Experiment` object, which is a named object in a `Workspace` used to run experiments."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import logging\n",
|
||||
"\n",
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"import azureml.core\n",
|
||||
"from azureml.core.experiment import Experiment\n",
|
||||
"from azureml.core.workspace import Workspace\n",
|
||||
"import azureml.dataprep as dprep\n",
|
||||
"from azureml.train.automl import AutoMLConfig"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ws = Workspace.from_config()\n",
|
||||
" \n",
|
||||
"# choose a name for experiment\n",
|
||||
"experiment_name = 'automl-dataprep-local'\n",
|
||||
"# project folder\n",
|
||||
"project_folder = './sample_projects/automl-dataprep-local'\n",
|
||||
" \n",
|
||||
"experiment = Experiment(ws, experiment_name)\n",
|
||||
" \n",
|
||||
"output = {}\n",
|
||||
"output['SDK version'] = azureml.core.VERSION\n",
|
||||
"output['Subscription ID'] = ws.subscription_id\n",
|
||||
"output['Workspace Name'] = ws.name\n",
|
||||
"output['Resource Group'] = ws.resource_group\n",
|
||||
"output['Location'] = ws.location\n",
|
||||
"output['Project Directory'] = project_folder\n",
|
||||
"output['Experiment Name'] = experiment.name\n",
|
||||
"pd.set_option('display.max_colwidth', -1)\n",
|
||||
"outputDf = pd.DataFrame(data = output, index = [''])\n",
|
||||
"outputDf.T"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# You can use `auto_read_file` which intelligently figures out delimiters and datatypes of a file.\n",
|
||||
"# The data referenced here was a 1MB simple random sample of the Chicago Crime data into a local temporary directory.\n",
|
||||
"# You can also use `read_csv` and `to_*` transformations to read (with overridable delimiter)\n",
|
||||
"# and convert column types manually.\n",
|
||||
"example_data = 'https://dprepdata.blob.core.windows.net/demo/crime0-random.csv'\n",
|
||||
"dflow = dprep.auto_read_file(example_data).skip(1) # Remove the header row.\n",
|
||||
"dflow.get_profile()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# As `Primary Type` is our y data, we need to drop the values those are null in this column.\n",
|
||||
"dflow = dflow.drop_nulls('Primary Type')\n",
|
||||
"dflow.head(5)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Review the Data Preparation Result\n",
|
||||
"\n",
|
||||
"You can peek the result of a Dataflow at any range using `skip(i)` and `head(j)`. Doing so evaluates only `j` records for all the steps in the Dataflow, which makes it fast even against large datasets.\n",
|
||||
"\n",
|
||||
"`Dataflow` objects are immutable and are composed of a list of data preparation steps. A `Dataflow` object can be branched at any point for further usage."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"X = dflow.drop_columns(columns=['Primary Type', 'FBI Code'])\n",
|
||||
"y = dflow.keep_columns(columns=['Primary Type'], validate_column_exists=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Train\n",
|
||||
"\n",
|
||||
"This creates a general AutoML settings object applicable for both local and remote runs."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"automl_settings = {\n",
|
||||
" \"iteration_timeout_minutes\" : 10,\n",
|
||||
" \"iterations\" : 2,\n",
|
||||
" \"primary_metric\" : 'AUC_weighted',\n",
|
||||
" \"preprocess\" : True,\n",
|
||||
" \"verbosity\" : logging.INFO\n",
|
||||
"}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Pass Data with `Dataflow` Objects\n",
|
||||
"\n",
|
||||
"The `Dataflow` objects captured above can be passed to the `submit` method for a local run. AutoML will retrieve the results from the `Dataflow` for model training."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"automl_config = AutoMLConfig(task = 'classification',\n",
|
||||
" debug_log = 'automl_errors.log',\n",
|
||||
" X = X,\n",
|
||||
" y = y,\n",
|
||||
" **automl_settings)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"local_run = experiment.submit(automl_config, show_output = True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"local_run"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Results"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Widget for Monitoring Runs\n",
|
||||
"\n",
|
||||
"The widget will first report a \"loading\" status while running the first iteration. After completing the first iteration, an auto-updating graph and table will be shown. The widget will refresh once per minute, so you should see the graph update as child runs complete.\n",
|
||||
"\n",
|
||||
"**Note:** The widget displays a link at the bottom. Use this link to open a web interface to explore the individual run details."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.widgets import RunDetails\n",
|
||||
"RunDetails(local_run).show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Retrieve All Child Runs\n",
|
||||
"You can also use SDK methods to fetch all the child runs and see individual metrics that we log."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"children = list(local_run.get_children())\n",
|
||||
"metricslist = {}\n",
|
||||
"for run in children:\n",
|
||||
" properties = run.get_properties()\n",
|
||||
" metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)}\n",
|
||||
" metricslist[int(properties['iteration'])] = metrics\n",
|
||||
" \n",
|
||||
"rundata = pd.DataFrame(metricslist).sort_index(1)\n",
|
||||
"rundata"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Retrieve the Best Model\n",
|
||||
"\n",
|
||||
"Below we select the best pipeline from our iterations. The `get_output` method returns the best run and the fitted model. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"best_run, fitted_model = local_run.get_output()\n",
|
||||
"print(best_run)\n",
|
||||
"print(fitted_model)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Best Model Based on Any Other Metric\n",
|
||||
"Show the run and the model that has the smallest `log_loss` value:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"lookup_metric = \"log_loss\"\n",
|
||||
"best_run, fitted_model = local_run.get_output(metric = lookup_metric)\n",
|
||||
"print(best_run)\n",
|
||||
"print(fitted_model)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Model from a Specific Iteration\n",
|
||||
"Show the run and the model from the first iteration:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"iteration = 0\n",
|
||||
"best_run, fitted_model = local_run.get_output(iteration = iteration)\n",
|
||||
"print(best_run)\n",
|
||||
"print(fitted_model)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Test\n",
|
||||
"\n",
|
||||
"#### Load Test Data\n",
|
||||
"For the test data, it should have the same preparation step as the train data. Otherwise it might get failed at the preprocessing step."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dflow_test = dprep.auto_read_file(path='https://dprepdata.blob.core.windows.net/demo/crime0-test.csv').skip(1)\n",
|
||||
"dflow_test = dflow_test.drop_nulls('Primary Type')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Testing Our Best Fitted Model\n",
|
||||
"We will use confusion matrix to see how our model works."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from pandas_ml import ConfusionMatrix\n",
|
||||
"\n",
|
||||
"y_test = dflow_test.keep_columns(columns=['Primary Type']).to_pandas_dataframe()\n",
|
||||
"X_test = dflow_test.drop_columns(columns=['Primary Type', 'FBI Code']).to_pandas_dataframe()\n",
|
||||
"\n",
|
||||
"ypred = fitted_model.predict(X_test)\n",
|
||||
"\n",
|
||||
"cm = ConfusionMatrix(y_test['Primary Type'], ypred)\n",
|
||||
"\n",
|
||||
"print(cm)\n",
|
||||
"\n",
|
||||
"cm.plot()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "savitam"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
"name": "python36"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -87,8 +87,6 @@
|
||||
"\n",
|
||||
"# choose a name for experiment\n",
|
||||
"experiment_name = 'automl-dataset-remote-bai'\n",
|
||||
"# project folder\n",
|
||||
"project_folder = './sample_projects/automl-dataprep-remote-bai'\n",
|
||||
" \n",
|
||||
"experiment = Experiment(ws, experiment_name)\n",
|
||||
" \n",
|
||||
@@ -98,7 +96,6 @@
|
||||
"output['Workspace Name'] = ws.name\n",
|
||||
"output['Resource Group'] = ws.resource_group\n",
|
||||
"output['Location'] = ws.location\n",
|
||||
"output['Project Directory'] = project_folder\n",
|
||||
"output['Experiment Name'] = experiment.name\n",
|
||||
"pd.set_option('display.max_colwidth', -1)\n",
|
||||
"outputDf = pd.DataFrame(data = output, index = [''])\n",
|
||||
@@ -253,7 +250,6 @@
|
||||
"source": [
|
||||
"automl_config = AutoMLConfig(task = 'classification',\n",
|
||||
" debug_log = 'automl_errors.log',\n",
|
||||
" path = project_folder,\n",
|
||||
" run_configuration=conda_run_config,\n",
|
||||
" X = X,\n",
|
||||
" y = y,\n",
|
||||
|
||||
@@ -87,8 +87,6 @@
|
||||
" \n",
|
||||
"# choose a name for experiment\n",
|
||||
"experiment_name = 'automl-dataset-local'\n",
|
||||
"# project folder\n",
|
||||
"project_folder = './sample_projects/automl-dataset-local'\n",
|
||||
" \n",
|
||||
"experiment = Experiment(ws, experiment_name)\n",
|
||||
" \n",
|
||||
@@ -98,7 +96,6 @@
|
||||
"output['Workspace Name'] = ws.name\n",
|
||||
"output['Resource Group'] = ws.resource_group\n",
|
||||
"output['Location'] = ws.location\n",
|
||||
"output['Project Directory'] = project_folder\n",
|
||||
"output['Experiment Name'] = experiment.name\n",
|
||||
"pd.set_option('display.max_colwidth', -1)\n",
|
||||
"outputDf = pd.DataFrame(data = output, index = [''])\n",
|
||||
|
||||
@@ -97,8 +97,6 @@
|
||||
"\n",
|
||||
"# choose a name for the run history container in the workspace\n",
|
||||
"experiment_name = 'automl-bikeshareforecasting'\n",
|
||||
"# project folder\n",
|
||||
"project_folder = './sample_projects/automl-local-bikeshareforecasting'\n",
|
||||
"\n",
|
||||
"experiment = Experiment(ws, experiment_name)\n",
|
||||
"\n",
|
||||
@@ -108,7 +106,6 @@
|
||||
"output['Workspace'] = ws.name\n",
|
||||
"output['Resource Group'] = ws.resource_group\n",
|
||||
"output['Location'] = ws.location\n",
|
||||
"output['Project Directory'] = project_folder\n",
|
||||
"output['Run History Name'] = experiment_name\n",
|
||||
"pd.set_option('display.max_colwidth', -1)\n",
|
||||
"outputDf = pd.DataFrame(data = output, index = [''])\n",
|
||||
@@ -225,7 +222,8 @@
|
||||
"|**y**|(sparse) array-like, shape = [n_samples, ], targets values.|\n",
|
||||
"|**n_cross_validations**|Number of cross validation splits.|\n",
|
||||
"|**country_or_region**|The country/region used to generate holiday features. These should be ISO 3166 two-letter country/region codes (i.e. 'US', 'GB').|\n",
|
||||
"|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder. "
|
||||
"\n",
|
||||
"This notebook uses the blacklist_models parameter to exclude some models that take a longer time to train on this dataset. You can choose to remove models from the blacklist_models list but you may need to increase the iteration_timeout_minutes parameter value to get results."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -246,12 +244,12 @@
|
||||
"\n",
|
||||
"automl_config = AutoMLConfig(task='forecasting', \n",
|
||||
" primary_metric='normalized_root_mean_squared_error',\n",
|
||||
" blacklist_models = ['ExtremeRandomTrees'],\n",
|
||||
" iterations=10,\n",
|
||||
" iteration_timeout_minutes=5,\n",
|
||||
" X=X_train,\n",
|
||||
" y=y_train,\n",
|
||||
" n_cross_validations=3,\n",
|
||||
" path=project_folder,\n",
|
||||
" verbosity=logging.INFO,\n",
|
||||
" **automl_settings)"
|
||||
]
|
||||
|
||||
@@ -93,8 +93,6 @@
|
||||
"\n",
|
||||
"# choose a name for the run history container in the workspace\n",
|
||||
"experiment_name = 'automl-energydemandforecasting'\n",
|
||||
"# project folder\n",
|
||||
"project_folder = './sample_projects/automl-local-energydemandforecasting'\n",
|
||||
"\n",
|
||||
"experiment = Experiment(ws, experiment_name)\n",
|
||||
"\n",
|
||||
@@ -104,7 +102,6 @@
|
||||
"output['Workspace'] = ws.name\n",
|
||||
"output['Resource Group'] = ws.resource_group\n",
|
||||
"output['Location'] = ws.location\n",
|
||||
"output['Project Directory'] = project_folder\n",
|
||||
"output['Run History Name'] = experiment_name\n",
|
||||
"pd.set_option('display.max_colwidth', -1)\n",
|
||||
"outputDf = pd.DataFrame(data = output, index = [''])\n",
|
||||
@@ -213,8 +210,7 @@
|
||||
"|**iteration_timeout_minutes**|Time limit in minutes for each iteration.|\n",
|
||||
"|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n",
|
||||
"|**y**|(sparse) array-like, shape = [n_samples, ], targets values.|\n",
|
||||
"|**n_cross_validations**|Number of cross validation splits. Rolling Origin Validation is used to split time-series in a temporally consistent way.|\n",
|
||||
"|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder. "
|
||||
"|**n_cross_validations**|Number of cross validation splits. Rolling Origin Validation is used to split time-series in a temporally consistent way.|"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -237,7 +233,6 @@
|
||||
" X=X_train,\n",
|
||||
" y=y_train,\n",
|
||||
" n_cross_validations=3,\n",
|
||||
" path=project_folder,\n",
|
||||
" verbosity = logging.INFO,\n",
|
||||
" **time_series_settings)"
|
||||
]
|
||||
@@ -463,7 +458,9 @@
|
||||
"source": [
|
||||
"We did not use lags in the previous model specification. In effect, the prediction was the result of a simple regression on date, grain and any additional features. This is often a very good prediction as common time series patterns like seasonality and trends can be captured in this manner. Such simple regression is horizon-less: it doesn't matter how far into the future we are predicting, because we are not using past data. In the previous example, the horizon was only used to split the data for cross-validation.\n",
|
||||
"\n",
|
||||
"Now that we configured target lags, that is the previous values of the target variables, and the prediction is no longer horizon-less. We therefore must still specify the `max_horizon` that the model will learn to forecast. The `target_lags` keyword specifies how far back we will construct the lags of the target variable, and the `target_rolling_window_size` specifies the size of the rolling window over which we will generate the `max`, `min` and `sum` features."
|
||||
"Now that we configured target lags, that is the previous values of the target variables, and the prediction is no longer horizon-less. We therefore must still specify the `max_horizon` that the model will learn to forecast. The `target_lags` keyword specifies how far back we will construct the lags of the target variable, and the `target_rolling_window_size` specifies the size of the rolling window over which we will generate the `max`, `min` and `sum` features.\n",
|
||||
"\n",
|
||||
"This notebook uses the blacklist_models parameter to exclude some models that take a longer time to train on this dataset. You can choose to remove models from the blacklist_models list but you may need to increase the iteration_timeout_minutes parameter value to get results."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -482,13 +479,12 @@
|
||||
"automl_config_lags = AutoMLConfig(task='forecasting',\n",
|
||||
" debug_log='automl_nyc_energy_errors.log',\n",
|
||||
" primary_metric='normalized_root_mean_squared_error',\n",
|
||||
" blacklist_models=['ElasticNet','ExtremeRandomTrees','GradientBoosting'],\n",
|
||||
" blacklist_models=['ElasticNet','ExtremeRandomTrees','GradientBoosting','XGBoostRegressor'],\n",
|
||||
" iterations=10,\n",
|
||||
" iteration_timeout_minutes=10,\n",
|
||||
" X=X_train,\n",
|
||||
" y=y_train,\n",
|
||||
" n_cross_validations=3,\n",
|
||||
" path=project_folder,\n",
|
||||
" verbosity=logging.INFO,\n",
|
||||
" **time_series_settings_with_lags)"
|
||||
]
|
||||
@@ -556,7 +552,21 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### What features matter for the forecast?"
|
||||
"### What features matter for the forecast?\n",
|
||||
"The following steps will allow you to compute and visualize engineered feature importance based on your test data for forecasting. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Setup the model explanations for AutoML models\n",
|
||||
"The *fitted_model* can generate the following which will be used for getting the engineered and raw feature explanations using *automl_setup_model_explanations*:-\n",
|
||||
"1. Featurized data from train samples/test samples \n",
|
||||
"2. Gather engineered and raw feature name lists\n",
|
||||
"3. Find the classes in your labeled column in classification scenarios\n",
|
||||
"\n",
|
||||
"The *automl_explainer_setup_obj* contains all the structures from above list. "
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -565,14 +575,74 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.train.automl.automlexplainer import explain_model\n",
|
||||
"\n",
|
||||
"# feature names are everything in the transformed data except the target\n",
|
||||
"features = X_trans_lags.columns[:-1]\n",
|
||||
"expl = explain_model(fitted_model_lags, X_train.copy(), X_test.copy(), features=features, best_run=best_run_lags, y_train=y_train)\n",
|
||||
"# unpack the tuple\n",
|
||||
"shap_values, expected_values, feat_overall_imp, feat_names, per_class_summary, per_class_imp = expl\n",
|
||||
"best_run_lags"
|
||||
"from azureml.train.automl.automl_explain_utilities import AutoMLExplainerSetupClass, automl_setup_model_explanations\n",
|
||||
"automl_explainer_setup_obj = automl_setup_model_explanations(fitted_model, X=X_train.copy(), \n",
|
||||
" X_test=X_test.copy(), y=y_train, \n",
|
||||
" task='forecasting')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Initialize the Mimic Explainer for feature importance\n",
|
||||
"For explaining the AutoML models, use the *MimicWrapper* from *azureml.explain.model* package. The *MimicWrapper* can be initialized with fields in *automl_explainer_setup_obj*, your workspace and a LightGBM model which acts as a surrogate model to explain the AutoML model (*fitted_model* here). The *MimicWrapper* also takes the *best_run* object where the raw and engineered explanations will be uploaded."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.explain.model.mimic.models.lightgbm_model import LGBMExplainableModel\n",
|
||||
"from azureml.explain.model.mimic_wrapper import MimicWrapper\n",
|
||||
"explainer = MimicWrapper(ws, automl_explainer_setup_obj.automl_estimator, LGBMExplainableModel, \n",
|
||||
" init_dataset=automl_explainer_setup_obj.X_transform, run=best_run,\n",
|
||||
" features=automl_explainer_setup_obj.engineered_feature_names, \n",
|
||||
" feature_maps=[automl_explainer_setup_obj.feature_map])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Use Mimic Explainer for computing and visualizing engineered feature importance\n",
|
||||
"The *explain()* method in *MimicWrapper* can be called with the transformed test samples to get the feature importance for the generated engineered features. You can also use *ExplanationDashboard* to view the dash board visualization of the feature importance values of the generated engineered features by AutoML featurizers."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"engineered_explanations = explainer.explain(['local', 'global'], eval_dataset=automl_explainer_setup_obj.X_test_transform)\n",
|
||||
"print(engineered_explanations.get_feature_importance_dict())\n",
|
||||
"from azureml.contrib.explain.model.visualize import ExplanationDashboard\n",
|
||||
"ExplanationDashboard(engineered_explanations, automl_explainer_setup_obj.automl_estimator, automl_explainer_setup_obj.X_test_transform)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Use Mimic Explainer for computing and visualizing raw feature importance\n",
|
||||
"The *explain()* method in *MimicWrapper* can be again called with the transformed test samples and setting *get_raw* to *True* to get the feature importance for the raw features. You can also use *ExplanationDashboard* to view the dash board visualization of the feature importance values of the raw features."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"raw_explanations = explainer.explain(['local', 'global'], get_raw=True, \n",
|
||||
" raw_feature_names=automl_explainer_setup_obj.raw_feature_names,\n",
|
||||
" eval_dataset=automl_explainer_setup_obj.X_test_transform)\n",
|
||||
"print(raw_explanations.get_feature_importance_dict())\n",
|
||||
"from azureml.contrib.explain.model.visualize import ExplanationDashboard\n",
|
||||
"ExplanationDashboard(raw_explanations, automl_explainer_setup_obj.automl_pipeline, automl_explainer_setup_obj.X_test_raw)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -8,3 +8,4 @@ dependencies:
|
||||
- pandas_ml
|
||||
- statsmodels
|
||||
- azureml-explain-model
|
||||
- azureml-contrib-explain-model
|
||||
|
||||
@@ -89,8 +89,6 @@
|
||||
"\n",
|
||||
"# choose a name for the run history container in the workspace\n",
|
||||
"experiment_name = 'automl-ojforecasting'\n",
|
||||
"# project folder\n",
|
||||
"project_folder = './sample_projects/automl-local-ojforecasting'\n",
|
||||
"\n",
|
||||
"experiment = Experiment(ws, experiment_name)\n",
|
||||
"\n",
|
||||
@@ -100,7 +98,6 @@
|
||||
"output['Workspace'] = ws.name\n",
|
||||
"output['Resource Group'] = ws.resource_group\n",
|
||||
"output['Location'] = ws.location\n",
|
||||
"output['Project Directory'] = project_folder\n",
|
||||
"output['Run History Name'] = experiment_name\n",
|
||||
"pd.set_option('display.max_colwidth', -1)\n",
|
||||
"outputDf = pd.DataFrame(data = output, index = [''])\n",
|
||||
@@ -247,7 +244,6 @@
|
||||
"|**enable_voting_ensemble**|Allow AutoML to create a Voting ensemble of the best performing models\n",
|
||||
"|**enable_stack_ensemble**|Allow AutoML to create a Stack ensemble of the best performing models\n",
|
||||
"|**debug_log**|Log file path for writing debugging information\n",
|
||||
"|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder.|\n",
|
||||
"|**time_column_name**|Name of the datetime column in the input data|\n",
|
||||
"|**grain_column_names**|Name(s) of the columns defining individual series in the input data|\n",
|
||||
"|**drop_column_names**|Name(s) of columns to drop prior to modeling|\n",
|
||||
@@ -276,7 +272,6 @@
|
||||
" n_cross_validations=3,\n",
|
||||
" enable_voting_ensemble=False,\n",
|
||||
" enable_stack_ensemble=False,\n",
|
||||
" path=project_folder,\n",
|
||||
" verbosity=logging.INFO,\n",
|
||||
" **time_series_settings)"
|
||||
]
|
||||
|
||||
@@ -93,7 +93,6 @@
|
||||
"\n",
|
||||
"# Choose a name for the experiment.\n",
|
||||
"experiment_name = 'automl-local-missing-data'\n",
|
||||
"project_folder = './sample_projects/automl-local-missing-data'\n",
|
||||
"\n",
|
||||
"experiment = Experiment(ws, experiment_name)\n",
|
||||
"\n",
|
||||
@@ -103,7 +102,6 @@
|
||||
"output['Workspace'] = ws.name\n",
|
||||
"output['Resource Group'] = ws.resource_group\n",
|
||||
"output['Location'] = ws.location\n",
|
||||
"output['Project Directory'] = project_folder\n",
|
||||
"output['Experiment Name'] = experiment.name\n",
|
||||
"pd.set_option('display.max_colwidth', -1)\n",
|
||||
"outputDf = pd.DataFrame(data = output, index = [''])\n",
|
||||
@@ -166,8 +164,7 @@
|
||||
"|**experiment_exit_score**|*double* value indicating the target for *primary_metric*. <br>Once the target is surpassed the run terminates.|\n",
|
||||
"|**blacklist_models**|*List* of *strings* indicating machine learning algorithms for AutoML to avoid in this run.<br><br> Allowed values for **Classification**<br><i>LogisticRegression</i><br><i>SGD</i><br><i>MultinomialNaiveBayes</i><br><i>BernoulliNaiveBayes</i><br><i>SVM</i><br><i>LinearSVM</i><br><i>KNN</i><br><i>DecisionTree</i><br><i>RandomForest</i><br><i>ExtremeRandomTrees</i><br><i>LightGBM</i><br><i>GradientBoosting</i><br><i>TensorFlowDNN</i><br><i>TensorFlowLinearClassifier</i><br><br>Allowed values for **Regression**<br><i>ElasticNet</i><br><i>GradientBoosting</i><br><i>DecisionTree</i><br><i>KNN</i><br><i>LassoLars</i><br><i>SGD</i><br><i>RandomForest</i><br><i>ExtremeRandomTrees</i><br><i>LightGBM</i><br><i>TensorFlowLinearRegressor</i><br><i>TensorFlowDNN</i>|\n",
|
||||
"|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n",
|
||||
"|**y**|(sparse) array-like, shape = [n_samples, ], Multi-class targets.|\n",
|
||||
"|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder.|"
|
||||
"|**y**|(sparse) array-like, shape = [n_samples, ], Multi-class targets.|"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -186,8 +183,7 @@
|
||||
" blacklist_models = ['KNN','LinearSVM'],\n",
|
||||
" verbosity = logging.INFO,\n",
|
||||
" X = X_train, \n",
|
||||
" y = y_train,\n",
|
||||
" path = project_folder)"
|
||||
" y = y_train)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -69,7 +69,8 @@
|
||||
"import azureml.core\n",
|
||||
"from azureml.core.experiment import Experiment\n",
|
||||
"from azureml.core.workspace import Workspace\n",
|
||||
"from azureml.train.automl import AutoMLConfig"
|
||||
"from azureml.train.automl import AutoMLConfig\n",
|
||||
"from azureml.core.dataset import Dataset"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -107,29 +108,42 @@
|
||||
"## Data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Training Data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn import datasets\n",
|
||||
"\n",
|
||||
"iris = datasets.load_iris()\n",
|
||||
"y = iris.target\n",
|
||||
"X = iris.data\n",
|
||||
"\n",
|
||||
"features = iris.feature_names\n",
|
||||
"\n",
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"X_train, X_test, y_train, y_test = train_test_split(X,\n",
|
||||
" y,\n",
|
||||
" test_size=0.1,\n",
|
||||
" random_state=100,\n",
|
||||
" stratify=y)\n",
|
||||
"\n",
|
||||
"X_train = pd.DataFrame(X_train, columns=features)\n",
|
||||
"X_test = pd.DataFrame(X_test, columns=features)"
|
||||
"train_data = \"https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv\"\n",
|
||||
"train_dataset = Dataset.Tabular.from_delimited_files(train_data)\n",
|
||||
"X_train = train_dataset.drop_columns(columns=['y']).to_pandas_dataframe()\n",
|
||||
"y_train = train_dataset.keep_columns(columns=['y'], validate=True).to_pandas_dataframe()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Test Data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"test_data = \"https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_validate.csv\"\n",
|
||||
"test_dataset = Dataset.Tabular.from_delimited_files(test_data)\n",
|
||||
"X_test = test_dataset.drop_columns(columns=['y']).to_pandas_dataframe()\n",
|
||||
"y_test = test_dataset.keep_columns(columns=['y'], validate=True).to_pandas_dataframe()"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -148,8 +162,6 @@
|
||||
"|**iterations**|Number of iterations. In each iteration Auto ML trains the data with a specific pipeline|\n",
|
||||
"|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n",
|
||||
"|**y**|(sparse) array-like, shape = [n_samples, ], Multi-class targets.|\n",
|
||||
"|**X_valid**|(sparse) array-like, shape = [n_samples, n_features]|\n",
|
||||
"|**y_valid**|(sparse) array-like, shape = [n_samples, ], Multi-class targets.|\n",
|
||||
"|**model_explainability**|Indicate to explain each trained pipeline or not |\n",
|
||||
"|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder. |"
|
||||
]
|
||||
@@ -166,10 +178,10 @@
|
||||
" iteration_timeout_minutes = 200,\n",
|
||||
" iterations = 10,\n",
|
||||
" verbosity = logging.INFO,\n",
|
||||
" preprocess = True,\n",
|
||||
" X = X_train, \n",
|
||||
" y = y_train,\n",
|
||||
" X_valid = X_test,\n",
|
||||
" y_valid = y_test,\n",
|
||||
" n_cross_validations = 5,\n",
|
||||
" model_explainability=True,\n",
|
||||
" path=project_folder)"
|
||||
]
|
||||
@@ -197,7 +209,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"local_run"
|
||||
"best_run, fitted_model = local_run.get_output()"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -302,19 +314,21 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Beside retrieve the existed model explanation information, explain the model with different train/test data"
|
||||
"### Computing model explanations and visualizing the explanations using azureml-explain-model package\n",
|
||||
"Beside retrieve the existed model explanation information, explain the model with different train/test data. The following steps will allow you to compute and visualize engineered feature importance and raw feature importance based on your test data. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.train.automl.automlexplainer import explain_model\n",
|
||||
"#### Setup the model explanations for AutoML models\n",
|
||||
"The *fitted_model* can generate the following which will be used for getting the engineered and raw feature explanations using *automl_setup_model_explanations*:-\n",
|
||||
"1. Featurized data from train samples/test samples \n",
|
||||
"2. Gather engineered and raw feature name lists\n",
|
||||
"3. Find the classes in your labeled column in classification scenarios\n",
|
||||
"\n",
|
||||
"shap_values, expected_values, overall_summary, overall_imp, per_class_summary, per_class_imp = \\\n",
|
||||
" explain_model(fitted_model, X_train, X_test, features=features)"
|
||||
"The *automl_explainer_setup_obj* contains all the structures from above list. "
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -323,8 +337,116 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(overall_summary)\n",
|
||||
"print(overall_imp)"
|
||||
"from azureml.train.automl.automl_explain_utilities import AutoMLExplainerSetupClass, automl_setup_model_explanations\n",
|
||||
"\n",
|
||||
"automl_explainer_setup_obj = automl_setup_model_explanations(fitted_model, X=X_train, \n",
|
||||
" X_test=X_test, y=y_train, \n",
|
||||
" task='classification')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Initialize the Mimic Explainer for feature importance\n",
|
||||
"For explaining the AutoML models, use the *MimicWrapper* from *azureml.explain.model* package. The *MimicWrapper* can be initialized with fields in *automl_explainer_setup_obj*, your workspace and a LightGBM model which acts as a surrogate model to explain the AutoML model (*fitted_model* here). The *MimicWrapper* also takes the *best_run* object where the raw and engineered explanations will be uploaded."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.explain.model.mimic.models.lightgbm_model import LGBMExplainableModel\n",
|
||||
"from azureml.explain.model.mimic_wrapper import MimicWrapper\n",
|
||||
"explainer = MimicWrapper(ws, automl_explainer_setup_obj.automl_estimator, LGBMExplainableModel, \n",
|
||||
" init_dataset=automl_explainer_setup_obj.X_transform, run=best_run,\n",
|
||||
" features=automl_explainer_setup_obj.engineered_feature_names, \n",
|
||||
" feature_maps=[automl_explainer_setup_obj.feature_map],\n",
|
||||
" classes=automl_explainer_setup_obj.classes)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Use Mimic Explainer for computing and visualizing engineered feature importance\n",
|
||||
"The *explain()* method in *MimicWrapper* can be called with the transformed test samples to get the feature importance for the generated engineered features. You can also use *ExplanationDashboard* to view the dash board visualization of the feature importance values of the generated engineered features by AutoML featurizers."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"engineered_explanations = explainer.explain(['local', 'global'], eval_dataset=automl_explainer_setup_obj.X_test_transform)\n",
|
||||
"print(engineered_explanations.get_feature_importance_dict())\n",
|
||||
"from azureml.contrib.explain.model.visualize import ExplanationDashboard\n",
|
||||
"ExplanationDashboard(engineered_explanations, automl_explainer_setup_obj.automl_estimator, automl_explainer_setup_obj.X_test_transform)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Use Mimic Explainer for computing and visualizing raw feature importance\n",
|
||||
"The *explain()* method in *MimicWrapper* can be again called with the transformed test samples and setting *get_raw* to *True* to get the feature importance for the raw features. You can also use *ExplanationDashboard* to view the dash board visualization of the feature importance values of the raw features."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"raw_explanations = explainer.explain(['local', 'global'], get_raw=True, \n",
|
||||
" raw_feature_names=automl_explainer_setup_obj.raw_feature_names,\n",
|
||||
" eval_dataset=automl_explainer_setup_obj.X_test_transform)\n",
|
||||
"print(raw_explanations.get_feature_importance_dict())\n",
|
||||
"from azureml.contrib.explain.model.visualize import ExplanationDashboard\n",
|
||||
"ExplanationDashboard(raw_explanations, automl_explainer_setup_obj.automl_pipeline, automl_explainer_setup_obj.X_test_raw)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Download engineered feature importance from artifact store\n",
|
||||
"You can use *ExplanationClient* to download the engineered feature explanations from the artifact store of the *best_run*."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.explain.model._internal.explanation_client import ExplanationClient\n",
|
||||
"client = ExplanationClient.from_run(best_run)\n",
|
||||
"engineered_explanations = client.download_model_explanation(raw=False)\n",
|
||||
"print(engineered_explanations.get_feature_importance_dict())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Download raw feature importance from artifact store\n",
|
||||
"You can use *ExplanationClient* to download the raw feature explanations from the artifact store of the *best_run*."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.explain.model._internal.explanation_client import ExplanationClient\n",
|
||||
"client = ExplanationClient.from_run(best_run)\n",
|
||||
"raw_explanations = client.download_model_explanation(raw=True)\n",
|
||||
"print(raw_explanations.get_feature_importance_dict())"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
||||
@@ -7,3 +7,4 @@ dependencies:
|
||||
- matplotlib
|
||||
- pandas_ml
|
||||
- azureml-explain-model
|
||||
- azureml-contrib-explain-model
|
||||
|
||||
@@ -87,9 +87,8 @@
|
||||
"source": [
|
||||
"ws = Workspace.from_config()\n",
|
||||
"\n",
|
||||
"# Choose a name for the experiment and specify the project folder.\n",
|
||||
"# Choose a name for the experiment.\n",
|
||||
"experiment_name = 'automl-regression-concrete'\n",
|
||||
"project_folder = './sample_projects/automl-regression-concrete'\n",
|
||||
"\n",
|
||||
"experiment = Experiment(ws, experiment_name)\n",
|
||||
"\n",
|
||||
@@ -99,7 +98,6 @@
|
||||
"output['Workspace Name'] = ws.name\n",
|
||||
"output['Resource Group'] = ws.resource_group\n",
|
||||
"output['Location'] = ws.location\n",
|
||||
"output['Project Directory'] = project_folder\n",
|
||||
"output['Experiment Name'] = experiment.name\n",
|
||||
"pd.set_option('display.max_colwidth', -1)\n",
|
||||
"outputDf = pd.DataFrame(data = output, index = [''])\n",
|
||||
@@ -160,20 +158,7 @@
|
||||
"source": [
|
||||
"# Data\n",
|
||||
"\n",
|
||||
"Here load the data in the get_data script to be utilized in azure compute. To do this, first load all the necessary libraries and dependencies to set up paths for the data and to create the conda_run_config."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"if not os.path.isdir('data'):\n",
|
||||
" os.mkdir('data')\n",
|
||||
" \n",
|
||||
"if not os.path.exists(project_folder):\n",
|
||||
" os.makedirs(project_folder)"
|
||||
"Create a run configuration for the remote run."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -203,7 +188,7 @@
|
||||
"source": [
|
||||
"### Load Data\n",
|
||||
"\n",
|
||||
"Here create the script to be run in azure compute for loading the data, load the concrete strength dataset into the X and y variables. Next, split the data using random_split and return X_train and y_train for training the model. Finally, return X_train and y_train for training the model."
|
||||
"Load the concrete strength dataset into X and y. X contains the training features, which are inputs to the model. y contains the training labels, which are the expected output of the model."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -238,7 +223,6 @@
|
||||
"|**n_cross_validations**|Number of cross validation splits.|\n",
|
||||
"|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n",
|
||||
"|**y**|(sparse) array-like, shape = [n_samples, ], targets values.|\n",
|
||||
"|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder.|\n",
|
||||
"\n",
|
||||
"**_You can find more information about primary metrics_** [here](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-configure-auto-train#primary-metric)"
|
||||
]
|
||||
@@ -268,7 +252,6 @@
|
||||
"\n",
|
||||
"automl_config = AutoMLConfig(task = 'regression',\n",
|
||||
" debug_log = 'automl.log',\n",
|
||||
" path = project_folder,\n",
|
||||
" run_configuration=conda_run_config,\n",
|
||||
" X = X_train,\n",
|
||||
" y = y_train,\n",
|
||||
|
||||
@@ -87,9 +87,8 @@
|
||||
"source": [
|
||||
"ws = Workspace.from_config()\n",
|
||||
"\n",
|
||||
"# Choose a name for the experiment and specify the project folder.\n",
|
||||
"# Choose a name for the experiment.\n",
|
||||
"experiment_name = 'automl-regression-hardware'\n",
|
||||
"project_folder = './sample_projects/automl-remote-regression'\n",
|
||||
"\n",
|
||||
"experiment = Experiment(ws, experiment_name)\n",
|
||||
"\n",
|
||||
@@ -99,7 +98,6 @@
|
||||
"output['Workspace Name'] = ws.name\n",
|
||||
"output['Resource Group'] = ws.resource_group\n",
|
||||
"output['Location'] = ws.location\n",
|
||||
"output['Project Directory'] = project_folder\n",
|
||||
"output['Experiment Name'] = experiment.name\n",
|
||||
"pd.set_option('display.max_colwidth', -1)\n",
|
||||
"outputDf = pd.DataFrame(data = output, index = [''])\n",
|
||||
@@ -160,20 +158,7 @@
|
||||
"source": [
|
||||
"# Data\n",
|
||||
"\n",
|
||||
"Here load the data in the get_data script to be utilized in azure compute. To do this, first load all the necessary libraries and dependencies to set up paths for the data and to create the conda_run_config."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"if not os.path.isdir('data'):\n",
|
||||
" os.mkdir('data')\n",
|
||||
" \n",
|
||||
"if not os.path.exists(project_folder):\n",
|
||||
" os.makedirs(project_folder)"
|
||||
"Create a run configuration for the remote run."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -203,7 +188,7 @@
|
||||
"source": [
|
||||
"### Load Data\n",
|
||||
"\n",
|
||||
"Here create the script to be run in azure compute for loading the data, load the hardware dataset into the X and y variables. Next split the data using random_split and return X_train and y_train for training the model."
|
||||
"Load the hardware performance dataset into X and y. X contains the training features, which are inputs to the model. y contains the training labels, which are the expected output of the model."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -239,7 +224,6 @@
|
||||
"|**n_cross_validations**|Number of cross validation splits.|\n",
|
||||
"|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n",
|
||||
"|**y**|(sparse) array-like, shape = [n_samples, ], targets values.|\n",
|
||||
"|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder.|\n",
|
||||
"\n",
|
||||
"**_You can find more information about primary metrics_** [here](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-configure-auto-train#primary-metric)"
|
||||
]
|
||||
@@ -268,8 +252,7 @@
|
||||
"}\n",
|
||||
"\n",
|
||||
"automl_config = AutoMLConfig(task = 'regression',\n",
|
||||
" debug_log = 'automl_errors_20190417.log',\n",
|
||||
" path = project_folder,\n",
|
||||
" debug_log = 'automl_errors.log',\n",
|
||||
" run_configuration=conda_run_config,\n",
|
||||
" X = X_train,\n",
|
||||
" y = y_train,\n",
|
||||
|
||||
@@ -84,9 +84,8 @@
|
||||
"source": [
|
||||
"ws = Workspace.from_config()\n",
|
||||
"\n",
|
||||
"# Choose a name for the experiment and specify the project folder.\n",
|
||||
"# Choose a name for the experiment.\n",
|
||||
"experiment_name = 'automl-local-regression'\n",
|
||||
"project_folder = './sample_projects/automl-local-regression'\n",
|
||||
"\n",
|
||||
"experiment = Experiment(ws, experiment_name)\n",
|
||||
"\n",
|
||||
@@ -96,7 +95,6 @@
|
||||
"output['Workspace Name'] = ws.name\n",
|
||||
"output['Resource Group'] = ws.resource_group\n",
|
||||
"output['Location'] = ws.location\n",
|
||||
"output['Project Directory'] = project_folder\n",
|
||||
"output['Experiment Name'] = experiment.name\n",
|
||||
"pd.set_option('display.max_colwidth', -1)\n",
|
||||
"outputDf = pd.DataFrame(data = output, index = [''])\n",
|
||||
@@ -144,8 +142,7 @@
|
||||
"|**iterations**|Number of iterations. In each iteration AutoML trains a specific pipeline with the data.|\n",
|
||||
"|**n_cross_validations**|Number of cross validation splits.|\n",
|
||||
"|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n",
|
||||
"|**y**|(sparse) array-like, shape = [n_samples, ], targets values.|\n",
|
||||
"|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder.|"
|
||||
"|**y**|(sparse) array-like, shape = [n_samples, ], targets values.|"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -162,8 +159,7 @@
|
||||
" debug_log = 'automl.log',\n",
|
||||
" verbosity = logging.INFO,\n",
|
||||
" X = X_train, \n",
|
||||
" y = y_train,\n",
|
||||
" path = project_folder)"
|
||||
" y = y_train)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -21,9 +21,49 @@ Notebook 6 is an Automated ML sample notebook for Classification.
|
||||
|
||||
Learn more about [how to use Azure Databricks as a development environment](https://docs.microsoft.com/azure/machine-learning/service/how-to-configure-environment#azure-databricks) for Azure Machine Learning service.
|
||||
|
||||
**Databricks as a Compute Target from AML Pipelines**
|
||||
**Databricks as a Compute Target from Azure ML Pipelines**
|
||||
You can use Azure Databricks as a compute target from [Azure Machine Learning Pipelines](https://docs.microsoft.com/en-us/azure/machine-learning/service/concept-ml-pipelines). Take a look at this notebook for details: [aml-pipelines-use-databricks-as-compute-target.ipynb](https://github.com/Azure/MachineLearningNotebooks/tree/master/how-to-use-azureml/azure-databricks/databricks-as-remote-compute-target/aml-pipelines-use-databricks-as-compute-target.ipynb).
|
||||
|
||||
# Linked Azure Databricks and Azure Machine Learning Workspaces (Preview)
|
||||
Customers can now link Azure Databricks and AzureML Workspaces to better enable cross-Azure ML scenarios by [managing their tracking data in a single place when using the MLflow client](https://mlflow.org/docs/latest/tracking.html#mlflow-tracking) - the Azure ML workspace.
|
||||
|
||||
## Linking the Workspaces (Admin operation)
|
||||
|
||||
1. The Azure Databricks Azure portal blade now includes a new button to link an Azure ML workspace.
|
||||

|
||||
2. Both a new or existing Azure ML Workspace can be linked in the resulting prompt. Follow any instructions to set up the Azure ML Workspace.
|
||||

|
||||
3. After a successful link operation, you should see the Azure Databricks overview reflect the linked status
|
||||

|
||||
|
||||
## Configure MLflow to send data to Azure ML (All roles)
|
||||
|
||||
1. Add azureml-mlflow as a library to any notebook or cluster that should send data to Azure ML. You can do this via:
|
||||
1. [DBUtils](https://docs.azuredatabricks.net/user-guide/dev-tools/dbutils.html#dbutils-library)
|
||||
```
|
||||
dbutils.library.installPyPI("azureml-mlflow")
|
||||
dbutils.library.restartPython() # Removes Python state
|
||||
```
|
||||
2. [Cluster Libraries](https://docs.azuredatabricks.net/user-guide/libraries.html#install-a-library-on-a-cluster)
|
||||

|
||||
2. [Set the MLflow tracking URI](https://mlflow.org/docs/latest/tracking.html#where-runs-are-recorded) to the following scheme:
|
||||
```
|
||||
adbazureml://${azuremlRegion}.experiments.azureml.net/history/v1.0/subscriptions/${azuremlSubscriptionId}/resourceGroups/${azuremlResourceGroupName}/providers/Microsoft.MachineLearningServices/workspaces/${azuremlWorkspaceName}
|
||||
```
|
||||
1. You can automatically configure this on your clusters for all subsequent notebook sessions using this helper script instead of manually setting the tracking URI in the notebook:
|
||||
* [AzureML Tracking Cluster Init Script](./linking/README.md)
|
||||
3. If configured correctly, you'll now be able to see your MLflow tracking data in both Azure ML (via the REST API and all clients) and Azure Databricks (in the MLflow UI and using the MLflow client)
|
||||
|
||||
|
||||
## Known Preview Limitations
|
||||
While we roll this experience out to customers for feedback, there are some known limitations we'd love comments on in addition to any other issues seen in your workflow.
|
||||
### 1-to-1 Workspace linking
|
||||
Currently, an Azure ML Workspace can only be linked to one Azure Databricks Workspace at a time.
|
||||
### Data synchronization
|
||||
At the moment, data is only generated in the Azure Machine Learning workspace for tracking. Editing tags via the Azure Databricks MLflow UI won't be reflected in the Azure ML UI.
|
||||
### Java and R support
|
||||
The experience currently is only available from the Python MLflow client.
|
||||
|
||||
For more on SDK concepts, please refer to [notebooks](https://github.com/Azure/MachineLearningNotebooks).
|
||||
|
||||
**Please let us know your feedback.**
|
||||
|
||||
BIN
how-to-use-azureml/azure-databricks/img/adb-link-button.png
Executable file
|
After Width: | Height: | Size: 173 KiB |
BIN
how-to-use-azureml/azure-databricks/img/adb-successful-link.png
Executable file
|
After Width: | Height: | Size: 187 KiB |
BIN
how-to-use-azureml/azure-databricks/img/cluster-library.png
Executable file
|
After Width: | Height: | Size: 84 KiB |
BIN
how-to-use-azureml/azure-databricks/img/link-prompt.png
Executable file
|
After Width: | Height: | Size: 111 KiB |
56
how-to-use-azureml/azure-databricks/linking/README.md
Normal file
@@ -0,0 +1,56 @@
|
||||
# Adding an init script to an Azure Databricks cluster
|
||||
|
||||
The [azureml-cluster-init.sh](./azureml-cluster-init.sh) script configures the environment to
|
||||
1. Use the configured AzureML Workspace with Workspace.from_config()
|
||||
2. Set the default MLflow Tracking Server to be the AzureML managed one
|
||||
|
||||
Modify azureml-cluster-init.sh by providing the values for region, subscriptionId, resourceGroupName, and workspaceName of your target Azure ML workspace in the highlighted section at the top of the script.
|
||||
|
||||
To create the Azure Databricks cluster-scoped init script
|
||||
|
||||
1. Create the base directory you want to store the init script in if it does not exist.
|
||||
```
|
||||
dbutils.fs.mkdirs("dbfs:/databricks/<directory>/")
|
||||
```
|
||||
|
||||
2. Create the script by copying the contents of azureml-cluster-init.sh
|
||||
```
|
||||
dbutils.fs.put("/databricks/<directory>/azureml-cluster-init.sh","""
|
||||
<configured_contents_of_azureml-cluster-init.sh>
|
||||
""", True)
|
||||
|
||||
3. Check that the script exists.
|
||||
```
|
||||
display(dbutils.fs.ls("dbfs:/databricks/<directory>/azureml-cluster-init.sh"))
|
||||
```
|
||||
|
||||
1. Configure the cluster to run the script.
|
||||
* Using the cluster configuration page
|
||||
1. On the cluster configuration page, click the Advanced Options toggle.
|
||||
1. At the bottom of the page, click the Init Scripts tab.
|
||||
1. In the Destination drop-down, select a destination type. Example: 'DBFS'
|
||||
1. Specify a path to the init script.
|
||||
```
|
||||
dbfs:/databricks/<directory>/azureml-cluster-init.sh
|
||||
```
|
||||
1. Click Add
|
||||
|
||||
* Using the API.
|
||||
```
|
||||
curl -n -X POST -H 'Content-Type: application/json' -d '{
|
||||
"cluster_id": "<cluster_id>",
|
||||
"num_workers": <num_workers>,
|
||||
"spark_version": "<spark_version>",
|
||||
"node_type_id": "<node_type_id>",
|
||||
"cluster_log_conf": {
|
||||
"dbfs" : {
|
||||
"destination": "dbfs:/cluster-logs"
|
||||
}
|
||||
},
|
||||
"init_scripts": [ {
|
||||
"dbfs": {
|
||||
"destination": "dbfs:/databricks/<directory>/azureml-cluster-init.sh"
|
||||
}
|
||||
} ]
|
||||
}' https://<databricks-instance>/api/2.0/clusters/edit
|
||||
```
|
||||
@@ -0,0 +1,24 @@
|
||||
#!/bin/bash
|
||||
# This script configures the environment to
|
||||
# 1. Use the configured AzureML Workspace with azureml.core.Workspace.from_config()
|
||||
# 2. Set the default MLflow Tracking Server to be the AzureML managed one
|
||||
|
||||
############## START CONFIGURATION #################
|
||||
# Provide the required *AzureML* workspace information
|
||||
region="" # example: westus2
|
||||
subscriptionId="" # example: bcb65f42-f234-4bff-91cf-9ef816cd9936
|
||||
resourceGroupName="" # example: dev-rg
|
||||
workspaceName="" # example: myazuremlws
|
||||
|
||||
# Optional config directory
|
||||
configLocation="/databricks/config.json"
|
||||
############### END CONFIGURATION #################
|
||||
|
||||
|
||||
# Drop the workspace configuration on the cluster
|
||||
sudo touch $configLocation
|
||||
sudo echo {\\"subscription_id\\": \\"${subscriptionId}\\", \\"resource_group\\": \\"${resourceGroupName}\\", \\"workspace_name\\": \\"${workspaceName}\\"} > $configLocation
|
||||
|
||||
# Set the MLflow Tracking URI
|
||||
trackingUri="adbazureml://${region}.experiments.azureml.net/history/v1.0/subscriptions/${subscriptionId}/resourceGroups/${resourceGroupName}/providers/Microsoft.MachineLearningServices/workspaces/${workspaceName}"
|
||||
sudo echo export MLFLOW_TRACKING_URI=${trackingUri} >> /databricks/spark/conf/spark-env.sh
|
||||
|
Before Width: | Height: | Size: 22 KiB |
217
how-to-use-azureml/deployment/accelerated-models/NOTICE.txt
Normal file
@@ -0,0 +1,217 @@
|
||||
|
||||
NOTICES AND INFORMATION
|
||||
Do Not Translate or Localize
|
||||
|
||||
This Azure Machine Learning service example notebooks repository includes material from the projects listed below.
|
||||
|
||||
|
||||
1. SSD-Tensorflow (https://github.com/balancap/ssd-tensorflow)
|
||||
|
||||
|
||||
%% SSD-Tensorflow NOTICES AND INFORMATION BEGIN HERE
|
||||
=========================================
|
||||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
=========================================
|
||||
END OF SSD-Tensorflow NOTICES AND INFORMATION
|
||||
@@ -543,7 +543,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.5.6"
|
||||
"version": "3.7.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -198,7 +198,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"azuremlexception-remarks-sample"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.webservice import AciWebservice, Webservice\n",
|
||||
@@ -13,7 +13,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -13,7 +13,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -70,23 +70,11 @@
|
||||
"If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, make sure you go through the [configuration notebook](../../../configuration.ipynb) first if you haven't.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"You will need to have extensions enabled prior to jupyter kernel starting to see the visualization dashboard.\n",
|
||||
"```\n",
|
||||
"(myenv) $ jupyter nbextension install --py --sys-prefix azureml.contrib.explain.model.visualize\n",
|
||||
"(myenv) $ jupyter nbextension enable --py --sys-prefix azureml.contrib.explain.model.visualize\n",
|
||||
"```\n",
|
||||
"Or\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"(myenv) $ jupyter nbextension install azureml.contrib.explain.model.visualize --user --py\n",
|
||||
"(myenv) $ jupyter nbextension enable azureml.contrib.explain.model.visualize --user --py\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"If you are using Jupyter Labs run the following commands instead:\n",
|
||||
"If you are using Jupyter notebooks, the extensions should be installed automatically with the package.\n",
|
||||
"If you are using Jupyter Labs run the following command:\n",
|
||||
"```\n",
|
||||
"(myenv) $ jupyter labextension install @jupyter-widgets/jupyterlab-manager\n",
|
||||
"(myenv) $ jupyter labextension install microsoft-mli-widget\n",
|
||||
"```"
|
||||
"```\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -634,7 +622,7 @@
|
||||
"# retrieve model for visualization and deployment\n",
|
||||
"from azureml.core.model import Model\n",
|
||||
"from sklearn.externals import joblib\n",
|
||||
"original_model = Model(ws, 'original_model')\n",
|
||||
"original_model = Model(ws, 'model_explain_model_on_amlcomp')\n",
|
||||
"model_path = original_model.download(exist_ok=True)\n",
|
||||
"original_model = joblib.load(model_path)"
|
||||
]
|
||||
|
||||
|
After Width: | Height: | Size: 237 KiB |
|
After Width: | Height: | Size: 128 KiB |
@@ -46,7 +46,8 @@ with open(model_file_name, 'wb') as file:
|
||||
|
||||
# register the model
|
||||
run.upload_file('original_model.pkl', os.path.join('./outputs/', model_file_name))
|
||||
original_model = run.register_model(model_name='original_model', model_path='original_model.pkl')
|
||||
original_model = run.register_model(model_name='model_explain_model_on_amlcomp',
|
||||
model_path='original_model.pkl')
|
||||
|
||||
# Explain predictions on your local machine
|
||||
tabular_explainer = TabularExplainer(model, X_train, features=boston_data.feature_names)
|
||||
|
||||
@@ -60,25 +60,11 @@
|
||||
"2. Run 'explain_model' with AML Run History, which leverages run history service to store and manage the explanation data\n",
|
||||
"---\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"You will need to have extensions enabled prior to jupyter kernel starting to see the visualization dashboard.\n",
|
||||
"```\n",
|
||||
"(myenv) $ jupyter nbextension install --py --sys-prefix azureml.contrib.explain.model.visualize\n",
|
||||
"(myenv) $ jupyter nbextension enable --py --sys-prefix azureml.contrib.explain.model.visualize\n",
|
||||
"```\n",
|
||||
"Or\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"(myenv) $ jupyter nbextension install azureml.contrib.explain.model.visualize --user --py\n",
|
||||
"(myenv) $ jupyter nbextension enable azureml.contrib.explain.model.visualize --user --py\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"If you are using Jupyter Labs run the following commands instead:\n",
|
||||
"Setup: If you are using Jupyter notebooks, the extensions should be installed automatically with the package.\n",
|
||||
"If you are using Jupyter Labs run the following command:\n",
|
||||
"```\n",
|
||||
"(myenv) $ jupyter labextension install @jupyter-widgets/jupyterlab-manager\n",
|
||||
"(myenv) $ jupyter labextension install microsoft-mli-widget\n",
|
||||
"```"
|
||||
"```\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
|
After Width: | Height: | Size: 237 KiB |
@@ -0,0 +1,33 @@
|
||||
import json
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import os
|
||||
import pickle
|
||||
from sklearn.externals import joblib
|
||||
from sklearn.linear_model import LogisticRegression
|
||||
from azureml.core.model import Model
|
||||
|
||||
|
||||
def init():
|
||||
|
||||
global original_model
|
||||
global scoring_explainer
|
||||
|
||||
# Retrieve the path to the model file using the model name
|
||||
# Assume original model is named original_prediction_model
|
||||
original_model_path = Model.get_model_path('local_deploy_model')
|
||||
scoring_explainer_path = Model.get_model_path('IBM_attrition_explainer')
|
||||
|
||||
original_model = joblib.load(original_model_path)
|
||||
scoring_explainer = joblib.load(scoring_explainer_path)
|
||||
|
||||
|
||||
def run(raw_data):
|
||||
# Get predictions and explanations for each data point
|
||||
data = pd.read_json(raw_data)
|
||||
# Make prediction
|
||||
predictions = original_model.predict(data)
|
||||
# Retrieve model explanations
|
||||
local_importance_values = scoring_explainer.explain(data)
|
||||
# You can return any data type as long as it is JSON-serializable
|
||||
return {'predictions': predictions.tolist(), 'local_importance_values': local_importance_values}
|
||||
@@ -0,0 +1,33 @@
|
||||
import json
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import os
|
||||
import pickle
|
||||
from sklearn.externals import joblib
|
||||
from sklearn.linear_model import LogisticRegression
|
||||
from azureml.core.model import Model
|
||||
|
||||
|
||||
def init():
|
||||
|
||||
global original_model
|
||||
global scoring_explainer
|
||||
|
||||
# Retrieve the path to the model file using the model name
|
||||
# Assume original model is named original_prediction_model
|
||||
original_model_path = Model.get_model_path('amlcompute_deploy_model')
|
||||
scoring_explainer_path = Model.get_model_path('IBM_attrition_explainer')
|
||||
|
||||
original_model = joblib.load(original_model_path)
|
||||
scoring_explainer = joblib.load(scoring_explainer_path)
|
||||
|
||||
|
||||
def run(raw_data):
|
||||
# Get predictions and explanations for each data point
|
||||
data = pd.read_json(raw_data)
|
||||
# Make prediction
|
||||
predictions = original_model.predict(data)
|
||||
# Retrieve model explanations
|
||||
local_importance_values = scoring_explainer.explain(data)
|
||||
# You can return any data type as long as it is JSON-serializable
|
||||
return {'predictions': predictions.tolist(), 'local_importance_values': local_importance_values}
|
||||
@@ -268,7 +268,8 @@
|
||||
"\n",
|
||||
"# Register original model\n",
|
||||
"run.upload_file('original_model.pkl', os.path.join('./outputs/', model_file_name))\n",
|
||||
"original_model = run.register_model(model_name='original_model', model_path='original_model.pkl')\n",
|
||||
"original_model = run.register_model(model_name='local_deploy_model', \n",
|
||||
" model_path='original_model.pkl')\n",
|
||||
"\n",
|
||||
"# Register scoring explainer\n",
|
||||
"run.upload_file('IBM_attrition_explainer.pkl', 'scoring_explainer.pkl')\n",
|
||||
@@ -383,7 +384,7 @@
|
||||
"from azureml.core.image import ContainerImage\n",
|
||||
"\n",
|
||||
"# Use the custom scoring, docker, and conda files we created above\n",
|
||||
"image_config = ContainerImage.image_configuration(execution_script=\"score.py\",\n",
|
||||
"image_config = ContainerImage.image_configuration(execution_script=\"score_local_explain.py\",\n",
|
||||
" docker_file=\"dockerfile\", \n",
|
||||
" runtime=\"python\", \n",
|
||||
" conda_file=\"myenv.yml\")\n",
|
||||
|
||||
@@ -241,7 +241,7 @@
|
||||
"\n",
|
||||
"azureml_pip_packages = [\n",
|
||||
" 'azureml-defaults', 'azureml-contrib-explain-model', 'azureml-core', 'azureml-telemetry',\n",
|
||||
" 'azureml-explain-model'\n",
|
||||
" 'azureml-explain-model', 'azureml-dataprep'\n",
|
||||
"]\n",
|
||||
" \n",
|
||||
"\n",
|
||||
@@ -309,7 +309,7 @@
|
||||
"# retrieve model for visualization and deployment\n",
|
||||
"from azureml.core.model import Model\n",
|
||||
"from sklearn.externals import joblib\n",
|
||||
"original_model = Model(ws, 'original_model')\n",
|
||||
"original_model = Model(ws, 'amlcompute_deploy_model')\n",
|
||||
"model_path = original_model.download(exist_ok=True)\n",
|
||||
"original_svm_model = joblib.load(model_path)"
|
||||
]
|
||||
@@ -447,7 +447,7 @@
|
||||
"from azureml.core.image import ContainerImage\n",
|
||||
"\n",
|
||||
"# Use the custom scoring, docker, and conda files we created above\n",
|
||||
"image_config = ContainerImage.image_configuration(execution_script=\"score.py\",\n",
|
||||
"image_config = ContainerImage.image_configuration(execution_script=\"score_remote_explain.py\",\n",
|
||||
" docker_file=\"dockerfile\", \n",
|
||||
" runtime=\"python\", \n",
|
||||
" conda_file=\"myenv.yml\")\n",
|
||||
|
||||
@@ -99,7 +99,8 @@ with open(model_file_name, 'wb') as file:
|
||||
|
||||
# register the model with the model management service for later use
|
||||
run.upload_file('original_model.pkl', os.path.join(OUTPUT_DIR, model_file_name))
|
||||
original_model = run.register_model(model_name='original_model', model_path='original_model.pkl')
|
||||
original_model = run.register_model(model_name='amlcompute_deploy_model',
|
||||
model_path='original_model.pkl')
|
||||
|
||||
# create an explainer to validate or debug the model
|
||||
tabular_explainer = TabularExplainer(model,
|
||||
|
||||
@@ -62,24 +62,10 @@
|
||||
"4. Visualize the global and local explanations with the visualization dashboard.\n",
|
||||
"---\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"You will need to have extensions enabled prior to jupyter kernel starting to see the visualization dashboard.\n",
|
||||
"```\n",
|
||||
"(myenv) $ jupyter nbextension install --py --sys-prefix azureml.contrib.explain.model.visualize\n",
|
||||
"(myenv) $ jupyter nbextension enable --py --sys-prefix azureml.contrib.explain.model.visualize\n",
|
||||
"```\n",
|
||||
"Or\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"(myenv) $ jupyter nbextension install azureml.contrib.explain.model.visualize --user --py\n",
|
||||
"(myenv) $ jupyter nbextension enable azureml.contrib.explain.model.visualize --user --py\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"If you are using Jupyter Labs run the following commands instead:\n",
|
||||
"Setup: If you are using Jupyter notebooks, the extensions should be installed automatically with the package.\n",
|
||||
"If you are using Jupyter Labs run the following command:\n",
|
||||
"```\n",
|
||||
"(myenv) $ jupyter labextension install @jupyter-widgets/jupyterlab-manager\n",
|
||||
"(myenv) $ jupyter labextension install microsoft-mli-widget\n",
|
||||
"```\n"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -59,24 +59,10 @@
|
||||
"3. Visualize the global and local explanations with the visualization dashboard.\n",
|
||||
"---\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"You will need to have extensions enabled prior to jupyter kernel starting to see the visualization dashboard.\n",
|
||||
"```\n",
|
||||
"(myenv) $ jupyter nbextension install --py --sys-prefix azureml.contrib.explain.model.visualize\n",
|
||||
"(myenv) $ jupyter nbextension enable --py --sys-prefix azureml.contrib.explain.model.visualize\n",
|
||||
"```\n",
|
||||
"Or\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"(myenv) $ jupyter nbextension install azureml.contrib.explain.model.visualize --user --py\n",
|
||||
"(myenv) $ jupyter nbextension enable azureml.contrib.explain.model.visualize --user --py\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"If you are using Jupyter Labs run the following commands instead:\n",
|
||||
"Setup: If you are using Jupyter notebooks, the extensions should be installed automatically with the package.\n",
|
||||
"If you are using Jupyter Labs run the following command:\n",
|
||||
"```\n",
|
||||
"(myenv) $ jupyter labextension install @jupyter-widgets/jupyterlab-manager\n",
|
||||
"(myenv) $ jupyter labextension install microsoft-mli-widget\n",
|
||||
"```\n"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -60,24 +60,10 @@
|
||||
"3. Visualize the global and local explanations with the visualization dashboard.\n",
|
||||
"---\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"You will need to have extensions enabled prior to jupyter kernel starting to see the visualization dashboard.\n",
|
||||
"```\n",
|
||||
"(myenv) $ jupyter nbextension install --py --sys-prefix azureml.contrib.explain.model.visualize\n",
|
||||
"(myenv) $ jupyter nbextension enable --py --sys-prefix azureml.contrib.explain.model.visualize\n",
|
||||
"```\n",
|
||||
"Or\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"(myenv) $ jupyter nbextension install azureml.contrib.explain.model.visualize --user --py\n",
|
||||
"(myenv) $ jupyter nbextension enable azureml.contrib.explain.model.visualize --user --py\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"If you are using Jupyter Labs run the following commands instead:\n",
|
||||
"Setup: If you are using Jupyter notebooks, the extensions should be installed automatically with the package.\n",
|
||||
"If you are using Jupyter Labs run the following command:\n",
|
||||
"```\n",
|
||||
"(myenv) $ jupyter labextension install @jupyter-widgets/jupyterlab-manager\n",
|
||||
"(myenv) $ jupyter labextension install microsoft-mli-widget\n",
|
||||
"```\n"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -59,24 +59,10 @@
|
||||
"3. Visualize the global and local explanations with the visualization dashboard.\n",
|
||||
"---\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"You will need to have extensions enabled prior to jupyter kernel starting to see the visualization dashboard.\n",
|
||||
"```\n",
|
||||
"(myenv) $ jupyter nbextension install --py --sys-prefix azureml.contrib.explain.model.visualize\n",
|
||||
"(myenv) $ jupyter nbextension enable --py --sys-prefix azureml.contrib.explain.model.visualize\n",
|
||||
"```\n",
|
||||
"Or\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"(myenv) $ jupyter nbextension install azureml.contrib.explain.model.visualize --user --py\n",
|
||||
"(myenv) $ jupyter nbextension enable azureml.contrib.explain.model.visualize --user --py\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"If you are using Jupyter Labs run the following commands instead:\n",
|
||||
"Setup: If you are using Jupyter notebooks, the extensions should be installed automatically with the package.\n",
|
||||
"If you are using Jupyter Labs run the following command:\n",
|
||||
"```\n",
|
||||
"(myenv) $ jupyter labextension install @jupyter-widgets/jupyterlab-manager\n",
|
||||
"(myenv) $ jupyter labextension install microsoft-mli-widget\n",
|
||||
"```\n"
|
||||
]
|
||||
},
|
||||
|
||||
|
After Width: | Height: | Size: 116 KiB |
@@ -61,24 +61,10 @@
|
||||
"4. Visualize the global and local explanations with the visualization dashboard.\n",
|
||||
"---\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"You will need to have extensions enabled prior to jupyter kernel starting to see the visualization dashboard.\n",
|
||||
"```\n",
|
||||
"(myenv) $ jupyter nbextension install --py --sys-prefix azureml.contrib.explain.model.visualize\n",
|
||||
"(myenv) $ jupyter nbextension enable --py --sys-prefix azureml.contrib.explain.model.visualize\n",
|
||||
"```\n",
|
||||
"Or\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"(myenv) $ jupyter nbextension install azureml.contrib.explain.model.visualize --user --py\n",
|
||||
"(myenv) $ jupyter nbextension enable azureml.contrib.explain.model.visualize --user --py\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"If you are using Jupyter Labs run the following commands instead:\n",
|
||||
"Setup: If you are using Jupyter notebooks, the extensions should be installed automatically with the package.\n",
|
||||
"If you are using Jupyter Labs run the following command:\n",
|
||||
"```\n",
|
||||
"(myenv) $ jupyter labextension install @jupyter-widgets/jupyterlab-manager\n",
|
||||
"(myenv) $ jupyter labextension install microsoft-mli-widget\n",
|
||||
"```\n"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -36,13 +36,12 @@ Azure Machine Learning Pipelines optimize for simplicity, speed, and efficiency.
|
||||
|
||||
In this directory, there are two types of notebooks:
|
||||
|
||||
* The first type of notebooks will introduce you to core Azure Machine Learning Pipelines features. These notebooks below belong in this category, and are designed to go in sequence; they're all located in the "intro-to-pipelines" folder:
|
||||
Take a look at [intro-to-pipelines](./intro-to-pipelines/) for the list of notebooks that introduce Azure Machine Learning concepts for you.
|
||||
* The first type of notebooks will introduce you to core Azure Machine Learning Pipelines features. Notebooks in this category are designed to go in sequence; they're all located in the [intro-to-pipelines](./intro-to-pipelines/) folder.
|
||||
|
||||
* The second type of notebooks illustrate more sophisticated scenarios, and are independent of each other. These notebooks include:
|
||||
|
||||
1. [pipeline-batch-scoring.ipynb](https://aka.ms/pl-batch-score): This notebook demonstrates how to run a batch scoring job using Azure Machine Learning pipelines.
|
||||
2. [pipeline-style-transfer.ipynb](https://aka.ms/pl-style-trans): This notebook demonstrates a multi-step pipeline that uses GPU compute.
|
||||
2. [pipeline-style-transfer.ipynb](https://aka.ms/pl-style-trans): This notebook demonstrates a multi-step pipeline that uses GPU compute. This sample also showcases how to use conda dependencies using runconfig when using Pipelines.
|
||||
3. [nyc-taxi-data-regression-model-building.ipynb](https://aka.ms/pl-nyctaxi-tutorial): This notebook is an AzureML Pipelines version of the previously published two part sample.
|
||||
|
||||

|
||||
|
||||
@@ -15,6 +15,7 @@ These notebooks below are designed to go in sequence.
|
||||
10. [aml-pipelines-setup-schedule-for-a-published-pipeline.ipynb](https://aka.ms/pl-schedule): Once you publish a Pipeline, you can schedule it to trigger based on an interval or on data change in a defined datastore.
|
||||
11. [aml-pipelines-with-automated-machine-learning-step.ipynb](https://aka.ms/pl-automl): AutoMLStep in Pipelines shows how you can do automated machine learning using Pipelines.
|
||||
12. [aml-pipelines-setup-versioned-pipeline-endpoints.ipynb](https://aka.ms/pl-ver-endpoint): This notebook shows how you can setup PipelineEndpoint and submit a Pipeline using the PipelineEndpoint.
|
||||
|
||||
13. [aml-pipelines-showcasing-datapath-and-pipelineparameter.ipynb](https://aka.ms/pl-datapath): This notebook showcases how to use DataPath and PipelineParameter in AML Pipeline.
|
||||
14. [aml-pipelines-how-to-use-pipeline-drafts.ipynb](http://aka.ms/pl-pl-draft): This notebook shows how to use Pipeline Drafts. Pipeline Drafts are mutable pipelines which can be used to submit runs and create Published Pipelines.
|
||||
|
||||

|
||||
|
||||
@@ -0,0 +1,266 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved. \n",
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# How to Use Pipeline Drafts\n",
|
||||
"In this notebook, we will show you how you can use Pipeline Drafts. Pipeline Drafts are mutable pipelines which can be used to submit runs and create Published Pipelines."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Prerequisites and AML Basics\n",
|
||||
"If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, make sure you go through the [configuration Notebook](https://aka.ms/pl-config) first if you haven't. This sets you up with a working config file that has information on your workspace, subscription id, etc.\n",
|
||||
"\n",
|
||||
"### Initialization Steps"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import azureml.core\n",
|
||||
"from azureml.core import Workspace\n",
|
||||
"from azureml.core import Run, Experiment, Datastore\n",
|
||||
"from azureml.widgets import RunDetails\n",
|
||||
"\n",
|
||||
"# Check core SDK version number\n",
|
||||
"print(\"SDK version:\", azureml.core.VERSION)\n",
|
||||
"\n",
|
||||
"ws = Workspace.from_config()\n",
|
||||
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Compute Target\n",
|
||||
"Retrieve an already attached Azure Machine Learning Compute to use in the Pipeline."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.compute import AmlCompute, ComputeTarget\n",
|
||||
"aml_compute_target = \"cpu-cluster\"\n",
|
||||
"try:\n",
|
||||
" aml_compute = AmlCompute(ws, aml_compute_target)\n",
|
||||
" print(\"Found existing compute target: {}\".format(aml_compute_target))\n",
|
||||
"except:\n",
|
||||
" print(\"Creating new compute target: {}\".format(aml_compute_target))\n",
|
||||
" \n",
|
||||
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\",\n",
|
||||
" min_nodes = 1, \n",
|
||||
" max_nodes = 4) \n",
|
||||
" aml_compute = ComputeTarget.create(ws, aml_compute_target, provisioning_config)\n",
|
||||
" aml_compute.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Build a Pipeline\n",
|
||||
"Build a simple pipeline to use to create a PipelineDraft."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.pipeline.core import Pipeline\n",
|
||||
"from azureml.pipeline.steps import PythonScriptStep\n",
|
||||
"\n",
|
||||
"source_directory = \"publish_run_train\"\n",
|
||||
"\n",
|
||||
"train_step = PythonScriptStep(\n",
|
||||
" name=\"Training_Step\",\n",
|
||||
" script_name=\"train.py\", \n",
|
||||
" compute_target=aml_compute_target, \n",
|
||||
" source_directory=source_directory)\n",
|
||||
"print(\"train step created\")\n",
|
||||
"\n",
|
||||
"pipeline = Pipeline(workspace=ws, steps=[train_step])\n",
|
||||
"print (\"Pipeline is built\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create a Pipeline Draft\n",
|
||||
"Create a PipelineDraft by specifying a name, description, experiment_name and Pipeline. You can also specify tags, properties and pipeline_parameter values.\n",
|
||||
"\n",
|
||||
"In this example we use the previously created Pipeline object to create the Pipeline Draft. You can also create a Pipeline Draft from an existing Pipeline Run, Published Pipeline, or other Pipeline Draft."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.pipeline.core import PipelineDraft\n",
|
||||
"\n",
|
||||
"pipeline_draft = PipelineDraft.create(ws, name=\"TestPipelineDraft\",\n",
|
||||
" description=\"draft description\",\n",
|
||||
" experiment_name=\"helloworld\",\n",
|
||||
" pipeline=pipeline,\n",
|
||||
" continue_on_step_failure=True,\n",
|
||||
" tags={'dev': 'true'},\n",
|
||||
" properties={'train': 'value'})\n",
|
||||
"\n",
|
||||
"created_pipeline_draft_id = pipeline_draft.id"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### List Pipeline Drafts in a Workspace\n",
|
||||
"Use the PipelineDraft.list() function to list all PipelineDrafts in a Workspace. You can use the optional tags parameter to filter on specified tag values."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pipeline_drafts = PipelineDraft.list(ws, tags={'dev': 'true'})\n",
|
||||
"\n",
|
||||
"for pipeline_draft in pipeline_drafts:\n",
|
||||
" print(pipeline_draft)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Get a Pipeline Draft by Id"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pipeline_draft = PipelineDraft.get(ws, id=created_pipeline_draft_id)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Update a Pipeline Draft\n",
|
||||
"The update() function of a pipeline draft can be used to update the name, description, experiment name, pipeline parameter assignments, continue on step failure setting and Pipeline associated with the PipelineDraft. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"new_train_step = PythonScriptStep(\n",
|
||||
" name=\"New_Training_Step\",\n",
|
||||
" script_name=\"train.py\", \n",
|
||||
" compute_target=aml_compute_target, \n",
|
||||
" source_directory=source_directory)\n",
|
||||
"\n",
|
||||
"new_pipeline = Pipeline(workspace=ws, steps=[new_train_step])\n",
|
||||
"\n",
|
||||
"pipeline_draft.update(name=\"UpdatedPipelineDraft\", description=\"has updated train step\", pipeline=new_pipeline)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Submit a Pipeline Run from a Pipeline Draft\n",
|
||||
"Use the pipeline_draft.submit() function to submit a PipelineRun. After the run is submitted, the PipelineDraft can still be edited and used to submit new runs."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pipeline_run = pipeline_draft.submit_run()\n",
|
||||
"pipeline_run"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create a Published Pipeline from a Pipeline Draft\n",
|
||||
"Use the pipeline_draft.publish() function to create a Published Pipeline from the Pipeline Draft. After creating a Published Pipeline, the Pipeline Draft can still be edited and used to create other Published Pipelines."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"published_pipeline = pipeline_draft.publish()\n",
|
||||
"published_pipeline"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "elihop"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
"name": "python36"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -0,0 +1,5 @@
|
||||
name: aml-pipelines-how-to-use-pipeline-drafts
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-widgets
|
||||
@@ -333,7 +333,7 @@
|
||||
" policy=early_termination_policy,\n",
|
||||
" primary_metric_name='validation_acc', \n",
|
||||
" primary_metric_goal=PrimaryMetricGoal.MAXIMIZE, \n",
|
||||
" max_total_runs=10,\n",
|
||||
" max_total_runs=4,\n",
|
||||
" max_concurrent_runs=4)"
|
||||
]
|
||||
},
|
||||
@@ -441,8 +441,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# PUBLISHONLY\n",
|
||||
"# pipeline_run.wait_for_completion()"
|
||||
"pipeline_run.wait_for_completion()"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -459,9 +458,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# PUBLISHONLY\n",
|
||||
"# metrics_output = pipeline_run.get_pipeline_output(metrics_output_name)\n",
|
||||
"# num_file_downloaded = metrics_output.download('.', show_progress=True)"
|
||||
"metrics_output = pipeline_run.get_pipeline_output(metrics_output_name)\n",
|
||||
"num_file_downloaded = metrics_output.download('.', show_progress=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -470,15 +468,14 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# PUBLISHONLY\n",
|
||||
"# import pandas as pd\n",
|
||||
"# import json\n",
|
||||
"# with open(metrics_output._path_on_datastore) as f: \n",
|
||||
"# metrics_output_result = f.read()\n",
|
||||
"import pandas as pd\n",
|
||||
"import json\n",
|
||||
"with open(metrics_output._path_on_datastore) as f: \n",
|
||||
" metrics_output_result = f.read()\n",
|
||||
" \n",
|
||||
"# deserialized_metrics_output = json.loads(metrics_output_result)\n",
|
||||
"# df = pd.DataFrame(deserialized_metrics_output)\n",
|
||||
"# df"
|
||||
"deserialized_metrics_output = json.loads(metrics_output_result)\n",
|
||||
"df = pd.DataFrame(deserialized_metrics_output)\n",
|
||||
"df"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -495,10 +492,9 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# PUBLISHONLY\n",
|
||||
"# hd_step_run = HyperDriveStepRun(step_run=pipeline_run.find_step_run(hd_step_name)[0])\n",
|
||||
"# best_run = hd_step_run.get_best_run_by_primary_metric()\n",
|
||||
"# best_run"
|
||||
"hd_step_run = HyperDriveStepRun(step_run=pipeline_run.find_step_run(hd_step_name)[0])\n",
|
||||
"best_run = hd_step_run.get_best_run_by_primary_metric()\n",
|
||||
"best_run"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -514,8 +510,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# PUBLISHONLY\n",
|
||||
"# print(best_run.get_file_names())"
|
||||
"print(best_run.get_file_names())"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -531,8 +526,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# PUBLISHONLY\n",
|
||||
"# model = best_run.register_model(model_name='tf-dnn-mnist', model_path='outputs/model')"
|
||||
"model = best_run.register_model(model_name='tf-dnn-mnist', model_path='outputs/model')"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -596,15 +590,14 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# PUBLISHONLY\n",
|
||||
"# from azureml.core.runconfig import CondaDependencies\n",
|
||||
"from azureml.core.runconfig import CondaDependencies\n",
|
||||
"\n",
|
||||
"# cd = CondaDependencies.create()\n",
|
||||
"# cd.add_conda_package('numpy')\n",
|
||||
"# cd.add_tensorflow_conda_package()\n",
|
||||
"# cd.save_to_file(base_directory='./', conda_file_path='myenv.yml')\n",
|
||||
"cd = CondaDependencies.create()\n",
|
||||
"cd.add_conda_package('numpy')\n",
|
||||
"cd.add_tensorflow_conda_package()\n",
|
||||
"cd.save_to_file(base_directory='./', conda_file_path='myenv.yml')\n",
|
||||
"\n",
|
||||
"# print(cd.serialize_to_string())"
|
||||
"print(cd.serialize_to_string())"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -621,13 +614,12 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# PUBLISHONLY\n",
|
||||
"# from azureml.core.webservice import AciWebservice\n",
|
||||
"from azureml.core.webservice import AciWebservice\n",
|
||||
"\n",
|
||||
"# aciconfig = AciWebservice.deploy_configuration(cpu_cores=1, \n",
|
||||
"# memory_gb=1, \n",
|
||||
"# tags={'name':'mnist', 'framework': 'TensorFlow DNN'},\n",
|
||||
"# description='Tensorflow DNN on MNIST')"
|
||||
"aciconfig = AciWebservice.deploy_configuration(cpu_cores=1, \n",
|
||||
" memory_gb=1, \n",
|
||||
" tags={'name':'mnist', 'framework': 'TensorFlow DNN'},\n",
|
||||
" description='Tensorflow DNN on MNIST')"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -652,12 +644,11 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# PUBLISHONLY\n",
|
||||
"# from azureml.core.image import ContainerImage\n",
|
||||
"from azureml.core.image import ContainerImage\n",
|
||||
"\n",
|
||||
"# imgconfig = ContainerImage.image_configuration(execution_script=\"score.py\", \n",
|
||||
"# runtime=\"python\", \n",
|
||||
"# conda_file=\"myenv.yml\")"
|
||||
"imgconfig = ContainerImage.image_configuration(execution_script=\"score.py\", \n",
|
||||
" runtime=\"python\", \n",
|
||||
" conda_file=\"myenv.yml\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -666,17 +657,16 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# PUBLISHONLY\n",
|
||||
"# %%time\n",
|
||||
"# from azureml.core.webservice import Webservice\n",
|
||||
"%%time\n",
|
||||
"from azureml.core.webservice import Webservice\n",
|
||||
"\n",
|
||||
"# service = Webservice.deploy_from_model(workspace=ws,\n",
|
||||
"# name='tf-mnist-svc',\n",
|
||||
"# deployment_config=aciconfig,\n",
|
||||
"# models=[model],\n",
|
||||
"# image_config=imgconfig)\n",
|
||||
"service = Webservice.deploy_from_model(workspace=ws,\n",
|
||||
" name='tf-mnist-svc',\n",
|
||||
" deployment_config=aciconfig,\n",
|
||||
" models=[model],\n",
|
||||
" image_config=imgconfig)\n",
|
||||
"\n",
|
||||
"# service.wait_for_deployment(show_output=True)"
|
||||
"service.wait_for_deployment(show_output=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -692,8 +682,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# PUBLISHONLY\n",
|
||||
"# print(service.get_logs())"
|
||||
"print(service.get_logs())"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -709,8 +698,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# PUBLISHONLY\n",
|
||||
"# print(service.scoring_uri)"
|
||||
"print(service.scoring_uri)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -729,37 +717,36 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# PUBLISHONLY\n",
|
||||
"# import json\n",
|
||||
"import json\n",
|
||||
"\n",
|
||||
"# # find 30 random samples from test set\n",
|
||||
"# n = 30\n",
|
||||
"# sample_indices = np.random.permutation(X_test.shape[0])[0:n]\n",
|
||||
"# find 30 random samples from test set\n",
|
||||
"n = 30\n",
|
||||
"sample_indices = np.random.permutation(X_test.shape[0])[0:n]\n",
|
||||
"\n",
|
||||
"# test_samples = json.dumps({\"data\": X_test[sample_indices].tolist()})\n",
|
||||
"# test_samples = bytes(test_samples, encoding='utf8')\n",
|
||||
"test_samples = json.dumps({\"data\": X_test[sample_indices].tolist()})\n",
|
||||
"test_samples = bytes(test_samples, encoding='utf8')\n",
|
||||
"\n",
|
||||
"# # predict using the deployed model\n",
|
||||
"# result = service.run(input_data=test_samples)\n",
|
||||
"# predict using the deployed model\n",
|
||||
"result = service.run(input_data=test_samples)\n",
|
||||
"\n",
|
||||
"# # compare actual value vs. the predicted values:\n",
|
||||
"# i = 0\n",
|
||||
"# plt.figure(figsize = (20, 1))\n",
|
||||
"# compare actual value vs. the predicted values:\n",
|
||||
"i = 0\n",
|
||||
"plt.figure(figsize = (20, 1))\n",
|
||||
"\n",
|
||||
"# for s in sample_indices:\n",
|
||||
"# plt.subplot(1, n, i + 1)\n",
|
||||
"# plt.axhline('')\n",
|
||||
"# plt.axvline('')\n",
|
||||
"for s in sample_indices:\n",
|
||||
" plt.subplot(1, n, i + 1)\n",
|
||||
" plt.axhline('')\n",
|
||||
" plt.axvline('')\n",
|
||||
" \n",
|
||||
"# # use different color for misclassified sample\n",
|
||||
"# font_color = 'red' if y_test[s] != result[i] else 'black'\n",
|
||||
"# clr_map = plt.cm.gray if y_test[s] != result[i] else plt.cm.Greys\n",
|
||||
" # use different color for misclassified sample\n",
|
||||
" font_color = 'red' if y_test[s] != result[i] else 'black'\n",
|
||||
" clr_map = plt.cm.gray if y_test[s] != result[i] else plt.cm.Greys\n",
|
||||
" \n",
|
||||
"# plt.text(x=10, y=-10, s=y_hat[s], fontsize=18, color=font_color)\n",
|
||||
"# plt.imshow(X_test[s].reshape(28, 28), cmap=clr_map)\n",
|
||||
" plt.text(x=10, y=-10, s=y_hat[s], fontsize=18, color=font_color)\n",
|
||||
" plt.imshow(X_test[s].reshape(28, 28), cmap=clr_map)\n",
|
||||
" \n",
|
||||
"# i = i + 1\n",
|
||||
"# plt.show()"
|
||||
" i = i + 1\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -775,21 +762,20 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# PUBLISHONLY\n",
|
||||
"# import requests\n",
|
||||
"import requests\n",
|
||||
"\n",
|
||||
"# # send a random row from the test set to score\n",
|
||||
"# random_index = np.random.randint(0, len(X_test)-1)\n",
|
||||
"# input_data = \"{\\\"data\\\": [\" + str(list(X_test[random_index])) + \"]}\"\n",
|
||||
"# send a random row from the test set to score\n",
|
||||
"random_index = np.random.randint(0, len(X_test)-1)\n",
|
||||
"input_data = \"{\\\"data\\\": [\" + str(list(X_test[random_index])) + \"]}\"\n",
|
||||
"\n",
|
||||
"# headers = {'Content-Type':'application/json'}\n",
|
||||
"headers = {'Content-Type':'application/json'}\n",
|
||||
"\n",
|
||||
"# resp = requests.post(service.scoring_uri, input_data, headers=headers)\n",
|
||||
"resp = requests.post(service.scoring_uri, input_data, headers=headers)\n",
|
||||
"\n",
|
||||
"# print(\"POST to url\", service.scoring_uri)\n",
|
||||
"# print(\"input data:\", input_data)\n",
|
||||
"# print(\"label:\", y_test[random_index])\n",
|
||||
"# print(\"prediction:\", resp.text)"
|
||||
"print(\"POST to url\", service.scoring_uri)\n",
|
||||
"print(\"input data:\", input_data)\n",
|
||||
"print(\"label:\", y_test[random_index])\n",
|
||||
"print(\"prediction:\", resp.text)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -808,18 +794,17 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# PUBLISHONLY\n",
|
||||
"# models = ws.models\n",
|
||||
"# for name, model in models.items():\n",
|
||||
"# print(\"Model: {}, ID: {}\".format(name, model.id))\n",
|
||||
"models = ws.models\n",
|
||||
"for name, model in models.items():\n",
|
||||
" print(\"Model: {}, ID: {}\".format(name, model.id))\n",
|
||||
" \n",
|
||||
"# images = ws.images\n",
|
||||
"# for name, image in images.items():\n",
|
||||
"# print(\"Image: {}, location: {}\".format(name, image.image_location))\n",
|
||||
"images = ws.images\n",
|
||||
"for name, image in images.items():\n",
|
||||
" print(\"Image: {}, location: {}\".format(name, image.image_location))\n",
|
||||
" \n",
|
||||
"# webservices = ws.webservices\n",
|
||||
"# for name, webservice in webservices.items():\n",
|
||||
"# print(\"Webservice: {}, scoring URI: {}\".format(name, webservice.scoring_uri))"
|
||||
"webservices = ws.webservices\n",
|
||||
"for name, webservice in webservices.items():\n",
|
||||
" print(\"Webservice: {}, scoring URI: {}\".format(name, webservice.scoring_uri))"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -836,15 +821,14 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# PUBLISHONLY\n",
|
||||
"# service.delete()"
|
||||
"service.delete()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "sonnyp"
|
||||
"name": "sanpil"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
name: auto-ml-dataprep
|
||||
name: aml-pipelines-parameter-tuning-with-hyperdrive
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-train-automl
|
||||
- azureml-widgets
|
||||
- matplotlib
|
||||
- numpy
|
||||
- pandas_ml
|
||||
@@ -0,0 +1,5 @@
|
||||
name: aml-pipelines-setup-schedule-for-a-published-pipeline
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-widgets
|
||||
@@ -21,10 +21,10 @@
|
||||
"source": [
|
||||
"\n",
|
||||
"# How to Setup a PipelineEndpoint and Submit a Pipeline Using the PipelineEndpoint.\n",
|
||||
"In this notebook, we will see how to setup a PipelineEndpoint and run specific pipeline version.\n",
|
||||
"In this notebook, we will see how to setup a PipelineEndpoint and run a specific pipeline version.\n",
|
||||
"\n",
|
||||
"PipelineEndpoint can be used to update a published pipeline while maintaining same endpoint.\n",
|
||||
"PipelineEndpoint, provides a way to keep track of [PublishedPipelines](https://docs.microsoft.com/en-us/python/api/azureml-pipeline-core/azureml.pipeline.core.publishedpipeline) using versions. PipelineEndpoint uses endpoint with version information to trigger underlying published pipeline. Pipeline endpoints are uniquely named within a workspace. \n"
|
||||
"PipelineEndpoint can be used to update a published pipeline while maintaining the same endpoint.\n",
|
||||
"PipelineEndpoint provides a way to keep track of [PublishedPipelines](https://docs.microsoft.com/en-us/python/api/azureml-pipeline-core/azureml.pipeline.core.publishedpipeline) using versions. PipelineEndpoint uses endpoint with version information to trigger an underlying published pipeline. Pipeline endpoints are uniquely named within a workspace. \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -315,7 +315,25 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Set Published Pipeline to default version"
|
||||
"#### Add Published Pipeline to PipelineEndpoint, \n",
|
||||
"Adds a published pipeline (if its not present) using add() and if you want to add and set to default use add_default()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pipeline_endpoint_by_name.add(published_pipeline)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Add Published pipeline to PipelineEndpoint and set it to default version\n",
|
||||
"Adding published pipeline to PipelineEndpoint if not present and set it to default"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -391,40 +409,6 @@
|
||||
"pipeline_endpoint_by_name.set_name(name=\"NewName\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Add Published Pipeline to PipelineEndpoint, \n",
|
||||
"Adding published pipeline, if its not present in PipelineEndpoint."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pipeline_endpoint_by_name.add(published_pipeline)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Add Published pipeline to PipelineEndpoint and set it to default version\n",
|
||||
"Adding published pipeline to PipelineEndpoint if not present and set it to default"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pipeline_endpoint_by_name.add_default(published_pipeline)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -449,7 +433,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pipeline_endpoint_by_name = PipelineEndpoint.get(workspace=ws, name=\"PipelineEndpointTest\")\n",
|
||||
"pipeline_endpoint_by_name = PipelineEndpoint.get(workspace=ws, name=\"NewName\")\n",
|
||||
"\n",
|
||||
"# endpoint with id \n",
|
||||
"rest_endpoint_id = pipeline_endpoint_by_name.endpoint\n",
|
||||
@@ -531,11 +515,11 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# submit pipeline with specific version\n",
|
||||
"run_id = pipeline_endpoint_by_name.submit(\"TestPipelineEndpoint\", pipeline_version=\"0\")\n",
|
||||
"run_id = pipeline_endpoint_by_name.submit(\"NewName\", pipeline_version=\"0\")\n",
|
||||
"print(run_id)\n",
|
||||
"\n",
|
||||
"# submit pipeline with default version\n",
|
||||
"run_id = pipeline_endpoint_by_name.submit(\"TestPipelineEndpoint\")\n",
|
||||
"run_id = pipeline_endpoint_by_name.submit(\"NewName\")\n",
|
||||
"print(run_id)"
|
||||
]
|
||||
}
|
||||
|
||||
@@ -0,0 +1,6 @@
|
||||
name: aml-pipelines-setup-versioned-pipeline-endpoints
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-widgets
|
||||
- requests
|
||||
@@ -0,0 +1,479 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved. \n",
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Showcasing DataPath and PipelineParameter\n",
|
||||
"\n",
|
||||
"This notebook demonstrateas the use of [**DataPath**](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.data.datapath.datapath?view=azure-ml-py) and [**PipelineParameters**](https://docs.microsoft.com/en-us/python/api/azureml-pipeline-core/azureml.pipeline.core.pipelineparameter?view=azure-ml-py) in AML Pipeline. You will learn how strings and [**DataPath**](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.data.datapath.datapath?view=azure-ml-py) can be parameterized and submitted to AML Pipelines via [**PipelineParameters**](https://docs.microsoft.com/en-us/python/api/azureml-pipeline-core/azureml.pipeline.core.pipelineparameter?view=azure-ml-py).\n",
|
||||
"To see more about how parameters work between steps, please refer [aml-pipelines-with-data-dependency-steps](https://aka.ms/pl-data-dep).\n",
|
||||
"\n",
|
||||
"* [How to create a Pipeline with a DataPath PipelineParameter](#index1)\n",
|
||||
"* [How to submit a Pipeline with a DataPath PipelineParameter](#index2)\n",
|
||||
"* [How to submit a Pipeline and change the DataPath PipelineParameter value from the sdk](#index3)\n",
|
||||
"* [How to submit a Pipeline and change the DataPath PipelineParameter value using a REST call](#index4)\n",
|
||||
"* [How to create a datastore trigger schedule and use the data_path_parameter_name to get the path of the changed blob in the Pipeline](#index5)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Azure Machine Learning and Pipeline SDK-specific imports"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import azureml.core\n",
|
||||
"from azureml.core import Workspace, Experiment\n",
|
||||
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
|
||||
"from azureml.data.datapath import DataPath, DataPathComputeBinding\n",
|
||||
"from azureml.widgets import RunDetails\n",
|
||||
"\n",
|
||||
"from azureml.pipeline.core import PipelineParameter\n",
|
||||
"from azureml.pipeline.core import Pipeline, PipelineRun\n",
|
||||
"from azureml.pipeline.steps import PythonScriptStep\n",
|
||||
"\n",
|
||||
"# Check core SDK version number\n",
|
||||
"print(\"SDK version:\", azureml.core.VERSION)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Initialize Workspace\n",
|
||||
"\n",
|
||||
"Initialize a workspace object from persisted configuration. If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, make sure the config file is present at .\\config.json\n",
|
||||
"\n",
|
||||
"If you don't have a config.json file, please go through the configuration Notebook first.\n",
|
||||
"\n",
|
||||
"This sets you up with a working config file that has information on your workspace, subscription id, etc."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ws = Workspace.from_config()\n",
|
||||
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create an Azure ML experiment\n",
|
||||
"\n",
|
||||
"Let's create an experiment named \"automl-classification\" and a folder to hold the training scripts. The script runs will be recorded under the experiment in Azure."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Choose a name for the run history container in the workspace.\n",
|
||||
"experiment_name = 'showcasing-datapath'\n",
|
||||
"source_directory = '.'\n",
|
||||
"\n",
|
||||
"experiment = Experiment(ws, experiment_name)\n",
|
||||
"experiment"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create or Attach an AmlCompute cluster\n",
|
||||
"You will need to create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for your AutoML run. In this tutorial, you get the default `AmlCompute` as your training compute resource."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Choose a name for your cluster.\n",
|
||||
"amlcompute_cluster_name = \"cpu-cluster\"\n",
|
||||
"\n",
|
||||
"found = False\n",
|
||||
"# Check if this compute target already exists in the workspace.\n",
|
||||
"cts = ws.compute_targets\n",
|
||||
"if amlcompute_cluster_name in cts and cts[amlcompute_cluster_name].type == 'AmlCompute':\n",
|
||||
" found = True\n",
|
||||
" print('Found existing compute target.')\n",
|
||||
" compute_target = cts[amlcompute_cluster_name]\n",
|
||||
" \n",
|
||||
"if not found:\n",
|
||||
" print('Creating a new compute target...')\n",
|
||||
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\", # for GPU, use \"STANDARD_NC6\"\n",
|
||||
" #vm_priority = 'lowpriority', # optional\n",
|
||||
" max_nodes = 4)\n",
|
||||
"\n",
|
||||
" # Create the cluster.\n",
|
||||
" compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)\n",
|
||||
" \n",
|
||||
" # Can poll for a minimum number of nodes and for a specific timeout.\n",
|
||||
" # If no min_node_count is provided, it will use the scale settings for the cluster.\n",
|
||||
" compute_target.wait_for_completion(show_output = True, timeout_in_minutes = 10)\n",
|
||||
" \n",
|
||||
" # For a more detailed view of current AmlCompute status, use get_status()."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Data and arguments setup \n",
|
||||
"\n",
|
||||
"We will setup a trining script to run and its arguments to be used. The sample training script below will print the two arguments to show what has been passed to pipeline."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%writefile train_with_datapath.py\n",
|
||||
"import argparse\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"parser = argparse.ArgumentParser(\"train\")\n",
|
||||
"parser.add_argument(\"--arg1\", type=str, help=\"sample string argument\")\n",
|
||||
"parser.add_argument(\"--arg2\", type=str, help=\"sample datapath argument\")\n",
|
||||
"args = parser.parse_args()\n",
|
||||
"\n",
|
||||
"print(\"Sample string argument : %s\" % args.arg1)\n",
|
||||
"print(\"Sample datapath argument: %s\" % args.arg2)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's setup string and DataPath arguments using PipelineParameter. \n",
|
||||
"\n",
|
||||
"Note that Pipeline accepts a tuple of the form ([**PipelineParameters**](https://docs.microsoft.com/en-us/python/api/azureml-pipeline-core/azureml.pipeline.core.pipelineparameter?view=azure-ml-py) , [**DataPathComputeBinding**](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.data.datapath.datapathcomputebinding?view=azure-ml-py)) as an input. DataPath defines the location of input data. DataPathComputeBinding defines how the data is consumed during step execution. The DataPath can be modified at pipeline submission time with a DataPath parameter, while the compute binding does not change. For static data inputs, we use [**DataReference**](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.data.data_reference.datareference?view=azure-ml-py) which defines both the data location and compute binding."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def_blob_store = ws.get_default_datastore()\n",
|
||||
"print(\"Default datastore's name: {}\".format(def_blob_store.name))\n",
|
||||
"\n",
|
||||
"data_path = DataPath(datastore=def_blob_store, path_on_datastore='sample_datapath1')\n",
|
||||
"datapath1_pipeline_param = PipelineParameter(name=\"input_datapath\", default_value=data_path)\n",
|
||||
"datapath_input = (datapath1_pipeline_param, DataPathComputeBinding(mode='mount'))\n",
|
||||
"\n",
|
||||
"string_pipeline_param = PipelineParameter(name=\"input_string\", default_value='sample_string1')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a id='index1'></a>"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create a Pipeline with a DataPath PipelineParameter\n",
|
||||
"\n",
|
||||
"Note that the ```datapath_input``` is specified on both arguments and inputs to create a step."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"train_step = PythonScriptStep(\n",
|
||||
" name='train_step',\n",
|
||||
" script_name=\"train_with_datapath.py\",\n",
|
||||
" arguments=[\"--arg1\", string_pipeline_param, \"--arg2\", datapath_input],\n",
|
||||
" inputs=[datapath_input],\n",
|
||||
" compute_target=compute_target, \n",
|
||||
" source_directory=source_directory)\n",
|
||||
"print(\"train_step created\")\n",
|
||||
"\n",
|
||||
"pipeline = Pipeline(workspace=ws, steps=[train_step])\n",
|
||||
"print(\"pipeline with the train_step created\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a id='index2'></a>"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Submit a Pipeline with a DataPath PipelineParameter\n",
|
||||
"\n",
|
||||
"Pipelines can be submitted with default values of PipelineParameters by not specifying any parameters."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pipeline_run = experiment.submit(pipeline)\n",
|
||||
"print(\"Pipeline is submitted for execution\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"RunDetails(pipeline_run).show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pipeline_run.wait_for_completion()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a id='index3'></a>"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Submit a Pipeline and change the DataPath PipelineParameter value from the sdk\n",
|
||||
"\n",
|
||||
"Or Pipelines can be submitted with values other than default ones by using pipeline_parameters. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pipeline_run_with_params = experiment.submit(pipeline, \\\n",
|
||||
" pipeline_parameters={'input_datapath': DataPath(datastore=def_blob_store, path_on_datastore='sample_datapath2'),\n",
|
||||
" 'input_string': 'sample_string2'}) "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"RunDetails(pipeline_run_with_params).show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pipeline_run_with_params.wait_for_completion()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a id='index4'></a>"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Submit a Pipeline and change the DataPath PipelineParameter value using a REST call\n",
|
||||
"\n",
|
||||
"Let's published the pipeline to use the rest endpoint of the published pipeline."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"published_pipeline = pipeline.publish(name=\"DataPath_Pipeline\", description=\"Pipeline to test Datapath\", continue_on_step_failure=True)\n",
|
||||
"published_pipeline"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.authentication import InteractiveLoginAuthentication\n",
|
||||
"import requests\n",
|
||||
"\n",
|
||||
"auth = InteractiveLoginAuthentication()\n",
|
||||
"aad_token = auth.get_authentication_header()\n",
|
||||
"\n",
|
||||
"rest_endpoint = published_pipeline.endpoint\n",
|
||||
"\n",
|
||||
"print(\"You can perform HTTP POST on URL {} to trigger this pipeline\".format(rest_endpoint))\n",
|
||||
"\n",
|
||||
"# specify the param when running the pipeline\n",
|
||||
"response = requests.post(rest_endpoint, \n",
|
||||
" headers=aad_token, \n",
|
||||
" json={\"ExperimentName\": \"MyRestPipeline\",\n",
|
||||
" \"RunSource\": \"SDK\",\n",
|
||||
" \"DataPathAssignments\": {\n",
|
||||
" \"input_datapath\": { \n",
|
||||
" \"DataStoreName\": def_blob_store.name,\n",
|
||||
" \"RelativePath\": 'sample_datapath3'\n",
|
||||
" }\n",
|
||||
" },\n",
|
||||
" \"ParameterAssignments\": {\"input_string\": \"sample_string3\"}\n",
|
||||
" }\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"run_id = response.json()[\"Id\"]\n",
|
||||
"print('Submitted pipeline run: ', run_id)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"published_pipeline_run_via_rest = PipelineRun(ws.experiments[\"MyRestPipeline\"], run_id)\n",
|
||||
"RunDetails(published_pipeline_run_via_rest).show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"published_pipeline_run_via_rest.wait_for_completion()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a id='index5'></a>"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create a Datastore trigger schedule and use data path parameter\n",
|
||||
"\n",
|
||||
"When the Pipeline is scheduled with DataPath parameter, it will be triggered by the modified or added data in the DataPath. ```path_on_datastore``` should be a folder and the value of the DataPath will be replaced by the path of the modified data."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.pipeline.core import Schedule\n",
|
||||
"\n",
|
||||
"schedule = Schedule.create(workspace=ws, \n",
|
||||
" name=\"Datastore_trigger_schedule\",\n",
|
||||
" pipeline_id=published_pipeline.id, \n",
|
||||
" experiment_name='Scheduled_Pipeline',\n",
|
||||
" datastore=def_blob_store,\n",
|
||||
" wait_for_provisioning=True,\n",
|
||||
" description=\"Datastore trigger schedule demo\",\n",
|
||||
" path_on_datastore=\"sample_datapath_for_folder\",\n",
|
||||
" data_path_parameter_name=\"input_datapath\") #Same name as used above to create PipelineParameter\n",
|
||||
"\n",
|
||||
"print(\"Created schedule with id: {}\".format(schedule.id))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"schedule.disable()\n",
|
||||
"schedule"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "sanpil"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
"name": "python36"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -0,0 +1,5 @@
|
||||
name: aml-pipelines-showcasing-datapath-and-pipelineparameter
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-widgets
|
||||
@@ -20,7 +20,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Azure Machine Learning Pipelines with Data Dependency\n",
|
||||
"In this notebook, we will see how we can build a pipeline with implicit data dependancy."
|
||||
"In this notebook, we will see how we can build a pipeline with implicit data dependency."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -248,7 +248,7 @@
|
||||
"# Specify CondaDependencies obj, add necessary packages\n",
|
||||
"aml_run_config.environment.python.conda_dependencies = CondaDependencies.create(\n",
|
||||
" conda_packages=['pandas','scikit-learn'], \n",
|
||||
" pip_packages=['azureml-sdk', 'azureml-dataprep', 'azureml-train-automl==1.0.33'], \n",
|
||||
" pip_packages=['azureml-sdk', 'azureml-dataprep', 'azureml-train-automl'], \n",
|
||||
" pin_sdk_version=False)\n",
|
||||
"\n",
|
||||
"print (\"Run configuration created.\")"
|
||||
|
||||
@@ -0,0 +1,139 @@
|
||||
|
||||
import argparse
|
||||
import os
|
||||
|
||||
import numpy as np
|
||||
|
||||
import chainer
|
||||
from chainer import backend
|
||||
from chainer import backends
|
||||
from chainer.backends import cuda
|
||||
from chainer import Function, gradient_check, report, training, utils, Variable
|
||||
from chainer import datasets, iterators, optimizers, serializers
|
||||
from chainer import Link, Chain, ChainList
|
||||
import chainer.functions as F
|
||||
import chainer.links as L
|
||||
from chainer.training import extensions
|
||||
from chainer.dataset import concat_examples
|
||||
from chainer.backends.cuda import to_cpu
|
||||
|
||||
from azureml.core.run import Run
|
||||
run = Run.get_context()
|
||||
|
||||
|
||||
class MyNetwork(Chain):
|
||||
|
||||
def __init__(self, n_mid_units=100, n_out=10):
|
||||
super(MyNetwork, self).__init__()
|
||||
with self.init_scope():
|
||||
self.l1 = L.Linear(None, n_mid_units)
|
||||
self.l2 = L.Linear(n_mid_units, n_mid_units)
|
||||
self.l3 = L.Linear(n_mid_units, n_out)
|
||||
|
||||
def forward(self, x):
|
||||
h = F.relu(self.l1(x))
|
||||
h = F.relu(self.l2(h))
|
||||
return self.l3(h)
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Chainer example: MNIST')
|
||||
parser.add_argument('--batchsize', '-b', type=int, default=100,
|
||||
help='Number of images in each mini-batch')
|
||||
parser.add_argument('--epochs', '-e', type=int, default=20,
|
||||
help='Number of sweeps over the dataset to train')
|
||||
parser.add_argument('--output_dir', '-o', default='./outputs',
|
||||
help='Directory to output the result')
|
||||
parser.add_argument('--gpu_id', '-g', default=0,
|
||||
help='ID of the GPU to be used. Set to -1 if you use CPU')
|
||||
args = parser.parse_args()
|
||||
|
||||
# Download the MNIST data if you haven't downloaded it yet
|
||||
train, test = datasets.mnist.get_mnist(withlabel=True, ndim=1)
|
||||
|
||||
gpu_id = args.gpu_id
|
||||
batchsize = args.batchsize
|
||||
epochs = args.epochs
|
||||
run.log('Batch size', np.int(batchsize))
|
||||
run.log('Epochs', np.int(epochs))
|
||||
|
||||
train_iter = iterators.SerialIterator(train, batchsize)
|
||||
test_iter = iterators.SerialIterator(test, batchsize,
|
||||
repeat=False, shuffle=False)
|
||||
|
||||
model = MyNetwork()
|
||||
|
||||
if gpu_id >= 0:
|
||||
# Make a specified GPU current
|
||||
chainer.backends.cuda.get_device_from_id(0).use()
|
||||
model.to_gpu() # Copy the model to the GPU
|
||||
|
||||
# Choose an optimizer algorithm
|
||||
optimizer = optimizers.MomentumSGD(lr=0.01, momentum=0.9)
|
||||
|
||||
# Give the optimizer a reference to the model so that it
|
||||
# can locate the model's parameters.
|
||||
optimizer.setup(model)
|
||||
|
||||
while train_iter.epoch < epochs:
|
||||
# ---------- One iteration of the training loop ----------
|
||||
train_batch = train_iter.next()
|
||||
image_train, target_train = concat_examples(train_batch, gpu_id)
|
||||
|
||||
# Calculate the prediction of the network
|
||||
prediction_train = model(image_train)
|
||||
|
||||
# Calculate the loss with softmax_cross_entropy
|
||||
loss = F.softmax_cross_entropy(prediction_train, target_train)
|
||||
|
||||
# Calculate the gradients in the network
|
||||
model.cleargrads()
|
||||
loss.backward()
|
||||
|
||||
# Update all the trainable parameters
|
||||
optimizer.update()
|
||||
# --------------------- until here ---------------------
|
||||
|
||||
# Check the validation accuracy of prediction after every epoch
|
||||
if train_iter.is_new_epoch: # If this iteration is the final iteration of the current epoch
|
||||
|
||||
# Display the training loss
|
||||
print('epoch:{:02d} train_loss:{:.04f} '.format(
|
||||
train_iter.epoch, float(to_cpu(loss.array))), end='')
|
||||
|
||||
test_losses = []
|
||||
test_accuracies = []
|
||||
while True:
|
||||
test_batch = test_iter.next()
|
||||
image_test, target_test = concat_examples(test_batch, gpu_id)
|
||||
|
||||
# Forward the test data
|
||||
prediction_test = model(image_test)
|
||||
|
||||
# Calculate the loss
|
||||
loss_test = F.softmax_cross_entropy(prediction_test, target_test)
|
||||
test_losses.append(to_cpu(loss_test.array))
|
||||
|
||||
# Calculate the accuracy
|
||||
accuracy = F.accuracy(prediction_test, target_test)
|
||||
accuracy.to_cpu()
|
||||
test_accuracies.append(accuracy.array)
|
||||
|
||||
if test_iter.is_new_epoch:
|
||||
test_iter.epoch = 0
|
||||
test_iter.current_position = 0
|
||||
test_iter.is_new_epoch = False
|
||||
test_iter._pushed_position = None
|
||||
break
|
||||
|
||||
val_accuracy = np.mean(test_accuracies)
|
||||
print('val_loss:{:.04f} val_accuracy:{:.04f}'.format(
|
||||
np.mean(test_losses), val_accuracy))
|
||||
|
||||
run.log("Accuracy", np.float(val_accuracy))
|
||||
|
||||
serializers.save_npz(os.path.join(args.output_dir, 'model.npz'), model)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@@ -0,0 +1,45 @@
|
||||
import numpy as np
|
||||
import os
|
||||
import json
|
||||
|
||||
from chainer import serializers, using_config, Variable, datasets
|
||||
import chainer.functions as F
|
||||
import chainer.links as L
|
||||
from chainer import Chain
|
||||
|
||||
from azureml.core.model import Model
|
||||
|
||||
|
||||
class MyNetwork(Chain):
|
||||
|
||||
def __init__(self, n_mid_units=100, n_out=10):
|
||||
super(MyNetwork, self).__init__()
|
||||
with self.init_scope():
|
||||
self.l1 = L.Linear(None, n_mid_units)
|
||||
self.l2 = L.Linear(n_mid_units, n_mid_units)
|
||||
self.l3 = L.Linear(n_mid_units, n_out)
|
||||
|
||||
def forward(self, x):
|
||||
h = F.relu(self.l1(x))
|
||||
h = F.relu(self.l2(h))
|
||||
return self.l3(h)
|
||||
|
||||
|
||||
def init():
|
||||
global model
|
||||
|
||||
model_root = Model.get_model_path('chainer-dnn-mnist')
|
||||
|
||||
# Load our saved artifacts
|
||||
model = MyNetwork()
|
||||
serializers.load_npz(model_root, model)
|
||||
|
||||
|
||||
def run(input_data):
|
||||
i = np.array(json.loads(input_data)['data'])
|
||||
|
||||
_, test = datasets.get_mnist()
|
||||
x = Variable(np.asarray([test[i][0]]))
|
||||
y = model(x)
|
||||
|
||||
return np.ndarray.tolist(y.data.argmax(axis=1))
|
||||
@@ -0,0 +1,725 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved. \n",
|
||||
"\n",
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Train and hyperparameter tune with Chainer\n",
|
||||
"\n",
|
||||
"In this tutorial, we demonstrate how to use the Azure ML Python SDK to train a Convolutional Neural Network (CNN) on a single-node GPU with Chainer to perform handwritten digit recognition on the popular MNIST dataset. We will also demonstrate how to perform hyperparameter tuning of the model using Azure ML's HyperDrive service."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Prerequisites\n",
|
||||
"* If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [Configuration](../../../configuration.ipynb) notebook to install the Azure Machine Learning Python SDK and create an Azure ML `Workspace`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Check core SDK version number\n",
|
||||
"import azureml.core\n",
|
||||
"\n",
|
||||
"print(\"SDK version:\", azureml.core.VERSION)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!jupyter nbextension install --py --user azureml.widgets\n",
|
||||
"!jupyter nbextension enable --py --user azureml.widgets"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Diagnostics\n",
|
||||
"Opt-in diagnostics for better experience, quality, and security of future releases."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"Diagnostics"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.telemetry import set_diagnostics_collection\n",
|
||||
"\n",
|
||||
"set_diagnostics_collection(send_diagnostics=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Initialize workspace\n",
|
||||
"Initialize a [Workspace](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#workspace) object from the existing workspace you created in the Prerequisites step. `Workspace.from_config()` creates a workspace object from the details stored in `config.json`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.workspace import Workspace\n",
|
||||
"\n",
|
||||
"ws = Workspace.from_config()\n",
|
||||
"print('Workspace name: ' + ws.name, \n",
|
||||
" 'Azure region: ' + ws.location, \n",
|
||||
" 'Subscription id: ' + ws.subscription_id, \n",
|
||||
" 'Resource group: ' + ws.resource_group, sep = '\\n')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create or Attach existing AmlCompute\n",
|
||||
"You will need to create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for training your model. In this tutorial, we use Azure ML managed compute ([AmlCompute](https://docs.microsoft.com/azure/machine-learning/service/how-to-set-up-training-targets#amlcompute)) for our remote training compute resource.\n",
|
||||
"\n",
|
||||
"**Creation of AmlCompute takes approximately 5 minutes.** If the AmlCompute with that name is already in your workspace, this code will skip the creation process.\n",
|
||||
"\n",
|
||||
"As with other Azure services, there are limits on certain resources (e.g. AmlCompute) associated with the Azure Machine Learning service. Please read [this article](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
|
||||
"from azureml.core.compute_target import ComputeTargetException\n",
|
||||
"\n",
|
||||
"# choose a name for your cluster\n",
|
||||
"cluster_name = \"gpu-cluster\"\n",
|
||||
"\n",
|
||||
"try:\n",
|
||||
" compute_target = ComputeTarget(workspace=ws, name=cluster_name)\n",
|
||||
" print('Found existing compute target.')\n",
|
||||
"except ComputeTargetException:\n",
|
||||
" print('Creating a new compute target...')\n",
|
||||
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6', \n",
|
||||
" min_nodes=2,\n",
|
||||
" max_nodes=4)\n",
|
||||
"\n",
|
||||
" # create the cluster\n",
|
||||
" compute_target = ComputeTarget.create(ws, cluster_name, compute_config)\n",
|
||||
"\n",
|
||||
" compute_target.wait_for_completion(show_output=True)\n",
|
||||
"\n",
|
||||
"# use get_status() to get a detailed status for the current cluster. \n",
|
||||
"print(compute_target.get_status().serialize())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The above code creates a GPU cluster. If you instead want to create a CPU cluster, provide a different VM size to the `vm_size` parameter, such as `STANDARD_D2_V2`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Train model on the remote compute\n",
|
||||
"Now that you have your data and training script prepared, you are ready to train on your remote compute cluster. You can take advantage of Azure compute to leverage GPUs to cut down your training time. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create a project directory\n",
|
||||
"Create a directory that will contain all the necessary code from your local machine that you will need access to on the remote resource. This includes the training script and any additional files your training script depends on."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"project_folder = './chainer-mnist'\n",
|
||||
"os.makedirs(project_folder, exist_ok=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Prepare training script\n",
|
||||
"Now you will need to create your training script. In this tutorial, the training script is already provided for you at `chainer_mnist.py`. In practice, you should be able to take any custom training script as is and run it with Azure ML without having to modify your code.\n",
|
||||
"\n",
|
||||
"However, if you would like to use Azure ML's [tracking and metrics](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#metrics) capabilities, you will have to add a small amount of Azure ML code inside your training script. \n",
|
||||
"\n",
|
||||
"In `chainer_mnist.py`, we will log some metrics to our Azure ML run. To do so, we will access the Azure ML `Run` object within the script:\n",
|
||||
"```Python\n",
|
||||
"from azureml.core.run import Run\n",
|
||||
"run = Run.get_context()\n",
|
||||
"```\n",
|
||||
"Further within `chainer_mnist.py`, we log the batchsize and epochs parameters, and the highest accuracy the model achieves:\n",
|
||||
"```Python\n",
|
||||
"run.log('Batch size', np.int(args.batchsize))\n",
|
||||
"run.log('Epochs', np.int(args.epochs))\n",
|
||||
"\n",
|
||||
"run.log('Accuracy', np.float(val_accuracy))\n",
|
||||
"```\n",
|
||||
"These run metrics will become particularly important when we begin hyperparameter tuning our model in the \"Tune model hyperparameters\" section."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Once your script is ready, copy the training script `chainer_mnist.py` into your project directory."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import shutil\n",
|
||||
"\n",
|
||||
"shutil.copy('chainer_mnist.py', project_folder)\n",
|
||||
"shutil.copy('chainer_score.py', project_folder)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create an experiment\n",
|
||||
"Create an [Experiment](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#experiment) to track all the runs in your workspace for this Chainer tutorial. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core import Experiment\n",
|
||||
"\n",
|
||||
"experiment_name = 'chainer-mnist'\n",
|
||||
"experiment = Experiment(ws, name=experiment_name)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create a Chainer estimator\n",
|
||||
"The Azure ML SDK's Chainer estimator enables you to easily submit Chainer training jobs for both single-node and distributed runs. The following code will define a single-node Chainer job."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"dnn-chainer-remarks-sample"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.train.dnn import Chainer\n",
|
||||
"\n",
|
||||
"script_params = {\n",
|
||||
" '--epochs': 10,\n",
|
||||
" '--batchsize': 128,\n",
|
||||
" '--output_dir': './outputs'\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"estimator = Chainer(source_directory=project_folder, \n",
|
||||
" script_params=script_params,\n",
|
||||
" compute_target=compute_target,\n",
|
||||
" pip_packages=['numpy', 'pytest'],\n",
|
||||
" entry_script='chainer_mnist.py',\n",
|
||||
" use_gpu=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The `script_params` parameter is a dictionary containing the command-line arguments to your training script `entry_script`. To leverage the Azure VM's GPU for training, we set `use_gpu=True`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Submit job\n",
|
||||
"Run your experiment by submitting your estimator object. Note that this call is asynchronous."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run = experiment.submit(estimator)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Monitor your run\n",
|
||||
"You can monitor the progress of the run with a Jupyter widget. Like the run submission, the widget is asynchronous and provides live updates every 10-15 seconds until the job completes."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.widgets import RunDetails\n",
|
||||
"\n",
|
||||
"RunDetails(run).show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# to get more details of your run\n",
|
||||
"print(run.get_details())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Tune model hyperparameters\n",
|
||||
"Now that we've seen how to do a simple Chainer training run using the SDK, let's see if we can further improve the accuracy of our model. We can optimize our model's hyperparameters using Azure Machine Learning's hyperparameter tuning capabilities."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Start a hyperparameter sweep\n",
|
||||
"First, we will define the hyperparameter space to sweep over. Let's tune the batch size and epochs parameters. In this example we will use random sampling to try different configuration sets of hyperparameters to maximize our primary metric, accuracy.\n",
|
||||
"\n",
|
||||
"Then, we specify the early termination policy to use to early terminate poorly performing runs. Here we use the `BanditPolicy`, which will terminate any run that doesn't fall within the slack factor of our primary evaluation metric. In this tutorial, we will apply this policy every epoch (since we report our `Accuracy` metric every epoch and `evaluation_interval=1`). Notice we will delay the first policy evaluation until after the first `3` epochs (`delay_evaluation=3`).\n",
|
||||
"Refer [here](https://docs.microsoft.com/azure/machine-learning/service/how-to-tune-hyperparameters#specify-an-early-termination-policy) for more information on the BanditPolicy and other policies available."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.train.hyperdrive.runconfig import HyperDriveConfig\n",
|
||||
"from azureml.train.hyperdrive.sampling import RandomParameterSampling\n",
|
||||
"from azureml.train.hyperdrive.policy import BanditPolicy\n",
|
||||
"from azureml.train.hyperdrive.run import PrimaryMetricGoal\n",
|
||||
"from azureml.train.hyperdrive.parameter_expressions import choice\n",
|
||||
" \n",
|
||||
"\n",
|
||||
"param_sampling = RandomParameterSampling( {\n",
|
||||
" \"--batchsize\": choice(128, 256),\n",
|
||||
" \"--epochs\": choice(5, 10, 20, 40)\n",
|
||||
" }\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"hyperdrive_config = HyperDriveConfig(estimator=estimator,\n",
|
||||
" hyperparameter_sampling=param_sampling, \n",
|
||||
" primary_metric_name='Accuracy',\n",
|
||||
" policy=BanditPolicy(evaluation_interval=1, slack_factor=0.1, delay_evaluation=3),\n",
|
||||
" primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,\n",
|
||||
" max_total_runs=8,\n",
|
||||
" max_concurrent_runs=4)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Finally, lauch the hyperparameter tuning job."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# start the HyperDrive run\n",
|
||||
"hyperdrive_run = experiment.submit(hyperdrive_config)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Monitor HyperDrive runs\n",
|
||||
"You can monitor the progress of the runs with the following Jupyter widget. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"RunDetails(hyperdrive_run).show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"hyperdrive_run.wait_for_completion(show_output=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Find and register best model\n",
|
||||
"When all jobs finish, we can find out the one that has the highest accuracy."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"best_run = hyperdrive_run.get_best_run_by_primary_metric()\n",
|
||||
"print(best_run.get_details()['runDefinition']['arguments'])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now, let's list the model files uploaded during the run."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(best_run.get_file_names())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We can then register the folder (and all files in it) as a model named `chainer-dnn-mnist` under the workspace for deployment"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model = best_run.register_model(model_name='chainer-dnn-mnist', model_path='outputs/model.npz')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Deploy the model in ACI\n",
|
||||
"Now, we are ready to deploy the model as a web service running in Azure Container Instance, [ACI](https://azure.microsoft.com/en-us/services/container-instances/). Azure Machine Learning accomplishes this by constructing a Docker image with the scoring logic and model baked in.\n",
|
||||
"\n",
|
||||
"### Create scoring script\n",
|
||||
"First, we will create a scoring script that will be invoked by the web service call.\n",
|
||||
"+ Now that the scoring script must have two required functions, `init()` and `run(input_data)`.\n",
|
||||
" + In `init()`, you typically load the model into a global object. This function is executed only once when the Docker contianer is started.\n",
|
||||
" + In `run(input_data)`, the model is used to predict a value based on the input data. The input and output to `run` uses NPZ as the serialization and de-serialization format because it is the preferred format for Chainer, but you are not limited to it.\n",
|
||||
" \n",
|
||||
"Refer to the scoring script `chainer_score.py` for this tutorial. Our web service will use this file to predict. When writing your own scoring script, don't forget to test it locally first before you go and deploy the web service."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"shutil.copy('chainer_score.py', project_folder)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create myenv.yml\n",
|
||||
"We also need to create an environment file so that Azure Machine Learning can install the necessary packages in the Docker image which are required by your scoring script. In this case, we need to specify conda packages `numpy` and `chainer`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.runconfig import CondaDependencies\n",
|
||||
"\n",
|
||||
"cd = CondaDependencies.create()\n",
|
||||
"cd.add_conda_package('numpy')\n",
|
||||
"cd.add_conda_package('chainer')\n",
|
||||
"cd.save_to_file(base_directory='./', conda_file_path='myenv.yml')\n",
|
||||
"\n",
|
||||
"print(cd.serialize_to_string())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Deploy to ACI\n",
|
||||
"We are almost ready to deploy. Create the inference configuration and deployment configuration and deploy to ACI. This cell will run for about 7-8 minutes."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.webservice import AciWebservice\n",
|
||||
"from azureml.core.model import InferenceConfig\n",
|
||||
"from azureml.core.webservice import Webservice\n",
|
||||
"from azureml.core.model import Model\n",
|
||||
"\n",
|
||||
"inference_config = InferenceConfig(runtime= \"python\", \n",
|
||||
" entry_script=\"chainer_score.py\",\n",
|
||||
" conda_file=\"myenv.yml\")\n",
|
||||
"\n",
|
||||
"aciconfig = AciWebservice.deploy_configuration(cpu_cores=1,\n",
|
||||
" auth_enabled=True, # this flag generates API keys to secure access\n",
|
||||
" memory_gb=1,\n",
|
||||
" tags={'name': 'mnist', 'framework': 'Chainer'},\n",
|
||||
" description='Chainer DNN with MNIST')\n",
|
||||
"\n",
|
||||
"service = Model.deploy(workspace=ws, \n",
|
||||
" name='chainer-mnist-1', \n",
|
||||
" models=[model], \n",
|
||||
" inference_config=inference_config, \n",
|
||||
" deployment_config=aciconfig)\n",
|
||||
"service.wait_for_deployment(True)\n",
|
||||
"print(service.state)\n",
|
||||
"print(service.scoring_uri)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Tip: If something goes wrong with the deployment, the first thing to look at is the logs from the service by running the following command:** `print(service.get_logs())`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This is the scoring web service endpoint: `print(service.scoring_uri)`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Test the deployed model\n",
|
||||
"Let's test the deployed model. Pick a random sample from the test set, and send it to the web service hosted in ACI for a prediction. Note, here we are using the an HTTP request to invoke the service.\n",
|
||||
"\n",
|
||||
"We can retrieve the API keys used for accessing the HTTP endpoint and construct a raw HTTP request to send to the service. Don't forget to add key to the HTTP header."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# retreive the API keys. two keys were generated.\n",
|
||||
"key1, Key2 = service.get_keys()\n",
|
||||
"print(key1)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%matplotlib inline\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import urllib\n",
|
||||
"import gzip\n",
|
||||
"import numpy as np\n",
|
||||
"import struct\n",
|
||||
"import requests\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# load compressed MNIST gz files and return numpy arrays\n",
|
||||
"def load_data(filename, label=False):\n",
|
||||
" with gzip.open(filename) as gz:\n",
|
||||
" struct.unpack('I', gz.read(4))\n",
|
||||
" n_items = struct.unpack('>I', gz.read(4))\n",
|
||||
" if not label:\n",
|
||||
" n_rows = struct.unpack('>I', gz.read(4))[0]\n",
|
||||
" n_cols = struct.unpack('>I', gz.read(4))[0]\n",
|
||||
" res = np.frombuffer(gz.read(n_items[0] * n_rows * n_cols), dtype=np.uint8)\n",
|
||||
" res = res.reshape(n_items[0], n_rows * n_cols)\n",
|
||||
" else:\n",
|
||||
" res = np.frombuffer(gz.read(n_items[0]), dtype=np.uint8)\n",
|
||||
" res = res.reshape(n_items[0], 1)\n",
|
||||
" return res\n",
|
||||
"\n",
|
||||
"os.makedirs('./data/mnist', exist_ok=True)\n",
|
||||
"urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz', filename = './data/mnist/test-images.gz')\n",
|
||||
"urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz', filename = './data/mnist/test-labels.gz')\n",
|
||||
"\n",
|
||||
"X_test = load_data('./data/mnist/test-images.gz', False)\n",
|
||||
"y_test = load_data('./data/mnist/test-labels.gz', True).reshape(-1)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# send a random row from the test set to score\n",
|
||||
"random_index = np.random.randint(0, len(X_test)-1)\n",
|
||||
"input_data = \"{\\\"data\\\": [\" + str(random_index) + \"]}\"\n",
|
||||
"\n",
|
||||
"headers = {'Content-Type':'application/json', 'Authorization': 'Bearer ' + key1}\n",
|
||||
"\n",
|
||||
"# send sample to service for scoring\n",
|
||||
"resp = requests.post(service.scoring_uri, input_data, headers=headers)\n",
|
||||
"\n",
|
||||
"print(\"label:\", y_test[random_index])\n",
|
||||
"print(\"prediction:\", resp.text[1])\n",
|
||||
"\n",
|
||||
"plt.imshow(X_test[random_index].reshape((28,28)), cmap='gray')\n",
|
||||
"plt.axis('off')\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's look at the workspace after the web service was deployed. You should see\n",
|
||||
"\n",
|
||||
" + a registered model named 'chainer-dnn-mnist' and with the id 'chainer-dnn-mnist:1'\n",
|
||||
" + a webservice called 'chainer-mnist-svc' with some scoring URL"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"models = ws.models\n",
|
||||
"for name, model in models.items():\n",
|
||||
" print(\"Model: {}, ID: {}\".format(name, model.id))\n",
|
||||
" \n",
|
||||
"webservices = ws.webservices\n",
|
||||
"for name, webservice in webservices.items():\n",
|
||||
" print(\"Webservice: {}, scoring URI: {}\".format(name, webservice.scoring_uri))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Clean up"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can delete the ACI deployment with a simple delete API call."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"service.delete()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "dipeck"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
"name": "python36"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.6"
|
||||
},
|
||||
"msauthor": "dipeck"
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -0,0 +1,12 @@
|
||||
name: train-hyperparameter-tune-deploy-with-chainer
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-widgets
|
||||
- numpy
|
||||
- matplotlib
|
||||
- json
|
||||
- urllib
|
||||
- gzip
|
||||
- struct
|
||||
- requests
|
||||
@@ -0,0 +1,320 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||
"\n",
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Distributed Chainer\n",
|
||||
"In this tutorial, you will run a Chainer training example on the [MNIST](http://yann.lecun.com/exdb/mnist/) dataset using ChainerMN distributed training across a GPU cluster."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Prerequisites\n",
|
||||
"* If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [Configuration](../../../configuration.ipynb) notebook to install the Azure Machine Learning Python SDK and create an Azure ML `Workspace`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Check core SDK version number\n",
|
||||
"import azureml.core\n",
|
||||
"\n",
|
||||
"print(\"SDK version:\", azureml.core.VERSION)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Diagnostics\n",
|
||||
"Opt-in diagnostics for better experience, quality, and security of future releases."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"Diagnostics"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.telemetry import set_diagnostics_collection\n",
|
||||
"\n",
|
||||
"set_diagnostics_collection(send_diagnostics=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Initialize workspace\n",
|
||||
"\n",
|
||||
"Initialize a [Workspace](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#workspace) object from the existing workspace you created in the Prerequisites step. `Workspace.from_config()` creates a workspace object from the details stored in `config.json`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.workspace import Workspace\n",
|
||||
"\n",
|
||||
"ws = Workspace.from_config()\n",
|
||||
"print('Workspace name: ' + ws.name, \n",
|
||||
" 'Azure region: ' + ws.location, \n",
|
||||
" 'Subscription id: ' + ws.subscription_id, \n",
|
||||
" 'Resource group: ' + ws.resource_group, sep = '\\n')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create or attach existing AmlCompute\n",
|
||||
"You will need to create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for training your model. In this tutorial, we use Azure ML managed compute ([AmlCompute](https://docs.microsoft.com/azure/machine-learning/service/how-to-set-up-training-targets#amlcompute)) for our remote training compute resource. Specifically, the below code creates an `STANDARD_NC6` GPU cluster that autoscales from `0` to `4` nodes.\n",
|
||||
"\n",
|
||||
"**Creation of AmlCompute takes approximately 5 minutes.** If the AmlCompute with that name is already in your workspace, this code will skip the creation process.\n",
|
||||
"\n",
|
||||
"As with other Azure services, there are limits on certain resources (e.g. AmlCompute) associated with the Azure Machine Learning service. Please read [this article](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
|
||||
"from azureml.core.compute_target import ComputeTargetException\n",
|
||||
"\n",
|
||||
"# choose a name for your cluster\n",
|
||||
"cluster_name = \"gpu-cluster\"\n",
|
||||
"\n",
|
||||
"try:\n",
|
||||
" compute_target = ComputeTarget(workspace=ws, name=cluster_name)\n",
|
||||
" print('Found existing compute target.')\n",
|
||||
"except ComputeTargetException:\n",
|
||||
" print('Creating a new compute target...')\n",
|
||||
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6',\n",
|
||||
" max_nodes=4)\n",
|
||||
"\n",
|
||||
" # create the cluster\n",
|
||||
" compute_target = ComputeTarget.create(ws, cluster_name, compute_config)\n",
|
||||
"\n",
|
||||
" compute_target.wait_for_completion(show_output=True)\n",
|
||||
"\n",
|
||||
"# use get_status() to get a detailed status for the current AmlCompute. \n",
|
||||
"print(compute_target.get_status().serialize())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The above code creates GPU compute. If you instead want to create CPU compute, provide a different VM size to the `vm_size` parameter, such as `STANDARD_D2_V2`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Train model on the remote compute\n",
|
||||
"Now that we have the AmlCompute ready to go, let's run our distributed training job."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create a project directory\n",
|
||||
"Create a directory that will contain all the necessary code from your local machine that you will need access to on the remote resource. This includes the training script and any additional files your training script depends on."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"project_folder = './chainer-distr'\n",
|
||||
"os.makedirs(project_folder, exist_ok=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Prepare training script\n",
|
||||
"Now you will need to create your training script. In this tutorial, the script for distributed training of MNIST is already provided for you at `train_mnist.py`. In practice, you should be able to take any custom Chainer training script as is and run it with Azure ML without having to modify your code."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Once your script is ready, copy the training script `train_mnist.py` into the project directory."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import shutil\n",
|
||||
"\n",
|
||||
"shutil.copy('train_mnist.py', project_folder)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create an experiment\n",
|
||||
"Create an [Experiment](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#experiment) to track all the runs in your workspace for this distributed Chainer tutorial. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core import Experiment\n",
|
||||
"\n",
|
||||
"experiment_name = 'chainer-distr'\n",
|
||||
"experiment = Experiment(ws, name=experiment_name)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create a Chainer estimator\n",
|
||||
"The Azure ML SDK's Chainer estimator enables you to easily submit Chainer training jobs for both single-node and distributed runs."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.train.dnn import Chainer, Mpi\n",
|
||||
"\n",
|
||||
"estimator = Chainer(source_directory=project_folder,\n",
|
||||
" compute_target=compute_target,\n",
|
||||
" entry_script='train_mnist.py',\n",
|
||||
" node_count=2,\n",
|
||||
" distributed_training=Mpi(),\n",
|
||||
" use_gpu=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The above code specifies that we will run our training script on `2` nodes, with one worker per node. In order to execute a distributed run using MPI, you must provide the argument `distributed_backend=Mpi()`. To specify `i` workers per node, you must provide the argument `distributed_backend=Mpi(process_count_per_node=i)`.Using this estimator with these settings, Chainer and its dependencies will be installed for you. However, if your script also uses other packages, make sure to install them via the `Chainer` constructor's `pip_packages` or `conda_packages` parameters."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Submit job\n",
|
||||
"Run your experiment by submitting your estimator object. Note that this call is asynchronous."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run = experiment.submit(estimator)\n",
|
||||
"print(run)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Monitor your run\n",
|
||||
"You can monitor the progress of the run with a Jupyter widget. Like the run submission, the widget is asynchronous and provides live updates every 10-15 seconds until the job completes. You can see that the widget automatically plots and visualizes the loss metric that we logged to the Azure ML run."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.widgets import RunDetails\n",
|
||||
"\n",
|
||||
"RunDetails(run).show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run.wait_for_completion(show_output=True)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "ninhu"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
"name": "python36"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -1,8 +1,5 @@
|
||||
name: logging-api
|
||||
name: distributed-chainer
|
||||
dependencies:
|
||||
- numpy
|
||||
- matplotlib
|
||||
- tqdm
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-widgets
|
||||
@@ -0,0 +1,125 @@
|
||||
# Official ChainerMN example taken from
|
||||
# https://github.com/chainer/chainer/blob/master/examples/chainermn/mnist/train_mnist.py
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
import argparse
|
||||
|
||||
import chainer
|
||||
import chainer.functions as F
|
||||
import chainer.links as L
|
||||
from chainer import training
|
||||
from chainer.training import extensions
|
||||
|
||||
import chainermn
|
||||
|
||||
|
||||
class MLP(chainer.Chain):
|
||||
|
||||
def __init__(self, n_units, n_out):
|
||||
super(MLP, self).__init__(
|
||||
# the size of the inputs to each layer will be inferred
|
||||
l1=L.Linear(784, n_units), # n_in -> n_units
|
||||
l2=L.Linear(n_units, n_units), # n_units -> n_units
|
||||
l3=L.Linear(n_units, n_out), # n_units -> n_out
|
||||
)
|
||||
|
||||
def __call__(self, x):
|
||||
h1 = F.relu(self.l1(x))
|
||||
h2 = F.relu(self.l2(h1))
|
||||
return self.l3(h2)
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='ChainerMN example: MNIST')
|
||||
parser.add_argument('--batchsize', '-b', type=int, default=100,
|
||||
help='Number of images in each mini-batch')
|
||||
parser.add_argument('--communicator', type=str,
|
||||
default='non_cuda_aware', help='Type of communicator')
|
||||
parser.add_argument('--epoch', '-e', type=int, default=20,
|
||||
help='Number of sweeps over the dataset to train')
|
||||
parser.add_argument('--gpu', '-g', default=True,
|
||||
help='Use GPU')
|
||||
parser.add_argument('--out', '-o', default='result',
|
||||
help='Directory to output the result')
|
||||
parser.add_argument('--resume', '-r', default='',
|
||||
help='Resume the training from snapshot')
|
||||
parser.add_argument('--unit', '-u', type=int, default=1000,
|
||||
help='Number of units')
|
||||
args = parser.parse_args()
|
||||
|
||||
# Prepare ChainerMN communicator.
|
||||
|
||||
if args.gpu:
|
||||
if args.communicator == 'naive':
|
||||
print("Error: 'naive' communicator does not support GPU.\n")
|
||||
exit(-1)
|
||||
comm = chainermn.create_communicator(args.communicator)
|
||||
device = comm.intra_rank
|
||||
else:
|
||||
if args.communicator != 'naive':
|
||||
print('Warning: using naive communicator '
|
||||
'because only naive supports CPU-only execution')
|
||||
comm = chainermn.create_communicator('naive')
|
||||
device = -1
|
||||
|
||||
if comm.rank == 0:
|
||||
print('==========================================')
|
||||
print('Num process (COMM_WORLD): {}'.format(comm.size))
|
||||
if args.gpu:
|
||||
print('Using GPUs')
|
||||
print('Using {} communicator'.format(args.communicator))
|
||||
print('Num unit: {}'.format(args.unit))
|
||||
print('Num Minibatch-size: {}'.format(args.batchsize))
|
||||
print('Num epoch: {}'.format(args.epoch))
|
||||
print('==========================================')
|
||||
|
||||
model = L.Classifier(MLP(args.unit, 10))
|
||||
if device >= 0:
|
||||
chainer.cuda.get_device_from_id(device).use()
|
||||
model.to_gpu()
|
||||
|
||||
# Create a multi node optimizer from a standard Chainer optimizer.
|
||||
optimizer = chainermn.create_multi_node_optimizer(
|
||||
chainer.optimizers.Adam(), comm)
|
||||
optimizer.setup(model)
|
||||
|
||||
# Split and distribute the dataset. Only worker 0 loads the whole dataset.
|
||||
# Datasets of worker 0 are evenly split and distributed to all workers.
|
||||
if comm.rank == 0:
|
||||
train, test = chainer.datasets.get_mnist()
|
||||
else:
|
||||
train, test = None, None
|
||||
train = chainermn.scatter_dataset(train, comm, shuffle=True)
|
||||
test = chainermn.scatter_dataset(test, comm, shuffle=True)
|
||||
|
||||
train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
|
||||
test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
|
||||
repeat=False, shuffle=False)
|
||||
|
||||
updater = training.StandardUpdater(train_iter, optimizer, device=device)
|
||||
trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
|
||||
|
||||
# Create a multi node evaluator from a standard Chainer evaluator.
|
||||
evaluator = extensions.Evaluator(test_iter, model, device=device)
|
||||
evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
|
||||
trainer.extend(evaluator)
|
||||
|
||||
# Some display and output extensions are necessary only for one worker.
|
||||
# (Otherwise, there would just be repeated outputs.)
|
||||
if comm.rank == 0:
|
||||
trainer.extend(extensions.dump_graph('main/loss'))
|
||||
trainer.extend(extensions.LogReport())
|
||||
trainer.extend(extensions.PrintReport(
|
||||
['epoch', 'main/loss', 'validation/main/loss',
|
||||
'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))
|
||||
trainer.extend(extensions.ProgressBar())
|
||||
|
||||
if args.resume:
|
||||
chainer.serializers.load_npz(args.resume, trainer)
|
||||
|
||||
trainer.run()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@@ -0,0 +1,31 @@
|
||||
# Copyright (c) Microsoft. All rights reserved.
|
||||
# Licensed under the MIT license.
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from torchvision import transforms
|
||||
import json
|
||||
|
||||
from azureml.core.model import Model
|
||||
|
||||
|
||||
def init():
|
||||
global model
|
||||
model_path = Model.get_model_path('pytorch-birds')
|
||||
model = torch.load(model_path, map_location=lambda storage, loc: storage)
|
||||
model.eval()
|
||||
|
||||
|
||||
def run(input_data):
|
||||
input_data = torch.tensor(json.loads(input_data)['data'])
|
||||
|
||||
# get prediction
|
||||
with torch.no_grad():
|
||||
output = model(input_data)
|
||||
classes = ['chicken', 'turkey']
|
||||
softmax = nn.Softmax(dim=1)
|
||||
pred_probs = softmax(output).numpy()[0]
|
||||
index = torch.argmax(output, 1)
|
||||
|
||||
result = {"label": classes[index], "probability": str(pred_probs[index])}
|
||||
return result
|
||||
@@ -0,0 +1,206 @@
|
||||
# Copyright (c) 2017, PyTorch contributors
|
||||
# Modifications copyright (C) Microsoft Corporation
|
||||
# Licensed under the BSD license
|
||||
# Adapted from https://pytorch.org/tutorials/beginner/transfer_learning_tutorial.html
|
||||
|
||||
from __future__ import print_function, division
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.optim as optim
|
||||
from torch.optim import lr_scheduler
|
||||
from torchvision import datasets, models, transforms
|
||||
import numpy as np
|
||||
import time
|
||||
import os
|
||||
import copy
|
||||
import argparse
|
||||
|
||||
from azureml.core.run import Run
|
||||
# get the Azure ML run object
|
||||
run = Run.get_context()
|
||||
|
||||
|
||||
def load_data(data_dir):
|
||||
"""Load the train/val data."""
|
||||
|
||||
# Data augmentation and normalization for training
|
||||
# Just normalization for validation
|
||||
data_transforms = {
|
||||
'train': transforms.Compose([
|
||||
transforms.RandomResizedCrop(224),
|
||||
transforms.RandomHorizontalFlip(),
|
||||
transforms.ToTensor(),
|
||||
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
|
||||
]),
|
||||
'val': transforms.Compose([
|
||||
transforms.Resize(256),
|
||||
transforms.CenterCrop(224),
|
||||
transforms.ToTensor(),
|
||||
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
|
||||
]),
|
||||
}
|
||||
|
||||
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
|
||||
data_transforms[x])
|
||||
for x in ['train', 'val']}
|
||||
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=4,
|
||||
shuffle=True, num_workers=4)
|
||||
for x in ['train', 'val']}
|
||||
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
|
||||
class_names = image_datasets['train'].classes
|
||||
|
||||
return dataloaders, dataset_sizes, class_names
|
||||
|
||||
|
||||
def train_model(model, criterion, optimizer, scheduler, num_epochs, data_dir):
|
||||
"""Train the model."""
|
||||
|
||||
# load training/validation data
|
||||
dataloaders, dataset_sizes, class_names = load_data(data_dir)
|
||||
|
||||
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
|
||||
|
||||
since = time.time()
|
||||
|
||||
best_model_wts = copy.deepcopy(model.state_dict())
|
||||
best_acc = 0.0
|
||||
|
||||
for epoch in range(num_epochs):
|
||||
print('Epoch {}/{}'.format(epoch, num_epochs - 1))
|
||||
print('-' * 10)
|
||||
|
||||
# Each epoch has a training and validation phase
|
||||
for phase in ['train', 'val']:
|
||||
if phase == 'train':
|
||||
scheduler.step()
|
||||
model.train() # Set model to training mode
|
||||
else:
|
||||
model.eval() # Set model to evaluate mode
|
||||
|
||||
running_loss = 0.0
|
||||
running_corrects = 0
|
||||
|
||||
# Iterate over data.
|
||||
for inputs, labels in dataloaders[phase]:
|
||||
inputs = inputs.to(device)
|
||||
labels = labels.to(device)
|
||||
|
||||
# zero the parameter gradients
|
||||
optimizer.zero_grad()
|
||||
|
||||
# forward
|
||||
# track history if only in train
|
||||
with torch.set_grad_enabled(phase == 'train'):
|
||||
outputs = model(inputs)
|
||||
_, preds = torch.max(outputs, 1)
|
||||
loss = criterion(outputs, labels)
|
||||
|
||||
# backward + optimize only if in training phase
|
||||
if phase == 'train':
|
||||
loss.backward()
|
||||
optimizer.step()
|
||||
|
||||
# statistics
|
||||
running_loss += loss.item() * inputs.size(0)
|
||||
running_corrects += torch.sum(preds == labels.data)
|
||||
|
||||
epoch_loss = running_loss / dataset_sizes[phase]
|
||||
epoch_acc = running_corrects.double() / dataset_sizes[phase]
|
||||
|
||||
print('{} Loss: {:.4f} Acc: {:.4f}'.format(
|
||||
phase, epoch_loss, epoch_acc))
|
||||
|
||||
# deep copy the model
|
||||
if phase == 'val' and epoch_acc > best_acc:
|
||||
best_acc = epoch_acc
|
||||
best_model_wts = copy.deepcopy(model.state_dict())
|
||||
|
||||
# log the best val accuracy to AML run
|
||||
run.log('best_val_acc', np.float(best_acc))
|
||||
|
||||
print()
|
||||
|
||||
time_elapsed = time.time() - since
|
||||
print('Training complete in {:.0f}m {:.0f}s'.format(
|
||||
time_elapsed // 60, time_elapsed % 60))
|
||||
print('Best val Acc: {:4f}'.format(best_acc))
|
||||
|
||||
# load best model weights
|
||||
model.load_state_dict(best_model_wts)
|
||||
return model
|
||||
|
||||
|
||||
def fine_tune_model(num_epochs, data_dir, learning_rate, momentum):
|
||||
"""Load a pretrained model and reset the final fully connected layer."""
|
||||
|
||||
# log the hyperparameter metrics to the AML run
|
||||
run.log('lr', np.float(learning_rate))
|
||||
run.log('momentum', np.float(momentum))
|
||||
|
||||
model_ft = models.resnet18(pretrained=True)
|
||||
num_ftrs = model_ft.fc.in_features
|
||||
model_ft.fc = nn.Linear(num_ftrs, 2) # only 2 classes to predict
|
||||
|
||||
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
|
||||
model_ft = model_ft.to(device)
|
||||
|
||||
criterion = nn.CrossEntropyLoss()
|
||||
|
||||
# Observe that all parameters are being optimized
|
||||
optimizer_ft = optim.SGD(model_ft.parameters(),
|
||||
lr=learning_rate, momentum=momentum)
|
||||
|
||||
# Decay LR by a factor of 0.1 every 7 epochs
|
||||
exp_lr_scheduler = lr_scheduler.StepLR(
|
||||
optimizer_ft, step_size=7, gamma=0.1)
|
||||
|
||||
model = train_model(model_ft, criterion, optimizer_ft,
|
||||
exp_lr_scheduler, num_epochs, data_dir)
|
||||
|
||||
return model
|
||||
|
||||
|
||||
def download_data():
|
||||
"""Download and extract the training data."""
|
||||
import urllib
|
||||
from zipfile import ZipFile
|
||||
# download data
|
||||
data_file = './fowl_data.zip'
|
||||
download_url = 'https://msdocsdatasets.blob.core.windows.net/pytorchfowl/fowl_data.zip'
|
||||
urllib.request.urlretrieve(download_url, filename=data_file)
|
||||
|
||||
# extract files
|
||||
with ZipFile(data_file, 'r') as zip:
|
||||
print('extracting files...')
|
||||
zip.extractall()
|
||||
print('finished extracting')
|
||||
data_dir = zip.namelist()[0]
|
||||
|
||||
# delete zip file
|
||||
os.remove(data_file)
|
||||
return data_dir
|
||||
|
||||
|
||||
def main():
|
||||
print("Torch version:", torch.__version__)
|
||||
|
||||
# get command-line arguments
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('--num_epochs', type=int, default=25,
|
||||
help='number of epochs to train')
|
||||
parser.add_argument('--output_dir', type=str, help='output directory')
|
||||
parser.add_argument('--learning_rate', type=float,
|
||||
default=0.001, help='learning rate')
|
||||
parser.add_argument('--momentum', type=float, default=0.9, help='momentum')
|
||||
args = parser.parse_args()
|
||||
|
||||
data_dir = download_data()
|
||||
print("data directory is: " + data_dir)
|
||||
model = fine_tune_model(args.num_epochs, data_dir,
|
||||
args.learning_rate, args.momentum)
|
||||
os.makedirs(args.output_dir, exist_ok=True)
|
||||
torch.save(model, os.path.join(args.output_dir, 'model.pt'))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
After Width: | Height: | Size: 1.6 MiB |
@@ -0,0 +1,715 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved. \n",
|
||||
"\n",
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Train, hyperparameter tune, and deploy with PyTorch\n",
|
||||
"\n",
|
||||
"In this tutorial, you will train, hyperparameter tune, and deploy a PyTorch model using the Azure Machine Learning (Azure ML) Python SDK.\n",
|
||||
"\n",
|
||||
"This tutorial will train an image classification model using transfer learning, based on PyTorch's [Transfer Learning tutorial](https://pytorch.org/tutorials/beginner/transfer_learning_tutorial.html). The model is trained to classify chickens and turkeys by first using a pretrained ResNet18 model that has been trained on the [ImageNet](http://image-net.org/index) dataset."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Prerequisites\n",
|
||||
"* If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [Configuration](../../../configuration.ipynb) notebook to install the Azure Machine Learning Python SDK and create an Azure ML `Workspace`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Check core SDK version number\n",
|
||||
"import azureml.core\n",
|
||||
"\n",
|
||||
"print(\"SDK version:\", azureml.core.VERSION)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Diagnostics\n",
|
||||
"Opt-in diagnostics for better experience, quality, and security of future releases."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"Diagnostics"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.telemetry import set_diagnostics_collection\n",
|
||||
"\n",
|
||||
"set_diagnostics_collection(send_diagnostics=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Initialize workspace\n",
|
||||
"Initialize a [Workspace](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#workspace) object from the existing workspace you created in the Prerequisites step. `Workspace.from_config()` creates a workspace object from the details stored in `config.json`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.workspace import Workspace\n",
|
||||
"\n",
|
||||
"ws = Workspace.from_config()\n",
|
||||
"print('Workspace name: ' + ws.name, \n",
|
||||
" 'Azure region: ' + ws.location, \n",
|
||||
" 'Subscription id: ' + ws.subscription_id, \n",
|
||||
" 'Resource group: ' + ws.resource_group, sep='\\n')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create or Attach existing AmlCompute\n",
|
||||
"You will need to create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for training your model. In this tutorial, we use Azure ML managed compute ([AmlCompute](https://docs.microsoft.com/azure/machine-learning/service/how-to-set-up-training-targets#amlcompute)) for our remote training compute resource.\n",
|
||||
"\n",
|
||||
"**Creation of AmlCompute takes approximately 5 minutes.** If the AmlCompute with that name is already in your workspace, this code will skip the creation process.\n",
|
||||
"\n",
|
||||
"As with other Azure services, there are limits on certain resources (e.g. AmlCompute) associated with the Azure Machine Learning service. Please read [this article](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
|
||||
"from azureml.core.compute_target import ComputeTargetException\n",
|
||||
"\n",
|
||||
"# choose a name for your cluster\n",
|
||||
"cluster_name = \"gpu-cluster\"\n",
|
||||
"\n",
|
||||
"try:\n",
|
||||
" compute_target = ComputeTarget(workspace=ws, name=cluster_name)\n",
|
||||
" print('Found existing compute target.')\n",
|
||||
"except ComputeTargetException:\n",
|
||||
" print('Creating a new compute target...')\n",
|
||||
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6', \n",
|
||||
" max_nodes=4)\n",
|
||||
"\n",
|
||||
" # create the cluster\n",
|
||||
" compute_target = ComputeTarget.create(ws, cluster_name, compute_config)\n",
|
||||
"\n",
|
||||
" compute_target.wait_for_completion(show_output=True)\n",
|
||||
"\n",
|
||||
"# use get_status() to get a detailed status for the current cluster. \n",
|
||||
"print(compute_target.get_status().serialize())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The above code creates a GPU cluster. If you instead want to create a CPU cluster, provide a different VM size to the `vm_size` parameter, such as `STANDARD_D2_V2`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Train model on the remote compute\n",
|
||||
"Now that you have your data and training script prepared, you are ready to train on your remote compute cluster. You can take advantage of Azure compute to leverage GPUs to cut down your training time. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create a project directory\n",
|
||||
"Create a directory that will contain all the necessary code from your local machine that you will need access to on the remote resource. This includes the training script and any additional files your training script depends on."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"project_folder = './pytorch-birds'\n",
|
||||
"os.makedirs(project_folder, exist_ok=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Download training data\n",
|
||||
"The dataset we will use (located on a public blob [here](https://msdocsdatasets.blob.core.windows.net/pytorchfowl/fowl_data.zip) as a zip file) consists of about 120 training images each for turkeys and chickens, with 100 validation images for each class. The images are a subset of the [Open Images v5 Dataset](https://storage.googleapis.com/openimages/web/index.html). We will download and extract the dataset as part of our training script `pytorch_train.py`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Prepare training script\n",
|
||||
"Now you will need to create your training script. In this tutorial, the training script is already provided for you at `pytorch_train.py`. In practice, you should be able to take any custom training script as is and run it with Azure ML without having to modify your code.\n",
|
||||
"\n",
|
||||
"However, if you would like to use Azure ML's [tracking and metrics](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#metrics) capabilities, you will have to add a small amount of Azure ML code inside your training script. \n",
|
||||
"\n",
|
||||
"In `pytorch_train.py`, we will log some metrics to our Azure ML run. To do so, we will access the Azure ML `Run` object within the script:\n",
|
||||
"```Python\n",
|
||||
"from azureml.core.run import Run\n",
|
||||
"run = Run.get_context()\n",
|
||||
"```\n",
|
||||
"Further within `pytorch_train.py`, we log the learning rate and momentum parameters, and the best validation accuracy the model achieves:\n",
|
||||
"```Python\n",
|
||||
"run.log('lr', np.float(learning_rate))\n",
|
||||
"run.log('momentum', np.float(momentum))\n",
|
||||
"\n",
|
||||
"run.log('best_val_acc', np.float(best_acc))\n",
|
||||
"```\n",
|
||||
"These run metrics will become particularly important when we begin hyperparameter tuning our model in the \"Tune model hyperparameters\" section."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Once your script is ready, copy the training script `pytorch_train.py` into your project directory."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import shutil\n",
|
||||
"\n",
|
||||
"shutil.copy('pytorch_train.py', project_folder)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create an experiment\n",
|
||||
"Create an [Experiment](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#experiment) to track all the runs in your workspace for this transfer learning PyTorch tutorial. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core import Experiment\n",
|
||||
"\n",
|
||||
"experiment_name = 'pytorch-birds'\n",
|
||||
"experiment = Experiment(ws, name=experiment_name)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create a PyTorch estimator\n",
|
||||
"The Azure ML SDK's PyTorch estimator enables you to easily submit PyTorch training jobs for both single-node and distributed runs. For more information on the PyTorch estimator, refer [here](https://docs.microsoft.com/azure/machine-learning/service/how-to-train-pytorch). The following code will define a single-node PyTorch job."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"dnn-pytorch-remarks-sample"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.train.dnn import PyTorch\n",
|
||||
"\n",
|
||||
"script_params = {\n",
|
||||
" '--num_epochs': 30,\n",
|
||||
" '--output_dir': './outputs'\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"estimator = PyTorch(source_directory=project_folder, \n",
|
||||
" script_params=script_params,\n",
|
||||
" compute_target=compute_target,\n",
|
||||
" entry_script='pytorch_train.py',\n",
|
||||
" use_gpu=True,\n",
|
||||
" pip_packages=['pillow==5.4.1'])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The `script_params` parameter is a dictionary containing the command-line arguments to your training script `entry_script`. Please note the following:\n",
|
||||
"- We passed our training data reference `ds_data` to our script's `--data_dir` argument. This will 1) mount our datastore on the remote compute and 2) provide the path to the training data `fowl_data` on our datastore.\n",
|
||||
"- We specified the output directory as `./outputs`. The `outputs` directory is specially treated by Azure ML in that all the content in this directory gets uploaded to your workspace as part of your run history. The files written to this directory are therefore accessible even once your remote run is over. In this tutorial, we will save our trained model to this output directory.\n",
|
||||
"\n",
|
||||
"To leverage the Azure VM's GPU for training, we set `use_gpu=True`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Submit job\n",
|
||||
"Run your experiment by submitting your estimator object. Note that this call is asynchronous."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run = experiment.submit(estimator)\n",
|
||||
"print(run)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# to get more details of your run\n",
|
||||
"print(run.get_details())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Monitor your run\n",
|
||||
"You can monitor the progress of the run with a Jupyter widget. Like the run submission, the widget is asynchronous and provides live updates every 10-15 seconds until the job completes."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.widgets import RunDetails\n",
|
||||
"\n",
|
||||
"RunDetails(run).show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Alternatively, you can block until the script has completed training before running more code."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run.wait_for_completion(show_output=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Tune model hyperparameters\n",
|
||||
"Now that we've seen how to do a simple PyTorch training run using the SDK, let's see if we can further improve the accuracy of our model. We can optimize our model's hyperparameters using Azure Machine Learning's hyperparameter tuning capabilities."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Start a hyperparameter sweep\n",
|
||||
"First, we will define the hyperparameter space to sweep over. Since our training script uses a learning rate schedule to decay the learning rate every several epochs, let's tune the initial learning rate and the momentum parameters. In this example we will use random sampling to try different configuration sets of hyperparameters to maximize our primary metric, the best validation accuracy (`best_val_acc`).\n",
|
||||
"\n",
|
||||
"Then, we specify the early termination policy to use to early terminate poorly performing runs. Here we use the `BanditPolicy`, which will terminate any run that doesn't fall within the slack factor of our primary evaluation metric. In this tutorial, we will apply this policy every epoch (since we report our `best_val_acc` metric every epoch and `evaluation_interval=1`). Notice we will delay the first policy evaluation until after the first `10` epochs (`delay_evaluation=10`).\n",
|
||||
"Refer [here](https://docs.microsoft.com/azure/machine-learning/service/how-to-tune-hyperparameters#specify-an-early-termination-policy) for more information on the BanditPolicy and other policies available."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.train.hyperdrive import RandomParameterSampling, BanditPolicy, HyperDriveConfig, uniform, PrimaryMetricGoal\n",
|
||||
"\n",
|
||||
"param_sampling = RandomParameterSampling( {\n",
|
||||
" 'learning_rate': uniform(0.0005, 0.005),\n",
|
||||
" 'momentum': uniform(0.9, 0.99)\n",
|
||||
" }\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"early_termination_policy = BanditPolicy(slack_factor=0.15, evaluation_interval=1, delay_evaluation=10)\n",
|
||||
"\n",
|
||||
"hyperdrive_config = HyperDriveConfig(estimator=estimator,\n",
|
||||
" hyperparameter_sampling=param_sampling, \n",
|
||||
" policy=early_termination_policy,\n",
|
||||
" primary_metric_name='best_val_acc',\n",
|
||||
" primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,\n",
|
||||
" max_total_runs=8,\n",
|
||||
" max_concurrent_runs=4)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Finally, lauch the hyperparameter tuning job."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# start the HyperDrive run\n",
|
||||
"hyperdrive_run = experiment.submit(hyperdrive_config)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Monitor HyperDrive runs\n",
|
||||
"You can monitor the progress of the runs with the following Jupyter widget. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"RunDetails(hyperdrive_run).show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Or block until the HyperDrive sweep has completed:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"hyperdrive_run.wait_for_completion(show_output=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Find and register the best model\n",
|
||||
"Once all the runs complete, we can find the run that produced the model with the highest accuracy."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"best_run = hyperdrive_run.get_best_run_by_primary_metric()\n",
|
||||
"best_run_metrics = best_run.get_metrics()\n",
|
||||
"print(best_run)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print('Best Run is:\\n Validation accuracy: {0:.5f} \\n Learning rate: {1:.5f} \\n Momentum: {2:.5f}'.format(\n",
|
||||
" best_run_metrics['best_val_acc'][-1],\n",
|
||||
" best_run_metrics['lr'],\n",
|
||||
" best_run_metrics['momentum'])\n",
|
||||
" )"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Finally, register the model from your best-performing run to your workspace. The `model_path` parameter takes in the relative path on the remote VM to the model file in your `outputs` directory. In the next section, we will deploy this registered model as a web service."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model = best_run.register_model(model_name = 'pytorch-birds', model_path = 'outputs/model.pt')\n",
|
||||
"print(model.name, model.id, model.version, sep = '\\t')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Deploy model as web service\n",
|
||||
"Once you have your trained model, you can deploy the model on Azure. In this tutorial, we will deploy the model as a web service in [Azure Container Instances](https://docs.microsoft.com/en-us/azure/container-instances/) (ACI). For more information on deploying models using Azure ML, refer [here](https://docs.microsoft.com/azure/machine-learning/service/how-to-deploy-and-where)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create scoring script\n",
|
||||
"\n",
|
||||
"First, we will create a scoring script that will be invoked by the web service call. Note that the scoring script must have two required functions:\n",
|
||||
"* `init()`: In this function, you typically load the model into a `global` object. This function is executed only once when the Docker container is started. \n",
|
||||
"* `run(input_data)`: In this function, the model is used to predict a value based on the input data. The input and output typically use JSON as serialization and deserialization format, but you are not limited to that.\n",
|
||||
"\n",
|
||||
"Refer to the scoring script `pytorch_score.py` for this tutorial. Our web service will use this file to predict whether an image is a chicken or a turkey. When writing your own scoring script, don't forget to test it locally first before you go and deploy the web service."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create environment file\n",
|
||||
"Then, we will need to create an environment file (`myenv.yml`) that specifies all of the scoring script's package dependencies. This file is used to ensure that all of those dependencies are installed in the Docker image by Azure ML. In this case, we need to specify `azureml-core`, `torch` and `torchvision`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.conda_dependencies import CondaDependencies \n",
|
||||
"\n",
|
||||
"myenv = CondaDependencies.create(pip_packages=['azureml-defaults', 'torch', 'torchvision'])\n",
|
||||
"\n",
|
||||
"with open(\"myenv.yml\",\"w\") as f:\n",
|
||||
" f.write(myenv.serialize_to_string())\n",
|
||||
" \n",
|
||||
"print(myenv.serialize_to_string())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Deploy to ACI container\n",
|
||||
"We are ready to deploy. Create an inference configuration which gives specifies the inferencing environment and scripts. Create a deployment configuration file to specify the number of CPUs and gigabytes of RAM needed for your ACI container. While it depends on your model, the default of `1` core and `1` gigabyte of RAM is usually sufficient for many models. This cell will run for about 7-8 minutes."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.webservice import AciWebservice\n",
|
||||
"from azureml.core.model import InferenceConfig\n",
|
||||
"from azureml.core.webservice import Webservice\n",
|
||||
"from azureml.core.model import Model\n",
|
||||
"\n",
|
||||
"inference_config = InferenceConfig(runtime= \"python\", \n",
|
||||
" entry_script=\"pytorch_score.py\",\n",
|
||||
" conda_file=\"myenv.yml\")\n",
|
||||
"\n",
|
||||
"aciconfig = AciWebservice.deploy_configuration(cpu_cores=1, \n",
|
||||
" memory_gb=1, \n",
|
||||
" tags={'data': 'birds', 'method':'transfer learning', 'framework':'pytorch'},\n",
|
||||
" description='Classify turkey/chickens using transfer learning with PyTorch')\n",
|
||||
"\n",
|
||||
"service = Model.deploy(workspace=ws, \n",
|
||||
" name='aci-birds', \n",
|
||||
" models=[model], \n",
|
||||
" inference_config=inference_config, \n",
|
||||
" deployment_config=aciconfig)\n",
|
||||
"service.wait_for_deployment(True)\n",
|
||||
"print(service.state)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If your deployment fails for any reason and you need to redeploy, make sure to delete the service before you do so: `service.delete()`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Tip: If something goes wrong with the deployment, the first thing to look at is the logs from the service by running the following command:**"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"service.get_logs()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Get the web service's HTTP endpoint, which accepts REST client calls. This endpoint can be shared with anyone who wants to test the web service or integrate it into an application."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(service.scoring_uri)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Test the web service\n",
|
||||
"Finally, let's test our deployed web service. We will send the data as a JSON string to the web service hosted in ACI and use the SDK's `run` API to invoke the service. Here we will take an image from our validation data to predict on."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import json\n",
|
||||
"from PIL import Image\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"\n",
|
||||
"%matplotlib inline\n",
|
||||
"plt.imshow(Image.open('test_img.jpg'))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import torch\n",
|
||||
"from torchvision import transforms\n",
|
||||
" \n",
|
||||
"def preprocess(image_file):\n",
|
||||
" \"\"\"Preprocess the input image.\"\"\"\n",
|
||||
" data_transforms = transforms.Compose([\n",
|
||||
" transforms.Resize(256),\n",
|
||||
" transforms.CenterCrop(224),\n",
|
||||
" transforms.ToTensor(),\n",
|
||||
" transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])\n",
|
||||
" ])\n",
|
||||
"\n",
|
||||
" image = Image.open(image_file)\n",
|
||||
" image = data_transforms(image).float()\n",
|
||||
" image = torch.tensor(image)\n",
|
||||
" image = image.unsqueeze(0)\n",
|
||||
" return image.numpy()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"input_data = preprocess('test_img.jpg')\n",
|
||||
"result = service.run(input_data=json.dumps({'data': input_data.tolist()}))\n",
|
||||
"print(result)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Clean up\n",
|
||||
"Once you no longer need the web service, you can delete it with a simple API call."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"service.delete()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "ninhu"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
"name": "python36"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -1,8 +1,9 @@
|
||||
name: train-and-deploy-pytorch
|
||||
name: train-hyperparameter-tune-deploy-with-pytorch
|
||||
dependencies:
|
||||
- matplotlib
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-mlflow
|
||||
- azureml-widgets
|
||||
- pillow==5.4.1
|
||||
- matplotlib
|
||||
- https://download.pytorch.org/whl/cpu/torch-1.1.0-cp35-cp35m-win_amd64.whl
|
||||
- https://download.pytorch.org/whl/cpu/torchvision-0.3.0-cp35-cp35m-win_amd64.whl
|
||||
@@ -0,0 +1,340 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||
"\n",
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Distributed PyTorch with Horovod\n",
|
||||
"In this tutorial, you will train a PyTorch model on the [MNIST](http://yann.lecun.com/exdb/mnist/) dataset using distributed training via [Horovod](https://github.com/uber/horovod) across a GPU cluster."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Prerequisites\n",
|
||||
"* If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [Configuration](../../../configuration.ipynb) notebook to install the Azure Machine Learning Python SDK and create an Azure ML `Workspace`\n",
|
||||
"* Review the [tutorial](../train-hyperparameter-tune-deploy-with-pytorch/train-hyperparameter-tune-deploy-with-pytorch.ipynb) on single-node PyTorch training using Azure Machine Learning"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Check core SDK version number\n",
|
||||
"import azureml.core\n",
|
||||
"\n",
|
||||
"print(\"SDK version:\", azureml.core.VERSION)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Diagnostics\n",
|
||||
"Opt-in diagnostics for better experience, quality, and security of future releases."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"Diagnostics"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.telemetry import set_diagnostics_collection\n",
|
||||
"\n",
|
||||
"set_diagnostics_collection(send_diagnostics=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Initialize workspace\n",
|
||||
"\n",
|
||||
"Initialize a [Workspace](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#workspace) object from the existing workspace you created in the Prerequisites step. `Workspace.from_config()` creates a workspace object from the details stored in `config.json`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.workspace import Workspace\n",
|
||||
"\n",
|
||||
"ws = Workspace.from_config()\n",
|
||||
"print('Workspace name: ' + ws.name, \n",
|
||||
" 'Azure region: ' + ws.location, \n",
|
||||
" 'Subscription id: ' + ws.subscription_id, \n",
|
||||
" 'Resource group: ' + ws.resource_group, sep='\\n')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create or attach existing AmlCompute\n",
|
||||
"You will need to create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for training your model. In this tutorial, we use Azure ML managed compute ([AmlCompute](https://docs.microsoft.com/azure/machine-learning/service/how-to-set-up-training-targets#amlcompute)) for our remote training compute resource. Specifically, the below code creates an `STANDARD_NC6` GPU cluster that autoscales from `0` to `4` nodes.\n",
|
||||
"\n",
|
||||
"**Creation of AmlCompute takes approximately 5 minutes.** If the AmlCompute with that name is already in your workspace, this code will skip the creation process.\n",
|
||||
"\n",
|
||||
"As with other Azure services, there are limits on certain resources (e.g. AmlCompute) associated with the Azure Machine Learning service. Please read [this article](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
|
||||
"from azureml.core.compute_target import ComputeTargetException\n",
|
||||
"\n",
|
||||
"# choose a name for your cluster\n",
|
||||
"cluster_name = \"gpu-cluster\"\n",
|
||||
"\n",
|
||||
"try:\n",
|
||||
" compute_target = ComputeTarget(workspace=ws, name=cluster_name)\n",
|
||||
" print('Found existing compute target.')\n",
|
||||
"except ComputeTargetException:\n",
|
||||
" print('Creating a new compute target...')\n",
|
||||
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6',\n",
|
||||
" max_nodes=4)\n",
|
||||
"\n",
|
||||
" # create the cluster\n",
|
||||
" compute_target = ComputeTarget.create(ws, cluster_name, compute_config)\n",
|
||||
"\n",
|
||||
" compute_target.wait_for_completion(show_output=True)\n",
|
||||
"\n",
|
||||
"# use get_status() to get a detailed status for the current AmlCompute. \n",
|
||||
"print(compute_target.get_status().serialize())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The above code creates GPU compute. If you instead want to create CPU compute, provide a different VM size to the `vm_size` parameter, such as `STANDARD_D2_V2`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Train model on the remote compute\n",
|
||||
"Now that we have the AmlCompute ready to go, let's run our distributed training job."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create a project directory\n",
|
||||
"Create a directory that will contain all the necessary code from your local machine that you will need access to on the remote resource. This includes the training script and any additional files your training script depends on."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"project_folder = './pytorch-distr-hvd'\n",
|
||||
"os.makedirs(project_folder, exist_ok=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Prepare training script\n",
|
||||
"Now you will need to create your training script. In this tutorial, the script for distributed training of MNIST is already provided for you at `pytorch_horovod_mnist.py`. In practice, you should be able to take any custom PyTorch training script as is and run it with Azure ML without having to modify your code.\n",
|
||||
"\n",
|
||||
"However, if you would like to use Azure ML's [metric logging](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#logging) capabilities, you will have to add a small amount of Azure ML logic inside your training script. In this example, at each logging interval, we will log the loss for that minibatch to our Azure ML run.\n",
|
||||
"\n",
|
||||
"To do so, in `pytorch_horovod_mnist.py`, we will first access the Azure ML `Run` object within the script:\n",
|
||||
"```Python\n",
|
||||
"from azureml.core.run import Run\n",
|
||||
"run = Run.get_context()\n",
|
||||
"```\n",
|
||||
"Later within the script, we log the loss metric to our run:\n",
|
||||
"```Python\n",
|
||||
"run.log('loss', loss.item())\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Once your script is ready, copy the training script `pytorch_horovod_mnist.py` into the project directory."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import shutil\n",
|
||||
"\n",
|
||||
"shutil.copy('pytorch_horovod_mnist.py', project_folder)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create an experiment\n",
|
||||
"Create an [Experiment](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#experiment) to track all the runs in your workspace for this distributed PyTorch tutorial. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core import Experiment\n",
|
||||
"\n",
|
||||
"experiment_name = 'pytorch-distr-hvd'\n",
|
||||
"experiment = Experiment(ws, name=experiment_name)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create a PyTorch estimator\n",
|
||||
"The Azure ML SDK's PyTorch estimator enables you to easily submit PyTorch training jobs for both single-node and distributed runs. For more information on the PyTorch estimator, refer [here](https://docs.microsoft.com/azure/machine-learning/service/how-to-train-pytorch)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.train.dnn import PyTorch, Mpi\n",
|
||||
"\n",
|
||||
"estimator = PyTorch(source_directory=project_folder,\n",
|
||||
" compute_target=compute_target,\n",
|
||||
" entry_script='pytorch_horovod_mnist.py',\n",
|
||||
" node_count=2,\n",
|
||||
" distributed_training=Mpi(),\n",
|
||||
" use_gpu=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The above code specifies that we will run our training script on `2` nodes, with one worker per node. In order to execute a distributed run using MPI/Horovod, you must provide the argument `distributed_backend=Mpi()`. To specify `i` workers per node, you must provide the argument `distributed_backend=Mpi(process_count_per_node=i)`. Using this estimator with these settings, PyTorch, Horovod and their dependencies will be installed for you. However, if your script also uses other packages, make sure to install them via the `PyTorch` constructor's `pip_packages` or `conda_packages` parameters."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Submit job\n",
|
||||
"Run your experiment by submitting your estimator object. Note that this call is asynchronous."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run = experiment.submit(estimator)\n",
|
||||
"print(run)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Monitor your run\n",
|
||||
"You can monitor the progress of the run with a Jupyter widget. Like the run submission, the widget is asynchronous and provides live updates every 10-15 seconds until the job completes. You can see that the widget automatically plots and visualizes the loss metric that we logged to the Azure ML run."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.widgets import RunDetails\n",
|
||||
"\n",
|
||||
"RunDetails(run).show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Alternatively, you can block until the script has completed training before running more code."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run.wait_for_completion(show_output=True) # this provides a verbose log"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "ninhu"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
"name": "python36"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -0,0 +1,5 @@
|
||||
name: distributed-pytorch-with-horovod
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-widgets
|
||||
@@ -0,0 +1,170 @@
|
||||
# Copyright (c) 2017, PyTorch contributors
|
||||
# Modifications copyright (C) Microsoft Corporation
|
||||
# Licensed under the BSD license
|
||||
# Adapted from https://github.com/uber/horovod/blob/master/examples/pytorch_mnist.py
|
||||
|
||||
from __future__ import print_function
|
||||
import argparse
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
import torch.optim as optim
|
||||
from torchvision import datasets, transforms
|
||||
import torch.utils.data.distributed
|
||||
import horovod.torch as hvd
|
||||
|
||||
from azureml.core.run import Run
|
||||
# get the Azure ML run object
|
||||
run = Run.get_context()
|
||||
|
||||
print("Torch version:", torch.__version__)
|
||||
|
||||
# Training settings
|
||||
parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
|
||||
parser.add_argument('--batch-size', type=int, default=64, metavar='N',
|
||||
help='input batch size for training (default: 64)')
|
||||
parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
|
||||
help='input batch size for testing (default: 1000)')
|
||||
parser.add_argument('--epochs', type=int, default=10, metavar='N',
|
||||
help='number of epochs to train (default: 10)')
|
||||
parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
|
||||
help='learning rate (default: 0.01)')
|
||||
parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
|
||||
help='SGD momentum (default: 0.5)')
|
||||
parser.add_argument('--no-cuda', action='store_true', default=False,
|
||||
help='disables CUDA training')
|
||||
parser.add_argument('--seed', type=int, default=42, metavar='S',
|
||||
help='random seed (default: 42)')
|
||||
parser.add_argument('--log-interval', type=int, default=10, metavar='N',
|
||||
help='how many batches to wait before logging training status')
|
||||
parser.add_argument('--fp16-allreduce', action='store_true', default=False,
|
||||
help='use fp16 compression during allreduce')
|
||||
args = parser.parse_args()
|
||||
args.cuda = not args.no_cuda and torch.cuda.is_available()
|
||||
|
||||
hvd.init()
|
||||
torch.manual_seed(args.seed)
|
||||
|
||||
if args.cuda:
|
||||
# Horovod: pin GPU to local rank.
|
||||
torch.cuda.set_device(hvd.local_rank())
|
||||
torch.cuda.manual_seed(args.seed)
|
||||
|
||||
|
||||
kwargs = {}
|
||||
train_dataset = \
|
||||
datasets.MNIST('data-%d' % hvd.rank(), train=True, download=True,
|
||||
transform=transforms.Compose([
|
||||
transforms.ToTensor(),
|
||||
transforms.Normalize((0.1307,), (0.3081,))
|
||||
]))
|
||||
train_sampler = torch.utils.data.distributed.DistributedSampler(
|
||||
train_dataset, num_replicas=hvd.size(), rank=hvd.rank())
|
||||
train_loader = torch.utils.data.DataLoader(
|
||||
train_dataset, batch_size=args.batch_size, sampler=train_sampler, **kwargs)
|
||||
|
||||
test_dataset = \
|
||||
datasets.MNIST('data-%d' % hvd.rank(), train=False, transform=transforms.Compose([
|
||||
transforms.ToTensor(),
|
||||
transforms.Normalize((0.1307,), (0.3081,))
|
||||
]))
|
||||
test_sampler = torch.utils.data.distributed.DistributedSampler(
|
||||
test_dataset, num_replicas=hvd.size(), rank=hvd.rank())
|
||||
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=args.test_batch_size,
|
||||
sampler=test_sampler, **kwargs)
|
||||
|
||||
|
||||
class Net(nn.Module):
|
||||
def __init__(self):
|
||||
super(Net, self).__init__()
|
||||
self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
|
||||
self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
|
||||
self.conv2_drop = nn.Dropout2d()
|
||||
self.fc1 = nn.Linear(320, 50)
|
||||
self.fc2 = nn.Linear(50, 10)
|
||||
|
||||
def forward(self, x):
|
||||
x = F.relu(F.max_pool2d(self.conv1(x), 2))
|
||||
x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
|
||||
x = x.view(-1, 320)
|
||||
x = F.relu(self.fc1(x))
|
||||
x = F.dropout(x, training=self.training)
|
||||
x = self.fc2(x)
|
||||
return F.log_softmax(x)
|
||||
|
||||
|
||||
model = Net()
|
||||
|
||||
if args.cuda:
|
||||
# Move model to GPU.
|
||||
model.cuda()
|
||||
|
||||
# Horovod: broadcast parameters.
|
||||
hvd.broadcast_parameters(model.state_dict(), root_rank=0)
|
||||
|
||||
# Horovod: scale learning rate by the number of GPUs.
|
||||
optimizer = optim.SGD(model.parameters(), lr=args.lr * hvd.size(),
|
||||
momentum=args.momentum)
|
||||
|
||||
# Horovod: (optional) compression algorithm.
|
||||
compression = hvd.Compression.fp16 if args.fp16_allreduce else hvd.Compression.none
|
||||
|
||||
# Horovod: wrap optimizer with DistributedOptimizer.
|
||||
optimizer = hvd.DistributedOptimizer(optimizer,
|
||||
named_parameters=model.named_parameters(),
|
||||
compression=compression)
|
||||
|
||||
|
||||
def train(epoch):
|
||||
model.train()
|
||||
train_sampler.set_epoch(epoch)
|
||||
for batch_idx, (data, target) in enumerate(train_loader):
|
||||
if args.cuda:
|
||||
data, target = data.cuda(), target.cuda()
|
||||
optimizer.zero_grad()
|
||||
output = model(data)
|
||||
loss = F.nll_loss(output, target)
|
||||
loss.backward()
|
||||
optimizer.step()
|
||||
if batch_idx % args.log_interval == 0:
|
||||
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
|
||||
epoch, batch_idx * len(data), len(train_sampler),
|
||||
100. * batch_idx / len(train_loader), loss.item()))
|
||||
|
||||
# log the loss to the Azure ML run
|
||||
run.log('loss', loss.item())
|
||||
|
||||
|
||||
def metric_average(val, name):
|
||||
tensor = torch.tensor(val)
|
||||
avg_tensor = hvd.allreduce(tensor, name=name)
|
||||
return avg_tensor.item()
|
||||
|
||||
|
||||
def test():
|
||||
model.eval()
|
||||
test_loss = 0.
|
||||
test_accuracy = 0.
|
||||
for data, target in test_loader:
|
||||
if args.cuda:
|
||||
data, target = data.cuda(), target.cuda()
|
||||
output = model(data)
|
||||
# sum up batch loss
|
||||
test_loss += F.nll_loss(output, target, size_average=False).item()
|
||||
# get the index of the max log-probability
|
||||
pred = output.data.max(1, keepdim=True)[1]
|
||||
test_accuracy += pred.eq(target.data.view_as(pred)).cpu().float().sum()
|
||||
|
||||
test_loss /= len(test_sampler)
|
||||
test_accuracy /= len(test_sampler)
|
||||
|
||||
test_loss = metric_average(test_loss, 'avg_loss')
|
||||
test_accuracy = metric_average(test_accuracy, 'avg_accuracy')
|
||||
|
||||
if hvd.rank() == 0:
|
||||
print('\nTest set: Average loss: {:.4f}, Accuracy: {:.2f}%\n'.format(
|
||||
test_loss, 100. * test_accuracy))
|
||||
|
||||
|
||||
for epoch in range(1, args.epochs + 1):
|
||||
train(epoch)
|
||||
test()
|
||||
@@ -0,0 +1,382 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||
"\n",
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Distributed PyTorch \n",
|
||||
"In this tutorial, you will train a PyTorch model on the [MNIST](http://yann.lecun.com/exdb/mnist/) dataset using distributed training via Nccl/Gloo across a GPU cluster. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Prerequisites\n",
|
||||
"* If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [Configuration](../../../configuration.ipynb) notebook to install the Azure Machine Learning Python SDK and create an Azure ML `Workspace`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Check core SDK version number\n",
|
||||
"import azureml.core\n",
|
||||
"\n",
|
||||
"print(\"SDK version:\", azureml.core.VERSION)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Diagnostics\n",
|
||||
"Opt-in diagnostics for better experience, quality, and security of future releases."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"Diagnostics"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.telemetry import set_diagnostics_collection\n",
|
||||
"\n",
|
||||
"set_diagnostics_collection(send_diagnostics=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Initialize workspace\n",
|
||||
"\n",
|
||||
"Initialize a [Workspace](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#workspace) object from the existing workspace you created in the Prerequisites step. `Workspace.from_config()` creates a workspace object from the details stored in `config.json`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.workspace import Workspace\n",
|
||||
"\n",
|
||||
"ws = Workspace.from_config()\n",
|
||||
"print('Workspace name: ' + ws.name, \n",
|
||||
" 'Azure region: ' + ws.location, \n",
|
||||
" 'Subscription id: ' + ws.subscription_id, \n",
|
||||
" 'Resource group: ' + ws.resource_group, sep='\\n')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create or attach existing AmlCompute\n",
|
||||
"You will need to create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for training your model. In this tutorial, we use Azure ML managed compute ([AmlCompute](https://docs.microsoft.com/azure/machine-learning/service/how-to-set-up-training-targets#amlcompute)) for our remote training compute resource. Specifically, the below code creates an `STANDARD_NC6` GPU cluster that autoscales from `0` to `4` nodes.\n",
|
||||
"\n",
|
||||
"**Creation of AmlCompute takes approximately 5 minutes.** If the AmlCompute with that name is already in your workspace, this code will skip the creation process.\n",
|
||||
"\n",
|
||||
"As with other Azure services, there are limits on certain resources (e.g. AmlCompute) associated with the Azure Machine Learning service. Please read [this article](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
|
||||
"from azureml.core.compute_target import ComputeTargetException\n",
|
||||
"\n",
|
||||
"# choose a name for your cluster\n",
|
||||
"cluster_name = \"gpu-cluster\"\n",
|
||||
"\n",
|
||||
"try:\n",
|
||||
" compute_target = ComputeTarget(workspace=ws, name=cluster_name)\n",
|
||||
" print('Found existing compute target.')\n",
|
||||
"except ComputeTargetException:\n",
|
||||
" print('Creating a new compute target...')\n",
|
||||
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6',\n",
|
||||
" max_nodes=4)\n",
|
||||
"\n",
|
||||
" # create the cluster\n",
|
||||
" compute_target = ComputeTarget.create(ws, cluster_name, compute_config)\n",
|
||||
"\n",
|
||||
" compute_target.wait_for_completion(show_output=True)\n",
|
||||
"\n",
|
||||
"# use get_status() to get a detailed status for the current AmlCompute. \n",
|
||||
"print(compute_target.get_status().serialize())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The above code creates GPU compute. If you instead want to create CPU compute, provide a different VM size to the `vm_size` parameter, such as `STANDARD_D2_V2`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Train model on the remote compute\n",
|
||||
"Now that we have the AmlCompute ready to go, let's run our distributed training job."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create a project directory\n",
|
||||
"Create a directory that will contain all the necessary code from your local machine that you will need access to on the remote resource. This includes the training script and any additional files your training script depends on."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"project_folder = './pytorch-distr'\n",
|
||||
"os.makedirs(project_folder, exist_ok=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Prepare training script\n",
|
||||
"Now you will need to create your training script. In this tutorial, the script for distributed training of MNIST is already provided for you at `pytorch_mnist.py`. In practice, you should be able to take any custom PyTorch training script as is and run it with Azure ML without having to modify your code.\n",
|
||||
"\n",
|
||||
"However, if you would like to use Azure ML's [metric logging](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#logging) capabilities, you will have to add a small amount of Azure ML logic inside your training script. In this example, at each logging interval, we will log the loss for that minibatch to our Azure ML run.\n",
|
||||
"\n",
|
||||
"To do so, in `pytorch_mnist.py`, we will first access the Azure ML `Run` object within the script:\n",
|
||||
"```Python\n",
|
||||
"from azureml.core.run import Run\n",
|
||||
"run = Run.get_context()\n",
|
||||
"```\n",
|
||||
"Later within the script, we log the loss metric to our run:\n",
|
||||
"```Python\n",
|
||||
"run.log('loss', losses.avg)\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Once your script is ready, copy the training script `pytorch_mnist.py` into the project directory."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import shutil\n",
|
||||
"\n",
|
||||
"shutil.copy('pytorch_mnist.py', project_folder)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create an experiment\n",
|
||||
"Create an [Experiment](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#experiment) to track all the runs in your workspace for this distributed PyTorch tutorial. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core import Experiment\n",
|
||||
"\n",
|
||||
"experiment_name = 'pytorch-distr'\n",
|
||||
"experiment = Experiment(ws, name=experiment_name)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create a PyTorch estimator(Nccl Backend)\n",
|
||||
"The Azure ML SDK's PyTorch estimator enables you to easily submit PyTorch training jobs for both single-node and distributed runs. For more information on the PyTorch estimator, refer [here](https://docs.microsoft.com/azure/machine-learning/service/how-to-train-pytorch)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.train.dnn import PyTorch, Nccl\n",
|
||||
"\n",
|
||||
"estimator = PyTorch(source_directory=project_folder,\n",
|
||||
" script_params={\"--dist-backend\" : \"nccl\",\n",
|
||||
" \"--dist-url\": \"$AZ_BATCHAI_PYTORCH_INIT_METHOD\",\n",
|
||||
" \"--rank\": \"$AZ_BATCHAI_TASK_INDEX\",\n",
|
||||
" \"--world-size\": 2},\n",
|
||||
" compute_target=compute_target,\n",
|
||||
" entry_script='pytorch_mnist.py',\n",
|
||||
" node_count=2,\n",
|
||||
" distributed_training=Nccl(),\n",
|
||||
" use_gpu=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"In the above code, `script_params` uses Azure ML generated `AZ_BATCHAI_PYTORCH_INIT_METHOD` for shared file-system initialization and `AZ_BATCHAI_TASK_INDEX` as rank of each worker process.\n",
|
||||
"The above code specifies that we will run our training script on `2` nodes, with one worker per node. In order to execute a distributed run using Nccl, you must provide the argument `distributed_training=Nccl()`. Using this estimator with these settings, PyTorch and dependencies will be installed for you. However, if your script also uses other packages, make sure to install them via the `PyTorch` constructor's `pip_packages` or `conda_packages` parameters."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Submit job\n",
|
||||
"Run your experiment by submitting your estimator object. Note that this call is asynchronous."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run = experiment.submit(estimator)\n",
|
||||
"print(run)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Monitor your run\n",
|
||||
"You can monitor the progress of the run with a Jupyter widget. Like the run submission, the widget is asynchronous and provides live updates every 10-15 seconds until the job completes. You can see that the widget automatically plots and visualizes the loss metric that we logged to the Azure ML run."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.widgets import RunDetails\n",
|
||||
"\n",
|
||||
"RunDetails(run).show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Alternatively, you can block until the script has completed training before running more code."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run.wait_for_completion(show_output=True) # this provides a verbose log"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create a PyTorch estimator(Gloo Backend)\n",
|
||||
"The Azure ML SDK's PyTorch estimator enables you to easily submit PyTorch training jobs for both single-node and distributed runs. For more information on the PyTorch estimator, refer [here](https://docs.microsoft.com/azure/machine-learning/service/how-to-train-pytorch)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.train.dnn import PyTorch, Gloo\n",
|
||||
"\n",
|
||||
"estimator = PyTorch(source_directory=project_folder,\n",
|
||||
" script_params={\"--dist-backend\" : \"gloo\",\n",
|
||||
" \"--dist-url\": \"$AZ_BATCHAI_PYTORCH_INIT_METHOD\",\n",
|
||||
" \"--rank\": \"$AZ_BATCHAI_TASK_INDEX\",\n",
|
||||
" \"--world-size\": 2},\n",
|
||||
" compute_target=compute_target,\n",
|
||||
" entry_script='pytorch_mnist.py',\n",
|
||||
" node_count=2,\n",
|
||||
" distributed_training=Gloo(),\n",
|
||||
" use_gpu=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"In the above code, `script_params` uses Azure ML generated `AZ_BATCHAI_PYTORCH_INIT_METHOD` for shared file-system initialization and `AZ_BATCHAI_TASK_INDEX` as rank of each worker process.\n",
|
||||
"The above code specifies that we will run our training script on `2` nodes, with one worker per node. In order to execute a distributed run using Gloo, you must provide the argument `distributed_training=Gloo()`. Using this estimator with these settings, PyTorch and dependencies will be installed for you. However, if your script also uses other packages, make sure to install them via the `PyTorch` constructor's `pip_packages` or `conda_packages` parameters.\n",
|
||||
"\n",
|
||||
"Once you create the estimaotr you can follow the submit steps as shown above to submit a PyTorch run with `Gloo` backend. "
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "ninhu"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
"name": "python36"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -0,0 +1,5 @@
|
||||
name: distributed-pytorch-with-nccl-gloo
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-widgets
|
||||
@@ -0,0 +1,209 @@
|
||||
# Copyright (c) 2017, PyTorch contributors
|
||||
# Modifications copyright (C) Microsoft Corporation
|
||||
# Licensed under the BSD license
|
||||
# Adapted from https://github.com/Azure/BatchAI/tree/master/recipes/PyTorch/PyTorch-GPU-Distributed-Gloo
|
||||
|
||||
from __future__ import print_function
|
||||
import argparse
|
||||
import os
|
||||
import shutil
|
||||
import time
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
import torch.optim as optim
|
||||
from torchvision import datasets, transforms
|
||||
import torch.nn.parallel
|
||||
import torch.backends.cudnn as cudnn
|
||||
import torch.distributed as dist
|
||||
import torch.utils.data
|
||||
import torch.utils.data.distributed
|
||||
import torchvision.models as models
|
||||
|
||||
from azureml.core.run import Run
|
||||
# get the Azure ML run object
|
||||
run = Run.get_context()
|
||||
|
||||
# Training settings
|
||||
parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
|
||||
parser.add_argument('--batch-size', type=int, default=64, metavar='N',
|
||||
help='input batch size for training (default: 64)')
|
||||
parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
|
||||
help='input batch size for testing (default: 1000)')
|
||||
parser.add_argument('--epochs', type=int, default=10, metavar='N',
|
||||
help='number of epochs to train (default: 10)')
|
||||
parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
|
||||
help='learning rate (default: 0.01)')
|
||||
parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
|
||||
help='SGD momentum (default: 0.5)')
|
||||
parser.add_argument('--seed', type=int, default=1, metavar='S',
|
||||
help='random seed (default: 1)')
|
||||
parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
|
||||
help='number of data loading workers (default: 4)')
|
||||
parser.add_argument('--log-interval', type=int, default=10, metavar='N',
|
||||
help='how many batches to wait before logging training status')
|
||||
parser.add_argument('--weight-decay', '--wd', default=1e-4, type=float,
|
||||
metavar='W', help='weight decay (default: 1e-4)')
|
||||
parser.add_argument('--world-size', default=1, type=int,
|
||||
help='number of distributed processes')
|
||||
parser.add_argument('--dist-url', type=str,
|
||||
help='url used to set up distributed training')
|
||||
parser.add_argument('--dist-backend', default='nccl', type=str,
|
||||
help='distributed backend')
|
||||
parser.add_argument('--rank', default=-1, type=int,
|
||||
help='rank of the worker')
|
||||
|
||||
best_prec1 = 0
|
||||
args = parser.parse_args()
|
||||
|
||||
args.distributed = args.world_size >= 2
|
||||
|
||||
if args.distributed:
|
||||
dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
|
||||
world_size=args.world_size, rank=args.rank)
|
||||
|
||||
train_dataset = datasets.MNIST('data', train=True, download=True,
|
||||
transform=transforms.Compose([
|
||||
transforms.ToTensor(),
|
||||
transforms.Normalize((0.1307,), (0.3081,))
|
||||
]))
|
||||
|
||||
if args.distributed:
|
||||
train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
|
||||
else:
|
||||
train_sampler = None
|
||||
|
||||
train_loader = torch.utils.data.DataLoader(
|
||||
train_dataset,
|
||||
batch_size=args.batch_size, shuffle=(train_sampler is None),
|
||||
num_workers=args.workers, pin_memory=True, sampler=train_sampler)
|
||||
|
||||
|
||||
test_loader = torch.utils.data.DataLoader(
|
||||
train_dataset,
|
||||
batch_size=args.batch_size, shuffle=False,
|
||||
num_workers=args.workers, pin_memory=True)
|
||||
|
||||
|
||||
class Net(nn.Module):
|
||||
def __init__(self):
|
||||
super(Net, self).__init__()
|
||||
self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
|
||||
self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
|
||||
self.conv2_drop = nn.Dropout2d()
|
||||
self.fc1 = nn.Linear(320, 50)
|
||||
self.fc2 = nn.Linear(50, 10)
|
||||
|
||||
def forward(self, x):
|
||||
x = F.relu(F.max_pool2d(self.conv1(x), 2))
|
||||
x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
|
||||
x = x.view(-1, 320)
|
||||
x = F.relu(self.fc1(x))
|
||||
x = F.dropout(x, training=self.training)
|
||||
x = self.fc2(x)
|
||||
return F.log_softmax(x)
|
||||
|
||||
|
||||
model = Net()
|
||||
|
||||
if not args.distributed:
|
||||
model = torch.nn.DataParallel(model).cuda()
|
||||
else:
|
||||
model.cuda()
|
||||
model = torch.nn.parallel.DistributedDataParallel(model)
|
||||
|
||||
# define loss function (criterion) and optimizer
|
||||
criterion = nn.CrossEntropyLoss().cuda()
|
||||
|
||||
optimizer = torch.optim.SGD(model.parameters(), args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
|
||||
|
||||
|
||||
def train(epoch):
|
||||
batch_time = AverageMeter()
|
||||
data_time = AverageMeter()
|
||||
losses = AverageMeter()
|
||||
top1 = AverageMeter()
|
||||
top5 = AverageMeter()
|
||||
|
||||
# switch to train mode
|
||||
model.train()
|
||||
end = time.time()
|
||||
for i, (input, target) in enumerate(train_loader):
|
||||
# measure data loading time
|
||||
data_time.update(time.time() - end)
|
||||
|
||||
input, target = input.cuda(), target.cuda()
|
||||
|
||||
# compute output
|
||||
try:
|
||||
output = model(input)
|
||||
loss = criterion(output, target)
|
||||
|
||||
# measure accuracy and record loss
|
||||
prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
|
||||
losses.update(loss.item(), input.size(0))
|
||||
top1.update(prec1[0], input.size(0))
|
||||
top5.update(prec5[0], input.size(0))
|
||||
|
||||
# compute gradient and do SGD step
|
||||
optimizer.zero_grad()
|
||||
loss.backward()
|
||||
optimizer.step()
|
||||
|
||||
# measure elapsed time
|
||||
batch_time.update(time.time() - end)
|
||||
end = time.time()
|
||||
|
||||
if i % 10 == 0:
|
||||
run.log("loss", losses.avg)
|
||||
run.log("prec@1", "{0:.3f}".format(top1.avg))
|
||||
run.log("prec@5", "{0:.3f}".format(top5.avg))
|
||||
print('Epoch: [{0}][{1}/{2}]\t'
|
||||
'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
|
||||
'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
|
||||
'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
|
||||
'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
|
||||
'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(epoch, i, len(train_loader),
|
||||
batch_time=batch_time, data_time=data_time,
|
||||
loss=losses, top1=top1, top5=top5))
|
||||
except:
|
||||
import sys
|
||||
print("Unexpected error:", sys.exc_info()[0])
|
||||
|
||||
|
||||
class AverageMeter(object):
|
||||
"""Computes and stores the average and current value"""
|
||||
def __init__(self):
|
||||
self.reset()
|
||||
|
||||
def reset(self):
|
||||
self.val = 0
|
||||
self.avg = 0
|
||||
self.sum = 0
|
||||
self.count = 0
|
||||
|
||||
def update(self, val, n=1):
|
||||
self.val = val
|
||||
self.sum += val * n
|
||||
self.count += n
|
||||
self.avg = self.sum / self.count
|
||||
|
||||
|
||||
def accuracy(output, target, topk=(1,)):
|
||||
"""Computes the precision@k for the specified values of k"""
|
||||
maxk = max(topk)
|
||||
batch_size = target.size(0)
|
||||
|
||||
_, pred = output.topk(maxk, 1, True, True)
|
||||
pred = pred.t()
|
||||
correct = pred.eq(target.view(1, -1).expand_as(pred))
|
||||
|
||||
res = []
|
||||
for k in topk:
|
||||
correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)
|
||||
res.append(correct_k.mul_(100.0 / batch_size))
|
||||
return res
|
||||
|
||||
|
||||
for epoch in range(1, args.epochs + 1):
|
||||
train(epoch)
|
||||