Compare commits

...

27 Commits

Author SHA1 Message Date
Jeff Shepherd
fcc882174b Pin scikit-learn to avoid conflict with azureml-responsibleai 2023-10-23 09:53:39 -07:00
jeff-shepherd
6872d8a3bb Merge pull request #1941 from Azure/jeffshep/updatefor1.53.2
Updated automl_env.yml for Azure ML SDK 1.53.2
2023-10-10 08:49:04 -07:00
Jeff Shepherd
a2cb4c3589 Updated fbprophet to prophet 2023-10-10 08:47:09 -07:00
Jeff Shepherd
15008962b2 Updated automl_env.yml for Azure ML SDK 1.53.2 2023-10-05 19:29:26 -07:00
jeff-shepherd
9414b51fac Merge pull request #1937 from Azure/jeffshep/fixwindows153
Fixed Windows automl_setup for 1.53.0
2023-08-31 21:56:12 -07:00
Jeff Shepherd
80ac414582 Fixed Windows automl_setup for 1.53.0 2023-08-31 16:54:20 -07:00
jeff-shepherd
cbc151660b Merge pull request #1936 from Azure/jeffshep/fixtabulardataset
Fixed tabular-dataset-partition-per-column.ipynb
2023-08-25 15:34:08 -07:00
Jeff Shepherd
0024abc6e3 Fixed tabular-dataset-partition-per-column.ipynb and removed deploy-to-cloud/model-register-and-deploy.ipynb 2023-08-25 13:52:29 -07:00
jeff-shepherd
fa13385860 Merge pull request #1935 from Azure/release_update_stablev2/Release-193
update samples from Release-193 as a part of 1.53.0 SDK stable release
2023-08-23 11:41:24 -07:00
Jeff Shepherd
0c5f6daf52 Fixed readme syntax 2023-08-23 11:37:30 -07:00
Jeff Shepherd
c11e9fc1da Fixed readme syntax 2023-08-23 11:36:17 -07:00
Jeff Shepherd
280150713e Restored V2 message 2023-08-23 10:20:25 -07:00
amlrelsa-ms
bb11c80b1b update samples from Release-193 as a part of 1.53.0 SDK stable release 2023-08-23 03:24:03 +00:00
Diondra Peck
d0961b98bf Add disclaimer to README 2023-06-28 15:47:49 -07:00
Paul Shealy
302589b7f9 Merge pull request #1915 from Azure/release_update_stablev2/Release-171
Release update stablev2/release 171 for SDK 1.51.0
2023-06-07 19:19:33 -07:00
amlrelsa-ms
cc85949d6d update samples from Release-171 as a part of 1.51 SDK stable release 2023-06-06 21:58:24 +05:30
amlrelsa-sa
3a1824e3ad update samples from Release-170 as a part of 1.51 SDK stable release 2023-06-06 10:50:33 +05:30
Paul Shealy
579643326d Merge pull request #1911 from diondrapeck/add-deprecation-disclaimer
Add repository deprecation disclaimer and pointer to v2 repo
2023-05-25 08:04:29 -07:00
Diondra Peck
14f76f227e Add deprecation disclaimer 2023-05-23 12:48:14 -07:00
Paul Shealy
25baf5203a Merge pull request #1899 from Azure/release_update/Release-177
update samples from Release-177 as a part of  SDK release
2023-04-17 13:01:27 -07:00
amlrelsa-ms
1178fcb0ba update samples from Release-177 as a part of SDK release 2023-04-17 10:22:59 +00:00
Sasidhar Kasturi
e4d84c8e45 update samples from Release-169 as a part of 1.50.0 SDK stable release (#1898)
Co-authored-by: amlrelsa-ms <amlrelsa@microsoft.com>
2023-04-14 10:39:38 -04:00
Harneet Virk
7a3ab1e44c Merge pull request #1895 from Azure/release_update/Release-175
update samples from Release-175 as a part of  SDK release
2023-03-28 10:17:27 -07:00
amlrelsa-ms
598a293dfa update samples from Release-175 as a part of SDK release 2023-03-28 01:02:26 +00:00
Harneet Virk
40b3068462 Merge pull request #1884 from Azure/release_update_stablev2/Release-166
update samples from Release-166 as a part of 1.49.0 SDK stable release
2023-02-13 21:22:05 -08:00
amlrelsa-ms
0ecbbbce75 update samples from Release-166 as a part of 1.49.0 SDK stable release 2023-02-14 02:46:24 +00:00
Harneet Virk
9b1e130d18 Merge pull request #1867 from Azure/release_update/Release-173
update samples from Release-173 as a part of  SDK release
2022-12-19 19:37:41 -08:00
136 changed files with 1692 additions and 5162 deletions

View File

@@ -1,6 +1,8 @@
# Azure Machine Learning Python SDK notebooks
> a community-driven repository of examples using mlflow for tracking can be found at https://github.com/Azure/azureml-examples
** **With the introduction of AzureML SDK v2, this samples repository for the v1 SDK is now deprecated and will not be monitored or updated. Users are encouraged to visit the [v2 SDK samples repository](https://github.com/Azure/azureml-examples) instead for up-to-date and enhanced examples of how to build, train, and deploy machine learning models with AzureML's newest features.** **
Welcome to the Azure Machine Learning Python SDK notebooks repository!

View File

@@ -103,7 +103,7 @@
"source": [
"import azureml.core\n",
"\n",
"print(\"This notebook was created using version 1.48.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.53.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},
@@ -329,7 +329,7 @@
" print(\"Creating new gpu-cluster\")\n",
" \n",
" # Specify the configuration for the new cluster\n",
" compute_config = AmlCompute.provisioning_configuration(vm_size=\"STANDARD_NC6\",\n",
" compute_config = AmlCompute.provisioning_configuration(vm_size=\"Standard_NC6s_v3\",\n",
" min_nodes=0,\n",
" max_nodes=4)\n",
" # Create the cluster with the specified name and configuration\n",

View File

@@ -174,7 +174,7 @@
"else:\n",
" print(\"creating new cluster\")\n",
" # vm_size parameter below could be modified to one of the RAPIDS-supported VM types\n",
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"Standard_NC6s_v2\", min_nodes=1, max_nodes = 1)\n",
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"Standard_NC6s_v3\", min_nodes=1, max_nodes = 1)\n",
"\n",
" # create the cluster\n",
" gpu_cluster = ComputeTarget.create(ws, gpu_cluster_name, provisioning_config)\n",

View File

@@ -3,10 +3,11 @@ dependencies:
- pip:
- azureml-sdk
- azureml-contrib-fairness
- fairlearn>=0.6.2
- fairlearn>=0.6.2,<=0.7.0
- joblib
- liac-arff
- raiwidgets~=0.23.0
- raiwidgets~=0.28.0
- itsdangerous==2.0.1
- markupsafe<2.1.0
- protobuf==3.20.0
- numpy<1.24.0

View File

@@ -3,10 +3,11 @@ dependencies:
- pip:
- azureml-sdk
- azureml-contrib-fairness
- fairlearn>=0.6.2
- fairlearn>=0.6.2,<=0.7.0
- joblib
- liac-arff
- raiwidgets~=0.23.0
- raiwidgets~=0.28.0
- itsdangerous==2.0.1
- markupsafe<2.1.0
- protobuf==3.20.0
- numpy<1.24.0

View File

@@ -7,14 +7,21 @@ dependencies:
# The python interpreter version.
# Azure ML only supports 3.7.0 and later.
- pip==22.3.1
- python>=3.7,<3.9
- python>=3.8,<3.9
- holidays==0.10.3
- pandas==1.3.5
- scipy==1.10.1
- Cython==0.29.14
- tqdm==4.66.1
- scikit-learn<1.1
- pip:
# Required packages for AzureML execution, history, and data preparation.
- azureml-widgets~=1.48.0
- azureml-defaults~=1.48.0
- -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/1.48.0/validated_win32_requirements.txt [--no-deps]
- matplotlib==3.6.2
- azureml-widgets~=1.53.0
- azureml-defaults~=1.53.0
- -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/1.53.2/validated_win32_requirements.txt [--no-deps]
- matplotlib==3.7.1
- xgboost==1.3.3
- arch==4.14
- mlflow-skinny==1.30.0
- prophet==1.1.4
- cmdstanpy==1.1.0
- setuptools-git==1.2

View File

@@ -6,30 +6,28 @@ channels:
dependencies:
# The python interpreter version.
# Azure ML only supports 3.7 and later.
- pip==20.1.1
- python>=3.7,<3.9
- matplotlib==3.2.1
- numpy>=1.21.6,<=1.22.3
- pip==22.3.1
- python>=3.8,<3.9
- matplotlib==3.7.1
- numpy==1.22.3
- cython==0.29.14
- urllib3==1.26.7
- scipy>=1.4.1,<=1.5.3
- scikit-learn==0.22.1
- scipy==1.10.1
- scikit-learn==1.1.3
- py-xgboost<=1.3.3
- holidays==0.10.3
- conda-forge::fbprophet==0.7.1
- conda-forge::prophet==1.1.4
- pytorch::pytorch=1.11.0
- cudatoolkit=10.1.243
- notebook
- jinja2<=2.11.2
- markupsafe<2.1.0
- scikit-learn<1.1
- pip:
# Required packages for AzureML execution, history, and data preparation.
- azureml-widgets~=1.48.0
- azureml-defaults~=1.48.0
- azureml-widgets~=1.53.0
- azureml-defaults~=1.53.0
- pytorch-transformers==1.0.0
- spacy==2.2.4
- pystan==2.19.1.1
- https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
- -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/1.48.0/validated_linux_requirements.txt [--no-deps]
- arch==4.14
- -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/1.53.2/validated_linux_requirements.txt [--no-deps]

View File

@@ -6,30 +6,24 @@ channels:
dependencies:
# The python interpreter version.
# Currently Azure ML only supports 3.7 and later.
- pip==20.1.1
- python>=3.7,<3.9
- matplotlib==3.2.1
- numpy>=1.21.6,<=1.22.3
- pip==22.3.1
- python>=3.8,<3.9
- numpy==1.22.3
- cython==0.29.14
- urllib3==1.26.7
- scipy>=1.4.1,<=1.5.3
- scikit-learn==0.22.1
- py-xgboost<=1.3.3
- scipy==1.10.1
- scikit-learn==1.1.3
- holidays==0.10.3
- conda-forge::fbprophet==0.7.1
- pytorch::pytorch=1.11.0
- cudatoolkit=9.0
- notebook
- jinja2<=2.11.2
- markupsafe<2.1.0
- scikit-learn<1.1
- pip:
# Required packages for AzureML execution, history, and data preparation.
- azureml-widgets~=1.48.0
- azureml-defaults~=1.48.0
- azureml-widgets~=1.53.0
- azureml-defaults~=1.53.0
- pytorch-transformers==1.0.0
- spacy==2.2.4
- pystan==2.19.1.1
- prophet==1.1.4
- xgboost==1.3.3
- matplotlib==3.7.1
- https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
- -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/1.48.0/validated_darwin_requirements.txt [--no-deps]
- arch==4.14
- -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/1.53.2/validated_darwin_requirements.txt [--no-deps]

View File

@@ -1,5 +1,21 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
"\n",
"Licensed under the MIT License."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/automated-machine-learning/classification-bank-marketing-all-features/auto-ml-classification-bank-marketing-all-features.png)"
]
},
{
"cell_type": "markdown",
"metadata": {},
@@ -712,7 +728,9 @@
"from azureml.core.model import Model\n",
"from azureml.core.environment import Environment\n",
"\n",
"inference_config = InferenceConfig(entry_script=script_file_name)\n",
"inference_config = InferenceConfig(\n",
" environment=best_run.get_environment(), entry_script=script_file_name\n",
")\n",
"\n",
"aciconfig = AciWebservice.deploy_configuration(\n",
" cpu_cores=2,\n",
@@ -828,9 +846,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"metadata": {},
"outputs": [],
"source": [
"%matplotlib notebook\n",

View File

@@ -1,5 +1,21 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
"\n",
"Licensed under the MIT License."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/automated-machine-learning/classification-credit-card-fraud/auto-ml-classification-credit-card-fraud.png)"
]
},
{
"cell_type": "markdown",
"metadata": {},

View File

@@ -1,5 +1,21 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
"\n",
"Licensed under the MIT License."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/automated-machine-learning/classification-text-dnn/auto-ml-classification-text-dnn.png)"
]
},
{
"cell_type": "markdown",
"metadata": {},
@@ -139,8 +155,8 @@
" print(\"Found existing cluster, use it.\")\n",
"except ComputeTargetException:\n",
" compute_config = AmlCompute.provisioning_configuration(\n",
" vm_size=\"STANDARD_NC6\", # CPU for BiLSTM, such as \"STANDARD_D2_V2\"\n",
" # To use BERT (this is recommended for best performance), select a GPU such as \"STANDARD_NC6\"\n",
" vm_size=\"Standard_NC6s_v3\", # CPU for BiLSTM, such as \"STANDARD_D2_V2\"\n",
" # To use BERT (this is recommended for best performance), select a GPU such as \"Standard_NC6s_v3\"\n",
" # or similar GPU option\n",
" # available in your workspace\n",
" idle_seconds_before_scaledown=60,\n",
@@ -336,7 +352,7 @@
"metadata": {},
"source": [
"For local inferencing, you can load the model locally via. the method `remote_run.get_output()`. For more information on the arguments expected by this method, you can run `remote_run.get_output??`.\n",
"Note that when the model contains BERT, this step will require pytorch and pytorch-transformers installed in your local environment. The exact versions of these packages can be found in the **automl_env.yml** file located in the local copy of your azureml-examples folder here: \"azureml-examples/python-sdk/tutorials/automl-with-azureml\""
"Note that when the model contains BERT, this step will require pytorch and pytorch-transformers installed in your local environment. The exact versions of these packages can be found in the **automl_env.yml** file located in the local copy of your MachineLearningNotebooks folder here: \"MachineLearningNotebooks\\how-to-use-azureml\\automated-machine-learning\""
]
},
{

View File

@@ -1,3 +1,4 @@
import json
import pandas as pd
from azureml.core import Environment, ScriptRunConfig
from azureml.core.run import Run
@@ -13,7 +14,16 @@ def run_inference(
model_name,
):
try:
inference_env = train_run.get_environment()
except BaseException:
run_details = train_run.get_details()
run_def = run_details.get("runDefinition")
env = run_def.get("environment")
if env is None:
raise
json.dump(env, open("azureml_environment.json", "w"))
inference_env = Environment.load_from_directory(".")
est = ScriptRunConfig(
source_directory=script_folder,

View File

@@ -3,7 +3,7 @@ import argparse
import pandas as pd
import numpy as np
from sklearn.externals import joblib
import joblib
from azureml.automl.runtime.shared.score import scoring, constants
from azureml.core import Run, Dataset

View File

@@ -1,5 +1,21 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
"\n",
"Licensed under the MIT License."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/automated-machine-learning/continuous-retraining/auto-ml-continuous-retraining.png)"
]
},
{
"cell_type": "markdown",
"metadata": {},

View File

@@ -31,7 +31,7 @@ try:
model = Model(ws, args.model_name)
last_train_time = model.created_time
print("Model was last trained on {0}.".format(last_train_time))
except Exception as e:
except Exception:
print("Could not get last model train time.")
last_train_time = datetime.min.replace(tzinfo=pytz.UTC)

View File

@@ -97,7 +97,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.48.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.53.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -97,7 +97,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.48.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.53.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},
@@ -454,10 +454,13 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"**Note:** Not all datasets produce a y_transformer. The dataset used in the current notebook requires a transformer as the y column data is categorical."
"**Note:** Not all datasets produce a y_transformer. The dataset used in the current notebook requires a transformer as the y column data is categorical. \n",
"\n",
"We will go ahead and download the mlflow transformer model and use it to transform test data that can be used for further experimentation below. To run the commented code, make sure the environment requirement is satisfied. You can go ahead and create the environment from the `conda.yaml` file under `/outputs/featurization/pipeline/` and run the given code in it."
]
},
{
@@ -466,7 +469,7 @@
"metadata": {},
"outputs": [],
"source": [
"from azureml.automl.core.shared.constants import Transformers\n",
"''' from azureml.automl.core.shared.constants import Transformers\n",
"\n",
"transformers = mlflow.sklearn.load_model(uri) # Using method 1\n",
"data_transformers = transformers.get_transformers()\n",
@@ -474,14 +477,15 @@
"y_transformer = data_transformers[Transformers.Y_TRANSFORMER]\n",
"\n",
"X_test = x_transformer.transform(X_test_data)\n",
"y_test = y_transformer.transform(y_test_data)"
"y_test = y_transformer.transform(y_test_data) '''"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Run the following cell to see the featurization summary of X and y transformers. "
"Run the following cell to see the featurization summary of X and y transformers. Uncomment to use. "
]
},
{
@@ -490,10 +494,10 @@
"metadata": {},
"outputs": [],
"source": [
"X_data_summary = x_transformer.get_featurization_summary(is_user_friendly=False)\n",
"''' X_data_summary = x_transformer.get_featurization_summary(is_user_friendly=False)\n",
"\n",
"summary_df = pd.DataFrame.from_records(X_data_summary)\n",
"summary_df"
"summary_df '''"
]
},
{
@@ -544,10 +548,11 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Another way to load the data is to go to the above autofeaturization experiment and check for the featurized dataset ids under `Output datasets`. Uncomment and replace them accordingly below to use."
"Another way to load the data is to go to the above autofeaturization experiment and check for the featurized dataset ids under `Output datasets`. Uncomment and replace them accordingly below, to use."
]
},
{
@@ -597,10 +602,20 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Here we are passing our training data to the lightgbm classifier, any custom model can be used with your data."
"Here we are passing our training data to the lightgbm classifier, any custom model can be used with your data. Let us first install lightgbm."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"! pip install lightgbm"
]
},
{
@@ -612,11 +627,27 @@
"import lightgbm as lgb\n",
"\n",
"model = lgb.LGBMClassifier(learning_rate=0.08,max_depth=-5,random_state=42)\n",
"model.fit(X_train, y_train, sample_weight=sample_weight, eval_set=[(X_test, y_test),(X_train, y_train)],\n",
" verbose=20,eval_metric='logloss')\n",
"\n",
"model.fit(X_train, y_train, sample_weight=sample_weight)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Once training is done, the test data obtained after transforming from the above downloaded transformer can be used to calculate the accuracy "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print('Training accuracy {:.4f}'.format(model.score(X_train, y_train)))\n",
"print('Testing accuracy {:.4f}'.format(model.score(X_test, y_test)))"
"\n",
"# Uncomment below to test the model on test data \n",
"# print('Testing accuracy {:.4f}'.format(model.score(X_test, y_test)))"
]
},
{
@@ -654,45 +685,8 @@
"metadata": {},
"outputs": [],
"source": [
"y_pred = model.predict(X_test)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Calculate metrics for the prediction\n",
"\n",
"Now visualize the data on a scatter plot to show what our truth (actual) values are compared to the predicted values \n",
"from the trained model that was returned."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.metrics import confusion_matrix\n",
"from matplotlib import pyplot as plt\n",
"import numpy as np\n",
"import itertools\n",
"\n",
"cf =confusion_matrix(y_test,y_pred)\n",
"plt.imshow(cf,cmap=plt.cm.Blues,interpolation='nearest')\n",
"plt.colorbar()\n",
"plt.title('Confusion Matrix')\n",
"plt.xlabel('Predicted')\n",
"plt.ylabel('Actual')\n",
"class_labels = ['False','True']\n",
"tick_marks = np.arange(len(class_labels))\n",
"plt.xticks(tick_marks,class_labels)\n",
"plt.yticks([-0.5,0,1,1.5],['','False','True',''])\n",
"# plotting text value inside cells\n",
"thresh = cf.max() / 2.\n",
"for i,j in itertools.product(range(cf.shape[0]),range(cf.shape[1])):\n",
" plt.text(j,i,format(cf[i,j],'d'),horizontalalignment='center',color='white' if cf[i,j] >thresh else 'black')\n",
"plt.show()"
"# Uncomment below to test the model on test data\n",
"# y_pred = model.predict(X_test)"
]
},
{

View File

@@ -3,7 +3,7 @@ dependencies:
# The python interpreter version.
# Currently Azure ML only supports 3.7.0 and later.
- pip<=22.3.1
- python>=3.7.0,<3.10
- python>=3.7.0,<3.11
- pip:
# Required packages for AzureML execution, history, and data preparation.

View File

@@ -4,10 +4,10 @@ channels:
- main
dependencies:
# The python interpreter version.
# Currently Azure ML only supports 3.6.0 and later.
# Currently Azure ML only supports 3.7.0 and later.
- pip<=20.2.4
- nomkl
- python>=3.6.0,<3.10
- python>=3.7.0,<3.11
- urllib3==1.26.7
- PyJWT < 2.0.0
- numpy>=1.21.6,<=1.22.3

View File

@@ -92,7 +92,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.48.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.53.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -91,7 +91,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.48.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.53.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -13,7 +13,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/automated-machine-learning/forecasting-hierarchical-timeseries/auto-ml-forecasting-hierarchical-timeseries.png)"
"![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/automated-machine-learning/forecasting-backtest-many-models/auto-ml-forecasting-backtest-many-models.png)"
]
},
{

View File

@@ -7,7 +7,7 @@
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
"\n",
"Licensed under the MIT License.\n",
"![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/automated-machine-learning/automl-forecasting-function.png)"
"![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/automated-machine-learning/auto-ml-forecasting-backtest-single-model.png)"
]
},
{

View File

@@ -16,6 +16,13 @@
"![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/automated-machine-learning/forecasting-bike-share/auto-ml-forecasting-bike-share.png)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<font color=\"red\" size=\"5\"><strong>!Important!</strong> </br>This notebook is outdated and is not supported by the AutoML Team. Please use the supported version ([link](https://github.com/Azure/azureml-examples/tree/main/sdk/python/jobs/automl-standalone-jobs/automl-forecasting-task-bike-share)).</font>"
]
},
{
"cell_type": "markdown",
"metadata": {},
@@ -42,7 +49,7 @@
"\n",
"AutoML highlights here include built-in holiday featurization, accessing engineered feature names, and working with the `forecast` function. Please also look at the additional forecasting notebooks, which document lagging, rolling windows, forecast quantiles, other ways to use the forecast function, and forecaster deployment.\n",
"\n",
"Make sure you have executed the [configuration notebook](../../../configuration.ipynb) before running this notebook.\n",
"Make sure you have executed the [configuration notebook](https://github.com/Azure/MachineLearningNotebooks/blob/master/configuration.ipynb) before running this notebook.\n",
"\n",
"Notebook synopsis:\n",
"1. Creating an Experiment in an existing Workspace\n",
@@ -61,7 +68,11 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"gather": {
"logged": 1680248038565
}
},
"outputs": [],
"source": [
"import json\n",
@@ -170,25 +181,6 @@
"source": [
"## Data\n",
"\n",
"The [Machine Learning service workspace](https://docs.microsoft.com/en-us/azure/machine-learning/service/concept-workspace) is paired with the storage account, which contains the default data store. We will use it to upload the bike share data and create [tabular dataset](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.data.tabulardataset?view=azure-ml-py) for training. A tabular dataset defines a series of lazily-evaluated, immutable operations to load data from the data source into tabular representation."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"datastore = ws.get_default_datastore()\n",
"datastore.upload_files(\n",
" files=[\"./bike-no.csv\"], target_path=\"dataset/\", overwrite=True, show_progress=True\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's set up what we know about the dataset. \n",
"\n",
"**Target column** is what we want to forecast.\n",
@@ -206,25 +198,50 @@
"time_column_name = \"date\""
]
},
{
"cell_type": "markdown",
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
},
"source": [
"You are now ready to load the historical bike share data. We will load the CSV file into a plain pandas DataFrame."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"dataset = Dataset.Tabular.from_delimited_files(\n",
" path=[(datastore, \"dataset/bike-no.csv\")]\n",
").with_timestamp_columns(fine_grain_timestamp=time_column_name)\n",
"all_data = pd.read_csv(\"bike-no.csv\", parse_dates=[time_column_name])\n",
"\n",
"# Drop the columns 'casual' and 'registered' as these columns are a breakdown of the total and therefore a leak.\n",
"dataset = dataset.drop_columns(columns=[\"casual\", \"registered\"])\n",
"\n",
"dataset.take(5).to_pandas_dataframe().reset_index(drop=True)"
"all_data.drop([\"casual\", \"registered\"], axis=1, inplace=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
},
"source": [
"### Split the data\n",
"\n",
@@ -234,22 +251,63 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"gather": {
"logged": 1680247376789
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"# select data that occurs before a specified date\n",
"train = dataset.time_before(datetime(2012, 8, 31), include_boundary=True)\n",
"train.to_pandas_dataframe().tail(5).reset_index(drop=True)"
"train = all_data[all_data[time_column_name] <= pd.Timestamp(\"2012-08-31\")].copy()\n",
"test = all_data[all_data[time_column_name] >= pd.Timestamp(\"2012-09-01\")].copy()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Upload data to datastore\n",
"\n",
"The [Machine Learning service workspace](https://docs.microsoft.com/en-us/azure/machine-learning/service/concept-workspace) is paired with the storage account, which contains the default data store. We will use it to upload the bike share data and create [tabular dataset](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.data.tabulardataset?view=azure-ml-py) for training. A tabular dataset defines a series of lazily-evaluated, immutable operations to load data from the data source into tabular representation."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"test = dataset.time_after(datetime(2012, 9, 1), include_boundary=True)\n",
"test.to_pandas_dataframe().head(5).reset_index(drop=True)"
"from azureml.data.dataset_factory import TabularDatasetFactory\n",
"\n",
"datastore = ws.get_default_datastore()\n",
"\n",
"train_dataset = TabularDatasetFactory.register_pandas_dataframe(\n",
" train, target=(datastore, \"dataset/\"), name=\"bike_no_train\"\n",
")\n",
"\n",
"test_dataset = TabularDatasetFactory.register_pandas_dataframe(\n",
" test, target=(datastore, \"dataset/\"), name=\"bike_no_test\"\n",
")"
]
},
{
@@ -360,7 +418,7 @@
" featurization=featurization_config,\n",
" blocked_models=[\"ExtremeRandomTrees\"],\n",
" experiment_timeout_hours=0.3,\n",
" training_data=train,\n",
" training_data=train_dataset,\n",
" label_column_name=target_column_name,\n",
" compute_target=compute_target,\n",
" enable_early_stopping=True,\n",
@@ -546,7 +604,7 @@
"from run_forecast import run_rolling_forecast\n",
"\n",
"remote_run = run_rolling_forecast(\n",
" test_experiment, compute_target, best_run, test, target_column_name\n",
" test_experiment, compute_target, best_run, test_dataset, target_column_name\n",
")\n",
"remote_run"
]
@@ -722,6 +780,9 @@
],
"friendly_name": "Forecasting BikeShare Demand",
"index_order": 1,
"kernel_info": {
"name": "python38-azureml"
},
"kernelspec": {
"display_name": "Python 3.8 - AzureML",
"language": "python",
@@ -737,11 +798,19 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.13"
"version": "3.8.10"
},
"microsoft": {
"ms_spell_check": {
"ms_spell_check_language": "en"
}
},
"mimetype": "text/x-python",
"name": "python",
"npconvert_exporter": "python",
"nteract": {
"version": "nteract-front-end@1.0.0"
},
"pygments_lexer": "ipython3",
"tags": [
"Forecasting"

View File

@@ -1,6 +1,6 @@
import argparse
from azureml.core import Dataset, Run
from sklearn.externals import joblib
import joblib
parser = argparse.ArgumentParser()
parser.add_argument(

View File

@@ -16,6 +16,13 @@
"![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/automated-machine-learning/forecasting-energy-demand/auto-ml-forecasting-energy-demand.png)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<font color=\"red\" size=\"5\"><strong>!Important!</strong> </br>This notebook is outdated and is not supported by the AutoML Team. Please use the supported version ([link](https://github.com/Azure/azureml-examples/blob/main/sdk/python/jobs/automl-standalone-jobs/automl-forecasting-task-energy-demand/automl-forecasting-task-energy-demand-advanced-mlflow.ipynb)).</font>"
]
},
{
"cell_type": "markdown",
"metadata": {},
@@ -43,7 +50,7 @@
"\n",
"In this example we use the associated New York City energy demand dataset to showcase how you can use AutoML for a simple forecasting problem and explore the results. The goal is predict the energy demand for the next 48 hours based on historic time-series data.\n",
"\n",
"If you are using an Azure Machine Learning Compute Instance, you are all set. Otherwise, go through the [configuration notebook](../../../configuration.ipynb) first, if you haven't already, to establish your connection to the AzureML Workspace.\n",
"If you are using an Azure Machine Learning Compute Instance, you are all set. Otherwise, go through the [configuration notebook](https://github.com/Azure/MachineLearningNotebooks/blob/master/configuration.ipynb) first, if you haven't already, to establish your connection to the AzureML Workspace.\n",
"\n",
"In this notebook you will learn how to:\n",
"1. Creating an Experiment using an existing Workspace\n",
@@ -260,8 +267,12 @@
"outputs": [],
"source": [
"# split into train based on time\n",
"train = dataset.time_before(datetime(2017, 8, 8, 5), include_boundary=True)\n",
"train.to_pandas_dataframe().reset_index(drop=True).sort_values(time_column_name).tail(5)"
"train = (\n",
" dataset.time_before(datetime(2017, 8, 8, 5), include_boundary=True)\n",
" .to_pandas_dataframe()\n",
" .reset_index(drop=True)\n",
")\n",
"train.sort_values(time_column_name).tail(5)"
]
},
{
@@ -271,8 +282,39 @@
"outputs": [],
"source": [
"# split into test based on time\n",
"test = dataset.time_between(datetime(2017, 8, 8, 6), datetime(2017, 8, 10, 5))\n",
"test.to_pandas_dataframe().reset_index(drop=True).head(5)"
"test = (\n",
" dataset.time_between(datetime(2017, 8, 8, 6), datetime(2017, 8, 10, 5))\n",
" .to_pandas_dataframe()\n",
" .reset_index(drop=True)\n",
")\n",
"test.head(5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"jupyter": {
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"# register the splitted train and test data in workspace storage\n",
"from azureml.data.dataset_factory import TabularDatasetFactory\n",
"\n",
"datastore = ws.get_default_datastore()\n",
"train_dataset = TabularDatasetFactory.register_pandas_dataframe(\n",
" train, target=(datastore, \"dataset/\"), name=\"nyc_energy_train\"\n",
")\n",
"test_dataset = TabularDatasetFactory.register_pandas_dataframe(\n",
" test, target=(datastore, \"dataset/\"), name=\"nyc_energy_test\"\n",
")"
]
},
{
@@ -361,7 +403,7 @@
" primary_metric=\"normalized_root_mean_squared_error\",\n",
" blocked_models=[\"ExtremeRandomTrees\", \"AutoArima\", \"Prophet\"],\n",
" experiment_timeout_hours=0.3,\n",
" training_data=train,\n",
" training_data=train_dataset,\n",
" label_column_name=target_column_name,\n",
" compute_target=compute_target,\n",
" enable_early_stopping=True,\n",
@@ -521,7 +563,7 @@
" test_experiment=test_experiment,\n",
" compute_target=compute_target,\n",
" train_run=best_run,\n",
" test_dataset=test,\n",
" test_dataset=test_dataset,\n",
" target_column_name=target_column_name,\n",
")\n",
"remote_run_infer.wait_for_completion(show_output=False)\n",
@@ -627,7 +669,7 @@
" \"Prophet\",\n",
" ], # These models are blocked for tutorial purposes, remove this for real use cases.\n",
" experiment_timeout_hours=0.3,\n",
" training_data=train,\n",
" training_data=train_dataset,\n",
" label_column_name=target_column_name,\n",
" compute_target=compute_target,\n",
" enable_early_stopping=True,\n",
@@ -698,7 +740,7 @@
" test_experiment=test_experiment_advanced,\n",
" compute_target=compute_target,\n",
" train_run=best_run_lags,\n",
" test_dataset=test,\n",
" test_dataset=test_dataset,\n",
" target_column_name=target_column_name,\n",
" inference_folder=\"./forecast_advanced\",\n",
")\n",
@@ -766,6 +808,9 @@
"how-to-use-azureml",
"automated-machine-learning"
],
"kernel_info": {
"name": "python3"
},
"kernelspec": {
"display_name": "Python 3.8 - AzureML",
"language": "python",
@@ -781,7 +826,15 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
"version": "3.8.10"
},
"microsoft": {
"ms_spell_check": {
"ms_spell_check_language": "en"
}
},
"nteract": {
"version": "nteract-front-end@1.0.0"
},
"vscode": {
"interpreter": {
@@ -790,5 +843,5 @@
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}

View File

@@ -6,7 +6,7 @@ compute instance.
import argparse
from azureml.core import Dataset, Run
from sklearn.externals import joblib
import joblib
from pandas.tseries.frequencies import to_offset
parser = argparse.ArgumentParser()

View File

@@ -52,7 +52,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Please make sure you have followed the `configuration.ipynb` notebook so that your ML workspace information is saved in the config file."
"Please make sure you have followed the [configuration notebook](https://github.com/Azure/MachineLearningNotebooks/blob/master/configuration.ipynb) so that your ML workspace information is saved in the config file."
]
},
{

View File

@@ -19,7 +19,14 @@
"hidePrompt": false
},
"source": [
"![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/automated-machine-learning/forecasting-beer-remote/auto-ml-forecasting-beer-remote.png)"
"![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/automated-machine-learning/forecasting-github-dau/auto-ml-forecasting-github-dau.png)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<font color=\"red\" size=\"5\"><strong>!Important!</strong> </br>This notebook is outdated and is not supported by the AutoML Team. Please use the supported version ([link](https://github.com/Azure/azureml-examples/tree/main/sdk/python/jobs/automl-standalone-jobs/automl-forecasting-github-dau)).</font>"
]
},
{
@@ -52,7 +59,7 @@
"\n",
"AutoML highlights here include using Deep Learning forecasts, Arima, Prophet, Remote Execution and Remote Inferencing, and working with the `forecast` function. Please also look at the additional forecasting notebooks, which document lagging, rolling windows, forecast quantiles, other ways to use the forecast function, and forecaster deployment.\n",
"\n",
"Make sure you have executed the [configuration](../../../configuration.ipynb) before running this notebook.\n",
"Make sure you have executed the [configuration](https://github.com/Azure/MachineLearningNotebooks/blob/master/configuration.ipynb) before running this notebook.\n",
"\n",
"Notebook synopsis:\n",
"\n",
@@ -382,7 +389,7 @@
"automl_config = AutoMLConfig(\n",
" task=\"forecasting\",\n",
" primary_metric=\"normalized_root_mean_squared_error\",\n",
" experiment_timeout_hours=1,\n",
" experiment_timeout_hours=1.5,\n",
" training_data=train_dataset,\n",
" label_column_name=target_column_name,\n",
" validation_data=valid_dataset,\n",
@@ -695,7 +702,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
"version": "3.8.10"
}
},
"nbformat": 4,

View File

@@ -4,7 +4,7 @@ import os
import numpy as np
import pandas as pd
from sklearn.externals import joblib
import joblib
from sklearn.metrics import mean_absolute_error, mean_squared_error
from azureml.automl.runtime.shared.score import scoring, constants

View File

@@ -16,6 +16,13 @@
"![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/automated-machine-learning/forecasting-hierarchical-timeseries/auto-ml-forecasting-hierarchical-timeseries.png)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<font color=\"red\" size=\"5\"><strong>!Important!</strong> </br>This notebook is outdated and is not supported by the AutoML Team. Please use the supported version ([link](https://github.com/Azure/azureml-examples/tree/main/sdk/python/jobs/pipelines/1k_demand_forecasting_with_pipeline_components/automl-forecasting-demand-hierarchical-timeseries-in-pipeline)).</font>"
]
},
{
"cell_type": "markdown",
"metadata": {},
@@ -666,7 +673,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
"version": "3.8.10"
}
},
"nbformat": 4,

View File

@@ -16,6 +16,13 @@
"![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/automated-machine-learning/forecasting-hierarchical-timeseries/auto-ml-forecasting-hierarchical-timeseries.png)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<font color=\"red\" size=\"5\"><strong>!Important!</strong> </br>This notebook is outdated and is not supported by the AutoML Team. Please use the supported version ([link](https://github.com/Azure/azureml-examples/tree/main/sdk/python/jobs/pipelines/1k_demand_forecasting_with_pipeline_components/automl-forecasting-demand-many-models-in-pipeline)).</font>"
]
},
{
"cell_type": "markdown",
"metadata": {},
@@ -306,7 +313,7 @@
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
"\n",
"# Name your cluster\n",
"compute_name = \"mm-compute\"\n",
"compute_name = \"mm-compute-v1\"\n",
"\n",
"\n",
"if compute_name in ws.compute_targets:\n",
@@ -316,7 +323,7 @@
"else:\n",
" print(\"Creating a new compute target...\")\n",
" provisioning_config = AmlCompute.provisioning_configuration(\n",
" vm_size=\"STANDARD_D16S_V3\", max_nodes=20\n",
" vm_size=\"STANDARD_D14_V2\", max_nodes=20\n",
" )\n",
" # Create the compute target\n",
" compute_target = ComputeTarget.create(ws, compute_name, provisioning_config)\n",
@@ -878,7 +885,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
"version": "3.8.10"
},
"vscode": {
"interpreter": {

View File

@@ -16,6 +16,13 @@
"![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/automated-machine-learning/forecasting-orange-juice-sales/auto-ml-forecasting-orange-juice-sales.png)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<font color=\"red\" size=\"5\"><strong>!Important!</strong> </br>This notebook is outdated and is not supported by the AutoML Team. Please use the supported version ([link](https://github.com/Azure/azureml-examples/tree/main/sdk/python/jobs/automl-standalone-jobs/automl-forecasting-orange-juice-sales)).</font>"
]
},
{
"cell_type": "markdown",
"metadata": {},
@@ -40,7 +47,7 @@
"## Introduction<a id=\"introduction\"></a>\n",
"In this example, we use AutoML to train, select, and operationalize a time-series forecasting model for multiple time-series.\n",
"\n",
"Make sure you have executed the [configuration notebook](../../../configuration.ipynb) before running this notebook.\n",
"Make sure you have executed the [configuration notebook](https://github.com/Azure/MachineLearningNotebooks/blob/master/configuration.ipynb) before running this notebook.\n",
"\n",
"The examples in the follow code samples use the University of Chicago's Dominick's Finer Foods dataset to forecast orange juice sales. Dominick's was a grocery chain in the Chicago metropolitan area."
]
@@ -242,7 +249,9 @@
" time_series_id_column_names, group_keys=False\n",
" )\n",
" df_head = df_grouped.apply(lambda dfg: dfg.iloc[:-n])\n",
" df_head.reset_index(inplace=True, drop=True)\n",
" df_tail = df_grouped.apply(lambda dfg: dfg.iloc[-n:])\n",
" df_tail.reset_index(inplace=True, drop=True)\n",
" return df_head, df_tail\n",
"\n",
"\n",
@@ -715,7 +724,7 @@
" description=\"Automl forecasting sample service\",\n",
")\n",
"\n",
"aci_service_name = \"automl-oj-forecast-01\"\n",
"aci_service_name = \"automl-oj-forecast-03\"\n",
"print(aci_service_name)\n",
"aci_service = Model.deploy(ws, aci_service_name, [model], inference_config, aciconfig)\n",
"aci_service.wait_for_deployment(True)\n",
@@ -792,7 +801,7 @@
"metadata": {},
"outputs": [],
"source": [
"serv = Webservice(ws, \"automl-oj-forecast-01\")\n",
"serv = Webservice(ws, \"automl-oj-forecast-03\")\n",
"serv.delete() # don't do it accidentally"
]
}
@@ -835,7 +844,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
"version": "3.8.10"
},
"tags": [
"None"

View File

@@ -6,7 +6,7 @@ compute instance.
import argparse
from azureml.core import Dataset, Run
from sklearn.externals import joblib
import joblib
from pandas.tseries.frequencies import to_offset
parser = argparse.ArgumentParser()

View File

@@ -1,5 +1,21 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<font color=\"red\" size=\"5\"><strong>!Important!</strong> </br>This notebook is outdated and is not supported by the AutoML Team. Please use the supported version ([link](https://github.com/Azure/azureml-examples/tree/main/sdk/python/jobs/pipelines/1h_automl_in_pipeline/automl-forecasting-in-pipeline)).</font>\n",
"</br>\n",
"</br>\n",
"<font color=\"red\" size=\"5\">\n",
"For examples illustrating how to build pipelines with components, please use the following links:</font>\n",
"<ul>\n",
" <li><a href=\"https://github.com/Azure/azureml-examples/tree/main/sdk/python/jobs/pipelines/1k_demand_forecasting_with_pipeline_components/automl-forecasting-demand-many-models-in-pipeline\">Many Models</a></li>\n",
" <li><a href=\"https://github.com/Azure/azureml-examples/tree/main/sdk/python/jobs/pipelines/1k_demand_forecasting_with_pipeline_components/automl-forecasting-demand-hierarchical-timeseries-in-pipeline\">Hierarchical Time Series</a></li>\n",
" <li><a href=\"https://github.com/Azure/azureml-examples/tree/main/sdk/python/jobs/automl-standalone-jobs/automl-forecasting-distributed-tcn\">Distributed TCN</a></li>\n",
"</ul>"
]
},
{
"cell_type": "markdown",
"metadata": {},
@@ -13,7 +29,7 @@
"source": [
"## Introduction\n",
"\n",
"In this notebook, we demonstrate how to use piplines to train and inference on AutoML Forecasting model. Two pipelines will be created: one for training AutoML model, and the other is for inference on AutoML model. We'll also demonstrate how to schedule the inference pipeline so you can get inference results periodically (with refreshed test dataset). Make sure you have executed the configuration notebook before running this notebook. In this notebook you will learn how to:\n",
"In this notebook, we demonstrate how to use piplines to train and inference on AutoML Forecasting model. Two pipelines will be created: one for training AutoML model, and the other is for inference on AutoML model. We'll also demonstrate how to schedule the inference pipeline so you can get inference results periodically (with refreshed test dataset). Make sure you have executed the [configuration notebook](https://github.com/Azure/MachineLearningNotebooks/blob/master/configuration.ipynb) before running this notebook. In this notebook you will learn how to:\n",
"\n",
"- Configure AutoML using AutoMLConfig for forecasting tasks using pipeline AutoMLSteps.\n",
"- Create and register an AutoML model using AzureML pipeline.\n",
@@ -555,40 +571,15 @@
"metadata": {},
"outputs": [],
"source": [
"from azureml.core import Model\n",
"from azureml.train.automl.run import AutoMLRun\n",
"\n",
"model = Model(ws, model_name_str)\n",
"download_path = model.download(model_name_str, exist_ok=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"After all the files are downloaded, we can generate the run config for inference runs."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core import Environment, RunConfiguration\n",
"from azureml.core.conda_dependencies import CondaDependencies\n",
"for step in training_pipeline_run.get_steps():\n",
" if step.properties.get(\"StepType\") == \"AutoMLStep\":\n",
" automl_run = AutoMLRun(experiment, step.id)\n",
" break\n",
"\n",
"env_file = os.path.join(download_path, \"conda_env_v_1_0_0.yml\")\n",
"inference_env = Environment(\"oj-inference-env\")\n",
"inference_env.python.conda_dependencies = CondaDependencies(\n",
" conda_dependencies_file_path=env_file\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"[Optional] The enviroment can also be assessed from the training run using `get_environment()` API."
"best_run = automl_run.get_best_child()\n",
"inference_env = best_run.get_environment()"
]
},
{

View File

@@ -6,7 +6,7 @@ import numpy as np
import pandas as pd
from pandas.tseries.frequencies import to_offset
from sklearn.externals import joblib
import joblib
from sklearn.metrics import mean_absolute_error, mean_squared_error
from azureml.data.dataset_factory import TabularDatasetFactory
@@ -30,7 +30,7 @@ def infer_forecasting_dataset_tcn(
run = Run.get_context()
registered_train = TabularDatasetFactory.register_pandas_dataframe(
TabularDatasetFactory.register_pandas_dataframe(
df_all,
target=(
run.experiment.workspace.get_default_datastore(),

View File

@@ -20,7 +20,14 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"In this notebook we will explore the univaraite time-series data to determine the settings for an automated ML experiment. We will follow the thought process depicted in the following diagram:<br/>\n",
"<font color=\"red\" size=\"5\"><strong>!Important!</strong> </br>This notebook is outdated and is not supported by the AutoML Team. Please use the supported version ([link](https://github.com/Azure/azureml-examples/tree/main/sdk/python/jobs/automl-standalone-jobs/automl-forecasting-recipes-univariate)).</font>"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"In this notebook we will explore the univariate time-series data to determine the settings for an automated ML experiment. We will follow the thought process depicted in the following diagram:<br/>\n",
"![Forecasting after training](figures/univariate_settings_map_20210408.jpg)\n",
"\n",
"The objective is to answer the following questions:\n",
@@ -32,11 +39,11 @@
" </ul>\n",
" <li>Is the data stationary? </li>\n",
" <ul style=\"margin-top:-1px; list-style-type:none\"> \n",
" <li> Importance: In the absense of features that capture trend behavior, ML models (regression and tree based) are not well equiped to predict stochastic trends. Working with stationary data solves this problem. </li>\n",
" <li> Importance: In the absence of features that capture trend behavior, ML models (regression and tree based) are not well equipped to predict stochastic trends. Working with stationary data solves this problem. </li>\n",
" </ul>\n",
" <li>Is there a detectable auto-regressive pattern in the stationary data? </li>\n",
" <ul style=\"margin-top:-1px; list-style-type:none\"> \n",
" <li> Importance: The accuracy of ML models can be improved if serial correlation is modeled by including lags of the dependent/target varaible as features. Including target lags in every experiment by default will result in a regression in accuracy scores if such setting is not warranted. </li>\n",
" <li> Importance: The accuracy of ML models can be improved if serial correlation is modeled by including lags of the dependent/target variable as features. Including target lags in every experiment by default will result in a regression in accuracy scores if such setting is not warranted. </li>\n",
" </ul>\n",
"</ol>\n",
"\n",
@@ -109,7 +116,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"The graph plots the alcohol sales in the United States. Because the data is trending, it can be difficult to see cycles, seasonality or other interestng behaviors due to the scaling issues. For example, if there is a seasonal pattern, which we will discuss later, we cannot see them on the trending data. In such case, it is worth plotting the same data in first differences."
"The graph plots the alcohol sales in the United States. Because the data is trending, it can be difficult to see cycles, seasonality or other interesting behaviors due to the scaling issues. For example, if there is a seasonal pattern, which we will discuss later, we cannot see them on the trending data. In such case, it is worth plotting the same data in first differences."
]
},
{
@@ -335,8 +342,8 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# 3 Check if there is a clear autoregressive pattern\n",
"We need to determine if we should include lags of the target variable as features in order to improve forecast accuracy. To do this, we will examine the ACF and partial ACF (PACF) plots of the stationary series. In our case, it is a series in first diffrences.\n",
"# 3 Check if there is a clear auto-regressive pattern\n",
"We need to determine if we should include lags of the target variable as features in order to improve forecast accuracy. To do this, we will examine the ACF and partial ACF (PACF) plots of the stationary series. In our case, it is a series in first differences.\n",
"\n",
"<ul>\n",
" <li> Question: What is an Auto-regressive pattern? What are we looking for? </li>\n",
@@ -418,11 +425,11 @@
" </li>\n",
" where $\\sigma_{xzy}$ is the covariance between two random variables $X$ and $Z$; $\\sigma_x$ and $\\sigma_z$ is the variance for $X$ and $Z$, respectively. The correlation coefficient measures the strength of linear relationship between two random variables. This metric can take any value from -1 to 1. <li/>\n",
" <br/>\n",
" <li> The auto-correlation coefficient $\\rho_{Y_{t} Y_{t-k}}$ is the time series equivalent of the correlation coefficient, except instead of measuring linear association between two random variables $X$ and $Z$, it measures the strength of a linear relationship between a random variable $Y_t$ and its lag $Y_{t-k}$ for any positive interger value of $k$. </li> \n",
" <li> The auto-correlation coefficient $\\rho_{Y_{t} Y_{t-k}}$ is the time series equivalent of the correlation coefficient, except instead of measuring linear association between two random variables $X$ and $Z$, it measures the strength of a linear relationship between a random variable $Y_t$ and its lag $Y_{t-k}$ for any positive integer value of $k$. </li> \n",
" <br />\n",
" <li> To visualize the ACF for a particular lag, say lag 2, plot the second lag of a series $y_{t-2}$ on the x-axis, and plot the series itself $y_t$ on the y-axis. The autocorrelation coefficient is the slope of the best fitted regression line and can be interpreted as follows. A one unit increase in the lag of a variable one period ago leads to a $\\rho_{Y_{t} Y_{t-2}}$ units change in the variable in the current period. This interpreation can be applied to any lag. </li> \n",
" <li> To visualize the ACF for a particular lag, say lag 2, plot the second lag of a series $y_{t-2}$ on the x-axis, and plot the series itself $y_t$ on the y-axis. The autocorrelation coefficient is the slope of the best fitted regression line and can be interpreted as follows. A one unit increase in the lag of a variable one period ago leads to a $\\rho_{Y_{t} Y_{t-2}}$ units change in the variable in the current period. This interpretation can be applied to any lag. </li> \n",
" <br />\n",
" <li> In the interpretation posted above we need to be careful not to confuse the word \"leads\" with \"causes\" since these are not the same thing. We do not know the lagged value of the varaible causes it to change. Afterall, there are probably many other features that may explain the movement in $Y_t$. All we are trying to do in this section is to identify situations when the variable contains the strong auto-regressive components that needs to be included in the model to improve forecast accuracy. </li>\n",
" <li> In the interpretation posted above we need to be careful not to confuse the word \"leads\" with \"causes\" since these are not the same thing. We do not know the lagged value of the variable causes it to change. After all, there are probably many other features that may explain the movement in $Y_t$. All we are trying to do in this section is to identify situations when the variable contains the strong auto-regressive components that needs to be included in the model to improve forecast accuracy. </li>\n",
" </ul>\n",
"</ul>"
]
@@ -434,7 +441,7 @@
"<ul>\n",
" <li> Question: What is the PACF? </li>\n",
" <ul style=\"list-style-type:none;\">\n",
" <li> When describing the ACF we essentially running a regression between a partigular lag of a series, say, lag 4, and the series itself. What this implies is the regression coefficient for lag 4 captures the impact of everything that happens in lags 1, 2 and 3. In other words, if lag 1 is the most important lag and we exclude it from the regression, naturally, the regression model will assign the importance of the 1st lag to the 4th one. Partial auto-correlation function fixes this problem since it measures the contribution of each lag accounting for the information added by the intermediary lags. If we were to illustrate ACF and PACF for the fourth lag using the regression analogy, the difference is a follows: \n",
" <li> When describing the ACF we essentially running a regression between a particular lag of a series, say, lag 4, and the series itself. What this implies is the regression coefficient for lag 4 captures the impact of everything that happens in lags 1, 2 and 3. In other words, if lag 1 is the most important lag and we exclude it from the regression, naturally, the regression model will assign the importance of the 1st lag to the 4th one. Partial auto-correlation function fixes this problem since it measures the contribution of each lag accounting for the information added by the intermediary lags. If we were to illustrate ACF and PACF for the fourth lag using the regression analogy, the difference is a follows: \n",
" \\begin{align}\n",
" Y_{t} &= a_{0} + a_{4} Y_{t-4} + e_{t} \\\\\n",
" Y_{t} &= b_{0} + b_{1} Y_{t-1} + b_{2} Y_{t-2} + b_{3} Y_{t-3} + b_{4} Y_{t-4} + \\varepsilon_{t} \\\\\n",
@@ -442,7 +449,7 @@
" </li>\n",
" <br/>\n",
" <li>\n",
" Here, you can think of $a_4$ and $b_{4}$ as the auto- and partial auto-correlation coefficients for lag 4. Notice, in the second equation we explicitely accounting for the intermediate lags by adding them as regrerssors.\n",
" Here, you can think of $a_4$ and $b_{4}$ as the auto- and partial auto-correlation coefficients for lag 4. Notice, in the second equation we explicitly accounting for the intermediate lags by adding them as regressors.\n",
" </li>\n",
" </ul>\n",
"</ul>"
@@ -455,11 +462,11 @@
"<ul>\n",
" <li> Question: Auto-regressive pattern? What are we looking for? </li>\n",
" <ul style=\"list-style-type:none;\">\n",
" <li> We are looking for a classical profiles for an AR(p) process such as an exponential decay of an ACF and a the first $p$ significant lags of the PACF. Let's examine the ACF/PACF profiles of the same simulated AR(2) shown in Section 3, and check if the ACF/PACF explanation are refelcted in these plots. <li/>\n",
" <li> We are looking for a classical profiles for an AR(p) process such as an exponential decay of an ACF and a the first $p$ significant lags of the PACF. Let's examine the ACF/PACF profiles of the same simulated AR(2) shown in Section 3, and check if the ACF/PACF explanation are reflected in these plots. <li/>\n",
" <li><img src=\"figures/ACF_PACF_for_AR2.png\" class=\"img_class\">\n",
" <li> The autocorrelation coefficient for the 3rd lag is 0.6, which can be interpreted that a one unit increase in the value of the target varaible three periods ago leads to 0.6 units increase in the current period. However, the PACF plot shows that the partial autocorrealtion coefficient is zero (from a statistical point of view since it lies within the shaded region). This is happening because the 1st and 2nd lags are good predictors of the target variable. Ommiting these two lags from the regression results in the misleading conclusion that the third lag is a good prediciton. <li/>\n",
" <li> The autocorrelation coefficient for the 3rd lag is 0.6, which can be interpreted that a one unit increase in the value of the target variable three periods ago leads to 0.6 units increase in the current period. However, the PACF plot shows that the partial autocorrelation coefficient is zero (from a statistical point of view since it lies within the shaded region). This is happening because the 1st and 2nd lags are good predictors of the target variable. Omitting these two lags from the regression results in the misleading conclusion that the third lag is a good prediction. <li/>\n",
" <br/>\n",
" <li> This is why it is important to examine both the ACF and the PACF plots when tring to determine the auto regressive order for the variable in question. <li/>\n",
" <li> This is why it is important to examine both the ACF and the PACF plots when trying to determine the auto regressive order for the variable in question. <li/>\n",
" </ul>\n",
"</ul> "
]
@@ -471,6 +478,9 @@
"name": "vlbejan"
}
],
"kernel_info": {
"name": "python38-azureml"
},
"kernelspec": {
"display_name": "Python 3.8 - AzureML",
"language": "python",
@@ -486,7 +496,15 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
"version": "3.8.10"
},
"microsoft": {
"ms_spell_check": {
"ms_spell_check_language": "en"
}
},
"nteract": {
"version": "nteract-front-end@1.0.0"
}
},
"nbformat": 4,

View File

@@ -16,6 +16,13 @@
"![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/automated-machine-learning/forecasting-recipes-univariate/2_run_experiment.png)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<font color=\"red\" size=\"5\"><strong>!Important!</strong> </br>This notebook is outdated and is not supported by the AutoML Team. Please use the supported version ([link](https://github.com/Azure/azureml-examples/tree/main/sdk/python/jobs/automl-standalone-jobs/automl-forecasting-recipes-univariate)).</font>"
]
},
{
"cell_type": "markdown",
"metadata": {},
@@ -300,27 +307,14 @@
"df_train.to_csv(\"train.csv\", index=False)\n",
"df_test.to_csv(\"test.csv\", index=False)\n",
"\n",
"from azureml.data.dataset_factory import TabularDatasetFactory\n",
"\n",
"datastore = ws.get_default_datastore()\n",
"datastore.upload_files(\n",
" files=[\"./train.csv\"],\n",
" target_path=\"uni-recipe-dataset/tabular/\",\n",
" overwrite=True,\n",
" show_progress=True,\n",
"train_dataset = TabularDatasetFactory.register_pandas_dataframe(\n",
" df_train, target=(datastore, \"dataset/\"), name=\"train\"\n",
")\n",
"datastore.upload_files(\n",
" files=[\"./test.csv\"],\n",
" target_path=\"uni-recipe-dataset/tabular/\",\n",
" overwrite=True,\n",
" show_progress=True,\n",
")\n",
"\n",
"from azureml.core import Dataset\n",
"\n",
"train_dataset = Dataset.Tabular.from_delimited_files(\n",
" path=[(datastore, \"uni-recipe-dataset/tabular/train.csv\")]\n",
")\n",
"test_dataset = Dataset.Tabular.from_delimited_files(\n",
" path=[(datastore, \"uni-recipe-dataset/tabular/test.csv\")]\n",
"test_dataset = TabularDatasetFactory.register_pandas_dataframe(\n",
" df_test, target=(datastore, \"dataset/\"), name=\"test\"\n",
")\n",
"\n",
"# print the first 5 rows of the Dataset\n",
@@ -571,6 +565,9 @@
"name": "vlbejan"
}
],
"kernel_info": {
"name": "python3"
},
"kernelspec": {
"display_name": "Python 3.8 - AzureML",
"language": "python",
@@ -586,7 +583,15 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
"version": "3.8.10"
},
"microsoft": {
"ms_spell_check": {
"ms_spell_check_language": "en"
}
},
"nteract": {
"version": "nteract-front-end@1.0.0"
},
"vscode": {
"interpreter": {

View File

@@ -7,7 +7,7 @@ compute instance.
import argparse
from azureml.core import Dataset, Run
from azureml.automl.core.shared.constants import TimeSeriesInternal
from sklearn.externals import joblib
import joblib
parser = argparse.ArgumentParser()
parser.add_argument(

View File

@@ -1,5 +1,27 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"hideCode": false,
"hidePrompt": false
},
"source": [
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
"\n",
"Licensed under the MIT License."
]
},
{
"cell_type": "markdown",
"metadata": {
"hideCode": false,
"hidePrompt": false
},
"source": [
"![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/automated-machine-learning/regression-explanation-featurization/auto-ml-regression-explanation-featurization.png)"
]
},
{
"cell_type": "markdown",
"metadata": {},
@@ -495,6 +517,30 @@
"#### Create conda configuration for model explanations experiment from automl_run object"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"from azureml.core import Environment\n",
"\n",
"\n",
"def get_environment_safe(parent_run):\n",
" \"\"\"Get the environment from parent run\"\"\"\n",
" try:\n",
" return parent_run.get_environment()\n",
" except BaseException:\n",
" run_details = parent_run.get_details()\n",
" run_def = run_details.get(\"runDefinition\")\n",
" env = run_def.get(\"environment\")\n",
" if env is None:\n",
" raise\n",
" json.dump(env, open(\"azureml_environment.json\", \"w\"))\n",
" return Environment.load_from_directory(\".\")"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -502,8 +548,6 @@
"outputs": [],
"source": [
"from azureml.core.runconfig import RunConfiguration\n",
"from azureml.core.conda_dependencies import CondaDependencies\n",
"import pkg_resources\n",
"\n",
"# create a new RunConfig object\n",
"conda_run_config = RunConfiguration(framework=\"python\")\n",
@@ -513,7 +557,7 @@
"conda_run_config.environment.docker.enabled = True\n",
"\n",
"# specify CondaDependencies obj\n",
"conda_run_config.environment = automl_run.get_environment()"
"conda_run_config.environment = get_environment_safe(automl_run)"
]
},
{
@@ -686,7 +730,7 @@
" description=\"Get local explanations for Machine test data\",\n",
")\n",
"\n",
"myenv = automl_run.get_environment()\n",
"myenv = get_environment_safe(automl_run)\n",
"inference_config = InferenceConfig(entry_script=\"score_explain.py\", environment=myenv)\n",
"\n",
"# Use configs and models generated above\n",
@@ -909,7 +953,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.7"
"version": "3.8.7"
},
"tags": [
"featurization",

View File

@@ -1,12 +0,0 @@
# Model Deployment with Azure ML service
You can use Azure Machine Learning to package, debug, validate and deploy inference containers to a variety of compute targets. This process is known as "MLOps" (ML operationalization).
For more information please check out this article: https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-deploy-and-where
## Get Started
To begin, you will need an ML workspace.
For more information please check out this article: https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-workspace
## Deploy to the cloud
You can deploy to the cloud using the Azure ML CLI or the Azure ML SDK.
- CLI example: https://aka.ms/azmlcli
- Notebook example: [model-register-and-deploy](./model-register-and-deploy.ipynb).

View File

@@ -1,597 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
"\n",
"Licensed under the MIT License."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/deployment/deploy-to-cloud/model-register-and-deploy.png)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Register model and deploy as webservice in ACI\n",
"\n",
"Following this notebook, you will:\n",
"\n",
" - Learn how to register a model in your Azure Machine Learning Workspace.\n",
" - Deploy your model as a web service in an Azure Container Instance."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Prerequisites\n",
"\n",
"If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, make sure you go through the [configuration notebook](../../../configuration.ipynb) to install the Azure Machine Learning Python SDK and create a workspace."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import azureml.core\n",
"\n",
"\n",
"# Check core SDK version number.\n",
"print('SDK version:', azureml.core.VERSION)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Initialize workspace\n",
"\n",
"Create a [Workspace](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.workspace%28class%29?view=azure-ml-py) object from your persisted configuration."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"create workspace"
]
},
"outputs": [],
"source": [
"from azureml.core import Workspace\n",
"\n",
"\n",
"ws = Workspace.from_config()\n",
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep='\\n')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create trained model\n",
"\n",
"For this example, we will train a small model on scikit-learn's [diabetes dataset](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html). "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import joblib\n",
"\n",
"from sklearn.datasets import load_diabetes\n",
"from sklearn.linear_model import Ridge\n",
"\n",
"\n",
"dataset_x, dataset_y = load_diabetes(return_X_y=True)\n",
"\n",
"model = Ridge().fit(dataset_x, dataset_y)\n",
"\n",
"joblib.dump(model, 'sklearn_regression_model.pkl')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Register input and output datasets\n",
"\n",
"Here, you will register the data used to create the model in your workspace."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"\n",
"from azureml.core import Dataset\n",
"\n",
"\n",
"np.savetxt('features.csv', dataset_x, delimiter=',')\n",
"np.savetxt('labels.csv', dataset_y, delimiter=',')\n",
"\n",
"datastore = ws.get_default_datastore()\n",
"datastore.upload_files(files=['./features.csv', './labels.csv'],\n",
" target_path='sklearn_regression/',\n",
" overwrite=True)\n",
"\n",
"input_dataset = Dataset.Tabular.from_delimited_files(path=[(datastore, 'sklearn_regression/features.csv')])\n",
"output_dataset = Dataset.Tabular.from_delimited_files(path=[(datastore, 'sklearn_regression/labels.csv')])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Register model\n",
"\n",
"Register a file or folder as a model by calling [Model.register()](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.model.model?view=azure-ml-py#register-workspace--model-path--model-name--tags-none--properties-none--description-none--datasets-none--model-framework-none--model-framework-version-none--child-paths-none-).\n",
"\n",
"In addition to the content of the model file itself, your registered model will also store model metadata -- model description, tags, and framework information -- that will be useful when managing and deploying models in your workspace. Using tags, for instance, you can categorize your models and apply filters when listing models in your workspace. Also, marking this model with the scikit-learn framework will simplify deploying it as a web service, as we'll see later."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"register model from file",
"sample-model-register"
]
},
"outputs": [],
"source": [
"import sklearn\n",
"\n",
"from azureml.core import Model\n",
"from azureml.core.resource_configuration import ResourceConfiguration\n",
"\n",
"\n",
"model = Model.register(workspace=ws,\n",
" model_name='my-sklearn-model', # Name of the registered model in your workspace.\n",
" model_path='./sklearn_regression_model.pkl', # Local file to upload and register as a model.\n",
" model_framework=Model.Framework.SCIKITLEARN, # Framework used to create the model.\n",
" model_framework_version=sklearn.__version__, # Version of scikit-learn used to create the model.\n",
" sample_input_dataset=input_dataset,\n",
" sample_output_dataset=output_dataset,\n",
" resource_configuration=ResourceConfiguration(cpu=1, memory_in_gb=0.5),\n",
" description='Ridge regression model to predict diabetes progression.',\n",
" tags={'area': 'diabetes', 'type': 'regression'})\n",
"\n",
"print('Name:', model.name)\n",
"print('Version:', model.version)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Deploy model\n",
"\n",
"Deploy your model as a web service using [Model.deploy()](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.model.model?view=azure-ml-py#deploy-workspace--name--models--inference-config--deployment-config-none--deployment-target-none-). Web services take one or more models, load them in an environment, and run them on one of several supported deployment targets. For more information on all your options when deploying models, see the [next steps](#Next-steps) section at the end of this notebook.\n",
"\n",
"For this example, we will deploy your scikit-learn model to an Azure Container Instance (ACI)."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Use a default environment (for supported models)\n",
"\n",
"The Azure Machine Learning service provides a default environment for supported model frameworks, including scikit-learn, based on the metadata you provided when registering your model. This is the easiest way to deploy your model.\n",
"\n",
"Even when you deploy your model to ACI with a default environment you can still customize the deploy configuration (i.e. the number of cores and amount of memory made available for the deployment) using the [AciWebservice.deploy_configuration()](https://docs.microsoft.com/python/api/azureml-core/azureml.core.webservice.aci.aciwebservice#deploy-configuration-cpu-cores-none--memory-gb-none--tags-none--properties-none--description-none--location-none--auth-enabled-none--ssl-enabled-none--enable-app-insights-none--ssl-cert-pem-file-none--ssl-key-pem-file-none--ssl-cname-none--dns-name-label-none--). Look at the \"Use a custom environment\" section of this notebook for more information on deploy configuration.\n",
"\n",
"**Note**: This step can take several minutes."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"service_name = 'my-sklearn-service'\n",
"\n",
"service = Model.deploy(ws, service_name, [model], overwrite=True)\n",
"service.wait_for_deployment(show_output=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"After your model is deployed, perform a call to the web service using [service.run()](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.webservice%28class%29?view=azure-ml-py#run-input-)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"\n",
"\n",
"input_payload = json.dumps({\n",
" 'data': dataset_x[0:2].tolist(),\n",
" 'method': 'predict' # If you have a classification model, you can get probabilities by changing this to 'predict_proba'.\n",
"})\n",
"\n",
"output = service.run(input_payload)\n",
"\n",
"print(output)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"When you are finished testing your service, clean up the deployment with [service.delete()](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.webservice%28class%29?view=azure-ml-py#delete--)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"service.delete()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Use a custom environment\n",
"\n",
"If you want more control over how your model is run, if it uses another framework, or if it has special runtime requirements, you can instead specify your own environment and scoring method. Custom environments can be used for any model you want to deploy.\n",
"\n",
"Specify the model's runtime environment by creating an [Environment](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.environment%28class%29?view=azure-ml-py) object and providing the [CondaDependencies](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.conda_dependencies.condadependencies?view=azure-ml-py) needed by your model."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core import Environment\n",
"from azureml.core.conda_dependencies import CondaDependencies\n",
"\n",
"\n",
"environment = Environment('my-sklearn-environment')\n",
"environment.python.conda_dependencies = CondaDependencies.create(conda_packages=[\n",
" 'pip==20.2.4'],\n",
" pip_packages=[\n",
" 'azureml-defaults',\n",
" 'inference-schema[numpy-support]',\n",
" 'joblib',\n",
" 'numpy==1.23',\n",
" 'scikit-learn=={}'.format(sklearn.__version__)\n",
"])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"When using a custom environment, you must also provide Python code for initializing and running your model. An example script is included with this notebook."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"with open('score.py') as f:\n",
" print(f.read())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Deploy your model in the custom environment by providing an [InferenceConfig](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.model.inferenceconfig?view=azure-ml-py) object to [Model.deploy()](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.model.model?view=azure-ml-py#deploy-workspace--name--models--inference-config--deployment-config-none--deployment-target-none-). In this case we are also using the [AciWebservice.deploy_configuration()](https://docs.microsoft.com/python/api/azureml-core/azureml.core.webservice.aci.aciwebservice#deploy-configuration-cpu-cores-none--memory-gb-none--tags-none--properties-none--description-none--location-none--auth-enabled-none--ssl-enabled-none--enable-app-insights-none--ssl-cert-pem-file-none--ssl-key-pem-file-none--ssl-cname-none--dns-name-label-none--) method to generate a custom deploy configuration.\n",
"\n",
"**Note**: This step can take several minutes."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"azuremlexception-remarks-sample",
"sample-aciwebservice-deploy-config"
]
},
"outputs": [],
"source": [
"from azureml.core.model import InferenceConfig\n",
"from azureml.core.webservice import AciWebservice\n",
"\n",
"\n",
"service_name = 'my-custom-env-service'\n",
"\n",
"inference_config = InferenceConfig(entry_script='score.py', environment=environment)\n",
"aci_config = AciWebservice.deploy_configuration(cpu_cores=1, memory_gb=1)\n",
"\n",
"service = Model.deploy(workspace=ws,\n",
" name=service_name,\n",
" models=[model],\n",
" inference_config=inference_config,\n",
" deployment_config=aci_config,\n",
" overwrite=True)\n",
"service.wait_for_deployment(show_output=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"After your model is deployed, make a call to the web service using [service.run()](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.webservice%28class%29?view=azure-ml-py#run-input-)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"input_payload = json.dumps({\n",
" 'data': dataset_x[0:2].tolist()\n",
"})\n",
"\n",
"output = service.run(input_payload)\n",
"\n",
"print(output)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"When you are finished testing your service, clean up the deployment with [service.delete()](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.webservice%28class%29?view=azure-ml-py#delete--)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"service.delete()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Model Profiling\n",
"\n",
"Profile your model to understand how much CPU and memory the service, created as a result of its deployment, will need. Profiling returns information such as CPU usage, memory usage, and response latency. It also provides a CPU and memory recommendation based on the resource usage. You can profile your model (or more precisely the service built based on your model) on any CPU and/or memory combination where 0.1 <= CPU <= 3.5 and 0.1GB <= memory <= 15GB. If you do not provide a CPU and/or memory requirement, we will test it on the default configuration of 3.5 CPU and 15GB memory.\n",
"\n",
"In order to profile your model you will need:\n",
"- a registered model\n",
"- an entry script\n",
"- an inference configuration\n",
"- a single column tabular dataset, where each row contains a string representing sample request data sent to the service.\n",
"\n",
"Please, note that profiling is a long running operation and can take up to 25 minutes depending on the size of the dataset.\n",
"\n",
"At this point we only support profiling of services that expect their request data to be a string, for example: string serialized json, text, string serialized image, etc. The content of each row of the dataset (string) will be put into the body of the HTTP request and sent to the service encapsulating the model for scoring.\n",
"\n",
"Below is an example of how you can construct an input dataset to profile a service which expects its incoming requests to contain serialized json. In this case we created a dataset based one hundred instances of the same request data. In real world scenarios however, we suggest that you use larger datasets with various inputs, especially if your model resource usage/behavior is input dependent."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"You may want to register datasets using the register() method to your workspace so they can be shared with others, reused and referred to by name in your script.\n",
"You can try get the dataset first to see if it's already registered."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core import Datastore\n",
"from azureml.core.dataset import Dataset\n",
"from azureml.data import dataset_type_definitions\n",
"\n",
"dataset_name='diabetes_sample_request_data'\n",
"\n",
"dataset_registered = False\n",
"try:\n",
" sample_request_data = Dataset.get_by_name(workspace = ws, name = dataset_name)\n",
" dataset_registered = True\n",
"except:\n",
" print(\"The dataset {} is not registered in workspace yet.\".format(dataset_name))\n",
"\n",
"if not dataset_registered:\n",
" # create a string that can be utf-8 encoded and\n",
" # put in the body of the request\n",
" serialized_input_json = json.dumps({\n",
" 'data': [\n",
" [ 0.03807591, 0.05068012, 0.06169621, 0.02187235, -0.0442235,\n",
" -0.03482076, -0.04340085, -0.00259226, 0.01990842, -0.01764613]\n",
" ]\n",
" })\n",
" dataset_content = []\n",
" for i in range(100):\n",
" dataset_content.append(serialized_input_json)\n",
" dataset_content = '\\n'.join(dataset_content)\n",
" file_name = \"{}.txt\".format(dataset_name)\n",
" f = open(file_name, 'w')\n",
" f.write(dataset_content)\n",
" f.close()\n",
"\n",
" # upload the txt file created above to the Datastore and create a dataset from it\n",
" data_store = Datastore.get_default(ws)\n",
" data_store.upload_files(['./' + file_name], target_path='sample_request_data')\n",
" datastore_path = [(data_store, 'sample_request_data' +'/' + file_name)]\n",
" sample_request_data = Dataset.Tabular.from_delimited_files(\n",
" datastore_path,\n",
" separator='\\n',\n",
" infer_column_types=True,\n",
" header=dataset_type_definitions.PromoteHeadersBehavior.NO_HEADERS)\n",
" sample_request_data = sample_request_data.register(workspace=ws,\n",
" name=dataset_name,\n",
" create_new_version=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now that we have an input dataset we are ready to go ahead with profiling. In this case we are testing the previously introduced sklearn regression model on 1 CPU and 0.5 GB memory. The memory usage and recommendation presented in the result is measured in Gigabytes. The CPU usage and recommendation is measured in CPU cores."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from datetime import datetime\n",
"\n",
"\n",
"environment = Environment('my-sklearn-environment')\n",
"environment.python.conda_dependencies = CondaDependencies.create(conda_packages=[\n",
" 'pip==20.2.4'],\n",
" pip_packages=[\n",
" 'azureml-defaults',\n",
" 'inference-schema[numpy-support]',\n",
" 'joblib',\n",
" 'numpy==1.23',\n",
" 'scikit-learn=={}'.format(sklearn.__version__)\n",
"])\n",
"inference_config = InferenceConfig(entry_script='score.py', environment=environment)\n",
"# if cpu and memory_in_gb parameters are not provided\n",
"# the model will be profiled on default configuration of\n",
"# 3.5CPU and 15GB memory\n",
"profile = Model.profile(ws,\n",
" 'rgrsn-%s' % datetime.now().strftime('%m%d%Y-%H%M%S'),\n",
" [model],\n",
" inference_config,\n",
" input_dataset=sample_request_data,\n",
" cpu=1.0,\n",
" memory_in_gb=0.5)\n",
"\n",
"# profiling is a long running operation and may take up to 25 min\n",
"profile.wait_for_completion(True)\n",
"details = profile.get_details()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Model packaging\n",
"\n",
"If you want to build a Docker image that encapsulates your model and its dependencies, you can use the model packaging option. The output image will be pushed to your workspace's ACR.\n",
"\n",
"You must include an Environment object in your inference configuration to use `Model.package()`.\n",
"\n",
"```python\n",
"package = Model.package(ws, [model], inference_config)\n",
"package.wait_for_creation(show_output=True) # Or show_output=False to hide the Docker build logs.\n",
"package.pull()\n",
"```\n",
"\n",
"Instead of a fully-built image, you can also generate a Dockerfile and download all the assets needed to build an image on top of your Environment.\n",
"\n",
"```python\n",
"package = Model.package(ws, [model], inference_config, generate_dockerfile=True)\n",
"package.wait_for_creation(show_output=True)\n",
"package.save(\"./local_context_dir\")\n",
"```"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Next steps\n",
"\n",
" - To run a production-ready web service, see the [notebook on deployment to Azure Kubernetes Service](../production-deploy-to-aks/production-deploy-to-aks.ipynb).\n",
" - To run a local web service, see the [notebook on deployment to a local Docker container](../deploy-to-local/register-model-deploy-local.ipynb).\n",
" - For more information on datasets, see the [notebook on training with datasets](../../work-with-data/datasets-tutorial/train-with-datasets/train-with-datasets.ipynb).\n",
" - For more information on environments, see the [notebook on using environments](../../training/using-environments/using-environments.ipynb).\n",
" - For information on all the available deployment targets, see [&ldquo;How and where to deploy models&rdquo;](https://docs.microsoft.com/azure/machine-learning/v1/how-to-deploy-and-where#choose-a-compute-target)."
]
}
],
"metadata": {
"authors": [
{
"name": "vaidyas"
}
],
"category": "deployment",
"compute": [
"None"
],
"datasets": [
"Diabetes"
],
"deployment": [
"Azure Container Instance"
],
"exclude_from_index": false,
"framework": [
"Scikit-learn"
],
"friendly_name": "Register model and deploy as webservice",
"index_order": 3,
"kernelspec": {
"display_name": "Python 3.8 - AzureML",
"language": "python",
"name": "python38-azureml"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.0"
},
"star_tag": [
"featured"
],
"tags": [
"None"
],
"task": "Deploy a model with Azure Machine Learning"
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -1,6 +0,0 @@
name: model-register-and-deploy
dependencies:
- pip:
- azureml-sdk
- numpy
- scikit-learn

View File

@@ -1,38 +0,0 @@
import joblib
import numpy as np
import os
from inference_schema.schema_decorators import input_schema, output_schema
from inference_schema.parameter_types.numpy_parameter_type import NumpyParameterType
# The init() method is called once, when the web service starts up.
#
# Typically you would deserialize the model file, as shown here using joblib,
# and store it in a global variable so your run() method can access it later.
def init():
global model
# The AZUREML_MODEL_DIR environment variable indicates
# a directory containing the model file you registered.
model_filename = 'sklearn_regression_model.pkl'
model_path = os.path.join(os.environ['AZUREML_MODEL_DIR'], model_filename)
model = joblib.load(model_path)
# The run() method is called each time a request is made to the scoring API.
#
# Shown here are the optional input_schema and output_schema decorators
# from the inference-schema pip package. Using these decorators on your
# run() method parses and validates the incoming payload against
# the example input you provide here. This will also generate a Swagger
# API document for your web service.
@input_schema('data', NumpyParameterType(np.array([[0.1, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8, 8.9, 9.0]])))
@output_schema(NumpyParameterType(np.array([4429.929236457418])))
def run(data):
# Use the model object loaded by init().
result = model.predict(data)
# You can return any JSON-serializable object.
return result.tolist()

View File

@@ -1,373 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
"\n",
"Licensed under the MIT License."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/deployment/production-deploy-to-aks/production-deploy-to-aks.png)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Deploy models to Azure Kubernetes Service (AKS) using controlled roll out\n",
"This notebook will show you how to deploy mulitple AKS webservices with the same scoring endpoint and how to roll out your models in a controlled manner by configuring % of scoring traffic going to each webservice. If you are using a Notebook VM, you are all set. Otherwise, go through the [configuration notebook](../../../configuration.ipynb) to install the Azure Machine Learning Python SDK and create an Azure ML Workspace."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Check for latest version\n",
"import azureml.core\n",
"print(azureml.core.VERSION)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Initialize workspace\n",
"Create a [Workspace](https://docs.microsoft.com/python/api/azureml-core/azureml.core.workspace%28class%29?view=azure-ml-py) object from your persisted configuration."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.workspace import Workspace\n",
"\n",
"ws = Workspace.from_config()\n",
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Register the model\n",
"Register a file or folder as a model by calling [Model.register()](https://docs.microsoft.com/python/api/azureml-core/azureml.core.model.model?view=azure-ml-py#register-workspace--model-path--model-name--tags-none--properties-none--description-none--datasets-none--model-framework-none--model-framework-version-none--child-paths-none-).\n",
"In addition to the content of the model file itself, your registered model will also store model metadata -- model description, tags, and framework information -- that will be useful when managing and deploying models in your workspace. Using tags, for instance, you can categorize your models and apply filters when listing models in your workspace."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core import Model\n",
"\n",
"model = Model.register(workspace=ws,\n",
" model_name='sklearn_regression_model.pkl', # Name of the registered model in your workspace.\n",
" model_path='./sklearn_regression_model.pkl', # Local file to upload and register as a model.\n",
" model_framework=Model.Framework.SCIKITLEARN, # Framework used to create the model.\n",
" model_framework_version='0.19.1', # Version of scikit-learn used to create the model.\n",
" description='Ridge regression model to predict diabetes progression.',\n",
" tags={'area': 'diabetes', 'type': 'regression'})\n",
"\n",
"print('Name:', model.name)\n",
"print('Version:', model.version)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Register an environment (for all models)\n",
"\n",
"If you control over how your model is run, or if it has special runtime requirements, you can specify your own environment and scoring method.\n",
"\n",
"Specify the model's runtime environment by creating an [Environment](https://docs.microsoft.com/python/api/azureml-core/azureml.core.environment%28class%29?view=azure-ml-py) object and providing the [CondaDependencies](https://docs.microsoft.com/python/api/azureml-core/azureml.core.conda_dependencies.condadependencies?view=azure-ml-py) needed by your model."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core import Environment\n",
"from azureml.core.conda_dependencies import CondaDependencies\n",
"\n",
"environment=Environment('my-sklearn-environment')\n",
"environment.python.conda_dependencies = CondaDependencies.create(conda_packages=[\n",
" 'pip==20.2.4'],\n",
" pip_packages=[\n",
" 'azureml-defaults',\n",
" 'inference-schema[numpy-support]',\n",
" 'numpy==1.23',\n",
" 'scikit-learn==0.22.1',\n",
" 'scipy'\n",
"])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"When using a custom environment, you must also provide Python code for initializing and running your model. An example script is included with this notebook."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"with open('score.py') as f:\n",
" print(f.read())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create the InferenceConfig\n",
"Create the inference configuration to reference your environment and entry script during deployment"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.model import InferenceConfig\n",
"\n",
"inference_config = InferenceConfig(entry_script='score.py', \n",
" source_directory='.',\n",
" environment=environment)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Provision the AKS Cluster\n",
"If you already have an AKS cluster attached to this workspace, skip the step below and provide the name of the cluster.\n",
"\n",
"> Note that if you have an AzureML Data Scientist role, you will not have permission to create compute resources. Talk to your workspace or IT admin to create the compute targets described in this section, if they do not already exist."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.compute import AksCompute\n",
"from azureml.core.compute import ComputeTarget\n",
"# Use the default configuration (can also provide parameters to customize)\n",
"prov_config = AksCompute.provisioning_configuration()\n",
"\n",
"aks_name = 'my-aks' \n",
"# Create the cluster\n",
"aks_target = ComputeTarget.create(workspace = ws, \n",
" name = aks_name, \n",
" provisioning_configuration = prov_config) \n",
"aks_target.wait_for_completion(show_output=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create an Endpoint and add a version (AKS service)\n",
"This creates a new endpoint and adds a version behind it. By default the first version added is the default version. You can specify the traffic percentile a version takes behind an endpoint. \n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# deploying the model and create a new endpoint\n",
"from azureml.core.webservice import AksEndpoint\n",
"# from azureml.core.compute import ComputeTarget\n",
"\n",
"#select a created compute\n",
"compute = ComputeTarget(ws, 'my-aks')\n",
"namespace_name=\"endpointnamespace\"\n",
"# define the endpoint name\n",
"endpoint_name = \"myendpoint1\"\n",
"# define the service name\n",
"version_name= \"versiona\"\n",
"\n",
"endpoint_deployment_config = AksEndpoint.deploy_configuration(tags = {'modelVersion':'firstversion', 'department':'finance'}, \n",
" description = \"my first version\", namespace = namespace_name, \n",
" version_name = version_name, traffic_percentile = 40)\n",
"\n",
"endpoint = Model.deploy(ws, endpoint_name, [model], inference_config, endpoint_deployment_config, compute)\n",
"endpoint.wait_for_deployment(True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"endpoint.get_logs()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Add another version of the service to an existing endpoint\n",
"This adds another version behind an existing endpoint. You can specify the traffic percentile the new version takes. If no traffic_percentile is specified then it defaults to 0. All the unspecified traffic percentile (in this example 50) across all versions goes to default version."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Adding a new version to an existing Endpoint.\n",
"version_name_add=\"versionb\" \n",
"\n",
"endpoint.create_version(version_name = version_name_add, inference_config=inference_config, models=[model], tags = {'modelVersion':'secondversion', 'department':'finance'}, \n",
" description = \"my second version\", traffic_percentile = 10)\n",
"endpoint.wait_for_deployment(True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Update an existing version in an endpoint\n",
"There are two types of versions: control and treatment. An endpoint contains one or more treatment versions but only one control version. This categorization helps compare the different versions against the defined control version."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"endpoint.update_version(version_name=endpoint.versions[version_name_add].name, description=\"my second version update\", traffic_percentile=40, is_default=True, is_control_version_type=True)\n",
"endpoint.wait_for_deployment(True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Test the web service using run method\n",
"Test the web sevice by passing in data. Run() method retrieves API keys behind the scenes to make sure that call is authenticated."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Scoring on endpoint\n",
"import json\n",
"test_sample = json.dumps({'data': [\n",
" [1,2,3,4,5,6,7,8,9,10], \n",
" [10,9,8,7,6,5,4,3,2,1]\n",
"]})\n",
"\n",
"test_sample_encoded = bytes(test_sample, encoding='utf8')\n",
"prediction = endpoint.run(input_data=test_sample_encoded)\n",
"print(prediction)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Delete Resources"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# deleting a version in an endpoint\n",
"endpoint.delete_version(version_name=version_name)\n",
"endpoint.wait_for_deployment(True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# deleting an endpoint, this will delete all versions in the endpoint and the endpoint itself\n",
"endpoint.delete()"
]
}
],
"metadata": {
"authors": [
{
"name": "shipatel"
}
],
"category": "deployment",
"compute": [
"None"
],
"datasets": [
"Diabetes"
],
"deployment": [
"Azure Kubernetes Service"
],
"exclude_from_index": false,
"framework": [
"Scikit-learn"
],
"friendly_name": "Deploy models to AKS using controlled roll out",
"index_order": 3,
"kernelspec": {
"display_name": "Python 3.8 - AzureML",
"language": "python",
"name": "python38-azureml"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.0"
},
"star_tag": [
"featured"
],
"tags": [
"None"
],
"task": "Deploy a model with Azure Machine Learning"
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -1,4 +0,0 @@
name: deploy-aks-with-controlled-rollout
dependencies:
- pip:
- azureml-sdk

View File

@@ -1,28 +0,0 @@
import pickle
import json
import numpy
from sklearn.externals import joblib
from sklearn.linear_model import Ridge
from azureml.core.model import Model
def init():
global model
# note here "sklearn_regression_model.pkl" is the name of the model registered under
# this is a different behavior than before when the code is run locally, even though the code is the same.
model_path = Model.get_model_path('sklearn_regression_model.pkl')
# deserialize the model file back into a sklearn model
model = joblib.load(model_path)
# note you can pass in multiple rows for scoring
def run(raw_data):
try:
data = json.loads(raw_data)['data']
data = numpy.array(data)
result = model.predict(data)
# you can return any data type as long as it is JSON-serializable
return result.tolist()
except Exception as e:
error = str(e)
return error

View File

@@ -123,7 +123,7 @@
"import pickle\n",
"import json\n",
"import numpy\n",
"from sklearn.externals import joblib\n",
"import joblib\n",
"from sklearn.linear_model import Ridge\n",
"import time\n",
"\n",

View File

@@ -105,7 +105,7 @@
" print('Found existing compute target.')\n",
"except ComputeTargetException:\n",
" print('Creating a new compute target...')\n",
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6', \n",
" compute_config = AmlCompute.provisioning_configuration(vm_size='Standard_NC6s_v3', \n",
" max_nodes=6)\n",
"\n",
" # create the cluster\n",
@@ -620,7 +620,7 @@
},
"manual": null
},
"vm_size": "STANDARD_NC6"
"vm_size": "Standard_NC6s_v3"
},
"error": "",
"layout": "IPY_MODEL_c899ddfc2b134ca9b89a4f278ac7c997",

View File

@@ -136,6 +136,9 @@
"# Choose a name for your GPU cluster\n",
"gpu_cluster_name = \"aks-gpu-cluster\"\n",
"\n",
"# Choose a location for your GPU cluster\n",
"gpu_cluster_location = \"eastus\"\n",
"\n",
"# Verify that cluster does not exist already\n",
"try:\n",
" gpu_cluster = ComputeTarget(workspace=ws, name=gpu_cluster_name)\n",
@@ -146,7 +149,8 @@
" # Specify the configuration for the new cluster\n",
" compute_config = AksCompute.provisioning_configuration(cluster_purpose=AksCompute.ClusterPurpose.DEV_TEST,\n",
" agent_count=1,\n",
" vm_size=\"Standard_NV6\")\n",
" vm_size=\"Standard_NC6s_v3\",\n",
" location=gpu_cluster_location)\n",
" # Create the cluster with the specified name and configuration\n",
" gpu_cluster = ComputeTarget.create(ws, gpu_cluster_name, compute_config)\n",
"\n",
@@ -170,7 +174,7 @@
"outputs": [],
"source": [
"%%writefile score.py\n",
"import tensorflow as tf\n",
"import tensorflow.compat.v1 as tf\n",
"import numpy as np\n",
"import json\n",
"import os\n",
@@ -240,9 +244,9 @@
"# Please see [Azure ML Containers repository](https://github.com/Azure/AzureML-Containers#featured-tags)\n",
"# for open-sourced GPU base images.\n",
"env.docker.base_image = DEFAULT_GPU_IMAGE\n",
"env.python.conda_dependencies = CondaDependencies.create(python_version=\"3.6.2\", pin_sdk_version=False,\n",
" conda_packages=['tensorflow-gpu==1.12.0','numpy'],\n",
" pip_packages=['azureml-contrib-services==1.47.0', 'azureml-defaults==1.47.0'])\n",
"env.python.conda_dependencies = CondaDependencies.create(python_version=\"3.8\", pin_sdk_version=False,\n",
" conda_packages=['tensorflow-gpu','numpy'],\n",
" pip_packages=['azureml-contrib-services', 'azureml-defaults'])\n",
"\n",
"inference_config = InferenceConfig(entry_script=\"score.py\", environment=env)\n",
"aks_config = AksWebservice.deploy_configuration()\n",

View File

@@ -2,4 +2,3 @@ name: production-deploy-to-aks-gpu
dependencies:
- pip:
- azureml-sdk
- tensorflow

View File

@@ -154,7 +154,7 @@
"import pickle\n",
"import json\n",
"import numpy\n",
"from sklearn.externals import joblib\n",
"import joblib\n",
"from sklearn.linear_model import Ridge\n",
"from inference_schema.schema_decorators import input_schema, output_schema\n",
"from inference_schema.parameter_types.standard_py_parameter_type import StandardPythonParameterType\n",

View File

@@ -154,7 +154,7 @@
"import pickle\n",
"import json\n",
"import numpy\n",
"from sklearn.externals import joblib\n",
"import joblib\n",
"from sklearn.linear_model import Ridge\n",
"\n",
"def init():\n",

View File

@@ -1 +0,0 @@
{"class":"org.apache.spark.ml.classification.LogisticRegressionModel","timestamp":1570147252329,"sparkVersion":"2.4.0","uid":"LogisticRegression_5df3978caaf3","paramMap":{"regParam":0.01},"defaultParamMap":{"aggregationDepth":2,"threshold":0.5,"rawPredictionCol":"rawPrediction","featuresCol":"features","labelCol":"label","predictionCol":"prediction","family":"auto","regParam":0.0,"tol":1.0E-6,"probabilityCol":"probability","standardization":true,"elasticNetParam":0.0,"maxIter":100,"fitIntercept":true}}

View File

@@ -1,349 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
"\n",
"Licensed under the MIT License."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/deployment/deploy-to-cloud/model-register-and-deploy.png)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Register Spark Model and deploy as Webservice\n",
"\n",
"This example shows how to deploy a Webservice in step-by-step fashion:\n",
"\n",
" 1. Register Spark Model\n",
" 2. Deploy Spark Model as Webservice"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Prerequisites\n",
"If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, make sure you go through the [configuration](../../../configuration.ipynb) Notebook first if you haven't."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Check core SDK version number\n",
"import azureml.core\n",
"\n",
"print(\"SDK version:\", azureml.core.VERSION)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Initialize Workspace\n",
"\n",
"Initialize a workspace object from persisted configuration."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"create workspace"
]
},
"outputs": [],
"source": [
"from azureml.core import Workspace\n",
"\n",
"ws = Workspace.from_config()\n",
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep='\\n')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Register Model"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"You can add tags and descriptions to your Models. Note you need to have a `iris.model` file in the current directory. This model file is generated using [train in spark](../training/train-in-spark/train-in-spark.ipynb) notebook. The below call registers that file as a Model with the same name `iris.model` in the workspace.\n",
"\n",
"Using tags, you can track useful information such as the name and version of the machine learning library used to train the model. Note that tags must be alphanumeric."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"register model from file"
]
},
"outputs": [],
"source": [
"from azureml.core.model import Model\n",
"\n",
"model = Model.register(model_path=\"iris.model\",\n",
" model_name=\"iris.model\",\n",
" tags={'type': \"regression\"},\n",
" description=\"Logistic regression model to predict iris species\",\n",
" workspace=ws)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Fetch Environment"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"You can now create and/or use an Environment object when deploying a Webservice. The Environment can have been previously registered with your Workspace, or it will be registered with it as a part of the Webservice deployment.\n",
"\n",
"More information can be found in our [using environments notebook](../training/using-environments/using-environments.ipynb)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core import Environment\r\n",
"from azureml.core.environment import SparkPackage\r\n",
"from azureml.core.conda_dependencies import CondaDependencies\r\n",
"\r\n",
"myenv = Environment('my-pyspark-environment')\r\n",
"myenv.docker.base_image = \"mcr.microsoft.com/mmlspark/release:0.15\"\r\n",
"myenv.inferencing_stack_version = \"latest\"\r\n",
"myenv.python.conda_dependencies = CondaDependencies.create(pip_packages=[\"azureml-core\",\"azureml-defaults\",\"azureml-telemetry\",\"azureml-train-restclients-hyperdrive\",\"azureml-train-core\"], python_version=\"3.7.0\")\r\n",
"myenv.python.conda_dependencies.add_channel(\"conda-forge\")\r\n",
"myenv.spark.packages = [SparkPackage(\"com.microsoft.ml.spark\", \"mmlspark_2.11\", \"0.15\"), SparkPackage(\"com.microsoft.azure\", \"azure-storage\", \"2.0.0\"), SparkPackage(\"org.apache.hadoop\", \"hadoop-azure\", \"2.7.0\")]\r\n",
"myenv.spark.repositories = [\"https://mmlspark.azureedge.net/maven\"]\r\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create Inference Configuration\n",
"\n",
"There is now support for a source directory, you can upload an entire folder from your local machine as dependencies for the Webservice.\n",
"Note: in that case, your entry_script is relative path to the source_directory path.\n",
"\n",
"Sample code for using a source directory:\n",
"\n",
"```python\n",
"inference_config = InferenceConfig(source_directory=\"C:/abc\",\n",
" entry_script=\"x/y/score.py\",\n",
" environment=environment)\n",
"```\n",
"\n",
" - source_directory = holds source path as string, this entire folder gets added in image so its really easy to access any files within this folder or subfolder\n",
" - entry_script = contains logic specific to initializing your model and running predictions\n",
" - environment = An environment object to use for the deployment. Doesn't have to be registered"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"create image"
]
},
"outputs": [],
"source": [
"from azureml.core.model import InferenceConfig\n",
"\n",
"inference_config = InferenceConfig(entry_script=\"score.py\", environment=myenv)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Deploy Model as Webservice on Azure Container Instance\n",
"\n",
"Note that the service creation can take few minutes."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"azuremlexception-remarks-sample"
]
},
"outputs": [],
"source": [
"from azureml.core.webservice import AciWebservice, Webservice\n",
"from azureml.exceptions import WebserviceException\n",
"\n",
"deployment_config = AciWebservice.deploy_configuration(cpu_cores=1, memory_gb=1)\n",
"aci_service_name = 'aciservice1'\n",
"\n",
"try:\n",
" # if you want to get existing service below is the command\n",
" # since aci name needs to be unique in subscription deleting existing aci if any\n",
" # we use aci_service_name to create azure aci\n",
" service = Webservice(ws, name=aci_service_name)\n",
" if service:\n",
" service.delete()\n",
"except WebserviceException as e:\n",
" print()\n",
"\n",
"service = Model.deploy(ws, aci_service_name, [model], inference_config, deployment_config)\n",
"\n",
"service.wait_for_deployment(True)\n",
"print(service.state)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Test web service"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"test_sample = json.dumps({'features':{'type':1,'values':[4.3,3.0,1.1,0.1]},'label':2.0})\n",
"\n",
"test_sample_encoded = bytes(test_sample, encoding='utf8')\n",
"prediction = service.run(input_data=test_sample_encoded)\n",
"print(prediction)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Delete ACI to clean up"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"deploy service",
"aci"
]
},
"outputs": [],
"source": [
"service.delete()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Model Profiling\n",
"\n",
"You can also take advantage of the profiling feature to estimate CPU and memory requirements for models.\n",
"\n",
"```python\n",
"profile = Model.profile(ws, \"profilename\", [model], inference_config, test_sample)\n",
"profile.wait_for_profiling(True)\n",
"profiling_results = profile.get_results()\n",
"print(profiling_results)\n",
"```"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Model Packaging\n",
"\n",
"If you want to build a Docker image that encapsulates your model and its dependencies, you can use the model packaging option. The output image will be pushed to your workspace's ACR.\n",
"\n",
"You must include an Environment object in your inference configuration to use `Model.package()`.\n",
"\n",
"```python\n",
"package = Model.package(ws, [model], inference_config)\n",
"package.wait_for_creation(show_output=True) # Or show_output=False to hide the Docker build logs.\n",
"package.pull()\n",
"```\n",
"\n",
"Instead of a fully-built image, you can also generate a Dockerfile and download all the assets needed to build an image on top of your Environment.\n",
"\n",
"```python\n",
"package = Model.package(ws, [model], inference_config, generate_dockerfile=True)\n",
"package.wait_for_creation(show_output=True)\n",
"package.save(\"./local_context_dir\")\n",
"```"
]
}
],
"metadata": {
"authors": [
{
"name": "vaidyas"
}
],
"category": "deployment",
"compute": [
"None"
],
"datasets": [
"Iris"
],
"deployment": [
"Azure Container Instance"
],
"exclude_from_index": false,
"framework": [
"PySpark"
],
"friendly_name": "Register Spark model and deploy as webservice",
"kernelspec": {
"display_name": "Python 3.8 - AzureML",
"language": "python",
"name": "python38-azureml"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -1,4 +0,0 @@
name: model-register-and-deploy-spark
dependencies:
- pip:
- azureml-sdk

View File

@@ -1,37 +0,0 @@
import traceback
from pyspark.ml.linalg import VectorUDT
from azureml.core.model import Model
from pyspark.ml.classification import LogisticRegressionModel
from pyspark.sql.types import StructType, StructField
from pyspark.sql.types import DoubleType
from pyspark.sql import SQLContext
from pyspark import SparkContext
sc = SparkContext.getOrCreate()
sqlContext = SQLContext(sc)
spark = sqlContext.sparkSession
input_schema = StructType([StructField("features", VectorUDT()), StructField("label", DoubleType())])
reader = spark.read
reader.schema(input_schema)
def init():
global model
# note here "iris.model" is the name of the model registered under the workspace
# this call should return the path to the model.pkl file on the local disk.
model_path = Model.get_model_path('iris.model')
# Load the model file back into a LogisticRegression model
model = LogisticRegressionModel.load(model_path)
def run(data):
try:
input_df = reader.json(sc.parallelize([data]))
result = model.transform(input_df)
# you can return any datatype as long as it is JSON-serializable
return result.collect()[0]['prediction']
except Exception as e:
traceback.print_exc()
error = str(e)
return error

View File

@@ -1,59 +0,0 @@
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license.
from azureml.core.run import Run
from azureml.interpret import ExplanationClient
from interpret_community.adapter import ExplanationAdapter
import joblib
import os
import shap
import xgboost
OUTPUT_DIR = './outputs/'
os.makedirs(OUTPUT_DIR, exist_ok=True)
run = Run.get_context()
client = ExplanationClient.from_run(run)
# get a dataset on income prediction
X, y = shap.datasets.adult()
features = X.columns.values
# train an XGBoost model (but any other tree model type should work)
model = xgboost.XGBClassifier()
model.fit(X, y)
explainer = shap.explainers.GPUTree(model, X)
X_shap = X[:100]
shap_values = explainer(X_shap)
print("computed shap values:")
print(shap_values)
# Use the explanation adapter to convert the importances into an interpret-community
# style explanation which can be uploaded to AzureML or visualized in the
# ExplanationDashboard widget
adapter = ExplanationAdapter(features, classification=True)
global_explanation = adapter.create_global(shap_values.values, X_shap, expected_values=shap_values.base_values)
# write X_shap out as a pickle file for later visualization
x_shap_pkl = 'x_shap.pkl'
with open(x_shap_pkl, 'wb') as file:
joblib.dump(value=X_shap, filename=os.path.join(OUTPUT_DIR, x_shap_pkl))
run.upload_file('x_shap_adult_census.pkl', os.path.join(OUTPUT_DIR, x_shap_pkl))
model_file_name = 'xgboost_.pkl'
# save model in the outputs folder so it automatically gets uploaded
with open(model_file_name, 'wb') as file:
joblib.dump(value=model, filename=os.path.join(OUTPUT_DIR,
model_file_name))
# register the model
run.upload_file('xgboost_model.pkl', os.path.join('./outputs/', model_file_name))
original_model = run.register_model(model_name='xgboost_with_gpu_tree_explainer',
model_path='xgboost_model.pkl')
# Uploading model explanation data for storage or visualization in webUX
# The explanation can then be downloaded on any compute
comment = 'Global explanation on classification model trained on adult census income dataset'
client.upload_model_explanation(global_explanation, comment=comment, model_id=original_model.id)

View File

@@ -1,516 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
"\n",
"Licensed under the MIT License."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/automated-machine-learning/classification-text-dnn/auto-ml-classification-text-dnn.png)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Explain tree-based models on GPU using GPUTreeExplainer\n",
"\n",
"\n",
"_**This notebook illustrates how to use shap's GPUTreeExplainer on an Azure GPU machine.**_\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"Problem: Train a tree-based model and explain the model on an Azure GPU machine using the GPUTreeExplainer.\n",
"\n",
"---\n",
"\n",
"## Table of Contents\n",
"\n",
"1. [Introduction](#Introduction)\n",
"1. [Setup](#Setup)\n",
"1. [Run model explainer locally at training time](#Explain)\n",
" 1. Apply feature transformations\n",
" 1. Train a binary classification model\n",
" 1. Explain the model on raw features\n",
" 1. Generate global explanations\n",
" 1. Generate local explanations\n",
"1. [Visualize explanations](#Visualize)\n",
"1. [Deploy model and scoring explainer](#Deploy)\n",
"1. [Next steps](#Next)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Introduction\n",
"This notebook demonstrates how to use the GPUTreeExplainer on some simple datasets. Like the TreeExplainer, the GPUTreeExplainer is specifically designed for tree-based machine learning models, but it is designed to accelerate the computations using NVIDIA GPUs.\n",
"\n",
"\n",
"Make sure you have executed the [configuration](../../../configuration.ipynb) before running this notebook.\n",
"\n",
"Notebook synopsis:\n",
"\n",
"1. Creating an Experiment in an existing Workspace\n",
"2. Configuration and remote run with a GPU machine"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Setup"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import logging\n",
"import os\n",
"import shutil\n",
"\n",
"import pandas as pd\n",
"\n",
"import azureml.core\n",
"from azureml.core.experiment import Experiment\n",
"from azureml.core.workspace import Workspace\n",
"from azureml.core.dataset import Dataset\n",
"from azureml.core.compute import AmlCompute\n",
"from azureml.core.compute import ComputeTarget\n",
"from azureml.core.run import Run\n",
"from azureml.core.model import Model"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This sample notebook may use features that are not available in previous versions of the Azure ML SDK."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.48.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"As part of the setup you have already created a <b>Workspace</b>. To run the script, you also need to create an <b>Experiment</b>. An Experiment corresponds to a prediction problem you are trying to solve, while a Run corresponds to a specific approach to the problem."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ws = Workspace.from_config()\n",
"\n",
"# Choose an experiment name.\n",
"experiment_name = 'gpu-tree-explainer'\n",
"\n",
"experiment = Experiment(ws, experiment_name)\n",
"\n",
"output = {}\n",
"output['Subscription ID'] = ws.subscription_id\n",
"output['Workspace Name'] = ws.name\n",
"output['Resource Group'] = ws.resource_group\n",
"output['Location'] = ws.location\n",
"output['Experiment Name'] = experiment.name\n",
"pd.set_option('display.max_colwidth', -1)\n",
"outputDf = pd.DataFrame(data = output, index = [''])\n",
"outputDf.T"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Create project directory\n",
"\n",
"Create a directory that will contain all the necessary code from your local machine that you will need access to on the remote resource. This includes the training script, and any additional files your training script depends on"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import shutil\n",
"\n",
"project_folder = './azureml-shap-gpu-tree-explainer'\n",
"os.makedirs(project_folder, exist_ok=True)\n",
"shutil.copy('gpu_tree_explainer.py', project_folder)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Set up a compute cluster\n",
"This section uses a user-provided compute cluster (named \"gpu-shap-cluster\" in this example). If a cluster with this name does not exist in the user's workspace, the below code will create a new cluster. You can choose the parameters of the cluster as mentioned in the comments."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
"from azureml.core.compute_target import ComputeTargetException\n",
"\n",
"num_nodes = 1\n",
"\n",
"# Choose a name for your cluster.\n",
"amlcompute_cluster_name = \"gpu-shap-cluster\"\n",
"\n",
"# Verify that cluster does not exist already\n",
"try:\n",
" compute_target = ComputeTarget(workspace=ws, name=amlcompute_cluster_name)\n",
" print('Found existing cluster, use it.')\n",
"except ComputeTargetException:\n",
" compute_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_NC6\",\n",
" # To use GPUTreeExplainer, select a GPU such as \"STANDARD_NC6\" \n",
" # or similar GPU option\n",
" # available in your workspace\n",
" max_nodes = num_nodes)\n",
" compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, compute_config)\n",
"\n",
"compute_target.wait_for_completion(show_output=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Configure & Run"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.runconfig import RunConfiguration\n",
"from azureml.core.conda_dependencies import CondaDependencies\n",
"\n",
"# Create a new RunConfig object\n",
"run_config = RunConfiguration(framework=\"python\")\n",
"\n",
"# Set compute target to AmlCompute target created in previous step\n",
"run_config.target = amlcompute_cluster_name\n",
"\n",
"from azureml.core import Environment\n",
"\n",
"environment_name = \"shapgpu\"\n",
"env = Environment(environment_name)\n",
"\n",
"env.docker.enabled = True\n",
"env.docker.base_image = None\n",
"\n",
"\n",
"# Note: this is to pin the pandas and xgboost versions to be same as notebook.\n",
"# In production scenario user would choose their dependencies\n",
"import pkg_resources\n",
"from distutils.version import LooseVersion\n",
"available_packages = pkg_resources.working_set\n",
"pandas_ver = None\n",
"numpy_ver = None\n",
"sklearn_ver = None\n",
"for dist in list(available_packages):\n",
" if dist.key == 'pandas':\n",
" pandas_ver = dist.version\n",
" if dist.key == 'numpy':\n",
" if LooseVersion(dist.version) >= LooseVersion('1.20.0'):\n",
" numpy_ver = dist.version\n",
" else:\n",
" numpy_ver = '1.21.6'\n",
" if dist.key == 'scikit-learn':\n",
" sklearn_ver = dist.version\n",
"pandas_dep = 'pandas'\n",
"numpy_dep = 'numpy'\n",
"sklearn_dep = 'scikit-learn'\n",
"if pandas_ver:\n",
" pandas_dep = 'pandas=={}'.format(pandas_ver)\n",
"if numpy_ver:\n",
" numpy_dep = 'numpy=={}'.format(numpy_ver)\n",
"if sklearn_ver:\n",
" sklearn_dep = 'scikit-learn=={}'.format(sklearn_ver)\n",
"\n",
"# Note: we build shap at commit 690245 for Tesla K80 GPUs\n",
"env.docker.base_dockerfile = f\"\"\"\n",
"FROM nvidia/cuda:10.2-devel-ubuntu18.04\n",
"ENV PATH=\"/root/miniconda3/bin:${{PATH}}\"\n",
"ARG PATH=\"/root/miniconda3/bin:${{PATH}}\"\n",
"RUN apt-get update && \\\n",
"apt-get install -y fuse && \\\n",
"apt-get install -y build-essential && \\\n",
"apt-get install -y python3-dev && \\\n",
"apt-get install -y wget && \\\n",
"apt-get install -y git && \\\n",
"rm -rf /var/lib/apt/lists/* && \\\n",
"wget \\\n",
"https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \\\n",
"mkdir /root/.conda && \\\n",
"bash Miniconda3-latest-Linux-x86_64.sh -b && \\\n",
"rm -f Miniconda3-latest-Linux-x86_64.sh && \\\n",
"conda init bash && \\\n",
". ~/.bashrc && \\\n",
"conda create -n shapgpu python=3.8 && \\\n",
"conda activate shapgpu && \\\n",
"apt-get install -y g++ && \\\n",
"printenv && \\\n",
"echo \"which nvcc: \" && \\\n",
"which nvcc && \\\n",
"pip install azureml-defaults && \\\n",
"pip install azureml-telemetry && \\\n",
"pip install azureml-interpret && \\\n",
"pip install {pandas_dep} && \\\n",
"cd /usr/local/src && \\\n",
"git clone https://github.com/slundberg/shap.git --single-branch && \\\n",
"cd shap && \\\n",
"git reset --hard 690245c6ab043edf40cfce3d8438a62e29ab599f && \\\n",
"mkdir build && \\\n",
"python setup.py install --user && \\\n",
"pip uninstall -y xgboost && \\\n",
"conda install py-xgboost==1.3.3 && \\\n",
"pip uninstall -y numpy && \\\n",
"pip install {numpy_dep} &&\\\n",
"pip install {sklearn_dep} \\\n",
"\"\"\"\n",
"\n",
"env.python.user_managed_dependencies = True\n",
"env.python.interpreter_path = '/root/miniconda3/envs/shapgpu/bin/python'\n",
"\n",
"from azureml.core import Run\n",
"from azureml.core import ScriptRunConfig\n",
"\n",
"src = ScriptRunConfig(source_directory=project_folder, \n",
" script='gpu_tree_explainer.py', \n",
" compute_target=amlcompute_cluster_name,\n",
" environment=env) \n",
"run = experiment.submit(config=src)\n",
"run"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Note: if you need to cancel a run, you can follow [these instructions](https://aka.ms/aml-docs-cancel-run)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"# Shows output of the run on stdout.\n",
"run.wait_for_completion(show_output=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"run.get_metrics()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Download \n",
"1. Download model explanation data."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.interpret import ExplanationClient\n",
"\n",
"# Get model explanation data\n",
"client = ExplanationClient.from_run(run)\n",
"global_explanation = client.download_model_explanation()\n",
"local_importance_values = global_explanation.local_importance_values\n",
"expected_values = global_explanation.expected_values"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Get the top k (e.g., 4) most important features with their importance values\n",
"global_explanation_topk = client.download_model_explanation(top_k=4)\n",
"global_importance_values = global_explanation_topk.get_ranked_global_values()\n",
"global_importance_names = global_explanation_topk.get_ranked_global_names()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print('global importance values: {}'.format(global_importance_values))\n",
"print('global importance names: {}'.format(global_importance_names))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"2. Download model file."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Retrieve model for visualization and deployment\n",
"from azureml.core.model import Model\n",
"import joblib\n",
"original_model = Model(ws, 'xgboost_with_gpu_tree_explainer')\n",
"model_path = original_model.download(exist_ok=True)\n",
"original_model = joblib.load(model_path)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"3. Download test dataset."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Retrieve x_test for visualization\n",
"x_test_path = './x_shap_adult_census.pkl'\n",
"run.download_file('x_shap_adult_census.pkl', output_file_path=x_test_path)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"x_test = joblib.load('x_shap_adult_census.pkl')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Visualize\n",
"Load the visualization dashboard"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from raiwidgets import ExplanationDashboard"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from interpret_community.common.model_wrapper import wrap_model\n",
"from interpret_community.dataset.dataset_wrapper import DatasetWrapper\n",
"# note we need to wrap the XGBoost model to output predictions and probabilities in the scikit-learn format\n",
"class WrappedXGBoostModel(object):\n",
" \"\"\"A class for wrapping an XGBoost model to output integer predicted classes.\"\"\"\n",
"\n",
" def __init__(self, model):\n",
" self.model = model\n",
"\n",
" def predict(self, dataset):\n",
" return self.model.predict(dataset).astype(int)\n",
"\n",
" def predict_proba(self, dataset):\n",
" return self.model.predict_proba(dataset)\n",
"\n",
"wrapped_model = WrappedXGBoostModel(wrap_model(original_model, DatasetWrapper(x_test), model_task='classification'))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ExplanationDashboard(global_explanation, wrapped_model, dataset=x_test)"
]
}
],
"metadata": {
"authors": [
{
"name": "ilmat"
}
],
"kernelspec": {
"display_name": "Python 3.8 - AzureML",
"language": "python",
"name": "python38-azureml"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -1,18 +0,0 @@
name: train-explain-model-gpu-tree-explainer
dependencies:
- py-xgboost==1.3.3
- pip:
- azureml-sdk
- azureml-interpret
- flask
- flask-cors
- gevent>=1.3.6
- ipython
- matplotlib
- ipywidgets
- raiwidgets~=0.23.0
- itsdangerous==2.0.1
- markupsafe<2.1.0
- scipy>=1.5.3
- protobuf==3.20.0
- jinja2==3.0.3

View File

@@ -8,9 +8,8 @@ dependencies:
- gevent>=1.3.6
- ipython
- matplotlib
- azureml-dataset-runtime
- ipywidgets
- raiwidgets~=0.23.0
- raiwidgets~=0.28.0
- itsdangerous==2.0.1
- markupsafe<2.1.0
- scipy>=1.5.3

View File

@@ -9,7 +9,7 @@ dependencies:
- ipython
- matplotlib
- ipywidgets
- raiwidgets~=0.23.0
- raiwidgets~=0.28.0
- packaging>=20.9
- itsdangerous==2.0.1
- markupsafe<2.1.0

View File

@@ -9,7 +9,7 @@ dependencies:
- ipython
- matplotlib
- ipywidgets
- raiwidgets~=0.23.0
- raiwidgets~=0.28.0
- packaging>=20.9
- itsdangerous==2.0.1
- markupsafe<2.1.0

View File

@@ -8,10 +8,9 @@ dependencies:
- gevent>=1.3.6
- ipython
- matplotlib
- azureml-dataset-runtime
- azureml-core
- ipywidgets
- raiwidgets~=0.23.0
- raiwidgets~=0.28.0
- itsdangerous==2.0.1
- markupsafe<2.1.0
- scipy>=1.5.3

View File

@@ -175,7 +175,7 @@
"store_name=os.getenv(\"ADL_STORENAME_62\", \"<my-datastore-name>\") # ADLS account name\n",
"tenant_id=os.getenv(\"ADL_TENANT_62\", \"<my-tenant-id>\") # tenant id of service principal\n",
"client_id=os.getenv(\"ADL_CLIENTID_62\", \"<my-client-id>\") # client id of service principal\n",
"client_secret=os.getenv(\"ADL_CLIENT_SECRET_62\", \"<my-client-secret>\") # the secret of service principal\n",
"client_st=os.getenv(\"ADL_CLIENT_SECRET_62\", \"<my-client-secret>\") # the secret of service principal\n",
"\n",
"try:\n",
" adls_datastore = Datastore.get(ws, datastore_name)\n",
@@ -189,7 +189,7 @@
" store_name=store_name, # ADLS account name\n",
" tenant_id=tenant_id, # tenant id of service principal\n",
" client_id=client_id, # client id of service principal\n",
" client_secret=client_secret) # the secret of service principal\n",
" client_secret=client_st) # the secret of service principal\n",
" print(\"Registered datastore with name: %s\" % datastore_name)\n",
"\n",
"adls_data_ref = DataReference(\n",

View File

@@ -233,7 +233,7 @@
" print('Found existing compute target {}.'.format(cluster_name))\n",
"except ComputeTargetException:\n",
" print('Creating a new compute target...')\n",
" compute_config = AmlCompute.provisioning_configuration(vm_size=\"STANDARD_NC6\",\n",
" compute_config = AmlCompute.provisioning_configuration(vm_size=\"Standard_NC6s_v3\",\n",
" max_nodes=4)\n",
"\n",
" compute_target = ComputeTarget.create(ws, cluster_name, compute_config)\n",

View File

@@ -133,7 +133,7 @@
" \n",
"if not found:\n",
" print('Creating a new compute target...')\n",
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\", # for GPU, use \"STANDARD_NC6\"\n",
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\", # for GPU, use \"Standard_NC6s_v3\"\n",
" #vm_priority = 'lowpriority', # optional\n",
" max_nodes = 4)\n",
"\n",

View File

@@ -136,7 +136,7 @@
" \n",
"if not found:\n",
" print('Creating a new compute target...')\n",
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\", # for GPU, use \"STANDARD_NC6\"\n",
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\", # for GPU, use \"Standard_NC6s_v3\"\n",
" #vm_priority = 'lowpriority', # optional\n",
" max_nodes = 4)\n",
"\n",

View File

@@ -147,7 +147,7 @@
"store_name = os.getenv(\"ADL_STORENAME_62\", \"<my-datastore-name>\") # ADLS account name\n",
"tenant_id = os.getenv(\"ADL_TENANT_62\", \"<my-tenant-id>\") # tenant id of service principal\n",
"client_id = os.getenv(\"ADL_CLIENTID_62\", \"<my-client-id>\") # client id of service principal\n",
"client_secret = os.getenv(\"ADL_CLIENT_62_SECRET\", \"<my-client-secret>\") # the secret of service principal\n",
"client_st = os.getenv(\"ADL_CLIENT_62_SECRET\", \"<my-client-secret>\") # the secret of service principal\n",
"\n",
"try:\n",
" adls_datastore = Datastore.get(ws, datastore_name)\n",
@@ -161,7 +161,7 @@
" store_name=store_name, # ADLS account name\n",
" tenant_id=tenant_id, # tenant id of service principal\n",
" client_id=client_id, # client id of service principal\n",
" client_secret=client_secret) # the secret of service principal\n",
" client_secret=client_st) # the secret of service principal\n",
" print(\"registered datastore with name: %s\" % datastore_name)"
]
},

View File

@@ -330,7 +330,7 @@
"- **inputs:** List of input connections for data consumed by this step. Fetch this inside the notebook using dbutils.widgets.get(\"input\")\n",
"- **outputs:** List of output port definitions for outputs produced by this step. Fetch this inside the notebook using dbutils.widgets.get(\"output\")\n",
"- **existing_cluster_id:** Cluster ID of an existing Interactive cluster on the Databricks workspace. If you are providing this, do not provide any of the parameters below that are used to create a new cluster such as spark_version, node_type, etc.\n",
"- **spark_version:** Version of spark for the databricks run cluster. You can refer to [DataBricks runtime version](https://learn.microsoft.com/azure/databricks/dev-tools/api/#--runtime-version-strings) to specify the spark version. default value: 4.0.x-scala2.11\n",
"- **spark_version:** Version of spark for the databricks run cluster. You can refer to [DataBricks runtime version](https://learn.microsoft.com/azure/databricks/dev-tools/api/#--runtime-version-strings) to specify the spark version. default value: 10.4.x-scala2.12\n",
"- **node_type:** Azure vm node types for the databricks run cluster. default value: Standard_D3_v2\n",
"- **num_workers:** Specifies a static number of workers for the databricks run cluster\n",
"- **min_workers:** Specifies a min number of workers to use for auto-scaling the databricks run cluster\n",

View File

@@ -148,7 +148,7 @@
" compute_target = ComputeTarget(workspace=ws, name=amlcompute_cluster_name)\n",
" print('Found existing cluster, use it.')\n",
"except ComputeTargetException:\n",
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_DS12_V2',# for GPU, use \"STANDARD_NC6\"\n",
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_DS12_V2',# for GPU, use \"Standard_NC6s_v3\"\n",
" #vm_priority = 'lowpriority', # optional\n",
" max_nodes=4)\n",
" compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, compute_config)\n",

View File

@@ -86,7 +86,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"If we could not find the cluster with the given name, then we will create a new cluster here. We will create an `AmlCompute` cluster of `STANDARD_NC6` GPU VMs. This process is broken down into 3 steps:\n",
"If we could not find the cluster with the given name, then we will create a new cluster here. We will create an `AmlCompute` cluster of `Standard_NC6s_v3` GPU VMs. This process is broken down into 3 steps:\n",
"1. create the configuration (this step is local and only takes a second)\n",
"2. create the cluster (this step will take about **20 seconds**)\n",
"3. provision the VMs to bring the cluster to the initial size (of 1 in this case). This step will take about **3-5 minutes** and is providing only sparse output in the process. Please make sure to wait until the call returns before moving to the next cell"
@@ -109,7 +109,7 @@
" print('Found existing compute target')\n",
"except ComputeTargetException:\n",
" print('Creating a new compute target...')\n",
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6', max_nodes=4)\n",
" compute_config = AmlCompute.provisioning_configuration(vm_size='Standard_NC6s_v3', max_nodes=4)\n",
"\n",
" # create the cluster\n",
" gpu_cluster = ComputeTarget.create(ws, cluster_name, compute_config)\n",

View File

@@ -176,7 +176,7 @@
" \n",
"if not found:\n",
" print('Creating a new compute target...')\n",
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\", # for GPU, use \"STANDARD_NC6\"\n",
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\", # for GPU, use \"Standard_NC6s_v3\"\n",
" #vm_priority = 'lowpriority', # optional\n",
" max_nodes = 4)\n",
"\n",

View File

@@ -105,7 +105,7 @@
"compute_min_nodes = os.environ.get(\"AML_COMPUTE_CLUSTER_MIN_NODES\", 0)\n",
"compute_max_nodes = os.environ.get(\"AML_COMPUTE_CLUSTER_MAX_NODES\", 4)\n",
"\n",
"# This example uses CPU VM. For using GPU VM, set SKU to STANDARD_NC6\n",
"# This example uses CPU VM. For using GPU VM, set SKU to Standard_NC6s_v3\n",
"vm_size = os.environ.get(\"AML_COMPUTE_CLUSTER_SKU\", \"STANDARD_D2_V2\")\n",
"\n",
"\n",

View File

@@ -143,7 +143,7 @@
"compute_min_nodes = os.environ.get(\"AML_COMPUTE_CLUSTER_MIN_NODES\", 0)\n",
"compute_max_nodes = os.environ.get(\"AML_COMPUTE_CLUSTER_MAX_NODES\", 2)\n",
"\n",
"# This example uses CPU VM. For using GPU VM, set SKU to STANDARD_NC6\n",
"# This example uses CPU VM. For using GPU VM, set SKU to Standard_NC6s_v3\n",
"vm_size = os.environ.get(\"AML_COMPUTE_CLUSTER_SKU\", \"STANDARD_D2_V2\")\n",
"\n",
"\n",

View File

@@ -103,7 +103,7 @@
"compute_min_nodes = os.environ.get(\"AML_COMPUTE_CLUSTER_MIN_NODES\", 0)\n",
"compute_max_nodes = os.environ.get(\"AML_COMPUTE_CLUSTER_MAX_NODES\", 4)\n",
"\n",
"# This example uses CPU VM. For using GPU VM, set SKU to STANDARD_NC6\n",
"# This example uses CPU VM. For using GPU VM, set SKU to Standard_NC6s_v3\n",
"vm_size = os.environ.get(\"AML_COMPUTE_CLUSTER_SKU\", \"STANDARD_D2_V2\")\n",
"\n",
"\n",

View File

@@ -86,7 +86,7 @@
"import requests\n",
"\n",
"oj_sales_path = \"./oj.csv\"\n",
"r = requests.get(\"http://www.cs.unitn.it/~taufer/Data/oj.csv\")\n",
"r = requests.get(\"https://raw.githubusercontent.com/Azure/azureml-examples/main/sdk/python/jobs/automl-standalone-jobs/automl-forecasting-orange-juice-sales/data/dominicks_OJ.csv\")\n",
"open(oj_sales_path, \"wb\").write(r.content)"
]
},
@@ -140,7 +140,7 @@
"metadata": {},
"outputs": [],
"source": [
"partitioned_dataset = dataset.partition_by(partition_keys=['store', 'brand'], target=(datastore, \"partition_by_key_res\"), name=\"partitioned_oj_data\")\n",
"partitioned_dataset = dataset.partition_by(partition_keys=['Store', 'Brand'], target=(datastore, \"partition_by_key_res\"), name=\"partitioned_oj_data\")\n",
"partitioned_dataset.partition_keys"
]
},
@@ -165,7 +165,7 @@
"compute_min_nodes = os.environ.get(\"AML_COMPUTE_CLUSTER_MIN_NODES\", 0)\n",
"compute_max_nodes = os.environ.get(\"AML_COMPUTE_CLUSTER_MAX_NODES\", 2)\n",
"\n",
"# This example uses CPU VM. For using GPU VM, set SKU to STANDARD_NC6\n",
"# This example uses CPU VM. For using GPU VM, set SKU to Standard_NC6s_v3\n",
"vm_size = os.environ.get(\"AML_COMPUTE_CLUSTER_SKU\", \"STANDARD_D2_V2\")\n",
"\n",
"\n",
@@ -274,7 +274,7 @@
"parallel_run_config = ParallelRunConfig(\n",
" source_directory=scripts_folder,\n",
" entry_script=script_file, # the user script to run against each input\n",
" partition_keys=['store', 'brand'],\n",
" partition_keys=['Store', 'Brand'],\n",
" error_threshold=5,\n",
" output_action='append_row',\n",
" append_row_file_name=\"revenue_outputs.txt\",\n",
@@ -362,8 +362,8 @@
"result_file = os.path.join(target_dir, batch_output.path_on_datastore, parallel_run_config.append_row_file_name)\n",
"\n",
"df = pd.read_csv(result_file, delimiter=\" \", header=None)\n",
"df.columns=[\"WeekStarting\", \"Quantity\", \"logQuantity\", \"Advert\", \"Price\", \"Age60\", \"COLLEGE\", \"INCOME\", \"Hincome150\", \"Large HH\", \"Minorities\", \"WorkingWoman\", \"SSTRDIST\", \"SSTRVOL\", \"CPDIST5\", \"CPWVOL5\", \"Store\", \"Brand\", \"total_income\"]\n",
"\n",
"df.columns = [\"week\", \"logmove\", \"feat\", \"price\", \"AGE60\", \"EDUC\", \"ETHNIC\", \"INCOME\", \"HHLARGE\", \"WORKWOM\", \"HVAL150\", \"SSTRDIST\", \"SSTRVOL\", \"CPDIST5\", \"CPWVOL5\", \"store\", \"brand\", \"total_income\"]\n",
"print(\"Prediction has \", df.shape[0], \" rows\")\n",
"df.head(10)"
]
@@ -413,7 +413,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
"version": "3.8.13"
}
},
"nbformat": 4,

View File

@@ -210,7 +210,7 @@
" print(\"found existing cluster.\")\n",
"except ComputeTargetException:\n",
" print(\"creating new cluster\")\n",
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_NC6\",\n",
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"Standard_NC6s_v3\",\n",
" max_nodes = 3)\n",
"\n",
" # create the cluster\n",

View File

@@ -246,7 +246,7 @@
" print('Found existing compute target.')\n",
"except ComputeTargetException:\n",
" print('Creating a new compute target...')\n",
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6',\n",
" compute_config = AmlCompute.provisioning_configuration(vm_size='Standard_NC6s_v3',\n",
" max_nodes=4)\n",
"\n",
" # create the cluster\n",

View File

@@ -1,6 +1,7 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -10,6 +11,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -17,6 +19,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"nbpresent": {
@@ -40,6 +43,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -80,6 +84,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -101,6 +106,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"nbpresent": {
@@ -131,6 +137,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -168,6 +175,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -206,6 +214,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -213,6 +222,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"nbpresent": {
@@ -240,6 +250,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -269,6 +280,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -279,10 +291,11 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"If we could not find the cluster with the given name, then we will create a new cluster here. We will create an `AmlCompute` cluster of `STANDARD_NC6` GPU VMs. This process is broken down into 3 steps:\n",
"If we could not find the cluster with the given name, then we will create a new cluster here. We will create an `AmlCompute` cluster of `Standard_NC6s_v3` GPU VMs. This process is broken down into 3 steps:\n",
"1. create the configuration (this step is local and only takes a second)\n",
"2. create the cluster (this step will take about **20 seconds**)\n",
"3. provision the VMs to bring the cluster to the initial size (of 1 in this case). This step will take about **3-5 minutes** and is providing only sparse output in the process. Please make sure to wait until the call returns before moving to the next cell"
@@ -305,7 +318,7 @@
" print('Found existing compute target')\n",
"except ComputeTargetException:\n",
" print('Creating a new compute target...')\n",
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6', \n",
" compute_config = AmlCompute.provisioning_configuration(vm_size='Standard_NC6s_v3', \n",
" max_nodes=4)\n",
"\n",
" # create the cluster\n",
@@ -320,6 +333,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -338,6 +352,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -361,6 +376,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"nbpresent": {
@@ -375,6 +391,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -394,6 +411,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -411,6 +429,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -430,12 +449,12 @@
"channels:\n",
"- conda-forge\n",
"dependencies:\n",
"- python=3.7\n",
"- pip=21.3.1\n",
"- python=3.8\n",
"- pip=23.1.2\n",
"- pip:\n",
" - h5py<=2.10.0\n",
" - azureml-defaults\n",
" - tensorflow-gpu==2.0.0\n",
" - tensorflow-gpu==2.2.0\n",
" - keras<=2.3.1\n",
" - matplotlib\n",
" - protobuf==3.20.1"
@@ -457,6 +476,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -501,6 +521,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -518,6 +539,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -547,6 +569,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -572,6 +595,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -579,6 +603,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -619,6 +644,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -626,6 +652,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -649,6 +676,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -657,6 +685,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -668,6 +697,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -675,6 +705,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -691,6 +722,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -698,6 +730,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -712,6 +745,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -719,6 +753,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -726,6 +761,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -753,6 +789,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -775,6 +812,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -791,6 +829,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -813,6 +852,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -829,6 +869,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -863,6 +904,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -890,6 +932,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -906,6 +949,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -922,6 +966,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -970,6 +1015,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -997,6 +1043,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -1035,6 +1082,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -1051,6 +1099,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -1067,6 +1116,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -1115,6 +1165,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -1133,6 +1184,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -1162,6 +1214,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -1184,6 +1237,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [

View File

@@ -3,6 +3,4 @@ dependencies:
- pip:
- azureml-sdk
- azureml-widgets
- tensorflow
- keras<=2.3.1
- matplotlib

View File

@@ -97,7 +97,7 @@
"metadata": {},
"source": [
"## Create or attach existing AmlCompute\n",
"You will need to create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for training your model. In this tutorial, we use Azure ML managed compute ([AmlCompute](https://docs.microsoft.com/azure/machine-learning/service/how-to-set-up-training-targets#amlcompute)) for our remote training compute resource. Specifically, the below code creates an `STANDARD_NC6` GPU cluster that autoscales from `0` to `4` nodes.\n",
"You will need to create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for training your model. In this tutorial, we use Azure ML managed compute ([AmlCompute](https://docs.microsoft.com/azure/machine-learning/service/how-to-set-up-training-targets#amlcompute)) for our remote training compute resource. Specifically, the below code creates an `Standard_NC6s_v3` GPU cluster that autoscales from `0` to `4` nodes.\n",
"\n",
"> Note that if you have an AzureML Data Scientist role, you will not have permission to create compute resources. Talk to your workspace or IT admin to create the compute targets described in this section, if they do not already exist.\n",
"\n",
@@ -123,7 +123,7 @@
" print('Found existing compute target.')\n",
"except ComputeTargetException:\n",
" print('Creating a new compute target...')\n",
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6',\n",
" compute_config = AmlCompute.provisioning_configuration(vm_size='Standard_NC6s_v3',\n",
" max_nodes=4)\n",
"\n",
" # create the cluster\n",
@@ -293,7 +293,7 @@
"source": [
"from azureml.core import Environment\n",
"\n",
"pytorch_env = Environment.get(ws, name='AzureML-PyTorch-1.6-GPU')"
"pytorch_env = Environment.get(ws, name='azureml-acpt-pytorch-1.11-cuda11.3')"
]
},
{
@@ -323,7 +323,7 @@
"To use the per-process launch option in which Azure ML will handle launching each of the processes to run your training script,\n",
"\n",
"1. Specify the training script and arguments\n",
"2. Create a `PyTorchConfiguration` and specify `node_count` and `process_count`. The `process_count` is the total number of processes you want to run for the job; this should typically equal the # of GPUs available on each node multiplied by the # of nodes. Since this tutorial uses the `STANDARD_NC6` SKU, which has one GPU, the total process count for a 2-node job is `2`. If you are using a SKU with >1 GPUs, adjust the `process_count` accordingly.\n",
"2. Create a `PyTorchConfiguration` and specify `node_count` and `process_count`. The `process_count` is the total number of processes you want to run for the job; this should typically equal the # of GPUs available on each node multiplied by the # of nodes. Since this tutorial uses the `Standard_NC6s_v3` SKU, which has one GPU, the total process count for a 2-node job is `2`. If you are using a SKU with >1 GPUs, adjust the `process_count` accordingly.\n",
"\n",
"Azure ML will set the `MASTER_ADDR`, `MASTER_PORT`, `NODE_RANK`, `WORLD_SIZE` environment variables on each node, in addition to the process-level `RANK` and `LOCAL_RANK` environment variables, that are needed for distributed PyTorch training."
]

View File

@@ -97,7 +97,7 @@
"metadata": {},
"source": [
"## Create or attach existing AmlCompute\n",
"You will need to create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for training your model. In this tutorial, we use Azure ML managed compute ([AmlCompute](https://docs.microsoft.com/azure/machine-learning/service/how-to-set-up-training-targets#amlcompute)) for our remote training compute resource. Specifically, the below code creates an `STANDARD_NC6` GPU cluster that autoscales from `0` to `4` nodes.\n",
"You will need to create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for training your model. In this tutorial, we use Azure ML managed compute ([AmlCompute](https://docs.microsoft.com/azure/machine-learning/service/how-to-set-up-training-targets#amlcompute)) for our remote training compute resource. Specifically, the below code creates an `Standard_NC6s_v3` GPU cluster that autoscales from `0` to `4` nodes.\n",
"\n",
"> Note that if you have an AzureML Data Scientist role, you will not have permission to create compute resources. Talk to your workspace or IT admin to create the compute targets described in this section, if they do not already exist.\n",
"\n",
@@ -123,7 +123,7 @@
" print('Found existing compute target.')\n",
"except ComputeTargetException:\n",
" print('Creating a new compute target...')\n",
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6',\n",
" compute_config = AmlCompute.provisioning_configuration(vm_size='Standard_NC6s_v3',\n",
" max_nodes=4)\n",
"\n",
" # create the cluster\n",

View File

@@ -1,6 +1,7 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -10,6 +11,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -17,6 +19,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -28,6 +31,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -48,6 +52,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -71,6 +76,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -94,6 +100,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -124,7 +131,7 @@
" print('Found existing compute target.')\n",
"except ComputeTargetException:\n",
" print('Creating a new compute target...')\n",
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6', \n",
" compute_config = AmlCompute.provisioning_configuration(vm_size='Standard_NC6s_v3', \n",
" max_nodes=4)\n",
"\n",
" # create the cluster\n",
@@ -137,6 +144,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -144,6 +152,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -152,6 +161,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -172,6 +182,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -180,6 +191,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -204,6 +216,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -222,6 +235,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -242,32 +256,13 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Create an environment\n",
"\n",
"Define a conda environment YAML file with your training script dependencies and create an Azure ML environment."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%writefile conda_dependencies.yml\n",
"\n",
"channels:\n",
"- conda-forge\n",
"- pytorch\n",
"dependencies:\n",
"- python=3.8.12\n",
"- pip=21.3.1\n",
"- pytorch::pytorch==1.8.1\n",
"- pytorch::torchvision==0.9.1\n",
"- pip:\n",
" - azureml-defaults"
"Create an Azure ML environment."
]
},
{
@@ -278,14 +273,11 @@
"source": [
"from azureml.core import Environment\n",
"\n",
"pytorch_env = Environment.from_conda_specification(name = 'pytorch-1.6-gpu', file_path = './conda_dependencies.yml')\n",
"\n",
"# Specify a GPU base image\n",
"pytorch_env.docker.enabled = True\n",
"pytorch_env.docker.base_image = 'mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.1-cudnn8-ubuntu20.04'"
"pytorch_env = Environment.get(ws, name='azureml-acpt-pytorch-1.11-cuda11.3')"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -310,6 +302,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -338,6 +331,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -357,6 +351,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -373,6 +368,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -381,6 +377,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -417,6 +414,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -434,6 +432,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -451,6 +450,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -476,6 +476,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -517,6 +518,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -534,6 +536,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -542,6 +545,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -555,6 +559,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -564,6 +569,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -598,6 +604,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -605,6 +612,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -621,6 +629,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -637,6 +646,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -695,6 +705,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [

View File

@@ -1,6 +1,7 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -10,6 +11,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -17,6 +19,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -25,6 +28,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -32,6 +36,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -51,6 +56,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -58,6 +64,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -76,6 +83,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -83,6 +91,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -105,6 +114,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -112,6 +122,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -123,6 +134,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -164,6 +176,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -171,6 +184,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -178,6 +192,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -185,6 +200,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -192,6 +208,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -211,6 +228,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -218,6 +236,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -258,6 +277,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -265,6 +285,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -284,27 +305,13 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Create an environment\n",
"\n",
"Define a conda environment YAML file with your training script dependencies and create an Azure ML environment."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%writefile conda_dependencies.yml\n",
"\n",
"dependencies:\n",
"- python=3.6.2\n",
"- scikit-learn\n",
"- pip:\n",
" - azureml-defaults"
"Create an Azure ML environment."
]
},
{
@@ -315,10 +322,11 @@
"source": [
"from azureml.core import Environment\n",
"\n",
"sklearn_env = Environment.from_conda_specification(name = 'sklearn-env', file_path = './conda_dependencies.yml')"
"sklearn_env = Environment.get(ws, name='azureml-sklearn-1.0')"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -343,6 +351,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -350,6 +359,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -366,6 +376,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -373,6 +384,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -400,6 +412,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -407,6 +420,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -414,6 +428,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -421,6 +436,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -454,6 +470,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -471,6 +488,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -478,6 +496,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -512,6 +531,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -539,6 +559,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -555,6 +576,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [

View File

@@ -1,6 +1,7 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -10,6 +11,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -17,6 +19,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -25,6 +28,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -49,6 +53,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -72,6 +77,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -95,6 +101,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -125,7 +132,7 @@
" print('Found existing compute target')\n",
"except ComputeTargetException:\n",
" print('Creating a new compute target...')\n",
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6', \n",
" compute_config = AmlCompute.provisioning_configuration(vm_size='Standard_NC6s_v3', \n",
" max_nodes=4)\n",
"\n",
" # create the cluster\n",
@@ -138,6 +145,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -145,6 +153,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -152,6 +161,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -159,6 +169,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -179,6 +190,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -195,10 +207,11 @@
"source": [
"from azureml.core import Environment\n",
"\n",
"tf_env = Environment.get(ws, name='AzureML-tensorflow-2.7-ubuntu20.04-py38-cuda11-gpu')"
"tf_env = Environment.get(ws, name='azureml-tensorflow-2.11-cuda11')"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -226,6 +239,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -245,6 +259,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -263,6 +278,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [

View File

@@ -1,6 +1,7 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -10,6 +11,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -17,6 +19,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -25,6 +28,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -65,6 +69,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -88,6 +93,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -109,6 +115,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"nbpresent": {
@@ -139,6 +146,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"nbpresent": {
@@ -171,6 +179,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"nbpresent": {
@@ -215,6 +224,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -238,6 +248,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -266,6 +277,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -276,10 +288,11 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"If we could not find the cluster with the given name, then we will create a new cluster here. We will create an `AmlCompute` cluster of `STANDARD_NC6` GPU VMs. This process is broken down into 3 steps:\n",
"If we could not find the cluster with the given name, then we will create a new cluster here. We will create an `AmlCompute` cluster of `Standard_NC6s_v3` GPU VMs. This process is broken down into 3 steps:\n",
"1. create the configuration (this step is local and only takes a second)\n",
"2. create the cluster (this step will take about **20 seconds**)\n",
"3. provision the VMs to bring the cluster to the initial size (of 1 in this case). This step will take about **3-5 minutes** and is providing only sparse output in the process. Please make sure to wait until the call returns before moving to the next cell"
@@ -302,7 +315,7 @@
" print('Found existing compute target')\n",
"except ComputeTargetException:\n",
" print('Creating a new compute target...')\n",
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6',\n",
" compute_config = AmlCompute.provisioning_configuration(vm_size='Standard_NC6s_v3',\n",
" max_nodes=4)\n",
"\n",
" # create the cluster\n",
@@ -317,6 +330,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -335,6 +349,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -358,6 +373,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"nbpresent": {
@@ -372,6 +388,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -391,6 +408,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -408,6 +426,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -424,10 +443,11 @@
"source": [
"from azureml.core import Environment\n",
"\n",
"tf_env = Environment.get(ws, name='AzureML-tensorflow-2.6-ubuntu20.04-py38-cuda11-gpu')"
"tf_env = Environment.get(ws, name='azureml-tensorflow-2.11-cuda11')"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -457,6 +477,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -484,6 +505,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -494,6 +516,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -519,6 +542,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -541,6 +565,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -558,6 +583,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -581,6 +607,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -597,6 +624,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -631,6 +659,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -648,6 +677,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -664,6 +694,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -680,6 +711,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -710,6 +742,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -735,6 +768,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -753,6 +787,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -783,6 +818,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -808,6 +844,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -833,6 +870,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [

View File

@@ -1,6 +1,7 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -10,6 +11,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -17,6 +19,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"nbpresent": {
@@ -39,6 +42,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -79,6 +83,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -102,6 +107,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -123,6 +129,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"nbpresent": {
@@ -153,6 +160,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"nbpresent": {
@@ -186,6 +194,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"nbpresent": {
@@ -229,6 +238,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -252,6 +262,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -283,6 +294,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -293,10 +305,11 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"If we could not find the cluster with the given name, then we will create a new cluster here. We will create an `AmlCompute` cluster of `STANDARD_NC6` GPU VMs. This process is broken down into 3 steps:\n",
"If we could not find the cluster with the given name, then we will create a new cluster here. We will create an `AmlCompute` cluster of `Standard_NC6s_v3` GPU VMs. This process is broken down into 3 steps:\n",
"1. create the configuration (this step is local and only takes a second)\n",
"2. create the cluster (this step will take about **20 seconds**)\n",
"3. provision the VMs to bring the cluster to the initial size (of 1 in this case). This step will take about **3-5 minutes** and is providing only sparse output in the process. Please make sure to wait until the call returns before moving to the next cell"
@@ -319,7 +332,7 @@
" print('Found existing compute target')\n",
"except ComputeTargetException:\n",
" print('Creating a new compute target...')\n",
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6', \n",
" compute_config = AmlCompute.provisioning_configuration(vm_size='Standard_NC6s_v3', \n",
" max_nodes=4)\n",
"\n",
" # create the cluster\n",
@@ -334,6 +347,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -352,6 +366,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -375,6 +390,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"nbpresent": {
@@ -389,6 +405,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -408,6 +425,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -425,6 +443,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -441,10 +460,11 @@
"source": [
"from azureml.core import Environment\n",
"\n",
"tf_env = Environment.get(ws, name='AzureML-tensorflow-2.6-ubuntu20.04-py38-cuda11-gpu')"
"tf_env = Environment.get(ws, name='azureml-tensorflow-2.11-cuda11')"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -475,6 +495,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -492,6 +513,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -522,6 +544,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -547,6 +570,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -587,6 +611,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -616,6 +641,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -623,6 +649,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -639,6 +666,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -649,6 +677,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -657,6 +686,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -669,6 +699,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -676,6 +707,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -683,6 +715,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -710,6 +743,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -732,6 +766,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -748,6 +783,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -770,6 +806,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -786,6 +823,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -820,6 +858,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -846,6 +885,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -862,6 +902,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -878,6 +919,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -924,6 +966,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -950,6 +993,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -987,6 +1031,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -1003,6 +1048,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -1019,6 +1065,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -1067,6 +1114,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -1096,6 +1144,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -1118,6 +1167,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [

View File

@@ -6,7 +6,5 @@ dependencies:
- azureml-sdk
- azureml-widgets
- pandas
- keras
- tensorflow==2.0.0
- matplotlib
- fuse

View File

@@ -5,7 +5,7 @@ import numpy as np
import argparse
import os
import re
import tensorflow as tf
import tensorflow.compat.v1 as tf
import glob
from azureml.core import Run
@@ -41,8 +41,8 @@ y_test = load_data(glob.glob(os.path.join(data_folder, '**/t10k-labels-idx1-ubyt
recursive=True)[0], True).reshape(-1)
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape, sep='\n')
training_set_size = X_train.shape[0]
tf.disable_v2_behavior()
n_inputs = 28 * 28
n_h1 = 100

View File

@@ -1,6 +1,7 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -10,6 +11,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -17,6 +19,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -25,6 +28,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -49,6 +53,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -72,6 +77,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -95,6 +101,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -125,7 +132,7 @@
" print('Found existing compute target.')\n",
"except ComputeTargetException:\n",
" print('Creating a new compute target...')\n",
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6', \n",
" compute_config = AmlCompute.provisioning_configuration(vm_size='Standard_NC6s_v3', \n",
" max_nodes=4)\n",
"\n",
" # create the cluster\n",
@@ -138,6 +145,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -145,6 +153,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -169,6 +178,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -208,6 +218,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -215,6 +226,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -234,6 +246,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -256,6 +269,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -276,6 +290,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -292,10 +307,11 @@
"source": [
"from azureml.core import Environment\n",
"\n",
"tf_env = Environment.get(ws, name='AzureML-TensorFlow-1.13-GPU')"
"tf_env = Environment.get(ws, name='azureml-tensorflow-2.11-cuda11')"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -322,6 +338,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -340,6 +357,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -358,6 +376,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -374,6 +393,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -381,6 +401,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -401,6 +422,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@@ -428,6 +450,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [

View File

@@ -164,7 +164,7 @@
"source": [
"from azureml.core import Environment\n",
"\n",
"env = Environment.get(workspace=ws, name=\"AzureML-PyTorch-1.4-GPU\").clone(\"mlflow-env\")\n",
"env = Environment.get(workspace=ws, name=\"azureml-acpt-pytorch-1.11-cuda11.3\").clone(\"mlflow-env\")\n",
"\n",
"env.python.conda_dependencies.add_pip_package(\"azureml-mlflow\")\n",
"env.python.conda_dependencies.add_pip_package(\"Pillow==6.0.0\")"

View File

@@ -36,8 +36,6 @@ Using these samples, you will learn how to do the following.
| [cartpole_ci.ipynb](cartpole-on-compute-instance/cartpole_ci.ipynb) | Notebook to train a Cartpole playing agent on an Azure Machine Learning Compute Instance |
| [cartpole_sc.ipynb](cartpole-on-single-compute/cartpole_sc.ipynb) | Notebook to train a Cartpole playing agent on an Azure Machine Learning Compute Cluster (single node) |
| [pong_rllib.ipynb](atari-on-distributed-compute/pong_rllib.ipynb) | Notebook for distributed training of Pong agent using RLlib on multiple compute targets |
| [minecraft.ipynb](minecraft-on-distributed-compute/minecraft.ipynb) | Notebook to train an agent to navigate through a lava maze in the Minecraft game |
| [particle.ipynb](multiagent-particle-envs/particle.ipynb) | Notebook to train policies in a multiagent cooperative navigation scenario based on OpenAI's Particle environments |
## Prerequisites
@@ -46,7 +44,7 @@ To make use of these samples, you need the following.
* A Microsoft Azure subscription.
* A Microsoft Azure resource group.
* An Azure Machine Learning Workspace in the resource group.
* Azure Machine Learning training compute. These samples use the VM sizes `STANDARD_NC6` and `STANDARD_D2_V2`. If these are not available in your region,
* Azure Machine Learning training compute. These samples use the VM sizes `Standard_NC6s_v3` and `STANDARD_D2_V2`. If these are not available in your region,
you can replace them with other sizes.
* A virtual network set up in the resource group for samples that use multiple compute targets. The Cartpole and Multi-agent Particle examples do not need a virtual network. Any network security group defined on the virtual network must allow network traffic on ports used by Azure infrastructure services. Sample instructions are provided in Atari Pong and Minecraft example notebooks.

View File

@@ -1,14 +1,16 @@
FROM mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.2-cudnn8-ubuntu20.04:20221010.v1
# NC-series GPUs only support the pytorch-1.11/cuda11.3 combo
# See https://learn.microsoft.com/en-us/azure/machine-learning/resource-curated-environments
FROM mcr.microsoft.com/azureml/curated/acpt-pytorch-1.11-cuda11.3
USER root
ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/tensorflow-2.7
# ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/ray-rllib
# Create conda environment
RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \
python=3.8 pip=20.2.4
# RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \
# python=3.8.5
# Prepend path to AzureML conda environment
ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH
# ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH
# Install necessary packages to support videos in rllib/gym
RUN apt-get update && apt-get install -y --no-install-recommends \
@@ -22,51 +24,32 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
RUN pip --version
RUN python --version
# Install ray-on-aml
RUN pip install 'ray-on-aml==0.1.6'
RUN pip install ray-on-aml==0.2.4 \
ray==2.4.0 \
ray[rllib]==2.4.0 \
mlflow==2.3.1 \
azureml-defaults==1.50.0 \
azureml-dataset-runtime[fuse,pandas]==1.50.0 \
azureml-contrib-reinforcementlearning==1.50.0 \
gputil==1.4.0 \
scipy==1.9.1 \
pyglet==2.0.6 \
cloudpickle==2.2.1 \
tensorflow==2.11.0 \
tensorflow-probability==0.19.0 \
tabulate==0.9.0 \
dm_tree==0.1.8 \
lz4==4.3.2 \
psutil==5.9.4 \
setproctitle==1.3.2 \
pygame==2.1.0 \
gymnasium[classic_control]==0.26.3 \
gymnasium[atari]==0.26.3 \
gymnasium[accept-rom-license]==0.26.3 \
gym==0.26.2 \
gym[atari]==0.26.2 \
gym[accept-rom-license]==0.26.2
RUN pip install ray==0.8.7
RUN pip install gym[atari]==0.19.0
RUN pip install gym[accept-rom-license]==0.19.0
# Install pip dependencies for Tensorflow
RUN pip install 'matplotlib~=3.5.0' \
'psutil~=5.8.0' \
'tqdm~=4.62.0' \
'pandas~=1.3.0' \
'scipy~=1.7.0' \
'numpy~=1.21.0' \
'ipykernel~=6.0' \
'azureml-core==1.47.0' \
'azureml-defaults==1.47.0' \
'azureml-mlflow==1.47.0' \
'azureml-telemetry==1.47.0' \
'tensorboard~=2.7.0' \
'tensorflow-gpu~=2.7.0' \
'tensorflow-datasets~=4.5.0' \
'onnxruntime-gpu~=1.9.0' \
'protobuf~=3.20' \
'horovod[tensorflow-gpu]~=0.23.0' \
'debugpy~=1.6.3'
RUN pip install --no-cache-dir \
azureml-defaults \
azureml-dataset-runtime[fuse,pandas] \
azureml-contrib-reinforcementlearning \
gputil \
cloudpickle==1.3.0 \
tabulate \
dm_tree \
lz4 \
psutil \
setproctitle
# This is required for ray 0.8.7
RUN pip install -U aiohttp==3.7.4
RUN pip install 'msrest<0.7.0'
RUN pip install protobuf==3.20.0
# This is needed for mpi to locate libpython
ENV LD_LIBRARY_PATH $AZUREML_CONDA_ENVIRONMENT_PATH/lib:$LD_LIBRARY_PATH
# Display all versions
RUN pip freeze

View File

@@ -1,237 +0,0 @@
import sys
import csv
from azure.mgmt.network import NetworkManagementClient
def check_port_in_port_range(expected_port: str,
dest_port_range: str):
"""
Check if a port is within a port range
Port range maybe like *, 8080 or 8888-8889
"""
if dest_port_range == '*':
return True
dest_ports = dest_port_range.split('-')
if len(dest_ports) == 1 and \
int(dest_ports[0]) == int(expected_port):
return True
if len(dest_ports) == 2 and \
int(dest_ports[0]) <= int(expected_port) and \
int(dest_ports[1]) >= int(expected_port):
return True
return False
def check_port_in_destination_port_ranges(expected_port: str,
dest_port_ranges: list):
"""
Check if a port is within a given list of port ranges
i.e. check if port 8080 is in port ranges of 22,80,8080-8090,443
"""
for dest_port_range in dest_port_ranges:
if check_port_in_port_range(expected_port, dest_port_range) is True:
return True
return False
def check_ports_in_destination_port_ranges(expected_ports: list,
dest_port_ranges: list):
"""
Check if all ports in a given port list are within a given list
of port ranges
i.e. check if port 8080,8081 are in port ranges of 22,80,8080-8090,443
"""
for expected_port in expected_ports:
if check_port_in_destination_port_ranges(
expected_port, dest_port_ranges) is False:
return False
return True
def check_source_address_prefix(source_address_prefix: str):
"""Check if source address prefix is BatchNodeManagement or default"""
required_prefix = 'BatchNodeManagement'
default_prefix = 'default'
if source_address_prefix.lower() == required_prefix.lower() or \
source_address_prefix.lower() == default_prefix.lower():
return True
return False
def check_protocol(protocol: str):
"""Check if protocol is supported - Tcp/Any"""
required_protocol = 'Tcp'
any_protocol = 'Any'
if required_protocol.lower() == protocol.lower() or \
any_protocol.lower() == protocol.lower():
return True
return False
def check_direction(direction: str):
"""Check if port direction is inbound"""
required_direction = 'Inbound'
if required_direction.lower() == direction.lower():
return True
return False
def check_provisioning_state(provisioning_state: str):
"""Check if the provisioning state is succeeded"""
required_provisioning_state = 'Succeeded'
if required_provisioning_state.lower() == provisioning_state.lower():
return True
return False
def check_rule_for_Azure_ML(rule):
"""Check if the ports required for Azure Machine Learning are open"""
required_ports = ['29876', '29877']
if check_source_address_prefix(rule.source_address_prefix) is False:
return False
if check_protocol(rule.protocol) is False:
return False
if check_direction(rule.direction) is False:
return False
if check_provisioning_state(rule.provisioning_state) is False:
return False
if rule.destination_port_range is not None:
if check_ports_in_destination_port_ranges(
required_ports,
[rule.destination_port_range]) is False:
return False
else:
if check_ports_in_destination_port_ranges(
required_ports,
rule.destination_port_ranges) is False:
return False
return True
def check_vnet_security_rules(auth_object,
vnet_subscription_id,
vnet_resource_group,
vnet_name,
save_to_file=False):
"""
Check all the rules of virtual network if required ports for Azure Machine
Learning are open
"""
network_client = NetworkManagementClient(
auth_object,
vnet_subscription_id)
# get the vnet
vnet = network_client.virtual_networks.get(
resource_group_name=vnet_resource_group,
virtual_network_name=vnet_name)
vnet_location = vnet.location
vnet_info = []
if vnet.subnets is None or len(vnet.subnets) == 0:
print('WARNING: No subnet found for VNet:', vnet_name)
# for each subnet of the vnet
for subnet in vnet.subnets:
if subnet.network_security_group is None:
print('WARNING: No network security group found for subnet.',
'Subnet',
subnet.id.split("/")[-1])
else:
# get all the rules
network_security_group_name = \
subnet.network_security_group.id.split("/")[-1]
network_security_group_resource_group_name = \
subnet.network_security_group.id.split("/")[4]
network_security_group_subscription_id = \
subnet.network_security_group.id.split("/")[2]
security_rules = list(network_client.security_rules.list(
network_security_group_resource_group_name,
network_security_group_name))
rule_matched = None
for rule in security_rules:
rule_info = []
# add vnet details
rule_info.append(vnet_name)
rule_info.append(vnet_subscription_id)
rule_info.append(vnet_resource_group)
rule_info.append(vnet_location)
# add subnet details
rule_info.append(subnet.id.split("/")[-1])
rule_info.append(network_security_group_name)
rule_info.append(network_security_group_subscription_id)
rule_info.append(network_security_group_resource_group_name)
# add rule details
rule_info.append(rule.priority)
rule_info.append(rule.name)
rule_info.append(rule.source_address_prefix)
if rule.destination_port_range is not None:
rule_info.append(rule.destination_port_range)
else:
rule_info.append(rule.destination_port_ranges)
rule_info.append(rule.direction)
rule_info.append(rule.provisioning_state)
vnet_info.append(rule_info)
if check_rule_for_Azure_ML(rule) is True:
rule_matched = rule
if rule_matched is not None:
print("INFORMATION: Rule matched with required ports. Subnet:",
subnet.id.split("/")[-1], "Rule:", rule.name)
else:
print("WARNING: No rule matched with required ports. Subnet:",
subnet.id.split("/")[-1])
if save_to_file is True:
file_name = vnet_name + ".csv"
with open(file_name, mode='w') as vnet_rule_file:
vnet_rule_file_writer = csv.writer(
vnet_rule_file,
delimiter=',',
quotechar='"',
quoting=csv.QUOTE_MINIMAL)
header = ['VNet_Name', 'VNet_Subscription_ID',
'VNet_Resource_Group', 'VNet_Location',
'Subnet_Name', 'NSG_Name',
'NSG_Subscription_ID', 'NSG_Resource_Group',
'Rule_Priority', 'Rule_Name', 'Rule_Source',
'Rule_Destination_Ports', 'Rule_Direction',
'Rule_Provisioning_State']
vnet_rule_file_writer.writerow(header)
vnet_rule_file_writer.writerows(vnet_info)
print("INFORMATION: Network security group rules for your virtual \
network are saved in file", file_name)

View File

@@ -0,0 +1,18 @@
pong-impala-vectorized:
env: ALE/Pong-v5
run: IMPALA
config:
# Make analogous to old v4 + NoFrameskip.
env_config:
frameskip: 1
full_action_space: false
repeat_action_probability: 0.0
rollout_fragment_length: 50
train_batch_size: 500
num_workers: 11
num_envs_per_worker: 10
framework: torch
log_level: INFO
stop:
episode_reward_mean: 10
time_total_s: 3600

Some files were not shown because too many files have changed in this diff Show More