Compare commits

...

18 Commits

Author SHA1 Message Date
amlrelsa-ms
883e4a4c59 update samples from Release-92 as a part of SDK release 2021-03-10 01:48:54 +00:00
Harneet Virk
e90826b331 Merge pull request #1384 from yunjie-hub/master
Add synapse sample notebooks
2021-03-09 12:40:33 -08:00
yunjie-hub
ac04172f6d Add files via upload 2021-03-09 12:38:23 -08:00
Harneet Virk
8c0000beb4 Merge pull request #1382 from Azure/release_update/Release-91
update samples from Release-91 as a part of  SDK release
2021-03-08 21:43:10 -08:00
amlrelsa-ms
35287ab0d8 update samples from Release-91 as a part of SDK release 2021-03-09 05:36:08 +00:00
Harneet Virk
3fe4f8b038 Merge pull request #1375 from Azure/release_update/Release-90
update samples from Release-90 as a part of  SDK release
2021-03-01 09:15:14 -08:00
amlrelsa-ms
1722678469 update samples from Release-90 as a part of SDK release 2021-03-01 17:13:25 +00:00
Harneet Virk
17da7e8706 Merge pull request #1364 from Azure/release_update/Release-89
update samples from Release-89 as a part of  SDK release
2021-02-23 17:27:27 -08:00
amlrelsa-ms
d2e7213ff3 update samples from Release-89 as a part of SDK release 2021-02-24 01:26:17 +00:00
mx-iao
882cb76e8a Merge pull request #1361 from Azure/minxia/distr-pytorch
Update distributed pytorch example
2021-02-23 12:07:20 -08:00
mx-iao
37f37a46c1 Delete pytorch_mnist.py 2021-02-23 11:19:39 -08:00
mx-iao
0cd1412421 Delete distributed-pytorch-with-nccl-gloo.ipynb 2021-02-23 11:19:33 -08:00
mx-iao
c3ae9f00f6 Add files via upload 2021-02-23 11:19:02 -08:00
mx-iao
11b02c650c Rename how-to-use-azureml/ml-frameworks/pytorch/distributed-pytorch-with-distributeddataparallel.ipynb to how-to-use-azureml/ml-frameworks/pytorch/distributed-pytorch-with-distributeddataparallel/distributed-pytorch-with-distributeddataparallel.ipynb 2021-02-23 11:18:43 -08:00
mx-iao
606048c71f Add files via upload 2021-02-23 11:18:10 -08:00
Harneet Virk
cb1c354d44 Merge pull request #1353 from Azure/release_update/Release-88
update samples from Release-88 as a part of  SDK release 1.23.0
2021-02-22 11:49:02 -08:00
amlrelsa-ms
c868fff5a2 update samples from Release-88 as a part of SDK release 2021-02-22 19:23:04 +00:00
Harneet Virk
bc4e6611c4 Merge pull request #1342 from Azure/release_update/Release-87
update samples from Release-87 as a part of  SDK release
2021-02-16 18:43:49 -08:00
151 changed files with 1253 additions and 9685 deletions

View File

@@ -103,7 +103,7 @@
"source": [
"import azureml.core\n",
"\n",
"print(\"This notebook was created using version 1.22.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -21,9 +21,8 @@ dependencies:
- pip:
# Required packages for AzureML execution, history, and data preparation.
- azureml-widgets~=1.22.0
- azureml-widgets~=1.24.0
- pytorch-transformers==1.0.0
- spacy==2.1.8
- https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
- -r https://automlcesdkdataresources.blob.core.windows.net/validated-requirements/1.22.0/validated_win32_requirements.txt [--no-deps]
- PyJWT < 2.0.0
- -r https://automlcesdkdataresources.blob.core.windows.net/validated-requirements/1.24.0/validated_win32_requirements.txt [--no-deps]

View File

@@ -21,10 +21,8 @@ dependencies:
- pip:
# Required packages for AzureML execution, history, and data preparation.
- azureml-widgets~=1.22.0
- azureml-widgets~=1.24.0
- pytorch-transformers==1.0.0
- spacy==2.1.8
- https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
- -r https://automlcesdkdataresources.blob.core.windows.net/validated-requirements/1.22.0/validated_linux_requirements.txt [--no-deps]
- PyJWT < 2.0.0
- -r https://automlcesdkdataresources.blob.core.windows.net/validated-requirements/1.24.0/validated_linux_requirements.txt [--no-deps]

View File

@@ -22,9 +22,8 @@ dependencies:
- pip:
# Required packages for AzureML execution, history, and data preparation.
- azureml-widgets~=1.22.0
- azureml-widgets~=1.24.0
- pytorch-transformers==1.0.0
- spacy==2.1.8
- https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
- -r https://automlcesdkdataresources.blob.core.windows.net/validated-requirements/1.22.0/validated_darwin_requirements.txt [--no-deps]
- PyJWT < 2.0.0
- https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
- -r https://automlcesdkdataresources.blob.core.windows.net/validated-requirements/1.24.0/validated_darwin_requirements.txt [--no-deps]

View File

@@ -105,7 +105,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.22.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -0,0 +1,4 @@
name: auto-ml-classification-bank-marketing-all-features
dependencies:
- pip:
- azureml-sdk

View File

@@ -93,7 +93,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.22.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -0,0 +1,4 @@
name: auto-ml-classification-credit-card-fraud
dependencies:
- pip:
- azureml-sdk

View File

@@ -96,7 +96,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.22.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -0,0 +1,4 @@
name: auto-ml-classification-text-dnn
dependencies:
- pip:
- azureml-sdk

View File

@@ -81,7 +81,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.22.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -0,0 +1,4 @@
name: auto-ml-continuous-retraining
dependencies:
- pip:
- azureml-sdk

View File

@@ -5,7 +5,7 @@ set options=%3
set PIP_NO_WARN_SCRIPT_LOCATION=0
IF "%conda_env_name%"=="" SET conda_env_name="azure_automl_experimental"
IF "%automl_env_file%"=="" SET automl_env_file="automl_env.yml"
IF "%automl_env_file%"=="" SET automl_env_file="automl_thin_client_env.yml"
IF NOT EXIST %automl_env_file% GOTO YmlMissing

View File

@@ -12,7 +12,7 @@ fi
if [ "$AUTOML_ENV_FILE" == "" ]
then
AUTOML_ENV_FILE="automl_env.yml"
AUTOML_ENV_FILE="automl_thin_client_env.yml"
fi
if [ ! -f $AUTOML_ENV_FILE ]; then

View File

@@ -12,7 +12,7 @@ fi
if [ "$AUTOML_ENV_FILE" == "" ]
then
AUTOML_ENV_FILE="automl_env.yml"
AUTOML_ENV_FILE="automl_thin_client_env_mac.yml"
fi
if [ ! -f $AUTOML_ENV_FILE ]; then

View File

@@ -5,17 +5,14 @@ dependencies:
- pip<=19.3.1
- python>=3.5.2,<3.8
- nb_conda
- matplotlib==2.1.0
- numpy~=1.18.0
- cython
- urllib3<1.24
- scikit-learn==0.22.1
- pandas==0.25.1
- PyJWT < 2.0.0
- numpy==1.18.5
- pip:
# Required packages for AzureML execution, history, and data preparation.
- azureml-defaults
- azureml-sdk
- azureml-widgets
- azureml-explain-model
- PyJWT < 2.0.0
- pandas

View File

@@ -6,17 +6,14 @@ dependencies:
- nomkl
- python>=3.5.2,<3.8
- nb_conda
- matplotlib==2.1.0
- numpy~=1.18.0
- cython
- urllib3<1.24
- scikit-learn==0.22.1
- pandas==0.25.1
- PyJWT < 2.0.0
- numpy==1.18.5
- pip:
# Required packages for AzureML execution, history, and data preparation.
- azureml-defaults
- azureml-sdk
- azureml-widgets
- azureml-explain-model
- PyJWT < 2.0.0
- pandas

View File

@@ -67,11 +67,8 @@
"source": [
"import logging\n",
"\n",
"from matplotlib import pyplot as plt\n",
"import json\n",
"import numpy as np\n",
"import pandas as pd\n",
" \n",
"\n",
"\n",
"import azureml.core\n",
"from azureml.core.experiment import Experiment\n",
@@ -93,7 +90,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.22.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},
@@ -116,9 +113,7 @@
"output['Resource Group'] = ws.resource_group\n",
"output['Location'] = ws.location\n",
"output['Run History Name'] = experiment_name\n",
"pd.set_option('display.max_colwidth', -1)\n",
"outputDf = pd.DataFrame(data = output, index = [''])\n",
"outputDf.T"
"output"
]
},
{
@@ -199,7 +194,6 @@
"|**n_cross_validations**|Number of cross validation splits.|\n",
"|**training_data**|(sparse) array-like, shape = [n_samples, n_features]|\n",
"|**label_column_name**|(sparse) array-like, shape = [n_samples, ], targets values.|\n",
"|**scenario**|We need to set this parameter to 'Latest' to enable some experimental features. This parameter should not be set outside of this experimental notebook.|\n",
"\n",
"**_You can find more information about primary metrics_** [here](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-configure-auto-train#primary-metric)"
]
@@ -228,7 +222,6 @@
" compute_target = compute_target,\n",
" training_data = train_data,\n",
" label_column_name = label,\n",
" scenario='Latest',\n",
" **automl_settings\n",
" )"
]
@@ -276,34 +269,13 @@
"## Results"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Widget for Monitoring Runs\n",
"\n",
"The widget will first report a \"loading\" status while running the first iteration. After completing the first iteration, an auto-updating graph and table will be shown. The widget will refresh once per minute, so you should see the graph update as child runs complete.\n",
"\n",
"**Note:** The widget displays a link at the bottom. Use this link to open a web interface to explore the individual run details."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.widgets import RunDetails\n",
"RunDetails(remote_run).show() "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"remote_run.wait_for_completion()"
"remote_run.wait_for_completion(show_output=True)"
]
},
{
@@ -368,18 +340,12 @@
"metadata": {},
"outputs": [],
"source": [
"# preview the first 3 rows of the dataset\n",
"\n",
"test_data = test_data.to_pandas_dataframe()\n",
"y_test = test_data['ERP'].fillna(0)\n",
"test_data = test_data.drop('ERP', 1)\n",
"test_data = test_data.fillna(0)\n",
"y_test = test_data.keep_columns('ERP')\n",
"test_data = test_data.drop_columns('ERP')\n",
"\n",
"\n",
"train_data = train_data.to_pandas_dataframe()\n",
"y_train = train_data['ERP'].fillna(0)\n",
"train_data = train_data.drop('ERP', 1)\n",
"train_data = train_data.fillna(0)\n"
"y_train = train_data.keep_columns('ERP')\n",
"train_data = train_data.drop_columns('ERP')\n"
]
},
{
@@ -397,7 +363,16 @@
"outputs": [],
"source": [
"from azureml.train.automl.model_proxy import ModelProxy\n",
"best_model_proxy = ModelProxy(best_run)"
"best_model_proxy = ModelProxy(best_run)\n",
"y_pred_train = best_model_proxy.predict(train_data)\n",
"y_pred_test = best_model_proxy.predict(test_data)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Exploring results"
]
},
{
@@ -406,60 +381,15 @@
"metadata": {},
"outputs": [],
"source": [
"y_pred_train = best_model_proxy.predict(train_data).to_pandas_dataframe().values.flatten()\n",
"y_pred_train = y_pred_train.to_pandas_dataframe().values.flatten()\n",
"y_train = y_train.to_pandas_dataframe().values.flatten()\n",
"y_residual_train = y_train - y_pred_train\n",
"\n",
"y_pred_test = best_model_proxy.predict(test_data).to_pandas_dataframe().values.flatten()\n",
"y_residual_test = y_test - y_pred_test"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"from sklearn.metrics import mean_squared_error, r2_score\n",
"\n",
"# Set up a multi-plot chart.\n",
"f, (a0, a1) = plt.subplots(1, 2, gridspec_kw = {'width_ratios':[1, 1], 'wspace':0, 'hspace': 0})\n",
"f.suptitle('Regression Residual Values', fontsize = 18)\n",
"f.set_figheight(6)\n",
"f.set_figwidth(16)\n",
"\n",
"# Plot residual values of training set.\n",
"a0.axis([0, 360, -100, 100])\n",
"a0.plot(y_residual_train, 'bo', alpha = 0.5)\n",
"a0.plot([-10,360],[0,0], 'r-', lw = 3)\n",
"a0.text(16,170,'RMSE = {0:.2f}'.format(np.sqrt(mean_squared_error(y_train, y_pred_train))), fontsize = 12)\n",
"a0.text(16,140,'R2 score = {0:.2f}'.format(r2_score(y_train, y_pred_train)),fontsize = 12)\n",
"a0.set_xlabel('Training samples', fontsize = 12)\n",
"a0.set_ylabel('Residual Values', fontsize = 12)\n",
"\n",
"# Plot residual values of test set.\n",
"a1.axis([0, 90, -100, 100])\n",
"a1.plot(y_residual_test, 'bo', alpha = 0.5)\n",
"a1.plot([-10,360],[0,0], 'r-', lw = 3)\n",
"a1.text(5,170,'RMSE = {0:.2f}'.format(np.sqrt(mean_squared_error(y_test, y_pred_test))), fontsize = 12)\n",
"a1.text(5,140,'R2 score = {0:.2f}'.format(r2_score(y_test, y_pred_test)),fontsize = 12)\n",
"a1.set_xlabel('Test samples', fontsize = 12)\n",
"a1.set_yticklabels([])\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"test_pred = plt.scatter(y_test, y_pred_test, color='')\n",
"test_test = plt.scatter(y_test, y_test, color='g')\n",
"plt.legend((test_pred, test_test), ('prediction', 'truth'), loc='upper left', fontsize=8)\n",
"plt.show()"
"y_pred_test = y_pred_test.to_pandas_dataframe().values.flatten()\n",
"y_test = y_test.to_pandas_dataframe().values.flatten()\n",
"y_residual_test = y_test - y_pred_test\n",
"print(y_residual_train)\n",
"print(y_residual_test)"
]
},
{

View File

@@ -0,0 +1,4 @@
name: auto-ml-regression-model-proxy
dependencies:
- pip:
- azureml-sdk

View File

@@ -113,7 +113,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.22.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -0,0 +1,4 @@
name: auto-ml-forecasting-beer-remote
dependencies:
- pip:
- azureml-sdk

View File

@@ -87,7 +87,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.22.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -0,0 +1,4 @@
name: auto-ml-forecasting-bike-share
dependencies:
- pip:
- azureml-sdk

View File

@@ -97,7 +97,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.22.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -0,0 +1,4 @@
name: auto-ml-forecasting-energy-demand
dependencies:
- pip:
- azureml-sdk

View File

@@ -94,7 +94,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.22.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -0,0 +1,4 @@
name: auto-ml-forecasting-function
dependencies:
- pip:
- azureml-sdk

View File

@@ -82,7 +82,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.22.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -0,0 +1,4 @@
name: auto-ml-forecasting-orange-juice-sales
dependencies:
- pip:
- azureml-sdk

View File

@@ -96,7 +96,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.22.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -0,0 +1,4 @@
name: auto-ml-classification-credit-card-fraud-local
dependencies:
- pip:
- azureml-sdk

View File

@@ -96,7 +96,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.22.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -0,0 +1,4 @@
name: auto-ml-regression-explanation-featurization
dependencies:
- pip:
- azureml-sdk

View File

@@ -92,7 +92,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.22.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},
@@ -375,18 +375,12 @@
"metadata": {},
"outputs": [],
"source": [
"# preview the first 3 rows of the dataset\n",
"\n",
"test_data = test_data.to_pandas_dataframe()\n",
"y_test = test_data['ERP'].fillna(0)\n",
"test_data = test_data.drop('ERP', 1)\n",
"test_data = test_data.fillna(0)\n",
"y_test = test_data.keep_columns('ERP').to_pandas_dataframe()\n",
"test_data = test_data.drop_columns('ERP').to_pandas_dataframe()\n",
"\n",
"\n",
"train_data = train_data.to_pandas_dataframe()\n",
"y_train = train_data['ERP'].fillna(0)\n",
"train_data = train_data.drop('ERP', 1)\n",
"train_data = train_data.fillna(0)\n"
"y_train = train_data.keep_columns('ERP').to_pandas_dataframe()\n",
"train_data = train_data.drop_columns('ERP').to_pandas_dataframe()\n"
]
},
{
@@ -396,10 +390,10 @@
"outputs": [],
"source": [
"y_pred_train = fitted_model.predict(train_data)\n",
"y_residual_train = y_train - y_pred_train\n",
"y_residual_train = y_train.values - y_pred_train\n",
"\n",
"y_pred_test = fitted_model.predict(test_data)\n",
"y_residual_test = y_test - y_pred_test"
"y_residual_test = y_test.values - y_pred_test"
]
},
{

View File

@@ -0,0 +1,4 @@
name: auto-ml-regression
dependencies:
- pip:
- azureml-sdk

View File

@@ -0,0 +1,84 @@
Azure Synapse Analytics is a limitless analytics service that brings together data integration, enterprise data warehousing, and big data analytics. It gives you the freedom to query data on your terms, using either serverless or dedicated resources—at scale. Azure Synapse brings these worlds together with a unified experience to ingest, explore, prepare, manage, and serve data for immediate BI and machine learning needs. A core offering within Azure Synapse Analytics are serverless Apache Spark pools enhanced for big data workloads.
The Synapse integration in Azure ML is for customers who want to use Apache Spark in Azure Synapse Analytics to prepare data at scale in Azure ML before training their ML model. This will allow customers to work on their end-to-end ML lifecycle, including large-scale data preparation, model training and deployment, within an Azure ML workspace without having to use suboptimal tools for machine learning or switch between multiple tools for data preparation and model training. The ability to perform all ML tasks within Azure ML will reduce the time required for customers to iterate on a machine learning project, which typically includes multiple rounds of data preparation and training.
In the public preview, the following capabilities are provided:
- Link Azure Synapse Analytics workspace to Azure Machine Learning workspace (via ARM, UI or SDK)
- Attach Apache Spark pools powered by Azure Synapse Analytics as Azure Machine Learning compute targets (via ARM, UI or SDK)
- Launch Apache Spark sessions in notebooks and perform interactive data exploration and preparation. This interactive experience leverages Apache Spark magic and customers will have session-level Conda support to install packages.
- Productionize ML pipelines by leveraging Apache Spark pools to pre-process big data
# Using Synapse in Azure machine learning
## Create synapse resources
Follow the documents below to create a Synapse workspace; resource-setup.sh is also available to create the resources for you.
- Create from [Portal](https://docs.microsoft.com/en-us/azure/synapse-analytics/quickstart-create-workspace)
- Create from [Cli](https://docs.microsoft.com/en-us/azure/synapse-analytics/quickstart-create-workspace-cli)
Follow the documents below to create a Synapse Spark pool:
- Create from [Portal](https://docs.microsoft.com/en-us/azure/synapse-analytics/quickstart-create-apache-spark-pool-portal)
- Create from [Cli](https://docs.microsoft.com/en-us/cli/azure/ext/synapse/synapse/spark/pool?view=azure-cli-latest)
## Link Synapse Workspace
Make sure you are the owner of the Synapse workspace so that you can link the Synapse workspace to AML.
You can run resource-setup.py to link the synapse workspace and attach compute
```python
from azureml.core import Workspace
ws = Workspace.from_config()
from azureml.core import LinkedService, SynapseWorkspaceLinkedServiceConfiguration
synapse_link_config = SynapseWorkspaceLinkedServiceConfiguration(
subscription_id="<subscription id>",
resource_group="<resource group>",
name="<synapse workspace name>"
)
linked_service = LinkedService.register(
workspace=ws,
name='<link name>',
linked_service_config=synapse_link_config)
```
## Attach synapse spark pool as AzureML compute
```python
from azureml.core.compute import SynapseCompute, ComputeTarget
spark_pool_name = "<spark pool name>"
attached_synapse_name = "<attached compute name>"
attach_config = SynapseCompute.attach_configuration(
linked_service,
type="SynapseSpark",
pool_name=spark_pool_name)
synapse_compute=ComputeTarget.attach(
workspace=ws,
name=attached_synapse_name,
attach_configuration=attach_config)
synapse_compute.wait_for_completion()
```
## Set up permission
Grant the Spark admin role to the system-assigned identity of the linked service so that users can submit experiment runs or pipeline runs from the AML workspace to the Synapse Spark pool.
Grant the Spark admin role to the specific user so that the user can start a Spark session on the Synapse Spark pool.
You can get the system assigned identity information by running
```python
print(linked_service.system_assigned_identity_principal_id)
```
- Launch Synapse Studio for the Synapse workspace and grant the linked service MSI the "Synapse Apache Spark administrator" role.
- In the Azure portal, grant the linked service MSI the "Storage Blob Data Contributor" role on the primary ADLS Gen2 account of the Synapse workspace to use the library management feature.

View File

@@ -0,0 +1,6 @@
name: multi-model-register-and-deploy
dependencies:
- pip:
- azureml-sdk
- numpy
- scikit-learn

View File

@@ -0,0 +1,6 @@
name: model-register-and-deploy
dependencies:
- pip:
- azureml-sdk
- numpy
- scikit-learn

View File

@@ -0,0 +1,4 @@
name: deploy-aks-with-controlled-rollout
dependencies:
- pip:
- azureml-sdk

View File

@@ -0,0 +1,4 @@
name: enable-app-insights-in-production-service
dependencies:
- pip:
- azureml-sdk

View File

@@ -94,6 +94,17 @@ def main():
os.makedirs(output_dir, exist_ok=True)
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
# Use Azure Open Datasets for MNIST dataset
datasets.MNIST.resources = [
("https://azureopendatastorage.azurefd.net/mnist/train-images-idx3-ubyte.gz",
"f68b3c2dcbeaaa9fbdd348bbdeb94873"),
("https://azureopendatastorage.azurefd.net/mnist/train-labels-idx1-ubyte.gz",
"d53e105ee54ea40749a09fcbcd1e9432"),
("https://azureopendatastorage.azurefd.net/mnist/t10k-images-idx3-ubyte.gz",
"9fb629c4189551a2d022fa330f9573f3"),
("https://azureopendatastorage.azurefd.net/mnist/t10k-labels-idx1-ubyte.gz",
"ec29112dd5afa0611ce80d1b7f02629c")
]
train_loader = torch.utils.data.DataLoader(
datasets.MNIST('data', train=True, download=True,
transform=transforms.Compose([transforms.ToTensor(),

View File

@@ -0,0 +1,8 @@
name: onnx-convert-aml-deploy-tinyyolo
dependencies:
- pip:
- azureml-sdk
- numpy
- git+https://github.com/apple/coremltools@v2.1
- onnx<1.7.0
- onnxmltools

View File

@@ -0,0 +1,9 @@
name: onnx-inference-facial-expression-recognition-deploy
dependencies:
- pip:
- azureml-sdk
- azureml-widgets
- matplotlib
- numpy
- onnx<1.7.0
- opencv-python-headless

View File

@@ -0,0 +1,9 @@
name: onnx-inference-mnist-deploy
dependencies:
- pip:
- azureml-sdk
- azureml-widgets
- matplotlib
- numpy
- onnx<1.7.0
- opencv-python-headless

View File

@@ -0,0 +1,4 @@
name: onnx-model-register-and-deploy
dependencies:
- pip:
- azureml-sdk

View File

@@ -0,0 +1,4 @@
name: onnx-modelzoo-aml-deploy-resnet50
dependencies:
- pip:
- azureml-sdk

View File

@@ -0,0 +1,5 @@
name: onnx-train-pytorch-aml-deploy-mnist
dependencies:
- pip:
- azureml-sdk
- azureml-widgets

View File

@@ -0,0 +1,5 @@
name: production-deploy-to-aks-gpu
dependencies:
- pip:
- azureml-sdk
- tensorflow

View File

@@ -0,0 +1,8 @@
name: production-deploy-to-aks-ssl
dependencies:
- pip:
- azureml-sdk
- matplotlib
- tqdm
- scipy
- sklearn

View File

@@ -0,0 +1,8 @@
name: production-deploy-to-aks
dependencies:
- pip:
- azureml-sdk
- matplotlib
- tqdm
- scipy
- sklearn

View File

@@ -0,0 +1,4 @@
name: model-register-and-deploy-spark
dependencies:
- pip:
- azureml-sdk

View File

@@ -259,7 +259,7 @@
"run_config.environment.docker.enabled = True\n",
"\n",
"azureml_pip_packages = [\n",
" 'azureml-defaults', 'azureml-contrib-interpret', 'azureml-telemetry', 'azureml-interpret'\n",
" 'azureml-defaults', 'azureml-telemetry', 'azureml-interpret'\n",
"]\n",
"\n",
"# Note: this is to pin the scikit-learn and pandas versions to be same as notebook.\n",

View File

@@ -0,0 +1,13 @@
name: explain-model-on-amlcompute
dependencies:
- pip:
- azureml-sdk
- azureml-interpret
- flask
- flask-cors
- gevent>=1.3.6
- jinja2
- ipython
- matplotlib
- azureml-dataset-runtime
- ipywidgets

View File

@@ -57,7 +57,7 @@
"Problem: IBM employee attrition classification with scikit-learn (run model explainer locally and upload explanation to the Azure Machine Learning Run History)\n",
"\n",
"1. Train a SVM classification model using Scikit-learn\n",
"2. Run 'explain_model' with AML Run History, which leverages run history service to store and manage the explanation data\n",
"2. Run 'explain-model-sample' with AML Run History, which leverages run history service to store and manage the explanation data\n",
"---\n",
"\n",
"Setup: If you are using Jupyter notebooks, the extensions should be installed automatically with the package.\n",
@@ -226,36 +226,6 @@
" ('classifier', SVC(C=1.0, probability=True))])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"'''\n",
"# Uncomment below if sklearn-pandas is not installed\n",
"#!pip install sklearn-pandas\n",
"from sklearn_pandas import DataFrameMapper\n",
"\n",
"# Impute, standardize the numeric features and one-hot encode the categorical features. \n",
"\n",
"\n",
"numeric_transformations = [([f], Pipeline(steps=[('imputer', SimpleImputer(strategy='median')), ('scaler', StandardScaler())])) for f in numerical]\n",
"\n",
"categorical_transformations = [([f], OneHotEncoder(handle_unknown='ignore', sparse=False)) for f in categorical]\n",
"\n",
"transformations = numeric_transformations + categorical_transformations\n",
"\n",
"# Append classifier to preprocessing pipeline.\n",
"# Now we have a full prediction pipeline.\n",
"clf = Pipeline(steps=[('preprocessor', transformations),\n",
" ('classifier', SVC(C=1.0, probability=True))]) \n",
"\n",
"\n",
"\n",
"'''"
]
},
{
"cell_type": "markdown",
"metadata": {},
@@ -475,7 +445,7 @@
"metadata": {},
"outputs": [],
"source": [
"experiment_name = 'explain_model'\n",
"experiment_name = 'explain-model-sample'\n",
"experiment = Experiment(ws, experiment_name)\n",
"run = experiment.start_logging()\n",
"client = ExplanationClient.from_run(run)"

View File

@@ -0,0 +1,12 @@
name: save-retrieve-explanations-run-history
dependencies:
- pip:
- azureml-sdk
- azureml-interpret
- flask
- flask-cors
- gevent>=1.3.6
- jinja2
- ipython
- matplotlib
- ipywidgets

View File

@@ -166,12 +166,12 @@
"source": [
"from sklearn.model_selection import train_test_split\n",
"import joblib\n",
"from sklearn.compose import ColumnTransformer\n",
"from sklearn.preprocessing import StandardScaler, OneHotEncoder\n",
"from sklearn.impute import SimpleImputer\n",
"from sklearn.pipeline import Pipeline\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn_pandas import DataFrameMapper\n",
"\n",
"from interpret.ext.blackbox import TabularExplainer\n",
"\n",
@@ -201,17 +201,23 @@
"# Store the numerical columns in a list numerical\n",
"numerical = attritionXData.columns.difference(categorical)\n",
"\n",
"numeric_transformations = [([f], Pipeline(steps=[\n",
"# We create the preprocessing pipelines for both numeric and categorical data.\n",
"numeric_transformer = Pipeline(steps=[\n",
" ('imputer', SimpleImputer(strategy='median')),\n",
" ('scaler', StandardScaler())])) for f in numerical]\n",
" ('scaler', StandardScaler())])\n",
"\n",
"categorical_transformations = [([f], OneHotEncoder(handle_unknown='ignore', sparse=False)) for f in categorical]\n",
"categorical_transformer = Pipeline(steps=[\n",
" ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),\n",
" ('onehot', OneHotEncoder(handle_unknown='ignore'))])\n",
"\n",
"transformations = numeric_transformations + categorical_transformations\n",
"transformations = ColumnTransformer(\n",
" transformers=[\n",
" ('num', numeric_transformer, numerical),\n",
" ('cat', categorical_transformer, categorical)])\n",
"\n",
"# Append classifier to preprocessing pipeline.\n",
"# Now we have a full prediction pipeline.\n",
"clf = Pipeline(steps=[('preprocessor', DataFrameMapper(transformations)),\n",
"clf = Pipeline(steps=[('preprocessor', transformations),\n",
" ('classifier', RandomForestClassifier())])\n",
"\n",
"# Split data into train and test\n",
@@ -323,7 +329,7 @@
"\n",
"# azureml-defaults is required to host the model as a web service.\n",
"azureml_pip_packages = [\n",
" 'azureml-defaults', 'azureml-contrib-interpret', 'azureml-core', 'azureml-telemetry',\n",
" 'azureml-defaults', 'azureml-core', 'azureml-telemetry',\n",
" 'azureml-interpret'\n",
"]\n",
" \n",
@@ -350,7 +356,7 @@
"# the submitted job is run in. Note the remote environment(s) needs to be similar to the local\n",
"# environment, otherwise if a model is trained or deployed in a different environment this can\n",
"# cause errors. Please take extra care when specifying your dependencies in a production environment.\n",
"myenv = CondaDependencies.create(pip_packages=['sklearn-pandas', 'pyyaml', sklearn_dep, pandas_dep] + azureml_pip_packages,\n",
"myenv = CondaDependencies.create(pip_packages=['pyyaml', sklearn_dep, pandas_dep] + azureml_pip_packages,\n",
" pin_sdk_version=False)\n",
"\n",
"with open(\"myenv.yml\",\"w\") as f:\n",

View File

@@ -0,0 +1,12 @@
name: train-explain-model-locally-and-deploy
dependencies:
- pip:
- azureml-sdk
- azureml-interpret
- flask
- flask-cors
- gevent>=1.3.6
- jinja2
- ipython
- matplotlib
- ipywidgets

View File

@@ -267,7 +267,7 @@
"run_config.environment.python.user_managed_dependencies = False\n",
"\n",
"azureml_pip_packages = [\n",
" 'azureml-defaults', 'azureml-contrib-interpret', 'azureml-telemetry', 'azureml-interpret'\n",
" 'azureml-defaults', 'azureml-telemetry', 'azureml-interpret'\n",
"]\n",
" \n",
"\n",
@@ -294,7 +294,7 @@
"# the submitted job is run in. Note the remote environment(s) needs to be similar to the local\n",
"# environment, otherwise if a model is trained or deployed in a different environment this can\n",
"# cause errors. Please take extra care when specifying your dependencies in a production environment.\n",
"azureml_pip_packages.extend(['sklearn-pandas', 'pyyaml', sklearn_dep, pandas_dep])\n",
"azureml_pip_packages.extend(['pyyaml', sklearn_dep, pandas_dep])\n",
"run_config.environment.python.conda_dependencies = CondaDependencies.create(pip_packages=azureml_pip_packages)\n",
"# Now submit a run on AmlCompute\n",
"from azureml.core.script_run_config import ScriptRunConfig\n",
@@ -431,7 +431,7 @@
"\n",
"# WARNING: to install this, g++ needs to be available on the Docker image and is not by default (look at the next cell)\n",
"azureml_pip_packages = [\n",
" 'azureml-defaults', 'azureml-contrib-interpret', 'azureml-core', 'azureml-telemetry',\n",
" 'azureml-defaults', 'azureml-core', 'azureml-telemetry',\n",
" 'azureml-interpret'\n",
"]\n",
" \n",
@@ -458,7 +458,7 @@
"# the submitted job is run in. Note the remote environment(s) needs to be similar to the local\n",
"# environment, otherwise if a model is trained or deployed in a different environment this can\n",
"# cause errors. Please take extra care when specifying your dependencies in a production environment.\n",
"azureml_pip_packages.extend(['sklearn-pandas', 'pyyaml', sklearn_dep, pandas_dep])\n",
"azureml_pip_packages.extend(['pyyaml', sklearn_dep, pandas_dep])\n",
"myenv = CondaDependencies.create(pip_packages=azureml_pip_packages)\n",
"\n",
"with open(\"myenv.yml\",\"w\") as f:\n",

View File

@@ -0,0 +1,14 @@
name: train-explain-model-on-amlcompute-and-deploy
dependencies:
- pip:
- azureml-sdk
- azureml-interpret
- flask
- flask-cors
- gevent>=1.3.6
- jinja2
- ipython
- matplotlib
- azureml-dataset-runtime
- azureml-core
- ipywidgets

View File

@@ -5,13 +5,13 @@
import os
import pandas as pd
import zipfile
from sklearn.model_selection import train_test_split
import joblib
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn_pandas import DataFrameMapper
from azureml.core.run import Run
from interpret.ext.blackbox import TabularExplainer
@@ -57,16 +57,22 @@ for col, value in attritionXData.iteritems():
# store the numerical columns
numerical = attritionXData.columns.difference(categorical)
numeric_transformations = [([f], Pipeline(steps=[
# We create the preprocessing pipelines for both numeric and categorical data.
numeric_transformer = Pipeline(steps=[
('imputer', SimpleImputer(strategy='median')),
('scaler', StandardScaler())])) for f in numerical]
('scaler', StandardScaler())])
categorical_transformations = [([f], OneHotEncoder(handle_unknown='ignore', sparse=False)) for f in categorical]
categorical_transformer = Pipeline(steps=[
('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
('onehot', OneHotEncoder(handle_unknown='ignore'))])
transformations = numeric_transformations + categorical_transformations
transformations = ColumnTransformer(
transformers=[
('num', numeric_transformer, numerical),
('cat', categorical_transformer, categorical)])
# append classifier to preprocessing pipeline
clf = Pipeline(steps=[('preprocessor', DataFrameMapper(transformations)),
clf = Pipeline(steps=[('preprocessor', transformations),
('classifier', LogisticRegression(solver='lbfgs'))])
# get the run this was submitted from to interact with run history

View File

@@ -0,0 +1,5 @@
name: aml-pipelines-data-transfer
dependencies:
- pip:
- azureml-sdk
- azureml-widgets

View File

@@ -0,0 +1,5 @@
name: aml-pipelines-getting-started
dependencies:
- pip:
- azureml-sdk
- azureml-widgets

View File

@@ -341,7 +341,7 @@
"outputs": [],
"source": [
"pipeline = Pipeline(workspace=ws, steps=[step])\n",
"pipeline_run = Experiment(ws, 'azurebatch_experiment').submit(pipeline)"
"pipeline_run = Experiment(ws, 'azurebatch_sample').submit(pipeline)"
]
},
{

View File

@@ -0,0 +1,5 @@
name: aml-pipelines-how-to-use-modulestep
dependencies:
- pip:
- azureml-sdk
- azureml-widgets

View File

@@ -130,7 +130,7 @@
"\n",
"pipeline_draft = PipelineDraft.create(ws, name=\"TestPipelineDraft\",\n",
" description=\"draft description\",\n",
" experiment_name=\"helloworld\",\n",
" experiment_name=\"pipeline_draft_sample\",\n",
" pipeline=pipeline,\n",
" continue_on_step_failure=True,\n",
" tags={'dev': 'true'},\n",

View File

@@ -0,0 +1,5 @@
name: aml-pipelines-how-to-use-pipeline-drafts
dependencies:
- pip:
- azureml-sdk
- azureml-widgets

View File

@@ -121,12 +121,17 @@
"metadata": {},
"outputs": [],
"source": [
"os.makedirs('./data/mnist', exist_ok=True)\n",
"data_folder = os.path.join(os.getcwd(), 'data/mnist')\n",
"os.makedirs(data_folder, exist_ok=True)\n",
"\n",
"urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz', filename = './data/mnist/train-images.gz')\n",
"urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz', filename = './data/mnist/train-labels.gz')\n",
"urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz', filename = './data/mnist/test-images.gz')\n",
"urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz', filename = './data/mnist/test-labels.gz')"
"urllib.request.urlretrieve('https://azureopendatastorage.blob.core.windows.net/mnist/train-images-idx3-ubyte.gz',\n",
" filename=os.path.join(data_folder, 'train-images-idx3-ubyte.gz'))\n",
"urllib.request.urlretrieve('https://azureopendatastorage.blob.core.windows.net/mnist/train-labels-idx1-ubyte.gz',\n",
" filename=os.path.join(data_folder, 'train-labels-idx1-ubyte.gz'))\n",
"urllib.request.urlretrieve('https://azureopendatastorage.blob.core.windows.net/mnist/t10k-images-idx3-ubyte.gz',\n",
" filename=os.path.join(data_folder, 't10k-images-idx3-ubyte.gz'))\n",
"urllib.request.urlretrieve('https://azureopendatastorage.blob.core.windows.net/mnist/t10k-labels-idx1-ubyte.gz',\n",
" filename=os.path.join(data_folder, 't10k-labels-idx1-ubyte.gz'))"
]
},
{
@@ -146,11 +151,11 @@
"from utils import load_data\n",
"\n",
"# note we also shrink the intensity values (X) from 0-255 to 0-1. This helps the neural network converge faster.\n",
"X_train = load_data('./data/mnist/train-images.gz', False) / 255.0\n",
"y_train = load_data('./data/mnist/train-labels.gz', True).reshape(-1)\n",
"X_train = load_data(os.path.join(data_folder, 'train-images-idx3-ubyte.gz'), False) / np.float32(255.0)\n",
"X_test = load_data(os.path.join(data_folder, 't10k-images-idx3-ubyte.gz'), False) / np.float32(255.0)\n",
"y_train = load_data(os.path.join(data_folder, 'train-labels-idx1-ubyte.gz'), True).reshape(-1)\n",
"y_test = load_data(os.path.join(data_folder, 't10k-labels-idx1-ubyte.gz'), True).reshape(-1)\n",
"\n",
"X_test = load_data('./data/mnist/test-images.gz', False) / 255.0\n",
"y_test = load_data('./data/mnist/test-labels.gz', True).reshape(-1)\n",
"\n",
"count = 0\n",
"sample_size = 30\n",

View File

@@ -0,0 +1,9 @@
name: aml-pipelines-parameter-tuning-with-hyperdrive
dependencies:
- pip:
- azureml-sdk
- azureml-widgets
- matplotlib
- numpy
- pandas_ml
- azureml-dataset-runtime[pandas,fuse]

View File

@@ -325,7 +325,7 @@
"outputs": [],
"source": [
"# submit a pipeline run\n",
"pipeline_run1 = Experiment(ws, 'Pipeline_experiment').submit(pipeline1)\n",
"pipeline_run1 = Experiment(ws, 'Pipeline_experiment_sample').submit(pipeline1)\n",
"# publish a pipeline from the submitted pipeline run\n",
"published_pipeline2 = pipeline_run1.publish_pipeline(name=\"My_New_Pipeline2\", description=\"My Published Pipeline Description\", version=\"0.1\", continue_on_step_failure=True)\n",
"published_pipeline2"

View File

@@ -0,0 +1,6 @@
name: aml-pipelines-publish-and-run-using-rest-endpoint
dependencies:
- pip:
- azureml-sdk
- azureml-widgets
- requests

View File

@@ -259,7 +259,7 @@
"\n",
"schedule = Schedule.create(workspace=ws, name=\"My_Schedule\",\n",
" pipeline_id=pub_pipeline_id, \n",
" experiment_name='Schedule_Run',\n",
" experiment_name='Schedule-run-sample',\n",
" recurrence=recurrence,\n",
" wait_for_provisioning=True,\n",
" description=\"Schedule Run\")\n",
@@ -445,7 +445,7 @@
"\n",
"schedule = Schedule.create(workspace=ws, name=\"My_Schedule\",\n",
" pipeline_id=pub_pipeline_id, \n",
" experiment_name='Schedule_Run',\n",
" experiment_name='Schedule-run-sample',\n",
" datastore=datastore,\n",
" wait_for_provisioning=True,\n",
" description=\"Schedule Run\")\n",
@@ -516,7 +516,7 @@
"\n",
"schedule = Schedule.create_for_pipeline_endpoint(workspace=ws, name=\"My_Endpoint_Schedule\",\n",
" pipeline_endpoint_id=published_pipeline_endpoint_id,\n",
" experiment_name='Schedule_Run',\n",
" experiment_name='Schedule-run-sample',\n",
" recurrence=recurrence, description=\"Schedule_Run\",\n",
" wait_for_provisioning=True)\n",
"\n",

View File

@@ -0,0 +1,5 @@
name: aml-pipelines-setup-schedule-for-a-published-pipeline
dependencies:
- pip:
- azureml-sdk
- azureml-widgets

View File

@@ -553,7 +553,7 @@
"outputs": [],
"source": [
"from azureml.core import Experiment\n",
"pipeline_run = Experiment(ws, name=\"submit_from_endpoint\").submit(pipeline_endpoint_by_name, tags={'endpoint_tag': \"1\"}, pipeline_version=\"0\")"
"pipeline_run = Experiment(ws, name=\"submit_endpoint_sample\").submit(pipeline_endpoint_by_name, tags={'endpoint_tag': \"1\"}, pipeline_version=\"0\")"
]
}
],

View File

@@ -0,0 +1,6 @@
name: aml-pipelines-setup-versioned-pipeline-endpoints
dependencies:
- pip:
- azureml-sdk
- azureml-widgets
- requests

View File

@@ -0,0 +1,5 @@
name: aml-pipelines-showcasing-datapath-and-pipelineparameter
dependencies:
- pip:
- azureml-sdk
- azureml-widgets

View File

@@ -0,0 +1,5 @@
name: aml-pipelines-showcasing-dataset-and-pipelineparameter
dependencies:
- pip:
- azureml-sdk
- azureml-widgets

View File

@@ -101,7 +101,7 @@
"metadata": {},
"source": [
"## Create an Azure ML experiment\n",
"Let's create an experiment named \"automlstep-classification\" and a folder to hold the training scripts. The script runs will be recorded under the experiment in Azure.\n",
"Let's create an experiment named \"automlstep-sample\" and a folder to hold the training scripts. The script runs will be recorded under the experiment in Azure.\n",
"\n",
"The best practice is to use separate folders for the scripts and dependent files of each step, and to specify that folder as the `source_directory` for the step. This helps reduce the size of the snapshot created for the step (only the specific folder is snapshotted). Since changes to any file in the `source_directory` would trigger a re-upload of the snapshot, this also helps preserve step reuse when nothing in the step's `source_directory` has changed."
]
@@ -113,7 +113,7 @@
"outputs": [],
"source": [
"# Choose a name for the run history container in the workspace.\n",
"experiment_name = 'automlstep-classification'\n",
"experiment_name = 'automlstep-sample'\n",
"project_folder = './project'\n",
"\n",
"experiment = Experiment(ws, experiment_name)\n",

View File

@@ -0,0 +1,4 @@
name: aml-pipelines-with-automated-machine-learning-step
dependencies:
- pip:
- azureml-sdk

View File

@@ -0,0 +1,5 @@
name: aml-pipelines-with-commandstep-r
dependencies:
- pip:
- azureml-sdk
- azureml-widgets

View File

@@ -0,0 +1,5 @@
name: aml-pipelines-with-commandstep
dependencies:
- pip:
- azureml-sdk
- azureml-widgets

View File

@@ -428,7 +428,7 @@
"metadata": {},
"outputs": [],
"source": [
"pipeline_run1 = Experiment(ws, 'Data_dependency').submit(pipeline1)\n",
"pipeline_run1 = Experiment(ws, 'Data_dependency_sample').submit(pipeline1)\n",
"print(\"Pipeline is submitted for execution\")"
]
},

View File

@@ -0,0 +1,5 @@
name: aml-pipelines-with-data-dependency-steps
dependencies:
- pip:
- azureml-sdk
- azureml-widgets

View File

@@ -0,0 +1,6 @@
name: aml-pipelines-with-notebook-runner-step
dependencies:
- pip:
- azureml-sdk
- azureml-widgets
- azureml-contrib-notebook

View File

@@ -0,0 +1,10 @@
name: nyc-taxi-data-regression-model-building
dependencies:
- pip:
- azureml-sdk
- azureml-widgets
- azureml-opendatasets
- azureml-train-automl
- matplotlib
- pandas
- pyarrow

View File

@@ -0,0 +1,7 @@
name: file-dataset-image-inference-mnist
dependencies:
- pip:
- azureml-sdk
- azureml-pipeline-steps
- azureml-widgets
- pandas

View File

@@ -0,0 +1,7 @@
name: tabular-dataset-inference-iris
dependencies:
- pip:
- azureml-sdk
- azureml-pipeline-steps
- azureml-widgets
- pandas

View File

@@ -0,0 +1,7 @@
name: pipeline-style-transfer-parallel-run
dependencies:
- pip:
- azureml-sdk
- azureml-pipeline-steps
- azureml-widgets
- requests

View File

@@ -0,0 +1,5 @@
name: distributed-chainer
dependencies:
- pip:
- azureml-sdk
- azureml-widgets

View File

@@ -4,6 +4,8 @@ import os
import numpy as np
from utils import download_mnist
import chainer
from chainer import backend
from chainer import backends
@@ -17,6 +19,7 @@ from chainer.training import extensions
from chainer.dataset import concat_examples
from chainer.backends.cuda import to_cpu
from azureml.core.run import Run
run = Run.get_context()
@@ -49,7 +52,7 @@ def main():
args = parser.parse_args()
# Download the MNIST data if you haven't downloaded it yet
train, test = datasets.mnist.get_mnist(withlabel=True, ndim=1)
train, test = download_mnist()
gpu_id = args.gpu_id
batchsize = args.batchsize

View File

@@ -2,6 +2,8 @@ import numpy as np
import os
import json
from utils import download_mnist
from chainer import serializers, using_config, Variable, datasets
import chainer.functions as F
import chainer.links as L
@@ -41,7 +43,7 @@ def init():
def run(input_data):
i = np.array(json.loads(input_data)['data'])
_, test = datasets.get_mnist()
_, test = download_mnist()
x = Variable(np.asarray([test[i][0]]))
y = model(x)

View File

@@ -217,7 +217,8 @@
"import shutil\n",
"\n",
"shutil.copy('chainer_mnist.py', project_folder)\n",
"shutil.copy('chainer_score.py', project_folder)"
"shutil.copy('chainer_score.py', project_folder)\n",
"shutil.copy('utils.py', project_folder)"
]
},
{
@@ -263,6 +264,7 @@
"- python=3.6.2\n",
"- pip:\n",
" - azureml-defaults\n",
" - azureml-opendatasets\n",
" - chainer==5.1.0\n",
" - cupy-cuda90==5.1.0\n",
" - mpi4py==3.0.0\n",
@@ -557,6 +559,7 @@
"cd.add_conda_package('numpy')\n",
"cd.add_pip_package('chainer==5.1.0')\n",
"cd.add_pip_package(\"azureml-defaults\")\n",
"cd.add_pip_package(\"azureml-opendatasets\")\n",
"cd.save_to_file(base_directory='./', conda_file_path='myenv.yml')\n",
"\n",
"print(cd.serialize_to_string())"
@@ -584,7 +587,8 @@
"\n",
"\n",
"myenv = Environment.from_conda_specification(name=\"myenv\", file_path=\"myenv.yml\")\n",
"inference_config = InferenceConfig(entry_script=\"chainer_score.py\", environment=myenv)\n",
"inference_config = InferenceConfig(entry_script=\"chainer_score.py\", environment=myenv,\n",
" source_directory=project_folder)\n",
"\n",
"aciconfig = AciWebservice.deploy_configuration(cpu_cores=1,\n",
" auth_enabled=True, # this flag generates API keys to secure access\n",
@@ -592,11 +596,11 @@
" tags={'name': 'mnist', 'framework': 'Chainer'},\n",
" description='Chainer DNN with MNIST')\n",
"\n",
"service = Model.deploy(workspace=ws, \n",
" name='chainer-mnist-1', \n",
" models=[model], \n",
" inference_config=inference_config, \n",
" deployment_config=aciconfig)\n",
"service = Model.deploy(workspace=ws,\n",
" name='chainer-mnist-1',\n",
" models=[model],\n",
" inference_config=inference_config,\n",
" deployment_config=aciconfig)\n",
"service.wait_for_deployment(True)\n",
"print(service.state)\n",
"print(service.scoring_uri)"
@@ -685,13 +689,16 @@
" res = res.reshape(n_items[0], 1)\n",
" return res\n",
"\n",
"os.makedirs('./data/mnist', exist_ok=True)\n",
"urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz', filename = './data/mnist/test-images.gz')\n",
"urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz', filename = './data/mnist/test-labels.gz')\n",
"data_folder = os.path.join(os.getcwd(), 'data/mnist')\n",
"os.makedirs(data_folder, exist_ok=True)\n",
"\n",
"X_test = load_data('./data/mnist/test-images.gz', False)\n",
"y_test = load_data('./data/mnist/test-labels.gz', True).reshape(-1)\n",
"urllib.request.urlretrieve('https://azureopendatastorage.blob.core.windows.net/mnist/t10k-images-idx3-ubyte.gz',\n",
" filename=os.path.join(data_folder, 't10k-images-idx3-ubyte.gz'))\n",
"urllib.request.urlretrieve('https://azureopendatastorage.blob.core.windows.net/mnist/t10k-labels-idx1-ubyte.gz',\n",
" filename=os.path.join(data_folder, 't10k-labels-idx1-ubyte.gz'))\n",
"\n",
"X_test = load_data(os.path.join(data_folder, 't10k-images-idx3-ubyte.gz'), False) / np.float32(255.0)\n",
"y_test = load_data(os.path.join(data_folder, 't10k-labels-idx1-ubyte.gz'), True).reshape(-1)\n",
"\n",
"# send a random row from the test set to score\n",
"random_index = np.random.randint(0, len(X_test)-1)\n",

View File

@@ -0,0 +1,13 @@
# Conda environment used to run the Chainer train/tune/deploy notebook.
# Only pip-installable distributions belong here: json, urllib, gzip and
# struct are part of the Python standard library and must not be listed.
name: train-hyperparameter-tune-deploy-with-chainer
dependencies:
- pip:
  - azureml-sdk
  - azureml-widgets
  - numpy
  - matplotlib
  - requests
  - azureml-opendatasets

View File

@@ -0,0 +1,50 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
import glob
import gzip
import numpy as np
import os
import struct
from azureml.core import Dataset
from azureml.opendatasets import MNIST
from chainer.datasets import tuple_dataset
# load compressed MNIST gz files and return numpy arrays
def load_data(filename, label=False):
    """Read a gzip-compressed MNIST IDX file into a 2-D ``uint8`` array.

    Args:
        filename: Path to the ``.gz`` file to read.
        label: ``True`` for a label file, ``False`` for an image file.

    Returns:
        A ``numpy.ndarray`` of dtype ``uint8``. Image files yield shape
        ``(n_items, n_rows * n_cols)``; label files yield ``(n_items, 1)``.

    Raises:
        ValueError: If the file's magic number does not match the requested
            kind (image vs. label), or the payload is shorter than the
            header declares (e.g. a truncated download).
    """
    # IDX magic numbers: 0x08 = unsigned byte data, low byte = number of
    # dimensions (1 for label vectors, 3 for image stacks).
    expected_magic = 0x00000801 if label else 0x00000803

    with gzip.open(filename) as gz:
        # All header fields are big-endian 32-bit unsigned integers.
        magic, n_items = struct.unpack('>II', gz.read(8))
        if magic != expected_magic:
            raise ValueError(
                'Unexpected IDX magic number 0x{:08x} in {!r}; expected '
                '0x{:08x} for label={}'.format(magic, filename, expected_magic, label))

        if label:
            item_size = 1
        else:
            n_rows, n_cols = struct.unpack('>II', gz.read(8))
            item_size = n_rows * n_cols

        payload = gz.read(n_items * item_size)

    if len(payload) != n_items * item_size:
        raise ValueError(
            'Truncated IDX file {!r}: expected {} data bytes, got {}'.format(
                filename, n_items * item_size, len(payload)))

    return np.frombuffer(payload, dtype=np.uint8).reshape(n_items, item_size)
def download_mnist():
    """Download MNIST via Azure Open Datasets and return Chainer datasets.

    The files are downloaded into ``<cwd>/data/mnist`` (created if needed,
    existing files overwritten), decoded with ``load_data``, and wrapped in
    ``TupleDataset`` objects of ``float32`` images and ``int32`` labels.
    Pixel values are divided by 255.

    Returns:
        A ``(train, test)`` tuple of ``tuple_dataset.TupleDataset``.

    Raises:
        IndexError: If one of the four expected ``.gz`` files cannot be
            found under the download folder.
    """
    data_folder = os.path.join(os.getcwd(), 'data/mnist')
    os.makedirs(data_folder, exist_ok=True)

    mnist_file_dataset = MNIST.get_file_dataset()
    mnist_file_dataset.download(data_folder, overwrite=True)

    def _locate(name):
        # The files are searched for recursively; take the first match.
        return glob.glob(os.path.join(data_folder, "**/" + name), recursive=True)[0]

    # Divide by a float32 scalar so the scaled images are produced directly
    # as float32 instead of going through a float64 copy of the whole set.
    scale = np.float32(255.0)
    X_train = load_data(_locate("train-images-idx3-ubyte.gz"), False) / scale
    X_test = load_data(_locate("t10k-images-idx3-ubyte.gz"), False) / scale
    y_train = load_data(_locate("train-labels-idx1-ubyte.gz"), True).reshape(-1)
    y_test = load_data(_locate("t10k-labels-idx1-ubyte.gz"), True).reshape(-1)

    train = tuple_dataset.TupleDataset(X_train.astype(np.float32, copy=False),
                                       y_train.astype(np.int32))
    test = tuple_dataset.TupleDataset(X_test.astype(np.float32, copy=False),
                                      y_test.astype(np.int32))
    return train, test

View File

@@ -0,0 +1,5 @@
name: fastai-with-custom-docker
dependencies:
- pip:
- azureml-sdk
- fastai==1.0.61

View File

@@ -0,0 +1,8 @@
name: train-hyperparameter-tune-deploy-with-keras
dependencies:
- pip:
- azureml-sdk
- azureml-widgets
- tensorflow
- keras<=2.3.1
- matplotlib

View File

@@ -21,7 +21,8 @@
"metadata": {},
"source": [
"# Distributed PyTorch with DistributedDataParallel\n",
"In this tutorial, you will train a PyTorch model on the [MNIST](http://yann.lecun.com/exdb/mnist/) dataset using distributed training with PyTorch's `DistributedDataParallel` module across a GPU cluster. "
"\n",
"In this tutorial, you will train a PyTorch model on the [CIFAR-10](https://www.cs.toronto.edu/~kriz/cifar.html) dataset using distributed training with PyTorch's `DistributedDataParallel` module across a GPU cluster."
]
},
{
@@ -113,7 +114,7 @@
"from azureml.core.compute_target import ComputeTargetException\n",
"\n",
"# choose a name for your cluster\n",
"cluster_name = \"gpu-cluster\"\n",
"cluster_name = 'gpu-cluster'\n",
"\n",
"try:\n",
" compute_target = ComputeTarget(workspace=ws, name=cluster_name)\n",
@@ -139,6 +140,68 @@
"The above code creates GPU compute. If you instead want to create CPU compute, provide a different VM size to the `vm_size` parameter, such as `STANDARD_D2_V2`."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Prepare dataset\n",
"\n",
"Prepare the dataset used for training. We will first download and extract the publicly available CIFAR-10 dataset from the cs.toronto.edu website and then create an Azure ML FileDataset to use the data for training."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Download and extract CIFAR-10 data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import urllib\n",
"import tarfile\n",
"import os\n",
"\n",
"url = 'https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'\n",
"filename = 'cifar-10-python.tar.gz'\n",
"data_root = 'cifar-10'\n",
"filepath = os.path.join(data_root, filename)\n",
"\n",
"if not os.path.isdir(data_root):\n",
" os.makedirs(data_root, exist_ok=True)\n",
" urllib.request.urlretrieve(url, filepath)\n",
" with tarfile.open(filepath, \"r:gz\") as tar:\n",
" tar.extractall(path=data_root)\n",
" os.remove(filepath) # delete tar.gz file after extraction"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Create Azure ML dataset\n",
"\n",
"The `upload_directory` method will upload the data to a datastore and create a FileDataset from it. In this tutorial we will use the workspace's default datastore."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core import Dataset\n",
"\n",
"datastore = ws.get_default_datastore()\n",
"dataset = Dataset.File.upload_directory(\n",
" src_dir=data_root, target=(datastore, data_root)\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
@@ -161,8 +224,6 @@
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"project_folder = './pytorch-distr'\n",
"os.makedirs(project_folder, exist_ok=True)"
]
@@ -172,26 +233,14 @@
"metadata": {},
"source": [
"### Prepare training script\n",
"Now you will need to create your training script. In this tutorial, the script for distributed training of MNIST is already provided for you at `pytorch_mnist.py`. In practice, you should be able to take any custom PyTorch training script as is and run it with Azure ML without having to modify your code.\n",
"\n",
"However, if you would like to use Azure ML's [metric logging](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#logging) capabilities, you will have to add a small amount of Azure ML logic inside your training script. In this example, at each logging interval, we will log the loss for that minibatch to our Azure ML run.\n",
"\n",
"To do so, in `pytorch_mnist.py`, we will first access the Azure ML `Run` object within the script:\n",
"```Python\n",
"from azureml.core.run import Run\n",
"run = Run.get_context()\n",
"```\n",
"Later within the script, we log the loss metric to our run:\n",
"```Python\n",
"run.log('loss', losses.avg)\n",
"```"
"Now you will need to create your training script. In this tutorial, the script for distributed training on CIFAR-10 is already provided for you at `train.py`. In practice, you should be able to take any custom PyTorch training script as is and run it with Azure ML without having to modify your code."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Once your script is ready, copy the training script `pytorch_mnist.py` into the project directory."
"Once your script is ready, copy the training script `train.py` into the project directory."
]
},
{
@@ -202,7 +251,7 @@
"source": [
"import shutil\n",
"\n",
"shutil.copy('pytorch_mnist.py', project_folder)"
"shutil.copy('train.py', project_folder)"
]
},
{
@@ -231,26 +280,7 @@
"source": [
"### Create an environment\n",
"\n",
"Define a conda environment YAML file with your training script dependencies and create an Azure ML environment."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%writefile conda_dependencies.yml\n",
"\n",
"channels:\n",
"- conda-forge\n",
"dependencies:\n",
"- python=3.6.2\n",
"- pip:\n",
" - azureml-defaults\n",
" - torch==1.6.0\n",
" - torchvision==0.7.0\n",
" - future==0.17.1"
"In this tutorial, we will use one of Azure ML's curated PyTorch environments for training. [Curated environments](https://docs.microsoft.com/azure/machine-learning/how-to-use-environments#use-a-curated-environment) are available in your workspace by default. Specifically, we will use the PyTorch 1.6 GPU curated environment."
]
},
{
@@ -261,24 +291,39 @@
"source": [
"from azureml.core import Environment\n",
"\n",
"pytorch_env = Environment.from_conda_specification(name = 'pytorch-1.6-gpu', file_path = './conda_dependencies.yml')\n",
"\n",
"# Specify a GPU base image\n",
"pytorch_env.docker.enabled = True\n",
"pytorch_env.docker.base_image = 'mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.1-cudnn7-ubuntu18.04'"
"pytorch_env = Environment.get(ws, name='AzureML-PyTorch-1.6-GPU')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Configure the training job: torch.distributed with NCCL backend\n",
"### Configure the training job\n",
"\n",
"Create a ScriptRunConfig object to specify the configuration details of your training job, including your training script, environment to use, and the compute target to run on.\n",
"To launch a distributed PyTorch job on Azure ML, you have two options:\n",
"\n",
"In order to run a distributed PyTorch job with **torch.distributed** using the NCCL backend, create a `PyTorchConfiguration` and pass it to the `distributed_job_config` parameter of the ScriptRunConfig constructor. Specify `communication_backend='Nccl'` in the PyTorchConfiguration. The below code will configure a 2-node distributed job. The NCCL backend is the recommended backend for PyTorch distributed GPU training.\n",
"1. Per-process launch - specify the total # of worker processes (typically one per GPU) you want to run, and\n",
"Azure ML will handle launching each process.\n",
"2. Per-node launch with [torch.distributed.launch](https://pytorch.org/docs/stable/distributed.html#launch-utility) - provide the `torch.distributed.launch` command you want to\n",
"run on each node.\n",
"\n",
"The script arguments refers to the Azure ML-set environment variables `AZ_BATCHAI_PYTORCH_INIT_METHOD` for shared file-system initialization and `AZ_BATCHAI_TASK_INDEX` for the global rank of each worker process."
"For more information, see the [documentation](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-train-pytorch#distributeddataparallel).\n",
"\n",
"Both options are shown below."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Per-process launch\n",
"\n",
"To use the per-process launch option in which Azure ML will handle launching each of the processes to run your training script,\n",
"\n",
"1. Specify the training script and arguments\n",
"2. Create a `PyTorchConfiguration` and specify `node_count` and `process_count`. The `process_count` is the total number of processes you want to run for the job; this should typically equal the # of GPUs available on each node multiplied by the # of nodes. Since this tutorial uses the `STANDARD_NC6` SKU, which has one GPU, the total process count for a 2-node job is `2`. If you are using a SKU with >1 GPUs, adjust the `process_count` accordingly.\n",
"\n",
"Azure ML will set the `MASTER_ADDR`, `MASTER_PORT`, `NODE_RANK`, `WORLD_SIZE` environment variables on each node, in addition to the process-level `RANK` and `LOCAL_RANK` environment variables, that are needed for distributed PyTorch training."
]
},
{
@@ -290,17 +335,61 @@
"from azureml.core import ScriptRunConfig\n",
"from azureml.core.runconfig import PyTorchConfiguration\n",
"\n",
"args = ['--dist-backend', 'nccl',\n",
" '--dist-url', '$AZ_BATCHAI_PYTORCH_INIT_METHOD',\n",
" '--rank', '$AZ_BATCHAI_TASK_INDEX',\n",
" '--world-size', 2]\n",
"# create distributed config\n",
"distr_config = PyTorchConfiguration(process_count=2, node_count=2)\n",
"\n",
"# create args\n",
"args = [\"--data-dir\", dataset.as_download(), \"--epochs\", 25]\n",
"\n",
"# create job config\n",
"src = ScriptRunConfig(source_directory=project_folder,\n",
" script='pytorch_mnist.py',\n",
" script='train.py',\n",
" arguments=args,\n",
" compute_target=compute_target,\n",
" environment=pytorch_env,\n",
" distributed_job_config=PyTorchConfiguration(communication_backend='Nccl', node_count=2))"
" distributed_job_config=distr_config)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Per-node launch with `torch.distributed.launch`\n",
"\n",
"If you would instead like to use the PyTorch-provided launch utility `torch.distributed.launch` to handle launching the worker processes on each node, you can do so as well. \n",
"\n",
"1. Provide the launch command to the `command` parameter of ScriptRunConfig. For PyTorch jobs Azure ML will set the `MASTER_ADDR`, `MASTER_PORT`, and `NODE_RANK` environment variables on each node, so you can simply reference those environment variables in your command. If you are using a SKU with more than one GPU, adjust the `--nproc_per_node` argument accordingly.\n",
"\n",
"2. Create a `PyTorchConfiguration` and specify the `node_count`. You do not need to specify the `process_count`; by default Azure ML will launch one process per node to run the `command` you provided.\n",
"\n",
"Uncomment the code below to configure a job with this method."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"'''\n",
"from azureml.core import ScriptRunConfig\n",
"from azureml.core.runconfig import PyTorchConfiguration\n",
"\n",
"# create distributed config\n",
"distr_config = PyTorchConfiguration(node_count=2)\n",
"\n",
"# define command\n",
"launch_cmd = [\"python -m torch.distributed.launch --nproc_per_node 1 --nnodes 2 \" \\\n",
" \"--node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT --use_env \" \\\n",
" \"train.py --data-dir\", dataset.as_download(), \"--epochs 25\"]\n",
"\n",
"# create job config\n",
"src = ScriptRunConfig(source_directory=project_folder,\n",
" command=launch_cmd,\n",
" compute_target=compute_target,\n",
" environment=pytorch_env,\n",
" distributed_job_config=distr_config)\n",
"'''"
]
},
{
@@ -308,7 +397,7 @@
"metadata": {},
"source": [
"### Submit job\n",
"Run your experiment by submitting your ScriptRunConfig object. Note that this call is asynchronous."
"Run your experiment by submitting your `ScriptRunConfig` object. Note that this call is asynchronous."
]
},
{
@@ -355,50 +444,12 @@
"source": [
"run.wait_for_completion(show_output=True) # this provides a verbose log"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Configure training job: torch.distributed with Gloo backend\n",
"\n",
"If you would instead like to use the Gloo backend for distributed training, you can do so via the following code. The Gloo backend is recommended for distributed CPU training."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core import ScriptRunConfig\n",
"from azureml.core.runconfig import PyTorchConfiguration\n",
"\n",
"args = ['--dist-backend', 'gloo',\n",
" '--dist-url', '$AZ_BATCHAI_PYTORCH_INIT_METHOD',\n",
" '--rank', '$AZ_BATCHAI_TASK_INDEX',\n",
" '--world-size', 2]\n",
"\n",
"src = ScriptRunConfig(source_directory=project_folder,\n",
" script='pytorch_mnist.py',\n",
" arguments=args,\n",
" compute_target=compute_target,\n",
" environment=pytorch_env,\n",
" distributed_job_config=PyTorchConfiguration(communication_backend='Gloo', node_count=2))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Once you create the ScriptRunConfig, you can follow the submit steps as shown in the previous steps to submit a PyTorch distributed run using the Gloo backend."
]
}
],
"metadata": {
"authors": [
{
"name": "ninhu"
"name": "minxia"
}
],
"category": "training",
@@ -406,7 +457,7 @@
"AML Compute"
],
"datasets": [
"MNIST"
"CIFAR-10"
],
"deployment": [
"None"
@@ -432,12 +483,12 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
"version": "3.7.7"
},
"tags": [
"None"
],
"task": "Train a model using distributed training via Nccl/Gloo"
"task": "Train a model using distributed training via PyTorch DistributedDataParallel"
},
"nbformat": 4,
"nbformat_minor": 2

View File

@@ -0,0 +1,5 @@
name: distributed-pytorch-with-distributeddataparallel
dependencies:
- pip:
- azureml-sdk
- azureml-widgets

View File

@@ -0,0 +1,238 @@
# Copyright (c) 2017 Facebook, Inc. All rights reserved.
# BSD 3-Clause License
#
# Script adapted from:
# https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html
# ==============================================================================
# imports
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import os
import argparse
# define network architecture
class Net(nn.Module):
    """Convolutional image classifier producing 10 class scores.

    Three 3x3 convolutions (the second and third each followed by 2x2 max
    pooling) feed a fully connected head with dropout after its first layer.
    ``forward`` flattens the convolutional output to ``128 * 6 * 6`` features
    per sample, which is what 3-channel 32x32 inputs produce.
    """

    def __init__(self):
        super().__init__()
        # Feature extractor. Attribute names are part of the checkpoint
        # format (state_dict keys), so they must stay as they are.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3)
        # Classifier head.
        self.fc1 = nn.Linear(in_features=128 * 6 * 6, out_features=120)
        self.dropout = nn.Dropout(p=0.2)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Return the raw (un-normalised) class scores for the batch ``x``."""
        features = F.relu(self.conv1(x))
        # conv2 and conv3 share the same conv -> ReLU -> max-pool pattern.
        for conv in (self.conv2, self.conv3):
            features = self.pool(F.relu(conv(features)))
        flattened = features.view(-1, 128 * 6 * 6)
        hidden = self.dropout(F.relu(self.fc1(flattened)))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)
def train(train_loader, model, criterion, optimizer, epoch, device, print_freq, rank):
    """Train ``model`` for one pass over ``train_loader``.

    Args:
        train_loader: iterable of batches; item 0 of a batch is the inputs
            and item 1 the labels.
        model: callable mapping inputs to outputs.
        criterion: callable ``(outputs, labels) -> loss``.
        optimizer: optimizer whose ``zero_grad``/``step`` drive the update.
        epoch: zero-based epoch index, used only in the progress message.
        device: device the inputs and labels are moved to.
        print_freq: number of mini-batches averaged per progress message.
        rank: process rank, used only in the progress message.
    """
    running_loss = 0.0
    for i, data in enumerate(train_loader):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data[0].to(device), data[1].to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        # Report only once a full window of print_freq batches has been
        # accumulated, so that dividing by print_freq yields a true mean.
        # (Testing `i % print_freq == 0` fired on the very first batch and
        # printed a single loss divided by print_freq.)
        if (i + 1) % print_freq == 0:
            print(
                "Rank %d: [%d, %5d] loss: %.3f"
                % (rank, epoch + 1, i + 1, running_loss / print_freq)
            )
            running_loss = 0.0
def evaluate(test_loader, model, device):
    """Print overall and per-class accuracy of ``model`` on ``test_loader``.

    The model is switched to eval mode and run without gradient tracking.
    Every sample of every batch contributes to the per-class statistics,
    whatever the batch size.

    Args:
        test_loader: iterable of batches; item 0 of a batch is the images
            and item 1 the integer class labels (indices into the 10
            class names below).
        model: module mapping images to per-class scores.
        device: device the images and labels are moved to.
    """
    classes = (
        "plane",
        "car",
        "bird",
        "cat",
        "deer",
        "dog",
        "frog",
        "horse",
        "ship",
        "truck",
    )
    num_classes = len(classes)

    model.eval()

    correct = 0
    total = 0
    class_correct = [0.0] * num_classes
    class_total = [0.0] * num_classes
    with torch.no_grad():
        for data in test_loader:
            images, labels = data[0].to(device), data[1].to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            matches = predicted == labels
            total += labels.size(0)
            correct += matches.sum().item()
            # Walk over the actual batch contents instead of a fixed
            # range(10): a hard-coded count raises IndexError on batches
            # smaller than 10 and silently ignores samples beyond the 10th.
            for label, hit in zip(
                labels.reshape(-1).tolist(), matches.reshape(-1).tolist()
            ):
                class_correct[label] += hit
                class_total[label] += 1

    # print total test set accuracy (0 when the loader yielded no samples)
    overall = 100 * correct / total if total else 0
    print(
        "Accuracy of the network on the 10000 test images: %d %%" % overall
    )

    # print test accuracy for each of the classes
    for i in range(num_classes):
        if class_total[i] == 0:
            # No sample of this class was seen; avoid dividing by zero.
            print("Accuracy of %5s : n/a (no test samples)" % classes[i])
            continue
        print(
            "Accuracy of %5s : %2d %%"
            % (classes[i], 100 * class_correct[i] / class_total[i])
        )
def main(args):
    """Train the CIFAR-10 classifier, save its weights and evaluate it.

    Runs as one worker of a multi-process job when the launcher exports
    ``WORLD_SIZE`` > 1, and as a plain single-process run otherwise.

    Args:
        args: parsed command-line namespace providing ``data_dir``,
            ``epochs``, ``batch_size``, ``workers``, ``learning_rate``,
            ``momentum``, ``output_dir`` and ``print_freq``.
    """
    # get PyTorch environment variables; fall back to a single process when
    # the script is started without a distributed launcher, so that the
    # non-distributed branches below are actually reachable
    world_size = int(os.environ.get("WORLD_SIZE", 1))
    rank = int(os.environ.get("RANK", 0))
    local_rank = int(os.environ.get("LOCAL_RANK", 0))

    distributed = world_size > 1

    # set device
    if distributed:
        device = torch.device("cuda", local_rank)
        # Make this the process's current CUDA device before the NCCL
        # process group is created.
        torch.cuda.set_device(device)
    else:
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # initialize distributed process group using default env:// method
    if distributed:
        torch.distributed.init_process_group(backend="nccl")

    # define train and test dataset DataLoaders
    transform = transforms.Compose(
        [transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
    )

    train_set = torchvision.datasets.CIFAR10(
        root=args.data_dir, train=True, download=False, transform=transform
    )

    if distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_set)
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(
        train_set,
        batch_size=args.batch_size,
        # shuffle and sampler are mutually exclusive in DataLoader
        shuffle=(train_sampler is None),
        num_workers=args.workers,
        sampler=train_sampler,
    )

    test_set = torchvision.datasets.CIFAR10(
        root=args.data_dir, train=False, download=False, transform=transform
    )
    test_loader = torch.utils.data.DataLoader(
        test_set, batch_size=args.batch_size, shuffle=False, num_workers=args.workers
    )

    model = Net().to(device)

    # wrap model with DDP
    if distributed:
        model = nn.parallel.DistributedDataParallel(
            model, device_ids=[local_rank], output_device=local_rank
        )

    # define loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(
        model.parameters(), lr=args.learning_rate, momentum=args.momentum
    )

    # train the model
    for epoch in range(args.epochs):
        print("Rank %d: Starting epoch %d" % (rank, epoch))
        if distributed:
            # reseed the sampler so each epoch uses a different shuffle
            train_sampler.set_epoch(epoch)
        model.train()
        train(
            train_loader,
            model,
            criterion,
            optimizer,
            epoch,
            device,
            args.print_freq,
            rank,
        )

    print("Rank %d: Finished Training" % (rank))

    if not distributed or rank == 0:
        os.makedirs(args.output_dir, exist_ok=True)
        model_path = os.path.join(args.output_dir, "cifar_net.pt")
        # Save the underlying network rather than the DDP wrapper so the
        # checkpoint keys carry no "module." prefix and load straight into
        # a plain Net, exactly like a single-process checkpoint.
        net = model.module if distributed else model
        torch.save(net.state_dict(), model_path)

        # evaluate on full test dataset
        evaluate(test_loader, model, device)
if __name__ == "__main__":
    # Command-line options as (flag, add_argument keyword arguments) pairs,
    # listed in the order they should appear in --help.
    cli_options = [
        (
            "--data-dir",
            dict(type=str, help="directory containing CIFAR-10 dataset"),
        ),
        (
            "--epochs",
            dict(default=10, type=int, help="number of epochs"),
        ),
        (
            "--batch-size",
            dict(default=16, type=int, help="mini batch size for each gpu/process"),
        ),
        (
            "--workers",
            dict(
                default=2,
                type=int,
                help="number of data loading workers for each gpu/process",
            ),
        ),
        (
            "--learning-rate",
            dict(default=0.001, type=float, help="learning rate"),
        ),
        (
            "--momentum",
            dict(default=0.9, type=float, help="momentum"),
        ),
        (
            "--output-dir",
            dict(default="outputs", type=str, help="directory to save model to"),
        ),
        (
            "--print-freq",
            dict(
                default=200,
                type=int,
                help="frequency of printing training statistics",
            ),
        ),
    ]

    # setup argparse
    parser = argparse.ArgumentParser()
    for flag, options in cli_options:
        parser.add_argument(flag, **options)

    args = parser.parse_args()

    main(args)

View File

@@ -0,0 +1,5 @@
name: distributed-pytorch-with-horovod
dependencies:
- pip:
- azureml-sdk
- azureml-widgets

View File

@@ -51,6 +51,17 @@ if args.cuda:
kwargs = {}
# Use Azure Open Datasets for MNIST dataset
datasets.MNIST.resources = [
("https://azureopendatastorage.azurefd.net/mnist/train-images-idx3-ubyte.gz",
"f68b3c2dcbeaaa9fbdd348bbdeb94873"),
("https://azureopendatastorage.azurefd.net/mnist/train-labels-idx1-ubyte.gz",
"d53e105ee54ea40749a09fcbcd1e9432"),
("https://azureopendatastorage.azurefd.net/mnist/t10k-images-idx3-ubyte.gz",
"9fb629c4189551a2d022fa330f9573f3"),
("https://azureopendatastorage.azurefd.net/mnist/t10k-labels-idx1-ubyte.gz",
"ec29112dd5afa0611ce80d1b7f02629c")
]
train_dataset = \
datasets.MNIST('data-%d' % hvd.rank(), train=True, download=True,
transform=transforms.Compose([

Some files were not shown because too many files have changed in this diff Show More