Mirror of https://github.com/Azure/MachineLearningNotebooks.git (synced 2025-12-20 09:37:04 -05:00)

Compare commits: 18 commits (azureml-sd ... azureml-sd)
| Author | SHA1 | Date |
|---|---|---|
|  | 883e4a4c59 |  |
|  | e90826b331 |  |
|  | ac04172f6d |  |
|  | 8c0000beb4 |  |
|  | 35287ab0d8 |  |
|  | 3fe4f8b038 |  |
|  | 1722678469 |  |
|  | 17da7e8706 |  |
|  | d2e7213ff3 |  |
|  | 882cb76e8a |  |
|  | 37f37a46c1 |  |
|  | 0cd1412421 |  |
|  | c3ae9f00f6 |  |
|  | 11b02c650c |  |
|  | 606048c71f |  |
|  | cb1c354d44 |  |
|  | c868fff5a2 |  |
|  | bc4e6611c4 |  |
@@ -103,7 +103,7 @@
|
||||
"source": [
|
||||
"import azureml.core\n",
|
||||
"\n",
|
||||
"print(\"This notebook was created using version 1.22.0 of the Azure ML SDK\")\n",
|
||||
"print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
|
||||
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -21,9 +21,8 @@ dependencies:
|
||||
|
||||
- pip:
|
||||
# Required packages for AzureML execution, history, and data preparation.
|
||||
- azureml-widgets~=1.22.0
|
||||
- azureml-widgets~=1.24.0
|
||||
- pytorch-transformers==1.0.0
|
||||
- spacy==2.1.8
|
||||
- https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
|
||||
- -r https://automlcesdkdataresources.blob.core.windows.net/validated-requirements/1.22.0/validated_win32_requirements.txt [--no-deps]
|
||||
- PyJWT < 2.0.0
|
||||
- -r https://automlcesdkdataresources.blob.core.windows.net/validated-requirements/1.24.0/validated_win32_requirements.txt [--no-deps]
|
||||
|
||||
@@ -21,10 +21,8 @@ dependencies:
|
||||
|
||||
- pip:
|
||||
# Required packages for AzureML execution, history, and data preparation.
|
||||
- azureml-widgets~=1.22.0
|
||||
- azureml-widgets~=1.24.0
|
||||
- pytorch-transformers==1.0.0
|
||||
- spacy==2.1.8
|
||||
- https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
|
||||
- -r https://automlcesdkdataresources.blob.core.windows.net/validated-requirements/1.22.0/validated_linux_requirements.txt [--no-deps]
|
||||
- PyJWT < 2.0.0
|
||||
|
||||
- -r https://automlcesdkdataresources.blob.core.windows.net/validated-requirements/1.24.0/validated_linux_requirements.txt [--no-deps]
|
||||
|
||||
@@ -22,9 +22,8 @@ dependencies:
|
||||
|
||||
- pip:
|
||||
# Required packages for AzureML execution, history, and data preparation.
|
||||
- azureml-widgets~=1.22.0
|
||||
- azureml-widgets~=1.24.0
|
||||
- pytorch-transformers==1.0.0
|
||||
- spacy==2.1.8
|
||||
- https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
|
||||
- -r https://automlcesdkdataresources.blob.core.windows.net/validated-requirements/1.22.0/validated_darwin_requirements.txt [--no-deps]
|
||||
- PyJWT < 2.0.0
|
||||
- -r https://automlcesdkdataresources.blob.core.windows.net/validated-requirements/1.24.0/validated_darwin_requirements.txt [--no-deps]
|
||||
|
||||
@@ -105,7 +105,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(\"This notebook was created using version 1.22.0 of the Azure ML SDK\")\n",
|
||||
"print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
|
||||
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -0,0 +1,4 @@
|
||||
name: auto-ml-classification-bank-marketing-all-features
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
@@ -93,7 +93,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(\"This notebook was created using version 1.22.0 of the Azure ML SDK\")\n",
|
||||
"print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
|
||||
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -0,0 +1,4 @@
|
||||
name: auto-ml-classification-credit-card-fraud
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
@@ -96,7 +96,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(\"This notebook was created using version 1.22.0 of the Azure ML SDK\")\n",
|
||||
"print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
|
||||
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -0,0 +1,4 @@
|
||||
name: auto-ml-classification-text-dnn
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
@@ -81,7 +81,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(\"This notebook was created using version 1.22.0 of the Azure ML SDK\")\n",
|
||||
"print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
|
||||
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -0,0 +1,4 @@
|
||||
name: auto-ml-continuous-retraining
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
@@ -5,7 +5,7 @@ set options=%3
|
||||
set PIP_NO_WARN_SCRIPT_LOCATION=0
|
||||
|
||||
IF "%conda_env_name%"=="" SET conda_env_name="azure_automl_experimental"
|
||||
IF "%automl_env_file%"=="" SET automl_env_file="automl_env.yml"
|
||||
IF "%automl_env_file%"=="" SET automl_env_file="automl_thin_client_env.yml"
|
||||
|
||||
IF NOT EXIST %automl_env_file% GOTO YmlMissing
|
||||
|
||||
|
||||
@@ -12,7 +12,7 @@ fi
|
||||
|
||||
if [ "$AUTOML_ENV_FILE" == "" ]
|
||||
then
|
||||
AUTOML_ENV_FILE="automl_env.yml"
|
||||
AUTOML_ENV_FILE="automl_thin_client_env.yml"
|
||||
fi
|
||||
|
||||
if [ ! -f $AUTOML_ENV_FILE ]; then
|
||||
|
||||
@@ -12,7 +12,7 @@ fi
|
||||
|
||||
if [ "$AUTOML_ENV_FILE" == "" ]
|
||||
then
|
||||
AUTOML_ENV_FILE="automl_env.yml"
|
||||
AUTOML_ENV_FILE="automl_thin_client_env_mac.yml"
|
||||
fi
|
||||
|
||||
if [ ! -f $AUTOML_ENV_FILE ]; then
|
||||
|
||||
@@ -5,17 +5,14 @@ dependencies:
|
||||
- pip<=19.3.1
|
||||
- python>=3.5.2,<3.8
|
||||
- nb_conda
|
||||
- matplotlib==2.1.0
|
||||
- numpy~=1.18.0
|
||||
- cython
|
||||
- urllib3<1.24
|
||||
- scikit-learn==0.22.1
|
||||
- pandas==0.25.1
|
||||
- PyJWT < 2.0.0
|
||||
- numpy==1.18.5
|
||||
|
||||
- pip:
|
||||
# Required packages for AzureML execution, history, and data preparation.
|
||||
- azureml-defaults
|
||||
- azureml-sdk
|
||||
- azureml-widgets
|
||||
- azureml-explain-model
|
||||
- PyJWT < 2.0.0
|
||||
- pandas
|
||||
|
||||
@@ -6,17 +6,14 @@ dependencies:
|
||||
- nomkl
|
||||
- python>=3.5.2,<3.8
|
||||
- nb_conda
|
||||
- matplotlib==2.1.0
|
||||
- numpy~=1.18.0
|
||||
- cython
|
||||
- urllib3<1.24
|
||||
- scikit-learn==0.22.1
|
||||
- pandas==0.25.1
|
||||
- PyJWT < 2.0.0
|
||||
- numpy==1.18.5
|
||||
|
||||
- pip:
|
||||
# Required packages for AzureML execution, history, and data preparation.
|
||||
- azureml-defaults
|
||||
- azureml-sdk
|
||||
- azureml-widgets
|
||||
- azureml-explain-model
|
||||
- PyJWT < 2.0.0
|
||||
- pandas
|
||||
|
||||
@@ -67,11 +67,8 @@
|
||||
"source": [
|
||||
"import logging\n",
|
||||
"\n",
|
||||
"from matplotlib import pyplot as plt\n",
|
||||
"import json\n",
|
||||
"import numpy as np\n",
|
||||
"import pandas as pd\n",
|
||||
" \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"import azureml.core\n",
|
||||
"from azureml.core.experiment import Experiment\n",
|
||||
@@ -93,7 +90,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(\"This notebook was created using version 1.22.0 of the Azure ML SDK\")\n",
|
||||
"print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
|
||||
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
||||
]
|
||||
},
|
||||
@@ -116,9 +113,7 @@
|
||||
"output['Resource Group'] = ws.resource_group\n",
|
||||
"output['Location'] = ws.location\n",
|
||||
"output['Run History Name'] = experiment_name\n",
|
||||
"pd.set_option('display.max_colwidth', -1)\n",
|
||||
"outputDf = pd.DataFrame(data = output, index = [''])\n",
|
||||
"outputDf.T"
|
||||
"output"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -199,7 +194,6 @@
|
||||
"|**n_cross_validations**|Number of cross validation splits.|\n",
|
||||
"|**training_data**|(sparse) array-like, shape = [n_samples, n_features]|\n",
|
||||
"|**label_column_name**|(sparse) array-like, shape = [n_samples, ], targets values.|\n",
|
||||
"|**scenario**|We need to set this parameter to 'Latest' to enable some experimental features. This parameter should not be set outside of this experimental notebook.|\n",
|
||||
"\n",
|
||||
"**_You can find more information about primary metrics_** [here](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-configure-auto-train#primary-metric)"
|
||||
]
|
||||
@@ -228,7 +222,6 @@
|
||||
" compute_target = compute_target,\n",
|
||||
" training_data = train_data,\n",
|
||||
" label_column_name = label,\n",
|
||||
" scenario='Latest',\n",
|
||||
" **automl_settings\n",
|
||||
" )"
|
||||
]
|
||||
@@ -276,34 +269,13 @@
|
||||
"## Results"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Widget for Monitoring Runs\n",
|
||||
"\n",
|
||||
"The widget will first report a \"loading\" status while running the first iteration. After completing the first iteration, an auto-updating graph and table will be shown. The widget will refresh once per minute, so you should see the graph update as child runs complete.\n",
|
||||
"\n",
|
||||
"**Note:** The widget displays a link at the bottom. Use this link to open a web interface to explore the individual run details."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.widgets import RunDetails\n",
|
||||
"RunDetails(remote_run).show() "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"remote_run.wait_for_completion()"
|
||||
"remote_run.wait_for_completion(show_output=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -368,18 +340,12 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# preview the first 3 rows of the dataset\n",
|
||||
"\n",
|
||||
"test_data = test_data.to_pandas_dataframe()\n",
|
||||
"y_test = test_data['ERP'].fillna(0)\n",
|
||||
"test_data = test_data.drop('ERP', 1)\n",
|
||||
"test_data = test_data.fillna(0)\n",
|
||||
"y_test = test_data.keep_columns('ERP')\n",
|
||||
"test_data = test_data.drop_columns('ERP')\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"train_data = train_data.to_pandas_dataframe()\n",
|
||||
"y_train = train_data['ERP'].fillna(0)\n",
|
||||
"train_data = train_data.drop('ERP', 1)\n",
|
||||
"train_data = train_data.fillna(0)\n"
|
||||
"y_train = train_data.keep_columns('ERP')\n",
|
||||
"train_data = train_data.drop_columns('ERP')\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -397,7 +363,16 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.train.automl.model_proxy import ModelProxy\n",
|
||||
"best_model_proxy = ModelProxy(best_run)"
|
||||
"best_model_proxy = ModelProxy(best_run)\n",
|
||||
"y_pred_train = best_model_proxy.predict(train_data)\n",
|
||||
"y_pred_test = best_model_proxy.predict(test_data)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Exploring results"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -406,60 +381,15 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"y_pred_train = best_model_proxy.predict(train_data).to_pandas_dataframe().values.flatten()\n",
|
||||
"y_pred_train = y_pred_train.to_pandas_dataframe().values.flatten()\n",
|
||||
"y_train = y_train.to_pandas_dataframe().values.flatten()\n",
|
||||
"y_residual_train = y_train - y_pred_train\n",
|
||||
"\n",
|
||||
"y_pred_test = best_model_proxy.predict(test_data).to_pandas_dataframe().values.flatten()\n",
|
||||
"y_residual_test = y_test - y_pred_test"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%matplotlib inline\n",
|
||||
"from sklearn.metrics import mean_squared_error, r2_score\n",
|
||||
"\n",
|
||||
"# Set up a multi-plot chart.\n",
|
||||
"f, (a0, a1) = plt.subplots(1, 2, gridspec_kw = {'width_ratios':[1, 1], 'wspace':0, 'hspace': 0})\n",
|
||||
"f.suptitle('Regression Residual Values', fontsize = 18)\n",
|
||||
"f.set_figheight(6)\n",
|
||||
"f.set_figwidth(16)\n",
|
||||
"\n",
|
||||
"# Plot residual values of training set.\n",
|
||||
"a0.axis([0, 360, -100, 100])\n",
|
||||
"a0.plot(y_residual_train, 'bo', alpha = 0.5)\n",
|
||||
"a0.plot([-10,360],[0,0], 'r-', lw = 3)\n",
|
||||
"a0.text(16,170,'RMSE = {0:.2f}'.format(np.sqrt(mean_squared_error(y_train, y_pred_train))), fontsize = 12)\n",
|
||||
"a0.text(16,140,'R2 score = {0:.2f}'.format(r2_score(y_train, y_pred_train)),fontsize = 12)\n",
|
||||
"a0.set_xlabel('Training samples', fontsize = 12)\n",
|
||||
"a0.set_ylabel('Residual Values', fontsize = 12)\n",
|
||||
"\n",
|
||||
"# Plot residual values of test set.\n",
|
||||
"a1.axis([0, 90, -100, 100])\n",
|
||||
"a1.plot(y_residual_test, 'bo', alpha = 0.5)\n",
|
||||
"a1.plot([-10,360],[0,0], 'r-', lw = 3)\n",
|
||||
"a1.text(5,170,'RMSE = {0:.2f}'.format(np.sqrt(mean_squared_error(y_test, y_pred_test))), fontsize = 12)\n",
|
||||
"a1.text(5,140,'R2 score = {0:.2f}'.format(r2_score(y_test, y_pred_test)),fontsize = 12)\n",
|
||||
"a1.set_xlabel('Test samples', fontsize = 12)\n",
|
||||
"a1.set_yticklabels([])\n",
|
||||
"\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%matplotlib inline\n",
|
||||
"test_pred = plt.scatter(y_test, y_pred_test, color='')\n",
|
||||
"test_test = plt.scatter(y_test, y_test, color='g')\n",
|
||||
"plt.legend((test_pred, test_test), ('prediction', 'truth'), loc='upper left', fontsize=8)\n",
|
||||
"plt.show()"
|
||||
"y_pred_test = y_pred_test.to_pandas_dataframe().values.flatten()\n",
|
||||
"y_test = y_test.to_pandas_dataframe().values.flatten()\n",
|
||||
"y_residual_test = y_test - y_pred_test\n",
|
||||
"print(y_residual_train)\n",
|
||||
"print(y_residual_test)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -0,0 +1,4 @@
|
||||
name: auto-ml-regression-model-proxy
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
@@ -113,7 +113,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(\"This notebook was created using version 1.22.0 of the Azure ML SDK\")\n",
|
||||
"print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
|
||||
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -0,0 +1,4 @@
|
||||
name: auto-ml-forecasting-beer-remote
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
@@ -87,7 +87,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(\"This notebook was created using version 1.22.0 of the Azure ML SDK\")\n",
|
||||
"print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
|
||||
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -0,0 +1,4 @@
|
||||
name: auto-ml-forecasting-bike-share
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
@@ -97,7 +97,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(\"This notebook was created using version 1.22.0 of the Azure ML SDK\")\n",
|
||||
"print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
|
||||
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -0,0 +1,4 @@
|
||||
name: auto-ml-forecasting-energy-demand
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
@@ -94,7 +94,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(\"This notebook was created using version 1.22.0 of the Azure ML SDK\")\n",
|
||||
"print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
|
||||
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -0,0 +1,4 @@
|
||||
name: auto-ml-forecasting-function
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
@@ -82,7 +82,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(\"This notebook was created using version 1.22.0 of the Azure ML SDK\")\n",
|
||||
"print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
|
||||
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -0,0 +1,4 @@
|
||||
name: auto-ml-forecasting-orange-juice-sales
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
@@ -96,7 +96,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(\"This notebook was created using version 1.22.0 of the Azure ML SDK\")\n",
|
||||
"print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
|
||||
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -0,0 +1,4 @@
|
||||
name: auto-ml-classification-credit-card-fraud-local
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
@@ -96,7 +96,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(\"This notebook was created using version 1.22.0 of the Azure ML SDK\")\n",
|
||||
"print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
|
||||
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -0,0 +1,4 @@
|
||||
name: auto-ml-regression-explanation-featurization
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
@@ -92,7 +92,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(\"This notebook was created using version 1.22.0 of the Azure ML SDK\")\n",
|
||||
"print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
|
||||
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
||||
]
|
||||
},
|
||||
@@ -375,18 +375,12 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# preview the first 3 rows of the dataset\n",
|
||||
"\n",
|
||||
"test_data = test_data.to_pandas_dataframe()\n",
|
||||
"y_test = test_data['ERP'].fillna(0)\n",
|
||||
"test_data = test_data.drop('ERP', 1)\n",
|
||||
"test_data = test_data.fillna(0)\n",
|
||||
"y_test = test_data.keep_columns('ERP').to_pandas_dataframe()\n",
|
||||
"test_data = test_data.drop_columns('ERP').to_pandas_dataframe()\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"train_data = train_data.to_pandas_dataframe()\n",
|
||||
"y_train = train_data['ERP'].fillna(0)\n",
|
||||
"train_data = train_data.drop('ERP', 1)\n",
|
||||
"train_data = train_data.fillna(0)\n"
|
||||
"y_train = train_data.keep_columns('ERP').to_pandas_dataframe()\n",
|
||||
"train_data = train_data.drop_columns('ERP').to_pandas_dataframe()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -396,10 +390,10 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"y_pred_train = fitted_model.predict(train_data)\n",
|
||||
"y_residual_train = y_train - y_pred_train\n",
|
||||
"y_residual_train = y_train.values - y_pred_train\n",
|
||||
"\n",
|
||||
"y_pred_test = fitted_model.predict(test_data)\n",
|
||||
"y_residual_test = y_test - y_pred_test"
|
||||
"y_residual_test = y_test.values - y_pred_test"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -0,0 +1,4 @@
|
||||
name: auto-ml-regression
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
84
how-to-use-azureml/azure-synapse/README.md
Normal file
@@ -0,0 +1,84 @@
Azure Synapse Analytics is a limitless analytics service that brings together data integration, enterprise data warehousing, and big data analytics. It gives you the freedom to query data on your terms, using either serverless or dedicated resources at scale. Azure Synapse brings these worlds together with a unified experience to ingest, explore, prepare, manage, and serve data for immediate BI and machine learning needs. A core offering within Azure Synapse Analytics is serverless Apache Spark pools enhanced for big data workloads.

The Synapse integration in Azure ML is for customers who want to use Apache Spark in Azure Synapse Analytics to prepare data at scale in Azure ML before training their ML models. It lets customers work through the end-to-end ML lifecycle, including large-scale data preparation, model training, and deployment, within the Azure ML workspace, without resorting to tools poorly suited to machine learning or switching between multiple tools for data preparation and model training. Performing all ML tasks within Azure ML reduces the time needed to iterate on a machine learning project, which typically involves multiple rounds of data preparation and training.

In the public preview, the following capabilities are provided:

- Link an Azure Synapse Analytics workspace to an Azure Machine Learning workspace (via ARM, UI, or SDK)
- Attach Apache Spark pools powered by Azure Synapse Analytics as Azure Machine Learning compute targets (via ARM, UI, or SDK)
- Launch Apache Spark sessions in notebooks and perform interactive data exploration and preparation. This interactive experience leverages Apache Spark magic, and customers get session-level Conda support to install packages (see the session sketch after this list).
- Productionize ML pipelines by leveraging Apache Spark pools to pre-process big data
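The interactive notebook experience mentioned above is driven by Spark magics. The cells below are a minimal sketch only, assuming the preview `azureml-synapse` package provides a `%synapse` line magic and a `%%synapse` cell magic; the exact magic names, flags, compute name, and file path are assumptions, not confirmed by this repository.

```python
# Cell 1: start a Spark session on an attached Synapse compute (name is illustrative).
%synapse start -c synapse-spark-compute

# Cell 2: code under %%synapse runs as PySpark inside the Synapse session,
# where a ready-made `spark` session object is available.
%%synapse
df = spark.read.csv("abfss://<container>@<account>.dfs.core.windows.net/sample.csv", header=True)
df.groupBy("col1").count().show()

# Cell 3: stop the session once data preparation is done.
%synapse stop
```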
# Using Synapse in Azure Machine Learning

## Create Synapse resources

Follow these documents to create a Synapse workspace (a resource-setup.sh script is also available to create the resources for you):

- Create from the [Portal](https://docs.microsoft.com/en-us/azure/synapse-analytics/quickstart-create-workspace)
- Create from the [CLI](https://docs.microsoft.com/en-us/azure/synapse-analytics/quickstart-create-workspace-cli)

Follow these documents to create a Synapse Spark pool:

- Create from the [Portal](https://docs.microsoft.com/en-us/azure/synapse-analytics/quickstart-create-apache-spark-pool-portal)
- Create from the [CLI](https://docs.microsoft.com/en-us/cli/azure/ext/synapse/synapse/spark/pool?view=azure-cli-latest)

## Link Synapse Workspace

Make sure you are an owner of the Synapse workspace so that you can link it into AML.
You can run resource-setup.py to link the Synapse workspace and attach the compute.

```python
from azureml.core import Workspace
ws = Workspace.from_config()

from azureml.core import LinkedService, SynapseWorkspaceLinkedServiceConfiguration
synapse_link_config = SynapseWorkspaceLinkedServiceConfiguration(
    subscription_id="<subscription id>",
    resource_group="<resource group>",
    name="<synapse workspace name>"
)

linked_service = LinkedService.register(
    workspace=ws,
    name='<link name>',
    linked_service_config=synapse_link_config)
```
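If the Synapse workspace has already been linked (for example by resource-setup.py), the registration call is not needed again. As a small sketch, assuming the `LinkedService.list` and `LinkedService.get` helpers of the core SDK, an existing link can be looked up instead:

```python
from azureml.core import LinkedService

# Enumerate Synapse workspaces already linked to this AML workspace ...
for svc in LinkedService.list(ws):
    print(svc.name)

# ... or fetch a specific link by the name used at registration time.
linked_service = LinkedService.get(ws, '<link name>')
```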
## Attach Synapse Spark pool as AzureML compute

```python
from azureml.core.compute import SynapseCompute, ComputeTarget

spark_pool_name = "<spark pool name>"
attached_synapse_name = "<attached compute name>"

attach_config = SynapseCompute.attach_configuration(
    linked_service,
    type="SynapseSpark",
    pool_name=spark_pool_name)

synapse_compute = ComputeTarget.attach(
    workspace=ws,
    name=attached_synapse_name,
    attach_configuration=attach_config)

synapse_compute.wait_for_completion()
```
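Attaching is a one-time operation; in later sessions the pool can simply be retrieved by the name it was attached under. A minimal sketch, reusing the names from the cell above:

```python
from azureml.core.compute import ComputeTarget

# Look up the previously attached Synapse Spark pool by its attached-compute name.
synapse_compute = ComputeTarget(workspace=ws, name=attached_synapse_name)
print(synapse_compute.name, synapse_compute.type)
```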
## Set up permission

Grant the Spark admin role to the system-assigned identity of the linked service so that users can submit experiment runs or pipeline runs from the AML workspace to the Synapse Spark pool (a submission sketch follows this section).

Grant the Spark admin role to the specific user so that the user can start Spark sessions against the Synapse Spark pool.

You can get the system-assigned identity information by running

```python
print(linked_service.system_assigned_identity_principal_id)
```

- Launch Synapse Studio for the Synapse workspace and grant the linked service MSI the "Synapse Apache Spark administrator" role.
- In the Azure portal, grant the linked service MSI the "Storage Blob Data Contributor" role on the primary ADLS Gen2 account of the Synapse workspace, which is required to use the library management feature.
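With permissions in place, work can be submitted from AML to the attached pool. The following is only a hedged sketch of the pipeline path, assuming the `SynapseSparkStep` from `azureml-pipeline-steps` available in this preview; the script name, folder, experiment name, and resource settings are illustrative, and parameter names may differ from the released API.

```python
from azureml.core import Experiment
from azureml.pipeline.core import Pipeline
from azureml.pipeline.steps import SynapseSparkStep  # assumed preview API

# A Spark data-preparation step that runs prepare.py on the attached Synapse pool.
prep_step = SynapseSparkStep(
    name="prepare-data",
    file="prepare.py",               # illustrative script name
    source_directory="./scripts",    # illustrative folder
    compute_target=synapse_compute,  # the attached pool from above
    driver_memory="2g",
    driver_cores=2,
    executor_memory="2g",
    executor_cores=2,
    num_executors=2)

pipeline = Pipeline(workspace=ws, steps=[prep_step])
pipeline_run = Experiment(ws, "synapse-prep-sample").submit(pipeline)
pipeline_run.wait_for_completion(show_output=True)
```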
@@ -0,0 +1,6 @@
|
||||
name: multi-model-register-and-deploy
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- numpy
|
||||
- scikit-learn
|
||||
@@ -0,0 +1,6 @@
|
||||
name: model-register-and-deploy
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- numpy
|
||||
- scikit-learn
|
||||
@@ -0,0 +1,4 @@
|
||||
name: deploy-aks-with-controlled-rollout
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
@@ -0,0 +1,4 @@
|
||||
name: enable-app-insights-in-production-service
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
@@ -94,6 +94,17 @@ def main():
|
||||
os.makedirs(output_dir, exist_ok=True)
|
||||
|
||||
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
|
||||
# Use Azure Open Datasets for MNIST dataset
|
||||
datasets.MNIST.resources = [
|
||||
("https://azureopendatastorage.azurefd.net/mnist/train-images-idx3-ubyte.gz",
|
||||
"f68b3c2dcbeaaa9fbdd348bbdeb94873"),
|
||||
("https://azureopendatastorage.azurefd.net/mnist/train-labels-idx1-ubyte.gz",
|
||||
"d53e105ee54ea40749a09fcbcd1e9432"),
|
||||
("https://azureopendatastorage.azurefd.net/mnist/t10k-images-idx3-ubyte.gz",
|
||||
"9fb629c4189551a2d022fa330f9573f3"),
|
||||
("https://azureopendatastorage.azurefd.net/mnist/t10k-labels-idx1-ubyte.gz",
|
||||
"ec29112dd5afa0611ce80d1b7f02629c")
|
||||
]
|
||||
train_loader = torch.utils.data.DataLoader(
|
||||
datasets.MNIST('data', train=True, download=True,
|
||||
transform=transforms.Compose([transforms.ToTensor(),
|
||||
|
||||
@@ -0,0 +1,8 @@
|
||||
name: onnx-convert-aml-deploy-tinyyolo
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- numpy
|
||||
- git+https://github.com/apple/coremltools@v2.1
|
||||
- onnx<1.7.0
|
||||
- onnxmltools
|
||||
@@ -0,0 +1,9 @@
|
||||
name: onnx-inference-facial-expression-recognition-deploy
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-widgets
|
||||
- matplotlib
|
||||
- numpy
|
||||
- onnx<1.7.0
|
||||
- opencv-python-headless
|
||||
@@ -0,0 +1,9 @@
|
||||
name: onnx-inference-mnist-deploy
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-widgets
|
||||
- matplotlib
|
||||
- numpy
|
||||
- onnx<1.7.0
|
||||
- opencv-python-headless
|
||||
@@ -0,0 +1,4 @@
|
||||
name: onnx-model-register-and-deploy
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
@@ -0,0 +1,4 @@
|
||||
name: onnx-modelzoo-aml-deploy-resnet50
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
@@ -0,0 +1,5 @@
|
||||
name: onnx-train-pytorch-aml-deploy-mnist
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-widgets
|
||||
@@ -0,0 +1,5 @@
|
||||
name: production-deploy-to-aks-gpu
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- tensorflow
|
||||
@@ -0,0 +1,8 @@
|
||||
name: production-deploy-to-aks-ssl
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- matplotlib
|
||||
- tqdm
|
||||
- scipy
|
||||
- sklearn
|
||||
@@ -0,0 +1,8 @@
|
||||
name: production-deploy-to-aks
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- matplotlib
|
||||
- tqdm
|
||||
- scipy
|
||||
- sklearn
|
||||
@@ -0,0 +1,4 @@
|
||||
name: model-register-and-deploy-spark
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
@@ -259,7 +259,7 @@
|
||||
"run_config.environment.docker.enabled = True\n",
|
||||
"\n",
|
||||
"azureml_pip_packages = [\n",
|
||||
" 'azureml-defaults', 'azureml-contrib-interpret', 'azureml-telemetry', 'azureml-interpret'\n",
|
||||
" 'azureml-defaults', 'azureml-telemetry', 'azureml-interpret'\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"# Note: this is to pin the scikit-learn and pandas versions to be same as notebook.\n",
|
||||
|
||||
@@ -0,0 +1,13 @@
|
||||
name: explain-model-on-amlcompute
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-interpret
|
||||
- flask
|
||||
- flask-cors
|
||||
- gevent>=1.3.6
|
||||
- jinja2
|
||||
- ipython
|
||||
- matplotlib
|
||||
- azureml-dataset-runtime
|
||||
- ipywidgets
|
||||
@@ -57,7 +57,7 @@
|
||||
"Problem: IBM employee attrition classification with scikit-learn (run model explainer locally and upload explanation to the Azure Machine Learning Run History)\n",
|
||||
"\n",
|
||||
"1. Train a SVM classification model using Scikit-learn\n",
|
||||
"2. Run 'explain_model' with AML Run History, which leverages run history service to store and manage the explanation data\n",
|
||||
"2. Run 'explain-model-sample' with AML Run History, which leverages run history service to store and manage the explanation data\n",
|
||||
"---\n",
|
||||
"\n",
|
||||
"Setup: If you are using Jupyter notebooks, the extensions should be installed automatically with the package.\n",
|
||||
@@ -226,36 +226,6 @@
|
||||
" ('classifier', SVC(C=1.0, probability=True))])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"'''\n",
|
||||
"# Uncomment below if sklearn-pandas is not installed\n",
|
||||
"#!pip install sklearn-pandas\n",
|
||||
"from sklearn_pandas import DataFrameMapper\n",
|
||||
"\n",
|
||||
"# Impute, standardize the numeric features and one-hot encode the categorical features. \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"numeric_transformations = [([f], Pipeline(steps=[('imputer', SimpleImputer(strategy='median')), ('scaler', StandardScaler())])) for f in numerical]\n",
|
||||
"\n",
|
||||
"categorical_transformations = [([f], OneHotEncoder(handle_unknown='ignore', sparse=False)) for f in categorical]\n",
|
||||
"\n",
|
||||
"transformations = numeric_transformations + categorical_transformations\n",
|
||||
"\n",
|
||||
"# Append classifier to preprocessing pipeline.\n",
|
||||
"# Now we have a full prediction pipeline.\n",
|
||||
"clf = Pipeline(steps=[('preprocessor', transformations),\n",
|
||||
" ('classifier', SVC(C=1.0, probability=True))]) \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"'''"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -475,7 +445,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"experiment_name = 'explain_model'\n",
|
||||
"experiment_name = 'explain-model-sample'\n",
|
||||
"experiment = Experiment(ws, experiment_name)\n",
|
||||
"run = experiment.start_logging()\n",
|
||||
"client = ExplanationClient.from_run(run)"
|
||||
|
||||
@@ -0,0 +1,12 @@
|
||||
name: save-retrieve-explanations-run-history
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-interpret
|
||||
- flask
|
||||
- flask-cors
|
||||
- gevent>=1.3.6
|
||||
- jinja2
|
||||
- ipython
|
||||
- matplotlib
|
||||
- ipywidgets
|
||||
@@ -166,12 +166,12 @@
|
||||
"source": [
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"import joblib\n",
|
||||
"from sklearn.compose import ColumnTransformer\n",
|
||||
"from sklearn.preprocessing import StandardScaler, OneHotEncoder\n",
|
||||
"from sklearn.impute import SimpleImputer\n",
|
||||
"from sklearn.pipeline import Pipeline\n",
|
||||
"from sklearn.linear_model import LogisticRegression\n",
|
||||
"from sklearn.ensemble import RandomForestClassifier\n",
|
||||
"from sklearn_pandas import DataFrameMapper\n",
|
||||
"\n",
|
||||
"from interpret.ext.blackbox import TabularExplainer\n",
|
||||
"\n",
|
||||
@@ -201,17 +201,23 @@
|
||||
"# Store the numerical columns in a list numerical\n",
|
||||
"numerical = attritionXData.columns.difference(categorical)\n",
|
||||
"\n",
|
||||
"numeric_transformations = [([f], Pipeline(steps=[\n",
|
||||
"# We create the preprocessing pipelines for both numeric and categorical data.\n",
|
||||
"numeric_transformer = Pipeline(steps=[\n",
|
||||
" ('imputer', SimpleImputer(strategy='median')),\n",
|
||||
" ('scaler', StandardScaler())])) for f in numerical]\n",
|
||||
" ('scaler', StandardScaler())])\n",
|
||||
"\n",
|
||||
"categorical_transformations = [([f], OneHotEncoder(handle_unknown='ignore', sparse=False)) for f in categorical]\n",
|
||||
"categorical_transformer = Pipeline(steps=[\n",
|
||||
" ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),\n",
|
||||
" ('onehot', OneHotEncoder(handle_unknown='ignore'))])\n",
|
||||
"\n",
|
||||
"transformations = numeric_transformations + categorical_transformations\n",
|
||||
"transformations = ColumnTransformer(\n",
|
||||
" transformers=[\n",
|
||||
" ('num', numeric_transformer, numerical),\n",
|
||||
" ('cat', categorical_transformer, categorical)])\n",
|
||||
"\n",
|
||||
"# Append classifier to preprocessing pipeline.\n",
|
||||
"# Now we have a full prediction pipeline.\n",
|
||||
"clf = Pipeline(steps=[('preprocessor', DataFrameMapper(transformations)),\n",
|
||||
"clf = Pipeline(steps=[('preprocessor', transformations),\n",
|
||||
" ('classifier', RandomForestClassifier())])\n",
|
||||
"\n",
|
||||
"# Split data into train and test\n",
|
||||
@@ -323,7 +329,7 @@
|
||||
"\n",
|
||||
"# azureml-defaults is required to host the model as a web service.\n",
|
||||
"azureml_pip_packages = [\n",
|
||||
" 'azureml-defaults', 'azureml-contrib-interpret', 'azureml-core', 'azureml-telemetry',\n",
|
||||
" 'azureml-defaults', 'azureml-core', 'azureml-telemetry',\n",
|
||||
" 'azureml-interpret'\n",
|
||||
"]\n",
|
||||
" \n",
|
||||
@@ -350,7 +356,7 @@
|
||||
"# the submitted job is run in. Note the remote environment(s) needs to be similar to the local\n",
|
||||
"# environment, otherwise if a model is trained or deployed in a different environment this can\n",
|
||||
"# cause errors. Please take extra care when specifying your dependencies in a production environment.\n",
|
||||
"myenv = CondaDependencies.create(pip_packages=['sklearn-pandas', 'pyyaml', sklearn_dep, pandas_dep] + azureml_pip_packages,\n",
|
||||
"myenv = CondaDependencies.create(pip_packages=['pyyaml', sklearn_dep, pandas_dep] + azureml_pip_packages,\n",
|
||||
" pin_sdk_version=False)\n",
|
||||
"\n",
|
||||
"with open(\"myenv.yml\",\"w\") as f:\n",
|
||||
|
||||
@@ -0,0 +1,12 @@
|
||||
name: train-explain-model-locally-and-deploy
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-interpret
|
||||
- flask
|
||||
- flask-cors
|
||||
- gevent>=1.3.6
|
||||
- jinja2
|
||||
- ipython
|
||||
- matplotlib
|
||||
- ipywidgets
|
||||
@@ -267,7 +267,7 @@
|
||||
"run_config.environment.python.user_managed_dependencies = False\n",
|
||||
"\n",
|
||||
"azureml_pip_packages = [\n",
|
||||
" 'azureml-defaults', 'azureml-contrib-interpret', 'azureml-telemetry', 'azureml-interpret'\n",
|
||||
" 'azureml-defaults', 'azureml-telemetry', 'azureml-interpret'\n",
|
||||
"]\n",
|
||||
" \n",
|
||||
"\n",
|
||||
@@ -294,7 +294,7 @@
|
||||
"# the submitted job is run in. Note the remote environment(s) needs to be similar to the local\n",
|
||||
"# environment, otherwise if a model is trained or deployed in a different environment this can\n",
|
||||
"# cause errors. Please take extra care when specifying your dependencies in a production environment.\n",
|
||||
"azureml_pip_packages.extend(['sklearn-pandas', 'pyyaml', sklearn_dep, pandas_dep])\n",
|
||||
"azureml_pip_packages.extend(['pyyaml', sklearn_dep, pandas_dep])\n",
|
||||
"run_config.environment.python.conda_dependencies = CondaDependencies.create(pip_packages=azureml_pip_packages)\n",
|
||||
"# Now submit a run on AmlCompute\n",
|
||||
"from azureml.core.script_run_config import ScriptRunConfig\n",
|
||||
@@ -431,7 +431,7 @@
|
||||
"\n",
|
||||
"# WARNING: to install this, g++ needs to be available on the Docker image and is not by default (look at the next cell)\n",
|
||||
"azureml_pip_packages = [\n",
|
||||
" 'azureml-defaults', 'azureml-contrib-interpret', 'azureml-core', 'azureml-telemetry',\n",
|
||||
" 'azureml-defaults', 'azureml-core', 'azureml-telemetry',\n",
|
||||
" 'azureml-interpret'\n",
|
||||
"]\n",
|
||||
" \n",
|
||||
@@ -458,7 +458,7 @@
|
||||
"# the submitted job is run in. Note the remote environment(s) needs to be similar to the local\n",
|
||||
"# environment, otherwise if a model is trained or deployed in a different environment this can\n",
|
||||
"# cause errors. Please take extra care when specifying your dependencies in a production environment.\n",
|
||||
"azureml_pip_packages.extend(['sklearn-pandas', 'pyyaml', sklearn_dep, pandas_dep])\n",
|
||||
"azureml_pip_packages.extend(['pyyaml', sklearn_dep, pandas_dep])\n",
|
||||
"myenv = CondaDependencies.create(pip_packages=azureml_pip_packages)\n",
|
||||
"\n",
|
||||
"with open(\"myenv.yml\",\"w\") as f:\n",
|
||||
|
||||
@@ -0,0 +1,14 @@
|
||||
name: train-explain-model-on-amlcompute-and-deploy
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-interpret
|
||||
- flask
|
||||
- flask-cors
|
||||
- gevent>=1.3.6
|
||||
- jinja2
|
||||
- ipython
|
||||
- matplotlib
|
||||
- azureml-dataset-runtime
|
||||
- azureml-core
|
||||
- ipywidgets
|
||||
@@ -5,13 +5,13 @@
|
||||
import os
|
||||
import pandas as pd
|
||||
import zipfile
|
||||
from sklearn.model_selection import train_test_split
|
||||
import joblib
|
||||
from sklearn.compose import ColumnTransformer
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.preprocessing import StandardScaler, OneHotEncoder
|
||||
from sklearn.impute import SimpleImputer
|
||||
from sklearn.pipeline import Pipeline
|
||||
from sklearn.linear_model import LogisticRegression
|
||||
from sklearn_pandas import DataFrameMapper
|
||||
|
||||
from azureml.core.run import Run
|
||||
from interpret.ext.blackbox import TabularExplainer
|
||||
@@ -57,16 +57,22 @@ for col, value in attritionXData.iteritems():
|
||||
# store the numerical columns
|
||||
numerical = attritionXData.columns.difference(categorical)
|
||||
|
||||
numeric_transformations = [([f], Pipeline(steps=[
|
||||
# We create the preprocessing pipelines for both numeric and categorical data.
|
||||
numeric_transformer = Pipeline(steps=[
|
||||
('imputer', SimpleImputer(strategy='median')),
|
||||
('scaler', StandardScaler())])) for f in numerical]
|
||||
('scaler', StandardScaler())])
|
||||
|
||||
categorical_transformations = [([f], OneHotEncoder(handle_unknown='ignore', sparse=False)) for f in categorical]
|
||||
categorical_transformer = Pipeline(steps=[
|
||||
('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
|
||||
('onehot', OneHotEncoder(handle_unknown='ignore'))])
|
||||
|
||||
transformations = numeric_transformations + categorical_transformations
|
||||
transformations = ColumnTransformer(
|
||||
transformers=[
|
||||
('num', numeric_transformer, numerical),
|
||||
('cat', categorical_transformer, categorical)])
|
||||
|
||||
# append classifier to preprocessing pipeline
|
||||
clf = Pipeline(steps=[('preprocessor', DataFrameMapper(transformations)),
|
||||
clf = Pipeline(steps=[('preprocessor', transformations),
|
||||
('classifier', LogisticRegression(solver='lbfgs'))])
|
||||
|
||||
# get the run this was submitted from to interact with run history
|
||||
|
||||
@@ -0,0 +1,5 @@
|
||||
name: aml-pipelines-data-transfer
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-widgets
|
||||
@@ -0,0 +1,5 @@
|
||||
name: aml-pipelines-getting-started
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-widgets
|
||||
@@ -341,7 +341,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pipeline = Pipeline(workspace=ws, steps=[step])\n",
|
||||
"pipeline_run = Experiment(ws, 'azurebatch_experiment').submit(pipeline)"
|
||||
"pipeline_run = Experiment(ws, 'azurebatch_sample').submit(pipeline)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -0,0 +1,5 @@
|
||||
name: aml-pipelines-how-to-use-modulestep
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-widgets
|
||||
@@ -130,7 +130,7 @@
|
||||
"\n",
|
||||
"pipeline_draft = PipelineDraft.create(ws, name=\"TestPipelineDraft\",\n",
|
||||
" description=\"draft description\",\n",
|
||||
" experiment_name=\"helloworld\",\n",
|
||||
" experiment_name=\"pipeline_draft_sample\",\n",
|
||||
" pipeline=pipeline,\n",
|
||||
" continue_on_step_failure=True,\n",
|
||||
" tags={'dev': 'true'},\n",
|
||||
|
||||
@@ -0,0 +1,5 @@
|
||||
name: aml-pipelines-how-to-use-pipeline-drafts
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-widgets
|
||||
@@ -121,12 +121,17 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"os.makedirs('./data/mnist', exist_ok=True)\n",
|
||||
"data_folder = os.path.join(os.getcwd(), 'data/mnist')\n",
|
||||
"os.makedirs(data_folder, exist_ok=True)\n",
|
||||
"\n",
|
||||
"urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz', filename = './data/mnist/train-images.gz')\n",
|
||||
"urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz', filename = './data/mnist/train-labels.gz')\n",
|
||||
"urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz', filename = './data/mnist/test-images.gz')\n",
|
||||
"urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz', filename = './data/mnist/test-labels.gz')"
|
||||
"urllib.request.urlretrieve('https://azureopendatastorage.blob.core.windows.net/mnist/train-images-idx3-ubyte.gz',\n",
|
||||
" filename=os.path.join(data_folder, 'train-images-idx3-ubyte.gz'))\n",
|
||||
"urllib.request.urlretrieve('https://azureopendatastorage.blob.core.windows.net/mnist/train-labels-idx1-ubyte.gz',\n",
|
||||
" filename=os.path.join(data_folder, 'train-labels-idx1-ubyte.gz'))\n",
|
||||
"urllib.request.urlretrieve('https://azureopendatastorage.blob.core.windows.net/mnist/t10k-images-idx3-ubyte.gz',\n",
|
||||
" filename=os.path.join(data_folder, 't10k-images-idx3-ubyte.gz'))\n",
|
||||
"urllib.request.urlretrieve('https://azureopendatastorage.blob.core.windows.net/mnist/t10k-labels-idx1-ubyte.gz',\n",
|
||||
" filename=os.path.join(data_folder, 't10k-labels-idx1-ubyte.gz'))"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -146,11 +151,11 @@
|
||||
"from utils import load_data\n",
|
||||
"\n",
|
||||
"# note we also shrink the intensity values (X) from 0-255 to 0-1. This helps the neural network converge faster.\n",
|
||||
"X_train = load_data('./data/mnist/train-images.gz', False) / 255.0\n",
|
||||
"y_train = load_data('./data/mnist/train-labels.gz', True).reshape(-1)\n",
|
||||
"X_train = load_data(os.path.join(data_folder, 'train-images-idx3-ubyte.gz'), False) / np.float32(255.0)\n",
|
||||
"X_test = load_data(os.path.join(data_folder, 't10k-images-idx3-ubyte.gz'), False) / np.float32(255.0)\n",
|
||||
"y_train = load_data(os.path.join(data_folder, 'train-labels-idx1-ubyte.gz'), True).reshape(-1)\n",
|
||||
"y_test = load_data(os.path.join(data_folder, 't10k-labels-idx1-ubyte.gz'), True).reshape(-1)\n",
|
||||
"\n",
|
||||
"X_test = load_data('./data/mnist/test-images.gz', False) / 255.0\n",
|
||||
"y_test = load_data('./data/mnist/test-labels.gz', True).reshape(-1)\n",
|
||||
"\n",
|
||||
"count = 0\n",
|
||||
"sample_size = 30\n",
|
||||
|
||||
@@ -0,0 +1,9 @@
|
||||
name: aml-pipelines-parameter-tuning-with-hyperdrive
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-widgets
|
||||
- matplotlib
|
||||
- numpy
|
||||
- pandas_ml
|
||||
- azureml-dataset-runtime[pandas,fuse]
|
||||
@@ -325,7 +325,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# submit a pipeline run\n",
|
||||
"pipeline_run1 = Experiment(ws, 'Pipeline_experiment').submit(pipeline1)\n",
|
||||
"pipeline_run1 = Experiment(ws, 'Pipeline_experiment_sample').submit(pipeline1)\n",
|
||||
"# publish a pipeline from the submitted pipeline run\n",
|
||||
"published_pipeline2 = pipeline_run1.publish_pipeline(name=\"My_New_Pipeline2\", description=\"My Published Pipeline Description\", version=\"0.1\", continue_on_step_failure=True)\n",
|
||||
"published_pipeline2"
|
||||
|
||||
@@ -0,0 +1,6 @@
|
||||
name: aml-pipelines-publish-and-run-using-rest-endpoint
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-widgets
|
||||
- requests
|
||||
@@ -259,7 +259,7 @@
|
||||
"\n",
|
||||
"schedule = Schedule.create(workspace=ws, name=\"My_Schedule\",\n",
|
||||
" pipeline_id=pub_pipeline_id, \n",
|
||||
" experiment_name='Schedule_Run',\n",
|
||||
" experiment_name='Schedule-run-sample',\n",
|
||||
" recurrence=recurrence,\n",
|
||||
" wait_for_provisioning=True,\n",
|
||||
" description=\"Schedule Run\")\n",
|
||||
@@ -445,7 +445,7 @@
|
||||
"\n",
|
||||
"schedule = Schedule.create(workspace=ws, name=\"My_Schedule\",\n",
|
||||
" pipeline_id=pub_pipeline_id, \n",
|
||||
" experiment_name='Schedule_Run',\n",
|
||||
" experiment_name='Schedule-run-sample',\n",
|
||||
" datastore=datastore,\n",
|
||||
" wait_for_provisioning=True,\n",
|
||||
" description=\"Schedule Run\")\n",
|
||||
@@ -516,7 +516,7 @@
|
||||
"\n",
|
||||
"schedule = Schedule.create_for_pipeline_endpoint(workspace=ws, name=\"My_Endpoint_Schedule\",\n",
|
||||
" pipeline_endpoint_id=published_pipeline_endpoint_id,\n",
|
||||
" experiment_name='Schedule_Run',\n",
|
||||
" experiment_name='Schedule-run-sample',\n",
|
||||
" recurrence=recurrence, description=\"Schedule_Run\",\n",
|
||||
" wait_for_provisioning=True)\n",
|
||||
"\n",
|
||||
|
||||
@@ -0,0 +1,5 @@
|
||||
name: aml-pipelines-setup-schedule-for-a-published-pipeline
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-widgets
|
||||
@@ -553,7 +553,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core import Experiment\n",
|
||||
"pipeline_run = Experiment(ws, name=\"submit_from_endpoint\").submit(pipeline_endpoint_by_name, tags={'endpoint_tag': \"1\"}, pipeline_version=\"0\")"
|
||||
"pipeline_run = Experiment(ws, name=\"submit_endpoint_sample\").submit(pipeline_endpoint_by_name, tags={'endpoint_tag': \"1\"}, pipeline_version=\"0\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
||||
@@ -0,0 +1,6 @@
|
||||
name: aml-pipelines-setup-versioned-pipeline-endpoints
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-widgets
|
||||
- requests
|
||||
@@ -0,0 +1,5 @@
|
||||
name: aml-pipelines-showcasing-datapath-and-pipelineparameter
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-widgets
|
||||
@@ -0,0 +1,5 @@
|
||||
name: aml-pipelines-showcasing-dataset-and-pipelineparameter
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-widgets
|
||||
@@ -101,7 +101,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create an Azure ML experiment\n",
|
||||
"Let's create an experiment named \"automlstep-classification\" and a folder to hold the training scripts. The script runs will be recorded under the experiment in Azure.\n",
|
||||
"Let's create an experiment named \"automlstep-sample\" and a folder to hold the training scripts. The script runs will be recorded under the experiment in Azure.\n",
|
||||
"\n",
|
||||
"The best practice is to use separate folders for scripts and its dependent files for each step and specify that folder as the `source_directory` for the step. This helps reduce the size of the snapshot created for the step (only the specific folder is snapshotted). Since changes in any files in the `source_directory` would trigger a re-upload of the snapshot, this helps keep the reuse of the step when there are no changes in the `source_directory` of the step."
|
||||
]
|
||||
@@ -113,7 +113,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Choose a name for the run history container in the workspace.\n",
|
||||
"experiment_name = 'automlstep-classification'\n",
|
||||
"experiment_name = 'automlstep-sample'\n",
|
||||
"project_folder = './project'\n",
|
||||
"\n",
|
||||
"experiment = Experiment(ws, experiment_name)\n",
|
||||
|
||||
@@ -0,0 +1,4 @@
|
||||
name: aml-pipelines-with-automated-machine-learning-step
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
@@ -0,0 +1,5 @@
|
||||
name: aml-pipelines-with-commandstep-r
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-widgets
|
||||
@@ -0,0 +1,5 @@
|
||||
name: aml-pipelines-with-commandstep
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-widgets
|
||||
@@ -428,7 +428,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pipeline_run1 = Experiment(ws, 'Data_dependency').submit(pipeline1)\n",
|
||||
"pipeline_run1 = Experiment(ws, 'Data_dependency_sample').submit(pipeline1)\n",
|
||||
"print(\"Pipeline is submitted for execution\")"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -0,0 +1,5 @@
|
||||
name: aml-pipelines-with-data-dependency-steps
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-widgets
|
||||
@@ -0,0 +1,6 @@
|
||||
name: aml-pipelines-with-notebook-runner-step
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-widgets
|
||||
- azureml-contrib-notebook
|
||||
@@ -0,0 +1,10 @@
|
||||
name: nyc-taxi-data-regression-model-building
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-widgets
|
||||
- azureml-opendatasets
|
||||
- azureml-train-automl
|
||||
- matplotlib
|
||||
- pandas
|
||||
- pyarrow
|
||||
@@ -0,0 +1,7 @@
|
||||
name: file-dataset-image-inference-mnist
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-pipeline-steps
|
||||
- azureml-widgets
|
||||
- pandas
|
||||
@@ -0,0 +1,7 @@
|
||||
name: tabular-dataset-inference-iris
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-pipeline-steps
|
||||
- azureml-widgets
|
||||
- pandas
|
||||
@@ -0,0 +1,7 @@
|
||||
name: pipeline-style-transfer-parallel-run
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-pipeline-steps
|
||||
- azureml-widgets
|
||||
- requests
|
||||
@@ -0,0 +1,5 @@
|
||||
name: distributed-chainer
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-widgets
|
||||
@@ -4,6 +4,8 @@ import os
|
||||
|
||||
import numpy as np
|
||||
|
||||
from utils import download_mnist
|
||||
|
||||
import chainer
|
||||
from chainer import backend
|
||||
from chainer import backends
|
||||
@@ -17,6 +19,7 @@ from chainer.training import extensions
|
||||
from chainer.dataset import concat_examples
|
||||
from chainer.backends.cuda import to_cpu
|
||||
|
||||
|
||||
from azureml.core.run import Run
|
||||
run = Run.get_context()
|
||||
|
||||
@@ -49,7 +52,7 @@ def main():
|
||||
args = parser.parse_args()
|
||||
|
||||
# Download the MNIST data if you haven't downloaded it yet
|
||||
train, test = datasets.mnist.get_mnist(withlabel=True, ndim=1)
|
||||
train, test = download_mnist()
|
||||
|
||||
gpu_id = args.gpu_id
|
||||
batchsize = args.batchsize
|
||||
|
||||
@@ -2,6 +2,8 @@ import numpy as np
|
||||
import os
|
||||
import json
|
||||
|
||||
from utils import download_mnist
|
||||
|
||||
from chainer import serializers, using_config, Variable, datasets
|
||||
import chainer.functions as F
|
||||
import chainer.links as L
|
||||
@@ -41,7 +43,7 @@ def init():
|
||||
def run(input_data):
|
||||
i = np.array(json.loads(input_data)['data'])
|
||||
|
||||
_, test = datasets.get_mnist()
|
||||
_, test = download_mnist()
|
||||
x = Variable(np.asarray([test[i][0]]))
|
||||
y = model(x)
|
||||
|
||||
|
||||
@@ -217,7 +217,8 @@
|
||||
"import shutil\n",
|
||||
"\n",
|
||||
"shutil.copy('chainer_mnist.py', project_folder)\n",
|
||||
"shutil.copy('chainer_score.py', project_folder)"
|
||||
"shutil.copy('chainer_score.py', project_folder)\n",
|
||||
"shutil.copy('utils.py', project_folder)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -263,6 +264,7 @@
|
||||
"- python=3.6.2\n",
|
||||
"- pip:\n",
|
||||
" - azureml-defaults\n",
|
||||
" - azureml-opendatasets\n",
|
||||
" - chainer==5.1.0\n",
|
||||
" - cupy-cuda90==5.1.0\n",
|
||||
" - mpi4py==3.0.0\n",
|
||||
@@ -557,6 +559,7 @@
|
||||
"cd.add_conda_package('numpy')\n",
|
||||
"cd.add_pip_package('chainer==5.1.0')\n",
|
||||
"cd.add_pip_package(\"azureml-defaults\")\n",
|
||||
"cd.add_pip_package(\"azureml-opendatasets\")\n",
|
||||
"cd.save_to_file(base_directory='./', conda_file_path='myenv.yml')\n",
|
||||
"\n",
|
||||
"print(cd.serialize_to_string())"
|
||||
@@ -584,7 +587,8 @@
|
||||
"\n",
|
||||
"\n",
|
||||
"myenv = Environment.from_conda_specification(name=\"myenv\", file_path=\"myenv.yml\")\n",
|
||||
"inference_config = InferenceConfig(entry_script=\"chainer_score.py\", environment=myenv)\n",
|
||||
"inference_config = InferenceConfig(entry_script=\"chainer_score.py\", environment=myenv,\n",
|
||||
" source_directory=project_folder)\n",
|
||||
"\n",
|
||||
"aciconfig = AciWebservice.deploy_configuration(cpu_cores=1,\n",
|
||||
" auth_enabled=True, # this flag generates API keys to secure access\n",
|
||||
@@ -592,11 +596,11 @@
|
||||
" tags={'name': 'mnist', 'framework': 'Chainer'},\n",
|
||||
" description='Chainer DNN with MNIST')\n",
|
||||
"\n",
|
||||
"service = Model.deploy(workspace=ws, \n",
|
||||
" name='chainer-mnist-1', \n",
|
||||
" models=[model], \n",
|
||||
" inference_config=inference_config, \n",
|
||||
" deployment_config=aciconfig)\n",
|
||||
"service = Model.deploy(workspace=ws,\n",
|
||||
" name='chainer-mnist-1',\n",
|
||||
" models=[model],\n",
|
||||
" inference_config=inference_config,\n",
|
||||
" deployment_config=aciconfig)\n",
|
||||
"service.wait_for_deployment(True)\n",
|
||||
"print(service.state)\n",
|
||||
"print(service.scoring_uri)"
|
||||
@@ -685,13 +689,16 @@
|
||||
" res = res.reshape(n_items[0], 1)\n",
|
||||
" return res\n",
|
||||
"\n",
|
||||
"os.makedirs('./data/mnist', exist_ok=True)\n",
|
||||
"urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz', filename = './data/mnist/test-images.gz')\n",
|
||||
"urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz', filename = './data/mnist/test-labels.gz')\n",
|
||||
"data_folder = os.path.join(os.getcwd(), 'data/mnist')\n",
|
||||
"os.makedirs(data_folder, exist_ok=True)\n",
|
||||
"\n",
|
||||
"X_test = load_data('./data/mnist/test-images.gz', False)\n",
|
||||
"y_test = load_data('./data/mnist/test-labels.gz', True).reshape(-1)\n",
|
||||
"urllib.request.urlretrieve('https://azureopendatastorage.blob.core.windows.net/mnist/t10k-images-idx3-ubyte.gz',\n",
|
||||
" filename=os.path.join(data_folder, 't10k-images-idx3-ubyte.gz'))\n",
|
||||
"urllib.request.urlretrieve('https://azureopendatastorage.blob.core.windows.net/mnist/t10k-labels-idx1-ubyte.gz',\n",
|
||||
" filename=os.path.join(data_folder, 't10k-labels-idx1-ubyte.gz'))\n",
|
||||
"\n",
|
||||
"X_test = load_data(os.path.join(data_folder, 't10k-images-idx3-ubyte.gz'), False) / np.float32(255.0)\n",
|
||||
"y_test = load_data(os.path.join(data_folder, 't10k-labels-idx1-ubyte.gz'), True).reshape(-1)\n",
|
||||
"\n",
|
||||
"# send a random row from the test set to score\n",
|
||||
"random_index = np.random.randint(0, len(X_test)-1)\n",
|
||||
|
||||
@@ -0,0 +1,13 @@
|
||||
name: train-hyperparameter-tune-deploy-with-chainer
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-widgets
|
||||
- numpy
|
||||
- matplotlib
|
||||
- json
|
||||
- urllib
|
||||
- gzip
|
||||
- struct
|
||||
- requests
|
||||
- azureml-opendatasets
|
||||
@@ -0,0 +1,50 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

import glob
import gzip
import numpy as np
import os
import struct

from azureml.core import Dataset
from azureml.opendatasets import MNIST
from chainer.datasets import tuple_dataset


# load compressed MNIST gz files and return numpy arrays
def load_data(filename, label=False):
    with gzip.open(filename) as gz:
        struct.unpack('I', gz.read(4))
        n_items = struct.unpack('>I', gz.read(4))
        if not label:
            n_rows = struct.unpack('>I', gz.read(4))[0]
            n_cols = struct.unpack('>I', gz.read(4))[0]
            res = np.frombuffer(gz.read(n_items[0] * n_rows * n_cols), dtype=np.uint8)
            res = res.reshape(n_items[0], n_rows * n_cols)
        else:
            res = np.frombuffer(gz.read(n_items[0]), dtype=np.uint8)
            res = res.reshape(n_items[0], 1)
    return res


def download_mnist():
    data_folder = os.path.join(os.getcwd(), 'data/mnist')
    os.makedirs(data_folder, exist_ok=True)

    mnist_file_dataset = MNIST.get_file_dataset()
    mnist_file_dataset.download(data_folder, overwrite=True)

    X_train = load_data(glob.glob(os.path.join(data_folder, "**/train-images-idx3-ubyte.gz"),
                        recursive=True)[0], False) / 255.0
    X_test = load_data(glob.glob(os.path.join(data_folder, "**/t10k-images-idx3-ubyte.gz"),
                        recursive=True)[0], False) / 255.0
    y_train = load_data(glob.glob(os.path.join(data_folder, "**/train-labels-idx1-ubyte.gz"),
                        recursive=True)[0], True).reshape(-1)
    y_test = load_data(glob.glob(os.path.join(data_folder, "**/t10k-labels-idx1-ubyte.gz"),
                        recursive=True)[0], True).reshape(-1)

    train = tuple_dataset.TupleDataset(X_train.astype(np.float32), y_train.astype(np.int32))
    test = tuple_dataset.TupleDataset(X_test.astype(np.float32), y_test.astype(np.int32))

    return train, test
@@ -0,0 +1,5 @@
name: fastai-with-custom-docker
dependencies:
- pip:
  - azureml-sdk
  - fastai==1.0.61
@@ -0,0 +1,8 @@
name: train-hyperparameter-tune-deploy-with-keras
dependencies:
- pip:
  - azureml-sdk
  - azureml-widgets
  - tensorflow
  - keras<=2.3.1
  - matplotlib
@@ -21,7 +21,8 @@
"metadata": {},
"source": [
"# Distributed PyTorch with DistributedDataParallel\n",
"In this tutorial, you will train a PyTorch model on the [MNIST](http://yann.lecun.com/exdb/mnist/) dataset using distributed training with PyTorch's `DistributedDataParallel` module across a GPU cluster. "
"\n",
"In this tutorial, you will train a PyTorch model on the [CIFAR-10](https://www.cs.toronto.edu/~kriz/cifar.html) dataset using distributed training with PyTorch's `DistributedDataParallel` module across a GPU cluster."
]
},
{
@@ -113,7 +114,7 @@
"from azureml.core.compute_target import ComputeTargetException\n",
"\n",
"# choose a name for your cluster\n",
"cluster_name = \"gpu-cluster\"\n",
"cluster_name = 'gpu-cluster'\n",
"\n",
"try:\n",
"    compute_target = ComputeTarget(workspace=ws, name=cluster_name)\n",
@@ -139,6 +140,68 @@
"The above code creates GPU compute. If you instead want to create CPU compute, provide a different VM size to the `vm_size` parameter, such as `STANDARD_D2_V2`."
]
},
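As a concrete illustration of that CPU alternative (not part of this commit), a cluster could be provisioned by passing a CPU SKU to `vm_size`; the cluster name and node count below are arbitrary choices for this sketch.

```python
from azureml.core.compute import AmlCompute, ComputeTarget
from azureml.core.compute_target import ComputeTargetException

cpu_cluster_name = 'cpu-cluster'  # arbitrary name used only in this sketch

try:
    cpu_target = ComputeTarget(workspace=ws, name=cpu_cluster_name)
    print('Found existing compute target.')
except ComputeTargetException:
    # provision a CPU cluster by passing a CPU VM size to vm_size
    config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',
                                                   max_nodes=4)
    cpu_target = ComputeTarget.create(ws, cpu_cluster_name, config)
    cpu_target.wait_for_completion(show_output=True)
```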
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Prepare dataset\n",
"\n",
"Prepare the dataset used for training. We will first download and extract the publicly available CIFAR-10 dataset from the cs.toronto.edu website and then create an Azure ML FileDataset to use the data for training."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Download and extract CIFAR-10 data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import urllib\n",
"import tarfile\n",
"import os\n",
"\n",
"url = 'https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'\n",
"filename = 'cifar-10-python.tar.gz'\n",
"data_root = 'cifar-10'\n",
"filepath = os.path.join(data_root, filename)\n",
"\n",
"if not os.path.isdir(data_root):\n",
"    os.makedirs(data_root, exist_ok=True)\n",
"    urllib.request.urlretrieve(url, filepath)\n",
"    with tarfile.open(filepath, \"r:gz\") as tar:\n",
"        tar.extractall(path=data_root)\n",
"    os.remove(filepath) # delete tar.gz file after extraction"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Create Azure ML dataset\n",
"\n",
"The `upload_directory` method will upload the data to a datastore and create a FileDataset from it. In this tutorial we will use the workspace's default datastore."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core import Dataset\n",
"\n",
"datastore = ws.get_default_datastore()\n",
"dataset = Dataset.File.upload_directory(\n",
"    src_dir=data_root, target=(datastore, data_root)\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
@@ -161,8 +224,6 @@
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"project_folder = './pytorch-distr'\n",
"os.makedirs(project_folder, exist_ok=True)"
]
@@ -172,26 +233,14 @@
"metadata": {},
"source": [
"### Prepare training script\n",
"Now you will need to create your training script. In this tutorial, the script for distributed training of MNIST is already provided for you at `pytorch_mnist.py`. In practice, you should be able to take any custom PyTorch training script as is and run it with Azure ML without having to modify your code.\n",
"\n",
"However, if you would like to use Azure ML's [metric logging](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#logging) capabilities, you will have to add a small amount of Azure ML logic inside your training script. In this example, at each logging interval, we will log the loss for that minibatch to our Azure ML run.\n",
"\n",
"To do so, in `pytorch_mnist.py`, we will first access the Azure ML `Run` object within the script:\n",
"```Python\n",
"from azureml.core.run import Run\n",
"run = Run.get_context()\n",
"```\n",
"Later within the script, we log the loss metric to our run:\n",
"```Python\n",
"run.log('loss', losses.avg)\n",
"```"
"Now you will need to create your training script. In this tutorial, the script for distributed training on CIFAR-10 is already provided for you at `train.py`. In practice, you should be able to take any custom PyTorch training script as is and run it with Azure ML without having to modify your code."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Once your script is ready, copy the training script `pytorch_mnist.py` into the project directory."
"Once your script is ready, copy the training script `train.py` into the project directory."
]
},
{
@@ -202,7 +251,7 @@
"source": [
"import shutil\n",
"\n",
"shutil.copy('pytorch_mnist.py', project_folder)"
"shutil.copy('train.py', project_folder)"
]
},
{
@@ -231,26 +280,7 @@
"source": [
"### Create an environment\n",
"\n",
"Define a conda environment YAML file with your training script dependencies and create an Azure ML environment."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%writefile conda_dependencies.yml\n",
"\n",
"channels:\n",
"- conda-forge\n",
"dependencies:\n",
"- python=3.6.2\n",
"- pip:\n",
"  - azureml-defaults\n",
"  - torch==1.6.0\n",
"  - torchvision==0.7.0\n",
"  - future==0.17.1"
"In this tutorial, we will use one of Azure ML's curated PyTorch environments for training. [Curated environments](https://docs.microsoft.com/azure/machine-learning/how-to-use-environments#use-a-curated-environment) are available in your workspace by default. Specifically, we will use the PyTorch 1.6 GPU curated environment."
]
},
{
@@ -261,24 +291,39 @@
"source": [
"from azureml.core import Environment\n",
"\n",
"pytorch_env = Environment.from_conda_specification(name = 'pytorch-1.6-gpu', file_path = './conda_dependencies.yml')\n",
"\n",
"# Specify a GPU base image\n",
"pytorch_env.docker.enabled = True\n",
"pytorch_env.docker.base_image = 'mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.1-cudnn7-ubuntu18.04'"
"pytorch_env = Environment.get(ws, name='AzureML-PyTorch-1.6-GPU')"
]
},
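If you are unsure which curated environments exist in a given workspace, they can be enumerated before picking one; a small sketch (not part of this commit):

```python
from azureml.core import Environment

# curated environments are registered under names starting with 'AzureML-'
curated = [name for name in Environment.list(workspace=ws) if name.startswith('AzureML-')]
for name in sorted(curated):
    print(name)
```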
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Configure the training job: torch.distributed with NCCL backend\n",
"### Configure the training job\n",
"\n",
"Create a ScriptRunConfig object to specify the configuration details of your training job, including your training script, environment to use, and the compute target to run on.\n",
"To launch a distributed PyTorch job on Azure ML, you have two options:\n",
"\n",
"In order to run a distributed PyTorch job with **torch.distributed** using the NCCL backend, create a `PyTorchConfiguration` and pass it to the `distributed_job_config` parameter of the ScriptRunConfig constructor. Specify `communication_backend='Nccl'` in the PyTorchConfiguration. The below code will configure a 2-node distributed job. The NCCL backend is the recommended backend for PyTorch distributed GPU training.\n",
"1. Per-process launch - specify the total # of worker processes (typically one per GPU) you want to run, and\n",
"Azure ML will handle launching each process.\n",
"2. Per-node launch with [torch.distributed.launch](https://pytorch.org/docs/stable/distributed.html#launch-utility) - provide the `torch.distributed.launch` command you want to\n",
"run on each node.\n",
"\n",
"The script arguments refers to the Azure ML-set environment variables `AZ_BATCHAI_PYTORCH_INIT_METHOD` for shared file-system initialization and `AZ_BATCHAI_TASK_INDEX` for the global rank of each worker process."
"For more information, see the [documentation](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-train-pytorch#distributeddataparallel).\n",
"\n",
"Both options are shown below."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Per-process launch\n",
"\n",
"To use the per-process launch option in which Azure ML will handle launching each of the processes to run your training script,\n",
"\n",
"1. Specify the training script and arguments\n",
"2. Create a `PyTorchConfiguration` and specify `node_count` and `process_count`. The `process_count` is the total number of processes you want to run for the job; this should typically equal the # of GPUs available on each node multiplied by the # of nodes. Since this tutorial uses the `STANDARD_NC6` SKU, which has one GPU, the total process count for a 2-node job is `2`. If you are using a SKU with >1 GPUs, adjust the `process_count` accordingly.\n",
|
||||
"\n",
|
||||
"Azure ML will set the `MASTER_ADDR`, `MASTER_PORT`, `NODE_RANK`, `WORLD_SIZE` environment variables on each node, in addition to the process-level `RANK` and `LOCAL_RANK` environment variables, that are needed for distributed PyTorch training."
|
||||
]
|
||||
},
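On the script side, those environment variables are what `train.py` (shown later in this diff) reads before initializing the process group. A stripped-down sketch of that pattern, assuming the script is launched per process on GPU nodes:

```python
import os
import torch
import torch.nn as nn

# Azure ML sets these for per-process launches (see the markdown above)
world_size = int(os.environ["WORLD_SIZE"])
rank = int(os.environ["RANK"])
local_rank = int(os.environ["LOCAL_RANK"])

# MASTER_ADDR / MASTER_PORT are also set, so the default env:// init works
torch.distributed.init_process_group(backend="nccl")

device = torch.device("cuda", local_rank)
model = nn.Linear(10, 10).to(device)  # stand-in for the real network

# wrap the model so gradients are synchronized across all processes
model = nn.parallel.DistributedDataParallel(
    model, device_ids=[local_rank], output_device=local_rank
)
```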
{
@@ -290,17 +335,61 @@
"from azureml.core import ScriptRunConfig\n",
"from azureml.core.runconfig import PyTorchConfiguration\n",
"\n",
"args = ['--dist-backend', 'nccl',\n",
"        '--dist-url', '$AZ_BATCHAI_PYTORCH_INIT_METHOD',\n",
"        '--rank', '$AZ_BATCHAI_TASK_INDEX',\n",
"        '--world-size', 2]\n",
"# create distributed config\n",
"distr_config = PyTorchConfiguration(process_count=2, node_count=2)\n",
"\n",
"# create args\n",
"args = [\"--data-dir\", dataset.as_download(), \"--epochs\", 25]\n",
"\n",
"# create job config\n",
"src = ScriptRunConfig(source_directory=project_folder,\n",
"                      script='pytorch_mnist.py',\n",
"                      script='train.py',\n",
"                      arguments=args,\n",
"                      compute_target=compute_target,\n",
"                      environment=pytorch_env,\n",
"                      distributed_job_config=PyTorchConfiguration(communication_backend='Nccl', node_count=2))"
"                      distributed_job_config=distr_config)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Per-node launch with `torch.distributed.launch`\n",
"\n",
"If you would instead like to use the PyTorch-provided launch utility `torch.distributed.launch` to handle launching the worker processes on each node, you can do so as well. \n",
"\n",
"1. Provide the launch command to the `command` parameter of ScriptRunConfig. For PyTorch jobs Azure ML will set the `MASTER_ADDR`, `MASTER_PORT`, and `NODE_RANK` environment variables on each node, so you can simply just reference those environment variables in your command. If you are using a SKU with >1 GPUs, adjust the `--nproc_per_node` argument accordingly.\n",
"\n",
"2. Create a `PyTorchConfiguration` and specify the `node_count`. You do not need to specify the `process_count`; by default Azure ML will launch one process per node to run the `command` you provided.\n",
"\n",
"Uncomment the code below to configure a job with this method."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"'''\n",
"from azureml.core import ScriptRunConfig\n",
"from azureml.core.runconfig import PyTorchConfiguration\n",
"\n",
"# create distributed config\n",
"distr_config = PyTorchConfiguration(node_count=2)\n",
"\n",
"# define command\n",
"launch_cmd = [\"python -m torch.distributed.launch --nproc_per_node 1 --nnodes 2 \" \\\n",
"              \"--node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT --use_env \" \\\n",
"              \"train.py --data-dir\", dataset.as_download(), \"--epochs 25\"]\n",
"\n",
"# create job config\n",
"src = ScriptRunConfig(source_directory=project_folder,\n",
"                      command=launch_cmd,\n",
"                      compute_target=compute_target,\n",
"                      environment=pytorch_env,\n",
"                      distributed_job_config=distr_config)\n",
"'''"
]
},
{
@@ -308,7 +397,7 @@
"metadata": {},
"source": [
"### Submit job\n",
"Run your experiment by submitting your ScriptRunConfig object. Note that this call is asynchronous."
"Run your experiment by submitting your `ScriptRunConfig` object. Note that this call is asynchronous."
]
},
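The submission cell itself is unchanged by this commit and therefore does not appear in the diff; for context, a minimal sketch of submitting the run (the experiment name below is an arbitrary example):

```python
from azureml.core import Experiment

# 'pytorch-distr-cifar' is a hypothetical experiment name for this sketch
experiment = Experiment(workspace=ws, name='pytorch-distr-cifar')
run = experiment.submit(src)
print(run)
```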
{
@@ -355,50 +444,12 @@
"source": [
"run.wait_for_completion(show_output=True) # this provides a verbose log"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Configure training job: torch.distributed with Gloo backend\n",
"\n",
"If you would instead like to use the Gloo backend for distributed training, you can do so via the following code. The Gloo backend is recommended for distributed CPU training."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core import ScriptRunConfig\n",
"from azureml.core.runconfig import PyTorchConfiguration\n",
"\n",
"args = ['--dist-backend', 'gloo',\n",
"        '--dist-url', '$AZ_BATCHAI_PYTORCH_INIT_METHOD',\n",
"        '--rank', '$AZ_BATCHAI_TASK_INDEX',\n",
"        '--world-size', 2]\n",
"\n",
"src = ScriptRunConfig(source_directory=project_folder,\n",
"                      script='pytorch_mnist.py',\n",
"                      arguments=args,\n",
"                      compute_target=compute_target,\n",
"                      environment=pytorch_env,\n",
"                      distributed_job_config=PyTorchConfiguration(communication_backend='Gloo', node_count=2))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Once you create the ScriptRunConfig, you can follow the submit steps as shown in the previous steps to submit a PyTorch distributed run using the Gloo backend."
]
}
],
"metadata": {
"authors": [
{
"name": "ninhu"
"name": "minxia"
}
],
"category": "training",
@@ -406,7 +457,7 @@
"AML Compute"
],
"datasets": [
"MNIST"
"CIFAR-10"
],
"deployment": [
"None"
@@ -432,12 +483,12 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
"version": "3.7.7"
},
"tags": [
"None"
],
"task": "Train a model using distributed training via Nccl/Gloo"
"task": "Train a model using distributed training via PyTorch DistributedDataParallel"
},
"nbformat": 4,
"nbformat_minor": 2
@@ -0,0 +1,5 @@
name: distributed-pytorch-with-distributeddataparallel
dependencies:
- pip:
  - azureml-sdk
  - azureml-widgets
@@ -0,0 +1,238 @@
# Copyright (c) 2017 Facebook, Inc. All rights reserved.
# BSD 3-Clause License
#
# Script adapted from:
# https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html
# ==============================================================================

# imports
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import os
import argparse


# define network architecture
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, 3)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 64, 3)
        self.conv3 = nn.Conv2d(64, 128, 3)
        self.fc1 = nn.Linear(128 * 6 * 6, 120)
        self.dropout = nn.Dropout(p=0.2)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = x.view(-1, 128 * 6 * 6)
        x = self.dropout(F.relu(self.fc1(x)))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


def train(train_loader, model, criterion, optimizer, epoch, device, print_freq, rank):
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data[0].to(device), data[1].to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % print_freq == 0:  # print every print_freq mini-batches
            print(
                "Rank %d: [%d, %5d] loss: %.3f"
                % (rank, epoch + 1, i + 1, running_loss / print_freq)
            )
            running_loss = 0.0


def evaluate(test_loader, model, device):
    classes = (
        "plane",
        "car",
        "bird",
        "cat",
        "deer",
        "dog",
        "frog",
        "horse",
        "ship",
        "truck",
    )

    model.eval()

    correct = 0
    total = 0
    class_correct = list(0.0 for i in range(10))
    class_total = list(0.0 for i in range(10))
    with torch.no_grad():
        for data in test_loader:
            images, labels = data[0].to(device), data[1].to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            c = (predicted == labels).squeeze()
            for i in range(10):
                label = labels[i]
                class_correct[label] += c[i].item()
                class_total[label] += 1

    # print total test set accuracy
    print(
        "Accuracy of the network on the 10000 test images: %d %%"
        % (100 * correct / total)
    )

    # print test accuracy for each of the classes
    for i in range(10):
        print(
            "Accuracy of %5s : %2d %%"
            % (classes[i], 100 * class_correct[i] / class_total[i])
        )


def main(args):
    # get PyTorch environment variables
    world_size = int(os.environ["WORLD_SIZE"])
    rank = int(os.environ["RANK"])
    local_rank = int(os.environ["LOCAL_RANK"])

    distributed = world_size > 1

    # set device
    if distributed:
        device = torch.device("cuda", local_rank)
    else:
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # initialize distributed process group using default env:// method
    if distributed:
        torch.distributed.init_process_group(backend="nccl")

    # define train and test dataset DataLoaders
    transform = transforms.Compose(
        [transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
    )

    train_set = torchvision.datasets.CIFAR10(
        root=args.data_dir, train=True, download=False, transform=transform
    )

    if distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_set)
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(
        train_set,
        batch_size=args.batch_size,
        shuffle=(train_sampler is None),
        num_workers=args.workers,
        sampler=train_sampler,
    )

    test_set = torchvision.datasets.CIFAR10(
        root=args.data_dir, train=False, download=False, transform=transform
    )
    test_loader = torch.utils.data.DataLoader(
        test_set, batch_size=args.batch_size, shuffle=False, num_workers=args.workers
    )

    model = Net().to(device)

    # wrap model with DDP
    if distributed:
        model = nn.parallel.DistributedDataParallel(
            model, device_ids=[local_rank], output_device=local_rank
        )

    # define loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(
        model.parameters(), lr=args.learning_rate, momentum=args.momentum
    )

    # train the model
    for epoch in range(args.epochs):
        print("Rank %d: Starting epoch %d" % (rank, epoch))
        if distributed:
            train_sampler.set_epoch(epoch)
        model.train()
        train(
            train_loader,
            model,
            criterion,
            optimizer,
            epoch,
            device,
            args.print_freq,
            rank,
        )

    print("Rank %d: Finished Training" % (rank))

    if not distributed or rank == 0:
        os.makedirs(args.output_dir, exist_ok=True)
        model_path = os.path.join(args.output_dir, "cifar_net.pt")
        torch.save(model.state_dict(), model_path)

        # evaluate on full test dataset
        evaluate(test_loader, model, device)


if __name__ == "__main__":
    # setup argparse
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--data-dir", type=str, help="directory containing CIFAR-10 dataset"
    )
    parser.add_argument("--epochs", default=10, type=int, help="number of epochs")
    parser.add_argument(
        "--batch-size",
        default=16,
        type=int,
        help="mini batch size for each gpu/process",
    )
    parser.add_argument(
        "--workers",
        default=2,
        type=int,
        help="number of data loading workers for each gpu/process",
    )
    parser.add_argument(
        "--learning-rate", default=0.001, type=float, help="learning rate"
    )
    parser.add_argument("--momentum", default=0.9, type=float, help="momentum")
    parser.add_argument(
        "--output-dir", default="outputs", type=str, help="directory to save model to"
    )
    parser.add_argument(
        "--print-freq",
        default=200,
        type=int,
        help="frequency of printing training statistics",
    )
    args = parser.parse_args()

    main(args)
@@ -0,0 +1,5 @@
name: distributed-pytorch-with-horovod
dependencies:
- pip:
  - azureml-sdk
  - azureml-widgets
@@ -51,6 +51,17 @@ if args.cuda:


kwargs = {}
# Use Azure Open Datasets for MNIST dataset
datasets.MNIST.resources = [
    ("https://azureopendatastorage.azurefd.net/mnist/train-images-idx3-ubyte.gz",
     "f68b3c2dcbeaaa9fbdd348bbdeb94873"),
    ("https://azureopendatastorage.azurefd.net/mnist/train-labels-idx1-ubyte.gz",
     "d53e105ee54ea40749a09fcbcd1e9432"),
    ("https://azureopendatastorage.azurefd.net/mnist/t10k-images-idx3-ubyte.gz",
     "9fb629c4189551a2d022fa330f9573f3"),
    ("https://azureopendatastorage.azurefd.net/mnist/t10k-labels-idx1-ubyte.gz",
     "ec29112dd5afa0611ce80d1b7f02629c")
]
train_dataset = \
    datasets.MNIST('data-%d' % hvd.rank(), train=True, download=True,
                   transform=transforms.Compose([

Some files were not shown because too many files have changed in this diff.