Mirror of https://github.com/Azure/MachineLearningNotebooks.git (synced 2025-12-19 17:17:04 -05:00)

Compare commits: minxia/dis...release_up (10 commits)
| SHA1 |
|---|
| 883e4a4c59 |
| e90826b331 |
| ac04172f6d |
| 8c0000beb4 |
| 35287ab0d8 |
| 3fe4f8b038 |
| 1722678469 |
| 17da7e8706 |
| d2e7213ff3 |
| 882cb76e8a |
@@ -103,7 +103,7 @@
    "source": [
     "import azureml.core\n",
     "\n",
-    "print(\"This notebook was created using version 1.23.0 of the Azure ML SDK\")\n",
+    "print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
     "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
    ]
   },

@@ -21,9 +21,8 @@ dependencies:
   - pip:
     # Required packages for AzureML execution, history, and data preparation.
-    - azureml-widgets~=1.23.0
+    - azureml-widgets~=1.24.0
     - pytorch-transformers==1.0.0
     - spacy==2.1.8
     - https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
-    - -r https://automlcesdkdataresources.blob.core.windows.net/validated-requirements/1.23.0/validated_win32_requirements.txt [--no-deps]
-    - PyJWT < 2.0.0
+    - -r https://automlcesdkdataresources.blob.core.windows.net/validated-requirements/1.24.0/validated_win32_requirements.txt [--no-deps]

@@ -21,10 +21,8 @@ dependencies:
   - pip:
     # Required packages for AzureML execution, history, and data preparation.
-    - azureml-widgets~=1.23.0
+    - azureml-widgets~=1.24.0
     - pytorch-transformers==1.0.0
     - spacy==2.1.8
     - https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
-    - -r https://automlcesdkdataresources.blob.core.windows.net/validated-requirements/1.23.0/validated_linux_requirements.txt [--no-deps]
-    - PyJWT < 2.0.0
-
+    - -r https://automlcesdkdataresources.blob.core.windows.net/validated-requirements/1.24.0/validated_linux_requirements.txt [--no-deps]

@@ -22,9 +22,8 @@ dependencies:
   - pip:
     # Required packages for AzureML execution, history, and data preparation.
-    - azureml-widgets~=1.23.0
+    - azureml-widgets~=1.24.0
     - pytorch-transformers==1.0.0
     - spacy==2.1.8
-    - https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
-    - -r https://automlcesdkdataresources.blob.core.windows.net/validated-requirements/1.23.0/validated_darwin_requirements.txt [--no-deps]
-    - PyJWT < 2.0.0
+    - https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
+    - -r https://automlcesdkdataresources.blob.core.windows.net/validated-requirements/1.24.0/validated_darwin_requirements.txt [--no-deps]
@@ -105,7 +105,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "print(\"This notebook was created using version 1.23.0 of the Azure ML SDK\")\n",
+    "print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
     "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
    ]
   },

@@ -0,0 +1,4 @@
name: auto-ml-classification-bank-marketing-all-features
dependencies:
- pip:
  - azureml-sdk

@@ -93,7 +93,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "print(\"This notebook was created using version 1.23.0 of the Azure ML SDK\")\n",
+    "print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
     "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
    ]
   },

@@ -0,0 +1,4 @@
name: auto-ml-classification-credit-card-fraud
dependencies:
- pip:
  - azureml-sdk

@@ -96,7 +96,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "print(\"This notebook was created using version 1.23.0 of the Azure ML SDK\")\n",
+    "print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
     "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
    ]
   },

@@ -0,0 +1,4 @@
name: auto-ml-classification-text-dnn
dependencies:
- pip:
  - azureml-sdk

@@ -81,7 +81,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "print(\"This notebook was created using version 1.23.0 of the Azure ML SDK\")\n",
+    "print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
     "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
    ]
   },

@@ -0,0 +1,4 @@
name: auto-ml-continuous-retraining
dependencies:
- pip:
  - azureml-sdk
@@ -5,7 +5,7 @@ set options=%3
 set PIP_NO_WARN_SCRIPT_LOCATION=0

 IF "%conda_env_name%"=="" SET conda_env_name="azure_automl_experimental"
-IF "%automl_env_file%"=="" SET automl_env_file="automl_env.yml"
+IF "%automl_env_file%"=="" SET automl_env_file="automl_thin_client_env.yml"

 IF NOT EXIST %automl_env_file% GOTO YmlMissing

@@ -12,7 +12,7 @@ fi

 if [ "$AUTOML_ENV_FILE" == "" ]
 then
-   AUTOML_ENV_FILE="automl_env.yml"
+   AUTOML_ENV_FILE="automl_thin_client_env.yml"
 fi

 if [ ! -f $AUTOML_ENV_FILE ]; then

@@ -12,7 +12,7 @@ fi

 if [ "$AUTOML_ENV_FILE" == "" ]
 then
-   AUTOML_ENV_FILE="automl_env.yml"
+   AUTOML_ENV_FILE="automl_thin_client_env_mac.yml"
 fi

 if [ ! -f $AUTOML_ENV_FILE ]; then

@@ -7,6 +7,8 @@ dependencies:
 - nb_conda
 - cython
 - urllib3<1.24
+- PyJWT < 2.0.0
+- numpy==1.18.5

 - pip:
   # Required packages for AzureML execution, history, and data preparation.
@@ -14,4 +16,3 @@ dependencies:
   - azureml-sdk
   - azureml-widgets
   - pandas
-  - PyJWT < 2.0.0

@@ -8,6 +8,8 @@ dependencies:
 - nb_conda
 - cython
 - urllib3<1.24
+- PyJWT < 2.0.0
+- numpy==1.18.5

 - pip:
   # Required packages for AzureML execution, history, and data preparation.
@@ -15,4 +17,3 @@ dependencies:
   - azureml-sdk
   - azureml-widgets
   - pandas
-  - PyJWT < 2.0.0
@@ -90,7 +90,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "print(\"This notebook was created using version 1.23.0 of the Azure ML SDK\")\n",
+    "print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
     "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
    ]
   },
@@ -194,7 +194,6 @@
     "|**n_cross_validations**|Number of cross validation splits.|\n",
     "|**training_data**|(sparse) array-like, shape = [n_samples, n_features]|\n",
     "|**label_column_name**|(sparse) array-like, shape = [n_samples, ], targets values.|\n",
-    "|**scenario**|We need to set this parameter to 'Latest' to enable some experimental features. This parameter should not be set outside of this experimental notebook.|\n",
     "\n",
     "**_You can find more information about primary metrics_** [here](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-configure-auto-train#primary-metric)"
    ]
@@ -223,7 +222,6 @@
     "                             compute_target = compute_target,\n",
     "                             training_data = train_data,\n",
     "                             label_column_name = label,\n",
-    "                             scenario='Latest',\n",
     "                             **automl_settings\n",
     "                            )"
    ]
@@ -0,0 +1,4 @@
name: auto-ml-regression-model-proxy
dependencies:
- pip:
  - azureml-sdk

@@ -113,7 +113,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "print(\"This notebook was created using version 1.23.0 of the Azure ML SDK\")\n",
+    "print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
     "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
    ]
   },

@@ -0,0 +1,4 @@
name: auto-ml-forecasting-beer-remote
dependencies:
- pip:
  - azureml-sdk

@@ -87,7 +87,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "print(\"This notebook was created using version 1.23.0 of the Azure ML SDK\")\n",
+    "print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
     "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
    ]
   },

@@ -0,0 +1,4 @@
name: auto-ml-forecasting-bike-share
dependencies:
- pip:
  - azureml-sdk

@@ -97,7 +97,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "print(\"This notebook was created using version 1.23.0 of the Azure ML SDK\")\n",
+    "print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
     "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
    ]
   },

@@ -0,0 +1,4 @@
name: auto-ml-forecasting-energy-demand
dependencies:
- pip:
  - azureml-sdk

@@ -94,7 +94,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "print(\"This notebook was created using version 1.23.0 of the Azure ML SDK\")\n",
+    "print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
     "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
    ]
   },

@@ -0,0 +1,4 @@
name: auto-ml-forecasting-function
dependencies:
- pip:
  - azureml-sdk

@@ -82,7 +82,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "print(\"This notebook was created using version 1.23.0 of the Azure ML SDK\")\n",
+    "print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
     "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
    ]
   },

@@ -0,0 +1,4 @@
name: auto-ml-forecasting-orange-juice-sales
dependencies:
- pip:
  - azureml-sdk

@@ -96,7 +96,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "print(\"This notebook was created using version 1.23.0 of the Azure ML SDK\")\n",
+    "print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
     "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
    ]
   },

@@ -0,0 +1,4 @@
name: auto-ml-classification-credit-card-fraud-local
dependencies:
- pip:
  - azureml-sdk

@@ -96,7 +96,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "print(\"This notebook was created using version 1.23.0 of the Azure ML SDK\")\n",
+    "print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
     "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
    ]
   },

@@ -0,0 +1,4 @@
name: auto-ml-regression-explanation-featurization
dependencies:
- pip:
  - azureml-sdk

@@ -92,7 +92,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "print(\"This notebook was created using version 1.23.0 of the Azure ML SDK\")\n",
+    "print(\"This notebook was created using version 1.24.0 of the Azure ML SDK\")\n",
     "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
    ]
   },

@@ -0,0 +1,4 @@
name: auto-ml-regression
dependencies:
- pip:
  - azureml-sdk
how-to-use-azureml/azure-synapse/README.md (new file, 84 lines)
@@ -0,0 +1,84 @@
Azure Synapse Analytics is a limitless analytics service that brings together data integration, enterprise data warehousing, and big data analytics. It gives you the freedom to query data on your terms, using either serverless or dedicated resources at scale, and it provides a unified experience to ingest, explore, prepare, manage, and serve data for immediate BI and machine learning needs. A core offering within Azure Synapse Analytics is serverless Apache Spark pools, enhanced for big data workloads.

The Synapse integration in Azure ML is for customers who want to use Apache Spark in Azure Synapse Analytics to prepare data at scale in Azure ML before training their ML models. It lets customers work through their end-to-end ML lifecycle, including large-scale data preparation, model training, and deployment, within the Azure ML workspace, without having to use suboptimal tools for machine learning or switch between multiple tools for data preparation and model training. Being able to perform all ML tasks within Azure ML reduces the time customers need to iterate on a machine learning project, which typically includes multiple rounds of data preparation and training.

The public preview provides the following capabilities:

- Link an Azure Synapse Analytics workspace to an Azure Machine Learning workspace (via ARM, UI, or SDK)
- Attach Apache Spark pools powered by Azure Synapse Analytics as Azure Machine Learning compute targets (via ARM, UI, or SDK)
- Launch Apache Spark sessions in notebooks and perform interactive data exploration and preparation; this interactive experience leverages Apache Spark magics, with session-level Conda support to install packages (see the session sketch after the attach section below)
- Productionize ML pipelines by leveraging Apache Spark pools to pre-process big data (see the pipeline sketch at the end of this README)

# Using Synapse in Azure Machine Learning

## Create Synapse resources

Follow these documents to create a Synapse workspace; a resource-setup.sh script is also available to create the resources.

- Create from the [Portal](https://docs.microsoft.com/en-us/azure/synapse-analytics/quickstart-create-workspace)
- Create from the [CLI](https://docs.microsoft.com/en-us/azure/synapse-analytics/quickstart-create-workspace-cli)

Follow these documents to create a Synapse Spark pool:

- Create from the [Portal](https://docs.microsoft.com/en-us/azure/synapse-analytics/quickstart-create-apache-spark-pool-portal)
- Create from the [CLI](https://docs.microsoft.com/en-us/cli/azure/ext/synapse/synapse/spark/pool?view=azure-cli-latest)
## Link Synapse Workspace

Make sure you are an Owner of the Synapse workspace so that you can link it into Azure ML. You can run resource-setup.py to link the Synapse workspace and attach the compute; a retrieval sketch for a workspace that is already linked follows the code below.

```python
from azureml.core import Workspace
ws = Workspace.from_config()

from azureml.core import LinkedService, SynapseWorkspaceLinkedServiceConfiguration

synapse_link_config = SynapseWorkspaceLinkedServiceConfiguration(
    subscription_id="<subscription id>",
    resource_group="<resource group>",
    name="<synapse workspace name>"
)

linked_service = LinkedService.register(
    workspace=ws,
    name='<link name>',
    linked_service_config=synapse_link_config)
```
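If the workspace has already been linked (for example, by resource-setup.py), you can look up the existing registration instead of registering it again. A minimal sketch, assuming the `LinkedService.get`/`LinkedService.list` retrieval APIs; `'<link name>'` is the name used at registration time:

```python
from azureml.core import LinkedService

# Retrieve a specific link by the name it was registered under.
linked_service = LinkedService.get(ws, '<link name>')

# Or enumerate every service linked to this workspace.
for svc in LinkedService.list(ws):
    print(svc.name)
```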
## Attach the Synapse Spark pool as Azure ML compute

```python
from azureml.core.compute import SynapseCompute, ComputeTarget

spark_pool_name = "<spark pool name>"
attached_synapse_name = "<attached compute name>"

attach_config = SynapseCompute.attach_configuration(
    linked_service,
    type="SynapseSpark",
    pool_name=spark_pool_name)

synapse_compute = ComputeTarget.attach(
    workspace=ws,
    name=attached_synapse_name,
    attach_configuration=attach_config)

synapse_compute.wait_for_completion()
```
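Once the pool is attached (and the session-level role described in the next section has been granted), you can start an interactive Spark session from an Azure ML notebook. A minimal sketch, assuming the preview `%synapse` magics shipped in the `azureml-synapse` package; the exact option names may vary between package versions:

```python
# Cell 1: start a Spark session against the attached compute.
%synapse start -c <attached compute name>

# Cell 2: the %%synapse cell magic runs its body as PySpark on the pool.
%%synapse
df = spark.read.csv("abfss://<container>@<account>.dfs.core.windows.net/<path>", header=True)
df.show(5)

# Cell 3: stop the session when you are done.
%synapse stop
```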
## Set up permissions

Grant the Spark admin role to the system-assigned identity of the linked service so that users can submit experiment or pipeline runs from the Azure ML workspace to the Synapse Spark pool.

Grant the Spark admin role to individual users so that they can start Spark sessions against the Synapse Spark pool.

You can get the system-assigned identity information by running

```python
print(linked_service.system_assigned_identity_principal_id)
```

- Launch Synapse Studio for the Synapse workspace and grant the linked service MSI the "Synapse Apache Spark administrator" role.

- In the Azure portal, grant the linked service MSI the "Storage Blob Data Contributor" role on the primary ADLS Gen2 account of the Synapse workspace in order to use the library management feature.
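The pipeline capability mentioned at the top of this README is exposed through a dedicated pipeline step. A minimal sketch, assuming the public-preview `SynapseSparkStep` from `azureml-pipeline-steps`; the script name and the driver/executor sizes are illustrative placeholders, not values from this repository:

```python
from azureml.core import Experiment
from azureml.pipeline.core import Pipeline
from azureml.pipeline.steps import SynapseSparkStep

# Run a PySpark preprocessing script on the attached Synapse Spark pool.
spark_step = SynapseSparkStep(
    name="spark-data-prep",
    file="dataprep.py",               # placeholder PySpark script
    source_directory="./scripts",     # placeholder folder containing it
    compute_target=synapse_compute,   # the ComputeTarget attached above
    driver_memory="2g", driver_cores=2,
    executor_memory="2g", executor_cores=2, num_executors=2)

pipeline = Pipeline(workspace=ws, steps=[spark_step])
run = Experiment(ws, "synapse-data-prep").submit(pipeline)
run.wait_for_completion(show_output=True)
```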
@@ -0,0 +1,6 @@
name: multi-model-register-and-deploy
dependencies:
- pip:
  - azureml-sdk
  - numpy
  - scikit-learn

@@ -0,0 +1,6 @@
name: model-register-and-deploy
dependencies:
- pip:
  - azureml-sdk
  - numpy
  - scikit-learn

@@ -0,0 +1,4 @@
name: deploy-aks-with-controlled-rollout
dependencies:
- pip:
  - azureml-sdk

@@ -0,0 +1,4 @@
name: enable-app-insights-in-production-service
dependencies:
- pip:
  - azureml-sdk

@@ -94,6 +94,17 @@ def main():
     os.makedirs(output_dir, exist_ok=True)

     kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
+    # Use Azure Open Datasets for MNIST dataset
+    datasets.MNIST.resources = [
+        ("https://azureopendatastorage.azurefd.net/mnist/train-images-idx3-ubyte.gz",
+         "f68b3c2dcbeaaa9fbdd348bbdeb94873"),
+        ("https://azureopendatastorage.azurefd.net/mnist/train-labels-idx1-ubyte.gz",
+         "d53e105ee54ea40749a09fcbcd1e9432"),
+        ("https://azureopendatastorage.azurefd.net/mnist/t10k-images-idx3-ubyte.gz",
+         "9fb629c4189551a2d022fa330f9573f3"),
+        ("https://azureopendatastorage.azurefd.net/mnist/t10k-labels-idx1-ubyte.gz",
+         "ec29112dd5afa0611ce80d1b7f02629c")
+    ]
     train_loader = torch.utils.data.DataLoader(
         datasets.MNIST('data', train=True, download=True,
                        transform=transforms.Compose([transforms.ToTensor(),

@@ -0,0 +1,8 @@
name: onnx-convert-aml-deploy-tinyyolo
dependencies:
- pip:
  - azureml-sdk
  - numpy
  - git+https://github.com/apple/coremltools@v2.1
  - onnx<1.7.0
  - onnxmltools

@@ -0,0 +1,9 @@
name: onnx-inference-facial-expression-recognition-deploy
dependencies:
- pip:
  - azureml-sdk
  - azureml-widgets
  - matplotlib
  - numpy
  - onnx<1.7.0
  - opencv-python-headless

@@ -0,0 +1,9 @@
name: onnx-inference-mnist-deploy
dependencies:
- pip:
  - azureml-sdk
  - azureml-widgets
  - matplotlib
  - numpy
  - onnx<1.7.0
  - opencv-python-headless

@@ -0,0 +1,4 @@
name: onnx-model-register-and-deploy
dependencies:
- pip:
  - azureml-sdk

@@ -0,0 +1,4 @@
name: onnx-modelzoo-aml-deploy-resnet50
dependencies:
- pip:
  - azureml-sdk

@@ -0,0 +1,5 @@
name: onnx-train-pytorch-aml-deploy-mnist
dependencies:
- pip:
  - azureml-sdk
  - azureml-widgets

@@ -0,0 +1,5 @@
name: production-deploy-to-aks-gpu
dependencies:
- pip:
  - azureml-sdk
  - tensorflow

@@ -0,0 +1,8 @@
name: production-deploy-to-aks-ssl
dependencies:
- pip:
  - azureml-sdk
  - matplotlib
  - tqdm
  - scipy
  - sklearn

@@ -0,0 +1,8 @@
name: production-deploy-to-aks
dependencies:
- pip:
  - azureml-sdk
  - matplotlib
  - tqdm
  - scipy
  - sklearn

@@ -0,0 +1,4 @@
name: model-register-and-deploy-spark
dependencies:
- pip:
  - azureml-sdk

@@ -0,0 +1,13 @@
name: explain-model-on-amlcompute
dependencies:
- pip:
  - azureml-sdk
  - azureml-interpret
  - flask
  - flask-cors
  - gevent>=1.3.6
  - jinja2
  - ipython
  - matplotlib
  - azureml-dataset-runtime
  - ipywidgets
@@ -226,36 +226,6 @@
     " ('classifier', SVC(C=1.0, probability=True))])"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "'''\n",
-    "# Uncomment below if sklearn-pandas is not installed\n",
-    "#!pip install sklearn-pandas\n",
-    "from sklearn_pandas import DataFrameMapper\n",
-    "\n",
-    "# Impute, standardize the numeric features and one-hot encode the categorical features. \n",
-    "\n",
-    "\n",
-    "numeric_transformations = [([f], Pipeline(steps=[('imputer', SimpleImputer(strategy='median')), ('scaler', StandardScaler())])) for f in numerical]\n",
-    "\n",
-    "categorical_transformations = [([f], OneHotEncoder(handle_unknown='ignore', sparse=False)) for f in categorical]\n",
-    "\n",
-    "transformations = numeric_transformations + categorical_transformations\n",
-    "\n",
-    "# Append classifier to preprocessing pipeline.\n",
-    "# Now we have a full prediction pipeline.\n",
-    "clf = Pipeline(steps=[('preprocessor', transformations),\n",
-    "                      ('classifier', SVC(C=1.0, probability=True))]) \n",
-    "\n",
-    "\n",
-    "\n",
-    "'''"
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -0,0 +1,12 @@
name: save-retrieve-explanations-run-history
dependencies:
- pip:
  - azureml-sdk
  - azureml-interpret
  - flask
  - flask-cors
  - gevent>=1.3.6
  - jinja2
  - ipython
  - matplotlib
  - ipywidgets

@@ -166,12 +166,12 @@
    "source": [
     "from sklearn.model_selection import train_test_split\n",
     "import joblib\n",
+    "from sklearn.compose import ColumnTransformer\n",
     "from sklearn.preprocessing import StandardScaler, OneHotEncoder\n",
     "from sklearn.impute import SimpleImputer\n",
     "from sklearn.pipeline import Pipeline\n",
     "from sklearn.linear_model import LogisticRegression\n",
     "from sklearn.ensemble import RandomForestClassifier\n",
-    "from sklearn_pandas import DataFrameMapper\n",
     "\n",
     "from interpret.ext.blackbox import TabularExplainer\n",
     "\n",

@@ -201,17 +201,23 @@
     "# Store the numerical columns in a list numerical\n",
     "numerical = attritionXData.columns.difference(categorical)\n",
     "\n",
-    "numeric_transformations = [([f], Pipeline(steps=[\n",
+    "# We create the preprocessing pipelines for both numeric and categorical data.\n",
+    "numeric_transformer = Pipeline(steps=[\n",
     "    ('imputer', SimpleImputer(strategy='median')),\n",
-    "    ('scaler', StandardScaler())])) for f in numerical]\n",
+    "    ('scaler', StandardScaler())])\n",
     "\n",
-    "categorical_transformations = [([f], OneHotEncoder(handle_unknown='ignore', sparse=False)) for f in categorical]\n",
+    "categorical_transformer = Pipeline(steps=[\n",
+    "    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),\n",
+    "    ('onehot', OneHotEncoder(handle_unknown='ignore'))])\n",
     "\n",
-    "transformations = numeric_transformations + categorical_transformations\n",
+    "transformations = ColumnTransformer(\n",
+    "    transformers=[\n",
+    "        ('num', numeric_transformer, numerical),\n",
+    "        ('cat', categorical_transformer, categorical)])\n",
     "\n",
     "# Append classifier to preprocessing pipeline.\n",
     "# Now we have a full prediction pipeline.\n",
-    "clf = Pipeline(steps=[('preprocessor', DataFrameMapper(transformations)),\n",
+    "clf = Pipeline(steps=[('preprocessor', transformations),\n",
     "                      ('classifier', RandomForestClassifier())])\n",
     "\n",
     "# Split data into train and test\n",

@@ -350,7 +356,7 @@
     "# the submitted job is run in. Note the remote environment(s) needs to be similar to the local\n",
     "# environment, otherwise if a model is trained or deployed in a different environment this can\n",
     "# cause errors. Please take extra care when specifying your dependencies in a production environment.\n",
-    "myenv = CondaDependencies.create(pip_packages=['sklearn-pandas', 'pyyaml', sklearn_dep, pandas_dep] + azureml_pip_packages,\n",
+    "myenv = CondaDependencies.create(pip_packages=['pyyaml', sklearn_dep, pandas_dep] + azureml_pip_packages,\n",
     "                                 pin_sdk_version=False)\n",
     "\n",
     "with open(\"myenv.yml\",\"w\") as f:\n",

@@ -0,0 +1,12 @@
name: train-explain-model-locally-and-deploy
dependencies:
- pip:
  - azureml-sdk
  - azureml-interpret
  - flask
  - flask-cors
  - gevent>=1.3.6
  - jinja2
  - ipython
  - matplotlib
  - ipywidgets

@@ -294,7 +294,7 @@
     "# the submitted job is run in. Note the remote environment(s) needs to be similar to the local\n",
     "# environment, otherwise if a model is trained or deployed in a different environment this can\n",
     "# cause errors. Please take extra care when specifying your dependencies in a production environment.\n",
-    "azureml_pip_packages.extend(['sklearn-pandas', 'pyyaml', sklearn_dep, pandas_dep])\n",
+    "azureml_pip_packages.extend(['pyyaml', sklearn_dep, pandas_dep])\n",
     "run_config.environment.python.conda_dependencies = CondaDependencies.create(pip_packages=azureml_pip_packages)\n",
     "# Now submit a run on AmlCompute\n",
     "from azureml.core.script_run_config import ScriptRunConfig\n",

@@ -458,7 +458,7 @@
     "# the submitted job is run in. Note the remote environment(s) needs to be similar to the local\n",
     "# environment, otherwise if a model is trained or deployed in a different environment this can\n",
     "# cause errors. Please take extra care when specifying your dependencies in a production environment.\n",
-    "azureml_pip_packages.extend(['sklearn-pandas', 'pyyaml', sklearn_dep, pandas_dep])\n",
+    "azureml_pip_packages.extend(['pyyaml', sklearn_dep, pandas_dep])\n",
     "myenv = CondaDependencies.create(pip_packages=azureml_pip_packages)\n",
     "\n",
     "with open(\"myenv.yml\",\"w\") as f:\n",

@@ -0,0 +1,14 @@
name: train-explain-model-on-amlcompute-and-deploy
dependencies:
- pip:
  - azureml-sdk
  - azureml-interpret
  - flask
  - flask-cors
  - gevent>=1.3.6
  - jinja2
  - ipython
  - matplotlib
  - azureml-dataset-runtime
  - azureml-core
  - ipywidgets

@@ -5,13 +5,13 @@
 import os
 import pandas as pd
 import zipfile
-from sklearn.model_selection import train_test_split
 import joblib
+from sklearn.compose import ColumnTransformer
+from sklearn.model_selection import train_test_split
 from sklearn.preprocessing import StandardScaler, OneHotEncoder
 from sklearn.impute import SimpleImputer
 from sklearn.pipeline import Pipeline
 from sklearn.linear_model import LogisticRegression
-from sklearn_pandas import DataFrameMapper

 from azureml.core.run import Run
 from interpret.ext.blackbox import TabularExplainer

@@ -57,16 +57,22 @@ for col, value in attritionXData.iteritems():
 # store the numerical columns
 numerical = attritionXData.columns.difference(categorical)

-numeric_transformations = [([f], Pipeline(steps=[
+# We create the preprocessing pipelines for both numeric and categorical data.
+numeric_transformer = Pipeline(steps=[
     ('imputer', SimpleImputer(strategy='median')),
-    ('scaler', StandardScaler())])) for f in numerical]
+    ('scaler', StandardScaler())])

-categorical_transformations = [([f], OneHotEncoder(handle_unknown='ignore', sparse=False)) for f in categorical]
+categorical_transformer = Pipeline(steps=[
+    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
+    ('onehot', OneHotEncoder(handle_unknown='ignore'))])

-transformations = numeric_transformations + categorical_transformations
+transformations = ColumnTransformer(
+    transformers=[
+        ('num', numeric_transformer, numerical),
+        ('cat', categorical_transformer, categorical)])

 # append classifier to preprocessing pipeline
-clf = Pipeline(steps=[('preprocessor', DataFrameMapper(transformations)),
+clf = Pipeline(steps=[('preprocessor', transformations),
                       ('classifier', LogisticRegression(solver='lbfgs'))])

 # get the run this was submitted from to interact with run history
@@ -0,0 +1,5 @@
name: aml-pipelines-data-transfer
dependencies:
- pip:
  - azureml-sdk
  - azureml-widgets

@@ -0,0 +1,5 @@
name: aml-pipelines-getting-started
dependencies:
- pip:
  - azureml-sdk
  - azureml-widgets

@@ -0,0 +1,5 @@
name: aml-pipelines-how-to-use-modulestep
dependencies:
- pip:
  - azureml-sdk
  - azureml-widgets

@@ -0,0 +1,5 @@
name: aml-pipelines-how-to-use-pipeline-drafts
dependencies:
- pip:
  - azureml-sdk
  - azureml-widgets

@@ -121,12 +121,17 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "os.makedirs('./data/mnist', exist_ok=True)\n",
+    "data_folder = os.path.join(os.getcwd(), 'data/mnist')\n",
+    "os.makedirs(data_folder, exist_ok=True)\n",
     "\n",
-    "urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz', filename = './data/mnist/train-images.gz')\n",
-    "urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz', filename = './data/mnist/train-labels.gz')\n",
-    "urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz', filename = './data/mnist/test-images.gz')\n",
-    "urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz', filename = './data/mnist/test-labels.gz')"
+    "urllib.request.urlretrieve('https://azureopendatastorage.blob.core.windows.net/mnist/train-images-idx3-ubyte.gz',\n",
+    "                           filename=os.path.join(data_folder, 'train-images-idx3-ubyte.gz'))\n",
+    "urllib.request.urlretrieve('https://azureopendatastorage.blob.core.windows.net/mnist/train-labels-idx1-ubyte.gz',\n",
+    "                           filename=os.path.join(data_folder, 'train-labels-idx1-ubyte.gz'))\n",
+    "urllib.request.urlretrieve('https://azureopendatastorage.blob.core.windows.net/mnist/t10k-images-idx3-ubyte.gz',\n",
+    "                           filename=os.path.join(data_folder, 't10k-images-idx3-ubyte.gz'))\n",
+    "urllib.request.urlretrieve('https://azureopendatastorage.blob.core.windows.net/mnist/t10k-labels-idx1-ubyte.gz',\n",
+    "                           filename=os.path.join(data_folder, 't10k-labels-idx1-ubyte.gz'))"
    ]
   },
   {

@@ -146,11 +151,11 @@
    "from utils import load_data\n",
    "\n",
    "# note we also shrink the intensity values (X) from 0-255 to 0-1. This helps the neural network converge faster.\n",
-   "X_train = load_data('./data/mnist/train-images.gz', False) / 255.0\n",
-   "y_train = load_data('./data/mnist/train-labels.gz', True).reshape(-1)\n",
+   "X_train = load_data(os.path.join(data_folder, 'train-images-idx3-ubyte.gz'), False) / np.float32(255.0)\n",
+   "X_test = load_data(os.path.join(data_folder, 't10k-images-idx3-ubyte.gz'), False) / np.float32(255.0)\n",
+   "y_train = load_data(os.path.join(data_folder, 'train-labels-idx1-ubyte.gz'), True).reshape(-1)\n",
+   "y_test = load_data(os.path.join(data_folder, 't10k-labels-idx1-ubyte.gz'), True).reshape(-1)\n",
    "\n",
-   "X_test = load_data('./data/mnist/test-images.gz', False) / 255.0\n",
-   "y_test = load_data('./data/mnist/test-labels.gz', True).reshape(-1)\n",
-   "\n",
    "count = 0\n",
    "sample_size = 30\n",

@@ -0,0 +1,9 @@
name: aml-pipelines-parameter-tuning-with-hyperdrive
dependencies:
- pip:
  - azureml-sdk
  - azureml-widgets
  - matplotlib
  - numpy
  - pandas_ml
  - azureml-dataset-runtime[pandas,fuse]

@@ -0,0 +1,6 @@
name: aml-pipelines-publish-and-run-using-rest-endpoint
dependencies:
- pip:
  - azureml-sdk
  - azureml-widgets
  - requests

@@ -0,0 +1,5 @@
name: aml-pipelines-setup-schedule-for-a-published-pipeline
dependencies:
- pip:
  - azureml-sdk
  - azureml-widgets

@@ -0,0 +1,6 @@
name: aml-pipelines-setup-versioned-pipeline-endpoints
dependencies:
- pip:
  - azureml-sdk
  - azureml-widgets
  - requests

@@ -0,0 +1,5 @@
name: aml-pipelines-showcasing-datapath-and-pipelineparameter
dependencies:
- pip:
  - azureml-sdk
  - azureml-widgets

@@ -0,0 +1,5 @@
name: aml-pipelines-showcasing-dataset-and-pipelineparameter
dependencies:
- pip:
  - azureml-sdk
  - azureml-widgets

@@ -0,0 +1,4 @@
name: aml-pipelines-with-automated-machine-learning-step
dependencies:
- pip:
  - azureml-sdk

@@ -0,0 +1,5 @@
name: aml-pipelines-with-commandstep-r
dependencies:
- pip:
  - azureml-sdk
  - azureml-widgets

@@ -0,0 +1,5 @@
name: aml-pipelines-with-commandstep
dependencies:
- pip:
  - azureml-sdk
  - azureml-widgets

@@ -0,0 +1,5 @@
name: aml-pipelines-with-data-dependency-steps
dependencies:
- pip:
  - azureml-sdk
  - azureml-widgets

@@ -0,0 +1,6 @@
name: aml-pipelines-with-notebook-runner-step
dependencies:
- pip:
  - azureml-sdk
  - azureml-widgets
  - azureml-contrib-notebook

@@ -0,0 +1,10 @@
name: nyc-taxi-data-regression-model-building
dependencies:
- pip:
  - azureml-sdk
  - azureml-widgets
  - azureml-opendatasets
  - azureml-train-automl
  - matplotlib
  - pandas
  - pyarrow

@@ -0,0 +1,7 @@
name: file-dataset-image-inference-mnist
dependencies:
- pip:
  - azureml-sdk
  - azureml-pipeline-steps
  - azureml-widgets
  - pandas

@@ -0,0 +1,7 @@
name: tabular-dataset-inference-iris
dependencies:
- pip:
  - azureml-sdk
  - azureml-pipeline-steps
  - azureml-widgets
  - pandas

@@ -0,0 +1,7 @@
name: pipeline-style-transfer-parallel-run
dependencies:
- pip:
  - azureml-sdk
  - azureml-pipeline-steps
  - azureml-widgets
  - requests

@@ -0,0 +1,5 @@
name: distributed-chainer
dependencies:
- pip:
  - azureml-sdk
  - azureml-widgets
@@ -4,6 +4,8 @@ import os

 import numpy as np

+from utils import download_mnist
+
 import chainer
 from chainer import backend
 from chainer import backends

@@ -17,6 +19,7 @@ from chainer.training import extensions
 from chainer.dataset import concat_examples
 from chainer.backends.cuda import to_cpu

+
 from azureml.core.run import Run
 run = Run.get_context()

@@ -49,7 +52,7 @@ def main():
     args = parser.parse_args()

     # Download the MNIST data if you haven't downloaded it yet
-    train, test = datasets.mnist.get_mnist(withlabel=True, ndim=1)
+    train, test = download_mnist()

     gpu_id = args.gpu_id
     batchsize = args.batchsize

@@ -2,6 +2,8 @@ import numpy as np
 import os
 import json

+from utils import download_mnist
+
 from chainer import serializers, using_config, Variable, datasets
 import chainer.functions as F
 import chainer.links as L

@@ -41,7 +43,7 @@ def init():
 def run(input_data):
     i = np.array(json.loads(input_data)['data'])

-    _, test = datasets.get_mnist()
+    _, test = download_mnist()
     x = Variable(np.asarray([test[i][0]]))
     y = model(x)

@@ -217,7 +217,8 @@
    "import shutil\n",
    "\n",
    "shutil.copy('chainer_mnist.py', project_folder)\n",
-   "shutil.copy('chainer_score.py', project_folder)"
+   "shutil.copy('chainer_score.py', project_folder)\n",
+   "shutil.copy('utils.py', project_folder)"
    ]
   },
   {

@@ -263,6 +264,7 @@
    "- python=3.6.2\n",
    "- pip:\n",
    "  - azureml-defaults\n",
+   "  - azureml-opendatasets\n",
    "  - chainer==5.1.0\n",
    "  - cupy-cuda90==5.1.0\n",
    "  - mpi4py==3.0.0\n",

@@ -557,6 +559,7 @@
    "cd.add_conda_package('numpy')\n",
    "cd.add_pip_package('chainer==5.1.0')\n",
    "cd.add_pip_package(\"azureml-defaults\")\n",
+   "cd.add_pip_package(\"azureml-opendatasets\")\n",
    "cd.save_to_file(base_directory='./', conda_file_path='myenv.yml')\n",
    "\n",
    "print(cd.serialize_to_string())"

@@ -584,7 +587,8 @@
    "\n",
    "\n",
    "myenv = Environment.from_conda_specification(name=\"myenv\", file_path=\"myenv.yml\")\n",
-   "inference_config = InferenceConfig(entry_script=\"chainer_score.py\", environment=myenv)\n",
+   "inference_config = InferenceConfig(entry_script=\"chainer_score.py\", environment=myenv,\n",
+   "                                   source_directory=project_folder)\n",
    "\n",
    "aciconfig = AciWebservice.deploy_configuration(cpu_cores=1,\n",
    "                                               auth_enabled=True, # this flag generates API keys to secure access\n",

@@ -592,11 +596,11 @@
    "                                               tags={'name': 'mnist', 'framework': 'Chainer'},\n",
    "                                               description='Chainer DNN with MNIST')\n",
    "\n",
-   "service = Model.deploy(workspace=ws, \n",
-   "                       name='chainer-mnist-1', \n",
-   "                       models=[model], \n",
-   "                       inference_config=inference_config, \n",
-   "                       deployment_config=aciconfig)\n",
+   "service = Model.deploy(workspace=ws,\n",
+   "                       name='chainer-mnist-1',\n",
+   "                       models=[model],\n",
+   "                       inference_config=inference_config,\n",
+   "                       deployment_config=aciconfig)\n",
    "service.wait_for_deployment(True)\n",
    "print(service.state)\n",
    "print(service.scoring_uri)"

@@ -685,13 +689,16 @@
    "    res = res.reshape(n_items[0], 1)\n",
    "    return res\n",
    "\n",
-   "os.makedirs('./data/mnist', exist_ok=True)\n",
-   "urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz', filename = './data/mnist/test-images.gz')\n",
-   "urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz', filename = './data/mnist/test-labels.gz')\n",
+   "data_folder = os.path.join(os.getcwd(), 'data/mnist')\n",
+   "os.makedirs(data_folder, exist_ok=True)\n",
    "\n",
-   "X_test = load_data('./data/mnist/test-images.gz', False)\n",
-   "y_test = load_data('./data/mnist/test-labels.gz', True).reshape(-1)\n",
+   "urllib.request.urlretrieve('https://azureopendatastorage.blob.core.windows.net/mnist/t10k-images-idx3-ubyte.gz',\n",
+   "                           filename=os.path.join(data_folder, 't10k-images-idx3-ubyte.gz'))\n",
+   "urllib.request.urlretrieve('https://azureopendatastorage.blob.core.windows.net/mnist/t10k-labels-idx1-ubyte.gz',\n",
+   "                           filename=os.path.join(data_folder, 't10k-labels-idx1-ubyte.gz'))\n",
+   "\n",
+   "X_test = load_data(os.path.join(data_folder, 't10k-images-idx3-ubyte.gz'), False) / np.float32(255.0)\n",
+   "y_test = load_data(os.path.join(data_folder, 't10k-labels-idx1-ubyte.gz'), True).reshape(-1)\n",
    "\n",
    "# send a random row from the test set to score\n",
    "random_index = np.random.randint(0, len(X_test)-1)\n",

@@ -0,0 +1,13 @@
name: train-hyperparameter-tune-deploy-with-chainer
dependencies:
- pip:
  - azureml-sdk
  - azureml-widgets
  - numpy
  - matplotlib
  - json
  - urllib
  - gzip
  - struct
  - requests
  - azureml-opendatasets

@@ -0,0 +1,50 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

import glob
import gzip
import numpy as np
import os
import struct

from azureml.core import Dataset
from azureml.opendatasets import MNIST
from chainer.datasets import tuple_dataset


# load compressed MNIST gz files and return numpy arrays
def load_data(filename, label=False):
    with gzip.open(filename) as gz:
        struct.unpack('I', gz.read(4))
        n_items = struct.unpack('>I', gz.read(4))
        if not label:
            n_rows = struct.unpack('>I', gz.read(4))[0]
            n_cols = struct.unpack('>I', gz.read(4))[0]
            res = np.frombuffer(gz.read(n_items[0] * n_rows * n_cols), dtype=np.uint8)
            res = res.reshape(n_items[0], n_rows * n_cols)
        else:
            res = np.frombuffer(gz.read(n_items[0]), dtype=np.uint8)
            res = res.reshape(n_items[0], 1)
    return res


def download_mnist():
    data_folder = os.path.join(os.getcwd(), 'data/mnist')
    os.makedirs(data_folder, exist_ok=True)

    mnist_file_dataset = MNIST.get_file_dataset()
    mnist_file_dataset.download(data_folder, overwrite=True)

    X_train = load_data(glob.glob(os.path.join(data_folder, "**/train-images-idx3-ubyte.gz"),
                                  recursive=True)[0], False) / 255.0
    X_test = load_data(glob.glob(os.path.join(data_folder, "**/t10k-images-idx3-ubyte.gz"),
                                 recursive=True)[0], False) / 255.0
    y_train = load_data(glob.glob(os.path.join(data_folder, "**/train-labels-idx1-ubyte.gz"),
                                  recursive=True)[0], True).reshape(-1)
    y_test = load_data(glob.glob(os.path.join(data_folder, "**/t10k-labels-idx1-ubyte.gz"),
                                 recursive=True)[0], True).reshape(-1)

    train = tuple_dataset.TupleDataset(X_train.astype(np.float32), y_train.astype(np.int32))
    test = tuple_dataset.TupleDataset(X_test.astype(np.float32), y_test.astype(np.int32))

    return train, test

@@ -0,0 +1,5 @@
name: fastai-with-custom-docker
dependencies:
- pip:
  - azureml-sdk
  - fastai==1.0.61

@@ -0,0 +1,8 @@
name: train-hyperparameter-tune-deploy-with-keras
dependencies:
- pip:
  - azureml-sdk
  - azureml-widgets
  - tensorflow
  - keras<=2.3.1
  - matplotlib
@@ -1,495 +1,495 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||
"\n",
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Distributed PyTorch with DistributedDataParallel\n",
|
||||
"\n",
|
||||
"In this tutorial, you will train a PyTorch model on the [CIFAR-10](https://www.cs.toronto.edu/~kriz/cifar.html) dataset using distributed training with PyTorch's `DistributedDataParallel` module across a GPU cluster."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Prerequisites\n",
|
||||
"* If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [Configuration](../../../../configuration.ipynb) notebook to install the Azure Machine Learning Python SDK and create an Azure ML `Workspace`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Check core SDK version number\n",
|
||||
"import azureml.core\n",
|
||||
"\n",
|
||||
"print(\"SDK version:\", azureml.core.VERSION)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Diagnostics\n",
|
||||
"Opt-in diagnostics for better experience, quality, and security of future releases."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"Diagnostics"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.telemetry import set_diagnostics_collection\n",
|
||||
"\n",
|
||||
"set_diagnostics_collection(send_diagnostics=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Initialize workspace\n",
|
||||
"\n",
|
||||
"Initialize a [Workspace](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#workspace) object from the existing workspace you created in the Prerequisites step. `Workspace.from_config()` creates a workspace object from the details stored in `config.json`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.workspace import Workspace\n",
|
||||
"\n",
|
||||
"ws = Workspace.from_config()\n",
|
||||
"print('Workspace name: ' + ws.name, \n",
|
||||
" 'Azure region: ' + ws.location, \n",
|
||||
" 'Subscription id: ' + ws.subscription_id, \n",
|
||||
" 'Resource group: ' + ws.resource_group, sep='\\n')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create or attach existing AmlCompute\n",
|
||||
"You will need to create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for training your model. In this tutorial, we use Azure ML managed compute ([AmlCompute](https://docs.microsoft.com/azure/machine-learning/service/how-to-set-up-training-targets#amlcompute)) for our remote training compute resource. Specifically, the below code creates an `STANDARD_NC6` GPU cluster that autoscales from `0` to `4` nodes.\n",
|
||||
"\n",
|
||||
"**Creation of AmlCompute takes approximately 5 minutes.** If the AmlCompute with that name is already in your workspace, this code will skip the creation process.\n",
|
||||
"\n",
|
||||
"As with other Azure services, there are limits on certain resources (e.g. AmlCompute) associated with the Azure Machine Learning service. Please read [this article](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
|
||||
"from azureml.core.compute_target import ComputeTargetException\n",
|
||||
"\n",
|
||||
"# choose a name for your cluster\n",
|
||||
"cluster_name = 'gpu-cluster'\n",
|
||||
"\n",
|
||||
"try:\n",
|
||||
" compute_target = ComputeTarget(workspace=ws, name=cluster_name)\n",
|
||||
" print('Found existing compute target.')\n",
|
||||
"except ComputeTargetException:\n",
|
||||
" print('Creating a new compute target...')\n",
|
||||
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6',\n",
|
||||
" max_nodes=4)\n",
|
||||
"\n",
|
||||
" # create the cluster\n",
|
||||
" compute_target = ComputeTarget.create(ws, cluster_name, compute_config)\n",
|
||||
"\n",
|
||||
" compute_target.wait_for_completion(show_output=True)\n",
|
||||
"\n",
|
||||
"# use get_status() to get a detailed status for the current AmlCompute. \n",
|
||||
"print(compute_target.get_status().serialize())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The above code creates GPU compute. If you instead want to create CPU compute, provide a different VM size to the `vm_size` parameter, such as `STANDARD_D2_V2`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Prepare dataset\n",
|
||||
"\n",
|
||||
"Prepare the dataset used for training. We will first download and extract the publicly available CIFAR-10 dataset from the cs.toronto.edu website and then create an Azure ML FileDataset to use the data for training."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Download and extract CIFAR-10 data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import urllib\n",
|
||||
"import tarfile\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"url = 'https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'\n",
|
||||
"filename = 'cifar-10-python.tar.gz'\n",
|
||||
"data_root = 'cifar-10'\n",
|
||||
"filepath = os.path.join(data_root, filename)\n",
|
||||
"\n",
|
||||
"if not os.path.isdir(data_root):\n",
|
||||
" os.makedirs(data_root, exist_ok=True)\n",
|
||||
" urllib.request.urlretrieve(url, filepath)\n",
|
||||
" with tarfile.open(filepath, \"r:gz\") as tar:\n",
|
||||
" tar.extractall(path=data_root)\n",
|
||||
" os.remove(filepath) # delete tar.gz file after extraction"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create Azure ML dataset\n",
|
||||
"\n",
|
||||
"The `upload_directory` method will upload the data to a datastore and create a FileDataset from it. In this tutorial we will use the workspace's default datastore."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core import Dataset\n",
|
||||
"\n",
|
||||
"datastore = ws.get_default_datastore()\n",
|
||||
"dataset = Dataset.File.upload_directory(\n",
|
||||
" src_dir=data_root, target=(datastore, data_root)\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Train model on the remote compute\n",
|
||||
"Now that we have the AmlCompute ready to go, let's run our distributed training job."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create a project directory\n",
|
||||
"Create a directory that will contain all the necessary code from your local machine that you will need access to on the remote resource. This includes the training script and any additional files your training script depends on."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"project_folder = './pytorch-distr'\n",
|
||||
"os.makedirs(project_folder, exist_ok=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Prepare training script\n",
|
||||
"Now you will need to create your training script. In this tutorial, the script for distributed training on CIFAR-10 is already provided for you at `train.py`. In practice, you should be able to take any custom PyTorch training script as is and run it with Azure ML without having to modify your code."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Once your script is ready, copy the training script `train.py` into the project directory."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import shutil\n",
|
||||
"\n",
|
||||
"shutil.copy('train.py', project_folder)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create an experiment\n",
|
||||
"Create an [Experiment](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#experiment) to track all the runs in your workspace for this distributed PyTorch tutorial. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core import Experiment\n",
|
||||
"\n",
|
||||
"experiment_name = 'pytorch-distr'\n",
|
||||
"experiment = Experiment(ws, name=experiment_name)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create an environment\n",
|
||||
"\n",
|
||||
"In this tutorial, we will use one of Azure ML's curated PyTorch environments for training. [Curated environments](https://docs.microsoft.com/azure/machine-learning/how-to-use-environments#use-a-curated-environment) are available in your workspace by default. Specifically, we will use the PyTorch 1.6 GPU curated environment."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core import Environment\n",
|
||||
"\n",
|
||||
"pytorch_env = Environment.get(ws, name='AzureML-PyTorch-1.6-GPU')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Configure the training job\n",
|
||||
"\n",
|
||||
"To launch a distributed PyTorch job on Azure ML, you have two options:\n",
|
||||
"\n",
|
||||
"1. Per-process launch - specify the total # of worker processes (typically one per GPU) you want to run, and\n",
|
||||
"Azure ML will handle launching each process.\n",
|
||||
"2. Per-node launch with [torch.distributed.launch](https://pytorch.org/docs/stable/distributed.html#launch-utility) - provide the `torch.distributed.launch` command you want to\n",
|
||||
"run on each node.\n",
|
||||
"\n",
|
||||
"For more information, see the [documentation](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-train-pytorch#distributeddataparallel).\n",
|
||||
"\n",
|
||||
"Both options are shown below."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Per-process launch\n",
|
||||
"\n",
|
||||
"To use the per-process launch option in which Azure ML will handle launching each of the processes to run your training script,\n",
|
||||
"\n",
|
||||
"1. Specify the training script and arguments\n",
|
||||
"2. Create a `PyTorchConfiguration` and specify `node_count` and `process_count`. The `process_count` is the total number of processes you want to run for the job; this should typically equal the # of GPUs available on each node multiplied by the # of nodes. Since this tutorial uses the `STANDARD_NC6` SKU, which has one GPU, the total process count for a 2-node job is `2`. If you are using a SKU with >1 GPUs, adjust the `process_count` accordingly.\n",
|
||||
"\n",
|
||||
"Azure ML will set the `MASTER_ADDR`, `MASTER_PORT`, `NODE_RANK`, `WORLD_SIZE` environment variables on each node, in addition to the process-level `RANK` and `LOCAL_RANK` environment variables, that are needed for distributed PyTorch training."
|
||||
]
|
||||
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core import ScriptRunConfig\n",
"from azureml.core.runconfig import PyTorchConfiguration\n",
"\n",
"# create distributed config\n",
"distr_config = PyTorchConfiguration(process_count=2, node_count=2)\n",
"\n",
"# create args\n",
"args = [\"--data-dir\", dataset.as_download(), \"--epochs\", 25]\n",
"\n",
"# create job config\n",
"src = ScriptRunConfig(source_directory=project_folder,\n",
"                      script='train.py',\n",
"                      arguments=args,\n",
"                      compute_target=compute_target,\n",
"                      environment=pytorch_env,\n",
"                      distributed_job_config=distr_config)"
]
},
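{
"cell_type": "markdown",
"metadata": {},
"source": [
"The cell below is a minimal, illustrative sketch of how a training script such as `train.py` typically consumes the environment variables that Azure ML sets for the per-process launch option. The provided `train.py` already contains equivalent logic, so this cell is for reference only and is not meant to be run from the notebook."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative sketch only -- this logic belongs inside the training script.\n",
"# It assumes the per-process launch option, where Azure ML sets RANK,\n",
"# LOCAL_RANK, and WORLD_SIZE for every worker process.\n",
"import os\n",
"\n",
"import torch\n",
"import torch.distributed as dist\n",
"\n",
"\n",
"def init_distributed():\n",
"    rank = int(os.environ['RANK'])              # global rank of this process\n",
"    local_rank = int(os.environ['LOCAL_RANK'])  # rank within the local node\n",
"    world_size = int(os.environ['WORLD_SIZE'])  # total number of processes\n",
"    dist.init_process_group(backend='nccl', rank=rank, world_size=world_size)\n",
"    torch.cuda.set_device(local_rank)           # bind this process to its GPU\n",
"    return rank, local_rank, world_size"
]
},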
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Per-node launch with `torch.distributed.launch`\n",
"\n",
"If you would rather use the PyTorch-provided launch utility `torch.distributed.launch` to launch the worker processes on each node, you can do that instead:\n",
"\n",
"1. Provide the launch command to the `command` parameter of `ScriptRunConfig`. For PyTorch jobs, Azure ML will set the `MASTER_ADDR`, `MASTER_PORT`, and `NODE_RANK` environment variables on each node, so you can simply reference those environment variables in your command. If you are using a SKU with more than one GPU, adjust the `--nproc_per_node` argument accordingly.\n",
"\n",
"2. Create a `PyTorchConfiguration` and specify the `node_count`. You do not need to specify `process_count`; by default, Azure ML will launch one process per node to run the `command` you provided.\n",
"\n",
"Uncomment the code below to configure a job with this method."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"'''\n",
"from azureml.core import ScriptRunConfig\n",
"from azureml.core.runconfig import PyTorchConfiguration\n",
"\n",
"# create distributed config\n",
"distr_config = PyTorchConfiguration(node_count=2)\n",
"\n",
"# define command\n",
"launch_cmd = [\"python -m torch.distributed.launch --nproc_per_node 1 --nnodes 2 \" \\\n",
"              \"--node_rank $NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT --use_env \" \\\n",
"              \"train.py --data-dir\", dataset.as_download(), \"--epochs 25\"]\n",
"\n",
"# create job config\n",
"src = ScriptRunConfig(source_directory=project_folder,\n",
"                      command=launch_cmd,\n",
"                      compute_target=compute_target,\n",
"                      environment=pytorch_env,\n",
"                      distributed_job_config=distr_config)\n",
"'''"
]
},
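{
"cell_type": "markdown",
"metadata": {},
"source": [
"Note that the launch command above passes `--use_env` to `torch.distributed.launch`, so the utility exposes each worker's local rank through the `LOCAL_RANK` environment variable instead of appending a `--local_rank` argument to the script. If your training script expects a `--local_rank` argument, drop the `--use_env` flag."
]
},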
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Submit job\n",
"Run your experiment by submitting your `ScriptRunConfig` object. Note that this call is asynchronous."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"run = experiment.submit(src)\n",
"print(run)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Monitor your run\n",
"You can monitor the progress of the run with a Jupyter widget. Like the run submission, the widget is asynchronous and provides live updates every 10-15 seconds until the job completes. The widget automatically plots the loss metric that we logged to the Azure ML run."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.widgets import RunDetails\n",
"\n",
"RunDetails(run).show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Alternatively, you can block until the script has completed training before running more code."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"run.wait_for_completion(show_output=True) # this provides a verbose log"
]
},
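{
"cell_type": "markdown",
"metadata": {},
"source": [
"Once the run has completed, you can retrieve the metrics that the training script logged to the run. This is a minimal sketch; the metric names you get back depend on what `train.py` actually logs."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Retrieve the metrics logged to this run. The available keys depend on\n",
"# what the training script logged (for example, a loss value per epoch).\n",
"metrics = run.get_metrics()\n",
"print(metrics)"
]
}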
],
"metadata": {
"authors": [
{
"name": "minxia"
}
],
"category": "training",
"compute": [
"AML Compute"
],
"datasets": [
"CIFAR-10"
],
"deployment": [
"None"
],
"exclude_from_index": false,
"framework": [
"PyTorch"
],
"friendly_name": "Distributed training with PyTorch",
"index_order": 1,
"kernelspec": {
"display_name": "Python 3.6",
"language": "python",
"name": "python36"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.7"
},
"tags": [
"None"
],
"task": "Train a model using distributed training via PyTorch DistributedDataParallel"
},
"nbformat": 4,
"nbformat_minor": 2
}
@@ -0,0 +1,5 @@
name: distributed-pytorch-with-distributeddataparallel
dependencies:
- pip:
  - azureml-sdk
  - azureml-widgets
@@ -0,0 +1,5 @@
name: distributed-pytorch-with-horovod
dependencies:
- pip:
  - azureml-sdk
  - azureml-widgets
@@ -51,6 +51,17 @@ if args.cuda:

kwargs = {}
# Use Azure Open Datasets for MNIST dataset
datasets.MNIST.resources = [
    ("https://azureopendatastorage.azurefd.net/mnist/train-images-idx3-ubyte.gz",
     "f68b3c2dcbeaaa9fbdd348bbdeb94873"),
    ("https://azureopendatastorage.azurefd.net/mnist/train-labels-idx1-ubyte.gz",
     "d53e105ee54ea40749a09fcbcd1e9432"),
    ("https://azureopendatastorage.azurefd.net/mnist/t10k-images-idx3-ubyte.gz",
     "9fb629c4189551a2d022fa330f9573f3"),
    ("https://azureopendatastorage.azurefd.net/mnist/t10k-labels-idx1-ubyte.gz",
     "ec29112dd5afa0611ce80d1b7f02629c")
]
train_dataset = \
    datasets.MNIST('data-%d' % hvd.rank(), train=True, download=True,
                   transform=transforms.Compose([
@@ -0,0 +1,10 @@
name: train-hyperparameter-tune-deploy-with-pytorch
dependencies:
- pip:
  - azureml-sdk
  - azureml-widgets
  - pillow==5.4.1
  - matplotlib
  - numpy==1.19.3
  - https://download.pytorch.org/whl/cpu/torch-1.6.0%2Bcpu-cp36-cp36m-win_amd64.whl
  - https://download.pytorch.org/whl/cpu/torchvision-0.7.0%2Bcpu-cp36-cp36m-win_amd64.whl
@@ -0,0 +1,6 @@
name: train-hyperparameter-tune-deploy-with-sklearn
dependencies:
- pip:
  - azureml-sdk
  - azureml-widgets
  - numpy
@@ -0,0 +1,11 @@
name: distributed-tensorflow-with-horovod
dependencies:
- pip:
  - azureml-sdk
  - azureml-widgets
  - keras
  - tensorflow-gpu==1.13.2
  - horovod==0.19.1
  - matplotlib
  - pandas
  - fuse
@@ -0,0 +1,5 @@
name: distributed-tensorflow-with-parameter-server
dependencies:
- pip:
  - azureml-sdk
  - azureml-widgets
@@ -0,0 +1,12 @@
name: train-hyperparameter-tune-deploy-with-tensorflow
dependencies:
- numpy
- matplotlib
- pip:
  - azureml-sdk
  - azureml-widgets
  - pandas
  - keras
  - tensorflow==2.0.0
  - matplotlib
  - fuse
@@ -102,6 +102,17 @@ torch.manual_seed(args.seed)
device = torch.device("cuda" if use_cuda else "cpu")

kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
# Use Azure Open Datasets for MNIST dataset
datasets.MNIST.resources = [
    ("https://azureopendatastorage.azurefd.net/mnist/train-images-idx3-ubyte.gz",
     "f68b3c2dcbeaaa9fbdd348bbdeb94873"),
    ("https://azureopendatastorage.azurefd.net/mnist/train-labels-idx1-ubyte.gz",
     "d53e105ee54ea40749a09fcbcd1e9432"),
    ("https://azureopendatastorage.azurefd.net/mnist/t10k-images-idx3-ubyte.gz",
     "9fb629c4189551a2d022fa330f9573f3"),
    ("https://azureopendatastorage.azurefd.net/mnist/t10k-labels-idx1-ubyte.gz",
     "ec29112dd5afa0611ce80d1b7f02629c")
]
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=transforms.Compose([
@@ -332,6 +332,18 @@
"import random\n",
"import numpy as np\n",
"\n",
"# Use Azure Open Datasets for MNIST dataset\n",
"datasets.MNIST.resources = [\n",
"    (\"https://azureopendatastorage.azurefd.net/mnist/train-images-idx3-ubyte.gz\",\n",
"     \"f68b3c2dcbeaaa9fbdd348bbdeb94873\"),\n",
"    (\"https://azureopendatastorage.azurefd.net/mnist/train-labels-idx1-ubyte.gz\",\n",
"     \"d53e105ee54ea40749a09fcbcd1e9432\"),\n",
"    (\"https://azureopendatastorage.azurefd.net/mnist/t10k-images-idx3-ubyte.gz\",\n",
"     \"9fb629c4189551a2d022fa330f9573f3\"),\n",
"    (\"https://azureopendatastorage.azurefd.net/mnist/t10k-labels-idx1-ubyte.gz\",\n",
"     \"ec29112dd5afa0611ce80d1b7f02629c\")\n",
"]\n",
"\n",
"test_data = datasets.MNIST('../data', train=False, transform=transforms.Compose([\n",
"    transforms.ToTensor(),\n",
"    transforms.Normalize((0.1307,), (0.3081,))]))\n",
@@ -0,0 +1,8 @@
name: pong_rllib
dependencies:
- pip:
  - azureml-sdk
  - azureml-contrib-reinforcementlearning
  - azureml-widgets
  - matplotlib
  - azure-mgmt-network==12.0.0
@@ -0,0 +1,6 @@
name: cartpole_ci
dependencies:
- pip:
  - azureml-sdk
  - azureml-contrib-reinforcementlearning
  - azureml-widgets