mirror of
https://github.com/Azure/MachineLearningNotebooks.git
synced 2025-12-22 10:35:12 -05:00
Compare commits
42 Commits
azureml-sd
...
release_up
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7c2f294c1a | ||
|
|
4a6bcebccc | ||
|
|
56e0ebc5ac | ||
|
|
2aa39f2f4a | ||
|
|
4d247c1877 | ||
|
|
f6682f6f6d | ||
|
|
26ecf25233 | ||
|
|
44c3a486c0 | ||
|
|
c574f429b8 | ||
|
|
77d557a5dc | ||
|
|
13dedec4a4 | ||
|
|
6f5c52676f | ||
|
|
90c105537c | ||
|
|
ef264b1073 | ||
|
|
824ac5e021 | ||
|
|
e9a7b95716 | ||
|
|
789ee26357 | ||
|
|
fc541706e7 | ||
|
|
64b8aa2a55 | ||
|
|
d3dc35dbb6 | ||
|
|
b55ac368e7 | ||
|
|
de162316d7 | ||
|
|
4ecc58dfe2 | ||
|
|
daf27a76e4 | ||
|
|
a05444845b | ||
|
|
79c9f50c15 | ||
|
|
67e10e0f6b | ||
|
|
1ef0331a0f | ||
|
|
5e91c836b9 | ||
|
|
661762854a | ||
|
|
fbc90ba74f | ||
|
|
0d9c83d0a8 | ||
|
|
ca4cab1de9 | ||
|
|
ddbb3c45f6 | ||
|
|
8eed4e39d0 | ||
|
|
b37c0297db | ||
|
|
968cc798d0 | ||
|
|
5c9ca452fb | ||
|
|
5e82680272 | ||
|
|
41841fc8c0 | ||
|
|
896bf63736 | ||
|
|
d4751bf6ec |
29
Dockerfiles/1.0.41/Dockerfile
Normal file
29
Dockerfiles/1.0.41/Dockerfile
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
FROM continuumio/miniconda:4.5.11

# install git (required below to clone the sample notebooks).
# update + install in one layer and drop the apt lists so the cache
# is never stale and the image stays small; a blanket `apt-get upgrade`
# is avoided — bump the base image tag instead to pick up fixes.
RUN apt-get update \
    && apt-get install -y --no-install-recommends git \
    && rm -rf /var/lib/apt/lists/*

# create a new conda environment named azureml with Python 3.6
RUN conda create -n azureml -y -q python=3.6

# install additional packages used by sample notebooks. this is optional
RUN ["/bin/bash", "-c", "source activate azureml && conda install -y tqdm cython matplotlib scikit-learn"]

# install azureml-sdk components, pinned to the release this image targets
RUN ["/bin/bash", "-c", "source activate azureml && pip install --no-cache-dir azureml-sdk[notebooks]==1.0.41"]

# clone the matching release branch of the Azure ML sample notebooks
WORKDIR /home
RUN git clone -b "azureml-sdk-1.0.41" --single-branch https://github.com/Azure/MachineLearningNotebooks.git

# generate the jupyter configuration file (creates ~/.jupyter as needed)
RUN ["/bin/bash", "-c", "source activate azureml && jupyter notebook --generate-config"]

# set an empty token for Jupyter to remove authentication.
# this is NOT recommended for production environments
RUN echo "c.NotebookApp.token = ''" >> ~/.jupyter/jupyter_notebook_config.py

# document that the notebook server listens on port 8887
# (EXPOSE does not publish the port; run with -p 8887:8887)
EXPOSE 8887

# start Jupyter notebook server on port 8887 when the container starts
WORKDIR /home/MachineLearningNotebooks
CMD ["/bin/bash", "-c", "source activate azureml && jupyter notebook --port 8887 --no-browser --ip 0.0.0.0 --allow-root"]
|
||||||
29
Dockerfiles/1.0.43/Dockerfile
Normal file
29
Dockerfiles/1.0.43/Dockerfile
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
FROM continuumio/miniconda:4.5.11

# install git (required below to clone the sample notebooks).
# update + install in one layer and drop the apt lists so the cache
# is never stale and the image stays small; a blanket `apt-get upgrade`
# is avoided — bump the base image tag instead to pick up fixes.
RUN apt-get update \
    && apt-get install -y --no-install-recommends git \
    && rm -rf /var/lib/apt/lists/*

# create a new conda environment named azureml with Python 3.6
RUN conda create -n azureml -y -q python=3.6

# install additional packages used by sample notebooks. this is optional
RUN ["/bin/bash", "-c", "source activate azureml && conda install -y tqdm cython matplotlib scikit-learn"]

# install azureml-sdk components, pinned to the release this image targets
RUN ["/bin/bash", "-c", "source activate azureml && pip install --no-cache-dir azureml-sdk[notebooks]==1.0.43"]

# clone the matching release branch of the Azure ML sample notebooks
WORKDIR /home
RUN git clone -b "azureml-sdk-1.0.43" --single-branch https://github.com/Azure/MachineLearningNotebooks.git

# generate the jupyter configuration file (creates ~/.jupyter as needed)
RUN ["/bin/bash", "-c", "source activate azureml && jupyter notebook --generate-config"]

# set an empty token for Jupyter to remove authentication.
# this is NOT recommended for production environments
RUN echo "c.NotebookApp.token = ''" >> ~/.jupyter/jupyter_notebook_config.py

# document that the notebook server listens on port 8887
# (EXPOSE does not publish the port; run with -p 8887:8887)
EXPOSE 8887

# start Jupyter notebook server on port 8887 when the container starts
WORKDIR /home/MachineLearningNotebooks
CMD ["/bin/bash", "-c", "source activate azureml && jupyter notebook --port 8887 --no-browser --ip 0.0.0.0 --allow-root"]
|
||||||
15
README.md
15
README.md
@@ -4,6 +4,10 @@ This repository contains example notebooks demonstrating the [Azure Machine Lear
|
|||||||
|
|
||||||

|

|
||||||
|
|
||||||
|
## News
|
||||||
|
|
||||||
|
* [Try Azure Machine Learning with MLflow](./how-to-use-azureml/using-mlflow)
|
||||||
|
|
||||||
## Quick installation
|
## Quick installation
|
||||||
```sh
|
```sh
|
||||||
pip install azureml-sdk
|
pip install azureml-sdk
|
||||||
@@ -52,6 +56,7 @@ The [How to use Azure ML](./how-to-use-azureml) folder contains specific example
|
|||||||
|
|
||||||
Visit following repos to see projects contributed by Azure ML users:
|
Visit following repos to see projects contributed by Azure ML users:
|
||||||
|
|
||||||
|
- [AMLSamples](https://github.com/Azure/AMLSamples) Number of end-to-end examples, including face recognition, predictive maintenance, customer churn and sentiment analysis.
|
||||||
- [Fine tune natural language processing models using Azure Machine Learning service](https://github.com/Microsoft/AzureML-BERT)
|
- [Fine tune natural language processing models using Azure Machine Learning service](https://github.com/Microsoft/AzureML-BERT)
|
||||||
- [Fashion MNIST with Azure ML SDK](https://github.com/amynic/azureml-sdk-fashion)
|
- [Fashion MNIST with Azure ML SDK](https://github.com/amynic/azureml-sdk-fashion)
|
||||||
|
|
||||||
@@ -60,16 +65,6 @@ This repository collects usage data and sends it to Mircosoft to help improve ou
|
|||||||
|
|
||||||
To opt out of tracking, please go to the raw markdown or .ipynb files and remove the following line of code:
|
To opt out of tracking, please go to the raw markdown or .ipynb files and remove the following line of code:
|
||||||
|
|
||||||
```sh
|
|
||||||
""
|
|
||||||
```
|
|
||||||
This URL will be slightly different depending on the file.
|
|
||||||
|
|
||||||
## Data/Telemetry
|
|
||||||
This repository collects usage data and sends it to Microsoft to help improve our products and services. Read Microsoft's [privacy statement to learn more](https://privacy.microsoft.com/en-US/privacystatement)
|
|
||||||
|
|
||||||
To opt out of tracking, please go to the raw markdown or .ipynb files and remove the following line of code:
|
|
||||||
|
|
||||||
```sh
|
```sh
|
||||||
""
|
""
|
||||||
```
|
```
|
||||||
|
|||||||
@@ -39,6 +39,7 @@
|
|||||||
" 1. Workspace parameters\n",
|
" 1. Workspace parameters\n",
|
||||||
" 1. Access your workspace\n",
|
" 1. Access your workspace\n",
|
||||||
" 1. Create a new workspace\n",
|
" 1. Create a new workspace\n",
|
||||||
|
" 1. Create compute resources\n",
|
||||||
"1. [Next steps](#Next%20steps)\n",
|
"1. [Next steps](#Next%20steps)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"---\n",
|
"---\n",
|
||||||
@@ -102,7 +103,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"import azureml.core\n",
|
"import azureml.core\n",
|
||||||
"\n",
|
"\n",
|
||||||
"print(\"This notebook was created using version 1.0.41 of the Azure ML SDK\")\n",
|
"print(\"This notebook was created using version AZUREML-SDK-VERSION of the Azure ML SDK\")\n",
|
||||||
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -204,7 +205,7 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"If you don't have an existing workspace and are the owner of the subscription or resource group, you can create a new workspace. If you don't have a resource group, the create workspace command will create one for you using the name you provide.\n",
|
"If you don't have an existing workspace and are the owner of the subscription or resource group, you can create a new workspace. If you don't have a resource group, the create workspace command will create one for you using the name you provide.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"**Note**: The Workspace creation command will create default CPU and GPU compute clusters for you. As with other Azure services, there are limits on certain resources (for example AmlCompute quota) associated with the Azure ML service. Please read [this article](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota.\n",
|
"**Note**: As with other Azure services, there are limits on certain resources (for example AmlCompute quota) associated with the Azure ML service. Please read [this article](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"This cell will create an Azure ML workspace for you in a subscription provided you have the correct permissions.\n",
|
"This cell will create an Azure ML workspace for you in a subscription provided you have the correct permissions.\n",
|
||||||
"\n",
|
"\n",
|
||||||
@@ -233,8 +234,6 @@
|
|||||||
" subscription_id = subscription_id,\n",
|
" subscription_id = subscription_id,\n",
|
||||||
" resource_group = resource_group, \n",
|
" resource_group = resource_group, \n",
|
||||||
" location = workspace_region,\n",
|
" location = workspace_region,\n",
|
||||||
" default_cpu_compute_target=Workspace.DEFAULT_CPU_CLUSTER_CONFIGURATION,\n",
|
|
||||||
" default_gpu_compute_target=Workspace.DEFAULT_GPU_CLUSTER_CONFIGURATION,\n",
|
|
||||||
" create_resource_group = True,\n",
|
" create_resource_group = True,\n",
|
||||||
" exist_ok = True)\n",
|
" exist_ok = True)\n",
|
||||||
"ws.get_details()\n",
|
"ws.get_details()\n",
|
||||||
@@ -243,6 +242,97 @@
|
|||||||
"ws.write_config()"
|
"ws.write_config()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Create compute resources for your training experiments\n",
|
||||||
|
"\n",
|
||||||
|
"Many of the sample notebooks use Azure ML managed compute (AmlCompute) to train models using a dynamically scalable pool of compute. In this section you will create default compute clusters for use by the other notebooks and any other operations you choose.\n",
|
||||||
|
"\n",
|
||||||
|
"To create a cluster, you need to specify a compute configuration that specifies the type of machine to be used and the scalability behaviors. Then you choose a name for the cluster that is unique within the workspace that can be used to address the cluster later.\n",
|
||||||
|
"\n",
|
||||||
|
"The cluster parameters are:\n",
|
||||||
|
"* vm_size - this describes the virtual machine type and size used in the cluster. All machines in the cluster are the same type. You can get the list of vm sizes available in your region by using the CLI command\n",
|
||||||
|
"\n",
|
||||||
|
"```shell\n",
|
||||||
|
"az vm list-skus -o tsv\n",
|
||||||
|
"```\n",
|
||||||
|
"* min_nodes - this sets the minimum size of the cluster. If you set the minimum to 0 the cluster will shut down all nodes while not in use. Setting this number to a value higher than 0 will allow for faster start-up times, but you will also be billed when the cluster is not in use.\n",
|
||||||
|
"* max_nodes - this sets the maximum size of the cluster. Setting this to a larger number allows for more concurrency and a greater distributed processing of scale-out jobs.\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"To create a **CPU** cluster now, run the cell below. The autoscale settings mean that the cluster will scale down to 0 nodes when inactive and up to 4 nodes when busy."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
|
||||||
|
"from azureml.core.compute_target import ComputeTargetException\n",
|
||||||
|
"\n",
|
||||||
|
"# Choose a name for your CPU cluster\n",
|
||||||
|
"cpu_cluster_name = \"cpu-cluster\"\n",
|
||||||
|
"\n",
|
||||||
|
"# Verify that cluster does not exist already\n",
|
||||||
|
"try:\n",
|
||||||
|
" cpu_cluster = ComputeTarget(workspace=ws, name=cpu_cluster_name)\n",
|
||||||
|
" print(\"Found existing cpu-cluster\")\n",
|
||||||
|
"except ComputeTargetException:\n",
|
||||||
|
" print(\"Creating new cpu-cluster\")\n",
|
||||||
|
" \n",
|
||||||
|
" # Specify the configuration for the new cluster\n",
|
||||||
|
" compute_config = AmlCompute.provisioning_configuration(vm_size=\"STANDARD_D2_V2\",\n",
|
||||||
|
" min_nodes=0,\n",
|
||||||
|
" max_nodes=4)\n",
|
||||||
|
"\n",
|
||||||
|
" # Create the cluster with the specified name and configuration\n",
|
||||||
|
" cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, compute_config)\n",
|
||||||
|
" \n",
|
||||||
|
" # Wait for the cluster to complete, show the output log\n",
|
||||||
|
" cpu_cluster.wait_for_completion(show_output=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"To create a **GPU** cluster, run the cell below. Note that your subscription must have sufficient quota for GPU VMs or the command will fail. To increase quota, see [these instructions](https://docs.microsoft.com/en-us/azure/azure-supportability/resource-manager-core-quotas-request). "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
|
||||||
|
"from azureml.core.compute_target import ComputeTargetException\n",
|
||||||
|
"\n",
|
||||||
|
"# Choose a name for your GPU cluster\n",
|
||||||
|
"gpu_cluster_name = \"gpu-cluster\"\n",
|
||||||
|
"\n",
|
||||||
|
"# Verify that cluster does not exist already\n",
|
||||||
|
"try:\n",
|
||||||
|
" gpu_cluster = ComputeTarget(workspace=ws, name=gpu_cluster_name)\n",
|
||||||
|
" print(\"Found existing gpu cluster\")\n",
|
||||||
|
"except ComputeTargetException:\n",
|
||||||
|
" print(\"Creating new gpu-cluster\")\n",
|
||||||
|
" \n",
|
||||||
|
" # Specify the configuration for the new cluster\n",
|
||||||
|
" compute_config = AmlCompute.provisioning_configuration(vm_size=\"STANDARD_NC6\",\n",
|
||||||
|
" min_nodes=0,\n",
|
||||||
|
" max_nodes=4)\n",
|
||||||
|
" # Create the cluster with the specified name and configuration\n",
|
||||||
|
" gpu_cluster = ComputeTarget.create(ws, gpu_cluster_name, compute_config)\n",
|
||||||
|
"\n",
|
||||||
|
" # Wait for the cluster to complete, show the output log\n",
|
||||||
|
" gpu_cluster.wait_for_completion(show_output=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
|
|||||||
@@ -115,16 +115,7 @@ jupyter notebook
|
|||||||
- Simple example of using automated ML for regression
|
- Simple example of using automated ML for regression
|
||||||
- Uses local compute for training
|
- Uses local compute for training
|
||||||
|
|
||||||
- [auto-ml-remote-execution.ipynb](remote-execution/auto-ml-remote-execution.ipynb)
|
- [auto-ml-remote-amlcompute.ipynb](remote-amlcompute/auto-ml-remote-amlcompute.ipynb)
|
||||||
- Dataset: scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits)
|
|
||||||
- Example of using automated ML for classification using a remote linux DSVM for training
|
|
||||||
- Parallel execution of iterations
|
|
||||||
- Async tracking of progress
|
|
||||||
- Cancelling individual iterations or entire run
|
|
||||||
- Retrieving models for any iteration or logged metric
|
|
||||||
- Specify automated ML settings as kwargs
|
|
||||||
|
|
||||||
- [auto-ml-remote-amlcompute.ipynb](remote-batchai/auto-ml-remote-amlcompute.ipynb)
|
|
||||||
- Dataset: scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits)
|
- Dataset: scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits)
|
||||||
- Example of using automated ML for classification using remote AmlCompute for training
|
- Example of using automated ML for classification using remote AmlCompute for training
|
||||||
- Parallel execution of iterations
|
- Parallel execution of iterations
|
||||||
@@ -133,12 +124,6 @@ jupyter notebook
|
|||||||
- Retrieving models for any iteration or logged metric
|
- Retrieving models for any iteration or logged metric
|
||||||
- Specify automated ML settings as kwargs
|
- Specify automated ML settings as kwargs
|
||||||
|
|
||||||
- [auto-ml-remote-attach.ipynb](remote-attach/auto-ml-remote-attach.ipynb)
|
|
||||||
- Dataset: Scikit learn's [20newsgroup](http://scikit-learn.org/stable/datasets/twenty_newsgroups.html)
|
|
||||||
- handling text data with preprocess flag
|
|
||||||
- Reading data from a blob store for remote executions
|
|
||||||
- using pandas dataframes for reading data
|
|
||||||
|
|
||||||
- [auto-ml-missing-data-blacklist-early-termination.ipynb](missing-data-blacklist-early-termination/auto-ml-missing-data-blacklist-early-termination.ipynb)
|
- [auto-ml-missing-data-blacklist-early-termination.ipynb](missing-data-blacklist-early-termination/auto-ml-missing-data-blacklist-early-termination.ipynb)
|
||||||
- Dataset: scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits)
|
- Dataset: scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits)
|
||||||
- Blacklist certain pipelines
|
- Blacklist certain pipelines
|
||||||
@@ -156,10 +141,6 @@ jupyter notebook
|
|||||||
- Get details for a automated ML Run. (automated ML settings, run widget & all metrics)
|
- Get details for a automated ML Run. (automated ML settings, run widget & all metrics)
|
||||||
- Download fitted pipeline for any iteration
|
- Download fitted pipeline for any iteration
|
||||||
|
|
||||||
- [auto-ml-remote-execution-with-datastore.ipynb](remote-execution-with-datastore/auto-ml-remote-execution-with-datastore.ipynb)
|
|
||||||
- Dataset: Scikit learn's [20newsgroup](http://scikit-learn.org/stable/datasets/twenty_newsgroups.html)
|
|
||||||
- Download the data and store it in DataStore.
|
|
||||||
|
|
||||||
- [auto-ml-classification-with-deployment.ipynb](classification-with-deployment/auto-ml-classification-with-deployment.ipynb)
|
- [auto-ml-classification-with-deployment.ipynb](classification-with-deployment/auto-ml-classification-with-deployment.ipynb)
|
||||||
- Dataset: scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits)
|
- Dataset: scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits)
|
||||||
- Simple example of using automated ML for classification
|
- Simple example of using automated ML for classification
|
||||||
|
|||||||
@@ -192,7 +192,7 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### Create or Attach a Remote Linux DSVM"
|
"### Create or Attach an AmlCompute cluster"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -201,21 +201,36 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"dsvm_name = 'mydsvmb'\n",
|
"from azureml.core.compute import AmlCompute\n",
|
||||||
|
"from azureml.core.compute import ComputeTarget\n",
|
||||||
"\n",
|
"\n",
|
||||||
"try:\n",
|
"# Choose a name for your cluster.\n",
|
||||||
" while ws.compute_targets[dsvm_name].provisioning_state == 'Creating':\n",
|
"amlcompute_cluster_name = \"cpu-cluster\"\n",
|
||||||
" time.sleep(1)\n",
|
"\n",
|
||||||
" \n",
|
"found = False\n",
|
||||||
" dsvm_compute = DsvmCompute(ws, dsvm_name)\n",
|
"\n",
|
||||||
" print('Found existing DVSM.')\n",
|
"# Check if this compute target already exists in the workspace.\n",
|
||||||
"except:\n",
|
"\n",
|
||||||
" print('Creating a new DSVM.')\n",
|
"cts = ws.compute_targets\n",
|
||||||
" dsvm_config = DsvmCompute.provisioning_configuration(vm_size = \"Standard_D2_v2\")\n",
|
"if amlcompute_cluster_name in cts and cts[amlcompute_cluster_name].type == 'AmlCompute':\n",
|
||||||
" dsvm_compute = DsvmCompute.create(ws, name = dsvm_name, provisioning_configuration = dsvm_config)\n",
|
" found = True\n",
|
||||||
" dsvm_compute.wait_for_completion(show_output = True)\n",
|
" print('Found existing compute target.')\n",
|
||||||
" print(\"Waiting one minute for ssh to be accessible\")\n",
|
" compute_target = cts[amlcompute_cluster_name]\n",
|
||||||
" time.sleep(90) # Wait for ssh to be accessible"
|
"\n",
|
||||||
|
"if not found:\n",
|
||||||
|
" print('Creating a new compute target...')\n",
|
||||||
|
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\", # for GPU, use \"STANDARD_NC6\"\n",
|
||||||
|
" #vm_priority = 'lowpriority', # optional\n",
|
||||||
|
" max_nodes = 6)\n",
|
||||||
|
"\n",
|
||||||
|
" # Create the cluster.\\n\",\n",
|
||||||
|
" compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)\n",
|
||||||
|
"\n",
|
||||||
|
" # Can poll for a minimum number of nodes and for a specific timeout.\n",
|
||||||
|
" # If no min_node_count is provided, it will use the scale settings for the cluster.\n",
|
||||||
|
" compute_target.wait_for_completion(show_output = True, min_node_count = None, timeout_in_minutes = 20)\n",
|
||||||
|
"\n",
|
||||||
|
" # For a more detailed view of current AmlCompute status, use get_status()."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -227,9 +242,13 @@
|
|||||||
"from azureml.core.runconfig import RunConfiguration\n",
|
"from azureml.core.runconfig import RunConfiguration\n",
|
||||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"# create a new RunConfig object\n",
|
||||||
"conda_run_config = RunConfiguration(framework=\"python\")\n",
|
"conda_run_config = RunConfiguration(framework=\"python\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"conda_run_config.target = dsvm_compute\n",
|
"# Set compute target to AmlCompute\n",
|
||||||
|
"conda_run_config.target = compute_target\n",
|
||||||
|
"conda_run_config.environment.docker.enabled = True\n",
|
||||||
|
"conda_run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n",
|
||||||
"\n",
|
"\n",
|
||||||
"cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]'], conda_packages=['numpy','py-xgboost<=0.80'])\n",
|
"cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]'], conda_packages=['numpy','py-xgboost<=0.80'])\n",
|
||||||
"conda_run_config.environment.python.conda_dependencies = cd"
|
"conda_run_config.environment.python.conda_dependencies = cd"
|
||||||
@@ -294,6 +313,27 @@
|
|||||||
"remote_run.clean_preprocessor_cache()"
|
"remote_run.clean_preprocessor_cache()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Cancelling Runs\n",
|
||||||
|
"You can cancel ongoing remote runs using the `cancel` and `cancel_iteration` functions."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Cancel the ongoing experiment and stop scheduling new iterations.\n",
|
||||||
|
"# remote_run.cancel()\n",
|
||||||
|
"\n",
|
||||||
|
"# Cancel iteration 1 and move onto iteration 2.\n",
|
||||||
|
"# remote_run.cancel_iteration(1)"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
|
|||||||
@@ -119,7 +119,9 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### Create or Attach existing AmlCompute\n",
|
"### Create or Attach existing AmlCompute\n",
|
||||||
"You will need to create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for your AutoML run. In this tutorial, you create an AmlCompute as your training compute resource.\n",
|
"You will need to create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for your AutoML run. In this tutorial, you create `AmlCompute` as your training compute resource.\n",
|
||||||
|
"\n",
|
||||||
|
"**Creation of AmlCompute takes approximately 5 minutes.** If the AmlCompute with that name is already in your workspace this code will skip the creation process.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"As with other Azure services, there are limits on certain resources (e.g. AmlCompute) associated with the Azure Machine Learning service. Please read [this article](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota."
|
"As with other Azure services, there are limits on certain resources (e.g. AmlCompute) associated with the Azure Machine Learning service. Please read [this article](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota."
|
||||||
]
|
]
|
||||||
@@ -134,12 +136,10 @@
|
|||||||
"from azureml.core.compute import ComputeTarget\n",
|
"from azureml.core.compute import ComputeTarget\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Choose a name for your cluster.\n",
|
"# Choose a name for your cluster.\n",
|
||||||
"amlcompute_cluster_name = \"cpucluster\"\n",
|
"amlcompute_cluster_name = \"cpu-cluster\"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"found = False\n",
|
"found = False\n",
|
||||||
"\n",
|
|
||||||
"# Check if this compute target already exists in the workspace.\n",
|
"# Check if this compute target already exists in the workspace.\n",
|
||||||
"\n",
|
|
||||||
"cts = ws.compute_targets\n",
|
"cts = ws.compute_targets\n",
|
||||||
"if amlcompute_cluster_name in cts and cts[amlcompute_cluster_name].type == 'AmlCompute':\n",
|
"if amlcompute_cluster_name in cts and cts[amlcompute_cluster_name].type == 'AmlCompute':\n",
|
||||||
" found = True\n",
|
" found = True\n",
|
||||||
|
|||||||
@@ -1,565 +0,0 @@
|
|||||||
{
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
|
||||||
"\n",
|
|
||||||
"Licensed under the MIT License."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
""
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# Automated Machine Learning\n",
|
|
||||||
"_**Remote Execution using attach**_\n",
|
|
||||||
"\n",
|
|
||||||
"## Contents\n",
|
|
||||||
"1. [Introduction](#Introduction)\n",
|
|
||||||
"1. [Setup](#Setup)\n",
|
|
||||||
"1. [Data](#Data)\n",
|
|
||||||
"1. [Train](#Train)\n",
|
|
||||||
"1. [Results](#Results)\n",
|
|
||||||
"1. [Test](#Test)\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Introduction\n",
|
|
||||||
"In this example we use the scikit-learn's [20newsgroup](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.fetch_20newsgroups.html) to showcase how you can use AutoML to handle text data with remote attach.\n",
|
|
||||||
"\n",
|
|
||||||
"Make sure you have executed the [configuration](../../../configuration.ipynb) before running this notebook.\n",
|
|
||||||
"\n",
|
|
||||||
"In this notebook you will learn how to:\n",
|
|
||||||
"1. Create an `Experiment` in an existing `Workspace`.\n",
|
|
||||||
"2. Attach an existing DSVM to a workspace.\n",
|
|
||||||
"3. Configure AutoML using `AutoMLConfig`.\n",
|
|
||||||
"4. Train the model using the DSVM.\n",
|
|
||||||
"5. Explore the results.\n",
|
|
||||||
"6. Viewing the engineered names for featurized data and featurization summary for all raw features.\n",
|
|
||||||
"7. Test the best fitted model.\n",
|
|
||||||
"\n",
|
|
||||||
"In addition this notebook showcases the following features\n",
|
|
||||||
"- **Parallel** executions for iterations\n",
|
|
||||||
"- **Asynchronous** tracking of progress\n",
|
|
||||||
"- **Cancellation** of individual iterations or the entire run\n",
|
|
||||||
"- Retrieving models for any iteration or logged metric\n",
|
|
||||||
"- Specifying AutoML settings as `**kwargs`\n",
|
|
||||||
"- Handling **text** data using the `preprocess` flag"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Setup\n",
|
|
||||||
"\n",
|
|
||||||
"As part of the setup you have already created an Azure ML `Workspace` object. For AutoML you will need to create an `Experiment` object, which is a named object in a `Workspace` used to run experiments."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import os\n",
|
|
||||||
"\n",
|
|
||||||
"import numpy as np\n",
|
|
||||||
"import pandas as pd\n",
|
|
||||||
"\n",
|
|
||||||
"import azureml.core\n",
|
|
||||||
"from azureml.core.experiment import Experiment\n",
|
|
||||||
"from azureml.core.workspace import Workspace\n",
|
|
||||||
"from azureml.train.automl import AutoMLConfig"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"ws = Workspace.from_config()\n",
|
|
||||||
"\n",
|
|
||||||
"# Choose a name for the run history container in the workspace.\n",
|
|
||||||
"experiment_name = 'automl-remote-attach'\n",
|
|
||||||
"project_folder = './sample_projects/automl-remote-attach'\n",
|
|
||||||
"\n",
|
|
||||||
"experiment = Experiment(ws, experiment_name)\n",
|
|
||||||
"\n",
|
|
||||||
"output = {}\n",
|
|
||||||
"output['SDK version'] = azureml.core.VERSION\n",
|
|
||||||
"output['Subscription ID'] = ws.subscription_id\n",
|
|
||||||
"output['Workspace'] = ws.name\n",
|
|
||||||
"output['Resource Group'] = ws.resource_group\n",
|
|
||||||
"output['Location'] = ws.location\n",
|
|
||||||
"output['Project Directory'] = project_folder\n",
|
|
||||||
"output['Experiment Name'] = experiment.name\n",
|
|
||||||
"pd.set_option('display.max_colwidth', -1)\n",
|
|
||||||
"outputDf = pd.DataFrame(data = output, index = [''])\n",
|
|
||||||
"outputDf.T"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Attach a Remote Linux DSVM\n",
|
|
||||||
"To use a remote Docker compute target:\n",
|
|
||||||
"1. Create a Linux DSVM in Azure, following these [instructions](https://docs.microsoft.com/en-us/azure/machine-learning/data-science-virtual-machine/dsvm-ubuntu-intro). Make sure you use the Ubuntu flavor (not CentOS). Make sure that disk space is available under `/tmp` because AutoML creates files under `/tmp/azureml_run`s. The DSVM should have more cores than the number of parallel runs that you plan to enable. It should also have at least 4GB per core.\n",
|
|
||||||
"2. Enter the IP address, user name and password below.\n",
|
|
||||||
"\n",
|
|
||||||
"**Note:** By default, SSH runs on port 22 and you don't need to change the port number below. If you've configured SSH to use a different port, change `dsvm_ssh_port` accordinglyaddress. [Read more](https://docs.microsoft.com/en-us/azure/virtual-machines/troubleshooting/detailed-troubleshoot-ssh-connection) on changing SSH ports for security reasons."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.compute import ComputeTarget, RemoteCompute\n",
|
|
||||||
"import time\n",
|
|
||||||
"\n",
|
|
||||||
"# Add your VM information below\n",
|
|
||||||
"# If a compute with the specified compute_name already exists, it will be used and the dsvm_ip_addr, dsvm_ssh_port, \n",
|
|
||||||
"# dsvm_username and dsvm_password will be ignored.\n",
|
|
||||||
"compute_name = 'mydsvmb'\n",
|
|
||||||
"dsvm_ip_addr = '<<ip_addr>>'\n",
|
|
||||||
"dsvm_ssh_port = 22\n",
|
|
||||||
"dsvm_username = '<<username>>'\n",
|
|
||||||
"dsvm_password = '<<password>>'\n",
|
|
||||||
"\n",
|
|
||||||
"if compute_name in ws.compute_targets:\n",
|
|
||||||
" print('Using existing compute.')\n",
|
|
||||||
" dsvm_compute = ws.compute_targets[compute_name]\n",
|
|
||||||
"else:\n",
|
|
||||||
" attach_config = RemoteCompute.attach_configuration(address=dsvm_ip_addr, username=dsvm_username, password=dsvm_password, ssh_port=dsvm_ssh_port)\n",
|
|
||||||
" ComputeTarget.attach(workspace=ws, name=compute_name, attach_configuration=attach_config)\n",
|
|
||||||
"\n",
|
|
||||||
" while ws.compute_targets[compute_name].provisioning_state == 'Creating':\n",
|
|
||||||
" time.sleep(1)\n",
|
|
||||||
"\n",
|
|
||||||
" dsvm_compute = ws.compute_targets[compute_name]\n",
|
|
||||||
" \n",
|
|
||||||
" if dsvm_compute.provisioning_state == 'Failed':\n",
|
|
||||||
" print('Attached failed.')\n",
|
|
||||||
" print(dsvm_compute.provisioning_errors)\n",
|
|
||||||
" dsvm_compute.detach()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.runconfig import RunConfiguration\n",
|
|
||||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
|
||||||
"import pkg_resources\n",
|
|
||||||
"\n",
|
|
||||||
"# create a new RunConfig object\n",
|
|
||||||
"conda_run_config = RunConfiguration(framework=\"python\")\n",
|
|
||||||
"\n",
|
|
||||||
"# Set compute target to the Linux DSVM\n",
|
|
||||||
"conda_run_config.target = dsvm_compute\n",
|
|
||||||
"\n",
|
|
||||||
"pandas_dependency = 'pandas==' + pkg_resources.get_distribution(\"pandas\").version\n",
|
|
||||||
"\n",
|
|
||||||
"cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]'], conda_packages=['numpy','py-xgboost<=0.80',pandas_dependency])\n",
|
|
||||||
"conda_run_config.environment.python.conda_dependencies = cd"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Data\n",
|
|
||||||
"For remote executions you should author a `get_data.py` file containing a `get_data()` function. This file should be in the root directory of the project. You can encapsulate code to read data either from a blob storage or local disk in this file.\n",
|
|
||||||
"In this example, the `get_data()` function returns a [dictionary](README.md#getdata)."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"if not os.path.exists(project_folder):\n",
|
|
||||||
" os.makedirs(project_folder)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"%%writefile $project_folder/get_data.py\n",
|
|
||||||
"\n",
|
|
||||||
"import numpy as np\n",
|
|
||||||
"from sklearn.datasets import fetch_20newsgroups\n",
|
|
||||||
"\n",
|
|
||||||
"def get_data():\n",
|
|
||||||
" remove = ('headers', 'footers', 'quotes')\n",
|
|
||||||
" categories = [\n",
|
|
||||||
" 'alt.atheism',\n",
|
|
||||||
" 'talk.religion.misc',\n",
|
|
||||||
" 'comp.graphics',\n",
|
|
||||||
" 'sci.space',\n",
|
|
||||||
" ]\n",
|
|
||||||
" data_train = fetch_20newsgroups(subset = 'train', categories = categories,\n",
|
|
||||||
" shuffle = True, random_state = 42,\n",
|
|
||||||
" remove = remove)\n",
|
|
||||||
" \n",
|
|
||||||
" X_train = np.array(data_train.data).reshape((len(data_train.data),1))\n",
|
|
||||||
" y_train = np.array(data_train.target)\n",
|
|
||||||
" \n",
|
|
||||||
" return { \"X\" : X_train, \"y\" : y_train }"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Train\n",
|
|
||||||
"\n",
|
|
||||||
"You can specify `automl_settings` as `**kwargs` as well. Also note that you can use a `get_data()` function for local excutions too.\n",
|
|
||||||
"\n",
|
|
||||||
"**Note:** When using Remote DSVM, you can't pass Numpy arrays directly to the fit method.\n",
|
|
||||||
"\n",
|
|
||||||
"|Property|Description|\n",
|
|
||||||
"|-|-|\n",
|
|
||||||
"|**primary_metric**|This is the metric that you want to optimize. Classification supports the following primary metrics: <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>average_precision_score_weighted</i><br><i>norm_macro_recall</i><br><i>precision_score_weighted</i>|\n",
|
|
||||||
"|**iteration_timeout_minutes**|Time limit in minutes for each iteration.|\n",
|
|
||||||
"|**iterations**|Number of iterations. In each iteration AutoML trains a specific pipeline with the data.|\n",
|
|
||||||
"|**n_cross_validations**|Number of cross validation splits.|\n",
|
|
||||||
"|**max_concurrent_iterations**|Maximum number of iterations that would be executed in parallel. This should be less than the number of cores on the DSVM.|\n",
|
|
||||||
"|**preprocess**|Setting this to *True* enables AutoML to perform preprocessing on the input to handle *missing data*, and to perform some common *feature extraction*.|\n",
|
|
||||||
"|**enable_cache**|Setting this to *True* enables preprocess done once and reuse the same preprocessed data for all the iterations. Default value is True.\n",
|
|
||||||
"|**max_cores_per_iteration**|Indicates how many cores on the compute target would be used to train a single pipeline.<br>Default is *1*; you can set it to *-1* to use all cores.|"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"automl_settings = {\n",
|
|
||||||
" \"iteration_timeout_minutes\": 60,\n",
|
|
||||||
" \"iterations\": 4,\n",
|
|
||||||
" \"n_cross_validations\": 5,\n",
|
|
||||||
" \"primary_metric\": 'AUC_weighted',\n",
|
|
||||||
" \"preprocess\": True,\n",
|
|
||||||
" \"max_cores_per_iteration\": 2\n",
|
|
||||||
"}\n",
|
|
||||||
"\n",
|
|
||||||
"automl_config = AutoMLConfig(task = 'classification',\n",
|
|
||||||
" path = project_folder,\n",
|
|
||||||
" run_configuration=conda_run_config,\n",
|
|
||||||
" data_script = project_folder + \"/get_data.py\",\n",
|
|
||||||
" **automl_settings\n",
|
|
||||||
" )\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Call the `submit` method on the experiment object and pass the run configuration. For remote runs the execution is asynchronous, so you will see the iterations get populated as they complete. You can interact with the widgets and models even when the experiment is running to retrieve the best model up to that point. Once you are satisfied with the model, you can cancel a particular iteration or the whole run."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"remote_run = experiment.submit(automl_config)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"remote_run"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Results\n",
|
|
||||||
"#### Widget for Monitoring Runs\n",
|
|
||||||
"\n",
|
|
||||||
"The widget will first report a \"loading\" status while running the first iteration. After completing the first iteration, an auto-updating graph and table will be shown. The widget will refresh once per minute, so you should see the graph update as child runs complete.\n",
|
|
||||||
"\n",
|
|
||||||
"You can click on a pipeline to see run properties and output logs. Logs are also available on the DSVM under `/tmp/azureml_run/{iterationid}/azureml-logs`\n",
|
|
||||||
"\n",
|
|
||||||
"**Note:** The widget displays a link at the bottom. Use this link to open a web interface to explore the individual run details."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.widgets import RunDetails\n",
|
|
||||||
"RunDetails(remote_run).show() "
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Wait until the run finishes.\n",
|
|
||||||
"remote_run.wait_for_completion(show_output = True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Pre-process cache cleanup\n",
|
|
||||||
"The preprocess data gets cache at user default file store. When the run is completed the cache can be cleaned by running below cell"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"remote_run.clean_preprocessor_cache()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"\n",
|
|
||||||
"#### Retrieve All Child Runs\n",
|
|
||||||
"You can also use SDK methods to fetch all the child runs and see individual metrics that we log. "
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"children = list(remote_run.get_children())\n",
|
|
||||||
"metricslist = {}\n",
|
|
||||||
"for run in children:\n",
|
|
||||||
" properties = run.get_properties()\n",
|
|
||||||
" metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)}\n",
|
|
||||||
" metricslist[int(properties['iteration'])] = metrics\n",
|
|
||||||
"\n",
|
|
||||||
"rundata = pd.DataFrame(metricslist).sort_index(1)\n",
|
|
||||||
"rundata"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Cancelling Runs\n",
|
|
||||||
"You can cancel ongoing remote runs using the `cancel` and `cancel_iteration` functions."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Cancel the ongoing experiment and stop scheduling new iterations.\n",
|
|
||||||
"# remote_run.cancel()\n",
|
|
||||||
"\n",
|
|
||||||
"# Cancel iteration 1 and move onto iteration 2.\n",
|
|
||||||
"# remote_run.cancel_iteration(1)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Retrieve the Best Model\n",
|
|
||||||
"\n",
|
|
||||||
"Below we select the best pipeline from our iterations. The `get_output` method returns the best run and the fitted model. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"best_run, fitted_model = remote_run.get_output()\n",
|
|
||||||
"print(best_run)\n",
|
|
||||||
"print(fitted_model)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"#### View the engineered names for featurized data\n",
|
|
||||||
"Below we display the engineered feature names generated for the featurized data using the preprocessing featurization."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"fitted_model.named_steps['datatransformer'].get_engineered_feature_names()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"#### View the featurization summary\n",
|
|
||||||
"Below we display the featurization that was performed on different raw features in the user data. For each raw feature in the user data, the following information is displayed:-\n",
|
|
||||||
"- Raw feature name\n",
|
|
||||||
"- Number of engineered features formed out of this raw feature\n",
|
|
||||||
"- Type detected\n",
|
|
||||||
"- If feature was dropped\n",
|
|
||||||
"- List of feature transformations for the raw feature"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"fitted_model.named_steps['datatransformer'].get_featurization_summary()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"#### Best Model Based on Any Other Metric\n",
|
|
||||||
"Show the run and the model which has the smallest `accuracy` value:"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# lookup_metric = \"accuracy\"\n",
|
|
||||||
"# best_run, fitted_model = remote_run.get_output(metric = lookup_metric)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"#### Model from a Specific Iteration"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"iteration = 0\n",
|
|
||||||
"zero_run, zero_model = remote_run.get_output(iteration = iteration)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Test"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Load test data.\n",
|
|
||||||
"from pandas_ml import ConfusionMatrix\n",
|
|
||||||
"from sklearn.datasets import fetch_20newsgroups\n",
|
|
||||||
"\n",
|
|
||||||
"remove = ('headers', 'footers', 'quotes')\n",
|
|
||||||
"categories = [\n",
|
|
||||||
" 'alt.atheism',\n",
|
|
||||||
" 'talk.religion.misc',\n",
|
|
||||||
" 'comp.graphics',\n",
|
|
||||||
" 'sci.space',\n",
|
|
||||||
" ]\n",
|
|
||||||
"\n",
|
|
||||||
"data_test = fetch_20newsgroups(subset = 'test', categories = categories,\n",
|
|
||||||
" shuffle = True, random_state = 42,\n",
|
|
||||||
" remove = remove)\n",
|
|
||||||
"\n",
|
|
||||||
"X_test = np.array(data_test.data).reshape((len(data_test.data),1))\n",
|
|
||||||
"y_test = data_test.target\n",
|
|
||||||
"\n",
|
|
||||||
"# Test our best pipeline.\n",
|
|
||||||
"\n",
|
|
||||||
"y_pred = fitted_model.predict(X_test)\n",
|
|
||||||
"y_pred_strings = [data_test.target_names[i] for i in y_pred]\n",
|
|
||||||
"y_test_strings = [data_test.target_names[i] for i in y_test]\n",
|
|
||||||
"\n",
|
|
||||||
"cm = ConfusionMatrix(y_test_strings, y_pred_strings)\n",
|
|
||||||
"print(cm)\n",
|
|
||||||
"cm.plot()"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"authors": [
|
|
||||||
{
|
|
||||||
"name": "savitam"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3.6",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python36"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.6.6"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 2
|
|
||||||
}
|
|
||||||
@@ -1,555 +0,0 @@
|
|||||||
{
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
|
||||||
"\n",
|
|
||||||
"Licensed under the MIT License."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# Automated Machine Learning\n",
|
|
||||||
"_**Remote Execution using AmlCompute**_\n",
|
|
||||||
"\n",
|
|
||||||
"## Contents\n",
|
|
||||||
"1. [Introduction](#Introduction)\n",
|
|
||||||
"1. [Setup](#Setup)\n",
|
|
||||||
"1. [Data](#Data)\n",
|
|
||||||
"1. [Train](#Train)\n",
|
|
||||||
"1. [Results](#Results)\n",
|
|
||||||
"1. [Test](#Test)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Introduction\n",
|
|
||||||
"In this example we use the scikit-learn's [digit dataset](http://scikit-learn.org/stable/datasets/index.html#optical-recognition-of-handwritten-digits-dataset) to showcase how you can use AutoML for a simple classification problem.\n",
|
|
||||||
"\n",
|
|
||||||
"Make sure you have executed the [configuration](../../../configuration.ipynb) before running this notebook.\n",
|
|
||||||
"\n",
|
|
||||||
"In this notebook you would see\n",
|
|
||||||
"1. Create an `Experiment` in an existing `Workspace`.\n",
|
|
||||||
"2. Create or Attach existing AmlCompute to a workspace.\n",
|
|
||||||
"3. Configure AutoML using `AutoMLConfig`.\n",
|
|
||||||
"4. Train the model using AmlCompute\n",
|
|
||||||
"5. Explore the results.\n",
|
|
||||||
"6. Test the best fitted model.\n",
|
|
||||||
"\n",
|
|
||||||
"In addition this notebook showcases the following features\n",
|
|
||||||
"- **Parallel** executions for iterations\n",
|
|
||||||
"- **Asynchronous** tracking of progress\n",
|
|
||||||
"- **Cancellation** of individual iterations or the entire run\n",
|
|
||||||
"- Retrieving models for any iteration or logged metric\n",
|
|
||||||
"- Specifying AutoML settings as `**kwargs`"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Setup\n",
|
|
||||||
"\n",
|
|
||||||
"As part of the setup you have already created an Azure ML `Workspace` object. For AutoML you will need to create an `Experiment` object, which is a named object in a `Workspace` used to run experiments."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import logging\n",
|
|
||||||
"import os\n",
|
|
||||||
"import csv\n",
|
|
||||||
"\n",
|
|
||||||
"from matplotlib import pyplot as plt\n",
|
|
||||||
"import numpy as np\n",
|
|
||||||
"import pandas as pd\n",
|
|
||||||
"from sklearn import datasets\n",
|
|
||||||
"\n",
|
|
||||||
"import azureml.core\n",
|
|
||||||
"from azureml.core.experiment import Experiment\n",
|
|
||||||
"from azureml.core.workspace import Workspace\n",
|
|
||||||
"from azureml.train.automl import AutoMLConfig"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"ws = Workspace.from_config()\n",
|
|
||||||
"\n",
|
|
||||||
"# Choose a name for the run history container in the workspace.\n",
|
|
||||||
"experiment_name = 'automl-remote-amlcompute'\n",
|
|
||||||
"project_folder = './project'\n",
|
|
||||||
"\n",
|
|
||||||
"experiment = Experiment(ws, experiment_name)\n",
|
|
||||||
"\n",
|
|
||||||
"output = {}\n",
|
|
||||||
"output['SDK version'] = azureml.core.VERSION\n",
|
|
||||||
"output['Subscription ID'] = ws.subscription_id\n",
|
|
||||||
"output['Workspace Name'] = ws.name\n",
|
|
||||||
"output['Resource Group'] = ws.resource_group\n",
|
|
||||||
"output['Location'] = ws.location\n",
|
|
||||||
"output['Project Directory'] = project_folder\n",
|
|
||||||
"output['Experiment Name'] = experiment.name\n",
|
|
||||||
"pd.set_option('display.max_colwidth', -1)\n",
|
|
||||||
"outputDf = pd.DataFrame(data = output, index = [''])\n",
|
|
||||||
"outputDf.T"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Create or Attach existing AmlCompute\n",
|
|
||||||
"You will need to create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for your AutoML run. In this tutorial, you create `AmlCompute` as your training compute resource.\n",
|
|
||||||
"\n",
|
|
||||||
"**Creation of AmlCompute takes approximately 5 minutes.** If the AmlCompute with that name is already in your workspace this code will skip the creation process.\n",
|
|
||||||
"\n",
|
|
||||||
"As with other Azure services, there are limits on certain resources (e.g. AmlCompute) associated with the Azure Machine Learning service. Please read [this article](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.compute import AmlCompute\n",
|
|
||||||
"from azureml.core.compute import ComputeTarget\n",
|
|
||||||
"\n",
|
|
||||||
"# Choose a name for your cluster.\n",
|
|
||||||
"amlcompute_cluster_name = \"automlcl\"\n",
|
|
||||||
"\n",
|
|
||||||
"found = False\n",
|
|
||||||
"# Check if this compute target already exists in the workspace.\n",
|
|
||||||
"cts = ws.compute_targets\n",
|
|
||||||
"if amlcompute_cluster_name in cts and cts[amlcompute_cluster_name].type == 'AmlCompute':\n",
|
|
||||||
" found = True\n",
|
|
||||||
" print('Found existing compute target.')\n",
|
|
||||||
" compute_target = cts[amlcompute_cluster_name]\n",
|
|
||||||
" \n",
|
|
||||||
"if not found:\n",
|
|
||||||
" print('Creating a new compute target...')\n",
|
|
||||||
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\", # for GPU, use \"STANDARD_NC6\"\n",
|
|
||||||
" #vm_priority = 'lowpriority', # optional\n",
|
|
||||||
" max_nodes = 6)\n",
|
|
||||||
"\n",
|
|
||||||
" # Create the cluster.\n",
|
|
||||||
" compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)\n",
|
|
||||||
" \n",
|
|
||||||
" # Can poll for a minimum number of nodes and for a specific timeout.\n",
|
|
||||||
" # If no min_node_count is provided, it will use the scale settings for the cluster.\n",
|
|
||||||
" compute_target.wait_for_completion(show_output = True, min_node_count = None, timeout_in_minutes = 20)\n",
|
|
||||||
" \n",
|
|
||||||
" # For a more detailed view of current AmlCompute status, use get_status()."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": []
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Data\n",
|
|
||||||
"For remote executions, you need to make the data accessible from the remote compute.\n",
|
|
||||||
"This can be done by uploading the data to DataStore.\n",
|
|
||||||
"In this example, we upload scikit-learn's [load_digits](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html) data."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"data_train = datasets.load_digits()\n",
|
|
||||||
"\n",
|
|
||||||
"if not os.path.isdir('data'):\n",
|
|
||||||
" os.mkdir('data')\n",
|
|
||||||
" \n",
|
|
||||||
"if not os.path.exists(project_folder):\n",
|
|
||||||
" os.makedirs(project_folder)\n",
|
|
||||||
" \n",
|
|
||||||
"pd.DataFrame(data_train.data).to_csv(\"data/X_train.tsv\", index=False, header=False, quoting=csv.QUOTE_ALL, sep=\"\\t\")\n",
|
|
||||||
"pd.DataFrame(data_train.target).to_csv(\"data/y_train.tsv\", index=False, header=False, sep=\"\\t\")\n",
|
|
||||||
"\n",
|
|
||||||
"ds = ws.get_default_datastore()\n",
|
|
||||||
"ds.upload(src_dir='./data', target_path='bai_data', overwrite=True, show_progress=True)\n",
|
|
||||||
"\n",
|
|
||||||
"from azureml.core.runconfig import DataReferenceConfiguration\n",
|
|
||||||
"dr = DataReferenceConfiguration(datastore_name=ds.name, \n",
|
|
||||||
" path_on_datastore='bai_data', \n",
|
|
||||||
" path_on_compute='/tmp/azureml_runs',\n",
|
|
||||||
" mode='download', # download files from datastore to compute target\n",
|
|
||||||
" overwrite=False)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.runconfig import RunConfiguration\n",
|
|
||||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
|
||||||
"\n",
|
|
||||||
"# create a new RunConfig object\n",
|
|
||||||
"conda_run_config = RunConfiguration(framework=\"python\")\n",
|
|
||||||
"\n",
|
|
||||||
"# Set compute target to AmlCompute\n",
|
|
||||||
"conda_run_config.target = compute_target\n",
|
|
||||||
"conda_run_config.environment.docker.enabled = True\n",
|
|
||||||
"conda_run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n",
|
|
||||||
"\n",
|
|
||||||
"# set the data reference of the run configuration\n",
|
|
||||||
"conda_run_config.data_references = {ds.name: dr}\n",
|
|
||||||
"\n",
|
|
||||||
"cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]'], conda_packages=['numpy'])\n",
|
|
||||||
"conda_run_config.environment.python.conda_dependencies = cd"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"%%writefile $project_folder/get_data.py\n",
|
|
||||||
"\n",
|
|
||||||
"import pandas as pd\n",
|
|
||||||
"\n",
|
|
||||||
"def get_data():\n",
|
|
||||||
" X_train = pd.read_csv(\"/tmp/azureml_runs/bai_data/X_train.tsv\", delimiter=\"\\t\", header=None, quotechar='\"')\n",
|
|
||||||
" y_train = pd.read_csv(\"/tmp/azureml_runs/bai_data/y_train.tsv\", delimiter=\"\\t\", header=None, quotechar='\"')\n",
|
|
||||||
"\n",
|
|
||||||
" return { \"X\" : X_train.values, \"y\" : y_train[0].values }\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Train\n",
|
|
||||||
"\n",
|
|
||||||
"You can specify `automl_settings` as `**kwargs` as well. Also note that you can use a `get_data()` function for local executions too.\n",
|
|
||||||
"\n",
|
|
||||||
"**Note:** When using AmlCompute, you can't pass Numpy arrays directly to the fit method.\n",
|
|
||||||
"\n",
|
|
||||||
"|Property|Description|\n",
|
|
||||||
"|-|-|\n",
|
|
||||||
"|**primary_metric**|This is the metric that you want to optimize. Classification supports the following primary metrics: <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>average_precision_score_weighted</i><br><i>norm_macro_recall</i><br><i>precision_score_weighted</i>|\n",
|
|
||||||
"|**iteration_timeout_minutes**|Time limit in minutes for each iteration.|\n",
|
|
||||||
"|**iterations**|Number of iterations. In each iteration AutoML trains a specific pipeline with the data.|\n",
|
|
||||||
"|**n_cross_validations**|Number of cross validation splits.|\n",
|
|
||||||
"|**max_concurrent_iterations**|Maximum number of iterations that would be executed in parallel. This should be less than the number of cores on the DSVM.|"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"automl_settings = {\n",
|
|
||||||
" \"iteration_timeout_minutes\": 2,\n",
|
|
||||||
" \"iterations\": 20,\n",
|
|
||||||
" \"n_cross_validations\": 5,\n",
|
|
||||||
" \"primary_metric\": 'AUC_weighted',\n",
|
|
||||||
" \"preprocess\": False,\n",
|
|
||||||
" \"max_concurrent_iterations\": 5,\n",
|
|
||||||
" \"verbosity\": logging.INFO\n",
|
|
||||||
"}\n",
|
|
||||||
"\n",
|
|
||||||
"automl_config = AutoMLConfig(task = 'classification',\n",
|
|
||||||
" debug_log = 'automl_errors.log',\n",
|
|
||||||
" path = project_folder,\n",
|
|
||||||
" run_configuration=conda_run_config,\n",
|
|
||||||
" data_script = project_folder + \"/get_data.py\",\n",
|
|
||||||
" **automl_settings\n",
|
|
||||||
" )\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Call the `submit` method on the experiment object and pass the run configuration. For remote runs the execution is asynchronous, so you will see the iterations get populated as they complete. You can interact with the widgets and models even when the experiment is running to retrieve the best model up to that point. Once you are satisfied with the model, you can cancel a particular iteration or the whole run.\n",
|
|
||||||
"In this example, we specify `show_output = False` to suppress console output while the run is in progress."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"remote_run = experiment.submit(automl_config, show_output = False)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"remote_run"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Results\n",
|
|
||||||
"\n",
|
|
||||||
"#### Loading executed runs\n",
|
|
||||||
"In case you need to load a previously executed run, enable the cell below and replace the `run_id` value."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "raw",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"remote_run = AutoMLRun(experiment = experiment, run_id = 'AutoML_5db13491-c92a-4f1d-b622-8ab8d973a058')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"#### Widget for Monitoring Runs\n",
|
|
||||||
"\n",
|
|
||||||
"The widget will first report a \"loading\" status while running the first iteration. After completing the first iteration, an auto-updating graph and table will be shown. The widget will refresh once per minute, so you should see the graph update as child runs complete.\n",
|
|
||||||
"\n",
|
|
||||||
"You can click on a pipeline to see run properties and output logs. Logs are also available on the DSVM under `/tmp/azureml_run/{iterationid}/azureml-logs`\n",
|
|
||||||
"\n",
|
|
||||||
"**Note:** The widget displays a link at the bottom. Use this link to open a web interface to explore the individual run details."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"remote_run"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.widgets import RunDetails\n",
|
|
||||||
"RunDetails(remote_run).show() "
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Wait until the run finishes.\n",
|
|
||||||
"remote_run.wait_for_completion(show_output = True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"\n",
|
|
||||||
"#### Retrieve All Child Runs\n",
|
|
||||||
"You can also use SDK methods to fetch all the child runs and see individual metrics that we log."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"children = list(remote_run.get_children())\n",
|
|
||||||
"metricslist = {}\n",
|
|
||||||
"for run in children:\n",
|
|
||||||
" properties = run.get_properties()\n",
|
|
||||||
" metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)}\n",
|
|
||||||
" metricslist[int(properties['iteration'])] = metrics\n",
|
|
||||||
"\n",
|
|
||||||
"rundata = pd.DataFrame(metricslist).sort_index(1)\n",
|
|
||||||
"rundata"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Cancelling Runs\n",
|
|
||||||
"\n",
|
|
||||||
"You can cancel ongoing remote runs using the `cancel` and `cancel_iteration` functions."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Cancel the ongoing experiment and stop scheduling new iterations.\n",
|
|
||||||
"# remote_run.cancel()\n",
|
|
||||||
"\n",
|
|
||||||
"# Cancel iteration 1 and move onto iteration 2.\n",
|
|
||||||
"# remote_run.cancel_iteration(1)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Retrieve the Best Model\n",
|
|
||||||
"\n",
|
|
||||||
"Below we select the best pipeline from our iterations. The `get_output` method returns the best run and the fitted model. The Model includes the pipeline and any pre-processing. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"best_run, fitted_model = remote_run.get_output()\n",
|
|
||||||
"print(best_run)\n",
|
|
||||||
"print(fitted_model)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"#### Best Model Based on Any Other Metric\n",
|
|
||||||
"Show the run and the model which has the smallest `log_loss` value:"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"lookup_metric = \"log_loss\"\n",
|
|
||||||
"best_run, fitted_model = remote_run.get_output(metric = lookup_metric)\n",
|
|
||||||
"print(best_run)\n",
|
|
||||||
"print(fitted_model)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"#### Model from a Specific Iteration\n",
|
|
||||||
"Show the run and the model from the third iteration:"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"iteration = 3\n",
|
|
||||||
"third_run, third_model = remote_run.get_output(iteration=iteration)\n",
|
|
||||||
"print(third_run)\n",
|
|
||||||
"print(third_model)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Test\n",
|
|
||||||
"\n",
|
|
||||||
"#### Load Test Data"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"digits = datasets.load_digits()\n",
|
|
||||||
"X_test = digits.data[:10, :]\n",
|
|
||||||
"y_test = digits.target[:10]\n",
|
|
||||||
"images = digits.images[:10]"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"#### Testing Our Best Fitted Model"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Randomly select digits and test.\n",
|
|
||||||
"for index in np.random.choice(len(y_test), 2, replace = False):\n",
|
|
||||||
" print(index)\n",
|
|
||||||
" predicted = fitted_model.predict(X_test[index:index + 1])[0]\n",
|
|
||||||
" label = y_test[index]\n",
|
|
||||||
" title = \"Label value = %d Predicted value = %d \" % (label, predicted)\n",
|
|
||||||
" fig = plt.figure(1, figsize=(3,3))\n",
|
|
||||||
" ax1 = fig.add_axes((0,0,.8,.8))\n",
|
|
||||||
" ax1.set_title(title)\n",
|
|
||||||
" plt.imshow(images[index], cmap = plt.cm.gray_r, interpolation = 'nearest')\n",
|
|
||||||
" plt.show()"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"authors": [
|
|
||||||
{
|
|
||||||
"name": "savitam"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3.6",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python36"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.6.6"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 2
|
|
||||||
}
|
|
||||||
@@ -1,593 +0,0 @@
|
|||||||
{
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
|
||||||
"\n",
|
|
||||||
"Licensed under the MIT License."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
""
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# Automated Machine Learning\n",
|
|
||||||
"_**Remote Execution with DataStore**_\n",
|
|
||||||
"\n",
|
|
||||||
"## Contents\n",
|
|
||||||
"1. [Introduction](#Introduction)\n",
|
|
||||||
"1. [Setup](#Setup)\n",
|
|
||||||
"1. [Data](#Data)\n",
|
|
||||||
"1. [Train](#Train)\n",
|
|
||||||
"1. [Results](#Results)\n",
|
|
||||||
"1. [Test](#Test)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Introduction\n",
|
|
||||||
"This sample accesses a data file on a remote DSVM through DataStore. Advantages of using data store are:\n",
|
|
||||||
"1. DataStore secures the access details.\n",
|
|
||||||
"2. DataStore supports read, write to blob and file store\n",
|
|
||||||
"3. AutoML natively supports copying data from DataStore to DSVM\n",
|
|
||||||
"\n",
|
|
||||||
"Make sure you have executed the [configuration](../../../configuration.ipynb) before running this notebook.\n",
|
|
||||||
"\n",
|
|
||||||
"In this notebook you would see\n",
|
|
||||||
"1. Storing data in DataStore.\n",
|
|
||||||
"2. get_data returning data from DataStore."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Setup\n",
|
|
||||||
"\n",
|
|
||||||
"As part of the setup you have already created a <b>Workspace</b>. For AutoML you would need to create an <b>Experiment</b>. An <b>Experiment</b> is a named object in a <b>Workspace</b>, which is used to run experiments."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import logging\n",
|
|
||||||
"import os\n",
|
|
||||||
"import time\n",
|
|
||||||
"\n",
|
|
||||||
"import numpy as np\n",
|
|
||||||
"import pandas as pd\n",
|
|
||||||
"\n",
|
|
||||||
"import azureml.core\n",
|
|
||||||
"from azureml.core.compute import DsvmCompute\n",
|
|
||||||
"from azureml.core.experiment import Experiment\n",
|
|
||||||
"from azureml.core.workspace import Workspace\n",
|
|
||||||
"from azureml.train.automl import AutoMLConfig"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"ws = Workspace.from_config()\n",
|
|
||||||
"\n",
|
|
||||||
"# choose a name for experiment\n",
|
|
||||||
"experiment_name = 'automl-remote-datastore-file'\n",
|
|
||||||
"# project folder\n",
|
|
||||||
"project_folder = './sample_projects/automl-remote-datastore-file'\n",
|
|
||||||
"\n",
|
|
||||||
"experiment=Experiment(ws, experiment_name)\n",
|
|
||||||
"\n",
|
|
||||||
"output = {}\n",
|
|
||||||
"output['SDK version'] = azureml.core.VERSION\n",
|
|
||||||
"output['Subscription ID'] = ws.subscription_id\n",
|
|
||||||
"output['Workspace'] = ws.name\n",
|
|
||||||
"output['Resource Group'] = ws.resource_group\n",
|
|
||||||
"output['Location'] = ws.location\n",
|
|
||||||
"output['Project Directory'] = project_folder\n",
|
|
||||||
"output['Experiment Name'] = experiment.name\n",
|
|
||||||
"pd.set_option('display.max_colwidth', -1)\n",
|
|
||||||
"outputDf = pd.DataFrame(data = output, index = [''])\n",
|
|
||||||
"outputDf.T"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Create a Remote Linux DSVM\n",
|
|
||||||
"Note: If creation fails with a message about Marketplace purchase eligibility, go to portal.azure.com, start creating DSVM there, and select \"Want to create programmatically\" to enable programmatic creation. Once you've enabled it, you can exit without actually creating VM.\n",
|
|
||||||
"\n",
|
|
||||||
"**Note**: By default SSH runs on port 22 and you don't need to specify it. But if, for security reasons, you switch to a different port (such as 5022), you can append the port number to the address. [Read more](https://docs.microsoft.com/en-us/azure/virtual-machines/troubleshooting/detailed-troubleshoot-ssh-connection) on this."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"compute_target_name = 'mydsvmc'\n",
|
|
||||||
"\n",
|
|
||||||
"try:\n",
|
|
||||||
" while ws.compute_targets[compute_target_name].provisioning_state == 'Creating':\n",
|
|
||||||
" time.sleep(1)\n",
|
|
||||||
" \n",
|
|
||||||
" dsvm_compute = DsvmCompute(workspace=ws, name=compute_target_name)\n",
|
|
||||||
" print('found existing:', dsvm_compute.name)\n",
|
|
||||||
"except:\n",
|
|
||||||
" dsvm_config = DsvmCompute.provisioning_configuration(vm_size=\"Standard_D2_v2\")\n",
|
|
||||||
" dsvm_compute = DsvmCompute.create(ws, name=compute_target_name, provisioning_configuration=dsvm_config)\n",
|
|
||||||
" dsvm_compute.wait_for_completion(show_output=True)\n",
|
|
||||||
" print(\"Waiting one minute for ssh to be accessible\")\n",
|
|
||||||
" time.sleep(90) # Wait for ssh to be accessible"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Data\n",
|
|
||||||
"\n",
|
|
||||||
"### Copy data file to local\n",
|
|
||||||
"\n",
|
|
||||||
"Download the data file.\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"if not os.path.isdir('data'):\n",
|
|
||||||
" os.mkdir('data') "
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from sklearn.datasets import fetch_20newsgroups\n",
|
|
||||||
"import csv\n",
|
|
||||||
"\n",
|
|
||||||
"remove = ('headers', 'footers', 'quotes')\n",
|
|
||||||
"categories = [\n",
|
|
||||||
" 'alt.atheism',\n",
|
|
||||||
" 'talk.religion.misc',\n",
|
|
||||||
" 'comp.graphics',\n",
|
|
||||||
" 'sci.space',\n",
|
|
||||||
" ]\n",
|
|
||||||
"data_train = fetch_20newsgroups(subset = 'train', categories = categories,\n",
|
|
||||||
" shuffle = True, random_state = 42,\n",
|
|
||||||
" remove = remove)\n",
|
|
||||||
" \n",
|
|
||||||
"pd.DataFrame(data_train.data).to_csv(\"data/X_train.tsv\", index=False, header=False, quoting=csv.QUOTE_ALL, sep=\"\\t\")\n",
|
|
||||||
"pd.DataFrame(data_train.target).to_csv(\"data/y_train.tsv\", index=False, header=False, sep=\"\\t\")"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Upload data to the cloud"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Now make the data accessible remotely by uploading that data from your local machine into Azure so it can be accessed for remote training. The datastore is a convenient construct associated with your workspace for you to upload/download data, and interact with it from your remote compute targets. It is backed by Azure blob storage account.\n",
|
|
||||||
"\n",
|
|
||||||
"The data.tsv files are uploaded into a directory named data at the root of the datastore."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"#blob_datastore = Datastore(ws, blob_datastore_name)\n",
|
|
||||||
"ds = ws.get_default_datastore()\n",
|
|
||||||
"print(ds.datastore_type, ds.account_name, ds.container_name)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# ds.upload_files(\"data.tsv\")\n",
|
|
||||||
"ds.upload(src_dir='./data', target_path='data', overwrite=True, show_progress=True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Configure & Run\n",
|
|
||||||
"\n",
|
|
||||||
"First let's create a DataReferenceConfiguration object to inform the system what data folder to download to the compute target.\n",
|
|
||||||
"The path_on_compute should be an absolute path to ensure that the data files are downloaded only once. The get_data method should use this same path to access the data files."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.runconfig import DataReferenceConfiguration\n",
|
|
||||||
"dr = DataReferenceConfiguration(datastore_name=ds.name, \n",
|
|
||||||
" path_on_datastore='data', \n",
|
|
||||||
" path_on_compute='/tmp/azureml_runs',\n",
|
|
||||||
" mode='download', # download files from datastore to compute target\n",
|
|
||||||
" overwrite=False)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.runconfig import RunConfiguration\n",
|
|
||||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
|
||||||
"import pkg_resources\n",
|
|
||||||
"\n",
|
|
||||||
"# create a new RunConfig object\n",
|
|
||||||
"conda_run_config = RunConfiguration(framework=\"python\")\n",
|
|
||||||
"\n",
|
|
||||||
"# Set compute target to the Linux DSVM\n",
|
|
||||||
"conda_run_config.target = dsvm_compute\n",
|
|
||||||
"# set the data reference of the run configuration\n",
|
|
||||||
"conda_run_config.data_references = {ds.name: dr}\n",
|
|
||||||
"\n",
|
|
||||||
"pandas_dependency = 'pandas==' + pkg_resources.get_distribution(\"pandas\").version\n",
|
|
||||||
"\n",
|
|
||||||
"cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]'], conda_packages=['numpy','py-xgboost<=0.80',pandas_dependency])\n",
|
|
||||||
"conda_run_config.environment.python.conda_dependencies = cd"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Create Get Data File\n",
|
|
||||||
"For remote executions you should author a get_data.py file containing a get_data() function. This file should be in the root directory of the project. You can encapsulate code to read data either from a blob storage or local disk in this file.\n",
|
|
||||||
"\n",
|
|
||||||
"The *get_data()* function returns a [dictionary](README.md#getdata).\n",
|
|
||||||
"\n",
|
|
||||||
"The read_csv uses the path_on_compute value specified in the DataReferenceConfiguration call plus the path_on_datastore folder and then the actual file name."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"if not os.path.exists(project_folder):\n",
|
|
||||||
" os.makedirs(project_folder)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"%%writefile $project_folder/get_data.py\n",
|
|
||||||
"\n",
|
|
||||||
"import pandas as pd\n",
|
|
||||||
"\n",
|
|
||||||
"def get_data():\n",
|
|
||||||
" X_train = pd.read_csv(\"/tmp/azureml_runs/data/X_train.tsv\", delimiter=\"\\t\", header=None, quotechar='\"')\n",
|
|
||||||
" y_train = pd.read_csv(\"/tmp/azureml_runs/data/y_train.tsv\", delimiter=\"\\t\", header=None, quotechar='\"')\n",
|
|
||||||
"\n",
|
|
||||||
" return { \"X\" : X_train.values, \"y\" : y_train[0].values }"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Train\n",
|
|
||||||
"\n",
|
|
||||||
"You can specify automl_settings as **kwargs** as well. Also note that you can use the get_data() semantics for local executions too. \n",
|
|
||||||
"\n",
|
|
||||||
"<i>Note: For Remote DSVM and Batch AI you cannot pass Numpy arrays directly to AutoMLConfig.</i>\n",
|
|
||||||
"\n",
|
|
||||||
"|Property|Description|\n",
|
|
||||||
"|-|-|\n",
|
|
||||||
"|**primary_metric**|This is the metric that you want to optimize. Classification supports the following primary metrics: <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>average_precision_score_weighted</i><br><i>norm_macro_recall</i><br><i>precision_score_weighted</i>|\n",
|
|
||||||
"|**iteration_timeout_minutes**|Time limit in minutes for each iteration|\n",
|
|
||||||
"|**iterations**|Number of iterations. In each iteration Auto ML trains a specific pipeline with the data|\n",
|
|
||||||
"|**n_cross_validations**|Number of cross validation splits|\n",
|
|
||||||
"|**max_concurrent_iterations**|Max number of iterations that would be executed in parallel. This should be less than the number of cores on the DSVM\n",
|
|
||||||
"|**preprocess**| *True/False* <br>Setting this to *True* enables Auto ML to perform preprocessing <br>on the input to handle *missing data*, and perform some common *feature extraction*|\n",
|
|
||||||
"|**enable_cache**|Setting this to *True* enables preprocess done once and reuse the same preprocessed data for all the iterations. Default value is True.|\n",
|
|
||||||
"|**max_cores_per_iteration**| Indicates how many cores on the compute target would be used to train a single pipeline.<br> Default is *1*, you can set it to *-1* to use all cores|"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"automl_settings = {\n",
|
|
||||||
" \"iteration_timeout_minutes\": 60,\n",
|
|
||||||
" \"iterations\": 4,\n",
|
|
||||||
" \"n_cross_validations\": 5,\n",
|
|
||||||
" \"primary_metric\": 'AUC_weighted',\n",
|
|
||||||
" \"preprocess\": True,\n",
|
|
||||||
" \"max_cores_per_iteration\": 1,\n",
|
|
||||||
" \"verbosity\": logging.INFO\n",
|
|
||||||
"}\n",
|
|
||||||
"automl_config = AutoMLConfig(task = 'classification',\n",
|
|
||||||
" debug_log = 'automl_errors.log',\n",
|
|
||||||
" path=project_folder,\n",
|
|
||||||
" run_configuration=conda_run_config,\n",
|
|
||||||
" #compute_target = dsvm_compute,\n",
|
|
||||||
" data_script = project_folder + \"/get_data.py\",\n",
|
|
||||||
" **automl_settings\n",
|
|
||||||
" )"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"For remote runs the execution is asynchronous, so you will see the iterations get populated as they complete. You can interact with the widgets/models even when the experiment is running to retrieve the best model up to that point. Once you are satisfied with the model you can cancel a particular iteration or the whole run."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"remote_run = experiment.submit(automl_config, show_output=False)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"remote_run"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Results\n",
|
|
||||||
"#### Widget for monitoring runs\n",
|
|
||||||
"\n",
|
|
||||||
"The widget will sit on \"loading\" until the first iteration completed, then you will see an auto-updating graph and table show up. It refreshed once per minute, so you should see the graph update as child runs complete.\n",
|
|
||||||
"\n",
|
|
||||||
"You can click on a pipeline to see run properties and output logs. Logs are also available on the DSVM under /tmp/azureml_run/{iterationid}/azureml-logs\n",
|
|
||||||
"\n",
|
|
||||||
"NOTE: The widget displays a link at the bottom. This links to a web-ui to explore the individual run details."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.widgets import RunDetails\n",
|
|
||||||
"RunDetails(remote_run).show() "
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Wait until the run finishes.\n",
|
|
||||||
"remote_run.wait_for_completion(show_output = True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"\n",
|
|
||||||
"#### Retrieve All Child Runs\n",
|
|
||||||
"You can also use sdk methods to fetch all the child runs and see individual metrics that we log. "
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"children = list(remote_run.get_children())\n",
|
|
||||||
"metricslist = {}\n",
|
|
||||||
"for run in children:\n",
|
|
||||||
" properties = run.get_properties()\n",
|
|
||||||
" metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)} \n",
|
|
||||||
" metricslist[int(properties['iteration'])] = metrics\n",
|
|
||||||
"\n",
|
|
||||||
"rundata = pd.DataFrame(metricslist).sort_index(1)\n",
|
|
||||||
"rundata"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Canceling Runs\n",
|
|
||||||
"You can cancel ongoing remote runs using the *cancel()* and *cancel_iteration()* functions"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Cancel the ongoing experiment and stop scheduling new iterations\n",
|
|
||||||
"# remote_run.cancel()\n",
|
|
||||||
"\n",
|
|
||||||
"# Cancel iteration 1 and move onto iteration 2\n",
|
|
||||||
"# remote_run.cancel_iteration(1)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Pre-process cache cleanup\n",
|
|
||||||
"The preprocessed data gets cached at the user's default file store. When the run is completed, the cache can be cleaned by running the cell below."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"remote_run.clean_preprocessor_cache()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Retrieve the Best Model\n",
|
|
||||||
"\n",
|
|
||||||
"Below we select the best pipeline from our iterations. The *get_output* method returns the best run and the fitted model. There are overloads on *get_output* that allow you to retrieve the best run and fitted model for *any* logged metric or a particular *iteration*."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"best_run, fitted_model = remote_run.get_output()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"#### Best Model based on any other metric"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# lookup_metric = \"accuracy\"\n",
|
|
||||||
"# best_run, fitted_model = remote_run.get_output(metric=lookup_metric)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"#### Model from a specific iteration"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# iteration = 1\n",
|
|
||||||
"# best_run, fitted_model = remote_run.get_output(iteration=iteration)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Test\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Load test data.\n",
|
|
||||||
"from pandas_ml import ConfusionMatrix\n",
|
|
||||||
"\n",
|
|
||||||
"data_test = fetch_20newsgroups(subset = 'test', categories = categories,\n",
|
|
||||||
" shuffle = True, random_state = 42,\n",
|
|
||||||
" remove = remove)\n",
|
|
||||||
"\n",
|
|
||||||
"X_test = np.array(data_test.data).reshape((len(data_test.data),1))\n",
|
|
||||||
"y_test = data_test.target\n",
|
|
||||||
"\n",
|
|
||||||
"# Test our best pipeline.\n",
|
|
||||||
"\n",
|
|
||||||
"y_pred = fitted_model.predict(X_test)\n",
|
|
||||||
"y_pred_strings = [data_test.target_names[i] for i in y_pred]\n",
|
|
||||||
"y_test_strings = [data_test.target_names[i] for i in y_test]\n",
|
|
||||||
"\n",
|
|
||||||
"cm = ConfusionMatrix(y_test_strings, y_pred_strings)\n",
|
|
||||||
"print(cm)\n",
|
|
||||||
"cm.plot()"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"authors": [
|
|
||||||
{
|
|
||||||
"name": "savitam"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3.6",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python36"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.6.6"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 2
|
|
||||||
}
|
|
||||||
@@ -1,534 +0,0 @@
|
|||||||
{
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
|
||||||
"\n",
|
|
||||||
"Licensed under the MIT License."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
""
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# Automated Machine Learning\n",
|
|
||||||
"_**Remote Execution using DSVM (Ubuntu)**_\n",
|
|
||||||
"\n",
|
|
||||||
"## Contents\n",
|
|
||||||
"1. [Introduction](#Introduction)\n",
|
|
||||||
"1. [Setup](#Setup)\n",
|
|
||||||
"1. [Data](#Data)\n",
|
|
||||||
"1. [Train](#Train)\n",
|
|
||||||
"1. [Results](#Results)\n",
|
|
||||||
"1. [Test](#Test)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Introduction\n",
|
|
||||||
"In this example we use the scikit-learn's [digit dataset](http://scikit-learn.org/stable/datasets/index.html#optical-recognition-of-handwritten-digits-dataset) to showcase how you can use AutoML for a simple classification problem.\n",
|
|
||||||
"\n",
|
|
||||||
"Make sure you have executed the [configuration](../../../configuration.ipynb) before running this notebook.\n",
|
|
||||||
"\n",
|
|
||||||
"In this notebook you will learn how to:\n",
|
|
||||||
"1. Create an `Experiment` in an existing `Workspace`.\n",
|
|
||||||
"2. Attach an existing DSVM to a workspace.\n",
|
|
||||||
"3. Configure AutoML using `AutoMLConfig`.\n",
|
|
||||||
"4. Train the model using the DSVM.\n",
|
|
||||||
"5. Explore the results.\n",
|
|
||||||
"6. Test the best fitted model.\n",
|
|
||||||
"\n",
|
|
||||||
"In addition, this notebook showcases the following features:\n",
|
|
||||||
"- **Parallel** executions for iterations\n",
|
|
||||||
"- **Asynchronous** tracking of progress\n",
|
|
||||||
"- **Cancellation** of individual iterations or the entire run\n",
|
|
||||||
"- Retrieving models for any iteration or logged metric\n",
|
|
||||||
"- Specifying AutoML settings as `**kwargs`"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Setup\n",
|
|
||||||
"\n",
|
|
||||||
"As part of the setup you have already created an Azure ML `Workspace` object. For AutoML you will need to create an `Experiment` object, which is a named object in a `Workspace` used to run experiments."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import logging\n",
|
|
||||||
"import os\n",
|
|
||||||
"import time\n",
|
|
||||||
"import csv\n",
|
|
||||||
"\n",
|
|
||||||
"from matplotlib import pyplot as plt\n",
|
|
||||||
"import numpy as np\n",
|
|
||||||
"import pandas as pd\n",
|
|
||||||
"from sklearn import datasets\n",
|
|
||||||
"\n",
|
|
||||||
"import azureml.core\n",
|
|
||||||
"from azureml.core.experiment import Experiment\n",
|
|
||||||
"from azureml.core.workspace import Workspace\n",
|
|
||||||
"from azureml.train.automl import AutoMLConfig"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"ws = Workspace.from_config()\n",
|
|
||||||
"\n",
|
|
||||||
"# Choose a name for the run history container in the workspace.\n",
|
|
||||||
"experiment_name = 'automl-remote-dsvm'\n",
|
|
||||||
"project_folder = './project'\n",
|
|
||||||
"\n",
|
|
||||||
"experiment = Experiment(ws, experiment_name)\n",
|
|
||||||
"\n",
|
|
||||||
"output = {}\n",
|
|
||||||
"output['SDK version'] = azureml.core.VERSION\n",
|
|
||||||
"output['Subscription ID'] = ws.subscription_id\n",
|
|
||||||
"output['Workspace Name'] = ws.name\n",
|
|
||||||
"output['Resource Group'] = ws.resource_group\n",
|
|
||||||
"output['Location'] = ws.location\n",
|
|
||||||
"output['Project Directory'] = project_folder\n",
|
|
||||||
"output['Experiment Name'] = experiment.name\n",
|
|
||||||
"pd.set_option('display.max_colwidth', -1)\n",
|
|
||||||
"outputDf = pd.DataFrame(data = output, index = [''])\n",
|
|
||||||
"outputDf.T"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Create a Remote Linux DSVM\n",
|
|
||||||
"**Note:** If creation fails with a message about Marketplace purchase eligibility, start creation of a DSVM through the [Azure portal](https://portal.azure.com), and select \"Want to create programmatically\" to enable programmatic creation. Once you've enabled this setting, you can exit the portal without actually creating the DSVM, and creation of the DSVM through the notebook should work.\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.compute import DsvmCompute\n",
|
|
||||||
"\n",
|
|
||||||
"dsvm_name = 'mydsvma'\n",
|
|
||||||
"try:\n",
|
|
||||||
" dsvm_compute = DsvmCompute(ws, dsvm_name)\n",
|
|
||||||
" print('Found an existing DSVM.')\n",
|
|
||||||
"except:\n",
|
|
||||||
" print('Creating a new DSVM.')\n",
|
|
||||||
" dsvm_config = DsvmCompute.provisioning_configuration(vm_size = \"Standard_D2_v2\")\n",
|
|
||||||
" dsvm_compute = DsvmCompute.create(ws, name = dsvm_name, provisioning_configuration = dsvm_config)\n",
|
|
||||||
" dsvm_compute.wait_for_completion(show_output = True)\n",
|
|
||||||
"    print(\"Waiting 90 seconds for ssh to be accessible\")\n",
|
|
||||||
" time.sleep(90) # Wait for ssh to be accessible"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Data\n",
|
|
||||||
"For remote executions, you need to make the data accessible from the remote compute.\n",
|
|
||||||
"This can be done by uploading the data to DataStore.\n",
|
|
||||||
"In this example, we upload scikit-learn's [load_digits](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html) data."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"data_train = datasets.load_digits()\n",
|
|
||||||
"\n",
|
|
||||||
"if not os.path.isdir('data'):\n",
|
|
||||||
" os.mkdir('data')\n",
|
|
||||||
" \n",
|
|
||||||
"if not os.path.exists(project_folder):\n",
|
|
||||||
" os.makedirs(project_folder)\n",
|
|
||||||
" \n",
|
|
||||||
"pd.DataFrame(data_train.data).to_csv(\"data/X_train.tsv\", index=False, header=False, quoting=csv.QUOTE_ALL, sep=\"\\t\")\n",
|
|
||||||
"pd.DataFrame(data_train.target).to_csv(\"data/y_train.tsv\", index=False, header=False, sep=\"\\t\")\n",
|
|
||||||
"\n",
|
|
||||||
"ds = ws.get_default_datastore()\n",
|
|
||||||
"ds.upload(src_dir='./data', target_path='re_data', overwrite=True, show_progress=True)\n",
|
|
||||||
"\n",
|
|
||||||
"from azureml.core.runconfig import DataReferenceConfiguration\n",
|
|
||||||
"dr = DataReferenceConfiguration(datastore_name=ds.name, \n",
|
|
||||||
" path_on_datastore='re_data', \n",
|
|
||||||
" path_on_compute='/tmp/azureml_runs',\n",
|
|
||||||
" mode='download', # download files from datastore to compute target\n",
|
|
||||||
" overwrite=False)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.runconfig import RunConfiguration\n",
|
|
||||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
|
||||||
"\n",
|
|
||||||
"# create a new RunConfig object\n",
|
|
||||||
"conda_run_config = RunConfiguration(framework=\"python\")\n",
|
|
||||||
"\n",
|
|
||||||
"# Set compute target to the Linux DSVM\n",
|
|
||||||
"conda_run_config.target = dsvm_compute\n",
|
|
||||||
"\n",
|
|
||||||
"# set the data reference of the run configuration\n",
|
|
||||||
"conda_run_config.data_references = {ds.name: dr}\n",
|
|
||||||
"\n",
|
|
||||||
"cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]'], conda_packages=['numpy','py-xgboost<=0.80'])\n",
|
|
||||||
"conda_run_config.environment.python.conda_dependencies = cd"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"%%writefile $project_folder/get_data.py\n",
|
|
||||||
"\n",
|
|
||||||
"import pandas as pd\n",
|
|
||||||
"\n",
|
|
||||||
"def get_data():\n",
|
|
||||||
" X_train = pd.read_csv(\"/tmp/azureml_runs/re_data/X_train.tsv\", delimiter=\"\\t\", header=None, quotechar='\"')\n",
|
|
||||||
" y_train = pd.read_csv(\"/tmp/azureml_runs/re_data/y_train.tsv\", delimiter=\"\\t\", header=None, quotechar='\"')\n",
|
|
||||||
"\n",
|
|
||||||
" return { \"X\" : X_train.values, \"y\" : y_train[0].values }\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Train\n",
|
|
||||||
"\n",
|
|
||||||
"You can specify `automl_settings` as `**kwargs` as well. Also note that you can use a `get_data()` function for local executions too.\n",
|
|
||||||
"\n",
|
|
||||||
"**Note:** When using Remote DSVM, you can't pass Numpy arrays directly to the fit method.\n",
|
|
||||||
"\n",
|
|
||||||
"|Property|Description|\n",
|
|
||||||
"|-|-|\n",
|
|
||||||
"|**primary_metric**|This is the metric that you want to optimize. Classification supports the following primary metrics: <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>average_precision_score_weighted</i><br><i>norm_macro_recall</i><br><i>precision_score_weighted</i>|\n",
|
|
||||||
"|**iteration_timeout_minutes**|Time limit in minutes for each iteration.|\n",
|
|
||||||
"|**iterations**|Number of iterations. In each iteration AutoML trains a specific pipeline with the data.|\n",
|
|
||||||
"|**n_cross_validations**|Number of cross validation splits.|\n",
|
|
||||||
"|**max_concurrent_iterations**|Maximum number of iterations to execute in parallel. This should be less than the number of cores on the DSVM.|"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"automl_settings = {\n",
|
|
||||||
" \"iteration_timeout_minutes\": 10,\n",
|
|
||||||
" \"iterations\": 20,\n",
|
|
||||||
" \"n_cross_validations\": 5,\n",
|
|
||||||
" \"primary_metric\": 'AUC_weighted',\n",
|
|
||||||
" \"preprocess\": False,\n",
|
|
||||||
" \"max_concurrent_iterations\": 2,\n",
|
|
||||||
" \"verbosity\": logging.INFO\n",
|
|
||||||
"}\n",
|
|
||||||
"\n",
|
|
||||||
"automl_config = AutoMLConfig(task = 'classification',\n",
|
|
||||||
" debug_log = 'automl_errors.log',\n",
|
|
||||||
" path = project_folder, \n",
|
|
||||||
" run_configuration=conda_run_config,\n",
|
|
||||||
" data_script = project_folder + \"/get_data.py\",\n",
|
|
||||||
" **automl_settings\n",
|
|
||||||
" )\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"**Note:** The first run on a new DSVM may take several minutes to prepare the environment."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Call the `submit` method on the experiment object and pass the run configuration. For remote runs the execution is asynchronous, so you will see the iterations get populated as they complete. You can interact with the widgets and models even when the experiment is running to retrieve the best model up to that point. Once you are satisfied with the model, you can cancel a particular iteration or the whole run.\n",
|
|
||||||
"\n",
|
|
||||||
"In this example, we specify `show_output = False` to suppress console output while the run is in progress."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"remote_run = experiment.submit(automl_config, show_output = False)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"remote_run"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Results\n",
|
|
||||||
"\n",
|
|
||||||
"#### Loading Executed Runs\n",
|
|
||||||
"In case you need to load a previously executed run, enable the cell below and replace the `run_id` value."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "raw",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"remote_run = AutoMLRun(experiment=experiment, run_id = 'AutoML_480d3ed6-fc94-44aa-8f4e-0b945db9d3ef')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"#### Widget for Monitoring Runs\n",
|
|
||||||
"\n",
|
|
||||||
"The widget will first report a \"loading\" status while running the first iteration. After completing the first iteration, an auto-updating graph and table will be shown. The widget will refresh once per minute, so you should see the graph update as child runs complete.\n",
|
|
||||||
"\n",
|
|
||||||
"You can click on a pipeline to see run properties and output logs. Logs are also available on the DSVM under `/tmp/azureml_run/{iterationid}/azureml-logs`\n",
|
|
||||||
"\n",
|
|
||||||
"**Note:** The widget displays a link at the bottom. Use this link to open a web interface to explore the individual run details."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.widgets import RunDetails\n",
|
|
||||||
"RunDetails(remote_run).show() "
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Wait until the run finishes.\n",
|
|
||||||
"remote_run.wait_for_completion(show_output = True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"\n",
|
|
||||||
"#### Retrieve All Child Runs\n",
|
|
||||||
"You can also use SDK methods to fetch all the child runs and see individual metrics that we log."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"children = list(remote_run.get_children())\n",
|
|
||||||
"metricslist = {}\n",
|
|
||||||
"for run in children:\n",
|
|
||||||
" properties = run.get_properties()\n",
|
|
||||||
" metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)} \n",
|
|
||||||
" metricslist[int(properties['iteration'])] = metrics\n",
|
|
||||||
"\n",
|
|
||||||
"rundata = pd.DataFrame(metricslist).sort_index(1)\n",
|
|
||||||
"rundata"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Cancelling Runs\n",
|
|
||||||
"\n",
|
|
||||||
"You can cancel ongoing remote runs using the `cancel` and `cancel_iteration` functions."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Cancel the ongoing experiment and stop scheduling new iterations.\n",
|
|
||||||
"# remote_run.cancel()\n",
|
|
||||||
"\n",
|
|
||||||
"# Cancel iteration 1 and move onto iteration 2.\n",
|
|
||||||
"# remote_run.cancel_iteration(1)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Retrieve the Best Model\n",
|
|
||||||
"\n",
|
|
||||||
"Below we select the best pipeline from our iterations. The `get_output` method returns the best run and the fitted model. The Model includes the pipeline and any pre-processing. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"best_run, fitted_model = remote_run.get_output()\n",
|
|
||||||
"print(best_run)\n",
|
|
||||||
"print(fitted_model)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"#### Best Model Based on Any Other Metric\n",
|
|
||||||
"Show the run and the model which has the smallest `log_loss` value:"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"lookup_metric = \"log_loss\"\n",
|
|
||||||
"best_run, fitted_model = remote_run.get_output(metric = lookup_metric)\n",
|
|
||||||
"print(best_run)\n",
|
|
||||||
"print(fitted_model)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"#### Model from a Specific Iteration\n",
|
|
||||||
"Show the run and the model from the third iteration:"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"iteration = 3\n",
|
|
||||||
"third_run, third_model = remote_run.get_output(iteration = iteration)\n",
|
|
||||||
"print(third_run)\n",
|
|
||||||
"print(third_model)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Test\n",
|
|
||||||
"\n",
|
|
||||||
"#### Load Test Data"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"digits = datasets.load_digits()\n",
|
|
||||||
"X_test = digits.data[:10, :]\n",
|
|
||||||
"y_test = digits.target[:10]\n",
|
|
||||||
"images = digits.images[:10]"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"#### Test Our Best Fitted Model"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Randomly select digits and test.\n",
|
|
||||||
"for index in np.random.choice(len(y_test), 2, replace = False):\n",
|
|
||||||
" print(index)\n",
|
|
||||||
" predicted = fitted_model.predict(X_test[index:index + 1])[0]\n",
|
|
||||||
" label = y_test[index]\n",
|
|
||||||
" title = \"Label value = %d Predicted value = %d \" % (label, predicted)\n",
|
|
||||||
" fig = plt.figure(1, figsize=(3,3))\n",
|
|
||||||
" ax1 = fig.add_axes((0,0,.8,.8))\n",
|
|
||||||
" ax1.set_title(title)\n",
|
|
||||||
" plt.imshow(images[index], cmap = plt.cm.gray_r, interpolation = 'nearest')\n",
|
|
||||||
" plt.show()"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"authors": [
|
|
||||||
{
|
|
||||||
"name": "savitam"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3.6",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python36"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.6.6"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 2
|
|
||||||
}
|
|
||||||
709
how-to-use-azureml/data-drift/azure-ml-datadrift.ipynb
Normal file
709
how-to-use-azureml/data-drift/azure-ml-datadrift.ipynb
Normal file
@@ -0,0 +1,709 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Track Data Drift between Training and Inference Data in Production \n",
|
||||||
|
"\n",
|
||||||
|
"With this notebook, you will learn how to enable the DataDrift service to automatically track and determine whether your inference data is drifting from the data your model was initially trained on. The DataDrift service provides metrics and visualizations to help stakeholders identify which specific features cause the concept drift to occur.\n",
|
||||||
|
"\n",
|
||||||
|
"Please email driftfeedback@microsoft.com with any issues. A member from the DataDrift team will respond shortly. \n",
|
||||||
|
"\n",
|
||||||
|
"The DataDrift Public Preview API can be found [here](https://docs.microsoft.com/en-us/python/api/azureml-contrib-datadrift/?view=azure-ml-py). "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
""
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Prerequisites and Setup"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Install the DataDrift package\n",
|
||||||
|
"\n",
|
||||||
|
"Install the azureml-contrib-datadrift, azureml-contrib-opendatasets and lightgbm packages before running this notebook.\n",
|
||||||
|
"```\n",
|
||||||
|
"pip install azureml-contrib-datadrift\n",
|
||||||
|
"pip install azureml-contrib-datasets\n",
|
||||||
|
"pip install lightgbm\n",
|
||||||
|
"```"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Import Dependencies"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import json\n",
|
||||||
|
"import os\n",
|
||||||
|
"import time\n",
|
||||||
|
"from datetime import datetime, timedelta\n",
|
||||||
|
"\n",
|
||||||
|
"import numpy as np\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"import requests\n",
|
||||||
|
"from azureml.contrib.datadrift import DataDriftDetector, AlertConfiguration\n",
|
||||||
|
"from azureml.contrib.opendatasets import NoaaIsdWeather\n",
|
||||||
|
"from azureml.core import Dataset, Workspace, Run\n",
|
||||||
|
"from azureml.core.compute import AksCompute, ComputeTarget\n",
|
||||||
|
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||||
|
"from azureml.core.experiment import Experiment\n",
|
||||||
|
"from azureml.core.image import ContainerImage\n",
|
||||||
|
"from azureml.core.model import Model\n",
|
||||||
|
"from azureml.core.webservice import Webservice, AksWebservice\n",
|
||||||
|
"from azureml.widgets import RunDetails\n",
|
||||||
|
"from sklearn.externals import joblib\n",
|
||||||
|
"from sklearn.model_selection import train_test_split\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Set up Configuraton and Create Azure ML Workspace\n",
|
||||||
|
"\n",
|
||||||
|
"If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [configuration notebook](../../../configuration.ipynb) first if you haven't already to establish your connection to the AzureML Workspace."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Please type in your initials/alias. The prefix is prepended to the names of resources created by this notebook. \n",
|
||||||
|
"prefix = \"dd\"\n",
|
||||||
|
"\n",
|
||||||
|
"# NOTE: Please do not change the model_name, as it's required by the score.py file\n",
|
||||||
|
"model_name = \"driftmodel\"\n",
|
||||||
|
"image_name = \"{}driftimage\".format(prefix)\n",
|
||||||
|
"service_name = \"{}driftservice\".format(prefix)\n",
|
||||||
|
"\n",
|
||||||
|
"# optionally, set email address to receive an email alert for DataDrift\n",
|
||||||
|
"email_address = \"\""
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"ws = Workspace.from_config()\n",
|
||||||
|
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Generate Train/Testing Data\n",
|
||||||
|
"\n",
|
||||||
|
"For this demo, we will use NOAA weather data from [Azure Open Datasets](https://azure.microsoft.com/services/open-datasets/). You may replace this step with your own dataset. "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"usaf_list = ['725724', '722149', '723090', '722159', '723910', '720279',\n",
|
||||||
|
" '725513', '725254', '726430', '720381', '723074', '726682',\n",
|
||||||
|
" '725486', '727883', '723177', '722075', '723086', '724053',\n",
|
||||||
|
" '725070', '722073', '726060', '725224', '725260', '724520',\n",
|
||||||
|
" '720305', '724020', '726510', '725126', '722523', '703333',\n",
|
||||||
|
" '722249', '722728', '725483', '722972', '724975', '742079',\n",
|
||||||
|
" '727468', '722193', '725624', '722030', '726380', '720309',\n",
|
||||||
|
" '722071', '720326', '725415', '724504', '725665', '725424',\n",
|
||||||
|
" '725066']\n",
|
||||||
|
"\n",
|
||||||
|
"columns = ['usaf', 'wban', 'datetime', 'latitude', 'longitude', 'elevation', 'windAngle', 'windSpeed', 'temperature', 'stationName', 'p_k']\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"def enrich_weather_noaa_data(noaa_df):\n",
|
||||||
|
" hours_in_day = 23\n",
|
||||||
|
" week_in_year = 52\n",
|
||||||
|
" \n",
|
||||||
|
" noaa_df[\"hour\"] = noaa_df[\"datetime\"].dt.hour\n",
|
||||||
|
" noaa_df[\"weekofyear\"] = noaa_df[\"datetime\"].dt.week\n",
|
||||||
|
" \n",
|
||||||
|
" noaa_df[\"sine_weekofyear\"] = noaa_df['datetime'].transform(lambda x: np.sin((2*np.pi*x.dt.week-1)/week_in_year))\n",
|
||||||
|
" noaa_df[\"cosine_weekofyear\"] = noaa_df['datetime'].transform(lambda x: np.cos((2*np.pi*x.dt.week-1)/week_in_year))\n",
|
||||||
|
"\n",
|
||||||
|
" noaa_df[\"sine_hourofday\"] = noaa_df['datetime'].transform(lambda x: np.sin(2*np.pi*x.dt.hour/hours_in_day))\n",
|
||||||
|
" noaa_df[\"cosine_hourofday\"] = noaa_df['datetime'].transform(lambda x: np.cos(2*np.pi*x.dt.hour/hours_in_day))\n",
|
||||||
|
" \n",
|
||||||
|
" return noaa_df\n",
|
||||||
|
"\n",
|
||||||
|
"def add_window_col(input_df):\n",
|
||||||
|
" shift_interval = pd.Timedelta('-7 days') # your X days interval\n",
|
||||||
|
" df_shifted = input_df.copy()\n",
|
||||||
|
" df_shifted['datetime'] = df_shifted['datetime'] - shift_interval\n",
|
||||||
|
" df_shifted.drop(list(input_df.columns.difference(['datetime', 'usaf', 'wban', 'sine_hourofday', 'temperature'])), axis=1, inplace=True)\n",
|
||||||
|
"\n",
|
||||||
|
" # merge, keeping only observations where -1 lag is present\n",
|
||||||
|
" df2 = pd.merge(input_df,\n",
|
||||||
|
" df_shifted,\n",
|
||||||
|
" on=['datetime', 'usaf', 'wban', 'sine_hourofday'],\n",
|
||||||
|
" how='inner', # use 'left' to keep observations without lags\n",
|
||||||
|
" suffixes=['', '-7'])\n",
|
||||||
|
" return df2\n",
|
||||||
|
"\n",
|
||||||
|
"def get_noaa_data(start_time, end_time, cols, station_list):\n",
|
||||||
|
" isd = NoaaIsdWeather(start_time, end_time, cols=cols)\n",
|
||||||
|
" # Read into Pandas data frame.\n",
|
||||||
|
" noaa_df = isd.to_pandas_dataframe()\n",
|
||||||
|
" noaa_df = noaa_df.rename(columns={\"stationName\": \"station_name\"})\n",
|
||||||
|
" \n",
|
||||||
|
" df_filtered = noaa_df[noaa_df[\"usaf\"].isin(station_list)]\n",
|
||||||
|
" df_filtered.reset_index(drop=True)\n",
|
||||||
|
" \n",
|
||||||
|
" # Enrich with time features\n",
|
||||||
|
" df_enriched = enrich_weather_noaa_data(df_filtered)\n",
|
||||||
|
" \n",
|
||||||
|
" return df_enriched\n",
|
||||||
|
"\n",
|
||||||
|
"def get_featurized_noaa_df(start_time, end_time, cols, station_list):\n",
|
||||||
|
" df_1 = get_noaa_data(start_time - timedelta(days=7), start_time - timedelta(seconds=1), cols, station_list)\n",
|
||||||
|
" df_2 = get_noaa_data(start_time, end_time, cols, station_list)\n",
|
||||||
|
" noaa_df = pd.concat([df_1, df_2])\n",
|
||||||
|
" \n",
|
||||||
|
" print(\"Adding window feature\")\n",
|
||||||
|
" df_window = add_window_col(noaa_df)\n",
|
||||||
|
" \n",
|
||||||
|
" cat_columns = df_window.dtypes == object\n",
|
||||||
|
" cat_columns = cat_columns[cat_columns == True]\n",
|
||||||
|
" \n",
|
||||||
|
" print(\"Encoding categorical columns\")\n",
|
||||||
|
" df_encoded = pd.get_dummies(df_window, columns=cat_columns.keys().tolist())\n",
|
||||||
|
" \n",
|
||||||
|
" print(\"Dropping unnecessary columns\")\n",
|
||||||
|
" df_featurized = df_encoded.drop(['windAngle', 'windSpeed', 'datetime', 'elevation'], axis=1).dropna().drop_duplicates()\n",
|
||||||
|
" \n",
|
||||||
|
" return df_featurized"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Train model on Jan 1 - 14, 2009 data\n",
|
||||||
|
"df = get_featurized_noaa_df(datetime(2009, 1, 1), datetime(2009, 1, 14, 23, 59, 59), columns, usaf_list)\n",
|
||||||
|
"df.head()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"label = \"temperature\"\n",
|
||||||
|
"x_df = df.drop(label, axis=1)\n",
|
||||||
|
"y_df = df[[label]]\n",
|
||||||
|
"x_train, x_test, y_train, y_test = train_test_split(df, y_df, test_size=0.2, random_state=223)\n",
|
||||||
|
"print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)\n",
|
||||||
|
"\n",
|
||||||
|
"training_dir = 'outputs/training'\n",
|
||||||
|
"training_file = \"training.csv\"\n",
|
||||||
|
"\n",
|
||||||
|
"# Generate training dataframe to register as Training Dataset\n",
|
||||||
|
"os.makedirs(training_dir, exist_ok=True)\n",
|
||||||
|
"training_df = pd.merge(x_train.drop(label, axis=1), y_train, left_index=True, right_index=True)\n",
|
||||||
|
"training_df.to_csv(training_dir + \"/\" + training_file)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Create/Register Training Dataset"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"dataset_name = \"dataset\"\n",
|
||||||
|
"name_suffix = datetime.utcnow().strftime(\"%Y-%m-%d-%H-%M-%S\")\n",
|
||||||
|
"snapshot_name = \"snapshot-{}\".format(name_suffix)\n",
|
||||||
|
"\n",
|
||||||
|
"dstore = ws.get_default_datastore()\n",
|
||||||
|
"dstore.upload(training_dir, \"data/training\", show_progress=True)\n",
|
||||||
|
"dpath = dstore.path(\"data/training/training.csv\")\n",
|
||||||
|
"trainingDataset = Dataset.auto_read_files(dpath, include_path=True)\n",
|
||||||
|
"trainingDataset = trainingDataset.register(workspace=ws, name=dataset_name, description=\"dset\", exist_ok=True)\n",
|
||||||
|
"\n",
|
||||||
|
"trainingDataSnapshot = trainingDataset.create_snapshot(snapshot_name=snapshot_name, compute_target=None, create_data_snapshot=True)\n",
|
||||||
|
"datasets = [(Dataset.Scenario.TRAINING, trainingDataSnapshot)]\n",
|
||||||
|
"print(\"dataset registration done.\\n\")\n",
|
||||||
|
"datasets"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Train and Save Model"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import lightgbm as lgb\n",
|
||||||
|
"\n",
|
||||||
|
"train = lgb.Dataset(data=x_train, \n",
|
||||||
|
" label=y_train)\n",
|
||||||
|
"\n",
|
||||||
|
"test = lgb.Dataset(data=x_test, \n",
|
||||||
|
" label=y_test,\n",
|
||||||
|
" reference=train)\n",
|
||||||
|
"\n",
|
||||||
|
"params = {'learning_rate' : 0.1,\n",
|
||||||
|
" 'boosting' : 'gbdt',\n",
|
||||||
|
" 'metric' : 'rmse',\n",
|
||||||
|
" 'feature_fraction' : 1,\n",
|
||||||
|
" 'bagging_fraction' : 1,\n",
|
||||||
|
" 'max_depth': 6,\n",
|
||||||
|
" 'num_leaves' : 31,\n",
|
||||||
|
" 'objective' : 'regression',\n",
|
||||||
|
" 'bagging_freq' : 1,\n",
|
||||||
|
" \"verbose\": -1,\n",
|
||||||
|
" 'min_data_per_leaf': 100}\n",
|
||||||
|
"\n",
|
||||||
|
"model = lgb.train(params, \n",
|
||||||
|
" num_boost_round=500,\n",
|
||||||
|
" train_set=train,\n",
|
||||||
|
" valid_sets=[train, test],\n",
|
||||||
|
" verbose_eval=50,\n",
|
||||||
|
" early_stopping_rounds=25)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"model_file = 'outputs/{}.pkl'.format(model_name)\n",
|
||||||
|
"\n",
|
||||||
|
"os.makedirs('outputs', exist_ok=True)\n",
|
||||||
|
"joblib.dump(model, model_file)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Register Model"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"model = Model.register(model_path=model_file,\n",
|
||||||
|
" model_name=model_name,\n",
|
||||||
|
" workspace=ws,\n",
|
||||||
|
" datasets=datasets)\n",
|
||||||
|
"\n",
|
||||||
|
"print(model_name, image_name, service_name, model)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Deploy Model To AKS"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Prepare Environment"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn', 'joblib', 'lightgbm', 'pandas'],\n",
|
||||||
|
" pip_packages=['azureml-monitoring', 'azureml-sdk[automl]'])\n",
|
||||||
|
"\n",
|
||||||
|
"with open(\"myenv.yml\",\"w\") as f:\n",
|
||||||
|
" f.write(myenv.serialize_to_string())"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Create Image"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Image creation may take up to 15 minutes.\n",
|
||||||
|
"\n",
|
||||||
|
"image_name = image_name + str(model.version)\n",
|
||||||
|
"\n",
|
||||||
|
"if not image_name in ws.images:\n",
|
||||||
|
" # Use the score.py defined in this directory as the execution script\n",
|
||||||
|
" # NOTE: The Model Data Collector must be enabled in the execution script for DataDrift to run correctly\n",
|
||||||
|
" image_config = ContainerImage.image_configuration(execution_script=\"score.py\",\n",
|
||||||
|
" runtime=\"python\",\n",
|
||||||
|
" conda_file=\"myenv.yml\",\n",
|
||||||
|
" description=\"Image with weather dataset model\")\n",
|
||||||
|
" image = ContainerImage.create(name=image_name,\n",
|
||||||
|
" models=[model],\n",
|
||||||
|
" image_config=image_config,\n",
|
||||||
|
" workspace=ws)\n",
|
||||||
|
"\n",
|
||||||
|
" image.wait_for_creation(show_output=True)\n",
|
||||||
|
"else:\n",
|
||||||
|
" image = ws.images[image_name]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Create Compute Target"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"aks_name = 'dd-demo-e2e'\n",
|
||||||
|
"prov_config = AksCompute.provisioning_configuration()\n",
|
||||||
|
"\n",
|
||||||
|
"if not aks_name in ws.compute_targets:\n",
|
||||||
|
" aks_target = ComputeTarget.create(workspace=ws,\n",
|
||||||
|
" name=aks_name,\n",
|
||||||
|
" provisioning_configuration=prov_config)\n",
|
||||||
|
"\n",
|
||||||
|
" aks_target.wait_for_completion(show_output=True)\n",
|
||||||
|
" print(aks_target.provisioning_state)\n",
|
||||||
|
" print(aks_target.provisioning_errors)\n",
|
||||||
|
"else:\n",
|
||||||
|
" aks_target=ws.compute_targets[aks_name]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Deploy Service"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"aks_service_name = service_name\n",
|
||||||
|
"\n",
|
||||||
|
"if not aks_service_name in ws.webservices:\n",
|
||||||
|
" aks_config = AksWebservice.deploy_configuration(collect_model_data=True, enable_app_insights=True)\n",
|
||||||
|
" aks_service = Webservice.deploy_from_image(workspace=ws,\n",
|
||||||
|
" name=aks_service_name,\n",
|
||||||
|
" image=image,\n",
|
||||||
|
" deployment_config=aks_config,\n",
|
||||||
|
" deployment_target=aks_target)\n",
|
||||||
|
" aks_service.wait_for_deployment(show_output=True)\n",
|
||||||
|
" print(aks_service.state)\n",
|
||||||
|
"else:\n",
|
||||||
|
" aks_service = ws.webservices[aks_service_name]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Run DataDrift Analysis"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Send Scoring Data to Service"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Download Scoring Data"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Score Model on March 15, 2016 data\n",
|
||||||
|
"scoring_df = get_noaa_data(datetime(2016, 3, 15) - timedelta(days=7), datetime(2016, 3, 16), columns, usaf_list)\n",
|
||||||
|
"# Add the window feature column\n",
|
||||||
|
"scoring_df = add_window_col(scoring_df)\n",
|
||||||
|
"\n",
|
||||||
|
"# Drop features not used by the model\n",
|
||||||
|
"print(\"Dropping unnecessary columns\")\n",
|
||||||
|
"scoring_df = scoring_df.drop(['windAngle', 'windSpeed', 'datetime', 'elevation'], axis=1).dropna()\n",
|
||||||
|
"scoring_df.head()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# One Hot Encode the scoring dataset to match the training dataset schema\n",
|
||||||
|
"columns_dict = model.datasets[\"training\"][0].get_profile().columns\n",
|
||||||
|
"extra_cols = ('Path', 'Column1')\n",
|
||||||
|
"for k in extra_cols:\n",
|
||||||
|
" columns_dict.pop(k, None)\n",
|
||||||
|
"training_columns = list(columns_dict.keys())\n",
|
||||||
|
"\n",
|
||||||
|
"categorical_columns = scoring_df.dtypes == object\n",
|
||||||
|
"categorical_columns = categorical_columns[categorical_columns == True]\n",
|
||||||
|
"\n",
|
||||||
|
"test_df = pd.get_dummies(scoring_df[categorical_columns.keys().tolist()])\n",
|
||||||
|
"encoded_df = scoring_df.join(test_df)\n",
|
||||||
|
"\n",
|
||||||
|
"# Populate missing OHE columns with 0 values to match traning dataset schema\n",
|
||||||
|
"difference = list(set(training_columns) - set(encoded_df.columns.tolist()))\n",
|
||||||
|
"for col in difference:\n",
|
||||||
|
" encoded_df[col] = 0\n",
|
||||||
|
"encoded_df.head()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Serialize dataframe to list of row dictionaries\n",
|
||||||
|
"encoded_dict = encoded_df.to_dict('records')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Submit Scoring Data to Service"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"%%time\n",
|
||||||
|
"\n",
|
||||||
|
"# retreive the API keys. AML generates two keys.\n",
|
||||||
|
"key1, key2 = aks_service.get_keys()\n",
|
||||||
|
"\n",
|
||||||
|
"total_count = len(scoring_df)\n",
|
||||||
|
"i = 0\n",
|
||||||
|
"load = []\n",
|
||||||
|
"for row in encoded_dict:\n",
|
||||||
|
" load.append(row)\n",
|
||||||
|
" i = i + 1\n",
|
||||||
|
" if i % 100 == 0:\n",
|
||||||
|
" payload = json.dumps({\"data\": load})\n",
|
||||||
|
" \n",
|
||||||
|
" # construct raw HTTP request and send to the service\n",
|
||||||
|
" payload_binary = bytes(payload,encoding = 'utf8')\n",
|
||||||
|
" headers = {'Content-Type':'application/json', 'Authorization': 'Bearer ' + key1}\n",
|
||||||
|
" resp = requests.post(aks_service.scoring_uri, payload_binary, headers=headers)\n",
|
||||||
|
" \n",
|
||||||
|
" print(\"prediction:\", resp.content, \"Progress: {}/{}\".format(i, total_count)) \n",
|
||||||
|
"\n",
|
||||||
|
" load = []\n",
|
||||||
|
" time.sleep(3)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Configure DataDrift"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"services = [service_name]\n",
|
||||||
|
"start = datetime.now() - timedelta(days=2)\n",
|
||||||
|
"end = datetime(year=2020, month=1, day=22, hour=15, minute=16)\n",
|
||||||
|
"feature_list = ['usaf', 'wban', 'latitude', 'longitude', 'station_name', 'p_k', 'sine_hourofday', 'cosine_hourofday', 'temperature-7']\n",
|
||||||
|
"alert_config = AlertConfiguration([email_address]) if email_address else None\n",
|
||||||
|
"\n",
|
||||||
|
"# there will be an exception indicating using get() method if DataDrift object already exist\n",
|
||||||
|
"try:\n",
|
||||||
|
" datadrift = DataDriftDetector.create(ws, model.name, model.version, services, frequency=\"Day\", alert_config=alert_config)\n",
|
||||||
|
"except KeyError:\n",
|
||||||
|
" datadrift = DataDriftDetector.get(ws, model.name, model.version)\n",
|
||||||
|
" \n",
|
||||||
|
"print(\"Details of DataDrift Object:\\n{}\".format(datadrift))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Run an Adhoc DataDriftDetector Run"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"target_date = datetime.today()\n",
|
||||||
|
"run = datadrift.run(target_date, services, feature_list=feature_list, create_compute_target=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"exp = Experiment(ws, datadrift._id)\n",
|
||||||
|
"dd_run = Run(experiment=exp, run_id=run)\n",
|
||||||
|
"RunDetails(dd_run).show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Get Drift Analysis Results"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"children = list(dd_run.get_children())\n",
|
||||||
|
"for child in children:\n",
|
||||||
|
" child.wait_for_completion()\n",
|
||||||
|
"\n",
|
||||||
|
"drift_metrics = datadrift.get_output(start_time=start, end_time=end)\n",
|
||||||
|
"drift_metrics"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Show all drift figures, one per serivice.\n",
|
||||||
|
"# If setting with_details is False (by default), only drift will be shown; if it's True, all details will be shown.\n",
|
||||||
|
"\n",
|
||||||
|
"drift_figures = datadrift.show(with_details=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Enable DataDrift Schedule"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"datadrift.enable_schedule()"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "rafarmah"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3.6",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python36"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.6.6"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
3
how-to-use-azureml/data-drift/readme.md
Normal file
3
how-to-use-azureml/data-drift/readme.md
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
## Using data drift APIs
|
||||||
|
|
||||||
|
1. [Detect data drift for a model](azure-ml-datadrift.ipynb): Detect data drift for a deployed model.
|
||||||
58
how-to-use-azureml/data-drift/score.py
Normal file
58
how-to-use-azureml/data-drift/score.py
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
import pickle
|
||||||
|
import json
|
||||||
|
import numpy
|
||||||
|
import azureml.train.automl
|
||||||
|
from sklearn.externals import joblib
|
||||||
|
from sklearn.linear_model import Ridge
|
||||||
|
from azureml.core.model import Model
|
||||||
|
from azureml.core.run import Run
|
||||||
|
from azureml.monitoring import ModelDataCollector
|
||||||
|
import time
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
|
||||||
|
def init():
|
||||||
|
global model, inputs_dc, prediction_dc, feature_names, categorical_features
|
||||||
|
|
||||||
|
print("Model is initialized" + time.strftime("%H:%M:%S"))
|
||||||
|
model_path = Model.get_model_path(model_name="driftmodel")
|
||||||
|
model = joblib.load(model_path)
|
||||||
|
|
||||||
|
feature_names = ["usaf", "wban", "latitude", "longitude", "station_name", "p_k",
|
||||||
|
"sine_weekofyear", "cosine_weekofyear", "sine_hourofday", "cosine_hourofday",
|
||||||
|
"temperature-7"]
|
||||||
|
|
||||||
|
categorical_features = ["usaf", "wban", "p_k", "station_name"]
|
||||||
|
|
||||||
|
inputs_dc = ModelDataCollector(model_name="driftmodel",
|
||||||
|
identifier="inputs",
|
||||||
|
feature_names=feature_names)
|
||||||
|
|
||||||
|
prediction_dc = ModelDataCollector("driftmodel",
|
||||||
|
identifier="predictions",
|
||||||
|
feature_names=["temperature"])
|
||||||
|
|
||||||
|
|
||||||
|
def run(raw_data):
|
||||||
|
global inputs_dc, prediction_dc
|
||||||
|
|
||||||
|
try:
|
||||||
|
data = json.loads(raw_data)["data"]
|
||||||
|
data = pd.DataFrame(data)
|
||||||
|
|
||||||
|
# Remove the categorical features as the model expects OHE values
|
||||||
|
input_data = data.drop(categorical_features, axis=1)
|
||||||
|
|
||||||
|
result = model.predict(input_data)
|
||||||
|
|
||||||
|
# Collect the non-OHE dataframe
|
||||||
|
collected_df = data[feature_names]
|
||||||
|
|
||||||
|
inputs_dc.collect(collected_df.values)
|
||||||
|
prediction_dc.collect(result)
|
||||||
|
return result.tolist()
|
||||||
|
except Exception as e:
|
||||||
|
error = str(e)
|
||||||
|
|
||||||
|
print(error + time.strftime("%H:%M:%S"))
|
||||||
|
return error
|
||||||
@@ -344,7 +344,9 @@
|
|||||||
"### 5.a. Create Client\n",
|
"### 5.a. Create Client\n",
|
||||||
"The image supports gRPC and the TensorFlow Serving \"predict\" API. We have a client that can call into the docker image to get predictions. \n",
|
"The image supports gRPC and the TensorFlow Serving \"predict\" API. We have a client that can call into the docker image to get predictions. \n",
|
||||||
"\n",
|
"\n",
|
||||||
"**Note:** If you chose to use auth_enabled=True when creating your AksWebservice.deploy_configuration(), see documentation [here](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.webservice(class)?view=azure-ml-py#get-keys--) on how to retrieve your keys and use either key as an argument to PredictionClient(...,access_token=key)."
|
"**Note:** If you chose to use auth_enabled=True when creating your AksWebservice.deploy_configuration(), see documentation [here](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.webservice(class)?view=azure-ml-py#get-keys--) on how to retrieve your keys and use either key as an argument to PredictionClient(...,access_token=key).",
|
||||||
|
"\n",
|
||||||
|
"**WARNING:** If you are running on Azure Notebooks free compute, you will not be able to make outgoing calls to your service. Try locating your client on a different machine to consume it."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -417,7 +417,9 @@
|
|||||||
"### 7.a. Create Client\n",
|
"### 7.a. Create Client\n",
|
||||||
"The image supports gRPC and the TensorFlow Serving \"predict\" API. We have a client that can call into the docker image to get predictions.\n",
|
"The image supports gRPC and the TensorFlow Serving \"predict\" API. We have a client that can call into the docker image to get predictions.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"**Note:** If you chose to use auth_enabled=True when creating your AksWebservice, see documentation [here](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.webservice(class)?view=azure-ml-py#get-keys--) on how to retrieve your keys and use either key as an argument to PredictionClient(...,access_token=key)."
|
"**Note:** If you chose to use auth_enabled=True when creating your AksWebservice, see documentation [here](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.webservice(class)?view=azure-ml-py#get-keys--) on how to retrieve your keys and use either key as an argument to PredictionClient(...,access_token=key).",
|
||||||
|
"\n",
|
||||||
|
"**WARNING:** If you are running on Azure Notebooks free compute, you will not be able to make outgoing calls to your service. Try locating your client on a different machine to consume it."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -702,7 +702,9 @@
|
|||||||
"### 9.a. Create Client\n",
|
"### 9.a. Create Client\n",
|
||||||
"The image supports gRPC and the TensorFlow Serving \"predict\" API. We have a client that can call into the docker image to get predictions. \n",
|
"The image supports gRPC and the TensorFlow Serving \"predict\" API. We have a client that can call into the docker image to get predictions. \n",
|
||||||
"\n",
|
"\n",
|
||||||
"**Note:** If you chose to use auth_enabled=True when creating your AksWebservice.deploy_configuration(), see documentation [here](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.webservice(class)?view=azure-ml-py#get-keys--) on how to retrieve your keys and use either key as an argument to PredictionClient(...,access_token=key)."
|
"**Note:** If you chose to use auth_enabled=True when creating your AksWebservice.deploy_configuration(), see documentation [here](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.webservice(class)?view=azure-ml-py#get-keys--) on how to retrieve your keys and use either key as an argument to PredictionClient(...,access_token=key).",
|
||||||
|
"\n",
|
||||||
|
"**WARNING:** If you are running on Azure Notebooks free compute, you will not be able to make outgoing calls to your service. Try locating your client on a different machine to consume it."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -1,498 +1,498 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"# Enabling App Insights for Services in Production\n",
|
"# Enabling App Insights for Services in Production\n",
|
||||||
"With this notebook, you can learn how to enable App Insights for standard service monitoring, plus, we provide examples for doing custom logging within a scoring files in a model. \n",
|
"With this notebook, you can learn how to enable App Insights for standard service monitoring, plus, we provide examples for doing custom logging within a scoring files in a model. \n",
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"## What does Application Insights monitor?\n",
|
"## What does Application Insights monitor?\n",
|
||||||
"It monitors request rates, response times, failure rates, etc. For more information visit [App Insights docs.](https://docs.microsoft.com/en-us/azure/application-insights/app-insights-overview)\n",
|
"It monitors request rates, response times, failure rates, etc. For more information visit [App Insights docs.](https://docs.microsoft.com/en-us/azure/application-insights/app-insights-overview)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"## What is different compared to standard production deployment process?\n",
|
"## What is different compared to standard production deployment process?\n",
|
||||||
"If you want to enable generic App Insights for a service run:\n",
|
"If you want to enable generic App Insights for a service run:\n",
|
||||||
"```python\n",
|
"```python\n",
|
||||||
"aks_service= Webservice(ws, \"aks-w-dc2\")\n",
|
"aks_service= Webservice(ws, \"aks-w-dc2\")\n",
|
||||||
"aks_service.update(enable_app_insights=True)```\n",
|
"aks_service.update(enable_app_insights=True)```\n",
|
||||||
"Where \"aks-w-dc2\" is your service name. You can also do this from the Azure Portal under your Workspace--> deployments--> Select deployment--> Edit--> Advanced Settings--> Select \"Enable AppInsights diagnostics\"\n",
|
"Where \"aks-w-dc2\" is your service name. You can also do this from the Azure Portal under your Workspace--> deployments--> Select deployment--> Edit--> Advanced Settings--> Select \"Enable AppInsights diagnostics\"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"If you want to log custom traces, you will follow the standard deplyment process for AKS and you will:\n",
|
"If you want to log custom traces, you will follow the standard deplyment process for AKS and you will:\n",
|
||||||
"1. Update scoring file.\n",
|
"1. Update scoring file.\n",
|
||||||
"2. Update aks configuration.\n",
|
"2. Update aks configuration.\n",
|
||||||
"3. Build new image and deploy it. "
|
"3. Build new image and deploy it. "
|
||||||
]
|
]
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
""
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## 1. Import your dependencies"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core import Workspace\n",
|
|
||||||
"from azureml.core.compute import AksCompute, ComputeTarget\n",
|
|
||||||
"from azureml.core.webservice import AksWebservice\n",
|
|
||||||
"import azureml.core\n",
|
|
||||||
"import json\n",
|
|
||||||
"print(azureml.core.VERSION)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## 2. Set up your configuration and create a workspace\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"ws = Workspace.from_config()\n",
|
|
||||||
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## 3. Register Model\n",
|
|
||||||
"Register an existing trained model, add descirption and tags."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"#Register the model\n",
|
|
||||||
"from azureml.core.model import Model\n",
|
|
||||||
"model = Model.register(model_path = \"sklearn_regression_model.pkl\", # this points to a local file\n",
|
|
||||||
" model_name = \"sklearn_regression_model.pkl\", # this is the name the model is registered as\n",
|
|
||||||
" tags = {'area': \"diabetes\", 'type': \"regression\"},\n",
|
|
||||||
" description = \"Ridge regression model to predict diabetes\",\n",
|
|
||||||
" workspace = ws)\n",
|
|
||||||
"\n",
|
|
||||||
"print(model.name, model.description, model.version)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## 4. *Update your scoring file with custom print statements*\n",
|
|
||||||
"Here is an example:\n",
|
|
||||||
"### a. In your init function add:\n",
|
|
||||||
"```python\n",
|
|
||||||
"print (\"model initialized\" + time.strftime(\"%H:%M:%S\"))```\n",
|
|
||||||
"\n",
|
|
||||||
"### b. In your run function add:\n",
|
|
||||||
"```python\n",
|
|
||||||
"print (\"Prediction created\" + time.strftime(\"%H:%M:%S\"))```"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"%%writefile score.py\n",
|
|
||||||
"import pickle\n",
|
|
||||||
"import json\n",
|
|
||||||
"import numpy \n",
|
|
||||||
"from sklearn.externals import joblib\n",
|
|
||||||
"from sklearn.linear_model import Ridge\n",
|
|
||||||
"from azureml.core.model import Model\n",
|
|
||||||
"import time\n",
|
|
||||||
"\n",
|
|
||||||
"def init():\n",
|
|
||||||
" global model\n",
|
|
||||||
" #Print statement for appinsights custom traces:\n",
|
|
||||||
" print (\"model initialized\" + time.strftime(\"%H:%M:%S\"))\n",
|
|
||||||
" \n",
|
|
||||||
" # note here \"sklearn_regression_model.pkl\" is the name of the model registered under the workspace\n",
|
|
||||||
" # this call should return the path to the model.pkl file on the local disk.\n",
|
|
||||||
" model_path = Model.get_model_path(model_name = 'sklearn_regression_model.pkl')\n",
|
|
||||||
" \n",
|
|
||||||
" # deserialize the model file back into a sklearn model\n",
|
|
||||||
" model = joblib.load(model_path)\n",
|
|
||||||
" \n",
|
|
||||||
"\n",
|
|
||||||
"# note you can pass in multiple rows for scoring\n",
|
|
||||||
"def run(raw_data):\n",
|
|
||||||
" try:\n",
|
|
||||||
" data = json.loads(raw_data)['data']\n",
|
|
||||||
" data = numpy.array(data)\n",
|
|
||||||
" result = model.predict(data)\n",
|
|
||||||
" print (\"Prediction created\" + time.strftime(\"%H:%M:%S\"))\n",
|
|
||||||
" # you can return any datatype as long as it is JSON-serializable\n",
|
|
||||||
" return result.tolist()\n",
|
|
||||||
" except Exception as e:\n",
|
|
||||||
" error = str(e)\n",
|
|
||||||
" print (error + time.strftime(\"%H:%M:%S\"))\n",
|
|
||||||
" return error"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## 5. *Create myenv.yml file*"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.conda_dependencies import CondaDependencies \n",
|
|
||||||
"\n",
|
|
||||||
"myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn'])\n",
|
|
||||||
"\n",
|
|
||||||
"with open(\"myenv.yml\",\"w\") as f:\n",
|
|
||||||
" f.write(myenv.serialize_to_string())"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## 6. Create your new Image"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.image import ContainerImage\n",
|
|
||||||
"\n",
|
|
||||||
"image_config = ContainerImage.image_configuration(execution_script = \"score.py\",\n",
|
|
||||||
" runtime = \"python\",\n",
|
|
||||||
" conda_file = \"myenv.yml\",\n",
|
|
||||||
" description = \"Image with ridge regression model\",\n",
|
|
||||||
" tags = {'area': \"diabetes\", 'type': \"regression\"}\n",
|
|
||||||
" )\n",
|
|
||||||
"\n",
|
|
||||||
"image = ContainerImage.create(name = \"myimage1\",\n",
|
|
||||||
" # this is the model object\n",
|
|
||||||
" models = [model],\n",
|
|
||||||
" image_config = image_config,\n",
|
|
||||||
" workspace = ws)\n",
|
|
||||||
"\n",
|
|
||||||
"image.wait_for_creation(show_output = True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Deploy to ACI (Optional)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.webservice import AciWebservice\n",
|
|
||||||
"\n",
|
|
||||||
"aciconfig = AciWebservice.deploy_configuration(cpu_cores = 1, \n",
|
|
||||||
" memory_gb = 1, \n",
|
|
||||||
" tags = {'area': \"diabetes\", 'type': \"regression\"}, \n",
|
|
||||||
" description = 'Predict diabetes using regression model',\n",
|
|
||||||
" enable_app_insights = True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.webservice import Webservice\n",
|
|
||||||
"\n",
|
|
||||||
"aci_service_name = 'my-aci-service-4'\n",
|
|
||||||
"print(aci_service_name)\n",
|
|
||||||
"aci_service = Webservice.deploy_from_image(deployment_config = aciconfig,\n",
|
|
||||||
" image = image,\n",
|
|
||||||
" name = aci_service_name,\n",
|
|
||||||
" workspace = ws)\n",
|
|
||||||
"aci_service.wait_for_deployment(True)\n",
|
|
||||||
"print(aci_service.state)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"%%time\n",
|
|
||||||
"\n",
|
|
||||||
"test_sample = json.dumps({'data': [\n",
|
|
||||||
" [1,28,13,45,54,6,57,8,8,10], \n",
|
|
||||||
" [101,9,8,37,6,45,4,3,2,41]\n",
|
|
||||||
"]})\n",
|
|
||||||
"test_sample = bytes(test_sample,encoding='utf8')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"if aci_service.state == \"Healthy\":\n",
|
|
||||||
" prediction = aci_service.run(input_data=test_sample)\n",
|
|
||||||
" print(prediction)\n",
|
|
||||||
"else:\n",
|
|
||||||
" raise ValueError(\"Service deployment isn't healthy, can't call the service. Error: \", aci_service.error)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## 7. Deploy to AKS service"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Create AKS compute if you haven't done so."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Use the default configuration (can also provide parameters to customize)\n",
|
|
||||||
"prov_config = AksCompute.provisioning_configuration()\n",
|
|
||||||
"\n",
|
|
||||||
"aks_name = 'my-aks-test3' \n",
|
|
||||||
"# Create the cluster\n",
|
|
||||||
"aks_target = ComputeTarget.create(workspace = ws, \n",
|
|
||||||
" name = aks_name, \n",
|
|
||||||
" provisioning_configuration = prov_config)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"%%time\n",
|
|
||||||
"aks_target.wait_for_completion(show_output = True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"print(aks_target.provisioning_state)\n",
|
|
||||||
"print(aks_target.provisioning_errors)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"If you already have a cluster you can attach the service to it:"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"```python \n",
|
|
||||||
"%%time\n",
|
|
||||||
"resource_id = '/subscriptions/<subscriptionid>/resourcegroups/<resourcegroupname>/providers/Microsoft.ContainerService/managedClusters/<aksservername>'\n",
|
|
||||||
"create_name= 'myaks4'\n",
|
|
||||||
"attach_config = AksCompute.attach_configuration(resource_id=resource_id)\n",
|
|
||||||
"aks_target = ComputeTarget.attach(workspace = ws, \n",
|
|
||||||
" name = create_name, \n",
|
|
||||||
" attach_configuration=attach_config)\n",
|
|
||||||
"## Wait for the operation to complete\n",
|
|
||||||
"aks_target.wait_for_provisioning(True)```"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### a. *Activate App Insights through updating AKS Webservice configuration*\n",
|
|
||||||
"In order to enable App Insights in your service you will need to update your AKS configuration file:"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"#Set the web service configuration\n",
|
|
||||||
"aks_config = AksWebservice.deploy_configuration(enable_app_insights=True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### b. Deploy your service"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"if aks_target.provisioning_state== \"Succeeded\": \n",
|
|
||||||
" aks_service_name ='aks-w-dc5'\n",
|
|
||||||
" aks_service = Webservice.deploy_from_image(workspace = ws, \n",
|
|
||||||
" name = aks_service_name,\n",
|
|
||||||
" image = image,\n",
|
|
||||||
" deployment_config = aks_config,\n",
|
|
||||||
" deployment_target = aks_target\n",
|
|
||||||
" )\n",
|
|
||||||
" aks_service.wait_for_deployment(show_output = True)\n",
|
|
||||||
" print(aks_service.state)\n",
|
|
||||||
"else:\n",
|
|
||||||
" raise ValueError(\"AKS provisioning failed. Error: \", aks_service.error)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## 8. Test your service "
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"%%time\n",
|
|
||||||
"\n",
|
|
||||||
"test_sample = json.dumps({'data': [\n",
|
|
||||||
" [1,28,13,45,54,6,57,8,8,10], \n",
|
|
||||||
" [101,9,8,37,6,45,4,3,2,41]\n",
|
|
||||||
"]})\n",
|
|
||||||
"test_sample = bytes(test_sample,encoding='utf8')\n",
|
|
||||||
"\n",
|
|
||||||
"if aks_service.state == \"Healthy\":\n",
|
|
||||||
" prediction = aks_service.run(input_data=test_sample)\n",
|
|
||||||
" print(prediction)\n",
|
|
||||||
"else:\n",
|
|
||||||
" raise ValueError(\"Service deployment isn't healthy, can't call the service. Error: \", aks_service.error)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## 9. See your service telemetry in App Insights\n",
|
|
||||||
"1. Go to the [Azure Portal](https://portal.azure.com/)\n",
|
|
||||||
"2. All resources--> Select the subscription/resource group where you created your Workspace--> Select the App Insights type\n",
|
|
||||||
"3. Click on the AppInsights resource. You'll see a highlevel dashboard with information on Requests, Server response time and availability.\n",
|
|
||||||
"4. Click on the top banner \"Analytics\"\n",
|
|
||||||
"5. In the \"Schema\" section select \"traces\" and run your query.\n",
|
|
||||||
"6. Voila! All your custom traces should be there."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# Disable App Insights"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"aks_service.update(enable_app_insights=False)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Clean up"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"%%time\n",
|
|
||||||
"aks_service.delete()\n",
|
|
||||||
"aci_service.delete()\n",
|
|
||||||
"image.delete()\n",
|
|
||||||
"model.delete()"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"authors": [
|
|
||||||
{
|
|
||||||
"name": "shipatel"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3.6",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python36"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.6.3"
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
{
|
||||||
"nbformat_minor": 2
|
"cell_type": "markdown",
|
||||||
}
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
""
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 1. Import your dependencies"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core import Workspace\n",
|
||||||
|
"from azureml.core.compute import AksCompute, ComputeTarget\n",
|
||||||
|
"from azureml.core.webservice import AksWebservice\n",
|
||||||
|
"import azureml.core\n",
|
||||||
|
"import json\n",
|
||||||
|
"print(azureml.core.VERSION)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 2. Set up your configuration and create a workspace\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"ws = Workspace.from_config()\n",
|
||||||
|
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 3. Register Model\n",
|
||||||
|
"Register an existing trained model, add descirption and tags."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"#Register the model\n",
|
||||||
|
"from azureml.core.model import Model\n",
|
||||||
|
"model = Model.register(model_path = \"sklearn_regression_model.pkl\", # this points to a local file\n",
|
||||||
|
" model_name = \"sklearn_regression_model.pkl\", # this is the name the model is registered as\n",
|
||||||
|
" tags = {'area': \"diabetes\", 'type': \"regression\"},\n",
|
||||||
|
" description = \"Ridge regression model to predict diabetes\",\n",
|
||||||
|
" workspace = ws)\n",
|
||||||
|
"\n",
|
||||||
|
"print(model.name, model.description, model.version)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 4. *Update your scoring file with custom print statements*\n",
|
||||||
|
"Here is an example:\n",
|
||||||
|
"### a. In your init function add:\n",
|
||||||
|
"```python\n",
|
||||||
|
"print (\"model initialized\" + time.strftime(\"%H:%M:%S\"))```\n",
|
||||||
|
"\n",
|
||||||
|
"### b. In your run function add:\n",
|
||||||
|
"```python\n",
|
||||||
|
"print (\"Prediction created\" + time.strftime(\"%H:%M:%S\"))```"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"%%writefile score.py\n",
|
||||||
|
"import pickle\n",
|
||||||
|
"import json\n",
|
||||||
|
"import numpy \n",
|
||||||
|
"from sklearn.externals import joblib\n",
|
||||||
|
"from sklearn.linear_model import Ridge\n",
|
||||||
|
"from azureml.core.model import Model\n",
|
||||||
|
"import time\n",
|
||||||
|
"\n",
|
||||||
|
"def init():\n",
|
||||||
|
" global model\n",
|
||||||
|
" #Print statement for appinsights custom traces:\n",
|
||||||
|
" print (\"model initialized\" + time.strftime(\"%H:%M:%S\"))\n",
|
||||||
|
" \n",
|
||||||
|
" # note here \"sklearn_regression_model.pkl\" is the name of the model registered under the workspace\n",
|
||||||
|
" # this call should return the path to the model.pkl file on the local disk.\n",
|
||||||
|
" model_path = Model.get_model_path(model_name = 'sklearn_regression_model.pkl')\n",
|
||||||
|
" \n",
|
||||||
|
" # deserialize the model file back into a sklearn model\n",
|
||||||
|
" model = joblib.load(model_path)\n",
|
||||||
|
" \n",
|
||||||
|
"\n",
|
||||||
|
"# note you can pass in multiple rows for scoring\n",
|
||||||
|
"def run(raw_data):\n",
|
||||||
|
" try:\n",
|
||||||
|
" data = json.loads(raw_data)['data']\n",
|
||||||
|
" data = numpy.array(data)\n",
|
||||||
|
" result = model.predict(data)\n",
|
||||||
|
" print (\"Prediction created\" + time.strftime(\"%H:%M:%S\"))\n",
|
||||||
|
" # you can return any datatype as long as it is JSON-serializable\n",
|
||||||
|
" return result.tolist()\n",
|
||||||
|
" except Exception as e:\n",
|
||||||
|
" error = str(e)\n",
|
||||||
|
" print (error + time.strftime(\"%H:%M:%S\"))\n",
|
||||||
|
" return error"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 5. *Create myenv.yml file*"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.conda_dependencies import CondaDependencies \n",
|
||||||
|
"\n",
|
||||||
|
"myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn'])\n",
|
||||||
|
"\n",
|
||||||
|
"with open(\"myenv.yml\",\"w\") as f:\n",
|
||||||
|
" f.write(myenv.serialize_to_string())"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 6. Create your new Image"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.image import ContainerImage\n",
|
||||||
|
"\n",
|
||||||
|
"image_config = ContainerImage.image_configuration(execution_script = \"score.py\",\n",
|
||||||
|
" runtime = \"python\",\n",
|
||||||
|
" conda_file = \"myenv.yml\",\n",
|
||||||
|
" description = \"Image with ridge regression model\",\n",
|
||||||
|
" tags = {'area': \"diabetes\", 'type': \"regression\"}\n",
|
||||||
|
" )\n",
|
||||||
|
"\n",
|
||||||
|
"image = ContainerImage.create(name = \"myimage1\",\n",
|
||||||
|
" # this is the model object\n",
|
||||||
|
" models = [model],\n",
|
||||||
|
" image_config = image_config,\n",
|
||||||
|
" workspace = ws)\n",
|
||||||
|
"\n",
|
||||||
|
"image.wait_for_creation(show_output = True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Deploy to ACI (Optional)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.webservice import AciWebservice\n",
|
||||||
|
"\n",
|
||||||
|
"aciconfig = AciWebservice.deploy_configuration(cpu_cores = 1, \n",
|
||||||
|
" memory_gb = 1, \n",
|
||||||
|
" tags = {'area': \"diabetes\", 'type': \"regression\"}, \n",
|
||||||
|
" description = 'Predict diabetes using regression model',\n",
|
||||||
|
" enable_app_insights = True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.webservice import Webservice\n",
|
||||||
|
"\n",
|
||||||
|
"aci_service_name = 'my-aci-service-4'\n",
|
||||||
|
"print(aci_service_name)\n",
|
||||||
|
"aci_service = Webservice.deploy_from_image(deployment_config = aciconfig,\n",
|
||||||
|
" image = image,\n",
|
||||||
|
" name = aci_service_name,\n",
|
||||||
|
" workspace = ws)\n",
|
||||||
|
"aci_service.wait_for_deployment(True)\n",
|
||||||
|
"print(aci_service.state)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"%%time\n",
|
||||||
|
"\n",
|
||||||
|
"test_sample = json.dumps({'data': [\n",
|
||||||
|
" [1,28,13,45,54,6,57,8,8,10], \n",
|
||||||
|
" [101,9,8,37,6,45,4,3,2,41]\n",
|
||||||
|
"]})\n",
|
||||||
|
"test_sample = bytes(test_sample,encoding='utf8')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"if aci_service.state == \"Healthy\":\n",
|
||||||
|
" prediction = aci_service.run(input_data=test_sample)\n",
|
||||||
|
" print(prediction)\n",
|
||||||
|
"else:\n",
|
||||||
|
" raise ValueError(\"Service deployment isn't healthy, can't call the service. Error: \", aci_service.error)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 7. Deploy to AKS service"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Create AKS compute if you haven't done so."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Use the default configuration (can also provide parameters to customize)\n",
|
||||||
|
"prov_config = AksCompute.provisioning_configuration()\n",
|
||||||
|
"\n",
|
||||||
|
"aks_name = 'my-aks-test3' \n",
|
||||||
|
"# Create the cluster\n",
|
||||||
|
"aks_target = ComputeTarget.create(workspace = ws, \n",
|
||||||
|
" name = aks_name, \n",
|
||||||
|
" provisioning_configuration = prov_config)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"%%time\n",
|
||||||
|
"aks_target.wait_for_completion(show_output = True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"print(aks_target.provisioning_state)\n",
|
||||||
|
"print(aks_target.provisioning_errors)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"If you already have a cluster you can attach the service to it:"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"```python \n",
|
||||||
|
"%%time\n",
|
||||||
|
"resource_id = '/subscriptions/<subscriptionid>/resourcegroups/<resourcegroupname>/providers/Microsoft.ContainerService/managedClusters/<aksservername>'\n",
|
||||||
|
"create_name= 'myaks4'\n",
|
||||||
|
"attach_config = AksCompute.attach_configuration(resource_id=resource_id)\n",
|
||||||
|
"aks_target = ComputeTarget.attach(workspace = ws, \n",
|
||||||
|
" name = create_name, \n",
|
||||||
|
" attach_configuration=attach_config)\n",
|
||||||
|
"## Wait for the operation to complete\n",
|
||||||
|
"aks_target.wait_for_provisioning(True)```"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### a. *Activate App Insights through updating AKS Webservice configuration*\n",
|
||||||
|
"In order to enable App Insights in your service you will need to update your AKS configuration file:"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"#Set the web service configuration\n",
|
||||||
|
"aks_config = AksWebservice.deploy_configuration(enable_app_insights=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### b. Deploy your service"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"if aks_target.provisioning_state== \"Succeeded\": \n",
|
||||||
|
" aks_service_name ='aks-w-dc5'\n",
|
||||||
|
" aks_service = Webservice.deploy_from_image(workspace = ws, \n",
|
||||||
|
" name = aks_service_name,\n",
|
||||||
|
" image = image,\n",
|
||||||
|
" deployment_config = aks_config,\n",
|
||||||
|
" deployment_target = aks_target\n",
|
||||||
|
" )\n",
|
||||||
|
" aks_service.wait_for_deployment(show_output = True)\n",
|
||||||
|
" print(aks_service.state)\n",
|
||||||
|
"else:\n",
|
||||||
|
" raise ValueError(\"AKS provisioning failed. Error: \", aks_service.error)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 8. Test your service "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"%%time\n",
|
||||||
|
"\n",
|
||||||
|
"test_sample = json.dumps({'data': [\n",
|
||||||
|
" [1,28,13,45,54,6,57,8,8,10], \n",
|
||||||
|
" [101,9,8,37,6,45,4,3,2,41]\n",
|
||||||
|
"]})\n",
|
||||||
|
"test_sample = bytes(test_sample,encoding='utf8')\n",
|
||||||
|
"\n",
|
||||||
|
"if aks_service.state == \"Healthy\":\n",
|
||||||
|
" prediction = aks_service.run(input_data=test_sample)\n",
|
||||||
|
" print(prediction)\n",
|
||||||
|
"else:\n",
|
||||||
|
" raise ValueError(\"Service deployment isn't healthy, can't call the service. Error: \", aks_service.error)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 9. See your service telemetry in App Insights\n",
|
||||||
|
"1. Go to the [Azure Portal](https://portal.azure.com/)\n",
|
||||||
|
"2. All resources--> Select the subscription/resource group where you created your Workspace--> Select the App Insights type\n",
|
||||||
|
"3. Click on the AppInsights resource. You'll see a highlevel dashboard with information on Requests, Server response time and availability.\n",
|
||||||
|
"4. Click on the top banner \"Analytics\"\n",
|
||||||
|
"5. In the \"Schema\" section select \"traces\" and run your query.\n",
|
||||||
|
"6. Voila! All your custom traces should be there."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Disable App Insights"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"aks_service.update(enable_app_insights=False)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Clean up"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"%%time\n",
|
||||||
|
"aks_service.delete()\n",
|
||||||
|
"aci_service.delete()\n",
|
||||||
|
"image.delete()\n",
|
||||||
|
"model.delete()"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "shipatel"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3.6",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python36"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.6.3"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
|
|||||||
@@ -1,478 +1,478 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
""
|
""
|
||||||
]
|
]
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# Enabling Data Collection for Models in Production\n",
|
|
||||||
"With this notebook, you can learn how to collect input model data from your Azure Machine Learning service in an Azure Blob storage. Once enabled, this data collected gives you the opportunity:\n",
|
|
||||||
"\n",
|
|
||||||
"* Monitor data drifts as production data enters your model\n",
|
|
||||||
"* Make better decisions on when to retrain or optimize your model\n",
|
|
||||||
"* Retrain your model with the data collected\n",
|
|
||||||
"\n",
|
|
||||||
"## What data is collected?\n",
|
|
||||||
"* Model input data (voice, images, and video are not supported) from services deployed in Azure Kubernetes Cluster (AKS)\n",
|
|
||||||
"* Model predictions using production input data.\n",
|
|
||||||
"\n",
|
|
||||||
"**Note:** pre-aggregation or pre-calculations on this data are done by user and not included in this version of the product.\n",
|
|
||||||
"\n",
|
|
||||||
"## What is different compared to standard production deployment process?\n",
|
|
||||||
"1. Update scoring file.\n",
|
|
||||||
"2. Update yml file with new dependency.\n",
|
|
||||||
"3. Update aks configuration.\n",
|
|
||||||
"4. Build new image and deploy it. "
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## 1. Import your dependencies"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core import Workspace\n",
|
|
||||||
"from azureml.core.compute import AksCompute, ComputeTarget\n",
|
|
||||||
"from azureml.core.webservice import Webservice, AksWebservice\n",
|
|
||||||
"import azureml.core\n",
|
|
||||||
"print(azureml.core.VERSION)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## 2. Set up your configuration and create a workspace"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"ws = Workspace.from_config()\n",
|
|
||||||
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## 3. Register Model\n",
|
|
||||||
"Register an existing trained model, add descirption and tags."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"#Register the model\n",
|
|
||||||
"from azureml.core.model import Model\n",
|
|
||||||
"model = Model.register(model_path = \"sklearn_regression_model.pkl\", # this points to a local file\n",
|
|
||||||
" model_name = \"sklearn_regression_model.pkl\", # this is the name the model is registered as\n",
|
|
||||||
" tags = {'area': \"diabetes\", 'type': \"regression\"},\n",
|
|
||||||
" description = \"Ridge regression model to predict diabetes\",\n",
|
|
||||||
" workspace = ws)\n",
|
|
||||||
"\n",
|
|
||||||
"print(model.name, model.description, model.version)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## 4. *Update your scoring file with Data Collection*\n",
|
|
||||||
"The file below, compared to the file used in notebook 11, has the following changes:\n",
|
|
||||||
"### a. Import the module\n",
|
|
||||||
"```python \n",
|
|
||||||
"from azureml.monitoring import ModelDataCollector```\n",
|
|
||||||
"### b. In your init function add:\n",
|
|
||||||
"```python \n",
|
|
||||||
"global inputs_dc, prediction_d\n",
|
|
||||||
"inputs_dc = ModelDataCollector(\"best_model\", identifier=\"inputs\", feature_names=[\"feat1\", \"feat2\", \"feat3\", \"feat4\", \"feat5\", \"Feat6\"])\n",
|
|
||||||
"prediction_dc = ModelDataCollector(\"best_model\", identifier=\"predictions\", feature_names=[\"prediction1\", \"prediction2\"])```\n",
|
|
||||||
" \n",
|
|
||||||
"* Identifier: Identifier is later used for building the folder structure in your Blob, it can be used to divide \"raw\" data versus \"processed\".\n",
|
|
||||||
"* CorrelationId: is an optional parameter, you do not need to set it up if your model doesn't require it. Having a correlationId in place does help you for easier mapping with other data. (Examples include: LoanNumber, CustomerId, etc.)\n",
|
|
||||||
"* Feature Names: These need to be set up in the order of your features in order for them to have column names when the .csv is created.\n",
|
|
||||||
"\n",
|
|
||||||
"### c. In your run function add:\n",
|
|
||||||
"```python\n",
|
|
||||||
"inputs_dc.collect(data)\n",
|
|
||||||
"prediction_dc.collect(result)```"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"%%writefile score.py\n",
|
|
||||||
"import pickle\n",
|
|
||||||
"import json\n",
|
|
||||||
"import numpy \n",
|
|
||||||
"from sklearn.externals import joblib\n",
|
|
||||||
"from sklearn.linear_model import Ridge\n",
|
|
||||||
"from azureml.core.model import Model\n",
|
|
||||||
"from azureml.monitoring import ModelDataCollector\n",
|
|
||||||
"import time\n",
|
|
||||||
"\n",
|
|
||||||
"def init():\n",
|
|
||||||
" global model\n",
|
|
||||||
" print (\"model initialized\" + time.strftime(\"%H:%M:%S\"))\n",
|
|
||||||
" # note here \"sklearn_regression_model.pkl\" is the name of the model registered under the workspace\n",
|
|
||||||
" # this call should return the path to the model.pkl file on the local disk.\n",
|
|
||||||
" model_path = Model.get_model_path(model_name = 'sklearn_regression_model.pkl')\n",
|
|
||||||
" # deserialize the model file back into a sklearn model\n",
|
|
||||||
" model = joblib.load(model_path)\n",
|
|
||||||
" global inputs_dc, prediction_dc\n",
|
|
||||||
" # this setup will help us save our inputs under the \"inputs\" path in our Azure Blob\n",
|
|
||||||
" inputs_dc = ModelDataCollector(model_name=\"sklearn_regression_model\", identifier=\"inputs\", feature_names=[\"feat1\", \"feat2\"]) \n",
|
|
||||||
" # this setup will help us save our ipredictions under the \"predictions\" path in our Azure Blob\n",
|
|
||||||
" prediction_dc = ModelDataCollector(\"sklearn_regression_model\", identifier=\"predictions\", feature_names=[\"prediction1\", \"prediction2\"]) \n",
|
|
||||||
" \n",
|
|
||||||
"# note you can pass in multiple rows for scoring\n",
|
|
||||||
"def run(raw_data):\n",
|
|
||||||
" global inputs_dc, prediction_dc\n",
|
|
||||||
" try:\n",
|
|
||||||
" data = json.loads(raw_data)['data']\n",
|
|
||||||
" data = numpy.array(data)\n",
|
|
||||||
" result = model.predict(data)\n",
|
|
||||||
" print (\"saving input data\" + time.strftime(\"%H:%M:%S\"))\n",
|
|
||||||
" inputs_dc.collect(data) #this call is saving our input data into our blob\n",
|
|
||||||
" prediction_dc.collect(result)#this call is saving our prediction data into our blob\n",
|
|
||||||
" print (\"saving prediction data\" + time.strftime(\"%H:%M:%S\"))\n",
|
|
||||||
" # you can return any data type as long as it is JSON-serializable\n",
|
|
||||||
" return result.tolist()\n",
|
|
||||||
" except Exception as e:\n",
|
|
||||||
" error = str(e)\n",
|
|
||||||
" print (error + time.strftime(\"%H:%M:%S\"))\n",
|
|
||||||
" return error"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## 5. *Update your myenv.yml file with the required module*"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.conda_dependencies import CondaDependencies \n",
|
|
||||||
"\n",
|
|
||||||
"myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn'])\n",
|
|
||||||
"myenv.add_pip_package(\"azureml-monitoring\")\n",
|
|
||||||
"\n",
|
|
||||||
"with open(\"myenv.yml\",\"w\") as f:\n",
|
|
||||||
" f.write(myenv.serialize_to_string())"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## 6. Create your new Image"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.image import ContainerImage\n",
|
|
||||||
"\n",
|
|
||||||
"image_config = ContainerImage.image_configuration(execution_script = \"score.py\",\n",
|
|
||||||
" runtime = \"python\",\n",
|
|
||||||
" conda_file = \"myenv.yml\",\n",
|
|
||||||
" description = \"Image with ridge regression model\",\n",
|
|
||||||
" tags = {'area': \"diabetes\", 'type': \"regression\"}\n",
|
|
||||||
" )\n",
|
|
||||||
"\n",
|
|
||||||
"image = ContainerImage.create(name = \"myimage1\",\n",
|
|
||||||
" # this is the model object\n",
|
|
||||||
" models = [model],\n",
|
|
||||||
" image_config = image_config,\n",
|
|
||||||
" workspace = ws)\n",
|
|
||||||
"\n",
|
|
||||||
"image.wait_for_creation(show_output = True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"print(model.name, model.description, model.version)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## 7. Deploy to AKS service"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Create AKS compute if you haven't done so."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Use the default configuration (can also provide parameters to customize)\n",
|
|
||||||
"prov_config = AksCompute.provisioning_configuration()\n",
|
|
||||||
"\n",
|
|
||||||
"aks_name = 'my-aks-test1' \n",
|
|
||||||
"# Create the cluster\n",
|
|
||||||
"aks_target = ComputeTarget.create(workspace = ws, \n",
|
|
||||||
" name = aks_name, \n",
|
|
||||||
" provisioning_configuration = prov_config)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"%%time\n",
|
|
||||||
"aks_target.wait_for_completion(show_output = True)\n",
|
|
||||||
"print(aks_target.provisioning_state)\n",
|
|
||||||
"print(aks_target.provisioning_errors)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"If you already have a cluster you can attach the service to it:"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"```python \n",
|
|
||||||
" %%time\n",
|
|
||||||
" resource_id = '/subscriptions/<subscriptionid>/resourcegroups/<resourcegroupname>/providers/Microsoft.ContainerService/managedClusters/<aksservername>'\n",
|
|
||||||
" create_name= 'myaks4'\n",
|
|
||||||
" attach_config = AksCompute.attach_configuration(resource_id=resource_id)\n",
|
|
||||||
" aks_target = ComputeTarget.attach(workspace = ws, \n",
|
|
||||||
" name = create_name, \n",
|
|
||||||
" attach_configuration=attach_config)\n",
|
|
||||||
" ## Wait for the operation to complete\n",
|
|
||||||
" aks_target.wait_for_provisioning(True)```"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### a. *Activate Data Collection and App Insights through updating AKS Webservice configuration*\n",
|
|
||||||
"In order to enable Data Collection and App Insights in your service you will need to update your AKS configuration file:"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"#Set the web service configuration\n",
|
|
||||||
"aks_config = AksWebservice.deploy_configuration(collect_model_data=True, enable_app_insights=True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### b. Deploy your service"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"if aks_target.provisioning_state== \"Succeeded\": \n",
|
|
||||||
" aks_service_name ='aks-w-dc0'\n",
|
|
||||||
" aks_service = Webservice.deploy_from_image(workspace = ws, \n",
|
|
||||||
" name = aks_service_name,\n",
|
|
||||||
" image = image,\n",
|
|
||||||
" deployment_config = aks_config,\n",
|
|
||||||
" deployment_target = aks_target\n",
|
|
||||||
" )\n",
|
|
||||||
" aks_service.wait_for_deployment(show_output = True)\n",
|
|
||||||
" print(aks_service.state)\n",
|
|
||||||
"else: \n",
|
|
||||||
" raise ValueError(\"aks provisioning failed, can't deploy service. Error: \", aks_service.error)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## 8. Test your service and send some data\n",
|
|
||||||
"**Note**: It will take around 15 mins for your data to appear in your blob.\n",
|
|
||||||
"The data will appear in your Azure Blob following this format:\n",
|
|
||||||
"\n",
|
|
||||||
"/modeldata/subscriptionid/resourcegroupname/workspacename/webservicename/modelname/modelversion/identifier/year/month/day/data.csv "
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"%%time\n",
|
|
||||||
"import json\n",
|
|
||||||
"\n",
|
|
||||||
"test_sample = json.dumps({'data': [\n",
|
|
||||||
" [1,2,3,4,54,6,7,8,88,10], \n",
|
|
||||||
" [10,9,8,37,36,45,4,33,2,1]\n",
|
|
||||||
"]})\n",
|
|
||||||
"test_sample = bytes(test_sample,encoding = 'utf8')\n",
|
|
||||||
"\n",
|
|
||||||
"if aks_service.state == \"Healthy\":\n",
|
|
||||||
" prediction = aks_service.run(input_data=test_sample)\n",
|
|
||||||
" print(prediction)\n",
|
|
||||||
"else:\n",
|
|
||||||
" raise ValueError(\"Service deployment isn't healthy, can't call the service. Error: \", aks_service.error)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## 9. Validate you data and analyze it\n",
|
|
||||||
"You can look into your data following this path format in your Azure Blob (it takes up to 15 minutes for the data to appear):\n",
|
|
||||||
"\n",
|
|
||||||
"/modeldata/**subscriptionid>**/**resourcegroupname>**/**workspacename>**/**webservicename>**/**modelname>**/**modelversion>>**/**identifier>**/*year/month/day*/data.csv \n",
|
|
||||||
"\n",
|
|
||||||
"For doing further analysis you have multiple options:"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### a. Create DataBricks cluter and connect it to your blob\n",
|
|
||||||
"https://docs.microsoft.com/en-us/azure/azure-databricks/quickstart-create-databricks-workspace-portal or in your databricks workspace you can look for the template \"Azure Blob Storage Import Example Notebook\".\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"Here is an example for setting up the file location to extract the relevant data:\n",
|
|
||||||
"\n",
|
|
||||||
"<code> file_location = \"wasbs://mycontainer@storageaccountname.blob.core.windows.net/unknown/unknown/unknown-bigdataset-unknown/my_iterate_parking_inputs/2018/°/°/data.csv\" \n",
|
|
||||||
"file_type = \"csv\"</code>\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### b. Connect Blob to Power Bi (Small Data only)\n",
|
|
||||||
"1. Download and Open PowerBi Desktop\n",
|
|
||||||
"2. Select \"Get Data\" and click on \"Azure Blob Storage\" >> Connect\n",
|
|
||||||
"3. Add your storage account and enter your storage key.\n",
|
|
||||||
"4. Select the container where your Data Collection is stored and click on Edit. \n",
|
|
||||||
"5. In the query editor, click under \"Name\" column and add your Storage account Model path into the filter. Note: if you want to only look into files from a specific year or month, just expand the filter path. For example, just look into March data: /modeldata/subscriptionid>/resourcegroupname>/workspacename>/webservicename>/modelname>/modelversion>/identifier>/year>/3\n",
|
|
||||||
"6. Click on the double arrow aside the \"Content\" column to combine the files. \n",
|
|
||||||
"7. Click OK and the data will preload.\n",
|
|
||||||
"8. You can now click Close and Apply and start building your custom reports on your Model Input data."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# Disable Data Collection"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"aks_service.update(collect_model_data=False)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Clean up"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"%%time\n",
|
|
||||||
"aks_service.delete()\n",
|
|
||||||
"image.delete()\n",
|
|
||||||
"model.delete()"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"authors": [
|
|
||||||
{
|
|
||||||
"name": "shipatel"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3.6",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python36"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.6.3"
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
{
|
||||||
"nbformat_minor": 2
|
"cell_type": "markdown",
|
||||||
}
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Enabling Data Collection for Models in Production\n",
|
||||||
|
"With this notebook, you can learn how to collect input model data from your Azure Machine Learning service in an Azure Blob storage. Once enabled, this data collected gives you the opportunity:\n",
|
||||||
|
"\n",
|
||||||
|
"* Monitor data drifts as production data enters your model\n",
|
||||||
|
"* Make better decisions on when to retrain or optimize your model\n",
|
||||||
|
"* Retrain your model with the data collected\n",
|
||||||
|
"\n",
|
||||||
|
"## What data is collected?\n",
|
||||||
|
"* Model input data (voice, images, and video are not supported) from services deployed in Azure Kubernetes Cluster (AKS)\n",
|
||||||
|
"* Model predictions using production input data.\n",
|
||||||
|
"\n",
|
||||||
|
"**Note:** pre-aggregation or pre-calculations on this data are done by user and not included in this version of the product.\n",
|
||||||
|
"\n",
|
||||||
|
"## What is different compared to standard production deployment process?\n",
|
||||||
|
"1. Update scoring file.\n",
|
||||||
|
"2. Update yml file with new dependency.\n",
|
||||||
|
"3. Update aks configuration.\n",
|
||||||
|
"4. Build new image and deploy it. "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 1. Import your dependencies"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core import Workspace\n",
|
||||||
|
"from azureml.core.compute import AksCompute, ComputeTarget\n",
|
||||||
|
"from azureml.core.webservice import Webservice, AksWebservice\n",
|
||||||
|
"import azureml.core\n",
|
||||||
|
"print(azureml.core.VERSION)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 2. Set up your configuration and create a workspace"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"ws = Workspace.from_config()\n",
|
||||||
|
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 3. Register Model\n",
|
||||||
|
"Register an existing trained model, add descirption and tags."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"#Register the model\n",
|
||||||
|
"from azureml.core.model import Model\n",
|
||||||
|
"model = Model.register(model_path = \"sklearn_regression_model.pkl\", # this points to a local file\n",
|
||||||
|
" model_name = \"sklearn_regression_model.pkl\", # this is the name the model is registered as\n",
|
||||||
|
" tags = {'area': \"diabetes\", 'type': \"regression\"},\n",
|
||||||
|
" description = \"Ridge regression model to predict diabetes\",\n",
|
||||||
|
" workspace = ws)\n",
|
||||||
|
"\n",
|
||||||
|
"print(model.name, model.description, model.version)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 4. *Update your scoring file with Data Collection*\n",
|
||||||
|
"The file below, compared to the file used in notebook 11, has the following changes:\n",
|
||||||
|
"### a. Import the module\n",
|
||||||
|
"```python \n",
|
||||||
|
"from azureml.monitoring import ModelDataCollector```\n",
|
||||||
|
"### b. In your init function add:\n",
|
||||||
|
"```python \n",
|
||||||
|
"global inputs_dc, prediction_d\n",
|
||||||
|
"inputs_dc = ModelDataCollector(\"best_model\", identifier=\"inputs\", feature_names=[\"feat1\", \"feat2\", \"feat3\", \"feat4\", \"feat5\", \"Feat6\"])\n",
|
||||||
|
"prediction_dc = ModelDataCollector(\"best_model\", identifier=\"predictions\", feature_names=[\"prediction1\", \"prediction2\"])```\n",
|
||||||
|
" \n",
|
||||||
|
"* Identifier: Identifier is later used for building the folder structure in your Blob, it can be used to divide \"raw\" data versus \"processed\".\n",
|
||||||
|
"* CorrelationId: is an optional parameter, you do not need to set it up if your model doesn't require it. Having a correlationId in place does help you for easier mapping with other data. (Examples include: LoanNumber, CustomerId, etc.)\n",
|
||||||
|
"* Feature Names: These need to be set up in the order of your features in order for them to have column names when the .csv is created.\n",
|
||||||
|
"\n",
|
||||||
|
"### c. In your run function add:\n",
|
||||||
|
"```python\n",
|
||||||
|
"inputs_dc.collect(data)\n",
|
||||||
|
"prediction_dc.collect(result)```"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"%%writefile score.py\n",
|
||||||
|
"import pickle\n",
|
||||||
|
"import json\n",
|
||||||
|
"import numpy \n",
|
||||||
|
"from sklearn.externals import joblib\n",
|
||||||
|
"from sklearn.linear_model import Ridge\n",
|
||||||
|
"from azureml.core.model import Model\n",
|
||||||
|
"from azureml.monitoring import ModelDataCollector\n",
|
||||||
|
"import time\n",
|
||||||
|
"\n",
|
||||||
|
"def init():\n",
|
||||||
|
" global model\n",
|
||||||
|
" print (\"model initialized\" + time.strftime(\"%H:%M:%S\"))\n",
|
||||||
|
" # note here \"sklearn_regression_model.pkl\" is the name of the model registered under the workspace\n",
|
||||||
|
" # this call should return the path to the model.pkl file on the local disk.\n",
|
||||||
|
" model_path = Model.get_model_path(model_name = 'sklearn_regression_model.pkl')\n",
|
||||||
|
" # deserialize the model file back into a sklearn model\n",
|
||||||
|
" model = joblib.load(model_path)\n",
|
||||||
|
" global inputs_dc, prediction_dc\n",
|
||||||
|
" # this setup will help us save our inputs under the \"inputs\" path in our Azure Blob\n",
|
||||||
|
" inputs_dc = ModelDataCollector(model_name=\"sklearn_regression_model\", identifier=\"inputs\", feature_names=[\"feat1\", \"feat2\"]) \n",
|
||||||
|
" # this setup will help us save our ipredictions under the \"predictions\" path in our Azure Blob\n",
|
||||||
|
" prediction_dc = ModelDataCollector(\"sklearn_regression_model\", identifier=\"predictions\", feature_names=[\"prediction1\", \"prediction2\"]) \n",
|
||||||
|
" \n",
|
||||||
|
"# note you can pass in multiple rows for scoring\n",
|
||||||
|
"def run(raw_data):\n",
|
||||||
|
" global inputs_dc, prediction_dc\n",
|
||||||
|
" try:\n",
|
||||||
|
" data = json.loads(raw_data)['data']\n",
|
||||||
|
" data = numpy.array(data)\n",
|
||||||
|
" result = model.predict(data)\n",
|
||||||
|
" print (\"saving input data\" + time.strftime(\"%H:%M:%S\"))\n",
|
||||||
|
" inputs_dc.collect(data) #this call is saving our input data into our blob\n",
|
||||||
|
" prediction_dc.collect(result)#this call is saving our prediction data into our blob\n",
|
||||||
|
" print (\"saving prediction data\" + time.strftime(\"%H:%M:%S\"))\n",
|
||||||
|
" # you can return any data type as long as it is JSON-serializable\n",
|
||||||
|
" return result.tolist()\n",
|
||||||
|
" except Exception as e:\n",
|
||||||
|
" error = str(e)\n",
|
||||||
|
" print (error + time.strftime(\"%H:%M:%S\"))\n",
|
||||||
|
" return error"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 5. *Update your myenv.yml file with the required module*"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.conda_dependencies import CondaDependencies \n",
|
||||||
|
"\n",
|
||||||
|
"myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn'])\n",
|
||||||
|
"myenv.add_pip_package(\"azureml-monitoring\")\n",
|
||||||
|
"\n",
|
||||||
|
"with open(\"myenv.yml\",\"w\") as f:\n",
|
||||||
|
" f.write(myenv.serialize_to_string())"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 6. Create your new Image"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.image import ContainerImage\n",
|
||||||
|
"\n",
|
||||||
|
"image_config = ContainerImage.image_configuration(execution_script = \"score.py\",\n",
|
||||||
|
" runtime = \"python\",\n",
|
||||||
|
" conda_file = \"myenv.yml\",\n",
|
||||||
|
" description = \"Image with ridge regression model\",\n",
|
||||||
|
" tags = {'area': \"diabetes\", 'type': \"regression\"}\n",
|
||||||
|
" )\n",
|
||||||
|
"\n",
|
||||||
|
"image = ContainerImage.create(name = \"myimage1\",\n",
|
||||||
|
" # this is the model object\n",
|
||||||
|
" models = [model],\n",
|
||||||
|
" image_config = image_config,\n",
|
||||||
|
" workspace = ws)\n",
|
||||||
|
"\n",
|
||||||
|
"image.wait_for_creation(show_output = True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"print(model.name, model.description, model.version)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 7. Deploy to AKS service"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Create AKS compute if you haven't done so."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Use the default configuration (can also provide parameters to customize)\n",
|
||||||
|
"prov_config = AksCompute.provisioning_configuration()\n",
|
||||||
|
"\n",
|
||||||
|
"aks_name = 'my-aks-test1' \n",
|
||||||
|
"# Create the cluster\n",
|
||||||
|
"aks_target = ComputeTarget.create(workspace = ws, \n",
|
||||||
|
" name = aks_name, \n",
|
||||||
|
" provisioning_configuration = prov_config)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"%%time\n",
|
||||||
|
"aks_target.wait_for_completion(show_output = True)\n",
|
||||||
|
"print(aks_target.provisioning_state)\n",
|
||||||
|
"print(aks_target.provisioning_errors)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"If you already have a cluster you can attach the service to it:"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"```python \n",
|
||||||
|
" %%time\n",
|
||||||
|
" resource_id = '/subscriptions/<subscriptionid>/resourcegroups/<resourcegroupname>/providers/Microsoft.ContainerService/managedClusters/<aksservername>'\n",
|
||||||
|
" create_name= 'myaks4'\n",
|
||||||
|
" attach_config = AksCompute.attach_configuration(resource_id=resource_id)\n",
|
||||||
|
" aks_target = ComputeTarget.attach(workspace = ws, \n",
|
||||||
|
" name = create_name, \n",
|
||||||
|
" attach_configuration=attach_config)\n",
|
||||||
|
" ## Wait for the operation to complete\n",
|
||||||
|
" aks_target.wait_for_provisioning(True)```"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### a. *Activate Data Collection and App Insights through updating AKS Webservice configuration*\n",
|
||||||
|
"In order to enable Data Collection and App Insights in your service you will need to update your AKS configuration file:"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"#Set the web service configuration\n",
|
||||||
|
"aks_config = AksWebservice.deploy_configuration(collect_model_data=True, enable_app_insights=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### b. Deploy your service"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"if aks_target.provisioning_state== \"Succeeded\": \n",
|
||||||
|
" aks_service_name ='aks-w-dc0'\n",
|
||||||
|
" aks_service = Webservice.deploy_from_image(workspace = ws, \n",
|
||||||
|
" name = aks_service_name,\n",
|
||||||
|
" image = image,\n",
|
||||||
|
" deployment_config = aks_config,\n",
|
||||||
|
" deployment_target = aks_target\n",
|
||||||
|
" )\n",
|
||||||
|
" aks_service.wait_for_deployment(show_output = True)\n",
|
||||||
|
" print(aks_service.state)\n",
|
||||||
|
"else: \n",
|
||||||
|
" raise ValueError(\"aks provisioning failed, can't deploy service. Error: \", aks_service.error)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 8. Test your service and send some data\n",
|
||||||
|
"**Note**: It will take around 15 mins for your data to appear in your blob.\n",
|
||||||
|
"The data will appear in your Azure Blob following this format:\n",
|
||||||
|
"\n",
|
||||||
|
"/modeldata/subscriptionid/resourcegroupname/workspacename/webservicename/modelname/modelversion/identifier/year/month/day/data.csv "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"%%time\n",
|
||||||
|
"import json\n",
|
||||||
|
"\n",
|
||||||
|
"test_sample = json.dumps({'data': [\n",
|
||||||
|
" [1,2,3,4,54,6,7,8,88,10], \n",
|
||||||
|
" [10,9,8,37,36,45,4,33,2,1]\n",
|
||||||
|
"]})\n",
|
||||||
|
"test_sample = bytes(test_sample,encoding = 'utf8')\n",
|
||||||
|
"\n",
|
||||||
|
"if aks_service.state == \"Healthy\":\n",
|
||||||
|
" prediction = aks_service.run(input_data=test_sample)\n",
|
||||||
|
" print(prediction)\n",
|
||||||
|
"else:\n",
|
||||||
|
" raise ValueError(\"Service deployment isn't healthy, can't call the service. Error: \", aks_service.error)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## 9. Validate you data and analyze it\n",
|
||||||
|
"You can look into your data following this path format in your Azure Blob (it takes up to 15 minutes for the data to appear):\n",
|
||||||
|
"\n",
|
||||||
|
"/modeldata/**subscriptionid>**/**resourcegroupname>**/**workspacename>**/**webservicename>**/**modelname>**/**modelversion>>**/**identifier>**/*year/month/day*/data.csv \n",
|
||||||
|
"\n",
|
||||||
|
"For doing further analysis you have multiple options:"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### a. Create DataBricks cluter and connect it to your blob\n",
|
||||||
|
"https://docs.microsoft.com/en-us/azure/azure-databricks/quickstart-create-databricks-workspace-portal or in your databricks workspace you can look for the template \"Azure Blob Storage Import Example Notebook\".\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"Here is an example for setting up the file location to extract the relevant data:\n",
|
||||||
|
"\n",
|
||||||
|
"<code> file_location = \"wasbs://mycontainer@storageaccountname.blob.core.windows.net/unknown/unknown/unknown-bigdataset-unknown/my_iterate_parking_inputs/2018/°/°/data.csv\" \n",
|
||||||
|
"file_type = \"csv\"</code>\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### b. Connect Blob to Power Bi (Small Data only)\n",
|
||||||
|
"1. Download and Open PowerBi Desktop\n",
|
||||||
|
"2. Select \"Get Data\" and click on \"Azure Blob Storage\" >> Connect\n",
|
||||||
|
"3. Add your storage account and enter your storage key.\n",
|
||||||
|
"4. Select the container where your Data Collection is stored and click on Edit. \n",
|
||||||
|
"5. In the query editor, click under \"Name\" column and add your Storage account Model path into the filter. Note: if you want to only look into files from a specific year or month, just expand the filter path. For example, just look into March data: /modeldata/subscriptionid>/resourcegroupname>/workspacename>/webservicename>/modelname>/modelversion>/identifier>/year>/3\n",
|
||||||
|
"6. Click on the double arrow aside the \"Content\" column to combine the files. \n",
|
||||||
|
"7. Click OK and the data will preload.\n",
|
||||||
|
"8. You can now click Close and Apply and start building your custom reports on your Model Input data."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Disable Data Collection"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"aks_service.update(collect_model_data=False)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Clean up"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"%%time\n",
|
||||||
|
"aks_service.delete()\n",
|
||||||
|
"image.delete()\n",
|
||||||
|
"model.delete()"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "shipatel"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3.6",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python36"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.6.3"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
|
|||||||
@@ -1,443 +1,443 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"Copyright (c) Microsoft Corporation. All rights reserved. \n",
|
"Copyright (c) Microsoft Corporation. All rights reserved. \n",
|
||||||
"\n",
|
"\n",
|
||||||
"Licensed under the MIT License."
|
"Licensed under the MIT License."
|
||||||
]
|
]
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
""
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# YOLO Real-time Object Detection using ONNX on AzureML\n",
|
|
||||||
"\n",
|
|
||||||
"This example shows how to convert the TinyYOLO model from CoreML to ONNX and operationalize it as a web service using Azure Machine Learning services and the ONNX Runtime.\n",
|
|
||||||
"\n",
|
|
||||||
"## What is ONNX\n",
|
|
||||||
"ONNX is an open format for representing machine learning and deep learning models. ONNX enables open and interoperable AI by enabling data scientists and developers to use the tools of their choice without worrying about lock-in and flexibility to deploy to a variety of platforms. ONNX is developed and supported by a community of partners including Microsoft, Facebook, and Amazon. For more information, explore the [ONNX website](http://onnx.ai).\n",
|
|
||||||
"\n",
|
|
||||||
"## YOLO Details\n",
|
|
||||||
"You Only Look Once (YOLO) is a state-of-the-art, real-time object detection system. For more information about YOLO, please visit the [YOLO website](https://pjreddie.com/darknet/yolo/)."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Prerequisites\n",
|
|
||||||
"\n",
|
|
||||||
"To make the best use of your time, make sure you have done the following:\n",
|
|
||||||
"\n",
|
|
||||||
"* Understand the [architecture and terms](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture) introduced by Azure Machine Learning\n",
|
|
||||||
"* If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [configuration](../../../configuration.ipynb) notebook to:\n",
|
|
||||||
" * install the AML SDK\n",
|
|
||||||
" * create a workspace and its configuration file (config.json)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Check core SDK version number\n",
|
|
||||||
"import azureml.core\n",
|
|
||||||
"\n",
|
|
||||||
"print(\"SDK version:\", azureml.core.VERSION)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"#### Install necessary packages\n",
|
|
||||||
"\n",
|
|
||||||
"You'll need to run the following commands to use this tutorial:\n",
|
|
||||||
"\n",
|
|
||||||
"```sh\n",
|
|
||||||
"pip install onnxmltools\n",
|
|
||||||
"pip install coremltools # use this on Linux and Mac\n",
|
|
||||||
"pip install git+https://github.com/apple/coremltools # use this on Windows\n",
|
|
||||||
"```"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Convert model to ONNX\n",
|
|
||||||
"\n",
|
|
||||||
"First we download the CoreML model. We use the CoreML model from [Matthijs Hollemans's tutorial](https://github.com/hollance/YOLO-CoreML-MPSNNGraph). This may take a few minutes."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import urllib.request\n",
|
|
||||||
"\n",
|
|
||||||
"coreml_model_url = \"https://github.com/hollance/YOLO-CoreML-MPSNNGraph/raw/master/TinyYOLO-CoreML/TinyYOLO-CoreML/TinyYOLO.mlmodel\"\n",
|
|
||||||
"urllib.request.urlretrieve(coreml_model_url, filename=\"TinyYOLO.mlmodel\")\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Then we use ONNXMLTools to convert the model."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import onnxmltools\n",
|
|
||||||
"import coremltools\n",
|
|
||||||
"\n",
|
|
||||||
"# Load a CoreML model\n",
|
|
||||||
"coreml_model = coremltools.utils.load_spec('TinyYOLO.mlmodel')\n",
|
|
||||||
"\n",
|
|
||||||
"# Convert from CoreML into ONNX\n",
|
|
||||||
"onnx_model = onnxmltools.convert_coreml(coreml_model, 'TinyYOLOv2')\n",
|
|
||||||
"\n",
|
|
||||||
"# Save ONNX model\n",
|
|
||||||
"onnxmltools.utils.save_model(onnx_model, 'tinyyolov2.onnx')\n",
|
|
||||||
"\n",
|
|
||||||
"import os\n",
|
|
||||||
"print(os.path.getsize('tinyyolov2.onnx'))"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Deploying as a web service with Azure ML\n",
|
|
||||||
"\n",
|
|
||||||
"### Load Azure ML workspace\n",
|
|
||||||
"\n",
|
|
||||||
"We begin by instantiating a workspace object from the existing workspace created earlier in the configuration notebook."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core import Workspace\n",
|
|
||||||
"\n",
|
|
||||||
"ws = Workspace.from_config()\n",
|
|
||||||
"print(ws.name, ws.location, ws.resource_group, sep = '\\n')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Registering your model with Azure ML\n",
|
|
||||||
"\n",
|
|
||||||
"Now we upload the model and register it in the workspace."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.model import Model\n",
|
|
||||||
"\n",
|
|
||||||
"model = Model.register(model_path = \"tinyyolov2.onnx\",\n",
|
|
||||||
" model_name = \"tinyyolov2\",\n",
|
|
||||||
" tags = {\"onnx\": \"demo\"},\n",
|
|
||||||
" description = \"TinyYOLO\",\n",
|
|
||||||
" workspace = ws)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"#### Displaying your registered models\n",
|
|
||||||
"\n",
|
|
||||||
"You can optionally list out all the models that you have registered in this workspace."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"models = ws.models\n",
|
|
||||||
"for name, m in models.items():\n",
|
|
||||||
" print(\"Name:\", name,\"\\tVersion:\", m.version, \"\\tDescription:\", m.description, m.tags)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Write scoring file\n",
|
|
||||||
"\n",
|
|
||||||
"We are now going to deploy our ONNX model on Azure ML using the ONNX Runtime. We begin by writing a score.py file that will be invoked by the web service call. The `init()` function is called once when the container is started so we load the model using the ONNX Runtime into a global session object."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"%%writefile score.py\n",
|
|
||||||
"import json\n",
|
|
||||||
"import time\n",
|
|
||||||
"import sys\n",
|
|
||||||
"import os\n",
|
|
||||||
"from azureml.core.model import Model\n",
|
|
||||||
"import numpy as np # we're going to use numpy to process input and output data\n",
|
|
||||||
"import onnxruntime # to inference ONNX models, we use the ONNX Runtime\n",
|
|
||||||
"\n",
|
|
||||||
"def init():\n",
|
|
||||||
" global session\n",
|
|
||||||
" model = Model.get_model_path(model_name = 'tinyyolov2')\n",
|
|
||||||
" session = onnxruntime.InferenceSession(model)\n",
|
|
||||||
"\n",
|
|
||||||
"def preprocess(input_data_json):\n",
|
|
||||||
" # convert the JSON data into the tensor input\n",
|
|
||||||
" return np.array(json.loads(input_data_json)['data']).astype('float32')\n",
|
|
||||||
"\n",
|
|
||||||
"def postprocess(result):\n",
|
|
||||||
" return np.array(result).tolist()\n",
|
|
||||||
"\n",
|
|
||||||
"def run(input_data_json):\n",
|
|
||||||
" try:\n",
|
|
||||||
" start = time.time() # start timer\n",
|
|
||||||
" input_data = preprocess(input_data_json)\n",
|
|
||||||
" input_name = session.get_inputs()[0].name # get the id of the first input of the model \n",
|
|
||||||
" result = session.run([], {input_name: input_data})\n",
|
|
||||||
" end = time.time() # stop timer\n",
|
|
||||||
" return {\"result\": postprocess(result),\n",
|
|
||||||
" \"time\": end - start}\n",
|
|
||||||
" except Exception as e:\n",
|
|
||||||
" result = str(e)\n",
|
|
||||||
" return {\"error\": result}"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Create container image\n",
|
|
||||||
"First we create a YAML file that specifies which dependencies we would like to see in our container."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.conda_dependencies import CondaDependencies \n",
|
|
||||||
"\n",
|
|
||||||
"myenv = CondaDependencies.create(pip_packages=[\"numpy\",\"onnxruntime==0.4.0\",\"azureml-core\"])\n",
|
|
||||||
"\n",
|
|
||||||
"with open(\"myenv.yml\",\"w\") as f:\n",
|
|
||||||
" f.write(myenv.serialize_to_string())"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Then we have Azure ML create the container. This step will likely take a few minutes."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.image import ContainerImage\n",
|
|
||||||
"\n",
|
|
||||||
"image_config = ContainerImage.image_configuration(execution_script = \"score.py\",\n",
|
|
||||||
" runtime = \"python\",\n",
|
|
||||||
" conda_file = \"myenv.yml\",\n",
|
|
||||||
" docker_file = \"Dockerfile\",\n",
|
|
||||||
" description = \"TinyYOLO ONNX Demo\",\n",
|
|
||||||
" tags = {\"demo\": \"onnx\"}\n",
|
|
||||||
" )\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"image = ContainerImage.create(name = \"onnxyolo\",\n",
|
|
||||||
" models = [model],\n",
|
|
||||||
" image_config = image_config,\n",
|
|
||||||
" workspace = ws)\n",
|
|
||||||
"\n",
|
|
||||||
"image.wait_for_creation(show_output = True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"In case you need to debug your code, the next line of code accesses the log file."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"print(image.image_build_log_uri)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"We're all set! Let's get our model chugging.\n",
|
|
||||||
"\n",
|
|
||||||
"### Deploy the container image"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.webservice import AciWebservice\n",
|
|
||||||
"\n",
|
|
||||||
"aciconfig = AciWebservice.deploy_configuration(cpu_cores = 1, \n",
|
|
||||||
" memory_gb = 1, \n",
|
|
||||||
" tags = {'demo': 'onnx'}, \n",
|
|
||||||
" description = 'web service for TinyYOLO ONNX model')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"The following cell will likely take a few minutes to run as well."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.webservice import Webservice\n",
|
|
||||||
"from random import randint\n",
|
|
||||||
"\n",
|
|
||||||
"aci_service_name = 'onnx-tinyyolo'+str(randint(0,100))\n",
|
|
||||||
"print(\"Service\", aci_service_name)\n",
|
|
||||||
"\n",
|
|
||||||
"aci_service = Webservice.deploy_from_image(deployment_config = aciconfig,\n",
|
|
||||||
" image = image,\n",
|
|
||||||
" name = aci_service_name,\n",
|
|
||||||
" workspace = ws)\n",
|
|
||||||
"\n",
|
|
||||||
"aci_service.wait_for_deployment(True)\n",
|
|
||||||
"print(aci_service.state)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"In case the deployment fails, you can check the logs. Make sure to delete your aci_service before trying again."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"if aci_service.state != 'Healthy':\n",
|
|
||||||
" # run this command for debugging.\n",
|
|
||||||
" print(aci_service.get_logs())\n",
|
|
||||||
" aci_service.delete()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Success!\n",
|
|
||||||
"\n",
|
|
||||||
"If you've made it this far, you've deployed a working web service that does object detection using an ONNX model. You can get the URL for the webservice with the code below."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"print(aci_service.scoring_uri)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"When you are eventually done using the web service, remember to delete it."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"#aci_service.delete()"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"authors": [
|
|
||||||
{
|
|
||||||
"name": "viswamy"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3.6",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python36"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.6.5"
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
{
|
||||||
"nbformat_minor": 2
|
"cell_type": "markdown",
|
||||||
}
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
""
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# YOLO Real-time Object Detection using ONNX on AzureML\n",
|
||||||
|
"\n",
|
||||||
|
"This example shows how to convert the TinyYOLO model from CoreML to ONNX and operationalize it as a web service using Azure Machine Learning services and the ONNX Runtime.\n",
|
||||||
|
"\n",
|
||||||
|
"## What is ONNX\n",
|
||||||
|
"ONNX is an open format for representing machine learning and deep learning models. ONNX enables open and interoperable AI by enabling data scientists and developers to use the tools of their choice without worrying about lock-in and flexibility to deploy to a variety of platforms. ONNX is developed and supported by a community of partners including Microsoft, Facebook, and Amazon. For more information, explore the [ONNX website](http://onnx.ai).\n",
|
||||||
|
"\n",
|
||||||
|
"## YOLO Details\n",
|
||||||
|
"You Only Look Once (YOLO) is a state-of-the-art, real-time object detection system. For more information about YOLO, please visit the [YOLO website](https://pjreddie.com/darknet/yolo/)."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Prerequisites\n",
|
||||||
|
"\n",
|
||||||
|
"To make the best use of your time, make sure you have done the following:\n",
|
||||||
|
"\n",
|
||||||
|
"* Understand the [architecture and terms](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture) introduced by Azure Machine Learning\n",
|
||||||
|
"* If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [configuration](../../../configuration.ipynb) notebook to:\n",
|
||||||
|
" * install the AML SDK\n",
|
||||||
|
" * create a workspace and its configuration file (config.json)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Check core SDK version number\n",
|
||||||
|
"import azureml.core\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"SDK version:\", azureml.core.VERSION)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"#### Install necessary packages\n",
|
||||||
|
"\n",
|
||||||
|
"You'll need to run the following commands to use this tutorial:\n",
|
||||||
|
"\n",
|
||||||
|
"```sh\n",
|
||||||
|
"pip install onnxmltools\n",
|
||||||
|
"pip install coremltools # use this on Linux and Mac\n",
|
||||||
|
"pip install git+https://github.com/apple/coremltools # use this on Windows\n",
|
||||||
|
"```"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Convert model to ONNX\n",
|
||||||
|
"\n",
|
||||||
|
"First we download the CoreML model. We use the CoreML model from [Matthijs Hollemans's tutorial](https://github.com/hollance/YOLO-CoreML-MPSNNGraph). This may take a few minutes."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import urllib.request\n",
|
||||||
|
"\n",
|
||||||
|
"coreml_model_url = \"https://github.com/hollance/YOLO-CoreML-MPSNNGraph/raw/master/TinyYOLO-CoreML/TinyYOLO-CoreML/TinyYOLO.mlmodel\"\n",
|
||||||
|
"urllib.request.urlretrieve(coreml_model_url, filename=\"TinyYOLO.mlmodel\")\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Then we use ONNXMLTools to convert the model."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import onnxmltools\n",
|
||||||
|
"import coremltools\n",
|
||||||
|
"\n",
|
||||||
|
"# Load a CoreML model\n",
|
||||||
|
"coreml_model = coremltools.utils.load_spec('TinyYOLO.mlmodel')\n",
|
||||||
|
"\n",
|
||||||
|
"# Convert from CoreML into ONNX\n",
|
||||||
|
"onnx_model = onnxmltools.convert_coreml(coreml_model, 'TinyYOLOv2')\n",
|
||||||
|
"\n",
|
||||||
|
"# Save ONNX model\n",
|
||||||
|
"onnxmltools.utils.save_model(onnx_model, 'tinyyolov2.onnx')\n",
|
||||||
|
"\n",
|
||||||
|
"import os\n",
|
||||||
|
"print(os.path.getsize('tinyyolov2.onnx'))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Deploying as a web service with Azure ML\n",
|
||||||
|
"\n",
|
||||||
|
"### Load Azure ML workspace\n",
|
||||||
|
"\n",
|
||||||
|
"We begin by instantiating a workspace object from the existing workspace created earlier in the configuration notebook."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core import Workspace\n",
|
||||||
|
"\n",
|
||||||
|
"ws = Workspace.from_config()\n",
|
||||||
|
"print(ws.name, ws.location, ws.resource_group, sep = '\\n')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Registering your model with Azure ML\n",
|
||||||
|
"\n",
|
||||||
|
"Now we upload the model and register it in the workspace."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.model import Model\n",
|
||||||
|
"\n",
|
||||||
|
"model = Model.register(model_path = \"tinyyolov2.onnx\",\n",
|
||||||
|
" model_name = \"tinyyolov2\",\n",
|
||||||
|
" tags = {\"onnx\": \"demo\"},\n",
|
||||||
|
" description = \"TinyYOLO\",\n",
|
||||||
|
" workspace = ws)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"#### Displaying your registered models\n",
|
||||||
|
"\n",
|
||||||
|
"You can optionally list out all the models that you have registered in this workspace."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"models = ws.models\n",
|
||||||
|
"for name, m in models.items():\n",
|
||||||
|
" print(\"Name:\", name,\"\\tVersion:\", m.version, \"\\tDescription:\", m.description, m.tags)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Write scoring file\n",
|
||||||
|
"\n",
|
||||||
|
"We are now going to deploy our ONNX model on Azure ML using the ONNX Runtime. We begin by writing a score.py file that will be invoked by the web service call. The `init()` function is called once when the container is started so we load the model using the ONNX Runtime into a global session object."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"%%writefile score.py\n",
|
||||||
|
"import json\n",
|
||||||
|
"import time\n",
|
||||||
|
"import sys\n",
|
||||||
|
"import os\n",
|
||||||
|
"from azureml.core.model import Model\n",
|
||||||
|
"import numpy as np # we're going to use numpy to process input and output data\n",
|
||||||
|
"import onnxruntime # to inference ONNX models, we use the ONNX Runtime\n",
|
||||||
|
"\n",
|
||||||
|
"def init():\n",
|
||||||
|
" global session\n",
|
||||||
|
" model = Model.get_model_path(model_name = 'tinyyolov2')\n",
|
||||||
|
" session = onnxruntime.InferenceSession(model)\n",
|
||||||
|
"\n",
|
||||||
|
"def preprocess(input_data_json):\n",
|
||||||
|
" # convert the JSON data into the tensor input\n",
|
||||||
|
" return np.array(json.loads(input_data_json)['data']).astype('float32')\n",
|
||||||
|
"\n",
|
||||||
|
"def postprocess(result):\n",
|
||||||
|
" return np.array(result).tolist()\n",
|
||||||
|
"\n",
|
||||||
|
"def run(input_data_json):\n",
|
||||||
|
" try:\n",
|
||||||
|
" start = time.time() # start timer\n",
|
||||||
|
" input_data = preprocess(input_data_json)\n",
|
||||||
|
" input_name = session.get_inputs()[0].name # get the id of the first input of the model \n",
|
||||||
|
" result = session.run([], {input_name: input_data})\n",
|
||||||
|
" end = time.time() # stop timer\n",
|
||||||
|
" return {\"result\": postprocess(result),\n",
|
||||||
|
" \"time\": end - start}\n",
|
||||||
|
" except Exception as e:\n",
|
||||||
|
" result = str(e)\n",
|
||||||
|
" return {\"error\": result}"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Create container image\n",
|
||||||
|
"First we create a YAML file that specifies which dependencies we would like to see in our container."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.conda_dependencies import CondaDependencies \n",
|
||||||
|
"\n",
|
||||||
|
"myenv = CondaDependencies.create(pip_packages=[\"numpy\",\"onnxruntime==0.4.0\",\"azureml-core\"])\n",
|
||||||
|
"\n",
|
||||||
|
"with open(\"myenv.yml\",\"w\") as f:\n",
|
||||||
|
" f.write(myenv.serialize_to_string())"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Then we have Azure ML create the container. This step will likely take a few minutes."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.image import ContainerImage\n",
|
||||||
|
"\n",
|
||||||
|
"image_config = ContainerImage.image_configuration(execution_script = \"score.py\",\n",
|
||||||
|
" runtime = \"python\",\n",
|
||||||
|
" conda_file = \"myenv.yml\",\n",
|
||||||
|
" docker_file = \"Dockerfile\",\n",
|
||||||
|
" description = \"TinyYOLO ONNX Demo\",\n",
|
||||||
|
" tags = {\"demo\": \"onnx\"}\n",
|
||||||
|
" )\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"image = ContainerImage.create(name = \"onnxyolo\",\n",
|
||||||
|
" models = [model],\n",
|
||||||
|
" image_config = image_config,\n",
|
||||||
|
" workspace = ws)\n",
|
||||||
|
"\n",
|
||||||
|
"image.wait_for_creation(show_output = True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"In case you need to debug your code, the next line of code accesses the log file."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"print(image.image_build_log_uri)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"We're all set! Let's get our model chugging.\n",
|
||||||
|
"\n",
|
||||||
|
"### Deploy the container image"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.webservice import AciWebservice\n",
|
||||||
|
"\n",
|
||||||
|
"aciconfig = AciWebservice.deploy_configuration(cpu_cores = 1, \n",
|
||||||
|
" memory_gb = 1, \n",
|
||||||
|
" tags = {'demo': 'onnx'}, \n",
|
||||||
|
" description = 'web service for TinyYOLO ONNX model')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"The following cell will likely take a few minutes to run as well."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.webservice import Webservice\n",
|
||||||
|
"from random import randint\n",
|
||||||
|
"\n",
|
||||||
|
"aci_service_name = 'onnx-tinyyolo'+str(randint(0,100))\n",
|
||||||
|
"print(\"Service\", aci_service_name)\n",
|
||||||
|
"\n",
|
||||||
|
"aci_service = Webservice.deploy_from_image(deployment_config = aciconfig,\n",
|
||||||
|
" image = image,\n",
|
||||||
|
" name = aci_service_name,\n",
|
||||||
|
" workspace = ws)\n",
|
||||||
|
"\n",
|
||||||
|
"aci_service.wait_for_deployment(True)\n",
|
||||||
|
"print(aci_service.state)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"In case the deployment fails, you can check the logs. Make sure to delete your aci_service before trying again."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"if aci_service.state != 'Healthy':\n",
|
||||||
|
" # run this command for debugging.\n",
|
||||||
|
" print(aci_service.get_logs())\n",
|
||||||
|
" aci_service.delete()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Success!\n",
|
||||||
|
"\n",
|
||||||
|
"If you've made it this far, you've deployed a working web service that does object detection using an ONNX model. You can get the URL for the webservice with the code below."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"print(aci_service.scoring_uri)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"When you are eventually done using the web service, remember to delete it."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"#aci_service.delete()"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "viswamy"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3.6",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python36"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.6.5"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,427 +1,427 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"Copyright (c) Microsoft Corporation. All rights reserved. \n",
|
"Copyright (c) Microsoft Corporation. All rights reserved. \n",
|
||||||
"\n",
|
"\n",
|
||||||
"Licensed under the MIT License."
|
"Licensed under the MIT License."
|
||||||
]
|
]
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
""
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# ResNet50 Image Classification using ONNX and AzureML\n",
|
|
||||||
"\n",
|
|
||||||
"This example shows how to deploy the ResNet50 ONNX model as a web service using Azure Machine Learning services and the ONNX Runtime.\n",
|
|
||||||
"\n",
|
|
||||||
"## What is ONNX\n",
|
|
||||||
"ONNX is an open format for representing machine learning and deep learning models. ONNX enables open and interoperable AI by enabling data scientists and developers to use the tools of their choice without worrying about lock-in and flexibility to deploy to a variety of platforms. ONNX is developed and supported by a community of partners including Microsoft, Facebook, and Amazon. For more information, explore the [ONNX website](http://onnx.ai).\n",
|
|
||||||
"\n",
|
|
||||||
"## ResNet50 Details\n",
|
|
||||||
"ResNet classifies the major object in an input image into a set of 1000 pre-defined classes. More information about the ResNet50 model and how it was created can be found on the [ONNX Model Zoo GitHub](https://github.com/onnx/models/tree/master/models/image_classification/resnet). "
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Prerequisites\n",
|
|
||||||
"\n",
|
|
||||||
"To make the best use of your time, make sure you have done the following:\n",
|
|
||||||
"\n",
|
|
||||||
"* Understand the [architecture and terms](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture) introduced by Azure Machine Learning\n",
|
|
||||||
"* If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [configuration notebook](../../../configuration.ipynb) to:\n",
|
|
||||||
" * install the AML SDK\n",
|
|
||||||
" * create a workspace and its configuration file (config.json)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Check core SDK version number\n",
|
|
||||||
"import azureml.core\n",
|
|
||||||
"\n",
|
|
||||||
"print(\"SDK version:\", azureml.core.VERSION)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"#### Download pre-trained ONNX model from ONNX Model Zoo.\n",
|
|
||||||
"\n",
|
|
||||||
"Download the [ResNet50v2 model and test data](https://s3.amazonaws.com/onnx-model-zoo/resnet/resnet50v2/resnet50v2.tar.gz) and extract it in the same folder as this tutorial notebook.\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import urllib.request\n",
|
|
||||||
"\n",
|
|
||||||
"onnx_model_url = \"https://s3.amazonaws.com/onnx-model-zoo/resnet/resnet50v2/resnet50v2.tar.gz\"\n",
|
|
||||||
"urllib.request.urlretrieve(onnx_model_url, filename=\"resnet50v2.tar.gz\")\n",
|
|
||||||
"\n",
|
|
||||||
"!tar xvzf resnet50v2.tar.gz"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Deploying as a web service with Azure ML"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Load your Azure ML workspace\n",
|
|
||||||
"\n",
|
|
||||||
"We begin by instantiating a workspace object from the existing workspace created earlier in the configuration notebook."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core import Workspace\n",
|
|
||||||
"\n",
|
|
||||||
"ws = Workspace.from_config()\n",
|
|
||||||
"print(ws.name, ws.location, ws.resource_group, sep = '\\n')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Register your model with Azure ML\n",
|
|
||||||
"\n",
|
|
||||||
"Now we upload the model and register it in the workspace."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.model import Model\n",
|
|
||||||
"\n",
|
|
||||||
"model = Model.register(model_path = \"resnet50v2/resnet50v2.onnx\",\n",
|
|
||||||
" model_name = \"resnet50v2\",\n",
|
|
||||||
" tags = {\"onnx\": \"demo\"},\n",
|
|
||||||
" description = \"ResNet50v2 from ONNX Model Zoo\",\n",
|
|
||||||
" workspace = ws)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"#### Displaying your registered models\n",
|
|
||||||
"\n",
|
|
||||||
"You can optionally list out all the models that you have registered in this workspace."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"models = ws.models\n",
|
|
||||||
"for name, m in models.items():\n",
|
|
||||||
" print(\"Name:\", name,\"\\tVersion:\", m.version, \"\\tDescription:\", m.description, m.tags)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Write scoring file\n",
|
|
||||||
"\n",
|
|
||||||
"We are now going to deploy our ONNX model on Azure ML using the ONNX Runtime. We begin by writing a score.py file that will be invoked by the web service call. The `init()` function is called once when the container is started so we load the model using the ONNX Runtime into a global session object."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"%%writefile score.py\n",
|
|
||||||
"import json\n",
|
|
||||||
"import time\n",
|
|
||||||
"import sys\n",
|
|
||||||
"import os\n",
|
|
||||||
"from azureml.core.model import Model\n",
|
|
||||||
"import numpy as np # we're going to use numpy to process input and output data\n",
|
|
||||||
"import onnxruntime # to inference ONNX models, we use the ONNX Runtime\n",
|
|
||||||
"\n",
|
|
||||||
"def softmax(x):\n",
|
|
||||||
" x = x.reshape(-1)\n",
|
|
||||||
" e_x = np.exp(x - np.max(x))\n",
|
|
||||||
" return e_x / e_x.sum(axis=0)\n",
|
|
||||||
"\n",
|
|
||||||
"def init():\n",
|
|
||||||
" global session\n",
|
|
||||||
" model = Model.get_model_path(model_name = 'resnet50v2')\n",
|
|
||||||
" session = onnxruntime.InferenceSession(model, None)\n",
|
|
||||||
"\n",
|
|
||||||
"def preprocess(input_data_json):\n",
|
|
||||||
" # convert the JSON data into the tensor input\n",
|
|
||||||
" img_data = np.array(json.loads(input_data_json)['data']).astype('float32')\n",
|
|
||||||
" \n",
|
|
||||||
" #normalize\n",
|
|
||||||
" mean_vec = np.array([0.485, 0.456, 0.406])\n",
|
|
||||||
" stddev_vec = np.array([0.229, 0.224, 0.225])\n",
|
|
||||||
" norm_img_data = np.zeros(img_data.shape).astype('float32')\n",
|
|
||||||
" for i in range(img_data.shape[0]):\n",
|
|
||||||
" norm_img_data[i,:,:] = (img_data[i,:,:]/255 - mean_vec[i]) / stddev_vec[i]\n",
|
|
||||||
"\n",
|
|
||||||
" return norm_img_data\n",
|
|
||||||
"\n",
|
|
||||||
"def postprocess(result):\n",
|
|
||||||
" return softmax(np.array(result)).tolist()\n",
|
|
||||||
"\n",
|
|
||||||
"def run(input_data_json):\n",
|
|
||||||
" try:\n",
|
|
||||||
" start = time.time()\n",
|
|
||||||
" # load in our data which is expected as NCHW 224x224 image\n",
|
|
||||||
" input_data = preprocess(input_data_json)\n",
|
|
||||||
" input_name = session.get_inputs()[0].name # get the id of the first input of the model \n",
|
|
||||||
" result = session.run([], {input_name: input_data})\n",
|
|
||||||
" end = time.time() # stop timer\n",
|
|
||||||
" return {\"result\": postprocess(result),\n",
|
|
||||||
" \"time\": end - start}\n",
|
|
||||||
" except Exception as e:\n",
|
|
||||||
" result = str(e)\n",
|
|
||||||
" return {\"error\": result}"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Create container image"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"First we create a YAML file that specifies which dependencies we would like to see in our container."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.conda_dependencies import CondaDependencies \n",
|
|
||||||
"\n",
|
|
||||||
"myenv = CondaDependencies.create(pip_packages=[\"numpy\",\"onnxruntime\",\"azureml-core\"])\n",
|
|
||||||
"\n",
|
|
||||||
"with open(\"myenv.yml\",\"w\") as f:\n",
|
|
||||||
" f.write(myenv.serialize_to_string())"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Then we have Azure ML create the container. This step will likely take a few minutes."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.image import ContainerImage\n",
|
|
||||||
"\n",
|
|
||||||
"image_config = ContainerImage.image_configuration(execution_script = \"score.py\",\n",
|
|
||||||
" runtime = \"python\",\n",
|
|
||||||
" conda_file = \"myenv.yml\",\n",
|
|
||||||
" docker_file = \"Dockerfile\",\n",
|
|
||||||
" description = \"ONNX ResNet50 Demo\",\n",
|
|
||||||
" tags = {\"demo\": \"onnx\"}\n",
|
|
||||||
" )\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"image = ContainerImage.create(name = \"onnxresnet50v2\",\n",
|
|
||||||
" models = [model],\n",
|
|
||||||
" image_config = image_config,\n",
|
|
||||||
" workspace = ws)\n",
|
|
||||||
"\n",
|
|
||||||
"image.wait_for_creation(show_output = True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"In case you need to debug your code, the next line of code accesses the log file."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"print(image.image_build_log_uri)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"We're all set! Let's get our model chugging.\n",
|
|
||||||
"\n",
|
|
||||||
"### Deploy the container image"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.webservice import AciWebservice\n",
|
|
||||||
"\n",
|
|
||||||
"aciconfig = AciWebservice.deploy_configuration(cpu_cores = 1, \n",
|
|
||||||
" memory_gb = 1, \n",
|
|
||||||
" tags = {'demo': 'onnx'}, \n",
|
|
||||||
" description = 'web service for ResNet50 ONNX model')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"The following cell will likely take a few minutes to run as well."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.webservice import Webservice\n",
|
|
||||||
"from random import randint\n",
|
|
||||||
"\n",
|
|
||||||
"aci_service_name = 'onnx-demo-resnet50'+str(randint(0,100))\n",
|
|
||||||
"print(\"Service\", aci_service_name)\n",
|
|
||||||
"\n",
|
|
||||||
"aci_service = Webservice.deploy_from_image(deployment_config = aciconfig,\n",
|
|
||||||
" image = image,\n",
|
|
||||||
" name = aci_service_name,\n",
|
|
||||||
" workspace = ws)\n",
|
|
||||||
"\n",
|
|
||||||
"aci_service.wait_for_deployment(True)\n",
|
|
||||||
"print(aci_service.state)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"In case the deployment fails, you can check the logs. Make sure to delete your aci_service before trying again."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"if aci_service.state != 'Healthy':\n",
|
|
||||||
" # run this command for debugging.\n",
|
|
||||||
" print(aci_service.get_logs())\n",
|
|
||||||
" aci_service.delete()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Success!\n",
|
|
||||||
"\n",
|
|
||||||
"If you've made it this far, you've deployed a working web service that does image classification using an ONNX model. You can get the URL for the webservice with the code below."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"print(aci_service.scoring_uri)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"When you are eventually done using the web service, remember to delete it."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"#aci_service.delete()"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"authors": [
|
|
||||||
{
|
|
||||||
"name": "viswamy"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3.6",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python36"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.6.5"
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
{
|
||||||
"nbformat_minor": 2
|
"cell_type": "markdown",
|
||||||
}
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
""
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# ResNet50 Image Classification using ONNX and AzureML\n",
|
||||||
|
"\n",
|
||||||
|
"This example shows how to deploy the ResNet50 ONNX model as a web service using Azure Machine Learning services and the ONNX Runtime.\n",
|
||||||
|
"\n",
|
||||||
|
"## What is ONNX\n",
|
||||||
|
"ONNX is an open format for representing machine learning and deep learning models. ONNX enables open and interoperable AI by enabling data scientists and developers to use the tools of their choice without worrying about lock-in and flexibility to deploy to a variety of platforms. ONNX is developed and supported by a community of partners including Microsoft, Facebook, and Amazon. For more information, explore the [ONNX website](http://onnx.ai).\n",
|
||||||
|
"\n",
|
||||||
|
"## ResNet50 Details\n",
|
||||||
|
"ResNet classifies the major object in an input image into a set of 1000 pre-defined classes. More information about the ResNet50 model and how it was created can be found on the [ONNX Model Zoo GitHub](https://github.com/onnx/models/tree/master/models/image_classification/resnet). "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Prerequisites\n",
|
||||||
|
"\n",
|
||||||
|
"To make the best use of your time, make sure you have done the following:\n",
|
||||||
|
"\n",
|
||||||
|
"* Understand the [architecture and terms](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture) introduced by Azure Machine Learning\n",
|
||||||
|
"* If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [configuration notebook](../../../configuration.ipynb) to:\n",
|
||||||
|
" * install the AML SDK\n",
|
||||||
|
" * create a workspace and its configuration file (config.json)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Check core SDK version number\n",
|
||||||
|
"import azureml.core\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"SDK version:\", azureml.core.VERSION)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"#### Download pre-trained ONNX model from ONNX Model Zoo.\n",
|
||||||
|
"\n",
|
||||||
|
"Download the [ResNet50v2 model and test data](https://s3.amazonaws.com/onnx-model-zoo/resnet/resnet50v2/resnet50v2.tar.gz) and extract it in the same folder as this tutorial notebook.\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import urllib.request\n",
|
||||||
|
"\n",
|
||||||
|
"onnx_model_url = \"https://s3.amazonaws.com/onnx-model-zoo/resnet/resnet50v2/resnet50v2.tar.gz\"\n",
|
||||||
|
"urllib.request.urlretrieve(onnx_model_url, filename=\"resnet50v2.tar.gz\")\n",
|
||||||
|
"\n",
|
||||||
|
"!tar xvzf resnet50v2.tar.gz"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Deploying as a web service with Azure ML"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Load your Azure ML workspace\n",
|
||||||
|
"\n",
|
||||||
|
"We begin by instantiating a workspace object from the existing workspace created earlier in the configuration notebook."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core import Workspace\n",
|
||||||
|
"\n",
|
||||||
|
"ws = Workspace.from_config()\n",
|
||||||
|
"print(ws.name, ws.location, ws.resource_group, sep = '\\n')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Register your model with Azure ML\n",
|
||||||
|
"\n",
|
||||||
|
"Now we upload the model and register it in the workspace."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.model import Model\n",
|
||||||
|
"\n",
|
||||||
|
"model = Model.register(model_path = \"resnet50v2/resnet50v2.onnx\",\n",
|
||||||
|
" model_name = \"resnet50v2\",\n",
|
||||||
|
" tags = {\"onnx\": \"demo\"},\n",
|
||||||
|
" description = \"ResNet50v2 from ONNX Model Zoo\",\n",
|
||||||
|
" workspace = ws)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"#### Displaying your registered models\n",
|
||||||
|
"\n",
|
||||||
|
"You can optionally list out all the models that you have registered in this workspace."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"models = ws.models\n",
|
||||||
|
"for name, m in models.items():\n",
|
||||||
|
" print(\"Name:\", name,\"\\tVersion:\", m.version, \"\\tDescription:\", m.description, m.tags)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Write scoring file\n",
|
||||||
|
"\n",
|
||||||
|
"We are now going to deploy our ONNX model on Azure ML using the ONNX Runtime. We begin by writing a score.py file that will be invoked by the web service call. The `init()` function is called once when the container is started so we load the model using the ONNX Runtime into a global session object."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"%%writefile score.py\n",
|
||||||
|
"import json\n",
|
||||||
|
"import time\n",
|
||||||
|
"import sys\n",
|
||||||
|
"import os\n",
|
||||||
|
"from azureml.core.model import Model\n",
|
||||||
|
"import numpy as np # we're going to use numpy to process input and output data\n",
|
||||||
|
"import onnxruntime # to inference ONNX models, we use the ONNX Runtime\n",
|
||||||
|
"\n",
|
||||||
|
"def softmax(x):\n",
|
||||||
|
" x = x.reshape(-1)\n",
|
||||||
|
" e_x = np.exp(x - np.max(x))\n",
|
||||||
|
" return e_x / e_x.sum(axis=0)\n",
|
||||||
|
"\n",
|
||||||
|
"def init():\n",
|
||||||
|
" global session\n",
|
||||||
|
" model = Model.get_model_path(model_name = 'resnet50v2')\n",
|
||||||
|
" session = onnxruntime.InferenceSession(model, None)\n",
|
||||||
|
"\n",
|
||||||
|
"def preprocess(input_data_json):\n",
|
||||||
|
" # convert the JSON data into the tensor input\n",
|
||||||
|
" img_data = np.array(json.loads(input_data_json)['data']).astype('float32')\n",
|
||||||
|
" \n",
|
||||||
|
" #normalize\n",
|
||||||
|
" mean_vec = np.array([0.485, 0.456, 0.406])\n",
|
||||||
|
" stddev_vec = np.array([0.229, 0.224, 0.225])\n",
|
||||||
|
" norm_img_data = np.zeros(img_data.shape).astype('float32')\n",
|
||||||
|
" for i in range(img_data.shape[0]):\n",
|
||||||
|
" norm_img_data[i,:,:] = (img_data[i,:,:]/255 - mean_vec[i]) / stddev_vec[i]\n",
|
||||||
|
"\n",
|
||||||
|
" return norm_img_data\n",
|
||||||
|
"\n",
|
||||||
|
"def postprocess(result):\n",
|
||||||
|
" return softmax(np.array(result)).tolist()\n",
|
||||||
|
"\n",
|
||||||
|
"def run(input_data_json):\n",
|
||||||
|
" try:\n",
|
||||||
|
" start = time.time()\n",
|
||||||
|
" # load in our data which is expected as NCHW 224x224 image\n",
|
||||||
|
" input_data = preprocess(input_data_json)\n",
|
||||||
|
" input_name = session.get_inputs()[0].name # get the id of the first input of the model \n",
|
||||||
|
" result = session.run([], {input_name: input_data})\n",
|
||||||
|
" end = time.time() # stop timer\n",
|
||||||
|
" return {\"result\": postprocess(result),\n",
|
||||||
|
" \"time\": end - start}\n",
|
||||||
|
" except Exception as e:\n",
|
||||||
|
" result = str(e)\n",
|
||||||
|
" return {\"error\": result}"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Create container image"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"First we create a YAML file that specifies which dependencies we would like to see in our container."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.conda_dependencies import CondaDependencies \n",
|
||||||
|
"\n",
|
||||||
|
"myenv = CondaDependencies.create(pip_packages=[\"numpy\",\"onnxruntime\",\"azureml-core\"])\n",
|
||||||
|
"\n",
|
||||||
|
"with open(\"myenv.yml\",\"w\") as f:\n",
|
||||||
|
" f.write(myenv.serialize_to_string())"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Then we have Azure ML create the container. This step will likely take a few minutes."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.image import ContainerImage\n",
|
||||||
|
"\n",
|
||||||
|
"image_config = ContainerImage.image_configuration(execution_script = \"score.py\",\n",
|
||||||
|
" runtime = \"python\",\n",
|
||||||
|
" conda_file = \"myenv.yml\",\n",
|
||||||
|
" docker_file = \"Dockerfile\",\n",
|
||||||
|
" description = \"ONNX ResNet50 Demo\",\n",
|
||||||
|
" tags = {\"demo\": \"onnx\"}\n",
|
||||||
|
" )\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"image = ContainerImage.create(name = \"onnxresnet50v2\",\n",
|
||||||
|
" models = [model],\n",
|
||||||
|
" image_config = image_config,\n",
|
||||||
|
" workspace = ws)\n",
|
||||||
|
"\n",
|
||||||
|
"image.wait_for_creation(show_output = True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"In case you need to debug your code, the next line of code accesses the log file."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"print(image.image_build_log_uri)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"We're all set! Let's get our model chugging.\n",
|
||||||
|
"\n",
|
||||||
|
"### Deploy the container image"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.webservice import AciWebservice\n",
|
||||||
|
"\n",
|
||||||
|
"aciconfig = AciWebservice.deploy_configuration(cpu_cores = 1, \n",
|
||||||
|
" memory_gb = 1, \n",
|
||||||
|
" tags = {'demo': 'onnx'}, \n",
|
||||||
|
" description = 'web service for ResNet50 ONNX model')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"The following cell will likely take a few minutes to run as well."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.webservice import Webservice\n",
|
||||||
|
"from random import randint\n",
|
||||||
|
"\n",
|
||||||
|
"aci_service_name = 'onnx-demo-resnet50'+str(randint(0,100))\n",
|
||||||
|
"print(\"Service\", aci_service_name)\n",
|
||||||
|
"\n",
|
||||||
|
"aci_service = Webservice.deploy_from_image(deployment_config = aciconfig,\n",
|
||||||
|
" image = image,\n",
|
||||||
|
" name = aci_service_name,\n",
|
||||||
|
" workspace = ws)\n",
|
||||||
|
"\n",
|
||||||
|
"aci_service.wait_for_deployment(True)\n",
|
||||||
|
"print(aci_service.state)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"In case the deployment fails, you can check the logs. Make sure to delete your aci_service before trying again."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"if aci_service.state != 'Healthy':\n",
|
||||||
|
" # run this command for debugging.\n",
|
||||||
|
" print(aci_service.get_logs())\n",
|
||||||
|
" aci_service.delete()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Success!\n",
|
||||||
|
"\n",
|
||||||
|
"If you've made it this far, you've deployed a working web service that does image classification using an ONNX model. You can get the URL for the webservice with the code below."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"print(aci_service.scoring_uri)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"When you are eventually done using the web service, remember to delete it."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"#aci_service.delete()"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "viswamy"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3.6",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python36"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.6.5"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
|
|||||||
File diff suppressed because one or more lines are too long
@@ -1,477 +1,477 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Licensed under the MIT License."
|
"Licensed under the MIT License."
|
||||||
]
|
]
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
""
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# Deploying a web service to Azure Kubernetes Service (AKS)\n",
|
|
||||||
"This notebook shows the steps for deploying a service: registering a model, creating an image, provisioning a cluster (one time action), and deploying a service to it. \n",
|
|
||||||
"We then test and delete the service, image and model."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core import Workspace\n",
|
|
||||||
"from azureml.core.compute import AksCompute, ComputeTarget\n",
|
|
||||||
"from azureml.core.webservice import Webservice, AksWebservice\n",
|
|
||||||
"from azureml.core.image import Image\n",
|
|
||||||
"from azureml.core.model import Model"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import azureml.core\n",
|
|
||||||
"print(azureml.core.VERSION)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# Get workspace\n",
|
|
||||||
"Load existing workspace from the config file info."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.workspace import Workspace\n",
|
|
||||||
"\n",
|
|
||||||
"ws = Workspace.from_config()\n",
|
|
||||||
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# Register the model\n",
|
|
||||||
"Register an existing trained model, add description and tags."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"#Register the model\n",
|
|
||||||
"from azureml.core.model import Model\n",
|
|
||||||
"model = Model.register(model_path = \"sklearn_regression_model.pkl\", # this points to a local file\n",
|
|
||||||
" model_name = \"sklearn_regression_model.pkl\", # this is the name the model is registered as\n",
|
|
||||||
" tags = {'area': \"diabetes\", 'type': \"regression\"},\n",
|
|
||||||
" description = \"Ridge regression model to predict diabetes\",\n",
|
|
||||||
" workspace = ws)\n",
|
|
||||||
"\n",
|
|
||||||
"print(model.name, model.description, model.version)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# Create an image\n",
|
|
||||||
"Create an image using the registered model and the script that will load and run the model."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"%%writefile score.py\n",
|
|
||||||
"import pickle\n",
|
|
||||||
"import json\n",
|
|
||||||
"import numpy\n",
|
|
||||||
"from sklearn.externals import joblib\n",
|
|
||||||
"from sklearn.linear_model import Ridge\n",
|
|
||||||
"from azureml.core.model import Model\n",
|
|
||||||
"\n",
|
|
||||||
"def init():\n",
|
|
||||||
" global model\n",
|
|
||||||
" # note here \"sklearn_regression_model.pkl\" is the name of the model registered under\n",
|
|
||||||
" # this is a different behavior than before when the code is run locally, even though the code is the same.\n",
|
|
||||||
" model_path = Model.get_model_path('sklearn_regression_model.pkl')\n",
|
|
||||||
" # deserialize the model file back into a sklearn model\n",
|
|
||||||
" model = joblib.load(model_path)\n",
|
|
||||||
"\n",
|
|
||||||
"# note you can pass in multiple rows for scoring\n",
|
|
||||||
"def run(raw_data):\n",
|
|
||||||
" try:\n",
|
|
||||||
" data = json.loads(raw_data)['data']\n",
|
|
||||||
" data = numpy.array(data)\n",
|
|
||||||
" result = model.predict(data)\n",
|
|
||||||
" # you can return any data type as long as it is JSON-serializable\n",
|
|
||||||
" return result.tolist()\n",
|
|
||||||
" except Exception as e:\n",
|
|
||||||
" error = str(e)\n",
|
|
||||||
" return error"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.conda_dependencies import CondaDependencies \n",
|
|
||||||
"\n",
|
|
||||||
"myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn'])\n",
|
|
||||||
"\n",
|
|
||||||
"with open(\"myenv.yml\",\"w\") as f:\n",
|
|
||||||
" f.write(myenv.serialize_to_string())"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.image import ContainerImage\n",
|
|
||||||
"\n",
|
|
||||||
"image_config = ContainerImage.image_configuration(execution_script = \"score.py\",\n",
|
|
||||||
" runtime = \"python\",\n",
|
|
||||||
" conda_file = \"myenv.yml\",\n",
|
|
||||||
" description = \"Image with ridge regression model\",\n",
|
|
||||||
" tags = {'area': \"diabetes\", 'type': \"regression\"}\n",
|
|
||||||
" )\n",
|
|
||||||
"\n",
|
|
||||||
"image = ContainerImage.create(name = \"myimage1\",\n",
|
|
||||||
" # this is the model object\n",
|
|
||||||
" models = [model],\n",
|
|
||||||
" image_config = image_config,\n",
|
|
||||||
" workspace = ws)\n",
|
|
||||||
"\n",
|
|
||||||
"image.wait_for_creation(show_output = True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"#### Use a custom Docker image\n",
|
|
||||||
"\n",
|
|
||||||
"You can also specify a custom Docker image to be used as base image if you don't want to use the default base image provided by Azure ML. Please make sure the custom Docker image has Ubuntu >= 16.04, Conda >= 4.5.\\* and Python(3.5.\\* or 3.6.\\*).\n",
|
|
||||||
"\n",
|
|
||||||
"Only Supported for `ContainerImage`(from azureml.core.image) with `python` runtime.\n",
|
|
||||||
"```python\n",
|
|
||||||
"# use an image available in public Container Registry without authentication\n",
|
|
||||||
"image_config.base_image = \"mcr.microsoft.com/azureml/o16n-sample-user-base/ubuntu-miniconda\"\n",
|
|
||||||
"\n",
|
|
||||||
"# or, use an image available in a private Container Registry\n",
|
|
||||||
"image_config.base_image = \"myregistry.azurecr.io/mycustomimage:1.0\"\n",
|
|
||||||
"image_config.base_image_registry.address = \"myregistry.azurecr.io\"\n",
|
|
||||||
"image_config.base_image_registry.username = \"username\"\n",
|
|
||||||
"image_config.base_image_registry.password = \"password\"\n",
|
|
||||||
"\n",
|
|
||||||
"# or, use an image built during training.\n",
|
|
||||||
"image_config.base_image = run.properties[\"AzureML.DerivedImageName\"]\n",
|
|
||||||
"```\n",
|
|
||||||
"You can get the address of training image from the properties of a Run object. Only new runs submitted with azureml-sdk>=1.0.22 to AMLCompute targets will have the 'AzureML.DerivedImageName' property. Instructions on how to get a Run can be found in [manage-runs](../../training/manage-runs/manage-runs.ipynb). \n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# Provision the AKS Cluster\n",
|
|
||||||
"This is a one time setup. You can reuse this cluster for multiple deployments after it has been created. If you delete the cluster or the resource group that contains it, then you would have to recreate it."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Use the default configuration (can also provide parameters to customize)\n",
|
|
||||||
"prov_config = AksCompute.provisioning_configuration()\n",
|
|
||||||
"\n",
|
|
||||||
"aks_name = 'my-aks-9' \n",
|
|
||||||
"# Create the cluster\n",
|
|
||||||
"aks_target = ComputeTarget.create(workspace = ws, \n",
|
|
||||||
" name = aks_name, \n",
|
|
||||||
" provisioning_configuration = prov_config)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# Create AKS Cluster in an existing virtual network (optional)\n",
|
|
||||||
"See code snippet below. Check the documentation [here](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-enable-virtual-network#use-azure-kubernetes-service) for more details."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"'''\n",
|
|
||||||
"from azureml.core.compute import ComputeTarget, AksCompute\n",
|
|
||||||
"\n",
|
|
||||||
"# Create the compute configuration and set virtual network information\n",
|
|
||||||
"config = AksCompute.provisioning_configuration(location=\"eastus2\")\n",
|
|
||||||
"config.vnet_resourcegroup_name = \"mygroup\"\n",
|
|
||||||
"config.vnet_name = \"mynetwork\"\n",
|
|
||||||
"config.subnet_name = \"default\"\n",
|
|
||||||
"config.service_cidr = \"10.0.0.0/16\"\n",
|
|
||||||
"config.dns_service_ip = \"10.0.0.10\"\n",
|
|
||||||
"config.docker_bridge_cidr = \"172.17.0.1/16\"\n",
|
|
||||||
"\n",
|
|
||||||
"# Create the compute target\n",
|
|
||||||
"aks_target = ComputeTarget.create(workspace = ws,\n",
|
|
||||||
" name = \"myaks\",\n",
|
|
||||||
" provisioning_configuration = config)\n",
|
|
||||||
"'''"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# Enable SSL on the AKS Cluster (optional)\n",
|
|
||||||
"See code snippet below. Check the documentation [here](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-secure-web-service) for more details"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# provisioning_config = AksCompute.provisioning_configuration(ssl_cert_pem_file=\"cert.pem\", ssl_key_pem_file=\"key.pem\", ssl_cname=\"www.contoso.com\")"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"%%time\n",
|
|
||||||
"aks_target.wait_for_completion(show_output = True)\n",
|
|
||||||
"print(aks_target.provisioning_state)\n",
|
|
||||||
"print(aks_target.provisioning_errors)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Optional step: Attach existing AKS cluster\n",
|
|
||||||
"\n",
|
|
||||||
"If you have existing AKS cluster in your Azure subscription, you can attach it to the Workspace."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"'''\n",
|
|
||||||
"# Use the default configuration (can also provide parameters to customize)\n",
|
|
||||||
"resource_id = '/subscriptions/92c76a2f-0e1c-4216-b65e-abf7a3f34c1e/resourcegroups/raymondsdk0604/providers/Microsoft.ContainerService/managedClusters/my-aks-0605d37425356b7d01'\n",
|
|
||||||
"\n",
|
|
||||||
"create_name='my-existing-aks' \n",
|
|
||||||
"# Create the cluster\n",
|
|
||||||
"attach_config = AksCompute.attach_configuration(resource_id=resource_id)\n",
|
|
||||||
"aks_target = ComputeTarget.attach(workspace=ws, name=create_name, attach_configuration=attach_config)\n",
|
|
||||||
"# Wait for the operation to complete\n",
|
|
||||||
"aks_target.wait_for_completion(True)\n",
|
|
||||||
"'''"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# Deploy web service to AKS"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"#Set the web service configuration (using default here)\n",
|
|
||||||
"aks_config = AksWebservice.deploy_configuration()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"%%time\n",
|
|
||||||
"aks_service_name ='aks-service-1'\n",
|
|
||||||
"\n",
|
|
||||||
"aks_service = Webservice.deploy_from_image(workspace = ws, \n",
|
|
||||||
" name = aks_service_name,\n",
|
|
||||||
" image = image,\n",
|
|
||||||
" deployment_config = aks_config,\n",
|
|
||||||
" deployment_target = aks_target)\n",
|
|
||||||
"aks_service.wait_for_deployment(show_output = True)\n",
|
|
||||||
"print(aks_service.state)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# Test the web service using run method\n",
|
|
||||||
"We test the web service by passing data.\n",
|
|
||||||
"Run() method retrieves API keys behind the scenes to make sure that the call is authenticated."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"%%time\n",
|
|
||||||
"import json\n",
|
|
||||||
"\n",
|
|
||||||
"test_sample = json.dumps({'data': [\n",
|
|
||||||
" [1,2,3,4,5,6,7,8,9,10], \n",
|
|
||||||
" [10,9,8,7,6,5,4,3,2,1]\n",
|
|
||||||
"]})\n",
|
|
||||||
"test_sample = bytes(test_sample,encoding = 'utf8')\n",
|
|
||||||
"\n",
|
|
||||||
"prediction = aks_service.run(input_data = test_sample)\n",
|
|
||||||
"print(prediction)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# Test the web service using raw HTTP request (optional)\n",
|
|
||||||
"Alternatively you can construct a raw HTTP request and send it to the service. In this case you need to explicitly pass the HTTP header. This process is shown in the next 2 cells."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# retrieve the API keys. AML generates two keys.\n",
|
|
||||||
"'''\n",
|
|
||||||
"key1, Key2 = aks_service.get_keys()\n",
|
|
||||||
"print(key1)\n",
|
|
||||||
"'''"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# construct raw HTTP request and send to the service\n",
|
|
||||||
"'''\n",
|
|
||||||
"%%time\n",
|
|
||||||
"\n",
|
|
||||||
"import requests\n",
|
|
||||||
"\n",
|
|
||||||
"import json\n",
|
|
||||||
"\n",
|
|
||||||
"test_sample = json.dumps({'data': [\n",
|
|
||||||
" [1,2,3,4,5,6,7,8,9,10], \n",
|
|
||||||
" [10,9,8,7,6,5,4,3,2,1]\n",
|
|
||||||
"]})\n",
|
|
||||||
"test_sample = bytes(test_sample,encoding = 'utf8')\n",
|
|
||||||
"\n",
|
|
||||||
"# Don't forget to add key to the HTTP header.\n",
|
|
||||||
"headers = {'Content-Type':'application/json', 'Authorization': 'Bearer ' + key1}\n",
|
|
||||||
"\n",
|
|
||||||
"resp = requests.post(aks_service.scoring_uri, test_sample, headers=headers)\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"print(\"prediction:\", resp.text)\n",
|
|
||||||
"'''"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# Clean up\n",
|
|
||||||
"Delete the service, image and model."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"%%time\n",
|
|
||||||
"aks_service.delete()\n",
|
|
||||||
"image.delete()\n",
|
|
||||||
"model.delete()"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"authors": [
|
|
||||||
{
|
|
||||||
"name": "aashishb"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3.6",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python36"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.6.6"
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
{
|
||||||
"nbformat_minor": 2
|
"cell_type": "markdown",
|
||||||
}
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
""
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Deploying a web service to Azure Kubernetes Service (AKS)\n",
|
||||||
|
"This notebook shows the steps for deploying a service: registering a model, creating an image, provisioning a cluster (one time action), and deploying a service to it. \n",
|
||||||
|
"We then test and delete the service, image and model."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core import Workspace\n",
|
||||||
|
"from azureml.core.compute import AksCompute, ComputeTarget\n",
|
||||||
|
"from azureml.core.webservice import Webservice, AksWebservice\n",
|
||||||
|
"from azureml.core.image import Image\n",
|
||||||
|
"from azureml.core.model import Model"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import azureml.core\n",
|
||||||
|
"print(azureml.core.VERSION)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Get workspace\n",
|
||||||
|
"Load existing workspace from the config file info."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.workspace import Workspace\n",
|
||||||
|
"\n",
|
||||||
|
"ws = Workspace.from_config()\n",
|
||||||
|
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Register the model\n",
|
||||||
|
"Register an existing trained model, add description and tags."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"#Register the model\n",
|
||||||
|
"from azureml.core.model import Model\n",
|
||||||
|
"model = Model.register(model_path = \"sklearn_regression_model.pkl\", # this points to a local file\n",
|
||||||
|
" model_name = \"sklearn_regression_model.pkl\", # this is the name the model is registered as\n",
|
||||||
|
" tags = {'area': \"diabetes\", 'type': \"regression\"},\n",
|
||||||
|
" description = \"Ridge regression model to predict diabetes\",\n",
|
||||||
|
" workspace = ws)\n",
|
||||||
|
"\n",
|
||||||
|
"print(model.name, model.description, model.version)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Create an image\n",
|
||||||
|
"Create an image using the registered model and the script that will load and run the model."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"%%writefile score.py\n",
|
||||||
|
"import pickle\n",
|
||||||
|
"import json\n",
|
||||||
|
"import numpy\n",
|
||||||
|
"from sklearn.externals import joblib\n",
|
||||||
|
"from sklearn.linear_model import Ridge\n",
|
||||||
|
"from azureml.core.model import Model\n",
|
||||||
|
"\n",
|
||||||
|
"def init():\n",
|
||||||
|
" global model\n",
|
||||||
|
" # note here \"sklearn_regression_model.pkl\" is the name of the model registered under\n",
|
||||||
|
" # this is a different behavior than before when the code is run locally, even though the code is the same.\n",
|
||||||
|
" model_path = Model.get_model_path('sklearn_regression_model.pkl')\n",
|
||||||
|
" # deserialize the model file back into a sklearn model\n",
|
||||||
|
" model = joblib.load(model_path)\n",
|
||||||
|
"\n",
|
||||||
|
"# note you can pass in multiple rows for scoring\n",
|
||||||
|
"def run(raw_data):\n",
|
||||||
|
" try:\n",
|
||||||
|
" data = json.loads(raw_data)['data']\n",
|
||||||
|
" data = numpy.array(data)\n",
|
||||||
|
" result = model.predict(data)\n",
|
||||||
|
" # you can return any data type as long as it is JSON-serializable\n",
|
||||||
|
" return result.tolist()\n",
|
||||||
|
" except Exception as e:\n",
|
||||||
|
" error = str(e)\n",
|
||||||
|
" return error"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.conda_dependencies import CondaDependencies \n",
|
||||||
|
"\n",
|
||||||
|
"myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn'])\n",
|
||||||
|
"\n",
|
||||||
|
"with open(\"myenv.yml\",\"w\") as f:\n",
|
||||||
|
" f.write(myenv.serialize_to_string())"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.image import ContainerImage\n",
|
||||||
|
"\n",
|
||||||
|
"image_config = ContainerImage.image_configuration(execution_script = \"score.py\",\n",
|
||||||
|
" runtime = \"python\",\n",
|
||||||
|
" conda_file = \"myenv.yml\",\n",
|
||||||
|
" description = \"Image with ridge regression model\",\n",
|
||||||
|
" tags = {'area': \"diabetes\", 'type': \"regression\"}\n",
|
||||||
|
" )\n",
|
||||||
|
"\n",
|
||||||
|
"image = ContainerImage.create(name = \"myimage1\",\n",
|
||||||
|
" # this is the model object\n",
|
||||||
|
" models = [model],\n",
|
||||||
|
" image_config = image_config,\n",
|
||||||
|
" workspace = ws)\n",
|
||||||
|
"\n",
|
||||||
|
"image.wait_for_creation(show_output = True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"#### Use a custom Docker image\n",
|
||||||
|
"\n",
|
||||||
|
"You can also specify a custom Docker image to be used as base image if you don't want to use the default base image provided by Azure ML. Please make sure the custom Docker image has Ubuntu >= 16.04, Conda >= 4.5.\\* and Python(3.5.\\* or 3.6.\\*).\n",
|
||||||
|
"\n",
|
||||||
|
"Only Supported for `ContainerImage`(from azureml.core.image) with `python` runtime.\n",
|
||||||
|
"```python\n",
|
||||||
|
"# use an image available in public Container Registry without authentication\n",
|
||||||
|
"image_config.base_image = \"mcr.microsoft.com/azureml/o16n-sample-user-base/ubuntu-miniconda\"\n",
|
||||||
|
"\n",
|
||||||
|
"# or, use an image available in a private Container Registry\n",
|
||||||
|
"image_config.base_image = \"myregistry.azurecr.io/mycustomimage:1.0\"\n",
|
||||||
|
"image_config.base_image_registry.address = \"myregistry.azurecr.io\"\n",
|
||||||
|
"image_config.base_image_registry.username = \"username\"\n",
|
||||||
|
"image_config.base_image_registry.password = \"password\"\n",
|
||||||
|
"\n",
|
||||||
|
"# or, use an image built during training.\n",
|
||||||
|
"image_config.base_image = run.properties[\"AzureML.DerivedImageName\"]\n",
|
||||||
|
"```\n",
|
||||||
|
"You can get the address of training image from the properties of a Run object. Only new runs submitted with azureml-sdk>=1.0.22 to AMLCompute targets will have the 'AzureML.DerivedImageName' property. Instructions on how to get a Run can be found in [manage-runs](../../training/manage-runs/manage-runs.ipynb). \n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Provision the AKS Cluster\n",
|
||||||
|
"This is a one time setup. You can reuse this cluster for multiple deployments after it has been created. If you delete the cluster or the resource group that contains it, then you would have to recreate it."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Use the default configuration (can also provide parameters to customize)\n",
|
||||||
|
"prov_config = AksCompute.provisioning_configuration()\n",
|
||||||
|
"\n",
|
||||||
|
"aks_name = 'my-aks-9' \n",
|
||||||
|
"# Create the cluster\n",
|
||||||
|
"aks_target = ComputeTarget.create(workspace = ws, \n",
|
||||||
|
" name = aks_name, \n",
|
||||||
|
" provisioning_configuration = prov_config)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Create AKS Cluster in an existing virtual network (optional)\n",
|
||||||
|
"See code snippet below. Check the documentation [here](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-enable-virtual-network#use-azure-kubernetes-service) for more details."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"'''\n",
|
||||||
|
"from azureml.core.compute import ComputeTarget, AksCompute\n",
|
||||||
|
"\n",
|
||||||
|
"# Create the compute configuration and set virtual network information\n",
|
||||||
|
"config = AksCompute.provisioning_configuration(location=\"eastus2\")\n",
|
||||||
|
"config.vnet_resourcegroup_name = \"mygroup\"\n",
|
||||||
|
"config.vnet_name = \"mynetwork\"\n",
|
||||||
|
"config.subnet_name = \"default\"\n",
|
||||||
|
"config.service_cidr = \"10.0.0.0/16\"\n",
|
||||||
|
"config.dns_service_ip = \"10.0.0.10\"\n",
|
||||||
|
"config.docker_bridge_cidr = \"172.17.0.1/16\"\n",
|
||||||
|
"\n",
|
||||||
|
"# Create the compute target\n",
|
||||||
|
"aks_target = ComputeTarget.create(workspace = ws,\n",
|
||||||
|
" name = \"myaks\",\n",
|
||||||
|
" provisioning_configuration = config)\n",
|
||||||
|
"'''"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Enable SSL on the AKS Cluster (optional)\n",
|
||||||
|
"See code snippet below. Check the documentation [here](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-secure-web-service) for more details"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# provisioning_config = AksCompute.provisioning_configuration(ssl_cert_pem_file=\"cert.pem\", ssl_key_pem_file=\"key.pem\", ssl_cname=\"www.contoso.com\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"%%time\n",
|
||||||
|
"aks_target.wait_for_completion(show_output = True)\n",
|
||||||
|
"print(aks_target.provisioning_state)\n",
|
||||||
|
"print(aks_target.provisioning_errors)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Optional step: Attach existing AKS cluster\n",
|
||||||
|
"\n",
|
||||||
|
"If you have existing AKS cluster in your Azure subscription, you can attach it to the Workspace."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"'''\n",
|
||||||
|
"# Use the default configuration (can also provide parameters to customize)\n",
|
||||||
|
"resource_id = '/subscriptions/92c76a2f-0e1c-4216-b65e-abf7a3f34c1e/resourcegroups/raymondsdk0604/providers/Microsoft.ContainerService/managedClusters/my-aks-0605d37425356b7d01'\n",
|
||||||
|
"\n",
|
||||||
|
"create_name='my-existing-aks' \n",
|
||||||
|
"# Create the cluster\n",
|
||||||
|
"attach_config = AksCompute.attach_configuration(resource_id=resource_id)\n",
|
||||||
|
"aks_target = ComputeTarget.attach(workspace=ws, name=create_name, attach_configuration=attach_config)\n",
|
||||||
|
"# Wait for the operation to complete\n",
|
||||||
|
"aks_target.wait_for_completion(True)\n",
|
||||||
|
"'''"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Deploy web service to AKS"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"#Set the web service configuration (using default here)\n",
|
||||||
|
"aks_config = AksWebservice.deploy_configuration()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"%%time\n",
|
||||||
|
"aks_service_name ='aks-service-1'\n",
|
||||||
|
"\n",
|
||||||
|
"aks_service = Webservice.deploy_from_image(workspace = ws, \n",
|
||||||
|
" name = aks_service_name,\n",
|
||||||
|
" image = image,\n",
|
||||||
|
" deployment_config = aks_config,\n",
|
||||||
|
" deployment_target = aks_target)\n",
|
||||||
|
"aks_service.wait_for_deployment(show_output = True)\n",
|
||||||
|
"print(aks_service.state)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Test the web service using run method\n",
|
||||||
|
"We test the web service by passing data.\n",
|
||||||
|
"Run() method retrieves API keys behind the scenes to make sure that call is authenticated."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"%%time\n",
|
||||||
|
"import json\n",
|
||||||
|
"\n",
|
||||||
|
"test_sample = json.dumps({'data': [\n",
|
||||||
|
" [1,2,3,4,5,6,7,8,9,10], \n",
|
||||||
|
" [10,9,8,7,6,5,4,3,2,1]\n",
|
||||||
|
"]})\n",
|
||||||
|
"test_sample = bytes(test_sample,encoding = 'utf8')\n",
|
||||||
|
"\n",
|
||||||
|
"prediction = aks_service.run(input_data = test_sample)\n",
|
||||||
|
"print(prediction)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Test the web service using raw HTTP request (optional)\n",
|
||||||
|
"Alternatively you can construct a raw HTTP request and send it to the service. In this case you need to explicitly pass the HTTP header. This process is shown in the next 2 cells."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# retrieve the API keys. AML generates two keys.\n",
|
||||||
|
"'''\n",
|
||||||
|
"key1, Key2 = aks_service.get_keys()\n",
|
||||||
|
"print(key1)\n",
|
||||||
|
"'''"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# construct raw HTTP request and send to the service\n",
|
||||||
|
"'''\n",
|
||||||
|
"%%time\n",
|
||||||
|
"\n",
|
||||||
|
"import requests\n",
|
||||||
|
"\n",
|
||||||
|
"import json\n",
|
||||||
|
"\n",
|
||||||
|
"test_sample = json.dumps({'data': [\n",
|
||||||
|
" [1,2,3,4,5,6,7,8,9,10], \n",
|
||||||
|
" [10,9,8,7,6,5,4,3,2,1]\n",
|
||||||
|
"]})\n",
|
||||||
|
"test_sample = bytes(test_sample,encoding = 'utf8')\n",
|
||||||
|
"\n",
|
||||||
|
"# Don't forget to add key to the HTTP header.\n",
|
||||||
|
"headers = {'Content-Type':'application/json', 'Authorization': 'Bearer ' + key1}\n",
|
||||||
|
"\n",
|
||||||
|
"resp = requests.post(aks_service.scoring_uri, test_sample, headers=headers)\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"prediction:\", resp.text)\n",
|
||||||
|
"'''"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Clean up\n",
|
||||||
|
"Delete the service, image and model."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"%%time\n",
|
||||||
|
"aks_service.delete()\n",
|
||||||
|
"image.delete()\n",
|
||||||
|
"model.delete()"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "aashishb"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3.6",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python36"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.6.6"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
|
|||||||
@@ -1,453 +1,453 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Licensed under the MIT License."
|
"Licensed under the MIT License."
|
||||||
]
|
]
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
""
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Register Model, Create Image and Deploy Service\n",
|
|
||||||
"\n",
|
|
||||||
"This example shows how to deploy a web service in step-by-step fashion:\n",
|
|
||||||
"\n",
|
|
||||||
" 1. Register model\n",
|
|
||||||
" 2. Query versions of models and select one to deploy\n",
|
|
||||||
" 3. Create Docker image\n",
|
|
||||||
" 4. Query versions of images\n",
|
|
||||||
" 5. Deploy the image as web service\n",
|
|
||||||
" \n",
|
|
||||||
"**IMPORTANT**:\n",
|
|
||||||
" * This notebook requires you to first complete [train-within-notebook](../../training/train-within-notebook/train-within-notebook.ipynb) example\n",
|
|
||||||
" \n",
|
|
||||||
"The train-within-notebook example taught you how to deploy a web service directly from model in one step. This Notebook shows a more advanced approach that gives you more control over model versions and Docker image versions. "
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Prerequisites\n",
|
|
||||||
"If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, make sure you go through the [configuration](../../../configuration.ipynb) Notebook first if you haven't."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Check core SDK version number\n",
|
|
||||||
"import azureml.core\n",
|
|
||||||
"\n",
|
|
||||||
"print(\"SDK version:\", azureml.core.VERSION)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Initialize Workspace\n",
|
|
||||||
"\n",
|
|
||||||
"Initialize a workspace object from persisted configuration."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"tags": [
|
|
||||||
"create workspace"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core import Workspace\n",
|
|
||||||
"\n",
|
|
||||||
"ws = Workspace.from_config()\n",
|
|
||||||
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Register Model"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"You can add tags and descriptions to your models. Note you need to have a `sklearn_linreg_model.pkl` file in the current directory. This file is generated by the 01 notebook. The below call registers that file as a model with the same name `sklearn_linreg_model.pkl` in the workspace.\n",
|
|
||||||
"\n",
|
|
||||||
"Using tags, you can track useful information such as the name and version of the machine learning library used to train the model. Note that tags must be alphanumeric."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"tags": [
|
|
||||||
"register model from file"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.model import Model\n",
|
|
||||||
"import sklearn\n",
|
|
||||||
"\n",
|
|
||||||
"library_version = \"sklearn\"+sklearn.__version__.replace(\".\",\"x\")\n",
|
|
||||||
"\n",
|
|
||||||
"model = Model.register(model_path = \"sklearn_regression_model.pkl\",\n",
|
|
||||||
" model_name = \"sklearn_regression_model.pkl\",\n",
|
|
||||||
" tags = {'area': \"diabetes\", 'type': \"regression\", 'version': library_version},\n",
|
|
||||||
" description = \"Ridge regression model to predict diabetes\",\n",
|
|
||||||
" workspace = ws)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"You can explore the registered models within your workspace and query by tag. Models are versioned. If you call the register_model command many times with same model name, you will get multiple versions of the model with increasing version numbers."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"tags": [
|
|
||||||
"register model from file"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"regression_models = Model.list(workspace=ws, tags=['area'])\n",
|
|
||||||
"for m in regression_models:\n",
|
|
||||||
" print(\"Name:\", m.name,\"\\tVersion:\", m.version, \"\\tDescription:\", m.description, m.tags)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"You can pick a specific model to deploy"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"print(model.name, model.description, model.version, sep = '\\t')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Create Docker Image"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Show `score.py`. Note that the `sklearn_regression_model.pkl` in the `get_model_path` call is referring to a model named `sklearn_linreg_model.pkl` registered under the workspace. It is NOT referencing the local file."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"%%writefile score.py\n",
|
|
||||||
"import pickle\n",
|
|
||||||
"import json\n",
|
|
||||||
"import numpy\n",
|
|
||||||
"from sklearn.externals import joblib\n",
|
|
||||||
"from sklearn.linear_model import Ridge\n",
|
|
||||||
"from azureml.core.model import Model\n",
|
|
||||||
"\n",
|
|
||||||
"def init():\n",
|
|
||||||
" global model\n",
|
|
||||||
" # note here \"sklearn_regression_model.pkl\" is the name of the model registered under\n",
|
|
||||||
" # this is a different behavior than before when the code is run locally, even though the code is the same.\n",
|
|
||||||
" model_path = Model.get_model_path('sklearn_regression_model.pkl')\n",
|
|
||||||
" # deserialize the model file back into a sklearn model\n",
|
|
||||||
" model = joblib.load(model_path)\n",
|
|
||||||
"\n",
|
|
||||||
"# note you can pass in multiple rows for scoring\n",
|
|
||||||
"def run(raw_data):\n",
|
|
||||||
" try:\n",
|
|
||||||
" data = json.loads(raw_data)['data']\n",
|
|
||||||
" data = numpy.array(data)\n",
|
|
||||||
" result = model.predict(data)\n",
|
|
||||||
" # you can return any datatype as long as it is JSON-serializable\n",
|
|
||||||
" return result.tolist()\n",
|
|
||||||
" except Exception as e:\n",
|
|
||||||
" error = str(e)\n",
|
|
||||||
" return error"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.conda_dependencies import CondaDependencies \n",
|
|
||||||
"\n",
|
|
||||||
"myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn'])\n",
|
|
||||||
"\n",
|
|
||||||
"with open(\"myenv.yml\",\"w\") as f:\n",
|
|
||||||
" f.write(myenv.serialize_to_string())"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Note that following command can take few minutes. \n",
|
|
||||||
"\n",
|
|
||||||
"You can add tags and descriptions to images. Also, an image can contain multiple models."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"tags": [
|
|
||||||
"create image"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.image import Image, ContainerImage\n",
|
|
||||||
"\n",
|
|
||||||
"image_config = ContainerImage.image_configuration(runtime= \"python\",\n",
|
|
||||||
" execution_script=\"score.py\",\n",
|
|
||||||
" conda_file=\"myenv.yml\",\n",
|
|
||||||
" tags = {'area': \"diabetes\", 'type': \"regression\"},\n",
|
|
||||||
" description = \"Image with ridge regression model\")\n",
|
|
||||||
"\n",
|
|
||||||
"image = Image.create(name = \"myimage1\",\n",
|
|
||||||
" # this is the model object. note you can pass in 0-n models via this list-type parameter\n",
|
|
||||||
" # in case you need to reference multiple models, or none at all, in your scoring script.\n",
|
|
||||||
" models = [model],\n",
|
|
||||||
" image_config = image_config, \n",
|
|
||||||
" workspace = ws)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"tags": [
|
|
||||||
"create image"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"image.wait_for_creation(show_output = True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"#### Use a custom Docker image\n",
|
|
||||||
"\n",
|
|
||||||
"You can also specify a custom Docker image to be used as base image if you don't want to use the default base image provided by Azure ML. Please make sure the custom Docker image has Ubuntu >= 16.04, Conda >= 4.5.\\* and Python(3.5.\\* or 3.6.\\*).\n",
|
|
||||||
"\n",
|
|
||||||
"Only Supported for `ContainerImage`(from azureml.core.image) with `python` runtime.\n",
|
|
||||||
"```python\n",
|
|
||||||
"# use an image available in public Container Registry without authentication\n",
|
|
||||||
"image_config.base_image = \"mcr.microsoft.com/azureml/o16n-sample-user-base/ubuntu-miniconda\"\n",
|
|
||||||
"\n",
|
|
||||||
"# or, use an image available in a private Container Registry\n",
|
|
||||||
"image_config.base_image = \"myregistry.azurecr.io/mycustomimage:1.0\"\n",
|
|
||||||
"image_config.base_image_registry.address = \"myregistry.azurecr.io\"\n",
|
|
||||||
"image_config.base_image_registry.username = \"username\"\n",
|
|
||||||
"image_config.base_image_registry.password = \"password\"\n",
|
|
||||||
"\n",
|
|
||||||
"# or, use an image built during training.\n",
|
|
||||||
"image_config.base_image = run.properties[\"AzureML.DerivedImageName\"]\n",
|
|
||||||
"```\n",
|
|
||||||
"You can get the address of training image from the properties of a Run object. Only new runs submitted with azureml-sdk>=1.0.22 to AMLCompute targets will have the 'AzureML.DerivedImageName' property. Instructions on how to get a Run can be found in [manage-runs](../../training/manage-runs/manage-runs.ipynb). \n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"List images by tag and find out the detailed build log for debugging."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"tags": [
|
|
||||||
"create image"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"for i in Image.list(workspace = ws,tags = [\"area\"]):\n",
|
|
||||||
" print('{}(v.{} [{}]) stored at {} with build log {}'.format(i.name, i.version, i.creation_state, i.image_location, i.image_build_log_uri))"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Deploy image as web service on Azure Container Instance\n",
|
|
||||||
"\n",
|
|
||||||
"Note that the service creation can take few minutes."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"tags": [
|
|
||||||
"deploy service",
|
|
||||||
"aci"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.webservice import AciWebservice\n",
|
|
||||||
"\n",
|
|
||||||
"aciconfig = AciWebservice.deploy_configuration(cpu_cores = 1, \n",
|
|
||||||
" memory_gb = 1, \n",
|
|
||||||
" tags = {'area': \"diabetes\", 'type': \"regression\"}, \n",
|
|
||||||
" description = 'Predict diabetes using regression model')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"tags": [
|
|
||||||
"deploy service",
|
|
||||||
"aci"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.webservice import Webservice\n",
|
|
||||||
"\n",
|
|
||||||
"aci_service_name = 'my-aci-service-2'\n",
|
|
||||||
"print(aci_service_name)\n",
|
|
||||||
"aci_service = Webservice.deploy_from_image(deployment_config = aciconfig,\n",
|
|
||||||
" image = image,\n",
|
|
||||||
" name = aci_service_name,\n",
|
|
||||||
" workspace = ws)\n",
|
|
||||||
"aci_service.wait_for_deployment(True)\n",
|
|
||||||
"print(aci_service.state)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Test web service"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Call the web service with some dummy input data to get a prediction."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"tags": [
|
|
||||||
"deploy service",
|
|
||||||
"aci"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import json\n",
|
|
||||||
"\n",
|
|
||||||
"test_sample = json.dumps({'data': [\n",
|
|
||||||
" [1,2,3,4,5,6,7,8,9,10], \n",
|
|
||||||
" [10,9,8,7,6,5,4,3,2,1]\n",
|
|
||||||
"]})\n",
|
|
||||||
"test_sample = bytes(test_sample,encoding = 'utf8')\n",
|
|
||||||
"\n",
|
|
||||||
"prediction = aci_service.run(input_data=test_sample)\n",
|
|
||||||
"print(prediction)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Delete ACI to clean up"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"tags": [
|
|
||||||
"deploy service",
|
|
||||||
"aci"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"aci_service.delete()"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"authors": [
|
|
||||||
{
|
|
||||||
"name": "aashishb"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3.6",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python36"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.6.6"
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
{
|
||||||
"nbformat_minor": 2
|
"cell_type": "markdown",
|
||||||
}
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
""
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Register Model, Create Image and Deploy Service\n",
|
||||||
|
"\n",
|
||||||
|
"This example shows how to deploy a web service in step-by-step fashion:\n",
|
||||||
|
"\n",
|
||||||
|
" 1. Register model\n",
|
||||||
|
" 2. Query versions of models and select one to deploy\n",
|
||||||
|
" 3. Create Docker image\n",
|
||||||
|
" 4. Query versions of images\n",
|
||||||
|
" 5. Deploy the image as web service\n",
|
||||||
|
" \n",
|
||||||
|
"**IMPORTANT**:\n",
|
||||||
|
" * This notebook requires you to first complete [train-within-notebook](../../training/train-within-notebook/train-within-notebook.ipynb) example\n",
|
||||||
|
" \n",
|
||||||
|
"The train-within-notebook example taught you how to deploy a web service directly from model in one step. This Notebook shows a more advanced approach that gives you more control over model versions and Docker image versions. "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Prerequisites\n",
|
||||||
|
"If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, make sure you go through the [configuration](../../../configuration.ipynb) Notebook first if you haven't."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Check core SDK version number\n",
|
||||||
|
"import azureml.core\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"SDK version:\", azureml.core.VERSION)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Initialize Workspace\n",
|
||||||
|
"\n",
|
||||||
|
"Initialize a workspace object from persisted configuration."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"tags": [
|
||||||
|
"create workspace"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core import Workspace\n",
|
||||||
|
"\n",
|
||||||
|
"ws = Workspace.from_config()\n",
|
||||||
|
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Register Model"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"You can add tags and descriptions to your models. Note you need to have a `sklearn_linreg_model.pkl` file in the current directory. This file is generated by the 01 notebook. The below call registers that file as a model with the same name `sklearn_linreg_model.pkl` in the workspace.\n",
|
||||||
|
"\n",
|
||||||
|
"Using tags, you can track useful information such as the name and version of the machine learning library used to train the model. Note that tags must be alphanumeric."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"tags": [
|
||||||
|
"register model from file"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.model import Model\n",
|
||||||
|
"import sklearn\n",
|
||||||
|
"\n",
|
||||||
|
"library_version = \"sklearn\"+sklearn.__version__.replace(\".\",\"x\")\n",
|
||||||
|
"\n",
|
||||||
|
"model = Model.register(model_path = \"sklearn_regression_model.pkl\",\n",
|
||||||
|
" model_name = \"sklearn_regression_model.pkl\",\n",
|
||||||
|
" tags = {'area': \"diabetes\", 'type': \"regression\", 'version': library_version},\n",
|
||||||
|
" description = \"Ridge regression model to predict diabetes\",\n",
|
||||||
|
" workspace = ws)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"You can explore the registered models within your workspace and query by tag. Models are versioned. If you call the register_model command many times with same model name, you will get multiple versions of the model with increasing version numbers."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"tags": [
|
||||||
|
"register model from file"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"regression_models = Model.list(workspace=ws, tags=['area'])\n",
|
||||||
|
"for m in regression_models:\n",
|
||||||
|
" print(\"Name:\", m.name,\"\\tVersion:\", m.version, \"\\tDescription:\", m.description, m.tags)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"You can pick a specific model to deploy"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"print(model.name, model.description, model.version, sep = '\\t')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Create Docker Image"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Show `score.py`. Note that the `sklearn_regression_model.pkl` in the `get_model_path` call is referring to a model named `sklearn_linreg_model.pkl` registered under the workspace. It is NOT referenceing the local file."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"%%writefile score.py\n",
|
||||||
|
"import pickle\n",
|
||||||
|
"import json\n",
|
||||||
|
"import numpy\n",
|
||||||
|
"from sklearn.externals import joblib\n",
|
||||||
|
"from sklearn.linear_model import Ridge\n",
|
||||||
|
"from azureml.core.model import Model\n",
|
||||||
|
"\n",
|
||||||
|
"def init():\n",
|
||||||
|
" global model\n",
|
||||||
|
" # note here \"sklearn_regression_model.pkl\" is the name of the model registered under\n",
|
||||||
|
" # this is a different behavior than before when the code is run locally, even though the code is the same.\n",
|
||||||
|
" model_path = Model.get_model_path('sklearn_regression_model.pkl')\n",
|
||||||
|
" # deserialize the model file back into a sklearn model\n",
|
||||||
|
" model = joblib.load(model_path)\n",
|
||||||
|
"\n",
|
||||||
|
"# note you can pass in multiple rows for scoring\n",
|
||||||
|
"def run(raw_data):\n",
|
||||||
|
" try:\n",
|
||||||
|
" data = json.loads(raw_data)['data']\n",
|
||||||
|
" data = numpy.array(data)\n",
|
||||||
|
" result = model.predict(data)\n",
|
||||||
|
" # you can return any datatype as long as it is JSON-serializable\n",
|
||||||
|
" return result.tolist()\n",
|
||||||
|
" except Exception as e:\n",
|
||||||
|
" error = str(e)\n",
|
||||||
|
" return error"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.conda_dependencies import CondaDependencies \n",
|
||||||
|
"\n",
|
||||||
|
"myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn'])\n",
|
||||||
|
"\n",
|
||||||
|
"with open(\"myenv.yml\",\"w\") as f:\n",
|
||||||
|
" f.write(myenv.serialize_to_string())"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Note that following command can take few minutes. \n",
|
||||||
|
"\n",
|
||||||
|
"You can add tags and descriptions to images. Also, an image can contain multiple models."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"tags": [
|
||||||
|
"create image"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.image import Image, ContainerImage\n",
|
||||||
|
"\n",
|
||||||
|
"image_config = ContainerImage.image_configuration(runtime= \"python\",\n",
|
||||||
|
" execution_script=\"score.py\",\n",
|
||||||
|
" conda_file=\"myenv.yml\",\n",
|
||||||
|
" tags = {'area': \"diabetes\", 'type': \"regression\"},\n",
|
||||||
|
" description = \"Image with ridge regression model\")\n",
|
||||||
|
"\n",
|
||||||
|
"image = Image.create(name = \"myimage1\",\n",
|
||||||
|
" # this is the model object. note you can pass in 0-n models via this list-type parameter\n",
|
||||||
|
" # in case you need to reference multiple models, or none at all, in your scoring script.\n",
|
||||||
|
" models = [model],\n",
|
||||||
|
" image_config = image_config, \n",
|
||||||
|
" workspace = ws)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"tags": [
|
||||||
|
"create image"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"image.wait_for_creation(show_output = True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"#### Use a custom Docker image\n",
|
||||||
|
"\n",
|
||||||
|
"You can also specify a custom Docker image to be used as base image if you don't want to use the default base image provided by Azure ML. Please make sure the custom Docker image has Ubuntu >= 16.04, Conda >= 4.5.\\* and Python(3.5.\\* or 3.6.\\*).\n",
|
||||||
|
"\n",
|
||||||
|
"Only Supported for `ContainerImage`(from azureml.core.image) with `python` runtime.\n",
|
||||||
|
"```python\n",
|
||||||
|
"# use an image available in public Container Registry without authentication\n",
|
||||||
|
"image_config.base_image = \"mcr.microsoft.com/azureml/o16n-sample-user-base/ubuntu-miniconda\"\n",
|
||||||
|
"\n",
|
||||||
|
"# or, use an image available in a private Container Registry\n",
|
||||||
|
"image_config.base_image = \"myregistry.azurecr.io/mycustomimage:1.0\"\n",
|
||||||
|
"image_config.base_image_registry.address = \"myregistry.azurecr.io\"\n",
|
||||||
|
"image_config.base_image_registry.username = \"username\"\n",
|
||||||
|
"image_config.base_image_registry.password = \"password\"\n",
|
||||||
|
"\n",
|
||||||
|
"# or, use an image built during training.\n",
|
||||||
|
"image_config.base_image = run.properties[\"AzureML.DerivedImageName\"]\n",
|
||||||
|
"```\n",
|
||||||
|
"You can get the address of training image from the properties of a Run object. Only new runs submitted with azureml-sdk>=1.0.22 to AMLCompute targets will have the 'AzureML.DerivedImageName' property. Instructions on how to get a Run can be found in [manage-runs](../../training/manage-runs/manage-runs.ipynb). \n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"List images by tag and find out the detailed build log for debugging."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"tags": [
|
||||||
|
"create image"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"for i in Image.list(workspace = ws,tags = [\"area\"]):\n",
|
||||||
|
" print('{}(v.{} [{}]) stored at {} with build log {}'.format(i.name, i.version, i.creation_state, i.image_location, i.image_build_log_uri))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Deploy image as web service on Azure Container Instance\n",
|
||||||
|
"\n",
|
||||||
|
"Note that the service creation can take few minutes."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"tags": [
|
||||||
|
"deploy service",
|
||||||
|
"aci"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.webservice import AciWebservice\n",
|
||||||
|
"\n",
|
||||||
|
"aciconfig = AciWebservice.deploy_configuration(cpu_cores = 1, \n",
|
||||||
|
" memory_gb = 1, \n",
|
||||||
|
" tags = {'area': \"diabetes\", 'type': \"regression\"}, \n",
|
||||||
|
" description = 'Predict diabetes using regression model')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"tags": [
|
||||||
|
"deploy service",
|
||||||
|
"aci"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.webservice import Webservice\n",
|
||||||
|
"\n",
|
||||||
|
"aci_service_name = 'my-aci-service-2'\n",
|
||||||
|
"print(aci_service_name)\n",
|
||||||
|
"aci_service = Webservice.deploy_from_image(deployment_config = aciconfig,\n",
|
||||||
|
" image = image,\n",
|
||||||
|
" name = aci_service_name,\n",
|
||||||
|
" workspace = ws)\n",
|
||||||
|
"aci_service.wait_for_deployment(True)\n",
|
||||||
|
"print(aci_service.state)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Test web service"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Call the web service with some dummy input data to get a prediction."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"tags": [
|
||||||
|
"deploy service",
|
||||||
|
"aci"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import json\n",
|
||||||
|
"\n",
|
||||||
|
"test_sample = json.dumps({'data': [\n",
|
||||||
|
" [1,2,3,4,5,6,7,8,9,10], \n",
|
||||||
|
" [10,9,8,7,6,5,4,3,2,1]\n",
|
||||||
|
"]})\n",
|
||||||
|
"test_sample = bytes(test_sample,encoding = 'utf8')\n",
|
||||||
|
"\n",
|
||||||
|
"prediction = aci_service.run(input_data=test_sample)\n",
|
||||||
|
"print(prediction)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Delete ACI to clean up"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"tags": [
|
||||||
|
"deploy service",
|
||||||
|
"aci"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"aci_service.delete()"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "aashishb"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3.6",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python36"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.6.6"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -1,279 +1,279 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"# Breast cancer diagnosis classification with scikit-learn (run model explainer locally)"
|
"# Breast cancer diagnosis classification with scikit-learn (run model explainer locally)"
|
||||||
]
|
]
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
""
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
|
||||||
"\n",
|
|
||||||
"Licensed under the MIT License."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Explain a model with the AML explain-model package\n",
|
|
||||||
"\n",
|
|
||||||
"1. Train a SVM classification model using Scikit-learn\n",
|
|
||||||
"2. Run 'explain_model' with full data in local mode, which doesn't contact any Azure services\n",
|
|
||||||
"3. Run 'explain_model' with summarized data in local mode, which doesn't contact any Azure services\n",
|
|
||||||
"4. Visualize the global and local explanations with the visualization dashboard."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from sklearn.datasets import load_breast_cancer\n",
|
|
||||||
"from sklearn import svm\n",
|
|
||||||
"from azureml.explain.model.tabular_explainer import TabularExplainer"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# 1. Run model explainer locally with full data"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Load the breast cancer diagnosis data"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"breast_cancer_data = load_breast_cancer()\n",
|
|
||||||
"classes = breast_cancer_data.target_names.tolist()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Split data into train and test\n",
|
|
||||||
"from sklearn.model_selection import train_test_split\n",
|
|
||||||
"x_train, x_test, y_train, y_test = train_test_split(breast_cancer_data.data, breast_cancer_data.target, test_size=0.2, random_state=0)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Train a SVM classification model, which you want to explain"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"clf = svm.SVC(gamma=0.001, C=100., probability=True)\n",
|
|
||||||
"model = clf.fit(x_train, y_train)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Explain predictions on your local machine"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"tabular_explainer = TabularExplainer(model, x_train, features=breast_cancer_data.feature_names, classes=classes)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Explain overall model predictions (global explanation)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Passing in test dataset for evaluation examples - note it must be a representative sample of the original data\n",
|
|
||||||
"# x_train can be passed as well, but with more examples explanations will take longer although they may be more accurate\n",
|
|
||||||
"global_explanation = tabular_explainer.explain_global(x_test)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Sorted SHAP values\n",
|
|
||||||
"print('ranked global importance values: {}'.format(global_explanation.get_ranked_global_values()))\n",
|
|
||||||
"# Corresponding feature names\n",
|
|
||||||
"print('ranked global importance names: {}'.format(global_explanation.get_ranked_global_names()))\n",
|
|
||||||
"# feature ranks (based on original order of features)\n",
|
|
||||||
"print('global importance rank: {}'.format(global_explanation.global_importance_rank))\n",
|
|
||||||
"# per class feature names\n",
|
|
||||||
"print('ranked per class feature names: {}'.format(global_explanation.get_ranked_per_class_names()))\n",
|
|
||||||
"# per class feature importance values\n",
|
|
||||||
"print('ranked per class feature values: {}'.format(global_explanation.get_ranked_per_class_values()))"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"dict(zip(global_explanation.get_ranked_global_names(), global_explanation.get_ranked_global_values()))"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Explain overall model predictions as a collection of local (instance-level) explanations"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# feature shap values for all features and all data points in the training data\n",
|
|
||||||
"print('local importance values: {}'.format(global_explanation.local_importance_values))"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Explain local data points (individual instances)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# explain the first member of the test set\n",
|
|
||||||
"instance_num = 0\n",
|
|
||||||
"local_explanation = tabular_explainer.explain_local(x_test[instance_num,:])"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# get the prediction for the first member of the test set and explain why model made that prediction\n",
|
|
||||||
"prediction_value = clf.predict(x_test)[instance_num]\n",
|
|
||||||
"\n",
|
|
||||||
"sorted_local_importance_values = local_explanation.get_ranked_local_values()[prediction_value]\n",
|
|
||||||
"sorted_local_importance_names = local_explanation.get_ranked_local_names()[prediction_value]\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"dict(zip(sorted_local_importance_names, sorted_local_importance_values))"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# 2. Load visualization dashboard"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Note you will need to have extensions enabled prior to jupyter kernel starting\n",
|
|
||||||
"!jupyter nbextension install --py --sys-prefix azureml.contrib.explain.model.visualize\n",
|
|
||||||
"!jupyter nbextension enable --py --sys-prefix azureml.contrib.explain.model.visualize\n",
|
|
||||||
"# Or, in Jupyter Labs, uncomment below\n",
|
|
||||||
"# jupyter labextension install @jupyter-widgets/jupyterlab-manager\n",
|
|
||||||
"# jupyter labextension install microsoft-mli-widget"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.contrib.explain.model.visualize import ExplanationDashboard"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"ExplanationDashboard(global_explanation, model, x_test)"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"authors": [
|
|
||||||
{
|
|
||||||
"name": "mesameki"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3.6",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python36"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.6.8"
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
{
|
||||||
"nbformat_minor": 2
|
"cell_type": "markdown",
|
||||||
}
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
""
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||||
|
"\n",
|
||||||
|
"Licensed under the MIT License."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Explain a model with the AML explain-model package\n",
|
||||||
|
"\n",
|
||||||
|
"1. Train a SVM classification model using Scikit-learn\n",
|
||||||
|
"2. Run 'explain_model' with full data in local mode, which doesn't contact any Azure services\n",
|
||||||
|
"3. Run 'explain_model' with summarized data in local mode, which doesn't contact any Azure services\n",
|
||||||
|
"4. Visualize the global and local explanations with the visualization dashboard."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from sklearn.datasets import load_breast_cancer\n",
|
||||||
|
"from sklearn import svm\n",
|
||||||
|
"from azureml.explain.model.tabular_explainer import TabularExplainer"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# 1. Run model explainer locally with full data"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Load the breast cancer diagnosis data"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"breast_cancer_data = load_breast_cancer()\n",
|
||||||
|
"classes = breast_cancer_data.target_names.tolist()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Split data into train and test\n",
|
||||||
|
"from sklearn.model_selection import train_test_split\n",
|
||||||
|
"x_train, x_test, y_train, y_test = train_test_split(breast_cancer_data.data, breast_cancer_data.target, test_size=0.2, random_state=0)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Train a SVM classification model, which you want to explain"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"clf = svm.SVC(gamma=0.001, C=100., probability=True)\n",
|
||||||
|
"model = clf.fit(x_train, y_train)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Explain predictions on your local machine"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"tabular_explainer = TabularExplainer(model, x_train, features=breast_cancer_data.feature_names, classes=classes)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Explain overall model predictions (global explanation)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Passing in test dataset for evaluation examples - note it must be a representative sample of the original data\n",
|
||||||
|
"# x_train can be passed as well, but with more examples explanations will take longer although they may be more accurate\n",
|
||||||
|
"global_explanation = tabular_explainer.explain_global(x_test)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Sorted SHAP values\n",
|
||||||
|
"print('ranked global importance values: {}'.format(global_explanation.get_ranked_global_values()))\n",
|
||||||
|
"# Corresponding feature names\n",
|
||||||
|
"print('ranked global importance names: {}'.format(global_explanation.get_ranked_global_names()))\n",
|
||||||
|
"# feature ranks (based on original order of features)\n",
|
||||||
|
"print('global importance rank: {}'.format(global_explanation.global_importance_rank))\n",
|
||||||
|
"# per class feature names\n",
|
||||||
|
"print('ranked per class feature names: {}'.format(global_explanation.get_ranked_per_class_names()))\n",
|
||||||
|
"# per class feature importance values\n",
|
||||||
|
"print('ranked per class feature values: {}'.format(global_explanation.get_ranked_per_class_values()))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"dict(zip(global_explanation.get_ranked_global_names(), global_explanation.get_ranked_global_values()))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Explain overall model predictions as a collection of local (instance-level) explanations"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# feature shap values for all features and all data points in the training data\n",
|
||||||
|
"print('local importance values: {}'.format(global_explanation.local_importance_values))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Explain local data points (individual instances)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# explain the first member of the test set\n",
|
||||||
|
"instance_num = 0\n",
|
||||||
|
"local_explanation = tabular_explainer.explain_local(x_test[instance_num,:])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# get the prediction for the first member of the test set and explain why model made that prediction\n",
|
||||||
|
"prediction_value = clf.predict(x_test)[instance_num]\n",
|
||||||
|
"\n",
|
||||||
|
"sorted_local_importance_values = local_explanation.get_ranked_local_values()[prediction_value]\n",
|
||||||
|
"sorted_local_importance_names = local_explanation.get_ranked_local_names()[prediction_value]\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"dict(zip(sorted_local_importance_names, sorted_local_importance_values))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# 2. Load visualization dashboard"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Note you will need to have extensions enabled prior to jupyter kernel starting\n",
|
||||||
|
"!jupyter nbextension install --py --sys-prefix azureml.contrib.explain.model.visualize\n",
|
||||||
|
"!jupyter nbextension enable --py --sys-prefix azureml.contrib.explain.model.visualize\n",
|
||||||
|
"# Or, in Jupyter Labs, uncomment below\n",
|
||||||
|
"# jupyter labextension install @jupyter-widgets/jupyterlab-manager\n",
|
||||||
|
"# jupyter labextension install microsoft-mli-widget"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.contrib.explain.model.visualize import ExplanationDashboard"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"ExplanationDashboard(global_explanation, model, x_test)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "mesameki"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3.6",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python36"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.6.8"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
|
|||||||
@@ -1,280 +1,280 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"# Iris flower classification with scikit-learn (run model explainer locally)"
|
"# Iris flower classification with scikit-learn (run model explainer locally)"
|
||||||
]
|
]
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
""
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
|
||||||
"\n",
|
|
||||||
"Licensed under the MIT License."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Explain a model with the AML explain-model package\n",
|
|
||||||
"\n",
|
|
||||||
"1. Train a SVM classification model using Scikit-learn\n",
|
|
||||||
"2. Run 'explain_model' with full data in local mode, which doesn't contact any Azure services\n",
|
|
||||||
"3. Run 'explain_model' with summarized data in local mode, which doesn't contact any Azure services\n",
|
|
||||||
"4. Visualize the global and local explanations with the visualization dashboard."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from sklearn.datasets import load_iris\n",
|
|
||||||
"from sklearn import svm\n",
|
|
||||||
"from azureml.explain.model.tabular_explainer import TabularExplainer"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# 1. Run model explainer locally with full data"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Load the breast cancer diagnosis data"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"iris = load_iris()\n",
|
|
||||||
"X = iris['data']\n",
|
|
||||||
"y = iris['target']\n",
|
|
||||||
"classes = iris['target_names']\n",
|
|
||||||
"feature_names = iris['feature_names']"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Split data into train and test\n",
|
|
||||||
"from sklearn.model_selection import train_test_split\n",
|
|
||||||
"x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Train a SVM classification model, which you want to explain"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"clf = svm.SVC(gamma=0.001, C=100., probability=True)\n",
|
|
||||||
"model = clf.fit(x_train, y_train)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Explain predictions on your local machine"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"tabular_explainer = TabularExplainer(model, x_train, features = feature_names, classes=classes)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Explain overall model predictions (global explanation)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"global_explanation = tabular_explainer.explain_global(x_test)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Sorted SHAP values\n",
|
|
||||||
"print('ranked global importance values: {}'.format(global_explanation.get_ranked_global_values()))\n",
|
|
||||||
"# Corresponding feature names\n",
|
|
||||||
"print('ranked global importance names: {}'.format(global_explanation.get_ranked_global_names()))\n",
|
|
||||||
"# feature ranks (based on original order of features)\n",
|
|
||||||
"print('global importance rank: {}'.format(global_explanation.global_importance_rank))\n",
|
|
||||||
"# per class feature names\n",
|
|
||||||
"print('ranked per class feature names: {}'.format(global_explanation.get_ranked_per_class_names()))\n",
|
|
||||||
"# per class feature importance values\n",
|
|
||||||
"print('ranked per class feature values: {}'.format(global_explanation.get_ranked_per_class_values()))"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"dict(zip(global_explanation.get_ranked_global_names(), global_explanation.get_ranked_global_values()))"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Explain overall model predictions as a collection of local (instance-level) explanations"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# feature shap values for all features and all data points in the training data\n",
|
|
||||||
"print('local importance values: {}'.format(global_explanation.local_importance_values))"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Explain local data points (individual instances)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# explain the first member of the test set\n",
|
|
||||||
"instance_num = 0\n",
|
|
||||||
"local_explanation = tabular_explainer.explain_local(x_test[instance_num,:])"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# get the prediction for the first member of the test set and explain why model made that prediction\n",
|
|
||||||
"prediction_value = clf.predict(x_test)[instance_num]\n",
|
|
||||||
"\n",
|
|
||||||
"sorted_local_importance_values = local_explanation.get_ranked_local_values()[prediction_value]\n",
|
|
||||||
"sorted_local_importance_names = local_explanation.get_ranked_local_names()[prediction_value]\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"dict(zip(sorted_local_importance_names, sorted_local_importance_values))"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Load visualization dashboard"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Note you will need to have extensions enabled prior to jupyter kernel starting\n",
|
|
||||||
"!jupyter nbextension install --py --sys-prefix azureml.contrib.explain.model.visualize\n",
|
|
||||||
"!jupyter nbextension enable --py --sys-prefix azureml.contrib.explain.model.visualize\n",
|
|
||||||
"# Or, in Jupyter Labs, uncomment below\n",
|
|
||||||
"# jupyter labextension install @jupyter-widgets/jupyterlab-manager\n",
|
|
||||||
"# jupyter labextension install microsoft-mli-widget"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.contrib.explain.model.visualize import ExplanationDashboard"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"ExplanationDashboard(global_explanation, model, x_test)"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"authors": [
|
|
||||||
{
|
|
||||||
"name": "mesameki"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3.6",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python36"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.6.8"
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
{
|
||||||
"nbformat_minor": 2
|
"cell_type": "markdown",
|
||||||
}
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
""
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||||
|
"\n",
|
||||||
|
"Licensed under the MIT License."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Explain a model with the AML explain-model package\n",
|
||||||
|
"\n",
|
||||||
|
"1. Train a SVM classification model using Scikit-learn\n",
|
||||||
|
"2. Run 'explain_model' with full data in local mode, which doesn't contact any Azure services\n",
|
||||||
|
"3. Run 'explain_model' with summarized data in local mode, which doesn't contact any Azure services\n",
|
||||||
|
"4. Visualize the global and local explanations with the visualization dashboard."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from sklearn.datasets import load_iris\n",
|
||||||
|
"from sklearn import svm\n",
|
||||||
|
"from azureml.explain.model.tabular_explainer import TabularExplainer"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# 1. Run model explainer locally with full data"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Load the breast cancer diagnosis data"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"iris = load_iris()\n",
|
||||||
|
"X = iris['data']\n",
|
||||||
|
"y = iris['target']\n",
|
||||||
|
"classes = iris['target_names']\n",
|
||||||
|
"feature_names = iris['feature_names']"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Split data into train and test\n",
|
||||||
|
"from sklearn.model_selection import train_test_split\n",
|
||||||
|
"x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Train a SVM classification model, which you want to explain"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"clf = svm.SVC(gamma=0.001, C=100., probability=True)\n",
|
||||||
|
"model = clf.fit(x_train, y_train)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Explain predictions on your local machine"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"tabular_explainer = TabularExplainer(model, x_train, features = feature_names, classes=classes)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Explain overall model predictions (global explanation)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"global_explanation = tabular_explainer.explain_global(x_test)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Sorted SHAP values\n",
|
||||||
|
"print('ranked global importance values: {}'.format(global_explanation.get_ranked_global_values()))\n",
|
||||||
|
"# Corresponding feature names\n",
|
||||||
|
"print('ranked global importance names: {}'.format(global_explanation.get_ranked_global_names()))\n",
|
||||||
|
"# feature ranks (based on original order of features)\n",
|
||||||
|
"print('global importance rank: {}'.format(global_explanation.global_importance_rank))\n",
|
||||||
|
"# per class feature names\n",
|
||||||
|
"print('ranked per class feature names: {}'.format(global_explanation.get_ranked_per_class_names()))\n",
|
||||||
|
"# per class feature importance values\n",
|
||||||
|
"print('ranked per class feature values: {}'.format(global_explanation.get_ranked_per_class_values()))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"dict(zip(global_explanation.get_ranked_global_names(), global_explanation.get_ranked_global_values()))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Explain overall model predictions as a collection of local (instance-level) explanations"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# feature shap values for all features and all data points in the training data\n",
|
||||||
|
"print('local importance values: {}'.format(global_explanation.local_importance_values))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Explain local data points (individual instances)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# explain the first member of the test set\n",
|
||||||
|
"instance_num = 0\n",
|
||||||
|
"local_explanation = tabular_explainer.explain_local(x_test[instance_num,:])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# get the prediction for the first member of the test set and explain why model made that prediction\n",
|
||||||
|
"prediction_value = clf.predict(x_test)[instance_num]\n",
|
||||||
|
"\n",
|
||||||
|
"sorted_local_importance_values = local_explanation.get_ranked_local_values()[prediction_value]\n",
|
||||||
|
"sorted_local_importance_names = local_explanation.get_ranked_local_names()[prediction_value]\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"dict(zip(sorted_local_importance_names, sorted_local_importance_values))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Load visualization dashboard"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Note you will need to have extensions enabled prior to jupyter kernel starting\n",
|
||||||
|
"!jupyter nbextension install --py --sys-prefix azureml.contrib.explain.model.visualize\n",
|
||||||
|
"!jupyter nbextension enable --py --sys-prefix azureml.contrib.explain.model.visualize\n",
|
||||||
|
"# Or, in Jupyter Labs, uncomment below\n",
|
||||||
|
"# jupyter labextension install @jupyter-widgets/jupyterlab-manager\n",
|
||||||
|
"# jupyter labextension install microsoft-mli-widget"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.contrib.explain.model.visualize import ExplanationDashboard"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"ExplanationDashboard(global_explanation, model, x_test)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "mesameki"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3.6",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python36"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.6.8"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
|
|||||||
@@ -1,272 +1,272 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"# Boston Housing Price Prediction with scikit-learn (run model explainer locally)"
|
"# Boston Housing Price Prediction with scikit-learn (run model explainer locally)"
|
||||||
]
|
]
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
""
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
|
||||||
"\n",
|
|
||||||
"Licensed under the MIT License."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Explain a model with the AML explain-model package\n",
|
|
||||||
"\n",
|
|
||||||
"1. Train a GradientBoosting regression model using Scikit-learn\n",
|
|
||||||
"2. Run 'explain_model' with full dataset in local mode, which doesn't contact any Azure services.\n",
|
|
||||||
"3. Run 'explain_model' with summarized dataset in local mode, which doesn't contact any Azure services.\n",
|
|
||||||
"4. Visualize the global and local explanations with the visualization dashboard."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from sklearn import datasets\n",
|
|
||||||
"from sklearn.ensemble import GradientBoostingRegressor\n",
|
|
||||||
"from azureml.explain.model.tabular_explainer import TabularExplainer"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# 1. Run model explainer locally with full data"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Load the Boston house price data"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"boston_data = datasets.load_boston()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Split data into train and test\n",
|
|
||||||
"from sklearn.model_selection import train_test_split\n",
|
|
||||||
"x_train, x_test, y_train, y_test = train_test_split(boston_data.data, boston_data.target, test_size=0.2, random_state=0)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Train a GradientBoosting Regression model, which you want to explain"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"reg = GradientBoostingRegressor(n_estimators=100, max_depth=4,\n",
|
|
||||||
" learning_rate=0.1, loss='huber',\n",
|
|
||||||
" random_state=1)\n",
|
|
||||||
"model = reg.fit(x_train, y_train)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Explain predictions on your local machine"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"tabular_explainer = TabularExplainer(model, x_train, features = boston_data.feature_names)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Explain overall model predictions (global explanation)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Passing in test dataset for evaluation examples - note it must be a representative sample of the original data\n",
|
|
||||||
"# x_train can be passed as well, but with more examples explanations will take longer although they may be more accurate\n",
|
|
||||||
"global_explanation = tabular_explainer.explain_global(x_test)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Sorted SHAP values \n",
|
|
||||||
"print('ranked global importance values: {}'.format(global_explanation.get_ranked_global_values()))\n",
|
|
||||||
"# Corresponding feature names\n",
|
|
||||||
"print('ranked global importance names: {}'.format(global_explanation.get_ranked_global_names()))\n",
|
|
||||||
"# feature ranks (based on original order of features)\n",
|
|
||||||
"print('global importance rank: {}'.format(global_explanation.global_importance_rank))"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"dict(zip(global_explanation.get_ranked_global_names(), global_explanation.get_ranked_global_values()))"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Explain overall model predictions as a collection of local (instance-level) explanations"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# feature shap values for all features and all data points in the training data\n",
|
|
||||||
"print('local importance values: {}'.format(global_explanation.local_importance_values))"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Explain local data points (individual instances)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"local_explanation = tabular_explainer.explain_local(x_test[0,:])"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# sorted local feature importance information; reflects the original feature order\n",
|
|
||||||
"sorted_local_importance_names = local_explanation.get_ranked_local_names()\n",
|
|
||||||
"sorted_local_importance_values = local_explanation.get_ranked_local_values()\n",
|
|
||||||
"\n",
|
|
||||||
"print('sorted local importance names: {}'.format(sorted_local_importance_names))\n",
|
|
||||||
"print('sorted local importance values: {}'.format(sorted_local_importance_values))"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Load visualization dashboard"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Note you will need to have extensions enabled prior to jupyter kernel starting\n",
|
|
||||||
"!jupyter nbextension install --py --sys-prefix azureml.contrib.explain.model.visualize\n",
|
|
||||||
"!jupyter nbextension enable --py --sys-prefix azureml.contrib.explain.model.visualize\n",
|
|
||||||
"# Or, in Jupyter Labs, uncomment below\n",
|
|
||||||
"# jupyter labextension install @jupyter-widgets/jupyterlab-manager\n",
|
|
||||||
"# jupyter labextension install microsoft-mli-widget"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.contrib.explain.model.visualize import ExplanationDashboard"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"ExplanationDashboard(global_explanation, model, x_test)"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"authors": [
|
|
||||||
{
|
|
||||||
"name": "mesameki"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3.6",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python36"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.6.8"
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
{
|
||||||
"nbformat_minor": 2
|
"cell_type": "markdown",
|
||||||
}
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
""
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||||
|
"\n",
|
||||||
|
"Licensed under the MIT License."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Explain a model with the AML explain-model package\n",
|
||||||
|
"\n",
|
||||||
|
"1. Train a GradientBoosting regression model using Scikit-learn\n",
|
||||||
|
"2. Run 'explain_model' with full dataset in local mode, which doesn't contact any Azure services.\n",
|
||||||
|
"3. Run 'explain_model' with summarized dataset in local mode, which doesn't contact any Azure services.\n",
|
||||||
|
"4. Visualize the global and local explanations with the visualization dashboard."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from sklearn import datasets\n",
|
||||||
|
"from sklearn.ensemble import GradientBoostingRegressor\n",
|
||||||
|
"from azureml.explain.model.tabular_explainer import TabularExplainer"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# 1. Run model explainer locally with full data"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Load the Boston house price data"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"boston_data = datasets.load_boston()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Split data into train and test\n",
|
||||||
|
"from sklearn.model_selection import train_test_split\n",
|
||||||
|
"x_train, x_test, y_train, y_test = train_test_split(boston_data.data, boston_data.target, test_size=0.2, random_state=0)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Train a GradientBoosting Regression model, which you want to explain"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"reg = GradientBoostingRegressor(n_estimators=100, max_depth=4,\n",
|
||||||
|
" learning_rate=0.1, loss='huber',\n",
|
||||||
|
" random_state=1)\n",
|
||||||
|
"model = reg.fit(x_train, y_train)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Explain predictions on your local machine"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"tabular_explainer = TabularExplainer(model, x_train, features = boston_data.feature_names)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Explain overall model predictions (global explanation)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Passing in test dataset for evaluation examples - note it must be a representative sample of the original data\n",
|
||||||
|
"# x_train can be passed as well, but with more examples explanations will take longer although they may be more accurate\n",
|
||||||
|
"global_explanation = tabular_explainer.explain_global(x_test)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Sorted SHAP values \n",
|
||||||
|
"print('ranked global importance values: {}'.format(global_explanation.get_ranked_global_values()))\n",
|
||||||
|
"# Corresponding feature names\n",
|
||||||
|
"print('ranked global importance names: {}'.format(global_explanation.get_ranked_global_names()))\n",
|
||||||
|
"# feature ranks (based on original order of features)\n",
|
||||||
|
"print('global importance rank: {}'.format(global_explanation.global_importance_rank))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"dict(zip(global_explanation.get_ranked_global_names(), global_explanation.get_ranked_global_values()))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Explain overall model predictions as a collection of local (instance-level) explanations"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# feature shap values for all features and all data points in the training data\n",
|
||||||
|
"print('local importance values: {}'.format(global_explanation.local_importance_values))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Explain local data points (individual instances)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"local_explanation = tabular_explainer.explain_local(x_test[0,:])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# sorted local feature importance information; reflects the original feature order\n",
|
||||||
|
"sorted_local_importance_names = local_explanation.get_ranked_local_names()\n",
|
||||||
|
"sorted_local_importance_values = local_explanation.get_ranked_local_values()\n",
|
||||||
|
"\n",
|
||||||
|
"print('sorted local importance names: {}'.format(sorted_local_importance_names))\n",
|
||||||
|
"print('sorted local importance values: {}'.format(sorted_local_importance_values))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Load visualization dashboard"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Note you will need to have extensions enabled prior to jupyter kernel starting\n",
|
||||||
|
"!jupyter nbextension install --py --sys-prefix azureml.contrib.explain.model.visualize\n",
|
||||||
|
"!jupyter nbextension enable --py --sys-prefix azureml.contrib.explain.model.visualize\n",
|
||||||
|
"# Or, in Jupyter Labs, uncomment below\n",
|
||||||
|
"# jupyter labextension install @jupyter-widgets/jupyterlab-manager\n",
|
||||||
|
"# jupyter labextension install microsoft-mli-widget"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.contrib.explain.model.visualize import ExplanationDashboard"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"ExplanationDashboard(global_explanation, model, x_test)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "mesameki"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3.6",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python36"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.6.8"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
|
|||||||
@@ -1,302 +1,337 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"# Summary\n",
|
"# Summary\n",
|
||||||
"From raw data that is a mixture of categoricals and numeric, featurize the categoricals using one hot encoding. Use tabular explainer to get explain object and then get raw feature importances"
|
"From raw data that is a mixture of categoricals and numeric, featurize the categoricals using one hot encoding. Use tabular explainer to get explain object and then get raw feature importances"
|
||||||
]
|
]
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
|
||||||
"\n",
|
|
||||||
"Licensed under the MIT License."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
""
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Explain a model with the AML explain-model package on raw features\n",
|
|
||||||
"\n",
|
|
||||||
"1. Train a Logistic Regression model using Scikit-learn\n",
|
|
||||||
"2. Run 'explain_model' with full dataset in local mode, which doesn't contact any Azure services.\n",
|
|
||||||
"3. Run 'explain_model' with summarized dataset in local mode, which doesn't contact any Azure services.\n",
|
|
||||||
"4. Visualize the global and local explanations with the visualization dashboard."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"This example needs sklearn-pandas. If it is not installed, uncomment and run the following line."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"#!pip install sklearn-pandas"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from sklearn.pipeline import Pipeline\n",
|
|
||||||
"from sklearn.impute import SimpleImputer\n",
|
|
||||||
"from sklearn.preprocessing import StandardScaler, OneHotEncoder\n",
|
|
||||||
"from sklearn.linear_model import LogisticRegression\n",
|
|
||||||
"from azureml.explain.model.tabular_explainer import TabularExplainer\n",
|
|
||||||
"from sklearn_pandas import DataFrameMapper\n",
|
|
||||||
"import pandas as pd\n",
|
|
||||||
"import numpy as np"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"titanic_url = ('https://raw.githubusercontent.com/amueller/'\n",
|
|
||||||
" 'scipy-2017-sklearn/091d371/notebooks/datasets/titanic3.csv')\n",
|
|
||||||
"data = pd.read_csv(titanic_url)\n",
|
|
||||||
"# fill missing values\n",
|
|
||||||
"data = data.fillna(method=\"ffill\")\n",
|
|
||||||
"data = data.fillna(method=\"bfill\")"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# 1. Run model explainer locally with full data"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Similar to example [here](https://scikit-learn.org/stable/auto_examples/compose/plot_column_transformer_mixed_types.html#sphx-glr-auto-examples-compose-plot-column-transformer-mixed-types-py), use a subset of columns"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from sklearn.model_selection import train_test_split\n",
|
|
||||||
"\n",
|
|
||||||
"numeric_features = ['age', 'fare']\n",
|
|
||||||
"categorical_features = ['embarked', 'sex', 'pclass']\n",
|
|
||||||
"\n",
|
|
||||||
"y = data['survived'].values\n",
|
|
||||||
"X = data[categorical_features + numeric_features]\n",
|
|
||||||
"\n",
|
|
||||||
"x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from sklearn.pipeline import Pipeline\n",
|
|
||||||
"from sklearn.impute import SimpleImputer\n",
|
|
||||||
"from sklearn.preprocessing import StandardScaler, OneHotEncoder\n",
|
|
||||||
"from sklearn_pandas import DataFrameMapper\n",
|
|
||||||
"\n",
|
|
||||||
"# Impute, standardize the numeric features and one-hot encode the categorical features. \n",
|
|
||||||
"\n",
|
|
||||||
"transformations = [\n",
|
|
||||||
" ([\"age\", \"fare\"], Pipeline(steps=[\n",
|
|
||||||
" ('imputer', SimpleImputer(strategy='median')),\n",
|
|
||||||
" ('scaler', StandardScaler())\n",
|
|
||||||
" ])),\n",
|
|
||||||
" ([\"embarked\"], Pipeline(steps=[\n",
|
|
||||||
" (\"imputer\", SimpleImputer(strategy='constant', fill_value='missing')), \n",
|
|
||||||
" (\"encoder\", OneHotEncoder(sparse=False))])),\n",
|
|
||||||
" ([\"sex\", \"pclass\"], OneHotEncoder(sparse=False)) \n",
|
|
||||||
"]\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"# Append classifier to preprocessing pipeline.\n",
|
|
||||||
"# Now we have a full prediction pipeline.\n",
|
|
||||||
"clf = Pipeline(steps=[('preprocessor', DataFrameMapper(transformations)),\n",
|
|
||||||
" ('classifier', LogisticRegression(solver='lbfgs'))])"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Train a Logistic Regression model, which you want to explain"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"model = clf.fit(x_train, y_train)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Explain predictions on your local machine"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"tabular_explainer = TabularExplainer(clf.steps[-1][1], initialization_examples=x_train, features=x_train.columns, transformations=transformations)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Passing in test dataset for evaluation examples - note it must be a representative sample of the original data\n",
|
|
||||||
"# x_train can be passed as well, but with more examples explanations will take longer although they may be more accurate\n",
|
|
||||||
"global_explanation = tabular_explainer.explain_global(x_test)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"sorted_global_importance_values = global_explanation.get_ranked_global_values()\n",
|
|
||||||
"sorted_global_importance_names = global_explanation.get_ranked_global_names()\n",
|
|
||||||
"dict(zip(sorted_global_importance_names, sorted_global_importance_values))"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Explain overall model predictions as a collection of local (instance-level) explanations"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# explain the first member of the test set\n",
|
|
||||||
"local_explanation = tabular_explainer.explain_local(x_test[:1])"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# get the prediction for the first member of the test set and explain why model made that prediction\n",
|
|
||||||
"prediction_value = clf.predict(x_test)[0]\n",
|
|
||||||
"\n",
|
|
||||||
"sorted_local_importance_values = local_explanation.get_ranked_local_values()[prediction_value]\n",
|
|
||||||
"sorted_local_importance_names = local_explanation.get_ranked_local_names()[prediction_value]\n",
|
|
||||||
"\n",
|
|
||||||
"# Sorted local SHAP values\n",
|
|
||||||
"print('ranked local importance values: {}'.format(sorted_local_importance_values))\n",
|
|
||||||
"# Corresponding feature names\n",
|
|
||||||
"print('ranked local importance names: {}'.format(sorted_local_importance_names))"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# 2. Load visualization dashboard"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Note you will need to have extensions enabled prior to jupyter kernel starting\n",
|
|
||||||
"!jupyter nbextension install --py --sys-prefix azureml.contrib.explain.model.visualize\n",
|
|
||||||
"!jupyter nbextension enable --py --sys-prefix azureml.contrib.explain.model.visualize\n",
|
|
||||||
"# Or, in Jupyter Labs, uncomment below\n",
|
|
||||||
"# jupyter labextension install @jupyter-widgets/jupyterlab-manager\n",
|
|
||||||
"# jupyter labextension install microsoft-mli-widget"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.contrib.explain.model.visualize import ExplanationDashboard"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"ExplanationDashboard(global_explanation, model, x_test)"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"authors": [
|
|
||||||
{
|
|
||||||
"name": "mesameki"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3.6",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python36"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.6.8"
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
{
|
||||||
"nbformat_minor": 2
|
"cell_type": "markdown",
|
||||||
}
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||||
|
"\n",
|
||||||
|
"Licensed under the MIT License."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
""
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Explain a model with the AML explain-model package on raw features\n",
|
||||||
|
"\n",
|
||||||
|
"1. Train a Logistic Regression model using Scikit-learn\n",
|
||||||
|
"2. Run 'explain_model' with full dataset in local mode, which doesn't contact any Azure services.\n",
|
||||||
|
"3. Run 'explain_model' with summarized dataset in local mode, which doesn't contact any Azure services.\n",
|
||||||
|
"4. Visualize the global and local explanations with the visualization dashboard."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from sklearn.pipeline import Pipeline\n",
|
||||||
|
"from sklearn.impute import SimpleImputer\n",
|
||||||
|
"from sklearn.preprocessing import StandardScaler, OneHotEncoder\n",
|
||||||
|
"from sklearn.linear_model import LogisticRegression\n",
|
||||||
|
"from azureml.explain.model.tabular_explainer import TabularExplainer\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"import numpy as np"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"titanic_url = ('https://raw.githubusercontent.com/amueller/'\n",
|
||||||
|
" 'scipy-2017-sklearn/091d371/notebooks/datasets/titanic3.csv')\n",
|
||||||
|
"data = pd.read_csv(titanic_url)\n",
|
||||||
|
"# fill missing values\n",
|
||||||
|
"data = data.fillna(method=\"ffill\")\n",
|
||||||
|
"data = data.fillna(method=\"bfill\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# 1. Run model explainer locally with full data"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Similar to example [here](https://scikit-learn.org/stable/auto_examples/compose/plot_column_transformer_mixed_types.html#sphx-glr-auto-examples-compose-plot-column-transformer-mixed-types-py), use a subset of columns"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from sklearn.model_selection import train_test_split\n",
|
||||||
|
"\n",
|
||||||
|
"numeric_features = ['age', 'fare']\n",
|
||||||
|
"categorical_features = ['embarked', 'sex', 'pclass']\n",
|
||||||
|
"\n",
|
||||||
|
"y = data['survived'].values\n",
|
||||||
|
"X = data[categorical_features + numeric_features]\n",
|
||||||
|
"\n",
|
||||||
|
"x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"sklearn imports"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from sklearn.pipeline import Pipeline\n",
|
||||||
|
"from sklearn.impute import SimpleImputer\n",
|
||||||
|
"from sklearn.preprocessing import StandardScaler, OneHotEncoder"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"We can explain raw features by either using a `sklearn.compose.ColumnTransformer` or a list of fitted transformer tuples. The cell below uses `sklearn.compose.ColumnTransformer`. In case you want to run the example with the list of fitted transformer tuples, comment the cell below and uncomment the cell that follows after. "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from sklearn.compose import ColumnTransformer\n",
|
||||||
|
"\n",
|
||||||
|
"transformations = ColumnTransformer([\n",
|
||||||
|
" (\"age_fare\", Pipeline(steps=[\n",
|
||||||
|
" ('imputer', SimpleImputer(strategy='median')),\n",
|
||||||
|
" ('scaler', StandardScaler())\n",
|
||||||
|
" ]), [\"age\", \"fare\"]),\n",
|
||||||
|
" (\"embarked\", Pipeline(steps=[\n",
|
||||||
|
" (\"imputer\", SimpleImputer(strategy='constant', fill_value='missing')), \n",
|
||||||
|
" (\"encoder\", OneHotEncoder(sparse=False))]), [\"embarked\"]),\n",
|
||||||
|
" (\"sex_pclass\", OneHotEncoder(sparse=False), [\"sex\", \"pclass\"]) \n",
|
||||||
|
"])\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"# Append classifier to preprocessing pipeline.\n",
|
||||||
|
"# Now we have a full prediction pipeline.\n",
|
||||||
|
"clf = Pipeline(steps=[('preprocessor', transformations),\n",
|
||||||
|
" ('classifier', LogisticRegression(solver='lbfgs'))])\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"'''\n",
|
||||||
|
"# Uncomment below if sklearn-pandas is not installed\n",
|
||||||
|
"#!pip install sklearn-pandas\n",
|
||||||
|
"from sklearn_pandas import DataFrameMapper\n",
|
||||||
|
"\n",
|
||||||
|
"# Impute, standardize the numeric features and one-hot encode the categorical features. \n",
|
||||||
|
"\n",
|
||||||
|
"transformations = [\n",
|
||||||
|
" ([\"age\", \"fare\"], Pipeline(steps=[\n",
|
||||||
|
" ('imputer', SimpleImputer(strategy='median')),\n",
|
||||||
|
" ('scaler', StandardScaler())\n",
|
||||||
|
" ])),\n",
|
||||||
|
" ([\"embarked\"], Pipeline(steps=[\n",
|
||||||
|
" (\"imputer\", SimpleImputer(strategy='constant', fill_value='missing')), \n",
|
||||||
|
" (\"encoder\", OneHotEncoder(sparse=False))])),\n",
|
||||||
|
" ([\"sex\", \"pclass\"], OneHotEncoder(sparse=False)) \n",
|
||||||
|
"]\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"# Append classifier to preprocessing pipeline.\n",
|
||||||
|
"# Now we have a full prediction pipeline.\n",
|
||||||
|
"clf = Pipeline(steps=[('preprocessor', DataFrameMapper(transformations)),\n",
|
||||||
|
" ('classifier', LogisticRegression(solver='lbfgs'))])\n",
|
||||||
|
"'''"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Train a Logistic Regression model, which you want to explain"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"model = clf.fit(x_train, y_train)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Explain predictions on your local machine"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"tabular_explainer = TabularExplainer(clf.steps[-1][1], initialization_examples=x_train, features=x_train.columns, transformations=transformations)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Passing in test dataset for evaluation examples - note it must be a representative sample of the original data\n",
|
||||||
|
"# x_train can be passed as well, but with more examples explanations will take longer although they may be more accurate\n",
|
||||||
|
"global_explanation = tabular_explainer.explain_global(x_test)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"sorted_global_importance_values = global_explanation.get_ranked_global_values()\n",
|
||||||
|
"sorted_global_importance_names = global_explanation.get_ranked_global_names()\n",
|
||||||
|
"dict(zip(sorted_global_importance_names, sorted_global_importance_values))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Explain overall model predictions as a collection of local (instance-level) explanations"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# explain the first member of the test set\n",
|
||||||
|
"local_explanation = tabular_explainer.explain_local(x_test[:1])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# get the prediction for the first member of the test set and explain why model made that prediction\n",
|
||||||
|
"prediction_value = clf.predict(x_test)[0]\n",
|
||||||
|
"\n",
|
||||||
|
"sorted_local_importance_values = local_explanation.get_ranked_local_values()[prediction_value]\n",
|
||||||
|
"sorted_local_importance_names = local_explanation.get_ranked_local_names()[prediction_value]\n",
|
||||||
|
"\n",
|
||||||
|
"# Sorted local SHAP values\n",
|
||||||
|
"print('ranked local importance values: {}'.format(sorted_local_importance_values))\n",
|
||||||
|
"# Corresponding feature names\n",
|
||||||
|
"print('ranked local importance names: {}'.format(sorted_local_importance_names))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# 2. Load visualization dashboard"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Note you will need to have extensions enabled prior to jupyter kernel starting\n",
|
||||||
|
"!jupyter nbextension install --py --sys-prefix azureml.contrib.explain.model.visualize\n",
|
||||||
|
"!jupyter nbextension enable --py --sys-prefix azureml.contrib.explain.model.visualize\n",
|
||||||
|
"# Or, in Jupyter Labs, uncomment below\n",
|
||||||
|
"# jupyter labextension install @jupyter-widgets/jupyterlab-manager\n",
|
||||||
|
"# jupyter labextension install microsoft-mli-widget"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.contrib.explain.model.visualize import ExplanationDashboard"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"ExplanationDashboard(global_explanation, model, x_test)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "mesameki"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3.6",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python36"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.6.8"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
|
|||||||
@@ -1,262 +1,262 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"# Breast cancer diagnosis classification with scikit-learn (save model explanations via AML Run History)"
|
"# Breast cancer diagnosis classification with scikit-learn (save model explanations via AML Run History)"
|
||||||
]
|
]
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
""
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
|
||||||
"\n",
|
|
||||||
"Licensed under the MIT License."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Explain a model with the AML explain-model package\n",
|
|
||||||
"\n",
|
|
||||||
"1. Train a SVM classification model using Scikit-learn\n",
|
|
||||||
"2. Run 'explain_model' with AML Run History, which leverages run history service to store and manage the explanation data"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from sklearn.datasets import load_breast_cancer\n",
|
|
||||||
"from sklearn import svm\n",
|
|
||||||
"from azureml.explain.model.tabular_explainer import TabularExplainer"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# 1. Run model explainer locally with full data"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Load the breast cancer diagnosis data"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"breast_cancer_data = load_breast_cancer()\n",
|
|
||||||
"classes = breast_cancer_data.target_names.tolist()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Split data into train and test\n",
|
|
||||||
"from sklearn.model_selection import train_test_split\n",
|
|
||||||
"x_train, x_test, y_train, y_test = train_test_split(breast_cancer_data.data, breast_cancer_data.target, test_size=0.2, random_state=0)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Train a SVM classification model, which you want to explain"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"clf = svm.SVC(gamma=0.001, C=100., probability=True)\n",
|
|
||||||
"model = clf.fit(x_train, y_train)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Explain predictions on your local machine"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"tabular_explainer = TabularExplainer(model, x_train, features=breast_cancer_data.feature_names, classes=classes)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Explain overall model predictions (global explanation)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Passing in test dataset for evaluation examples - note it must be a representative sample of the original data\n",
|
|
||||||
"# x_train can be passed as well, but with more examples explanations will take longer although they may be more accurate\n",
|
|
||||||
"global_explanation = tabular_explainer.explain_global(x_test)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# 2. Save Model Explanation With AML Run History"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import azureml.core\n",
|
|
||||||
"from azureml.core import Workspace, Experiment, Run\n",
|
|
||||||
"from azureml.explain.model.tabular_explainer import TabularExplainer\n",
|
|
||||||
"from azureml.contrib.explain.model.explanation.explanation_client import ExplanationClient\n",
|
|
||||||
"# Check core SDK version number\n",
|
|
||||||
"print(\"SDK version:\", azureml.core.VERSION)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"ws = Workspace.from_config()\n",
|
|
||||||
"print('Workspace name: ' + ws.name, \n",
|
|
||||||
" 'Azure region: ' + ws.location, \n",
|
|
||||||
" 'Subscription id: ' + ws.subscription_id, \n",
|
|
||||||
" 'Resource group: ' + ws.resource_group, sep = '\\n')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"experiment_name = 'explain_model'\n",
|
|
||||||
"experiment = Experiment(ws, experiment_name)\n",
|
|
||||||
"run = experiment.start_logging()\n",
|
|
||||||
"client = ExplanationClient.from_run(run)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Uploading model explanation data for storage or visualization in webUX\n",
|
|
||||||
"# The explanation can then be downloaded on any compute\n",
|
|
||||||
"client.upload_model_explanation(global_explanation)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Get model explanation data\n",
|
|
||||||
"explanation = client.download_model_explanation()\n",
|
|
||||||
"local_importance_values = explanation.local_importance_values\n",
|
|
||||||
"expected_values = explanation.expected_values"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Get the top k (e.g., 4) most important features with their importance values\n",
|
|
||||||
"explanation = client.download_model_explanation(top_k=4)\n",
|
|
||||||
"global_importance_values = explanation.get_ranked_global_values()\n",
|
|
||||||
"global_importance_names = explanation.get_ranked_global_names()\n",
|
|
||||||
"per_class_names = explanation.get_ranked_per_class_names()[0]\n",
|
|
||||||
"per_class_values = explanation.get_ranked_per_class_values()[0]"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"print('per class feature importance values: {}'.format(per_class_values))\n",
|
|
||||||
"print('per class feature importance names: {}'.format(per_class_names))"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"dict(zip(per_class_names, per_class_values))"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"authors": [
|
|
||||||
{
|
|
||||||
"name": "mesameki"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3.6",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python36"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.6.8"
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
{
|
||||||
"nbformat_minor": 2
|
"cell_type": "markdown",
|
||||||
}
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
""
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||||
|
"\n",
|
||||||
|
"Licensed under the MIT License."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Explain a model with the AML explain-model package\n",
|
||||||
|
"\n",
|
||||||
|
"1. Train a SVM classification model using Scikit-learn\n",
|
||||||
|
"2. Run 'explain_model' with AML Run History, which leverages run history service to store and manage the explanation data"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from sklearn.datasets import load_breast_cancer\n",
|
||||||
|
"from sklearn import svm\n",
|
||||||
|
"from azureml.explain.model.tabular_explainer import TabularExplainer"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# 1. Run model explainer locally with full data"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Load the breast cancer diagnosis data"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"breast_cancer_data = load_breast_cancer()\n",
|
||||||
|
"classes = breast_cancer_data.target_names.tolist()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Split data into train and test\n",
|
||||||
|
"from sklearn.model_selection import train_test_split\n",
|
||||||
|
"x_train, x_test, y_train, y_test = train_test_split(breast_cancer_data.data, breast_cancer_data.target, test_size=0.2, random_state=0)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Train a SVM classification model, which you want to explain"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"clf = svm.SVC(gamma=0.001, C=100., probability=True)\n",
|
||||||
|
"model = clf.fit(x_train, y_train)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Explain predictions on your local machine"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"tabular_explainer = TabularExplainer(model, x_train, features=breast_cancer_data.feature_names, classes=classes)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Explain overall model predictions (global explanation)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Passing in test dataset for evaluation examples - note it must be a representative sample of the original data\n",
|
||||||
|
"# x_train can be passed as well, but with more examples explanations will take longer although they may be more accurate\n",
|
||||||
|
"global_explanation = tabular_explainer.explain_global(x_test)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# 2. Save Model Explanation With AML Run History"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import azureml.core\n",
|
||||||
|
"from azureml.core import Workspace, Experiment, Run\n",
|
||||||
|
"from azureml.explain.model.tabular_explainer import TabularExplainer\n",
|
||||||
|
"from azureml.contrib.explain.model.explanation.explanation_client import ExplanationClient\n",
|
||||||
|
"# Check core SDK version number\n",
|
||||||
|
"print(\"SDK version:\", azureml.core.VERSION)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"ws = Workspace.from_config()\n",
|
||||||
|
"print('Workspace name: ' + ws.name, \n",
|
||||||
|
" 'Azure region: ' + ws.location, \n",
|
||||||
|
" 'Subscription id: ' + ws.subscription_id, \n",
|
||||||
|
" 'Resource group: ' + ws.resource_group, sep = '\\n')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"experiment_name = 'explain_model'\n",
|
||||||
|
"experiment = Experiment(ws, experiment_name)\n",
|
||||||
|
"run = experiment.start_logging()\n",
|
||||||
|
"client = ExplanationClient.from_run(run)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Uploading model explanation data for storage or visualization in webUX\n",
|
||||||
|
"# The explanation can then be downloaded on any compute\n",
|
||||||
|
"client.upload_model_explanation(global_explanation)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Get model explanation data\n",
|
||||||
|
"explanation = client.download_model_explanation()\n",
|
||||||
|
"local_importance_values = explanation.local_importance_values\n",
|
||||||
|
"expected_values = explanation.expected_values"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Get the top k (e.g., 4) most important features with their importance values\n",
|
||||||
|
"explanation = client.download_model_explanation(top_k=4)\n",
|
||||||
|
"global_importance_values = explanation.get_ranked_global_values()\n",
|
||||||
|
"global_importance_names = explanation.get_ranked_global_names()\n",
|
||||||
|
"per_class_names = explanation.get_ranked_per_class_names()[0]\n",
|
||||||
|
"per_class_values = explanation.get_ranked_per_class_values()[0]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"print('per class feature importance values: {}'.format(per_class_values))\n",
|
||||||
|
"print('per class feature importance names: {}'.format(per_class_names))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"dict(zip(per_class_names, per_class_values))"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "mesameki"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3.6",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python36"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.6.8"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
|
|||||||
@@ -1,276 +1,276 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"# Boston Housing Price Prediction with scikit-learn (save model explanations via AML Run History)"
|
"# Boston Housing Price Prediction with scikit-learn (save model explanations via AML Run History)"
|
||||||
]
|
]
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
""
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
|
||||||
"\n",
|
|
||||||
"Licensed under the MIT License."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Explain a model with the AML explain-model package\n",
|
|
||||||
"\n",
|
|
||||||
"1. Train a GradientBoosting regression model using Scikit-learn\n",
|
|
||||||
"2. Run 'explain_model' with AML Run History, which leverages run history service to store and manage the explanation data"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# Save Model Explanation With AML Run History"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"#Import Iris dataset\n",
|
|
||||||
"from sklearn import datasets\n",
|
|
||||||
"from sklearn.ensemble import GradientBoostingRegressor\n",
|
|
||||||
"\n",
|
|
||||||
"import azureml.core\n",
|
|
||||||
"from azureml.core import Workspace, Experiment, Run\n",
|
|
||||||
"from azureml.explain.model.tabular_explainer import TabularExplainer\n",
|
|
||||||
"from azureml.contrib.explain.model.explanation.explanation_client import ExplanationClient\n",
|
|
||||||
"# Check core SDK version number\n",
|
|
||||||
"print(\"SDK version:\", azureml.core.VERSION)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"ws = Workspace.from_config()\n",
|
|
||||||
"print('Workspace name: ' + ws.name, \n",
|
|
||||||
" 'Azure region: ' + ws.location, \n",
|
|
||||||
" 'Subscription id: ' + ws.subscription_id, \n",
|
|
||||||
" 'Resource group: ' + ws.resource_group, sep = '\\n')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"experiment_name = 'explain_model'\n",
|
|
||||||
"experiment = Experiment(ws, experiment_name)\n",
|
|
||||||
"run = experiment.start_logging()\n",
|
|
||||||
"client = ExplanationClient.from_run(run)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Load the Boston house price data"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"boston_data = datasets.load_boston()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Split data into train and test\n",
|
|
||||||
"from sklearn.model_selection import train_test_split\n",
|
|
||||||
"x_train, x_test, y_train, y_test = train_test_split(boston_data.data, boston_data.target, test_size=0.2, random_state=0)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Train a GradientBoosting Regression model, which you want to explain"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"clf = GradientBoostingRegressor(n_estimators=100, max_depth=4,\n",
|
|
||||||
" learning_rate=0.1, loss='huber',\n",
|
|
||||||
" random_state=1)\n",
|
|
||||||
"model = clf.fit(x_train, y_train)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Explain predictions on your local machine"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"tabular_explainer = TabularExplainer(model, x_train, features=boston_data.feature_names)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Explain overall model predictions (global explanation)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Passing in test dataset for evaluation examples - note it must be a representative sample of the original data\n",
|
|
||||||
"# x_train can be passed as well, but with more examples explanations will take longer although they may be more accurate\n",
|
|
||||||
"global_explanation = tabular_explainer.explain_global(x_test)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Uploading model explanation data for storage or visualization in webUX\n",
|
|
||||||
"# The explanation can then be downloaded on any compute\n",
|
|
||||||
"client.upload_model_explanation(global_explanation)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Get model explanation data\n",
|
|
||||||
"explanation = client.download_model_explanation()\n",
|
|
||||||
"local_importance_values = explanation.local_importance_values\n",
|
|
||||||
"expected_values = explanation.expected_values"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Print the values\n",
|
|
||||||
"print('expected values: {}'.format(expected_values))\n",
|
|
||||||
"print('local importance values: {}'.format(local_importance_values))"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Get the top k (e.g., 4) most important features with their importance values\n",
|
|
||||||
"explanation = client.download_model_explanation(top_k=4)\n",
|
|
||||||
"global_importance_values = explanation.get_ranked_global_values()\n",
|
|
||||||
"global_importance_names = explanation.get_ranked_global_names()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"print('global importance values: {}'.format(global_importance_values))\n",
|
|
||||||
"print('global importance names: {}'.format(global_importance_names))"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Explain individual instance predictions (local explanation) ##### needs to get updated with the new build"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"local_explanation = tabular_explainer.explain_local(x_test[0,:])"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# local feature importance information\n",
|
|
||||||
"local_importance_values = local_explanation.local_importance_values\n",
|
|
||||||
"print('local importance values: {}'.format(local_importance_values))"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"authors": [
|
|
||||||
{
|
|
||||||
"name": "mesameki"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3.6",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python36"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.6.8"
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
{
|
||||||
"nbformat_minor": 2
|
"cell_type": "markdown",
|
||||||
}
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
""
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||||
|
"\n",
|
||||||
|
"Licensed under the MIT License."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Explain a model with the AML explain-model package\n",
|
||||||
|
"\n",
|
||||||
|
"1. Train a GradientBoosting regression model using Scikit-learn\n",
|
||||||
|
"2. Run 'explain_model' with AML Run History, which leverages run history service to store and manage the explanation data"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Save Model Explanation With AML Run History"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"#Import Iris dataset\n",
|
||||||
|
"from sklearn import datasets\n",
|
||||||
|
"from sklearn.ensemble import GradientBoostingRegressor\n",
|
||||||
|
"\n",
|
||||||
|
"import azureml.core\n",
|
||||||
|
"from azureml.core import Workspace, Experiment, Run\n",
|
||||||
|
"from azureml.explain.model.tabular_explainer import TabularExplainer\n",
|
||||||
|
"from azureml.contrib.explain.model.explanation.explanation_client import ExplanationClient\n",
|
||||||
|
"# Check core SDK version number\n",
|
||||||
|
"print(\"SDK version:\", azureml.core.VERSION)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"ws = Workspace.from_config()\n",
|
||||||
|
"print('Workspace name: ' + ws.name, \n",
|
||||||
|
" 'Azure region: ' + ws.location, \n",
|
||||||
|
" 'Subscription id: ' + ws.subscription_id, \n",
|
||||||
|
" 'Resource group: ' + ws.resource_group, sep = '\\n')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"experiment_name = 'explain_model'\n",
|
||||||
|
"experiment = Experiment(ws, experiment_name)\n",
|
||||||
|
"run = experiment.start_logging()\n",
|
||||||
|
"client = ExplanationClient.from_run(run)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Load the Boston house price data"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"boston_data = datasets.load_boston()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Split data into train and test\n",
|
||||||
|
"from sklearn.model_selection import train_test_split\n",
|
||||||
|
"x_train, x_test, y_train, y_test = train_test_split(boston_data.data, boston_data.target, test_size=0.2, random_state=0)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Train a GradientBoosting Regression model, which you want to explain"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"clf = GradientBoostingRegressor(n_estimators=100, max_depth=4,\n",
|
||||||
|
" learning_rate=0.1, loss='huber',\n",
|
||||||
|
" random_state=1)\n",
|
||||||
|
"model = clf.fit(x_train, y_train)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Explain predictions on your local machine"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"tabular_explainer = TabularExplainer(model, x_train, features=boston_data.feature_names)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Explain overall model predictions (global explanation)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Passing in test dataset for evaluation examples - note it must be a representative sample of the original data\n",
|
||||||
|
"# x_train can be passed as well, but with more examples explanations will take longer although they may be more accurate\n",
|
||||||
|
"global_explanation = tabular_explainer.explain_global(x_test)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Uploading model explanation data for storage or visualization in webUX\n",
|
||||||
|
"# The explanation can then be downloaded on any compute\n",
|
||||||
|
"client.upload_model_explanation(global_explanation)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Get model explanation data\n",
|
||||||
|
"explanation = client.download_model_explanation()\n",
|
||||||
|
"local_importance_values = explanation.local_importance_values\n",
|
||||||
|
"expected_values = explanation.expected_values"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Print the values\n",
|
||||||
|
"print('expected values: {}'.format(expected_values))\n",
|
||||||
|
"print('local importance values: {}'.format(local_importance_values))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Get the top k (e.g., 4) most important features with their importance values\n",
|
||||||
|
"explanation = client.download_model_explanation(top_k=4)\n",
|
||||||
|
"global_importance_values = explanation.get_ranked_global_values()\n",
|
||||||
|
"global_importance_names = explanation.get_ranked_global_names()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"print('global importance values: {}'.format(global_importance_values))\n",
|
||||||
|
"print('global importance names: {}'.format(global_importance_names))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Explain individual instance predictions (local explanation) ##### needs to get updated with the new build"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"local_explanation = tabular_explainer.explain_local(x_test[0,:])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# local feature importance information\n",
|
||||||
|
"local_importance_values = local_explanation.local_importance_values\n",
|
||||||
|
"print('local importance values: {}'.format(local_importance_values))"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "mesameki"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3.6",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python36"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.6.8"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
|
|||||||
@@ -43,5 +43,6 @@ Take a look at [intro-to-pipelines](./intro-to-pipelines/) for the list of noteb
|
|||||||
|
|
||||||
1. [pipeline-batch-scoring.ipynb](https://aka.ms/pl-batch-score): This notebook demonstrates how to run a batch scoring job using Azure Machine Learning pipelines.
|
1. [pipeline-batch-scoring.ipynb](https://aka.ms/pl-batch-score): This notebook demonstrates how to run a batch scoring job using Azure Machine Learning pipelines.
|
||||||
2. [pipeline-style-transfer.ipynb](https://aka.ms/pl-style-trans): This notebook demonstrates a multi-step pipeline that uses GPU compute.
|
2. [pipeline-style-transfer.ipynb](https://aka.ms/pl-style-trans): This notebook demonstrates a multi-step pipeline that uses GPU compute.
|
||||||
|
3. [nyc-taxi-data-regression-model-building.ipynb](https://aka.ms/pl-nyctaxi-tutorial): This notebook is an AzureML Pipelines version of the previously published two part sample.
|
||||||
|
|
||||||

|

|
||||||
|
|||||||
@@ -65,8 +65,6 @@
|
|||||||
"import os\n",
|
"import os\n",
|
||||||
"import azureml.core\n",
|
"import azureml.core\n",
|
||||||
"from azureml.core import Workspace, Experiment, Datastore\n",
|
"from azureml.core import Workspace, Experiment, Datastore\n",
|
||||||
"from azureml.core.compute import AmlCompute\n",
|
|
||||||
"from azureml.core.compute import ComputeTarget\n",
|
|
||||||
"from azureml.widgets import RunDetails\n",
|
"from azureml.widgets import RunDetails\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Check core SDK version number\n",
|
"# Check core SDK version number\n",
|
||||||
@@ -116,36 +114,20 @@
|
|||||||
"ws = Workspace.from_config()\n",
|
"ws = Workspace.from_config()\n",
|
||||||
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')\n",
|
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Default datastore (Azure file storage)\n",
|
"# Default datastore\n",
|
||||||
"def_file_store = ws.get_default_datastore() \n",
|
"def_blob_store = ws.get_default_datastore() \n",
|
||||||
"# The above call is equivalent to Datastore(ws, \"workspacefilestore\") or simply Datastore(ws)\n",
|
|
||||||
"print(\"Default datastore's name: {}\".format(def_file_store.name))\n",
|
|
||||||
"\n",
|
|
||||||
"# Blob storage associated with the workspace\n",
|
|
||||||
"# The following call GETS the Azure Blob Store associated with your workspace.\n",
|
"# The following call GETS the Azure Blob Store associated with your workspace.\n",
|
||||||
"# Note that workspaceblobstore is **the name of this store and CANNOT BE CHANGED and must be used as is** \n",
|
"# Note that workspaceblobstore is **the name of this store and CANNOT BE CHANGED and must be used as is** \n",
|
||||||
"def_blob_store = Datastore(ws, \"workspaceblobstore\")\n",
|
"def_blob_store = Datastore(ws, \"workspaceblobstore\")\n",
|
||||||
"print(\"Blobstore's name: {}\".format(def_blob_store.name))"
|
"print(\"Blobstore's name: {}\".format(def_blob_store.name))"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# project folder\n",
|
|
||||||
"project_folder = '.'\n",
|
|
||||||
" \n",
|
|
||||||
"print('Sample projects will be created in {}.'.format(os.path.realpath(project_folder)))"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### Required data and script files for the the tutorial\n",
|
"### Required data and script files for the the tutorial\n",
|
||||||
"Sample files required to finish this tutorial are already copied to the project folder specified above. Even though the .py provided in the samples don't have much \"ML work,\" as a data scientist, you will work on this extensively as part of your work. To complete this tutorial, the contents of these files are not very important. The one-line files are for demostration purpose only."
|
"Sample files required to finish this tutorial are already copied to the corresponding source_directory locations. Even though the .py provided in the samples don't have much \"ML work,\" as a data scientist, you will work on this extensively as part of your work. To complete this tutorial, the contents of these files are not very important. The one-line files are for demostration purpose only."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -176,19 +158,10 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# get_default_datastore() gets the default Azure File Store associated with your workspace.\n",
|
"# get_default_datastore() gets the default Azure Blob Store associated with your workspace.\n",
|
||||||
"# Here we are reusing the def_file_store object we obtained earlier\n",
|
"# Here we are reusing the def_blob_store object we obtained earlier\n",
|
||||||
"\n",
|
|
||||||
"# target_path is the directory at the destination\n",
|
|
||||||
"def_file_store.upload_files(['./20news.pkl'], \n",
|
|
||||||
" target_path = '20newsgroups', \n",
|
|
||||||
" overwrite = True, \n",
|
|
||||||
" show_progress = True)\n",
|
|
||||||
"\n",
|
|
||||||
"# Here we are reusing the def_blob_store we created earlier\n",
|
|
||||||
"def_blob_store.upload_files([\"./20news.pkl\"], target_path=\"20newsgroups\", overwrite=True)\n",
|
"def_blob_store.upload_files([\"./20news.pkl\"], target_path=\"20newsgroups\", overwrite=True)\n",
|
||||||
"\n",
|
"print(\"Upload call completed\")"
|
||||||
"print(\"Upload calls completed\")"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -233,8 +206,15 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"#### Retrieve default Azure Machine Learning compute\n",
|
"#### Retrieve or create a Azure Machine Learning compute\n",
|
||||||
"Azure Machine Learning Compute is a service for provisioning and managing clusters of Azure virtual machines for running machine learning workloads. Let's get the default Azure Machine Learning Compute in the current workspace. We will then run the training script on this compute target."
|
"Azure Machine Learning Compute is a service for provisioning and managing clusters of Azure virtual machines for running machine learning workloads. Let's create a new Azure Machine Learning Compute in the current workspace, if it doesn't already exist. We will then run the training script on this compute target.\n",
|
||||||
|
"\n",
|
||||||
|
"If we could not find the compute with the given name in the previous cell, then we will create a new compute here. We will create an Azure Machine Learning Compute containing **STANDARD_D2_V2 CPU VMs**. This process is broken down into the following steps:\n",
|
||||||
|
"\n",
|
||||||
|
"1. Create the configuration\n",
|
||||||
|
"2. Create the Azure Machine Learning compute\n",
|
||||||
|
"\n",
|
||||||
|
"**This process will take about 3 minutes and is providing only sparse output in the process. Please make sure to wait until the call returns before moving to the next cell.**"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -243,7 +223,23 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"aml_compute = ws.get_default_compute_target(\"CPU\")"
|
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
|
||||||
|
"from azureml.core.compute_target import ComputeTargetException\n",
|
||||||
|
"\n",
|
||||||
|
"aml_compute_target = \"cpu-cluster\"\n",
|
||||||
|
"try:\n",
|
||||||
|
" aml_compute = AmlCompute(ws, aml_compute_target)\n",
|
||||||
|
" print(\"found existing compute target.\")\n",
|
||||||
|
"except ComputeTargetException:\n",
|
||||||
|
" print(\"creating new compute target\")\n",
|
||||||
|
" \n",
|
||||||
|
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\",\n",
|
||||||
|
" min_nodes = 1, \n",
|
||||||
|
" max_nodes = 4) \n",
|
||||||
|
" aml_compute = ComputeTarget.create(ws, aml_compute_target, provisioning_config)\n",
|
||||||
|
" aml_compute.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)\n",
|
||||||
|
" \n",
|
||||||
|
"print(\"Azure Machine Learning Compute attached\")\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -290,9 +286,10 @@
|
|||||||
"- [**MpiStep**](https://docs.microsoft.com/en-us/python/api/azureml-pipeline-steps/azureml.pipeline.steps.mpi_step.mpistep?view=azure-ml-py): Adds a step to run a MPI job in a Pipeline.\n",
|
"- [**MpiStep**](https://docs.microsoft.com/en-us/python/api/azureml-pipeline-steps/azureml.pipeline.steps.mpi_step.mpistep?view=azure-ml-py): Adds a step to run a MPI job in a Pipeline.\n",
|
||||||
"- [**AutoMLStep**](https://docs.microsoft.com/en-us/python/api/azureml-train-automl/azureml.train.automl.automlstep?view=azure-ml-py): Creates a AutoML step in a Pipeline.\n",
|
"- [**AutoMLStep**](https://docs.microsoft.com/en-us/python/api/azureml-train-automl/azureml.train.automl.automlstep?view=azure-ml-py): Creates a AutoML step in a Pipeline.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"The following code will create a PythonScriptStep to be executed in the Azure Machine Learning Compute we created above using train.py, one of the files already made available in the project folder.\n",
|
"The following code will create a PythonScriptStep to be executed in the Azure Machine Learning Compute we created above using train.py, one of the files already made available in the `source_directory`.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"A **PythonScriptStep** is a basic, built-in step to run a Python Script on a compute target. It takes a script name and optionally other parameters like arguments for the script, compute target, inputs and outputs. If no compute target is specified, default compute target for the workspace is used. You can also use a [**RunConfiguration**](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.runconfiguration?view=azure-ml-py) to specify requirements for the PythonScriptStep, such as conda dependencies and docker image."
|
"A **PythonScriptStep** is a basic, built-in step to run a Python Script on a compute target. It takes a script name and optionally other parameters like arguments for the script, compute target, inputs and outputs. If no compute target is specified, default compute target for the workspace is used. You can also use a [**RunConfiguration**](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.runconfiguration?view=azure-ml-py) to specify requirements for the PythonScriptStep, such as conda dependencies and docker image.\n",
|
||||||
|
"> The best practice is to use separate folders for scripts and its dependent files for each step and specify that folder as the `source_directory` for the step. This helps reduce the size of the snapshot created for the step (only the specific folder is snapshotted). Since changes in any files in the `source_directory` would trigger a re-upload of the snapshot, this helps keep the reuse of the step when there are no changes in the `source_directory` of the step."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -303,6 +300,9 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"# Uses default values for PythonScriptStep construct.\n",
|
"# Uses default values for PythonScriptStep construct.\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"source_directory = './train'\n",
|
||||||
|
"print('Source directory for the step is {}.'.format(os.path.realpath(source_directory)))\n",
|
||||||
|
"\n",
|
||||||
"# Syntax\n",
|
"# Syntax\n",
|
||||||
"# PythonScriptStep(\n",
|
"# PythonScriptStep(\n",
|
||||||
"# script_name, \n",
|
"# script_name, \n",
|
||||||
@@ -321,7 +321,7 @@
|
|||||||
"step1 = PythonScriptStep(name=\"train_step\",\n",
|
"step1 = PythonScriptStep(name=\"train_step\",\n",
|
||||||
" script_name=\"train.py\", \n",
|
" script_name=\"train.py\", \n",
|
||||||
" compute_target=aml_compute, \n",
|
" compute_target=aml_compute, \n",
|
||||||
" source_directory=project_folder,\n",
|
" source_directory=source_directory,\n",
|
||||||
" allow_reuse=True)\n",
|
" allow_reuse=True)\n",
|
||||||
"print(\"Step1 created\")"
|
"print(\"Step1 created\")"
|
||||||
]
|
]
|
||||||
@@ -351,12 +351,15 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# All steps use files already available in the project_folder\n",
|
"# For this step, we use a different source_directory\n",
|
||||||
|
"source_directory = './compare'\n",
|
||||||
|
"print('Source directory for the step is {}.'.format(os.path.realpath(source_directory)))\n",
|
||||||
|
"\n",
|
||||||
"# All steps use the same Azure Machine Learning compute target as well\n",
|
"# All steps use the same Azure Machine Learning compute target as well\n",
|
||||||
"step2 = PythonScriptStep(name=\"compare_step\",\n",
|
"step2 = PythonScriptStep(name=\"compare_step\",\n",
|
||||||
" script_name=\"compare.py\", \n",
|
" script_name=\"compare.py\", \n",
|
||||||
" compute_target=aml_compute, \n",
|
" compute_target=aml_compute, \n",
|
||||||
" source_directory=project_folder)\n",
|
" source_directory=source_directory)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Use a RunConfiguration to specify some additional requirements for this step.\n",
|
"# Use a RunConfiguration to specify some additional requirements for this step.\n",
|
||||||
"from azureml.core.runconfig import RunConfiguration\n",
|
"from azureml.core.runconfig import RunConfiguration\n",
|
||||||
@@ -378,10 +381,14 @@
|
|||||||
"# specify CondaDependencies obj\n",
|
"# specify CondaDependencies obj\n",
|
||||||
"run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])\n",
|
"run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"# For this step, we use yet another source_directory\n",
|
||||||
|
"source_directory = './extract'\n",
|
||||||
|
"print('Source directory for the step is {}.'.format(os.path.realpath(source_directory)))\n",
|
||||||
|
"\n",
|
||||||
"step3 = PythonScriptStep(name=\"extract_step\",\n",
|
"step3 = PythonScriptStep(name=\"extract_step\",\n",
|
||||||
" script_name=\"extract.py\", \n",
|
" script_name=\"extract.py\", \n",
|
||||||
" compute_target=aml_compute, \n",
|
" compute_target=aml_compute, \n",
|
||||||
" source_directory=project_folder,\n",
|
" source_directory=source_directory,\n",
|
||||||
" runconfig=run_config)\n",
|
" runconfig=run_config)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# list of steps to run\n",
|
"# list of steps to run\n",
|
||||||
@@ -597,7 +604,7 @@
|
|||||||
"metadata": {
|
"metadata": {
|
||||||
"authors": [
|
"authors": [
|
||||||
{
|
{
|
||||||
"name": "diray"
|
"name": "sanpil"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
|
|||||||
@@ -113,7 +113,25 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"batch_compute = ws.get_default_compute_target(\"CPU\")"
|
"batch_compute_name = 'mybatchcompute' # Name to associate with new compute in workspace\n",
|
||||||
|
"\n",
|
||||||
|
"# Batch account details needed to attach as compute to workspace\n",
|
||||||
|
"batch_account_name = \"<batch_account_name>\" # Name of the Batch account\n",
|
||||||
|
"batch_resource_group = \"<batch_resource_group>\" # Name of the resource group which contains this account\n",
|
||||||
|
"\n",
|
||||||
|
"try:\n",
|
||||||
|
" # check if already attached\n",
|
||||||
|
" batch_compute = BatchCompute(ws, batch_compute_name)\n",
|
||||||
|
"except ComputeTargetException:\n",
|
||||||
|
" print('Attaching Batch compute...')\n",
|
||||||
|
" provisioning_config = BatchCompute.attach_configuration(resource_group=batch_resource_group, \n",
|
||||||
|
" account_name=batch_account_name)\n",
|
||||||
|
" batch_compute = ComputeTarget.attach(ws, batch_compute_name, provisioning_config)\n",
|
||||||
|
" batch_compute.wait_for_completion()\n",
|
||||||
|
" print(\"Provisioning state:{}\".format(batch_compute.provisioning_state))\n",
|
||||||
|
" print(\"Provisioning errors:{}\".format(batch_compute.provisioning_errors))\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"Using Batch compute:{}\".format(batch_compute.cluster_resource_id))"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -76,8 +76,18 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## Get default AmlCompute\n",
|
"## Create or Attach existing AmlCompute\n",
|
||||||
"You can create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for training your model. In this tutorial, you use default `AmlCompute` as your training compute resource."
|
"You will need to create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for training your model. In this tutorial, you create `AmlCompute` as your training compute resource."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"If we could not find the cluster with the given name, then we will create a new cluster here. We will create an `AmlCompute` cluster of `STANDARD_NC6` GPU VMs. This process is broken down into 3 steps:\n",
|
||||||
|
"1. create the configuration (this step is local and only takes a second)\n",
|
||||||
|
"2. create the cluster (this step will take about **20 seconds**)\n",
|
||||||
|
"3. provision the VMs to bring the cluster to the initial size (of 1 in this case). This step will take about **3-5 minutes** and is providing only sparse output in the process. Please make sure to wait until the call returns before moving to the next cell"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -86,7 +96,25 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"cpu_cluster = ws.get_default_compute_target(\"CPU\")\n",
|
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
|
||||||
|
"from azureml.core.compute_target import ComputeTargetException\n",
|
||||||
|
"\n",
|
||||||
|
"# choose a name for your cluster\n",
|
||||||
|
"cluster_name = \"cpu-cluster\"\n",
|
||||||
|
"\n",
|
||||||
|
"try:\n",
|
||||||
|
" cpu_cluster = ComputeTarget(workspace=ws, name=cluster_name)\n",
|
||||||
|
" print('Found existing compute target')\n",
|
||||||
|
"except ComputeTargetException:\n",
|
||||||
|
" print('Creating a new compute target...')\n",
|
||||||
|
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6', max_nodes=4)\n",
|
||||||
|
"\n",
|
||||||
|
" # create the cluster\n",
|
||||||
|
" cpu_cluster = ComputeTarget.create(ws, cluster_name, compute_config)\n",
|
||||||
|
"\n",
|
||||||
|
" # can poll for a minimum number of nodes and for a specific timeout. \n",
|
||||||
|
" # if no min node count is provided it uses the scale settings for the cluster\n",
|
||||||
|
" cpu_cluster.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# use get_status() to get a detailed status for the current cluster. \n",
|
"# use get_status() to get a detailed status for the current cluster. \n",
|
||||||
"print(cpu_cluster.get_status().serialize())"
|
"print(cpu_cluster.get_status().serialize())"
|
||||||
@@ -96,7 +124,7 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"Now that you have created the compute target, let's see what the workspace's `compute_targets` property returns. You should now see one entry named 'cpucluster' of type `AmlCompute`."
|
"Now that you have created the compute target, let's see what the workspace's `compute_targets` property returns. You should now see one entry named 'cpu-cluster' of type `AmlCompute`."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -22,9 +22,17 @@
|
|||||||
"# Azure Machine Learning Pipeline with HyperDriveStep\n",
|
"# Azure Machine Learning Pipeline with HyperDriveStep\n",
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"This notebook is used to demonstrate the use of HyperDriveStep in AML Pipeline.\n",
|
"This notebook is used to demonstrate the use of HyperDriveStep in AML Pipeline."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Prerequisites and Azure Machine Learning Basics\n",
|
||||||
|
"If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, make sure you go through the configuration Notebook located at https://github.com/Azure/MachineLearningNotebooks first if you haven't. This sets you up with a working config file that has information on your workspace, subscription id, etc. \n",
|
||||||
"\n",
|
"\n",
|
||||||
"## Azure Machine Learning and Pipeline SDK-specific imports\n"
|
"## Azure Machine Learning and Pipeline SDK-specific imports"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -33,19 +41,24 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"import os\n",
|
|
||||||
"import shutil\n",
|
|
||||||
"import urllib\n",
|
|
||||||
"import azureml.core\n",
|
"import azureml.core\n",
|
||||||
"from azureml.core import Workspace, Experiment\n",
|
"from azureml.core import Workspace, Experiment\n",
|
||||||
"from azureml.core.datastore import Datastore\n",
|
"from azureml.core.datastore import Datastore\n",
|
||||||
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
|
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
|
||||||
"from azureml.exceptions import ComputeTargetException\n",
|
"from azureml.exceptions import ComputeTargetException\n",
|
||||||
"from azureml.data.data_reference import DataReference\n",
|
"from azureml.data.data_reference import DataReference\n",
|
||||||
"from azureml.pipeline.steps import HyperDriveStep\n",
|
"from azureml.pipeline.steps import HyperDriveStep, HyperDriveStepRun\n",
|
||||||
"from azureml.pipeline.core import Pipeline, PipelineData\n",
|
"from azureml.pipeline.core import Pipeline, PipelineData\n",
|
||||||
"from azureml.train.dnn import TensorFlow\n",
|
"from azureml.train.dnn import TensorFlow\n",
|
||||||
"from azureml.train.hyperdrive import *\n",
|
"# from azureml.train.hyperdrive import *\n",
|
||||||
|
"from azureml.train.hyperdrive import RandomParameterSampling, BanditPolicy, HyperDriveConfig, PrimaryMetricGoal\n",
|
||||||
|
"from azureml.train.hyperdrive import choice, loguniform\n",
|
||||||
|
"\n",
|
||||||
|
"import os\n",
|
||||||
|
"import shutil\n",
|
||||||
|
"import urllib\n",
|
||||||
|
"import numpy as np\n",
|
||||||
|
"import matplotlib.pyplot as plt\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Check core SDK version number\n",
|
"# Check core SDK version number\n",
|
||||||
"print(\"SDK version:\", azureml.core.VERSION)"
|
"print(\"SDK version:\", azureml.core.VERSION)"
|
||||||
@@ -87,7 +100,7 @@
|
|||||||
"script_folder = './tf-mnist'\n",
|
"script_folder = './tf-mnist'\n",
|
||||||
"os.makedirs(script_folder, exist_ok=True)\n",
|
"os.makedirs(script_folder, exist_ok=True)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"exp = Experiment(workspace=ws, name='tf-mnist')"
|
"exp = Experiment(workspace=ws, name='Hyperdrive_sample')"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -112,6 +125,42 @@
|
|||||||
"urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz', filename = './data/mnist/test-labels.gz')"
|
"urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz', filename = './data/mnist/test-labels.gz')"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Show some sample images\n",
|
||||||
|
"Let's load the downloaded compressed file into numpy arrays using some utility functions included in the `utils.py` library file from the current folder. Then we use `matplotlib` to plot 30 random images from the dataset along with their labels."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from utils import load_data\n",
|
||||||
|
"\n",
|
||||||
|
"# note we also shrink the intensity values (X) from 0-255 to 0-1. This helps the neural network converge faster.\n",
|
||||||
|
"X_train = load_data('./data/mnist/train-images.gz', False) / 255.0\n",
|
||||||
|
"y_train = load_data('./data/mnist/train-labels.gz', True).reshape(-1)\n",
|
||||||
|
"\n",
|
||||||
|
"X_test = load_data('./data/mnist/test-images.gz', False) / 255.0\n",
|
||||||
|
"y_test = load_data('./data/mnist/test-labels.gz', True).reshape(-1)\n",
|
||||||
|
"\n",
|
||||||
|
"count = 0\n",
|
||||||
|
"sample_size = 30\n",
|
||||||
|
"plt.figure(figsize = (16, 6))\n",
|
||||||
|
"for i in np.random.permutation(X_train.shape[0])[:sample_size]:\n",
|
||||||
|
" count = count + 1\n",
|
||||||
|
" plt.subplot(1, sample_size, count)\n",
|
||||||
|
" plt.axhline('')\n",
|
||||||
|
" plt.axvline('')\n",
|
||||||
|
" plt.text(x = 10, y = -10, s = y_train[i], fontsize = 18)\n",
|
||||||
|
" plt.imshow(X_train[i].reshape(28, 28), cmap = plt.cm.Greys)\n",
|
||||||
|
"plt.show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
@@ -135,7 +184,14 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## Retrieve or create a Azure Machine Learning compute\n",
|
"## Retrieve or create a Azure Machine Learning compute\n",
|
||||||
"Azure Machine Learning Compute is a service for provisioning and managing clusters of Azure virtual machines for running machine learning workloads. Let's get the default Azure Machine Learning Compute in the current workspace. We will then run the training script on this compute target."
|
"Azure Machine Learning Compute is a service for provisioning and managing clusters of Azure virtual machines for running machine learning workloads. Let's create a new Azure Machine Learning Compute in the current workspace, if it doesn't already exist. We will then run the training script on this compute target.\n",
|
||||||
|
"\n",
|
||||||
|
"If we could not find the compute with the given name in the previous cell, then we will create a new compute here. This process is broken down into the following steps:\n",
|
||||||
|
"\n",
|
||||||
|
"1. Create the configuration\n",
|
||||||
|
"2. Create the Azure Machine Learning compute\n",
|
||||||
|
"\n",
|
||||||
|
"**This process will take a few minutes and is providing only sparse output in the process. Please make sure to wait until the call returns before moving to the next cell.**\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -144,7 +200,20 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"compute_target = ws.get_default_compute_target(\"GPU\")"
|
"cluster_name = \"gpu-cluster\"\n",
|
||||||
|
"\n",
|
||||||
|
"try:\n",
|
||||||
|
" compute_target = ComputeTarget(workspace=ws, name=cluster_name)\n",
|
||||||
|
" print('Found existing compute target {}.'.format(cluster_name))\n",
|
||||||
|
"except ComputeTargetException:\n",
|
||||||
|
" print('Creating a new compute target...')\n",
|
||||||
|
" compute_config = AmlCompute.provisioning_configuration(vm_size=\"STANDARD_NC6\",\n",
|
||||||
|
" max_nodes=4)\n",
|
||||||
|
"\n",
|
||||||
|
" compute_target = ComputeTarget.create(ws, cluster_name, compute_config)\n",
|
||||||
|
" compute_target.wait_for_completion(show_output=True, timeout_in_minutes=20)\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"Azure Machine Learning Compute attached\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -173,8 +242,12 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## Create TensorFlow estimator\n",
|
"## Create TensorFlow estimator\n",
|
||||||
"Next, we construct an `azureml.train.dnn.TensorFlow` estimator object, use the Batch AI cluster as compute target, and pass the mount-point of the datastore to the training code as a parameter.\n",
|
"Next, we construct an [TensorFlow](https://docs.microsoft.com/en-us/python/api/azureml-train-core/azureml.train.dnn.tensorflow?view=azure-ml-py) estimator object.\n",
|
||||||
"The TensorFlow estimator is providing a simple way of launching a TensorFlow training job on a compute target. It will automatically provide a docker image that has TensorFlow installed -- if additional pip or conda packages are required, their names can be passed in via the `pip_packages` and `conda_packages` arguments and they will be included in the resulting docker."
|
"The TensorFlow estimator is providing a simple way of launching a TensorFlow training job on a compute target. It will automatically provide a docker image that has TensorFlow installed -- if additional pip or conda packages are required, their names can be passed in via the `pip_packages` and `conda_packages` arguments and they will be included in the resulting docker.\n",
|
||||||
|
"\n",
|
||||||
|
"The TensorFlow estimator also takes a `framework_version` parameter -- if no version is provided, the estimator will default to the latest version supported by AzureML. Use `TensorFlow.get_supported_versions()` to get a list of all versions supported by your current SDK version or see the [SDK documentation](https://docs.microsoft.com/en-us/python/api/azureml-train-core/azureml.train.dnn?view=azure-ml-py) for the versions supported in the most current release.\n",
|
||||||
|
"\n",
|
||||||
|
"The TensorFlow estimator also takes a `framework_version` parameter -- if no version is provided, the estimator will default to the latest version supported by AzureML. Use `TensorFlow.get_supported_versions()` to get a list of all versions supported by your current SDK version or see the [SDK documentation](https://docs.microsoft.com/en-us/python/api/azureml-train-core/azureml.train.dnn?view=azure-ml-py) for the versions supported in the most current release."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -186,7 +259,8 @@
|
|||||||
"est = TensorFlow(source_directory=script_folder, \n",
|
"est = TensorFlow(source_directory=script_folder, \n",
|
||||||
" compute_target=compute_target,\n",
|
" compute_target=compute_target,\n",
|
||||||
" entry_script='tf_mnist.py', \n",
|
" entry_script='tf_mnist.py', \n",
|
||||||
" use_gpu=True)"
|
" use_gpu=True,\n",
|
||||||
|
" framework_version='1.13')"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -194,7 +268,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## Intelligent hyperparameter tuning\n",
|
"## Intelligent hyperparameter tuning\n",
|
||||||
"We have trained the model with one set of hyperparameters, now let's how we can do hyperparameter tuning by launching multiple runs on the cluster. First let's define the parameter space using random sampling.\n",
|
"Now let's try hyperparameter tuning by launching multiple runs on the cluster. First let's define the parameter space using random sampling.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"In this example we will use random sampling to try different configuration sets of hyperparameters to maximize our primary metric, the best validation accuracy (`validation_acc`)."
|
"In this example we will use random sampling to try different configuration sets of hyperparameters to maximize our primary metric, the best validation accuracy (`validation_acc`)."
|
||||||
]
|
]
|
||||||
@@ -251,8 +325,8 @@
|
|||||||
" policy=early_termination_policy,\n",
|
" policy=early_termination_policy,\n",
|
||||||
" primary_metric_name='validation_acc', \n",
|
" primary_metric_name='validation_acc', \n",
|
||||||
" primary_metric_goal=PrimaryMetricGoal.MAXIMIZE, \n",
|
" primary_metric_goal=PrimaryMetricGoal.MAXIMIZE, \n",
|
||||||
" max_total_runs=1,\n",
|
" max_total_runs=10,\n",
|
||||||
" max_concurrent_runs=1)"
|
" max_concurrent_runs=4)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -261,6 +335,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"## Add HyperDrive as a step of pipeline\n",
|
"## Add HyperDrive as a step of pipeline\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"### Setup an input for the hypderdrive step\n",
|
||||||
"Let's setup a data reference for inputs of hyperdrive step."
|
"Let's setup a data reference for inputs of hyperdrive step."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -302,8 +377,9 @@
|
|||||||
" datastore=ds,\n",
|
" datastore=ds,\n",
|
||||||
" pipeline_output_name=metrics_output_name)\n",
|
" pipeline_output_name=metrics_output_name)\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"hd_step_name='hd_step01'\n",
|
||||||
"hd_step = HyperDriveStep(\n",
|
"hd_step = HyperDriveStep(\n",
|
||||||
" name=\"hyperdrive_module\",\n",
|
" name=hd_step_name,\n",
|
||||||
" hyperdrive_config=hd_config,\n",
|
" hyperdrive_config=hd_config,\n",
|
||||||
" estimator_entry_script_arguments=['--data-folder', data_folder],\n",
|
" estimator_entry_script_arguments=['--data-folder', data_folder],\n",
|
||||||
" inputs=[data_folder],\n",
|
" inputs=[data_folder],\n",
|
||||||
@@ -324,7 +400,7 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"pipeline = Pipeline(workspace=ws, steps=[hd_step])\n",
|
"pipeline = Pipeline(workspace=ws, steps=[hd_step])\n",
|
||||||
"pipeline_run = Experiment(ws, 'Hyperdrive_Test').submit(pipeline)"
|
"pipeline_run = exp.submit(pipeline)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -357,7 +433,8 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"pipeline_run.wait_for_completion()"
|
"# PUBLISHONLY\n",
|
||||||
|
"# pipeline_run.wait_for_completion()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -374,8 +451,9 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"metrics_output = pipeline_run.get_pipeline_output(metrics_output_name)\n",
|
"# PUBLISHONLY\n",
|
||||||
"num_file_downloaded = metrics_output.download('.', show_progress=True)"
|
"# metrics_output = pipeline_run.get_pipeline_output(metrics_output_name)\n",
|
||||||
|
"# num_file_downloaded = metrics_output.download('.', show_progress=True)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -384,14 +462,374 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"import pandas as pd\n",
|
"# PUBLISHONLY\n",
|
||||||
"import json\n",
|
"# import pandas as pd\n",
|
||||||
"with open(metrics_output._path_on_datastore) as f: \n",
|
"# import json\n",
|
||||||
" metrics_output_result = f.read()\n",
|
"# with open(metrics_output._path_on_datastore) as f: \n",
|
||||||
|
"# metrics_output_result = f.read()\n",
|
||||||
" \n",
|
" \n",
|
||||||
"deserialized_metrics_output = json.loads(metrics_output_result)\n",
|
"# deserialized_metrics_output = json.loads(metrics_output_result)\n",
|
||||||
"df = pd.DataFrame(deserialized_metrics_output)\n",
|
"# df = pd.DataFrame(deserialized_metrics_output)\n",
|
||||||
"df"
|
"# df"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Find and register best model\n",
|
||||||
|
"When all the jobs finish, we can find out the one that has the highest accuracy."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# PUBLISHONLY\n",
|
||||||
|
"# hd_step_run = HyperDriveStepRun(step_run=pipeline_run.find_step_run(hd_step_name)[0])\n",
|
||||||
|
"# best_run = hd_step_run.get_best_run_by_primary_metric()\n",
|
||||||
|
"# best_run"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Now let's list the model files uploaded during the run."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# PUBLISHONLY\n",
|
||||||
|
"# print(best_run.get_file_names())"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"We can then register the folder (and all files in it) as a model named `tf-dnn-mnist` under the workspace for deployment."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# PUBLISHONLY\n",
|
||||||
|
"# model = best_run.register_model(model_name='tf-dnn-mnist', model_path='outputs/model')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Deploy the model in ACI\n",
|
||||||
|
"Now we are ready to deploy the model as a web service running in Azure Container Instance [ACI](https://azure.microsoft.com/en-us/services/container-instances/). Azure Machine Learning accomplishes this by constructing a Docker image with the scoring logic and model baked in.\n",
|
||||||
|
"### Create score.py\n",
|
||||||
|
"First, we will create a scoring script that will be invoked by the web service call. \n",
|
||||||
|
"\n",
|
||||||
|
"* Note that the scoring script must have two required functions, `init()` and `run(input_data)`. \n",
|
||||||
|
" * In `init()` function, you typically load the model into a global object. This function is executed only once when the Docker container is started. \n",
|
||||||
|
" * In `run(input_data)` function, the model is used to predict a value based on the input data. The input and output to `run` typically use JSON as serialization and de-serialization format but you are not limited to that."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"%%writefile score.py\n",
|
||||||
|
"import json\n",
|
||||||
|
"import numpy as np\n",
|
||||||
|
"import os\n",
|
||||||
|
"import tensorflow as tf\n",
|
||||||
|
"\n",
|
||||||
|
"from azureml.core.model import Model\n",
|
||||||
|
"\n",
|
||||||
|
"def init():\n",
|
||||||
|
" global X, output, sess\n",
|
||||||
|
" tf.reset_default_graph()\n",
|
||||||
|
" model_root = Model.get_model_path('tf-dnn-mnist')\n",
|
||||||
|
" saver = tf.train.import_meta_graph(os.path.join(model_root, 'mnist-tf.model.meta'))\n",
|
||||||
|
" X = tf.get_default_graph().get_tensor_by_name(\"network/X:0\")\n",
|
||||||
|
" output = tf.get_default_graph().get_tensor_by_name(\"network/output/MatMul:0\")\n",
|
||||||
|
" \n",
|
||||||
|
" sess = tf.Session()\n",
|
||||||
|
" saver.restore(sess, os.path.join(model_root, 'mnist-tf.model'))\n",
|
||||||
|
"\n",
|
||||||
|
"def run(raw_data):\n",
|
||||||
|
" data = np.array(json.loads(raw_data)['data'])\n",
|
||||||
|
" # make prediction\n",
|
||||||
|
" out = output.eval(session=sess, feed_dict={X: data})\n",
|
||||||
|
" y_hat = np.argmax(out, axis=1)\n",
|
||||||
|
" return y_hat.tolist()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Create myenv.yml\n",
|
||||||
|
"We also need to create an environment file so that Azure Machine Learning can install the necessary packages in the Docker image which are required by your scoring script. In this case, we need to specify packages `numpy`, `tensorflow`."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# PUBLISHONLY\n",
|
||||||
|
"# from azureml.core.runconfig import CondaDependencies\n",
|
||||||
|
"\n",
|
||||||
|
"# cd = CondaDependencies.create()\n",
|
||||||
|
"# cd.add_conda_package('numpy')\n",
|
||||||
|
"# cd.add_tensorflow_conda_package()\n",
|
||||||
|
"# cd.save_to_file(base_directory='./', conda_file_path='myenv.yml')\n",
|
||||||
|
"\n",
|
||||||
|
"# print(cd.serialize_to_string())"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Deploy to ACI\n",
|
||||||
|
"We are almost ready to deploy. Create a deployment configuration and specify the number of CPUs and gigbyte of RAM needed for your ACI container. "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# PUBLISHONLY\n",
|
||||||
|
"# from azureml.core.webservice import AciWebservice\n",
|
||||||
|
"\n",
|
||||||
|
"# aciconfig = AciWebservice.deploy_configuration(cpu_cores=1, \n",
|
||||||
|
"# memory_gb=1, \n",
|
||||||
|
"# tags={'name':'mnist', 'framework': 'TensorFlow DNN'},\n",
|
||||||
|
"# description='Tensorflow DNN on MNIST')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"#### Deployment Process\n",
|
||||||
|
"Now we can deploy. **This cell will run for about 7-8 minutes**. Behind the scene, it will do the following:\n",
|
||||||
|
"1. **Register model** \n",
|
||||||
|
"Take the local `model` folder (which contains our previously downloaded trained model files) and register it (and the files inside that folder) as a model named `model` under the workspace. Azure ML will register the model directory or model file(s) we specify to the `model_paths` parameter of the `Webservice.deploy` call.\n",
|
||||||
|
"2. **Build Docker image** \n",
|
||||||
|
"Build a Docker image using the scoring file (`score.py`), the environment file (`myenv.yml`), and the `model` folder containing the TensorFlow model files. \n",
|
||||||
|
"3. **Register image** \n",
|
||||||
|
"Register that image under the workspace. \n",
|
||||||
|
"4. **Ship to ACI** \n",
|
||||||
|
"And finally ship the image to the ACI infrastructure, start up a container in ACI using that image, and expose an HTTP endpoint to accept REST client calls."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# PUBLISHONLY\n",
|
||||||
|
"# from azureml.core.image import ContainerImage\n",
|
||||||
|
"\n",
|
||||||
|
"# imgconfig = ContainerImage.image_configuration(execution_script=\"score.py\", \n",
|
||||||
|
"# runtime=\"python\", \n",
|
||||||
|
"# conda_file=\"myenv.yml\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# PUBLISHONLY\n",
|
||||||
|
"# %%time\n",
|
||||||
|
"# from azureml.core.webservice import Webservice\n",
|
||||||
|
"\n",
|
||||||
|
"# service = Webservice.deploy_from_model(workspace=ws,\n",
|
||||||
|
"# name='tf-mnist-svc',\n",
|
||||||
|
"# deployment_config=aciconfig,\n",
|
||||||
|
"# models=[model],\n",
|
||||||
|
"# image_config=imgconfig)\n",
|
||||||
|
"\n",
|
||||||
|
"# service.wait_for_deployment(show_output=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"**Tip: If something goes wrong with the deployment, the first thing to look at is the logs from the service by running the following command:**"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# PUBLISHONLY\n",
|
||||||
|
"# print(service.get_logs())"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"This is the scoring web service endpoint:"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# PUBLISHONLY\n",
|
||||||
|
"# print(service.scoring_uri)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Test the deployed model\n",
|
||||||
|
"Let's test the deployed model. Pick 30 random samples from the test set, and send it to the web service hosted in ACI. Note here we are using the `run` API in the SDK to invoke the service. You can also make raw HTTP calls using any HTTP tool such as curl.\n",
|
||||||
|
"\n",
|
||||||
|
"After the invocation, we print the returned predictions and plot them along with the input images. Use red font color and inversed image (white on black) to highlight the misclassified samples. Note since the model accuracy is pretty high, you might have to run the below cell a few times before you can see a misclassified sample."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# PUBLISHONLY\n",
|
||||||
|
"# import json\n",
|
||||||
|
"\n",
|
||||||
|
"# # find 30 random samples from test set\n",
|
||||||
|
"# n = 30\n",
|
||||||
|
"# sample_indices = np.random.permutation(X_test.shape[0])[0:n]\n",
|
||||||
|
"\n",
|
||||||
|
"# test_samples = json.dumps({\"data\": X_test[sample_indices].tolist()})\n",
|
||||||
|
"# test_samples = bytes(test_samples, encoding='utf8')\n",
|
||||||
|
"\n",
|
||||||
|
"# # predict using the deployed model\n",
|
||||||
|
"# result = service.run(input_data=test_samples)\n",
|
||||||
|
"\n",
|
||||||
|
"# # compare actual value vs. the predicted values:\n",
|
||||||
|
"# i = 0\n",
|
||||||
|
"# plt.figure(figsize = (20, 1))\n",
|
||||||
|
"\n",
|
||||||
|
"# for s in sample_indices:\n",
|
||||||
|
"# plt.subplot(1, n, i + 1)\n",
|
||||||
|
"# plt.axhline('')\n",
|
||||||
|
"# plt.axvline('')\n",
|
||||||
|
" \n",
|
||||||
|
"# # use different color for misclassified sample\n",
|
||||||
|
"# font_color = 'red' if y_test[s] != result[i] else 'black'\n",
|
||||||
|
"# clr_map = plt.cm.gray if y_test[s] != result[i] else plt.cm.Greys\n",
|
||||||
|
" \n",
|
||||||
|
"# plt.text(x=10, y=-10, s=y_hat[s], fontsize=18, color=font_color)\n",
|
||||||
|
"# plt.imshow(X_test[s].reshape(28, 28), cmap=clr_map)\n",
|
||||||
|
" \n",
|
||||||
|
"# i = i + 1\n",
|
||||||
|
"# plt.show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"We can also send raw HTTP request to the service."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# PUBLISHONLY\n",
|
||||||
|
"# import requests\n",
|
||||||
|
"\n",
|
||||||
|
"# # send a random row from the test set to score\n",
|
||||||
|
"# random_index = np.random.randint(0, len(X_test)-1)\n",
|
||||||
|
"# input_data = \"{\\\"data\\\": [\" + str(list(X_test[random_index])) + \"]}\"\n",
|
||||||
|
"\n",
|
||||||
|
"# headers = {'Content-Type':'application/json'}\n",
|
||||||
|
"\n",
|
||||||
|
"# resp = requests.post(service.scoring_uri, input_data, headers=headers)\n",
|
||||||
|
"\n",
|
||||||
|
"# print(\"POST to url\", service.scoring_uri)\n",
|
||||||
|
"# print(\"input data:\", input_data)\n",
|
||||||
|
"# print(\"label:\", y_test[random_index])\n",
|
||||||
|
"# print(\"prediction:\", resp.text)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Let's look at the workspace after the web service was deployed. You should see \n",
|
||||||
|
"* a registered model named 'model' and with the id 'model:1'\n",
|
||||||
|
"* an image called 'tf-mnist' and with a docker image location pointing to your workspace's Azure Container Registry (ACR) \n",
|
||||||
|
"* a webservice called 'tf-mnist' with some scoring URL"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# PUBLISHONLY\n",
|
||||||
|
"# models = ws.models\n",
|
||||||
|
"# for name, model in models.items():\n",
|
||||||
|
"# print(\"Model: {}, ID: {}\".format(name, model.id))\n",
|
||||||
|
" \n",
|
||||||
|
"# images = ws.images\n",
|
||||||
|
"# for name, image in images.items():\n",
|
||||||
|
"# print(\"Image: {}, location: {}\".format(name, image.image_location))\n",
|
||||||
|
" \n",
|
||||||
|
"# webservices = ws.webservices\n",
|
||||||
|
"# for name, webservice in webservices.items():\n",
|
||||||
|
"# print(\"Webservice: {}, scoring URI: {}\".format(name, webservice.scoring_uri))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Clean up\n",
|
||||||
|
"You can delete the ACI deployment with a simple delete API call."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# PUBLISHONLY\n",
|
||||||
|
"# service.delete()"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
|
|||||||
@@ -79,7 +79,20 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"aml_compute = ws.get_default_compute_target(\"CPU\")"
|
"from azureml.core.compute_target import ComputeTargetException\n",
|
||||||
|
"\n",
|
||||||
|
"aml_compute_target = \"cpu-cluster\"\n",
|
||||||
|
"try:\n",
|
||||||
|
" aml_compute = AmlCompute(ws, aml_compute_target)\n",
|
||||||
|
" print(\"found existing compute target.\")\n",
|
||||||
|
"except ComputeTargetException:\n",
|
||||||
|
" print(\"creating new compute target\")\n",
|
||||||
|
" \n",
|
||||||
|
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\",\n",
|
||||||
|
" min_nodes = 1, \n",
|
||||||
|
" max_nodes = 4) \n",
|
||||||
|
" aml_compute = ComputeTarget.create(ws, aml_compute_target, provisioning_config)\n",
|
||||||
|
" aml_compute.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -54,7 +54,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### Compute Targets\n",
|
"### Compute Targets\n",
|
||||||
"#### Retrieve the default Azure Machine Learning Compute"
|
"#### Retrieve an already attached Azure Machine Learning Compute"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -63,7 +63,31 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"aml_compute_target = ws.get_default_compute_target(\"CPU\")"
|
"from azureml.core import Run, Experiment, Datastore\n",
|
||||||
|
"\n",
|
||||||
|
"from azureml.widgets import RunDetails\n",
|
||||||
|
"\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.compute import AmlCompute, ComputeTarget\n",
|
||||||
|
"aml_compute_target = \"cpu-cluster\"\n",
|
||||||
|
"try:\n",
|
||||||
|
" aml_compute = AmlCompute(ws, aml_compute_target)\n",
|
||||||
|
" print(\"Found existing compute target: {}\".format(aml_compute_target))\n",
|
||||||
|
"except:\n",
|
||||||
|
" print(\"Creating new compute target: {}\".format(aml_compute_target))\n",
|
||||||
|
" \n",
|
||||||
|
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\",\n",
|
||||||
|
" min_nodes = 1, \n",
|
||||||
|
" max_nodes = 4) \n",
|
||||||
|
" aml_compute = ComputeTarget.create(ws, aml_compute_target, provisioning_config)\n",
|
||||||
|
" aml_compute.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -85,10 +85,24 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
"from azureml.core import Run, Experiment, Datastore\n",
|
||||||
|
"from azureml.core.compute import AmlCompute, ComputeTarget\n",
|
||||||
"from azureml.pipeline.steps import PythonScriptStep\n",
|
"from azureml.pipeline.steps import PythonScriptStep\n",
|
||||||
"from azureml.pipeline.core import Pipeline\n",
|
"from azureml.pipeline.core import Pipeline\n",
|
||||||
"\n",
|
"\n",
|
||||||
"aml_compute = ws.get_default_compute_target(\"CPU\")\n",
|
"#Retrieve an already attached Azure Machine Learning Compute\n",
|
||||||
|
"aml_compute_target = \"cpu-cluster\"\n",
|
||||||
|
"try:\n",
|
||||||
|
" aml_compute = AmlCompute(ws, aml_compute_target)\n",
|
||||||
|
" print(\"Found existing compute target: {}\".format(aml_compute_target))\n",
|
||||||
|
"except:\n",
|
||||||
|
" print(\"Creating new compute target: {}\".format(aml_compute_target))\n",
|
||||||
|
" \n",
|
||||||
|
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\",\n",
|
||||||
|
" min_nodes = 1, \n",
|
||||||
|
" max_nodes = 4) \n",
|
||||||
|
" aml_compute = ComputeTarget.create(ws, aml_compute_target, provisioning_config)\n",
|
||||||
|
" aml_compute.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# source_directory\n",
|
"# source_directory\n",
|
||||||
"source_directory = '.'\n",
|
"source_directory = '.'\n",
|
||||||
|
|||||||
@@ -139,7 +139,31 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"compute_target = ws.get_default_compute_target(\"CPU\")"
|
"# Choose a name for your cluster.\n",
|
||||||
|
"amlcompute_cluster_name = \"cpu-cluster\"\n",
|
||||||
|
"\n",
|
||||||
|
"found = False\n",
|
||||||
|
"# Check if this compute target already exists in the workspace.\n",
|
||||||
|
"cts = ws.compute_targets\n",
|
||||||
|
"if amlcompute_cluster_name in cts and cts[amlcompute_cluster_name].type == 'AmlCompute':\n",
|
||||||
|
" found = True\n",
|
||||||
|
" print('Found existing compute target.')\n",
|
||||||
|
" compute_target = cts[amlcompute_cluster_name]\n",
|
||||||
|
" \n",
|
||||||
|
"if not found:\n",
|
||||||
|
" print('Creating a new compute target...')\n",
|
||||||
|
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\", # for GPU, use \"STANDARD_NC6\"\n",
|
||||||
|
" #vm_priority = 'lowpriority', # optional\n",
|
||||||
|
" max_nodes = 4)\n",
|
||||||
|
"\n",
|
||||||
|
" # Create the cluster.\n",
|
||||||
|
" compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)\n",
|
||||||
|
" \n",
|
||||||
|
" # Can poll for a minimum number of nodes and for a specific timeout.\n",
|
||||||
|
" # If no min_node_count is provided, it will use the scale settings for the cluster.\n",
|
||||||
|
" compute_target.wait_for_completion(show_output = True, min_node_count = 1, timeout_in_minutes = 10)\n",
|
||||||
|
" \n",
|
||||||
|
" # For a more detailed view of current AmlCompute status, use get_status()."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -127,7 +127,7 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"#### Retrieve or create a Aml compute\n",
|
"#### Retrieve or create an Aml compute\n",
|
||||||
"Azure Machine Learning Compute is a service for provisioning and managing clusters of Azure virtual machines for running machine learning workloads. Let's get the default Aml Compute in the current workspace. We will then run the training script on this compute target."
|
"Azure Machine Learning Compute is a service for provisioning and managing clusters of Azure virtual machines for running machine learning workloads. Let's get the default Aml Compute in the current workspace. We will then run the training script on this compute target."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -137,7 +137,22 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"aml_compute = ws.get_default_compute_target(\"CPU\")\n"
|
"from azureml.core.compute_target import ComputeTargetException\n",
|
||||||
|
"\n",
|
||||||
|
"aml_compute_target = \"cpu-cluster\"\n",
|
||||||
|
"try:\n",
|
||||||
|
" aml_compute = AmlCompute(ws, aml_compute_target)\n",
|
||||||
|
" print(\"found existing compute target.\")\n",
|
||||||
|
"except ComputeTargetException:\n",
|
||||||
|
" print(\"creating new compute target\")\n",
|
||||||
|
" \n",
|
||||||
|
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\",\n",
|
||||||
|
" min_nodes = 1, \n",
|
||||||
|
" max_nodes = 4) \n",
|
||||||
|
" aml_compute = ComputeTarget.create(ws, aml_compute_target, provisioning_config)\n",
|
||||||
|
" aml_compute.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)\n",
|
||||||
|
" \n",
|
||||||
|
"print(\"Aml Compute attached\")\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -418,6 +433,77 @@
|
|||||||
"RunDetails(pipeline_run1).show()"
|
"RunDetails(pipeline_run1).show()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"#### Wait for pipeline run to complete"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"pipeline_run1.wait_for_completion(show_output=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### See Outputs\n",
|
||||||
|
"\n",
|
||||||
|
"See where outputs of each pipeline step are located on your datastore.\n",
|
||||||
|
"\n",
|
||||||
|
"***Wait for pipeline run to complete, to make sure all the outputs are ready***"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Get Steps\n",
|
||||||
|
"for step in pipeline_run1.get_steps():\n",
|
||||||
|
" print(\"Outputs of step \" + step.name)\n",
|
||||||
|
" \n",
|
||||||
|
" # Get a dictionary of StepRunOutputs with the output name as the key \n",
|
||||||
|
" output_dict = step.get_outputs()\n",
|
||||||
|
" \n",
|
||||||
|
" for name, output in output_dict.items():\n",
|
||||||
|
" \n",
|
||||||
|
" output_reference = output.get_port_data_reference() # Get output port data reference\n",
|
||||||
|
" print(\"\\tname: \" + name)\n",
|
||||||
|
" print(\"\\tdatastore: \" + output_reference.datastore_name)\n",
|
||||||
|
" print(\"\\tpath on datastore: \" + output_reference.path_on_datastore)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Download Outputs\n",
|
||||||
|
"\n",
|
||||||
|
"We can download the output of any step to our local machine using the SDK."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Retrieve the step runs by name 'train.py'\n",
|
||||||
|
"train_step = pipeline_run1.find_step_run('train.py')\n",
|
||||||
|
"\n",
|
||||||
|
"if train_step:\n",
|
||||||
|
" train_step_obj = train_step[0] # since we have only one step by name 'train.py'\n",
|
||||||
|
" train_step_obj.get_output_data('processed_data1').download(\"./outputs\") # download the output to current directory"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,58 @@
|
|||||||
|
# Copyright (c) Microsoft. All rights reserved.
|
||||||
|
# Licensed under the MIT license.
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import os
|
||||||
|
import pandas as pd
|
||||||
|
import azureml.dataprep as dprep
|
||||||
|
|
||||||
|
|
||||||
|
def get_dict(dict_str):
|
||||||
|
pairs = dict_str.strip("{}").split("\;")
|
||||||
|
new_dict = {}
|
||||||
|
for pair in pairs:
|
||||||
|
key, value = pair.strip('\\').split(":")
|
||||||
|
new_dict[key.strip().strip("'")] = value.strip().strip("'")
|
||||||
|
|
||||||
|
return new_dict
|
||||||
|
|
||||||
|
|
||||||
|
print("Cleans the input data")
|
||||||
|
|
||||||
|
parser = argparse.ArgumentParser("cleanse")
|
||||||
|
parser.add_argument("--input_cleanse", type=str, help="raw taxi data")
|
||||||
|
parser.add_argument("--output_cleanse", type=str, help="cleaned taxi data directory")
|
||||||
|
parser.add_argument("--useful_columns", type=str, help="useful columns to keep")
|
||||||
|
parser.add_argument("--columns", type=str, help="rename column pattern")
|
||||||
|
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
print("Argument 1(input taxi data path): %s" % args.input_cleanse)
|
||||||
|
print("Argument 2(columns to keep): %s" % str(args.useful_columns.strip("[]").split("\;")))
|
||||||
|
print("Argument 3(columns renaming mapping): %s" % str(args.columns.strip("{}").split("\;")))
|
||||||
|
print("Argument 4(output cleansed taxi data path): %s" % args.output_cleanse)
|
||||||
|
|
||||||
|
raw_df = dprep.read_csv(path=args.input_cleanse, header=dprep.PromoteHeadersMode.GROUPED)
|
||||||
|
|
||||||
|
# These functions ensure that null data is removed from the data set,
|
||||||
|
# which will help increase machine learning model accuracy.
|
||||||
|
# Visit https://docs.microsoft.com/en-us/azure/machine-learning/service/tutorial-data-prep
|
||||||
|
# for more details
|
||||||
|
|
||||||
|
useful_columns = [s.strip().strip("'") for s in args.useful_columns.strip("[]").split("\;")]
|
||||||
|
columns = get_dict(args.columns)
|
||||||
|
|
||||||
|
all_columns = dprep.ColumnSelector(term=".*", use_regex=True)
|
||||||
|
drop_if_all_null = [all_columns, dprep.ColumnRelationship(dprep.ColumnRelationship.ALL)]
|
||||||
|
|
||||||
|
new_df = (raw_df
|
||||||
|
.replace_na(columns=all_columns)
|
||||||
|
.drop_nulls(*drop_if_all_null)
|
||||||
|
.rename_columns(column_pairs=columns)
|
||||||
|
.keep_columns(columns=useful_columns))
|
||||||
|
|
||||||
|
if not (args.output_cleanse is None):
|
||||||
|
os.makedirs(args.output_cleanse, exist_ok=True)
|
||||||
|
print("%s created" % args.output_cleanse)
|
||||||
|
write_df = new_df.write_to_csv(directory_path=dprep.LocalFileOutput(args.output_cleanse))
|
||||||
|
write_df.run_local()
|
||||||
@@ -0,0 +1,55 @@
|
|||||||
|
import argparse
|
||||||
|
import os
|
||||||
|
import azureml.dataprep as dprep
|
||||||
|
|
||||||
|
print("Filters out coordinates for locations that are outside the city border.",
|
||||||
|
"Chain the column filter commands within the filter() function",
|
||||||
|
"and define the minimum and maximum bounds for each field.")
|
||||||
|
|
||||||
|
parser = argparse.ArgumentParser("filter")
|
||||||
|
parser.add_argument("--input_filter", type=str, help="merged taxi data directory")
|
||||||
|
parser.add_argument("--output_filter", type=str, help="filter out out of city locations")
|
||||||
|
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
print("Argument 1(input taxi data path): %s" % args.input_filter)
|
||||||
|
print("Argument 2(output filtered taxi data path): %s" % args.output_filter)
|
||||||
|
|
||||||
|
combined_df = dprep.read_csv(args.input_filter + '/part-*')
|
||||||
|
|
||||||
|
# These functions filter out coordinates for locations that are outside the city border.
|
||||||
|
# Visit https://docs.microsoft.com/en-us/azure/machine-learning/service/tutorial-data-prep for more details
|
||||||
|
|
||||||
|
# Create a condensed view of the dataflow to just show the lat/long fields,
|
||||||
|
# which makes it easier to evaluate missing or out-of-scope coordinates
|
||||||
|
decimal_type = dprep.TypeConverter(data_type=dprep.FieldType.DECIMAL)
|
||||||
|
combined_df = combined_df.set_column_types(type_conversions={
|
||||||
|
"pickup_longitude": decimal_type,
|
||||||
|
"pickup_latitude": decimal_type,
|
||||||
|
"dropoff_longitude": decimal_type,
|
||||||
|
"dropoff_latitude": decimal_type
|
||||||
|
})
|
||||||
|
|
||||||
|
# Filter out coordinates for locations that are outside the city border.
|
||||||
|
# Chain the column filter commands within the filter() function
|
||||||
|
# and define the minimum and maximum bounds for each field
|
||||||
|
latlong_filtered_df = (combined_df
|
||||||
|
.drop_nulls(columns=["pickup_longitude",
|
||||||
|
"pickup_latitude",
|
||||||
|
"dropoff_longitude",
|
||||||
|
"dropoff_latitude"],
|
||||||
|
column_relationship=dprep.ColumnRelationship(dprep.ColumnRelationship.ANY))
|
||||||
|
.filter(dprep.f_and(dprep.col("pickup_longitude") <= -73.72,
|
||||||
|
dprep.col("pickup_longitude") >= -74.09,
|
||||||
|
dprep.col("pickup_latitude") <= 40.88,
|
||||||
|
dprep.col("pickup_latitude") >= 40.53,
|
||||||
|
dprep.col("dropoff_longitude") <= -73.72,
|
||||||
|
dprep.col("dropoff_longitude") >= -74.09,
|
||||||
|
dprep.col("dropoff_latitude") <= 40.88,
|
||||||
|
dprep.col("dropoff_latitude") >= 40.53)))
|
||||||
|
|
||||||
|
if not (args.output_filter is None):
|
||||||
|
os.makedirs(args.output_filter, exist_ok=True)
|
||||||
|
print("%s created" % args.output_filter)
|
||||||
|
write_df = latlong_filtered_df.write_to_csv(directory_path=dprep.LocalFileOutput(args.output_filter))
|
||||||
|
write_df.run_local()
|
||||||
@@ -0,0 +1,29 @@
|
|||||||
|
|
||||||
|
import argparse
|
||||||
|
import os
|
||||||
|
import azureml.dataprep as dprep
|
||||||
|
|
||||||
|
print("Merge Green and Yellow taxi data")
|
||||||
|
|
||||||
|
parser = argparse.ArgumentParser("merge")
|
||||||
|
parser.add_argument("--input_green_merge", type=str, help="cleaned green taxi data directory")
|
||||||
|
parser.add_argument("--input_yellow_merge", type=str, help="cleaned yellow taxi data directory")
|
||||||
|
parser.add_argument("--output_merge", type=str, help="green and yellow taxi data merged")
|
||||||
|
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
print("Argument 1(input green taxi data path): %s" % args.input_green_merge)
|
||||||
|
print("Argument 2(input yellow taxi data path): %s" % args.input_yellow_merge)
|
||||||
|
print("Argument 3(output merge taxi data path): %s" % args.output_merge)
|
||||||
|
|
||||||
|
green_df = dprep.read_csv(args.input_green_merge + '/part-*')
|
||||||
|
yellow_df = dprep.read_csv(args.input_yellow_merge + '/part-*')
|
||||||
|
|
||||||
|
# Appending yellow data to green data
|
||||||
|
combined_df = green_df.append_rows([yellow_df])
|
||||||
|
|
||||||
|
if not (args.output_merge is None):
|
||||||
|
os.makedirs(args.output_merge, exist_ok=True)
|
||||||
|
print("%s created" % args.output_merge)
|
||||||
|
write_df = combined_df.write_to_csv(directory_path=dprep.LocalFileOutput(args.output_merge))
|
||||||
|
write_df.run_local()
|
||||||
@@ -0,0 +1,47 @@
|
|||||||
|
import argparse
|
||||||
|
import os
|
||||||
|
import azureml.dataprep as dprep
|
||||||
|
|
||||||
|
print("Replace undefined values to relavant values and rename columns to meaningful names")
|
||||||
|
|
||||||
|
parser = argparse.ArgumentParser("normalize")
|
||||||
|
parser.add_argument("--input_normalize", type=str, help="combined and converted taxi data")
|
||||||
|
parser.add_argument("--output_normalize", type=str, help="replaced undefined values and renamed columns")
|
||||||
|
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
print("Argument 1(input taxi data path): %s" % args.input_normalize)
|
||||||
|
print("Argument 2(output normalized taxi data path): %s" % args.output_normalize)
|
||||||
|
|
||||||
|
combined_converted_df = dprep.read_csv(args.input_normalize + '/part-*')
|
||||||
|
|
||||||
|
# These functions replace undefined values and rename to use meaningful names.
|
||||||
|
# Visit https://docs.microsoft.com/en-us/azure/machine-learning/service/tutorial-data-prep for more details
|
||||||
|
|
||||||
|
replaced_stfor_vals_df = combined_converted_df.replace(columns="store_forward",
|
||||||
|
find="0",
|
||||||
|
replace_with="N").fill_nulls("store_forward", "N")
|
||||||
|
|
||||||
|
replaced_distance_vals_df = replaced_stfor_vals_df.replace(columns="distance",
|
||||||
|
find=".00",
|
||||||
|
replace_with=0).fill_nulls("distance", 0)
|
||||||
|
|
||||||
|
replaced_distance_vals_df = replaced_distance_vals_df.to_number(["distance"])
|
||||||
|
|
||||||
|
time_split_df = (replaced_distance_vals_df
|
||||||
|
.split_column_by_example(source_column="pickup_datetime")
|
||||||
|
.split_column_by_example(source_column="dropoff_datetime"))
|
||||||
|
|
||||||
|
# Split the pickup and dropoff datetime values into the respective date and time columns
|
||||||
|
renamed_col_df = (time_split_df
|
||||||
|
.rename_columns(column_pairs={
|
||||||
|
"pickup_datetime_1": "pickup_date",
|
||||||
|
"pickup_datetime_2": "pickup_time",
|
||||||
|
"dropoff_datetime_1": "dropoff_date",
|
||||||
|
"dropoff_datetime_2": "dropoff_time"}))
|
||||||
|
|
||||||
|
if not (args.output_normalize is None):
|
||||||
|
os.makedirs(args.output_normalize, exist_ok=True)
|
||||||
|
print("%s created" % args.output_normalize)
|
||||||
|
write_df = renamed_col_df.write_to_csv(directory_path=dprep.LocalFileOutput(args.output_normalize))
|
||||||
|
write_df.run_local()
|
||||||
@@ -0,0 +1,88 @@
|
|||||||
|
import argparse
|
||||||
|
import os
|
||||||
|
import azureml.dataprep as dprep
|
||||||
|
|
||||||
|
print("Transforms the renamed taxi data to the required format")
|
||||||
|
|
||||||
|
parser = argparse.ArgumentParser("transform")
|
||||||
|
parser.add_argument("--input_transform", type=str, help="renamed taxi data")
|
||||||
|
parser.add_argument("--output_transform", type=str, help="transformed taxi data")
|
||||||
|
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
print("Argument 1(input taxi data path): %s" % args.input_transform)
|
||||||
|
print("Argument 2(output final transformed taxi data): %s" % args.output_transform)
|
||||||
|
|
||||||
|
renamed_df = dprep.read_csv(args.input_transform + '/part-*')
|
||||||
|
|
||||||
|
# These functions transform the renamed data to be used finally for training.
|
||||||
|
# Visit https://docs.microsoft.com/en-us/azure/machine-learning/service/tutorial-data-prep for more details
|
||||||
|
|
||||||
|
# Split the pickup and dropoff date further into the day of the week, day of the month, and month values.
|
||||||
|
# To get the day of the week value, use the derive_column_by_example() function.
|
||||||
|
# The function takes an array parameter of example objects that define the input data,
|
||||||
|
# and the preferred output. The function automatically determines your preferred transformation.
|
||||||
|
# For the pickup and dropoff time columns, split the time into the hour, minute, and second by using
|
||||||
|
# the split_column_by_example() function with no example parameter. After you generate the new features,
|
||||||
|
# use the drop_columns() function to delete the original fields as the newly generated features are preferred.
|
||||||
|
# Rename the rest of the fields to use meaningful descriptions.
|
||||||
|
|
||||||
|
transformed_features_df = (renamed_df
|
||||||
|
.derive_column_by_example(
|
||||||
|
source_columns="pickup_date",
|
||||||
|
new_column_name="pickup_weekday",
|
||||||
|
example_data=[("2009-01-04", "Sunday"), ("2013-08-22", "Thursday")])
|
||||||
|
.derive_column_by_example(
|
||||||
|
source_columns="dropoff_date",
|
||||||
|
new_column_name="dropoff_weekday",
|
||||||
|
example_data=[("2013-08-22", "Thursday"), ("2013-11-03", "Sunday")])
|
||||||
|
|
||||||
|
.split_column_by_example(source_column="pickup_time")
|
||||||
|
.split_column_by_example(source_column="dropoff_time")
|
||||||
|
|
||||||
|
.split_column_by_example(source_column="pickup_time_1")
|
||||||
|
.split_column_by_example(source_column="dropoff_time_1")
|
||||||
|
.drop_columns(columns=[
|
||||||
|
"pickup_date", "pickup_time", "dropoff_date", "dropoff_time",
|
||||||
|
"pickup_date_1", "dropoff_date_1", "pickup_time_1", "dropoff_time_1"])
|
||||||
|
|
||||||
|
.rename_columns(column_pairs={
|
||||||
|
"pickup_date_2": "pickup_month",
|
||||||
|
"pickup_date_3": "pickup_monthday",
|
||||||
|
"pickup_time_1_1": "pickup_hour",
|
||||||
|
"pickup_time_1_2": "pickup_minute",
|
||||||
|
"pickup_time_2": "pickup_second",
|
||||||
|
"dropoff_date_2": "dropoff_month",
|
||||||
|
"dropoff_date_3": "dropoff_monthday",
|
||||||
|
"dropoff_time_1_1": "dropoff_hour",
|
||||||
|
"dropoff_time_1_2": "dropoff_minute",
|
||||||
|
"dropoff_time_2": "dropoff_second"}))
|
||||||
|
|
||||||
|
# Drop the pickup_datetime and dropoff_datetime columns because they're
|
||||||
|
# no longer needed (granular time features like hour,
|
||||||
|
# minute and second are more useful for model training).
|
||||||
|
processed_df = transformed_features_df.drop_columns(columns=["pickup_datetime", "dropoff_datetime"])
|
||||||
|
|
||||||
|
# Use the type inference functionality to automatically check the data type of each field,
|
||||||
|
# and display the inference results.
|
||||||
|
type_infer = processed_df.builders.set_column_types()
|
||||||
|
type_infer.learn()
|
||||||
|
|
||||||
|
# The inference results look correct based on the data. Now apply the type conversions to the dataflow.
|
||||||
|
type_converted_df = type_infer.to_dataflow()
|
||||||
|
|
||||||
|
# Before you package the dataflow, run two final filters on the data set.
|
||||||
|
# To eliminate incorrectly captured data points,
|
||||||
|
# filter the dataflow on records where both the cost and distance variable values are greater than zero.
|
||||||
|
# This step will significantly improve machine learning model accuracy,
|
||||||
|
# because data points with a zero cost or distance represent major outliers that throw off prediction accuracy.
|
||||||
|
|
||||||
|
final_df = type_converted_df.filter(dprep.col("distance") > 0)
|
||||||
|
final_df = final_df.filter(dprep.col("cost") > 0)
|
||||||
|
|
||||||
|
# Writing the final dataframe to use for training in the following steps
|
||||||
|
if not (args.output_transform is None):
|
||||||
|
os.makedirs(args.output_transform, exist_ok=True)
|
||||||
|
print("%s created" % args.output_transform)
|
||||||
|
write_df = final_df.write_to_csv(directory_path=dprep.LocalFileOutput(args.output_transform))
|
||||||
|
write_df.run_local()
|
||||||
@@ -0,0 +1,31 @@
|
|||||||
|
import argparse
|
||||||
|
import os
|
||||||
|
import azureml.dataprep as dprep
|
||||||
|
import azureml.core
|
||||||
|
|
||||||
|
print("Extracts important features from prepared data")
|
||||||
|
|
||||||
|
parser = argparse.ArgumentParser("featurization")
|
||||||
|
parser.add_argument("--input_featurization", type=str, help="input featurization")
|
||||||
|
parser.add_argument("--useful_columns", type=str, help="columns to use")
|
||||||
|
parser.add_argument("--output_featurization", type=str, help="output featurization")
|
||||||
|
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
print("Argument 1(input training data path): %s" % args.input_featurization)
|
||||||
|
print("Argument 2(column features to use): %s" % str(args.useful_columns.strip("[]").split("\;")))
|
||||||
|
print("Argument 3:(output featurized training data path) %s" % args.output_featurization)
|
||||||
|
|
||||||
|
dflow_prepared = dprep.read_csv(args.input_featurization + '/part-*')
|
||||||
|
|
||||||
|
# These functions extracts useful features for training
|
||||||
|
# Visit https://docs.microsoft.com/en-us/azure/machine-learning/service/tutorial-auto-train-models for more detail
|
||||||
|
|
||||||
|
useful_columns = [s.strip().strip("'") for s in args.useful_columns.strip("[]").split("\;")]
|
||||||
|
dflow = dflow_prepared.keep_columns(useful_columns)
|
||||||
|
|
||||||
|
if not (args.output_featurization is None):
|
||||||
|
os.makedirs(args.output_featurization, exist_ok=True)
|
||||||
|
print("%s created" % args.output_featurization)
|
||||||
|
write_df = dflow.write_to_csv(directory_path=dprep.LocalFileOutput(args.output_featurization))
|
||||||
|
write_df.run_local()
|
||||||
@@ -0,0 +1,12 @@
|
|||||||
|
|
||||||
|
import os
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
|
||||||
|
def get_data():
|
||||||
|
print("In get_data")
|
||||||
|
print(os.environ['AZUREML_DATAREFERENCE_output_split_train_x'])
|
||||||
|
X_train = pd.read_csv(os.environ['AZUREML_DATAREFERENCE_output_split_train_x'] + "/part-00000", header=0)
|
||||||
|
y_train = pd.read_csv(os.environ['AZUREML_DATAREFERENCE_output_split_train_y'] + "/part-00000", header=0)
|
||||||
|
|
||||||
|
return {"X": X_train.values, "y": y_train.values.flatten()}
|
||||||
@@ -0,0 +1,48 @@
|
|||||||
|
import argparse
|
||||||
|
import os
|
||||||
|
import azureml.dataprep as dprep
|
||||||
|
import azureml.core
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
|
||||||
|
|
||||||
|
def write_output(df, path):
|
||||||
|
os.makedirs(path, exist_ok=True)
|
||||||
|
print("%s created" % path)
|
||||||
|
df.to_csv(path + "/part-00000", index=False)
|
||||||
|
|
||||||
|
|
||||||
|
print("Split the data into train and test")
|
||||||
|
|
||||||
|
parser = argparse.ArgumentParser("split")
|
||||||
|
parser.add_argument("--input_split_features", type=str, help="input split features")
|
||||||
|
parser.add_argument("--input_split_labels", type=str, help="input split labels")
|
||||||
|
parser.add_argument("--output_split_train_x", type=str, help="output split train features")
|
||||||
|
parser.add_argument("--output_split_train_y", type=str, help="output split train labels")
|
||||||
|
parser.add_argument("--output_split_test_x", type=str, help="output split test features")
|
||||||
|
parser.add_argument("--output_split_test_y", type=str, help="output split test labels")
|
||||||
|
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
print("Argument 1(input taxi data features path): %s" % args.input_split_features)
|
||||||
|
print("Argument 2(input taxi data labels path): %s" % args.input_split_labels)
|
||||||
|
print("Argument 3(output training features split path): %s" % args.output_split_train_x)
|
||||||
|
print("Argument 4(output training labels split path): %s" % args.output_split_train_y)
|
||||||
|
print("Argument 5(output test features split path): %s" % args.output_split_test_x)
|
||||||
|
print("Argument 6(output test labels split path): %s" % args.output_split_test_y)
|
||||||
|
|
||||||
|
x_df = dprep.read_csv(path=args.input_split_features, header=dprep.PromoteHeadersMode.GROUPED).to_pandas_dataframe()
|
||||||
|
y_df = dprep.read_csv(path=args.input_split_labels, header=dprep.PromoteHeadersMode.GROUPED).to_pandas_dataframe()
|
||||||
|
|
||||||
|
# These functions splits the input features and labels into test and train data
|
||||||
|
# Visit https://docs.microsoft.com/en-us/azure/machine-learning/service/tutorial-auto-train-models for more detail
|
||||||
|
|
||||||
|
x_train, x_test, y_train, y_test = train_test_split(x_df, y_df, test_size=0.2, random_state=223)
|
||||||
|
|
||||||
|
if not (args.output_split_train_x is None and
|
||||||
|
args.output_split_test_x is None and
|
||||||
|
args.output_split_train_y is None and
|
||||||
|
args.output_split_test_y is None):
|
||||||
|
write_output(x_train, args.output_split_train_x)
|
||||||
|
write_output(y_train, args.output_split_train_y)
|
||||||
|
write_output(x_test, args.output_split_test_x)
|
||||||
|
write_output(y_test, args.output_split_test_y)
|
||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,303 +1,322 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Licensed under the MIT License."
|
"Licensed under the MIT License."
|
||||||
]
|
]
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
""
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# Distributed Chainer\n",
|
|
||||||
"In this tutorial, you will run a Chainer training example on the [MNIST](http://yann.lecun.com/exdb/mnist/) dataset using ChainerMN distributed training across a GPU cluster."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Prerequisites\n",
|
|
||||||
"* If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [Configuration](../../../configuration.ipynb) notebook to install the Azure Machine Learning Python SDK and create an Azure ML `Workspace`"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Check core SDK version number\n",
|
|
||||||
"import azureml.core\n",
|
|
||||||
"\n",
|
|
||||||
"print(\"SDK version:\", azureml.core.VERSION)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Diagnostics\n",
|
|
||||||
"Opt-in diagnostics for better experience, quality, and security of future releases."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"tags": [
|
|
||||||
"Diagnostics"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.telemetry import set_diagnostics_collection\n",
|
|
||||||
"\n",
|
|
||||||
"set_diagnostics_collection(send_diagnostics=True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Initialize workspace\n",
|
|
||||||
"\n",
|
|
||||||
"Initialize a [Workspace](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#workspace) object from the existing workspace you created in the Prerequisites step. `Workspace.from_config()` creates a workspace object from the details stored in `config.json`."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.workspace import Workspace\n",
|
|
||||||
"\n",
|
|
||||||
"ws = Workspace.from_config()\n",
|
|
||||||
"print('Workspace name: ' + ws.name, \n",
|
|
||||||
" 'Azure region: ' + ws.location, \n",
|
|
||||||
" 'Subscription id: ' + ws.subscription_id, \n",
|
|
||||||
" 'Resource group: ' + ws.resource_group, sep = '\\n')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Get default AmlCompute\n",
|
|
||||||
"You can create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for training your model. In this tutorial, we use Azure ML managed compute ([AmlCompute](https://docs.microsoft.com/azure/machine-learning/service/how-to-set-up-training-targets#amlcompute)) for our remote training compute resource. Specifically, the below code gets the default compute cluster.\n",
|
|
||||||
"\n",
|
|
||||||
"As with other Azure services, there are limits on certain resources (e.g. AmlCompute) associated with the Azure Machine Learning service. Please read [this article](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"compute_target = ws.get_default_compute_target(type=\"GPU\")\n",
|
|
||||||
"\n",
|
|
||||||
"# use get_status() to get a detailed status for the current AmlCompute. \n",
|
|
||||||
"print(compute_target.get_status().serialize())"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"The above code retrieves the default GPU compute. If you instead want to use default CPU compute, provide type=\"CPU\"."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Train model on the remote compute\n",
|
|
||||||
"Now that we have the AmlCompute ready to go, let's run our distributed training job."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Create a project directory\n",
|
|
||||||
"Create a directory that will contain all the necessary code from your local machine that you will need access to on the remote resource. This includes the training script and any additional files your training script depends on."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import os\n",
|
|
||||||
"\n",
|
|
||||||
"project_folder = './chainer-distr'\n",
|
|
||||||
"os.makedirs(project_folder, exist_ok=True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Prepare training script\n",
|
|
||||||
"Now you will need to create your training script. In this tutorial, the script for distributed training of MNIST is already provided for you at `train_mnist.py`. In practice, you should be able to take any custom Chainer training script as is and run it with Azure ML without having to modify your code."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Once your script is ready, copy the training script `train_mnist.py` into the project directory."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import shutil\n",
|
|
||||||
"\n",
|
|
||||||
"shutil.copy('train_mnist.py', project_folder)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Create an experiment\n",
|
|
||||||
"Create an [Experiment](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#experiment) to track all the runs in your workspace for this distributed Chainer tutorial. "
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core import Experiment\n",
|
|
||||||
"\n",
|
|
||||||
"experiment_name = 'chainer-distr'\n",
|
|
||||||
"experiment = Experiment(ws, name=experiment_name)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Create a Chainer estimator\n",
|
|
||||||
"The Azure ML SDK's Chainer estimator enables you to easily submit Chainer training jobs for both single-node and distributed runs."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.runconfig import MpiConfiguration\n",
|
|
||||||
"from azureml.train.dnn import Chainer\n",
|
|
||||||
"\n",
|
|
||||||
"estimator = Chainer(source_directory=project_folder,\n",
|
|
||||||
" compute_target=compute_target,\n",
|
|
||||||
" entry_script='train_mnist.py',\n",
|
|
||||||
" node_count=2,\n",
|
|
||||||
" distributed_training=MpiConfiguration(),\n",
|
|
||||||
" use_gpu=True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"The above code specifies that we will run our training script on `2` nodes, with one worker per node. In order to execute a distributed run using MPI, you must provide the argument `distributed_backend='mpi'`. Using this estimator with these settings, Chainer and its dependencies will be installed for you. However, if your script also uses other packages, make sure to install them via the `Chainer` constructor's `pip_packages` or `conda_packages` parameters."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Submit job\n",
|
|
||||||
"Run your experiment by submitting your estimator object. Note that this call is asynchronous."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"run = experiment.submit(estimator)\n",
|
|
||||||
"print(run)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Monitor your run\n",
|
|
||||||
"You can monitor the progress of the run with a Jupyter widget. Like the run submission, the widget is asynchronous and provides live updates every 10-15 seconds until the job completes. You can see that the widget automatically plots and visualizes the loss metric that we logged to the Azure ML run."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.widgets import RunDetails\n",
|
|
||||||
"\n",
|
|
||||||
"RunDetails(run).show()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"run.wait_for_completion(show_output=True)"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"authors": [
|
|
||||||
{
|
|
||||||
"name": "minxia"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3.6",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python36"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.6.6"
|
|
||||||
},
|
|
||||||
"msauthor": "minxia"
|
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
{
|
||||||
"nbformat_minor": 2
|
"cell_type": "markdown",
|
||||||
}
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
""
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Distributed Chainer\n",
|
||||||
|
"In this tutorial, you will run a Chainer training example on the [MNIST](http://yann.lecun.com/exdb/mnist/) dataset using ChainerMN distributed training across a GPU cluster."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Prerequisites\n",
|
||||||
|
"* If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [Configuration](../../../configuration.ipynb) notebook to install the Azure Machine Learning Python SDK and create an Azure ML `Workspace`"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Check core SDK version number\n",
|
||||||
|
"import azureml.core\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"SDK version:\", azureml.core.VERSION)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Diagnostics\n",
|
||||||
|
"Opt-in diagnostics for better experience, quality, and security of future releases."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"tags": [
|
||||||
|
"Diagnostics"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.telemetry import set_diagnostics_collection\n",
|
||||||
|
"\n",
|
||||||
|
"set_diagnostics_collection(send_diagnostics=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Initialize workspace\n",
|
||||||
|
"\n",
|
||||||
|
"Initialize a [Workspace](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#workspace) object from the existing workspace you created in the Prerequisites step. `Workspace.from_config()` creates a workspace object from the details stored in `config.json`."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.workspace import Workspace\n",
|
||||||
|
"\n",
|
||||||
|
"ws = Workspace.from_config()\n",
|
||||||
|
"print('Workspace name: ' + ws.name, \n",
|
||||||
|
" 'Azure region: ' + ws.location, \n",
|
||||||
|
" 'Subscription id: ' + ws.subscription_id, \n",
|
||||||
|
" 'Resource group: ' + ws.resource_group, sep = '\\n')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Create or attach existing AmlCompute\n",
|
||||||
|
"You will need to create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for training your model. In this tutorial, we use Azure ML managed compute ([AmlCompute](https://docs.microsoft.com/azure/machine-learning/service/how-to-set-up-training-targets#amlcompute)) for our remote training compute resource. Specifically, the below code creates an `STANDARD_NC6` GPU cluster that autoscales from `0` to `4` nodes.\n",
|
||||||
|
"\n",
|
||||||
|
"**Creation of AmlCompute takes approximately 5 minutes.** If the AmlCompute with that name is already in your workspace, this code will skip the creation process.\n",
|
||||||
|
"\n",
|
||||||
|
"As with other Azure services, there are limits on certain resources (e.g. AmlCompute) associated with the Azure Machine Learning service. Please read [this article](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
|
||||||
|
"from azureml.core.compute_target import ComputeTargetException\n",
|
||||||
|
"\n",
|
||||||
|
"# choose a name for your cluster\n",
|
||||||
|
"cluster_name = \"gpu-cluster\"\n",
|
||||||
|
"\n",
|
||||||
|
"try:\n",
|
||||||
|
" compute_target = ComputeTarget(workspace=ws, name=cluster_name)\n",
|
||||||
|
" print('Found existing compute target.')\n",
|
||||||
|
"except ComputeTargetException:\n",
|
||||||
|
" print('Creating a new compute target...')\n",
|
||||||
|
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6',\n",
|
||||||
|
" max_nodes=4)\n",
|
||||||
|
"\n",
|
||||||
|
" # create the cluster\n",
|
||||||
|
" compute_target = ComputeTarget.create(ws, cluster_name, compute_config)\n",
|
||||||
|
"\n",
|
||||||
|
" compute_target.wait_for_completion(show_output=True)\n",
|
||||||
|
"\n",
|
||||||
|
"# use get_status() to get a detailed status for the current AmlCompute. \n",
|
||||||
|
"print(compute_target.get_status().serialize())"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"The above code creates GPU compute. If you instead want to create CPU compute, provide a different VM size to the `vm_size` parameter, such as `STANDARD_D2_V2`."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Train model on the remote compute\n",
|
||||||
|
"Now that we have the AmlCompute ready to go, let's run our distributed training job."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Create a project directory\n",
|
||||||
|
"Create a directory that will contain all the necessary code from your local machine that you will need access to on the remote resource. This includes the training script and any additional files your training script depends on."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import os\n",
|
||||||
|
"\n",
|
||||||
|
"project_folder = './chainer-distr'\n",
|
||||||
|
"os.makedirs(project_folder, exist_ok=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Prepare training script\n",
|
||||||
|
"Now you will need to create your training script. In this tutorial, the script for distributed training of MNIST is already provided for you at `train_mnist.py`. In practice, you should be able to take any custom Chainer training script as is and run it with Azure ML without having to modify your code."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Once your script is ready, copy the training script `train_mnist.py` into the project directory."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import shutil\n",
|
||||||
|
"\n",
|
||||||
|
"shutil.copy('train_mnist.py', project_folder)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Create an experiment\n",
|
||||||
|
"Create an [Experiment](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#experiment) to track all the runs in your workspace for this distributed Chainer tutorial. "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core import Experiment\n",
|
||||||
|
"\n",
|
||||||
|
"experiment_name = 'chainer-distr'\n",
|
||||||
|
"experiment = Experiment(ws, name=experiment_name)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Create a Chainer estimator\n",
|
||||||
|
"The Azure ML SDK's Chainer estimator enables you to easily submit Chainer training jobs for both single-node and distributed runs."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.runconfig import MpiConfiguration\n",
|
||||||
|
"from azureml.train.dnn import Chainer\n",
|
||||||
|
"\n",
|
||||||
|
"estimator = Chainer(source_directory=project_folder,\n",
|
||||||
|
" compute_target=compute_target,\n",
|
||||||
|
" entry_script='train_mnist.py',\n",
|
||||||
|
" node_count=2,\n",
|
||||||
|
" distributed_training=MpiConfiguration(),\n",
|
||||||
|
" use_gpu=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"The above code specifies that we will run our training script on `2` nodes, with one worker per node. In order to execute a distributed run using MPI, you must provide the argument `distributed_backend='mpi'`. Using this estimator with these settings, Chainer and its dependencies will be installed for you. However, if your script also uses other packages, make sure to install them via the `Chainer` constructor's `pip_packages` or `conda_packages` parameters."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Submit job\n",
|
||||||
|
"Run your experiment by submitting your estimator object. Note that this call is asynchronous."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"run = experiment.submit(estimator)\n",
|
||||||
|
"print(run)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Monitor your run\n",
|
||||||
|
"You can monitor the progress of the run with a Jupyter widget. Like the run submission, the widget is asynchronous and provides live updates every 10-15 seconds until the job completes. You can see that the widget automatically plots and visualizes the loss metric that we logged to the Azure ML run."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.widgets import RunDetails\n",
|
||||||
|
"\n",
|
||||||
|
"RunDetails(run).show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"run.wait_for_completion(show_output=True)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "minxia"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3.6",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python36"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.6.6"
|
||||||
|
},
|
||||||
|
"msauthor": "minxia"
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
|
|||||||
@@ -1,382 +1,401 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Licensed under the MIT License."
|
"Licensed under the MIT License."
|
||||||
]
|
]
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
""
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# Distributed CNTK using custom docker images\n",
|
|
||||||
"In this tutorial, you will train a CNTK model on the [MNIST](http://yann.lecun.com/exdb/mnist/) dataset using a custom docker image and distributed training."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Prerequisites\n",
|
|
||||||
"* Understand the [architecture and terms](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture) introduced by Azure Machine Learning\n",
|
|
||||||
"* If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [configuration notebook](../../../configuration.ipynb) to:\n",
|
|
||||||
" * install the AML SDK\n",
|
|
||||||
" * create a workspace and its configuration file (`config.json`)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Check core SDK version number\n",
|
|
||||||
"import azureml.core\n",
|
|
||||||
"\n",
|
|
||||||
"print(\"SDK version:\", azureml.core.VERSION)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Diagnostics\n",
|
|
||||||
"Opt-in diagnostics for better experience, quality, and security of future releases."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"tags": [
|
|
||||||
"Diagnostics"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.telemetry import set_diagnostics_collection\n",
|
|
||||||
"\n",
|
|
||||||
"set_diagnostics_collection(send_diagnostics=True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Initialize workspace\n",
|
|
||||||
"\n",
|
|
||||||
"Initialize a [Workspace](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#workspace) object from the existing workspace you created in the Prerequisites step. `Workspace.from_config()` creates a workspace object from the details stored in `config.json`."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.workspace import Workspace\n",
|
|
||||||
"\n",
|
|
||||||
"ws = Workspace.from_config()\n",
|
|
||||||
"print('Workspace name: ' + ws.name,\n",
|
|
||||||
" 'Azure region: ' + ws.location, \n",
|
|
||||||
" 'Subscription id: ' + ws.subscription_id, \n",
|
|
||||||
" 'Resource group: ' + ws.resource_group, sep='\\n')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Get default AmlCompute\n",
|
|
||||||
"You can create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for training your model. In this tutorial, we use default `AmlCompute` as the training compute resource.\n",
|
|
||||||
"\n",
|
|
||||||
"As with other Azure services, there are limits on certain resources (e.g. AmlCompute) associated with the Azure Machine Learning service. Please read [this article](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"compute_target = ws.get_default_compute_target(type=\"GPU\")\n",
|
|
||||||
"\n",
|
|
||||||
"# use get_status() to get a detailed status for the current AmlCompute\n",
|
|
||||||
"print(compute_target.get_status().serialize())"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Upload training data\n",
|
|
||||||
"For this tutorial, we will be using the MNIST dataset.\n",
|
|
||||||
"\n",
|
|
||||||
"First, let's download the dataset. We've included the `install_mnist.py` script to download the data and convert it to a CNTK-supported format. Our data files will get written to a directory named `'mnist'`."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import install_mnist\n",
|
|
||||||
"\n",
|
|
||||||
"install_mnist.main('mnist')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"To make the data accessible for remote training, you will need to upload the data from your local machine to the cloud. AML provides a convenient way to do so via a [Datastore](https://docs.microsoft.com/azure/machine-learning/service/how-to-access-data). The datastore provides a mechanism for you to upload/download data, and interact with it from your remote compute targets. \n",
|
|
||||||
"\n",
|
|
||||||
"Each workspace is associated with a default datastore. In this tutorial, we will upload the training data to this default datastore, which we will then mount on the remote compute for training in the next section."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"ds = ws.get_default_datastore()\n",
|
|
||||||
"print(ds.datastore_type, ds.account_name, ds.container_name)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"The following code will upload the training data to the path `./mnist` on the default datastore."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"ds.upload(src_dir='./mnist', target_path='./mnist')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Now let's get a reference to the path on the datastore with the training data. We can do so using the `path` method. In the next section, we can then pass this reference to our training script's `--data_dir` argument. "
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"path_on_datastore = 'mnist'\n",
|
|
||||||
"ds_data = ds.path(path_on_datastore)\n",
|
|
||||||
"print(ds_data)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Train model on the remote compute\n",
|
|
||||||
"Now that we have the cluster ready to go, let's run our distributed training job."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Create a project directory\n",
|
|
||||||
"Create a directory that will contain all the necessary code from your local machine that you will need access to on the remote resource. This includes the training script, and any additional files your training script depends on."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import os\n",
|
|
||||||
"\n",
|
|
||||||
"project_folder = './cntk-distr'\n",
|
|
||||||
"os.makedirs(project_folder, exist_ok=True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Copy the training script `cntk_distr_mnist.py` into this project directory."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import shutil\n",
|
|
||||||
"\n",
|
|
||||||
"shutil.copy('cntk_distr_mnist.py', project_folder)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Create an experiment\n",
|
|
||||||
"Create an [experiment](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#experiment) to track all the runs in your workspace for this distributed CNTK tutorial. "
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core import Experiment\n",
|
|
||||||
"\n",
|
|
||||||
"experiment_name = 'cntk-distr'\n",
|
|
||||||
"experiment = Experiment(ws, name=experiment_name)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Create an Estimator\n",
|
|
||||||
"The AML SDK's base Estimator enables you to easily submit custom scripts for both single-node and distributed runs. You should this generic estimator for training code using frameworks such as sklearn or CNTK that don't have corresponding custom estimators. For more information on using the generic estimator, refer [here](https://docs.microsoft.com/azure/machine-learning/service/how-to-train-ml-models)."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.train.estimator import Estimator\n",
|
|
||||||
"\n",
|
|
||||||
"script_params = {\n",
|
|
||||||
" '--num_epochs': 20,\n",
|
|
||||||
" '--data_dir': ds_data.as_mount(),\n",
|
|
||||||
" '--output_dir': './outputs'\n",
|
|
||||||
"}\n",
|
|
||||||
"\n",
|
|
||||||
"estimator = Estimator(source_directory=project_folder,\n",
|
|
||||||
" compute_target=compute_target,\n",
|
|
||||||
" entry_script='cntk_distr_mnist.py',\n",
|
|
||||||
" script_params=script_params,\n",
|
|
||||||
" node_count=2,\n",
|
|
||||||
" process_count_per_node=1,\n",
|
|
||||||
" distributed_backend='mpi',\n",
|
|
||||||
" pip_packages=['cntk-gpu==2.6'],\n",
|
|
||||||
" custom_docker_image='microsoft/mmlspark:gpu-0.12',\n",
|
|
||||||
" use_gpu=True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"We would like to train our model using a [pre-built Docker container](https://hub.docker.com/r/microsoft/mmlspark/). To do so, specify the name of the docker image to the argument `custom_docker_image`. Finally, we provide the `cntk` package to `pip_packages` to install CNTK 2.6 on our custom image.\n",
|
|
||||||
"\n",
|
|
||||||
"The above code specifies that we will run our training script on `2` nodes, with one worker per node. In order to run distributed CNTK, which uses MPI, you must provide the argument `distributed_backend='mpi'`."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Submit job\n",
|
|
||||||
"Run your experiment by submitting your estimator object. Note that this call is asynchronous."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"run = experiment.submit(estimator)\n",
|
|
||||||
"print(run)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Monitor your run\n",
|
|
||||||
"You can monitor the progress of the run with a Jupyter widget. Like the run submission, the widget is asynchronous and provides live updates every 10-15 seconds until the job completes."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.widgets import RunDetails\n",
|
|
||||||
"\n",
|
|
||||||
"RunDetails(run).show()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Alternatively, you can block until the script has completed training before running more code."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"run.wait_for_completion(show_output=True)"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"authors": [
|
|
||||||
{
|
|
||||||
"name": "minxia"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3.6",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python36"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.6.6"
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
{
|
||||||
"nbformat_minor": 2
|
"cell_type": "markdown",
|
||||||
}
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
""
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Distributed CNTK using custom docker images\n",
|
||||||
|
"In this tutorial, you will train a CNTK model on the [MNIST](http://yann.lecun.com/exdb/mnist/) dataset using a custom docker image and distributed training."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Prerequisites\n",
|
||||||
|
"* Understand the [architecture and terms](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture) introduced by Azure Machine Learning\n",
|
||||||
|
"* If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [configuration notebook](../../../configuration.ipynb) to:\n",
|
||||||
|
" * install the AML SDK\n",
|
||||||
|
" * create a workspace and its configuration file (`config.json`)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Check core SDK version number\n",
|
||||||
|
"import azureml.core\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"SDK version:\", azureml.core.VERSION)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Diagnostics\n",
|
||||||
|
"Opt-in diagnostics for better experience, quality, and security of future releases."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"tags": [
|
||||||
|
"Diagnostics"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.telemetry import set_diagnostics_collection\n",
|
||||||
|
"\n",
|
||||||
|
"set_diagnostics_collection(send_diagnostics=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Initialize workspace\n",
|
||||||
|
"\n",
|
||||||
|
"Initialize a [Workspace](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#workspace) object from the existing workspace you created in the Prerequisites step. `Workspace.from_config()` creates a workspace object from the details stored in `config.json`."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.workspace import Workspace\n",
|
||||||
|
"\n",
|
||||||
|
"ws = Workspace.from_config()\n",
|
||||||
|
"print('Workspace name: ' + ws.name,\n",
|
||||||
|
" 'Azure region: ' + ws.location, \n",
|
||||||
|
" 'Subscription id: ' + ws.subscription_id, \n",
|
||||||
|
" 'Resource group: ' + ws.resource_group, sep='\\n')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Create or Attach existing AmlCompute\n",
|
||||||
|
"You will need to create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for training your model. In this tutorial, you create `AmlCompute` as your training compute resource.\n",
|
||||||
|
"\n",
|
||||||
|
"**Creation of AmlCompute takes approximately 5 minutes.** If the AmlCompute with that name is already in your workspace this code will skip the creation process.\n",
|
||||||
|
"\n",
|
||||||
|
"As with other Azure services, there are limits on certain resources (e.g. AmlCompute) associated with the Azure Machine Learning service. Please read [this article](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
|
||||||
|
"from azureml.core.compute_target import ComputeTargetException\n",
|
||||||
|
"\n",
|
||||||
|
"# choose a name for your cluster\n",
|
||||||
|
"cluster_name = \"gpu-cluster\"\n",
|
||||||
|
"\n",
|
||||||
|
"try:\n",
|
||||||
|
" compute_target = ComputeTarget(workspace=ws, name=cluster_name)\n",
|
||||||
|
" print('Found existing compute target.')\n",
|
||||||
|
"except ComputeTargetException:\n",
|
||||||
|
" print('Creating a new compute target...')\n",
|
||||||
|
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6',\n",
|
||||||
|
" max_nodes=4)\n",
|
||||||
|
"\n",
|
||||||
|
" # create the cluster\n",
|
||||||
|
" compute_target = ComputeTarget.create(ws, cluster_name, compute_config)\n",
|
||||||
|
"\n",
|
||||||
|
" compute_target.wait_for_completion(show_output=True)\n",
|
||||||
|
"\n",
|
||||||
|
"# use get_status() to get a detailed status for the current AmlCompute\n",
|
||||||
|
"print(compute_target.get_status().serialize())"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Upload training data\n",
|
||||||
|
"For this tutorial, we will be using the MNIST dataset.\n",
|
||||||
|
"\n",
|
||||||
|
"First, let's download the dataset. We've included the `install_mnist.py` script to download the data and convert it to a CNTK-supported format. Our data files will get written to a directory named `'mnist'`."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import install_mnist\n",
|
||||||
|
"\n",
|
||||||
|
"install_mnist.main('mnist')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"To make the data accessible for remote training, you will need to upload the data from your local machine to the cloud. AML provides a convenient way to do so via a [Datastore](https://docs.microsoft.com/azure/machine-learning/service/how-to-access-data). The datastore provides a mechanism for you to upload/download data, and interact with it from your remote compute targets. \n",
|
||||||
|
"\n",
|
||||||
|
"Each workspace is associated with a default datastore. In this tutorial, we will upload the training data to this default datastore, which we will then mount on the remote compute for training in the next section."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"ds = ws.get_default_datastore()\n",
|
||||||
|
"print(ds.datastore_type, ds.account_name, ds.container_name)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"The following code will upload the training data to the path `./mnist` on the default datastore."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"ds.upload(src_dir='./mnist', target_path='./mnist')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Now let's get a reference to the path on the datastore with the training data. We can do so using the `path` method. In the next section, we can then pass this reference to our training script's `--data_dir` argument. "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"path_on_datastore = 'mnist'\n",
|
||||||
|
"ds_data = ds.path(path_on_datastore)\n",
|
||||||
|
"print(ds_data)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Train model on the remote compute\n",
|
||||||
|
"Now that we have the cluster ready to go, let's run our distributed training job."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Create a project directory\n",
|
||||||
|
"Create a directory that will contain all the necessary code from your local machine that you will need access to on the remote resource. This includes the training script, and any additional files your training script depends on."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import os\n",
|
||||||
|
"\n",
|
||||||
|
"project_folder = './cntk-distr'\n",
|
||||||
|
"os.makedirs(project_folder, exist_ok=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Copy the training script `cntk_distr_mnist.py` into this project directory."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import shutil\n",
|
||||||
|
"\n",
|
||||||
|
"shutil.copy('cntk_distr_mnist.py', project_folder)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Create an experiment\n",
|
||||||
|
"Create an [experiment](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#experiment) to track all the runs in your workspace for this distributed CNTK tutorial. "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core import Experiment\n",
|
||||||
|
"\n",
|
||||||
|
"experiment_name = 'cntk-distr'\n",
|
||||||
|
"experiment = Experiment(ws, name=experiment_name)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Create an Estimator\n",
|
||||||
|
"The AML SDK's base Estimator enables you to easily submit custom scripts for both single-node and distributed runs. You should this generic estimator for training code using frameworks such as sklearn or CNTK that don't have corresponding custom estimators. For more information on using the generic estimator, refer [here](https://docs.microsoft.com/azure/machine-learning/service/how-to-train-ml-models)."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.train.estimator import Estimator\n",
|
||||||
|
"\n",
|
||||||
|
"script_params = {\n",
|
||||||
|
" '--num_epochs': 20,\n",
|
||||||
|
" '--data_dir': ds_data.as_mount(),\n",
|
||||||
|
" '--output_dir': './outputs'\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"estimator = Estimator(source_directory=project_folder,\n",
|
||||||
|
" compute_target=compute_target,\n",
|
||||||
|
" entry_script='cntk_distr_mnist.py',\n",
|
||||||
|
" script_params=script_params,\n",
|
||||||
|
" node_count=2,\n",
|
||||||
|
" process_count_per_node=1,\n",
|
||||||
|
" distributed_backend='mpi',\n",
|
||||||
|
" pip_packages=['cntk-gpu==2.6'],\n",
|
||||||
|
" custom_docker_image='microsoft/mmlspark:gpu-0.12',\n",
|
||||||
|
" use_gpu=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"We would like to train our model using a [pre-built Docker container](https://hub.docker.com/r/microsoft/mmlspark/). To do so, specify the name of the docker image to the argument `custom_docker_image`. Finally, we provide the `cntk` package to `pip_packages` to install CNTK 2.6 on our custom image.\n",
|
||||||
|
"\n",
|
||||||
|
"The above code specifies that we will run our training script on `2` nodes, with one worker per node. In order to run distributed CNTK, which uses MPI, you must provide the argument `distributed_backend='mpi'`."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Submit job\n",
|
||||||
|
"Run your experiment by submitting your estimator object. Note that this call is asynchronous."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"run = experiment.submit(estimator)\n",
|
||||||
|
"print(run)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Monitor your run\n",
|
||||||
|
"You can monitor the progress of the run with a Jupyter widget. Like the run submission, the widget is asynchronous and provides live updates every 10-15 seconds until the job completes."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.widgets import RunDetails\n",
|
||||||
|
"\n",
|
||||||
|
"RunDetails(run).show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Alternatively, you can block until the script has completed training before running more code."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"run.wait_for_completion(show_output=True)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "minxia"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3.6",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python36"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.6.6"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
|
|||||||
@@ -1,323 +1,342 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Licensed under the MIT License."
|
"Licensed under the MIT License."
|
||||||
]
|
]
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
""
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# Distributed PyTorch with Horovod\n",
|
|
||||||
"In this tutorial, you will train a PyTorch model on the [MNIST](http://yann.lecun.com/exdb/mnist/) dataset using distributed training via [Horovod](https://github.com/uber/horovod) across a GPU cluster."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Prerequisites\n",
|
|
||||||
"* If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [Configuration](../../../configuration.ipynb) notebook to install the Azure Machine Learning Python SDK and create an Azure ML `Workspace`\n",
|
|
||||||
"* Review the [tutorial](../train-hyperparameter-tune-deploy-with-pytorch/train-hyperparameter-tune-deploy-with-pytorch.ipynb) on single-node PyTorch training using Azure Machine Learning"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Check core SDK version number\n",
|
|
||||||
"import azureml.core\n",
|
|
||||||
"\n",
|
|
||||||
"print(\"SDK version:\", azureml.core.VERSION)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Diagnostics\n",
|
|
||||||
"Opt-in diagnostics for better experience, quality, and security of future releases."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"tags": [
|
|
||||||
"Diagnostics"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.telemetry import set_diagnostics_collection\n",
|
|
||||||
"\n",
|
|
||||||
"set_diagnostics_collection(send_diagnostics=True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Initialize workspace\n",
|
|
||||||
"\n",
|
|
||||||
"Initialize a [Workspace](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#workspace) object from the existing workspace you created in the Prerequisites step. `Workspace.from_config()` creates a workspace object from the details stored in `config.json`."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.workspace import Workspace\n",
|
|
||||||
"\n",
|
|
||||||
"ws = Workspace.from_config()\n",
|
|
||||||
"print('Workspace name: ' + ws.name, \n",
|
|
||||||
" 'Azure region: ' + ws.location, \n",
|
|
||||||
" 'Subscription id: ' + ws.subscription_id, \n",
|
|
||||||
" 'Resource group: ' + ws.resource_group, sep='\\n')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Get default AmlCompute\n",
|
|
||||||
"You can create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for training your model. In this tutorial, we use Azure ML managed compute ([AmlCompute](https://docs.microsoft.com/azure/machine-learning/service/how-to-set-up-training-targets#amlcompute)) for our remote training compute resource. Specifically, the below code uses the default compute in the workspace.\n",
|
|
||||||
"\n",
|
|
||||||
"As with other Azure services, there are limits on certain resources (e.g. AmlCompute) associated with the Azure Machine Learning service. Please read [this article](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"compute_target = ws.get_default_compute_target(type=\"GPU\")\n",
|
|
||||||
"\n",
|
|
||||||
"# use get_status() to get a detailed status for the current AmlCompute. \n",
|
|
||||||
"print(compute_target.get_status().serialize())"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"The above code retrieves the default GPU compute. If you instead want to use default CPU compute, provide type=\"CPU\"."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Train model on the remote compute\n",
|
|
||||||
"Now that we have the AmlCompute ready to go, let's run our distributed training job."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Create a project directory\n",
|
|
||||||
"Create a directory that will contain all the necessary code from your local machine that you will need access to on the remote resource. This includes the training script and any additional files your training script depends on."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import os\n",
|
|
||||||
"\n",
|
|
||||||
"project_folder = './pytorch-distr-hvd'\n",
|
|
||||||
"os.makedirs(project_folder, exist_ok=True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Prepare training script\n",
|
|
||||||
"Now you will need to create your training script. In this tutorial, the script for distributed training of MNIST is already provided for you at `pytorch_horovod_mnist.py`. In practice, you should be able to take any custom PyTorch training script as is and run it with Azure ML without having to modify your code.\n",
|
|
||||||
"\n",
|
|
||||||
"However, if you would like to use Azure ML's [metric logging](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#logging) capabilities, you will have to add a small amount of Azure ML logic inside your training script. In this example, at each logging interval, we will log the loss for that minibatch to our Azure ML run.\n",
|
|
||||||
"\n",
|
|
||||||
"To do so, in `pytorch_horovod_mnist.py`, we will first access the Azure ML `Run` object within the script:\n",
|
|
||||||
"```Python\n",
|
|
||||||
"from azureml.core.run import Run\n",
|
|
||||||
"run = Run.get_context()\n",
|
|
||||||
"```\n",
|
|
||||||
"Later within the script, we log the loss metric to our run:\n",
|
|
||||||
"```Python\n",
|
|
||||||
"run.log('loss', loss.item())\n",
|
|
||||||
"```"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Once your script is ready, copy the training script `pytorch_horovod_mnist.py` into the project directory."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import shutil\n",
|
|
||||||
"\n",
|
|
||||||
"shutil.copy('pytorch_horovod_mnist.py', project_folder)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Create an experiment\n",
|
|
||||||
"Create an [Experiment](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#experiment) to track all the runs in your workspace for this distributed PyTorch tutorial. "
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core import Experiment\n",
|
|
||||||
"\n",
|
|
||||||
"experiment_name = 'pytorch-distr-hvd'\n",
|
|
||||||
"experiment = Experiment(ws, name=experiment_name)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Create a PyTorch estimator\n",
|
|
||||||
"The Azure ML SDK's PyTorch estimator enables you to easily submit PyTorch training jobs for both single-node and distributed runs. For more information on the PyTorch estimator, refer [here](https://docs.microsoft.com/azure/machine-learning/service/how-to-train-pytorch)."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.runconfig import MpiConfiguration\n",
|
|
||||||
"from azureml.train.dnn import PyTorch\n",
|
|
||||||
"\n",
|
|
||||||
"estimator = PyTorch(source_directory=project_folder,\n",
|
|
||||||
" compute_target=compute_target,\n",
|
|
||||||
" entry_script='pytorch_horovod_mnist.py',\n",
|
|
||||||
" node_count=2,\n",
|
|
||||||
" distributed_training=MpiConfiguration(),\n",
|
|
||||||
" use_gpu=True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"The above code specifies that we will run our training script on `2` nodes, with one worker per node. In order to execute a distributed run using MPI/Horovod, you must provide the argument `distributed_backend='mpi'`. Using this estimator with these settings, PyTorch, Horovod and their dependencies will be installed for you. However, if your script also uses other packages, make sure to install them via the `PyTorch` constructor's `pip_packages` or `conda_packages` parameters."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Submit job\n",
|
|
||||||
"Run your experiment by submitting your estimator object. Note that this call is asynchronous."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"run = experiment.submit(estimator)\n",
|
|
||||||
"print(run)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Monitor your run\n",
|
|
||||||
"You can monitor the progress of the run with a Jupyter widget. Like the run submission, the widget is asynchronous and provides live updates every 10-15 seconds until the job completes. You can see that the widget automatically plots and visualizes the loss metric that we logged to the Azure ML run."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.widgets import RunDetails\n",
|
|
||||||
"\n",
|
|
||||||
"RunDetails(run).show()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Alternatively, you can block until the script has completed training before running more code."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"run.wait_for_completion(show_output=True) # this provides a verbose log"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"authors": [
|
|
||||||
{
|
|
||||||
"name": "minxia"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3.6",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python36"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.6.6"
|
|
||||||
},
|
|
||||||
"msauthor": "minxia"
|
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
{
|
||||||
"nbformat_minor": 2
|
"cell_type": "markdown",
|
||||||
}
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
""
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Distributed PyTorch with Horovod\n",
|
||||||
|
"In this tutorial, you will train a PyTorch model on the [MNIST](http://yann.lecun.com/exdb/mnist/) dataset using distributed training via [Horovod](https://github.com/uber/horovod) across a GPU cluster."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Prerequisites\n",
|
||||||
|
"* If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [Configuration](../../../configuration.ipynb) notebook to install the Azure Machine Learning Python SDK and create an Azure ML `Workspace`\n",
|
||||||
|
"* Review the [tutorial](../train-hyperparameter-tune-deploy-with-pytorch/train-hyperparameter-tune-deploy-with-pytorch.ipynb) on single-node PyTorch training using Azure Machine Learning"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Check core SDK version number\n",
|
||||||
|
"import azureml.core\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"SDK version:\", azureml.core.VERSION)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Diagnostics\n",
|
||||||
|
"Opt-in diagnostics for better experience, quality, and security of future releases."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"tags": [
|
||||||
|
"Diagnostics"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.telemetry import set_diagnostics_collection\n",
|
||||||
|
"\n",
|
||||||
|
"set_diagnostics_collection(send_diagnostics=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Initialize workspace\n",
|
||||||
|
"\n",
|
||||||
|
"Initialize a [Workspace](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#workspace) object from the existing workspace you created in the Prerequisites step. `Workspace.from_config()` creates a workspace object from the details stored in `config.json`."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.workspace import Workspace\n",
|
||||||
|
"\n",
|
||||||
|
"ws = Workspace.from_config()\n",
|
||||||
|
"print('Workspace name: ' + ws.name, \n",
|
||||||
|
" 'Azure region: ' + ws.location, \n",
|
||||||
|
" 'Subscription id: ' + ws.subscription_id, \n",
|
||||||
|
" 'Resource group: ' + ws.resource_group, sep='\\n')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Create or attach existing AmlCompute\n",
|
||||||
|
"You will need to create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for training your model. In this tutorial, we use Azure ML managed compute ([AmlCompute](https://docs.microsoft.com/azure/machine-learning/service/how-to-set-up-training-targets#amlcompute)) for our remote training compute resource. Specifically, the below code creates an `STANDARD_NC6` GPU cluster that autoscales from `0` to `4` nodes.\n",
|
||||||
|
"\n",
|
||||||
|
"**Creation of AmlCompute takes approximately 5 minutes.** If the AmlCompute with that name is already in your workspace, this code will skip the creation process.\n",
|
||||||
|
"\n",
|
||||||
|
"As with other Azure services, there are limits on certain resources (e.g. AmlCompute) associated with the Azure Machine Learning service. Please read [this article](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
|
||||||
|
"from azureml.core.compute_target import ComputeTargetException\n",
|
||||||
|
"\n",
|
||||||
|
"# choose a name for your cluster\n",
|
||||||
|
"cluster_name = \"gpu-cluster\"\n",
|
||||||
|
"\n",
|
||||||
|
"try:\n",
|
||||||
|
" compute_target = ComputeTarget(workspace=ws, name=cluster_name)\n",
|
||||||
|
" print('Found existing compute target.')\n",
|
||||||
|
"except ComputeTargetException:\n",
|
||||||
|
" print('Creating a new compute target...')\n",
|
||||||
|
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6',\n",
|
||||||
|
" max_nodes=4)\n",
|
||||||
|
"\n",
|
||||||
|
" # create the cluster\n",
|
||||||
|
" compute_target = ComputeTarget.create(ws, cluster_name, compute_config)\n",
|
||||||
|
"\n",
|
||||||
|
" compute_target.wait_for_completion(show_output=True)\n",
|
||||||
|
"\n",
|
||||||
|
"# use get_status() to get a detailed status for the current AmlCompute. \n",
|
||||||
|
"print(compute_target.get_status().serialize())"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"The above code creates GPU compute. If you instead want to create CPU compute, provide a different VM size to the `vm_size` parameter, such as `STANDARD_D2_V2`."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Train model on the remote compute\n",
|
||||||
|
"Now that we have the AmlCompute ready to go, let's run our distributed training job."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Create a project directory\n",
|
||||||
|
"Create a directory that will contain all the necessary code from your local machine that you will need access to on the remote resource. This includes the training script and any additional files your training script depends on."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import os\n",
|
||||||
|
"\n",
|
||||||
|
"project_folder = './pytorch-distr-hvd'\n",
|
||||||
|
"os.makedirs(project_folder, exist_ok=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Prepare training script\n",
|
||||||
|
"Now you will need to create your training script. In this tutorial, the script for distributed training of MNIST is already provided for you at `pytorch_horovod_mnist.py`. In practice, you should be able to take any custom PyTorch training script as is and run it with Azure ML without having to modify your code.\n",
|
||||||
|
"\n",
|
||||||
|
"However, if you would like to use Azure ML's [metric logging](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#logging) capabilities, you will have to add a small amount of Azure ML logic inside your training script. In this example, at each logging interval, we will log the loss for that minibatch to our Azure ML run.\n",
|
||||||
|
"\n",
|
||||||
|
"To do so, in `pytorch_horovod_mnist.py`, we will first access the Azure ML `Run` object within the script:\n",
|
||||||
|
"```Python\n",
|
||||||
|
"from azureml.core.run import Run\n",
|
||||||
|
"run = Run.get_context()\n",
|
||||||
|
"```\n",
|
||||||
|
"Later within the script, we log the loss metric to our run:\n",
|
||||||
|
"```Python\n",
|
||||||
|
"run.log('loss', loss.item())\n",
|
||||||
|
"```"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Once your script is ready, copy the training script `pytorch_horovod_mnist.py` into the project directory."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import shutil\n",
|
||||||
|
"\n",
|
||||||
|
"shutil.copy('pytorch_horovod_mnist.py', project_folder)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Create an experiment\n",
|
||||||
|
"Create an [Experiment](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#experiment) to track all the runs in your workspace for this distributed PyTorch tutorial. "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core import Experiment\n",
|
||||||
|
"\n",
|
||||||
|
"experiment_name = 'pytorch-distr-hvd'\n",
|
||||||
|
"experiment = Experiment(ws, name=experiment_name)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Create a PyTorch estimator\n",
|
||||||
|
"The Azure ML SDK's PyTorch estimator enables you to easily submit PyTorch training jobs for both single-node and distributed runs. For more information on the PyTorch estimator, refer [here](https://docs.microsoft.com/azure/machine-learning/service/how-to-train-pytorch)."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.runconfig import MpiConfiguration\n",
|
||||||
|
"from azureml.train.dnn import PyTorch\n",
|
||||||
|
"\n",
|
||||||
|
"estimator = PyTorch(source_directory=project_folder,\n",
|
||||||
|
" compute_target=compute_target,\n",
|
||||||
|
" entry_script='pytorch_horovod_mnist.py',\n",
|
||||||
|
" node_count=2,\n",
|
||||||
|
" distributed_training=MpiConfiguration(),\n",
|
||||||
|
" use_gpu=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"The above code specifies that we will run our training script on `2` nodes, with one worker per node. In order to execute a distributed run using MPI/Horovod, you must provide the argument `distributed_backend='mpi'`. Using this estimator with these settings, PyTorch, Horovod and their dependencies will be installed for you. However, if your script also uses other packages, make sure to install them via the `PyTorch` constructor's `pip_packages` or `conda_packages` parameters."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Submit job\n",
|
||||||
|
"Run your experiment by submitting your estimator object. Note that this call is asynchronous."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"run = experiment.submit(estimator)\n",
|
||||||
|
"print(run)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Monitor your run\n",
|
||||||
|
"You can monitor the progress of the run with a Jupyter widget. Like the run submission, the widget is asynchronous and provides live updates every 10-15 seconds until the job completes. You can see that the widget automatically plots and visualizes the loss metric that we logged to the Azure ML run."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.widgets import RunDetails\n",
|
||||||
|
"\n",
|
||||||
|
"RunDetails(run).show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Alternatively, you can block until the script has completed training before running more code."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"run.wait_for_completion(show_output=True) # this provides a verbose log"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "minxia"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3.6",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python36"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.6.6"
|
||||||
|
},
|
||||||
|
"msauthor": "minxia"
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
|
|||||||
@@ -1,392 +1,411 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Licensed under the MIT License."
|
"Licensed under the MIT License."
|
||||||
]
|
]
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
""
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# Distributed Tensorflow with Horovod\n",
|
|
||||||
"In this tutorial, you will train a word2vec model in TensorFlow using distributed training via [Horovod](https://github.com/uber/horovod)."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Prerequisites\n",
|
|
||||||
"* Understand the [architecture and terms](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture) introduced by Azure Machine Learning (AML)\n",
|
|
||||||
"* If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [configuration notebook](../../../configuration.ipynb) to:\n",
|
|
||||||
" * install the AML SDK\n",
|
|
||||||
" * create a workspace and its configuration file (`config.json`)\n",
|
|
||||||
"* Review the [tutorial](../train-hyperparameter-tune-deploy-with-tensorflow/train-hyperparameter-tune-deploy-with-tensorflow.ipynb) on single-node TensorFlow training using the SDK"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Check core SDK version number\n",
|
|
||||||
"import azureml.core\n",
|
|
||||||
"\n",
|
|
||||||
"print(\"SDK version:\", azureml.core.VERSION)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Diagnostics\n",
|
|
||||||
"Opt-in diagnostics for better experience, quality, and security of future releases."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"tags": [
|
|
||||||
"Diagnostics"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.telemetry import set_diagnostics_collection\n",
|
|
||||||
"\n",
|
|
||||||
"set_diagnostics_collection(send_diagnostics=True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Initialize workspace\n",
|
|
||||||
"Initialize a [Workspace](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#workspace) object from the existing workspace you created in the Prerequisites step. `Workspace.from_config()` creates a workspace object from the details stored in `config.json`."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.workspace import Workspace\n",
|
|
||||||
"\n",
|
|
||||||
"ws = Workspace.from_config()\n",
|
|
||||||
"print('Workspace name: ' + ws.name, \n",
|
|
||||||
" 'Azure region: ' + ws.location, \n",
|
|
||||||
" 'Subscription id: ' + ws.subscription_id, \n",
|
|
||||||
" 'Resource group: ' + ws.resource_group, sep='\\n')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Get default AmlCompute\n",
|
|
||||||
"You can create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for training your model. In this tutorial, you use default `AmlCompute` as your training compute resource.\n",
|
|
||||||
"\n",
|
|
||||||
"As with other Azure services, there are limits on certain resources (e.g. AmlCompute) associated with the Azure Machine Learning service. Please read [this article](https://docs.microsoft.com/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"compute_target = ws.get_default_compute_target(\"GPU\")\n",
|
|
||||||
"\n",
|
|
||||||
"# use get_status() to get a detailed status for the current cluster. \n",
|
|
||||||
"print(compute_target.get_status().serialize())"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"The above code retrieves the default GPU compute. If you instead want to use default CPU compute, provide type=\"CPU\"."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Upload data to datastore\n",
|
|
||||||
"To make data accessible for remote training, AML provides a convenient way to do so via a [Datastore](https://docs.microsoft.com/azure/machine-learning/service/how-to-access-data). The datastore provides a mechanism for you to upload/download data to Azure Storage, and interact with it from your remote compute targets. \n",
|
|
||||||
"\n",
|
|
||||||
"If your data is already stored in Azure, or you download the data as part of your training script, you will not need to do this step. For this tutorial, although you can download the data in your training script, we will demonstrate how to upload the training data to a datastore and access it during training to illustrate the datastore functionality."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"First, download the training data from [here](http://mattmahoney.net/dc/text8.zip) to your local machine:"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import os\n",
|
|
||||||
"import urllib\n",
|
|
||||||
"\n",
|
|
||||||
"os.makedirs('./data', exist_ok=True)\n",
|
|
||||||
"download_url = 'http://mattmahoney.net/dc/text8.zip'\n",
|
|
||||||
"urllib.request.urlretrieve(download_url, filename='./data/text8.zip')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Each workspace is associated with a default datastore. In this tutorial, we will upload the training data to this default datastore."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"ds = ws.get_default_datastore()\n",
|
|
||||||
"print(ds.datastore_type, ds.account_name, ds.container_name)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Upload the contents of the data directory to the path `./data` on the default datastore."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"ds.upload(src_dir='data', target_path='data', overwrite=True, show_progress=True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"For convenience, let's get a reference to the path on the datastore with the zip file of training data. We can do so using the `path` method. In the next section, we can then pass this reference to our training script's `--input_data` argument. "
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"path_on_datastore = 'data/text8.zip'\n",
|
|
||||||
"ds_data = ds.path(path_on_datastore)\n",
|
|
||||||
"print(ds_data)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Train model on the remote compute"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Create a project directory\n",
|
|
||||||
"Create a directory that will contain all the necessary code from your local machine that you will need access to on the remote resource. This includes the training script, and any additional files your training script depends on."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"project_folder = './tf-distr-hvd'\n",
|
|
||||||
"os.makedirs(project_folder, exist_ok=True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Copy the training script `tf_horovod_word2vec.py` into this project directory."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import shutil\n",
|
|
||||||
"\n",
|
|
||||||
"shutil.copy('tf_horovod_word2vec.py', project_folder)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Create an experiment\n",
|
|
||||||
"Create an [Experiment](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#experiment) to track all the runs in your workspace for this distributed TensorFlow tutorial. "
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core import Experiment\n",
|
|
||||||
"\n",
|
|
||||||
"experiment_name = 'tf-distr-hvd'\n",
|
|
||||||
"experiment = Experiment(ws, name=experiment_name)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Create a TensorFlow estimator\n",
|
|
||||||
"The AML SDK's TensorFlow estimator enables you to easily submit TensorFlow training jobs for both single-node and distributed runs. For more information on the TensorFlow estimator, refer [here](https://docs.microsoft.com/azure/machine-learning/service/how-to-train-tensorflow).\n",
|
|
||||||
"\n",
|
|
||||||
"The TensorFlow estimator also takes a `framework_version` parameter -- if no version is provided, the estimator will default to the latest version supported by AzureML. Use `TensorFlow.get_supported_versions()` to get a list of all versions supported by your current SDK version or see the [SDK documentation](https://docs.microsoft.com/en-us/python/api/azureml-train-core/azureml.train.dnn?view=azure-ml-py) for the versions supported in the most current release."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.runconfig import MpiConfiguration\n",
|
|
||||||
"from azureml.train.dnn import TensorFlow\n",
|
|
||||||
"\n",
|
|
||||||
"script_params={\n",
|
|
||||||
" '--input_data': ds_data\n",
|
|
||||||
"}\n",
|
|
||||||
"\n",
|
|
||||||
"estimator= TensorFlow(source_directory=project_folder,\n",
|
|
||||||
" compute_target=compute_target,\n",
|
|
||||||
" script_params=script_params,\n",
|
|
||||||
" entry_script='tf_horovod_word2vec.py',\n",
|
|
||||||
" node_count=2,\n",
|
|
||||||
" distributed_training=MpiConfiguration(),\n",
|
|
||||||
" framework_version='1.13')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"The above code specifies that we will run our training script on `2` nodes, with one worker per node. In order to execute a distributed run using MPI/Horovod, you must provide the argument `distributed_backend='mpi'`. Using this estimator with these settings, TensorFlow, Horovod and their dependencies will be installed for you. However, if your script also uses other packages, make sure to install them via the `TensorFlow` constructor's `pip_packages` or `conda_packages` parameters.\n",
|
|
||||||
"\n",
|
|
||||||
"Note that we passed our training data reference `ds_data` to our script's `--input_data` argument. This will 1) mount our datastore on the remote compute and 2) provide the path to the data zip file on our datastore."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Submit job\n",
|
|
||||||
"Run your experiment by submitting your estimator object. Note that this call is asynchronous."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"run = experiment.submit(estimator)\n",
|
|
||||||
"print(run)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Monitor your run\n",
|
|
||||||
"You can monitor the progress of the run with a Jupyter widget. Like the run submission, the widget is asynchronous and provides live updates every 10-15 seconds until the job completes."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.widgets import RunDetails\n",
|
|
||||||
"RunDetails(run).show()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Alternatively, you can block until the script has completed training before running more code."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"run.wait_for_completion(show_output=True)"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"authors": [
|
|
||||||
{
|
|
||||||
"name": "roastala"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3.6",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python36"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.6.6"
|
|
||||||
},
|
|
||||||
"msauthor": "minxia"
|
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
{
|
||||||
"nbformat_minor": 2
|
"cell_type": "markdown",
|
||||||
}
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
""
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Distributed Tensorflow with Horovod\n",
|
||||||
|
"In this tutorial, you will train a word2vec model in TensorFlow using distributed training via [Horovod](https://github.com/uber/horovod)."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Prerequisites\n",
|
||||||
|
"* Understand the [architecture and terms](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture) introduced by Azure Machine Learning (AML)\n",
|
||||||
|
"* If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [configuration notebook](../../../configuration.ipynb) to:\n",
|
||||||
|
" * install the AML SDK\n",
|
||||||
|
" * create a workspace and its configuration file (`config.json`)\n",
|
||||||
|
"* Review the [tutorial](../train-hyperparameter-tune-deploy-with-tensorflow/train-hyperparameter-tune-deploy-with-tensorflow.ipynb) on single-node TensorFlow training using the SDK"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Check core SDK version number\n",
|
||||||
|
"import azureml.core\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"SDK version:\", azureml.core.VERSION)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Diagnostics\n",
|
||||||
|
"Opt-in diagnostics for better experience, quality, and security of future releases."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"tags": [
|
||||||
|
"Diagnostics"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.telemetry import set_diagnostics_collection\n",
|
||||||
|
"\n",
|
||||||
|
"set_diagnostics_collection(send_diagnostics=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Initialize workspace\n",
|
||||||
|
"Initialize a [Workspace](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#workspace) object from the existing workspace you created in the Prerequisites step. `Workspace.from_config()` creates a workspace object from the details stored in `config.json`."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.workspace import Workspace\n",
|
||||||
|
"\n",
|
||||||
|
"ws = Workspace.from_config()\n",
|
||||||
|
"print('Workspace name: ' + ws.name, \n",
|
||||||
|
" 'Azure region: ' + ws.location, \n",
|
||||||
|
" 'Subscription id: ' + ws.subscription_id, \n",
|
||||||
|
" 'Resource group: ' + ws.resource_group, sep='\\n')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Create or Attach existing AmlCompute\n",
|
||||||
|
"You will need to create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for training your model. In this tutorial, you create `AmlCompute` as your training compute resource.\n",
|
||||||
|
"\n",
|
||||||
|
"**Creation of AmlCompute takes approximately 5 minutes.** If the AmlCompute with that name is already in your workspace this code will skip the creation process.\n",
|
||||||
|
"\n",
|
||||||
|
"As with other Azure services, there are limits on certain resources (e.g. AmlCompute) associated with the Azure Machine Learning service. Please read [this article](https://docs.microsoft.com/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
|
||||||
|
"from azureml.core.compute_target import ComputeTargetException\n",
|
||||||
|
"\n",
|
||||||
|
"# choose a name for your cluster\n",
|
||||||
|
"cluster_name = \"gpu-cluster\"\n",
|
||||||
|
"\n",
|
||||||
|
"try:\n",
|
||||||
|
" compute_target = ComputeTarget(workspace=ws, name=cluster_name)\n",
|
||||||
|
" print('Found existing compute target')\n",
|
||||||
|
"except ComputeTargetException:\n",
|
||||||
|
" print('Creating a new compute target...')\n",
|
||||||
|
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6', \n",
|
||||||
|
" max_nodes=4)\n",
|
||||||
|
"\n",
|
||||||
|
" # create the cluster\n",
|
||||||
|
" compute_target = ComputeTarget.create(ws, cluster_name, compute_config)\n",
|
||||||
|
"\n",
|
||||||
|
" compute_target.wait_for_completion(show_output=True)\n",
|
||||||
|
"\n",
|
||||||
|
"# use get_status() to get a detailed status for the current cluster. \n",
|
||||||
|
"print(compute_target.get_status().serialize())"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"The above code creates a GPU cluster. If you instead want to create a CPU cluster, provide a different VM size to the `vm_size` parameter, such as `STANDARD_D2_V2`."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Upload data to datastore\n",
|
||||||
|
"To make data accessible for remote training, AML provides a convenient way to do so via a [Datastore](https://docs.microsoft.com/azure/machine-learning/service/how-to-access-data). The datastore provides a mechanism for you to upload/download data to Azure Storage, and interact with it from your remote compute targets. \n",
|
||||||
|
"\n",
|
||||||
|
"If your data is already stored in Azure, or you download the data as part of your training script, you will not need to do this step. For this tutorial, although you can download the data in your training script, we will demonstrate how to upload the training data to a datastore and access it during training to illustrate the datastore functionality."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"First, download the training data from [here](http://mattmahoney.net/dc/text8.zip) to your local machine:"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import os\n",
|
||||||
|
"import urllib\n",
|
||||||
|
"\n",
|
||||||
|
"os.makedirs('./data', exist_ok=True)\n",
|
||||||
|
"download_url = 'http://mattmahoney.net/dc/text8.zip'\n",
|
||||||
|
"urllib.request.urlretrieve(download_url, filename='./data/text8.zip')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Each workspace is associated with a default datastore. In this tutorial, we will upload the training data to this default datastore."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"ds = ws.get_default_datastore()\n",
|
||||||
|
"print(ds.datastore_type, ds.account_name, ds.container_name)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Upload the contents of the data directory to the path `./data` on the default datastore."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"ds.upload(src_dir='data', target_path='data', overwrite=True, show_progress=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"For convenience, let's get a reference to the path on the datastore with the zip file of training data. We can do so using the `path` method. In the next section, we can then pass this reference to our training script's `--input_data` argument. "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"path_on_datastore = 'data/text8.zip'\n",
|
||||||
|
"ds_data = ds.path(path_on_datastore)\n",
|
||||||
|
"print(ds_data)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Train model on the remote compute"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Create a project directory\n",
|
||||||
|
"Create a directory that will contain all the necessary code from your local machine that you will need access to on the remote resource. This includes the training script, and any additional files your training script depends on."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"project_folder = './tf-distr-hvd'\n",
|
||||||
|
"os.makedirs(project_folder, exist_ok=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Copy the training script `tf_horovod_word2vec.py` into this project directory."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import shutil\n",
|
||||||
|
"\n",
|
||||||
|
"shutil.copy('tf_horovod_word2vec.py', project_folder)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Create an experiment\n",
|
||||||
|
"Create an [Experiment](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#experiment) to track all the runs in your workspace for this distributed TensorFlow tutorial. "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core import Experiment\n",
|
||||||
|
"\n",
|
||||||
|
"experiment_name = 'tf-distr-hvd'\n",
|
||||||
|
"experiment = Experiment(ws, name=experiment_name)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Create a TensorFlow estimator\n",
|
||||||
|
"The AML SDK's TensorFlow estimator enables you to easily submit TensorFlow training jobs for both single-node and distributed runs. For more information on the TensorFlow estimator, refer [here](https://docs.microsoft.com/azure/machine-learning/service/how-to-train-tensorflow).\n",
|
||||||
|
"\n",
|
||||||
|
"The TensorFlow estimator also takes a `framework_version` parameter -- if no version is provided, the estimator will default to the latest version supported by AzureML. Use `TensorFlow.get_supported_versions()` to get a list of all versions supported by your current SDK version or see the [SDK documentation](https://docs.microsoft.com/en-us/python/api/azureml-train-core/azureml.train.dnn?view=azure-ml-py) for the versions supported in the most current release."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.runconfig import MpiConfiguration\n",
|
||||||
|
"from azureml.train.dnn import TensorFlow\n",
|
||||||
|
"\n",
|
||||||
|
"script_params={\n",
|
||||||
|
" '--input_data': ds_data\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"estimator= TensorFlow(source_directory=project_folder,\n",
|
||||||
|
" compute_target=compute_target,\n",
|
||||||
|
" script_params=script_params,\n",
|
||||||
|
" entry_script='tf_horovod_word2vec.py',\n",
|
||||||
|
" node_count=2,\n",
|
||||||
|
" distributed_training=MpiConfiguration(),\n",
|
||||||
|
" framework_version='1.13')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"The above code specifies that we will run our training script on `2` nodes, with one worker per node. In order to execute a distributed run using MPI/Horovod, you must provide the argument `distributed_backend='mpi'`. Using this estimator with these settings, TensorFlow, Horovod and their dependencies will be installed for you. However, if your script also uses other packages, make sure to install them via the `TensorFlow` constructor's `pip_packages` or `conda_packages` parameters.\n",
|
||||||
|
"\n",
|
||||||
|
"Note that we passed our training data reference `ds_data` to our script's `--input_data` argument. This will 1) mount our datastore on the remote compute and 2) provide the path to the data zip file on our datastore."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Submit job\n",
|
||||||
|
"Run your experiment by submitting your estimator object. Note that this call is asynchronous."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"run = experiment.submit(estimator)\n",
|
||||||
|
"print(run)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Monitor your run\n",
|
||||||
|
"You can monitor the progress of the run with a Jupyter widget. Like the run submission, the widget is asynchronous and provides live updates every 10-15 seconds until the job completes."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.widgets import RunDetails\n",
|
||||||
|
"RunDetails(run).show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Alternatively, you can block until the script has completed training before running more code."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"run.wait_for_completion(show_output=True)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "roastala"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3.6",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python36"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.6.6"
|
||||||
|
},
|
||||||
|
"msauthor": "minxia"
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
|
|||||||
@@ -1,307 +1,326 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Licensed under the MIT License."
|
"Licensed under the MIT License."
|
||||||
]
|
]
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
""
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# Distributed TensorFlow with parameter server\n",
|
|
||||||
"In this tutorial, you will train a TensorFlow model on the [MNIST](http://yann.lecun.com/exdb/mnist/) dataset using native [distributed TensorFlow](https://www.tensorflow.org/deploy/distributed)."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Prerequisites\n",
|
|
||||||
"* Understand the [architecture and terms](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture) introduced by Azure Machine Learning (AML)\n",
|
|
||||||
"* If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [configuration notebook](../../../configuration.ipynb) to:\n",
|
|
||||||
" * install the AML SDK\n",
|
|
||||||
" * create a workspace and its configuration file (`config.json`)\n",
|
|
||||||
"* Review the [tutorial](../train-hyperparameter-tune-deploy-with-tensorflow/train-hyperparameter-tune-deploy-with-tensorflow.ipynb) on single-node TensorFlow training using the SDK"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Check core SDK version number\n",
|
|
||||||
"import azureml.core\n",
|
|
||||||
"\n",
|
|
||||||
"print(\"SDK version:\", azureml.core.VERSION)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Diagnostics\n",
|
|
||||||
"Opt-in diagnostics for better experience, quality, and security of future releases."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"tags": [
|
|
||||||
"Diagnostics"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.telemetry import set_diagnostics_collection\n",
|
|
||||||
"\n",
|
|
||||||
"set_diagnostics_collection(send_diagnostics=True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Initialize workspace\n",
|
|
||||||
"Initialize a [Workspace](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#workspace) object from the existing workspace you created in the Prerequisites step. `Workspace.from_config()` creates a workspace object from the details stored in `config.json`."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.workspace import Workspace\n",
|
|
||||||
"\n",
|
|
||||||
"ws = Workspace.from_config()\n",
|
|
||||||
"print('Workspace name: ' + ws.name, \n",
|
|
||||||
" 'Azure region: ' + ws.location, \n",
|
|
||||||
" 'Subscription id: ' + ws.subscription_id, \n",
|
|
||||||
" 'Resource group: ' + ws.resource_group, sep = '\\n')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Get default AmlCompute\n",
|
|
||||||
"You can create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for training your model. In this tutorial, you use default `AmlCompute` as your training compute resource.\n",
|
|
||||||
"\n",
|
|
||||||
"As with other Azure services, there are limits on certain resources (e.g. AmlCompute) associated with the Azure Machine Learning service. Please read [this article](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"compute_target = ws.get_default_compute_target(type=\"GPU\")\n",
|
|
||||||
"\n",
|
|
||||||
"# use get_status() to get a detailed status for the current cluster. \n",
|
|
||||||
"print(compute_target.get_status().serialize())"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Train model on the remote compute\n",
|
|
||||||
"Now that we have the cluster ready to go, let's run our distributed training job."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Create a project directory\n",
|
|
||||||
"Create a directory that will contain all the necessary code from your local machine that you will need access to on the remote resource. This includes the training script, and any additional files your training script depends on."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import os\n",
|
|
||||||
"\n",
|
|
||||||
"project_folder = './tf-distr-ps'\n",
|
|
||||||
"os.makedirs(project_folder, exist_ok=True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Copy the training script `tf_mnist_replica.py` into this project directory."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import shutil\n",
|
|
||||||
"\n",
|
|
||||||
"shutil.copy('tf_mnist_replica.py', project_folder)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Create an experiment\n",
|
|
||||||
"Create an [Experiment](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#experiment) to track all the runs in your workspace for this distributed TensorFlow tutorial. "
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core import Experiment\n",
|
|
||||||
"\n",
|
|
||||||
"experiment_name = 'tf-distr-ps'\n",
|
|
||||||
"experiment = Experiment(ws, name=experiment_name)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Create a TensorFlow estimator\n",
|
|
||||||
"The AML SDK's TensorFlow estimator enables you to easily submit TensorFlow training jobs for both single-node and distributed runs. For more information on the TensorFlow estimator, refer [here](https://docs.microsoft.com/azure/machine-learning/service/how-to-train-tensorflow)."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.runconfig import TensorflowConfiguration\n",
|
|
||||||
"from azureml.train.dnn import TensorFlow\n",
|
|
||||||
"\n",
|
|
||||||
"script_params={\n",
|
|
||||||
" '--num_gpus': 1,\n",
|
|
||||||
" '--train_steps': 500\n",
|
|
||||||
"}\n",
|
|
||||||
"\n",
|
|
||||||
"distributed_training = TensorflowConfiguration()\n",
|
|
||||||
"distributed_training.worker_count = 2\n",
|
|
||||||
"\n",
|
|
||||||
"estimator = TensorFlow(source_directory=project_folder,\n",
|
|
||||||
" compute_target=compute_target,\n",
|
|
||||||
" script_params=script_params,\n",
|
|
||||||
" entry_script='tf_mnist_replica.py',\n",
|
|
||||||
" node_count=2,\n",
|
|
||||||
" distributed_training=distributed_training,\n",
|
|
||||||
" use_gpu=True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"The above code specifies that we will run our training script on `2` nodes, with two workers and one parameter server. In order to execute a native distributed TensorFlow run, you must provide the argument `distributed_backend='ps'`. Using this estimator with these settings, TensorFlow and its dependencies will be installed for you. However, if your script also uses other packages, make sure to install them via the `TensorFlow` constructor's `pip_packages` or `conda_packages` parameters."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Submit job\n",
|
|
||||||
"Run your experiment by submitting your estimator object. Note that this call is asynchronous."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"run = experiment.submit(estimator)\n",
|
|
||||||
"print(run)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Monitor your run\n",
|
|
||||||
"You can monitor the progress of the run with a Jupyter widget. Like the run submission, the widget is asynchronous and provides live updates every 10-15 seconds until the job completes."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.widgets import RunDetails\n",
|
|
||||||
"\n",
|
|
||||||
"RunDetails(run).show()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Alternatively, you can block until the script has completed training before running more code."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"run.wait_for_completion(show_output=True) # this provides a verbose log"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"authors": [
|
|
||||||
{
|
|
||||||
"name": "minxia"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3.6",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python36"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.6.6"
|
|
||||||
},
|
|
||||||
"msauthor": "minxia"
|
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
{
|
||||||
"nbformat_minor": 2
|
"cell_type": "markdown",
|
||||||
}
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
""
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Distributed TensorFlow with parameter server\n",
|
||||||
|
"In this tutorial, you will train a TensorFlow model on the [MNIST](http://yann.lecun.com/exdb/mnist/) dataset using native [distributed TensorFlow](https://www.tensorflow.org/deploy/distributed)."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Prerequisites\n",
|
||||||
|
"* Understand the [architecture and terms](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture) introduced by Azure Machine Learning (AML)\n",
|
||||||
|
"* If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [configuration notebook](../../../configuration.ipynb) to:\n",
|
||||||
|
" * install the AML SDK\n",
|
||||||
|
" * create a workspace and its configuration file (`config.json`)\n",
|
||||||
|
"* Review the [tutorial](../train-hyperparameter-tune-deploy-with-tensorflow/train-hyperparameter-tune-deploy-with-tensorflow.ipynb) on single-node TensorFlow training using the SDK"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Check core SDK version number\n",
|
||||||
|
"import azureml.core\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"SDK version:\", azureml.core.VERSION)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Diagnostics\n",
|
||||||
|
"Opt-in diagnostics for better experience, quality, and security of future releases."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"tags": [
|
||||||
|
"Diagnostics"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.telemetry import set_diagnostics_collection\n",
|
||||||
|
"\n",
|
||||||
|
"set_diagnostics_collection(send_diagnostics=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Initialize workspace\n",
|
||||||
|
"Initialize a [Workspace](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#workspace) object from the existing workspace you created in the Prerequisites step. `Workspace.from_config()` creates a workspace object from the details stored in `config.json`."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.workspace import Workspace\n",
|
||||||
|
"\n",
|
||||||
|
"ws = Workspace.from_config()\n",
|
||||||
|
"print('Workspace name: ' + ws.name, \n",
|
||||||
|
" 'Azure region: ' + ws.location, \n",
|
||||||
|
" 'Subscription id: ' + ws.subscription_id, \n",
|
||||||
|
" 'Resource group: ' + ws.resource_group, sep = '\\n')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Create or Attach existing AmlCompute\n",
|
||||||
|
"You will need to create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for training your model. In this tutorial, you create `AmlCompute` as your training compute resource.\n",
|
||||||
|
"\n",
|
||||||
|
"**Creation of AmlCompute takes approximately 5 minutes.** If the AmlCompute with that name is already in your workspace this code will skip the creation process.\n",
|
||||||
|
"\n",
|
||||||
|
"As with other Azure services, there are limits on certain resources (e.g. AmlCompute) associated with the Azure Machine Learning service. Please read [this article](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
|
||||||
|
"from azureml.core.compute_target import ComputeTargetException\n",
|
||||||
|
"\n",
|
||||||
|
"# choose a name for your cluster\n",
|
||||||
|
"cluster_name = \"gpu-cluster\"\n",
|
||||||
|
"\n",
|
||||||
|
"try:\n",
|
||||||
|
" compute_target = ComputeTarget(workspace=ws, name=cluster_name)\n",
|
||||||
|
" print('Found existing compute target.')\n",
|
||||||
|
"except ComputeTargetException:\n",
|
||||||
|
" print('Creating a new compute target...')\n",
|
||||||
|
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6', \n",
|
||||||
|
" max_nodes=4)\n",
|
||||||
|
"\n",
|
||||||
|
" # create the cluster\n",
|
||||||
|
" compute_target = ComputeTarget.create(ws, cluster_name, compute_config)\n",
|
||||||
|
"\n",
|
||||||
|
" compute_target.wait_for_completion(show_output=True)\n",
|
||||||
|
"\n",
|
||||||
|
"# use get_status() to get a detailed status for the current cluster. \n",
|
||||||
|
"print(compute_target.get_status().serialize())"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Train model on the remote compute\n",
|
||||||
|
"Now that we have the cluster ready to go, let's run our distributed training job."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Create a project directory\n",
|
||||||
|
"Create a directory that will contain all the necessary code from your local machine that you will need access to on the remote resource. This includes the training script, and any additional files your training script depends on."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import os\n",
|
||||||
|
"\n",
|
||||||
|
"project_folder = './tf-distr-ps'\n",
|
||||||
|
"os.makedirs(project_folder, exist_ok=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Copy the training script `tf_mnist_replica.py` into this project directory."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import shutil\n",
|
||||||
|
"\n",
|
||||||
|
"shutil.copy('tf_mnist_replica.py', project_folder)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Create an experiment\n",
|
||||||
|
"Create an [Experiment](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#experiment) to track all the runs in your workspace for this distributed TensorFlow tutorial. "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core import Experiment\n",
|
||||||
|
"\n",
|
||||||
|
"experiment_name = 'tf-distr-ps'\n",
|
||||||
|
"experiment = Experiment(ws, name=experiment_name)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Create a TensorFlow estimator\n",
|
||||||
|
"The AML SDK's TensorFlow estimator enables you to easily submit TensorFlow training jobs for both single-node and distributed runs. For more information on the TensorFlow estimator, refer [here](https://docs.microsoft.com/azure/machine-learning/service/how-to-train-tensorflow)."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.runconfig import TensorflowConfiguration\n",
|
||||||
|
"from azureml.train.dnn import TensorFlow\n",
|
||||||
|
"\n",
|
||||||
|
"script_params={\n",
|
||||||
|
" '--num_gpus': 1,\n",
|
||||||
|
" '--train_steps': 500\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"distributed_training = TensorflowConfiguration()\n",
|
||||||
|
"distributed_training.worker_count = 2\n",
|
||||||
|
"\n",
|
||||||
|
"estimator = TensorFlow(source_directory=project_folder,\n",
|
||||||
|
" compute_target=compute_target,\n",
|
||||||
|
" script_params=script_params,\n",
|
||||||
|
" entry_script='tf_mnist_replica.py',\n",
|
||||||
|
" node_count=2,\n",
|
||||||
|
" distributed_training=distributed_training,\n",
|
||||||
|
" use_gpu=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"The above code specifies that we will run our training script on `2` nodes, with two workers and one parameter server. In order to execute a native distributed TensorFlow run, you must provide the argument `distributed_backend='ps'`. Using this estimator with these settings, TensorFlow and its dependencies will be installed for you. However, if your script also uses other packages, make sure to install them via the `TensorFlow` constructor's `pip_packages` or `conda_packages` parameters."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Submit job\n",
|
||||||
|
"Run your experiment by submitting your estimator object. Note that this call is asynchronous."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"run = experiment.submit(estimator)\n",
|
||||||
|
"print(run)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Monitor your run\n",
|
||||||
|
"You can monitor the progress of the run with a Jupyter widget. Like the run submission, the widget is asynchronous and provides live updates every 10-15 seconds until the job completes."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.widgets import RunDetails\n",
|
||||||
|
"\n",
|
||||||
|
"RunDetails(run).show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Alternatively, you can block until the script has completed training before running more code."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"run.wait_for_completion(show_output=True) # this provides a verbose log"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "minxia"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3.6",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python36"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.6.6"
|
||||||
|
},
|
||||||
|
"msauthor": "minxia"
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
|
|||||||
@@ -1,256 +1,256 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Licensed under the MIT License."
|
"Licensed under the MIT License."
|
||||||
]
|
]
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
""
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# Export Run History as Tensorboard logs\n",
|
|
||||||
"\n",
|
|
||||||
"1. Run some training and log some metrics into Run History\n",
|
|
||||||
"2. Export the run history to some directory as Tensorboard logs\n",
|
|
||||||
"3. Launch a local Tensorboard to view the run history"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Prerequisites\n",
|
|
||||||
"* Understand the [architecture and terms](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture) introduced by Azure Machine Learning\n",
|
|
||||||
"* If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [configuration notebook](../../../configuration.ipynb) notebook to:\n",
|
|
||||||
" * install the AML SDK\n",
|
|
||||||
" * create a workspace and its configuration file (`config.json`)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Check core SDK version number\n",
|
|
||||||
"import azureml.core\n",
|
|
||||||
"\n",
|
|
||||||
"print(\"SDK version:\", azureml.core.VERSION)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Initialize Workspace\n",
|
|
||||||
"\n",
|
|
||||||
"Initialize a workspace object from persisted configuration."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core import Workspace, Experiment\n",
|
|
||||||
"\n",
|
|
||||||
"ws = Workspace.from_config()\n",
|
|
||||||
"print('Workspace name: ' + ws.name, \n",
|
|
||||||
" 'Azure region: ' + ws.location, \n",
|
|
||||||
" 'Subscription id: ' + ws.subscription_id, \n",
|
|
||||||
" 'Resource group: ' + ws.resource_group, sep='\\n')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Set experiment name and start the run"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"experiment_name = 'export-to-tensorboard'\n",
|
|
||||||
"exp = Experiment(ws, experiment_name)\n",
|
|
||||||
"root_run = exp.start_logging()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# load diabetes dataset, a well-known built-in small dataset that comes with scikit-learn\n",
|
|
||||||
"from sklearn.datasets import load_diabetes\n",
|
|
||||||
"from sklearn.linear_model import Ridge\n",
|
|
||||||
"from sklearn.metrics import mean_squared_error\n",
|
|
||||||
"from sklearn.model_selection import train_test_split\n",
|
|
||||||
"\n",
|
|
||||||
"X, y = load_diabetes(return_X_y=True)\n",
|
|
||||||
"\n",
|
|
||||||
"columns = ['age', 'gender', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6']\n",
|
|
||||||
"\n",
|
|
||||||
"x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n",
|
|
||||||
"data = {\n",
|
|
||||||
" \"train\":{\"x\":x_train, \"y\":y_train}, \n",
|
|
||||||
" \"test\":{\"x\":x_test, \"y\":y_test}\n",
|
|
||||||
"}"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Example experiment\n",
|
|
||||||
"from tqdm import tqdm\n",
|
|
||||||
"\n",
|
|
||||||
"alphas = [.1, .2, .3, .4, .5, .6 , .7]\n",
|
|
||||||
"\n",
|
|
||||||
"# try a bunch of alpha values in a Linear Regression (Ridge) model\n",
|
|
||||||
"for alpha in tqdm(alphas):\n",
|
|
||||||
" # create a bunch of child runs\n",
|
|
||||||
" with root_run.child_run(\"alpha\" + str(alpha)) as run:\n",
|
|
||||||
" # More data science stuff\n",
|
|
||||||
" reg = Ridge(alpha=alpha)\n",
|
|
||||||
" reg.fit(data[\"train\"][\"x\"], data[\"train\"][\"y\"])\n",
|
|
||||||
" \n",
|
|
||||||
" preds = reg.predict(data[\"test\"][\"x\"])\n",
|
|
||||||
" mse = mean_squared_error(preds, data[\"test\"][\"y\"])\n",
|
|
||||||
" # End train and eval\n",
|
|
||||||
"\n",
|
|
||||||
" # log alpha, mean_squared_error and feature names in run history\n",
|
|
||||||
" root_run.log(\"alpha\", alpha)\n",
|
|
||||||
" root_run.log(\"mse\", mse)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Export Run History to Tensorboard logs"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Export Run History to Tensorboard logs\n",
|
|
||||||
"from azureml.tensorboard.export import export_to_tensorboard\n",
|
|
||||||
"import os\n",
|
|
||||||
"\n",
|
|
||||||
"logdir = 'exportedTBlogs'\n",
|
|
||||||
"log_path = os.path.join(os.getcwd(), logdir)\n",
|
|
||||||
"try:\n",
|
|
||||||
" os.stat(log_path)\n",
|
|
||||||
"except os.error:\n",
|
|
||||||
" os.mkdir(log_path)\n",
|
|
||||||
"print(logdir)\n",
|
|
||||||
"\n",
|
|
||||||
"# export run history for the project\n",
|
|
||||||
"export_to_tensorboard(root_run, logdir)\n",
|
|
||||||
"\n",
|
|
||||||
"# or export a particular run\n",
|
|
||||||
"# export_to_tensorboard(run, logdir)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"root_run.complete()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Start Tensorboard\n",
|
|
||||||
"\n",
|
|
||||||
"Or you can start the Tensorboard outside this notebook to view the result"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.tensorboard import Tensorboard\n",
|
|
||||||
"\n",
|
|
||||||
"# The Tensorboard constructor takes an array of runs, so be sure and pass it in as a single-element array here\n",
|
|
||||||
"tb = Tensorboard([], local_root=logdir, port=6006)\n",
|
|
||||||
"\n",
|
|
||||||
"# If successful, start() returns a string with the URI of the instance.\n",
|
|
||||||
"tb.start()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Stop Tensorboard\n",
|
|
||||||
"\n",
|
|
||||||
"When you're done, make sure to call the `stop()` method of the Tensorboard object."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"tb.stop()"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"authors": [
|
|
||||||
{
|
|
||||||
"name": "roastala"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3.6",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python36"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.6.5"
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
{
|
||||||
"nbformat_minor": 2
|
"cell_type": "markdown",
|
||||||
}
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
""
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Export Run History as Tensorboard logs\n",
|
||||||
|
"\n",
|
||||||
|
"1. Run some training and log some metrics into Run History\n",
|
||||||
|
"2. Export the run history to some directory as Tensorboard logs\n",
|
||||||
|
"3. Launch a local Tensorboard to view the run history"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Prerequisites\n",
|
||||||
|
"* Understand the [architecture and terms](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture) introduced by Azure Machine Learning\n",
|
||||||
|
"* If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [configuration notebook](../../../configuration.ipynb) notebook to:\n",
|
||||||
|
" * install the AML SDK\n",
|
||||||
|
" * create a workspace and its configuration file (`config.json`)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Check core SDK version number\n",
|
||||||
|
"import azureml.core\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"SDK version:\", azureml.core.VERSION)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Initialize Workspace\n",
|
||||||
|
"\n",
|
||||||
|
"Initialize a workspace object from persisted configuration."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core import Workspace, Experiment\n",
|
||||||
|
"\n",
|
||||||
|
"ws = Workspace.from_config()\n",
|
||||||
|
"print('Workspace name: ' + ws.name, \n",
|
||||||
|
" 'Azure region: ' + ws.location, \n",
|
||||||
|
" 'Subscription id: ' + ws.subscription_id, \n",
|
||||||
|
" 'Resource group: ' + ws.resource_group, sep='\\n')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Set experiment name and start the run"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"experiment_name = 'export-to-tensorboard'\n",
|
||||||
|
"exp = Experiment(ws, experiment_name)\n",
|
||||||
|
"root_run = exp.start_logging()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# load diabetes dataset, a well-known built-in small dataset that comes with scikit-learn\n",
|
||||||
|
"from sklearn.datasets import load_diabetes\n",
|
||||||
|
"from sklearn.linear_model import Ridge\n",
|
||||||
|
"from sklearn.metrics import mean_squared_error\n",
|
||||||
|
"from sklearn.model_selection import train_test_split\n",
|
||||||
|
"\n",
|
||||||
|
"X, y = load_diabetes(return_X_y=True)\n",
|
||||||
|
"\n",
|
||||||
|
"columns = ['age', 'gender', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6']\n",
|
||||||
|
"\n",
|
||||||
|
"x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n",
|
||||||
|
"data = {\n",
|
||||||
|
" \"train\":{\"x\":x_train, \"y\":y_train}, \n",
|
||||||
|
" \"test\":{\"x\":x_test, \"y\":y_test}\n",
|
||||||
|
"}"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Example experiment\n",
|
||||||
|
"from tqdm import tqdm\n",
|
||||||
|
"\n",
|
||||||
|
"alphas = [.1, .2, .3, .4, .5, .6 , .7]\n",
|
||||||
|
"\n",
|
||||||
|
"# try a bunch of alpha values in a Linear Regression (Ridge) model\n",
|
||||||
|
"for alpha in tqdm(alphas):\n",
|
||||||
|
" # create a bunch of child runs\n",
|
||||||
|
" with root_run.child_run(\"alpha\" + str(alpha)) as run:\n",
|
||||||
|
" # More data science stuff\n",
|
||||||
|
" reg = Ridge(alpha=alpha)\n",
|
||||||
|
" reg.fit(data[\"train\"][\"x\"], data[\"train\"][\"y\"])\n",
|
||||||
|
" \n",
|
||||||
|
" preds = reg.predict(data[\"test\"][\"x\"])\n",
|
||||||
|
" mse = mean_squared_error(preds, data[\"test\"][\"y\"])\n",
|
||||||
|
" # End train and eval\n",
|
||||||
|
"\n",
|
||||||
|
" # log alpha, mean_squared_error and feature names in run history\n",
|
||||||
|
" root_run.log(\"alpha\", alpha)\n",
|
||||||
|
" root_run.log(\"mse\", mse)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Export Run History to Tensorboard logs"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Export Run History to Tensorboard logs\n",
|
||||||
|
"from azureml.tensorboard.export import export_to_tensorboard\n",
|
||||||
|
"import os\n",
|
||||||
|
"\n",
|
||||||
|
"logdir = 'exportedTBlogs'\n",
|
||||||
|
"log_path = os.path.join(os.getcwd(), logdir)\n",
|
||||||
|
"try:\n",
|
||||||
|
" os.stat(log_path)\n",
|
||||||
|
"except os.error:\n",
|
||||||
|
" os.mkdir(log_path)\n",
|
||||||
|
"print(logdir)\n",
|
||||||
|
"\n",
|
||||||
|
"# export run history for the project\n",
|
||||||
|
"export_to_tensorboard(root_run, logdir)\n",
|
||||||
|
"\n",
|
||||||
|
"# or export a particular run\n",
|
||||||
|
"# export_to_tensorboard(run, logdir)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"root_run.complete()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Start Tensorboard\n",
|
||||||
|
"\n",
|
||||||
|
"Or you can start the Tensorboard outside this notebook to view the result"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.tensorboard import Tensorboard\n",
|
||||||
|
"\n",
|
||||||
|
"# The Tensorboard constructor takes an array of runs, so be sure and pass it in as a single-element array here\n",
|
||||||
|
"tb = Tensorboard([], local_root=logdir, port=6006)\n",
|
||||||
|
"\n",
|
||||||
|
"# If successful, start() returns a string with the URI of the instance.\n",
|
||||||
|
"tb.start()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Stop Tensorboard\n",
|
||||||
|
"\n",
|
||||||
|
"When you're done, make sure to call the `stop()` method of the Tensorboard object."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"tb.stop()"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "roastala"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3.6",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python36"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.6.5"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
|
|||||||
@@ -9,8 +9,16 @@ print("Hello Azure ML!")
|
|||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
parser.add_argument('--numbers-in-sequence', type=int, dest='num_in_sequence', default=10,
|
parser.add_argument('--numbers-in-sequence', type=int, dest='num_in_sequence', default=10,
|
||||||
help='number of fibonacci numbers in sequence')
|
help='number of fibonacci numbers in sequence')
|
||||||
|
|
||||||
|
# This is how you can use a bool argument in Python. If you want the 'my_bool_var' to be True, just pass it
|
||||||
|
# in Estimator's script_param as script+params:{'my_bool_var': ''}.
|
||||||
|
# And, if you want to use it as False, then do not pass it in the Estimator's script_params.
|
||||||
|
# You can reverse the behavior by setting action='store_false' in the next line.
|
||||||
|
parser.add_argument("--my_bool_var", action='store_true')
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
num = args.num_in_sequence
|
num = args.num_in_sequence
|
||||||
|
my_bool_var = args.my_bool_var
|
||||||
|
|
||||||
|
|
||||||
def fibo(n):
|
def fibo(n):
|
||||||
@@ -23,6 +31,7 @@ def fibo(n):
|
|||||||
try:
|
try:
|
||||||
from azureml.core import Run
|
from azureml.core import Run
|
||||||
run = Run.get_context()
|
run = Run.get_context()
|
||||||
|
print("The value of boolean parameter 'my_bool_var' is {}".format(my_bool_var))
|
||||||
print("Log Fibonacci numbers.")
|
print("Log Fibonacci numbers.")
|
||||||
for i in range(0, num - 1):
|
for i in range(0, num - 1):
|
||||||
run.log('Fibonacci numbers', fibo(i))
|
run.log('Fibonacci numbers', fibo(i))
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,57 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||||
|
"\n",
|
||||||
|
"Licensed under the MIT License."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import scrapbook as sb\n",
|
||||||
|
"sb.glue('Fibonacci numbers', [0, 1, 1, 2, 3, 5, 8, 13, 21, 34])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "jingywa"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3.6",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python36"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.6.8"
|
||||||
|
},
|
||||||
|
"msauthor": "jingywa"
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
File diff suppressed because it is too large
Load Diff
@@ -1,413 +1,432 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"Copyright (c) Microsoft Corporation. All rights reserved. \n",
|
"Copyright (c) Microsoft Corporation. All rights reserved. \n",
|
||||||
"\n",
|
"\n",
|
||||||
"Licensed under the MIT License."
|
"Licensed under the MIT License."
|
||||||
]
|
]
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
""
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# Train and hyperparameter tune with Chainer\n",
|
|
||||||
"\n",
|
|
||||||
"In this tutorial, we demonstrate how to use the Azure ML Python SDK to train a Convolutional Neural Network (CNN) on a single-node GPU with Chainer to perform handwritten digit recognition on the popular MNIST dataset. We will also demonstrate how to perform hyperparameter tuning of the model using Azure ML's HyperDrive service."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Prerequisites\n",
|
|
||||||
"* If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [Configuration](../../../configuration.ipynb) notebook to install the Azure Machine Learning Python SDK and create an Azure ML `Workspace`"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Check core SDK version number\n",
|
|
||||||
"import azureml.core\n",
|
|
||||||
"\n",
|
|
||||||
"print(\"SDK version:\", azureml.core.VERSION)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Diagnostics\n",
|
|
||||||
"Opt-in diagnostics for better experience, quality, and security of future releases."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"tags": [
|
|
||||||
"Diagnostics"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.telemetry import set_diagnostics_collection\n",
|
|
||||||
"\n",
|
|
||||||
"set_diagnostics_collection(send_diagnostics=True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Initialize workspace\n",
|
|
||||||
"Initialize a [Workspace](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#workspace) object from the existing workspace you created in the Prerequisites step. `Workspace.from_config()` creates a workspace object from the details stored in `config.json`."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.workspace import Workspace\n",
|
|
||||||
"\n",
|
|
||||||
"ws = Workspace.from_config()\n",
|
|
||||||
"print('Workspace name: ' + ws.name, \n",
|
|
||||||
" 'Azure region: ' + ws.location, \n",
|
|
||||||
" 'Subscription id: ' + ws.subscription_id, \n",
|
|
||||||
" 'Resource group: ' + ws.resource_group, sep = '\\n')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Get default AmlCompute\n",
|
|
||||||
"You can create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for training your model. In this tutorial, we use Azure ML managed compute ([AmlCompute](https://docs.microsoft.com/azure/machine-learning/service/how-to-set-up-training-targets#amlcompute)) for our remote training compute resource.\n",
|
|
||||||
"\n",
|
|
||||||
"As with other Azure services, there are limits on certain resources (e.g. AmlCompute) associated with the Azure Machine Learning service. Please read [this article](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"compute_target = ws.get_default_compute_target(type=\"GPU\")\n",
|
|
||||||
"\n",
|
|
||||||
"# use get_status() to get a detailed status for the current cluster. \n",
|
|
||||||
"print(compute_target.get_status().serialize())"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"The above code retrieves the default GPU compute. If you instead want to use default CPU compute, provide type=\"CPU\"."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Train model on the remote compute\n",
|
|
||||||
"Now that you have your data and training script prepared, you are ready to train on your remote compute cluster. You can take advantage of Azure compute to leverage GPUs to cut down your training time. "
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Create a project directory\n",
|
|
||||||
"Create a directory that will contain all the necessary code from your local machine that you will need access to on the remote resource. This includes the training script and any additional files your training script depends on."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import os\n",
|
|
||||||
"\n",
|
|
||||||
"project_folder = './chainer-mnist'\n",
|
|
||||||
"os.makedirs(project_folder, exist_ok=True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Prepare training script\n",
|
|
||||||
"Now you will need to create your training script. In this tutorial, the training script is already provided for you at `chainer_mnist.py`. In practice, you should be able to take any custom training script as is and run it with Azure ML without having to modify your code.\n",
|
|
||||||
"\n",
|
|
||||||
"However, if you would like to use Azure ML's [tracking and metrics](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#metrics) capabilities, you will have to add a small amount of Azure ML code inside your training script. \n",
|
|
||||||
"\n",
|
|
||||||
"In `chainer_mnist.py`, we will log some metrics to our Azure ML run. To do so, we will access the Azure ML `Run` object within the script:\n",
|
|
||||||
"```Python\n",
|
|
||||||
"from azureml.core.run import Run\n",
|
|
||||||
"run = Run.get_context()\n",
|
|
||||||
"```\n",
|
|
||||||
"Further within `chainer_mnist.py`, we log the batchsize and epochs parameters, and the highest accuracy the model achieves:\n",
|
|
||||||
"```Python\n",
|
|
||||||
"run.log('Batch size', np.int(args.batchsize))\n",
|
|
||||||
"run.log('Epochs', np.int(args.epochs))\n",
|
|
||||||
"\n",
|
|
||||||
"run.log('Accuracy', np.float(val_accuracy))\n",
|
|
||||||
"```\n",
|
|
||||||
"These run metrics will become particularly important when we begin hyperparameter tuning our model in the \"Tune model hyperparameters\" section."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Once your script is ready, copy the training script `chainer_mnist.py` into your project directory."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import shutil\n",
|
|
||||||
"\n",
|
|
||||||
"shutil.copy('chainer_mnist.py', project_folder)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Create an experiment\n",
|
|
||||||
"Create an [Experiment](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#experiment) to track all the runs in your workspace for this Chainer tutorial. "
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core import Experiment\n",
|
|
||||||
"\n",
|
|
||||||
"experiment_name = 'chainer-mnist'\n",
|
|
||||||
"experiment = Experiment(ws, name=experiment_name)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Create a Chainer estimator\n",
|
|
||||||
"The Azure ML SDK's Chainer estimator enables you to easily submit Chainer training jobs for both single-node and distributed runs. The following code will define a single-node Chainer job."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.train.dnn import Chainer\n",
|
|
||||||
"\n",
|
|
||||||
"script_params = {\n",
|
|
||||||
" '--epochs': 10,\n",
|
|
||||||
" '--batchsize': 128,\n",
|
|
||||||
" '--output_dir': './outputs'\n",
|
|
||||||
"}\n",
|
|
||||||
"\n",
|
|
||||||
"estimator = Chainer(source_directory=project_folder, \n",
|
|
||||||
" script_params=script_params,\n",
|
|
||||||
" compute_target=compute_target,\n",
|
|
||||||
" pip_packages=['numpy', 'pytest'],\n",
|
|
||||||
" entry_script='chainer_mnist.py',\n",
|
|
||||||
" use_gpu=True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"The `script_params` parameter is a dictionary containing the command-line arguments to your training script `entry_script`. To leverage the Azure VM's GPU for training, we set `use_gpu=True`."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Submit job\n",
|
|
||||||
"Run your experiment by submitting your estimator object. Note that this call is asynchronous."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"run = experiment.submit(estimator)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Monitor your run\n",
|
|
||||||
"You can monitor the progress of the run with a Jupyter widget. Like the run submission, the widget is asynchronous and provides live updates every 10-15 seconds until the job completes."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.widgets import RunDetails\n",
|
|
||||||
"\n",
|
|
||||||
"RunDetails(run).show()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# to get more details of your run\n",
|
|
||||||
"print(run.get_details())"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Tune model hyperparameters\n",
|
|
||||||
"Now that we've seen how to do a simple Chainer training run using the SDK, let's see if we can further improve the accuracy of our model. We can optimize our model's hyperparameters using Azure Machine Learning's hyperparameter tuning capabilities."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Start a hyperparameter sweep\n",
|
|
||||||
"First, we will define the hyperparameter space to sweep over. Let's tune the batch size and epochs parameters. In this example we will use random sampling to try different configuration sets of hyperparameters to maximize our primary metric, accuracy.\n",
|
|
||||||
"\n",
|
|
||||||
"Then, we specify the early termination policy to use to early terminate poorly performing runs. Here we use the `BanditPolicy`, which will terminate any run that doesn't fall within the slack factor of our primary evaluation metric. In this tutorial, we will apply this policy every epoch (since we report our `Accuracy` metric every epoch and `evaluation_interval=1`). Notice we will delay the first policy evaluation until after the first `3` epochs (`delay_evaluation=3`).\n",
|
|
||||||
"Refer [here](https://docs.microsoft.com/azure/machine-learning/service/how-to-tune-hyperparameters#specify-an-early-termination-policy) for more information on the BanditPolicy and other policies available."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.train.hyperdrive.runconfig import HyperDriveConfig\n",
|
|
||||||
"from azureml.train.hyperdrive.sampling import RandomParameterSampling\n",
|
|
||||||
"from azureml.train.hyperdrive.policy import BanditPolicy\n",
|
|
||||||
"from azureml.train.hyperdrive.run import PrimaryMetricGoal\n",
|
|
||||||
"from azureml.train.hyperdrive.parameter_expressions import choice\n",
|
|
||||||
" \n",
|
|
||||||
"\n",
|
|
||||||
"param_sampling = RandomParameterSampling( {\n",
|
|
||||||
" \"--batchsize\": choice(128, 256),\n",
|
|
||||||
" \"--epochs\": choice(5, 10, 20, 40)\n",
|
|
||||||
" }\n",
|
|
||||||
")\n",
|
|
||||||
"\n",
|
|
||||||
"hyperdrive_config = HyperDriveConfig(estimator=estimator,\n",
|
|
||||||
" hyperparameter_sampling=param_sampling, \n",
|
|
||||||
" primary_metric_name='Accuracy',\n",
|
|
||||||
" primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,\n",
|
|
||||||
" max_total_runs=8,\n",
|
|
||||||
" max_concurrent_runs=4)\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Finally, lauch the hyperparameter tuning job."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# start the HyperDrive run\n",
|
|
||||||
"hyperdrive_run = experiment.submit(hyperdrive_config)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Monitor HyperDrive runs\n",
|
|
||||||
"You can monitor the progress of the runs with the following Jupyter widget. "
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"RunDetails(hyperdrive_run).show()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"run.wait_for_completion(show_output=True)"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"authors": [
|
|
||||||
{
|
|
||||||
"name": "minxia"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3.6",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python36"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.6.6"
|
|
||||||
},
|
|
||||||
"msauthor": "minxia"
|
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
{
|
||||||
"nbformat_minor": 2
|
"cell_type": "markdown",
|
||||||
}
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
""
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Train and hyperparameter tune with Chainer\n",
|
||||||
|
"\n",
|
||||||
|
"In this tutorial, we demonstrate how to use the Azure ML Python SDK to train a Convolutional Neural Network (CNN) on a single-node GPU with Chainer to perform handwritten digit recognition on the popular MNIST dataset. We will also demonstrate how to perform hyperparameter tuning of the model using Azure ML's HyperDrive service."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Prerequisites\n",
|
||||||
|
"* If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [Configuration](../../../configuration.ipynb) notebook to install the Azure Machine Learning Python SDK and create an Azure ML `Workspace`"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Check core SDK version number\n",
|
||||||
|
"import azureml.core\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"SDK version:\", azureml.core.VERSION)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Diagnostics\n",
|
||||||
|
"Opt-in diagnostics for better experience, quality, and security of future releases."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"tags": [
|
||||||
|
"Diagnostics"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.telemetry import set_diagnostics_collection\n",
|
||||||
|
"\n",
|
||||||
|
"set_diagnostics_collection(send_diagnostics=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Initialize workspace\n",
|
||||||
|
"Initialize a [Workspace](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#workspace) object from the existing workspace you created in the Prerequisites step. `Workspace.from_config()` creates a workspace object from the details stored in `config.json`."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.workspace import Workspace\n",
|
||||||
|
"\n",
|
||||||
|
"ws = Workspace.from_config()\n",
|
||||||
|
"print('Workspace name: ' + ws.name, \n",
|
||||||
|
" 'Azure region: ' + ws.location, \n",
|
||||||
|
" 'Subscription id: ' + ws.subscription_id, \n",
|
||||||
|
" 'Resource group: ' + ws.resource_group, sep = '\\n')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Create or Attach existing AmlCompute\n",
|
||||||
|
"You will need to create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for training your model. In this tutorial, we use Azure ML managed compute ([AmlCompute](https://docs.microsoft.com/azure/machine-learning/service/how-to-set-up-training-targets#amlcompute)) for our remote training compute resource.\n",
|
||||||
|
"\n",
|
||||||
|
"**Creation of AmlCompute takes approximately 5 minutes.** If the AmlCompute with that name is already in your workspace, this code will skip the creation process.\n",
|
||||||
|
"\n",
|
||||||
|
"As with other Azure services, there are limits on certain resources (e.g. AmlCompute) associated with the Azure Machine Learning service. Please read [this article](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
|
||||||
|
"from azureml.core.compute_target import ComputeTargetException\n",
|
||||||
|
"\n",
|
||||||
|
"# choose a name for your cluster\n",
|
||||||
|
"cluster_name = \"gpu-cluster\"\n",
|
||||||
|
"\n",
|
||||||
|
"try:\n",
|
||||||
|
" compute_target = ComputeTarget(workspace=ws, name=cluster_name)\n",
|
||||||
|
" print('Found existing compute target.')\n",
|
||||||
|
"except ComputeTargetException:\n",
|
||||||
|
" print('Creating a new compute target...')\n",
|
||||||
|
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6', \n",
|
||||||
|
" max_nodes=4)\n",
|
||||||
|
"\n",
|
||||||
|
" # create the cluster\n",
|
||||||
|
" compute_target = ComputeTarget.create(ws, cluster_name, compute_config)\n",
|
||||||
|
"\n",
|
||||||
|
" compute_target.wait_for_completion(show_output=True)\n",
|
||||||
|
"\n",
|
||||||
|
"# use get_status() to get a detailed status for the current cluster. \n",
|
||||||
|
"print(compute_target.get_status().serialize())"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"The above code creates a GPU cluster. If you instead want to create a CPU cluster, provide a different VM size to the `vm_size` parameter, such as `STANDARD_D2_V2`."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Train model on the remote compute\n",
|
||||||
|
"Now that you have your data and training script prepared, you are ready to train on your remote compute cluster. You can take advantage of Azure compute to leverage GPUs to cut down your training time. "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Create a project directory\n",
|
||||||
|
"Create a directory that will contain all the necessary code from your local machine that you will need access to on the remote resource. This includes the training script and any additional files your training script depends on."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import os\n",
|
||||||
|
"\n",
|
||||||
|
"project_folder = './chainer-mnist'\n",
|
||||||
|
"os.makedirs(project_folder, exist_ok=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Prepare training script\n",
|
||||||
|
"Now you will need to create your training script. In this tutorial, the training script is already provided for you at `chainer_mnist.py`. In practice, you should be able to take any custom training script as is and run it with Azure ML without having to modify your code.\n",
|
||||||
|
"\n",
|
||||||
|
"However, if you would like to use Azure ML's [tracking and metrics](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#metrics) capabilities, you will have to add a small amount of Azure ML code inside your training script. \n",
|
||||||
|
"\n",
|
||||||
|
"In `chainer_mnist.py`, we will log some metrics to our Azure ML run. To do so, we will access the Azure ML `Run` object within the script:\n",
|
||||||
|
"```Python\n",
|
||||||
|
"from azureml.core.run import Run\n",
|
||||||
|
"run = Run.get_context()\n",
|
||||||
|
"```\n",
|
||||||
|
"Further within `chainer_mnist.py`, we log the batchsize and epochs parameters, and the highest accuracy the model achieves:\n",
|
||||||
|
"```Python\n",
|
||||||
|
"run.log('Batch size', np.int(args.batchsize))\n",
|
||||||
|
"run.log('Epochs', np.int(args.epochs))\n",
|
||||||
|
"\n",
|
||||||
|
"run.log('Accuracy', np.float(val_accuracy))\n",
|
||||||
|
"```\n",
|
||||||
|
"These run metrics will become particularly important when we begin hyperparameter tuning our model in the \"Tune model hyperparameters\" section."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Once your script is ready, copy the training script `chainer_mnist.py` into your project directory."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import shutil\n",
|
||||||
|
"\n",
|
||||||
|
"shutil.copy('chainer_mnist.py', project_folder)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Create an experiment\n",
|
||||||
|
"Create an [Experiment](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#experiment) to track all the runs in your workspace for this Chainer tutorial. "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core import Experiment\n",
|
||||||
|
"\n",
|
||||||
|
"experiment_name = 'chainer-mnist'\n",
|
||||||
|
"experiment = Experiment(ws, name=experiment_name)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Create a Chainer estimator\n",
|
||||||
|
"The Azure ML SDK's Chainer estimator enables you to easily submit Chainer training jobs for both single-node and distributed runs. The following code will define a single-node Chainer job."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.train.dnn import Chainer\n",
|
||||||
|
"\n",
|
||||||
|
"script_params = {\n",
|
||||||
|
" '--epochs': 10,\n",
|
||||||
|
" '--batchsize': 128,\n",
|
||||||
|
" '--output_dir': './outputs'\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"estimator = Chainer(source_directory=project_folder, \n",
|
||||||
|
" script_params=script_params,\n",
|
||||||
|
" compute_target=compute_target,\n",
|
||||||
|
" pip_packages=['numpy', 'pytest'],\n",
|
||||||
|
" entry_script='chainer_mnist.py',\n",
|
||||||
|
" use_gpu=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"The `script_params` parameter is a dictionary containing the command-line arguments to your training script `entry_script`. To leverage the Azure VM's GPU for training, we set `use_gpu=True`."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Submit job\n",
|
||||||
|
"Run your experiment by submitting your estimator object. Note that this call is asynchronous."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"run = experiment.submit(estimator)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Monitor your run\n",
|
||||||
|
"You can monitor the progress of the run with a Jupyter widget. Like the run submission, the widget is asynchronous and provides live updates every 10-15 seconds until the job completes."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.widgets import RunDetails\n",
|
||||||
|
"\n",
|
||||||
|
"RunDetails(run).show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# to get more details of your run\n",
|
||||||
|
"print(run.get_details())"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Tune model hyperparameters\n",
|
||||||
|
"Now that we've seen how to do a simple Chainer training run using the SDK, let's see if we can further improve the accuracy of our model. We can optimize our model's hyperparameters using Azure Machine Learning's hyperparameter tuning capabilities."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Start a hyperparameter sweep\n",
|
||||||
|
"First, we will define the hyperparameter space to sweep over. Let's tune the batch size and epochs parameters. In this example we will use random sampling to try different configuration sets of hyperparameters to maximize our primary metric, accuracy.\n",
|
||||||
|
"\n",
|
||||||
|
"Then, we specify the early termination policy to use to early terminate poorly performing runs. Here we use the `BanditPolicy`, which will terminate any run that doesn't fall within the slack factor of our primary evaluation metric. In this tutorial, we will apply this policy every epoch (since we report our `Accuracy` metric every epoch and `evaluation_interval=1`). Notice we will delay the first policy evaluation until after the first `3` epochs (`delay_evaluation=3`).\n",
|
||||||
|
"Refer [here](https://docs.microsoft.com/azure/machine-learning/service/how-to-tune-hyperparameters#specify-an-early-termination-policy) for more information on the BanditPolicy and other policies available."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.train.hyperdrive.runconfig import HyperDriveConfig\n",
|
||||||
|
"from azureml.train.hyperdrive.sampling import RandomParameterSampling\n",
|
||||||
|
"from azureml.train.hyperdrive.policy import BanditPolicy\n",
|
||||||
|
"from azureml.train.hyperdrive.run import PrimaryMetricGoal\n",
|
||||||
|
"from azureml.train.hyperdrive.parameter_expressions import choice\n",
|
||||||
|
" \n",
|
||||||
|
"\n",
|
||||||
|
"param_sampling = RandomParameterSampling( {\n",
|
||||||
|
" \"--batchsize\": choice(128, 256),\n",
|
||||||
|
" \"--epochs\": choice(5, 10, 20, 40)\n",
|
||||||
|
" }\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"hyperdrive_config = HyperDriveConfig(estimator=estimator,\n",
|
||||||
|
" hyperparameter_sampling=param_sampling, \n",
|
||||||
|
" primary_metric_name='Accuracy',\n",
|
||||||
|
" primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,\n",
|
||||||
|
" max_total_runs=8,\n",
|
||||||
|
" max_concurrent_runs=4)\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Finally, launch the hyperparameter tuning job."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# start the HyperDrive run\n",
|
||||||
|
"hyperdrive_run = experiment.submit(hyperdrive_config)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Monitor HyperDrive runs\n",
|
||||||
|
"You can monitor the progress of the runs with the following Jupyter widget. "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"RunDetails(hyperdrive_run).show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"run.wait_for_completion(show_output=True)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "minxia"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3.6",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python36"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.6.6"
|
||||||
|
},
|
||||||
|
"msauthor": "minxia"
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -5,9 +5,10 @@ Follow these sample notebooks to learn:
|
|||||||
1. [Train within notebook](train-within-notebook): train a simple scikit-learn model using the Jupyter kernel and deploy the model to Azure Container Service.
|
1. [Train within notebook](train-within-notebook): train a simple scikit-learn model using the Jupyter kernel and deploy the model to Azure Container Service.
|
||||||
2. [Train on local](train-on-local): train a model using local computer as compute target.
|
2. [Train on local](train-on-local): train a model using local computer as compute target.
|
||||||
3. [Train on remote VM](train-on-remote-vm): train a model using a remote Azure VM as compute target.
|
3. [Train on remote VM](train-on-remote-vm): train a model using a remote Azure VM as compute target.
|
||||||
4. [Train on AmlCompute](train-on-amlcompute): train a model using an AmlCompute cluster as compute target.
|
4. [Train on ML Compute](train-on-amlcompute): train a model using an ML Compute cluster as compute target.
|
||||||
5. [Train in an HDI Spark cluster](train-in-spark): train a Spark ML model using an HDInsight Spark cluster as compute target.
|
5. [Train in an HDI Spark cluster](train-in-spark): train a Spark ML model using an HDInsight Spark cluster as compute target.
|
||||||
6. [Logging API](logging-api): experiment with various logging functions to create runs and automatically generate graphs.
|
6. [Logging API](logging-api): experiment with various logging functions to create runs and automatically generate graphs.
|
||||||
7. [Train and hyperparameter tune on Iris Dataset with Scikit-learn](train-hyperparameter-tune-deploy-with-sklearn): train a model using the Scikit-learn estimator and tune hyperparameters with Hyperdrive.
|
7. [Manage runs](manage-runs): learn different ways how to start runs and child runs, monitor them, and cancel them.
|
||||||
|
8. [Train and hyperparameter tune on Iris Dataset with Scikit-learn](train-hyperparameter-tune-deploy-with-sklearn): train a model using the Scikit-learn estimator and tune hyperparameters with Hyperdrive.
|
||||||
|
|
||||||

|

|
||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,501 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||||
|
"\n",
|
||||||
|
"Licensed under the MIT License."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
""
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Train and hyperparameter tune on Iris Dataset with Scikit-learn\n",
|
||||||
|
"In this tutorial, we demonstrate how to use the Azure ML Python SDK to train a support vector machine (SVM) on a single-node CPU with Scikit-learn to perform classification on the popular [Iris dataset](https://archive.ics.uci.edu/ml/datasets/iris). We will also demonstrate how to perform hyperparameter tuning of the model using Azure ML's HyperDrive service."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Prerequisites"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"* Go through the [Configuration](../../../configuration.ipynb) notebook to install the Azure Machine Learning Python SDK and create an Azure ML Workspace"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Check core SDK version number\n",
|
||||||
|
"import azureml.core\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"SDK version:\", azureml.core.VERSION)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Diagnostics"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Opt-in diagnostics for better experience, quality, and security of future releases."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.telemetry import set_diagnostics_collection\n",
|
||||||
|
"\n",
|
||||||
|
"set_diagnostics_collection(send_diagnostics=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Initialize workspace"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Initialize a [Workspace](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#workspace) object from the existing workspace you created in the Prerequisites step. `Workspace.from_config()` creates a workspace object from the details stored in `config.json`."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.workspace import Workspace\n",
|
||||||
|
"\n",
|
||||||
|
"ws = Workspace.from_config()\n",
|
||||||
|
"print('Workspace name: ' + ws.name, \n",
|
||||||
|
" 'Azure region: ' + ws.location, \n",
|
||||||
|
" 'Subscription id: ' + ws.subscription_id, \n",
|
||||||
|
" 'Resource group: ' + ws.resource_group, sep = '\\n')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Create AmlCompute"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"You will need to create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for training your model. In this tutorial, we use Azure ML managed compute ([AmlCompute](https://docs.microsoft.com/azure/machine-learning/service/how-to-set-up-training-targets#amlcompute)) for our remote training compute resource.\n",
|
||||||
|
"\n",
|
||||||
|
"As with other Azure services, there are limits on certain resources (e.g. AmlCompute) associated with the Azure Machine Learning service. Please read [this article](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
|
||||||
|
"from azureml.core.compute_target import ComputeTargetException\n",
|
||||||
|
"\n",
|
||||||
|
"# choose a name for your cluster\n",
|
||||||
|
"cluster_name = \"gpu-cluster\"\n",
|
||||||
|
"\n",
|
||||||
|
"try:\n",
|
||||||
|
" compute_target = ComputeTarget(workspace=ws, name=cluster_name)\n",
|
||||||
|
" print('Found existing compute target.')\n",
|
||||||
|
"except ComputeTargetException:\n",
|
||||||
|
" print('Creating a new compute target...')\n",
|
||||||
|
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6',\n",
|
||||||
|
" max_nodes=4)\n",
|
||||||
|
"\n",
|
||||||
|
" # create the cluster\n",
|
||||||
|
" compute_target = ComputeTarget.create(ws, cluster_name, compute_config)\n",
|
||||||
|
"\n",
|
||||||
|
" compute_target.wait_for_completion(show_output=True)\n",
|
||||||
|
"\n",
|
||||||
|
"# use get_status() to get a detailed status for the current cluster. \n",
|
||||||
|
"print(compute_target.get_status().serialize())"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"The above code creates GPU compute. If you instead want to create CPU compute, provide a different VM size to the `vm_size` parameter, such as `STANDARD_D2_V2`."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Train model on the remote compute"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Now that you have your data and training script prepared, you are ready to train on your remote compute cluster. You can take advantage of Azure compute to leverage GPUs to cut down your training time."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Create a project directory"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Create a directory that will contain all the necessary code from your local machine that you will need access to on the remote resource. This includes the training script and any additional files your training script depends on."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import os\n",
|
||||||
|
"\n",
|
||||||
|
"project_folder = './sklearn-iris'\n",
|
||||||
|
"os.makedirs(project_folder, exist_ok=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Prepare training script"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Now you will need to create your training script. In this tutorial, the training script is already provided for you at `train_iris.py`. In practice, you should be able to take any custom training script as is and run it with Azure ML without having to modify your code.\n",
|
||||||
|
"\n",
|
||||||
|
"However, if you would like to use Azure ML's [tracking and metrics](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#metrics) capabilities, you will have to add a small amount of Azure ML code inside your training script.\n",
|
||||||
|
"\n",
|
||||||
|
"In `train_iris.py`, we will log some metrics to our Azure ML run. To do so, we will access the Azure ML Run object within the script:\n",
|
||||||
|
"\n",
|
||||||
|
"```python\n",
|
||||||
|
"from azureml.core.run import Run\n",
|
||||||
|
"run = Run.get_context()\n",
|
||||||
|
"```\n",
|
||||||
|
"\n",
|
||||||
|
"Further within `train_iris.py`, we log the kernel and penalty parameters, and the highest accuracy the model achieves:\n",
|
||||||
|
"\n",
|
||||||
|
"```python\n",
|
||||||
|
"run.log('Kernel type', np.str(args.kernel))\n",
|
||||||
|
"run.log('Penalty', np.float(args.penalty))\n",
|
||||||
|
"\n",
|
||||||
|
"run.log('Accuracy', np.float(accuracy))\n",
|
||||||
|
"```\n",
|
||||||
|
"\n",
|
||||||
|
"These run metrics will become particularly important when we begin hyperparameter tuning our model in the \"Tune model hyperparameters\" section.\n",
|
||||||
|
"\n",
|
||||||
|
"Once your script is ready, copy the training script `train_iris.py` into your project directory."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import shutil\n",
|
||||||
|
"\n",
|
||||||
|
"shutil.copy('train_iris.py', project_folder)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Create an experiment"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Create an [Experiment](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#experiment) to track all the runs in your workspace for this Scikit-learn tutorial."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core import Experiment\n",
|
||||||
|
"\n",
|
||||||
|
"experiment_name = 'train_iris'\n",
|
||||||
|
"experiment = Experiment(ws, name=experiment_name)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Create a Scikit-learn estimator"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"The Azure ML SDK's Scikit-learn estimator enables you to easily submit Scikit-learn training jobs for single-node runs. The following code will define a single-node Scikit-learn job."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.train.sklearn import SKLearn\n",
|
||||||
|
"\n",
|
||||||
|
"script_params = {\n",
|
||||||
|
" '--kernel': 'linear',\n",
|
||||||
|
" '--penalty': 1.0,\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"estimator = SKLearn(source_directory=project_folder, \n",
|
||||||
|
" script_params=script_params,\n",
|
||||||
|
" compute_target=compute_target,\n",
|
||||||
|
" entry_script='train_iris.py'\n",
|
||||||
|
" )"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"The `script_params` parameter is a dictionary containing the command-line arguments to your training script `entry_script`."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Submit job"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Run your experiment by submitting your estimator object. Note that this call is asynchronous."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"run = experiment.submit(estimator)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Monitor your run"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"You can monitor the progress of the run with a Jupyter widget. Like the run submission, the widget is asynchronous and provides live updates every 10-15 seconds until the job completes."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.widgets import RunDetails\n",
|
||||||
|
"\n",
|
||||||
|
"RunDetails(run).show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"run.cancel()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Tune model hyperparameters"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Now that we've seen how to do a simple Scikit-learn training run using the SDK, let's see if we can further improve the accuracy of our model. We can optimize our model's hyperparameters using Azure Machine Learning's hyperparameter tuning capabilities."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Start a hyperparameter sweep"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"First, we will define the hyperparameter space to sweep over. Let's tune the `kernel` and `penalty` parameters. In this example we will use random sampling to try different configuration sets of hyperparameters to maximize our primary metric, `Accuracy`."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.train.hyperdrive.runconfig import HyperDriveRunConfig\n",
|
||||||
|
"from azureml.train.hyperdrive.sampling import RandomParameterSampling\n",
|
||||||
|
"from azureml.train.hyperdrive.run import PrimaryMetricGoal\n",
|
||||||
|
"from azureml.train.hyperdrive.parameter_expressions import choice\n",
|
||||||
|
" \n",
|
||||||
|
"\n",
|
||||||
|
"param_sampling = RandomParameterSampling( {\n",
|
||||||
|
" \"--kernel\": choice('linear', 'rbf', 'poly', 'sigmoid'),\n",
|
||||||
|
" \"--penalty\": choice(0.5, 1, 1.5)\n",
|
||||||
|
" }\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"hyperdrive_run_config = HyperDriveRunConfig(estimator=estimator,\n",
|
||||||
|
" hyperparameter_sampling=param_sampling, \n",
|
||||||
|
" primary_metric_name='Accuracy',\n",
|
||||||
|
" primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,\n",
|
||||||
|
" max_total_runs=12,\n",
|
||||||
|
" max_concurrent_runs=4)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Finally, launch the hyperparameter tuning job."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# start the HyperDrive run\n",
|
||||||
|
"hyperdrive_run = experiment.submit(hyperdrive_run_config)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Monitor HyperDrive runs"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"You can monitor the progress of the runs with the following Jupyter widget."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"RunDetails(hyperdrive_run).show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"run.wait_for_completion(show_output=True)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "dipeck"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3.6",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python36"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.5.2"
|
||||||
|
},
|
||||||
|
"msauthor": "dipeck"
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
@@ -0,0 +1,53 @@
|
|||||||
|
# Modified from https://www.geeksforgeeks.org/multiclass-classification-using-scikit-learn/

import argparse

# importing necessary libraries
import numpy as np

from sklearn import datasets
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

from azureml.core.run import Run
# Handle to the current Azure ML run context (offline run when executed locally).
run = Run.get_context()


def main():
    """Train an SVM classifier on the iris dataset and log metrics to the Azure ML run.

    Command-line arguments:
        --kernel:  SVC kernel type (default 'linear').
        --penalty: SVC regularization parameter C (default 1.0).
    """
    parser = argparse.ArgumentParser()

    parser.add_argument('--kernel', type=str, default='linear',
                        help='Kernel type to be used in the algorithm')
    parser.add_argument('--penalty', type=float, default=1.0,
                        help='Penalty parameter of the error term')

    args = parser.parse_args()
    # np.str / np.float are deprecated aliases that were removed in NumPy 1.24;
    # the builtins str / float are the documented, equivalent replacements.
    run.log('Kernel type', str(args.kernel))
    run.log('Penalty', float(args.penalty))

    # loading the iris dataset
    iris = datasets.load_iris()

    # X -> features, y -> label
    X = iris.data
    y = iris.target

    # dividing X, y into train and test data
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

    # training a linear SVM classifier
    from sklearn.svm import SVC
    svm_model_linear = SVC(kernel=args.kernel, C=args.penalty).fit(X_train, y_train)
    svm_predictions = svm_model_linear.predict(X_test)

    # model accuracy for X_test
    accuracy = svm_model_linear.score(X_test, y_test)
    print('Accuracy of SVM classifier on test set: {:.2f}'.format(accuracy))
    run.log('Accuracy', float(accuracy))
    # creating a confusion matrix
    cm = confusion_matrix(y_test, svm_predictions)
    print(cm)


if __name__ == '__main__':
    main()
|
||||||
@@ -1,285 +1,285 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Licensed under the MIT License."
|
"Licensed under the MIT License."
|
||||||
]
|
]
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
""
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# 05. Train in Spark\n",
|
|
||||||
"* Create Workspace\n",
|
|
||||||
"* Create Experiment\n",
|
|
||||||
"* Copy relevant files to the script folder\n",
|
|
||||||
"* Configure and Run"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Prerequisites\n",
|
|
||||||
"If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [configuration](../../../configuration.ipynb) Notebook first if you haven't already to establish your connection to the AzureML Workspace."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Check core SDK version number\n",
|
|
||||||
"import azureml.core\n",
|
|
||||||
"\n",
|
|
||||||
"print(\"SDK version:\", azureml.core.VERSION)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Initialize Workspace\n",
|
|
||||||
"\n",
|
|
||||||
"Initialize a workspace object from persisted configuration."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core import Workspace\n",
|
|
||||||
"\n",
|
|
||||||
"ws = Workspace.from_config()\n",
|
|
||||||
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep='\\n')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Create Experiment\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"experiment_name = 'train-on-spark'\n",
|
|
||||||
"\n",
|
|
||||||
"from azureml.core import Experiment\n",
|
|
||||||
"exp = Experiment(workspace=ws, name=experiment_name)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## View `train-spark.py`\n",
|
|
||||||
"\n",
|
|
||||||
"For convenience, we created a training script for you. It is printed below as a text, but you can also run `%pfile ./train-spark.py` in a cell to show the file."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"with open('train-spark.py', 'r') as training_script:\n",
|
|
||||||
" print(training_script.read())"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Configure & Run"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"**Note** You can use Docker-based execution to run the Spark job in local computer or a remote VM. Please see the `train-in-remote-vm` notebook for example on how to configure and run in Docker mode in a VM. Make sure you choose a Docker image that has Spark installed, such as `microsoft/mmlspark:0.12`."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Attach an HDI cluster\n",
|
|
||||||
"Here we will use an actual Spark cluster, HDInsight for Spark, to run this job. To use HDI compute target:\n",
|
|
||||||
" 1. Create a Spark for HDI cluster in Azure. Here are some [quick instructions](https://docs.microsoft.com/en-us/azure/hdinsight/spark/apache-spark-jupyter-spark-sql). Make sure you use the Ubuntu flavor, NOT CentOS.\n",
|
|
||||||
" 2. Enter the IP address, username and password below"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.compute import ComputeTarget, HDInsightCompute\n",
|
|
||||||
"from azureml.exceptions import ComputeTargetException\n",
|
|
||||||
"import os\n",
|
|
||||||
"\n",
|
|
||||||
"try:\n",
|
|
||||||
" # if you want to connect using SSH key instead of username/password you can provide parameters private_key_file and private_key_passphrase\n",
|
|
||||||
" attach_config = HDInsightCompute.attach_configuration(address=os.environ.get('hdiservername', '<my_hdi_cluster_name>-ssh.azurehdinsight.net'), \n",
|
|
||||||
" ssh_port=22, \n",
|
|
||||||
" username=os.environ.get('hdiusername', '<ssh_username>'), \n",
|
|
||||||
" password=os.environ.get('hdipassword', '<my_password>'))\n",
|
|
||||||
" hdi_compute = ComputeTarget.attach(workspace=ws, \n",
|
|
||||||
" name='myhdi', \n",
|
|
||||||
" attach_configuration=attach_config)\n",
|
|
||||||
"\n",
|
|
||||||
"except ComputeTargetException as e:\n",
|
|
||||||
" print(\"Caught = {}\".format(e.message))\n",
|
|
||||||
" \n",
|
|
||||||
" \n",
|
|
||||||
"hdi_compute.wait_for_completion(show_output=True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Configure HDI run"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Configure an execution using the HDInsight cluster with a conda environment that has `numpy`."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.runconfig import RunConfiguration\n",
|
|
||||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
|
||||||
"\n",
|
|
||||||
"# use pyspark framework\n",
|
|
||||||
"hdi_run_config = RunConfiguration(framework=\"pyspark\")\n",
|
|
||||||
"\n",
|
|
||||||
"# Set compute target to the HDI cluster\n",
|
|
||||||
"hdi_run_config.target = hdi_compute.name\n",
|
|
||||||
"\n",
|
|
||||||
"# specify CondaDependencies object to ask system installing numpy\n",
|
|
||||||
"cd = CondaDependencies()\n",
|
|
||||||
"cd.add_conda_package('numpy')\n",
|
|
||||||
"hdi_run_config.environment.python.conda_dependencies = cd"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Submit the script to HDI"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core import ScriptRunConfig\n",
|
|
||||||
"\n",
|
|
||||||
"script_run_config = ScriptRunConfig(source_directory = '.',\n",
|
|
||||||
" script= 'train-spark.py',\n",
|
|
||||||
" run_config = hdi_run_config)\n",
|
|
||||||
"run = exp.submit(config=script_run_config)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Monitor the run using a Jupyter widget"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.widgets import RunDetails\n",
|
|
||||||
"RunDetails(run).show()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Note: if you need to cancel a run, you can follow [these instructions](https://aka.ms/aml-docs-cancel-run)."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"After the run is successfully finished, you can check the metrics logged."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# get all metrics logged in the run\n",
|
|
||||||
"metrics = run.get_metrics()\n",
|
|
||||||
"print(metrics)"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"authors": [
|
|
||||||
{
|
|
||||||
"name": "aashishb"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3.6",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python36"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.6.7"
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
{
|
||||||
"nbformat_minor": 2
|
"cell_type": "markdown",
|
||||||
}
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
""
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# 05. Train in Spark\n",
|
||||||
|
"* Create Workspace\n",
|
||||||
|
"* Create Experiment\n",
|
||||||
|
"* Copy relevant files to the script folder\n",
|
||||||
|
"* Configure and Run"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Prerequisites\n",
|
||||||
|
"If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [configuration](../../../configuration.ipynb) Notebook first if you haven't already to establish your connection to the AzureML Workspace."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Check core SDK version number\n",
|
||||||
|
"import azureml.core\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"SDK version:\", azureml.core.VERSION)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Initialize Workspace\n",
|
||||||
|
"\n",
|
||||||
|
"Initialize a workspace object from persisted configuration."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core import Workspace\n",
|
||||||
|
"\n",
|
||||||
|
"ws = Workspace.from_config()\n",
|
||||||
|
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep='\\n')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Create Experiment\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"experiment_name = 'train-on-spark'\n",
|
||||||
|
"\n",
|
||||||
|
"from azureml.core import Experiment\n",
|
||||||
|
"exp = Experiment(workspace=ws, name=experiment_name)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## View `train-spark.py`\n",
|
||||||
|
"\n",
|
||||||
|
"For convenience, we created a training script for you. It is printed below as a text, but you can also run `%pfile ./train-spark.py` in a cell to show the file."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"with open('train-spark.py', 'r') as training_script:\n",
|
||||||
|
" print(training_script.read())"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Configure & Run"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"**Note** You can use Docker-based execution to run the Spark job in local computer or a remote VM. Please see the `train-in-remote-vm` notebook for example on how to configure and run in Docker mode in a VM. Make sure you choose a Docker image that has Spark installed, such as `microsoft/mmlspark:0.12`."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Attach an HDI cluster\n",
|
||||||
|
"Here we will use an actual Spark cluster, HDInsight for Spark, to run this job. To use HDI compute target:\n",
|
||||||
|
" 1. Create a Spark for HDI cluster in Azure. Here are some [quick instructions](https://docs.microsoft.com/en-us/azure/hdinsight/spark/apache-spark-jupyter-spark-sql). Make sure you use the Ubuntu flavor, NOT CentOS.\n",
|
||||||
|
" 2. Enter the IP address, username and password below"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.compute import ComputeTarget, HDInsightCompute\n",
|
||||||
|
"from azureml.exceptions import ComputeTargetException\n",
|
||||||
|
"import os\n",
|
||||||
|
"\n",
|
||||||
|
"try:\n",
|
||||||
|
" # if you want to connect using SSH key instead of username/password you can provide parameters private_key_file and private_key_passphrase\n",
|
||||||
|
" attach_config = HDInsightCompute.attach_configuration(address=os.environ.get('hdiservername', '<my_hdi_cluster_name>-ssh.azurehdinsight.net'), \n",
|
||||||
|
" ssh_port=22, \n",
|
||||||
|
" username=os.environ.get('hdiusername', '<ssh_username>'), \n",
|
||||||
|
" password=os.environ.get('hdipassword', '<my_password>'))\n",
|
||||||
|
" hdi_compute = ComputeTarget.attach(workspace=ws, \n",
|
||||||
|
" name='myhdi', \n",
|
||||||
|
" attach_configuration=attach_config)\n",
|
||||||
|
"\n",
|
||||||
|
"except ComputeTargetException as e:\n",
|
||||||
|
" print(\"Caught = {}\".format(e.message))\n",
|
||||||
|
" \n",
|
||||||
|
" \n",
|
||||||
|
"hdi_compute.wait_for_completion(show_output=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Configure HDI run"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Configure an execution using the HDInsight cluster with a conda environment that has `numpy`."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.runconfig import RunConfiguration\n",
|
||||||
|
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||||
|
"\n",
|
||||||
|
"# use pyspark framework\n",
|
||||||
|
"hdi_run_config = RunConfiguration(framework=\"pyspark\")\n",
|
||||||
|
"\n",
|
||||||
|
"# Set compute target to the HDI cluster\n",
|
||||||
|
"hdi_run_config.target = hdi_compute.name\n",
|
||||||
|
"\n",
|
||||||
|
"# specify CondaDependencies object to ask system installing numpy\n",
|
||||||
|
"cd = CondaDependencies()\n",
|
||||||
|
"cd.add_conda_package('numpy')\n",
|
||||||
|
"hdi_run_config.environment.python.conda_dependencies = cd"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Submit the script to HDI"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core import ScriptRunConfig\n",
|
||||||
|
"\n",
|
||||||
|
"script_run_config = ScriptRunConfig(source_directory = '.',\n",
|
||||||
|
" script= 'train-spark.py',\n",
|
||||||
|
" run_config = hdi_run_config)\n",
|
||||||
|
"run = exp.submit(config=script_run_config)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Monitor the run using a Jupyter widget"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.widgets import RunDetails\n",
|
||||||
|
"RunDetails(run).show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Note: if you need to cancel a run, you can follow [these instructions](https://aka.ms/aml-docs-cancel-run)."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"After the run is successfully finished, you can check the metrics logged."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# get all metrics logged in the run\n",
|
||||||
|
"metrics = run.get_metrics()\n",
|
||||||
|
"print(metrics)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "aashishb"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3.6",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python36"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.6.7"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
|
|||||||
@@ -1,506 +1,448 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Licensed under the MIT License."
|
"Licensed under the MIT License."
|
||||||
]
|
]
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
""
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# Train using Azure Machine Learning Compute\n",
|
|
||||||
"\n",
|
|
||||||
"* Initialize a Workspace\n",
|
|
||||||
"* Create an Experiment\n",
|
|
||||||
"* Introduction to AmlCompute\n",
|
|
||||||
"* Submit an AmlCompute run in a few different ways\n",
|
|
||||||
" - Provision as a run based compute target \n",
|
|
||||||
" - Provision as a persistent compute target (Basic)\n",
|
|
||||||
" - Provision as a persistent compute target (Advanced)\n",
|
|
||||||
"* Additional operations to perform on AmlCompute\n",
|
|
||||||
"* Find the best model in the run"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Prerequisites\n",
|
|
||||||
"If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [configuration](../../../configuration.ipynb) Notebook first if you haven't already to establish your connection to the AzureML Workspace."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Check core SDK version number\n",
|
|
||||||
"import azureml.core\n",
|
|
||||||
"\n",
|
|
||||||
"print(\"SDK version:\", azureml.core.VERSION)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Initialize a Workspace\n",
|
|
||||||
"\n",
|
|
||||||
"Initialize a workspace object from persisted configuration"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"tags": [
|
|
||||||
"create workspace"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core import Workspace\n",
|
|
||||||
"\n",
|
|
||||||
"ws = Workspace.from_config()\n",
|
|
||||||
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Create An Experiment\n",
|
|
||||||
"\n",
|
|
||||||
"**Experiment** is a logical container in an Azure ML Workspace. It hosts run records which can include run metrics and output artifacts from your experiments."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core import Experiment\n",
|
|
||||||
"experiment_name = 'train-on-amlcompute'\n",
|
|
||||||
"experiment = Experiment(workspace = ws, name = experiment_name)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Introduction to AmlCompute\n",
|
|
||||||
"\n",
|
|
||||||
"Azure Machine Learning Compute is managed compute infrastructure that allows the user to easily create single to multi-node compute of the appropriate VM Family. It is created **within your workspace region** and is a resource that can be used by other users in your workspace. It autoscales by default to the max_nodes, when a job is submitted, and executes in a containerized environment packaging the dependencies as specified by the user. \n",
|
|
||||||
"\n",
|
|
||||||
"Since it is managed compute, job scheduling and cluster management are handled internally by Azure Machine Learning service. \n",
|
|
||||||
"\n",
|
|
||||||
"For more information on Azure Machine Learning Compute, please read [this article](https://docs.microsoft.com/azure/machine-learning/service/how-to-set-up-training-targets#amlcompute)\n",
|
|
||||||
"\n",
|
|
||||||
"If you are an existing BatchAI customer who is migrating to Azure Machine Learning, please read [this article](https://aka.ms/batchai-retirement)\n",
|
|
||||||
"\n",
|
|
||||||
"**Note**: As with other Azure services, there are limits on certain resources (for eg. AmlCompute quota) associated with the Azure Machine Learning service. Please read [this article](https://docs.microsoft.com/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota.\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"The training script `train.py` is already created for you. Let's have a look."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Submit an AmlCompute run in a few different ways\n",
|
|
||||||
"\n",
|
|
||||||
"First lets check which VM families are available in your region. Azure is a regional service and some specialized SKUs (especially GPUs) are only available in certain regions. Since AmlCompute is created in the region of your workspace, we will use the supported_vms () function to see if the VM family we want to use ('STANDARD_D2_V2') is supported.\n",
|
|
||||||
"\n",
|
|
||||||
"You can also pass a different region to check availability and then re-create your workspace in that region through the [configuration notebook](../../../configuration.ipynb)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
|
|
||||||
"\n",
|
|
||||||
"AmlCompute.supported_vmsizes(workspace = ws)\n",
|
|
||||||
"#AmlCompute.supported_vmsizes(workspace = ws, location='southcentralus')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Create project directory\n",
|
|
||||||
"\n",
|
|
||||||
"Create a directory that will contain all the necessary code from your local machine that you will need access to on the remote resource. This includes the training script, and any additional files your training script depends on"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import os\n",
|
|
||||||
"import shutil\n",
|
|
||||||
"\n",
|
|
||||||
"project_folder = './train-on-amlcompute'\n",
|
|
||||||
"os.makedirs(project_folder, exist_ok=True)\n",
|
|
||||||
"shutil.copy('train.py', project_folder)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Create environment\n",
|
|
||||||
"\n",
|
|
||||||
"Create Docker based environment with scikit-learn installed."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core import Environment\n",
|
|
||||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
|
||||||
"\n",
|
|
||||||
"myenv = Environment(\"myenv\")\n",
|
|
||||||
"\n",
|
|
||||||
"myenv.docker.enabled = True\n",
|
|
||||||
"myenv.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Get the default compute target\n",
|
|
||||||
"\n",
|
|
||||||
"In this case, we use the default `AmlCompute`target from the workspace."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core import ScriptRunConfig\n",
|
|
||||||
"from azureml.core.runconfig import DEFAULT_CPU_IMAGE\n",
|
|
||||||
"\n",
|
|
||||||
"src = ScriptRunConfig(source_directory=project_folder, script='train.py')\n",
|
|
||||||
"\n",
|
|
||||||
"# Use default compute target\n",
|
|
||||||
"src.run_config.target = ws.get_default_compute_target(type=\"CPU\").name\n",
|
|
||||||
"\n",
|
|
||||||
"# Set environment\n",
|
|
||||||
"src.run_config.environment = myenv"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Submit run"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"run = experiment.submit(src)\n",
|
|
||||||
"\n",
|
|
||||||
"# Show run details\n",
|
|
||||||
"run"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Note: if you need to cancel a run, you can follow [these instructions](https://aka.ms/aml-docs-cancel-run)."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"%%time\n",
|
|
||||||
"# Shows output of the run on stdout.\n",
|
|
||||||
"run.wait_for_completion(show_output=True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"run.get_metrics()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Provision as a persistent compute target (Basic)\n",
|
|
||||||
"\n",
|
|
||||||
"You can provision a persistent AmlCompute resource by simply defining two parameters thanks to smart defaults. By default it autoscales from 0 nodes and provisions dedicated VMs to run your job in a container. This is useful when you want to continously re-use the same target, debug it between jobs or simply share the resource with other users of your workspace.\n",
|
|
||||||
"\n",
|
|
||||||
"* `vm_size`: VM family of the nodes provisioned by AmlCompute. Simply choose from the supported_vmsizes() above\n",
|
|
||||||
"* `max_nodes`: Maximum nodes to autoscale to while running a job on AmlCompute"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
|
|
||||||
"from azureml.core.compute_target import ComputeTargetException\n",
|
|
||||||
"\n",
|
|
||||||
"# Choose a name for your CPU cluster\n",
|
|
||||||
"cpu_cluster_name = \"cpucluster\"\n",
|
|
||||||
"\n",
|
|
||||||
"# Verify that cluster does not exist already\n",
|
|
||||||
"try:\n",
|
|
||||||
" cpu_cluster = ComputeTarget(workspace=ws, name=cpu_cluster_name)\n",
|
|
||||||
" print('Found existing cluster, use it.')\n",
|
|
||||||
"except ComputeTargetException:\n",
|
|
||||||
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',\n",
|
|
||||||
" max_nodes=4)\n",
|
|
||||||
" cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, compute_config)\n",
|
|
||||||
"\n",
|
|
||||||
"cpu_cluster.wait_for_completion(show_output=True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Configure & Run"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Set compute target to the one created in previous step\n",
|
|
||||||
"src.run_config.target = cpu_cluster.name\n",
|
|
||||||
" \n",
|
|
||||||
"run = experiment.submit(config=src)\n",
|
|
||||||
"run"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"%%time\n",
|
|
||||||
"# Shows output of the run on stdout.\n",
|
|
||||||
"run.wait_for_completion(show_output=True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"run.get_metrics()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Provision as a persistent compute target (Advanced)\n",
|
|
||||||
"\n",
|
|
||||||
"You can also specify additional properties or change defaults while provisioning AmlCompute using a more advanced configuration. This is useful when you want a dedicated cluster of 4 nodes (for example you can set the min_nodes and max_nodes to 4), or want the compute to be within an existing VNet in your subscription.\n",
|
|
||||||
"\n",
|
|
||||||
"In addition to `vm_size` and `max_nodes`, you can specify:\n",
|
|
||||||
"* `min_nodes`: Minimum nodes (default 0 nodes) to downscale to while running a job on AmlCompute\n",
|
|
||||||
"* `vm_priority`: Choose between 'dedicated' (default) and 'lowpriority' VMs when provisioning AmlCompute. Low Priority VMs use Azure's excess capacity and are thus cheaper but risk your run being pre-empted\n",
|
|
||||||
"* `idle_seconds_before_scaledown`: Idle time (default 120 seconds) to wait after run completion before auto-scaling to min_nodes\n",
|
|
||||||
"* `vnet_resourcegroup_name`: Resource group of the **existing** VNet within which AmlCompute should be provisioned\n",
|
|
||||||
"* `vnet_name`: Name of VNet\n",
|
|
||||||
"* `subnet_name`: Name of SubNet within the VNet"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
|
|
||||||
"from azureml.core.compute_target import ComputeTargetException\n",
|
|
||||||
"\n",
|
|
||||||
"# Choose a name for your CPU cluster\n",
|
|
||||||
"cpu_cluster_name = \"cpucluster\"\n",
|
|
||||||
"\n",
|
|
||||||
"# Verify that cluster does not exist already\n",
|
|
||||||
"try:\n",
|
|
||||||
" cpu_cluster = ComputeTarget(workspace=ws, name=cpu_cluster_name)\n",
|
|
||||||
" print('Found existing cluster, use it.')\n",
|
|
||||||
"except ComputeTargetException:\n",
|
|
||||||
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',\n",
|
|
||||||
" vm_priority='lowpriority',\n",
|
|
||||||
" min_nodes=2,\n",
|
|
||||||
" max_nodes=4,\n",
|
|
||||||
" idle_seconds_before_scaledown='300',\n",
|
|
||||||
" vnet_resourcegroup_name='<my-resource-group>',\n",
|
|
||||||
" vnet_name='<my-vnet-name>',\n",
|
|
||||||
" subnet_name='<my-subnet-name>')\n",
|
|
||||||
" cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, compute_config)\n",
|
|
||||||
"\n",
|
|
||||||
"cpu_cluster.wait_for_completion(show_output=True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Configure & Run"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Set compute target to the one created in previous step\n",
|
|
||||||
"src.run_config.target = cpu_cluster.name\n",
|
|
||||||
" \n",
|
|
||||||
"run = experiment.submit(config=src)\n",
|
|
||||||
"run"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"%%time\n",
|
|
||||||
"# Shows output of the run on stdout.\n",
|
|
||||||
"run.wait_for_completion(show_output=True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"run.get_metrics()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Additional operations to perform on AmlCompute\n",
|
|
||||||
"\n",
|
|
||||||
"You can perform more operations on AmlCompute such as updating the node counts or deleting the compute. "
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"#Get_status () gets the latest status of the AmlCompute target\n",
|
|
||||||
"cpu_cluster.get_status().serialize()\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"#Update () takes in the min_nodes, max_nodes and idle_seconds_before_scaledown and updates the AmlCompute target\n",
|
|
||||||
"#cpu_cluster.update(min_nodes=1)\n",
|
|
||||||
"#cpu_cluster.update(max_nodes=10)\n",
|
|
||||||
"cpu_cluster.update(idle_seconds_before_scaledown=300)\n",
|
|
||||||
"#cpu_cluster.update(min_nodes=2, max_nodes=4, idle_seconds_before_scaledown=600)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"#Delete () is used to deprovision and delete the AmlCompute target. Useful if you want to re-use the compute name \n",
|
|
||||||
"#'cpucluster' in this case but use a different VM family for instance.\n",
|
|
||||||
"\n",
|
|
||||||
"#cpu_cluster.delete()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Success!\n",
|
|
||||||
"Great, you are ready to move on to the remaining notebooks."
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"authors": [
|
|
||||||
{
|
|
||||||
"name": "nigup"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3.6",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python36"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.6.6"
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
{
|
||||||
"nbformat_minor": 2
|
"cell_type": "markdown",
|
||||||
}
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
""
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Train using Azure Machine Learning Compute\n",
|
||||||
|
"\n",
|
||||||
|
"* Initialize a Workspace\n",
|
||||||
|
"* Create an Experiment\n",
|
||||||
|
"* Introduction to AmlCompute\n",
|
||||||
|
"* Submit an AmlCompute run in a few different ways\n",
|
||||||
|
" - Provision as a run based compute target \n",
|
||||||
|
" - Provision as a persistent compute target (Basic)\n",
|
||||||
|
" - Provision as a persistent compute target (Advanced)\n",
|
||||||
|
"* Additional operations to perform on AmlCompute\n",
|
||||||
|
"* Find the best model in the run"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Prerequisites\n",
|
||||||
|
"If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [configuration](../../../configuration.ipynb) Notebook first if you haven't already to establish your connection to the AzureML Workspace."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Check core SDK version number\n",
|
||||||
|
"import azureml.core\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"SDK version:\", azureml.core.VERSION)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Initialize a Workspace\n",
|
||||||
|
"\n",
|
||||||
|
"Initialize a workspace object from persisted configuration"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"tags": [
|
||||||
|
"create workspace"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core import Workspace\n",
|
||||||
|
"\n",
|
||||||
|
"ws = Workspace.from_config()\n",
|
||||||
|
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Create An Experiment\n",
|
||||||
|
"\n",
|
||||||
|
"**Experiment** is a logical container in an Azure ML Workspace. It hosts run records which can include run metrics and output artifacts from your experiments."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core import Experiment\n",
|
||||||
|
"experiment_name = 'train-on-amlcompute'\n",
|
||||||
|
"experiment = Experiment(workspace = ws, name = experiment_name)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Introduction to AmlCompute\n",
|
||||||
|
"\n",
|
||||||
|
"Azure Machine Learning Compute is managed compute infrastructure that allows the user to easily create single to multi-node compute of the appropriate VM Family. It is created **within your workspace region** and is a resource that can be used by other users in your workspace. It autoscales by default to the max_nodes, when a job is submitted, and executes in a containerized environment packaging the dependencies as specified by the user. \n",
|
||||||
|
"\n",
|
||||||
|
"Since it is managed compute, job scheduling and cluster management are handled internally by Azure Machine Learning service. \n",
|
||||||
|
"\n",
|
||||||
|
"For more information on Azure Machine Learning Compute, please read [this article](https://docs.microsoft.com/azure/machine-learning/service/how-to-set-up-training-targets#amlcompute)\n",
|
||||||
|
"\n",
|
||||||
|
"If you are an existing BatchAI customer who is migrating to Azure Machine Learning, please read [this article](https://aka.ms/batchai-retirement)\n",
|
||||||
|
"\n",
|
||||||
|
"**Note**: As with other Azure services, there are limits on certain resources (for eg. AmlCompute quota) associated with the Azure Machine Learning service. Please read [this article](https://docs.microsoft.com/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota.\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"The training script `train.py` is already created for you. Let's have a look."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Submit an AmlCompute run in a few different ways\n",
|
||||||
|
"\n",
|
||||||
|
"First lets check which VM families are available in your region. Azure is a regional service and some specialized SKUs (especially GPUs) are only available in certain regions. Since AmlCompute is created in the region of your workspace, we will use the supported_vms () function to see if the VM family we want to use ('STANDARD_D2_V2') is supported.\n",
|
||||||
|
"\n",
|
||||||
|
"You can also pass a different region to check availability and then re-create your workspace in that region through the [configuration notebook](../../../configuration.ipynb)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
|
||||||
|
"\n",
|
||||||
|
"AmlCompute.supported_vmsizes(workspace = ws)\n",
|
||||||
|
"#AmlCompute.supported_vmsizes(workspace = ws, location='southcentralus')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Create project directory\n",
|
||||||
|
"\n",
|
||||||
|
"Create a directory that will contain all the necessary code from your local machine that you will need access to on the remote resource. This includes the training script, and any additional files your training script depends on"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import os\n",
|
||||||
|
"import shutil\n",
|
||||||
|
"\n",
|
||||||
|
"project_folder = './train-on-amlcompute'\n",
|
||||||
|
"os.makedirs(project_folder, exist_ok=True)\n",
|
||||||
|
"shutil.copy('train.py', project_folder)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Create environment\n",
|
||||||
|
"\n",
|
||||||
|
"Create Docker based environment with scikit-learn installed."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core import Environment\n",
|
||||||
|
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||||
|
"\n",
|
||||||
|
"myenv = Environment(\"myenv\")\n",
|
||||||
|
"\n",
|
||||||
|
"myenv.docker.enabled = True\n",
|
||||||
|
"myenv.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Provision as a persistent compute target (Basic)\n",
|
||||||
|
"\n",
|
||||||
|
"You can provision a persistent AmlCompute resource by simply defining two parameters thanks to smart defaults. By default it autoscales from 0 nodes and provisions dedicated VMs to run your job in a container. This is useful when you want to continously re-use the same target, debug it between jobs or simply share the resource with other users of your workspace.\n",
|
||||||
|
"\n",
|
||||||
|
"* `vm_size`: VM family of the nodes provisioned by AmlCompute. Simply choose from the supported_vmsizes() above\n",
|
||||||
|
"* `max_nodes`: Maximum nodes to autoscale to while running a job on AmlCompute"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
|
||||||
|
"from azureml.core.compute_target import ComputeTargetException\n",
|
||||||
|
"\n",
|
||||||
|
"# Choose a name for your CPU cluster\n",
|
||||||
|
"cpu_cluster_name = \"cpu-cluster\"\n",
|
||||||
|
"\n",
|
||||||
|
"# Verify that cluster does not exist already\n",
|
||||||
|
"try:\n",
|
||||||
|
" cpu_cluster = ComputeTarget(workspace=ws, name=cpu_cluster_name)\n",
|
||||||
|
" print('Found existing cluster, use it.')\n",
|
||||||
|
"except ComputeTargetException:\n",
|
||||||
|
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',\n",
|
||||||
|
" max_nodes=4)\n",
|
||||||
|
" cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, compute_config)\n",
|
||||||
|
"\n",
|
||||||
|
"cpu_cluster.wait_for_completion(show_output=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Configure & Run"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core import ScriptRunConfig\n",
|
||||||
|
"from azureml.core.runconfig import DEFAULT_CPU_IMAGE\n",
|
||||||
|
"\n",
|
||||||
|
"src = ScriptRunConfig(source_directory=project_folder, script='train.py')\n",
|
||||||
|
"\n",
|
||||||
|
"# Set compute target to the one created in previous step\n",
|
||||||
|
"src.run_config.target = cpu_cluster.name\n",
|
||||||
|
"\n",
|
||||||
|
"# Set environment\n",
|
||||||
|
"src.run_config.environment = myenv\n",
|
||||||
|
" \n",
|
||||||
|
"run = experiment.submit(config=src)\n",
|
||||||
|
"run"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Note: if you need to cancel a run, you can follow [these instructions](https://aka.ms/aml-docs-cancel-run)."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"%%time\n",
|
||||||
|
"# Shows output of the run on stdout.\n",
|
||||||
|
"run.wait_for_completion(show_output=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"run.get_metrics()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Provision as a persistent compute target (Advanced)\n",
|
||||||
|
"\n",
|
||||||
|
"You can also specify additional properties or change defaults while provisioning AmlCompute using a more advanced configuration. This is useful when you want a dedicated cluster of 4 nodes (for example you can set the min_nodes and max_nodes to 4), or want the compute to be within an existing VNet in your subscription.\n",
|
||||||
|
"\n",
|
||||||
|
"In addition to `vm_size` and `max_nodes`, you can specify:\n",
|
||||||
|
"* `min_nodes`: Minimum nodes (default 0 nodes) to downscale to while running a job on AmlCompute\n",
|
||||||
|
"* `vm_priority`: Choose between 'dedicated' (default) and 'lowpriority' VMs when provisioning AmlCompute. Low Priority VMs use Azure's excess capacity and are thus cheaper but risk your run being pre-empted\n",
|
||||||
|
"* `idle_seconds_before_scaledown`: Idle time (default 120 seconds) to wait after run completion before auto-scaling to min_nodes\n",
|
||||||
|
"* `vnet_resourcegroup_name`: Resource group of the **existing** VNet within which AmlCompute should be provisioned\n",
|
||||||
|
"* `vnet_name`: Name of VNet\n",
|
||||||
|
"* `subnet_name`: Name of SubNet within the VNet"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
|
||||||
|
"from azureml.core.compute_target import ComputeTargetException\n",
|
||||||
|
"\n",
|
||||||
|
"# Choose a name for your CPU cluster\n",
|
||||||
|
"cpu_cluster_name = \"cpu-cluster\"\n",
|
||||||
|
"\n",
|
||||||
|
"# Verify that cluster does not exist already\n",
|
||||||
|
"try:\n",
|
||||||
|
" cpu_cluster = ComputeTarget(workspace=ws, name=cpu_cluster_name)\n",
|
||||||
|
" print('Found existing cluster, use it.')\n",
|
||||||
|
"except ComputeTargetException:\n",
|
||||||
|
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',\n",
|
||||||
|
" vm_priority='lowpriority',\n",
|
||||||
|
" min_nodes=2,\n",
|
||||||
|
" max_nodes=4,\n",
|
||||||
|
" idle_seconds_before_scaledown='300',\n",
|
||||||
|
" vnet_resourcegroup_name='<my-resource-group>',\n",
|
||||||
|
" vnet_name='<my-vnet-name>',\n",
|
||||||
|
" subnet_name='<my-subnet-name>')\n",
|
||||||
|
" cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, compute_config)\n",
|
||||||
|
"\n",
|
||||||
|
"cpu_cluster.wait_for_completion(show_output=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Configure & Run"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Set compute target to the one created in previous step\n",
|
||||||
|
"src.run_config.target = cpu_cluster.name\n",
|
||||||
|
" \n",
|
||||||
|
"run = experiment.submit(config=src)\n",
|
||||||
|
"run"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"%%time\n",
|
||||||
|
"# Shows output of the run on stdout.\n",
|
||||||
|
"run.wait_for_completion(show_output=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"run.get_metrics()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Additional operations to perform on AmlCompute\n",
|
||||||
|
"\n",
|
||||||
|
"You can perform more operations on AmlCompute such as updating the node counts or deleting the compute. "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"#Get_status () gets the latest status of the AmlCompute target\n",
|
||||||
|
"cpu_cluster.get_status().serialize()\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"#Update () takes in the min_nodes, max_nodes and idle_seconds_before_scaledown and updates the AmlCompute target\n",
|
||||||
|
"#cpu_cluster.update(min_nodes=1)\n",
|
||||||
|
"#cpu_cluster.update(max_nodes=10)\n",
|
||||||
|
"cpu_cluster.update(idle_seconds_before_scaledown=300)\n",
|
||||||
|
"#cpu_cluster.update(min_nodes=2, max_nodes=4, idle_seconds_before_scaledown=600)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"#Delete () is used to deprovision and delete the AmlCompute target. Useful if you want to re-use the compute name \n",
|
||||||
|
"#'cpu-cluster' in this case but use a different VM family for instance.\n",
|
||||||
|
"\n",
|
||||||
|
"#cpu_cluster.delete()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Success!\n",
|
||||||
|
"Great, you are ready to move on to the remaining notebooks."
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "nigup"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3.6",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python36"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.6.6"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,8 @@
|
|||||||
|
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||||
|
# Licensed under the MIT License
|
||||||
|
|
||||||
|
# Very simple script to demonstrate run in environment
|
||||||
|
# Print message passed in as environment variable
|
||||||
|
import os
|
||||||
|
|
||||||
|
print(os.environ.get("MESSAGE"))
|
||||||
@@ -1,371 +1,372 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Licensed under the MIT License"
|
"Licensed under the MIT License"
|
||||||
]
|
]
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
""
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# Using environments\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"## Contents\n",
|
|
||||||
"\n",
|
|
||||||
"1. [Introduction](#Introduction)\n",
|
|
||||||
"1. [Setup](#Setup)\n",
|
|
||||||
"1. [Create environment](#Create-environment)\n",
|
|
||||||
" 1. Add Python packages\n",
|
|
||||||
" 1. Specify environment variables\n",
|
|
||||||
"1. [Submit run using environment](#Submit-run-using-environment)\n",
|
|
||||||
"1. [Register environment](#Register-environment)\n",
|
|
||||||
"1. [List and get existing environments](#List-and-get-existing-environments)\n",
|
|
||||||
"1. [Other ways to create environments](#Other-ways-to-create-environments)\n",
|
|
||||||
" 1. From existing Conda environment\n",
|
|
||||||
" 1. From Conda or pip files\n",
|
|
||||||
"1. [Docker settings](#Docker-settings)\n",
|
|
||||||
"1. [Spark and Azure Databricks settings](#Spark-and-Azure-Databricks-settings)\n",
|
|
||||||
"1. [Next steps](#Next-steps)\n",
|
|
||||||
"\n",
|
|
||||||
"## Introduction\n",
|
|
||||||
"\n",
|
|
||||||
"Azure ML environments are an encapsulation of the environment where your machine learning training happens. They define Python packages, environment variables, Docker settings and other attributes in declarative fashion. Environments are versioned: you can update them and retrieve old versions to revist and review your work.\n",
|
|
||||||
"\n",
|
|
||||||
"Environments allow you to:\n",
|
|
||||||
"* Encapsulate dependencies of your training process, such as Python packages and their versions.\n",
|
|
||||||
"* Reproduce the Python environment on your local computer in a remote run on VM or ML Compute cluster\n",
|
|
||||||
"* Reproduce your experimentation environment in production setting.\n",
|
|
||||||
"* Revisit and audit the environment in which an existing model was trained.\n",
|
|
||||||
"\n",
|
|
||||||
"Environment, compute target and training script together form run configuration: the full specification of training run.\n",
|
|
||||||
"\n",
|
|
||||||
"## Setup\n",
|
|
||||||
"\n",
|
|
||||||
"If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, make sure you go through the [configuration notebook](../../../configuration.ipynb) first if you haven't.\n",
|
|
||||||
"\n",
|
|
||||||
"First, let's validate Azure ML SDK version and connect to workspace."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "raw",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"import azureml.core\n",
|
|
||||||
"from azureml.core import Workspace\n",
|
|
||||||
"\n",
|
|
||||||
"print(azureml.core.VERSION)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"ws = Workspace.from_config()\n",
|
|
||||||
"ws.get_details()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Create environment\n",
|
|
||||||
"\n",
|
|
||||||
"You can create an environment by instantiating ```Environment``` object and then setting its attributes: set of Python packages, environment variables and others.\n",
|
|
||||||
"\n",
|
|
||||||
"### Add Python packages\n",
|
|
||||||
"\n",
|
|
||||||
"The recommended way is to specify Conda packages, as they typically come with complete set of pre-built binaries."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core import Environment\n",
|
|
||||||
"from azureml.core.environment import CondaDependencies\n",
|
|
||||||
"\n",
|
|
||||||
"myenv = Environment(name=\"myenv\")\n",
|
|
||||||
"conda_dep = CondaDependencies()\n",
|
|
||||||
"conda_dep.add_conda_package(\"scikit-learn\")"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"You can also add pip packages, and specify the version of package"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"conda_dep.add_pip_package(\"pillow==5.4.1\")\n",
|
|
||||||
"myenv.python.conda_dependencies=conda_dep"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Specify environment variables\n",
|
|
||||||
"\n",
|
|
||||||
"You can add environment variables to your environment. These then become available using ```os.environ.get``` in your training script."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"myenv.environment_variables = {\"MESSAGE\":\"Hello from Azure Machine Learning\"}"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Submit run using environment\n",
|
|
||||||
"\n",
|
|
||||||
"When you submit a run, you can specify which environment to use. \n",
|
|
||||||
"\n",
|
|
||||||
"On the first run in given environment, Azure ML spends some time building the environment. On the subsequent runs, Azure ML keeps track of changes and uses the existing environment, resulting in faster run completion."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core import ScriptRunConfig, Experiment\n",
|
|
||||||
"\n",
|
|
||||||
"myexp = Experiment(workspace=ws, name = \"environment-example\")"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"To submit a run, create a run configuration that combines the script file and environment, and pass it to ```Experiment.submit```. In this example, the script is submitted to local computer, but you can specify other compute targets such as remote clusters as well."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"runconfig = ScriptRunConfig(source_directory=\"example\", script=\"example.py\")\n",
|
|
||||||
"runconfig.run_config.target = \"local\"\n",
|
|
||||||
"runconfig.run_config.environment = myenv\n",
|
|
||||||
"run = myexp.submit(config=runconfig)\n",
|
|
||||||
"\n",
|
|
||||||
"run.wait_for_completion(show_output=True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Register environment\n",
|
|
||||||
"\n",
|
|
||||||
"You can manage environments by registering them. This allows you to track their versions, and reuse them in future runs. For example, once you've constructed an environment that meets your requirements, you can register it and use it in other experiments so as to standardize your workflow.\n",
|
|
||||||
"\n",
|
|
||||||
"If you register the environment with same name, the version number is increased by one. Note that Azure ML keeps track of differences between the version, so if you re-register an identical version, the version number is not increased."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"myenv.register(workspace=ws)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## List and get existing environments\n",
|
|
||||||
"\n",
|
|
||||||
"Your workspace contains a dictionary of registered environments. You can then use ```Environment.get``` to retrieve a specific environment with specific version."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"for name,env in ws.environments.items():\n",
|
|
||||||
" print(\"Name {} \\t version {}\".format(name,env.version))\n",
|
|
||||||
"\n",
|
|
||||||
"restored_environment = Environment.get(workspace=ws,name=\"myenv\",version=\"1\")\n",
|
|
||||||
"\n",
|
|
||||||
"print(\"Attributes of restored environment\")\n",
|
|
||||||
"restored_environment"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Other ways to create environments\n",
|
|
||||||
"\n",
|
|
||||||
"### From existing Conda environment\n",
|
|
||||||
"\n",
|
|
||||||
"You can create an environment from existing conda environment. This make it easy to reuse your local interactive environment in Azure ML remote runs. For example, if you've created conda environment using\n",
|
|
||||||
"```\n",
|
|
||||||
"conda create -n mycondaenv\n",
|
|
||||||
"```\n",
|
|
||||||
"you can create Azure ML environment out of that conda environment using\n",
|
|
||||||
"```\n",
|
|
||||||
"myenv = Environment.from_existing_conda_environment(name=\"myenv\",conda_environment_name=\"mycondaenv\")\n",
|
|
||||||
"```\n",
|
|
||||||
"\n",
|
|
||||||
"### From conda or pip files\n",
|
|
||||||
"\n",
|
|
||||||
"You can create environments from conda specification or pip requirements files using\n",
|
|
||||||
"```\n",
|
|
||||||
"myenv = Environment.from_conda_specification(name=\"myenv\", file_path=\"path-to-conda-specification-file\")\n",
|
|
||||||
"\n",
|
|
||||||
"myenv = Environment.from_pip_requirements(name=\"myenv\", file_path=\"path-to-pip-requirements-file\")\n",
|
|
||||||
"```\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Docker settings\n",
|
|
||||||
"\n",
|
|
||||||
"Docker container provides an efficient way to encapsulate the dependencies. When you enable Docker, Azure ML builds a Docker image and creates a Python environment within that container, given your specifications. The Docker images are reused: the first run in a new environment typically takes longer as the image is build.\n",
|
|
||||||
"\n",
|
|
||||||
"**Note:** For runs on local computer or attached virtual machine, that computer must have Docker installed and enabled. Machine Learning Compute has Docker pre-installed.\n",
|
|
||||||
"\n",
|
|
||||||
"Attribute ```docker.enabled``` controls whether to use Docker container or host OS for execution. "
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"myenv.docker.enabled = True"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"You can specify custom Docker base image and registry. This allows you to customize and control in detail the guest OS in which your training run executes. whether to use GPU, whether to use shared volumes, and shm size."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"myenv.docker.base_image\n",
|
|
||||||
"myenv.docker.base_image_registry"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"You can also specify whether to use GPU or shared volumes, and shm size."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"myenv.docker.gpu_support\n",
|
|
||||||
"myenv.docker.shared_volumes\n",
|
|
||||||
"myenv.docker.shm_size"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Spark and Azure Databricks settings\n",
|
|
||||||
"\n",
|
|
||||||
"In addition to Python and Docker settings, Environment also contains attributes for Spark and Azure Databricks runs. These attributes become relevant when you submit runs on those compute targets."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Next steps\n",
|
|
||||||
"\n",
|
|
||||||
"Learn more about remote runs on different compute targets:\n",
|
|
||||||
"\n",
|
|
||||||
"* [Train on ML Compute](../../train-on-amlcompute)\n",
|
|
||||||
"\n",
|
|
||||||
"* [Train on remote VM](../../train-on-remote-vm)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": []
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"authors": [
|
|
||||||
{
|
|
||||||
"name": "roastala"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3.6",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python36"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.6.5"
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
{
|
||||||
"nbformat_minor": 2
|
"cell_type": "markdown",
|
||||||
}
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
""
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Using environments\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"## Contents\n",
|
||||||
|
"\n",
|
||||||
|
"1. [Introduction](#Introduction)\n",
|
||||||
|
"1. [Setup](#Setup)\n",
|
||||||
|
"1. [Create environment](#Create-environment)\n",
|
||||||
|
" 1. Add Python packages\n",
|
||||||
|
" 1. Specify environment variables\n",
|
||||||
|
"1. [Submit run using environment](#Submit-run-using-environment)\n",
|
||||||
|
"1. [Register environment](#Register-environment)\n",
|
||||||
|
"1. [List and get existing environments](#List-and-get-existing-environments)\n",
|
||||||
|
"1. [Other ways to create environments](#Other-ways-to-create-environments)\n",
|
||||||
|
" 1. From existing Conda environment\n",
|
||||||
|
" 1. From Conda or pip files\n",
|
||||||
|
"1. [Docker settings](#Docker-settings)\n",
|
||||||
|
"1. [Spark and Azure Databricks settings](#Spark-and-Azure-Databricks-settings)\n",
|
||||||
|
"1. [Next steps](#Next-steps)\n",
|
||||||
|
"\n",
|
||||||
|
"## Introduction\n",
|
||||||
|
"\n",
|
||||||
|
"Azure ML environments are an encapsulation of the environment where your machine learning training happens. They define Python packages, environment variables, Docker settings and other attributes in declarative fashion. Environments are versioned: you can update them and retrieve old versions to revist and review your work.\n",
|
||||||
|
"\n",
|
||||||
|
"Environments allow you to:\n",
|
||||||
|
"* Encapsulate dependencies of your training process, such as Python packages and their versions.\n",
|
||||||
|
"* Reproduce the Python environment on your local computer in a remote run on VM or ML Compute cluster\n",
|
||||||
|
"* Reproduce your experimentation environment in production setting.\n",
|
||||||
|
"* Revisit and audit the environment in which an existing model was trained.\n",
|
||||||
|
"\n",
|
||||||
|
"Environment, compute target and training script together form run configuration: the full specification of training run.\n",
|
||||||
|
"\n",
|
||||||
|
"## Setup\n",
|
||||||
|
"\n",
|
||||||
|
"If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, make sure you go through the [configuration notebook](../../../configuration.ipynb) first if you haven't.\n",
|
||||||
|
"\n",
|
||||||
|
"First, let's validate Azure ML SDK version and connect to workspace."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import azureml.core\n",
|
||||||
|
"print(azureml.core.VERSION)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.workspace import Workspace\n",
|
||||||
|
"ws = Workspace.from_config()\n",
|
||||||
|
"ws.get_details()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Create environment\n",
|
||||||
|
"\n",
|
||||||
|
"You can create an environment by instantiating ```Environment``` object and then setting its attributes: set of Python packages, environment variables and others.\n",
|
||||||
|
"\n",
|
||||||
|
"### Add Python packages\n",
|
||||||
|
"\n",
|
||||||
|
"The recommended way is to specify Conda packages, as they typically come with complete set of pre-built binaries."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core import Environment\n",
|
||||||
|
"from azureml.core.environment import CondaDependencies\n",
|
||||||
|
"\n",
|
||||||
|
"myenv = Environment(name=\"myenv\")\n",
|
||||||
|
"conda_dep = CondaDependencies()\n",
|
||||||
|
"conda_dep.add_conda_package(\"scikit-learn\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"You can also add pip packages, and specify the version of package"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"conda_dep.add_pip_package(\"pillow==5.4.1\")\n",
|
||||||
|
"myenv.python.conda_dependencies=conda_dep"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Specify environment variables\n",
|
||||||
|
"\n",
|
||||||
|
"You can add environment variables to your environment. These then become available using ```os.environ.get``` in your training script."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"myenv.environment_variables = {\"MESSAGE\":\"Hello from Azure Machine Learning\"}"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Submit run using environment\n",
|
||||||
|
"\n",
|
||||||
|
"When you submit a run, you can specify which environment to use. \n",
|
||||||
|
"\n",
|
||||||
|
"On the first run in given environment, Azure ML spends some time building the environment. On the subsequent runs, Azure ML keeps track of changes and uses the existing environment, resulting in faster run completion."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core import ScriptRunConfig, Experiment\n",
|
||||||
|
"\n",
|
||||||
|
"myexp = Experiment(workspace=ws, name = \"environment-example\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"To submit a run, create a run configuration that combines the script file and environment, and pass it to ```Experiment.submit```. In this example, the script is submitted to local computer, but you can specify other compute targets such as remote clusters as well."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"runconfig = ScriptRunConfig(source_directory=\".\", script=\"example.py\")\n",
|
||||||
|
"runconfig.run_config.target = \"local\"\n",
|
||||||
|
"runconfig.run_config.environment = myenv\n",
|
||||||
|
"run = myexp.submit(config=runconfig)\n",
|
||||||
|
"\n",
|
||||||
|
"run.wait_for_completion(show_output=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Register environment\n",
|
||||||
|
"\n",
|
||||||
|
"You can manage environments by registering them. This allows you to track their versions, and reuse them in future runs. For example, once you've constructed an environment that meets your requirements, you can register it and use it in other experiments so as to standardize your workflow.\n",
|
||||||
|
"\n",
|
||||||
|
"If you register the environment with same name, the version number is increased by one. Note that Azure ML keeps track of differences between the version, so if you re-register an identical version, the version number is not increased."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"myenv.register(workspace=ws)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## List and get existing environments\n",
|
||||||
|
"\n",
|
||||||
|
"Your workspace contains a dictionary of registered environments. You can then use ```Environment.get``` to retrieve a specific environment with specific version."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"for name,env in ws.environments.items():\n",
|
||||||
|
" print(\"Name {} \\t version {}\".format(name,env.version))\n",
|
||||||
|
"\n",
|
||||||
|
"restored_environment = Environment.get(workspace=ws,name=\"myenv\",version=\"1\")\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"Attributes of restored environment\")\n",
|
||||||
|
"restored_environment"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Other ways to create environments\n",
|
||||||
|
"\n",
|
||||||
|
"### From existing Conda environment\n",
|
||||||
|
"\n",
|
||||||
|
"You can create an environment from existing conda environment. This make it easy to reuse your local interactive environment in Azure ML remote runs. For example, if you've created conda environment using\n",
|
||||||
|
"```\n",
|
||||||
|
"conda create -n mycondaenv\n",
|
||||||
|
"```\n",
|
||||||
|
"you can create Azure ML environment out of that conda environment using\n",
|
||||||
|
"```\n",
|
||||||
|
"myenv = Environment.from_existing_conda_environment(name=\"myenv\",conda_environment_name=\"mycondaenv\")\n",
|
||||||
|
"```\n",
|
||||||
|
"\n",
|
||||||
|
"### From conda or pip files\n",
|
||||||
|
"\n",
|
||||||
|
"You can create environments from conda specification or pip requirements files using\n",
|
||||||
|
"```\n",
|
||||||
|
"myenv = Environment.from_conda_specification(name=\"myenv\", file_path=\"path-to-conda-specification-file\")\n",
|
||||||
|
"\n",
|
||||||
|
"myenv = Environment.from_pip_requirements(name=\"myenv\", file_path=\"path-to-pip-requirements-file\")\n",
|
||||||
|
"```\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Docker settings\n",
|
||||||
|
"\n",
|
||||||
|
"Docker container provides an efficient way to encapsulate the dependencies. When you enable Docker, Azure ML builds a Docker image and creates a Python environment within that container, given your specifications. The Docker images are reused: the first run in a new environment typically takes longer as the image is build.\n",
|
||||||
|
"\n",
|
||||||
|
"**Note:** For runs on local computer or attached virtual machine, that computer must have Docker installed and enabled. Machine Learning Compute has Docker pre-installed.\n",
|
||||||
|
"\n",
|
||||||
|
"Attribute ```docker.enabled``` controls whether to use Docker container or host OS for execution. "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"myenv.docker.enabled = True"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"You can specify custom Docker base image and registry. This allows you to customize and control in detail the guest OS in which your training run executes. whether to use GPU, whether to use shared volumes, and shm size."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"myenv.docker.base_image\n",
|
||||||
|
"myenv.docker.base_image_registry"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"You can also specify whether to use GPU or shared volumes, and shm size."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"myenv.docker.gpu_support\n",
|
||||||
|
"myenv.docker.shared_volumes\n",
|
||||||
|
"myenv.docker.shm_size"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Spark and Azure Databricks settings\n",
|
||||||
|
"\n",
|
||||||
|
"In addition to Python and Docker settings, Environment also contains attributes for Spark and Azure Databricks runs. These attributes become relevant when you submit runs on those compute targets."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Next steps\n",
|
||||||
|
"\n",
|
||||||
|
"Learn more about remote runs on different compute targets:\n",
|
||||||
|
"\n",
|
||||||
|
"* [Train on ML Compute](../../train-on-amlcompute)\n",
|
||||||
|
"\n",
|
||||||
|
"* [Train on remote VM](../../train-on-remote-vm)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "roastala"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3.6",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python36"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.6.5"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
|
|||||||
12
how-to-use-azureml/using-mlflow/README.md
Normal file
12
how-to-use-azureml/using-mlflow/README.md
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
## Use MLflow with Azure Machine Learning service (Preview)
|
||||||
|
|
||||||
|
[MLflow](https://mlflow.org/) is an open-source platform for tracking machine learning experiments and managing models. You can use MLflow logging APIs with Azure Machine Learning service: the metrics and artifacts are logged to your Azure ML Workspace.
|
||||||
|
|
||||||
|
Try out the sample notebooks:
|
||||||
|
|
||||||
|
* [Use MLflow with Azure Machine Learning for Local Training Run](./train-local/train-local.ipynb)
|
||||||
|
* [Use MLflow with Azure Machine Learning for Remote Training Run](./train-remote/train-remote.ipynb)
|
||||||
|
* [Deploy Model as Azure Machine Learning Web Service using MLflow](./deploy-model/deploy-model.ipynb)
|
||||||
|
* [Train and Deploy PyTorch Image Classifier](./train-deploy-pytorch/train-deploy-pytorch.ipynb)
|
||||||
|
|
||||||
|

|
||||||
322
how-to-use-azureml/using-mlflow/deploy-model/deploy-model.ipynb
Normal file
322
how-to-use-azureml/using-mlflow/deploy-model/deploy-model.ipynb
Normal file
@@ -0,0 +1,322 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||||
|
"\n",
|
||||||
|
"Licensed under the MIT License."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
""
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Deploy Model as Azure Machine Learning Web Service using MLflow\n",
|
||||||
|
"\n",
|
||||||
|
"This example shows you how to use mlflow together with Azure Machine Learning services for deploying a model as a web service. You'll learn how to:\n",
|
||||||
|
"\n",
|
||||||
|
" 1. Retrieve a previously trained scikit-learn model\n",
|
||||||
|
" 2. Create a Docker image from the model\n",
|
||||||
|
" 3. Deploy the model as a web service on Azure Container Instance\n",
|
||||||
|
" 4. Make a scoring request against the web service.\n",
|
||||||
|
"\n",
|
||||||
|
"## Prerequisites and Set-up\n",
|
||||||
|
"\n",
|
||||||
|
"This notebook requires you to first complete the [Use MLflow with Azure Machine Learning for Local Training Run](../train-local/train-local.ipnyb) or [Use MLflow with Azure Machine Learning for Remote Training Run](../train-remote/train-remote.ipnyb) notebook, so as to have an experiment run with uploaded model in your Azure Machine Learning Workspace.\n",
|
||||||
|
"\n",
|
||||||
|
"Also install following packages if you haven't already\n",
|
||||||
|
"\n",
|
||||||
|
"```\n",
|
||||||
|
"pip install azureml-mlflow pandas\n",
|
||||||
|
"```\n",
|
||||||
|
"\n",
|
||||||
|
"Then, import necessary packages:"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import mlflow\n",
|
||||||
|
"import azureml.mlflow\n",
|
||||||
|
"import azureml.core\n",
|
||||||
|
"from azureml.core import Workspace\n",
|
||||||
|
"\n",
|
||||||
|
"# Check core SDK version number\n",
|
||||||
|
"print(\"SDK version:\", azureml.core.VERSION)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Connect to workspace and set MLflow tracking URI\n",
|
||||||
|
"\n",
|
||||||
|
"Setting the tracking URI is required for retrieving the model and creating an image using the MLflow APIs."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"ws = Workspace.from_config()\n",
|
||||||
|
"\n",
|
||||||
|
"mlflow.set_tracking_uri(ws.get_mlflow_tracking_uri())"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Retrieve model from previous run\n",
|
||||||
|
"\n",
|
||||||
|
"Let's retrieve the experiment from training notebook, and list the runs within that experiment."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"experiment_name = \"experiment-with-mlflow\"\n",
|
||||||
|
"exp = ws.experiments[experiment_name]\n",
|
||||||
|
"\n",
|
||||||
|
"runs = list(exp.get_runs())\n",
|
||||||
|
"runs"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Then, let's select the most recent training run and find its ID. You also need to specify the path in run history where the model was saved. "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"runid = runs[0].id\n",
|
||||||
|
"model_save_path = \"model\""
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Create Docker image\n",
|
||||||
|
"\n",
|
||||||
|
"To create a Docker image with Azure Machine Learning for Model Management, use ```mlflow.azureml.build_image``` method. Specify the model path, your workspace, run ID and other parameters.\n",
|
||||||
|
"\n",
|
||||||
|
"MLflow automatically recognizes the model framework as scikit-learn, and creates the scoring logic and includes library dependencies for you.\n",
|
||||||
|
"\n",
|
||||||
|
"Note that the image creation can take several minutes."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import mlflow.azureml\n",
|
||||||
|
"\n",
|
||||||
|
"azure_image, azure_model = mlflow.azureml.build_image(model_uri=\"runs:/{}/{}\".format(runid, model_save_path),\n",
|
||||||
|
" workspace=ws,\n",
|
||||||
|
" model_name='diabetes-sklearn-model',\n",
|
||||||
|
" image_name='diabetes-sklearn-image',\n",
|
||||||
|
" synchronous=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Deploy web service\n",
|
||||||
|
"\n",
|
||||||
|
"Let's use Azure Machine Learning SDK to deploy the image as a web service. \n",
|
||||||
|
"\n",
|
||||||
|
"First, specify the deployment configuration. Azure Container Instance is a suitable choice for a quick dev-test deployment, while Azure Kubernetes Service is suitable for scalable production deployments.\n",
|
||||||
|
"\n",
|
||||||
|
"Then, deploy the image using Azure Machine Learning SDK's ```deploy_from_image``` method.\n",
|
||||||
|
"\n",
|
||||||
|
"Note that the deployment can take several minutes."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.webservice import AciWebservice, Webservice\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"aci_config = AciWebservice.deploy_configuration(cpu_cores=1, \n",
|
||||||
|
" memory_gb=1, \n",
|
||||||
|
" tags={\"method\" : \"sklearn\"}, \n",
|
||||||
|
" description='Diabetes model',\n",
|
||||||
|
" location='eastus2')\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"# Deploy the image to Azure Container Instances (ACI) for real-time serving\n",
|
||||||
|
"webservice = Webservice.deploy_from_image(\n",
|
||||||
|
" image=azure_image, workspace=ws, name=\"diabetes-model-1\", deployment_config=aci_config)\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"webservice.wait_for_deployment(show_output=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Make a scoring request\n",
|
||||||
|
"\n",
|
||||||
|
"Let's take the first few rows of test data and score them using the web service"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"test_rows = [\n",
|
||||||
|
" [0.01991321, 0.05068012, 0.10480869, 0.07007254, -0.03596778,\n",
|
||||||
|
" -0.0266789 , -0.02499266, -0.00259226, 0.00371174, 0.04034337],\n",
|
||||||
|
" [-0.01277963, -0.04464164, 0.06061839, 0.05285819, 0.04796534,\n",
|
||||||
|
" 0.02937467, -0.01762938, 0.03430886, 0.0702113 , 0.00720652],\n",
|
||||||
|
" [ 0.03807591, 0.05068012, 0.00888341, 0.04252958, -0.04284755,\n",
|
||||||
|
" -0.02104223, -0.03971921, -0.00259226, -0.01811827, 0.00720652]]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"MLflow-based web service for scikit-learn model requires the data to be converted to Pandas DataFrame, and then serialized as JSON. "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import json\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"\n",
|
||||||
|
"test_rows_as_json = pd.DataFrame(test_rows).to_json(orient=\"split\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Let's pass the conveted and serialized data to web service to get the predictions."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"predictions = webservice.run(test_rows_as_json)\n",
|
||||||
|
"\n",
|
||||||
|
"print(predictions)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"You can use the web service's scoring URI to make a raw HTTP request"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"webservice.scoring_uri"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"You can diagnose the web service using ```get_logs``` method."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"webservice.get_logs()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Next Steps\n",
|
||||||
|
"\n",
|
||||||
|
"Learn about [model management and inferencing in Azure Machine Learning service](https://docs.microsoft.com/en-us/azure/machine-learning/service/concept-model-management-and-deployment)."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "rastala"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3.6",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python36"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.6.4"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
@@ -0,0 +1,150 @@
|
|||||||
|
# Copyright (c) 2017, PyTorch Team
|
||||||
|
# All rights reserved
|
||||||
|
# Licensed under BSD 3-Clause License.
|
||||||
|
|
||||||
|
# This example is based on PyTorch MNIST example:
|
||||||
|
# https://github.com/pytorch/examples/blob/master/mnist/main.py
|
||||||
|
|
||||||
|
import mlflow
|
||||||
|
import mlflow.pytorch
|
||||||
|
from mlflow.utils.environment import _mlflow_conda_env
|
||||||
|
import warnings
|
||||||
|
import cloudpickle
|
||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
import torch.nn.functional as F
|
||||||
|
import torch.optim as optim
|
||||||
|
import torchvision
|
||||||
|
from torchvision import datasets, transforms
|
||||||
|
|
||||||
|
|
||||||
|
class Net(nn.Module):
|
||||||
|
def __init__(self):
|
||||||
|
super(Net, self).__init__()
|
||||||
|
self.conv1 = nn.Conv2d(1, 20, 5, 1)
|
||||||
|
self.conv2 = nn.Conv2d(20, 50, 5, 1)
|
||||||
|
self.fc1 = nn.Linear(4 * 4 * 50, 500)
|
||||||
|
self.fc2 = nn.Linear(500, 10)
|
||||||
|
|
||||||
|
def forward(self, x):
|
||||||
|
# Added the view for reshaping score requests
|
||||||
|
x = x.view(-1, 1, 28, 28)
|
||||||
|
x = F.relu(self.conv1(x))
|
||||||
|
x = F.max_pool2d(x, 2, 2)
|
||||||
|
x = F.relu(self.conv2(x))
|
||||||
|
x = F.max_pool2d(x, 2, 2)
|
||||||
|
x = x.view(-1, 4 * 4 * 50)
|
||||||
|
x = F.relu(self.fc1(x))
|
||||||
|
x = self.fc2(x)
|
||||||
|
return F.log_softmax(x, dim=1)
|
||||||
|
|
||||||
|
|
||||||
|
def train(args, model, device, train_loader, optimizer, epoch):
|
||||||
|
model.train()
|
||||||
|
for batch_idx, (data, target) in enumerate(train_loader):
|
||||||
|
data, target = data.to(device), target.to(device)
|
||||||
|
optimizer.zero_grad()
|
||||||
|
output = model(data)
|
||||||
|
loss = F.nll_loss(output, target)
|
||||||
|
loss.backward()
|
||||||
|
optimizer.step()
|
||||||
|
if batch_idx % args.log_interval == 0:
|
||||||
|
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
|
||||||
|
epoch, batch_idx * len(data), len(train_loader.dataset),
|
||||||
|
100. * batch_idx / len(train_loader), loss.item()))
|
||||||
|
# Use MLflow logging
|
||||||
|
mlflow.log_metric("epoch_loss", loss.item())
|
||||||
|
|
||||||
|
|
||||||
|
def test(args, model, device, test_loader):
|
||||||
|
model.eval()
|
||||||
|
test_loss = 0
|
||||||
|
correct = 0
|
||||||
|
with torch.no_grad():
|
||||||
|
for data, target in test_loader:
|
||||||
|
data, target = data.to(device), target.to(device)
|
||||||
|
output = model(data)
|
||||||
|
# sum up batch loss
|
||||||
|
test_loss += F.nll_loss(output, target, reduction="sum").item()
|
||||||
|
# get the index of the max log-probability
|
||||||
|
pred = output.argmax(dim=1, keepdim=True)
|
||||||
|
correct += pred.eq(target.view_as(pred)).sum().item()
|
||||||
|
|
||||||
|
test_loss /= len(test_loader.dataset)
|
||||||
|
print("\n")
|
||||||
|
print("Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n".format(
|
||||||
|
test_loss, correct, len(test_loader.dataset),
|
||||||
|
100. * correct / len(test_loader.dataset)))
|
||||||
|
# Use MLflow logging
|
||||||
|
mlflow.log_metric("average_loss", test_loss)
|
||||||
|
|
||||||
|
|
||||||
|
class Args(object):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
# Training settings
|
||||||
|
args = Args()
|
||||||
|
setattr(args, 'batch_size', 64)
|
||||||
|
setattr(args, 'test_batch_size', 1000)
|
||||||
|
setattr(args, 'epochs', 3) # Higher number for better convergence
|
||||||
|
setattr(args, 'lr', 0.01)
|
||||||
|
setattr(args, 'momentum', 0.5)
|
||||||
|
setattr(args, 'no_cuda', True)
|
||||||
|
setattr(args, 'seed', 1)
|
||||||
|
setattr(args, 'log_interval', 10)
|
||||||
|
setattr(args, 'save_model', True)
|
||||||
|
|
||||||
|
use_cuda = not args.no_cuda and torch.cuda.is_available()
|
||||||
|
|
||||||
|
torch.manual_seed(args.seed)
|
||||||
|
|
||||||
|
device = torch.device("cuda" if use_cuda else "cpu")
|
||||||
|
|
||||||
|
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
|
||||||
|
train_loader = torch.utils.data.DataLoader(
|
||||||
|
datasets.MNIST('../data', train=True, download=True,
|
||||||
|
transform=transforms.Compose([
|
||||||
|
transforms.ToTensor(),
|
||||||
|
transforms.Normalize((0.1307,), (0.3081,))
|
||||||
|
])),
|
||||||
|
batch_size=args.batch_size, shuffle=True, **kwargs)
|
||||||
|
test_loader = torch.utils.data.DataLoader(
|
||||||
|
datasets.MNIST(
|
||||||
|
'../data',
|
||||||
|
train=False,
|
||||||
|
transform=transforms.Compose([
|
||||||
|
transforms.ToTensor(),
|
||||||
|
transforms.Normalize((0.1307,), (0.3081,))])),
|
||||||
|
batch_size=args.test_batch_size, shuffle=True, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
def driver():
|
||||||
|
warnings.filterwarnings("ignore")
|
||||||
|
# Dependencies for deploying the model
|
||||||
|
pytorch_index = "https://download.pytorch.org/whl/"
|
||||||
|
pytorch_version = "cpu/torch-1.1.0-cp36-cp36m-linux_x86_64.whl"
|
||||||
|
deps = [
|
||||||
|
"cloudpickle=={}".format(cloudpickle.__version__),
|
||||||
|
pytorch_index + pytorch_version,
|
||||||
|
"torchvision=={}".format(torchvision.__version__),
|
||||||
|
"Pillow=={}".format("6.0.0")
|
||||||
|
]
|
||||||
|
with mlflow.start_run() as run:
|
||||||
|
model = Net().to(device)
|
||||||
|
optimizer = optim.SGD(
|
||||||
|
model.parameters(),
|
||||||
|
lr=args.lr,
|
||||||
|
momentum=args.momentum)
|
||||||
|
for epoch in range(1, args.epochs + 1):
|
||||||
|
train(args, model, device, train_loader, optimizer, epoch)
|
||||||
|
test(args, model, device, test_loader)
|
||||||
|
# Log model to run history using MLflow
|
||||||
|
if args.save_model:
|
||||||
|
model_env = _mlflow_conda_env(additional_pip_deps=deps)
|
||||||
|
mlflow.pytorch.log_model(model, "model", conda_env=model_env)
|
||||||
|
return run
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
driver()
|
||||||
@@ -0,0 +1,481 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||||
|
"\n",
|
||||||
|
"Licensed under the MIT License."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
""
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Use MLflow with Azure Machine Learning to Train and Deploy PyTorch Image Classifier\n",
|
||||||
|
"\n",
|
||||||
|
"This example shows you how to use MLflow together with Azure Machine Learning services for tracking the metrics and artifacts while training a PyTorch model to classify MNIST digit images, and then deploy the model as a web service. You'll learn how to:\n",
|
||||||
|
"\n",
|
||||||
|
" 1. Set up MLflow tracking URI so as to use Azure ML\n",
|
||||||
|
" 2. Create experiment\n",
|
||||||
|
" 3. Instrument your model with MLflow tracking\n",
|
||||||
|
" 4. Train a PyTorch model locally\n",
|
||||||
|
" 5. Train a model on GPU compute on Azure\n",
|
||||||
|
" 6. View your experiment within your Azure ML Workspace in Azure Portal\n",
|
||||||
|
" 7. Create a Docker image from the trained model\n",
|
||||||
|
" 8. Deploy the model as a web service on Azure Container Instance\n",
|
||||||
|
" 9. Call the model to make predictions\n",
|
||||||
|
" \n",
|
||||||
|
"### Pre-requisites\n",
|
||||||
|
" \n",
|
||||||
|
"Make sure you have completed the [Configuration](../../../configuration.ipnyb) notebook to set up your Azure Machine Learning workspace and ensure other common prerequisites are met.\n",
|
||||||
|
"\n",
|
||||||
|
"Also, install mlflow-azureml package using ```pip install mlflow-azureml```. Note that mlflow-azureml installs mlflow package itself as a dependency, if you haven't done so previously.\n",
|
||||||
|
"\n",
|
||||||
|
"### Set-up\n",
|
||||||
|
"\n",
|
||||||
|
"Import packages and check versions of Azure ML SDK and MLflow installed on your computer. Then connect to your Workspace."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import sys, os\n",
|
||||||
|
"import mlflow\n",
|
||||||
|
"import mlflow.azureml\n",
|
||||||
|
"import mlflow.sklearn\n",
|
||||||
|
"\n",
|
||||||
|
"import azureml.core\n",
|
||||||
|
"from azureml.core import Workspace\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"SDK version:\", azureml.core.VERSION)\n",
|
||||||
|
"print(\"MLflow version:\", mlflow.version.VERSION)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"ws = Workspace.from_config()\n",
|
||||||
|
"ws.get_details()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Set tracking URI\n",
|
||||||
|
"\n",
|
||||||
|
"Set the MLFlow tracking URI to point to your Azure ML Workspace. The subsequent logging calls from MLFlow APIs will go to Azure ML services and will be tracked under your Workspace."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"mlflow.set_tracking_uri(ws.get_mlflow_tracking_uri())"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Create Experiment\n",
|
||||||
|
"\n",
|
||||||
|
"In both MLflow and Azure ML, training runs are grouped into experiments. Let's create one for our experimentation."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"experiment_name = \"pytorch-with-mlflow\"\n",
|
||||||
|
"mlflow.set_experiment(experiment_name)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Train model locally while logging metrics and artifacts\n",
|
||||||
|
"\n",
|
||||||
|
"The ```scripts/train.py``` program contains the code to load the image dataset, and train and test the model. Within this program, the train.driver function wraps the end-to-end workflow.\n",
|
||||||
|
"\n",
|
||||||
|
"Within the driver, the ```mlflow.start_run``` starts MLflow tracking. Then, ```mlflow.log_metric``` functions are used to track the convergence of the neural network training iterations. Finally ```mlflow.pytorch.save_model``` is used to save the trained model in framework-aware manner.\n",
|
||||||
|
"\n",
|
||||||
|
"Let's add the program to search path, import it as a module, and then invoke the driver function. Note that the training can take few minutes."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"lib_path = os.path.abspath(\"scripts\")\n",
|
||||||
|
"sys.path.append(lib_path)\n",
|
||||||
|
"\n",
|
||||||
|
"import train"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"run = train.driver()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"You can view the metrics of the run at Azure Portal"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"print(azureml.mlflow.get_portal_url(run))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Train model on GPU compute on Azure\n",
|
||||||
|
"\n",
|
||||||
|
"Next, let's run the same script on GPU-enabled compute for faster training. If you've completed the the [Configuration](../../../configuration.ipnyb) notebook, you should have a GPU cluster named \"gpu-cluster\" available in your workspace. Otherwise, follow the instructions in the notebook to create one. For simplicity, this example uses single process on single VM to train the model.\n",
|
||||||
|
"\n",
|
||||||
|
"Create a PyTorch estimator to specify the training configuration: script, compute as well as additional packages needed. To enable MLflow tracking, include ```azureml-mlflow``` as pip package. The low-level specifications for the training run are encapsulated in the estimator instance."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.train.dnn import PyTorch\n",
|
||||||
|
"\n",
|
||||||
|
"pt = PyTorch(source_directory=\"./scripts\", \n",
|
||||||
|
" entry_script = \"train.py\", \n",
|
||||||
|
" compute_target = \"gpu-cluster\", \n",
|
||||||
|
" node_count = 1, \n",
|
||||||
|
" process_count_per_node = 1, \n",
|
||||||
|
" use_gpu=True,\n",
|
||||||
|
" pip_packages = [\"azureml-mlflow\", \"Pillow==6.0.0\"])\n",
|
||||||
|
"\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Get a reference to the experiment you created previously, but this time, as Azure Machine Learning experiment object.\n",
|
||||||
|
"\n",
|
||||||
|
"Then, use ```Experiment.submit``` method to start the remote training run. Note that the first training run often takes longer as Azure Machine Learning service builds the Docker image for executing the script. Subsequent runs will be faster as cached image is used."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core import Experiment\n",
|
||||||
|
"\n",
|
||||||
|
"exp = Experiment(ws, experiment_name)\n",
|
||||||
|
"run = exp.submit(pt)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"You can monitor the run and its metrics on Azure Portal."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"run"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Also, you can wait for run to complete."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"run.wait_for_completion(show_output=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Deploy model as web service\n",
|
||||||
|
"\n",
|
||||||
|
"To deploy a web service, first create a Docker image, and then deploy that Docker image on inferencing compute.\n",
|
||||||
|
"\n",
|
||||||
|
"The ```mlflow.azureml.build_image``` function builds a Docker image from saved PyTorch model in a framework-aware manner. It automatically creates the PyTorch-specific inferencing wrapper code and specififies package dependencies for you."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"run.get_file_names()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Then build a docker image using *runs:/<run.id>/model* as the model_uri path.\n",
|
||||||
|
"\n",
|
||||||
|
"Note that the image building can take several minutes."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"model_path = \"model\"\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"azure_image, azure_model = mlflow.azureml.build_image(model_uri='runs:/{}/{}'.format(run.id, model_path),\n",
|
||||||
|
" workspace=ws,\n",
|
||||||
|
" model_name='pytorch_mnist',\n",
|
||||||
|
" image_name='pytorch-mnist-img',\n",
|
||||||
|
" synchronous=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Then, deploy the Docker image to Azure Container Instance: a serverless compute capable of running a single container. You can tag and add descriptions to help keep track of your web service. \n",
|
||||||
|
"\n",
|
||||||
|
"[Other inferencing compute choices](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-deploy-and-where) include Azure Kubernetes Service which provides scalable endpoint suitable for production use.\n",
|
||||||
|
"\n",
|
||||||
|
"Note that the service deployment can take several minutes."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.webservice import AciWebservice, Webservice\n",
|
||||||
|
"\n",
|
||||||
|
"aci_config = AciWebservice.deploy_configuration(cpu_cores=2, \n",
|
||||||
|
" memory_gb=5, \n",
|
||||||
|
" tags={\"data\": \"MNIST\", \"method\" : \"pytorch\"}, \n",
|
||||||
|
" description=\"Predict using webservice\")\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"# Deploy the image to Azure Container Instances (ACI) for real-time serving\n",
|
||||||
|
"webservice = Webservice.deploy_from_image(\n",
|
||||||
|
" image=azure_image, workspace=ws, name=\"pytorch-mnist-1\", deployment_config=aci_config)\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"webservice.wait_for_deployment()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Once the deployment has completed you can check the scoring URI of the web service."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"print(\"Scoring URI is: {}\".format(webservice.scoring_uri))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"In case of a service creation issue, you can use ```webservice.get_logs()``` to get logs to debug."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Make predictions using web service\n",
|
||||||
|
"\n",
|
||||||
|
"To make the web service, create a test data set as normalized PyTorch tensors. \n",
|
||||||
|
"\n",
|
||||||
|
"Then, let's define a utility function that takes a random image and converts it into format and shape suitable for as input to PyTorch inferencing end-point. The conversion is done by: \n",
|
||||||
|
"\n",
|
||||||
|
" 1. Select a random (image, label) tuple\n",
|
||||||
|
" 2. Take the image and converting the tensor to NumPy array \n",
|
||||||
|
" 3. Reshape array into 1 x 1 x N array\n",
|
||||||
|
" * 1 image in batch, 1 color channel, N = 784 pixels for MNIST images\n",
|
||||||
|
" * Note also ```x = x.view(-1, 1, 28, 28)``` in net definition in ```train.py``` program to shape incoming scoring requests.\n",
|
||||||
|
" 4. Convert the NumPy array to list to make it into a built-in type.\n",
|
||||||
|
" 5. Create a dictionary {\"data\", <list>} that can be converted to JSON string for web service requests."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from torchvision import datasets, transforms\n",
|
||||||
|
"import random\n",
|
||||||
|
"import numpy as np\n",
|
||||||
|
"\n",
|
||||||
|
"test_data = datasets.MNIST('../data', train=False, transform=transforms.Compose([\n",
|
||||||
|
" transforms.ToTensor(),\n",
|
||||||
|
" transforms.Normalize((0.1307,), (0.3081,))]))\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"def get_random_image():\n",
|
||||||
|
" image_idx = random.randint(0,len(test_data))\n",
|
||||||
|
" image_as_tensor = test_data[image_idx][0]\n",
|
||||||
|
" return {\"data\": elem for elem in image_as_tensor.numpy().reshape(1,1,-1).tolist()}"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Then, invoke the web service using a random test image. Convert the dictionary containing the image to JSON string before passing it to web service.\n",
|
||||||
|
"\n",
|
||||||
|
"The response contains the raw scores for each label, with greater value indicating higher probability. Sort the labels and select the one with greatest score to get the prediction. Let's also plot the image sent to web service for comparison purposes."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"%matplotlib inline\n",
|
||||||
|
"\n",
|
||||||
|
"import json\n",
|
||||||
|
"import matplotlib.pyplot as plt\n",
|
||||||
|
"\n",
|
||||||
|
"test_image = get_random_image()\n",
|
||||||
|
"\n",
|
||||||
|
"response = webservice.run(json.dumps(test_image))\n",
|
||||||
|
"\n",
|
||||||
|
"response = sorted(response[0].items(), key = lambda x: x[1], reverse = True)\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"Predicted label:\", response[0][0])\n",
|
||||||
|
"plt.imshow(np.array(test_image[\"data\"]).reshape(28,28), cmap = \"gray\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"You can also call the web service using a raw POST method against the web service"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import requests\n",
|
||||||
|
"\n",
|
||||||
|
"response = requests.post(url=webservice.scoring_uri, data=json.dumps(test_image),headers={\"Content-type\": \"application/json\"})\n",
|
||||||
|
"print(response.text)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "roastala"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"celltoolbar": "Edit Metadata",
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3.6",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python36"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.7.3"
|
||||||
|
},
|
||||||
|
"name": "mlflow-sparksummit-pytorch",
|
||||||
|
"notebookId": 2495374963457641
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 1
|
||||||
|
}
|
||||||
248
how-to-use-azureml/using-mlflow/train-local/train-local.ipynb
Normal file
248
how-to-use-azureml/using-mlflow/train-local/train-local.ipynb
Normal file
@@ -0,0 +1,248 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||||
|
"\n",
|
||||||
|
"Licensed under the MIT License."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
""
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Use MLflow with Azure Machine Learning for Local Training Run\n",
|
||||||
|
"\n",
|
||||||
|
"This example shows you how to use mlflow tracking APIs together with Azure Machine Learning services for storing your metrics and artifacts, from local Notebook run. You'll learn how to:\n",
|
||||||
|
"\n",
|
||||||
|
" 1. Set up MLflow tracking URI so as to use Azure ML\n",
|
||||||
|
" 2. Create experiment\n",
|
||||||
|
" 3. Train a model on your local computer while logging metrics and artifacts\n",
|
||||||
|
" 4. View your experiment within your Azure ML Workspace in Azure Portal.\n",
|
||||||
|
"\n",
|
||||||
|
"## Prerequisites and Set-up\n",
|
||||||
|
"\n",
|
||||||
|
"Make sure you have completed the [Configuration](../../../configuration.ipnyb) notebook to set up your Azure Machine Learning workspace and ensure other common prerequisites are met.\n",
|
||||||
|
"\n",
|
||||||
|
"Install azureml-mlflow package before running this notebook. Note that mlflow itself gets installed as dependency if you haven't installed it yet.\n",
|
||||||
|
"\n",
|
||||||
|
"```\n",
|
||||||
|
"pip install azureml-mlflow\n",
|
||||||
|
"```\n",
|
||||||
|
"\n",
|
||||||
|
"This example also uses scikit-learn and matplotlib packages. Install them:\n",
|
||||||
|
"```\n",
|
||||||
|
"pip install scikit-learn matplotlib\n",
|
||||||
|
"```\n",
|
||||||
|
"\n",
|
||||||
|
"Then, import necessary packages"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import mlflow\n",
|
||||||
|
"import mlflow.sklearn\n",
|
||||||
|
"import azureml.core\n",
|
||||||
|
"from azureml.core import Workspace\n",
|
||||||
|
"import matplotlib.pyplot as plt\n",
|
||||||
|
"\n",
|
||||||
|
"# Check core SDK version number\n",
|
||||||
|
"print(\"SDK version:\", azureml.core.VERSION)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Set tracking URI\n",
|
||||||
|
"\n",
|
||||||
|
"Set the MLflow tracking URI to point to your Azure ML Workspace. The subsequent logging calls from MLflow APIs will go to Azure ML services and will be tracked under your Workspace."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"ws = Workspace.from_config()\n",
|
||||||
|
"\n",
|
||||||
|
"mlflow.set_tracking_uri(ws.get_mlflow_tracking_uri())"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Create Experiment\n",
|
||||||
|
"\n",
|
||||||
|
"In both MLflow and Azure ML, training runs are grouped into experiments. Let's create one for our experimentation."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"experiment_name = \"experiment-with-mlflow\"\n",
|
||||||
|
"mlflow.set_experiment(experiment_name)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Create training and test data set\n",
|
||||||
|
"\n",
|
||||||
|
"This example uses diabetes dataset to build a simple regression model. Let's load the dataset and split it into training and test sets."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import numpy as np\n",
|
||||||
|
"from sklearn.datasets import load_diabetes\n",
|
||||||
|
"from sklearn.linear_model import Ridge\n",
|
||||||
|
"from sklearn.metrics import mean_squared_error\n",
|
||||||
|
"from sklearn.model_selection import train_test_split\n",
|
||||||
|
"\n",
|
||||||
|
"X, y = load_diabetes(return_X_y = True)\n",
|
||||||
|
"columns = ['age', 'gender', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6']\n",
|
||||||
|
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n",
|
||||||
|
"data = {\n",
|
||||||
|
" \"train\":{\"X\": X_train, \"y\": y_train}, \n",
|
||||||
|
" \"test\":{\"X\": X_test, \"y\": y_test}\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"print (\"Data contains\", len(data['train']['X']), \"training samples and\",len(data['test']['X']), \"test samples\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Train while logging metrics and artifacts\n",
|
||||||
|
"\n",
|
||||||
|
"Next, start a mlflow run to train a scikit-learn regression model. Note that the training script has been instrumented using MLflow to:\n",
|
||||||
|
" * Log model hyperparameter alpha value\n",
|
||||||
|
" * Log mean squared error against test set\n",
|
||||||
|
" * Save the scikit-learn based regression model produced by training\n",
|
||||||
|
" * Save an image that shows actuals vs predictions against test set.\n",
|
||||||
|
" \n",
|
||||||
|
"These metrics and artifacts have been recorded to your Azure ML Workspace; in the next step you'll learn how to view them."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Create a run object in the experiment\n",
|
||||||
|
"model_save_path = \"model\"\n",
|
||||||
|
"\n",
|
||||||
|
"with mlflow.start_run() as run:\n",
|
||||||
|
" # Log the algorithm parameter alpha to the run\n",
|
||||||
|
" mlflow.log_metric('alpha', 0.03)\n",
|
||||||
|
" # Create, fit, and test the scikit-learn Ridge regression model\n",
|
||||||
|
" regression_model = Ridge(alpha=0.03)\n",
|
||||||
|
" regression_model.fit(data['train']['X'], data['train']['y'])\n",
|
||||||
|
" preds = regression_model.predict(data['test']['X'])\n",
|
||||||
|
"\n",
|
||||||
|
" # Log mean squared error\n",
|
||||||
|
" print('Mean Squared Error is', mean_squared_error(data['test']['y'], preds))\n",
|
||||||
|
" mlflow.log_metric('mse', mean_squared_error(data['test']['y'], preds))\n",
|
||||||
|
" \n",
|
||||||
|
" # Save the model to the outputs directory for capture\n",
|
||||||
|
" mlflow.sklearn.log_model(regression_model,model_save_path)\n",
|
||||||
|
" \n",
|
||||||
|
" # Plot actuals vs predictions and save the plot within the run\n",
|
||||||
|
" fig = plt.figure(1)\n",
|
||||||
|
" idx = np.argsort(data['test']['y'])\n",
|
||||||
|
" plt.plot(data['test']['y'][idx],preds[idx])\n",
|
||||||
|
" fig.savefig(\"actuals_vs_predictions.png\")\n",
|
||||||
|
" mlflow.log_artifact(\"actuals_vs_predictions.png\") "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"You can open the report page for your experiment and runs within it from Azure Portal.\n",
|
||||||
|
"\n",
|
||||||
|
"Select one of the runs to view the metrics, and the plot you saved. The saved scikit-learn model appears under **outputs** tab."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"ws.experiments[experiment_name]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Next steps\n",
|
||||||
|
"\n",
|
||||||
|
"Try out these notebooks to learn more about MLflow-Azure Machine Learning integration:\n",
|
||||||
|
"\n",
|
||||||
|
" * [Train a model using remote compute on Azure Cloud](../train-on-remote/train-on-remote.ipynb)\n",
|
||||||
|
" * [Deploy the model as a web service](../deploy-model/deploy-model.ipynb)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "rastala"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3.6",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python36"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.6.4"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
318
how-to-use-azureml/using-mlflow/train-remote/train-remote.ipynb
Normal file
318
how-to-use-azureml/using-mlflow/train-remote/train-remote.ipynb
Normal file
@@ -0,0 +1,318 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||||
|
"\n",
|
||||||
|
"Licensed under the MIT License."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
""
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Use MLflow with Azure Machine Learning for Remote Training Run\n",
|
||||||
|
"\n",
|
||||||
|
"This example shows you how to use MLflow tracking APIs together with Azure Machine Learning services for storing your metrics and artifacts, from local Notebook run. You'll learn how to:\n",
|
||||||
|
"\n",
|
||||||
|
" 1. Set up MLflow tracking URI so as to use Azure ML\n",
|
||||||
|
" 2. Create experiment\n",
|
||||||
|
" 3. Train a model on Machine Learning Compute while logging metrics and artifacts\n",
|
||||||
|
" 4. View your experiment within your Azure ML Workspace in Azure Portal."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Prerequisites\n",
|
||||||
|
"\n",
|
||||||
|
"Make sure you have completed the [Configuration](../../../configuration.ipnyb) notebook to set up your Azure Machine Learning workspace and ensure other common prerequisites are met."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Set-up\n",
|
||||||
|
"\n",
|
||||||
|
"Check Azure ML SDK version installed on your computer, and then connect to your Workspace."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Check core SDK version number\n",
|
||||||
|
"import azureml.core\n",
|
||||||
|
"from azureml.core import Workspace, Experiment\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"SDK version:\", azureml.core.VERSION)\n",
|
||||||
|
"\n",
|
||||||
|
"ws = Workspace.from_config()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Let's also create a Machine Learning Compute cluster for submitting the remote run. "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
|
||||||
|
"from azureml.core.compute_target import ComputeTargetException\n",
|
||||||
|
"\n",
|
||||||
|
"# Choose a name for your CPU cluster\n",
|
||||||
|
"cpu_cluster_name = \"cpu-cluster\"\n",
|
||||||
|
"\n",
|
||||||
|
"# Verify that cluster does not exist already\n",
|
||||||
|
"try:\n",
|
||||||
|
" cpu_cluster = ComputeTarget(workspace=ws, name=cpu_cluster_name)\n",
|
||||||
|
" print(\"Found existing cpu-cluster\")\n",
|
||||||
|
"except ComputeTargetException:\n",
|
||||||
|
" print(\"Creating new cpu-cluster\")\n",
|
||||||
|
" \n",
|
||||||
|
" # Specify the configuration for the new cluster\n",
|
||||||
|
" compute_config = AmlCompute.provisioning_configuration(vm_size=\"STANDARD_D2_V2\",\n",
|
||||||
|
" min_nodes=0,\n",
|
||||||
|
" max_nodes=1)\n",
|
||||||
|
"\n",
|
||||||
|
" # Create the cluster with the specified name and configuration\n",
|
||||||
|
" cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, compute_config)\n",
|
||||||
|
" \n",
|
||||||
|
" # Wait for the cluster to complete, show the output log\n",
|
||||||
|
" cpu_cluster.wait_for_completion(show_output=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Create Azure ML Experiment"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"The following steps show how to submit a training Python script to a cluster as an Azure ML run, while logging happens through MLflow APIs to your Azure ML Workspace. Let's first create an experiment to hold the training runs."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core import Experiment\n",
|
||||||
|
"\n",
|
||||||
|
"experiment_name = \"experiment-with-mlflow\"\n",
|
||||||
|
"exp = Experiment(workspace=ws, name=experiment_name)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Instrument remote training script using MLflow"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Let's use [*train_diabetes.py*](train_diabetes.py) to train a regression model against diabetes dataset as the example. Note that the training script uses mlflow.start_run() to start logging, and then logs metrics, saves the trained scikit-learn model, and saves a plot as an artifact.\n",
|
||||||
|
"\n",
|
||||||
|
"Run following command to view the script file. Notice the mlflow logging statements, and also notice that the script doesn't have explicit dependencies on azureml library."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"training_script = 'train_diabetes.py'\n",
|
||||||
|
"with open(training_script, 'r') as f:\n",
|
||||||
|
" print(f.read())"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Submit Run to Cluster \n",
|
||||||
|
"\n",
|
||||||
|
"Let's submit the run to cluster. When running on the remote cluster as submitted run, Azure ML sets the MLflow tracking URI to point to your Azure ML Workspace, so that the metrics and artifacts are automatically logged there.\n",
|
||||||
|
"\n",
|
||||||
|
"Note that you have to specify the packages your script depends on, including *azureml-mlflow* that implicitly enables the MLflow logging to Azure ML. \n",
|
||||||
|
"\n",
|
||||||
|
"First, create a environment with Docker enable and required package dependencies specified."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"tags": [
|
||||||
|
"mlflow"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core import Environment\n",
|
||||||
|
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||||
|
"\n",
|
||||||
|
"env = Environment(name=\"mlflow-env\")\n",
|
||||||
|
"\n",
|
||||||
|
"env.docker.enabled = True\n",
|
||||||
|
"\n",
|
||||||
|
"# Specify conda dependencies with scikit-learn and temporary pointers to mlflow extensions\n",
|
||||||
|
"cd = CondaDependencies.create(\n",
|
||||||
|
" conda_packages=[\"scikit-learn\", \"matplotlib\"],\n",
|
||||||
|
" pip_packages=[\"azureml-mlflow\", \"numpy\"]\n",
|
||||||
|
" )\n",
|
||||||
|
"\n",
|
||||||
|
"env.python.conda_dependencies = cd"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Next, specify a script run configuration that includes the training script, environment and CPU cluster created earlier."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core import ScriptRunConfig\n",
|
||||||
|
"\n",
|
||||||
|
"src = ScriptRunConfig(source_directory=\".\", script=training_script)\n",
|
||||||
|
"src.run_config.environment = env\n",
|
||||||
|
"src.run_config.target = cpu_cluster.name"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Finally, submit the run. Note that the first instance of the run typically takes longer as the Docker-based environment is created, several minutes. Subsequent runs reuse the image and are faster."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"run = exp.submit(src)\n",
|
||||||
|
"run.wait_for_completion(show_output=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"You can navigate to your Azure ML Workspace at Azure Portal to view the run metrics and artifacts. "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"run.id"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"You can also get the metrics and bring them to your local notebook, and view the details of the run."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"run.get_metrics()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"ws.get_details()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Next steps\n",
|
||||||
|
"\n",
|
||||||
|
" * [Deploy the model as a web service](../deploy-model/deploy-model.ipynb)\n",
|
||||||
|
" * [Learn more about Azure Machine Learning compute options](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-set-up-training-targets)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "rastala"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3.6",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python36"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.6.4"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
@@ -0,0 +1,46 @@
|
|||||||
|
# Copyright (c) Microsoft. All rights reserved.
|
||||||
|
# Licensed under the MIT license.
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
from sklearn.datasets import load_diabetes
|
||||||
|
from sklearn.linear_model import Ridge
|
||||||
|
from sklearn.metrics import mean_squared_error
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
import mlflow
|
||||||
|
import mlflow.sklearn
|
||||||
|
|
||||||
|
import matplotlib
|
||||||
|
matplotlib.use('Agg')
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
|
with mlflow.start_run():
|
||||||
|
X, y = load_diabetes(return_X_y=True)
|
||||||
|
columns = ['age', 'gender', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6']
|
||||||
|
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
|
||||||
|
data = {
|
||||||
|
"train": {"X": X_train, "y": y_train},
|
||||||
|
"test": {"X": X_test, "y": y_test}}
|
||||||
|
|
||||||
|
mlflow.log_metric("Training samples", len(data['train']['X']))
|
||||||
|
mlflow.log_metric("Test samples", len(data['test']['X']))
|
||||||
|
|
||||||
|
# Log the algorithm parameter alpha to the run
|
||||||
|
mlflow.log_metric('alpha', 0.03)
|
||||||
|
# Create, fit, and test the scikit-learn Ridge regression model
|
||||||
|
regression_model = Ridge(alpha=0.03)
|
||||||
|
regression_model.fit(data['train']['X'], data['train']['y'])
|
||||||
|
preds = regression_model.predict(data['test']['X'])
|
||||||
|
|
||||||
|
# Log mean squared error
|
||||||
|
print('Mean Squared Error is', mean_squared_error(data['test']['y'], preds))
|
||||||
|
mlflow.log_metric('mse', mean_squared_error(data['test']['y'], preds))
|
||||||
|
|
||||||
|
# Save the model to the outputs directory for capture
|
||||||
|
mlflow.sklearn.log_model(regression_model, "model")
|
||||||
|
|
||||||
|
# Plot actuals vs predictions and save the plot within the run
|
||||||
|
fig = plt.figure(1)
|
||||||
|
idx = np.argsort(data['test']['y'])
|
||||||
|
plt.plot(data['test']['y'][idx], preds[idx])
|
||||||
|
fig.savefig("actuals_vs_predictions.png")
|
||||||
|
mlflow.log_artifact("actuals_vs_predictions.png")
|
||||||
@@ -31,6 +31,35 @@ If you have any questions or feedback, send us an email at: [askamldataprep@micr
|
|||||||
|
|
||||||
## Release Notes
|
## Release Notes
|
||||||
|
|
||||||
|
### 2019-05-28 (version 1.1.4)
|
||||||
|
|
||||||
|
New features
|
||||||
|
- You can now use the following expression language functions to extract and parse datetime values into new columns.
|
||||||
|
- `RegEx.extract_record()` extracts datetime elements into a new column.
|
||||||
|
- `create_datetime()` creates datetime objects from separate datetime elements.
|
||||||
|
- When calling `get_profile()`, you can now see that quantile columns are labeled as (est.) to clearly indicate that the values are approximations.
|
||||||
|
- You can now use ** globbing when reading from Azure Blob Storage.
|
||||||
|
- e.g. `dprep.read_csv(path='https://yourblob.blob.core.windows.net/yourcontainer/**/data/*.csv')`
|
||||||
|
|
||||||
|
Bug fixes
|
||||||
|
- Fixed a bug related to reading a Parquet file from a remote source (Azure Blob).
|
||||||
|
|
||||||
|
### 2019-05-08 (version 1.1.3)
|
||||||
|
|
||||||
|
New features
|
||||||
|
- Added support to read from a PostgresSQL database, either by calling `read_postgresql` or using a Datastore.
|
||||||
|
- See examples in how-to guides:
|
||||||
|
- [Data Ingestion notebook](https://aka.ms/aml-data-prep-ingestion-nb)
|
||||||
|
- [Datastore notebook](https://aka.ms/aml-data-prep-datastore-nb)
|
||||||
|
|
||||||
|
Bug fixes and improvements
|
||||||
|
- Fixed issues with column type conversion:
|
||||||
|
- Now correctly converts a boolean or numeric column to a boolean column.
|
||||||
|
- Now does not fail when attempting to set a date column to be date type.
|
||||||
|
- Improved JoinType types and accompanying reference documentation. When joining two dataflows, you can now specify one of these types of join:
|
||||||
|
- NONE, MATCH, INNER, UNMATCHLEFT, LEFTANTI, LEFTOUTER, UNMATCHRIGHT, RIGHTANTI, RIGHTOUTER, FULLANTI, FULL.
|
||||||
|
- Improved data type inferencing to recognize more date formats.
|
||||||
|
|
||||||
### 2019-04-17 (version 1.1.2)
|
### 2019-04-17 (version 1.1.2)
|
||||||
|
|
||||||
Note: Data Prep Python SDK will no longer install `numpy` and `pandas` packages. See [updated installation instructions](https://aka.ms/aml-data-prep-installation).
|
Note: Data Prep Python SDK will no longer install `numpy` and `pandas` packages. See [updated installation instructions](https://aka.ms/aml-data-prep-installation).
|
||||||
|
|||||||
@@ -94,7 +94,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"spark_df = df.take(5).to_pandas_dataframe()\n",
|
"spark_df = df.to_spark_dataframe()\n",
|
||||||
"spark_df.head(5)"
|
"spark_df.head(5)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|||||||
4415
how-to-use-azureml/work-with-data/dataprep/data/large_dflow.json
Normal file
4415
how-to-use-azureml/work-with-data/dataprep/data/large_dflow.json
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,11 @@
|
|||||||
|
Stream Path
|
||||||
|
https://dataset.blob.core.windows.net/blobstore/container/2019/01/01/train.csv
|
||||||
|
https://dataset.blob.core.windows.net/blobstore/container/2019/01/02/train.csv
|
||||||
|
https://dataset.blob.core.windows.net/blobstore/container/2019/01/03/train.csv
|
||||||
|
https://dataset.blob.core.windows.net/blobstore/container/2019/01/04/train.csv
|
||||||
|
https://dataset.blob.core.windows.net/blobstore/container/2019/01/05/train.csv
|
||||||
|
https://dataset.blob.core.windows.net/blobstore/container/2019/01/06/train.csv
|
||||||
|
https://dataset.blob.core.windows.net/blobstore/container/2019/01/07/train.csv
|
||||||
|
https://dataset.blob.core.windows.net/blobstore/container/2019/01/08/train.csv
|
||||||
|
https://dataset.blob.core.windows.net/blobstore/container/2019/01/09/train.csv
|
||||||
|
https://dataset.blob.core.windows.net/blobstore/container/2019/01/10/train.csv
|
||||||
|
@@ -139,6 +139,51 @@
|
|||||||
"dflow_to_lower.head(5)"
|
"dflow_to_lower.head(5)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"#### `RegEx.extract_record()`\n",
|
||||||
|
"Using the `RegEx.extract_record()` expression, add a new record column \"Stream Date Record\", which contains the name capturing groups in the regex with value."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"dflow_regex_extract_record = dprep.auto_read_file('../data/stream-path.csv')\n",
|
||||||
|
"regex = dprep.RegEx('\\/(?<year>\\d{4})\\/(?<month>\\d{2})\\/(?<day>\\d{2})\\/')\n",
|
||||||
|
"dflow_regex_extract_record = dflow_regex_extract_record.add_column(new_column_name='Stream Date Record',\n",
|
||||||
|
" prior_column='Stream Path',\n",
|
||||||
|
" expression=regex.extract_record(dflow_regex_extract_record['Stream Path']))\n",
|
||||||
|
"dflow_regex_extract_record.head(5)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"#### `create_datetime()`\n",
|
||||||
|
"Using the `create_datetime()` expression, add a new column \"Stream Date\", which contains datetime values constructed from year, month, day values extracted from a record column \"Stream Date Record\"."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"year = dprep.col('year', dflow_regex_extract_record['Stream Date Record'])\n",
|
||||||
|
"month = dprep.col('month', dflow_regex_extract_record['Stream Date Record'])\n",
|
||||||
|
"day = dprep.col('day', dflow_regex_extract_record['Stream Date Record'])\n",
|
||||||
|
"dflow_create_datetime = dflow_regex_extract_record.add_column(new_column_name='Stream Date',\n",
|
||||||
|
" prior_column='Stream Date Record',\n",
|
||||||
|
" expression=dprep.create_datetime(year, month, day))\n",
|
||||||
|
"dflow_create_datetime.head(5)"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
|
|||||||
@@ -39,6 +39,7 @@
|
|||||||
"[Read Part Files Using Globbing](#globbing)<br>\n",
|
"[Read Part Files Using Globbing](#globbing)<br>\n",
|
||||||
"[Read JSON](#json)<br>\n",
|
"[Read JSON](#json)<br>\n",
|
||||||
"[Read SQL](#sql)<br>\n",
|
"[Read SQL](#sql)<br>\n",
|
||||||
|
"[Read PostgreSQL](#postgresql)<br>\n",
|
||||||
"[Read From Azure Blob](#azure-blob)<br>\n",
|
"[Read From Azure Blob](#azure-blob)<br>\n",
|
||||||
"[Read From ADLS](#adls)<br>\n",
|
"[Read From ADLS](#adls)<br>\n",
|
||||||
"[Read Pandas DataFrame](#pandas-df)<br>"
|
"[Read Pandas DataFrame](#pandas-df)<br>"
|
||||||
@@ -110,7 +111,8 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"When reading delimited files, the only required parameter is `path`. Other parameters (e.g. separator, encoding, whether to use headers, etc.) are available to modify default behavior.In this case, you can read a file by specifying only its location, then retrieve the first 5 rows to evaluate the result."
|
"When reading delimited files, the only required parameter is `path`. Other parameters (e.g. separator, encoding, whether to use headers, etc.) are available to modify default behavior.\n",
|
||||||
|
"In this case, you can read a file by specifying only its location, then retrieve the first 5 rows to evaluate the result."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -632,6 +634,69 @@
|
|||||||
"df.dtypes"
|
"df.dtypes"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"<a id=\"postgresql\"></a>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Read PostgreSQL"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Data Prep can also fetch data from Azure PostgreSQL servers.\n",
|
||||||
|
"\n",
|
||||||
|
"To read data from a PostgreSQL server, first create a data source object that contains the connection information."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"secret = dprep.register_secret(value=\"dpr3pTestU$er\", id=\"dprepPostgresqlUser\")\n",
|
||||||
|
"ds = dprep.PostgreSQLDataSource(server_name=\"dprep-postgresql-test.postgres.database.azure.com\",\n",
|
||||||
|
" database_name=\"dprep-postgresql-testdb\",\n",
|
||||||
|
" user_name=\"dprepPostgresqlReadOnlyUser@dprep-postgresql-test\",\n",
|
||||||
|
" password=secret)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"As you can see, the password parameter of `PostgreSQLDataSource` accepts a Secret object as well.\n",
|
||||||
|
"Now that you have created a PostgreSQL data source object, you can proceed to read data."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"dflow = dprep.read_postgresql(ds, \"SELECT * FROM public.people\")\n",
|
||||||
|
"dflow.head(5)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"dflow.dtypes"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
@@ -899,7 +964,7 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.6.8"
|
"version": "3.6.4"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
|
|||||||
@@ -142,33 +142,6 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"profile.columns['X Coordinate'].type_counts"
|
"profile.columns['X Coordinate'].type_counts"
|
||||||
]
|
]
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"#TEST CELL: Profile.Compare\n",
|
|
||||||
"import azureml.dataprep as dprep\n",
|
|
||||||
"import math\n",
|
|
||||||
"\n",
|
|
||||||
"lhs_dflow = dprep.auto_read_file('../data/crime-spring.csv')\n",
|
|
||||||
"lhs_profile = lhs_dflow.get_profile(number_of_histogram_bins=100)\n",
|
|
||||||
"rhs_dflow = dprep.auto_read_file('../data/crime-winter.csv')\n",
|
|
||||||
"rhs_profile = rhs_dflow.get_profile(number_of_histogram_bins=100)\n",
|
|
||||||
"\n",
|
|
||||||
"diff = lhs_profile.compare(rhs_profile)\n",
|
|
||||||
"\n",
|
|
||||||
"expected_col1 = dprep.ColumnProfileDifference()\n",
|
|
||||||
"expected_col1.difference_in_count_in_percent = 0\n",
|
|
||||||
"expected_col1.difference_in_histograms = 135349.66146244822\n",
|
|
||||||
"\n",
|
|
||||||
"for actual, expected in zip(diff.column_profile_difference, [expected_col1]) :\n",
|
|
||||||
" assert math.isclose(actual.difference_in_count_in_percent, expected.difference_in_count_in_percent)\n",
|
|
||||||
" assert math.isclose(actual.difference_in_histograms, expected.difference_in_histograms)\n",
|
|
||||||
" break\n"
|
|
||||||
]
|
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
|||||||
@@ -117,6 +117,24 @@
|
|||||||
"dflow_sql.head(5)"
|
"dflow_sql.head(5)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"You can also read from a PostgreSQL database. To do that, you will first get a PostgreSQL database datastore instance and pass it to Data Prep for reading."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"datastore = Datastore(workspace=workspace, name='postgre_test')\n",
|
||||||
|
"dflow_sql = dprep.read_postgresql(data_source=datastore, query='SELECT * FROM public.people')\n",
|
||||||
|
"dflow_sql.head(5)"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
|
|||||||
@@ -126,7 +126,9 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### Create or Attach existing compute resource\n",
|
"### Create or Attach existing compute resource\n",
|
||||||
"By using Azure Machine Learning Compute, a managed service, data scientists can train machine learning models on clusters of Azure virtual machines. Examples include VMs with GPU support. In this tutorial, you use default Azure Machine Learning Compute as your training environment."
|
"By using Azure Machine Learning Compute, a managed service, data scientists can train machine learning models on clusters of Azure virtual machines. Examples include VMs with GPU support. In this tutorial, you create Azure Machine Learning Compute as your training environment. The code below creates the compute clusters for you if they don't already exist in your workspace.\n",
|
||||||
|
"\n",
|
||||||
|
"**Creation of compute takes approximately 5 minutes.** If the AmlCompute with that name is already in your workspace the code will skip the creation process."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -140,10 +142,38 @@
|
|||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
"from azureml.core.compute import AmlCompute\n",
|
||||||
|
"from azureml.core.compute import ComputeTarget\n",
|
||||||
"import os\n",
|
"import os\n",
|
||||||
"\n",
|
"\n",
|
||||||
"cluster_type = os.environ.get(\"AML_COMPUTE_CLUSTER_TYPE\", \"CPU\")\n",
|
"# choose a name for your cluster\n",
|
||||||
"compute_target = ws.get_default_compute_target(cluster_type)"
|
"compute_name = os.environ.get(\"AML_COMPUTE_CLUSTER_NAME\", \"cpu-cluster\")\n",
|
||||||
|
"compute_min_nodes = os.environ.get(\"AML_COMPUTE_CLUSTER_MIN_NODES\", 0)\n",
|
||||||
|
"compute_max_nodes = os.environ.get(\"AML_COMPUTE_CLUSTER_MAX_NODES\", 4)\n",
|
||||||
|
"\n",
|
||||||
|
"# This example uses CPU VM. For using GPU VM, set SKU to STANDARD_NC6\n",
|
||||||
|
"vm_size = os.environ.get(\"AML_COMPUTE_CLUSTER_SKU\", \"STANDARD_D2_V2\")\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"if compute_name in ws.compute_targets:\n",
|
||||||
|
" compute_target = ws.compute_targets[compute_name]\n",
|
||||||
|
" if compute_target and type(compute_target) is AmlCompute:\n",
|
||||||
|
" print('found compute target. just use it. ' + compute_name)\n",
|
||||||
|
"else:\n",
|
||||||
|
" print('creating a new compute target...')\n",
|
||||||
|
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = vm_size,\n",
|
||||||
|
" min_nodes = compute_min_nodes, \n",
|
||||||
|
" max_nodes = compute_max_nodes)\n",
|
||||||
|
"\n",
|
||||||
|
" # create the cluster\n",
|
||||||
|
" compute_target = ComputeTarget.create(ws, compute_name, provisioning_config)\n",
|
||||||
|
" \n",
|
||||||
|
" # can poll for a minimum number of nodes and for a specific timeout. \n",
|
||||||
|
" # if no min node count is provided it will use the scale settings for the cluster\n",
|
||||||
|
" compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)\n",
|
||||||
|
" \n",
|
||||||
|
" # For a more detailed view of current AmlCompute status, use get_status()\n",
|
||||||
|
" print(compute_target.get_status().serialize())"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -324,8 +354,8 @@
|
|||||||
"# get hold of the current run\n",
|
"# get hold of the current run\n",
|
||||||
"run = Run.get_context()\n",
|
"run = Run.get_context()\n",
|
||||||
"\n",
|
"\n",
|
||||||
"print('Train a logistic regression model with regularizaion rate of', args.reg)\n",
|
"print('Train a logistic regression model with regularization rate of', args.reg)\n",
|
||||||
"clf = LogisticRegression(C=1.0/args.reg, random_state=42)\n",
|
"clf = LogisticRegression(C=1.0/args.reg, solver=\"liblinear\", multi_class=\"auto\", random_state=42)\n",
|
||||||
"clf.fit(X_train, y_train)\n",
|
"clf.fit(X_train, y_train)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"print('Predict the test set')\n",
|
"print('Predict the test set')\n",
|
||||||
@@ -386,14 +416,13 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"### Create an estimator\n",
|
"### Create an estimator\n",
|
||||||
"\n",
|
"\n",
|
||||||
"An estimator object is used to submit the run. Create your estimator by running the following code to define:\n",
|
"An estimator object is used to submit the run. Azure Machine Learning has pre-configured estimators for common machine learning frameworks, as well as generic Estimator. Create SKLearn estimator for scikit-learn model, by specifying\n",
|
||||||
"\n",
|
"\n",
|
||||||
"* The name of the estimator object, `est`\n",
|
"* The name of the estimator object, `est`\n",
|
||||||
"* The directory that contains your scripts. All the files in this directory are uploaded into the cluster nodes for execution. \n",
|
"* The directory that contains your scripts. All the files in this directory are uploaded into the cluster nodes for execution. \n",
|
||||||
"* The compute target. In this case you will use the AmlCompute you created\n",
|
"* The compute target. In this case you will use the AmlCompute you created\n",
|
||||||
"* The training script name, train.py\n",
|
"* The training script name, train.py\n",
|
||||||
"* Parameters required from the training script \n",
|
"* Parameters required from the training script \n",
|
||||||
"* Python packages needed for training\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"In this tutorial, this target is AmlCompute. All files in the script folder are uploaded into the cluster nodes for execution. The data_folder is set to use the datastore (`ds.path('mnist').as_mount()`)."
|
"In this tutorial, this target is AmlCompute. All files in the script folder are uploaded into the cluster nodes for execution. The data_folder is set to use the datastore (`ds.path('mnist').as_mount()`)."
|
||||||
]
|
]
|
||||||
@@ -408,18 +437,17 @@
|
|||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from azureml.train.estimator import Estimator\n",
|
"from azureml.train.sklearn import SKLearn\n",
|
||||||
"\n",
|
"\n",
|
||||||
"script_params = {\n",
|
"script_params = {\n",
|
||||||
" '--data-folder': ds.path('mnist').as_mount(),\n",
|
" '--data-folder': ds.path('mnist').as_mount(),\n",
|
||||||
" '--regularization': 0.05\n",
|
" '--regularization': 0.5\n",
|
||||||
"}\n",
|
"}\n",
|
||||||
"\n",
|
"\n",
|
||||||
"est = Estimator(source_directory=script_folder,\n",
|
"est = SKLearn(source_directory=script_folder,\n",
|
||||||
" script_params=script_params,\n",
|
" script_params=script_params,\n",
|
||||||
" compute_target=compute_target,\n",
|
" compute_target=compute_target,\n",
|
||||||
" entry_script='train.py',\n",
|
" entry_script='train.py')"
|
||||||
" conda_packages=['scikit-learn'])"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -536,7 +564,7 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# specify show_output to True for a verbose log\n",
|
"# specify show_output to True for a verbose log\n",
|
||||||
"run.wait_for_completion(show_output=False) "
|
"run.wait_for_completion(show_output=True) "
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -646,18 +674,6 @@
|
|||||||
"language": "python",
|
"language": "python",
|
||||||
"name": "python36"
|
"name": "python36"
|
||||||
},
|
},
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.6.8"
|
|
||||||
},
|
|
||||||
"msauthor": "roastala"
|
"msauthor": "roastala"
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
|
|||||||
Reference in New Issue
Block a user