Mirror of https://github.com/Azure/MachineLearningNotebooks.git (synced 2025-12-20 01:27:06 -05:00)

Compare commits — 184 commits (azureml-sd ... azureml-sd)
Dockerfiles/1.0.21/Dockerfile (new file, 29 lines)
@@ -0,0 +1,29 @@
FROM continuumio/miniconda:4.5.11

# install git
RUN apt-get update && apt-get upgrade -y && apt-get install -y git

# create a new conda environment named azureml
RUN conda create -n azureml -y -q Python=3.6

# install additional packages used by sample notebooks. This is optional.
RUN ["/bin/bash", "-c", "source activate azureml && conda install -y tqdm cython matplotlib scikit-learn"]

# install azureml-sdk components
RUN ["/bin/bash", "-c", "source activate azureml && pip install azureml-sdk[notebooks]==1.0.21"]

# clone Azure ML GitHub sample notebooks
RUN cd /home && git clone -b "azureml-sdk-1.0.21" --single-branch https://github.com/Azure/MachineLearningNotebooks.git

# generate jupyter configuration file
RUN ["/bin/bash", "-c", "source activate azureml && mkdir ~/.jupyter && cd ~/.jupyter && jupyter notebook --generate-config"]

# set an empty token for Jupyter to remove authentication.
# this is NOT recommended for production environments
RUN echo "c.NotebookApp.token = ''" >> ~/.jupyter/jupyter_notebook_config.py

# open up port 8887 on the container
EXPOSE 8887

# start Jupyter notebook server on port 8887 when the container starts
CMD /bin/bash -c "cd /home/MachineLearningNotebooks && source activate azureml && jupyter notebook --port 8887 --no-browser --ip 0.0.0.0 --allow-root"
Dockerfiles/1.0.23/Dockerfile (new file, 29 lines)
@@ -0,0 +1,29 @@
FROM continuumio/miniconda:4.5.11

# install git
RUN apt-get update && apt-get upgrade -y && apt-get install -y git

# create a new conda environment named azureml
RUN conda create -n azureml -y -q Python=3.6

# install additional packages used by sample notebooks. This is optional.
RUN ["/bin/bash", "-c", "source activate azureml && conda install -y tqdm cython matplotlib scikit-learn"]

# install azureml-sdk components
RUN ["/bin/bash", "-c", "source activate azureml && pip install azureml-sdk[notebooks]==1.0.23"]

# clone Azure ML GitHub sample notebooks
RUN cd /home && git clone -b "azureml-sdk-1.0.23" --single-branch https://github.com/Azure/MachineLearningNotebooks.git

# generate jupyter configuration file
RUN ["/bin/bash", "-c", "source activate azureml && mkdir ~/.jupyter && cd ~/.jupyter && jupyter notebook --generate-config"]

# set an empty token for Jupyter to remove authentication.
# this is NOT recommended for production environments
RUN echo "c.NotebookApp.token = ''" >> ~/.jupyter/jupyter_notebook_config.py

# open up port 8887 on the container
EXPOSE 8887

# start Jupyter notebook server on port 8887 when the container starts
CMD /bin/bash -c "cd /home/MachineLearningNotebooks && source activate azureml && jupyter notebook --port 8887 --no-browser --ip 0.0.0.0 --allow-root"
Dockerfiles/1.0.30/Dockerfile (new file, 29 lines)
@@ -0,0 +1,29 @@
FROM continuumio/miniconda:4.5.11

# install git
RUN apt-get update && apt-get upgrade -y && apt-get install -y git

# create a new conda environment named azureml
RUN conda create -n azureml -y -q Python=3.6

# install additional packages used by sample notebooks. This is optional.
RUN ["/bin/bash", "-c", "source activate azureml && conda install -y tqdm cython matplotlib scikit-learn"]

# install azureml-sdk components
RUN ["/bin/bash", "-c", "source activate azureml && pip install azureml-sdk[notebooks]==1.0.30"]

# clone Azure ML GitHub sample notebooks
RUN cd /home && git clone -b "azureml-sdk-1.0.30" --single-branch https://github.com/Azure/MachineLearningNotebooks.git

# generate jupyter configuration file
RUN ["/bin/bash", "-c", "source activate azureml && mkdir ~/.jupyter && cd ~/.jupyter && jupyter notebook --generate-config"]

# set an empty token for Jupyter to remove authentication.
# this is NOT recommended for production environments
RUN echo "c.NotebookApp.token = ''" >> ~/.jupyter/jupyter_notebook_config.py

# open up port 8887 on the container
EXPOSE 8887

# start Jupyter notebook server on port 8887 when the container starts
CMD /bin/bash -c "cd /home/MachineLearningNotebooks && source activate azureml && jupyter notebook --port 8887 --no-browser --ip 0.0.0.0 --allow-root"
Dockerfiles/1.0.33/Dockerfile (new file, 29 lines)
@@ -0,0 +1,29 @@
FROM continuumio/miniconda:4.5.11

# install git
RUN apt-get update && apt-get upgrade -y && apt-get install -y git

# create a new conda environment named azureml
RUN conda create -n azureml -y -q Python=3.6

# install additional packages used by sample notebooks. This is optional.
RUN ["/bin/bash", "-c", "source activate azureml && conda install -y tqdm cython matplotlib scikit-learn"]

# install azureml-sdk components
RUN ["/bin/bash", "-c", "source activate azureml && pip install azureml-sdk[notebooks]==1.0.33"]

# clone Azure ML GitHub sample notebooks
RUN cd /home && git clone -b "azureml-sdk-1.0.33" --single-branch https://github.com/Azure/MachineLearningNotebooks.git

# generate jupyter configuration file
RUN ["/bin/bash", "-c", "source activate azureml && mkdir ~/.jupyter && cd ~/.jupyter && jupyter notebook --generate-config"]

# set an empty token for Jupyter to remove authentication.
# this is NOT recommended for production environments
RUN echo "c.NotebookApp.token = ''" >> ~/.jupyter/jupyter_notebook_config.py

# open up port 8887 on the container
EXPOSE 8887

# start Jupyter notebook server on port 8887 when the container starts
CMD /bin/bash -c "cd /home/MachineLearningNotebooks && source activate azureml && jupyter notebook --port 8887 --no-browser --ip 0.0.0.0 --allow-root"
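These four Dockerfiles differ only in the pinned azureml-sdk version and the notebook branch they clone. A minimal sketch of how one of them might be built and run locally, assuming Docker is installed; the image tag `azureml-notebooks:1.0.33` is an arbitrary choice, not something defined in this repository:

```sh
# build an image from the 1.0.33 Dockerfile; the tag name is illustrative
docker build -t azureml-notebooks:1.0.33 Dockerfiles/1.0.33/

# run it and map the exposed Jupyter port 8887 to the host
docker run -it -p 8887:8887 azureml-notebooks:1.0.33
```

With the container running, the notebook server is reachable at http://localhost:8887; note that the image disables Jupyter token authentication, which the Dockerfile itself flags as unsuitable for production.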
@@ -1,3 +1,4 @@
This software is made available to you on the condition that you agree to
[your agreement][1] governing your use of Azure.
If you do not have an existing agreement governing your use of Azure, you agree that
NBSETUP.md (21 lines changed)
@@ -1,6 +1,4 @@
# Setting up environment

---
# Set up your notebook environment for Azure Machine Learning

To run the notebooks in this repository use one of the following options.

@@ -12,9 +10,7 @@ Azure Notebooks is a hosted Jupyter-based notebook service in the Azure cloud. A
1. Follow the instructions in the [Configuration](configuration.ipynb) notebook to create and connect to a workspace
1. Open one of the sample notebooks

**Make sure the Azure Notebook kernel is set to `Python 3.6`** when you open a notebook


**Make sure the Azure Notebook kernel is set to `Python 3.6`** when you open a notebook by choosing Kernel > Change Kernel > Python 3.6 from the menus.

## **Option 2: Use your own notebook server**

@@ -28,11 +24,8 @@ pip install azureml-sdk
git clone https://github.com/Azure/MachineLearningNotebooks.git

# below steps are optional
# install the base SDK and a Jupyter notebook server
pip install azureml-sdk[notebooks]

# install the data prep component
pip install azureml-dataprep
# install the base SDK, Jupyter notebook server and tensorboard
pip install azureml-sdk[notebooks,tensorboard]

# install model explainability component
pip install azureml-sdk[explain]
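Pulled together, a minimal sketch of the "own notebook server" path might look like the following; the conda environment name `myenv` is an arbitrary choice, and creating a separate environment is optional:

```sh
# create and activate a fresh Python 3.6 environment (name is arbitrary)
conda create -n myenv -y python=3.6
conda activate myenv

# install the base SDK together with the Jupyter notebook server
pip install azureml-sdk[notebooks]

# get the sample notebooks and start Jupyter from the repository root
git clone https://github.com/Azure/MachineLearningNotebooks.git
cd MachineLearningNotebooks
jupyter notebook
```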
@@ -58,8 +51,7 @@ Please make sure you start with the [Configuration](configuration.ipynb) noteboo

### Video walkthrough:

[![Youtube video walkthrough](./images/yt_cover.png)](https://youtu.be/VIsXeTuW3FU)

[!VIDEO https://youtu.be/VIsXeTuW3FU]

## **Option 3: Use Docker**

@@ -90,9 +82,6 @@ Now you can point your browser to http://localhost:8887. We recommend that you s
If you need additional Azure ML SDK components, you can either modify the Docker files before you build the Docker images to add additional steps, or install them through the command line in the live container after you build the Docker image. For example:

```sh
# install dataprep components
pip install azureml-dataprep

# install the core SDK and automated ml components
pip install azureml-sdk[automl]
```
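For the Docker option specifically, one way to run those installs in the already-running container is `docker exec`; this is a sketch under the assumption that the container was started from one of the images above, with `<container-name>` standing in for whatever name or ID `docker ps` reports:

```sh
# open a shell inside the running notebook container and add extra components
docker exec -it <container-name> /bin/bash -c \
  "source activate azureml && pip install azureml-dataprep azureml-sdk[automl]"
```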
README.md (26 lines changed)
@@ -11,7 +11,7 @@ pip install azureml-sdk
Read more detailed instructions on [how to set up your environment](./NBSETUP.md) using Azure Notebook service, your own Jupyter notebook server, or Docker.

## How to navigate and use the example notebooks?
You should always run the [Configuration](./configuration.ipynb) notebook first when setting up a notebook library on a new machine or in a new environment. It configures your notebook library to connect to an Azure Machine Learning workspace, and sets up your workspace and compute to be used by many of the other examples.
If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, you should always run the [Configuration](./configuration.ipynb) notebook first when setting up a notebook library on a new machine or in a new environment. It configures your notebook library to connect to an Azure Machine Learning workspace, and sets up your workspace and compute to be used by many of the other examples.

If you want to...

@@ -20,7 +20,7 @@ If you want to...
* ...learn about experimentation and tracking run history, first [train within Notebook](./how-to-use-azureml/training/train-within-notebook/train-within-notebook.ipynb), then try [training on remote VM](./how-to-use-azureml/training/train-on-remote-vm/train-on-remote-vm.ipynb) and [using logging APIs](./how-to-use-azureml/training/logging-api/logging-api.ipynb).
* ...train deep learning models at scale, first learn about [Machine Learning Compute](./how-to-use-azureml/training/train-on-amlcompute/train-on-amlcompute.ipynb), and then try [distributed hyperparameter tuning](./how-to-use-azureml/training-with-deep-learning/train-hyperparameter-tune-deploy-with-pytorch/train-hyperparameter-tune-deploy-with-pytorch.ipynb) and [distributed training](./how-to-use-azureml/training-with-deep-learning/distributed-pytorch-with-horovod/distributed-pytorch-with-horovod.ipynb).
* ...deploy models as a realtime scoring service, first learn the basics by [training within Notebook and deploying to Azure Container Instance](./how-to-use-azureml/training/train-within-notebook/train-within-notebook.ipynb), then learn how to [register and manage models, and create Docker images](./how-to-use-azureml/deployment/register-model-create-image-deploy-service/register-model-create-image-deploy-service.ipynb), and [production deploy models on Azure Kubernetes Cluster](./how-to-use-azureml/deployment/production-deploy-to-aks/production-deploy-to-aks.ipynb).
* ...deploy models as a batch scoring service, first [train a model within Notebook](./how-to-use-azureml/training/train-within-notebook/train-within-notebook.ipynb), learn how to [register and manage models](./how-to-use-azureml/deployment/register-model-create-image-deploy-service/register-model-create-image-deploy-service.ipynb), then [create Machine Learning Compute for scoring compute](./how-to-use-azureml/training/train-on-amlcompute/train-on-amlcompute.ipynb), and [use Machine Learning Pipelines to deploy your model](./how-to-use-azureml/machine-learning-pipelines/pipeline-mpi-batch-prediction.ipynb).
* ...deploy models as a batch scoring service, first [train a model within Notebook](./how-to-use-azureml/training/train-within-notebook/train-within-notebook.ipynb), learn how to [register and manage models](./how-to-use-azureml/deployment/register-model-create-image-deploy-service/register-model-create-image-deploy-service.ipynb), then [create Machine Learning Compute for scoring compute](./how-to-use-azureml/training/train-on-amlcompute/train-on-amlcompute.ipynb), and [use Machine Learning Pipelines to deploy your model](https://aka.ms/pl-batch-scoring).
* ...monitor your deployed models, learn about using [App Insights](./how-to-use-azureml/deployment/enable-app-insights-in-production-service/enable-app-insights-in-production-service.ipynb) and [model data collection](./how-to-use-azureml/deployment/enable-data-collection-for-models-in-aks/enable-data-collection-for-models-in-aks.ipynb).

## Tutorials

@@ -54,3 +54,25 @@ Visit following repos to see projects contributed by Azure ML users:

- [Fine tune natural language processing models using Azure Machine Learning service](https://github.com/Microsoft/AzureML-BERT)
- [Fashion MNIST with Azure ML SDK](https://github.com/amynic/azureml-sdk-fashion)

## Data/Telemetry
This repository collects usage data and sends it to Microsoft to help improve our products and services. Read Microsoft's [privacy statement to learn more](https://privacy.microsoft.com/en-US/privacystatement).

To opt out of tracking, please go to the raw markdown or .ipynb files and remove the following line of code:

```sh
""
```
This URL will be slightly different depending on the file.

@@ -9,6 +9,13 @@
    "Licensed under the MIT License."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    ""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
@@ -32,7 +39,6 @@
    "    1. Workspace parameters\n",
    "    1. Access your workspace\n",
    "    1. Create a new workspace\n",
    "    1. Create compute resources\n",
    "1. [Next steps](#Next%20steps)\n",
    "\n",
    "---\n",
@@ -96,7 +102,7 @@
   "source": [
    "import azureml.core\n",
    "\n",
    "print(\"This notebook was created using version 1.0.21 of the Azure ML SDK\")\n",
    "print(\"This notebook was created using version 1.0.41 of the Azure ML SDK\")\n",
    "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
   ]
  },
@@ -198,7 +204,7 @@
    "\n",
    "If you don't have an existing workspace and are the owner of the subscription or resource group, you can create a new workspace. If you don't have a resource group, the create workspace command will create one for you using the name you provide.\n",
    "\n",
    "**Note**: As with other Azure services, there are limits on certain resources (for example AmlCompute quota) associated with the Azure ML service. Please read [this article](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota.\n",
    "**Note**: The Workspace creation command will create default CPU and GPU compute clusters for you. As with other Azure services, there are limits on certain resources (for example AmlCompute quota) associated with the Azure ML service. Please read [this article](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota.\n",
    "\n",
    "This cell will create an Azure ML workspace for you in a subscription provided you have the correct permissions.\n",
    "\n",
@@ -227,6 +233,8 @@
    "                      subscription_id = subscription_id,\n",
    "                      resource_group = resource_group, \n",
    "                      location = workspace_region,\n",
    "                      default_cpu_compute_target=Workspace.DEFAULT_CPU_CLUSTER_CONFIGURATION,\n",
    "                      default_gpu_compute_target=Workspace.DEFAULT_GPU_CLUSTER_CONFIGURATION,\n",
    "                      create_resource_group = True,\n",
    "                      exist_ok = True)\n",
    "ws.get_details()\n",
@@ -235,97 +243,6 @@
    "ws.write_config()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Create compute resources for your training experiments\n",
    "\n",
    "Many of the sample notebooks use Azure ML managed compute (AmlCompute) to train models using a dynamically scalable pool of compute. In this section you will create default compute clusters for use by the other notebooks and any other operations you choose.\n",
    "\n",
    "To create a cluster, you need to specify a compute configuration that specifies the type of machine to be used and the scalability behaviors. Then you choose a name for the cluster that is unique within the workspace that can be used to address the cluster later.\n",
    "\n",
    "The cluster parameters are:\n",
    "* vm_size - this describes the virtual machine type and size used in the cluster. All machines in the cluster are the same type. You can get the list of vm sizes available in your region by using the CLI command\n",
    "\n",
    "```shell\n",
    "az vm list-skus -o tsv\n",
    "```\n",
    "* min_nodes - this sets the minimum size of the cluster. If you set the minimum to 0 the cluster will shut down all nodes while not in use. Setting this number to a value higher than 0 will allow for faster start-up times, but you will also be billed when the cluster is not in use.\n",
    "* max_nodes - this sets the maximum size of the cluster. Setting this to a larger number allows for more concurrency and a greater distributed processing of scale-out jobs.\n",
    "\n",
    "\n",
    "To create a **CPU** cluster now, run the cell below. The autoscale settings mean that the cluster will scale down to 0 nodes when inactive and up to 4 nodes when busy."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from azureml.core.compute import ComputeTarget, AmlCompute\n",
    "from azureml.core.compute_target import ComputeTargetException\n",
    "\n",
    "# Choose a name for your CPU cluster\n",
    "cpu_cluster_name = \"cpucluster\"\n",
    "\n",
    "# Verify that cluster does not exist already\n",
    "try:\n",
    "    cpu_cluster = ComputeTarget(workspace=ws, name=cpu_cluster_name)\n",
    "    print(\"Found existing cpucluster\")\n",
    "except ComputeTargetException:\n",
    "    print(\"Creating new cpucluster\")\n",
    "    \n",
    "    # Specify the configuration for the new cluster\n",
    "    compute_config = AmlCompute.provisioning_configuration(vm_size=\"STANDARD_D2_V2\",\n",
    "                                                           min_nodes=0,\n",
    "                                                           max_nodes=4)\n",
    "\n",
    "    # Create the cluster with the specified name and configuration\n",
    "    cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, compute_config)\n",
    "    \n",
    "    # Wait for the cluster to complete, show the output log\n",
    "    cpu_cluster.wait_for_completion(show_output=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "To create a **GPU** cluster, run the cell below. Note that your subscription must have sufficient quota for GPU VMs or the command will fail. To increase quota, see [these instructions](https://docs.microsoft.com/en-us/azure/azure-supportability/resource-manager-core-quotas-request). "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from azureml.core.compute import ComputeTarget, AmlCompute\n",
    "from azureml.core.compute_target import ComputeTargetException\n",
    "\n",
    "# Choose a name for your GPU cluster\n",
    "gpu_cluster_name = \"gpucluster\"\n",
    "\n",
    "# Verify that cluster does not exist already\n",
    "try:\n",
    "    gpu_cluster = ComputeTarget(workspace=ws, name=gpu_cluster_name)\n",
    "    print(\"Found existing gpu cluster\")\n",
    "except ComputeTargetException:\n",
    "    print(\"Creating new gpucluster\")\n",
    "    \n",
    "    # Specify the configuration for the new cluster\n",
    "    compute_config = AmlCompute.provisioning_configuration(vm_size=\"STANDARD_NC6\",\n",
    "                                                           min_nodes=0,\n",
    "                                                           max_nodes=4)\n",
    "    # Create the cluster with the specified name and configuration\n",
    "    gpu_cluster = ComputeTarget.create(ws, gpu_cluster_name, compute_config)\n",
    "\n",
    "    # Wait for the cluster to complete, show the output log\n",
    "    gpu_cluster.wait_for_completion(show_output=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
@@ -287,6 +287,8 @@ Notice how the parameters are modified when using the CPU-only mode.

The outputs of the script can be observed in the master notebook as the script is executed.

![png](./img/describe_run.PNG)
@@ -1,8 +1,8 @@
# Table of Contents
1. [Automated ML Introduction](#introduction)
1. [Running samples in Azure Notebooks](#jupyter)
1. [Running samples in Azure Databricks](#databricks)
1. [Running samples in a Local Conda environment](#localconda)
1. [Setup using Azure Notebooks](#jupyter)
1. [Setup using Azure Databricks](#databricks)
1. [Setup using a Local Conda environment](#localconda)
1. [Automated ML SDK Sample Notebooks](#samples)
1. [Documentation](#documentation)
1. [Running using python command](#pythoncommand)
@@ -13,15 +13,15 @@
Automated machine learning (automated ML) builds high quality machine learning models for you by automating model and hyperparameter selection. Bring a labelled dataset that you want to build a model for, automated ML will give you a high quality machine learning model that you can use for predictions.

If you are new to Data Science, AutoML will help you get jumpstarted by simplifying machine learning model building. It abstracts you from needing to perform model selection, hyperparameter selection and in one step creates a high quality trained model for you to use.
If you are new to Data Science, automated ML will help you get jumpstarted by simplifying machine learning model building. It abstracts you from needing to perform model selection, hyperparameter selection and in one step creates a high quality trained model for you to use.

If you are an experienced data scientist, AutoML will help increase your productivity by intelligently performing the model and hyperparameter selection for your training and generates high quality models much quicker than manually specifying several combinations of the parameters and running training jobs. AutoML provides visibility and access to all the training jobs and the performance characteristics of the models to help you further tune the pipeline if you desire.
If you are an experienced data scientist, automated ML will help increase your productivity by intelligently performing the model and hyperparameter selection for your training and generates high quality models much quicker than manually specifying several combinations of the parameters and running training jobs. Automated ML provides visibility and access to all the training jobs and the performance characteristics of the models to help you further tune the pipeline if you desire.

Below are the three execution environments supported by AutoML.
Below are the three execution environments supported by automated ML.

<a name="jupyter"></a>
## Running samples in Azure Notebooks - Jupyter based notebooks in the Azure cloud
## Setup using Azure Notebooks - Jupyter based notebooks in the Azure cloud

1. [![Azure Notebooks](https://notebooks.azure.com/launch.png)](https://aka.ms/aml-clone-azure-notebooks)
[Import sample notebooks ](https://aka.ms/aml-clone-azure-notebooks) into Azure Notebooks.
@@ -29,7 +29,7 @@ Below are the three execution environments supported by AutoML.
1. Open one of the sample notebooks.

<a name="databricks"></a>
## Running samples in Azure Databricks
## Setup using Azure Databricks

**NOTE**: Please create your Azure Databricks cluster as v4.x (high concurrency preferred) with **Python 3** (dropdown).
**NOTE**: You should at least have contributor access to your Azure subscription to run the notebook.
@@ -39,7 +39,7 @@ Below are the three execution environments supported by AutoML.
- Attach the notebook to the cluster.

<a name="localconda"></a>
## Running samples in a Local Conda environment
## Setup using a Local Conda environment

To run these notebooks on your own notebook server, use these installation instructions.
The instructions below will install everything you need and then start a Jupyter notebook.
@@ -49,11 +49,15 @@ The instructions below will install everything you need and then start a Jupyter
There's no need to install mini-conda specifically.

### 2. Downloading the sample notebooks
- Download the sample notebooks from [GitHub](https://github.com/Azure/MachineLearningNotebooks) as zip and extract the contents to a local directory. The AutoML sample notebooks are in the "automl" folder.
- Download the sample notebooks from [GitHub](https://github.com/Azure/MachineLearningNotebooks) as zip and extract the contents to a local directory. The automated ML sample notebooks are in the "automated-machine-learning" folder.

### 3. Setup a new conda environment
The **automl/automl_setup** script creates a new conda environment, installs the necessary packages, configures the widget and starts a jupyter notebook.
It takes the conda environment name as an optional parameter. The default conda environment name is azure_automl. The exact command depends on the operating system. See the specific sections below for Windows, Mac and Linux. It can take about 10 minutes to execute.
The **automl_setup** script creates a new conda environment, installs the necessary packages, configures the widget and starts a jupyter notebook. It takes the conda environment name as an optional parameter. The default conda environment name is azure_automl. The exact command depends on the operating system. See the specific sections below for Windows, Mac and Linux. It can take about 10 minutes to execute.

Packages installed by the **automl_setup** script:
<ul><li>python</li><li>nb_conda</li><li>matplotlib</li><li>numpy</li><li>cython</li><li>urllib3</li><li>scipy</li><li>scikit-learn</li><li>pandas</li><li>tensorflow</li><li>py-xgboost</li><li>azureml-sdk</li><li>azureml-widgets</li><li>pandas-ml</li></ul>

For more details refer to the [automl_env.yml](./automl_env.yml)
## Windows
Start an **Anaconda Prompt** window, cd to the **how-to-use-azureml/automated-machine-learning** folder where the sample notebooks were extracted and then run:
```
@@ -81,7 +85,7 @@ bash automl_setup_linux.sh

### 5. Running Samples
- Please make sure you use the Python [conda env:azure_automl] kernel when trying the sample Notebooks.
- Follow the instructions in the individual notebooks to explore various features in AutoML
- Follow the instructions in the individual notebooks to explore various features in automated ML.

### 6. Starting jupyter notebook manually
To start your Jupyter notebook manually, use:
@@ -103,22 +107,22 @@ jupyter notebook

- [auto-ml-classification.ipynb](classification/auto-ml-classification.ipynb)
  - Dataset: scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits)
  - Simple example of using Auto ML for classification
  - Simple example of using automated ML for classification
  - Uses local compute for training

- [auto-ml-regression.ipynb](regression/auto-ml-regression.ipynb)
  - Dataset: scikit learn's [diabetes dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html)
  - Simple example of using Auto ML for regression
  - Simple example of using automated ML for regression
  - Uses local compute for training

- [auto-ml-remote-execution.ipynb](remote-execution/auto-ml-remote-execution.ipynb)
  - Dataset: scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits)
  - Example of using Auto ML for classification using a remote linux DSVM for training
  - Example of using automated ML for classification using a remote linux DSVM for training
  - Parallel execution of iterations
  - Async tracking of progress
  - Cancelling individual iterations or entire run
  - Retrieving models for any iteration or logged metric
  - Specify automl settings as kwargs
  - Specify automated ML settings as kwargs

- [auto-ml-remote-amlcompute.ipynb](remote-batchai/auto-ml-remote-amlcompute.ipynb)
  - Dataset: scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits)
@@ -127,7 +131,7 @@ jupyter notebook
  - Async tracking of progress
  - Cancelling individual iterations or entire run
  - Retrieving models for any iteration or logged metric
  - Specify automl settings as kwargs
  - Specify automated ML settings as kwargs

- [auto-ml-remote-attach.ipynb](remote-attach/auto-ml-remote-attach.ipynb)
  - Dataset: Scikit learn's [20newsgroup](http://scikit-learn.org/stable/datasets/twenty_newsgroups.html)
@@ -148,8 +152,8 @@ jupyter notebook

- [auto-ml-exploring-previous-runs.ipynb](exploring-previous-runs/auto-ml-exploring-previous-runs.ipynb)
  - List all projects for the workspace
  - List all AutoML Runs for a given project
  - Get details for a AutoML Run. (Automl settings, run widget & all metrics)
  - List all automated ML Runs for a given project
  - Get details for an automated ML Run. (automated ML settings, run widget & all metrics)
  - Download fitted pipeline for any iteration

- [auto-ml-remote-execution-with-datastore.ipynb](remote-execution-with-datastore/auto-ml-remote-execution-with-datastore.ipynb)
@@ -158,7 +162,7 @@ jupyter notebook

- [auto-ml-classification-with-deployment.ipynb](classification-with-deployment/auto-ml-classification-with-deployment.ipynb)
  - Dataset: scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits)
  - Simple example of using Auto ML for classification
  - Simple example of using automated ML for classification
  - Registering the model
  - Creating Image and creating aci service
  - Testing the aci service
@@ -178,16 +182,21 @@ jupyter notebook

- [auto-ml-classification-with-whitelisting.ipynb](classification-with-whitelisting/auto-ml-classification-with-whitelisting.ipynb)
  - Dataset: scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits)
  - Simple example of using Auto ML for classification with whitelisting tensorflow models.
  - Simple example of using automated ML for classification with whitelisting tensorflow models.
  - Uses local compute for training

- [auto-ml-forecasting-energy-demand.ipynb](forecasting-energy-demand/auto-ml-forecasting-energy-demand.ipynb)
  - Dataset: [NYC energy demand data](forecasting-a/nyc_energy.csv)
  - Example of using AutoML for training a forecasting model
  - Example of using automated ML for training a forecasting model

- [auto-ml-forecasting-orange-juice-sales.ipynb](forecasting-orange-juice-sales/auto-ml-forecasting-orange-juice-sales.ipynb)
  - Dataset: [Dominick's grocery sales of orange juice](forecasting-b/dominicks_OJ.csv)
  - Example of training an AutoML forecasting model on multiple time-series
  - Example of training an automated ML forecasting model on multiple time-series

- [auto-ml-classification-with-onnx.ipynb](classification-with-onnx/auto-ml-classification-with-onnx.ipynb)
  - Dataset: scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits)
  - Simple example of using automated ML for classification with ONNX models
  - Uses local compute for training

<a name="documentation"></a>
See [Configure automated machine learning experiments](https://docs.microsoft.com/azure/machine-learning/service/how-to-configure-auto-train) to learn more about the settings and features available for automated machine learning experiments.
@@ -206,10 +215,18 @@ The main code of the file must be indented so that it is under this condition.
<a name="troubleshooting"></a>
# Troubleshooting
## automl_setup fails
1. On windows, make sure that you are running automl_setup from an Anaconda Prompt window rather than a regular cmd window. You can launch the "Anaconda Prompt" window by hitting the Start button and typing "Anaconda Prompt". If you don't see the application "Anaconda Prompt", you might not have conda or mini conda installed. In that case, you can install it [here](https://conda.io/miniconda.html)
1. On Windows, make sure that you are running automl_setup from an Anaconda Prompt window rather than a regular cmd window. You can launch the "Anaconda Prompt" window by hitting the Start button and typing "Anaconda Prompt". If you don't see the application "Anaconda Prompt", you might not have conda or mini conda installed. In that case, you can install it [here](https://conda.io/miniconda.html)
2. Check that you have conda 64-bit installed rather than 32-bit. You can check this with the command `conda info`. The `platform` should be `win-64` for Windows or `osx-64` for Mac.
3. Check that you have conda 4.4.10 or later. You can check the version with the command `conda -V`. If you have a previous version installed, you can update it using the command: `conda update conda`.
4. Pass a new name as the first parameter to automl_setup so that it creates a new conda environment. You can view existing conda environments using `conda env list` and remove them with `conda env remove -n <environmentname>`.
4. On Linux, if the error is `gcc: error trying to exec 'cc1plus': execvp: No such file or directory`, install build essentials using the command `sudo apt-get install build-essential`.
5. Pass a new name as the first parameter to automl_setup so that it creates a new conda environment. You can view existing conda environments using `conda env list` and remove them with `conda env remove -n <environmentname>`.

## automl_setup_linux.sh fails
If automl_setup_linux.sh fails on Ubuntu Linux with the error: `unable to execute 'gcc': No such file or directory`
1. Make sure that outbound ports 53 and 80 are enabled. On an Azure VM, you can do this from the Azure Portal by selecting the VM and clicking on Networking.
2. Run the command: `sudo apt-get update`
3. Run the command: `sudo apt-get install build-essential --fix-missing`
4. Run `automl_setup_linux.sh` again.

## configuration.ipynb fails
1) For local conda, make sure that you have successfully run automl_setup first.
@@ -233,13 +250,20 @@ If a sample notebook fails with an error that property, method or library does n
## Numpy import fails on Windows
Some Windows environments see an error loading numpy with the latest Python version 3.6.8. If you see this issue, try with Python version 3.6.7.

## Numpy import fails
Check the tensorflow version in the automated ml conda environment. Supported versions are < 1.13. Uninstall tensorflow from the environment if version is >= 1.13
You may check the version of tensorflow and uninstall as follows
1) start a command shell, activate conda environment where automated ml packages are installed
2) enter `pip freeze` and look for `tensorflow`; if found, the version listed should be < 1.13
3) If the listed version is not a supported version, `pip uninstall tensorflow` in the command shell and enter y for confirmation.
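As a concrete sketch of that check, assuming the environment created by automl_setup kept its default name azure_automl:

```sh
# activate the automated ML environment and inspect the installed tensorflow version
source activate azure_automl
pip freeze | grep tensorflow

# if the reported version is 1.13 or later, remove it (answer y when prompted)
pip uninstall tensorflow
```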
## Remote run: DsvmCompute.create fails
|
||||
There are several reasons why the DsvmCompute.create can fail. The reason is usually in the error message but you have to look at the end of the error message for the detailed reason. Some common reasons are:
|
||||
1) `Compute name is invalid, it should start with a letter, be between 2 and 16 character, and only include letters (a-zA-Z), numbers (0-9) and \'-\'.` Note that underscore is not allowed in the name.
|
||||
2) `The requested VM size xxxxx is not available in the current region.` You can select a different region or vm_size.
|
||||
|
||||
## Remote run: Unable to establish SSH connection
|
||||
AutoML uses the SSH protocol to communicate with remote DSVMs. This defaults to port 22. Possible causes for this error are:
|
||||
Automated ML uses the SSH protocol to communicate with remote DSVMs. This defaults to port 22. Possible causes for this error are:
|
||||
1) The DSVM is not ready for SSH connections. When DSVM creation completes, the DSVM might still not be ready to acceept SSH connections. The sample notebooks have a one minute delay to allow for this.
|
||||
2) Your Azure Subscription may restrict the IP address ranges that can access the DSVM on port 22. You can check this in the Azure Portal by selecting the Virtual Machine and then clicking Networking. The Virtual Machine name is the name that you provided in the notebook plus 10 alpha numeric characters to make the name unique. The Inbound Port Rules define what can access the VM on specific ports. Note that there is a priority priority order. So, a Deny entry with a low priority number will override a Allow entry with a higher priority number.
|
||||
|
||||
@@ -250,13 +274,13 @@ This is often an issue with the `get_data` method.
|
||||
3) You can get to the error log for the setup iteration by clicking the `Click here to see the run in Azure portal` link, click `Back to Experiment`, click on the highest run number and then click on Logs.
|
||||
|
||||
## Remote run: disk full
|
||||
AutoML creates files under /tmp/azureml_runs for each iteration that it runs. It creates a folder with the iteration id. For example: AutoML_9a038a18-77cc-48f1-80fb-65abdbc33abe_93. Under this, there is a azureml-logs folder, which contains logs. If you run too many iterations on the same DSVM, these files can fill the disk.
|
||||
Automated ML creates files under /tmp/azureml_runs for each iteration that it runs. It creates a folder with the iteration id. For example: AutoML_9a038a18-77cc-48f1-80fb-65abdbc33abe_93. Under this, there is a azureml-logs folder, which contains logs. If you run too many iterations on the same DSVM, these files can fill the disk.
|
||||
You can delete the files under /tmp/azureml_runs or just delete the VM and create a new one.
|
||||
If your get_data downloads files, make sure the delete them or they can use disk space as well.
|
||||
When using DataStore, it is good to specify an absolute path for the files so that they are downloaded just once. If you specify a relative path, it will download a file for each iteration.
|
||||
|
||||
## Remote run: Iterations fail and the log contains "MemoryError"
|
||||
This can be caused by insufficient memory on the DSVM. AutoML loads all training data into memory. So, the available memory should be more than the training data size.
|
||||
This can be caused by insufficient memory on the DSVM. Automated ML loads all training data into memory. So, the available memory should be more than the training data size.
|
||||
If you are using a remote DSVM, memory is needed for each concurrent iteration. The max_concurrent_iterations setting specifies the maximum concurrent iterations. For example, if the training data size is 8Gb and max_concurrent_iterations is set to 10, the minimum memory required is at least 80Gb.
|
||||
To resolve this issue, allocate a DSVM with more memory or reduce the value specified for max_concurrent_iterations.
|
||||
|
||||
|
||||
how-to-use-azureml/automated-machine-learning/automl_env.yml (new file, 21 lines)
@@ -0,0 +1,21 @@
name: azure_automl
dependencies:
# The python interpreter version.
# Currently Azure ML only supports 3.5.2 and later.
- python>=3.5.2,<3.6.8
- nb_conda
- matplotlib==2.1.0
- numpy>=1.11.0,<=1.16.2
- cython
- urllib3<1.24
- scipy>=1.0.0,<=1.1.0
- scikit-learn>=0.19.0,<=0.20.3
- pandas>=0.22.0,<=0.23.4
- py-xgboost<=0.80

- pip:
  # Required packages for AzureML execution, history, and data preparation.
  - azureml-sdk[automl,explain]
  - azureml-widgets
  - pandas_ml
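The automl_setup scripts below consume this file, but it can also be used directly; a minimal sketch of creating the environment by hand from it:

```sh
# create the azure_automl environment from the YAML file and activate it
conda env create -f automl_env.yml -n azure_automl
conda activate azure_automl

# start Jupyter from the folder containing the sample notebooks
jupyter notebook
```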
@@ -0,0 +1,22 @@
name: azure_automl
dependencies:
# The python interpreter version.
# Currently Azure ML only supports 3.5.2 and later.
- nomkl
- python>=3.5.2,<3.6.8
- nb_conda
- matplotlib==2.1.0
- numpy>=1.11.0,<=1.16.2
- cython
- urllib3<1.24
- scipy>=1.0.0,<=1.1.0
- scikit-learn>=0.19.0,<=0.20.3
- pandas>=0.22.0,<0.23.0
- py-xgboost<=0.80

- pip:
  # Required packages for AzureML execution, history, and data preparation.
  - azureml-sdk[automl,explain]
  - azureml-widgets
  - pandas_ml
@@ -0,0 +1,51 @@
@echo off
set conda_env_name=%1
set automl_env_file=%2
set options=%3
set PIP_NO_WARN_SCRIPT_LOCATION=0

IF "%conda_env_name%"=="" SET conda_env_name="azure_automl"
IF "%automl_env_file%"=="" SET automl_env_file="automl_env.yml"

IF NOT EXIST %automl_env_file% GOTO YmlMissing

call conda activate %conda_env_name% 2>nul:

if not errorlevel 1 (
  echo Upgrading azureml-sdk[automl,notebooks,explain] in existing conda environment %conda_env_name%
  call pip install --upgrade azureml-sdk[automl,notebooks,explain]
  if errorlevel 1 goto ErrorExit
) else (
  call conda env create -f %automl_env_file% -n %conda_env_name%
)

call conda activate %conda_env_name% 2>nul:
if errorlevel 1 goto ErrorExit

call python -m ipykernel install --user --name %conda_env_name% --display-name "Python (%conda_env_name%)"

REM azureml.widgets is now installed as part of the pip install under the conda env.
REM Removing the old user install so that the notebooks will use the latest widget.
call jupyter nbextension uninstall --user --py azureml.widgets

echo.
echo.
echo ***************************************
echo * AutoML setup completed successfully *
echo ***************************************
IF NOT "%options%"=="nolaunch" (
  echo.
  echo Starting jupyter notebook - please run the configuration notebook
  echo.
  jupyter notebook --log-level=50 --notebook-dir='..\..'
)

goto End

:YmlMissing
echo File %automl_env_file% not found.

:ErrorExit
echo Install failed

:End
@@ -0,0 +1,52 @@
#!/bin/bash

CONDA_ENV_NAME=$1
AUTOML_ENV_FILE=$2
OPTIONS=$3
PIP_NO_WARN_SCRIPT_LOCATION=0

if [ "$CONDA_ENV_NAME" == "" ]
then
  CONDA_ENV_NAME="azure_automl"
fi

if [ "$AUTOML_ENV_FILE" == "" ]
then
  AUTOML_ENV_FILE="automl_env.yml"
fi

if [ ! -f $AUTOML_ENV_FILE ]; then
  echo "File $AUTOML_ENV_FILE not found"
  exit 1
fi

if source activate $CONDA_ENV_NAME 2> /dev/null
then
  echo "Upgrading azureml-sdk[automl,notebooks,explain] in existing conda environment" $CONDA_ENV_NAME
  pip install --upgrade azureml-sdk[automl,notebooks,explain] &&
  jupyter nbextension uninstall --user --py azureml.widgets
else
  conda env create -f $AUTOML_ENV_FILE -n $CONDA_ENV_NAME &&
  source activate $CONDA_ENV_NAME &&
  python -m ipykernel install --user --name $CONDA_ENV_NAME --display-name "Python ($CONDA_ENV_NAME)" &&
  jupyter nbextension uninstall --user --py azureml.widgets &&
  echo "" &&
  echo "" &&
  echo "***************************************" &&
  echo "* AutoML setup completed successfully *" &&
  echo "***************************************" &&
  if [ "$OPTIONS" != "nolaunch" ]
  then
    echo "" &&
    echo "Starting jupyter notebook - please run the configuration notebook" &&
    echo "" &&
    jupyter notebook --log-level=50 --notebook-dir '../..'
  fi
fi

if [ $? -gt 0 ]
then
  echo "Installation failed"
fi
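A sketch of invoking this script with its optional positional parameters (conda environment name, environment file, and the nolaunch flag), matching the defaults it falls back to when they are omitted; the environment name here is an arbitrary choice:

```sh
# create or refresh a conda environment called my_automl_env from automl_env.yml,
# register the Jupyter kernel, but skip launching the notebook server
bash automl_setup_linux.sh my_automl_env automl_env.yml nolaunch
```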
@@ -0,0 +1,54 @@
#!/bin/bash

CONDA_ENV_NAME=$1
AUTOML_ENV_FILE=$2
OPTIONS=$3
PIP_NO_WARN_SCRIPT_LOCATION=0

if [ "$CONDA_ENV_NAME" == "" ]
then
  CONDA_ENV_NAME="azure_automl"
fi

if [ "$AUTOML_ENV_FILE" == "" ]
then
  AUTOML_ENV_FILE="automl_env_mac.yml"
fi

if [ ! -f $AUTOML_ENV_FILE ]; then
  echo "File $AUTOML_ENV_FILE not found"
  exit 1
fi

if source activate $CONDA_ENV_NAME 2> /dev/null
then
  echo "Upgrading azureml-sdk[automl,notebooks,explain] in existing conda environment" $CONDA_ENV_NAME
  pip install --upgrade azureml-sdk[automl,notebooks,explain] &&
  jupyter nbextension uninstall --user --py azureml.widgets
else
  conda env create -f $AUTOML_ENV_FILE -n $CONDA_ENV_NAME &&
  source activate $CONDA_ENV_NAME &&
  conda install lightgbm -c conda-forge -y &&
  python -m ipykernel install --user --name $CONDA_ENV_NAME --display-name "Python ($CONDA_ENV_NAME)" &&
  jupyter nbextension uninstall --user --py azureml.widgets &&
  echo "" &&
  echo "" &&
  echo "***************************************" &&
  echo "* AutoML setup completed successfully *" &&
  echo "***************************************" &&
  if [ "$OPTIONS" != "nolaunch" ]
  then
    echo "" &&
    echo "Starting jupyter notebook - please run the configuration notebook" &&
    echo "" &&
    jupyter notebook --log-level=50 --notebook-dir '../..'
  fi
fi

if [ $? -gt 0 ]
then
  echo "Installation failed"
fi
@@ -9,6 +9,13 @@
    "Licensed under the MIT License."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    ""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
@@ -139,7 +146,6 @@
    "             primary_metric = 'AUC_weighted',\n",
    "             iteration_timeout_minutes = 20,\n",
    "             iterations = 10,\n",
    "             n_cross_validations = 2,\n",
    "             verbosity = logging.INFO,\n",
    "             X = X_train, \n",
    "             y = y_train,\n",
@@ -263,7 +269,7 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "To ensure the fit results are consistent with the training results, the SDK dependency versions need to be the same as the environment that trains the model. Details about retrieving the versions can be found in notebook [12.auto-ml-retrieve-the-training-sdk-versions](12.auto-ml-retrieve-the-training-sdk-versions.ipynb)."
    "To ensure the fit results are consistent with the training results, the SDK dependency versions need to be the same as the environment that trains the model. The following cells create a file, myenv.yml, which specifies the dependencies from the run."
   ]
  },
  {
@@ -303,7 +309,8 @@
   "source": [
    "from azureml.core.conda_dependencies import CondaDependencies\n",
    "\n",
    "myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn'], pip_packages=['azureml-sdk[automl]'])\n",
    "myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn','py-xgboost<=0.80'],\n",
    "                                 pip_packages=['azureml-sdk[automl]'])\n",
    "\n",
    "conda_env_file_name = 'myenv.yml'\n",
    "myenv.save_to_file('.', conda_env_file_name)"
@@ -0,0 +1,358 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||
"\n",
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Automated Machine Learning\n",
|
||||
"_**Classification with Local Compute**_\n",
|
||||
"\n",
|
||||
"## Contents\n",
|
||||
"1. [Introduction](#Introduction)\n",
|
||||
"1. [Setup](#Setup)\n",
|
||||
"1. [Data](#Data)\n",
|
||||
"1. [Train](#Train)\n",
|
||||
"1. [Results](#Results)\n",
|
||||
"1. [Test](#Test)\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Introduction\n",
|
||||
"\n",
|
||||
"In this example we use the scikit-learn's [digit dataset](http://scikit-learn.org/stable/datasets/index.html#optical-recognition-of-handwritten-digits-dataset) to showcase how you can use AutoML for a simple classification problem.\n",
|
||||
"\n",
|
||||
"Make sure you have executed the [configuration](../../../configuration.ipynb) before running this notebook.\n",
|
||||
"\n",
|
||||
"Please find the ONNX related documentations [here](https://github.com/onnx/onnx).\n",
|
||||
"\n",
|
||||
"In this notebook you will learn how to:\n",
|
||||
"1. Create an `Experiment` in an existing `Workspace`.\n",
|
||||
"2. Configure AutoML using `AutoMLConfig`.\n",
|
||||
"3. Train the model using local compute with ONNX compatible config on.\n",
|
||||
"4. Explore the results and save the ONNX model."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"As part of the setup you have already created an Azure ML `Workspace` object. For AutoML you will need to create an `Experiment` object, which is a named object in a `Workspace` used to run experiments."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import logging\n",
|
||||
"\n",
|
||||
"from matplotlib import pyplot as plt\n",
|
||||
"import numpy as np\n",
|
||||
"import pandas as pd\n",
|
||||
"from sklearn import datasets\n",
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"\n",
|
||||
"import azureml.core\n",
|
||||
"from azureml.core.experiment import Experiment\n",
|
||||
"from azureml.core.workspace import Workspace\n",
|
||||
"from azureml.train.automl import AutoMLConfig, constants"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ws = Workspace.from_config()\n",
|
||||
"\n",
|
||||
"# Choose a name for the experiment and specify the project folder.\n",
|
||||
"experiment_name = 'automl-classification-onnx'\n",
|
||||
"project_folder = './sample_projects/automl-classification-onnx'\n",
|
||||
"\n",
|
||||
"experiment = Experiment(ws, experiment_name)\n",
|
||||
"\n",
|
||||
"output = {}\n",
|
||||
"output['SDK version'] = azureml.core.VERSION\n",
|
||||
"output['Subscription ID'] = ws.subscription_id\n",
|
||||
"output['Workspace Name'] = ws.name\n",
|
||||
"output['Resource Group'] = ws.resource_group\n",
|
||||
"output['Location'] = ws.location\n",
|
||||
"output['Project Directory'] = project_folder\n",
|
||||
"output['Experiment Name'] = experiment.name\n",
|
||||
"pd.set_option('display.max_colwidth', -1)\n",
|
||||
"outputDf = pd.DataFrame(data = output, index = [''])\n",
|
||||
"outputDf.T"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Data\n",
|
||||
"\n",
|
||||
"This uses scikit-learn's [load_iris](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_iris.html) method."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"iris = datasets.load_iris()\n",
|
||||
"X_train, X_test, y_train, y_test = train_test_split(iris.data, \n",
|
||||
" iris.target, \n",
|
||||
" test_size=0.2, \n",
|
||||
" random_state=0)\n",
|
||||
"\n",
|
||||
"# Convert the X_train and X_test to pandas DataFrame and set column names,\n",
|
||||
"# This is needed for initializing the input variable names of ONNX model, \n",
|
||||
"# and the prediction with the ONNX model using the inference helper.\n",
|
||||
"X_train = pd.DataFrame(X_train, columns=['c1', 'c2', 'c3', 'c4'])\n",
|
||||
"X_test = pd.DataFrame(X_test, columns=['c1', 'c2', 'c3', 'c4'])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Train with enable ONNX compatible models config on\n",
|
||||
"\n",
|
||||
"Instantiate an `AutoMLConfig` object to specify the settings and data used to run the experiment.\n",
|
||||
"\n",
|
||||
"Set the parameter enable_onnx_compatible_models=True, if you also want to generate the ONNX compatible models. Please note, the forecasting task and TensorFlow models are not ONNX compatible yet.\n",
|
||||
"\n",
|
||||
"|Property|Description|\n",
|
||||
"|-|-|\n",
|
||||
"|**task**|classification or regression|\n",
|
||||
"|**primary_metric**|This is the metric that you want to optimize. Classification supports the following primary metrics: <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>average_precision_score_weighted</i><br><i>norm_macro_recall</i><br><i>precision_score_weighted</i>|\n",
|
||||
"|**iteration_timeout_minutes**|Time limit in minutes for each iteration.|\n",
|
||||
"|**iterations**|Number of iterations. In each iteration AutoML trains a specific pipeline with the data.|\n",
|
||||
"|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n",
|
||||
"|**y**|(sparse) array-like, shape = [n_samples, ], Multi-class targets.|\n",
|
||||
"|**enable_onnx_compatible_models**|Enable the ONNX compatible models in the experiment.|\n",
|
||||
"|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder.|"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"automl_config = AutoMLConfig(task = 'classification',\n",
|
||||
" debug_log = 'automl_errors.log',\n",
|
||||
" primary_metric = 'AUC_weighted',\n",
|
||||
" iteration_timeout_minutes = 60,\n",
|
||||
" iterations = 10,\n",
|
||||
" verbosity = logging.INFO, \n",
|
||||
" X = X_train, \n",
|
||||
" y = y_train,\n",
|
||||
" preprocess=True,\n",
|
||||
" enable_onnx_compatible_models=True,\n",
|
||||
" path = project_folder)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Call the `submit` method on the experiment object and pass the run configuration. Execution of local runs is synchronous. Depending on the data and the number of iterations this can run for a while.\n",
|
||||
"In this example, we specify `show_output = True` to print currently running iterations to the console."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"local_run = experiment.submit(automl_config, show_output = True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"local_run"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Results"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Widget for Monitoring Runs\n",
|
||||
"\n",
|
||||
"The widget will first report a \"loading\" status while running the first iteration. After completing the first iteration, an auto-updating graph and table will be shown. The widget will refresh once per minute, so you should see the graph update as child runs complete.\n",
|
||||
"\n",
|
||||
"**Note:** The widget displays a link at the bottom. Use this link to open a web interface to explore the individual run details."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.widgets import RunDetails\n",
|
||||
"RunDetails(local_run).show() "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Retrieve the Best ONNX Model\n",
|
||||
"\n",
|
||||
"Below we select the best pipeline from our iterations. The `get_output` method returns the best run and the fitted model. The Model includes the pipeline and any pre-processing. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*.\n",
|
||||
"\n",
|
||||
"Set the parameter return_onnx_model=True to retrieve the best ONNX model, instead of the Python model."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"best_run, onnx_mdl = local_run.get_output(return_onnx_model=True)"
|
||||
]
|
||||
},
|
||||
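As a side note on the `get_output` overloads mentioned above, a minimal sketch (our illustration, assuming the `iteration` keyword is available as in other AutoML notebooks) of retrieving the ONNX model for a specific child iteration rather than the overall best run:

```python
# Hypothetical illustration: fetch the ONNX model produced by iteration 0
# instead of the overall best iteration.
iteration_run, iteration_onnx_mdl = local_run.get_output(iteration=0, return_onnx_model=True)
```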
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Save the best ONNX model"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.automl.core.onnx_convert import OnnxConverter\n",
|
||||
"onnx_fl_path = \"./best_model.onnx\"\n",
|
||||
"OnnxConverter.save_onnx_model(onnx_mdl, onnx_fl_path)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Predict with the ONNX model, using onnxruntime package"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import sys\n",
|
||||
"import json\n",
|
||||
"from azureml.automl.core.onnx_convert import OnnxConvertConstants\n",
|
||||
"\n",
|
||||
"if sys.version_info < OnnxConvertConstants.OnnxIncompatiblePythonVersion:\n",
|
||||
" python_version_compatible = True\n",
|
||||
"else:\n",
|
||||
" python_version_compatible = False\n",
|
||||
"\n",
|
||||
"try:\n",
|
||||
" import onnxruntime\n",
|
||||
" from azureml.automl.core.onnx_convert import OnnxInferenceHelper \n",
|
||||
" onnxrt_present = True\n",
|
||||
"except ImportError:\n",
|
||||
" onnxrt_present = False\n",
|
||||
"\n",
|
||||
"def get_onnx_res(run):\n",
|
||||
" res_path = '_debug_y_trans_converter.json'\n",
|
||||
" run.download_file(name=constants.MODEL_RESOURCE_PATH_ONNX, output_file_path=res_path)\n",
|
||||
" with open(res_path) as f:\n",
|
||||
" onnx_res = json.load(f)\n",
|
||||
" return onnx_res\n",
|
||||
"\n",
|
||||
"if onnxrt_present and python_version_compatible: \n",
|
||||
" mdl_bytes = onnx_mdl.SerializeToString()\n",
|
||||
" onnx_res = get_onnx_res(best_run)\n",
|
||||
"\n",
|
||||
" onnxrt_helper = OnnxInferenceHelper(mdl_bytes, onnx_res)\n",
|
||||
" pred_onnx, pred_prob_onnx = onnxrt_helper.predict(X_test)\n",
|
||||
"\n",
|
||||
" print(pred_onnx)\n",
|
||||
" print(pred_prob_onnx)\n",
|
||||
"else:\n",
|
||||
" if not python_version_compatible:\n",
|
||||
" print('Please use Python version 3.6 to run the inference helper.') \n",
|
||||
" if not onnxrt_present:\n",
|
||||
" print('Please install the onnxruntime package to do the prediction with ONNX model.')"
|
||||
]
|
||||
},
|
||||
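If you want to inspect the saved ONNX file outside the AutoML inference helper, a minimal sketch using the public `onnxruntime` API (assuming `./best_model.onnx` was written by the cell above; note that AutoML ONNX models may expose one input per raw column, which is why the helper remains the recommended scoring path):

```python
import onnxruntime as rt

# Open the model saved earlier and list its input/output signatures.
sess = rt.InferenceSession("./best_model.onnx")
for inp in sess.get_inputs():
    print("input :", inp.name, inp.type, inp.shape)
for out in sess.get_outputs():
    print("output:", out.name, out.type, out.shape)
```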
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "savitam"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
"name": "python36"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -9,6 +9,13 @@
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -71,11 +78,17 @@
|
||||
"import azureml.core\n",
|
||||
"from azureml.core.experiment import Experiment\n",
|
||||
"from azureml.core.workspace import Workspace\n",
|
||||
"import sys\n",
|
||||
"whitelist_models=[\"LightGBM\"]\n",
|
||||
"if \"3.7\" != sys.version[0:3]:\n",
|
||||
" try:\n",
|
||||
" import tensorflow as tf1\n",
|
||||
" except ImportError:\n",
|
||||
" from pip._internal import main\n",
|
||||
" main(['install', 'tensorflow>=1.10.0,<=1.12.0'])\n",
|
||||
" logging.getLogger().setLevel(logging.ERROR)\n",
|
||||
" whitelist_models=[\"TensorFlowLinearClassifier\", \"TensorFlowDNN\"]\n",
|
||||
"\n",
|
||||
"from azureml.train.automl import AutoMLConfig"
|
||||
]
|
||||
},
|
||||
@@ -160,12 +173,11 @@
|
||||
" primary_metric = 'AUC_weighted',\n",
|
||||
" iteration_timeout_minutes = 60,\n",
|
||||
" iterations = 10,\n",
|
||||
" n_cross_validations = 3,\n",
|
||||
" verbosity = logging.INFO,\n",
|
||||
" X = X_train, \n",
|
||||
" y = y_train,\n",
|
||||
" enable_tf=True,\n",
|
||||
" whitelist_models=[\"TensorFlowLinearClassifier\", \"TensorFlowDNN\"],\n",
|
||||
" whitelist_models=whitelist_models,\n",
|
||||
" path = project_folder)"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -9,6 +9,13 @@
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -72,6 +79,32 @@
|
||||
"from azureml.train.automl import AutoMLConfig"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Accessing the Azure ML workspace requires authentication with Azure.\n",
|
||||
"\n",
|
||||
"The default authentication is interactive authentication using the default tenant. Executing the `ws = Workspace.from_config()` line in the cell below will prompt for authentication the first time that it is run.\n",
|
||||
"\n",
|
||||
"If you have multiple Azure tenants, you can specify the tenant by replacing the `ws = Workspace.from_config()` line in the cell below with the following:\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"from azureml.core.authentication import InteractiveLoginAuthentication\n",
|
||||
"auth = InteractiveLoginAuthentication(tenant_id = 'mytenantid')\n",
|
||||
"ws = Workspace.from_config(auth = auth)\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"If you need to run in an environment where interactive login is not possible, you can use Service Principal authentication by replacing the `ws = Workspace.from_config()` line in the cell below with the following:\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"from azureml.core.authentication import ServicePrincipalAuthentication\n",
|
||||
"auth = auth = ServicePrincipalAuthentication('mytenantid', 'myappid', 'mypassword')\n",
|
||||
"ws = Workspace.from_config(auth = auth)\n",
|
||||
"```\n",
|
||||
"For more details, see [aka.ms/aml-notebook-auth](http://aka.ms/aml-notebook-auth)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@@ -133,12 +166,17 @@
|
||||
"|-|-|\n",
|
||||
"|**task**|classification or regression|\n",
|
||||
"|**primary_metric**|This is the metric that you want to optimize. Classification supports the following primary metrics: <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>average_precision_score_weighted</i><br><i>norm_macro_recall</i><br><i>precision_score_weighted</i>|\n",
|
||||
"|**iteration_timeout_minutes**|Time limit in minutes for each iteration.|\n",
|
||||
"|**iterations**|Number of iterations. In each iteration AutoML trains a specific pipeline with the data.|\n",
|
||||
"|**n_cross_validations**|Number of cross validation splits.|\n",
|
||||
"|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n",
|
||||
"|**y**|(sparse) array-like, shape = [n_samples, ], Multi-class targets.|\n",
|
||||
"|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder.|"
|
||||
"|**n_cross_validations**|Number of cross validation splits.|\n",
|
||||
"|\n",
|
||||
"\n",
|
||||
"Automated machine learning trains multiple machine learning pipelines. Each pipelines training is known as an iteration.\n",
|
||||
"* You can specify a maximum number of iterations using the `iterations` parameter.\n",
|
||||
"* You can specify a maximum time for the run using the `experiment_timeout_minutes` parameter.\n",
|
||||
"* If you specify neither the `iterations` nor the `experiment_timeout_minutes`, automated ML keeps running iterations while it continues to see improvements in the scores.\n",
|
||||
"\n",
|
||||
"The following example doesn't specify `iterations` or `experiment_timeout_minutes` and so runs until the scores stop improving.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -148,15 +186,10 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"automl_config = AutoMLConfig(task = 'classification',\n",
|
||||
" debug_log = 'automl_errors.log',\n",
|
||||
" primary_metric = 'AUC_weighted',\n",
|
||||
" iteration_timeout_minutes = 60,\n",
|
||||
" iterations = 25,\n",
|
||||
" n_cross_validations = 3,\n",
|
||||
" verbosity = logging.INFO,\n",
|
||||
" X = X_train, \n",
|
||||
" y = y_train,\n",
|
||||
" path = project_folder)"
|
||||
" n_cross_validations = 3)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -302,6 +335,12 @@
|
||||
" print()\n",
|
||||
" for estimator in step[1].estimators:\n",
|
||||
" print_model(estimator[1], estimator[0]+ ' - ')\n",
|
||||
" elif hasattr(step[1], '_base_learners') and hasattr(step[1], '_meta_learner'):\n",
|
||||
" print(\"\\nMeta Learner\")\n",
|
||||
" pprint(step[1]._meta_learner)\n",
|
||||
" print()\n",
|
||||
" for estimator in step[1]._base_learners:\n",
|
||||
" print_model(estimator[1], estimator[0]+ ' - ')\n",
|
||||
" else:\n",
|
||||
" pprint(step[1].get_params())\n",
|
||||
" print()\n",
|
||||
|
||||
@@ -9,6 +9,13 @@
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -117,21 +124,12 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# You can use `auto_read_file` which intelligently figures out delimiters and datatypes of a file.\n",
|
||||
"# The data referenced here was pulled from `sklearn.datasets.load_digits()`.\n",
|
||||
"simple_example_data_root = 'https://dprepdata.blob.core.windows.net/automl-notebook-data/'\n",
|
||||
"X = dprep.auto_read_file(simple_example_data_root + 'X.csv').skip(1) # Remove the header row.\n",
|
||||
"\n",
|
||||
"# The data referenced here was a 1MB simple random sample of the Chicago Crime data into a local temporary directory.\n",
|
||||
"# You can also use `read_csv` and `to_*` transformations to read (with overridable delimiter)\n",
|
||||
"# and convert column types manually.\n",
|
||||
"# Here we read a comma delimited file and convert all columns to integers.\n",
|
||||
"y = dprep.read_csv(simple_example_data_root + 'y.csv').to_long(dprep.ColumnSelector(term='.*', use_regex = True))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can peek the result of a Dataflow at any range using `skip(i)` and `head(j)`. Doing so evaluates only `j` records for all the steps in the Dataflow, which makes it fast even against large datasets."
|
||||
"example_data = 'https://dprepdata.blob.core.windows.net/demo/crime0-random.csv'\n",
|
||||
"dflow = dprep.auto_read_file(example_data).skip(1) # Remove the header row.\n",
|
||||
"dflow.get_profile()"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -140,7 +138,30 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"X.skip(1).head(5)"
|
||||
"# As `Primary Type` is our y data, we need to drop the values those are null in this column.\n",
|
||||
"dflow = dflow.drop_nulls('Primary Type')\n",
|
||||
"dflow.head(5)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Review the Data Preparation Result\n",
|
||||
"\n",
|
||||
"You can peek the result of a Dataflow at any range using `skip(i)` and `head(j)`. Doing so evaluates only `j` records for all the steps in the Dataflow, which makes it fast even against large datasets.\n",
|
||||
"\n",
|
||||
"`Dataflow` objects are immutable and are composed of a list of data preparation steps. A `Dataflow` object can be branched at any point for further usage."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"X = dflow.drop_columns(columns=['Primary Type', 'FBI Code'])\n",
|
||||
"y = dflow.keep_columns(columns=['Primary Type'], validate_column_exists=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -162,9 +183,8 @@
|
||||
" \"iteration_timeout_minutes\" : 10,\n",
|
||||
" \"iterations\" : 2,\n",
|
||||
" \"primary_metric\" : 'AUC_weighted',\n",
|
||||
" \"preprocess\" : False,\n",
|
||||
" \"verbosity\" : logging.INFO,\n",
|
||||
" \"n_cross_validations\": 3\n",
|
||||
" \"preprocess\" : True,\n",
|
||||
" \"verbosity\" : logging.INFO\n",
|
||||
"}"
|
||||
]
|
||||
},
|
||||
@@ -181,7 +201,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dsvm_name = 'mydsvmc'\n",
|
||||
"dsvm_name = 'mydsvmb'\n",
|
||||
"\n",
|
||||
"try:\n",
|
||||
" while ws.compute_targets[dsvm_name].provisioning_state == 'Creating':\n",
|
||||
@@ -257,6 +277,23 @@
|
||||
"remote_run"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Pre-process cache cleanup\n",
|
||||
"The preprocess data gets cache at user default file store. When the run is completed the cache can be cleaned by running below cell"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"remote_run.clean_preprocessor_cache()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -376,7 +413,8 @@
|
||||
"source": [
|
||||
"## Test\n",
|
||||
"\n",
|
||||
"#### Load Test Data"
|
||||
"#### Load Test Data\n",
|
||||
"For the test data, it should have the same preparation step as the train data. Otherwise it might get failed at the preprocessing step."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -385,12 +423,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn import datasets\n",
|
||||
"\n",
|
||||
"digits = datasets.load_digits()\n",
|
||||
"X_test = digits.data[:10, :]\n",
|
||||
"y_test = digits.target[:10]\n",
|
||||
"images = digits.images[:10]"
|
||||
"dflow_test = dprep.auto_read_file(path='https://dprepdata.blob.core.windows.net/demo/crime0-test.csv').skip(1)\n",
|
||||
"dflow_test = dflow_test.drop_nulls('Primary Type')"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -398,7 +432,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Testing Our Best Fitted Model\n",
|
||||
"We will try to predict 2 digits and see how our model works."
|
||||
"We will use confusion matrix to see how our model works."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -407,65 +441,19 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#Randomly select digits and test\n",
|
||||
"from matplotlib import pyplot as plt\n",
|
||||
"import numpy as np\n",
|
||||
"from pandas_ml import ConfusionMatrix\n",
|
||||
"\n",
|
||||
"for index in np.random.choice(len(y_test), 2, replace = False):\n",
|
||||
" print(index)\n",
|
||||
" predicted = fitted_model.predict(X_test[index:index + 1])[0]\n",
|
||||
" label = y_test[index]\n",
|
||||
" title = \"Label value = %d Predicted value = %d \" % (label, predicted)\n",
|
||||
" fig = plt.figure(1, figsize=(3,3))\n",
|
||||
" ax1 = fig.add_axes((0,0,.8,.8))\n",
|
||||
" ax1.set_title(title)\n",
|
||||
" plt.imshow(images[index], cmap = plt.cm.gray_r, interpolation = 'nearest')\n",
|
||||
" plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Appendix"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Capture the `Dataflow` Objects for Later Use in AutoML\n",
|
||||
"y_test = dflow_test.keep_columns(columns=['Primary Type']).to_pandas_dataframe()\n",
|
||||
"X_test = dflow_test.drop_columns(columns=['Primary Type', 'FBI Code']).to_pandas_dataframe()\n",
|
||||
"\n",
|
||||
"`Dataflow` objects are immutable and are composed of a list of data preparation steps. A `Dataflow` object can be branched at any point for further usage."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# sklearn.digits.data + target\n",
|
||||
"digits_complete = dprep.auto_read_file('https://dprepdata.blob.core.windows.net/automl-notebook-data/digits-complete.csv')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"`digits_complete` (sourced from `sklearn.datasets.load_digits()`) is forked into `dflow_X` to capture all the feature columns and `dflow_y` to capture the label column."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(digits_complete.to_pandas_dataframe().shape)\n",
|
||||
"labels_column = 'Column64'\n",
|
||||
"dflow_X = digits_complete.drop_columns(columns = [labels_column])\n",
|
||||
"dflow_y = digits_complete.keep_columns(columns = [labels_column])"
|
||||
"\n",
|
||||
"ypred = fitted_model.predict(X_test)\n",
|
||||
"\n",
|
||||
"cm = ConfusionMatrix(y_test['Primary Type'], ypred)\n",
|
||||
"\n",
|
||||
"print(cm)\n",
|
||||
"\n",
|
||||
"cm.plot()"
|
||||
]
|
||||
}
|
||||
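If `pandas_ml` is not available in your environment, a roughly equivalent sketch using `sklearn.metrics.confusion_matrix` (our substitution, not the notebook's original approach) is:

```python
from sklearn.metrics import confusion_matrix
import pandas as pd

# Label set shared by actuals and predictions, so rows and columns line up.
labels = sorted(set(y_test['Primary Type']) | set(ypred))
cm_arr = confusion_matrix(y_test['Primary Type'], ypred, labels=labels)
# Wrap in a DataFrame so each row/column is named after its class.
print(pd.DataFrame(cm_arr, index=labels, columns=labels))
```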
],
|
||||
|
||||
@@ -9,6 +9,13 @@
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -115,23 +122,12 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# You can use `auto_read_file` which intelligently figures out delimiters and datatypes of a file.\n",
|
||||
"# The data referenced here was pulled from `sklearn.datasets.load_digits()`.\n",
|
||||
"simple_example_data_root = 'https://dprepdata.blob.core.windows.net/automl-notebook-data/'\n",
|
||||
"X = dprep.auto_read_file(simple_example_data_root + 'X.csv').skip(1) # Remove the header row.\n",
|
||||
"\n",
|
||||
"# The data referenced here was a 1MB simple random sample of the Chicago Crime data into a local temporary directory.\n",
|
||||
"# You can also use `read_csv` and `to_*` transformations to read (with overridable delimiter)\n",
|
||||
"# and convert column types manually.\n",
|
||||
"# Here we read a comma delimited file and convert all columns to integers.\n",
|
||||
"y = dprep.read_csv(simple_example_data_root + 'y.csv').to_long(dprep.ColumnSelector(term='.*', use_regex = True))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Review the Data Preparation Result\n",
|
||||
"\n",
|
||||
"You can peek the result of a Dataflow at any range using `skip(i)` and `head(j)`. Doing so evaluates only `j` records for all the steps in the Dataflow, which makes it fast even against large datasets."
|
||||
"example_data = 'https://dprepdata.blob.core.windows.net/demo/crime0-random.csv'\n",
|
||||
"dflow = dprep.auto_read_file(example_data).skip(1) # Remove the header row.\n",
|
||||
"dflow.get_profile()"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -140,7 +136,30 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"X.skip(1).head(5)"
|
||||
"# As `Primary Type` is our y data, we need to drop the values those are null in this column.\n",
|
||||
"dflow = dflow.drop_nulls('Primary Type')\n",
|
||||
"dflow.head(5)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Review the Data Preparation Result\n",
|
||||
"\n",
|
||||
"You can peek the result of a Dataflow at any range using `skip(i)` and `head(j)`. Doing so evaluates only `j` records for all the steps in the Dataflow, which makes it fast even against large datasets.\n",
|
||||
"\n",
|
||||
"`Dataflow` objects are immutable and are composed of a list of data preparation steps. A `Dataflow` object can be branched at any point for further usage."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"X = dflow.drop_columns(columns=['Primary Type', 'FBI Code'])\n",
|
||||
"y = dflow.keep_columns(columns=['Primary Type'], validate_column_exists=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -162,9 +181,8 @@
|
||||
" \"iteration_timeout_minutes\" : 10,\n",
|
||||
" \"iterations\" : 2,\n",
|
||||
" \"primary_metric\" : 'AUC_weighted',\n",
|
||||
" \"preprocess\" : False,\n",
|
||||
" \"verbosity\" : logging.INFO,\n",
|
||||
" \"n_cross_validations\": 3\n",
|
||||
" \"preprocess\" : True,\n",
|
||||
" \"verbosity\" : logging.INFO\n",
|
||||
"}"
|
||||
]
|
||||
},
|
||||
@@ -327,7 +345,8 @@
|
||||
"source": [
|
||||
"## Test\n",
|
||||
"\n",
|
||||
"#### Load Test Data"
|
||||
"#### Load Test Data\n",
|
||||
"For the test data, it should have the same preparation step as the train data. Otherwise it might get failed at the preprocessing step."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -336,12 +355,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn import datasets\n",
|
||||
"\n",
|
||||
"digits = datasets.load_digits()\n",
|
||||
"X_test = digits.data[:10, :]\n",
|
||||
"y_test = digits.target[:10]\n",
|
||||
"images = digits.images[:10]"
|
||||
"dflow_test = dprep.auto_read_file(path='https://dprepdata.blob.core.windows.net/demo/crime0-test.csv').skip(1)\n",
|
||||
"dflow_test = dflow_test.drop_nulls('Primary Type')"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -349,7 +364,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Testing Our Best Fitted Model\n",
|
||||
"We will try to predict 2 digits and see how our model works."
|
||||
"We will use confusion matrix to see how our model works."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -358,65 +373,18 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#Randomly select digits and test\n",
|
||||
"from matplotlib import pyplot as plt\n",
|
||||
"import numpy as np\n",
|
||||
"from pandas_ml import ConfusionMatrix\n",
|
||||
"\n",
|
||||
"for index in np.random.choice(len(y_test), 2, replace = False):\n",
|
||||
" print(index)\n",
|
||||
" predicted = fitted_model.predict(X_test[index:index + 1])[0]\n",
|
||||
" label = y_test[index]\n",
|
||||
" title = \"Label value = %d Predicted value = %d \" % (label, predicted)\n",
|
||||
" fig = plt.figure(1, figsize=(3,3))\n",
|
||||
" ax1 = fig.add_axes((0,0,.8,.8))\n",
|
||||
" ax1.set_title(title)\n",
|
||||
" plt.imshow(images[index], cmap = plt.cm.gray_r, interpolation = 'nearest')\n",
|
||||
" plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Appendix"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Capture the `Dataflow` Objects for Later Use in AutoML\n",
|
||||
"y_test = dflow_test.keep_columns(columns=['Primary Type']).to_pandas_dataframe()\n",
|
||||
"X_test = dflow_test.drop_columns(columns=['Primary Type', 'FBI Code']).to_pandas_dataframe()\n",
|
||||
"\n",
|
||||
"`Dataflow` objects are immutable and are composed of a list of data preparation steps. A `Dataflow` object can be branched at any point for further usage."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# sklearn.digits.data + target\n",
|
||||
"digits_complete = dprep.auto_read_file('https://dprepdata.blob.core.windows.net/automl-notebook-data/digits-complete.csv')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"`digits_complete` (sourced from `sklearn.datasets.load_digits()`) is forked into `dflow_X` to capture all the feature columns and `dflow_y` to capture the label column."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(digits_complete.to_pandas_dataframe().shape)\n",
|
||||
"labels_column = 'Column64'\n",
|
||||
"dflow_X = digits_complete.drop_columns(columns = [labels_column])\n",
|
||||
"dflow_y = digits_complete.keep_columns(columns = [labels_column])"
|
||||
"ypred = fitted_model.predict(X_test)\n",
|
||||
"\n",
|
||||
"cm = ConfusionMatrix(y_test['Primary Type'], ypred)\n",
|
||||
"\n",
|
||||
"print(cm)\n",
|
||||
"\n",
|
||||
"cm.plot()"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
||||
@@ -9,6 +9,13 @@
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
||||
@@ -0,0 +1,500 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||
"\n",
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Automated Machine Learning\n",
|
||||
"**BikeShare Demand Forecasting**\n",
|
||||
"\n",
|
||||
"## Contents\n",
|
||||
"1. [Introduction](#Introduction)\n",
|
||||
"1. [Setup](#Setup)\n",
|
||||
"1. [Data](#Data)\n",
|
||||
"1. [Train](#Train)\n",
|
||||
"1. [Evaluate](#Evaluate)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Introduction\n",
|
||||
"In this example, we show how AutoML can be used for bike share forecasting.\n",
|
||||
"\n",
|
||||
"The purpose is to demonstrate how to take advantage of the built-in holiday featurization, access the feature names, and further demonstrate how to work with the `forecast` function. Please also look at the additional forecasting notebooks, which document lagging, rolling windows, forecast quantiles, other ways to use the forecast function, and forecaster deployment.\n",
|
||||
"\n",
|
||||
"Make sure you have executed the [configuration](../../../configuration.ipynb) before running this notebook.\n",
|
||||
"\n",
|
||||
"In this notebook you would see\n",
|
||||
"1. Creating an Experiment in an existing Workspace\n",
|
||||
"2. Instantiating AutoMLConfig with new task type \"forecasting\" for timeseries data training, and other timeseries related settings: for this dataset we use the basic one: \"time_column_name\" \n",
|
||||
"3. Training the Model using local compute\n",
|
||||
"4. Exploring the results\n",
|
||||
"5. Viewing the engineered names for featurized data and featurization summary for all raw features\n",
|
||||
"6. Testing the fitted model"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Setup\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import azureml.core\n",
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"import logging\n",
|
||||
"import warnings\n",
|
||||
"# Squash warning messages for cleaner output in the notebook\n",
|
||||
"warnings.showwarning = lambda *args, **kwargs: None\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"from azureml.core.workspace import Workspace\n",
|
||||
"from azureml.core.experiment import Experiment\n",
|
||||
"from azureml.train.automl import AutoMLConfig\n",
|
||||
"from matplotlib import pyplot as plt\n",
|
||||
"from sklearn.metrics import mean_absolute_error, mean_squared_error"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"As part of the setup you have already created a <b>Workspace</b>. For AutoML you would need to create an <b>Experiment</b>. An <b>Experiment</b> is a named object in a <b>Workspace</b>, which is used to run experiments."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ws = Workspace.from_config()\n",
|
||||
"\n",
|
||||
"# choose a name for the run history container in the workspace\n",
|
||||
"experiment_name = 'automl-bikeshareforecasting'\n",
|
||||
"# project folder\n",
|
||||
"project_folder = './sample_projects/automl-local-bikeshareforecasting'\n",
|
||||
"\n",
|
||||
"experiment = Experiment(ws, experiment_name)\n",
|
||||
"\n",
|
||||
"output = {}\n",
|
||||
"output['SDK version'] = azureml.core.VERSION\n",
|
||||
"output['Subscription ID'] = ws.subscription_id\n",
|
||||
"output['Workspace'] = ws.name\n",
|
||||
"output['Resource Group'] = ws.resource_group\n",
|
||||
"output['Location'] = ws.location\n",
|
||||
"output['Project Directory'] = project_folder\n",
|
||||
"output['Run History Name'] = experiment_name\n",
|
||||
"pd.set_option('display.max_colwidth', -1)\n",
|
||||
"outputDf = pd.DataFrame(data = output, index = [''])\n",
|
||||
"outputDf.T"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Data\n",
|
||||
"Read bike share demand data from file, and preview data."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"data = pd.read_csv('bike-no.csv', parse_dates=['date'])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's set up what we know abou the dataset. \n",
|
||||
"\n",
|
||||
"**Target column** is what we want to forecast.\n",
|
||||
"\n",
|
||||
"**Time column** is the time axis along which to predict.\n",
|
||||
"\n",
|
||||
"**Grain** is another word for an individual time series in your dataset. Grains are identified by values of the columns listed `grain_column_names`, for example \"store\" and \"item\" if your data has multiple time series of sales, one series for each combination of store and item sold.\n",
|
||||
"\n",
|
||||
"This dataset has only one time series. Please see the [orange juice notebook](https://github.com/Azure/MachineLearningNotebooks/tree/master/how-to-use-azureml/automated-machine-learning/forecasting-orange-juice-sales) for an example of a multi-time series dataset."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"target_column_name = 'cnt'\n",
|
||||
"time_column_name = 'date'\n",
|
||||
"grain_column_names = []"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Split the data\n",
|
||||
"\n",
|
||||
"The first split we make is into train and test sets. Note we are splitting on time."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"train = data[data[time_column_name] < '2012-09-01']\n",
|
||||
"test = data[data[time_column_name] >= '2012-09-01']\n",
|
||||
"\n",
|
||||
"X_train = train.copy()\n",
|
||||
"y_train = X_train.pop(target_column_name).values\n",
|
||||
"\n",
|
||||
"X_test = test.copy()\n",
|
||||
"y_test = X_test.pop(target_column_name).values\n",
|
||||
"\n",
|
||||
"print(X_train.shape)\n",
|
||||
"print(y_train.shape)\n",
|
||||
"print(X_test.shape)\n",
|
||||
"print(y_test.shape)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Setting forecaster maximum horizon \n",
|
||||
"\n",
|
||||
"Assuming your test data forms a full and regular time series(regular time intervals and no holes), \n",
|
||||
"the maximum horizon you will need to forecast is the length of the longest grain in your test set."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"if len(grain_column_names) == 0:\n",
|
||||
" max_horizon = len(X_test)\n",
|
||||
"else:\n",
|
||||
" max_horizon = X_test.groupby(grain_column_names)[time_column_name].count().max()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Train\n",
|
||||
"\n",
|
||||
"Instantiate a AutoMLConfig object. This defines the settings and data used to run the experiment.\n",
|
||||
"\n",
|
||||
"|Property|Description|\n",
|
||||
"|-|-|\n",
|
||||
"|**task**|forecasting|\n",
|
||||
"|**primary_metric**|This is the metric that you want to optimize.<br> Forecasting supports the following primary metrics <br><i>spearman_correlation</i><br><i>normalized_root_mean_squared_error</i><br><i>r2_score</i><br><i>normalized_mean_absolute_error</i>\n",
|
||||
"|**iterations**|Number of iterations. In each iteration, Auto ML trains a specific pipeline on the given data|\n",
|
||||
"|**iteration_timeout_minutes**|Time limit in minutes for each iteration.|\n",
|
||||
"|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n",
|
||||
"|**y**|(sparse) array-like, shape = [n_samples, ], targets values.|\n",
|
||||
"|**n_cross_validations**|Number of cross validation splits.|\n",
|
||||
"|**country_or_region**|The country/region used to generate holiday features. These should be ISO 3166 two-letter country/region codes (i.e. 'US', 'GB').|\n",
|
||||
"|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"time_column_name = 'date'\n",
|
||||
"automl_settings = {\n",
|
||||
" \"time_column_name\": time_column_name,\n",
|
||||
" # these columns are a breakdown of the total and therefore a leak\n",
|
||||
" \"drop_column_names\": ['casual', 'registered'],\n",
|
||||
" # knowing the country/region allows Automated ML to bring in holidays\n",
|
||||
" \"country_or_region\" : 'US',\n",
|
||||
" \"max_horizon\" : max_horizon,\n",
|
||||
" \"target_lags\": 1 \n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"automl_config = AutoMLConfig(task = 'forecasting', \n",
|
||||
" primary_metric='normalized_root_mean_squared_error',\n",
|
||||
" iterations = 10,\n",
|
||||
" iteration_timeout_minutes = 5,\n",
|
||||
" X = X_train,\n",
|
||||
" y = y_train,\n",
|
||||
" n_cross_validations = 3, \n",
|
||||
" path=project_folder,\n",
|
||||
" verbosity = logging.INFO,\n",
|
||||
" **automl_settings)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We will now run the experiment, starting with 10 iterations of model search. Experiment can be continued for more iterations if the results are not yet good. You will see the currently running iterations printing to the console."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"local_run = experiment.submit(automl_config, show_output=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Displaying the run objects gives you links to the visual tools in the Azure Portal. Go try them!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"local_run"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Retrieve the Best Model\n",
|
||||
"Below we select the best pipeline from our iterations. The get_output method on automl_classifier returns the best run and the fitted model for the last fit invocation. There are overloads on get_output that allow you to retrieve the best run and fitted model for any logged metric or a particular iteration."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"best_run, fitted_model = local_run.get_output()\n",
|
||||
"fitted_model.steps"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### View the engineered names for featurized data\n",
|
||||
"\n",
|
||||
"You can accees the engineered feature names generated in time-series featurization. Note that a number of named holiday periods are represented. We recommend that you have at least one year of data when using this feature to ensure that all yearly holidays are captured in the training featurization."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"fitted_model.named_steps['timeseriestransformer'].get_engineered_feature_names()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### View the featurization summary\n",
|
||||
"\n",
|
||||
"You can also see what featurization steps were performed on different raw features in the user data. For each raw feature in the user data, the following information is displayed:\n",
|
||||
"\n",
|
||||
"- Raw feature name\n",
|
||||
"- Number of engineered features formed out of this raw feature\n",
|
||||
"- Type detected\n",
|
||||
"- If feature was dropped\n",
|
||||
"- List of feature transformations for the raw feature"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"fitted_model.named_steps['timeseriestransformer'].get_featurization_summary()"
|
||||
]
|
||||
},
|
||||
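The featurization summary is returned as a list of per-feature records, so a small optional sketch (our addition, assuming the list-of-dicts shape used in other AutoML examples) is to wrap it in a pandas DataFrame for easier reading:

```python
import pandas as pd

# One row per raw feature: name, detected type, whether it was dropped,
# engineered feature count, and the transformations applied.
summary = fitted_model.named_steps['timeseriestransformer'].get_featurization_summary()
pd.DataFrame.from_records(summary)
```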
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Test the Best Fitted Model\n",
|
||||
"\n",
|
||||
"Predict on training and test set, and calculate residual values.\n",
|
||||
"\n",
|
||||
"We always score on the original dataset whose schema matches the scheme of the training dataset."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"X_test.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
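"# Filling y_query with NaN marks every target value as unknown, so forecast() predicts all of them.\n",
|
||||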
"y_query = y_test.copy().astype(np.float)\n",
|
||||
"y_query.fill(np.NaN)\n",
|
||||
"y_fcst, X_trans = fitted_model.forecast(X_test, y_query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"It is a good practice to always align the output explicitly to the input, as the count and order of the rows may have changed during transformations that span multiple rows."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def align_outputs(y_predicted, X_trans, X_test, y_test, predicted_column_name = 'predicted'):\n",
|
||||
" \"\"\"\n",
|
||||
" Demonstrates how to get the output aligned to the inputs\n",
|
||||
" using pandas indexes. Helps understand what happened if\n",
|
||||
" the output's shape differs from the input shape, or if\n",
|
||||
" the data got re-sorted by time and grain during forecasting.\n",
|
||||
" \n",
|
||||
" Typical causes of misalignment are:\n",
|
||||
" * we predicted some periods that were missing in actuals -> drop from eval\n",
|
||||
" * model was asked to predict past max_horizon -> increase max horizon\n",
|
||||
" * data at start of X_test was needed for lags -> provide previous periods\n",
|
||||
" \"\"\"\n",
|
||||
" df_fcst = pd.DataFrame({predicted_column_name : y_predicted})\n",
|
||||
" # y and X outputs are aligned by forecast() function contract\n",
|
||||
" df_fcst.index = X_trans.index\n",
|
||||
" \n",
|
||||
" # align original X_test to y_test \n",
|
||||
" X_test_full = X_test.copy()\n",
|
||||
" X_test_full[target_column_name] = y_test\n",
|
||||
"\n",
|
||||
" # X_test_full's index does not include origin, so reset for merge\n",
|
||||
" df_fcst.reset_index(inplace=True)\n",
|
||||
" X_test_full = X_test_full.reset_index().drop(columns='index')\n",
|
||||
" together = df_fcst.merge(X_test_full, how='right')\n",
|
||||
" \n",
|
||||
" # drop rows where prediction or actuals are nan \n",
|
||||
" # happens because of missing actuals \n",
|
||||
" # or at edges of time due to lags/rolling windows\n",
|
||||
" clean = together[together[[target_column_name, predicted_column_name]].notnull().all(axis=1)]\n",
|
||||
" return(clean)\n",
|
||||
"\n",
|
||||
"df_all = align_outputs(y_fcst, X_trans, X_test, y_test)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def MAPE(actual, pred):\n",
|
||||
" \"\"\"\n",
|
||||
" Calculate mean absolute percentage error.\n",
|
||||
" Remove NA and values where actual is close to zero\n",
|
||||
" \"\"\"\n",
|
||||
" not_na = ~(np.isnan(actual) | np.isnan(pred))\n",
|
||||
" not_zero = ~np.isclose(actual, 0.0)\n",
|
||||
" actual_safe = actual[not_na & not_zero]\n",
|
||||
" pred_safe = pred[not_na & not_zero]\n",
|
||||
" APE = 100*np.abs((actual_safe - pred_safe)/actual_safe)\n",
|
||||
" return np.mean(APE)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(\"Simple forecasting model\")\n",
|
||||
"rmse = np.sqrt(mean_squared_error(df_all[target_column_name], df_all['predicted']))\n",
|
||||
"print(\"[Test Data] \\nRoot Mean squared error: %.2f\" % rmse)\n",
|
||||
"mae = mean_absolute_error(df_all[target_column_name], df_all['predicted'])\n",
|
||||
"print('mean_absolute_error score: %.2f' % mae)\n",
|
||||
"print('MAPE: %.2f' % MAPE(df_all[target_column_name], df_all['predicted']))\n",
|
||||
"\n",
|
||||
"# Plot outputs\n",
|
||||
"%matplotlib notebook\n",
|
||||
"test_pred = plt.scatter(df_all[target_column_name], df_all['predicted'], color='b')\n",
|
||||
"test_test = plt.scatter(y_test, y_test, color='g')\n",
|
||||
"plt.legend((test_pred, test_test), ('prediction', 'truth'), loc='upper left', fontsize=8)\n",
|
||||
"plt.show()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "xiaga@microsoft.com, tosingli@microsoft.com"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
"name": "python36"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -0,0 +1,732 @@
|
||||
instant,date,season,yr,mnth,weekday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
|
||||
1,1/1/2011,1,0,1,6,2,0.344167,0.363625,0.805833,0.160446,331,654,985
|
||||
2,1/2/2011,1,0,1,0,2,0.363478,0.353739,0.696087,0.248539,131,670,801
|
||||
3,1/3/2011,1,0,1,1,1,0.196364,0.189405,0.437273,0.248309,120,1229,1349
|
||||
4,1/4/2011,1,0,1,2,1,0.2,0.212122,0.590435,0.160296,108,1454,1562
|
||||
5,1/5/2011,1,0,1,3,1,0.226957,0.22927,0.436957,0.1869,82,1518,1600
|
||||
6,1/6/2011,1,0,1,4,1,0.204348,0.233209,0.518261,0.0895652,88,1518,1606
|
||||
7,1/7/2011,1,0,1,5,2,0.196522,0.208839,0.498696,0.168726,148,1362,1510
|
||||
8,1/8/2011,1,0,1,6,2,0.165,0.162254,0.535833,0.266804,68,891,959
|
||||
9,1/9/2011,1,0,1,0,1,0.138333,0.116175,0.434167,0.36195,54,768,822
|
||||
10,1/10/2011,1,0,1,1,1,0.150833,0.150888,0.482917,0.223267,41,1280,1321
|
||||
11,1/11/2011,1,0,1,2,2,0.169091,0.191464,0.686364,0.122132,43,1220,1263
|
||||
12,1/12/2011,1,0,1,3,1,0.172727,0.160473,0.599545,0.304627,25,1137,1162
|
||||
13,1/13/2011,1,0,1,4,1,0.165,0.150883,0.470417,0.301,38,1368,1406
|
||||
14,1/14/2011,1,0,1,5,1,0.16087,0.188413,0.537826,0.126548,54,1367,1421
|
||||
15,1/15/2011,1,0,1,6,2,0.233333,0.248112,0.49875,0.157963,222,1026,1248
|
||||
16,1/16/2011,1,0,1,0,1,0.231667,0.234217,0.48375,0.188433,251,953,1204
|
||||
17,1/17/2011,1,0,1,1,2,0.175833,0.176771,0.5375,0.194017,117,883,1000
|
||||
18,1/18/2011,1,0,1,2,2,0.216667,0.232333,0.861667,0.146775,9,674,683
|
||||
19,1/19/2011,1,0,1,3,2,0.292174,0.298422,0.741739,0.208317,78,1572,1650
|
||||
20,1/20/2011,1,0,1,4,2,0.261667,0.25505,0.538333,0.195904,83,1844,1927
|
||||
21,1/21/2011,1,0,1,5,1,0.1775,0.157833,0.457083,0.353242,75,1468,1543
|
||||
22,1/22/2011,1,0,1,6,1,0.0591304,0.0790696,0.4,0.17197,93,888,981
|
||||
23,1/23/2011,1,0,1,0,1,0.0965217,0.0988391,0.436522,0.2466,150,836,986
|
||||
24,1/24/2011,1,0,1,1,1,0.0973913,0.11793,0.491739,0.15833,86,1330,1416
|
||||
25,1/25/2011,1,0,1,2,2,0.223478,0.234526,0.616957,0.129796,186,1799,1985
|
||||
26,1/26/2011,1,0,1,3,3,0.2175,0.2036,0.8625,0.29385,34,472,506
|
||||
27,1/27/2011,1,0,1,4,1,0.195,0.2197,0.6875,0.113837,15,416,431
|
||||
28,1/28/2011,1,0,1,5,2,0.203478,0.223317,0.793043,0.1233,38,1129,1167
|
||||
29,1/29/2011,1,0,1,6,1,0.196522,0.212126,0.651739,0.145365,123,975,1098
|
||||
30,1/30/2011,1,0,1,0,1,0.216522,0.250322,0.722174,0.0739826,140,956,1096
|
||||
31,1/31/2011,1,0,1,1,2,0.180833,0.18625,0.60375,0.187192,42,1459,1501
|
||||
32,2/1/2011,1,0,2,2,2,0.192174,0.23453,0.829565,0.053213,47,1313,1360
|
||||
33,2/2/2011,1,0,2,3,2,0.26,0.254417,0.775417,0.264308,72,1454,1526
|
||||
34,2/3/2011,1,0,2,4,1,0.186957,0.177878,0.437826,0.277752,61,1489,1550
|
||||
35,2/4/2011,1,0,2,5,2,0.211304,0.228587,0.585217,0.127839,88,1620,1708
|
||||
36,2/5/2011,1,0,2,6,2,0.233333,0.243058,0.929167,0.161079,100,905,1005
|
||||
37,2/6/2011,1,0,2,0,1,0.285833,0.291671,0.568333,0.1418,354,1269,1623
|
||||
38,2/7/2011,1,0,2,1,1,0.271667,0.303658,0.738333,0.0454083,120,1592,1712
|
||||
39,2/8/2011,1,0,2,2,1,0.220833,0.198246,0.537917,0.36195,64,1466,1530
|
||||
40,2/9/2011,1,0,2,3,2,0.134783,0.144283,0.494783,0.188839,53,1552,1605
|
||||
41,2/10/2011,1,0,2,4,1,0.144348,0.149548,0.437391,0.221935,47,1491,1538
|
||||
42,2/11/2011,1,0,2,5,1,0.189091,0.213509,0.506364,0.10855,149,1597,1746
|
||||
43,2/12/2011,1,0,2,6,1,0.2225,0.232954,0.544167,0.203367,288,1184,1472
|
||||
44,2/13/2011,1,0,2,0,1,0.316522,0.324113,0.457391,0.260883,397,1192,1589
|
||||
45,2/14/2011,1,0,2,1,1,0.415,0.39835,0.375833,0.417908,208,1705,1913
|
||||
46,2/15/2011,1,0,2,2,1,0.266087,0.254274,0.314348,0.291374,140,1675,1815
|
||||
47,2/16/2011,1,0,2,3,1,0.318261,0.3162,0.423478,0.251791,218,1897,2115
|
||||
48,2/17/2011,1,0,2,4,1,0.435833,0.428658,0.505,0.230104,259,2216,2475
|
||||
49,2/18/2011,1,0,2,5,1,0.521667,0.511983,0.516667,0.264925,579,2348,2927
|
||||
50,2/19/2011,1,0,2,6,1,0.399167,0.391404,0.187917,0.507463,532,1103,1635
|
||||
51,2/20/2011,1,0,2,0,1,0.285217,0.27733,0.407826,0.223235,639,1173,1812
|
||||
52,2/21/2011,1,0,2,1,2,0.303333,0.284075,0.605,0.307846,195,912,1107
|
||||
53,2/22/2011,1,0,2,2,1,0.182222,0.186033,0.577778,0.195683,74,1376,1450
|
||||
54,2/23/2011,1,0,2,3,1,0.221739,0.245717,0.423043,0.094113,139,1778,1917
|
||||
55,2/24/2011,1,0,2,4,2,0.295652,0.289191,0.697391,0.250496,100,1707,1807
|
||||
56,2/25/2011,1,0,2,5,2,0.364348,0.350461,0.712174,0.346539,120,1341,1461
|
||||
57,2/26/2011,1,0,2,6,1,0.2825,0.282192,0.537917,0.186571,424,1545,1969
|
||||
58,2/27/2011,1,0,2,0,1,0.343478,0.351109,0.68,0.125248,694,1708,2402
|
||||
59,2/28/2011,1,0,2,1,2,0.407273,0.400118,0.876364,0.289686,81,1365,1446
|
||||
60,3/1/2011,1,0,3,2,1,0.266667,0.263879,0.535,0.216425,137,1714,1851
|
||||
61,3/2/2011,1,0,3,3,1,0.335,0.320071,0.449583,0.307833,231,1903,2134
|
||||
62,3/3/2011,1,0,3,4,1,0.198333,0.200133,0.318333,0.225754,123,1562,1685
|
||||
63,3/4/2011,1,0,3,5,2,0.261667,0.255679,0.610417,0.203346,214,1730,1944
|
||||
64,3/5/2011,1,0,3,6,2,0.384167,0.378779,0.789167,0.251871,640,1437,2077
|
||||
65,3/6/2011,1,0,3,0,2,0.376522,0.366252,0.948261,0.343287,114,491,605
|
||||
66,3/7/2011,1,0,3,1,1,0.261739,0.238461,0.551304,0.341352,244,1628,1872
|
||||
67,3/8/2011,1,0,3,2,1,0.2925,0.3024,0.420833,0.12065,316,1817,2133
|
||||
68,3/9/2011,1,0,3,3,2,0.295833,0.286608,0.775417,0.22015,191,1700,1891
|
||||
69,3/10/2011,1,0,3,4,3,0.389091,0.385668,0,0.261877,46,577,623
|
||||
70,3/11/2011,1,0,3,5,2,0.316522,0.305,0.649565,0.23297,247,1730,1977
|
||||
71,3/12/2011,1,0,3,6,1,0.329167,0.32575,0.594583,0.220775,724,1408,2132
|
||||
72,3/13/2011,1,0,3,0,1,0.384348,0.380091,0.527391,0.270604,982,1435,2417
|
||||
73,3/14/2011,1,0,3,1,1,0.325217,0.332,0.496957,0.136926,359,1687,2046
|
||||
74,3/15/2011,1,0,3,2,2,0.317391,0.318178,0.655652,0.184309,289,1767,2056
|
||||
75,3/16/2011,1,0,3,3,2,0.365217,0.36693,0.776522,0.203117,321,1871,2192
|
||||
76,3/17/2011,1,0,3,4,1,0.415,0.410333,0.602917,0.209579,424,2320,2744
|
||||
77,3/18/2011,1,0,3,5,1,0.54,0.527009,0.525217,0.231017,884,2355,3239
|
||||
78,3/19/2011,1,0,3,6,1,0.4725,0.466525,0.379167,0.368167,1424,1693,3117
|
||||
79,3/20/2011,1,0,3,0,1,0.3325,0.32575,0.47375,0.207721,1047,1424,2471
|
||||
80,3/21/2011,2,0,3,1,2,0.430435,0.409735,0.737391,0.288783,401,1676,2077
|
||||
81,3/22/2011,2,0,3,2,1,0.441667,0.440642,0.624583,0.22575,460,2243,2703
|
||||
82,3/23/2011,2,0,3,3,2,0.346957,0.337939,0.839565,0.234261,203,1918,2121
|
||||
83,3/24/2011,2,0,3,4,2,0.285,0.270833,0.805833,0.243787,166,1699,1865
|
||||
84,3/25/2011,2,0,3,5,1,0.264167,0.256312,0.495,0.230725,300,1910,2210
|
||||
85,3/26/2011,2,0,3,6,1,0.265833,0.257571,0.394167,0.209571,981,1515,2496
|
||||
86,3/27/2011,2,0,3,0,2,0.253043,0.250339,0.493913,0.1843,472,1221,1693
|
||||
87,3/28/2011,2,0,3,1,1,0.264348,0.257574,0.302174,0.212204,222,1806,2028
|
||||
88,3/29/2011,2,0,3,2,1,0.3025,0.292908,0.314167,0.226996,317,2108,2425
|
||||
89,3/30/2011,2,0,3,3,2,0.3,0.29735,0.646667,0.172888,168,1368,1536
|
||||
90,3/31/2011,2,0,3,4,3,0.268333,0.257575,0.918333,0.217646,179,1506,1685
|
||||
91,4/1/2011,2,0,4,5,2,0.3,0.283454,0.68625,0.258708,307,1920,2227
|
||||
92,4/2/2011,2,0,4,6,2,0.315,0.315637,0.65375,0.197146,898,1354,2252
|
||||
93,4/3/2011,2,0,4,0,1,0.378333,0.378767,0.48,0.182213,1651,1598,3249
|
||||
94,4/4/2011,2,0,4,1,1,0.573333,0.542929,0.42625,0.385571,734,2381,3115
|
||||
95,4/5/2011,2,0,4,2,2,0.414167,0.39835,0.642083,0.388067,167,1628,1795
|
||||
96,4/6/2011,2,0,4,3,1,0.390833,0.387608,0.470833,0.263063,413,2395,2808
|
||||
97,4/7/2011,2,0,4,4,1,0.4375,0.433696,0.602917,0.162312,571,2570,3141
|
||||
98,4/8/2011,2,0,4,5,2,0.335833,0.324479,0.83625,0.226992,172,1299,1471
|
||||
99,4/9/2011,2,0,4,6,2,0.3425,0.341529,0.8775,0.133083,879,1576,2455
|
||||
100,4/10/2011,2,0,4,0,2,0.426667,0.426737,0.8575,0.146767,1188,1707,2895
|
||||
101,4/11/2011,2,0,4,1,2,0.595652,0.565217,0.716956,0.324474,855,2493,3348
|
||||
102,4/12/2011,2,0,4,2,2,0.5025,0.493054,0.739167,0.274879,257,1777,2034
|
||||
103,4/13/2011,2,0,4,3,2,0.4125,0.417283,0.819167,0.250617,209,1953,2162
|
||||
104,4/14/2011,2,0,4,4,1,0.4675,0.462742,0.540417,0.1107,529,2738,3267
|
||||
105,4/15/2011,2,0,4,5,1,0.446667,0.441913,0.67125,0.226375,642,2484,3126
|
||||
106,4/16/2011,2,0,4,6,3,0.430833,0.425492,0.888333,0.340808,121,674,795
|
||||
107,4/17/2011,2,0,4,0,1,0.456667,0.445696,0.479583,0.303496,1558,2186,3744
|
||||
108,4/18/2011,2,0,4,1,1,0.5125,0.503146,0.5425,0.163567,669,2760,3429
|
||||
109,4/19/2011,2,0,4,2,2,0.505833,0.489258,0.665833,0.157971,409,2795,3204
|
||||
110,4/20/2011,2,0,4,3,1,0.595,0.564392,0.614167,0.241925,613,3331,3944
|
||||
111,4/21/2011,2,0,4,4,1,0.459167,0.453892,0.407083,0.325258,745,3444,4189
|
||||
112,4/22/2011,2,0,4,5,2,0.336667,0.321954,0.729583,0.219521,177,1506,1683
|
||||
113,4/23/2011,2,0,4,6,2,0.46,0.450121,0.887917,0.230725,1462,2574,4036
|
||||
114,4/24/2011,2,0,4,0,2,0.581667,0.551763,0.810833,0.192175,1710,2481,4191
|
||||
115,4/25/2011,2,0,4,1,1,0.606667,0.5745,0.776667,0.185333,773,3300,4073
|
||||
116,4/26/2011,2,0,4,2,1,0.631667,0.594083,0.729167,0.3265,678,3722,4400
|
||||
117,4/27/2011,2,0,4,3,2,0.62,0.575142,0.835417,0.3122,547,3325,3872
|
||||
118,4/28/2011,2,0,4,4,2,0.6175,0.578929,0.700833,0.320908,569,3489,4058
|
||||
119,4/29/2011,2,0,4,5,1,0.51,0.497463,0.457083,0.240063,878,3717,4595
|
||||
120,4/30/2011,2,0,4,6,1,0.4725,0.464021,0.503333,0.235075,1965,3347,5312
|
||||
121,5/1/2011,2,0,5,0,2,0.451667,0.448204,0.762083,0.106354,1138,2213,3351
|
||||
122,5/2/2011,2,0,5,1,2,0.549167,0.532833,0.73,0.183454,847,3554,4401
|
||||
123,5/3/2011,2,0,5,2,2,0.616667,0.582079,0.697083,0.342667,603,3848,4451
|
||||
124,5/4/2011,2,0,5,3,2,0.414167,0.40465,0.737083,0.328996,255,2378,2633
|
||||
125,5/5/2011,2,0,5,4,1,0.459167,0.441917,0.444167,0.295392,614,3819,4433
|
||||
126,5/6/2011,2,0,5,5,1,0.479167,0.474117,0.59,0.228246,894,3714,4608
|
||||
127,5/7/2011,2,0,5,6,1,0.52,0.512621,0.54125,0.16045,1612,3102,4714
|
||||
128,5/8/2011,2,0,5,0,1,0.528333,0.518933,0.631667,0.0746375,1401,2932,4333
|
||||
129,5/9/2011,2,0,5,1,1,0.5325,0.525246,0.58875,0.176,664,3698,4362
|
||||
130,5/10/2011,2,0,5,2,1,0.5325,0.522721,0.489167,0.115671,694,4109,4803
|
||||
131,5/11/2011,2,0,5,3,1,0.5425,0.5284,0.632917,0.120642,550,3632,4182
|
||||
132,5/12/2011,2,0,5,4,1,0.535,0.523363,0.7475,0.189667,695,4169,4864
|
||||
133,5/13/2011,2,0,5,5,2,0.5125,0.4943,0.863333,0.179725,692,3413,4105
|
||||
134,5/14/2011,2,0,5,6,2,0.520833,0.500629,0.9225,0.13495,902,2507,3409
|
||||
135,5/15/2011,2,0,5,0,2,0.5625,0.536,0.867083,0.152979,1582,2971,4553
|
||||
136,5/16/2011,2,0,5,1,1,0.5775,0.550512,0.787917,0.126871,773,3185,3958
|
||||
137,5/17/2011,2,0,5,2,2,0.561667,0.538529,0.837917,0.277354,678,3445,4123
|
||||
138,5/18/2011,2,0,5,3,2,0.55,0.527158,0.87,0.201492,536,3319,3855
|
||||
139,5/19/2011,2,0,5,4,2,0.530833,0.510742,0.829583,0.108213,735,3840,4575
|
||||
140,5/20/2011,2,0,5,5,1,0.536667,0.529042,0.719583,0.125013,909,4008,4917
|
||||
141,5/21/2011,2,0,5,6,1,0.6025,0.571975,0.626667,0.12065,2258,3547,5805
|
||||
142,5/22/2011,2,0,5,0,1,0.604167,0.5745,0.749583,0.148008,1576,3084,4660
|
||||
143,5/23/2011,2,0,5,1,2,0.631667,0.590296,0.81,0.233842,836,3438,4274
|
||||
144,5/24/2011,2,0,5,2,2,0.66,0.604813,0.740833,0.207092,659,3833,4492
|
||||
145,5/25/2011,2,0,5,3,1,0.660833,0.615542,0.69625,0.154233,740,4238,4978
|
||||
146,5/26/2011,2,0,5,4,1,0.708333,0.654688,0.6775,0.199642,758,3919,4677
|
||||
147,5/27/2011,2,0,5,5,1,0.681667,0.637008,0.65375,0.240679,871,3808,4679
|
||||
148,5/28/2011,2,0,5,6,1,0.655833,0.612379,0.729583,0.230092,2001,2757,4758
|
||||
149,5/29/2011,2,0,5,0,1,0.6675,0.61555,0.81875,0.213938,2355,2433,4788
|
||||
150,5/30/2011,2,0,5,1,1,0.733333,0.671092,0.685,0.131225,1549,2549,4098
|
||||
151,5/31/2011,2,0,5,2,1,0.775,0.725383,0.636667,0.111329,673,3309,3982
|
||||
152,6/1/2011,2,0,6,3,2,0.764167,0.720967,0.677083,0.207092,513,3461,3974
|
||||
153,6/2/2011,2,0,6,4,1,0.715,0.643942,0.305,0.292287,736,4232,4968
|
||||
154,6/3/2011,2,0,6,5,1,0.62,0.587133,0.354167,0.253121,898,4414,5312
|
||||
155,6/4/2011,2,0,6,6,1,0.635,0.594696,0.45625,0.123142,1869,3473,5342
|
||||
156,6/5/2011,2,0,6,0,2,0.648333,0.616804,0.6525,0.138692,1685,3221,4906
|
||||
157,6/6/2011,2,0,6,1,1,0.678333,0.621858,0.6,0.121896,673,3875,4548
|
||||
158,6/7/2011,2,0,6,2,1,0.7075,0.65595,0.597917,0.187808,763,4070,4833
|
||||
159,6/8/2011,2,0,6,3,1,0.775833,0.727279,0.622083,0.136817,676,3725,4401
|
||||
160,6/9/2011,2,0,6,4,2,0.808333,0.757579,0.568333,0.149883,563,3352,3915
|
||||
161,6/10/2011,2,0,6,5,1,0.755,0.703292,0.605,0.140554,815,3771,4586
|
||||
162,6/11/2011,2,0,6,6,1,0.725,0.678038,0.654583,0.15485,1729,3237,4966
|
||||
163,6/12/2011,2,0,6,0,1,0.6925,0.643325,0.747917,0.163567,1467,2993,4460
|
||||
164,6/13/2011,2,0,6,1,1,0.635,0.601654,0.494583,0.30535,863,4157,5020
|
||||
165,6/14/2011,2,0,6,2,1,0.604167,0.591546,0.507083,0.269283,727,4164,4891
|
||||
166,6/15/2011,2,0,6,3,1,0.626667,0.587754,0.471667,0.167912,769,4411,5180
|
||||
167,6/16/2011,2,0,6,4,2,0.628333,0.595346,0.688333,0.206471,545,3222,3767
|
||||
168,6/17/2011,2,0,6,5,1,0.649167,0.600383,0.735833,0.143029,863,3981,4844
|
||||
169,6/18/2011,2,0,6,6,1,0.696667,0.643954,0.670417,0.119408,1807,3312,5119
|
||||
170,6/19/2011,2,0,6,0,2,0.699167,0.645846,0.666667,0.102,1639,3105,4744
|
||||
171,6/20/2011,2,0,6,1,2,0.635,0.595346,0.74625,0.155475,699,3311,4010
|
||||
172,6/21/2011,3,0,6,2,2,0.680833,0.637646,0.770417,0.171025,774,4061,4835
|
||||
173,6/22/2011,3,0,6,3,1,0.733333,0.693829,0.7075,0.172262,661,3846,4507
|
||||
174,6/23/2011,3,0,6,4,2,0.728333,0.693833,0.703333,0.238804,746,4044,4790
|
||||
175,6/24/2011,3,0,6,5,1,0.724167,0.656583,0.573333,0.222025,969,4022,4991
|
||||
176,6/25/2011,3,0,6,6,1,0.695,0.643313,0.483333,0.209571,1782,3420,5202
|
||||
177,6/26/2011,3,0,6,0,1,0.68,0.637629,0.513333,0.0945333,1920,3385,5305
|
||||
178,6/27/2011,3,0,6,1,2,0.6825,0.637004,0.658333,0.107588,854,3854,4708
|
||||
179,6/28/2011,3,0,6,2,1,0.744167,0.692558,0.634167,0.144283,732,3916,4648
|
||||
180,6/29/2011,3,0,6,3,1,0.728333,0.654688,0.497917,0.261821,848,4377,5225
|
||||
181,6/30/2011,3,0,6,4,1,0.696667,0.637008,0.434167,0.185312,1027,4488,5515
|
||||
182,7/1/2011,3,0,7,5,1,0.7225,0.652162,0.39625,0.102608,1246,4116,5362
|
||||
183,7/2/2011,3,0,7,6,1,0.738333,0.667308,0.444583,0.115062,2204,2915,5119
|
||||
184,7/3/2011,3,0,7,0,2,0.716667,0.668575,0.6825,0.228858,2282,2367,4649
|
||||
185,7/4/2011,3,0,7,1,2,0.726667,0.665417,0.637917,0.0814792,3065,2978,6043
|
||||
186,7/5/2011,3,0,7,2,1,0.746667,0.696338,0.590417,0.126258,1031,3634,4665
|
||||
187,7/6/2011,3,0,7,3,1,0.72,0.685633,0.743333,0.149883,784,3845,4629
|
||||
188,7/7/2011,3,0,7,4,1,0.75,0.686871,0.65125,0.1592,754,3838,4592
|
||||
189,7/8/2011,3,0,7,5,2,0.709167,0.670483,0.757917,0.225129,692,3348,4040
|
||||
190,7/9/2011,3,0,7,6,1,0.733333,0.664158,0.609167,0.167912,1988,3348,5336
|
||||
191,7/10/2011,3,0,7,0,1,0.7475,0.690025,0.578333,0.183471,1743,3138,4881
|
||||
192,7/11/2011,3,0,7,1,1,0.7625,0.729804,0.635833,0.282337,723,3363,4086
|
||||
193,7/12/2011,3,0,7,2,1,0.794167,0.739275,0.559167,0.200254,662,3596,4258
|
||||
194,7/13/2011,3,0,7,3,1,0.746667,0.689404,0.631667,0.146133,748,3594,4342
|
||||
195,7/14/2011,3,0,7,4,1,0.680833,0.635104,0.47625,0.240667,888,4196,5084
|
||||
196,7/15/2011,3,0,7,5,1,0.663333,0.624371,0.59125,0.182833,1318,4220,5538
|
||||
197,7/16/2011,3,0,7,6,1,0.686667,0.638263,0.585,0.208342,2418,3505,5923
|
||||
198,7/17/2011,3,0,7,0,1,0.719167,0.669833,0.604167,0.245033,2006,3296,5302
|
||||
199,7/18/2011,3,0,7,1,1,0.746667,0.703925,0.65125,0.215804,841,3617,4458
|
||||
200,7/19/2011,3,0,7,2,1,0.776667,0.747479,0.650417,0.1306,752,3789,4541
|
||||
201,7/20/2011,3,0,7,3,1,0.768333,0.74685,0.707083,0.113817,644,3688,4332
|
||||
202,7/21/2011,3,0,7,4,2,0.815,0.826371,0.69125,0.222021,632,3152,3784
|
||||
203,7/22/2011,3,0,7,5,1,0.848333,0.840896,0.580417,0.1331,562,2825,3387
|
||||
204,7/23/2011,3,0,7,6,1,0.849167,0.804287,0.5,0.131221,987,2298,3285
|
||||
205,7/24/2011,3,0,7,0,1,0.83,0.794829,0.550833,0.169171,1050,2556,3606
|
||||
206,7/25/2011,3,0,7,1,1,0.743333,0.720958,0.757083,0.0908083,568,3272,3840
|
||||
207,7/26/2011,3,0,7,2,1,0.771667,0.696979,0.540833,0.200258,750,3840,4590
|
||||
208,7/27/2011,3,0,7,3,1,0.775,0.690667,0.402917,0.183463,755,3901,4656
|
||||
209,7/28/2011,3,0,7,4,1,0.779167,0.7399,0.583333,0.178479,606,3784,4390
|
||||
210,7/29/2011,3,0,7,5,1,0.838333,0.785967,0.5425,0.174138,670,3176,3846
|
||||
211,7/30/2011,3,0,7,6,1,0.804167,0.728537,0.465833,0.168537,1559,2916,4475
|
||||
212,7/31/2011,3,0,7,0,1,0.805833,0.729796,0.480833,0.164813,1524,2778,4302
|
||||
213,8/1/2011,3,0,8,1,1,0.771667,0.703292,0.550833,0.156717,729,3537,4266
|
||||
214,8/2/2011,3,0,8,2,1,0.783333,0.707071,0.49125,0.20585,801,4044,4845
|
||||
215,8/3/2011,3,0,8,3,2,0.731667,0.679937,0.6575,0.135583,467,3107,3574
|
||||
216,8/4/2011,3,0,8,4,2,0.71,0.664788,0.7575,0.19715,799,3777,4576
|
||||
217,8/5/2011,3,0,8,5,1,0.710833,0.656567,0.630833,0.184696,1023,3843,4866
|
||||
218,8/6/2011,3,0,8,6,2,0.716667,0.676154,0.755,0.22825,1521,2773,4294
|
||||
219,8/7/2011,3,0,8,0,1,0.7425,0.715292,0.752917,0.201487,1298,2487,3785
|
||||
220,8/8/2011,3,0,8,1,1,0.765,0.703283,0.592083,0.192175,846,3480,4326
|
||||
221,8/9/2011,3,0,8,2,1,0.775,0.724121,0.570417,0.151121,907,3695,4602
|
||||
222,8/10/2011,3,0,8,3,1,0.766667,0.684983,0.424167,0.200258,884,3896,4780
|
||||
223,8/11/2011,3,0,8,4,1,0.7175,0.651521,0.42375,0.164796,812,3980,4792
|
||||
224,8/12/2011,3,0,8,5,1,0.708333,0.654042,0.415,0.125621,1051,3854,4905
|
||||
225,8/13/2011,3,0,8,6,2,0.685833,0.645858,0.729583,0.211454,1504,2646,4150
|
||||
226,8/14/2011,3,0,8,0,2,0.676667,0.624388,0.8175,0.222633,1338,2482,3820
|
||||
227,8/15/2011,3,0,8,1,1,0.665833,0.616167,0.712083,0.208954,775,3563,4338
|
||||
228,8/16/2011,3,0,8,2,1,0.700833,0.645837,0.578333,0.236329,721,4004,4725
|
||||
229,8/17/2011,3,0,8,3,1,0.723333,0.666671,0.575417,0.143667,668,4026,4694
|
||||
230,8/18/2011,3,0,8,4,1,0.711667,0.662258,0.654583,0.233208,639,3166,3805
|
||||
231,8/19/2011,3,0,8,5,2,0.685,0.633221,0.722917,0.139308,797,3356,4153
|
||||
232,8/20/2011,3,0,8,6,1,0.6975,0.648996,0.674167,0.104467,1914,3277,5191
|
||||
233,8/21/2011,3,0,8,0,1,0.710833,0.675525,0.77,0.248754,1249,2624,3873
|
||||
234,8/22/2011,3,0,8,1,1,0.691667,0.638254,0.47,0.27675,833,3925,4758
|
||||
235,8/23/2011,3,0,8,2,1,0.640833,0.606067,0.455417,0.146763,1281,4614,5895
|
||||
236,8/24/2011,3,0,8,3,1,0.673333,0.630692,0.605,0.253108,949,4181,5130
|
||||
237,8/25/2011,3,0,8,4,2,0.684167,0.645854,0.771667,0.210833,435,3107,3542
|
||||
238,8/26/2011,3,0,8,5,1,0.7,0.659733,0.76125,0.0839625,768,3893,4661
|
||||
239,8/27/2011,3,0,8,6,2,0.68,0.635556,0.85,0.375617,226,889,1115
|
||||
240,8/28/2011,3,0,8,0,1,0.707059,0.647959,0.561765,0.304659,1415,2919,4334
|
||||
241,8/29/2011,3,0,8,1,1,0.636667,0.607958,0.554583,0.159825,729,3905,4634
|
||||
242,8/30/2011,3,0,8,2,1,0.639167,0.594704,0.548333,0.125008,775,4429,5204
|
||||
243,8/31/2011,3,0,8,3,1,0.656667,0.611121,0.597917,0.0833333,688,4370,5058
|
||||
244,9/1/2011,3,0,9,4,1,0.655,0.614921,0.639167,0.141796,783,4332,5115
|
||||
245,9/2/2011,3,0,9,5,2,0.643333,0.604808,0.727083,0.139929,875,3852,4727
|
||||
246,9/3/2011,3,0,9,6,1,0.669167,0.633213,0.716667,0.185325,1935,2549,4484
|
||||
247,9/4/2011,3,0,9,0,1,0.709167,0.665429,0.742083,0.206467,2521,2419,4940
|
||||
248,9/5/2011,3,0,9,1,2,0.673333,0.625646,0.790417,0.212696,1236,2115,3351
|
||||
249,9/6/2011,3,0,9,2,3,0.54,0.5152,0.886957,0.343943,204,2506,2710
|
||||
250,9/7/2011,3,0,9,3,3,0.599167,0.544229,0.917083,0.0970208,118,1878,1996
|
||||
251,9/8/2011,3,0,9,4,3,0.633913,0.555361,0.939565,0.192748,153,1689,1842
|
||||
252,9/9/2011,3,0,9,5,2,0.65,0.578946,0.897917,0.124379,417,3127,3544
|
||||
253,9/10/2011,3,0,9,6,1,0.66,0.607962,0.75375,0.153608,1750,3595,5345
|
||||
254,9/11/2011,3,0,9,0,1,0.653333,0.609229,0.71375,0.115054,1633,3413,5046
|
||||
255,9/12/2011,3,0,9,1,1,0.644348,0.60213,0.692174,0.088913,690,4023,4713
|
||||
256,9/13/2011,3,0,9,2,1,0.650833,0.603554,0.7125,0.141804,701,4062,4763
|
||||
257,9/14/2011,3,0,9,3,1,0.673333,0.6269,0.697083,0.1673,647,4138,4785
|
||||
258,9/15/2011,3,0,9,4,2,0.5775,0.553671,0.709167,0.271146,428,3231,3659
|
||||
259,9/16/2011,3,0,9,5,2,0.469167,0.461475,0.590417,0.164183,742,4018,4760
|
||||
260,9/17/2011,3,0,9,6,2,0.491667,0.478512,0.718333,0.189675,1434,3077,4511
|
||||
261,9/18/2011,3,0,9,0,1,0.5075,0.490537,0.695,0.178483,1353,2921,4274
|
||||
262,9/19/2011,3,0,9,1,2,0.549167,0.529675,0.69,0.151742,691,3848,4539
|
||||
263,9/20/2011,3,0,9,2,2,0.561667,0.532217,0.88125,0.134954,438,3203,3641
|
||||
264,9/21/2011,3,0,9,3,2,0.595,0.550533,0.9,0.0964042,539,3813,4352
|
||||
265,9/22/2011,3,0,9,4,2,0.628333,0.554963,0.902083,0.128125,555,4240,4795
|
||||
266,9/23/2011,4,0,9,5,2,0.609167,0.522125,0.9725,0.0783667,258,2137,2395
|
||||
267,9/24/2011,4,0,9,6,2,0.606667,0.564412,0.8625,0.0783833,1776,3647,5423
|
||||
268,9/25/2011,4,0,9,0,2,0.634167,0.572637,0.845,0.0503792,1544,3466,5010
|
||||
269,9/26/2011,4,0,9,1,2,0.649167,0.589042,0.848333,0.1107,684,3946,4630
|
||||
270,9/27/2011,4,0,9,2,2,0.636667,0.574525,0.885417,0.118171,477,3643,4120
|
||||
271,9/28/2011,4,0,9,3,2,0.635,0.575158,0.84875,0.148629,480,3427,3907
|
||||
272,9/29/2011,4,0,9,4,1,0.616667,0.574512,0.699167,0.172883,653,4186,4839
|
||||
273,9/30/2011,4,0,9,5,1,0.564167,0.544829,0.6475,0.206475,830,4372,5202
|
||||
274,10/1/2011,4,0,10,6,2,0.41,0.412863,0.75375,0.292296,480,1949,2429
|
||||
275,10/2/2011,4,0,10,0,2,0.356667,0.345317,0.791667,0.222013,616,2302,2918
|
||||
276,10/3/2011,4,0,10,1,2,0.384167,0.392046,0.760833,0.0833458,330,3240,3570
|
||||
277,10/4/2011,4,0,10,2,1,0.484167,0.472858,0.71,0.205854,486,3970,4456
|
||||
278,10/5/2011,4,0,10,3,1,0.538333,0.527138,0.647917,0.17725,559,4267,4826
|
||||
279,10/6/2011,4,0,10,4,1,0.494167,0.480425,0.620833,0.134954,639,4126,4765
|
||||
280,10/7/2011,4,0,10,5,1,0.510833,0.504404,0.684167,0.0223917,949,4036,4985
|
||||
281,10/8/2011,4,0,10,6,1,0.521667,0.513242,0.70125,0.0454042,2235,3174,5409
|
||||
282,10/9/2011,4,0,10,0,1,0.540833,0.523983,0.7275,0.06345,2397,3114,5511
|
||||
283,10/10/2011,4,0,10,1,1,0.570833,0.542925,0.73375,0.0423042,1514,3603,5117
|
||||
284,10/11/2011,4,0,10,2,2,0.566667,0.546096,0.80875,0.143042,667,3896,4563
|
||||
285,10/12/2011,4,0,10,3,3,0.543333,0.517717,0.90625,0.24815,217,2199,2416
|
||||
286,10/13/2011,4,0,10,4,2,0.589167,0.551804,0.896667,0.141787,290,2623,2913
|
||||
287,10/14/2011,4,0,10,5,2,0.550833,0.529675,0.71625,0.223883,529,3115,3644
|
||||
288,10/15/2011,4,0,10,6,1,0.506667,0.498725,0.483333,0.258083,1899,3318,5217
|
||||
289,10/16/2011,4,0,10,0,1,0.511667,0.503154,0.486667,0.281717,1748,3293,5041
|
||||
290,10/17/2011,4,0,10,1,1,0.534167,0.510725,0.579583,0.175379,713,3857,4570
|
||||
291,10/18/2011,4,0,10,2,2,0.5325,0.522721,0.701667,0.110087,637,4111,4748
|
||||
292,10/19/2011,4,0,10,3,3,0.541739,0.513848,0.895217,0.243339,254,2170,2424
|
||||
293,10/20/2011,4,0,10,4,1,0.475833,0.466525,0.63625,0.422275,471,3724,4195
|
||||
294,10/21/2011,4,0,10,5,1,0.4275,0.423596,0.574167,0.221396,676,3628,4304
|
||||
295,10/22/2011,4,0,10,6,1,0.4225,0.425492,0.629167,0.0926667,1499,2809,4308
|
||||
296,10/23/2011,4,0,10,0,1,0.421667,0.422333,0.74125,0.0995125,1619,2762,4381
|
||||
297,10/24/2011,4,0,10,1,1,0.463333,0.457067,0.772083,0.118792,699,3488,4187
|
||||
298,10/25/2011,4,0,10,2,1,0.471667,0.463375,0.622917,0.166658,695,3992,4687
|
||||
299,10/26/2011,4,0,10,3,2,0.484167,0.472846,0.720417,0.148642,404,3490,3894
|
||||
300,10/27/2011,4,0,10,4,2,0.47,0.457046,0.812917,0.197763,240,2419,2659
|
||||
301,10/28/2011,4,0,10,5,2,0.330833,0.318812,0.585833,0.229479,456,3291,3747
|
||||
302,10/29/2011,4,0,10,6,3,0.254167,0.227913,0.8825,0.351371,57,570,627
|
||||
303,10/30/2011,4,0,10,0,1,0.319167,0.321329,0.62375,0.176617,885,2446,3331
|
||||
304,10/31/2011,4,0,10,1,1,0.34,0.356063,0.703333,0.10635,362,3307,3669
|
||||
305,11/1/2011,4,0,11,2,1,0.400833,0.397088,0.68375,0.135571,410,3658,4068
|
||||
306,11/2/2011,4,0,11,3,1,0.3775,0.390133,0.71875,0.0820917,370,3816,4186
|
||||
307,11/3/2011,4,0,11,4,1,0.408333,0.405921,0.702083,0.136817,318,3656,3974
|
||||
308,11/4/2011,4,0,11,5,2,0.403333,0.403392,0.6225,0.271779,470,3576,4046
|
||||
309,11/5/2011,4,0,11,6,1,0.326667,0.323854,0.519167,0.189062,1156,2770,3926
|
||||
310,11/6/2011,4,0,11,0,1,0.348333,0.362358,0.734583,0.0920542,952,2697,3649
|
||||
311,11/7/2011,4,0,11,1,1,0.395,0.400871,0.75875,0.057225,373,3662,4035
|
||||
312,11/8/2011,4,0,11,2,1,0.408333,0.412246,0.721667,0.0690375,376,3829,4205
|
||||
313,11/9/2011,4,0,11,3,1,0.4,0.409079,0.758333,0.0621958,305,3804,4109
|
||||
314,11/10/2011,4,0,11,4,2,0.38,0.373721,0.813333,0.189067,190,2743,2933
|
||||
315,11/11/2011,4,0,11,5,1,0.324167,0.306817,0.44625,0.314675,440,2928,3368
|
||||
316,11/12/2011,4,0,11,6,1,0.356667,0.357942,0.552917,0.212062,1275,2792,4067
|
||||
317,11/13/2011,4,0,11,0,1,0.440833,0.43055,0.458333,0.281721,1004,2713,3717
|
||||
318,11/14/2011,4,0,11,1,1,0.53,0.524612,0.587083,0.306596,595,3891,4486
|
||||
319,11/15/2011,4,0,11,2,2,0.53,0.507579,0.68875,0.199633,449,3746,4195
|
||||
320,11/16/2011,4,0,11,3,3,0.456667,0.451988,0.93,0.136829,145,1672,1817
|
||||
321,11/17/2011,4,0,11,4,2,0.341667,0.323221,0.575833,0.305362,139,2914,3053
|
||||
322,11/18/2011,4,0,11,5,1,0.274167,0.272721,0.41,0.168533,245,3147,3392
|
||||
323,11/19/2011,4,0,11,6,1,0.329167,0.324483,0.502083,0.224496,943,2720,3663
|
||||
324,11/20/2011,4,0,11,0,2,0.463333,0.457058,0.684583,0.18595,787,2733,3520
|
||||
325,11/21/2011,4,0,11,1,3,0.4475,0.445062,0.91,0.138054,220,2545,2765
|
||||
326,11/22/2011,4,0,11,2,3,0.416667,0.421696,0.9625,0.118792,69,1538,1607
|
||||
327,11/23/2011,4,0,11,3,2,0.440833,0.430537,0.757917,0.335825,112,2454,2566
|
||||
328,11/24/2011,4,0,11,4,1,0.373333,0.372471,0.549167,0.167304,560,935,1495
|
||||
329,11/25/2011,4,0,11,5,1,0.375,0.380671,0.64375,0.0988958,1095,1697,2792
|
||||
330,11/26/2011,4,0,11,6,1,0.375833,0.385087,0.681667,0.0684208,1249,1819,3068
|
||||
331,11/27/2011,4,0,11,0,1,0.459167,0.4558,0.698333,0.208954,810,2261,3071
|
||||
332,11/28/2011,4,0,11,1,1,0.503478,0.490122,0.743043,0.142122,253,3614,3867
|
||||
333,11/29/2011,4,0,11,2,2,0.458333,0.451375,0.830833,0.258092,96,2818,2914
|
||||
334,11/30/2011,4,0,11,3,1,0.325,0.311221,0.613333,0.271158,188,3425,3613
|
||||
335,12/1/2011,4,0,12,4,1,0.3125,0.305554,0.524583,0.220158,182,3545,3727
|
||||
336,12/2/2011,4,0,12,5,1,0.314167,0.331433,0.625833,0.100754,268,3672,3940
|
||||
337,12/3/2011,4,0,12,6,1,0.299167,0.310604,0.612917,0.0957833,706,2908,3614
|
||||
338,12/4/2011,4,0,12,0,1,0.330833,0.3491,0.775833,0.0839583,634,2851,3485
|
||||
339,12/5/2011,4,0,12,1,2,0.385833,0.393925,0.827083,0.0622083,233,3578,3811
|
||||
340,12/6/2011,4,0,12,2,3,0.4625,0.4564,0.949583,0.232583,126,2468,2594
|
||||
341,12/7/2011,4,0,12,3,3,0.41,0.400246,0.970417,0.266175,50,655,705
|
||||
342,12/8/2011,4,0,12,4,1,0.265833,0.256938,0.58,0.240058,150,3172,3322
|
||||
343,12/9/2011,4,0,12,5,1,0.290833,0.317542,0.695833,0.0827167,261,3359,3620
|
||||
344,12/10/2011,4,0,12,6,1,0.275,0.266412,0.5075,0.233221,502,2688,3190
|
||||
345,12/11/2011,4,0,12,0,1,0.220833,0.253154,0.49,0.0665417,377,2366,2743
|
||||
346,12/12/2011,4,0,12,1,1,0.238333,0.270196,0.670833,0.06345,143,3167,3310
|
||||
347,12/13/2011,4,0,12,2,1,0.2825,0.301138,0.59,0.14055,155,3368,3523
|
||||
348,12/14/2011,4,0,12,3,2,0.3175,0.338362,0.66375,0.0609583,178,3562,3740
|
||||
349,12/15/2011,4,0,12,4,2,0.4225,0.412237,0.634167,0.268042,181,3528,3709
|
||||
350,12/16/2011,4,0,12,5,2,0.375,0.359825,0.500417,0.260575,178,3399,3577
|
||||
351,12/17/2011,4,0,12,6,2,0.258333,0.249371,0.560833,0.243167,275,2464,2739
|
||||
352,12/18/2011,4,0,12,0,1,0.238333,0.245579,0.58625,0.169779,220,2211,2431
|
||||
353,12/19/2011,4,0,12,1,1,0.276667,0.280933,0.6375,0.172896,260,3143,3403
|
||||
354,12/20/2011,4,0,12,2,2,0.385833,0.396454,0.595417,0.0615708,216,3534,3750
|
||||
355,12/21/2011,1,0,12,3,2,0.428333,0.428017,0.858333,0.2214,107,2553,2660
|
||||
356,12/22/2011,1,0,12,4,2,0.423333,0.426121,0.7575,0.047275,227,2841,3068
|
||||
357,12/23/2011,1,0,12,5,1,0.373333,0.377513,0.68625,0.274246,163,2046,2209
|
||||
358,12/24/2011,1,0,12,6,1,0.3025,0.299242,0.5425,0.190304,155,856,1011
|
||||
359,12/25/2011,1,0,12,0,1,0.274783,0.279961,0.681304,0.155091,303,451,754
|
||||
360,12/26/2011,1,0,12,1,1,0.321739,0.315535,0.506957,0.239465,430,887,1317
|
||||
361,12/27/2011,1,0,12,2,2,0.325,0.327633,0.7625,0.18845,103,1059,1162
|
||||
362,12/28/2011,1,0,12,3,1,0.29913,0.279974,0.503913,0.293961,255,2047,2302
|
||||
363,12/29/2011,1,0,12,4,1,0.248333,0.263892,0.574167,0.119412,254,2169,2423
|
||||
364,12/30/2011,1,0,12,5,1,0.311667,0.318812,0.636667,0.134337,491,2508,2999
|
||||
365,12/31/2011,1,0,12,6,1,0.41,0.414121,0.615833,0.220154,665,1820,2485
|
||||
366,1/1/2012,1,1,1,0,1,0.37,0.375621,0.6925,0.192167,686,1608,2294
|
||||
367,1/2/2012,1,1,1,1,1,0.273043,0.252304,0.381304,0.329665,244,1707,1951
|
||||
368,1/3/2012,1,1,1,2,1,0.15,0.126275,0.44125,0.365671,89,2147,2236
|
||||
369,1/4/2012,1,1,1,3,2,0.1075,0.119337,0.414583,0.1847,95,2273,2368
|
||||
370,1/5/2012,1,1,1,4,1,0.265833,0.278412,0.524167,0.129987,140,3132,3272
|
||||
371,1/6/2012,1,1,1,5,1,0.334167,0.340267,0.542083,0.167908,307,3791,4098
|
||||
372,1/7/2012,1,1,1,6,1,0.393333,0.390779,0.531667,0.174758,1070,3451,4521
|
||||
373,1/8/2012,1,1,1,0,1,0.3375,0.340258,0.465,0.191542,599,2826,3425
|
||||
374,1/9/2012,1,1,1,1,2,0.224167,0.247479,0.701667,0.0989,106,2270,2376
|
||||
375,1/10/2012,1,1,1,2,1,0.308696,0.318826,0.646522,0.187552,173,3425,3598
|
||||
376,1/11/2012,1,1,1,3,2,0.274167,0.282821,0.8475,0.131221,92,2085,2177
|
||||
377,1/12/2012,1,1,1,4,2,0.3825,0.381938,0.802917,0.180967,269,3828,4097
|
||||
378,1/13/2012,1,1,1,5,1,0.274167,0.249362,0.5075,0.378108,174,3040,3214
|
||||
379,1/14/2012,1,1,1,6,1,0.18,0.183087,0.4575,0.187183,333,2160,2493
|
||||
380,1/15/2012,1,1,1,0,1,0.166667,0.161625,0.419167,0.251258,284,2027,2311
|
||||
381,1/16/2012,1,1,1,1,1,0.19,0.190663,0.5225,0.231358,217,2081,2298
|
||||
382,1/17/2012,1,1,1,2,2,0.373043,0.364278,0.716087,0.34913,127,2808,2935
|
||||
383,1/18/2012,1,1,1,3,1,0.303333,0.275254,0.443333,0.415429,109,3267,3376
|
||||
384,1/19/2012,1,1,1,4,1,0.19,0.190038,0.4975,0.220158,130,3162,3292
|
||||
385,1/20/2012,1,1,1,5,2,0.2175,0.220958,0.45,0.20275,115,3048,3163
|
||||
386,1/21/2012,1,1,1,6,2,0.173333,0.174875,0.83125,0.222642,67,1234,1301
|
||||
387,1/22/2012,1,1,1,0,2,0.1625,0.16225,0.79625,0.199638,196,1781,1977
|
||||
388,1/23/2012,1,1,1,1,2,0.218333,0.243058,0.91125,0.110708,145,2287,2432
|
||||
389,1/24/2012,1,1,1,2,1,0.3425,0.349108,0.835833,0.123767,439,3900,4339
|
||||
390,1/25/2012,1,1,1,3,1,0.294167,0.294821,0.64375,0.161071,467,3803,4270
|
||||
391,1/26/2012,1,1,1,4,2,0.341667,0.35605,0.769583,0.0733958,244,3831,4075
|
||||
392,1/27/2012,1,1,1,5,2,0.425,0.415383,0.74125,0.342667,269,3187,3456
|
||||
393,1/28/2012,1,1,1,6,1,0.315833,0.326379,0.543333,0.210829,775,3248,4023
|
||||
394,1/29/2012,1,1,1,0,1,0.2825,0.272721,0.31125,0.24005,558,2685,3243
|
||||
395,1/30/2012,1,1,1,1,1,0.269167,0.262625,0.400833,0.215792,126,3498,3624
|
||||
396,1/31/2012,1,1,1,2,1,0.39,0.381317,0.416667,0.261817,324,4185,4509
|
||||
397,2/1/2012,1,1,2,3,1,0.469167,0.466538,0.507917,0.189067,304,4275,4579
|
||||
398,2/2/2012,1,1,2,4,2,0.399167,0.398971,0.672917,0.187187,190,3571,3761
|
||||
399,2/3/2012,1,1,2,5,1,0.313333,0.309346,0.526667,0.178496,310,3841,4151
|
||||
400,2/4/2012,1,1,2,6,2,0.264167,0.272725,0.779583,0.121896,384,2448,2832
|
||||
401,2/5/2012,1,1,2,0,2,0.265833,0.264521,0.687917,0.175996,318,2629,2947
|
||||
402,2/6/2012,1,1,2,1,1,0.282609,0.296426,0.622174,0.1538,206,3578,3784
|
||||
403,2/7/2012,1,1,2,2,1,0.354167,0.361104,0.49625,0.147379,199,4176,4375
|
||||
404,2/8/2012,1,1,2,3,2,0.256667,0.266421,0.722917,0.133721,109,2693,2802
|
||||
405,2/9/2012,1,1,2,4,1,0.265,0.261988,0.562083,0.194037,163,3667,3830
|
||||
406,2/10/2012,1,1,2,5,2,0.280833,0.293558,0.54,0.116929,227,3604,3831
|
||||
407,2/11/2012,1,1,2,6,3,0.224167,0.210867,0.73125,0.289796,192,1977,2169
|
||||
408,2/12/2012,1,1,2,0,1,0.1275,0.101658,0.464583,0.409212,73,1456,1529
|
||||
409,2/13/2012,1,1,2,1,1,0.2225,0.227913,0.41125,0.167283,94,3328,3422
|
||||
410,2/14/2012,1,1,2,2,2,0.319167,0.333946,0.50875,0.141179,135,3787,3922
|
||||
411,2/15/2012,1,1,2,3,1,0.348333,0.351629,0.53125,0.1816,141,4028,4169
|
||||
412,2/16/2012,1,1,2,4,2,0.316667,0.330162,0.752917,0.091425,74,2931,3005
|
||||
413,2/17/2012,1,1,2,5,1,0.343333,0.351629,0.634583,0.205846,349,3805,4154
|
||||
414,2/18/2012,1,1,2,6,1,0.346667,0.355425,0.534583,0.190929,1435,2883,4318
|
||||
415,2/19/2012,1,1,2,0,2,0.28,0.265788,0.515833,0.253112,618,2071,2689
|
||||
416,2/20/2012,1,1,2,1,1,0.28,0.273391,0.507826,0.229083,502,2627,3129
|
||||
417,2/21/2012,1,1,2,2,1,0.287826,0.295113,0.594348,0.205717,163,3614,3777
|
||||
418,2/22/2012,1,1,2,3,1,0.395833,0.392667,0.567917,0.234471,394,4379,4773
|
||||
419,2/23/2012,1,1,2,4,1,0.454167,0.444446,0.554583,0.190913,516,4546,5062
|
||||
420,2/24/2012,1,1,2,5,2,0.4075,0.410971,0.7375,0.237567,246,3241,3487
|
||||
421,2/25/2012,1,1,2,6,1,0.290833,0.255675,0.395833,0.421642,317,2415,2732
|
||||
422,2/26/2012,1,1,2,0,1,0.279167,0.268308,0.41,0.205229,515,2874,3389
|
||||
423,2/27/2012,1,1,2,1,1,0.366667,0.357954,0.490833,0.268033,253,4069,4322
|
||||
424,2/28/2012,1,1,2,2,1,0.359167,0.353525,0.395833,0.193417,229,4134,4363
|
||||
425,2/29/2012,1,1,2,3,2,0.344348,0.34847,0.804783,0.179117,65,1769,1834
|
||||
426,3/1/2012,1,1,3,4,1,0.485833,0.475371,0.615417,0.226987,325,4665,4990
|
||||
427,3/2/2012,1,1,3,5,2,0.353333,0.359842,0.657083,0.144904,246,2948,3194
|
||||
428,3/3/2012,1,1,3,6,2,0.414167,0.413492,0.62125,0.161079,956,3110,4066
|
||||
429,3/4/2012,1,1,3,0,1,0.325833,0.303021,0.403333,0.334571,710,2713,3423
|
||||
430,3/5/2012,1,1,3,1,1,0.243333,0.241171,0.50625,0.228858,203,3130,3333
|
||||
431,3/6/2012,1,1,3,2,1,0.258333,0.255042,0.456667,0.200875,221,3735,3956
|
||||
432,3/7/2012,1,1,3,3,1,0.404167,0.3851,0.513333,0.345779,432,4484,4916
|
||||
433,3/8/2012,1,1,3,4,1,0.5275,0.524604,0.5675,0.441563,486,4896,5382
|
||||
434,3/9/2012,1,1,3,5,2,0.410833,0.397083,0.407083,0.4148,447,4122,4569
|
||||
435,3/10/2012,1,1,3,6,1,0.2875,0.277767,0.350417,0.22575,968,3150,4118
|
||||
436,3/11/2012,1,1,3,0,1,0.361739,0.35967,0.476957,0.222587,1658,3253,4911
|
||||
437,3/12/2012,1,1,3,1,1,0.466667,0.459592,0.489167,0.207713,838,4460,5298
|
||||
438,3/13/2012,1,1,3,2,1,0.565,0.542929,0.6175,0.23695,762,5085,5847
|
||||
439,3/14/2012,1,1,3,3,1,0.5725,0.548617,0.507083,0.115062,997,5315,6312
|
||||
440,3/15/2012,1,1,3,4,1,0.5575,0.532825,0.579583,0.149883,1005,5187,6192
|
||||
441,3/16/2012,1,1,3,5,2,0.435833,0.436229,0.842083,0.113192,548,3830,4378
|
||||
442,3/17/2012,1,1,3,6,2,0.514167,0.505046,0.755833,0.110704,3155,4681,7836
|
||||
443,3/18/2012,1,1,3,0,2,0.4725,0.464,0.81,0.126883,2207,3685,5892
|
||||
444,3/19/2012,1,1,3,1,1,0.545,0.532821,0.72875,0.162317,982,5171,6153
|
||||
445,3/20/2012,1,1,3,2,1,0.560833,0.538533,0.807917,0.121271,1051,5042,6093
|
||||
446,3/21/2012,2,1,3,3,2,0.531667,0.513258,0.82125,0.0895583,1122,5108,6230
|
||||
447,3/22/2012,2,1,3,4,1,0.554167,0.531567,0.83125,0.117562,1334,5537,6871
|
||||
448,3/23/2012,2,1,3,5,2,0.601667,0.570067,0.694167,0.1163,2469,5893,8362
|
||||
449,3/24/2012,2,1,3,6,2,0.5025,0.486733,0.885417,0.192783,1033,2339,3372
|
||||
450,3/25/2012,2,1,3,0,2,0.4375,0.437488,0.880833,0.220775,1532,3464,4996
|
||||
451,3/26/2012,2,1,3,1,1,0.445833,0.43875,0.477917,0.386821,795,4763,5558
|
||||
452,3/27/2012,2,1,3,2,1,0.323333,0.315654,0.29,0.187192,531,4571,5102
|
||||
453,3/28/2012,2,1,3,3,1,0.484167,0.47095,0.48125,0.291671,674,5024,5698
|
||||
454,3/29/2012,2,1,3,4,1,0.494167,0.482304,0.439167,0.31965,834,5299,6133
|
||||
455,3/30/2012,2,1,3,5,2,0.37,0.375621,0.580833,0.138067,796,4663,5459
|
||||
456,3/31/2012,2,1,3,6,2,0.424167,0.421708,0.738333,0.250617,2301,3934,6235
|
||||
457,4/1/2012,2,1,4,0,2,0.425833,0.417287,0.67625,0.172267,2347,3694,6041
|
||||
458,4/2/2012,2,1,4,1,1,0.433913,0.427513,0.504348,0.312139,1208,4728,5936
|
||||
459,4/3/2012,2,1,4,2,1,0.466667,0.461483,0.396667,0.100133,1348,5424,6772
|
||||
460,4/4/2012,2,1,4,3,1,0.541667,0.53345,0.469583,0.180975,1058,5378,6436
|
||||
461,4/5/2012,2,1,4,4,1,0.435,0.431163,0.374167,0.219529,1192,5265,6457
|
||||
462,4/6/2012,2,1,4,5,1,0.403333,0.390767,0.377083,0.300388,1807,4653,6460
|
||||
463,4/7/2012,2,1,4,6,1,0.4375,0.426129,0.254167,0.274871,3252,3605,6857
|
||||
464,4/8/2012,2,1,4,0,1,0.5,0.492425,0.275833,0.232596,2230,2939,5169
|
||||
465,4/9/2012,2,1,4,1,1,0.489167,0.476638,0.3175,0.358196,905,4680,5585
|
||||
466,4/10/2012,2,1,4,2,1,0.446667,0.436233,0.435,0.249375,819,5099,5918
|
||||
467,4/11/2012,2,1,4,3,1,0.348696,0.337274,0.469565,0.295274,482,4380,4862
|
||||
468,4/12/2012,2,1,4,4,1,0.3975,0.387604,0.46625,0.290429,663,4746,5409
|
||||
469,4/13/2012,2,1,4,5,1,0.4425,0.431808,0.408333,0.155471,1252,5146,6398
|
||||
470,4/14/2012,2,1,4,6,1,0.495,0.487996,0.502917,0.190917,2795,4665,7460
|
||||
471,4/15/2012,2,1,4,0,1,0.606667,0.573875,0.507917,0.225129,2846,4286,7132
|
||||
472,4/16/2012,2,1,4,1,1,0.664167,0.614925,0.561667,0.284829,1198,5172,6370
|
||||
473,4/17/2012,2,1,4,2,1,0.608333,0.598487,0.390417,0.273629,989,5702,6691
|
||||
474,4/18/2012,2,1,4,3,2,0.463333,0.457038,0.569167,0.167912,347,4020,4367
|
||||
475,4/19/2012,2,1,4,4,1,0.498333,0.493046,0.6125,0.0659292,846,5719,6565
|
||||
476,4/20/2012,2,1,4,5,1,0.526667,0.515775,0.694583,0.149871,1340,5950,7290
|
||||
477,4/21/2012,2,1,4,6,1,0.57,0.542921,0.682917,0.283587,2541,4083,6624
|
||||
478,4/22/2012,2,1,4,0,3,0.396667,0.389504,0.835417,0.344546,120,907,1027
|
||||
479,4/23/2012,2,1,4,1,2,0.321667,0.301125,0.766667,0.303496,195,3019,3214
|
||||
480,4/24/2012,2,1,4,2,1,0.413333,0.405283,0.454167,0.249383,518,5115,5633
|
||||
481,4/25/2012,2,1,4,3,1,0.476667,0.470317,0.427917,0.118792,655,5541,6196
|
||||
482,4/26/2012,2,1,4,4,2,0.498333,0.483583,0.756667,0.176625,475,4551,5026
|
||||
483,4/27/2012,2,1,4,5,1,0.4575,0.452637,0.400833,0.347633,1014,5219,6233
|
||||
484,4/28/2012,2,1,4,6,2,0.376667,0.377504,0.489583,0.129975,1120,3100,4220
|
||||
485,4/29/2012,2,1,4,0,1,0.458333,0.450121,0.587083,0.116908,2229,4075,6304
|
||||
486,4/30/2012,2,1,4,1,2,0.464167,0.457696,0.57,0.171638,665,4907,5572
|
||||
487,5/1/2012,2,1,5,2,2,0.613333,0.577021,0.659583,0.156096,653,5087,5740
|
||||
488,5/2/2012,2,1,5,3,1,0.564167,0.537896,0.797083,0.138058,667,5502,6169
|
||||
489,5/3/2012,2,1,5,4,2,0.56,0.537242,0.768333,0.133696,764,5657,6421
|
||||
490,5/4/2012,2,1,5,5,1,0.6275,0.590917,0.735417,0.162938,1069,5227,6296
|
||||
491,5/5/2012,2,1,5,6,2,0.621667,0.584608,0.756667,0.152992,2496,4387,6883
|
||||
492,5/6/2012,2,1,5,0,2,0.5625,0.546737,0.74,0.149879,2135,4224,6359
|
||||
493,5/7/2012,2,1,5,1,2,0.5375,0.527142,0.664167,0.230721,1008,5265,6273
|
||||
494,5/8/2012,2,1,5,2,2,0.581667,0.557471,0.685833,0.296029,738,4990,5728
|
||||
495,5/9/2012,2,1,5,3,2,0.575,0.553025,0.744167,0.216412,620,4097,4717
|
||||
496,5/10/2012,2,1,5,4,1,0.505833,0.491783,0.552083,0.314063,1026,5546,6572
|
||||
497,5/11/2012,2,1,5,5,1,0.533333,0.520833,0.360417,0.236937,1319,5711,7030
|
||||
498,5/12/2012,2,1,5,6,1,0.564167,0.544817,0.480417,0.123133,2622,4807,7429
|
||||
499,5/13/2012,2,1,5,0,1,0.6125,0.585238,0.57625,0.225117,2172,3946,6118
|
||||
500,5/14/2012,2,1,5,1,2,0.573333,0.5499,0.789583,0.212692,342,2501,2843
|
||||
501,5/15/2012,2,1,5,2,2,0.611667,0.576404,0.794583,0.147392,625,4490,5115
|
||||
502,5/16/2012,2,1,5,3,1,0.636667,0.595975,0.697917,0.122512,991,6433,7424
|
||||
503,5/17/2012,2,1,5,4,1,0.593333,0.572613,0.52,0.229475,1242,6142,7384
|
||||
504,5/18/2012,2,1,5,5,1,0.564167,0.551121,0.523333,0.136817,1521,6118,7639
|
||||
505,5/19/2012,2,1,5,6,1,0.6,0.566908,0.45625,0.083975,3410,4884,8294
|
||||
506,5/20/2012,2,1,5,0,1,0.620833,0.583967,0.530417,0.254367,2704,4425,7129
|
||||
507,5/21/2012,2,1,5,1,2,0.598333,0.565667,0.81125,0.233204,630,3729,4359
|
||||
508,5/22/2012,2,1,5,2,2,0.615,0.580825,0.765833,0.118167,819,5254,6073
|
||||
509,5/23/2012,2,1,5,3,2,0.621667,0.584612,0.774583,0.102,766,4494,5260
|
||||
510,5/24/2012,2,1,5,4,1,0.655,0.6067,0.716667,0.172896,1059,5711,6770
|
||||
511,5/25/2012,2,1,5,5,1,0.68,0.627529,0.747083,0.14055,1417,5317,6734
|
||||
512,5/26/2012,2,1,5,6,1,0.6925,0.642696,0.7325,0.198992,2855,3681,6536
|
||||
513,5/27/2012,2,1,5,0,1,0.69,0.641425,0.697083,0.215171,3283,3308,6591
|
||||
514,5/28/2012,2,1,5,1,1,0.7125,0.6793,0.67625,0.196521,2557,3486,6043
|
||||
515,5/29/2012,2,1,5,2,1,0.7225,0.672992,0.684583,0.2954,880,4863,5743
|
||||
516,5/30/2012,2,1,5,3,2,0.656667,0.611129,0.67,0.134329,745,6110,6855
|
||||
517,5/31/2012,2,1,5,4,1,0.68,0.631329,0.492917,0.195279,1100,6238,7338
|
||||
518,6/1/2012,2,1,6,5,2,0.654167,0.607962,0.755417,0.237563,533,3594,4127
|
||||
519,6/2/2012,2,1,6,6,1,0.583333,0.566288,0.549167,0.186562,2795,5325,8120
|
||||
520,6/3/2012,2,1,6,0,1,0.6025,0.575133,0.493333,0.184087,2494,5147,7641
|
||||
521,6/4/2012,2,1,6,1,1,0.5975,0.578283,0.487083,0.284833,1071,5927,6998
|
||||
522,6/5/2012,2,1,6,2,2,0.540833,0.525892,0.613333,0.209575,968,6033,7001
|
||||
523,6/6/2012,2,1,6,3,1,0.554167,0.542292,0.61125,0.077125,1027,6028,7055
|
||||
524,6/7/2012,2,1,6,4,1,0.6025,0.569442,0.567083,0.15735,1038,6456,7494
|
||||
525,6/8/2012,2,1,6,5,1,0.649167,0.597862,0.467917,0.175383,1488,6248,7736
|
||||
526,6/9/2012,2,1,6,6,1,0.710833,0.648367,0.437083,0.144287,2708,4790,7498
|
||||
527,6/10/2012,2,1,6,0,1,0.726667,0.663517,0.538333,0.133721,2224,4374,6598
|
||||
528,6/11/2012,2,1,6,1,2,0.720833,0.659721,0.587917,0.207713,1017,5647,6664
|
||||
529,6/12/2012,2,1,6,2,2,0.653333,0.597875,0.833333,0.214546,477,4495,4972
|
||||
530,6/13/2012,2,1,6,3,1,0.655833,0.611117,0.582083,0.343279,1173,6248,7421
|
||||
531,6/14/2012,2,1,6,4,1,0.648333,0.624383,0.569583,0.253733,1180,6183,7363
|
||||
532,6/15/2012,2,1,6,5,1,0.639167,0.599754,0.589583,0.176617,1563,6102,7665
|
||||
533,6/16/2012,2,1,6,6,1,0.631667,0.594708,0.504167,0.166667,2963,4739,7702
|
||||
534,6/17/2012,2,1,6,0,1,0.5925,0.571975,0.59875,0.144904,2634,4344,6978
|
||||
535,6/18/2012,2,1,6,1,2,0.568333,0.544842,0.777917,0.174746,653,4446,5099
|
||||
536,6/19/2012,2,1,6,2,1,0.688333,0.654692,0.69,0.148017,968,5857,6825
|
||||
537,6/20/2012,2,1,6,3,1,0.7825,0.720975,0.592083,0.113812,872,5339,6211
|
||||
538,6/21/2012,3,1,6,4,1,0.805833,0.752542,0.567917,0.118787,778,5127,5905
|
||||
539,6/22/2012,3,1,6,5,1,0.7775,0.724121,0.57375,0.182842,964,4859,5823
|
||||
540,6/23/2012,3,1,6,6,1,0.731667,0.652792,0.534583,0.179721,2657,4801,7458
|
||||
541,6/24/2012,3,1,6,0,1,0.743333,0.674254,0.479167,0.145525,2551,4340,6891
|
||||
542,6/25/2012,3,1,6,1,1,0.715833,0.654042,0.504167,0.300383,1139,5640,6779
|
||||
543,6/26/2012,3,1,6,2,1,0.630833,0.594704,0.373333,0.347642,1077,6365,7442
|
||||
544,6/27/2012,3,1,6,3,1,0.6975,0.640792,0.36,0.271775,1077,6258,7335
|
||||
545,6/28/2012,3,1,6,4,1,0.749167,0.675512,0.4225,0.17165,921,5958,6879
|
||||
546,6/29/2012,3,1,6,5,1,0.834167,0.786613,0.48875,0.165417,829,4634,5463
|
||||
547,6/30/2012,3,1,6,6,1,0.765,0.687508,0.60125,0.161071,1455,4232,5687
|
||||
548,7/1/2012,3,1,7,0,1,0.815833,0.750629,0.51875,0.168529,1421,4110,5531
|
||||
549,7/2/2012,3,1,7,1,1,0.781667,0.702038,0.447083,0.195267,904,5323,6227
|
||||
550,7/3/2012,3,1,7,2,1,0.780833,0.70265,0.492083,0.126237,1052,5608,6660
|
||||
551,7/4/2012,3,1,7,3,1,0.789167,0.732337,0.53875,0.13495,2562,4841,7403
|
||||
552,7/5/2012,3,1,7,4,1,0.8275,0.761367,0.457917,0.194029,1405,4836,6241
|
||||
553,7/6/2012,3,1,7,5,1,0.828333,0.752533,0.450833,0.146142,1366,4841,6207
|
||||
554,7/7/2012,3,1,7,6,1,0.861667,0.804913,0.492083,0.163554,1448,3392,4840
|
||||
555,7/8/2012,3,1,7,0,1,0.8225,0.790396,0.57375,0.125629,1203,3469,4672
|
||||
556,7/9/2012,3,1,7,1,2,0.710833,0.654054,0.683333,0.180975,998,5571,6569
|
||||
557,7/10/2012,3,1,7,2,2,0.720833,0.664796,0.6675,0.151737,954,5336,6290
|
||||
558,7/11/2012,3,1,7,3,1,0.716667,0.650271,0.633333,0.151733,975,6289,7264
|
||||
559,7/12/2012,3,1,7,4,1,0.715833,0.654683,0.529583,0.146775,1032,6414,7446
|
||||
560,7/13/2012,3,1,7,5,2,0.731667,0.667933,0.485833,0.08085,1511,5988,7499
|
||||
561,7/14/2012,3,1,7,6,2,0.703333,0.666042,0.699167,0.143679,2355,4614,6969
|
||||
562,7/15/2012,3,1,7,0,1,0.745833,0.705196,0.717917,0.166667,1920,4111,6031
|
||||
563,7/16/2012,3,1,7,1,1,0.763333,0.724125,0.645,0.164187,1088,5742,6830
|
||||
564,7/17/2012,3,1,7,2,1,0.818333,0.755683,0.505833,0.114429,921,5865,6786
|
||||
565,7/18/2012,3,1,7,3,1,0.793333,0.745583,0.577083,0.137442,799,4914,5713
|
||||
566,7/19/2012,3,1,7,4,1,0.77,0.714642,0.600417,0.165429,888,5703,6591
|
||||
567,7/20/2012,3,1,7,5,2,0.665833,0.613025,0.844167,0.208967,747,5123,5870
|
||||
568,7/21/2012,3,1,7,6,3,0.595833,0.549912,0.865417,0.2133,1264,3195,4459
|
||||
569,7/22/2012,3,1,7,0,2,0.6675,0.623125,0.7625,0.0939208,2544,4866,7410
|
||||
570,7/23/2012,3,1,7,1,1,0.741667,0.690017,0.694167,0.138683,1135,5831,6966
|
||||
571,7/24/2012,3,1,7,2,1,0.750833,0.70645,0.655,0.211454,1140,6452,7592
|
||||
572,7/25/2012,3,1,7,3,1,0.724167,0.654054,0.45,0.1648,1383,6790,8173
|
||||
573,7/26/2012,3,1,7,4,1,0.776667,0.739263,0.596667,0.284813,1036,5825,6861
|
||||
574,7/27/2012,3,1,7,5,1,0.781667,0.734217,0.594583,0.152992,1259,5645,6904
|
||||
575,7/28/2012,3,1,7,6,1,0.755833,0.697604,0.613333,0.15735,2234,4451,6685
|
||||
576,7/29/2012,3,1,7,0,1,0.721667,0.667933,0.62375,0.170396,2153,4444,6597
|
||||
577,7/30/2012,3,1,7,1,1,0.730833,0.684987,0.66875,0.153617,1040,6065,7105
|
||||
578,7/31/2012,3,1,7,2,1,0.713333,0.662896,0.704167,0.165425,968,6248,7216
|
||||
579,8/1/2012,3,1,8,3,1,0.7175,0.667308,0.6775,0.141179,1074,6506,7580
|
||||
580,8/2/2012,3,1,8,4,1,0.7525,0.707088,0.659583,0.129354,983,6278,7261
|
||||
581,8/3/2012,3,1,8,5,2,0.765833,0.722867,0.6425,0.215792,1328,5847,7175
|
||||
582,8/4/2012,3,1,8,6,1,0.793333,0.751267,0.613333,0.257458,2345,4479,6824
|
||||
583,8/5/2012,3,1,8,0,1,0.769167,0.731079,0.6525,0.290421,1707,3757,5464
|
||||
584,8/6/2012,3,1,8,1,2,0.7525,0.710246,0.654167,0.129354,1233,5780,7013
|
||||
585,8/7/2012,3,1,8,2,2,0.735833,0.697621,0.70375,0.116908,1278,5995,7273
|
||||
586,8/8/2012,3,1,8,3,2,0.75,0.707717,0.672917,0.1107,1263,6271,7534
|
||||
587,8/9/2012,3,1,8,4,1,0.755833,0.699508,0.620417,0.1561,1196,6090,7286
|
||||
588,8/10/2012,3,1,8,5,2,0.715833,0.667942,0.715833,0.238813,1065,4721,5786
|
||||
589,8/11/2012,3,1,8,6,2,0.6925,0.638267,0.732917,0.206479,2247,4052,6299
|
||||
590,8/12/2012,3,1,8,0,1,0.700833,0.644579,0.530417,0.122512,2182,4362,6544
|
||||
591,8/13/2012,3,1,8,1,1,0.720833,0.662254,0.545417,0.136212,1207,5676,6883
|
||||
592,8/14/2012,3,1,8,2,1,0.726667,0.676779,0.686667,0.169158,1128,5656,6784
|
||||
593,8/15/2012,3,1,8,3,1,0.706667,0.654037,0.619583,0.169771,1198,6149,7347
|
||||
594,8/16/2012,3,1,8,4,1,0.719167,0.654688,0.519167,0.141796,1338,6267,7605
|
||||
595,8/17/2012,3,1,8,5,1,0.723333,0.2424,0.570833,0.231354,1483,5665,7148
|
||||
596,8/18/2012,3,1,8,6,1,0.678333,0.618071,0.603333,0.177867,2827,5038,7865
|
||||
597,8/19/2012,3,1,8,0,2,0.635833,0.603554,0.711667,0.08645,1208,3341,4549
|
||||
598,8/20/2012,3,1,8,1,2,0.635833,0.595967,0.734167,0.129979,1026,5504,6530
|
||||
599,8/21/2012,3,1,8,2,1,0.649167,0.601025,0.67375,0.0727708,1081,5925,7006
|
||||
600,8/22/2012,3,1,8,3,1,0.6675,0.621854,0.677083,0.0702833,1094,6281,7375
|
||||
601,8/23/2012,3,1,8,4,1,0.695833,0.637008,0.635833,0.0845958,1363,6402,7765
|
||||
602,8/24/2012,3,1,8,5,2,0.7025,0.6471,0.615,0.0721458,1325,6257,7582
|
||||
603,8/25/2012,3,1,8,6,2,0.661667,0.618696,0.712917,0.244408,1829,4224,6053
|
||||
604,8/26/2012,3,1,8,0,2,0.653333,0.595996,0.845833,0.228858,1483,3772,5255
|
||||
605,8/27/2012,3,1,8,1,1,0.703333,0.654688,0.730417,0.128733,989,5928,6917
|
||||
606,8/28/2012,3,1,8,2,1,0.728333,0.66605,0.62,0.190925,935,6105,7040
|
||||
607,8/29/2012,3,1,8,3,1,0.685,0.635733,0.552083,0.112562,1177,6520,7697
|
||||
608,8/30/2012,3,1,8,4,1,0.706667,0.652779,0.590417,0.0771167,1172,6541,7713
|
||||
609,8/31/2012,3,1,8,5,1,0.764167,0.6894,0.5875,0.168533,1433,5917,7350
|
||||
610,9/1/2012,3,1,9,6,2,0.753333,0.702654,0.638333,0.113187,2352,3788,6140
|
||||
611,9/2/2012,3,1,9,0,2,0.696667,0.649,0.815,0.0640708,2613,3197,5810
|
||||
612,9/3/2012,3,1,9,1,1,0.7075,0.661629,0.790833,0.151121,1965,4069,6034
|
||||
613,9/4/2012,3,1,9,2,1,0.725833,0.686888,0.755,0.236321,867,5997,6864
|
||||
614,9/5/2012,3,1,9,3,1,0.736667,0.708983,0.74125,0.187808,832,6280,7112
|
||||
615,9/6/2012,3,1,9,4,2,0.696667,0.655329,0.810417,0.142421,611,5592,6203
|
||||
616,9/7/2012,3,1,9,5,1,0.703333,0.657204,0.73625,0.171646,1045,6459,7504
|
||||
617,9/8/2012,3,1,9,6,2,0.659167,0.611121,0.799167,0.281104,1557,4419,5976
|
||||
618,9/9/2012,3,1,9,0,1,0.61,0.578925,0.5475,0.224496,2570,5657,8227
|
||||
619,9/10/2012,3,1,9,1,1,0.583333,0.565654,0.50375,0.258713,1118,6407,7525
|
||||
620,9/11/2012,3,1,9,2,1,0.5775,0.554292,0.52,0.0920542,1070,6697,7767
|
||||
621,9/12/2012,3,1,9,3,1,0.599167,0.570075,0.577083,0.131846,1050,6820,7870
|
||||
622,9/13/2012,3,1,9,4,1,0.6125,0.579558,0.637083,0.0827208,1054,6750,7804
|
||||
623,9/14/2012,3,1,9,5,1,0.633333,0.594083,0.6725,0.103863,1379,6630,8009
|
||||
624,9/15/2012,3,1,9,6,1,0.608333,0.585867,0.501667,0.247521,3160,5554,8714
|
||||
625,9/16/2012,3,1,9,0,1,0.58,0.563125,0.57,0.0901833,2166,5167,7333
|
||||
626,9/17/2012,3,1,9,1,2,0.580833,0.55305,0.734583,0.151742,1022,5847,6869
|
||||
627,9/18/2012,3,1,9,2,2,0.623333,0.565067,0.8725,0.357587,371,3702,4073
|
||||
628,9/19/2012,3,1,9,3,1,0.5525,0.540404,0.536667,0.215175,788,6803,7591
|
||||
629,9/20/2012,3,1,9,4,1,0.546667,0.532192,0.618333,0.118167,939,6781,7720
|
||||
630,9/21/2012,3,1,9,5,1,0.599167,0.571971,0.66875,0.154229,1250,6917,8167
|
||||
631,9/22/2012,3,1,9,6,1,0.65,0.610488,0.646667,0.283583,2512,5883,8395
|
||||
632,9/23/2012,4,1,9,0,1,0.529167,0.518933,0.467083,0.223258,2454,5453,7907
|
||||
633,9/24/2012,4,1,9,1,1,0.514167,0.502513,0.492917,0.142404,1001,6435,7436
|
||||
634,9/25/2012,4,1,9,2,1,0.55,0.544179,0.57,0.236321,845,6693,7538
|
||||
635,9/26/2012,4,1,9,3,1,0.635,0.596613,0.630833,0.2444,787,6946,7733
|
||||
636,9/27/2012,4,1,9,4,2,0.65,0.607975,0.690833,0.134342,751,6642,7393
|
||||
637,9/28/2012,4,1,9,5,2,0.619167,0.585863,0.69,0.164179,1045,6370,7415
|
||||
638,9/29/2012,4,1,9,6,1,0.5425,0.530296,0.542917,0.227604,2589,5966,8555
|
||||
639,9/30/2012,4,1,9,0,1,0.526667,0.517663,0.583333,0.134958,2015,4874,6889
|
||||
640,10/1/2012,4,1,10,1,2,0.520833,0.512,0.649167,0.0908042,763,6015,6778
|
||||
641,10/2/2012,4,1,10,2,3,0.590833,0.542333,0.871667,0.104475,315,4324,4639
|
||||
642,10/3/2012,4,1,10,3,2,0.6575,0.599133,0.79375,0.0665458,728,6844,7572
|
||||
643,10/4/2012,4,1,10,4,2,0.6575,0.607975,0.722917,0.117546,891,6437,7328
|
||||
644,10/5/2012,4,1,10,5,1,0.615,0.580187,0.6275,0.10635,1516,6640,8156
|
||||
645,10/6/2012,4,1,10,6,1,0.554167,0.538521,0.664167,0.268025,3031,4934,7965
|
||||
646,10/7/2012,4,1,10,0,2,0.415833,0.419813,0.708333,0.141162,781,2729,3510
|
||||
647,10/8/2012,4,1,10,1,2,0.383333,0.387608,0.709583,0.189679,874,4604,5478
|
||||
648,10/9/2012,4,1,10,2,2,0.446667,0.438112,0.761667,0.1903,601,5791,6392
|
||||
649,10/10/2012,4,1,10,3,1,0.514167,0.503142,0.630833,0.187821,780,6911,7691
|
||||
650,10/11/2012,4,1,10,4,1,0.435,0.431167,0.463333,0.181596,834,6736,7570
|
||||
651,10/12/2012,4,1,10,5,1,0.4375,0.433071,0.539167,0.235092,1060,6222,7282
|
||||
652,10/13/2012,4,1,10,6,1,0.393333,0.391396,0.494583,0.146142,2252,4857,7109
|
||||
653,10/14/2012,4,1,10,0,1,0.521667,0.508204,0.640417,0.278612,2080,4559,6639
|
||||
654,10/15/2012,4,1,10,1,2,0.561667,0.53915,0.7075,0.296037,760,5115,5875
|
||||
655,10/16/2012,4,1,10,2,1,0.468333,0.460846,0.558333,0.182221,922,6612,7534
|
||||
656,10/17/2012,4,1,10,3,1,0.455833,0.450108,0.692917,0.101371,979,6482,7461
|
||||
657,10/18/2012,4,1,10,4,2,0.5225,0.512625,0.728333,0.236937,1008,6501,7509
|
||||
658,10/19/2012,4,1,10,5,2,0.563333,0.537896,0.815,0.134954,753,4671,5424
|
||||
659,10/20/2012,4,1,10,6,1,0.484167,0.472842,0.572917,0.117537,2806,5284,8090
|
||||
660,10/21/2012,4,1,10,0,1,0.464167,0.456429,0.51,0.166054,2132,4692,6824
|
||||
661,10/22/2012,4,1,10,1,1,0.4875,0.482942,0.568333,0.0814833,830,6228,7058
|
||||
662,10/23/2012,4,1,10,2,1,0.544167,0.530304,0.641667,0.0945458,841,6625,7466
|
||||
663,10/24/2012,4,1,10,3,1,0.5875,0.558721,0.63625,0.0727792,795,6898,7693
|
||||
664,10/25/2012,4,1,10,4,2,0.55,0.529688,0.800417,0.124375,875,6484,7359
|
||||
665,10/26/2012,4,1,10,5,2,0.545833,0.52275,0.807083,0.132467,1182,6262,7444
|
||||
666,10/27/2012,4,1,10,6,2,0.53,0.515133,0.72,0.235692,2643,5209,7852
|
||||
667,10/28/2012,4,1,10,0,2,0.4775,0.467771,0.694583,0.398008,998,3461,4459
|
||||
668,10/29/2012,4,1,10,1,3,0.44,0.4394,0.88,0.3582,2,20,22
|
||||
669,10/30/2012,4,1,10,2,2,0.318182,0.309909,0.825455,0.213009,87,1009,1096
|
||||
670,10/31/2012,4,1,10,3,2,0.3575,0.3611,0.666667,0.166667,419,5147,5566
|
||||
671,11/1/2012,4,1,11,4,2,0.365833,0.369942,0.581667,0.157346,466,5520,5986
|
||||
672,11/2/2012,4,1,11,5,1,0.355,0.356042,0.522083,0.266175,618,5229,5847
|
||||
673,11/3/2012,4,1,11,6,2,0.343333,0.323846,0.49125,0.270529,1029,4109,5138
|
||||
674,11/4/2012,4,1,11,0,1,0.325833,0.329538,0.532917,0.179108,1201,3906,5107
|
||||
675,11/5/2012,4,1,11,1,1,0.319167,0.308075,0.494167,0.236325,378,4881,5259
|
||||
676,11/6/2012,4,1,11,2,1,0.280833,0.281567,0.567083,0.173513,466,5220,5686
|
||||
677,11/7/2012,4,1,11,3,2,0.295833,0.274621,0.5475,0.304108,326,4709,5035
|
||||
678,11/8/2012,4,1,11,4,1,0.352174,0.341891,0.333478,0.347835,340,4975,5315
|
||||
679,11/9/2012,4,1,11,5,1,0.361667,0.355413,0.540833,0.214558,709,5283,5992
|
||||
680,11/10/2012,4,1,11,6,1,0.389167,0.393937,0.645417,0.0578458,2090,4446,6536
|
||||
681,11/11/2012,4,1,11,0,1,0.420833,0.421713,0.659167,0.1275,2290,4562,6852
|
||||
682,11/12/2012,4,1,11,1,1,0.485,0.475383,0.741667,0.173517,1097,5172,6269
|
||||
683,11/13/2012,4,1,11,2,2,0.343333,0.323225,0.662917,0.342046,327,3767,4094
|
||||
684,11/14/2012,4,1,11,3,1,0.289167,0.281563,0.552083,0.199625,373,5122,5495
|
||||
685,11/15/2012,4,1,11,4,2,0.321667,0.324492,0.620417,0.152987,320,5125,5445
|
||||
686,11/16/2012,4,1,11,5,1,0.345,0.347204,0.524583,0.171025,484,5214,5698
|
||||
687,11/17/2012,4,1,11,6,1,0.325,0.326383,0.545417,0.179729,1313,4316,5629
|
||||
688,11/18/2012,4,1,11,0,1,0.3425,0.337746,0.692917,0.227612,922,3747,4669
|
||||
689,11/19/2012,4,1,11,1,2,0.380833,0.375621,0.623333,0.235067,449,5050,5499
|
||||
690,11/20/2012,4,1,11,2,2,0.374167,0.380667,0.685,0.082725,534,5100,5634
|
||||
691,11/21/2012,4,1,11,3,1,0.353333,0.364892,0.61375,0.103246,615,4531,5146
|
||||
692,11/22/2012,4,1,11,4,1,0.34,0.350371,0.580417,0.0528708,955,1470,2425
|
||||
693,11/23/2012,4,1,11,5,1,0.368333,0.378779,0.56875,0.148021,1603,2307,3910
|
||||
694,11/24/2012,4,1,11,6,1,0.278333,0.248742,0.404583,0.376871,532,1745,2277
|
||||
695,11/25/2012,4,1,11,0,1,0.245833,0.257583,0.468333,0.1505,309,2115,2424
|
||||
696,11/26/2012,4,1,11,1,1,0.313333,0.339004,0.535417,0.04665,337,4750,5087
|
||||
697,11/27/2012,4,1,11,2,2,0.291667,0.281558,0.786667,0.237562,123,3836,3959
|
||||
698,11/28/2012,4,1,11,3,1,0.296667,0.289762,0.50625,0.210821,198,5062,5260
|
||||
699,11/29/2012,4,1,11,4,1,0.28087,0.298422,0.555652,0.115522,243,5080,5323
|
||||
700,11/30/2012,4,1,11,5,1,0.298333,0.323867,0.649583,0.0584708,362,5306,5668
|
||||
701,12/1/2012,4,1,12,6,2,0.298333,0.316904,0.806667,0.0597042,951,4240,5191
|
||||
702,12/2/2012,4,1,12,0,2,0.3475,0.359208,0.823333,0.124379,892,3757,4649
|
||||
703,12/3/2012,4,1,12,1,1,0.4525,0.455796,0.7675,0.0827208,555,5679,6234
|
||||
704,12/4/2012,4,1,12,2,1,0.475833,0.469054,0.73375,0.174129,551,6055,6606
|
||||
705,12/5/2012,4,1,12,3,1,0.438333,0.428012,0.485,0.324021,331,5398,5729
|
||||
706,12/6/2012,4,1,12,4,1,0.255833,0.258204,0.50875,0.174754,340,5035,5375
|
||||
707,12/7/2012,4,1,12,5,2,0.320833,0.321958,0.764167,0.1306,349,4659,5008
|
||||
708,12/8/2012,4,1,12,6,2,0.381667,0.389508,0.91125,0.101379,1153,4429,5582
|
||||
709,12/9/2012,4,1,12,0,2,0.384167,0.390146,0.905417,0.157975,441,2787,3228
|
||||
710,12/10/2012,4,1,12,1,2,0.435833,0.435575,0.925,0.190308,329,4841,5170
|
||||
711,12/11/2012,4,1,12,2,2,0.353333,0.338363,0.596667,0.296037,282,5219,5501
|
||||
712,12/12/2012,4,1,12,3,2,0.2975,0.297338,0.538333,0.162937,310,5009,5319
|
||||
713,12/13/2012,4,1,12,4,1,0.295833,0.294188,0.485833,0.174129,425,5107,5532
|
||||
714,12/14/2012,4,1,12,5,1,0.281667,0.294192,0.642917,0.131229,429,5182,5611
|
||||
715,12/15/2012,4,1,12,6,1,0.324167,0.338383,0.650417,0.10635,767,4280,5047
|
||||
716,12/16/2012,4,1,12,0,2,0.3625,0.369938,0.83875,0.100742,538,3248,3786
|
||||
717,12/17/2012,4,1,12,1,2,0.393333,0.4015,0.907083,0.0982583,212,4373,4585
|
||||
718,12/18/2012,4,1,12,2,1,0.410833,0.409708,0.66625,0.221404,433,5124,5557
|
||||
719,12/19/2012,4,1,12,3,1,0.3325,0.342162,0.625417,0.184092,333,4934,5267
|
||||
720,12/20/2012,4,1,12,4,2,0.33,0.335217,0.667917,0.132463,314,3814,4128
|
||||
721,12/21/2012,1,1,12,5,2,0.326667,0.301767,0.556667,0.374383,221,3402,3623
|
||||
722,12/22/2012,1,1,12,6,1,0.265833,0.236113,0.44125,0.407346,205,1544,1749
|
||||
723,12/23/2012,1,1,12,0,1,0.245833,0.259471,0.515417,0.133083,408,1379,1787
|
||||
724,12/24/2012,1,1,12,1,2,0.231304,0.2589,0.791304,0.0772304,174,746,920
|
||||
725,12/25/2012,1,1,12,2,2,0.291304,0.294465,0.734783,0.168726,440,573,1013
|
||||
726,12/26/2012,1,1,12,3,3,0.243333,0.220333,0.823333,0.316546,9,432,441
|
||||
727,12/27/2012,1,1,12,4,2,0.254167,0.226642,0.652917,0.350133,247,1867,2114
|
||||
728,12/28/2012,1,1,12,5,2,0.253333,0.255046,0.59,0.155471,644,2451,3095
|
||||
729,12/29/2012,1,1,12,6,2,0.253333,0.2424,0.752917,0.124383,159,1182,1341
|
||||
730,12/30/2012,1,1,12,0,1,0.255833,0.2317,0.483333,0.350754,364,1432,1796
|
||||
731,12/31/2012,1,1,12,1,2,0.215833,0.223487,0.5775,0.154846,439,2290,2729
|
||||
@@ -9,6 +9,13 @@
"Licensed under the MIT License."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
""
]
},
{
"cell_type": "markdown",
"metadata": {},
@@ -37,7 +44,8 @@
"2. Instantiating AutoMLConfig with new task type \"forecasting\" for timeseries data training, and other timeseries related settings: for this dataset we use the basic one: \"time_column_name\" \n",
"3. Training the Model using local compute\n",
"4. Exploring the results\n",
"5. Testing the fitted model"
"5. Viewing the engineered names for featurized data and featurization summary for all raw features\n",
"6. Testing the fitted model"
]
},
{
@@ -122,12 +130,22 @@
"data.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# let's take note of what columns means what in the data\n",
"time_column_name = 'timeStamp'\n",
"target_column_name = 'demand'"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Split the data to train and test\n",
"\n"
"### Split the data into train and test sets\n"
]
},
{
@@ -136,50 +154,10 @@
"metadata": {},
"outputs": [],
"source": [
"train = data[data['timeStamp'] < '2017-02-01']\n",
"test = data[data['timeStamp'] >= '2017-02-01']\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Prepare the test data, we will feed X_test to the fitted model and get prediction"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"y_test = test.pop('demand').values\n",
"X_test = test"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Split the train data to train and valid\n",
"\n",
"Use one month's data as valid data\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"X_train = train[train['timeStamp'] < '2017-01-01']\n",
"X_valid = train[train['timeStamp'] >= '2017-01-01']\n",
"y_train = X_train.pop('demand').values\n",
"y_valid = X_valid.pop('demand').values\n",
"print(X_train.shape)\n",
"print(y_train.shape)\n",
"print(X_valid.shape)\n",
"print(y_valid.shape)"
"X_train = data[data[time_column_name] < '2017-02-01']\n",
"X_test = data[data[time_column_name] >= '2017-02-01']\n",
"y_train = X_train.pop(target_column_name).values\n",
"y_test = X_test.pop(target_column_name).values"
]
},
{
@@ -198,8 +176,7 @@
"|**iteration_timeout_minutes**|Time limit in minutes for each iteration.|\n",
"|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n",
"|**y**|(sparse) array-like, shape = [n_samples, ], targets values.|\n",
"|**X_valid**|Data used to evaluate a model in a iteration. (sparse) array-like, shape = [n_samples, n_features]|\n",
"|**y_valid**|Data used to evaluate a model in a iteration. (sparse) array-like, shape = [n_samples, ], targets values.|\n",
"|**n_cross_validations**|Number of cross validation splits.|\n",
"|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder. "
]
},
@@ -209,9 +186,8 @@
"metadata": {},
"outputs": [],
"source": [
"time_column_name = 'timeStamp'\n",
"automl_settings = {\n",
" \"time_column_name\": time_column_name,\n",
" \"time_column_name\": time_column_name \n",
"}\n",
"\n",
"\n",
@@ -222,8 +198,7 @@
" iteration_timeout_minutes = 5,\n",
" X = X_train,\n",
" y = y_train,\n",
" X_valid = X_valid,\n",
" y_valid = y_valid,\n",
" n_cross_validations = 3,\n",
" path=project_folder,\n",
" verbosity = logging.INFO,\n",
" **automl_settings)"
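Read together, the two hunks above appear to drop the separate validation set (X_valid/y_valid) in favour of n_cross_validations, with time_column_name now defined in an earlier cell. As a rough reconstruction for readability only (the task type and iteration count sit in context lines this excerpt does not show, so those values are assumptions and not part of the diff), the resulting configuration cell is roughly:

    import logging
    from azureml.train.automl import AutoMLConfig

    automl_settings = {
        "time_column_name": time_column_name  # 'timeStamp', defined in an earlier cell
    }

    automl_config = AutoMLConfig(task = 'forecasting',            # assumed from the notebook's topic
                                 iterations = 10,                  # illustrative value, not shown in the hunk
                                 iteration_timeout_minutes = 5,
                                 X = X_train,
                                 y = y_train,
                                 n_cross_validations = 3,
                                 path = project_folder,
                                 verbosity = logging.INFO,
                                 **automl_settings)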
@@ -233,7 +208,8 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"You can call the submit method on the experiment object and pass the run configuration. For Local runs the execution is synchronous. Depending on the data and number of iterations this can run for while.\n",
"Submitting the configuration will start a new run in this experiment. For local runs, the execution is synchronous. Depending on the data and number of iterations, this can run for a while. Parameters controlling concurrency may speed up the process, depending on your hardware.\n",
"\n",
"You will see the currently running iterations printing to the console."
]
},
@@ -273,13 +249,34 @@
|
||||
"fitted_model.steps"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### View the engineered names for featurized data\n",
|
||||
"Below we display the engineered feature names generated for the featurized data using the time-series featurization."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"fitted_model.named_steps['timeseriestransformer'].get_engineered_feature_names()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Test the Best Fitted Model\n",
|
||||
"\n",
|
||||
"Predict on training and test set, and calculate residual values."
|
||||
"For forecasting, we will use the `forecast` function instead of the `predict` function. There are two reasons for this.\n",
|
||||
"\n",
|
||||
"We need to pass the recent values of the target variable `y`, whereas the scikit-compatible `predict` function only takes the non-target variables `X`. In our case, the test data immediately follows the training data, and we fill the `y` variable with `NaN`. The `NaN` serves as a question mark for the forecaster to fill with the actuals. Using the forecast function will produce forecasts using the shortest possible forecast horizon. The last time at which a definite (non-NaN) value is seen is the _forecast origin_ - the last time when the value of the target is known. \n",
|
||||
"\n",
|
||||
"Using the `predict` method would result in getting predictions for EVERY horizon the forecaster can predict at. This is useful when training and evaluating the performance of the forecaster at various horizons, but the level of detail is excessive for normal use."
|
||||
]
|
||||
},
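A minimal sketch (not from the notebook; the numbers are hypothetical) of how the query determines the forecast origin: entries of `y` that are known stay as numbers, and forecasting begins after the last definite value.

```python
import numpy as np

# Hypothetical query for 6 test periods: the first 2 actuals are already known.
y_query = np.array([310.0, 305.0, np.nan, np.nan, np.nan, np.nan])

# The forecast origin is the time of the last non-NaN entry (period 2 here),
# so the model only needs to forecast periods 3-6. Filling the whole query
# with NaN, as in the next cell, places the origin at the end of training.
```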
|
||||
{
|
||||
@@ -288,15 +285,64 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"y_pred = fitted_model.predict(X_test)\n",
|
||||
"y_pred"
|
||||
"# Replace ALL values in y_pred by NaN. \n",
|
||||
"# The forecast origin will be at the beginning of the first forecast period\n",
|
||||
"# (which is the same time as the end of the last training period).\n",
|
||||
"y_query = y_test.copy().astype(np.float)\n",
|
||||
"y_query.fill(np.nan)\n",
|
||||
"# The featurized data, aligned to y, will also be returned.\n",
|
||||
"# This contains the assumptions that were made in the forecast\n",
|
||||
"# and helps align the forecast to the original data\n",
|
||||
"y_fcst, X_trans = fitted_model.forecast(X_test, y_query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# limit the evaluation to data where y_test has actuals\n",
|
||||
"def align_outputs(y_predicted, X_trans, X_test, y_test, predicted_column_name = 'predicted'):\n",
|
||||
" \"\"\"\n",
|
||||
" Demonstrates how to get the output aligned to the inputs\n",
|
||||
" using pandas indexes. Helps understand what happened if\n",
|
||||
" the output's shape differs from the input shape, or if\n",
|
||||
" the data got re-sorted by time and grain during forecasting.\n",
|
||||
" \n",
|
||||
" Typical causes of misalignment are:\n",
|
||||
" * we predicted some periods that were missing in actuals -> drop from eval\n",
|
||||
" * model was asked to predict past max_horizon -> increase max horizon\n",
|
||||
" * data at start of X_test was needed for lags -> provide previous periods\n",
|
||||
" \"\"\"\n",
|
||||
" df_fcst = pd.DataFrame({predicted_column_name : y_predicted})\n",
|
||||
" # y and X outputs are aligned by forecast() function contract\n",
|
||||
" df_fcst.index = X_trans.index\n",
|
||||
" \n",
|
||||
" # align original X_test to y_test \n",
|
||||
" X_test_full = X_test.copy()\n",
|
||||
" X_test_full[target_column_name] = y_test\n",
|
||||
"\n",
|
||||
" # X_test_full's does not include origin, so reset for merge\n",
|
||||
" df_fcst.reset_index(inplace=True)\n",
|
||||
" X_test_full = X_test_full.reset_index().drop(columns='index')\n",
|
||||
" together = df_fcst.merge(X_test_full, how='right')\n",
|
||||
" \n",
|
||||
" # drop rows where prediction or actuals are nan \n",
|
||||
" # happens because of missing actuals \n",
|
||||
" # or at edges of time due to lags/rolling windows\n",
|
||||
" clean = together[together[[target_column_name, predicted_column_name]].notnull().all(axis=1)]\n",
|
||||
" return(clean)\n",
|
||||
"\n",
|
||||
"df_all = align_outputs(y_fcst, X_trans, X_test, y_test)\n",
|
||||
"df_all.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Use the Check Data Function to remove the nan values from y_test to avoid error when calculate metrics "
|
||||
"Looking at `X_trans` is also useful to see what featurization happened to the data."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -305,29 +351,14 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"if len(y_test) != len(y_pred):\n",
|
||||
" raise ValueError(\n",
|
||||
" 'the true values and prediction values do not have equal length.')\n",
|
||||
"elif len(y_test) == 0:\n",
|
||||
" raise ValueError(\n",
|
||||
" 'y_true and y_pred are empty.')\n",
|
||||
"\n",
|
||||
"# if there is any non-numeric element in the y_true or y_pred,\n",
|
||||
"# the ValueError exception will be thrown.\n",
|
||||
"y_test_f = np.array(y_test).astype(float)\n",
|
||||
"y_pred_f = np.array(y_pred).astype(float)\n",
|
||||
"\n",
|
||||
"# remove entries both in y_true and y_pred where at least\n",
|
||||
"# one element in y_true or y_pred is missing\n",
|
||||
"y_test = y_test_f[~(np.isnan(y_test_f) | np.isnan(y_pred_f))]\n",
|
||||
"y_pred = y_pred_f[~(np.isnan(y_test_f) | np.isnan(y_pred_f))]"
|
||||
"X_trans"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Calculate metrics for the prediction\n"
|
||||
"### Calculate accuracy metrics\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -336,26 +367,180 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(\"[Test Data] \\nRoot Mean squared error: %.2f\" % np.sqrt(mean_squared_error(y_test, y_pred)))\n",
|
||||
"# Explained variance score: 1 is perfect prediction\n",
|
||||
"print('mean_absolute_error score: %.2f' % mean_absolute_error(y_test, y_pred))\n",
|
||||
"print('R2 score: %.2f' % r2_score(y_test, y_pred))\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def MAPE(actual, pred):\n",
|
||||
" \"\"\"\n",
|
||||
" Calculate mean absolute percentage error.\n",
|
||||
" Remove NA and values where actual is close to zero\n",
|
||||
" \"\"\"\n",
|
||||
" not_na = ~(np.isnan(actual) | np.isnan(pred))\n",
|
||||
" not_zero = ~np.isclose(actual, 0.0)\n",
|
||||
" actual_safe = actual[not_na & not_zero]\n",
|
||||
" pred_safe = pred[not_na & not_zero]\n",
|
||||
" APE = 100*np.abs((actual_safe - pred_safe)/actual_safe)\n",
|
||||
" return np.mean(APE)"
|
||||
]
|
||||
},
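As a quick sanity check of the helper above, here is a hypothetical example (values invented for illustration) showing how rows with `NaN` or near-zero actuals are filtered out before averaging:

```python
import numpy as np

actual = np.array([100.0, 200.0, 0.0, np.nan, 400.0])
pred = np.array([110.0, 180.0, 5.0, 250.0, 440.0])

# The zero actual (row 3) and the NaN actual (row 4) are dropped;
# the remaining absolute percentage errors are 10%, 10% and 10%,
# so this prints 10.0.
print(MAPE(actual, pred))
```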
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(\"Simple forecasting model\")\n",
|
||||
"rmse = np.sqrt(mean_squared_error(df_all[target_column_name], df_all['predicted']))\n",
|
||||
"print(\"[Test Data] \\nRoot Mean squared error: %.2f\" % rmse)\n",
|
||||
"mae = mean_absolute_error(df_all[target_column_name], df_all['predicted'])\n",
|
||||
"print('mean_absolute_error score: %.2f' % mae)\n",
|
||||
"print('MAPE: %.2f' % MAPE(df_all[target_column_name], df_all['predicted']))\n",
|
||||
"\n",
|
||||
"# Plot outputs\n",
|
||||
"%matplotlib notebook\n",
|
||||
"test_pred = plt.scatter(y_test, y_pred, color='b')\n",
|
||||
"test_pred = plt.scatter(df_all[target_column_name], df_all['predicted'], color='b')\n",
|
||||
"test_test = plt.scatter(y_test, y_test, color='g')\n",
|
||||
"plt.legend((test_pred, test_test), ('prediction', 'truth'), loc='upper left', fontsize=8)\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The distribution looks a little heavy tailed: we underestimate the excursions of the extremes. A normal-quantile transform of the target might help, but let's first try using some past data with the lags and rolling window transforms.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Using lags and rolling window features to improve the forecast"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We did not use lags in the previous model specification. In effect, the prediction was the result of a simple regression on date, grain and any additional features. This is often a very good prediction as common time series patterns like seasonality and trends can be captured in this manner. Such simple regression is horizon-less: it doesn't matter how far into the future we are predicting, because we are not using past data.\n",
|
||||
"\n",
|
||||
"Now that we configured target lags, that is the previous values of the target variables, and the prediction is no longer horizon-less. We therefore must specify the `max_horizon` that the model will learn to forecast. The `target_lags` keyword specifies how far back we will construct the lags of the target variable, and the `target_rolling_window_size` specifies the size of the rolling window over which we will generate the `max`, `min` and `sum` features."
|
||||
]
|
||||
},
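Before configuring the run, here is a rough pandas illustration (not the AutoML implementation; the series values are made up) of what a lag-1 feature and size-5 rolling-window aggregates of the target look like:

```python
import pandas as pd

# Hypothetical hourly demand values.
y = pd.Series([5.2, 5.0, 4.8, 5.1, 5.6, 6.0, 6.3, 6.1])

features = pd.DataFrame({
    'y': y,
    'y_lag1': y.shift(1),                      # previous value of the target
    'y_rollmax5': y.shift(1).rolling(5).max(), # aggregates over the 5 prior values
    'y_rollmin5': y.shift(1).rolling(5).min(),
    'y_rollsum5': y.shift(1).rolling(5).sum(),
})
print(features)
```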
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"automl_settings_lags = {\n",
|
||||
" 'time_column_name': time_column_name,\n",
|
||||
" 'target_lags': 1,\n",
|
||||
" 'target_rolling_window_size': 5,\n",
|
||||
" # you MUST set the max_horizon when using lags and rolling windows\n",
|
||||
" # it is optional when looking-back features are not used \n",
|
||||
" 'max_horizon': len(y_test), # only one grain\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"automl_config_lags = AutoMLConfig(task = 'forecasting',\n",
|
||||
" debug_log = 'automl_nyc_energy_errors.log',\n",
|
||||
" primary_metric='normalized_root_mean_squared_error',\n",
|
||||
" iterations = 10,\n",
|
||||
" iteration_timeout_minutes = 5,\n",
|
||||
" X = X_train,\n",
|
||||
" y = y_train,\n",
|
||||
" n_cross_validations = 3,\n",
|
||||
" path=project_folder,\n",
|
||||
" verbosity = logging.INFO,\n",
|
||||
" **automl_settings_lags)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"local_run_lags = experiment.submit(automl_config_lags, show_output=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"best_run_lags, fitted_model_lags = local_run_lags.get_output()\n",
|
||||
"y_fcst_lags, X_trans_lags = fitted_model_lags.forecast(X_test, y_query)\n",
|
||||
"df_lags = align_outputs(y_fcst_lags, X_trans_lags, X_test, y_test)\n",
|
||||
"df_lags.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"X_trans_lags"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(\"Forecasting model with lags\")\n",
|
||||
"rmse = np.sqrt(mean_squared_error(df_lags[target_column_name], df_lags['predicted']))\n",
|
||||
"print(\"[Test Data] \\nRoot Mean squared error: %.2f\" % rmse)\n",
|
||||
"mae = mean_absolute_error(df_lags[target_column_name], df_lags['predicted'])\n",
|
||||
"print('mean_absolute_error score: %.2f' % mae)\n",
|
||||
"print('MAPE: %.2f' % MAPE(df_lags[target_column_name], df_lags['predicted']))\n",
|
||||
"\n",
|
||||
"# Plot outputs\n",
|
||||
"%matplotlib notebook\n",
|
||||
"test_pred = plt.scatter(df_lags[target_column_name], df_lags['predicted'], color='b')\n",
|
||||
"test_test = plt.scatter(y_test, y_test, color='g')\n",
|
||||
"plt.legend((test_pred, test_test), ('prediction', 'truth'), loc='upper left', fontsize=8)\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### What features matter for the forecast?"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.train.automl.automlexplainer import explain_model\n",
|
||||
"\n",
|
||||
"# feature names are everything in the transformed data except the target\n",
|
||||
"features = X_trans.columns[:-1]\n",
|
||||
"expl = explain_model(fitted_model, X_train, X_test, features = features, best_run=best_run_lags, y_train = y_train)\n",
|
||||
"# unpack the tuple\n",
|
||||
"shap_values, expected_values, feat_overall_imp, feat_names, per_class_summary, per_class_imp = expl\n",
|
||||
"best_run_lags"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Please go to the Azure Portal's best run to see the top features chart.\n",
|
||||
"\n",
|
||||
"The informative features make all sorts of intuitive sense. Temperature is a strong driver of heating and cooling demand in NYC. Apart from that, the daily life cycle, expressed by `hour`, and the weekly cycle, expressed by `wday` drives people's energy use habits."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "xiaga"
|
||||
"name": "xiaga, tosingli"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
@@ -373,7 +558,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.8"
|
||||
"version": "3.6.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -9,6 +9,13 @@
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -20,7 +27,9 @@
|
||||
"1. [Introduction](#Introduction)\n",
|
||||
"1. [Setup](#Setup)\n",
|
||||
"1. [Data](#Data)\n",
|
||||
"1. [Train](#Train)"
|
||||
"1. [Train](#Train)\n",
|
||||
"1. [Predict](#Predict)\n",
|
||||
"1. [Operationalize](#Operationalize)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -85,9 +94,9 @@
|
||||
"ws = Workspace.from_config()\n",
|
||||
"\n",
|
||||
"# choose a name for the run history container in the workspace\n",
|
||||
"experiment_name = 'automl-ojsalesforecasting'\n",
|
||||
"experiment_name = 'automl-ojforecasting'\n",
|
||||
"# project folder\n",
|
||||
"project_folder = './sample_projects/automl-local-ojsalesforecasting'\n",
|
||||
"project_folder = './sample_projects/automl-local-ojforecasting'\n",
|
||||
"\n",
|
||||
"experiment = Experiment(ws, experiment_name)\n",
|
||||
"\n",
|
||||
@@ -260,12 +269,12 @@
|
||||
" 'time_column_name': time_column_name,\n",
|
||||
" 'grain_column_names': grain_column_names,\n",
|
||||
" 'drop_column_names': ['logQuantity'],\n",
|
||||
" 'max_horizon': n_test_periods\n",
|
||||
" 'max_horizon': n_test_periods # optional\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"automl_config = AutoMLConfig(task='forecasting',\n",
|
||||
" debug_log='automl_oj_sales_errors.log',\n",
|
||||
" primary_metric='normalized_root_mean_squared_error',\n",
|
||||
" primary_metric='normalized_mean_absolute_error',\n",
|
||||
" iterations=10,\n",
|
||||
" X=X_train,\n",
|
||||
" y=y_train,\n",
|
||||
@@ -293,15 +302,6 @@
|
||||
"local_run = experiment.submit(automl_config, show_output=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"local_run"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -324,7 +324,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Make Predictions from the Best Fitted Model\n",
|
||||
"# Predict\n",
|
||||
"Now that we have retrieved the best pipeline/model, it can be used to make predictions on test data. First, we remove the target values from the test set:"
|
||||
]
|
||||
},
|
||||
@@ -352,7 +352,7 @@
|
||||
"source": [
|
||||
"To produce predictions on the test set, we need to know the feature values at all dates in the test set. This requirement is somewhat reasonable for the OJ sales data since the features mainly consist of price, which is usually set in advance, and customer demographics which are approximately constant for each store over the 20 week forecast horizon in the testing data. \n",
|
||||
"\n",
|
||||
"The target predictions can be retrieved by calling the `predict` method on the best model:"
|
||||
"We will first create a query `y_query`, which is aligned index-for-index to `X_test`. This is a vector of target values where each `NaN` serves the function of the question mark to be replaced by forecast. Passing definite values in the `y` argument allows the `forecast` function to make predictions on data that does not immediately follow the train data which contains `y`. In each grain, the last time point where the model sees a definite value of `y` is that grain's _forecast origin_."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -361,15 +361,76 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"y_pred = fitted_pipeline.predict(X_test)"
|
||||
"# Replace ALL values in y_pred by NaN.\n",
|
||||
"# The forecast origin will be at the beginning of the first forecast period.\n",
|
||||
"# (Which is the same time as the end of the last training period.)\n",
|
||||
"y_query = y_test.copy().astype(np.float)\n",
|
||||
"y_query.fill(np.nan)\n",
|
||||
"# The featurized data, aligned to y, will also be returned.\n",
|
||||
"# This contains the assumptions that were made in the forecast\n",
|
||||
"# and helps align the forecast to the original data\n",
|
||||
"y_pred, X_trans = fitted_pipeline.forecast(X_test, y_query)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Calculate evaluation metrics for the prediction\n",
|
||||
"To evaluate the accuracy of the forecast, we'll compare against the actual sales quantities for some select metrics, included the mean absolute percentage error (MAPE)."
|
||||
"If you are used to scikit pipelines, perhaps you expected `predict(X_test)`. However, forecasting requires a more general interface that also supplies the past target `y` values. Please use `forecast(X,y)` as `predict(X)` is reserved for internal purposes on forecasting models.\n",
|
||||
"\n",
|
||||
"The [energy demand forecasting notebook](https://github.com/Azure/MachineLearningNotebooks/tree/master/how-to-use-azureml/automated-machine-learning/forecasting-energy-demand) demonstrates the use of the forecast function in more detail in the context of using lags and rolling window features. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Evaluate\n",
|
||||
"\n",
|
||||
"To evaluate the accuracy of the forecast, we'll compare against the actual sales quantities for some select metrics, included the mean absolute percentage error (MAPE). \n",
|
||||
"\n",
|
||||
"It is a good practice to always align the output explicitly to the input, as the count and order of the rows may have changed during transformations that span multiple rows."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def align_outputs(y_predicted, X_trans, X_test, y_test, predicted_column_name = 'predicted'):\n",
|
||||
" \"\"\"\n",
|
||||
" Demonstrates how to get the output aligned to the inputs\n",
|
||||
" using pandas indexes. Helps understand what happened if\n",
|
||||
" the output's shape differs from the input shape, or if\n",
|
||||
" the data got re-sorted by time and grain during forecasting.\n",
|
||||
" \n",
|
||||
" Typical causes of misalignment are:\n",
|
||||
" * we predicted some periods that were missing in actuals -> drop from eval\n",
|
||||
" * model was asked to predict past max_horizon -> increase max horizon\n",
|
||||
" * data at start of X_test was needed for lags -> provide previous periods in y\n",
|
||||
" \"\"\"\n",
|
||||
" \n",
|
||||
" df_fcst = pd.DataFrame({predicted_column_name : y_predicted})\n",
|
||||
" # y and X outputs are aligned by forecast() function contract\n",
|
||||
" df_fcst.index = X_trans.index\n",
|
||||
" \n",
|
||||
" # align original X_test to y_test \n",
|
||||
" X_test_full = X_test.copy()\n",
|
||||
" X_test_full[target_column_name] = y_test\n",
|
||||
"\n",
|
||||
" # X_test_full's index does not include origin, so reset for merge\n",
|
||||
" df_fcst.reset_index(inplace=True)\n",
|
||||
" X_test_full = X_test_full.reset_index().drop(columns='index')\n",
|
||||
" together = df_fcst.merge(X_test_full, how='right')\n",
|
||||
" \n",
|
||||
" # drop rows where prediction or actuals are nan \n",
|
||||
" # happens because of missing actuals \n",
|
||||
" # or at edges of time due to lags/rolling windows\n",
|
||||
" clean = together[together[[target_column_name, predicted_column_name]].notnull().all(axis=1)]\n",
|
||||
" return(clean)\n",
|
||||
"\n",
|
||||
"df_all = align_outputs(y_pred, X_trans, X_test, y_test)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -388,18 +449,392 @@
|
||||
" actual_safe = actual[not_na & not_zero]\n",
|
||||
" pred_safe = pred[not_na & not_zero]\n",
|
||||
" APE = 100*np.abs((actual_safe - pred_safe)/actual_safe)\n",
|
||||
" return np.mean(APE)\n",
|
||||
" return np.mean(APE)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(\"Simple forecasting model\")\n",
|
||||
"rmse = np.sqrt(mean_squared_error(df_all[target_column_name], df_all['predicted']))\n",
|
||||
"print(\"[Test Data] \\nRoot Mean squared error: %.2f\" % rmse)\n",
|
||||
"mae = mean_absolute_error(df_all[target_column_name], df_all['predicted'])\n",
|
||||
"print('mean_absolute_error score: %.2f' % mae)\n",
|
||||
"print('MAPE: %.2f' % MAPE(df_all[target_column_name], df_all['predicted']))\n",
|
||||
"\n",
|
||||
"print(\"[Test Data] \\nRoot Mean squared error: %.2f\" % np.sqrt(mean_squared_error(y_test, y_pred)))\n",
|
||||
"print('mean_absolute_error score: %.2f' % mean_absolute_error(y_test, y_pred))\n",
|
||||
"print('MAPE: %.2f' % MAPE(y_test, y_pred))"
|
||||
"# Plot outputs\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"\n",
|
||||
"%matplotlib notebook\n",
|
||||
"test_pred = plt.scatter(df_all[target_column_name], df_all['predicted'], color='b')\n",
|
||||
"test_test = plt.scatter(y_test, y_test, color='g')\n",
|
||||
"plt.legend((test_pred, test_test), ('prediction', 'truth'), loc='upper left', fontsize=8)\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Operationalize"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"_Operationalization_ means getting the model into the cloud so that other can run it after you close the notebook. We will create a docker running on Azure Container Instances with the model."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"description = 'AutoML OJ forecaster'\n",
|
||||
"tags = None\n",
|
||||
"model = local_run.register_model(description = description, tags = tags)\n",
|
||||
"\n",
|
||||
"print(local_run.model_id)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Develop the scoring script\n",
|
||||
"\n",
|
||||
"Serializing and deserializing complex data frames may be tricky. We first develop the `run()` function of the scoring script locally, then write it into a scoring script. It is much easier to debug any quirks of the scoring function without crossing two compute environments. For this exercise, we handle a common quirk of how pandas dataframes serialize time stamp values."
|
||||
]
|
||||
},
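The timestamp quirk mentioned above can be reproduced in isolation with a small, hypothetical frame: `DataFrame.to_json()` serializes datetimes as millisecond epoch integers, so they must be converted back with `pd.to_datetime(..., unit='ms')` after parsing:

```python
import pandas as pd

# Hypothetical frame with a timestamp column, mimicking X_test.
df = pd.DataFrame({'WeekStarting': pd.to_datetime(['1990-06-14', '1990-06-21']),
                   'Quantity': [10500, 9800]})

payload = df.to_json()                                 # datetimes become ms-since-epoch ints
restored = pd.read_json(payload, convert_dates=False)  # parse without guessing date columns
restored['WeekStarting'] = pd.to_datetime(restored['WeekStarting'], unit='ms')
print(restored.dtypes)
```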
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# this is where we test the run function of the scoring script interactively\n",
|
||||
"# before putting it in the scoring script\n",
|
||||
"\n",
|
||||
"timestamp_columns = ['WeekStarting']\n",
|
||||
"\n",
|
||||
"def run(rawdata, test_model = None):\n",
|
||||
" \"\"\"\n",
|
||||
" Intended to process 'rawdata' string produced by\n",
|
||||
" \n",
|
||||
" {'X': X_test.to_json(), y' : y_test.to_json()}\n",
|
||||
" \n",
|
||||
" Don't convert the X payload to numpy.array, use it as pandas.DataFrame\n",
|
||||
" \"\"\"\n",
|
||||
" try:\n",
|
||||
" # unpack the data frame with timestamp \n",
|
||||
" rawobj = json.loads(rawdata) # rawobj is now a dict of strings \n",
|
||||
" X_pred = pd.read_json(rawobj['X'], convert_dates=False) # load the pandas DF from a json string\n",
|
||||
" for col in timestamp_columns: # fix timestamps\n",
|
||||
" X_pred[col] = pd.to_datetime(X_pred[col], unit='ms') \n",
|
||||
" \n",
|
||||
" y_pred = np.array(rawobj['y']) # reconstitute numpy array from serialized list\n",
|
||||
" \n",
|
||||
" if test_model is None:\n",
|
||||
" result = model.forecast(X_pred, y_pred) # use the global model from init function\n",
|
||||
" else:\n",
|
||||
" result = test_model.forecast(X_pred, y_pred) # use the model on which we are testing\n",
|
||||
" \n",
|
||||
" except Exception as e:\n",
|
||||
" result = str(e)\n",
|
||||
" return json.dumps({\"error\": result})\n",
|
||||
" \n",
|
||||
" forecast_as_list = result[0].tolist()\n",
|
||||
" index_as_df = result[1].index.to_frame().reset_index(drop=True)\n",
|
||||
" \n",
|
||||
" return json.dumps({\"forecast\": forecast_as_list, # return the minimum over the wire: \n",
|
||||
" \"index\": index_as_df.to_json() # no forecast and its featurized values\n",
|
||||
" })"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# test the run function here before putting in the scoring script\n",
|
||||
"import json\n",
|
||||
"\n",
|
||||
"test_sample = json.dumps({'X': X_test.to_json(), 'y' : y_query.tolist()})\n",
|
||||
"response = run(test_sample, fitted_pipeline)\n",
|
||||
"\n",
|
||||
"# unpack the response, dealing with the timestamp serialization again\n",
|
||||
"res_dict = json.loads(response)\n",
|
||||
"y_fcst_all = pd.read_json(res_dict['index'])\n",
|
||||
"y_fcst_all[time_column_name] = pd.to_datetime(y_fcst_all[time_column_name], unit = 'ms')\n",
|
||||
"y_fcst_all['forecast'] = res_dict['forecast']\n",
|
||||
"y_fcst_all.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now that the function works locally in the notebook, let's write it down into the scoring script. The scoring script is authored by the data scientist. Adjust it to taste, adding inputs, outputs and processing as needed."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%writefile score_fcast.py\n",
|
||||
"import pickle\n",
|
||||
"import json\n",
|
||||
"import numpy as np\n",
|
||||
"import pandas as pd\n",
|
||||
"import azureml.train.automl\n",
|
||||
"from sklearn.externals import joblib\n",
|
||||
"from azureml.core.model import Model\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def init():\n",
|
||||
" global model\n",
|
||||
" model_path = Model.get_model_path(model_name = '<<modelid>>') # this name is model.id of model that we want to deploy\n",
|
||||
" # deserialize the model file back into a sklearn model\n",
|
||||
" model = joblib.load(model_path)\n",
|
||||
"\n",
|
||||
"timestamp_columns = ['WeekStarting']\n",
|
||||
"\n",
|
||||
"def run(rawdata, test_model = None):\n",
|
||||
" \"\"\"\n",
|
||||
" Intended to process 'rawdata' string produced by\n",
|
||||
" \n",
|
||||
" {'X': X_test.to_json(), y' : y_test.to_json()}\n",
|
||||
" \n",
|
||||
" Don't convert the X payload to numpy.array, use it as pandas.DataFrame\n",
|
||||
" \"\"\"\n",
|
||||
" try:\n",
|
||||
" # unpack the data frame with timestamp \n",
|
||||
" rawobj = json.loads(rawdata) # rawobj is now a dict of strings \n",
|
||||
" X_pred = pd.read_json(rawobj['X'], convert_dates=False) # load the pandas DF from a json string\n",
|
||||
" for col in timestamp_columns: # fix timestamps\n",
|
||||
" X_pred[col] = pd.to_datetime(X_pred[col], unit='ms') \n",
|
||||
" \n",
|
||||
" y_pred = np.array(rawobj['y']) # reconstitute numpy array from serialized list\n",
|
||||
" \n",
|
||||
" if test_model is None:\n",
|
||||
" result = model.forecast(X_pred, y_pred) # use the global model from init function\n",
|
||||
" else:\n",
|
||||
" result = test_model.forecast(X_pred, y_pred) # use the model on which we are testing\n",
|
||||
" \n",
|
||||
" except Exception as e:\n",
|
||||
" result = str(e)\n",
|
||||
" return json.dumps({\"error\": result})\n",
|
||||
" \n",
|
||||
" # prepare to send over wire as json\n",
|
||||
" forecast_as_list = result[0].tolist()\n",
|
||||
" index_as_df = result[1].index.to_frame().reset_index(drop=True)\n",
|
||||
" \n",
|
||||
" return json.dumps({\"forecast\": forecast_as_list, # return the minimum over the wire: \n",
|
||||
" \"index\": index_as_df.to_json() # no forecast and its featurized values\n",
|
||||
" })"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# get the model\n",
|
||||
"from azureml.train.automl.run import AutoMLRun\n",
|
||||
"\n",
|
||||
"experiment = Experiment(ws, experiment_name)\n",
|
||||
"ml_run = AutoMLRun(experiment = experiment, run_id = local_run.id)\n",
|
||||
"best_iteration = int(str.split(best_run.id,'_')[-1]) # the iteration number is a postfix of the run ID."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# get the best model's dependencies and write them into this file\n",
|
||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||
"\n",
|
||||
"conda_env_file_name = 'fcast_env.yml'\n",
|
||||
"\n",
|
||||
"dependencies = ml_run.get_run_sdk_dependencies(iteration = best_iteration)\n",
|
||||
"for p in ['azureml-train-automl', 'azureml-sdk', 'azureml-core']:\n",
|
||||
" print('{}\\t{}'.format(p, dependencies[p]))\n",
|
||||
"\n",
|
||||
"myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn'], pip_packages=['azureml-sdk[automl]'])\n",
|
||||
"\n",
|
||||
"myenv.save_to_file('.', conda_env_file_name)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# this is the script file name we wrote a few cells above\n",
|
||||
"script_file_name = 'score_fcast.py'\n",
|
||||
"\n",
|
||||
"# Substitute the actual version number in the environment file.\n",
|
||||
"# This is not strictly needed in this notebook because the model should have been generated using the current SDK version.\n",
|
||||
"# However, we include this in case this code is used on an experiment from a previous SDK version.\n",
|
||||
"\n",
|
||||
"with open(conda_env_file_name, 'r') as cefr:\n",
|
||||
" content = cefr.read()\n",
|
||||
"\n",
|
||||
"with open(conda_env_file_name, 'w') as cefw:\n",
|
||||
" cefw.write(content.replace(azureml.core.VERSION, dependencies['azureml-sdk']))\n",
|
||||
"\n",
|
||||
"# Substitute the actual model id in the script file.\n",
|
||||
"\n",
|
||||
"with open(script_file_name, 'r') as cefr:\n",
|
||||
" content = cefr.read()\n",
|
||||
"\n",
|
||||
"with open(script_file_name, 'w') as cefw:\n",
|
||||
" cefw.write(content.replace('<<modelid>>', local_run.model_id))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create a Container Image"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.image import Image, ContainerImage\n",
|
||||
"\n",
|
||||
"image_config = ContainerImage.image_configuration(runtime= \"python\",\n",
|
||||
" execution_script = script_file_name,\n",
|
||||
" conda_file = conda_env_file_name,\n",
|
||||
" tags = {'type': \"automl-forecasting\"},\n",
|
||||
" description = \"Image for automl forecasting sample\")\n",
|
||||
"\n",
|
||||
"image = Image.create(name = \"automl-fcast-image\",\n",
|
||||
" # this is the model object \n",
|
||||
" models = [model],\n",
|
||||
" image_config = image_config, \n",
|
||||
" workspace = ws)\n",
|
||||
"\n",
|
||||
"image.wait_for_creation(show_output = True)\n",
|
||||
"\n",
|
||||
"if image.creation_state == 'Failed':\n",
|
||||
" print(\"Image build log at: \" + image.image_build_log_uri)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Deploy the Image as a Web Service on Azure Container Instance"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.webservice import AciWebservice\n",
|
||||
"\n",
|
||||
"aciconfig = AciWebservice.deploy_configuration(cpu_cores = 1, \n",
|
||||
" memory_gb = 2, \n",
|
||||
" tags = {'type': \"automl-forecasting\"},\n",
|
||||
" description = \"Automl forecasting sample service\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.webservice import Webservice\n",
|
||||
"\n",
|
||||
"aci_service_name = 'automl-forecast-01'\n",
|
||||
"print(aci_service_name)\n",
|
||||
"\n",
|
||||
"aci_service = Webservice.deploy_from_image(deployment_config = aciconfig,\n",
|
||||
" image = image,\n",
|
||||
" name = aci_service_name,\n",
|
||||
" workspace = ws)\n",
|
||||
"aci_service.wait_for_deployment(True)\n",
|
||||
"print(aci_service.state)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Call the service"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# we send the data to the service serialized into a json string\n",
|
||||
"test_sample = json.dumps({'X':X_test.to_json(), 'y' : y_query.tolist()})\n",
|
||||
"response = aci_service.run(input_data = test_sample)\n",
|
||||
"\n",
|
||||
"# translate from networkese to datascientese\n",
|
||||
"try: \n",
|
||||
" res_dict = json.loads(response)\n",
|
||||
" y_fcst_all = pd.read_json(res_dict['index'])\n",
|
||||
" y_fcst_all[time_column_name] = pd.to_datetime(y_fcst_all[time_column_name], unit = 'ms')\n",
|
||||
" y_fcst_all['forecast'] = res_dict['forecast'] \n",
|
||||
"except:\n",
|
||||
" print(res_dict)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"y_fcst_all.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Delete the web service if desired"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"serv = Webservice(ws, 'automl-forecast-01')\n",
|
||||
"# serv.delete() # don't do it accidentally"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "erwright"
|
||||
"name": "erwright, tosingli"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
@@ -417,7 +852,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.8"
|
||||
"version": "3.6.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -9,6 +9,13 @@
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -37,8 +44,9 @@
|
||||
"In this notebook you will learn how to:\n",
|
||||
"1. Create an `Experiment` in an existing `Workspace`.\n",
|
||||
"2. Configure AutoML using `AutoMLConfig`.\n",
|
||||
"4. Train the model.\n",
|
||||
"5. Explore the results.\n",
|
||||
"3. Train the model.\n",
|
||||
"4. Explore the results.\n",
|
||||
"5. Viewing the engineered names for featurized data and featurization summary for all raw features.\n",
|
||||
"6. Test the best fitted model.\n",
|
||||
"\n",
|
||||
"In addition this notebook showcases the following features\n",
|
||||
@@ -154,7 +162,6 @@
|
||||
"|**primary_metric**|This is the metric that you want to optimize. Classification supports the following primary metrics: <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>average_precision_score_weighted</i><br><i>norm_macro_recall</i><br><i>precision_score_weighted</i>|\n",
|
||||
"|**iteration_timeout_minutes**|Time limit in minutes for each iteration.|\n",
|
||||
"|**iterations**|Number of iterations. In each iteration AutoML trains a specific pipeline with the data.|\n",
|
||||
"|**n_cross_validations**|Number of cross validation splits.|\n",
|
||||
"|**preprocess**|Setting this to *True* enables AutoML to perform preprocessing on the input to handle *missing data*, and to perform some common *feature extraction*.|\n",
|
||||
"|**experiment_exit_score**|*double* value indicating the target for *primary_metric*. <br>Once the target is surpassed the run terminates.|\n",
|
||||
"|**blacklist_models**|*List* of *strings* indicating machine learning algorithms for AutoML to avoid in this run.<br><br> Allowed values for **Classification**<br><i>LogisticRegression</i><br><i>SGD</i><br><i>MultinomialNaiveBayes</i><br><i>BernoulliNaiveBayes</i><br><i>SVM</i><br><i>LinearSVM</i><br><i>KNN</i><br><i>DecisionTree</i><br><i>RandomForest</i><br><i>ExtremeRandomTrees</i><br><i>LightGBM</i><br><i>GradientBoosting</i><br><i>TensorFlowDNN</i><br><i>TensorFlowLinearClassifier</i><br><br>Allowed values for **Regression**<br><i>ElasticNet</i><br><i>GradientBoosting</i><br><i>DecisionTree</i><br><i>KNN</i><br><i>LassoLars</i><br><i>SGD</i><br><i>RandomForest</i><br><i>ExtremeRandomTrees</i><br><i>LightGBM</i><br><i>TensorFlowLinearRegressor</i><br><i>TensorFlowDNN</i>|\n",
|
||||
@@ -174,7 +181,6 @@
|
||||
" primary_metric = 'AUC_weighted',\n",
|
||||
" iteration_timeout_minutes = 60,\n",
|
||||
" iterations = 20,\n",
|
||||
" n_cross_validations = 5,\n",
|
||||
" preprocess = True,\n",
|
||||
" experiment_exit_score = 0.9984,\n",
|
||||
" blacklist_models = ['KNN','LinearSVM'],\n",
|
||||
@@ -318,6 +324,45 @@
|
||||
"# best_run, fitted_model = local_run.get_output(iteration = iteration)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### View the engineered names for featurized data\n",
|
||||
"Below we display the engineered feature names generated for the featurized data using the preprocessing featurization."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"fitted_model.named_steps['datatransformer'].get_engineered_feature_names()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### View the featurization summary\n",
|
||||
"Below we display the featurization that was performed on different raw features in the user data. For each raw feature in the user data, the following information is displayed:-\n",
|
||||
"- Raw feature name\n",
|
||||
"- Number of engineered features formed out of this raw feature\n",
|
||||
"- Type detected\n",
|
||||
"- If feature was dropped\n",
|
||||
"- List of feature transformations for the raw feature"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"fitted_model.named_steps['datatransformer'].get_featurization_summary()"
|
||||
]
|
||||
},
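To make the summary above easier to scan, it can be loaded into a DataFrame; this is a sketch that assumes the API returns a list of per-feature records as described above:

```python
import pandas as pd

summary = fitted_model.named_steps['datatransformer'].get_featurization_summary()
pd.DataFrame.from_records(summary)  # one row per raw feature
```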
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
||||
@@ -9,6 +9,13 @@
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -254,7 +261,9 @@
|
||||
"3.\toverall_summary: The model level feature importance values sorted in descending order\n",
|
||||
"4.\toverall_imp: The feature names sorted in the same order as in overall_summary\n",
|
||||
"5.\tper_class_summary: The class level feature importance values sorted in descending order. Only available for the classification case\n",
|
||||
"6.\tper_class_imp: The feature names sorted in the same order as in per_class_summary. Only available for the classification case"
|
||||
"6.\tper_class_imp: The feature names sorted in the same order as in per_class_summary. Only available for the classification case\n",
|
||||
"\n",
|
||||
"Note:- The **retrieve_model_explanation()** API only works in case AutoML has been configured with **'model_explainability'** flag set to **True**. "
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -305,7 +314,7 @@
|
||||
"from azureml.train.automl.automlexplainer import explain_model\n",
|
||||
"\n",
|
||||
"shap_values, expected_values, overall_summary, overall_imp, per_class_summary, per_class_imp = \\\n",
|
||||
" explain_model(fitted_model, X_train, X_test)"
|
||||
" explain_model(fitted_model, X_train, X_test, features=features)"
|
||||
]
|
||||
},
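As a small, hypothetical follow-up, the model-level importances returned above can be paired up to list the top features, assuming `overall_imp` and `overall_summary` are parallel lists sorted in descending order as described:

```python
# Print the ten most important features at the model level.
for name, value in list(zip(overall_imp, overall_summary))[:10]:
    print('{}: {:.4f}'.format(name, value))
```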
|
||||
{
|
||||
|
||||
@@ -9,6 +9,13 @@
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
||||
@@ -9,6 +9,13 @@
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -112,9 +119,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create or Attach existing AmlCompute\n",
|
||||
"You will need to create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for your AutoML run. In this tutorial, you create `AmlCompute` as your training compute resource.\n",
|
||||
"\n",
|
||||
"**Creation of AmlCompute takes approximately 5 minutes.** If the AmlCompute with that name is already in your workspace this code will skip the creation process.\n",
|
||||
"You will need to create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for your AutoML run. In this tutorial, you create an AmlCompute as your training compute resource.\n",
|
||||
"\n",
|
||||
"As with other Azure services, there are limits on certain resources (e.g. AmlCompute) associated with the Azure Machine Learning service. Please read [this article](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota."
|
||||
]
|
||||
@@ -129,10 +134,12 @@
|
||||
"from azureml.core.compute import ComputeTarget\n",
|
||||
"\n",
|
||||
"# Choose a name for your cluster.\n",
|
||||
"amlcompute_cluster_name = \"automlcl\"\n",
|
||||
"amlcompute_cluster_name = \"cpucluster\"\n",
|
||||
"\n",
|
||||
"found = False\n",
|
||||
"\n",
|
||||
"# Check if this compute target already exists in the workspace.\n",
|
||||
"\n",
|
||||
"cts = ws.compute_targets\n",
|
||||
"if amlcompute_cluster_name in cts and cts[amlcompute_cluster_name].type == 'AmlCompute':\n",
|
||||
" found = True\n",
|
||||
@@ -145,7 +152,7 @@
|
||||
" #vm_priority = 'lowpriority', # optional\n",
|
||||
" max_nodes = 6)\n",
|
||||
"\n",
|
||||
" # Create the cluster.\n",
|
||||
" # Create the cluster.\\n\",\n",
|
||||
" compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)\n",
|
||||
"\n",
|
||||
" # Can poll for a minimum number of nodes and for a specific timeout.\n",
|
||||
@@ -155,13 +162,6 @@
|
||||
" # For a more detailed view of current AmlCompute status, use get_status()."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
||||
@@ -9,6 +9,13 @@
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -40,7 +47,8 @@
|
||||
"3. Configure AutoML using `AutoMLConfig`.\n",
|
||||
"4. Train the model using the DSVM.\n",
|
||||
"5. Explore the results.\n",
|
||||
"6. Test the best fitted model.\n",
|
||||
"6. Viewing the engineered names for featurized data and featurization summary for all raw features.\n",
|
||||
"7. Test the best fitted model.\n",
|
||||
"\n",
|
||||
"In addition this notebook showcases the following features\n",
|
||||
"- **Parallel** executions for iterations\n",
|
||||
@@ -110,7 +118,7 @@
|
||||
"source": [
|
||||
"### Attach a Remote Linux DSVM\n",
|
||||
"To use a remote Docker compute target:\n",
|
||||
"1. Create a Linux DSVM in Azure, following these [quick instructions](https://docs.microsoft.com/en-us/azure/machine-learning/desktop-workbench/how-to-create-dsvm-hdi). Make sure you use the Ubuntu flavor (not CentOS). Make sure that disk space is available under `/tmp` because AutoML creates files under `/tmp/azureml_run`s. The DSVM should have more cores than the number of parallel runs that you plan to enable. It should also have at least 4GB per core.\n",
|
||||
"1. Create a Linux DSVM in Azure, following these [instructions](https://docs.microsoft.com/en-us/azure/machine-learning/data-science-virtual-machine/dsvm-ubuntu-intro). Make sure you use the Ubuntu flavor (not CentOS). Make sure that disk space is available under `/tmp` because AutoML creates files under `/tmp/azureml_run`s. The DSVM should have more cores than the number of parallel runs that you plan to enable. It should also have at least 4GB per core.\n",
|
||||
"2. Enter the IP address, user name and password below.\n",
|
||||
"\n",
|
||||
"**Note:** By default, SSH runs on port 22 and you don't need to change the port number below. If you've configured SSH to use a different port, change `dsvm_ssh_port` accordinglyaddress. [Read more](https://docs.microsoft.com/en-us/azure/virtual-machines/troubleshooting/detailed-troubleshoot-ssh-connection) on changing SSH ports for security reasons."
|
||||
@@ -160,6 +168,7 @@
|
||||
"source": [
|
||||
"from azureml.core.runconfig import RunConfiguration\n",
|
||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||
"import pkg_resources\n",
|
||||
"\n",
|
||||
"# create a new RunConfig object\n",
|
||||
"conda_run_config = RunConfiguration(framework=\"python\")\n",
|
||||
@@ -167,7 +176,9 @@
|
||||
"# Set compute target to the Linux DSVM\n",
|
||||
"conda_run_config.target = dsvm_compute\n",
|
||||
"\n",
|
||||
"cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]'], conda_packages=['numpy','py-xgboost<=0.80'])\n",
|
||||
"pandas_dependency = 'pandas==' + pkg_resources.get_distribution(\"pandas\").version\n",
|
||||
"\n",
|
||||
"cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]'], conda_packages=['numpy','py-xgboost<=0.80',pandas_dependency])\n",
|
||||
"conda_run_config.environment.python.conda_dependencies = cd"
|
||||
]
|
||||
},
|
||||
@@ -407,6 +418,45 @@
|
||||
"print(fitted_model)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### View the engineered names for featurized data\n",
|
||||
"Below we display the engineered feature names generated for the featurized data using the preprocessing featurization."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"fitted_model.named_steps['datatransformer'].get_engineered_feature_names()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### View the featurization summary\n",
|
||||
"Below we display the featurization that was performed on different raw features in the user data. For each raw feature in the user data, the following information is displayed:-\n",
|
||||
"- Raw feature name\n",
|
||||
"- Number of engineered features formed out of this raw feature\n",
|
||||
"- Type detected\n",
|
||||
"- If feature was dropped\n",
|
||||
"- List of feature transformations for the raw feature"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"fitted_model.named_steps['datatransformer'].get_featurization_summary()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
||||
@@ -9,6 +9,13 @@
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -245,6 +252,7 @@
|
||||
"source": [
|
||||
"from azureml.core.runconfig import RunConfiguration\n",
|
||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||
"import pkg_resources\n",
|
||||
"\n",
|
||||
"# create a new RunConfig object\n",
|
||||
"conda_run_config = RunConfiguration(framework=\"python\")\n",
|
||||
@@ -254,7 +262,9 @@
|
||||
"# set the data reference of the run coonfiguration\n",
|
||||
"conda_run_config.data_references = {ds.name: dr}\n",
|
||||
"\n",
|
||||
"cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]'], conda_packages=['numpy','py-xgboost<=0.80'])\n",
|
||||
"pandas_dependency = 'pandas==' + pkg_resources.get_distribution(\"pandas\").version\n",
|
||||
"\n",
|
||||
"cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]'], conda_packages=['numpy','py-xgboost<=0.80',pandas_dependency])\n",
|
||||
"conda_run_config.environment.python.conda_dependencies = cd"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -9,6 +9,13 @@
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -130,7 +137,7 @@
|
||||
" print('Found an existing DSVM.')\n",
|
||||
"except:\n",
|
||||
" print('Creating a new DSVM.')\n",
|
||||
" dsvm_config = DsvmCompute.provisioning_configuration(vm_size = \"Standard_D2s_v3\")\n",
|
||||
" dsvm_config = DsvmCompute.provisioning_configuration(vm_size = \"Standard_D2_v2\")\n",
|
||||
" dsvm_compute = DsvmCompute.create(ws, name = dsvm_name, provisioning_configuration = dsvm_config)\n",
|
||||
" dsvm_compute.wait_for_completion(show_output = True)\n",
|
||||
" print(\"Waiting one minute for ssh to be accessible\")\n",
|
||||
|
||||
@@ -9,6 +9,13 @@
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
||||
@@ -9,6 +9,13 @@
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
||||
@@ -9,6 +9,13 @@
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
||||
@@ -27,3 +27,7 @@ You can use Azure Databricks as a compute target from [Azure Machine Learning Pi
|
||||
For more on SDK concepts, please refer to [notebooks](https://github.com/Azure/MachineLearningNotebooks).
|
||||
|
||||
**Please let us know your feedback.**
|
||||
|
||||
|
||||
|
||||

|
||||
@@ -11,6 +11,13 @@
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -333,6 +340,13 @@
|
||||
"source": [
|
||||
"dbutils.notebook.exit(\"success\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
@@ -11,6 +11,13 @@
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -277,6 +284,13 @@
|
||||
"#comment to not delete the web service\n",
|
||||
"myservice.delete()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
@@ -11,6 +11,13 @@
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -203,6 +210,13 @@
|
||||
"#model.delete()\n",
|
||||
"aks_target.delete() "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
@@ -11,6 +11,13 @@
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -139,6 +146,13 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
@@ -11,6 +11,13 @@
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -143,6 +150,13 @@
|
||||
" 'Subscription id: ' + ws.subscription_id, \n",
|
||||
" 'Resource group: ' + ws.resource_group, sep = '\\n')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
@@ -23,7 +23,8 @@
|
||||
"3. Configure Automated ML using `AutoMLConfig`.\n",
|
||||
"4. Train the model using Azure Databricks.\n",
|
||||
"5. Explore the results.\n",
|
||||
"6. Test the best fitted model.\n",
|
||||
"6. Viewing the engineered names for featurized data and featurization summary for all raw features.\n",
|
||||
"7. Test the best fitted model.\n",
|
||||
"\n",
|
||||
"Before running this notebook, please follow the <a href=\"https://github.com/Azure/MachineLearningNotebooks/tree/master/how-to-use-azureml/azure-databricks\" target=\"_blank\">readme for using Automated ML on Azure Databricks</a> for installing necessary libraries to your cluster."
|
||||
]
|
||||
@@ -556,6 +557,45 @@
|
||||
"print(fitted_model)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### View the engineered names for featurized data\n",
|
||||
"Below we display the engineered feature names generated for the featurized data using the preprocessing featurization."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"fitted_model.named_steps['datatransformer'].get_engineered_feature_names()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### View the featurization summary\n",
|
||||
"Below we display the featurization that was performed on different raw features in the user data. For each raw feature in the user data, the following information is displayed:-\n",
|
||||
"- Raw feature name\n",
|
||||
"- Number of engineered features formed out of this raw feature\n",
|
||||
"- Type detected\n",
|
||||
"- If feature was dropped\n",
|
||||
"- List of feature transformations for the raw feature"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"fitted_model.named_steps['datatransformer'].get_featurization_summary()"
|
||||
]
|
||||
},
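If the raw summary is hard to scan, it can be tabulated with pandas. This is a minimal sketch, not part of the notebook above; it assumes `get_featurization_summary()` returns a list of per-feature records with the fields described in the preceding cell.

```python
import pandas as pd

# Sketch: tabulate the featurization summary returned above.
# Assumes the summary is a list of dicts, one per raw feature.
summary = fitted_model.named_steps['datatransformer'].get_featurization_summary()
pd.DataFrame.from_records(summary)
```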
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -620,6 +660,13 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
@@ -207,6 +207,7 @@
|
||||
"import os\n",
|
||||
"import random\n",
|
||||
"import time\n",
|
||||
"import json\n",
|
||||
"\n",
|
||||
"from matplotlib import pyplot as plt\n",
|
||||
"from matplotlib.pyplot import imshow\n",
|
||||
@@ -427,7 +428,7 @@
|
||||
" debug_log = 'automl_errors.log',\n",
|
||||
" primary_metric = 'AUC_weighted',\n",
|
||||
" iteration_timeout_minutes = 10,\n",
|
||||
" iterations = 30,\n",
|
||||
" iterations = 5,\n",
|
||||
" preprocess = True,\n",
|
||||
" n_cross_validations = 10,\n",
|
||||
" max_concurrent_iterations = 2, #change it based on number of worker nodes\n",
|
||||
@@ -591,22 +592,21 @@
|
||||
"%%writefile score.py\n",
|
||||
"import pickle\n",
|
||||
"import json\n",
|
||||
"import numpy\n",
|
||||
"import numpy as np\n",
|
||||
"import azureml.train.automl\n",
|
||||
"from sklearn.externals import joblib\n",
|
||||
"from azureml.core.model import Model\n",
|
||||
"\n",
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"def init():\n",
|
||||
" global model\n",
|
||||
" model_path = Model.get_model_path(model_name = '<<modelid>>') # this name is model.id of model that we want to deploy\n",
|
||||
" model_path = Model.get_model_path(model_name = '<<model_id>>') # this name is model.id of model that we want to deploy\n",
|
||||
" # deserialize the model file back into a sklearn model\n",
|
||||
" model = joblib.load(model_path)\n",
|
||||
"\n",
|
||||
"def run(rawdata):\n",
|
||||
"def run(raw_data):\n",
|
||||
" try:\n",
|
||||
" data = json.loads(rawdata)['data']\n",
|
||||
" data = numpy.array(data)\n",
|
||||
" data = (pd.DataFrame(np.array(json.loads(raw_data)['data']), columns=[str(i) for i in range(0,64)]))\n",
|
||||
" result = model.predict(data)\n",
|
||||
" except Exception as e:\n",
|
||||
" result = str(e)\n",
|
||||
@@ -614,6 +614,22 @@
|
||||
" return json.dumps({\"result\":result.tolist()})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#Replace <<model_id>>\n",
|
||||
"content = \"\"\n",
|
||||
"with open(\"score.py\", \"r\") as fo:\n",
|
||||
" content = fo.read()\n",
|
||||
"\n",
|
||||
"new_content = content.replace(\"<<model_id>>\", local_run.model_id)\n",
|
||||
"with open(\"score.py\", \"w\") as fw:\n",
|
||||
" fw.write(new_content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -672,16 +688,19 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"# this will take 10-15 minutes to finish\n",
|
||||
"\n",
|
||||
"service_name = \"<<servicename>>\"\n",
|
||||
"import uuid\n",
|
||||
"from azureml.core.image import ContainerImage\n",
|
||||
"\n",
|
||||
"guid = str(uuid.uuid4()).split(\"-\")[0]\n",
|
||||
"service_name = \"myservice-{}\".format(guid)\n",
|
||||
"print(\"Creating service with name: {}\".format(service_name))\n",
|
||||
"runtime = \"spark-py\" \n",
|
||||
"driver_file = \"score.py\"\n",
|
||||
"my_conda_file = \"mydeployenv.yml\"\n",
|
||||
"\n",
|
||||
"# image creation\n",
|
||||
"from azureml.core.image import ContainerImage\n",
|
||||
"myimage_config = ContainerImage.image_configuration(execution_script = driver_file, \n",
|
||||
" runtime = runtime, \n",
|
||||
" conda_file = 'mydeployenv.yml')\n",
|
||||
@@ -744,18 +763,46 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import json\n",
|
||||
"# Randomly select digits and test.\n",
|
||||
"for index in np.random.choice(len(y_test), 2, replace = False):\n",
|
||||
" print(index)\n",
|
||||
" predicted = fitted_model.predict(X_test[index:index + 1])[0]\n",
|
||||
" test_sample = json.dumps({'data':X_test[index:index + 1].values.tolist()})\n",
|
||||
" predicted = myservice.run(input_data = test_sample)\n",
|
||||
" label = y_test.values[index]\n",
|
||||
" title = \"Label value = %d Predicted value = %d \" % (label, predicted)\n",
|
||||
" predictedDict = json.loads(predicted)\n",
|
||||
" title = \"Label value = %d Predicted value = %s \" % ( label,predictedDict['result'][0]) \n",
|
||||
" fig = plt.figure(3, figsize = (5,5))\n",
|
||||
" ax1 = fig.add_axes((0,0,.8,.8))\n",
|
||||
" ax1.set_title(title)\n",
|
||||
" plt.imshow(images[index], cmap = plt.cm.gray_r, interpolation = 'nearest')\n",
|
||||
" display(fig)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"### Delete the service"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"myservice.delete()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
@@ -677,6 +677,13 @@
|
||||
"# Next: ADLA as a Compute Target\n",
|
||||
"To use ADLA as a compute target from Azure Machine Learning Pipeline, a AdlaStep is used. This [notebook](./aml-pipelines-use-adla-as-compute-target.ipynb) demonstrates the use of AdlaStep in Azure Machine Learning Pipeline."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
55
how-to-use-azureml/azure-hdi/README.md
Normal file
@@ -0,0 +1,55 @@
|
||||
**Azure HDInsight**
|
||||
|
||||
Azure HDInsight is a fully managed cloud Hadoop & Spark offering that provides
|
||||
optimized open-source analytic clusters for Spark, Hive, MapReduce, HBase,
|
||||
Storm, and Kafka. HDInsight Spark clusters provide kernels that you can use with
|
||||
the Jupyter notebook on [Apache Spark](https://spark.apache.org/) for testing
|
||||
your applications.
|
||||
|
||||
**How Azure HDInsight works with Azure Machine Learning service**
|
||||
|
||||
- You can train a model using Spark clusters and deploy the model to ACI/AKS
|
||||
from within Azure HDInsight.
|
||||
|
||||
- You can also use [automated machine
|
||||
learning](https://docs.microsoft.com/en-us/azure/machine-learning/service/concept-automated-ml) capabilities
|
||||
integrated within Azure HDInsight.
|
||||
|
||||
You can use Azure HDInsight as a compute target from an [Azure Machine Learning
|
||||
pipeline](https://docs.microsoft.com/en-us/azure/machine-learning/service/concept-ml-pipelines).
|
||||
|
||||
**Set up your HDInsight cluster**
|
||||
|
||||
Create [HDInsight
|
||||
cluster](https://docs.microsoft.com/en-us/azure/hdinsight/hdinsight-hadoop-provision-linux-clusters)
|
||||
|
||||
**Quick create: Basic cluster setup**
|
||||
|
||||
This article walks you through setup in the [Azure
|
||||
portal](https://portal.azure.com/), where you can create an HDInsight cluster
|
||||
using *Quick create* or *Custom*.
|
||||
|
||||

|
||||
|
||||
Follow instructions on the screen to do a basic cluster setup. Details are
|
||||
provided below for:
|
||||
|
||||
- [Resource group
|
||||
name](https://docs.microsoft.com/en-us/azure/hdinsight/hdinsight-hadoop-provision-linux-clusters#resource-group-name)
|
||||
|
||||
- [Cluster types and
|
||||
configuration](https://docs.microsoft.com/en-us/azure/hdinsight/hdinsight-hadoop-provision-linux-clusters#cluster-types)
|
||||
(Cluster must be Spark 2.3 (HDI 3.6) or greater)
|
||||
|
||||
- Cluster login and SSH username
|
||||
|
||||
- [Location](https://docs.microsoft.com/en-us/azure/hdinsight/hdinsight-hadoop-provision-linux-clusters#location)
|
||||
|
||||
**Import the sample HDI notebook in Jupyter**
|
||||
|
||||
**Important links:**
|
||||
|
||||
Create HDI cluster:
|
||||
<https://docs.microsoft.com/en-us/azure/hdinsight/hdinsight-hadoop-provision-linux-clusters>
|
||||
|
||||

|
||||
@@ -0,0 +1,631 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||
"\n",
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Automated ML on Azure HDInsight\n",
|
||||
"\n",
|
||||
"In this example we use the scikit-learn's <a href=\"http://scikit-learn.org/stable/datasets/index.html#optical-recognition-of-handwritten-digits-dataset\" target=\"_blank\">digit dataset</a> to showcase how you can use AutoML for a simple classification problem.\n",
|
||||
"\n",
|
||||
"In this notebook you will learn how to:\n",
|
||||
"1. Create Azure Machine Learning Workspace object and initialize your notebook directory to easily reload this object from a configuration file.\n",
|
||||
"2. Create an `Experiment` in an existing `Workspace`.\n",
|
||||
"3. Configure Automated ML using `AutoMLConfig`.\n",
|
||||
"4. Train the model using Azure HDInsight.\n",
|
||||
"5. Explore the results.\n",
|
||||
"6. Test the best fitted model.\n",
|
||||
"\n",
|
||||
"Before running this notebook, please follow the readme for using Automated ML on Azure HDI for installing necessary libraries to your cluster."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Check the Azure ML Core SDK Version to Validate Your Installation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import azureml.core\n",
|
||||
"import pandas as pd\n",
|
||||
"from azureml.core.authentication import ServicePrincipalAuthentication\n",
|
||||
"from azureml.core.workspace import Workspace\n",
|
||||
"from azureml.core.experiment import Experiment\n",
|
||||
"from azureml.train.automl import AutoMLConfig\n",
|
||||
"from azureml.train.automl.run import AutoMLRun\n",
|
||||
"import logging\n",
|
||||
"\n",
|
||||
"print(\"SDK Version:\", azureml.core.VERSION)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Initialize an Azure ML Workspace\n",
|
||||
"### What is an Azure ML Workspace and Why Do I Need One?\n",
|
||||
"\n",
|
||||
"An Azure ML workspace is an Azure resource that organizes and coordinates the actions of many other Azure resources to assist in executing and sharing machine learning workflows. In particular, an Azure ML workspace coordinates storage, databases, and compute resources providing added functionality for machine learning experimentation, operationalization, and the monitoring of operationalized models.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"### What do I Need?\n",
|
||||
"\n",
|
||||
"To create or access an Azure ML workspace, you will need to import the Azure ML library and specify following information:\n",
|
||||
"* A name for your workspace. You can choose one.\n",
|
||||
"* Your subscription id. Use the `id` value from the `az account show` command output above.\n",
|
||||
"* The resource group name. The resource group organizes Azure resources and provides a default region for the resources in the group. The resource group will be created if it doesn't exist. Resource groups can be created and viewed in the [Azure portal](https://portal.azure.com)\n",
|
||||
"* Supported regions include `eastus2`, `eastus`,`westcentralus`, `southeastasia`, `westeurope`, `australiaeast`, `westus2`, `southcentralus`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import azureml.core\n",
|
||||
"import pandas as pd\n",
|
||||
"from azureml.core.authentication import ServicePrincipalAuthentication\n",
|
||||
"from azureml.core.workspace import Workspace\n",
|
||||
"from azureml.core.experiment import Experiment\n",
|
||||
"from azureml.train.automl import AutoMLConfig\n",
|
||||
"from azureml.train.automl.run import AutoMLRun\n",
|
||||
"import logging\n",
|
||||
"\n",
|
||||
"subscription_id = \"<Your SubscriptionId>\" #you should be owner or contributor\n",
|
||||
"resource_group = \"<Resource group - new or existing>\" #you should be owner or contributor\n",
|
||||
"workspace_name = \"<workspace to be created>\" #your workspace name\n",
|
||||
"workspace_region = \"<azureregion>\" #your region\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"tenant_id = \"<tenant_id>\"\n",
|
||||
"app_id = \"<app_id>\"\n",
|
||||
"app_key = \"<app_key>\"\n",
|
||||
"\n",
|
||||
"auth_sp = ServicePrincipalAuthentication(tenant_id = tenant_id,\n",
|
||||
" service_principal_id = app_id,\n",
|
||||
" service_principal_password = app_key)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Creating a Workspace\n",
|
||||
"If you already have access to an Azure ML workspace you want to use, you can skip this cell. Otherwise, this cell will create an Azure ML workspace for you in the specified subscription, provided you have the correct permissions for the given `subscription_id`.\n",
|
||||
"\n",
|
||||
"This will fail when:\n",
|
||||
"1. The workspace already exists.\n",
|
||||
"2. You do not have permission to create a workspace in the resource group.\n",
|
||||
"3. You are not a subscription owner or contributor and no Azure ML workspaces have ever been created in this subscription.\n",
|
||||
"\n",
|
||||
"If workspace creation fails for any reason other than already existing, please work with your IT administrator to provide you with the appropriate permissions or to provision the required resources.\n",
|
||||
"\n",
|
||||
"**Note:** Creation of a new workspace can take several minutes."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"##TESTONLY\n",
|
||||
"# Import the Workspace class and check the Azure ML SDK version.\n",
|
||||
"from azureml.core import Workspace\n",
|
||||
"\n",
|
||||
"ws = Workspace.create(name = workspace_name,\n",
|
||||
" subscription_id = subscription_id,\n",
|
||||
" resource_group = resource_group, \n",
|
||||
" location = workspace_region,\n",
|
||||
" auth = auth_sp,\n",
|
||||
" exist_ok=True)\n",
|
||||
"ws.get_details()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Configuring Your Local Environment\n",
|
||||
"You can validate that you have access to the specified workspace and write a configuration file to the default configuration location, `./aml_config/config.json`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core import Workspace\n",
|
||||
"\n",
|
||||
"ws = Workspace(workspace_name = workspace_name,\n",
|
||||
" subscription_id = subscription_id,\n",
|
||||
" resource_group = resource_group,\n",
|
||||
" auth = auth_sp)\n",
|
||||
"\n",
|
||||
"# Persist the subscription id, resource group name, and workspace name in aml_config/config.json.\n",
|
||||
"ws.write_config()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create a Folder to Host Sample Projects\n",
|
||||
"Finally, create a folder where all the sample projects will be hosted."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"sample_projects_folder = './sample_projects'\n",
|
||||
"\n",
|
||||
"if not os.path.isdir(sample_projects_folder):\n",
|
||||
" os.mkdir(sample_projects_folder)\n",
|
||||
" \n",
|
||||
"print('Sample projects will be created in {}.'.format(sample_projects_folder))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create an Experiment\n",
|
||||
"\n",
|
||||
"As part of the setup you have already created an Azure ML `Workspace` object. For Automated ML you will need to create an `Experiment` object, which is a named object in a `Workspace` used to run experiments."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import logging\n",
|
||||
"import os\n",
|
||||
"import random\n",
|
||||
"import time\n",
|
||||
"\n",
|
||||
"from matplotlib import pyplot as plt\n",
|
||||
"from matplotlib.pyplot import imshow\n",
|
||||
"import numpy as np\n",
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"import azureml.core\n",
|
||||
"from azureml.core.experiment import Experiment\n",
|
||||
"from azureml.core.workspace import Workspace\n",
|
||||
"from azureml.train.automl import AutoMLConfig\n",
|
||||
"from azureml.train.automl.run import AutoMLRun"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Choose a name for the experiment and specify the project folder.\n",
|
||||
"experiment_name = 'automl-local-classification-hdi'\n",
|
||||
"project_folder = './sample_projects/automl-local-classification-hdi'\n",
|
||||
"\n",
|
||||
"experiment = Experiment(ws, experiment_name)\n",
|
||||
"\n",
|
||||
"output = {}\n",
|
||||
"output['SDK version'] = azureml.core.VERSION\n",
|
||||
"output['Subscription ID'] = ws.subscription_id\n",
|
||||
"output['Workspace Name'] = ws.name\n",
|
||||
"output['Resource Group'] = ws.resource_group\n",
|
||||
"output['Location'] = ws.location\n",
|
||||
"output['Project Directory'] = project_folder\n",
|
||||
"output['Experiment Name'] = experiment.name\n",
|
||||
"pd.set_option('display.max_colwidth', -1)\n",
|
||||
"pd.DataFrame(data = output, index = ['']).T"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Diagnostics\n",
|
||||
"\n",
|
||||
"Opt-in diagnostics for better experience, quality, and security of future releases."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.telemetry import set_diagnostics_collection\n",
|
||||
"set_diagnostics_collection(send_diagnostics = True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Registering Datastore"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Datastore is the way to save connection information to a storage service (e.g. Azure Blob, Azure Data Lake, Azure SQL) information to your workspace so you can access them without exposing credentials in your code. The first thing you will need to do is register a datastore, you can refer to our [python SDK documentation](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.datastore.datastore?view=azure-ml-py) on how to register datastores. __Note: for best security practices, please do not check in code that contains registering datastores with secrets into your source control__\n",
|
||||
"\n",
|
||||
"The code below registers a datastore pointing to a publicly readable blob container."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core import Datastore\n",
|
||||
"\n",
|
||||
"datastore_name = 'demo_training'\n",
|
||||
"container_name = 'digits' \n",
|
||||
"account_name = 'automlpublicdatasets'\n",
|
||||
"Datastore.register_azure_blob_container(\n",
|
||||
" workspace = ws, \n",
|
||||
" datastore_name = datastore_name, \n",
|
||||
" container_name = container_name, \n",
|
||||
" account_name = account_name,\n",
|
||||
" overwrite = True\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Below is an example on how to register a private blob container\n",
|
||||
"```python\n",
|
||||
"datastore = Datastore.register_azure_blob_container(\n",
|
||||
" workspace = ws, \n",
|
||||
" datastore_name = 'example_datastore', \n",
|
||||
" container_name = 'example-container', \n",
|
||||
" account_name = 'storageaccount',\n",
|
||||
" account_key = 'accountkey'\n",
|
||||
")\n",
|
||||
"```\n",
|
||||
"The example below shows how to register an Azure Data Lake store. Please make sure you have granted the necessary permissions for the service principal to access the data lake.\n",
|
||||
"```python\n",
|
||||
"datastore = Datastore.register_azure_data_lake(\n",
|
||||
" workspace = ws,\n",
|
||||
" datastore_name = 'example_datastore',\n",
|
||||
" store_name = 'adlsstore',\n",
|
||||
" tenant_id = 'tenant-id-of-service-principal',\n",
|
||||
" client_id = 'client-id-of-service-principal',\n",
|
||||
" client_secret = 'client-secret-of-service-principal'\n",
|
||||
")\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load Training Data Using DataPrep"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Automated ML takes a Dataflow as input.\n",
|
||||
"\n",
|
||||
"If you are familiar with Pandas and have done your data preparation work in Pandas already, you can use the `read_pandas_dataframe` method in dprep to convert the DataFrame to a Dataflow.\n",
|
||||
"```python\n",
|
||||
"df = pd.read_csv(...)\n",
|
||||
"# apply some transforms\n",
|
||||
"dprep.read_pandas_dataframe(df, temp_folder='/path/accessible/by/both/driver/and/worker')\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"If you just need to ingest data without doing any preparation, you can directly use AzureML Data Prep (Data Prep) to do so. The code below demonstrates this scenario. Data Prep also has data preparation capabilities, we have many [sample notebooks](https://github.com/Microsoft/AMLDataPrepDocs) demonstrating the capabilities.\n",
|
||||
"\n",
|
||||
"You will get the datastore you registered previously and pass it to Data Prep for reading. The data comes from the digits dataset: `sklearn.datasets.load_digits()`. `DataPath` points to a specific location within a datastore. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import azureml.dataprep as dprep\n",
|
||||
"from azureml.data.datapath import DataPath\n",
|
||||
"\n",
|
||||
"datastore = Datastore.get(workspace = ws, datastore_name = datastore_name)\n",
|
||||
"\n",
|
||||
"X_train = dprep.read_csv(datastore.path('X.csv'))\n",
|
||||
"y_train = dprep.read_csv(datastore.path('y.csv')).to_long(dprep.ColumnSelector(term='.*', use_regex = True))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Review the Data Preparation Result\n",
|
||||
"You can peek the result of a Dataflow at any range using `skip(i)` and `head(j)`. Doing so evaluates only j records for all the steps in the Dataflow, which makes it fast even against large datasets."
|
||||
]
|
||||
},
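As a minimal sketch of the `skip(i)`/`head(j)` pattern mentioned above (the row counts here are arbitrary and not part of the original notebook):

```python
# Peek at five records starting at row 10 of the training Dataflow,
# without evaluating the full dataset.
X_train.skip(10).head(5)
```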
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"X_train.get_profile()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"y_train.get_profile()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Configure AutoML\n",
|
||||
"\n",
|
||||
"Instantiate an `AutoMLConfig` object to specify the settings and data used to run the experiment.\n",
|
||||
"\n",
|
||||
"|Property|Description|\n",
|
||||
"|-|-|\n",
|
||||
"|**task**|classification or regression|\n",
|
||||
"|**primary_metric**|This is the metric that you want to optimize. Classification supports the following primary metrics: <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>average_precision_score_weighted</i><br><i>norm_macro_recall</i><br><i>precision_score_weighted</i>|\n",
|
||||
"|**primary_metric**|This is the metric that you want to optimize. Regression supports the following primary metrics: <br><i>spearman_correlation</i><br><i>normalized_root_mean_squared_error</i><br><i>r2_score</i><br><i>normalized_mean_absolute_error</i>|\n",
|
||||
"|**iteration_timeout_minutes**|Time limit in minutes for each iteration.|\n",
|
||||
"|**iterations**|Number of iterations. In each iteration AutoML trains a specific pipeline with the data.|\n",
|
||||
"|**n_cross_validations**|Number of cross validation splits.|\n",
|
||||
"|**spark_context**|Spark Context object. for HDInsight, use spark_context=sc|\n",
|
||||
"|**max_concurrent_iterations**|Maximum number of iterations to execute in parallel. This should be <= number of worker nodes in your Azure HDInsight cluster.|\n",
|
||||
"|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n",
|
||||
"|**y**|(sparse) array-like, shape = [n_samples, ], [n_samples, n_classes]<br>Multi-class targets. An indicator matrix turns on multilabel classification. This should be an array of integers.|\n",
|
||||
"|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder.|\n",
|
||||
"|**preprocess**|set this to True to enable pre-processing of data eg. string to numeric using one-hot encoding|\n",
|
||||
"|**exit_score**|Target score for experiment. It is associated with the metric. eg. exit_score=0.995 will exit experiment after that|"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"automl_config = AutoMLConfig(task = 'classification',\n",
|
||||
" debug_log = 'automl_errors.log',\n",
|
||||
" primary_metric = 'AUC_weighted',\n",
|
||||
" iteration_timeout_minutes = 10,\n",
|
||||
" iterations = 3,\n",
|
||||
" preprocess = True,\n",
|
||||
" n_cross_validations = 10,\n",
|
||||
" max_concurrent_iterations = 2, #change it based on number of worker nodes\n",
|
||||
" verbosity = logging.INFO,\n",
|
||||
" spark_context=sc, #HDI /spark related\n",
|
||||
" X = X_train, \n",
|
||||
" y = y_train,\n",
|
||||
" path = project_folder)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Train the Models\n",
|
||||
"\n",
|
||||
"Call the `submit` method on the experiment object and pass the run configuration. Execution of local runs is synchronous. Depending on the data and the number of iterations this can run for a while."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"local_run = experiment.submit(automl_config, show_output = True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Explore the Results"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The following will show the child runs and waits for the parent run to complete."
|
||||
]
|
||||
},
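The cell that performs this wait is not shown in the diff above; a minimal sketch, assuming `local_run` is the AutoML run submitted earlier, could be:

```python
# Block until the parent AutoML run and its child iterations finish,
# streaming status output to the notebook.
local_run.wait_for_completion(show_output=True)
```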
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Retrieve All Child Runs after the experiment is completed (in portal)\n",
|
||||
"You can also use SDK methods to fetch all the child runs and see individual metrics that we log."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"children = list(local_run.get_children())\n",
|
||||
"metricslist = {}\n",
|
||||
"for run in children:\n",
|
||||
" properties = run.get_properties()\n",
|
||||
" metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)} \n",
|
||||
" metricslist[int(properties['iteration'])] = metrics\n",
|
||||
"\n",
|
||||
"rundata = pd.DataFrame(metricslist).sort_index(1)\n",
|
||||
"rundata"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Retrieve the Best Model after the above run is complete \n",
|
||||
"\n",
|
||||
"Below we select the best pipeline from our iterations. The `get_output` method returns the best run and the fitted model. The Model includes the pipeline and any pre-processing. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"best_run, fitted_model = local_run.get_output()\n",
|
||||
"print(best_run)\n",
|
||||
"print(fitted_model)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Best Model Based on Any Other Metric after the above run is complete based on the child run\n",
|
||||
"Show the run and the model that has the smallest `log_loss` value:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"lookup_metric = \"log_loss\"\n",
|
||||
"best_run, fitted_model = local_run.get_output(metric = lookup_metric)\n",
|
||||
"print(best_run)\n",
|
||||
"print(fitted_model)"
|
||||
]
|
||||
},
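The `get_output` overload for a particular iteration, mentioned earlier, is not shown above; a minimal sketch (the iteration number is arbitrary) would be:

```python
# Retrieve the run and fitted model for a specific AutoML iteration, e.g. iteration 1.
iteration_run, iteration_model = local_run.get_output(iteration=1)
print(iteration_run)
print(iteration_model)
```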
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Test the Best Fitted Model\n",
|
||||
"\n",
|
||||
"#### Load Test Data - you can split the dataset beforehand & pass Train dataset to AutoML and use Test dataset to evaluate the best model."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"blob_location = \"https://{}.blob.core.windows.net/{}\".format(account_name, container_name)\n",
|
||||
"X_test = pd.read_csv(\"{}./X_valid.csv\".format(blob_location), header=0)\n",
|
||||
"y_test = pd.read_csv(\"{}/y_valid.csv\".format(blob_location), header=0)\n",
|
||||
"images = pd.read_csv(\"{}/images.csv\".format(blob_location), header=None)\n",
|
||||
"images = np.reshape(images.values, (100,8,8))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Testing Our Best Fitted Model\n",
|
||||
"We will try to predict digits and see how our model works. This is just an example to show you."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Randomly select digits and test.\n",
|
||||
"for index in np.random.choice(len(y_test), 2, replace = False):\n",
|
||||
" print(index)\n",
|
||||
" predicted = fitted_model.predict(X_test[index:index + 1])[0]\n",
|
||||
" label = y_test.values[index]\n",
|
||||
" title = \"Label value = %d Predicted value = %d \" % (label, predicted)\n",
|
||||
" fig = plt.figure(3, figsize = (5,5))\n",
|
||||
" ax1 = fig.add_axes((0,0,.8,.8))\n",
|
||||
" ax1.set_title(title)\n",
|
||||
" plt.imshow(images[index], cmap = plt.cm.gray_r, interpolation = 'nearest')\n",
|
||||
" display(fig)"
|
||||
]
|
||||
},
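Beyond spot-checking individual digits, an aggregate metric gives a quicker read on the best model. This sketch is not part of the notebook; it assumes `X_test` and `y_test` are the validation frames loaded above and uses scikit-learn's accuracy score.

```python
from sklearn.metrics import accuracy_score

# Score the best fitted model on the held-out digits.
y_pred = fitted_model.predict(X_test)
print("Accuracy on test set: {:.3f}".format(accuracy_score(y_test.values.ravel(), y_pred)))
```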
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"When deploying an automated ML trained model, please specify _pippackages=['azureml-sdk[automl]']_ in your CondaDependencies.\n",
|
||||
"\n",
|
||||
"Please refer to only the **Deploy** section in this notebook - <a href=\"https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/classification-with-deployment\" target=\"_blank\">Deployment of Automated ML trained model</a>"
|
||||
]
|
||||
}
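A minimal sketch of the dependency configuration that note refers to (the output file name is illustrative, not from the notebook):

```python
from azureml.core.conda_dependencies import CondaDependencies

# Include the AutoML SDK in the inference environment so the fitted pipeline can be loaded.
myenv = CondaDependencies.create(pip_packages=['azureml-sdk[automl]'])
with open("myenv_automl.yml", "w") as f:  # illustrative file name
    f.write(myenv.serialize_to_string())
```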
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "savitam"
|
||||
},
|
||||
{
|
||||
"name": "sasum"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "Python",
|
||||
"name": "Python36"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "python",
|
||||
"version": 3
|
||||
},
|
||||
"mimetype": "text/x-python",
|
||||
"name": "pyspark3",
|
||||
"pygments_lexer": "python3"
|
||||
},
|
||||
"name": "auto-ml-classification-local-adb",
|
||||
"notebookId": 587284549713154
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 1
|
||||
}
|
||||
12
how-to-use-azureml/deploy-to-cloud/README.md
Normal file
@@ -0,0 +1,12 @@
|
||||
# Model Deployment with Azure ML service
|
||||
You can use Azure Machine Learning to package, debug, validate and deploy inference containers to a variety of compute targets. This process is known as "MLOps" (ML operationalization).
|
||||
For more information please check out this article: https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-deploy-and-where
|
||||
|
||||
## Get Started
|
||||
To begin, you will need an ML workspace.
|
||||
For more information please check out this article: https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-workspace
|
||||
|
||||
## Deploy to the cloud
|
||||
You can deploy to the cloud using the Azure ML CLI or the Azure ML SDK.
|
||||
- CLI example: https://aka.ms/azmlcli
|
||||
- Notebook example: [model-register-and-deploy](./model-register-and-deploy.ipynb).
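A minimal SDK sketch of the flow covered in that notebook (names and file paths are placeholders; see the notebook for the full walkthrough):

```python
from azureml.core import Workspace
from azureml.core.model import Model, InferenceConfig
from azureml.core.webservice import AciWebservice

ws = Workspace.from_config()

# Register a local model file, then deploy it behind an ACI web service.
model = Model.register(ws, model_path="sklearn_regression_model.pkl",
                       model_name="sklearn_regression_model.pkl")
inference_config = InferenceConfig(runtime="python", entry_script="score.py",
                                   conda_file="myenv.yml")
deployment_config = AciWebservice.deploy_configuration(cpu_cores=1, memory_gb=1)
service = Model.deploy(ws, "my-aci-service", [model], inference_config, deployment_config)
service.wait_for_deployment(show_output=True)
```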
|
||||
1
how-to-use-azureml/deploy-to-cloud/helloworld.txt
Normal file
@@ -0,0 +1 @@
|
||||
RUN echo "this is test"
|
||||
@@ -0,0 +1,289 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||
"\n",
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Register Model and deploy as Webservice\n",
|
||||
"\n",
|
||||
"This example shows how to deploy a Webservice in step-by-step fashion:\n",
|
||||
"\n",
|
||||
" 1. Register Model\n",
|
||||
" 2. Deploy Model as Webservice"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Prerequisites\n",
|
||||
"If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, make sure you go through the [configuration](../../../configuration.ipynb) Notebook first if you haven't."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Check core SDK version number\n",
|
||||
"import azureml.core\n",
|
||||
"\n",
|
||||
"print(\"SDK version:\", azureml.core.VERSION)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Initialize Workspace\n",
|
||||
"\n",
|
||||
"Initialize a workspace object from persisted configuration."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"create workspace"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core import Workspace\n",
|
||||
"\n",
|
||||
"ws = Workspace.from_config()\n",
|
||||
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Register Model"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can add tags and descriptions to your Models. Note you need to have a `sklearn_regression_model.pkl` file in the current directory. This file is generated by the 01 notebook. The below call registers that file as a Model with the same name `sklearn_regression_model.pkl` in the workspace.\n",
|
||||
"\n",
|
||||
"Using tags, you can track useful information such as the name and version of the machine learning library used to train the model. Note that tags must be alphanumeric."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"register model from file"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.model import Model\n",
|
||||
"\n",
|
||||
"model = Model.register(model_path = \"sklearn_regression_model.pkl\",\n",
|
||||
" model_name = \"sklearn_regression_model.pkl\",\n",
|
||||
" tags = {'area': \"diabetes\", 'type': \"regression\"},\n",
|
||||
" description = \"Ridge regression model to predict diabetes\",\n",
|
||||
" workspace = ws)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create Inference Configuration\n",
|
||||
"\n",
|
||||
"There is now support for a source directory, you can upload an entire folder from your local machine as dependencies for the Webservice.\n",
|
||||
"Note: in that case, your entry_script, conda_file, and extra_docker_file_steps paths are relative paths to the source_directory path.\n",
|
||||
"\n",
|
||||
"Sample code for using a source directory:\n",
|
||||
"\n",
|
||||
"```python\n",
|
||||
"inference_config = InferenceConfig(source_directory=\"C:/abc\",\n",
|
||||
" runtime= \"python\", \n",
|
||||
" entry_script=\"x/y/score.py\",\n",
|
||||
" conda_file=\"env/myenv.yml\", \n",
|
||||
" extra_docker_file_steps=\"helloworld.txt\")\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
" - source_directory = holds source path as string, this entire folder gets added in image so its really easy to access any files within this folder or subfolder\n",
|
||||
" - runtime = Which runtime to use for the image. Current supported runtimes are 'spark-py' and 'python\n",
|
||||
" - entry_script = contains logic specific to initializing your model and running predictions\n",
|
||||
" - conda_file = manages conda and python package dependencies.\n",
|
||||
" - extra_docker_file_steps = optional: any extra steps you want to inject into docker file"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"create image"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.model import InferenceConfig\n",
|
||||
"\n",
|
||||
"inference_config = InferenceConfig(runtime= \"python\", \n",
|
||||
" entry_script=\"score.py\",\n",
|
||||
" conda_file=\"myenv.yml\", \n",
|
||||
" extra_docker_file_steps=\"helloworld.txt\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Deploy Model as Webservice on Azure Container Instance\n",
|
||||
"\n",
|
||||
"Note that the service creation can take few minutes."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.webservice import AciWebservice, Webservice\n",
|
||||
"from azureml.exceptions import WebserviceException\n",
|
||||
"\n",
|
||||
"deployment_config = AciWebservice.deploy_configuration(cpu_cores = 1, memory_gb = 1)\n",
|
||||
"aci_service_name = 'aciservice1'\n",
|
||||
"\n",
|
||||
"try:\n",
|
||||
" # if you want to get existing service below is the command\n",
|
||||
" # since aci name needs to be unique in subscription deleting existing aci if any\n",
|
||||
" # we use aci_service_name to create azure aci\n",
|
||||
" service = Webservice(ws, name=aci_service_name)\n",
|
||||
" if service:\n",
|
||||
" service.delete()\n",
|
||||
"except WebserviceException as e:\n",
|
||||
" print()\n",
|
||||
"\n",
|
||||
"service = Model.deploy(ws, aci_service_name, [model], inference_config, deployment_config)\n",
|
||||
"\n",
|
||||
"service.wait_for_deployment(True)\n",
|
||||
"print(service.state)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Test web service"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import json\n",
|
||||
"test_sample = json.dumps({'data': [\n",
|
||||
" [1,2,3,4,5,6,7,8,9,10], \n",
|
||||
" [10,9,8,7,6,5,4,3,2,1]\n",
|
||||
"]})\n",
|
||||
"\n",
|
||||
"test_sample_encoded = bytes(test_sample,encoding = 'utf8')\n",
|
||||
"prediction = service.run(input_data=test_sample_encoded)\n",
|
||||
"print(prediction)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Delete ACI to clean up"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"deploy service",
|
||||
"aci"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"service.delete()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Model Profiling\n",
|
||||
"\n",
|
||||
"you can also take advantage of profiling feature for model\n",
|
||||
"\n",
|
||||
"```python\n",
|
||||
"\n",
|
||||
"profile = model.profile(ws, \"profilename\", [model], inference_config, test_sample)\n",
|
||||
"profile.wait_for_profiling(True)\n",
|
||||
"profiling_results = profile.get_results()\n",
|
||||
"print(profiling_results)\n",
|
||||
"\n",
|
||||
"```"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "aashishb"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
"name": "python36"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
8
how-to-use-azureml/deploy-to-cloud/myenv.yml
Normal file
@@ -0,0 +1,8 @@
|
||||
name: project_environment
|
||||
dependencies:
|
||||
- python=3.6.2
|
||||
- pip:
|
||||
- azureml-defaults
|
||||
- scikit-learn
|
||||
- numpy
|
||||
- inference-schema[numpy-support]
|
||||
34
how-to-use-azureml/deploy-to-cloud/score.py
Normal file
@@ -0,0 +1,34 @@
|
||||
import pickle
|
||||
import json
|
||||
import numpy as np
|
||||
from sklearn.externals import joblib
|
||||
from sklearn.linear_model import Ridge
|
||||
from azureml.core.model import Model
|
||||
|
||||
from inference_schema.schema_decorators import input_schema, output_schema
|
||||
from inference_schema.parameter_types.numpy_parameter_type import NumpyParameterType
|
||||
|
||||
|
||||
def init():
|
||||
global model
|
||||
# note here "sklearn_regression_model.pkl" is the name of the model registered under
|
||||
# this is a different behavior than before when the code is run locally, even though the code is the same.
|
||||
model_path = Model.get_model_path('sklearn_regression_model.pkl')
|
||||
# deserialize the model file back into a sklearn model
|
||||
model = joblib.load(model_path)
|
||||
|
||||
|
||||
input_sample = np.array([[10, 9, 8, 7, 6, 5, 4, 3, 2, 1]])
|
||||
output_sample = np.array([3726.995])
|
||||
|
||||
|
||||
@input_schema('data', NumpyParameterType(input_sample))
|
||||
@output_schema(NumpyParameterType(output_sample))
|
||||
def run(data):
|
||||
try:
|
||||
result = model.predict(data)
|
||||
# you can return any datatype as long as it is JSON-serializable
|
||||
return result.tolist()
|
||||
except Exception as e:
|
||||
error = str(e)
|
||||
return error
|
||||
BIN
how-to-use-azureml/deploy-to-cloud/sklearn_regression_model.pkl
Normal file
Binary file not shown.
12
how-to-use-azureml/deploy-to-local/README.md
Normal file
@@ -0,0 +1,12 @@
|
||||
# Model Deployment with Azure ML service
|
||||
You can use Azure Machine Learning to package, debug, validate and deploy inference containers to a variety of compute targets. This process is known as "MLOps" (ML operationalization).
|
||||
For more information please check out this article: https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-deploy-and-where
|
||||
|
||||
## Get Started
|
||||
To begin, you will need an ML workspace.
|
||||
For more information please check out this article: https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-workspace
|
||||
|
||||
## Deploy locally
|
||||
You can deploy a model locally for testing & debugging using the Azure ML CLI or the Azure ML SDK.
|
||||
- CLI example: https://aka.ms/azmlcli
|
||||
- Notebook example: [register-model-deploy-local](./register-model-deploy-local.ipynb).
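A minimal SDK sketch of local deployment (port and service name are placeholders; see the notebook for the full walkthrough):

```python
from azureml.core import Workspace
from azureml.core.model import Model, InferenceConfig
from azureml.core.webservice import LocalWebservice

ws = Workspace.from_config()
model = Model(ws, name="sklearn_regression_model.pkl")  # an already-registered model

inference_config = InferenceConfig(runtime="python", entry_script="score.py",
                                   conda_file="myenv.yml")
# Run the service in a local Docker container on a fixed port.
deployment_config = LocalWebservice.deploy_configuration(port=6789)
local_service = Model.deploy(ws, "local-test", [model], inference_config, deployment_config)
local_service.wait_for_deployment()
print(local_service.port)
```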
|
||||
BIN
how-to-use-azureml/deploy-to-local/dockerSharedDrive.JPG
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 22 KiB |
1
how-to-use-azureml/deploy-to-local/helloworld.txt
Normal file
@@ -0,0 +1 @@
|
||||
RUN echo "this is test"
|
||||
8
how-to-use-azureml/deploy-to-local/myenv.yml
Normal file
@@ -0,0 +1,8 @@
|
||||
name: project_environment
|
||||
dependencies:
|
||||
- python=3.6.2
|
||||
- pip:
|
||||
- azureml-defaults
|
||||
- scikit-learn
|
||||
- numpy
|
||||
- inference-schema[numpy-support]
|
||||
@@ -0,0 +1,494 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||
"\n",
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Register model and deploy locally with advanced usages\n",
|
||||
"\n",
|
||||
"This example shows how to deploy a web service in step-by-step fashion:\n",
|
||||
"\n",
|
||||
" 1. Register model\n",
|
||||
" 2. Deploy the image as a web service in a local Docker container.\n",
|
||||
" 3. Quickly test changes to your entry script by reloading the local service.\n",
|
||||
" 4. Optionally, you can also make changes to model, conda or extra_docker_file_steps and update local service"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Prerequisites\n",
|
||||
"If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, make sure you go through the [configuration](../../../configuration.ipynb) Notebook first if you haven't."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Check core SDK version number\n",
|
||||
"import azureml.core\n",
|
||||
"\n",
|
||||
"print(\"SDK version:\", azureml.core.VERSION)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Initialize Workspace\n",
|
||||
"\n",
|
||||
"Initialize a workspace object from persisted configuration."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"create workspace"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core import Workspace\n",
|
||||
"\n",
|
||||
"ws = Workspace.from_config()\n",
|
||||
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Register Model"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can add tags and descriptions to your models. we are using `sklearn_regression_model.pkl` file in the current directory as a model with the same name `sklearn_regression_model.pkl` in the workspace.\n",
|
||||
"\n",
|
||||
"Using tags, you can track useful information such as the name and version of the machine learning library used to train the model, framework, category, target customer etc. Note that tags must be alphanumeric."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"register model from file"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.model import Model\n",
|
||||
"\n",
|
||||
"model = Model.register(model_path = \"sklearn_regression_model.pkl\",\n",
|
||||
" model_name = \"sklearn_regression_model.pkl\",\n",
|
||||
" tags = {'area': \"diabetes\", 'type': \"regression\"},\n",
|
||||
" description = \"Ridge regression model to predict diabetes\",\n",
|
||||
" workspace = ws)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Manage your dependencies in a folder"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"source_directory = \"C:/abc\"\n",
|
||||
"\n",
|
||||
"os.makedirs(source_directory, exist_ok = True)\n",
|
||||
"os.makedirs(\"C:/abc/x/y\", exist_ok = True)\n",
|
||||
"os.makedirs(\"C:/abc/env\", exist_ok = True)\n",
|
||||
"os.makedirs(\"C:/abc/dockerstep\", exist_ok = True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Show `score.py`. Note that the `sklearn_regression_model.pkl` in the `get_model_path` call is referring to a model named `sklearn_regression_model.pkl` registered under the workspace. It is NOT referencing the local file."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%writefile C:/abc/x/y/score.py\n",
|
||||
"import pickle\n",
|
||||
"import json\n",
|
||||
"import numpy as np\n",
|
||||
"from sklearn.externals import joblib\n",
|
||||
"from sklearn.linear_model import Ridge\n",
|
||||
"from azureml.core.model import Model\n",
|
||||
"\n",
|
||||
"from inference_schema.schema_decorators import input_schema, output_schema\n",
|
||||
"from inference_schema.parameter_types.numpy_parameter_type import NumpyParameterType\n",
|
||||
"\n",
|
||||
"def init():\n",
|
||||
" global model\n",
|
||||
" # note here \"sklearn_regression_model.pkl\" is the name of the model registered under\n",
|
||||
" # this is a different behavior than before when the code is run locally, even though the code is the same.\n",
|
||||
" model_path = Model.get_model_path('sklearn_regression_model.pkl')\n",
|
||||
" # deserialize the model file back into a sklearn model\n",
|
||||
" model = joblib.load(model_path)\n",
|
||||
" global name\n",
|
||||
" # note here, entire source directory on inference config gets added into image\n",
|
||||
" # bellow is the example how you can use any extra files in image\n",
|
||||
" with open('./abc/extradata.json') as json_file: \n",
|
||||
" data = json.load(json_file)\n",
|
||||
" name = data[\"people\"][0][\"name\"]\n",
|
||||
"\n",
|
||||
"input_sample = np.array([[10,9,8,7,6,5,4,3,2,1]])\n",
|
||||
"output_sample = np.array([3726.995])\n",
|
||||
"\n",
|
||||
"@input_schema('data', NumpyParameterType(input_sample))\n",
|
||||
"@output_schema(NumpyParameterType(output_sample))\n",
|
||||
"def run(data):\n",
|
||||
" try:\n",
|
||||
" result = model.predict(data)\n",
|
||||
" # you can return any datatype as long as it is JSON-serializable\n",
|
||||
" return \"Hello \" + name + \" here is your result = \" + str(result)\n",
|
||||
" except Exception as e:\n",
|
||||
" error = str(e)\n",
|
||||
" return error"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%writefile C:/abc/env/myenv.yml\n",
|
||||
"name: project_environment\n",
|
||||
"dependencies:\n",
|
||||
" - python=3.6.2\n",
|
||||
" - pip:\n",
|
||||
" - azureml-defaults\n",
|
||||
" - scikit-learn\n",
|
||||
" - numpy\n",
|
||||
" - inference-schema[numpy-support]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%writefile C:/abc/dockerstep/customDockerStep.txt\n",
|
||||
"RUN echo \"this is test\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%writefile C:/abc/extradata.json\n",
|
||||
"{\n",
|
||||
" \"people\": [\n",
|
||||
" {\n",
|
||||
" \"website\": \"microsoft.com\", \n",
|
||||
" \"from\": \"Seattle\", \n",
|
||||
" \"name\": \"Mrudula\"\n",
|
||||
" }\n",
|
||||
" ]\n",
|
||||
"}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create Inference Configuration\n",
|
||||
"\n",
|
||||
" - source_directory = holds source path as string, this entire folder gets added in image so its really easy to access any files within this folder or subfolder\n",
|
||||
" - runtime = Which runtime to use for the image. Current supported runtimes are 'spark-py' and 'python\n",
|
||||
" - entry_script = contains logic specific to initializing your model and running predictions\n",
|
||||
" - conda_file = manages conda and python package dependencies.\n",
|
||||
" - extra_docker_file_steps = optional: any extra steps you want to inject into docker file"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.model import InferenceConfig\n",
|
||||
"\n",
|
||||
"inference_config = InferenceConfig(source_directory=\"C:/abc\",\n",
|
||||
" runtime= \"python\", \n",
|
||||
" entry_script=\"x/y/score.py\",\n",
|
||||
" conda_file=\"env/myenv.yml\", \n",
|
||||
" extra_docker_file_steps=\"dockerstep/customDockerStep.txt\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Deploy Model as a Local Docker Web Service\n",
|
||||
"\n",
|
||||
"*Make sure you have Docker installed and running.*\n",
|
||||
"\n",
|
||||
"Note that the service creation can take few minutes.\n",
|
||||
"\n",
|
||||
"NOTE:\n",
|
||||
"\n",
|
||||
"we require docker running with linux container. If you are running Docker for Windows, you need to ensure the Linux Engine is running\n",
|
||||
"\n",
|
||||
" powershell command to switch to linux engine\n",
|
||||
" & 'C:\\Program Files\\Docker\\Docker\\DockerCli.exe' -SwitchLinuxEngine\n",
|
||||
"\n",
|
||||
"and c drive is shared https://docs.docker.com/docker-for-windows/#shared-drives\n",
|
||||
"sometimes you have to reshare c drive as docker \n",
|
||||
"\n",
|
||||
"<img src=\"./dockerSharedDrive.JPG\" align=\"left\"/>"
|
||||
]
|
||||
},
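Before deploying, it can help to confirm that the Docker daemon is actually reachable from the notebook environment. The snippet below is a minimal sketch (not part of the original notebook); it assumes the `docker` CLI is installed and on the PATH.

```python
# Hedged sketch: verify that the Docker daemon is reachable before deploying.
# Assumes the `docker` CLI is installed and on the PATH.
import subprocess

try:
    subprocess.run(["docker", "info"], stdout=subprocess.PIPE,
                   stderr=subprocess.PIPE, check=True)
    print("Docker daemon is reachable.")
except (FileNotFoundError, subprocess.CalledProcessError) as exc:
    print("Docker does not appear to be running:", exc)
```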
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"deploy service",
|
||||
"aci"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.webservice import LocalWebservice\n",
|
||||
"\n",
|
||||
"#this is optional, if not provided we choose random port\n",
|
||||
"deployment_config = LocalWebservice.deploy_configuration(port=6789)\n",
|
||||
"\n",
|
||||
"local_service = Model.deploy(ws, \"test\", [model], inference_config, deployment_config)\n",
|
||||
"\n",
|
||||
"local_service.wait_for_deployment()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print('Local service port: {}'.format(local_service.port))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Check Status and Get Container Logs\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(local_service.get_logs())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Test Web Service"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Call the web service with some input data to get a prediction."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import json\n",
|
||||
"\n",
|
||||
"sample_input = json.dumps({\n",
|
||||
" 'data': [\n",
|
||||
" [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],\n",
|
||||
" [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]\n",
|
||||
" ]\n",
|
||||
"})\n",
|
||||
"\n",
|
||||
"sample_input = bytes(sample_input, encoding='utf-8')\n",
|
||||
"\n",
|
||||
"print(local_service.run(input_data=sample_input))"
|
||||
]
|
||||
},
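As an alternative to `local_service.run()`, the local container can also be called over plain HTTP. This is a hedged sketch, assuming the `LocalWebservice` object exposes its endpoint via `scoring_uri` and that the `requests` package is available in the environment.

```python
# Hedged sketch: call the local scoring endpoint over HTTP instead of run().
# Assumes local_service.scoring_uri points at the container started above.
import requests

headers = {'Content-Type': 'application/json'}
response = requests.post(local_service.scoring_uri, data=sample_input, headers=headers)
print(response.status_code, response.text)
```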
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Reload Service\n",
|
||||
"\n",
|
||||
"You can update your score.py file and then call `reload()` to quickly restart the service. This will only reload your execution script and dependency files, it will not rebuild the underlying Docker image. As a result, `reload()` is fast, but if you do need to rebuild the image -- to add a new Conda or pip package, for instance -- you will have to call `update()`, instead (see below)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%writefile C:/abc/x/y/score.py\n",
|
||||
"import pickle\n",
|
||||
"import json\n",
|
||||
"import numpy as np\n",
|
||||
"from sklearn.externals import joblib\n",
|
||||
"from sklearn.linear_model import Ridge\n",
|
||||
"from azureml.core.model import Model\n",
|
||||
"\n",
|
||||
"from inference_schema.schema_decorators import input_schema, output_schema\n",
|
||||
"from inference_schema.parameter_types.numpy_parameter_type import NumpyParameterType\n",
|
||||
"\n",
|
||||
"def init():\n",
|
||||
" global model\n",
|
||||
" # note here \"sklearn_regression_model.pkl\" is the name of the model registered under\n",
|
||||
" # this is a different behavior than before when the code is run locally, even though the code is the same.\n",
|
||||
" model_path = Model.get_model_path('sklearn_regression_model.pkl')\n",
|
||||
" # deserialize the model file back into a sklearn model\n",
|
||||
" model = joblib.load(model_path)\n",
|
||||
" global name, from_location\n",
|
||||
" # note here, entire source directory on inference config gets added into image\n",
|
||||
" # bellow is the example how you can use any extra files in image\n",
|
||||
" with open('./abc/extradata.json') as json_file: \n",
|
||||
" data = json.load(json_file)\n",
|
||||
" name = data[\"people\"][0][\"name\"]\n",
|
||||
" from_location = data[\"people\"][0][\"from\"]\n",
|
||||
"\n",
|
||||
"input_sample = np.array([[10,9,8,7,6,5,4,3,2,1]])\n",
|
||||
"output_sample = np.array([3726.995])\n",
|
||||
"\n",
|
||||
"@input_schema('data', NumpyParameterType(input_sample))\n",
|
||||
"@output_schema(NumpyParameterType(output_sample))\n",
|
||||
"def run(data):\n",
|
||||
" try:\n",
|
||||
" result = model.predict(data)\n",
|
||||
" # you can return any datatype as long as it is JSON-serializable\n",
|
||||
" return \"Hello \" + name + \" from \" + from_location + \" here is your result = \" + str(result)\n",
|
||||
" except Exception as e:\n",
|
||||
" error = str(e)\n",
|
||||
" return error"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"local_service.reload()\n",
|
||||
"print(\"--------------------------------------------------------------\")\n",
|
||||
"\n",
|
||||
"# after reload now if you call run this will return updated return message\n",
|
||||
"\n",
|
||||
"print(local_service.run(input_data=sample_input))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Update Service\n",
|
||||
"\n",
|
||||
"If you want to change your model(s), Conda dependencies, or deployment configuration, call `update()` to rebuild the Docker image.\n",
|
||||
"\n",
|
||||
"```python\n",
|
||||
"\n",
|
||||
"local_service.update(models = [SomeOtherModelObject],\n",
|
||||
" deployment_config = local_config,\n",
|
||||
" inference_config = inference_config)\n",
|
||||
"```"
|
||||
]
|
||||
},
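For example, after editing `env/myenv.yml` to add a package, the local image could be rebuilt with a call along these lines. This is a sketch, assuming `update()` accepts the same `inference_config` object used above (as the signature in the snippet above suggests).

```python
# Hedged sketch: rebuild the local image after changing conda dependencies.
local_service.update(inference_config=inference_config)
local_service.wait_for_deployment()
```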
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Delete Service"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"local_service.delete()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "raymondl"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
"name": "python36"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -0,0 +1,349 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||
"\n",
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Register model and deploy locally\n",
|
||||
"\n",
|
||||
"This example shows how to deploy a web service in step-by-step fashion:\n",
|
||||
"\n",
|
||||
" 1. Register model\n",
|
||||
" 2. Deploy the image as a web service in a local Docker container.\n",
|
||||
" 3. Quickly test changes to your entry script by reloading the local service.\n",
|
||||
" 4. Optionally, you can also make changes to model, conda or extra_docker_file_steps and update local service"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Prerequisites\n",
|
||||
"If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, make sure you go through the [configuration](../../../configuration.ipynb) Notebook first if you haven't."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Check core SDK version number\n",
|
||||
"import azureml.core\n",
|
||||
"\n",
|
||||
"print(\"SDK version:\", azureml.core.VERSION)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Initialize Workspace\n",
|
||||
"\n",
|
||||
"Initialize a workspace object from persisted configuration."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core import Workspace\n",
|
||||
"\n",
|
||||
"ws = Workspace.from_config()\n",
|
||||
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Register Model"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can add tags and descriptions to your models. we are using `sklearn_regression_model.pkl` file in the current directory as a model with the same name `sklearn_regression_model.pkl` in the workspace.\n",
|
||||
"\n",
|
||||
"Using tags, you can track useful information such as the name and version of the machine learning library used to train the model, framework, category, target customer etc. Note that tags must be alphanumeric."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"register model from file"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.model import Model\n",
|
||||
"\n",
|
||||
"model = Model.register(model_path = \"sklearn_regression_model.pkl\",\n",
|
||||
" model_name = \"sklearn_regression_model.pkl\",\n",
|
||||
" tags = {'area': \"diabetes\", 'type': \"regression\"},\n",
|
||||
" description = \"Ridge regression model to predict diabetes\",\n",
|
||||
" workspace = ws)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create Inference Configuration"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.model import InferenceConfig\n",
|
||||
"\n",
|
||||
"inference_config = InferenceConfig(runtime= \"python\", \n",
|
||||
" entry_script=\"score.py\",\n",
|
||||
" conda_file=\"myenv.yml\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Deploy Model as a Local Docker Web Service\n",
|
||||
"\n",
|
||||
"*Make sure you have Docker installed and running.*\n",
|
||||
"\n",
|
||||
"Note that the service creation can take few minutes.\n",
|
||||
"\n",
|
||||
"NOTE:\n",
|
||||
"\n",
|
||||
"we require docker running with linux container. If you are running Docker for Windows, you need to ensure the Linux Engine is running\n",
|
||||
"\n",
|
||||
" powershell command to switch to linux engine\n",
|
||||
" & 'C:\\Program Files\\Docker\\Docker\\DockerCli.exe' -SwitchLinuxEngine\n",
|
||||
"\n",
|
||||
"and c drive is shared https://docs.docker.com/docker-for-windows/#shared-drives\n",
|
||||
"sometimes you have to reshare c drive as docker \n",
|
||||
"\n",
|
||||
"<img src=\"./dockerSharedDrive.JPG\" align=\"left\"/>"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.webservice import LocalWebservice\n",
|
||||
"\n",
|
||||
"#this is optional, if not provided we choose random port\n",
|
||||
"deployment_config = LocalWebservice.deploy_configuration(port=6789)\n",
|
||||
"\n",
|
||||
"local_service = Model.deploy(ws, \"test\", [model], inference_config, deployment_config)\n",
|
||||
"\n",
|
||||
"local_service.wait_for_deployment()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print('Local service port: {}'.format(local_service.port))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Check Status and Get Container Logs\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(local_service.get_logs())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Test Web Service"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Call the web service with some input data to get a prediction."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import json\n",
|
||||
"\n",
|
||||
"sample_input = json.dumps({\n",
|
||||
" 'data': [\n",
|
||||
" [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],\n",
|
||||
" [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]\n",
|
||||
" ]\n",
|
||||
"})\n",
|
||||
"\n",
|
||||
"sample_input = bytes(sample_input, encoding='utf-8')\n",
|
||||
"\n",
|
||||
"print(local_service.run(input_data=sample_input))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Reload Service\n",
|
||||
"\n",
|
||||
"You can update your score.py file and then call `reload()` to quickly restart the service. This will only reload your execution script and dependency files, it will not rebuild the underlying Docker image. As a result, `reload()` is fast, but if you do need to rebuild the image -- to add a new Conda or pip package, for instance -- you will have to call `update()`, instead (see below)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%writefile score.py\n",
|
||||
"import pickle\n",
|
||||
"import json\n",
|
||||
"import numpy as np\n",
|
||||
"from sklearn.externals import joblib\n",
|
||||
"from sklearn.linear_model import Ridge\n",
|
||||
"from azureml.core.model import Model\n",
|
||||
"\n",
|
||||
"from inference_schema.schema_decorators import input_schema, output_schema\n",
|
||||
"from inference_schema.parameter_types.numpy_parameter_type import NumpyParameterType\n",
|
||||
"\n",
|
||||
"def init():\n",
|
||||
" global model\n",
|
||||
" # note here \"sklearn_regression_model.pkl\" is the name of the model registered under\n",
|
||||
" # this is a different behavior than before when the code is run locally, even though the code is the same.\n",
|
||||
" model_path = Model.get_model_path('sklearn_regression_model.pkl')\n",
|
||||
" # deserialize the model file back into a sklearn model\n",
|
||||
" model = joblib.load(model_path)\n",
|
||||
"\n",
|
||||
"input_sample = np.array([[10,9,8,7,6,5,4,3,2,1]])\n",
|
||||
"output_sample = np.array([3726.995])\n",
|
||||
"\n",
|
||||
"@input_schema('data', NumpyParameterType(input_sample))\n",
|
||||
"@output_schema(NumpyParameterType(output_sample))\n",
|
||||
"def run(data):\n",
|
||||
" try:\n",
|
||||
" result = model.predict(data)\n",
|
||||
" # you can return any datatype as long as it is JSON-serializable\n",
|
||||
" return 'hello from updated score.py'\n",
|
||||
" except Exception as e:\n",
|
||||
" error = str(e)\n",
|
||||
" return error"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"local_service.reload()\n",
|
||||
"print(\"--------------------------------------------------------------\")\n",
|
||||
"\n",
|
||||
"# after reload now if you call run this will return updated return message\n",
|
||||
"\n",
|
||||
"print(local_service.run(input_data=sample_input))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Update Service\n",
|
||||
"\n",
|
||||
"If you want to change your model(s), Conda dependencies, or deployment configuration, call `update()` to rebuild the Docker image.\n",
|
||||
"\n",
|
||||
"```python\n",
|
||||
"\n",
|
||||
"local_service.update(models = [SomeOtherModelObject],\n",
|
||||
" deployment_config = local_config,\n",
|
||||
" inference_config = inference_config)\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Delete Service"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"local_service.delete()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "raymondl"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
"name": "python36"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
34
how-to-use-azureml/deploy-to-local/score.py
Normal file
@@ -0,0 +1,34 @@
|
||||
import pickle
|
||||
import json
|
||||
import numpy as np
|
||||
from sklearn.externals import joblib
|
||||
from sklearn.linear_model import Ridge
|
||||
from azureml.core.model import Model
|
||||
|
||||
from inference_schema.schema_decorators import input_schema, output_schema
|
||||
from inference_schema.parameter_types.numpy_parameter_type import NumpyParameterType
|
||||
|
||||
|
||||
def init():
|
||||
global model
|
||||
# note here "sklearn_regression_model.pkl" is the name of the model registered under
|
||||
# this is a different behavior than before when the code is run locally, even though the code is the same.
|
||||
model_path = Model.get_model_path('sklearn_regression_model.pkl')
|
||||
# deserialize the model file back into a sklearn model
|
||||
model = joblib.load(model_path)
|
||||
|
||||
|
||||
input_sample = np.array([[10, 9, 8, 7, 6, 5, 4, 3, 2, 1]])
|
||||
output_sample = np.array([3726.995])
|
||||
|
||||
|
||||
@input_schema('data', NumpyParameterType(input_sample))
|
||||
@output_schema(NumpyParameterType(output_sample))
|
||||
def run(data):
|
||||
try:
|
||||
result = model.predict(data)
|
||||
# you can return any datatype as long as it is JSON-serializable
|
||||
return result.tolist()
|
||||
except Exception as e:
|
||||
error = str(e)
|
||||
return error
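As a quick sanity check of this scoring logic outside of a deployment, one might load the pickled model directly. This is a hypothetical sketch, assuming `sklearn_regression_model.pkl` (registered from the notebook directory above) sits in the current working directory.

```python
# Hypothetical local sanity check, assuming sklearn_regression_model.pkl
# is in the current working directory.
import numpy as np
from sklearn.externals import joblib

model = joblib.load('sklearn_regression_model.pkl')
sample = np.array([[10, 9, 8, 7, 6, 5, 4, 3, 2, 1]])
print(model.predict(sample).tolist())  # same JSON-serializable output shape as run(data)
```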
|
||||
BIN
how-to-use-azureml/deploy-to-local/sklearn_regression_model.pkl
Normal file
Binary file not shown.
102
how-to-use-azureml/deployment/accelerated-models/README.md
Normal file
@@ -0,0 +1,102 @@
|
||||
|
||||
# Notebooks for Microsoft Azure Machine Learning Hardware Accelerated Models SDK
|
||||
|
||||
Easily create and train a model using various deep neural networks (DNNs) as a featurizer for deployment to Azure or a Data Box Edge device for ultra-low latency inferencing using FPGAs. These models are currently available:
|
||||
|
||||
* ResNet 50
|
||||
* ResNet 152
|
||||
* DenseNet-121
|
||||
* VGG-16
|
||||
* SSD-VGG
|
||||
|
||||
To learn more about the azureml-accel-model classes, see the section [Model Classes](#model-classes) below or the [Azure ML Accel Models SDK documentation](https://docs.microsoft.com/en-us/python/api/azureml-accel-models/azureml.accel?view=azure-ml-py).
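As a preview of how these classes are used in the notebooks below, an accelerated model is typically instantiated from `azureml.accel.models` and wired to a JPEG preprocessing graph. The snippet is a sketch based on the quickstart notebook; it assumes TensorFlow 1.x and a local `~/models` directory for checkpoints.

```python
# Sketch (assumptions: tensorflow 1.x and azureml-accel-models installed).
import os
import tensorflow as tf
import azureml.accel.models.utils as utils
from azureml.accel.models import QuantizedResnet50

tf.reset_default_graph()
in_images = tf.placeholder(tf.string)                 # JPEG bytes in
image_tensors = utils.preprocess_array(in_images)     # decode + resize for ResNet50
model_graph = QuantizedResnet50(os.path.expanduser('~/models'), is_frozen=True)
feature_tensor = model_graph.import_graph_def(image_tensors)
classifier_output = model_graph.get_default_classifier(feature_tensor)
```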
|
||||
|
||||
### Step 1: Create an Azure ML workspace
|
||||
Follow [these instructions](https://docs.microsoft.com/en-us/azure/machine-learning/service/quickstart-create-workspace-with-python) to install the Azure ML SDK on your local machine, create an Azure ML workspace, and set up your notebook environment, which is required for the next step.
|
||||
|
||||
### Step 2: Check your FPGA quota
|
||||
Use the Azure CLI to check whether you have quota.
|
||||
|
||||
```shell
|
||||
az vm list-usage --location "eastus" -o table
|
||||
```
|
||||
|
||||
The other locations are ``southeastasia``, ``westeurope``, and ``westus2``.
|
||||
|
||||
Under the "Name" column, look for "Standard PBS Family vCPUs" and ensure you have at least 6 vCPUs under "CurrentValue."
|
||||
|
||||
If you do not have quota, then submit a request form [here](https://aka.ms/accelerateAI).
|
||||
|
||||
### Step 3: Install the Azure ML Accelerated Models SDK
|
||||
Once you have set up your environment, install the Azure ML Accel Models SDK. This package requires tensorflow >= 1.6,<2.0 to be installed.
|
||||
|
||||
If you already have tensorflow >= 1.6,<2.0 installed in your development environment, you can install the SDK package using:
|
||||
|
||||
```
|
||||
pip install azureml-accel-models
|
||||
```
|
||||
|
||||
If you do not have tensorflow >= 1.6,<2.0 and are using a CPU-only development environment, our SDK with tensorflow can be installed using:
|
||||
|
||||
```
|
||||
pip install azureml-accel-models[cpu]
|
||||
```
|
||||
|
||||
If your machine supports GPU (for example, on an [Azure DSVM](https://docs.microsoft.com/en-us/azure/machine-learning/data-science-virtual-machine/overview)), then you can leverage the tensorflow-gpu functionality using:
|
||||
|
||||
```
|
||||
pip install azureml-accel-models[gpu]
|
||||
```
|
||||
|
||||
### Step 4: Follow our notebooks
|
||||
|
||||
The notebooks in this repo walk through the following scenarios:
|
||||
* [Quickstart](accelerated-models-quickstart.ipynb), deploy and inference a ResNet50 model trained on ImageNet
|
||||
* [Object Detection](accelerated-models-object-detection.ipynb), deploy and inference an SSD-VGG model that can do object detection
|
||||
* [Training models](accelerated-models-training.ipynb), train one of our accelerated models on the Kaggle Cats and Dogs dataset to see how to improve accuracy on custom datasets
|
||||
|
||||
<a name="model-classes"></a>
|
||||
## Model Classes
|
||||
As stated above, we support 5 Accelerated Models. Here's more information on their input and output tensors.
|
||||
|
||||
**Available models and output tensors**
|
||||
|
||||
The available models and the corresponding default classifier output tensors are below. This is the value that you would use during inferencing if you used the default classifier.
|
||||
* Resnet50, QuantizedResnet50
|
||||
``
|
||||
output_tensors = "classifier_1/resnet_v1_50/predictions/Softmax:0"
|
||||
``
|
||||
* Resnet152, QuantizedResnet152
|
||||
``
|
||||
output_tensors = "classifier/resnet_v1_152/predictions/Softmax:0"
|
||||
``
|
||||
* Densenet121, QuantizedDensenet121
|
||||
``
|
||||
output_tensors = "classifier/densenet121/predictions/Softmax:0"
|
||||
``
|
||||
* Vgg16, QuantizedVgg16
|
||||
``
|
||||
output_tensors = "classifier/vgg_16/fc8/squeezed:0"
|
||||
``
|
||||
* SsdVgg, QuantizedSsdVgg
|
||||
``
|
||||
output_tensors = ['ssd_300_vgg/block4_box/Reshape_1:0', 'ssd_300_vgg/block7_box/Reshape_1:0', 'ssd_300_vgg/block8_box/Reshape_1:0', 'ssd_300_vgg/block9_box/Reshape_1:0', 'ssd_300_vgg/block10_box/Reshape_1:0', 'ssd_300_vgg/block11_box/Reshape_1:0', 'ssd_300_vgg/block4_box/Reshape:0', 'ssd_300_vgg/block7_box/Reshape:0', 'ssd_300_vgg/block8_box/Reshape:0', 'ssd_300_vgg/block9_box/Reshape:0', 'ssd_300_vgg/block10_box/Reshape:0', 'ssd_300_vgg/block11_box/Reshape:0']
|
||||
``
|
||||
|
||||
For more information, please reference the azureml.accel.models package in the [Azure ML Python SDK documentation](https://docs.microsoft.com/en-us/python/api/azureml-accel-models/azureml.accel.models?view=azure-ml-py).
|
||||
|
||||
**Input tensors**
|
||||
|
||||
The input_tensors value defaults to "Placeholder:0" and is created in the [Image Preprocessing](#construct-model) step in the line:
|
||||
``
|
||||
in_images = tf.placeholder(tf.string)
|
||||
``
|
||||
|
||||
You can change the input_tensors name by doing this:
|
||||
``
|
||||
in_images = tf.placeholder(tf.string, name="images")
|
||||
``
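At inference time these tensor names are what you pass to the prediction client. The snippet below is a sketch, assuming an already-deployed accelerated web service; `address`, `port`, the service name, and `sample.jpg` are placeholders, and the output tensor name is the Resnet50 default listed above.

```python
# Sketch: score a JPEG against an already-deployed accelerated service.
# `address`, `port`, the service name, and sample.jpg are placeholders.
from azureml.accel.client import PredictionClient

client = PredictionClient(address=address, port=port, use_ssl=False,
                          service_name="my-service")
result = client.score_file(path="sample.jpg",
                           input_name="Placeholder:0",
                           outputs="classifier_1/resnet_v1_50/predictions/Softmax:0")
```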
|
||||
|
||||
|
||||
## Resources
|
||||
* [Read more about FPGAs](https://docs.microsoft.com/en-us/azure/machine-learning/service/concept-accelerate-with-fpgas)
|
||||
@@ -0,0 +1,492 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||
"\n",
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Azure ML Hardware Accelerated Object Detection"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This tutorial will show you how to deploy an object detection service based on the SSD-VGG model in just a few minutes using the Azure Machine Learning Accelerated AI service.\n",
|
||||
"\n",
|
||||
"We will use the SSD-VGG model accelerated on an FPGA. Our Accelerated Models Service handles translating deep neural networks (DNN) into an FPGA program.\n",
|
||||
"\n",
|
||||
"The steps in this notebook are: \n",
|
||||
"1. [Setup Environment](#set-up-environment)\n",
|
||||
"* [Construct Model](#construct-model)\n",
|
||||
" * Image Preprocessing\n",
|
||||
" * Featurizer\n",
|
||||
" * Save Model\n",
|
||||
" * Save input and output tensor names\n",
|
||||
"* [Create Image](#create-image)\n",
|
||||
"* [Deploy Image](#deploy-image)\n",
|
||||
"* [Test the Service](#test-service)\n",
|
||||
" * Create Client\n",
|
||||
" * Serve the model\n",
|
||||
"* [Cleanup](#cleanup)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a id=\"set-up-environment\"></a>\n",
|
||||
"## 1. Set up Environment\n",
|
||||
"### 1.a. Imports"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import tensorflow as tf"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 1.b. Retrieve Workspace\n",
|
||||
"If you haven't created a Workspace, please follow [this notebook](\"../../../configuration.ipynb\") to do so. If you have, run the codeblock below to retrieve it. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core import Workspace\n",
|
||||
"\n",
|
||||
"ws = Workspace.from_config()\n",
|
||||
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a id=\"construct-model\"></a>\n",
|
||||
"## 2. Construct model\n",
|
||||
"### 2.a. Image preprocessing\n",
|
||||
"We'd like our service to accept JPEG images as input. However the input to SSD-VGG is a float tensor of shape \\[1, 300, 300, 3\\]. The first dimension is batch, then height, width, and channels (i.e. NHWC). To bridge this gap, we need code that decodes JPEG images and resizes them appropriately for input to SSD-VGG. The Accelerated AI service can execute TensorFlow graphs as part of the service and we'll use that ability to do the image preprocessing. This code defines a TensorFlow graph that preprocesses an array of JPEG images (as TensorFlow strings) and produces a tensor that is ready to be featurized by SSD-VGG.\n",
|
||||
"\n",
|
||||
"**Note:** Expect to see TF deprecation warnings until we port our SDK over to use Tensorflow 2.0."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Input images as a two-dimensional tensor containing an arbitrary number of images represented a strings\n",
|
||||
"import azureml.accel.models.utils as utils\n",
|
||||
"tf.reset_default_graph()\n",
|
||||
"\n",
|
||||
"in_images = tf.placeholder(tf.string)\n",
|
||||
"image_tensors = utils.preprocess_array(in_images, output_width=300, output_height=300, preserve_aspect_ratio=False)\n",
|
||||
"print(image_tensors.shape)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 2.b. Featurizer\n",
|
||||
"The SSD-VGG model is different from our other models in that it generates 12 tensor outputs. These corresponds to x,y displacements of the anchor boxes and the detection confidence (for 21 classes). Because these outputs are not convenient to work with, we will later use a pre-defined post-processing utility to transform the outputs into a simplified list of bounding boxes with their respective class and confidence.\n",
|
||||
"\n",
|
||||
"For more information about the output tensors, take this example: the output tensor 'ssd_300_vgg/block4_box/Reshape_1:0' has a shape of [None, 37, 37, 4, 21]. This gives the pre-softmax confidence for 4 anchor boxes situated at each site of a 37 x 37 grid imposed on the image, one confidence score for each of the 21 classes. The first dimension is the batch dimension. Likewise, 'ssd_300_vgg/block4_box/Reshape:0' has shape [None, 37, 37, 4, 4] and encodes the (cx, cy) center shift and rescaling (sw, sh) relative to each anchor box. Refer to the [SSD-VGG paper](https://arxiv.org/abs/1512.02325) to understand how these are computed. The other 10 tensors are defined similarly."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.accel.models import SsdVgg\n",
|
||||
"\n",
|
||||
"saved_model_dir = os.path.join(os.path.expanduser('~'), 'models')\n",
|
||||
"model_graph = SsdVgg(saved_model_dir, is_frozen = True)\n",
|
||||
"\n",
|
||||
"print('SSD-VGG Input Tensors:')\n",
|
||||
"for idx, input_name in enumerate(model_graph.input_tensor_list):\n",
|
||||
" print('{}, {}'.format(input_name, model_graph.get_input_dims(idx)))\n",
|
||||
" \n",
|
||||
"print('SSD-VGG Output Tensors:')\n",
|
||||
"for idx, output_name in enumerate(model_graph.output_tensor_list):\n",
|
||||
" print('{}, {}'.format(output_name, model_graph.get_output_dims(idx)))\n",
|
||||
"\n",
|
||||
"ssd_outputs = model_graph.import_graph_def(image_tensors, is_training=False)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 2.c. Save Model\n",
|
||||
"Now that we loaded both parts of the tensorflow graph (preprocessor and SSD-VGG featurizer), we can save the graph and associated variables to a directory which we can register as an Azure ML Model."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model_name = \"ssdvgg\"\n",
|
||||
"model_save_path = os.path.join(saved_model_dir, model_name, \"saved_model\")\n",
|
||||
"print(\"Saving model in {}\".format(model_save_path))\n",
|
||||
"\n",
|
||||
"output_map = {}\n",
|
||||
"for i, output in enumerate(ssd_outputs):\n",
|
||||
" output_map['out_{}'.format(i)] = output\n",
|
||||
"\n",
|
||||
"with tf.Session() as sess:\n",
|
||||
" model_graph.restore_weights(sess)\n",
|
||||
" tf.saved_model.simple_save(sess, \n",
|
||||
" model_save_path, \n",
|
||||
" inputs={'images': in_images}, \n",
|
||||
" outputs=output_map)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 2.d. Important! Save names of input and output tensors\n",
|
||||
"\n",
|
||||
"These input and output tensors that were created during the preprocessing and classifier steps are also going to be used when **converting the model** to an Accelerated Model that can run on FPGA's and for **making an inferencing request**. It is very important to save this information!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"register model from file"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"input_tensors = in_images.name\n",
|
||||
"# We will use the list of output tensors during inferencing\n",
|
||||
"output_tensors = [output.name for output in ssd_outputs]\n",
|
||||
"# However, for multiple output tensors, our AccelOnnxConverter will \n",
|
||||
"# accept comma-delimited strings (lists will cause error)\n",
|
||||
"output_tensors_str = \",\".join(output_tensors)\n",
|
||||
"\n",
|
||||
"print(input_tensors)\n",
|
||||
"print(output_tensors)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a id=\"create-image\"></a>\n",
|
||||
"## 3. Create AccelContainerImage\n",
|
||||
"Below we will execute all the same steps as in the [Quickstart](./accelerated-models-quickstart.ipynb#create-image) to package the model we have saved locally into an accelerated Docker image saved in our workspace. To complete all the steps, it may take a few minutes. For more details on each step, check out the [Quickstart section on model registration](./accelerated-models-quickstart.ipynb#register-model)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core import Workspace\n",
|
||||
"from azureml.core.model import Model\n",
|
||||
"from azureml.core.image import Image\n",
|
||||
"from azureml.accel import AccelOnnxConverter\n",
|
||||
"from azureml.accel import AccelContainerImage\n",
|
||||
"\n",
|
||||
"# Retrieve workspace\n",
|
||||
"ws = Workspace.from_config()\n",
|
||||
"print(\"Successfully retrieved workspace:\", ws.name, ws.resource_group, ws.location, ws.subscription_id, '\\n')\n",
|
||||
"\n",
|
||||
"# Register model\n",
|
||||
"registered_model = Model.register(workspace = ws,\n",
|
||||
" model_path = model_save_path,\n",
|
||||
" model_name = model_name)\n",
|
||||
"print(\"Successfully registered: \", registered_model.name, registered_model.description, registered_model.version, '\\n', sep = '\\t')\n",
|
||||
"\n",
|
||||
"# Convert model\n",
|
||||
"convert_request = AccelOnnxConverter.convert_tf_model(ws, registered_model, input_tensors, output_tensors_str)\n",
|
||||
"# If it fails, you can run wait_for_completion again with show_output=True.\n",
|
||||
"convert_request.wait_for_completion(show_output=False)\n",
|
||||
"converted_model = convert_request.result\n",
|
||||
"print(\"\\nSuccessfully converted: \", converted_model.name, converted_model.url, converted_model.version, \n",
|
||||
" converted_model.id, converted_model.created_time, '\\n')\n",
|
||||
"\n",
|
||||
"# Package into AccelContainerImage\n",
|
||||
"image_config = AccelContainerImage.image_configuration()\n",
|
||||
"# Image name must be lowercase\n",
|
||||
"image_name = \"{}-image\".format(model_name)\n",
|
||||
"image = Image.create(name = image_name,\n",
|
||||
" models = [converted_model],\n",
|
||||
" image_config = image_config, \n",
|
||||
" workspace = ws)\n",
|
||||
"image.wait_for_creation()\n",
|
||||
"print(\"Created AccelContainerImage: {} {} {}\\n\".format(image.name, image.creation_state, image.image_location))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a id=\"deploy-image\"></a>\n",
|
||||
"## 4. Deploy image\n",
|
||||
"Once you have an Azure ML Accelerated Image in your Workspace, you can deploy it to two destinations, to a Databox Edge machine or to an AKS cluster. \n",
|
||||
"\n",
|
||||
"### 4.a. Deploy to Databox Edge Machine using IoT Hub\n",
|
||||
"See the sample [here](https://github.com/Azure-Samples/aml-real-time-ai/) for using the Azure IoT CLI extension for deploying your Docker image to your Databox Edge Machine.\n",
|
||||
"\n",
|
||||
"### 4.b. Deploy to AKS Cluster\n",
|
||||
"Same as in the [Quickstart section on image deployment](./accelerated-models-quickstart.ipynb#deploy-image), we are going to create an AKS cluster with FPGA-enabled machines, then deploy our service to it.\n",
|
||||
"#### Create AKS ComputeTarget"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.compute import AksCompute, ComputeTarget\n",
|
||||
"\n",
|
||||
"# Uses the specific FPGA enabled VM (sku: Standard_PB6s)\n",
|
||||
"# Standard_PB6s are available in: eastus, westus2, westeurope, southeastasia\n",
|
||||
"prov_config = AksCompute.provisioning_configuration(vm_size = \"Standard_PB6s\",\n",
|
||||
" agent_count = 1, \n",
|
||||
" location = \"eastus\")\n",
|
||||
"\n",
|
||||
"aks_name = 'aks-pb6-obj'\n",
|
||||
"# Create the cluster\n",
|
||||
"aks_target = ComputeTarget.create(workspace = ws, \n",
|
||||
" name = aks_name, \n",
|
||||
" provisioning_configuration = prov_config)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Provisioning an AKS cluster might take awhile (15 or so minutes), and we want to wait until it's successfully provisioned before we can deploy a service to it. If you interrupt this cell, provisioning of the cluster will continue. You can re-run it or check the status in your Workspace under Compute."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"aks_target.wait_for_completion(show_output = True)\n",
|
||||
"print(aks_target.provisioning_state)\n",
|
||||
"print(aks_target.provisioning_errors)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Deploy AccelContainerImage to AKS ComputeTarget"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.webservice import Webservice, AksWebservice\n",
|
||||
"\n",
|
||||
"# Set the web service configuration (for creating a test service, we don't want autoscale enabled)\n",
|
||||
"# Authentication is enabled by default, but for testing we specify False\n",
|
||||
"aks_config = AksWebservice.deploy_configuration(autoscale_enabled=False,\n",
|
||||
" num_replicas=1,\n",
|
||||
" auth_enabled = False)\n",
|
||||
"\n",
|
||||
"aks_service_name ='my-aks-service'\n",
|
||||
"\n",
|
||||
"aks_service = Webservice.deploy_from_image(workspace = ws,\n",
|
||||
" name = aks_service_name,\n",
|
||||
" image = image,\n",
|
||||
" deployment_config = aks_config,\n",
|
||||
" deployment_target = aks_target)\n",
|
||||
"aks_service.wait_for_deployment(show_output = True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a id=\"test-service\"></a>\n",
|
||||
"## 5. Test the service\n",
|
||||
"<a id=\"create-client\"></a>\n",
|
||||
"### 5.a. Create Client\n",
|
||||
"The image supports gRPC and the TensorFlow Serving \"predict\" API. We have a client that can call into the docker image to get predictions. \n",
|
||||
"\n",
|
||||
"**Note:** If you chose to use auth_enabled=True when creating your AksWebservice.deploy_configuration(), see documentation [here](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.webservice(class)?view=azure-ml-py#get-keys--) on how to retrieve your keys and use either key as an argument to PredictionClient(...,access_token=key)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Using the grpc client in AzureML Accelerated Models SDK\n",
|
||||
"from azureml.accel.client import PredictionClient\n",
|
||||
"\n",
|
||||
"address = aks_service.scoring_uri\n",
|
||||
"ssl_enabled = address.startswith(\"https\")\n",
|
||||
"address = address[address.find('/')+2:].strip('/')\n",
|
||||
"port = 443 if ssl_enabled else 80\n",
|
||||
"\n",
|
||||
"# Initialize AzureML Accelerated Models client\n",
|
||||
"client = PredictionClient(address=address,\n",
|
||||
" port=port,\n",
|
||||
" use_ssl=ssl_enabled,\n",
|
||||
" service_name=aks_service.name)"
|
||||
]
|
||||
},
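If the service was deployed with `auth_enabled=True`, the client needs an access token, as noted above. The following is a hedged sketch of that variant, assuming `Webservice.get_keys()` returns the primary and secondary keys for the service.

```python
# Hedged sketch for an auth-enabled deployment: pass one of the service keys
# as the access token (assumes auth_enabled=True was used above).
primary_key, secondary_key = aks_service.get_keys()

client = PredictionClient(address=address,
                          port=port,
                          use_ssl=ssl_enabled,
                          service_name=aks_service.name,
                          access_token=primary_key)
```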
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can adapt the client [code](https://github.com/Azure/aml-real-time-ai/blob/master/pythonlib/amlrealtimeai/client.py) to meet your needs. There is also an example C# [client](https://github.com/Azure/aml-real-time-ai/blob/master/sample-clients/csharp).\n",
|
||||
"\n",
|
||||
"The service provides an API that is compatible with TensorFlow Serving. There are instructions to download a sample client [here](https://www.tensorflow.org/serving/setup)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a id=\"serve-model\"></a>\n",
|
||||
"### 5.b. Serve the model\n",
|
||||
"The SSD-VGG model returns the confidence and bounding boxes for all possible anchor boxes. As mentioned earlier, we will use a post-processing routine to transform this into a list of bounding boxes (y1, x1, y2, x2) where x, y are fractional coordinates measured from left and top respectively. A respective list of classes and scores is also returned to tag each bounding box. Below we make use of this information to draw the bounding boxes on top the original image. Note that in the post-processing routine we select a confidence threshold of 0.5."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import cv2\n",
|
||||
"from matplotlib import pyplot as plt\n",
|
||||
"\n",
|
||||
"colors_tableau = [(255, 255, 255), (31, 119, 180), (174, 199, 232), (255, 127, 14), (255, 187, 120),\n",
|
||||
" (44, 160, 44), (152, 223, 138), (214, 39, 40), (255, 152, 150),\n",
|
||||
" (148, 103, 189), (197, 176, 213), (140, 86, 75), (196, 156, 148),\n",
|
||||
" (227, 119, 194), (247, 182, 210), (127, 127, 127), (199, 199, 199),\n",
|
||||
" (188, 189, 34), (219, 219, 141), (23, 190, 207), (158, 218, 229)]\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def draw_boxes_on_img(img, classes, scores, bboxes, thickness=2):\n",
|
||||
" shape = img.shape\n",
|
||||
" for i in range(bboxes.shape[0]):\n",
|
||||
" bbox = bboxes[i]\n",
|
||||
" color = colors_tableau[classes[i]]\n",
|
||||
" # Draw bounding box...\n",
|
||||
" p1 = (int(bbox[0] * shape[0]), int(bbox[1] * shape[1]))\n",
|
||||
" p2 = (int(bbox[2] * shape[0]), int(bbox[3] * shape[1]))\n",
|
||||
" cv2.rectangle(img, p1[::-1], p2[::-1], color, thickness)\n",
|
||||
" # Draw text...\n",
|
||||
" s = '%s/%.3f' % (classes[i], scores[i])\n",
|
||||
" p1 = (p1[0]-5, p1[1])\n",
|
||||
" cv2.putText(img, s, p1[::-1], cv2.FONT_HERSHEY_DUPLEX, 0.4, color, 1)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import azureml.accel._external.ssdvgg_utils as ssdvgg_utils\n",
|
||||
"\n",
|
||||
"result = client.score_file(path=\"meeting.jpg\", input_name=input_tensors, outputs=output_tensors)\n",
|
||||
"classes, scores, bboxes = ssdvgg_utils.postprocess(result, select_threshold=0.5)\n",
|
||||
"\n",
|
||||
"img = cv2.imread('meeting.jpg', 1)\n",
|
||||
"img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)\n",
|
||||
"draw_boxes_on_img(img, classes, scores, bboxes)\n",
|
||||
"plt.imshow(img)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a id=\"cleanup\"></a>\n",
|
||||
"## 6. Cleanup\n",
|
||||
"It's important to clean up your resources, so that you won't incur unnecessary costs. In the [next notebook](./accelerated-models-training.ipynb) you will learn how to train a classfier on a new dataset using transfer learning."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"aks_service.delete()\n",
|
||||
"aks_target.delete()\n",
|
||||
"image.delete()\n",
|
||||
"registered_model.delete()\n",
|
||||
"converted_model.delete()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "coverste"
|
||||
},
|
||||
{
|
||||
"name": "paledger"
|
||||
},
|
||||
{
|
||||
"name": "sukha"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
"name": "python36"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -0,0 +1,546 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||
"\n",
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Azure ML Hardware Accelerated Models Quickstart"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This tutorial will show you how to deploy an image recognition service based on the ResNet 50 classifier using the Azure Machine Learning Accelerated Models service. Get more information about our service from our [documentation](https://docs.microsoft.com/en-us/azure/machine-learning/service/concept-accelerate-with-fpgas), [API reference](https://docs.microsoft.com/en-us/python/api/azureml-accel-models/azureml.accel?view=azure-ml-py), or [forum](https://aka.ms/aml-forum).\n",
|
||||
"\n",
|
||||
"We will use an accelerated ResNet50 featurizer running on an FPGA. Our Accelerated Models Service handles translating deep neural networks (DNN) into an FPGA program.\n",
|
||||
"\n",
|
||||
"For more information about using other models besides Resnet50, see the [README](./README.md).\n",
|
||||
"\n",
|
||||
"The steps covered in this notebook are: \n",
|
||||
"1. [Set up environment](#set-up-environment)\n",
|
||||
"* [Construct model](#construct-model)\n",
|
||||
" * Image Preprocessing\n",
|
||||
" * Featurizer (Resnet50)\n",
|
||||
" * Classifier\n",
|
||||
" * Save Model\n",
|
||||
"* [Register Model](#register-model)\n",
|
||||
"* [Convert into Accelerated Model](#convert-model)\n",
|
||||
"* [Create Image](#create-image)\n",
|
||||
"* [Deploy](#deploy-image)\n",
|
||||
"* [Test service](#test-service)\n",
|
||||
"* [Clean-up](#clean-up)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a id=\"set-up-environment\"></a>\n",
|
||||
"## 1. Set up environment"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import tensorflow as tf"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Retrieve Workspace\n",
|
||||
"If you haven't created a Workspace, please follow [this notebook](https://github.com/Azure/MachineLearningNotebooks/blob/master/configuration.ipynb) to do so. If you have, run the codeblock below to retrieve it. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core import Workspace\n",
|
||||
"\n",
|
||||
"ws = Workspace.from_config()\n",
|
||||
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a id=\"construct-model\"></a>\n",
|
||||
"## 2. Construct model\n",
|
||||
"\n",
|
||||
"There are three parts to the model we are deploying: pre-processing, featurizer with ResNet50, and classifier with ImageNet dataset. Then we will save this complete Tensorflow model graph locally before registering it to your Azure ML Workspace.\n",
|
||||
"\n",
|
||||
"### 2.a. Image preprocessing\n",
|
||||
"We'd like our service to accept JPEG images as input. However the input to ResNet50 is a tensor. So we need code that decodes JPEG images and does the preprocessing required by ResNet50. The Accelerated AI service can execute TensorFlow graphs as part of the service and we'll use that ability to do the image preprocessing. This code defines a TensorFlow graph that preprocesses an array of JPEG images (as strings) and produces a tensor that is ready to be featurized by ResNet50.\n",
|
||||
"\n",
|
||||
"**Note:** Expect to see TF deprecation warnings until we port our SDK over to use Tensorflow 2.0."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Input images as a two-dimensional tensor containing an arbitrary number of images represented a strings\n",
|
||||
"import azureml.accel.models.utils as utils\n",
|
||||
"tf.reset_default_graph()\n",
|
||||
"\n",
|
||||
"in_images = tf.placeholder(tf.string)\n",
|
||||
"image_tensors = utils.preprocess_array(in_images)\n",
|
||||
"print(image_tensors.shape)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 2.b. Featurizer\n",
|
||||
"We use ResNet50 as a featurizer. In this step we initialize the model. This downloads a TensorFlow checkpoint of the quantized ResNet50."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.accel.models import QuantizedResnet50\n",
|
||||
"save_path = os.path.expanduser('~/models')\n",
|
||||
"model_graph = QuantizedResnet50(save_path, is_frozen = True)\n",
|
||||
"feature_tensor = model_graph.import_graph_def(image_tensors)\n",
|
||||
"print(model_graph.version)\n",
|
||||
"print(feature_tensor.name)\n",
|
||||
"print(feature_tensor.shape)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 2.c. Classifier\n",
|
||||
"The model we downloaded includes a classifier which takes the output of the ResNet50 and identifies an image. This classifier is trained on the ImageNet dataset. We are going to use this classifier for our service. The next [notebook](./accelerated-models-training.ipynb) shows how to train a classifier for a different data set. The input to the classifier is a tensor matching the output of our ResNet50 featurizer."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"classifier_output = model_graph.get_default_classifier(feature_tensor)\n",
|
||||
"print(classifier_output)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 2.d. Save Model\n",
|
||||
"Now that we loaded all three parts of the tensorflow graph (preprocessor, resnet50 featurizer, and the classifier), we can save the graph and associated variables to a directory which we can register as an Azure ML Model."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# model_name must be lowercase\n",
|
||||
"model_name = \"resnet50\"\n",
|
||||
"model_save_path = os.path.join(save_path, model_name)\n",
|
||||
"print(\"Saving model in {}\".format(model_save_path))\n",
|
||||
"\n",
|
||||
"with tf.Session() as sess:\n",
|
||||
" model_graph.restore_weights(sess)\n",
|
||||
" tf.saved_model.simple_save(sess, model_save_path,\n",
|
||||
" inputs={'images': in_images},\n",
|
||||
" outputs={'output_alias': classifier_output})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 2.e. Important! Save names of input and output tensors\n",
|
||||
"\n",
|
||||
"These input and output tensors that were created during the preprocessing and classifier steps are also going to be used when **converting the model** to an Accelerated Model that can run on FPGA's and for **making an inferencing request**. It is very important to save this information! You can see our defaults for all the models in the [README](./README.md).\n",
|
||||
"\n",
|
||||
"By default for Resnet50, these are the values you should see when running the cell below: \n",
|
||||
"* input_tensors = \"Placeholder:0\"\n",
|
||||
"* output_tensors = \"classifier/resnet_v1_50/predictions/Softmax:0\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"register model from file"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"input_tensors = in_images.name\n",
|
||||
"output_tensors = classifier_output.name\n",
|
||||
"\n",
|
||||
"print(input_tensors)\n",
|
||||
"print(output_tensors)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a id=\"register-model\"></a>\n",
|
||||
"## 3. Register Model"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can add tags and descriptions to your models. Using tags, you can track useful information such as the name and version of the machine learning library used to train the model. Note that tags must be alphanumeric."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"register model from file"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.model import Model\n",
|
||||
"\n",
|
||||
"registered_model = Model.register(workspace = ws,\n",
|
||||
" model_path = model_save_path,\n",
|
||||
" model_name = model_name)\n",
|
||||
"\n",
|
||||
"print(\"Successfully registered: \", registered_model.name, registered_model.description, registered_model.version, sep = '\\t')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a id=\"convert-model\"></a>\n",
|
||||
"## 4. Convert Model"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"For conversion you need to provide names of input and output tensors. This information can be found from the model_graph you saved in step 2.e. above.\n",
|
||||
"\n",
|
||||
"**Note**: Conversion may take a while and on average for FPGA model it is about 1-3 minutes and it depends on model type."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"register model from file"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.accel import AccelOnnxConverter\n",
|
||||
"\n",
|
||||
"convert_request = AccelOnnxConverter.convert_tf_model(ws, registered_model, input_tensors, output_tensors)\n",
|
||||
"# If it fails, you can run wait_for_completion again with show_output=True.\n",
|
||||
"convert_request.wait_for_completion(show_output = False)\n",
|
||||
"# If the above call succeeded, get the converted model\n",
|
||||
"converted_model = convert_request.result\n",
|
||||
"print(\"\\nSuccessfully converted: \", converted_model.name, converted_model.url, converted_model.version, \n",
|
||||
" converted_model.id, converted_model.created_time, '\\n')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a id=\"create-image\"></a>\n",
|
||||
"## 5. Package the model into an Image"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can add tags and descriptions to image. Also, for FPGA model an image can only contain **single** model.\n",
|
||||
"\n",
|
||||
"**Note**: The following command can take few minutes. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.image import Image\n",
|
||||
"from azureml.accel import AccelContainerImage\n",
|
||||
"\n",
|
||||
"image_config = AccelContainerImage.image_configuration()\n",
|
||||
"# Image name must be lowercase\n",
|
||||
"image_name = \"{}-image\".format(model_name)\n",
|
||||
"\n",
|
||||
"image = Image.create(name = image_name,\n",
|
||||
" models = [converted_model],\n",
|
||||
" image_config = image_config, \n",
|
||||
" workspace = ws)\n",
|
||||
"image.wait_for_creation(show_output = False)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a id=\"deploy-image\"></a>\n",
|
||||
"## 6. Deploy\n",
|
||||
"Once you have an Azure ML Accelerated Image in your Workspace, you can deploy it to two destinations, to a Databox Edge machine or to an AKS cluster. \n",
|
||||
"\n",
|
||||
"### 6.a. Databox Edge Machine using IoT Hub\n",
|
||||
"See the sample [here](https://github.com/Azure-Samples/aml-real-time-ai/) for using the Azure IoT CLI extension for deploying your Docker image to your Databox Edge Machine.\n",
|
||||
"\n",
|
||||
"### 6.b. Azure Kubernetes Service (AKS) using Azure ML Service\n",
|
||||
"We are going to create an AKS cluster with FPGA-enabled machines, then deploy our service to it. For more information, see [AKS official docs](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-deploy-and-where#aks).\n",
|
||||
"\n",
|
||||
"#### Create AKS ComputeTarget"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.compute import AksCompute, ComputeTarget\n",
|
||||
"\n",
|
||||
"# Uses the specific FPGA enabled VM (sku: Standard_PB6s)\n",
|
||||
"# Standard_PB6s are available in: eastus, westus2, westeurope, southeastasia\n",
|
||||
"prov_config = AksCompute.provisioning_configuration(vm_size = \"Standard_PB6s\",\n",
|
||||
" agent_count = 1, \n",
|
||||
" location = \"eastus\")\n",
|
||||
"\n",
|
||||
"aks_name = 'my-aks-pb6'\n",
|
||||
"# Create the cluster\n",
|
||||
"aks_target = ComputeTarget.create(workspace = ws, \n",
|
||||
" name = aks_name, \n",
|
||||
" provisioning_configuration = prov_config)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Provisioning an AKS cluster might take awhile (15 or so minutes), and we want to wait until it's successfully provisioned before we can deploy a service to it. If you interrupt this cell, provisioning of the cluster will continue. You can also check the status in your Workspace under Compute."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"aks_target.wait_for_completion(show_output = True)\n",
|
||||
"print(aks_target.provisioning_state)\n",
|
||||
"print(aks_target.provisioning_errors)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Deploy AccelContainerImage to AKS ComputeTarget"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.webservice import Webservice, AksWebservice\n",
|
||||
"\n",
|
||||
"#Set the web service configuration (for creating a test service, we don't want autoscale enabled)\n",
|
||||
"# Authentication is enabled by default, but for testing we specify False\n",
|
||||
"aks_config = AksWebservice.deploy_configuration(autoscale_enabled=False,\n",
|
||||
" num_replicas=1,\n",
|
||||
" auth_enabled = False)\n",
|
||||
"\n",
|
||||
"aks_service_name ='my-aks-service'\n",
|
||||
"\n",
|
||||
"aks_service = Webservice.deploy_from_image(workspace = ws,\n",
|
||||
" name = aks_service_name,\n",
|
||||
" image = image,\n",
|
||||
" deployment_config = aks_config,\n",
|
||||
" deployment_target = aks_target)\n",
|
||||
"aks_service.wait_for_deployment(show_output = True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a id=\"test-service\"></a>\n",
|
||||
"## 7. Test the service"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 7.a. Create Client\n",
|
||||
"The image supports gRPC and the TensorFlow Serving \"predict\" API. We have a client that can call into the docker image to get predictions.\n",
|
||||
"\n",
|
||||
"**Note:** If you chose to use auth_enabled=True when creating your AksWebservice, see documentation [here](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.webservice(class)?view=azure-ml-py#get-keys--) on how to retrieve your keys and use either key as an argument to PredictionClient(...,access_token=key)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Using the grpc client in AzureML Accelerated Models SDK\n",
|
||||
"from azureml.accel.client import PredictionClient\n",
|
||||
"\n",
|
||||
"address = aks_service.scoring_uri\n",
|
||||
"ssl_enabled = address.startswith(\"https\")\n",
|
||||
"address = address[address.find('/')+2:].strip('/')\n",
|
||||
"port = 443 if ssl_enabled else 80\n",
|
||||
"\n",
|
||||
"# Initialize AzureML Accelerated Models client\n",
|
||||
"client = PredictionClient(address=address,\n",
|
||||
" port=port,\n",
|
||||
" use_ssl=ssl_enabled,\n",
|
||||
" service_name=aks_service.name)"
|
||||
]
|
||||
},
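{
"cell_type": "markdown",
"metadata": {},
"source": [
"If you deployed with auth_enabled=True instead, a minimal sketch of creating an authenticated client (reusing the address, port, and ssl_enabled values from the cell above, and retrieving the keys with the standard Webservice get_keys() call) would be:\n",
"```\n",
"primary_key, secondary_key = aks_service.get_keys()\n",
"client = PredictionClient(address=address,\n",
"                          port=port,\n",
"                          use_ssl=ssl_enabled,\n",
"                          service_name=aks_service.name,\n",
"                          access_token=primary_key)\n",
"```"
]
},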
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can adapt the client [code](https://github.com/Azure/aml-real-time-ai/blob/master/pythonlib/amlrealtimeai/client.py) to meet your needs. There is also an example C# [client](https://github.com/Azure/aml-real-time-ai/blob/master/sample-clients/csharp).\n",
|
||||
"\n",
|
||||
"The service provides an API that is compatible with TensorFlow Serving. There are instructions to download a sample client [here](https://www.tensorflow.org/serving/setup)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 7.b. Serve the model\n",
|
||||
"To understand the results we need a mapping to the human readable imagenet classes"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import requests\n",
|
||||
"classes_entries = requests.get(\"https://raw.githubusercontent.com/Lasagne/Recipes/master/examples/resnet50/imagenet_classes.txt\").text.splitlines()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Score image with input and output tensor names\n",
|
||||
"results = client.score_file(path=\"./snowleopardgaze.jpg\", \n",
|
||||
" input_name=input_tensors, \n",
|
||||
" outputs=output_tensors)\n",
|
||||
"\n",
|
||||
"# map results [class_id] => [confidence]\n",
|
||||
"results = enumerate(results)\n",
|
||||
"# sort results by confidence\n",
|
||||
"sorted_results = sorted(results, key=lambda x: x[1], reverse=True)\n",
|
||||
"# print top 5 results\n",
|
||||
"for top in sorted_results[:5]:\n",
|
||||
" print(classes_entries[top[0]], 'confidence:', top[1])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a id=\"clean-up\"></a>\n",
|
||||
"## 8. Clean-up\n",
|
||||
"Run the cell below to delete your webservice, image, and model (must be done in that order). In the [next notebook](./accelerated-models-training.ipynb) you will learn how to train a classfier on a new dataset using transfer learning and finetune the weights."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"aks_service.delete()\n",
|
||||
"aks_target.delete()\n",
|
||||
"image.delete()\n",
|
||||
"registered_model.delete()\n",
|
||||
"converted_model.delete()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "coverste"
|
||||
},
|
||||
{
|
||||
"name": "paledger"
|
||||
},
|
||||
{
|
||||
"name": "aibhalla"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
"name": "python36"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -0,0 +1,860 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||
"\n",
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Training with the Azure Machine Learning Accelerated Models Service"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This notebook will introduce how to apply common machine learning techniques, like transfer learning, custom weights, and unquantized vs. quantized models, when working with our Azure Machine Learning Accelerated Models Service (Azure ML Accel Models).\n",
|
||||
"\n",
|
||||
"We will use Tensorflow for the preprocessing steps, ResNet50 for the featurizer, and the Keras API (built on Tensorflow backend) to build the classifier layers instead of the default ImageNet classifier used in Quickstart. Then we will train the model, evaluate it, and deploy it to run on an FPGA.\n",
|
||||
"\n",
|
||||
"#### Transfer Learning and Custom weights\n",
|
||||
"We will walk you through two ways to build and train a ResNet50 model on the Kaggle Cats and Dogs dataset: transfer learning only and then transfer learning with custom weights.\n",
|
||||
"\n",
|
||||
"In using transfer learning, our goal is to re-purpose the ResNet50 model already trained on the [ImageNet image dataset](http://www.image-net.org/) as a basis for our training of the Kaggle Cats and Dogs dataset. The ResNet50 featurizer will be imported as frozen, so only the Keras classifier will be trained.\n",
|
||||
"\n",
|
||||
"With the addition of custom weights, we will build the model so that the ResNet50 featurizer weights as not frozen. This will let us retrain starting with custom weights trained with ImageNet on ResNet50 and then use the Kaggle Cats and Dogs dataset to retrain and fine-tune the quantized version of the model.\n",
|
||||
"\n",
|
||||
"#### Unquantized vs. Quantized models\n",
|
||||
"The unquantized version of our models (ie. Resnet50, Resnet152, Densenet121, Vgg16, SsdVgg) uses native float precision (32-bit floats), which will be faster at training. We will use this for our first run through, then fine-tune the weights with the quantized version. The quantized version of our models (i.e. QuantizedResnet50, QuantizedResnet152, QuantizedDensenet121, QuantizedVgg16, QuantizedSsdVgg) will have the same node names as the unquantized version, but use quantized operations and will match the performance of the model when running on an FPGA.\n",
|
||||
"\n",
|
||||
"#### Contents\n",
|
||||
"1. [Setup Environment](#setup)\n",
|
||||
"* [Prepare Data](#prepare-data)\n",
|
||||
"* [Construct Model](#construct-model)\n",
|
||||
" * Preprocessor\n",
|
||||
" * Classifier\n",
|
||||
" * Model construction\n",
|
||||
"* [Train Model](#train-model)\n",
|
||||
"* [Test Model](#test-model)\n",
|
||||
"* [Execution](#execution)\n",
|
||||
" * [Transfer Learning](#transfer-learning)\n",
|
||||
" * [Transfer Learning with Custom Weights](#custom-weights)\n",
|
||||
"* [Create Image](#create-image)\n",
|
||||
"* [Deploy Image](#deploy-image)\n",
|
||||
"* [Test the service](#test-service)\n",
|
||||
"* [Clean-up](#cleanup)\n",
|
||||
"* [Appendix](#appendix)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a id=\"setup\"></a>\n",
|
||||
"## 1. Setup Environment\n",
|
||||
"#### 1.a. Please set up your environment as described in the [Quickstart](./accelerated-models-quickstart.ipynb), meaning:\n",
|
||||
"* Make sure your Workspace config.json exists and has the correct info\n",
|
||||
"* Install Tensorflow\n",
|
||||
"\n",
|
||||
"#### 1.b. Download dataset into ~/catsanddogs \n",
|
||||
"The dataset we will be using for training can be downloaded [here](https://www.microsoft.com/en-us/download/details.aspx?id=54765). Download the zip and extract to a directory named 'catsanddogs' under your user directory (\"~/catsanddogs\"). \n",
|
||||
"\n"
|
||||
]
|
||||
},
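{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a minimal sketch, once the zip has been downloaded you can extract it like this (the path below assumes the zip was saved as ~/Downloads/kagglecatsanddogs.zip, which is a hypothetical location; adjust it to wherever your browser saved the file):\n",
"```\n",
"import os, zipfile\n",
"\n",
"zip_path = os.path.expanduser(\"~/Downloads/kagglecatsanddogs.zip\")  # adjust to your download location\n",
"datadir = os.path.expanduser(\"~/catsanddogs\")\n",
"os.makedirs(datadir, exist_ok=True)\n",
"with zipfile.ZipFile(zip_path) as z:\n",
"    z.extractall(datadir)\n",
"```"
]
},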
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### 1.c. Import packages"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import sys\n",
|
||||
"import tensorflow as tf\n",
|
||||
"import numpy as np\n",
|
||||
"from keras import backend as K\n",
|
||||
"import sklearn\n",
|
||||
"import tqdm"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### 1.d. Create directories for later use\n",
|
||||
"After you train your model in float32, you'll write the weights to a place on disk. We also need a location to store the models that get downloaded."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"custom_weights_dir = os.path.expanduser(\"~/custom-weights\")\n",
|
||||
"saved_model_dir = os.path.expanduser(\"~/models\")"
|
||||
]
|
||||
},
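{
"cell_type": "markdown",
"metadata": {},
"source": [
"The cell above only builds the two paths. If these directories do not already exist on your machine, a minimal sketch for creating them up front:\n",
"```\n",
"os.makedirs(custom_weights_dir, exist_ok=True)\n",
"os.makedirs(saved_model_dir, exist_ok=True)\n",
"```"
]
},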
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a id=\"prepare-data\"></a>\n",
|
||||
"## 2. Prepare Data\n",
|
||||
"Load the files we are going to use for training and testing. By default this notebook uses only a very small subset of the Cats and Dogs dataset. That makes it run relatively quickly."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import glob\n",
|
||||
"import imghdr\n",
|
||||
"datadir = os.path.expanduser(\"~/catsanddogs\")\n",
|
||||
"\n",
|
||||
"cat_files = glob.glob(os.path.join(datadir, 'PetImages', 'Cat', '*.jpg'))\n",
|
||||
"dog_files = glob.glob(os.path.join(datadir, 'PetImages', 'Dog', '*.jpg'))\n",
|
||||
"\n",
|
||||
"# Limit the data set to make the notebook execute quickly.\n",
|
||||
"cat_files = cat_files[:64]\n",
|
||||
"dog_files = dog_files[:64]\n",
|
||||
"\n",
|
||||
"# The data set has a few images that are not jpeg. Remove them.\n",
|
||||
"cat_files = [f for f in cat_files if imghdr.what(f) == 'jpeg']\n",
|
||||
"dog_files = [f for f in dog_files if imghdr.what(f) == 'jpeg']\n",
|
||||
"\n",
|
||||
"if(not len(cat_files) or not len(dog_files)):\n",
|
||||
" print(\"Please download the Kaggle Cats and Dogs dataset form https://www.microsoft.com/en-us/download/details.aspx?id=54765 and extract the zip to \" + datadir) \n",
|
||||
" raise ValueError(\"Data not found\")\n",
|
||||
"else:\n",
|
||||
" print(cat_files[0])\n",
|
||||
" print(dog_files[0])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Construct a numpy array as labels\n",
|
||||
"image_paths = cat_files + dog_files\n",
|
||||
"total_files = len(cat_files) + len(dog_files)\n",
|
||||
"labels = np.zeros(total_files)\n",
|
||||
"labels[len(cat_files):] = 1"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Split images data as training data and test data\n",
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"onehot_labels = np.array([[0,1] if i else [1,0] for i in labels])\n",
|
||||
"img_train, img_test, label_train, label_test = train_test_split(image_paths, onehot_labels, random_state=42, shuffle=True)\n",
|
||||
"\n",
|
||||
"print(len(img_train), len(img_test), label_train.shape, label_test.shape)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a id=\"construct-model\"></a>\n",
|
||||
"## 3. Construct Model\n",
|
||||
"We will define the functions to handle creating the preprocessor and the classifier first, and then run them together to actually construct the model with the Resnet50 featurizer in a single Tensorflow session in a separate cell.\n",
|
||||
"\n",
|
||||
"We use ResNet50 for the featurizer and build our own classifier using Keras layers. We train the featurizer and the classifier as one model. We will provide parameters to determine whether we are using the quantized version and whether we are using custom weights in training or not."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 3.a. Define image preprocessing step\n",
|
||||
"Same as in the Quickstart, before passing image dataset to the ResNet50 featurizer, we need to preprocess the input file to get it into the form expected by ResNet50. ResNet50 expects float tensors representing the images in BGR, channel last order. We've provided a default implementation of the preprocessing that you can use.\n",
|
||||
"\n",
|
||||
"**Note:** Expect to see TF deprecation warnings until we port our SDK over to use Tensorflow 2.0."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import azureml.accel.models.utils as utils\n",
|
||||
"\n",
|
||||
"def preprocess_images(scaling_factor=1.0):\n",
|
||||
" # Convert images to 3D tensors [width,height,channel] - channels are in BGR order.\n",
|
||||
" in_images = tf.placeholder(tf.string)\n",
|
||||
" image_tensors = utils.preprocess_array(in_images, 'RGB', scaling_factor)\n",
|
||||
" return in_images, image_tensors"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 3.b. Define classifier\n",
|
||||
"We use Keras layer APIs to construct the classifier. Because we're using the tensorflow backend, we can train this classifier in one session with our Resnet50 model."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def construct_classifier(in_tensor, seed=None):\n",
|
||||
" from keras.layers import Dropout, Dense, Flatten\n",
|
||||
" from keras.initializers import glorot_uniform\n",
|
||||
" K.set_session(tf.get_default_session())\n",
|
||||
"\n",
|
||||
" FC_SIZE = 1024\n",
|
||||
" NUM_CLASSES = 2\n",
|
||||
"\n",
|
||||
" x = Dropout(0.2, input_shape=(1, 1, int(in_tensor.shape[3]),), seed=seed)(in_tensor)\n",
|
||||
" x = Dense(FC_SIZE, activation='relu', input_dim=(1, 1, int(in_tensor.shape[3]),),\n",
|
||||
" kernel_initializer=glorot_uniform(seed=seed), bias_initializer='zeros')(x)\n",
|
||||
" x = Flatten()(x)\n",
|
||||
" preds = Dense(NUM_CLASSES, activation='softmax', input_dim=FC_SIZE, name='classifier_output',\n",
|
||||
" kernel_initializer=glorot_uniform(seed=seed), bias_initializer='zeros')(x)\n",
|
||||
" return preds"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 3.c. Define model construction\n",
|
||||
"Now that the preprocessor and classifier for the model are defined, we can define how we want to construct the model. \n",
|
||||
"\n",
|
||||
"Constructing the model has these steps: \n",
|
||||
"1. Get preprocessing steps\n",
|
||||
"* Get featurizer using the Azure ML Accel Models SDK:\n",
|
||||
" * import the graph definition\n",
|
||||
" * restore the weights of the model into a Tensorflow session\n",
|
||||
"* Get classifier\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def construct_model(quantized, starting_weights_directory = None):\n",
|
||||
" from azureml.accel.models import Resnet50, QuantizedResnet50\n",
|
||||
" \n",
|
||||
" # Convert images to 3D tensors [width,height,channel]\n",
|
||||
" in_images, image_tensors = preprocess_images(1.0)\n",
|
||||
"\n",
|
||||
" # Construct featurizer using quantized or unquantized ResNet50 model\n",
|
||||
" if not quantized:\n",
|
||||
" featurizer = Resnet50(saved_model_dir)\n",
|
||||
" else:\n",
|
||||
" featurizer = QuantizedResnet50(saved_model_dir, custom_weights_directory = starting_weights_directory)\n",
|
||||
"\n",
|
||||
" features = featurizer.import_graph_def(input_tensor=image_tensors)\n",
|
||||
" \n",
|
||||
" # Construct classifier\n",
|
||||
" preds = construct_classifier(features)\n",
|
||||
" \n",
|
||||
" # Initialize weights\n",
|
||||
" sess = tf.get_default_session()\n",
|
||||
" tf.global_variables_initializer().run()\n",
|
||||
"\n",
|
||||
" featurizer.restore_weights(sess)\n",
|
||||
"\n",
|
||||
" return in_images, image_tensors, features, preds, featurizer"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a id=\"train-model\"></a>\n",
|
||||
"## 4. Train Model"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def read_files(files):\n",
|
||||
" \"\"\" Read files to array\"\"\"\n",
|
||||
" contents = []\n",
|
||||
" for path in files:\n",
|
||||
" with open(path, 'rb') as f:\n",
|
||||
" contents.append(f.read())\n",
|
||||
" return contents"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def train_model(preds, in_images, img_train, label_train, is_retrain = False, train_epoch = 10, learning_rate=None):\n",
|
||||
" \"\"\" training model \"\"\"\n",
|
||||
" from keras.objectives import binary_crossentropy\n",
|
||||
" from tqdm import tqdm\n",
|
||||
" \n",
|
||||
" learning_rate = learning_rate if learning_rate else 0.001 if is_retrain else 0.01\n",
|
||||
" \n",
|
||||
" # Specify the loss function\n",
|
||||
" in_labels = tf.placeholder(tf.float32, shape=(None, 2)) \n",
|
||||
" cross_entropy = tf.reduce_mean(binary_crossentropy(in_labels, preds))\n",
|
||||
" optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cross_entropy)\n",
|
||||
"\n",
|
||||
" def chunks(a, b, n):\n",
|
||||
" \"\"\"Yield successive n-sized chunks from a and b.\"\"\"\n",
|
||||
" if (len(a) != len(b)):\n",
|
||||
" print(\"a and b are not equal in chunks(a,b,n)\")\n",
|
||||
" raise ValueError(\"Parameter error\")\n",
|
||||
"\n",
|
||||
" for i in range(0, len(a), n):\n",
|
||||
" yield a[i:i + n], b[i:i + n]\n",
|
||||
"\n",
|
||||
" chunk_size = 16\n",
|
||||
" chunk_num = len(label_train) / chunk_size\n",
|
||||
"\n",
|
||||
" sess = tf.get_default_session()\n",
|
||||
" for epoch in range(train_epoch):\n",
|
||||
" avg_loss = 0\n",
|
||||
" for img_chunk, label_chunk in tqdm(chunks(img_train, label_train, chunk_size)):\n",
|
||||
" contents = read_files(img_chunk)\n",
|
||||
" _, loss = sess.run([optimizer, cross_entropy],\n",
|
||||
" feed_dict={in_images: contents,\n",
|
||||
" in_labels: label_chunk,\n",
|
||||
" K.learning_phase(): 1})\n",
|
||||
" avg_loss += loss / chunk_num\n",
|
||||
" print(\"Epoch:\", (epoch + 1), \"loss = \", \"{:.3f}\".format(avg_loss))\n",
|
||||
" \n",
|
||||
" # Reach desired performance\n",
|
||||
" if (avg_loss < 0.001):\n",
|
||||
" break"
|
||||
]
|
||||
},
|
||||
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a id=\"test-model\"></a>\n",
|
||||
"## 5. Test Model"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def test_model(preds, in_images, img_test, label_test):\n",
|
||||
" \"\"\"Test the model\"\"\"\n",
|
||||
" from keras.metrics import categorical_accuracy\n",
|
||||
"\n",
|
||||
" in_labels = tf.placeholder(tf.float32, shape=(None, 2))\n",
|
||||
" accuracy = tf.reduce_mean(categorical_accuracy(in_labels, preds))\n",
|
||||
" contents = read_files(img_test)\n",
|
||||
"\n",
|
||||
" accuracy = accuracy.eval(feed_dict={in_images: contents,\n",
|
||||
" in_labels: label_test,\n",
|
||||
" K.learning_phase(): 0})\n",
|
||||
" return accuracy"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a id=\"execution\"></a>\n",
|
||||
"## 6. Execute steps\n",
|
||||
"You can run through the Transfer Learning section, then skip to Create AccelContainerImage. By default, because the custom weights section takes much longer for training twice, it is not saved as executable cells. You can copy the code or change cell type to 'Code'.\n",
|
||||
"\n",
|
||||
"<a id=\"transfer-learning\"></a>\n",
|
||||
"### 6.a. Training using Transfer Learning"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Launch the training\n",
|
||||
"tf.reset_default_graph()\n",
|
||||
"sess = tf.Session(graph=tf.get_default_graph())\n",
|
||||
"\n",
|
||||
"with sess.as_default():\n",
|
||||
" in_images, image_tensors, features, preds, featurizer = construct_model(quantized=True)\n",
|
||||
" train_model(preds, in_images, img_train, label_train, is_retrain=False, train_epoch=10, learning_rate=0.01) \n",
|
||||
" accuracy = test_model(preds, in_images, img_test, label_test) \n",
|
||||
" print(\"Accuracy:\", accuracy)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Save Model"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model_name = 'resnet50-catsanddogs-tl'\n",
|
||||
"model_save_path = os.path.join(saved_model_dir, model_name)\n",
|
||||
"\n",
|
||||
"tf.saved_model.simple_save(sess, model_save_path,\n",
|
||||
" inputs={'images': in_images},\n",
|
||||
" outputs={'output_alias': preds})\n",
|
||||
"\n",
|
||||
"input_tensors = in_images.name\n",
|
||||
"output_tensors = preds.name\n",
|
||||
"\n",
|
||||
"print(input_tensors)\n",
|
||||
"print(output_tensors)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a id=\"custom-weights\"></a>\n",
|
||||
"### 6.b. Traning using Custom Weights\n",
|
||||
"\n",
|
||||
"Because the quantized graph defintion and the float32 graph defintion share the same node names in the graph definitions, we can initally train the weights in float32, and then reload them with the quantized operations (which take longer) to fine-tune the model.\n",
|
||||
"\n",
|
||||
"First we train the model with custom weights but without quantization. Training is done with native float precision (32-bit floats). We load the training data set and batch the training with 10 epochs. When the performance reaches desired level or starts decredation, we stop the training iteration and save the weights as tensorflow checkpoint files. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Launch the training\n",
|
||||
"```\n",
|
||||
"tf.reset_default_graph()\n",
|
||||
"sess = tf.Session(graph=tf.get_default_graph())\n",
|
||||
"\n",
|
||||
"with sess.as_default():\n",
|
||||
" in_images, image_tensors, features, preds, featurizer = construct_model(quantized=False)\n",
|
||||
" train_model(preds, in_images, img_train, label_train, is_retrain=False, train_epoch=10) \n",
|
||||
" accuracy = test_model(preds, in_images, img_test, label_test) \n",
|
||||
" print(\"Accuracy:\", accuracy)\n",
|
||||
" featurizer.save_weights(custom_weights_dir + \"/rn50\", tf.get_default_session())\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Test Model\n",
|
||||
"After training, we evaluate the trained model's accuracy on test dataset with quantization. So that we know the model's performance if it is deployed on the FPGA."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"```\n",
|
||||
"tf.reset_default_graph()\n",
|
||||
"sess = tf.Session(graph=tf.get_default_graph())\n",
|
||||
"\n",
|
||||
"with sess.as_default():\n",
|
||||
" print(\"Testing trained model with quantization\")\n",
|
||||
" in_images, image_tensors, features, preds, quantized_featurizer = construct_model(quantized=True, starting_weights_directory=custom_weights_dir)\n",
|
||||
" accuracy = test_model(preds, in_images, img_test, label_test) \n",
|
||||
" print(\"Accuracy:\", accuracy)\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Fine-Tune Model\n",
|
||||
"Sometimes, the model's accuracy can drop significantly after quantization. In those cases, we need to retrain the model enabled with quantization to get better model accuracy."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"```\n",
|
||||
"if (accuracy < 0.93):\n",
|
||||
" with sess.as_default():\n",
|
||||
" print(\"Fine-tuning model with quantization\")\n",
|
||||
" train_model(preds, in_images, img_train, label_train, is_retrain=True, train_epoch=10)\n",
|
||||
" accuracy = test_model(preds, in_images, img_test, label_test) \n",
|
||||
" print(\"Accuracy:\", accuracy)\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Save Model"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"```\n",
|
||||
"model_name = 'resnet50-catsanddogs-cw'\n",
|
||||
"model_save_path = os.path.join(saved_model_dir, model_name)\n",
|
||||
"\n",
|
||||
"tf.saved_model.simple_save(sess, model_save_path,\n",
|
||||
" inputs={'images': in_images},\n",
|
||||
" outputs={'output_alias': preds})\n",
|
||||
"\n",
|
||||
"input_tensors = in_images.name\n",
|
||||
"output_tensors = preds.name\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a id=\"create-image\"></a>\n",
|
||||
"## 7. Create AccelContainerImage\n",
|
||||
"\n",
|
||||
"Below we will execute all the same steps as in the [Quickstart](./accelerated-models-quickstart.ipynb#create-image) to package the model we have saved locally into an accelerated Docker image saved in our workspace. To complete all the steps, it may take a few minutes. For more details on each step, check out the [Quickstart section on model registration](./accelerated-models-quickstart.ipynb#register-model)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core import Workspace\n",
|
||||
"from azureml.core.model import Model\n",
|
||||
"from azureml.core.image import Image\n",
|
||||
"from azureml.accel import AccelOnnxConverter\n",
|
||||
"from azureml.accel import AccelContainerImage\n",
|
||||
"\n",
|
||||
"# Retrieve workspace\n",
|
||||
"ws = Workspace.from_config()\n",
|
||||
"print(\"Successfully retrieved workspace:\", ws.name, ws.resource_group, ws.location, ws.subscription_id, '\\n')\n",
|
||||
"\n",
|
||||
"# Register model\n",
|
||||
"registered_model = Model.register(workspace = ws,\n",
|
||||
" model_path = model_save_path,\n",
|
||||
" model_name = model_name)\n",
|
||||
"print(\"Successfully registered: \", registered_model.name, registered_model.description, registered_model.version, '\\n', sep = '\\t')\n",
|
||||
"\n",
|
||||
"# Convert model\n",
|
||||
"convert_request = AccelOnnxConverter.convert_tf_model(ws, registered_model, input_tensors, output_tensors)\n",
|
||||
"# If it fails, you can run wait_for_completion again with show_output=True.\n",
|
||||
"convert_request.wait_for_completion(show_output=False)\n",
|
||||
"converted_model = convert_request.result\n",
|
||||
"print(\"\\nSuccessfully converted: \", converted_model.name, converted_model.url, converted_model.version, \n",
|
||||
" converted_model.id, converted_model.created_time, '\\n')\n",
|
||||
"\n",
|
||||
"# Package into AccelContainerImage\n",
|
||||
"image_config = AccelContainerImage.image_configuration()\n",
|
||||
"# Image name must be lowercase\n",
|
||||
"image_name = \"{}-image\".format(model_name)\n",
|
||||
"image = Image.create(name = image_name,\n",
|
||||
" models = [converted_model],\n",
|
||||
" image_config = image_config, \n",
|
||||
" workspace = ws)\n",
|
||||
"image.wait_for_creation()\n",
|
||||
"print(\"Created AccelContainerImage: {} {} {}\\n\".format(image.name, image.creation_state, image.image_location))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a id=\"deploy-image\"></a>\n",
|
||||
"## 8. Deploy image\n",
|
||||
"Once you have an Azure ML Accelerated Image in your Workspace, you can deploy it to two destinations, to a Databox Edge machine or to an AKS cluster. \n",
|
||||
"\n",
|
||||
"### 8.a. Deploy to Databox Edge Machine using IoT Hub\n",
|
||||
"See the sample [here](https://github.com/Azure-Samples/aml-real-time-ai/) for using the Azure IoT CLI extension for deploying your Docker image to your Databox Edge Machine.\n",
|
||||
"\n",
|
||||
"### 8.b. Deploy to AKS Cluster"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Create AKS ComputeTarget"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.compute import AksCompute, ComputeTarget\n",
|
||||
"\n",
|
||||
"# Uses the specific FPGA enabled VM (sku: Standard_PB6s)\n",
|
||||
"# Standard_PB6s are available in: eastus, westus2, westeurope, southeastasia\n",
|
||||
"prov_config = AksCompute.provisioning_configuration(vm_size = \"Standard_PB6s\",\n",
|
||||
" agent_count = 1,\n",
|
||||
" location = \"eastus\")\n",
|
||||
"\n",
|
||||
"aks_name = 'aks-pb6-tl'\n",
|
||||
"# Create the cluster\n",
|
||||
"aks_target = ComputeTarget.create(workspace = ws, \n",
|
||||
" name = aks_name, \n",
|
||||
" provisioning_configuration = prov_config)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Provisioning an AKS cluster might take awhile (15 or so minutes), and we want to wait until it's successfully provisioned before we can deploy a service to it. If you interrupt this cell, provisioning of the cluster will continue. You can re-run it or check the status in your Workspace under Compute."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"aks_target.wait_for_completion(show_output = True)\n",
|
||||
"print(aks_target.provisioning_state)\n",
|
||||
"print(aks_target.provisioning_errors)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Deploy AccelContainerImage to AKS ComputeTarget"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.webservice import Webservice, AksWebservice\n",
|
||||
"\n",
|
||||
"# Set the web service configuration (for creating a test service, we don't want autoscale enabled)\n",
|
||||
"# Authentication is enabled by default, but for testing we specify False\n",
|
||||
"aks_config = AksWebservice.deploy_configuration(autoscale_enabled=False,\n",
|
||||
" num_replicas=1,\n",
|
||||
" auth_enabled = False)\n",
|
||||
"\n",
|
||||
"aks_service_name ='my-aks-service'\n",
|
||||
"\n",
|
||||
"aks_service = Webservice.deploy_from_image(workspace = ws,\n",
|
||||
" name = aks_service_name,\n",
|
||||
" image = image,\n",
|
||||
" deployment_config = aks_config,\n",
|
||||
" deployment_target = aks_target)\n",
|
||||
"aks_service.wait_for_deployment(show_output = True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a id=\"test-service\"></a>\n",
|
||||
"## 9. Test the service\n",
|
||||
"\n",
|
||||
"<a id=\"create-client\"></a>\n",
|
||||
"### 9.a. Create Client\n",
|
||||
"The image supports gRPC and the TensorFlow Serving \"predict\" API. We have a client that can call into the docker image to get predictions. \n",
|
||||
"\n",
|
||||
"**Note:** If you chose to use auth_enabled=True when creating your AksWebservice.deploy_configuration(), see documentation [here](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.webservice(class)?view=azure-ml-py#get-keys--) on how to retrieve your keys and use either key as an argument to PredictionClient(...,access_token=key)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Using the grpc client in AzureML Accelerated Models SDK\n",
|
||||
"from azureml.accel.client import PredictionClient\n",
|
||||
"\n",
|
||||
"address = aks_service.scoring_uri\n",
|
||||
"ssl_enabled = address.startswith(\"https\")\n",
|
||||
"address = address[address.find('/')+2:].strip('/')\n",
|
||||
"port = 443 if ssl_enabled else 80\n",
|
||||
"\n",
|
||||
"# Initialize AzureML Accelerated Models client\n",
|
||||
"client = PredictionClient(address=address,\n",
|
||||
" port=port,\n",
|
||||
" use_ssl=ssl_enabled,\n",
|
||||
" service_name=aks_service.name)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a id=\"serve-model\"></a>\n",
|
||||
"### 9.b. Serve the model\n",
|
||||
"Let's see how our service does on a few images. It may get a few wrong."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Specify an image to classify\n",
|
||||
"print('CATS')\n",
|
||||
"for image_file in cat_files[:8]:\n",
|
||||
" results = client.score_file(path=image_file, \n",
|
||||
" input_name=input_tensors, \n",
|
||||
" outputs=output_tensors)\n",
|
||||
" result = 'CORRECT ' if results[0] > results[1] else 'WRONG '\n",
|
||||
" print(result + str(results))\n",
|
||||
"print('DOGS')\n",
|
||||
"for image_file in dog_files[:8]:\n",
|
||||
" results = client.score_file(path=image_file, \n",
|
||||
" input_name=input_tensors, \n",
|
||||
" outputs=output_tensors)\n",
|
||||
" result = 'CORRECT ' if results[1] > results[0] else 'WRONG '\n",
|
||||
" print(result + str(results))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a id=\"cleanup\"></a>\n",
|
||||
"## 10. Cleanup\n",
|
||||
"It's important to clean up your resources, so that you won't incur unnecessary costs."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"aks_service.delete()\n",
|
||||
"aks_target.delete()\n",
|
||||
"image.delete()\n",
|
||||
"registered_model.delete()\n",
|
||||
"converted_model.delete()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<a id=\"appendix\"></a>\n",
|
||||
"## 11. Appendix"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"License for plot_confusion_matrix:\n",
|
||||
"\n",
|
||||
"New BSD License\n",
|
||||
"\n",
|
||||
"Copyright (c) 2007-2018 The scikit-learn developers.\n",
|
||||
"All rights reserved.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Redistribution and use in source and binary forms, with or without\n",
|
||||
"modification, are permitted provided that the following conditions are met:\n",
|
||||
"\n",
|
||||
" a. Redistributions of source code must retain the above copyright notice,\n",
|
||||
" this list of conditions and the following disclaimer.\n",
|
||||
" b. Redistributions in binary form must reproduce the above copyright\n",
|
||||
" notice, this list of conditions and the following disclaimer in the\n",
|
||||
" documentation and/or other materials provided with the distribution.\n",
|
||||
" c. Neither the name of the Scikit-learn Developers nor the names of\n",
|
||||
" its contributors may be used to endorse or promote products\n",
|
||||
" derived from this software without specific prior written\n",
|
||||
" permission. \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\n",
|
||||
"AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\n",
|
||||
"IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\n",
|
||||
"ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR\n",
|
||||
"ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\n",
|
||||
"DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\n",
|
||||
"SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\n",
|
||||
"CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT\n",
|
||||
"LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY\n",
|
||||
"OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH\n",
|
||||
"DAMAGE.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "coverste"
|
||||
},
|
||||
{
|
||||
"name": "paledger"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
"name": "python36"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
BIN
how-to-use-azureml/deployment/accelerated-models/meeting.jpg
Normal file
Binary file not shown. After Width: | Height: | Size: 74 KiB
Binary file not shown. After Width: | Height: | Size: 79 KiB
@@ -25,6 +25,13 @@
|
||||
"3. Build new image and deploy it. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
||||
@@ -4,17 +4,20 @@ These tutorials show how to create and deploy Open Neural Network eXchange ([ONN
|
||||
|
||||
## Tutorials
|
||||
|
||||
0. [Configure your Azure Machine Learning Workspace](../../../configuration.ipynb)
|
||||
0. If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, [Configure your Azure Machine Learning Workspace](../../../configuration.ipynb)
|
||||
|
||||
#### Obtain models from the [ONNX Model Zoo](https://github.com/onnx/models) and deploy with ONNX Runtime Inference
|
||||
1. [Handwritten Digit Classification (MNIST)](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/deployment/onnx/onnx-inference-mnist-deploy.ipynb)
|
||||
2. [Facial Expression Recognition (Emotion FER+)](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/deployment/onnx/onnx-inference-facial-expression-recognition-deploy.ipynb)
|
||||
#### Obtain pretrained models from the [ONNX Model Zoo](https://github.com/onnx/models) and deploy with ONNX Runtime
|
||||
1. [MNIST - Handwritten Digit Classification with ONNX Runtime](onnx-inference-mnist-deploy.ipynb)
|
||||
2. [Emotion FER+ - Facial Expression Recognition with ONNX Runtime](onnx-inference-facial-expression-recognition-deploy.ipynb)
|
||||
|
||||
#### Train model on Azure ML, convert to ONNX, and deploy with ONNX Runtime
|
||||
3. [MNIST - Train using PyTorch and deploy with ONNX Runtime](onnx-train-pytorch-aml-deploy-mnist.ipynb)
|
||||
|
||||
#### Demo Notebooks from Microsoft Ignite 2018
|
||||
Note that the following notebooks do not have evaluation sections for the models since they were deployed as part of a live demo. You can find the respective pre-processing and post-processing code linked from the ONNX Model Zoo Github pages ([ResNet](https://github.com/onnx/models/tree/master/models/image_classification/resnet), [TinyYoloV2](https://github.com/onnx/models/tree/master/tiny_yolov2)), or experiment with the ONNX models by [running them in the browser](https://microsoft.github.io/onnxjs-demo/#/).
|
||||
|
||||
3. [Image Recognition (ResNet50)](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/deployment/onnx/onnx-modelzoo-aml-deploy-resnet50.ipynb)
|
||||
4. [Convert Core ML Model to ONNX and deploy - Real Time Object Detection (TinyYOLO)](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/deployment/onnx/onnx-convert-aml-deploy-tinyyolo.ipynb)
|
||||
4. [ResNet50 - Image Recognition with ONNX Runtime](onnx-modelzoo-aml-deploy-resnet50.ipynb)
|
||||
5. [TinyYoloV2 - Convert from CoreML and deploy with ONNX Runtime](onnx-convert-aml-deploy-tinyyolo.ipynb)
|
||||
|
||||
## Documentation
|
||||
- [ONNX Runtime Python API Documentation](http://aka.ms/onnxruntime-python)
|
||||
@@ -31,3 +34,6 @@ Licensed under the MIT License.
|
||||
|
||||
## Acknowledgements
|
||||
These tutorials were developed by Vinitra Swamy and Prasanth Pulavarthi of the Microsoft AI Frameworks team and adapted for presentation at Microsoft Ignite 2018.
|
||||
|
||||
|
||||

|
||||
|
||||
124
how-to-use-azureml/deployment/onnx/mnist.py
Normal file
@@ -0,0 +1,124 @@
|
||||
# This is a modified version of https://github.com/pytorch/examples/blob/master/mnist/main.py which is
|
||||
# licensed under BSD 3-Clause (https://github.com/pytorch/examples/blob/master/LICENSE)
|
||||
|
||||
from __future__ import print_function
|
||||
import argparse
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
import torch.optim as optim
|
||||
from torchvision import datasets, transforms
|
||||
import os
|
||||
|
||||
|
||||
class Net(nn.Module):
|
||||
def __init__(self):
|
||||
super(Net, self).__init__()
|
||||
self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
|
||||
self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
|
||||
self.conv2_drop = nn.Dropout2d()
|
||||
self.fc1 = nn.Linear(320, 50)
|
||||
self.fc2 = nn.Linear(50, 10)
|
||||
|
||||
def forward(self, x):
|
||||
x = F.relu(F.max_pool2d(self.conv1(x), 2))
|
||||
x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
|
||||
x = x.view(-1, 320)
|
||||
x = F.relu(self.fc1(x))
|
||||
x = F.dropout(x, training=self.training)
|
||||
x = self.fc2(x)
|
||||
return F.log_softmax(x, dim=1)
|
||||
|
||||
|
||||
def train(args, model, device, train_loader, optimizer, epoch, output_dir):
|
||||
model.train()
|
||||
for batch_idx, (data, target) in enumerate(train_loader):
|
||||
data, target = data.to(device), target.to(device)
|
||||
optimizer.zero_grad()
|
||||
output = model(data)
|
||||
loss = F.nll_loss(output, target)
|
||||
loss.backward()
|
||||
optimizer.step()
|
||||
if batch_idx % args.log_interval == 0:
|
||||
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
|
||||
epoch, batch_idx * len(data), len(train_loader.dataset),
|
||||
100. * batch_idx / len(train_loader), loss.item()))
|
||||
|
||||
|
||||
def test(args, model, device, test_loader):
|
||||
model.eval()
|
||||
test_loss = 0
|
||||
correct = 0
|
||||
with torch.no_grad():
|
||||
for data, target in test_loader:
|
||||
data, target = data.to(device), target.to(device)
|
||||
output = model(data)
|
||||
test_loss += F.nll_loss(output, target, size_average=False, reduce=True).item() # sum up batch loss
|
||||
pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability
|
||||
correct += pred.eq(target.view_as(pred)).sum().item()
|
||||
|
||||
test_loss /= len(test_loader.dataset)
|
||||
print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
|
||||
test_loss, correct, len(test_loader.dataset),
|
||||
100. * correct / len(test_loader.dataset)))
|
||||
|
||||
|
||||
def main():
|
||||
# Training settings
|
||||
parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
|
||||
parser.add_argument('--batch-size', type=int, default=64, metavar='N',
|
||||
help='input batch size for training (default: 64)')
|
||||
parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
|
||||
help='input batch size for testing (default: 1000)')
|
||||
parser.add_argument('--epochs', type=int, default=5, metavar='N',
|
||||
help='number of epochs to train (default: 5)')
|
||||
parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
|
||||
help='learning rate (default: 0.01)')
|
||||
parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
|
||||
help='SGD momentum (default: 0.5)')
|
||||
parser.add_argument('--no-cuda', action='store_true', default=False,
|
||||
help='disables CUDA training')
|
||||
parser.add_argument('--seed', type=int, default=1, metavar='S',
|
||||
help='random seed (default: 1)')
|
||||
parser.add_argument('--log-interval', type=int, default=10, metavar='N',
|
||||
help='how many batches to wait before logging training status')
|
||||
parser.add_argument('--output-dir', type=str, default='outputs')
|
||||
args = parser.parse_args()
|
||||
use_cuda = not args.no_cuda and torch.cuda.is_available()
|
||||
|
||||
torch.manual_seed(args.seed)
|
||||
|
||||
device = torch.device("cuda" if use_cuda else "cpu")
|
||||
|
||||
output_dir = args.output_dir
|
||||
os.makedirs(output_dir, exist_ok=True)
|
||||
|
||||
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
|
||||
train_loader = torch.utils.data.DataLoader(
|
||||
datasets.MNIST('data', train=True, download=True,
|
||||
transform=transforms.Compose([transforms.ToTensor(),
|
||||
transforms.Normalize((0.1307,), (0.3081,))])
|
||||
),
|
||||
batch_size=args.batch_size, shuffle=True, **kwargs)
|
||||
test_loader = torch.utils.data.DataLoader(
|
||||
datasets.MNIST('data', train=False,
|
||||
transform=transforms.Compose([transforms.ToTensor(),
|
||||
transforms.Normalize((0.1307,), (0.3081,))])
|
||||
),
|
||||
batch_size=args.test_batch_size, shuffle=True, **kwargs)
|
||||
|
||||
model = Net().to(device)
|
||||
optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)
|
||||
|
||||
for epoch in range(1, args.epochs + 1):
|
||||
train(args, model, device, train_loader, optimizer, epoch, output_dir)
|
||||
test(args, model, device, test_loader)
|
||||
|
||||
# save model
|
||||
dummy_input = torch.randn(1, 1, 28, 28, device=device)
|
||||
model_path = os.path.join(output_dir, 'mnist.onnx')
|
||||
torch.onnx.export(model, dummy_input, model_path)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@@ -9,6 +9,13 @@
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -33,7 +40,7 @@
|
||||
"To make the best use of your time, make sure you have done the following:\n",
|
||||
"\n",
|
||||
"* Understand the [architecture and terms](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture) introduced by Azure Machine Learning\n",
|
||||
"* Go through the [configuration](../../../configuration.ipynb) notebook to:\n",
|
||||
"* If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [configuration](../../../configuration.ipynb) notebook to:\n",
|
||||
" * install the AML SDK\n",
|
||||
" * create a workspace and its configuration file (config.json)"
|
||||
]
|
||||
@@ -248,7 +255,7 @@
|
||||
"source": [
|
||||
"from azureml.core.conda_dependencies import CondaDependencies \n",
|
||||
"\n",
|
||||
"myenv = CondaDependencies.create(pip_packages=[\"numpy\",\"onnxruntime\",\"azureml-core\"])\n",
|
||||
"myenv = CondaDependencies.create(pip_packages=[\"numpy\",\"onnxruntime==0.4.0\",\"azureml-core\"])\n",
|
||||
"\n",
|
||||
"with open(\"myenv.yml\",\"w\") as f:\n",
|
||||
" f.write(myenv.serialize_to_string())"
|
||||
|
||||
@@ -8,6 +8,13 @@
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -34,7 +41,7 @@
|
||||
"## Prerequisites\n",
|
||||
"\n",
|
||||
"### 1. Install Azure ML SDK and create a new workspace\n",
|
||||
"Please follow [Azure ML configuration notebook](../../../configuration.ipynb) to set up your environment.\n",
|
||||
"If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, please follow [Azure ML configuration notebook](../../../configuration.ipynb) to set up your environment.\n",
|
||||
"\n",
|
||||
"### 2. Install additional packages needed for this Notebook\n",
|
||||
"You need to install the popular plotting library `matplotlib`, the image manipulation library `opencv`, and the `onnx` library in the conda environment where Azure Maching Learning SDK is installed.\n",
|
||||
|
||||
@@ -8,6 +8,13 @@
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -34,7 +41,7 @@
|
||||
"## Prerequisites\n",
|
||||
"\n",
|
||||
"### 1. Install Azure ML SDK and create a new workspace\n",
|
||||
"Please follow [Azure ML configuration notebook](../../../configuration.ipynb) to set up your environment.\n",
|
||||
"If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, please follow [Azure ML configuration notebook](../../../configuration.ipynb) to set up your environment.\n",
|
||||
"\n",
|
||||
"### 2. Install additional packages needed for this tutorial notebook\n",
|
||||
"You need to install the popular plotting library `matplotlib`, the image manipulation library `opencv`, and the `onnx` library in the conda environment where Azure Maching Learning SDK is installed. \n",
|
||||
|
||||
@@ -9,6 +9,13 @@
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -33,7 +40,7 @@
|
||||
"To make the best use of your time, make sure you have done the following:\n",
|
||||
"\n",
|
||||
"* Understand the [architecture and terms](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture) introduced by Azure Machine Learning\n",
|
||||
"* Go through the [configuration notebook](../../../configuration.ipynb) to:\n",
|
||||
"* If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [configuration notebook](../../../configuration.ipynb) to:\n",
|
||||
" * install the AML SDK\n",
|
||||
" * create a workspace and its configuration file (config.json)"
|
||||
]
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -0,0 +1,407 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||
"\n",
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Deploying a web service to Azure Kubernetes Service (AKS)\n",
|
||||
"This notebook shows the steps for deploying a service: registering a model, creating an image, provisioning a cluster (one time action), and deploying a service to it. \n",
|
||||
"We then test and delete the service, image and model."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core import Workspace\n",
|
||||
"from azureml.core.compute import AksCompute, ComputeTarget\n",
|
||||
"from azureml.core.webservice import Webservice, AksWebservice\n",
|
||||
"from azureml.core.image import Image\n",
|
||||
"from azureml.core.model import Model"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import azureml.core\n",
|
||||
"print(azureml.core.VERSION)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Get workspace\n",
|
||||
"Load existing workspace from the config file info."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.workspace import Workspace\n",
|
||||
"\n",
|
||||
"ws = Workspace.from_config()\n",
|
||||
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Register the model\n",
|
||||
"Register an existing trained model, add descirption and tags. Prior to registering the model, you should have a TensorFlow [Saved Model](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/README.md) in the `resnet50` directory. You can download a [pretrained resnet50](https://github.com/tensorflow/models/tree/master/official/resnet#pre-trained-model) and unpack it to that directory."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#Register the model\n",
|
||||
"from azureml.core.model import Model\n",
|
||||
"model = Model.register(model_path = \"resnet50\", # this points to a local file\n",
|
||||
" model_name = \"resnet50\", # this is the name the model is registered as\n",
|
||||
" tags = {'area': \"Image classification\", 'type': \"classification\"},\n",
|
||||
" description = \"Image classification trained on Imagenet Dataset\",\n",
|
||||
" workspace = ws)\n",
|
||||
"\n",
|
||||
"print(model.name, model.description, model.version)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Create an image\n",
|
||||
"Create an image using the registered model the script that will load and run the model."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%writefile score.py\n",
|
||||
"import tensorflow as tf\n",
|
||||
"import numpy as np\n",
|
||||
"import ujson\n",
|
||||
"from azureml.core.model import Model\n",
|
||||
"from azureml.contrib.services.aml_request import AMLRequest, rawhttp\n",
|
||||
"from azureml.contrib.services.aml_response import AMLResponse\n",
|
||||
"\n",
|
||||
"def init():\n",
|
||||
" global session\n",
|
||||
" global input_name\n",
|
||||
" global output_name\n",
|
||||
" \n",
|
||||
" session = tf.Session()\n",
|
||||
"\n",
|
||||
" model_path = Model.get_model_path('resnet50')\n",
|
||||
" model = tf.saved_model.loader.load(session, ['serve'], model_path)\n",
|
||||
" if len(model.signature_def['serving_default'].inputs) > 1:\n",
|
||||
" raise ValueError(\"This score.py only supports one input\")\n",
|
||||
" if len(model.signature_def['serving_default'].outputs) > 1:\n",
|
||||
" raise ValueError(\"This score.py only supports one input\")\n",
|
||||
" input_name = [tensor.name for tensor in model.signature_def['serving_default'].inputs.values()][0]\n",
|
||||
" output_name = [tensor.name for tensor in model.signature_def['serving_default'].outputs.values()][0]\n",
|
||||
" \n",
|
||||
"\n",
|
||||
"@rawhttp\n",
|
||||
"def run(request):\n",
|
||||
" if request.method == 'POST':\n",
|
||||
" reqBody = request.get_data(False)\n",
|
||||
" resp = score(reqBody)\n",
|
||||
" return AMLResponse(resp, 200)\n",
|
||||
" if request.method == 'GET':\n",
|
||||
" respBody = str.encode(\"GET is not supported\")\n",
|
||||
" return AMLResponse(respBody, 405)\n",
|
||||
" return AMLResponse(\"bad request\", 500)\n",
|
||||
"\n",
|
||||
"def score(data):\n",
|
||||
" result = session.run(output_name, {input_name: [data]})\n",
|
||||
" return ujson.dumps(result[0])\n",
|
||||
"\n",
|
||||
"if __name__ == \"__main__\":\n",
|
||||
" init()\n",
|
||||
" with open(\"test_image.jpg\", 'rb') as f:\n",
|
||||
" content = f.read()\n",
|
||||
" print(score(content))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.conda_dependencies import CondaDependencies \n",
|
||||
"\n",
|
||||
"myenv = CondaDependencies.create(conda_packages=['tensorflow-gpu==1.12.0','numpy','ujson','azureml-contrib-services'])\n",
|
||||
"\n",
|
||||
"with open(\"myenv.yml\",\"w\") as f:\n",
|
||||
" f.write(myenv.serialize_to_string())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.image import ContainerImage\n",
|
||||
"\n",
|
||||
"image_config = ContainerImage.image_configuration(execution_script = \"score.py\",\n",
|
||||
" runtime = \"python\",\n",
|
||||
" conda_file = \"myenv.yml\",\n",
|
||||
" gpu_enabled = True\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"image = ContainerImage.create(name = \"GpuImage\",\n",
|
||||
" # this is the model object\n",
|
||||
" models = [model],\n",
|
||||
" image_config = image_config,\n",
|
||||
" workspace = ws)\n",
|
||||
"\n",
|
||||
"image.wait_for_creation(show_output = True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Provision the AKS Cluster\n",
|
||||
"This is a one time setup. You can reuse this cluster for multiple deployments after it has been created. If you delete the cluster or the resource group that contains it, then you would have to recreate it."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Use the default configuration (can also provide parameters to customize)\n",
|
||||
"prov_config = AksCompute.provisioning_configuration(vm_size=\"Standard_NC6\")\n",
|
||||
"\n",
|
||||
"aks_name = 'my-aks-9' \n",
|
||||
"# Create the cluster\n",
|
||||
"aks_target = ComputeTarget.create(workspace = ws, \n",
|
||||
" name = aks_name, \n",
|
||||
" provisioning_configuration = prov_config)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Create AKS Cluster in an existing virtual network (optional)\n",
|
||||
"See code snippet below. Check the documentation [here](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-enable-virtual-network#use-azure-kubernetes-service) for more details."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"'''\n",
|
||||
"from azureml.core.compute import ComputeTarget, AksCompute\n",
|
||||
"\n",
|
||||
"# Create the compute configuration and set virtual network information\n",
|
||||
"config = AksCompute.provisioning_configuration(vm_size=\"Standard_NC6\", location=\"eastus2\")\n",
|
||||
"config.vnet_resourcegroup_name = \"mygroup\"\n",
|
||||
"config.vnet_name = \"mynetwork\"\n",
|
||||
"config.subnet_name = \"default\"\n",
|
||||
"config.service_cidr = \"10.0.0.0/16\"\n",
|
||||
"config.dns_service_ip = \"10.0.0.10\"\n",
|
||||
"config.docker_bridge_cidr = \"172.17.0.1/16\"\n",
|
||||
"\n",
|
||||
"# Create the compute target\n",
|
||||
"aks_target = ComputeTarget.create(workspace = ws,\n",
|
||||
" name = \"myaks\",\n",
|
||||
" provisioning_configuration = config)\n",
|
||||
"'''"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Enable SSL on the AKS Cluster (optional)\n",
|
||||
"See code snippet below. Check the documentation [here](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-secure-web-service) for more details"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# provisioning_config = AksCompute.provisioning_configuration(ssl_cert_pem_file=\"cert.pem\", ssl_key_pem_file=\"key.pem\", ssl_cname=\"www.contoso.com\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"aks_target.wait_for_completion(show_output = True)\n",
|
||||
"print(aks_target.provisioning_state)\n",
|
||||
"print(aks_target.provisioning_errors)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Optional step: Attach existing AKS cluster\n",
|
||||
"\n",
|
||||
"If you have existing AKS cluster in your Azure subscription, you can attach it to the Workspace."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"'''\n",
|
||||
"# Use the default configuration (can also provide parameters to customize)\n",
|
||||
"resource_id = '/subscriptions/92c76a2f-0e1c-4216-b65e-abf7a3f34c1e/resourcegroups/raymondsdk0604/providers/Microsoft.ContainerService/managedClusters/my-aks-0605d37425356b7d01'\n",
|
||||
"\n",
|
||||
"create_name='my-existing-aks' \n",
|
||||
"# Create the cluster\n",
|
||||
"attach_config = AksCompute.attach_configuration(resource_id=resource_id)\n",
|
||||
"aks_target = ComputeTarget.attach(workspace=ws, name=create_name, attach_configuration=attach_config)\n",
|
||||
"# Wait for the operation to complete\n",
|
||||
"aks_target.wait_for_completion(True)\n",
|
||||
"'''"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Deploy web service to AKS"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#Set the web service configuration (using default here)\n",
|
||||
"aks_config = AksWebservice.deploy_configuration()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"aks_service_name ='aks-service-1'\n",
|
||||
"\n",
|
||||
"aks_service = Webservice.deploy_from_image(workspace = ws, \n",
|
||||
" name = aks_service_name,\n",
|
||||
" image = image,\n",
|
||||
" deployment_config = aks_config,\n",
|
||||
" deployment_target = aks_target)\n",
|
||||
"aks_service.wait_for_deployment(show_output = True)\n",
|
||||
"print(aks_service.state)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Test the web service\n",
|
||||
"We test the web sevice by passing the test images content."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"import requests\n",
|
||||
"key1, key2 = aks_service.get_keys()\n",
|
||||
"\n",
|
||||
"headers = {'Content-Type':'application/json', 'Authorization': 'Bearer ' + key1}\n",
|
||||
"test_sampe = open('test_image.jpg', 'rb').read()\n",
|
||||
"resp = requests.post(aks_service.scoring_uri, test_sample, headers=headers)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Clean up\n",
|
||||
"Delete the service, image, model and compute target"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"aks_service.delete()\n",
|
||||
"image.delete()\n",
|
||||
"model.delete()\n",
|
||||
"aks_target.delete()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "aashishb"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -9,6 +9,13 @@
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -167,6 +174,31 @@
|
||||
"image.wait_for_creation(show_output = True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Use a custom Docker image\n",
|
||||
"\n",
|
||||
"You can also specify a custom Docker image to be used as base image if you don't want to use the default base image provided by Azure ML. Please make sure the custom Docker image has Ubuntu >= 16.04, Conda >= 4.5.\\* and Python(3.5.\\* or 3.6.\\*).\n",
|
||||
"\n",
|
||||
"Only Supported for `ContainerImage`(from azureml.core.image) with `python` runtime.\n",
|
||||
"```python\n",
|
||||
"# use an image available in public Container Registry without authentication\n",
|
||||
"image_config.base_image = \"mcr.microsoft.com/azureml/o16n-sample-user-base/ubuntu-miniconda\"\n",
|
||||
"\n",
|
||||
"# or, use an image available in a private Container Registry\n",
|
||||
"image_config.base_image = \"myregistry.azurecr.io/mycustomimage:1.0\"\n",
|
||||
"image_config.base_image_registry.address = \"myregistry.azurecr.io\"\n",
|
||||
"image_config.base_image_registry.username = \"username\"\n",
|
||||
"image_config.base_image_registry.password = \"password\"\n",
|
||||
"\n",
|
||||
"# or, use an image built during training.\n",
|
||||
"image_config.base_image = run.properties[\"AzureML.DerivedImageName\"]\n",
|
||||
"```\n",
|
||||
"You can get the address of training image from the properties of a Run object. Only new runs submitted with azureml-sdk>=1.0.22 to AMLCompute targets will have the 'AzureML.DerivedImageName' property. Instructions on how to get a Run can be found in [manage-runs](../../training/manage-runs/manage-runs.ipynb). \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -191,6 +223,56 @@
|
||||
" provisioning_configuration = prov_config)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Create AKS Cluster in an existing virtual network (optional)\n",
|
||||
"See code snippet below. Check the documentation [here](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-enable-virtual-network#use-azure-kubernetes-service) for more details."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"'''\n",
|
||||
"from azureml.core.compute import ComputeTarget, AksCompute\n",
|
||||
"\n",
|
||||
"# Create the compute configuration and set virtual network information\n",
|
||||
"config = AksCompute.provisioning_configuration(location=\"eastus2\")\n",
|
||||
"config.vnet_resourcegroup_name = \"mygroup\"\n",
|
||||
"config.vnet_name = \"mynetwork\"\n",
|
||||
"config.subnet_name = \"default\"\n",
|
||||
"config.service_cidr = \"10.0.0.0/16\"\n",
|
||||
"config.dns_service_ip = \"10.0.0.10\"\n",
|
||||
"config.docker_bridge_cidr = \"172.17.0.1/16\"\n",
|
||||
"\n",
|
||||
"# Create the compute target\n",
|
||||
"aks_target = ComputeTarget.create(workspace = ws,\n",
|
||||
" name = \"myaks\",\n",
|
||||
" provisioning_configuration = config)\n",
|
||||
"'''"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Enable SSL on the AKS Cluster (optional)\n",
|
||||
"See code snippet below. Check the documentation [here](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-secure-web-service) for more details"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# provisioning_config = AksCompute.provisioning_configuration(ssl_cert_pem_file=\"cert.pem\", ssl_key_pem_file=\"key.pem\", ssl_cname=\"www.contoso.com\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@@ -270,8 +352,9 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Test the web service\n",
|
||||
"We test the web sevice by passing data."
|
||||
"# Test the web service using run method\n",
|
||||
"We test the web sevice by passing data.\n",
|
||||
"Run() method retrieves API keys behind the scenes to make sure that call is authenticated."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -293,6 +376,57 @@
|
||||
"print(prediction)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Test the web service using raw HTTP request (optional)\n",
|
||||
"Alternatively you can construct a raw HTTP request and send it to the service. In this case you need to explicitly pass the HTTP header. This process is shown in the next 2 cells."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# retreive the API keys. AML generates two keys.\n",
|
||||
"'''\n",
|
||||
"key1, Key2 = aks_service.get_keys()\n",
|
||||
"print(key1)\n",
|
||||
"'''"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# construct raw HTTP request and send to the service\n",
|
||||
"'''\n",
|
||||
"%%time\n",
|
||||
"\n",
|
||||
"import requests\n",
|
||||
"\n",
|
||||
"import json\n",
|
||||
"\n",
|
||||
"test_sample = json.dumps({'data': [\n",
|
||||
" [1,2,3,4,5,6,7,8,9,10], \n",
|
||||
" [10,9,8,7,6,5,4,3,2,1]\n",
|
||||
"]})\n",
|
||||
"test_sample = bytes(test_sample,encoding = 'utf8')\n",
|
||||
"\n",
|
||||
"# Don't forget to add key to the HTTP header.\n",
|
||||
"headers = {'Content-Type':'application/json', 'Authorization': 'Bearer ' + key1}\n",
|
||||
"\n",
|
||||
"resp = requests.post(aks_service.scoring_uri, test_sample, headers=headers)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"print(\"prediction:\", resp.text)\n",
|
||||
"'''"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -317,7 +451,7 @@
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "raymondl"
|
||||
"name": "aashishb"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
|
||||
@@ -9,6 +9,13 @@
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -34,7 +41,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Prerequisites\n",
|
||||
"Make sure you go through the [configuration](../../../configuration.ipynb) Notebook first if you haven't."
|
||||
"If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, make sure you go through the [configuration](../../../configuration.ipynb) Notebook first if you haven't."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -261,6 +268,31 @@
|
||||
"image.wait_for_creation(show_output = True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Use a custom Docker image\n",
|
||||
"\n",
|
||||
"You can also specify a custom Docker image to be used as base image if you don't want to use the default base image provided by Azure ML. Please make sure the custom Docker image has Ubuntu >= 16.04, Conda >= 4.5.\\* and Python(3.5.\\* or 3.6.\\*).\n",
|
||||
"\n",
|
||||
"Only Supported for `ContainerImage`(from azureml.core.image) with `python` runtime.\n",
|
||||
"```python\n",
|
||||
"# use an image available in public Container Registry without authentication\n",
|
||||
"image_config.base_image = \"mcr.microsoft.com/azureml/o16n-sample-user-base/ubuntu-miniconda\"\n",
|
||||
"\n",
|
||||
"# or, use an image available in a private Container Registry\n",
|
||||
"image_config.base_image = \"myregistry.azurecr.io/mycustomimage:1.0\"\n",
|
||||
"image_config.base_image_registry.address = \"myregistry.azurecr.io\"\n",
|
||||
"image_config.base_image_registry.username = \"username\"\n",
|
||||
"image_config.base_image_registry.password = \"password\"\n",
|
||||
"\n",
|
||||
"# or, use an image built during training.\n",
|
||||
"image_config.base_image = run.properties[\"AzureML.DerivedImageName\"]\n",
|
||||
"```\n",
|
||||
"You can get the address of training image from the properties of a Run object. Only new runs submitted with azureml-sdk>=1.0.22 to AMLCompute targets will have the 'AzureML.DerivedImageName' property. Instructions on how to get a Run can be found in [manage-runs](../../training/manage-runs/manage-runs.ipynb). \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -395,7 +427,7 @@
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "raymondl"
|
||||
"name": "aashishb"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
|
||||
@@ -2,10 +2,7 @@
|
||||
|
||||
Follow these sample notebooks to learn:
|
||||
|
||||
1. [Explain tabular data](explain-tabular-data): Basic example of explaining model trained on tabular data.
|
||||
2. [Explain local classification](explain-local-sklearn-classification): Explain a scikit-learn classification model.
|
||||
3. [Explain local regression](explain-local-sklearn-regression): Explain a scikit-learn regression model.
|
||||
1. [Explain tabular data locally](explain-tabular-data-local): Basic example of explaining a model trained on tabular data.
|
||||
4. [Explain on remote AMLCompute](explain-on-amlcompute): Explain a model on a remote AMLCompute target.
|
||||
5. [Explain classification using Run History](explain-run-history-sklearn-classification): Explain a scikit-learn classification model with Run History.
|
||||
6. [Explain regression using Run History](explain-run-history-sklearn-regression): Explain a scikit-learn regression model with Run History.
|
||||
7. [Explain scikit-learn raw features](explain-sklearn-raw-features): Explain the raw features of a trained scikit-learn model.
|
||||
5. [Explain tabular data with Run History](explain-tabular-data-run-history): Explain a model with Run History.
|
||||
7. [Explain raw features](explain-tabular-data-raw-features): Explain the raw features of a trained model.
|
||||
|
||||
@@ -9,6 +9,13 @@
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -32,7 +39,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Prerequisites\n",
|
||||
"Make sure you go through the [configuration notebook](../../../configuration.ipynb) first if you haven't."
|
||||
"If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, make sure you go through the [configuration notebook](../../../configuration.ipynb) first if you haven't."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -196,9 +203,6 @@
|
||||
"# use conda_dependencies.yml to create a conda environment in the Docker image for execution\n",
|
||||
"run_config.environment.python.user_managed_dependencies = False\n",
|
||||
"\n",
|
||||
"# auto-prepare the Docker image when used for execution (if it is not already prepared)\n",
|
||||
"run_config.auto_prepare_environment = True\n",
|
||||
"\n",
|
||||
"azureml_pip_packages = [\n",
|
||||
" 'azureml-defaults', 'azureml-contrib-explain-model', 'azureml-core', 'azureml-telemetry',\n",
|
||||
" 'azureml-explain-model'\n",
|
||||
@@ -576,7 +580,7 @@
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "wamartin"
|
||||
"name": "mesameki"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
|
||||
@@ -1,221 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Summary\n",
|
||||
"From raw data that is a mixture of categoricals and numeric, featurize the categoricals using one hot encoding. Use tabular explainer to get explain object and then get raw feature importances"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||
"\n",
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Load titanic dataset. Impute missing values by filling both backward and forward since some data is at the first/last row. This is just for illustration and not a recommended way to impute missing data."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"titanic_url = ('https://raw.githubusercontent.com/amueller/'\n",
|
||||
" 'scipy-2017-sklearn/091d371/notebooks/datasets/titanic3.csv')\n",
|
||||
"data = pd.read_csv(titanic_url)\n",
|
||||
"# fill missing values\n",
|
||||
"data = data.fillna(method=\"ffill\")\n",
|
||||
"data = data.fillna(method=\"bfill\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"data.columns"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Similar to example [here](https://scikit-learn.org/stable/auto_examples/compose/plot_column_transformer_mixed_types.html#sphx-glr-auto-examples-compose-plot-column-transformer-mixed-types-py), use a subset of columns"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"\n",
|
||||
"numeric_features = ['age', 'fare']\n",
|
||||
"categorical_features = ['embarked', 'sex', 'pclass']\n",
|
||||
"\n",
|
||||
"y = data['survived'].values\n",
|
||||
"X = data[categorical_features + numeric_features]\n",
|
||||
"\n",
|
||||
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"One hot encode the categorical features"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.preprocessing import OneHotEncoder\n",
|
||||
"one_enc = OneHotEncoder()\n",
|
||||
"one_enc.fit(X_train[categorical_features])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Columnwise concatenate one hot encoded categoricals and numerical features."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import numpy as np\n",
|
||||
"from scipy import sparse\n",
|
||||
"def get_feats(X):\n",
|
||||
" a = one_enc.transform(X[categorical_features])\n",
|
||||
" b = X[numeric_features]\n",
|
||||
" return sparse.hstack((one_enc.transform(X[categorical_features]), X[numeric_features].values))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Train a logistic regression model on featurized training data."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.linear_model import LogisticRegression\n",
|
||||
"\n",
|
||||
"X_train_transformed = get_feats(X_train)\n",
|
||||
"X_test_transformed = get_feats(X_test)\n",
|
||||
"\n",
|
||||
"clf = LogisticRegression(solver='lbfgs', max_iter=200)\n",
|
||||
"clf.fit(X_train_transformed, y_train)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Get feature mapping between raw and generated features. Using the order in which features are concatenated in `get_feats` and using `categories_` in `OneHotEncoder` we are able to compute this mapping."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"raw_feat_mapping = []\n",
|
||||
"start_index = 0\n",
|
||||
"for cat_list in one_enc.categories_:\n",
|
||||
" raw_feat_mapping.append([start_index + i for i in range(len(cat_list))])\n",
|
||||
" start_index += len(cat_list)\n",
|
||||
"for i in range(len(numeric_features)):\n",
|
||||
" raw_feat_mapping.append([start_index])\n",
|
||||
" start_index += 1 "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.explain.model.tabular_explainer import TabularExplainer\n",
|
||||
"\n",
|
||||
"explainer = TabularExplainer(clf, X_train_transformed)\n",
|
||||
"global_explanation = explainer.explain_global(X_test_transformed)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"raw_feat_imps = global_explanation.get_raw_feature_importances(raw_feat_mapping)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"feature_names = categorical_features + numeric_features\n",
|
||||
"sorted_indices = np.argsort(raw_feat_imps)[::-1]\n",
|
||||
"\n",
|
||||
"for i in sorted_indices:\n",
|
||||
" print(\"{}: {}\".format(feature_names[i], raw_feat_imps[i]))"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "hichando"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
"name": "python36"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -7,6 +7,13 @@
|
||||
"# Breast cancer diagnosis classification with scikit-learn (run model explainer locally)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -24,7 +31,8 @@
|
||||
"\n",
|
||||
"1. Train a SVM classification model using Scikit-learn\n",
|
||||
"2. Run 'explain_model' with full data in local mode, which doesn't contact any Azure services\n",
|
||||
"3. Run 'explain_model' with summarized data in local mode, which doesn't contact any Azure services"
|
||||
"3. Run 'explain_model' with summarized data in local mode, which doesn't contact any Azure services\n",
|
||||
"4. Visualize the global and local explanations with the visualization dashboard."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -181,7 +189,9 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"local_explanation = tabular_explainer.explain_local(x_test[0,:])"
|
||||
"# explain the first member of the test set\n",
|
||||
"instance_num = 0\n",
|
||||
"local_explanation = tabular_explainer.explain_local(x_test[instance_num,:])"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -190,9 +200,21 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# local feature importance information\n",
|
||||
"local_importance_values = local_explanation.local_importance_values\n",
|
||||
"print('local importance for first instance: {}'.format(local_importance_values[y_test[0]]))"
|
||||
"# get the prediction for the first member of the test set and explain why model made that prediction\n",
|
||||
"prediction_value = clf.predict(x_test)[instance_num]\n",
|
||||
"\n",
|
||||
"sorted_local_importance_values = local_explanation.get_ranked_local_values()[prediction_value]\n",
|
||||
"sorted_local_importance_names = local_explanation.get_ranked_local_names()[prediction_value]\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"dict(zip(sorted_local_importance_names, sorted_local_importance_values))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# 2. Load visualization dashboard"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -201,7 +223,12 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print('local importance feature names: {}'.format(list(local_explanation.features)))"
|
||||
"# Note you will need to have extensions enabled prior to jupyter kernel starting\n",
|
||||
"!jupyter nbextension install --py --sys-prefix azureml.contrib.explain.model.visualize\n",
|
||||
"!jupyter nbextension enable --py --sys-prefix azureml.contrib.explain.model.visualize\n",
|
||||
"# Or, in Jupyter Labs, uncomment below\n",
|
||||
"# jupyter labextension install @jupyter-widgets/jupyterlab-manager\n",
|
||||
"# jupyter labextension install microsoft-mli-widget"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -210,14 +237,23 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dict(zip(local_explanation.features, local_explanation.local_importance_values[y_test[0]]))"
|
||||
"from azureml.contrib.explain.model.visualize import ExplanationDashboard"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ExplanationDashboard(global_explanation, model, x_test)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "wamartin"
|
||||
"name": "mesameki"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
@@ -0,0 +1,280 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Iris flower classification with scikit-learn (run model explainer locally)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||
"\n",
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Explain a model with the AML explain-model package\n",
|
||||
"\n",
|
||||
"1. Train a SVM classification model using Scikit-learn\n",
|
||||
"2. Run 'explain_model' with full data in local mode, which doesn't contact any Azure services\n",
|
||||
"3. Run 'explain_model' with summarized data in local mode, which doesn't contact any Azure services\n",
|
||||
"4. Visualize the global and local explanations with the visualization dashboard."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.datasets import load_iris\n",
|
||||
"from sklearn import svm\n",
|
||||
"from azureml.explain.model.tabular_explainer import TabularExplainer"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# 1. Run model explainer locally with full data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load the breast cancer diagnosis data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"iris = load_iris()\n",
|
||||
"X = iris['data']\n",
|
||||
"y = iris['target']\n",
|
||||
"classes = iris['target_names']\n",
|
||||
"feature_names = iris['feature_names']"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Split data into train and test\n",
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Train a SVM classification model, which you want to explain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"clf = svm.SVC(gamma=0.001, C=100., probability=True)\n",
|
||||
"model = clf.fit(x_train, y_train)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Explain predictions on your local machine"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"tabular_explainer = TabularExplainer(model, x_train, features = feature_names, classes=classes)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Explain overall model predictions (global explanation)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"global_explanation = tabular_explainer.explain_global(x_test)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Sorted SHAP values\n",
|
||||
"print('ranked global importance values: {}'.format(global_explanation.get_ranked_global_values()))\n",
|
||||
"# Corresponding feature names\n",
|
||||
"print('ranked global importance names: {}'.format(global_explanation.get_ranked_global_names()))\n",
|
||||
"# feature ranks (based on original order of features)\n",
|
||||
"print('global importance rank: {}'.format(global_explanation.global_importance_rank))\n",
|
||||
"# per class feature names\n",
|
||||
"print('ranked per class feature names: {}'.format(global_explanation.get_ranked_per_class_names()))\n",
|
||||
"# per class feature importance values\n",
|
||||
"print('ranked per class feature values: {}'.format(global_explanation.get_ranked_per_class_values()))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dict(zip(global_explanation.get_ranked_global_names(), global_explanation.get_ranked_global_values()))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Explain overall model predictions as a collection of local (instance-level) explanations"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# feature shap values for all features and all data points in the training data\n",
|
||||
"print('local importance values: {}'.format(global_explanation.local_importance_values))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Explain local data points (individual instances)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# explain the first member of the test set\n",
|
||||
"instance_num = 0\n",
|
||||
"local_explanation = tabular_explainer.explain_local(x_test[instance_num,:])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# get the prediction for the first member of the test set and explain why model made that prediction\n",
|
||||
"prediction_value = clf.predict(x_test)[instance_num]\n",
|
||||
"\n",
|
||||
"sorted_local_importance_values = local_explanation.get_ranked_local_values()[prediction_value]\n",
|
||||
"sorted_local_importance_names = local_explanation.get_ranked_local_names()[prediction_value]\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"dict(zip(sorted_local_importance_names, sorted_local_importance_values))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load visualization dashboard"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Note you will need to have extensions enabled prior to jupyter kernel starting\n",
|
||||
"!jupyter nbextension install --py --sys-prefix azureml.contrib.explain.model.visualize\n",
|
||||
"!jupyter nbextension enable --py --sys-prefix azureml.contrib.explain.model.visualize\n",
|
||||
"# Or, in Jupyter Labs, uncomment below\n",
|
||||
"# jupyter labextension install @jupyter-widgets/jupyterlab-manager\n",
|
||||
"# jupyter labextension install microsoft-mli-widget"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.contrib.explain.model.visualize import ExplanationDashboard"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ExplanationDashboard(global_explanation, model, x_test)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "mesameki"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
"name": "python36"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -7,6 +7,13 @@
|
||||
"# Boston Housing Price Prediction with scikit-learn (run model explainer locally)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -24,7 +31,8 @@
|
||||
"\n",
|
||||
"1. Train a GradientBoosting regression model using Scikit-learn\n",
|
||||
"2. Run 'explain_model' with full dataset in local mode, which doesn't contact any Azure services.\n",
|
||||
"3. Run 'explain_model' with summarized dataset in local mode, which doesn't contact any Azure services."
|
||||
"3. Run 'explain_model' with summarized dataset in local mode, which doesn't contact any Azure services.\n",
|
||||
"4. Visualize the global and local explanations with the visualization dashboard."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -85,10 +93,10 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"clf = GradientBoostingRegressor(n_estimators=100, max_depth=4,\n",
|
||||
"reg = GradientBoostingRegressor(n_estimators=100, max_depth=4,\n",
|
||||
" learning_rate=0.1, loss='huber',\n",
|
||||
" random_state=1)\n",
|
||||
"model = clf.fit(x_train, y_train)"
|
||||
"model = reg.fit(x_train, y_train)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -125,15 +133,6 @@
|
||||
"global_explanation = tabular_explainer.explain_global(x_test)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"help(global_explanation)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@@ -196,16 +195,58 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# local feature importance information\n",
|
||||
"local_importance_values = local_explanation.local_importance_values\n",
|
||||
"print('local importance values: {}'.format(local_importance_values))"
|
||||
"# sorted local feature importance information; reflects the original feature order\n",
|
||||
"sorted_local_importance_names = local_explanation.get_ranked_local_names()\n",
|
||||
"sorted_local_importance_values = local_explanation.get_ranked_local_values()\n",
|
||||
"\n",
|
||||
"print('sorted local importance names: {}'.format(sorted_local_importance_names))\n",
|
||||
"print('sorted local importance values: {}'.format(sorted_local_importance_values))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load visualization dashboard"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Note you will need to have extensions enabled prior to jupyter kernel starting\n",
|
||||
"!jupyter nbextension install --py --sys-prefix azureml.contrib.explain.model.visualize\n",
|
||||
"!jupyter nbextension enable --py --sys-prefix azureml.contrib.explain.model.visualize\n",
|
||||
"# Or, in Jupyter Labs, uncomment below\n",
|
||||
"# jupyter labextension install @jupyter-widgets/jupyterlab-manager\n",
|
||||
"# jupyter labextension install microsoft-mli-widget"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.contrib.explain.model.visualize import ExplanationDashboard"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ExplanationDashboard(global_explanation, model, x_test)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "wamartin"
|
||||
"name": "mesameki"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
@@ -0,0 +1,302 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Summary\n",
|
||||
"From raw data that is a mixture of categoricals and numeric, featurize the categoricals using one hot encoding. Use tabular explainer to get explain object and then get raw feature importances"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||
"\n",
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Explain a model with the AML explain-model package on raw features\n",
|
||||
"\n",
|
||||
"1. Train a Logistic Regression model using Scikit-learn\n",
|
||||
"2. Run 'explain_model' with full dataset in local mode, which doesn't contact any Azure services.\n",
|
||||
"3. Run 'explain_model' with summarized dataset in local mode, which doesn't contact any Azure services.\n",
|
||||
"4. Visualize the global and local explanations with the visualization dashboard."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This example needs sklearn-pandas. If it is not installed, uncomment and run the following line."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!pip install sklearn-pandas"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.pipeline import Pipeline\n",
|
||||
"from sklearn.impute import SimpleImputer\n",
|
||||
"from sklearn.preprocessing import StandardScaler, OneHotEncoder\n",
|
||||
"from sklearn.linear_model import LogisticRegression\n",
|
||||
"from azureml.explain.model.tabular_explainer import TabularExplainer\n",
|
||||
"from sklearn_pandas import DataFrameMapper\n",
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"titanic_url = ('https://raw.githubusercontent.com/amueller/'\n",
|
||||
" 'scipy-2017-sklearn/091d371/notebooks/datasets/titanic3.csv')\n",
|
||||
"data = pd.read_csv(titanic_url)\n",
|
||||
"# fill missing values\n",
|
||||
"data = data.fillna(method=\"ffill\")\n",
|
||||
"data = data.fillna(method=\"bfill\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# 1. Run model explainer locally with full data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Similar to example [here](https://scikit-learn.org/stable/auto_examples/compose/plot_column_transformer_mixed_types.html#sphx-glr-auto-examples-compose-plot-column-transformer-mixed-types-py), use a subset of columns"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"\n",
|
||||
"numeric_features = ['age', 'fare']\n",
|
||||
"categorical_features = ['embarked', 'sex', 'pclass']\n",
|
||||
"\n",
|
||||
"y = data['survived'].values\n",
|
||||
"X = data[categorical_features + numeric_features]\n",
|
||||
"\n",
|
||||
"x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.pipeline import Pipeline\n",
|
||||
"from sklearn.impute import SimpleImputer\n",
|
||||
"from sklearn.preprocessing import StandardScaler, OneHotEncoder\n",
|
||||
"from sklearn_pandas import DataFrameMapper\n",
|
||||
"\n",
|
||||
"# Impute, standardize the numeric features and one-hot encode the categorical features. \n",
|
||||
"\n",
|
||||
"transformations = [\n",
|
||||
" ([\"age\", \"fare\"], Pipeline(steps=[\n",
|
||||
" ('imputer', SimpleImputer(strategy='median')),\n",
|
||||
" ('scaler', StandardScaler())\n",
|
||||
" ])),\n",
|
||||
" ([\"embarked\"], Pipeline(steps=[\n",
|
||||
" (\"imputer\", SimpleImputer(strategy='constant', fill_value='missing')), \n",
|
||||
" (\"encoder\", OneHotEncoder(sparse=False))])),\n",
|
||||
" ([\"sex\", \"pclass\"], OneHotEncoder(sparse=False)) \n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Append classifier to preprocessing pipeline.\n",
|
||||
"# Now we have a full prediction pipeline.\n",
|
||||
"clf = Pipeline(steps=[('preprocessor', DataFrameMapper(transformations)),\n",
|
||||
" ('classifier', LogisticRegression(solver='lbfgs'))])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Train a Logistic Regression model, which you want to explain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model = clf.fit(x_train, y_train)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Explain predictions on your local machine"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"tabular_explainer = TabularExplainer(clf.steps[-1][1], initialization_examples=x_train, features=x_train.columns, transformations=transformations)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Passing in test dataset for evaluation examples - note it must be a representative sample of the original data\n",
|
||||
"# x_train can be passed as well, but with more examples explanations will take longer although they may be more accurate\n",
|
||||
"global_explanation = tabular_explainer.explain_global(x_test)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"sorted_global_importance_values = global_explanation.get_ranked_global_values()\n",
|
||||
"sorted_global_importance_names = global_explanation.get_ranked_global_names()\n",
|
||||
"dict(zip(sorted_global_importance_names, sorted_global_importance_values))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Explain overall model predictions as a collection of local (instance-level) explanations"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# explain the first member of the test set\n",
|
||||
"local_explanation = tabular_explainer.explain_local(x_test[:1])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# get the prediction for the first member of the test set and explain why model made that prediction\n",
|
||||
"prediction_value = clf.predict(x_test)[0]\n",
|
||||
"\n",
|
||||
"sorted_local_importance_values = local_explanation.get_ranked_local_values()[prediction_value]\n",
|
||||
"sorted_local_importance_names = local_explanation.get_ranked_local_names()[prediction_value]\n",
|
||||
"\n",
|
||||
"# Sorted local SHAP values\n",
|
||||
"print('ranked local importance values: {}'.format(sorted_local_importance_values))\n",
|
||||
"# Corresponding feature names\n",
|
||||
"print('ranked local importance names: {}'.format(sorted_local_importance_names))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# 2. Load visualization dashboard"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Note you will need to have extensions enabled prior to jupyter kernel starting\n",
|
||||
"!jupyter nbextension install --py --sys-prefix azureml.contrib.explain.model.visualize\n",
|
||||
"!jupyter nbextension enable --py --sys-prefix azureml.contrib.explain.model.visualize\n",
|
||||
"# Or, in Jupyter Labs, uncomment below\n",
|
||||
"# jupyter labextension install @jupyter-widgets/jupyterlab-manager\n",
|
||||
"# jupyter labextension install microsoft-mli-widget"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.contrib.explain.model.visualize import ExplanationDashboard"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ExplanationDashboard(global_explanation, model, x_test)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "mesameki"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
"name": "python36"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -7,6 +7,13 @@
|
||||
"# Breast cancer diagnosis classification with scikit-learn (save model explanations via AML Run History)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -229,7 +236,7 @@
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "wamartin"
|
||||
"name": "mesameki"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
@@ -7,6 +7,13 @@
|
||||
"# Boston Housing Price Prediction with scikit-learn (save model explanations via AML Run History)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -243,7 +250,7 @@
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "wamartin"
|
||||
"name": "mesameki"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
@@ -1,267 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||
"\n",
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Uncomment these if explanation packages are not already installed in your environment\n",
|
||||
"#!pip install --upgrade azureml-sdk[explain]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Explain a model with the AML explain-model package\n",
|
||||
"\n",
|
||||
"1. Train a SVM model using Scikit-learn\n",
|
||||
"2. Run 'explain_model' in local mode, which doesn't contact any Azure services\n",
|
||||
"3. Run 'explain_model' with AML Run History, which leverages Run History Service to store and manage the explanation data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Disclaimer: this notebook is a preview of model explainability, and the APIs shown below are subject to breaking changes"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Train a SVM model, which we will try to explain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Import Iris dataset\n",
|
||||
"from sklearn import datasets\n",
|
||||
"iris = datasets.load_iris()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Split data into train and test\n",
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"x_train, x_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.2, random_state=0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Import scikit learn, fit a SVM model\n",
|
||||
"def create_scikit_learn_model(X, y):\n",
|
||||
" from sklearn import svm\n",
|
||||
" clf = svm.SVC(gamma=0.001, C=100., probability=True)\n",
|
||||
" model = clf.fit(X, y)\n",
|
||||
" return model\n",
|
||||
"model = create_scikit_learn_model(x_train, y_train)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Run model explainer locally"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.explain.model.tabular_explainer import TabularExplainer"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import time\n",
|
||||
"start = time.time()\n",
|
||||
"\n",
|
||||
"explainer = TabularExplainer(model, x_train, features=iris.feature_names)\n",
|
||||
"global_explanation = explainer.explain_global(x_test)\n",
|
||||
"\n",
|
||||
"# importance values for each class, test example, and feature (local importance)\n",
|
||||
"local_imp_values = global_explanation.local_importance_values\n",
|
||||
"# base prediction with feature importances ignored\n",
|
||||
"expected_values = global_explanation.expected_values\n",
|
||||
"# global feature importance information\n",
|
||||
"global_imp_values = global_explanation.global_importance_values\n",
|
||||
"ranked_global_imp_names = global_explanation.get_ranked_global_names()\n",
|
||||
"# global per-class feature importance information\n",
|
||||
"per_class_imp_values = global_explanation.per_class_values\n",
|
||||
"ranked_per_class_imp_names = global_explanation.get_ranked_per_class_names()\n",
|
||||
"\n",
|
||||
"end = time.time()\n",
|
||||
"print(end - start)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Run model explainer with AML Run History"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import azureml.core\n",
|
||||
"from azureml.core import Workspace, Experiment, Run\n",
|
||||
"from azureml.explain.model.tabular_explainer import TabularExplainer\n",
|
||||
"from azureml.contrib.explain.model.explanation.explanation_client import ExplanationClient\n",
|
||||
"# Check core SDK version number\n",
|
||||
"print(\"SDK version:\", azureml.core.VERSION)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ws = Workspace.from_config()\n",
|
||||
"print('Workspace name: ' + ws.name, \n",
|
||||
" 'Azure region: ' + ws.location, \n",
|
||||
" 'Subscription id: ' + ws.subscription_id, \n",
|
||||
" 'Resource group: ' + ws.resource_group, sep = '\\n')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"experiment_name = 'explain_model'\n",
|
||||
"experiment = Experiment(ws, experiment_name)\n",
|
||||
"run = experiment.start_logging()\n",
|
||||
"client = ExplanationClient.from_run(run)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import time\n",
|
||||
"start = time.time()\n",
|
||||
"explainer = TabularExplainer(model, x_train, features=iris.feature_names, classes=iris.target_names)\n",
|
||||
"explanation = explainer.explain_global(x_test)\n",
|
||||
"client.upload_model_explanation(explanation)\n",
|
||||
"end = time.time()\n",
|
||||
"print(end - start)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"explanation_from_run = client.download_model_explanation()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# global feature importance information\n",
|
||||
"global_imp_values = explanation_from_run.global_importance_values\n",
|
||||
"global_imp_names = explanation_from_run.get_ranked_global_names()\n",
|
||||
"# global per-class feature importance information\n",
|
||||
"per_class_imp_values = explanation_from_run.per_class_values\n",
|
||||
"per_class_imp_names = explanation_from_run.get_ranked_per_class_names()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## This visualization is unsupported, and is not guaranteed to work in the future"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Get the shap values and explore locally\n",
|
||||
"import shap\n",
|
||||
"import numpy as np\n",
|
||||
"shap.initjs()\n",
|
||||
"display(shap.force_plot(explanation_from_run.expected_values[1], np.asarray(explanation_from_run.local_importance_values[1]), x_test))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run.complete()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "wamartin"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
"name": "python36"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -37,19 +37,11 @@ Azure Machine Learning Pipelines optimize for simplicity, speed, and efficiency.
|
||||
In this directory, there are two types of notebooks:
|
||||
|
||||
* The first type of notebooks will introduce you to core Azure Machine Learning Pipelines features. These notebooks below belong in this category, and are designed to go in sequence; they're all located in the "intro-to-pipelines" folder:
|
||||
|
||||
1. [aml-pipelines-getting-started.ipynb](https://aka.ms/pl-get-started)
|
||||
2. [aml-pipelines-with-data-dependency-steps.ipynb](https://aka.ms/pl-data-dep)
|
||||
3. [aml-pipelines-publish-and-run-using-rest-endpoint.ipynb](https://aka.ms/pl-pub-rep)
|
||||
4. [aml-pipelines-data-transfer.ipynb](https://aka.ms/pl-data-trans)
|
||||
5. [aml-pipelines-use-databricks-as-compute-target.ipynb](https://aka.ms/pl-databricks)
|
||||
6. [aml-pipelines-use-adla-as-compute-target.ipynb](https://aka.ms/pl-adla)
|
||||
7. [aml-pipelines-parameter-tuning-with-hyperdrive.ipynb](https://aka.ms/pl-hyperdrive)
|
||||
8. [aml-pipelines-how-to-use-azurebatch-to-run-a-windows-executable.ipynb](https://aka.ms/pl-azbatch)
|
||||
9. [aml-pipelines-setup-schedule-for-a-published-pipeline.ipynb](https://aka.ms/pl-schedule)
|
||||
10. [aml-pipelines-with-automated-machine-learning-step.ipynb](https://aka.ms/pl-automl)
|
||||
Take a look at [intro-to-pipelines](./intro-to-pipelines/) for the list of notebooks that introduce core Azure Machine Learning Pipelines concepts.
|
||||
|
||||
* The second type of notebooks illustrate more sophisticated scenarios, and are independent of each other. These notebooks include:
|
||||
|
||||
1. [pipeline-batch-scoring.ipynb](https://aka.ms/pl-batch-score)
|
||||
2. [pipeline-style-transfer.ipynb](https://aka.ms/pl-style-trans)
|
||||
1. [pipeline-batch-scoring.ipynb](https://aka.ms/pl-batch-score): This notebook demonstrates how to run a batch scoring job using Azure Machine Learning pipelines.
|
||||
2. [pipeline-style-transfer.ipynb](https://aka.ms/pl-style-trans): This notebook demonstrates a multi-step pipeline that uses GPU compute.
|
||||
|
||||

|
||||
|
||||
@@ -0,0 +1,18 @@
|
||||
# Introduction to Azure Machine Learning Pipelines
|
||||
|
||||
The following notebooks introduce core Azure Machine Learning Pipelines features, one concept at a time.
|
||||
The notebooks below are designed to be worked through in sequence.
|
||||
|
||||
1. [aml-pipelines-getting-started.ipynb](https://aka.ms/pl-get-started): Start with this notebook to understand the concepts of using Azure Machine Learning Pipelines. This notebook will show you how to run steps in parallel and in sequence.
|
||||
2. [aml-pipelines-with-data-dependency-steps.ipynb](https://aka.ms/pl-data-dep): This notebook shows how to connect steps in your pipeline using data. Data produced by one step is used by subsequent steps, forcing an explicit dependency between steps.
|
||||
3. [aml-pipelines-publish-and-run-using-rest-endpoint.ipynb](https://aka.ms/pl-pub-rep): Once you are satisfied with your iterative runs, you can publish your pipeline to get a REST endpoint, which can be invoked from non-Python clients as well.
|
||||
4. [aml-pipelines-data-transfer.ipynb](https://aka.ms/pl-data-trans): This notebook shows how to transfer data between supported datastores.
|
||||
5. [aml-pipelines-use-databricks-as-compute-target.ipynb](https://aka.ms/pl-databricks): This notebook shows how you can use Pipelines to send your compute payload to Azure Databricks.
|
||||
6. [aml-pipelines-use-adla-as-compute-target.ipynb](https://aka.ms/pl-adla): This notebook shows how you can use Azure Data Lake Analytics (ADLA) as a compute target.
|
||||
7. [aml-pipelines-how-to-use-estimatorstep.ipynb](https://aka.ms/pl-estimator): This notebook shows how to use the EstimatorStep.
|
||||
8. [aml-pipelines-parameter-tuning-with-hyperdrive.ipynb](https://aka.ms/pl-hyperdrive): This notebook shows how you can do hyperparameter tuning in a Pipeline using the HyperDriveStep.
|
||||
9. [aml-pipelines-how-to-use-azurebatch-to-run-a-windows-executable.ipynb](https://aka.ms/pl-azbatch): This notebook shows how AzureBatchStep can be used to run your custom code in an Azure Batch cluster.
|
||||
10. [aml-pipelines-setup-schedule-for-a-published-pipeline.ipynb](https://aka.ms/pl-schedule): Once you publish a Pipeline, you can schedule it to trigger on a time interval or on data changes in a defined datastore.
|
||||
|
||||
|
||||

|
||||
@@ -8,6 +8,13 @@
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -53,7 +60,7 @@
|
||||
"source": [
|
||||
"## Initialize Workspace\n",
|
||||
"\n",
|
||||
"Initialize a workspace object from persisted configuration. Make sure the config file is present at .\\config.json\n",
|
||||
"Initialize a workspace object from persisted configuration. If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, make sure the config file is present at .\\config.json\n",
|
||||
"\n",
|
||||
"If you don't have a config.json file, please go through the configuration Notebook located here:\n",
|
||||
"https://github.com/Azure/MachineLearningNotebooks. \n",
|
||||
@@ -141,7 +148,7 @@
|
||||
" print(\"registered blob datastore with name: %s\" % blob_datastore_name)\n",
|
||||
"\n",
|
||||
"# CLI:\n",
|
||||
"# az ml datastore register-blob -n <datastore-name> -a <account-name> -c <container-name> -k <account-key> [-t <sas-token>]"
|
||||
"# az ml datastore attach-blob -n <datastore-name> -a <account-name> -c <container-name> -k <account-key> [-t <sas-token>]"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -8,6 +8,13 @@
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -37,7 +44,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Prerequisites and Azure Machine Learning Basics\n",
|
||||
"Make sure you go through the configuration Notebook located at https://github.com/Azure/MachineLearningNotebooks first if you haven't. This sets you up with a working config file that has information on your workspace, subscription id, etc. \n"
|
||||
"If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, make sure you go through the [configuration notebook](../../../configuration.ipynb) located at https://github.com/Azure/MachineLearningNotebooks first if you haven't. This sets you up with a working config file that has information on your workspace, subscription id, etc. \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -146,7 +153,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Datastore concepts\n",
|
||||
"A [Datastore](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.datastore(class)?view=azure-ml-py) is a place where data can be stored that is then made accessible to a compute either by means of mounting or copying the data to the compute target. \n",
|
||||
"A [Datastore](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.datastore.datastore?view=azure-ml-py) is a place where data can be stored that is then made accessible to a compute either by means of mounting or copying the data to the compute target. \n",
|
||||
"\n",
|
||||
"A Datastore can either be backed by an Azure File Storage (default) or by an Azure Blob Storage.\n",
|
||||
"\n",
|
||||
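To make the Datastore concept above concrete, here is a minimal sketch of getting the default and blob-backed datastores and uploading a local folder (the local path and target path are assumptions for illustration, not values from this notebook):

```python
from azureml.core import Workspace, Datastore

ws = Workspace.from_config()

# Default datastore registered with the workspace (Azure File Storage by default)
def_file_store = ws.get_default_datastore()

# Blob-backed datastore registered with the workspace
def_blob_store = Datastore(ws, "workspaceblobstore")

# Upload a local folder so it can later be mounted or downloaded on a compute target
def_blob_store.upload(src_dir="./data",           # assumed local folder
                      target_path="sample-data",  # assumed path on the datastore
                      overwrite=True,
                      show_progress=True)
```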
@@ -189,7 +196,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### (Optional) See your files using Azure Portal\n",
|
||||
"Once you successfully uploaded the files, you can browse to them (or upload more files) using [Azure Portal](https://portal.azure.com). At the portal, make sure you have selected **AzureML Nursery** as your subscription (click *Resource Groups* and then select the subscription). Then look for your **Machine Learning Workspace** (it has your *alias* as the name). It has a link to your storage. Click on the storage link. It will take you to a page where you can see [Blobs](https://docs.microsoft.com/en-us/azure/storage/blobs/storage-blobs-introduction), [Files](https://docs.microsoft.com/en-us/azure/storage/files/storage-files-introduction), [Tables](https://docs.microsoft.com/en-us/azure/storage/tables/table-storage-overview), and [Queues](https://docs.microsoft.com/en-us/azure/storage/queues/storage-queues-introduction). We have just uploaded a file to the Blob storage and another one to the File storage. You should be able to see both of these files in their respective locations. "
|
||||
"Once you successfully uploaded the files, you can browse to them (or upload more files) using [Azure Portal](https://portal.azure.com). At the portal, make sure you have selected your subscription (click *Resource Groups* and then select the subscription). Then look for your **Machine Learning Workspace** name. It has a link to your storage. Click on the storage link. It will take you to a page where you can see [Blobs](https://docs.microsoft.com/en-us/azure/storage/blobs/storage-blobs-introduction), [Files](https://docs.microsoft.com/en-us/azure/storage/files/storage-files-introduction), [Tables](https://docs.microsoft.com/en-us/azure/storage/tables/table-storage-overview), and [Queues](https://docs.microsoft.com/en-us/azure/storage/queues/storage-queues-introduction). We have uploaded a file each to the Blob storage and to the File storage in the above step. You should be able to see both of these files in their respective locations. "
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -226,15 +233,8 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Retrieve or create a Azure Machine Learning compute\n",
|
||||
"Azure Machine Learning Compute is a service for provisioning and managing clusters of Azure virtual machines for running machine learning workloads. Let's create a new Azure Machine Learning Compute in the current workspace, if it doesn't already exist. We will then run the training script on this compute target.\n",
|
||||
"\n",
|
||||
"If we could not find the compute with the given name in the previous cell, then we will create a new compute here. We will create an Azure Machine Learning Compute containing **STANDARD_D2_V2 CPU VMs**. This process is broken down into the following steps:\n",
|
||||
"\n",
|
||||
"1. Create the configuration\n",
|
||||
"2. Create the Azure Machine Learning compute\n",
|
||||
"\n",
|
||||
"**This process will take about 3 minutes and is providing only sparse output in the process. Please make sure to wait until the call returns before moving to the next cell.**"
|
||||
"#### Retrieve default Azure Machine Learning compute\n",
|
||||
"Azure Machine Learning Compute is a service for provisioning and managing clusters of Azure virtual machines for running machine learning workloads. Let's get the default Azure Machine Learning Compute in the current workspace. We will then run the training script on this compute target."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -243,22 +243,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.compute_target import ComputeTargetException\n",
|
||||
"\n",
|
||||
"aml_compute_target = \"aml-compute\"\n",
|
||||
"try:\n",
|
||||
" aml_compute = AmlCompute(ws, aml_compute_target)\n",
|
||||
" print(\"found existing compute target.\")\n",
|
||||
"except ComputeTargetException:\n",
|
||||
" print(\"creating new compute target\")\n",
|
||||
" \n",
|
||||
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\",\n",
|
||||
" min_nodes = 1, \n",
|
||||
" max_nodes = 4) \n",
|
||||
" aml_compute = ComputeTarget.create(ws, aml_compute_target, provisioning_config)\n",
|
||||
" aml_compute.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)\n",
|
||||
" \n",
|
||||
"print(\"Azure Machine Learning Compute attached\")\n"
|
||||
"aml_compute = ws.get_default_compute_target(\"CPU\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -295,15 +280,19 @@
|
||||
"## Creating a Step in a Pipeline\n",
|
||||
"A Step is a unit of execution. Step typically needs a target of execution (compute target), a script to execute, and may require script arguments and inputs, and can produce outputs. The step also could take a number of other parameters. Azure Machine Learning Pipelines provides the following built-in Steps:\n",
|
||||
"\n",
|
||||
"- [**PythonScriptStep**](https://docs.microsoft.com/en-us/python/api/azureml-pipeline-steps/azureml.pipeline.steps.python_script_step.pythonscriptstep?view=azure-ml-py): Add a step to run a Python script in a Pipeline.\n",
|
||||
"- [**PythonScriptStep**](https://docs.microsoft.com/en-us/python/api/azureml-pipeline-steps/azureml.pipeline.steps.python_script_step.pythonscriptstep?view=azure-ml-py): Adds a step to run a Python script in a Pipeline.\n",
|
||||
"- [**AdlaStep**](https://docs.microsoft.com/en-us/python/api/azureml-pipeline-steps/azureml.pipeline.steps.adla_step.adlastep?view=azure-ml-py): Adds a step to run U-SQL script using Azure Data Lake Analytics.\n",
|
||||
"- [**DataTransferStep**](https://docs.microsoft.com/en-us/python/api/azureml-pipeline-steps/azureml.pipeline.steps.data_transfer_step.datatransferstep?view=azure-ml-py): Transfers data between Azure Blob and Data Lake accounts.\n",
|
||||
"- [**DatabricksStep**](https://docs.microsoft.com/en-us/python/api/azureml-pipeline-steps/azureml.pipeline.steps.databricks_step.databricksstep?view=azure-ml-py): Adds a DataBricks notebook as a step in a Pipeline.\n",
|
||||
"- [**HyperDriveStep**](https://docs.microsoft.com/en-us/python/api/azureml-pipeline-steps/azureml.pipeline.steps.hyper_drive_step.hyperdrivestep?view=azure-ml-py): Creates a Hyper Drive step for Hyper Parameter Tuning in a Pipeline.\n",
|
||||
"- [**AzureBatchStep**](https://docs.microsoft.com/en-us/python/api/azureml-pipeline-steps/azureml.pipeline.steps.azurebatch_step.azurebatchstep?view=azure-ml-py): Creates a step for submitting jobs to Azure Batch\n",
|
||||
"- [**EstimatorStep**](https://docs.microsoft.com/en-us/python/api/azureml-pipeline-steps/azureml.pipeline.steps.estimator_step.estimatorstep?view=azure-ml-py): Adds a step to run Estimator in a Pipeline.\n",
|
||||
"- [**MpiStep**](https://docs.microsoft.com/en-us/python/api/azureml-pipeline-steps/azureml.pipeline.steps.mpi_step.mpistep?view=azure-ml-py): Adds a step to run a MPI job in a Pipeline.\n",
|
||||
"- [**AutoMLStep**](https://docs.microsoft.com/en-us/python/api/azureml-train-automl/azureml.train.automl.automlstep?view=azure-ml-py): Creates a AutoML step in a Pipeline.\n",
|
||||
"\n",
|
||||
"The following code will create a PythonScriptStep to be executed in the Azure Machine Learning Compute we created above using train.py, one of the files already made available in the project folder.\n",
|
||||
"\n",
|
||||
"A **PythonScriptStep** is a basic, built-in step to run a Python Script on a compute target. It takes a script name and optionally other parameters like arguments for the script, compute target, inputs and outputs. If no compute target is specified, default compute target for the workspace is used."
|
||||
"A **PythonScriptStep** is a basic, built-in step to run a Python Script on a compute target. It takes a script name and optionally other parameters like arguments for the script, compute target, inputs and outputs. If no compute target is specified, default compute target for the workspace is used. You can also use a [**RunConfiguration**](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.runconfiguration?view=azure-ml-py) to specify requirements for the PythonScriptStep, such as conda dependencies and docker image."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -369,10 +358,31 @@
|
||||
" compute_target=aml_compute, \n",
|
||||
" source_directory=project_folder)\n",
|
||||
"\n",
|
||||
"# Use a RunConfiguration to specify some additional requirements for this step.\n",
|
||||
"from azureml.core.runconfig import RunConfiguration\n",
|
||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||
"from azureml.core.runconfig import DEFAULT_CPU_IMAGE\n",
|
||||
"\n",
|
||||
"# create a new runconfig object\n",
|
||||
"run_config = RunConfiguration()\n",
|
||||
"\n",
|
||||
"# enable Docker \n",
|
||||
"run_config.environment.docker.enabled = True\n",
|
||||
"\n",
|
||||
"# set Docker base image to the default CPU-based image\n",
|
||||
"run_config.environment.docker.base_image = DEFAULT_CPU_IMAGE\n",
|
||||
"\n",
|
||||
"# use conda_dependencies.yml to create a conda environment in the Docker image for execution\n",
|
||||
"run_config.environment.python.user_managed_dependencies = False\n",
|
||||
"\n",
|
||||
"# specify CondaDependencies obj\n",
|
||||
"run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])\n",
|
||||
"\n",
|
||||
"step3 = PythonScriptStep(name=\"extract_step\",\n",
|
||||
" script_name=\"extract.py\", \n",
|
||||
" compute_target=aml_compute, \n",
|
||||
" source_directory=project_folder)\n",
|
||||
" source_directory=project_folder,\n",
|
||||
" runconfig=run_config)\n",
|
||||
"\n",
|
||||
"# list of steps to run\n",
|
||||
"steps = [step1, step2, step3]\n",
|
||||
@@ -443,8 +453,8 @@
|
||||
"source": [
|
||||
"# Submit syntax\n",
|
||||
"# submit(experiment_name, \n",
|
||||
"# pipeline_parameters=None, \n",
|
||||
"# continue_on_node_failure=False, \n",
|
||||
"# pipeline_params=None, \n",
|
||||
"# continue_on_step_failure=False, \n",
|
||||
"# regenerate_outputs=False)\n",
|
||||
"\n",
|
||||
"pipeline_run1 = Experiment(ws, 'Hello_World1').submit(pipeline1, regenerate_outputs=False)\n",
|
||||
|
||||
@@ -8,6 +8,13 @@
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -67,7 +74,7 @@
|
||||
"source": [
|
||||
"Initialize a workspace object from persisted configuration. Make sure the config file is present at .\\config.json\n",
|
||||
"\n",
|
||||
"If you don't have a config.json file, please go through the configuration Notebook located [here](https://github.com/Azure/MachineLearningNotebooks). \n",
|
||||
"If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, If you don't have a config.json file, please go through the configuration Notebook located [here](https://github.com/Azure/MachineLearningNotebooks). \n",
|
||||
"\n",
|
||||
"This sets you up with a working config file that has information on your workspace, subscription id, etc. "
|
||||
]
|
||||
@@ -106,25 +113,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"batch_compute_name = 'mybatchcompute' # Name to associate with new compute in workspace\n",
|
||||
"\n",
|
||||
"# Batch account details needed to attach as compute to workspace\n",
|
||||
"batch_account_name = \"<batch_account_name>\" # Name of the Batch account\n",
|
||||
"batch_resource_group = \"<batch_resource_group>\" # Name of the resource group which contains this account\n",
|
||||
"\n",
|
||||
"try:\n",
|
||||
" # check if already attached\n",
|
||||
" batch_compute = BatchCompute(ws, batch_compute_name)\n",
|
||||
"except ComputeTargetException:\n",
|
||||
" print('Attaching Batch compute...')\n",
|
||||
" provisioning_config = BatchCompute.attach_configuration(resource_group=batch_resource_group, \n",
|
||||
" account_name=batch_account_name)\n",
|
||||
" batch_compute = ComputeTarget.attach(ws, batch_compute_name, provisioning_config)\n",
|
||||
" batch_compute.wait_for_completion()\n",
|
||||
" print(\"Provisioning state:{}\".format(batch_compute.provisioning_state))\n",
|
||||
" print(\"Provisioning errors:{}\".format(batch_compute.provisioning_errors))\n",
|
||||
"\n",
|
||||
"print(\"Using Batch compute:{}\".format(batch_compute.cluster_resource_id))"
|
||||
"batch_compute = ws.get_default_compute_target(\"CPU\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -189,8 +178,8 @@
|
||||
"\n",
|
||||
"\n",
|
||||
"def upload_file_to_datastore(datastore, file_name, content):\n",
|
||||
" dir = create_local_file(content=content, file_name=file_name)\n",
|
||||
" datastore.upload(src_dir=dir, overwrite=True, show_progress=True)"
|
||||
" src_dir = create_local_file(content=content, file_name=file_name)\n",
|
||||
" datastore.upload(src_dir=src_dir, overwrite=True, show_progress=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -0,0 +1,260 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||
"\n",
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# How to use EstimatorStep in AML Pipeline\n",
|
||||
"\n",
|
||||
"This notebook shows how to use the EstimatorStep with Azure Machine Learning Pipelines. Estimator is a convenient object in Azure Machine Learning that wraps run configuration information to help simplify the tasks of specifying how a script is executed.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"## Prerequisite:\n",
|
||||
"* Understand the [architecture and terms](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture) introduced by Azure Machine Learning\n",
|
||||
"* If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [configuration notebook](../../../configuration.ipynb) to:\n",
|
||||
" * install the AML SDK\n",
|
||||
" * create a workspace and its configuration file (`config.json`)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's get started. First let's import some Python libraries."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import azureml.core\n",
|
||||
"# check core SDK version number\n",
|
||||
"print(\"Azure ML SDK Version: \", azureml.core.VERSION)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Initialize workspace\n",
|
||||
"Initialize a [Workspace](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#workspace) object from the existing workspace you created in the Prerequisites step. `Workspace.from_config()` creates a workspace object from the details stored in `config.json`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core import Workspace\n",
|
||||
"ws = Workspace.from_config()\n",
|
||||
"print('Workspace name: ' + ws.name, \n",
|
||||
" 'Azure region: ' + ws.location, \n",
|
||||
" 'Subscription id: ' + ws.subscription_id, \n",
|
||||
" 'Resource group: ' + ws.resource_group, sep = '\\n')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Get default AmlCompute\n",
|
||||
"You can create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for training your model. In this tutorial, you use default `AmlCompute` as your training compute resource."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"cpu_cluster = ws.get_default_compute_target(\"CPU\")\n",
|
||||
"\n",
|
||||
"# use get_status() to get a detailed status for the current cluster. \n",
|
||||
"print(cpu_cluster.get_status().serialize())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now that you have created the compute target, let's see what the workspace's `compute_targets` property returns. You should now see one entry named 'cpucluster' of type `AmlCompute`."
|
||||
]
|
||||
},
|
||||
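As referenced above, a minimal sketch of inspecting the workspace's `compute_targets` property (assuming `ws` is the workspace object initialized earlier):

```python
# List every compute target attached to the workspace with its type and state
for name, target in ws.compute_targets.items():
    print(name, target.type, target.provisioning_state)
```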
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Use a simple script\n",
|
||||
"We have already created a simple \"hello world\" script. This is the script that we will submit through the estimator pattern. It prints a hello-world message, and if Azure ML SDK is installed, it will also logs an array of values ([Fibonacci numbers](https://en.wikipedia.org/wiki/Fibonacci_number))."
|
||||
]
|
||||
},
|
||||
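The entry script itself is not reproduced in this notebook; the following is only a sketch of what such a `dummy_train.py` could look like (the actual script in the repository may differ):

```python
# dummy_train.py -- illustrative sketch only
print("Hello, world!")

try:
    from azureml.core import Run

    # When executed as part of an Azure ML run, get the run context and log values
    run = Run.get_context()
    fibonacci = [0, 1, 1, 2, 3, 5, 8, 13, 21, 34]
    run.log_list("Fibonacci", fibonacci)
except ImportError:
    print("Azure ML SDK not installed; skipping logging.")
```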
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Build an Estimator object\n",
|
||||
"Estimator by default will attempt to use Docker-based execution. You can also enable Docker and let estimator pick the default CPU image supplied by Azure ML for execution. You can target an AmlCompute cluster (or any other supported compute target types). You can also customize the conda environment by adding conda and/or pip packages.\n",
|
||||
"\n",
|
||||
"> Note: The arguments to the entry script used in the Estimator object should be specified as *list* using\n",
|
||||
" 'estimator_entry_script_arguments' parameter when instantiating EstimatorStep. Estimator object's parameter\n",
|
||||
" 'script_params' accepts a dictionary. However 'estimator_entry_script_arguments' parameter expects arguments as\n",
|
||||
" a list.\n",
|
||||
"\n",
|
||||
"> Estimator object initialization involves specifying a list of DataReference objects in its 'inputs' parameter.\n",
|
||||
" In Pipelines, a step can take another step's output or DataReferences as input. So when creating an EstimatorStep,\n",
|
||||
" the parameters 'inputs' and 'outputs' need to be set explicitly and that will override 'inputs' parameter\n",
|
||||
" specified in the Estimator object."
|
||||
]
|
||||
},
|
||||
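To make the first note above concrete, here is a sketch of the two argument styles; the placeholder values stand in for the DataReference and PipelineData objects created in the next cells:

```python
# Placeholders for illustration; in this notebook these would be the
# DataReference (input_data) and PipelineData (output) objects defined below.
datadir = "<input DataReference>"
outdir = "<output PipelineData>"

# An Estimator on its own accepts a dictionary via script_params ...
script_params = {"--datadir": datadir, "--output": outdir}

# ... while EstimatorStep expects the same arguments as a flat list via
# estimator_entry_script_arguments.
estimator_entry_script_arguments = ["--datadir", datadir, "--output", outdir]
```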
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core import Datastore\n",
|
||||
"from azureml.data.data_reference import DataReference\n",
|
||||
"from azureml.pipeline.core import PipelineData\n",
|
||||
"\n",
|
||||
"def_blob_store = Datastore(ws, \"workspaceblobstore\")\n",
|
||||
"\n",
|
||||
"input_data = DataReference(\n",
|
||||
" datastore=def_blob_store,\n",
|
||||
" data_reference_name=\"input_data\",\n",
|
||||
" path_on_datastore=\"20newsgroups/20news.pkl\")\n",
|
||||
"\n",
|
||||
"output = PipelineData(\"output\", datastore=def_blob_store)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.train.estimator import Estimator\n",
|
||||
"\n",
|
||||
"est = Estimator(source_directory='.', \n",
|
||||
" compute_target=cpu_cluster, \n",
|
||||
" entry_script='dummy_train.py', \n",
|
||||
" conda_packages=['scikit-learn'])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create an EstimatorStep\n",
|
||||
"[EstimatorStep](https://docs.microsoft.com/en-us/python/api/azureml-pipeline-steps/azureml.pipeline.steps.estimator_step.estimatorstep?view=azure-ml-py) adds a step to run Estimator in a Pipeline.\n",
|
||||
"\n",
|
||||
"- **name:** Name of the step\n",
|
||||
"- **estimator:** Estimator object\n",
|
||||
"- **estimator_entry_script_arguments:** \n",
|
||||
"- **runconfig_pipeline_params:** Override runconfig properties at runtime using key-value pairs each with name of the runconfig property and PipelineParameter for that property\n",
|
||||
"- **inputs:** Inputs\n",
|
||||
"- **outputs:** Output is list of PipelineData\n",
|
||||
"- **compute_target:** Compute target to use \n",
|
||||
"- **allow_reuse:** Whether the step should reuse previous results when run with the same settings/inputs. If this is false, a new run will always be generated for this step during pipeline execution.\n",
|
||||
"- **version:** Optional version tag to denote a change in functionality for the step"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.pipeline.steps import EstimatorStep\n",
|
||||
"\n",
|
||||
"est_step = EstimatorStep(name=\"Estimator_Train\", \n",
|
||||
" estimator=est, \n",
|
||||
" estimator_entry_script_arguments=[\"--datadir\", input_data, \"--output\", output],\n",
|
||||
" runconfig_pipeline_params=None, \n",
|
||||
" inputs=[input_data], \n",
|
||||
" outputs=[output], \n",
|
||||
" compute_target=cpu_cluster)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Build and Submit the Experiment"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.pipeline.core import Pipeline\n",
|
||||
"from azureml.core import Experiment\n",
|
||||
"pipeline = Pipeline(workspace=ws, steps=[est_step])\n",
|
||||
"pipeline_run = Experiment(ws, 'Estimator_sample').submit(pipeline)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## View Run Details"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.widgets import RunDetails\n",
|
||||
"RunDetails(pipeline_run).show()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "sanpil"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
"name": "python36"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -8,6 +8,13 @@
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -36,7 +43,7 @@
|
||||
"from azureml.exceptions import ComputeTargetException\n",
|
||||
"from azureml.data.data_reference import DataReference\n",
|
||||
"from azureml.pipeline.steps import HyperDriveStep\n",
|
||||
"from azureml.pipeline.core import Pipeline\n",
|
||||
"from azureml.pipeline.core import Pipeline, PipelineData\n",
|
||||
"from azureml.train.dnn import TensorFlow\n",
|
||||
"from azureml.train.hyperdrive import *\n",
|
||||
"\n",
|
||||
@@ -50,7 +57,7 @@
|
||||
"source": [
|
||||
"## Initialize workspace\n",
|
||||
"\n",
|
||||
"Initialize a workspace object from persisted configuration. Make sure the config file is present at .\\config.json"
|
||||
"Initialize a workspace object from persisted configuration. If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, make sure the config file is present at .\\config.json"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -128,14 +135,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Retrieve or create a Azure Machine Learning compute\n",
|
||||
"Azure Machine Learning Compute is a service for provisioning and managing clusters of Azure virtual machines for running machine learning workloads. Let's create a new Azure Machine Learning Compute in the current workspace, if it doesn't already exist. We will then run the training script on this compute target.\n",
|
||||
"\n",
|
||||
"If we could not find the compute with the given name in the previous cell, then we will create a new compute here. This process is broken down into the following steps:\n",
|
||||
"\n",
|
||||
"1. Create the configuration\n",
|
||||
"2. Create the Azure Machine Learning compute\n",
|
||||
"\n",
|
||||
"**This process will take a few minutes and is providing only sparse output in the process. Please make sure to wait until the call returns before moving to the next cell.**\n"
|
||||
"Azure Machine Learning Compute is a service for provisioning and managing clusters of Azure virtual machines for running machine learning workloads. Let's get the default Azure Machine Learning Compute in the current workspace. We will then run the training script on this compute target."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -144,20 +144,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"cluster_name = \"gpucluster\"\n",
|
||||
"\n",
|
||||
"try:\n",
|
||||
" compute_target = ComputeTarget(workspace=ws, name=cluster_name)\n",
|
||||
" print('Found existing compute target {}.'.format(cluster_name))\n",
|
||||
"except ComputeTargetException:\n",
|
||||
" print('Creating a new compute target...')\n",
|
||||
" compute_config = AmlCompute.provisioning_configuration(vm_size=\"STANDARD_NC6\",\n",
|
||||
" max_nodes=4)\n",
|
||||
"\n",
|
||||
" compute_target = ComputeTarget.create(ws, cluster_name, compute_config)\n",
|
||||
" compute_target.wait_for_completion(show_output=True, timeout_in_minutes=20)\n",
|
||||
"\n",
|
||||
"print(\"Azure Machine Learning Compute attached\")"
|
||||
"compute_target = ws.get_default_compute_target(\"GPU\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -259,7 +246,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"hd_config = HyperDriveRunConfig(estimator=est, \n",
|
||||
"hd_config = HyperDriveConfig(estimator=est, \n",
|
||||
" hyperparameter_sampling=ps,\n",
|
||||
" policy=early_termination_policy,\n",
|
||||
" primary_metric_name='validation_acc', \n",
|
||||
@@ -295,7 +282,7 @@
|
||||
"### HyperDriveStep\n",
|
||||
"HyperDriveStep can be used to run HyperDrive job as a step in pipeline.\n",
|
||||
"- **name:** Name of the step\n",
|
||||
"- **hyperdrive_run_config:** A HyperDriveRunConfig that defines the configuration for this HyperDrive run\n",
|
||||
"- **hyperdrive_config:** A HyperDriveConfig that defines the configuration for this HyperDrive run\n",
|
||||
"- **estimator_entry_script_arguments:** List of command-line arguments for estimator entry script\n",
|
||||
"- **inputs:** List of input port bindings\n",
|
||||
"- **outputs:** List of output port bindings\n",
|
||||
@@ -310,11 +297,17 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"metrics_output_name = 'metrics_output'\n",
|
||||
"metirics_data = PipelineData(name='metrics_data',\n",
|
||||
" datastore=ds,\n",
|
||||
" pipeline_output_name=metrics_output_name)\n",
|
||||
"\n",
|
||||
"hd_step = HyperDriveStep(\n",
|
||||
" name=\"hyperdrive_module\",\n",
|
||||
" hyperdrive_run_config=hd_config,\n",
|
||||
" hyperdrive_config=hd_config,\n",
|
||||
" estimator_entry_script_arguments=['--data-folder', data_folder],\n",
|
||||
" inputs=[data_folder])"
|
||||
" inputs=[data_folder],\n",
|
||||
" metrics_output=metirics_data)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -366,6 +359,40 @@
|
||||
"source": [
|
||||
"pipeline_run.wait_for_completion()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Retrieve the metrics\n",
|
||||
"Outputs of above run can be used as inputs of other steps in pipeline. In this tutorial, we will show the result metrics."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"metrics_output = pipeline_run.get_pipeline_output(metrics_output_name)\n",
|
||||
"num_file_downloaded = metrics_output.download('.', show_progress=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import json\n",
|
||||
"with open(metrics_output._path_on_datastore) as f: \n",
|
||||
" metrics_output_result = f.read()\n",
|
||||
" \n",
|
||||
"deserialized_metrics_output = json.loads(metrics_output_result)\n",
|
||||
"df = pd.DataFrame(deserialized_metrics_output)\n",
|
||||
"df"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
@@ -8,6 +8,13 @@
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -21,7 +28,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Prerequisites and Azure Machine Learning Basics\n",
|
||||
"Make sure you go through the configuration Notebook located at https://github.com/Azure/MachineLearningNotebooks first if you haven't. This sets you up with a working config file that has information on your workspace, subscription id, etc. \n",
|
||||
"If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, make sure you go through the configuration Notebook located at https://github.com/Azure/MachineLearningNotebooks first if you haven't. This sets you up with a working config file that has information on your workspace, subscription id, etc. \n",
|
||||
"\n",
|
||||
"### Initialization Steps"
|
||||
]
|
||||
@@ -33,7 +40,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import azureml.core\n",
|
||||
"from azureml.core import Workspace, Datastore\n",
|
||||
"from azureml.core import Workspace, Datastore, Experiment\n",
|
||||
"from azureml.core.compute import AmlCompute\n",
|
||||
"from azureml.core.compute import ComputeTarget\n",
|
||||
"\n",
|
||||
@@ -55,10 +62,7 @@
|
||||
"print(\"Default datastore's name: {}\".format(def_file_store.name))\n",
|
||||
"\n",
|
||||
"def_blob_store = Datastore(ws, \"workspaceblobstore\")\n",
|
||||
"print(\"Blobstore's name: {}\".format(def_blob_store.name))\n",
|
||||
"\n",
|
||||
"# project folder\n",
|
||||
"project_folder = '.'"
|
||||
"print(\"Blobstore's name: {}\".format(def_blob_store.name))"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -75,20 +79,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.compute_target import ComputeTargetException\n",
|
||||
"\n",
|
||||
"aml_compute_target = \"cpucluster\"\n",
|
||||
"try:\n",
|
||||
" aml_compute = AmlCompute(ws, aml_compute_target)\n",
|
||||
" print(\"found existing compute target.\")\n",
|
||||
"except ComputeTargetException:\n",
|
||||
" print(\"creating new compute target\")\n",
|
||||
" \n",
|
||||
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\",\n",
|
||||
" min_nodes = 1, \n",
|
||||
" max_nodes = 4) \n",
|
||||
" aml_compute = ComputeTarget.create(ws, aml_compute_target, provisioning_config)\n",
|
||||
" aml_compute.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)\n"
|
||||
"aml_compute = ws.get_default_compute_target(\"CPU\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -160,7 +151,7 @@
|
||||
" inputs=[blob_input_data],\n",
|
||||
" outputs=[processed_data1],\n",
|
||||
" compute_target=aml_compute, \n",
|
||||
" source_directory=project_folder\n",
|
||||
" source_directory='.'\n",
|
||||
")\n",
|
||||
"print(\"trainStep created\")"
|
||||
]
|
||||
@@ -191,7 +182,7 @@
|
||||
" inputs=[processed_data1],\n",
|
||||
" outputs=[processed_data2],\n",
|
||||
" compute_target=aml_compute, \n",
|
||||
" source_directory=project_folder)\n",
|
||||
" source_directory='.')\n",
|
||||
"print(\"extractStep created\")"
|
||||
]
|
||||
},
|
||||
@@ -252,7 +243,7 @@
|
||||
" inputs=[processed_data1, processed_data2],\n",
|
||||
" outputs=[processed_data3], \n",
|
||||
" compute_target=aml_compute, \n",
|
||||
" source_directory=project_folder)\n",
|
||||
" source_directory='.')\n",
|
||||
"print(\"compareStep created\")"
|
||||
]
|
||||
},
|
||||
@@ -270,10 +261,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pipeline1 = Pipeline(workspace=ws, steps=[compareStep])\n",
|
||||
"print (\"Pipeline is built\")\n",
|
||||
"\n",
|
||||
"pipeline1.validate()\n",
|
||||
"print(\"Simple validation complete\") "
|
||||
"print (\"Pipeline is built\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -290,10 +278,38 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"published_pipeline1 = pipeline1.publish(name=\"My_New_Pipeline\", description=\"My Published Pipeline Description\")\n",
|
||||
"published_pipeline1 = pipeline1.publish(name=\"My_New_Pipeline\", description=\"My Published Pipeline Description\", continue_on_step_failure=True)\n",
|
||||
"published_pipeline1"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Note: the continue_on_step_failure parameter specifies whether the execution of steps in the Pipeline will continue if one step fails. The default value is False, meaning when one step fails, the Pipeline execution will stop, canceling any running steps."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Publish the pipeline from a submitted PipelineRun\n",
|
||||
"It is also possible to publish a pipeline from a submitted PipelineRun"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# submit a pipeline run\n",
|
||||
"pipeline_run1 = Experiment(ws, 'Pipeline_experiment').submit(pipeline1)\n",
|
||||
"# publish a pipeline from the submitted pipeline run\n",
|
||||
"published_pipeline2 = pipeline_run1.publish_pipeline(name=\"My_New_Pipeline2\", description=\"My Published Pipeline Description\", version=\"0.1\", continue_on_step_failure=True)\n",
|
||||
"published_pipeline2"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -325,7 +341,8 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Run published pipeline using its REST endpoint"
|
||||
"### Run published pipeline using its REST endpoint\n",
|
||||
"[This notebook](https://aka.ms/pl-restep-auth) shows how to authenticate to AML workspace."
|
||||
]
|
||||
},
|
||||
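A hedged sketch of the usual invocation pattern (the experiment name is an assumption; see the linked notebook for other authentication options such as service principals):

```python
import requests
from azureml.core.authentication import InteractiveLoginAuthentication

# Obtain an AAD token header for the workspace
auth = InteractiveLoginAuthentication()
aad_token = auth.get_authentication_header()

# published_pipeline1 was created earlier in this notebook
rest_endpoint = published_pipeline1.endpoint

# Trigger a run of the published pipeline under the given experiment name
response = requests.post(rest_endpoint,
                         headers=aad_token,
                         json={"ExperimentName": "My_Pipeline_Experiment"})
print("Submitted run id:", response.json().get("Id"))
```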
{
|
||||
|
||||
@@ -8,6 +8,13 @@
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -21,7 +28,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Prerequisites and AML Basics\n",
|
||||
"Make sure you go through the configuration Notebook located at https://github.com/Azure/MachineLearningNotebooks first if you haven't. This sets you up with a working config file that has information on your workspace, subscription id, etc.\n",
|
||||
"If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, make sure you go through the configuration Notebook located at https://github.com/Azure/MachineLearningNotebooks first if you haven't. This sets you up with a working config file that has information on your workspace, subscription id, etc.\n",
|
||||
"\n",
|
||||
"### Initialization Steps"
|
||||
]
|
||||
@@ -47,7 +54,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Compute Targets\n",
|
||||
"#### Retrieve an already attached Azure Machine Learning Compute"
|
||||
"#### Retrieve the default Azure Machine Learning Compute"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -56,31 +63,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core import Run, Experiment, Datastore\n",
|
||||
"\n",
|
||||
"from azureml.widgets import RunDetails\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.compute import AmlCompute, ComputeTarget\n",
|
||||
"aml_compute_target = \"aml-compute\"\n",
|
||||
"try:\n",
|
||||
" aml_compute = AmlCompute(ws, aml_compute_target)\n",
|
||||
" print(\"Found existing compute target: {}\".format(aml_compute_target))\n",
|
||||
"except:\n",
|
||||
" print(\"Creating new compute target: {}\".format(aml_compute_target))\n",
|
||||
" \n",
|
||||
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\",\n",
|
||||
" min_nodes = 1, \n",
|
||||
" max_nodes = 4) \n",
|
||||
" aml_compute = ComputeTarget.create(ws, aml_compute_target, provisioning_config)\n",
|
||||
" aml_compute.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)"
|
||||
"aml_compute_target = ws.get_default_compute_target(\"CPU\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -107,15 +90,11 @@
|
||||
"source": [
|
||||
"from azureml.pipeline.steps import PythonScriptStep\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# project folder\n",
|
||||
"project_folder = 'scripts'\n",
|
||||
"\n",
|
||||
"trainStep = PythonScriptStep(\n",
|
||||
" name=\"Training_Step\",\n",
|
||||
" script_name=\"train.py\", \n",
|
||||
" compute_target=aml_compute_target, \n",
|
||||
" source_directory=project_folder\n",
|
||||
" source_directory='.'\n",
|
||||
")\n",
|
||||
"print(\"TrainStep created\")"
|
||||
]
|
||||
@@ -136,9 +115,7 @@
|
||||
"from azureml.pipeline.core import Pipeline\n",
|
||||
"\n",
|
||||
"pipeline1 = Pipeline(workspace=ws, steps=[trainStep])\n",
|
||||
"print (\"Pipeline is built\")\n",
|
||||
"\n",
|
||||
"pipeline1.validate()"
|
||||
"print (\"Pipeline is built\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -255,10 +232,11 @@
|
||||
"schedules = Schedule.get_all(ws, pipeline_id=pub_pipeline_id)\n",
|
||||
"\n",
|
||||
"# We will iterate through the list of schedules and \n",
|
||||
"# use the last ID in the list for further operations: \n",
|
||||
"# use the last recurrence schedule in the list for further operations: \n",
|
||||
"print(\"Found these schedules for the pipeline id {}:\".format(pub_pipeline_id))\n",
|
||||
"for schedule in schedules: \n",
|
||||
" print(schedule.id)\n",
|
||||
" if schedule.recurrence is not None:\n",
|
||||
" schedule_id = schedule.id\n",
|
||||
"\n",
|
||||
"print(\"Schedule id to be used for schedule operations: {}\".format(schedule_id))"
|
||||
@@ -306,7 +284,10 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Disable the schedule"
|
||||
"### Disable the schedule\n",
|
||||
"It is important to note the best practice of disabling schedules when not in use.\n",
|
||||
"The number of schedule triggers allowed per month per region per subscription is 100,000.\n",
|
||||
"This is calculated using the project trigger counts for all active schedules."
|
||||
]
|
||||
},
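{
"cell_type": "markdown",
"metadata": {},
"source": [
"For illustration (not part of the original diff): the sketch below fetches the schedule by the schedule_id found earlier and disables it, so it no longer counts toward the trigger quota. It assumes schedule_id and ws are defined in the cells above."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.pipeline.core.schedule import Schedule\n",
"\n",
"# fetch the schedule by id and disable it; wait until provisioning completes\n",
"fetched_schedule = Schedule.get(ws, schedule_id)\n",
"fetched_schedule.disable(wait_for_provisioning=True)\n",
"\n",
"# confirm the schedule is now disabled\n",
"fetched_schedule = Schedule.get(ws, schedule_id)\n",
"print(\"Disabled schedule {}. New status is: {}\".format(fetched_schedule.id, fetched_schedule.status))"
]
},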
|
||||
{
|
||||
@@ -380,7 +361,8 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create a schedule for the pipeline using a Datastore\n",
|
||||
"This schedule will run when additions or modifications are made to Blobs in the Datastore container.\n",
|
||||
"This schedule will run when additions or modifications are made to Blobs in the Datastore.\n",
|
||||
"By default, the Datastore container is monitored for changes. Use the path_on_datastore parameter to instead specify a path on the Datastore to monitor for changes. Note: the path_on_datastore will be under the container for the datastore, so the actual path monitored will be container/path_on_datastore. Changes made to subfolders in the container/path will not trigger the schedule.\n",
|
||||
"Note: Only Blob Datastores are supported."
|
||||
]
|
||||
},
|
||||
@@ -400,6 +382,8 @@
|
||||
" datastore=datastore,\n",
|
||||
" wait_for_provisioning=True,\n",
|
||||
" description=\"Schedule Run\")\n",
|
||||
" #polling_interval=5, use polling_interval to specify how often to poll for blob additions/modifications. Default value is 5 minutes.\n",
|
||||
" #path_on_datastore=\"file/path\") use path_on_datastore to specify a specific folder to monitor for changes.\n",
|
||||
"\n",
|
||||
"# You may want to make sure that the schedule is provisioned properly\n",
|
||||
"# before making any further changes to the schedule\n",
|
||||
|
||||
@@ -0,0 +1,553 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved. \n",
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"\n",
|
||||
"# How to Setup a PipelineEndpoint and Submit a Pipeline Using the PipelineEndpoint.\n",
|
||||
"In this notebook, we will see how to setup a PipelineEndpoint and run specific pipeline version.\n",
|
||||
"\n",
|
||||
"PipelineEndpoint can be used to update a published pipeline while maintaining same endpoint.\n",
|
||||
"PipelineEndpoint, provides a way to keep track of [PublishedPipelines](https://docs.microsoft.com/en-us/python/api/azureml-pipeline-core/azureml.pipeline.core.publishedpipeline) using versions. PipelineEndpoint uses endpoint with version information to trigger underlying published pipeline. Pipeline endpoints are uniquely named within a workspace. \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Prerequisites and AML Basics\n",
|
||||
"If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, make sure you go through the [configuration Notebook](https://github.com/Azure/MachineLearningNotebooks) first if you haven't. This sets you up with a working config file that has information on your workspace, subscription id, etc.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core import Workspace\n",
|
||||
"\n",
|
||||
"ws = Workspace.from_config()\n",
|
||||
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Notebook Overview\n",
|
||||
"In this notebook, we provide an introduction to Azure machine learning PipelineEndpoints. It covers:\n",
|
||||
"* [Create PipelineEndpoint](#Create-PipelineEndpoint), How to create PipelineEndpoint.\n",
|
||||
"* [Retrieving PipelineEndpoint](#Retrieving-PipelineEndpoint), How to get specific PipelineEndpoint from worskpace by name/Id and get all [PipelineEndpoints](#Get-all-PipelineEndpoints-in-workspace) within workspace.\n",
|
||||
"* [PipelineEndpoint Properties](#PipelineEndpoint-properties). How to get and set PipelineEndpoint properties, such as default version of PipelineEndpoint.\n",
|
||||
"* [PipelineEndpoint Submission](#PipelineEndpoint-Submission). How to run a Pipeline using PipelineEndpoint."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create PipelineEndpoint\n",
|
||||
"Following are required input parameters to create PipelineEndpoint:\n",
|
||||
"\n",
|
||||
"* *workspace*: AML workspace.\n",
|
||||
"* *name*: name of PipelineEndpoint, it is unique within workspace.\n",
|
||||
"* *description*: description details for PipelineEndpoint.\n",
|
||||
"* *pipeline*: A [Pipeline](#Steps-to-create-simple-Pipeline) or [PublishedPipeline](#Publish-Pipeline), to set default version of PipelineEndpoint. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Initialization, Steps to create a Pipeline"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.pipeline.steps import PythonScriptStep\n",
|
||||
"from azureml.pipeline.core import Pipeline\n",
|
||||
"\n",
|
||||
"aml_compute = ws.get_default_compute_target(\"CPU\")\n",
|
||||
"\n",
|
||||
"# source_directory\n",
|
||||
"source_directory = '.'\n",
|
||||
"# define a single step pipeline for demonstration purpose.\n",
|
||||
"trainStep = PythonScriptStep(\n",
|
||||
" name=\"Training_Step\",\n",
|
||||
" script_name=\"train.py\", \n",
|
||||
" compute_target=aml_compute_target, \n",
|
||||
" source_directory=source_directory\n",
|
||||
")\n",
|
||||
"print(\"TrainStep created\")\n",
|
||||
"# build and validate Pipeline\n",
|
||||
"pipeline = Pipeline(workspace=ws, steps=[trainStep])\n",
|
||||
"print(\"Pipeline is built\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Publish Pipeline"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from datetime import datetime\n",
|
||||
"\n",
|
||||
"timenow = datetime.now().strftime('%m-%d-%Y-%H-%M')\n",
|
||||
"\n",
|
||||
"pipeline_name = timenow + \"-Pipeline\"\n",
|
||||
"print(pipeline_name)\n",
|
||||
"\n",
|
||||
"published_pipeline = pipeline.publish(\n",
|
||||
" name=pipeline_name, \n",
|
||||
" description=pipeline_name)\n",
|
||||
"print(\"Newly published pipeline id: {}\".format(published_pipeline.id))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Publishing PipelineEndpoint\n",
|
||||
"Create PipelineEndpoint with required parameters: workspace, name, description and pipeline"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.pipeline.core import PipelineEndpoint\n",
|
||||
"\n",
|
||||
"pipeline_endpoint = PipelineEndpoint.publish(workspace=ws, name=\"PipelineEndpointTest\",\n",
|
||||
" pipeline=pipeline, description=\"Test description Notebook\")\n",
|
||||
"pipeline_endpoint"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Retrieving PipelineEndpoint\n",
|
||||
"\n",
|
||||
"PipelineEndpoint is uniquely defined by name and id within workspace. PipelineEndpoint in workspace can be retrived by Id or by name."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Get PipelineEndpoint by Name\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pipeline_endpoint_by_name = PipelineEndpoint.get(workspace=ws, name=\"PipelineEndpointTest\")\n",
|
||||
"pipeline_endpoint_by_name"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Get PipelineEndpoint by Id\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#get the PipelineEndpoint Id\n",
|
||||
"pipeline_endpoint_by_name = PipelineEndpoint.get(workspace=ws, name=\"PipelineEndpointTest\")\n",
|
||||
"endpoint_id = pipeline_endpoint_by_name.id\n",
|
||||
"\n",
|
||||
"pipeline_endpoint_by_id = PipelineEndpoint.get(workspace=ws, id=endpoint_id)\n",
|
||||
"pipeline_endpoint_by_id"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Get all PipelineEndpoints in workspace\n",
|
||||
"Returns all PipelineEndpoints within workspace"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"endpoint_list = PipelineEndpoint.get_all(workspace=ws, active_only=True)\n",
|
||||
"endpoint_list"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### PipelineEndpoint properties"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Default Version of PipelineEndpoint\n",
|
||||
"Default version of PipelineEndpoint starts from \"0\" and increments on addition of pipelines.\n",
|
||||
"\n",
|
||||
"##### Get the Default Version"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"default_version = pipeline_endpoint_by_name.get_default_version()\n",
|
||||
"default_version"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"##### Set default version \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pipeline_endpoint_by_name.set_default_version(\"0\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Get the Published Pipeline corresponds to specific version of PipelineEndpoint"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pipeline = pipeline_endpoint_by_name.get_pipeline(\"0\")\n",
|
||||
"pipeline"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Get default version Published Pipeline"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pipeline = pipeline_endpoint_by_name.get_pipeline()\n",
|
||||
"pipeline"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Set Published Pipeline to default version"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Set Published Pipeline to PipelineEndpoint, if exists\n",
|
||||
"pipeline_endpoint_by_name.set_default(published_pipeline)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Get all Versions in PipelineEndpoint\n",
|
||||
"Returns list of published pipelines and its versions"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"versions = pipeline_endpoint_by_name.get_all_versions()\n",
|
||||
"\n",
|
||||
"for ve in versions:\n",
|
||||
" print(ve.version)\n",
|
||||
" print(ve.pipeline.id)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Get all Published Pipelines in PipelineEndpoint\n",
|
||||
"Returns all active pipelines in PipelineEnpoint, if active_only flag is set to True."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pipelines = pipeline_endpoint_by_name.get_all_pipelines(active_only=True)\n",
|
||||
"pipelines"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Name property of PipelineEndpoint\n",
|
||||
"PipelineEndpoint is uniquely identified by name"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"##### Set Name PipelineEndpoint"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pipeline_endpoint_by_name.set_name(name=\"NewName\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Add Published Pipeline to PipelineEndpoint, \n",
|
||||
"Adding published pipeline, if its not present in PipelineEndpoint."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pipeline_endpoint_by_name.add(published_pipeline)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Add Published pipeline to PipelineEndpoint and set it to default version\n",
|
||||
"Adding published pipeline to PipelineEndpoint if not present and set it to default"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pipeline_endpoint_by_name.add_default(published_pipeline)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### PipelineEndpoint Submission\n",
|
||||
"PipelineEndpoint triggers specific versioned pipeline or default pipeline by:\n",
|
||||
"* Rest Endpoint \n",
|
||||
"* Submit call."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Run Pipeline by endpoint property of PipelineEndpoint\n",
|
||||
"Run specific pipeline using endpoint property of PipelineEndpoint and executing http post."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pipeline_endpoint_by_name = PipelineEndpoint.get(workspace=ws, name=\"PipelineEndpointTest\")\n",
|
||||
"\n",
|
||||
"# endpoint with id \n",
|
||||
"rest_endpoint_id = pipeline_endpoint_by_name.endpoint\n",
|
||||
"\n",
|
||||
"# for default version pipeline\n",
|
||||
"rest_endpoint_id_without_version_with_id = rest_endpoint_id\n",
|
||||
"\n",
|
||||
"# for specific version pipeline just append version info\n",
|
||||
"version=\"0\"\n",
|
||||
"rest_endpoint_id_with_version = rest_endpoint_id_without_version_with_id+\"/\"+ version\n",
|
||||
"print(rest_endpoint_id_with_version)\n",
|
||||
"pipeline_endpoint_by_name"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# endpoint with name\n",
|
||||
"rest_endpoint_name = rest_endpoint_id.split(\"Id\", 1)[0] + \"Name?name=\" + pipeline_endpoint_by_name.name\n",
|
||||
"\n",
|
||||
"# for default version pipeline\n",
|
||||
"rest_endpoint_name_without_version = rest_endpoint_name\n",
|
||||
"\n",
|
||||
"# for specific version pipeline just append version info\n",
|
||||
"version=\"0\"\n",
|
||||
"rest_endpoint_name_with_version = rest_endpoint_name_without_version+\"&pipelineVersion=\"+ version\n",
|
||||
"print(rest_endpoint_name_with_version)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"[This notebook](https://aka.ms/pl-restep-auth) shows how to authenticate to AML workspace."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.authentication import InteractiveLoginAuthentication\n",
|
||||
"import requests\n",
|
||||
"\n",
|
||||
"auth = InteractiveLoginAuthentication()\n",
|
||||
"aad_token = auth.get_authentication_header()\n",
|
||||
"\n",
|
||||
"#endpoint = pipeline_endpoint_by_name.url\n",
|
||||
"\n",
|
||||
"print(\"You can perform HTTP POST on URL {} to trigger this pipeline\".format(rest_endpoint_name_with_version))\n",
|
||||
"\n",
|
||||
"# specify the param when running the pipeline\n",
|
||||
"response = requests.post(rest_endpoint_name_with_version, \n",
|
||||
" headers=aad_token, \n",
|
||||
" json={\"ExperimentName\": \"default_pipeline\",\n",
|
||||
" \"RunSource\": \"SDK\",\n",
|
||||
" \"ParameterAssignments\": {\"1\": \"united\", \"2\":\"city\"}})\n",
|
||||
"\n",
|
||||
"run_id = response.json()[\"Id\"]\n",
|
||||
"print(run_id)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Run Pipeline by Submit call of PipelineEndpoint \n",
|
||||
"Run specific pipeline using Submit api of PipelineEndpoint"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# submit pipeline with specific version\n",
|
||||
"run_id = pipeline_endpoint_by_name.submit(\"TestPipelineEndpoint\", pipeline_version=\"0\")\n",
|
||||
"print(run_id)\n",
|
||||
"\n",
|
||||
"# submit pipeline with default version\n",
|
||||
"run_id = pipeline_endpoint_by_name.submit(\"TestPipelineEndpoint\")\n",
|
||||
"print(run_id)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "mameghwa"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
"name": "python36"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||