mirror of
https://github.com/Azure/MachineLearningNotebooks.git
synced 2025-12-20 09:37:04 -05:00
Compare commits
82 Commits
azureml-sd
...
cli-ga
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
01e188d61f | ||
|
|
7a754faa7e | ||
|
|
78f8ec3d24 | ||
|
|
6120c6897f | ||
|
|
8c4168dfa1 | ||
|
|
8580dcc01b | ||
|
|
5443b05152 | ||
|
|
afec974f09 | ||
|
|
d6b558f88d | ||
|
|
70c021eb69 | ||
|
|
550abc7167 | ||
|
|
6c12043732 | ||
|
|
2f3f1ff756 | ||
|
|
05f056a692 | ||
|
|
8e559ef577 | ||
|
|
a8e4fc2f9a | ||
|
|
f5e4df0864 | ||
|
|
13d0d9baf1 | ||
|
|
845e9d653e | ||
|
|
639ef81636 | ||
|
|
60158bf41a | ||
|
|
8dbbb01b8a | ||
|
|
6e6b2b0c48 | ||
|
|
85f5721bf8 | ||
|
|
6a7dd741e7 | ||
|
|
314218fc89 | ||
|
|
b50d2725c7 | ||
|
|
9a2f448792 | ||
|
|
dd620f19fd | ||
|
|
8116d31da4 | ||
|
|
ef29dc1fa5 | ||
|
|
97b345cb33 | ||
|
|
282250e670 | ||
|
|
acef60c5b3 | ||
|
|
bfb444eb15 | ||
|
|
6277659bf2 | ||
|
|
1645e12712 | ||
|
|
cc4a32e70b | ||
|
|
997a35aed5 | ||
|
|
dd6317a4a0 | ||
|
|
82d8353d54 | ||
|
|
59a01c17a0 | ||
|
|
e31e1d9af3 | ||
|
|
d38b9db255 | ||
|
|
761ad88c93 | ||
|
|
644729e5db | ||
|
|
e2b1b3fcaa | ||
|
|
dc692589a9 | ||
|
|
624b4595b5 | ||
|
|
0ed85c33c2 | ||
|
|
5b01de605f | ||
|
|
c351ac988a | ||
|
|
759ec3934c | ||
|
|
b499b88a85 | ||
|
|
5f4edac3c1 | ||
|
|
edfce0d936 | ||
|
|
1516c7fc24 | ||
|
|
389fb668ce | ||
|
|
647d5e72a5 | ||
|
|
43ac4c84bb | ||
|
|
8a1a82b50a | ||
|
|
72f386298c | ||
|
|
41d697e298 | ||
|
|
c3ce932029 | ||
|
|
a956162114 | ||
|
|
cb5a178e40 | ||
|
|
d81c336c59 | ||
|
|
4244a24d81 | ||
|
|
3b488555e5 | ||
|
|
6abc478f33 | ||
|
|
666c2579eb | ||
|
|
5af3aa4231 | ||
|
|
e48d828ab0 | ||
|
|
44aa636c21 | ||
|
|
4678f9adc3 | ||
|
|
5bf85edade | ||
|
|
94f381e884 | ||
|
|
ea1b7599c3 | ||
|
|
6b8a6befde | ||
|
|
c1511b7b74 | ||
|
|
8f007a3333 | ||
|
|
18a11bbd8d |
29
Dockerfiles/1.0.21/Dockerfile
Normal file
29
Dockerfiles/1.0.21/Dockerfile
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
FROM continuumio/miniconda:4.5.11

# install git; clean the apt lists in the same layer so they never persist in
# the image. NOTE(review): the blanket `apt-get upgrade` was dropped (hadolint
# DL3005) — pick up OS security fixes by bumping the base image tag instead.
RUN apt-get update && \
    apt-get install -y --no-install-recommends git && \
    rm -rf /var/lib/apt/lists/*

# create a new conda environment named azureml (conda package names are lowercase)
RUN conda create -n azureml -y -q python=3.6

# install additional packages used by the sample notebooks. this is optional
RUN ["/bin/bash", "-c", "source activate azureml && conda install -y tqdm cython matplotlib scikit-learn"]

# install azureml-sdk components, pinned to match the notebook branch cloned below
RUN ["/bin/bash", "-c", "source activate azureml && pip install --no-cache-dir azureml-sdk[notebooks]==1.0.21"]

# clone the matching release branch of the Azure ML sample notebooks
WORKDIR /home
RUN git clone -b "azureml-sdk-1.0.21" --single-branch https://github.com/Azure/MachineLearningNotebooks.git

# generate the jupyter configuration file (mkdir -p is idempotent)
RUN ["/bin/bash", "-c", "source activate azureml && mkdir -p ~/.jupyter && cd ~/.jupyter && jupyter notebook --generate-config"]

# set an empty token for Jupyter to remove authentication.
# this is NOT recommended for production environments
RUN echo "c.NotebookApp.token = ''" >> ~/.jupyter/jupyter_notebook_config.py

# document that the notebook server listens on port 8887 (EXPOSE does not publish)
EXPOSE 8887

# start the Jupyter notebook server on port 8887 when the container starts.
# exec form so the shell wrapper is explicit and overridable per-argument.
CMD ["/bin/bash", "-c", "cd /home/MachineLearningNotebooks && source activate azureml && jupyter notebook --port 8887 --no-browser --ip 0.0.0.0 --allow-root"]
29
Dockerfiles/1.0.23/Dockerfile
Normal file
29
Dockerfiles/1.0.23/Dockerfile
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
FROM continuumio/miniconda:4.5.11

# install git; clean the apt lists in the same layer so they never persist in
# the image. NOTE(review): the blanket `apt-get upgrade` was dropped (hadolint
# DL3005) — pick up OS security fixes by bumping the base image tag instead.
RUN apt-get update && \
    apt-get install -y --no-install-recommends git && \
    rm -rf /var/lib/apt/lists/*

# create a new conda environment named azureml (conda package names are lowercase)
RUN conda create -n azureml -y -q python=3.6

# install additional packages used by the sample notebooks. this is optional
RUN ["/bin/bash", "-c", "source activate azureml && conda install -y tqdm cython matplotlib scikit-learn"]

# install azureml-sdk components, pinned to match the notebook branch cloned below
RUN ["/bin/bash", "-c", "source activate azureml && pip install --no-cache-dir azureml-sdk[notebooks]==1.0.23"]

# clone the matching release branch of the Azure ML sample notebooks
WORKDIR /home
RUN git clone -b "azureml-sdk-1.0.23" --single-branch https://github.com/Azure/MachineLearningNotebooks.git

# generate the jupyter configuration file (mkdir -p is idempotent)
RUN ["/bin/bash", "-c", "source activate azureml && mkdir -p ~/.jupyter && cd ~/.jupyter && jupyter notebook --generate-config"]

# set an empty token for Jupyter to remove authentication.
# this is NOT recommended for production environments
RUN echo "c.NotebookApp.token = ''" >> ~/.jupyter/jupyter_notebook_config.py

# document that the notebook server listens on port 8887 (EXPOSE does not publish)
EXPOSE 8887

# start the Jupyter notebook server on port 8887 when the container starts.
# exec form so the shell wrapper is explicit and overridable per-argument.
CMD ["/bin/bash", "-c", "cd /home/MachineLearningNotebooks && source activate azureml && jupyter notebook --port 8887 --no-browser --ip 0.0.0.0 --allow-root"]
29
Dockerfiles/1.0.30/Dockerfile
Normal file
29
Dockerfiles/1.0.30/Dockerfile
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
FROM continuumio/miniconda:4.5.11

# install git; clean the apt lists in the same layer so they never persist in
# the image. NOTE(review): the blanket `apt-get upgrade` was dropped (hadolint
# DL3005) — pick up OS security fixes by bumping the base image tag instead.
RUN apt-get update && \
    apt-get install -y --no-install-recommends git && \
    rm -rf /var/lib/apt/lists/*

# create a new conda environment named azureml (conda package names are lowercase)
RUN conda create -n azureml -y -q python=3.6

# install additional packages used by the sample notebooks. this is optional
RUN ["/bin/bash", "-c", "source activate azureml && conda install -y tqdm cython matplotlib scikit-learn"]

# install azureml-sdk components, pinned to match the notebook branch cloned below
RUN ["/bin/bash", "-c", "source activate azureml && pip install --no-cache-dir azureml-sdk[notebooks]==1.0.30"]

# clone the matching release branch of the Azure ML sample notebooks
WORKDIR /home
RUN git clone -b "azureml-sdk-1.0.30" --single-branch https://github.com/Azure/MachineLearningNotebooks.git

# generate the jupyter configuration file (mkdir -p is idempotent)
RUN ["/bin/bash", "-c", "source activate azureml && mkdir -p ~/.jupyter && cd ~/.jupyter && jupyter notebook --generate-config"]

# set an empty token for Jupyter to remove authentication.
# this is NOT recommended for production environments
RUN echo "c.NotebookApp.token = ''" >> ~/.jupyter/jupyter_notebook_config.py

# document that the notebook server listens on port 8887 (EXPOSE does not publish)
EXPOSE 8887

# start the Jupyter notebook server on port 8887 when the container starts.
# exec form so the shell wrapper is explicit and overridable per-argument.
CMD ["/bin/bash", "-c", "cd /home/MachineLearningNotebooks && source activate azureml && jupyter notebook --port 8887 --no-browser --ip 0.0.0.0 --allow-root"]
29
Dockerfiles/1.0.33/Dockerfile
Normal file
29
Dockerfiles/1.0.33/Dockerfile
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
FROM continuumio/miniconda:4.5.11

# install git; clean the apt lists in the same layer so they never persist in
# the image. NOTE(review): the blanket `apt-get upgrade` was dropped (hadolint
# DL3005) — pick up OS security fixes by bumping the base image tag instead.
RUN apt-get update && \
    apt-get install -y --no-install-recommends git && \
    rm -rf /var/lib/apt/lists/*

# create a new conda environment named azureml (conda package names are lowercase)
RUN conda create -n azureml -y -q python=3.6

# install additional packages used by the sample notebooks. this is optional
RUN ["/bin/bash", "-c", "source activate azureml && conda install -y tqdm cython matplotlib scikit-learn"]

# install azureml-sdk components, pinned to match the notebook branch cloned below
RUN ["/bin/bash", "-c", "source activate azureml && pip install --no-cache-dir azureml-sdk[notebooks]==1.0.33"]

# clone the matching release branch of the Azure ML sample notebooks
WORKDIR /home
RUN git clone -b "azureml-sdk-1.0.33" --single-branch https://github.com/Azure/MachineLearningNotebooks.git

# generate the jupyter configuration file (mkdir -p is idempotent)
RUN ["/bin/bash", "-c", "source activate azureml && mkdir -p ~/.jupyter && cd ~/.jupyter && jupyter notebook --generate-config"]

# set an empty token for Jupyter to remove authentication.
# this is NOT recommended for production environments
RUN echo "c.NotebookApp.token = ''" >> ~/.jupyter/jupyter_notebook_config.py

# document that the notebook server listens on port 8887 (EXPOSE does not publish)
EXPOSE 8887

# start the Jupyter notebook server on port 8887 when the container starts.
# exec form so the shell wrapper is explicit and overridable per-argument.
CMD ["/bin/bash", "-c", "cd /home/MachineLearningNotebooks && source activate azureml && jupyter notebook --port 8887 --no-browser --ip 0.0.0.0 --allow-root"]
17
NBSETUP.md
17
NBSETUP.md
@@ -1,6 +1,4 @@
|
|||||||
# Setting up environment
|
# Set up your notebook environment for Azure Machine Learning
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
To run the notebooks in this repository use one of the following options.
|
To run the notebooks in this repository use one of the following options.
|
||||||
|
|
||||||
@@ -12,9 +10,7 @@ Azure Notebooks is a hosted Jupyter-based notebook service in the Azure cloud. A
|
|||||||
1. Follow the instructions in the [Configuration](configuration.ipynb) notebook to create and connect to a workspace
|
1. Follow the instructions in the [Configuration](configuration.ipynb) notebook to create and connect to a workspace
|
||||||
1. Open one of the sample notebooks
|
1. Open one of the sample notebooks
|
||||||
|
|
||||||
**Make sure the Azure Notebook kernel is set to `Python 3.6`** when you open a notebook
|
**Make sure the Azure Notebook kernel is set to `Python 3.6`** when you open a notebook by choosing Kernel > Change Kernel > Python 3.6 from the menus.
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||
## **Option 2: Use your own notebook server**
|
## **Option 2: Use your own notebook server**
|
||||||
|
|
||||||
@@ -31,9 +27,6 @@ git clone https://github.com/Azure/MachineLearningNotebooks.git
|
|||||||
# install the base SDK and a Jupyter notebook server
|
# install the base SDK and a Jupyter notebook server
|
||||||
pip install azureml-sdk[notebooks]
|
pip install azureml-sdk[notebooks]
|
||||||
|
|
||||||
# install the data prep component
|
|
||||||
pip install azureml-dataprep
|
|
||||||
|
|
||||||
# install model explainability component
|
# install model explainability component
|
||||||
pip install azureml-sdk[explain]
|
pip install azureml-sdk[explain]
|
||||||
|
|
||||||
@@ -58,8 +51,7 @@ Please make sure you start with the [Configuration](configuration.ipynb) noteboo
|
|||||||
|
|
||||||
### Video walkthrough:
|
### Video walkthrough:
|
||||||
|
|
||||||
[](https://youtu.be/VIsXeTuW3FU)
|
[!VIDEO https://youtu.be/VIsXeTuW3FU]
|
||||||
|
|
||||||
|
|
||||||
## **Option 3: Use Docker**
|
## **Option 3: Use Docker**
|
||||||
|
|
||||||
@@ -90,9 +82,6 @@ Now you can point your browser to http://localhost:8887. We recommend that you s
|
|||||||
If you need additional Azure ML SDK components, you can either modify the Docker files before you build the Docker images to add additional steps, or install them through command line in the live container after you build the Docker image. For example:
|
If you need additional Azure ML SDK components, you can either modify the Docker files before you build the Docker images to add additional steps, or install them through command line in the live container after you build the Docker image. For example:
|
||||||
|
|
||||||
```sh
|
```sh
|
||||||
# install dataprep components
|
|
||||||
pip install azureml-dataprep
|
|
||||||
|
|
||||||
# install the core SDK and automated ml components
|
# install the core SDK and automated ml components
|
||||||
pip install azureml-sdk[automl]
|
pip install azureml-sdk[automl]
|
||||||
|
|
||||||
|
|||||||
@@ -96,7 +96,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"import azureml.core\n",
|
"import azureml.core\n",
|
||||||
"\n",
|
"\n",
|
||||||
"print(\"This notebook was created using version 1.0.21 of the Azure ML SDK\")\n",
|
"print(\"This notebook was created using version 1.0.23 of the Azure ML SDK\")\n",
|
||||||
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -287,6 +287,8 @@ Notice how the parameters are modified when using the CPU-only mode.
|
|||||||
|
|
||||||
The outputs of the script can be observed in the master notebook as the script is executed
|
The outputs of the script can be observed in the master notebook as the script is executed
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -15,3 +15,6 @@ As a pre-requisite, run the [configuration Notebook](../configuration.ipynb) not
|
|||||||
* [enable-app-insights-in-production-service](./deployment/enable-app-insights-in-production-service) Learn how to use App Insights with production web service.
|
* [enable-app-insights-in-production-service](./deployment/enable-app-insights-in-production-service) Learn how to use App Insights with production web service.
|
||||||
|
|
||||||
Find quickstarts, end-to-end tutorials, and how-tos on the [official documentation site for Azure Machine Learning service](https://docs.microsoft.com/en-us/azure/machine-learning/service/).
|
Find quickstarts, end-to-end tutorials, and how-tos on the [official documentation site for Azure Machine Learning service](https://docs.microsoft.com/en-us/azure/machine-learning/service/).
|
||||||
|
|
||||||
|
|
||||||
|

|
||||||
|
|||||||
@@ -1,8 +1,8 @@
|
|||||||
# Table of Contents
|
# Table of Contents
|
||||||
1. [Automated ML Introduction](#introduction)
|
1. [Automated ML Introduction](#introduction)
|
||||||
1. [Running samples in Azure Notebooks](#jupyter)
|
1. [Setup using Azure Notebooks](#jupyter)
|
||||||
1. [Running samples in Azure Databricks](#databricks)
|
1. [Setup using Azure Databricks](#databricks)
|
||||||
1. [Running samples in a Local Conda environment](#localconda)
|
1. [Setup using a Local Conda environment](#localconda)
|
||||||
1. [Automated ML SDK Sample Notebooks](#samples)
|
1. [Automated ML SDK Sample Notebooks](#samples)
|
||||||
1. [Documentation](#documentation)
|
1. [Documentation](#documentation)
|
||||||
1. [Running using python command](#pythoncommand)
|
1. [Running using python command](#pythoncommand)
|
||||||
@@ -13,15 +13,15 @@
|
|||||||
Automated machine learning (automated ML) builds high quality machine learning models for you by automating model and hyperparameter selection. Bring a labelled dataset that you want to build a model for, automated ML will give you a high quality machine learning model that you can use for predictions.
|
Automated machine learning (automated ML) builds high quality machine learning models for you by automating model and hyperparameter selection. Bring a labelled dataset that you want to build a model for, automated ML will give you a high quality machine learning model that you can use for predictions.
|
||||||
|
|
||||||
|
|
||||||
If you are new to Data Science, AutoML will help you get jumpstarted by simplifying machine learning model building. It abstracts you from needing to perform model selection, hyperparameter selection and in one step creates a high quality trained model for you to use.
|
If you are new to Data Science, automated ML will help you get jumpstarted by simplifying machine learning model building. It abstracts you from needing to perform model selection, hyperparameter selection and in one step creates a high quality trained model for you to use.
|
||||||
|
|
||||||
If you are an experienced data scientist, AutoML will help increase your productivity by intelligently performing the model and hyperparameter selection for your training and generates high quality models much quicker than manually specifying several combinations of the parameters and running training jobs. AutoML provides visibility and access to all the training jobs and the performance characteristics of the models to help you further tune the pipeline if you desire.
|
If you are an experienced data scientist, automated ML will help increase your productivity by intelligently performing the model and hyperparameter selection for your training and generates high quality models much quicker than manually specifying several combinations of the parameters and running training jobs. Automated ML provides visibility and access to all the training jobs and the performance characteristics of the models to help you further tune the pipeline if you desire.
|
||||||
|
|
||||||
Below are the three execution environments supported by AutoML.
|
Below are the three execution environments supported by automated ML.
|
||||||
|
|
||||||
|
|
||||||
<a name="jupyter"></a>
|
<a name="jupyter"></a>
|
||||||
## Running samples in Azure Notebooks - Jupyter based notebooks in the Azure cloud
|
## Setup using Azure Notebooks - Jupyter based notebooks in the Azure cloud
|
||||||
|
|
||||||
1. [](https://aka.ms/aml-clone-azure-notebooks)
|
1. [](https://aka.ms/aml-clone-azure-notebooks)
|
||||||
[Import sample notebooks ](https://aka.ms/aml-clone-azure-notebooks) into Azure Notebooks.
|
[Import sample notebooks ](https://aka.ms/aml-clone-azure-notebooks) into Azure Notebooks.
|
||||||
@@ -29,7 +29,7 @@ Below are the three execution environments supported by AutoML.
|
|||||||
1. Open one of the sample notebooks.
|
1. Open one of the sample notebooks.
|
||||||
|
|
||||||
<a name="databricks"></a>
|
<a name="databricks"></a>
|
||||||
## Running samples in Azure Databricks
|
## Setup using Azure Databricks
|
||||||
|
|
||||||
**NOTE**: Please create your Azure Databricks cluster as v4.x (high concurrency preferred) with **Python 3** (dropdown).
|
**NOTE**: Please create your Azure Databricks cluster as v4.x (high concurrency preferred) with **Python 3** (dropdown).
|
||||||
**NOTE**: You should at least have contributor access to your Azure subscription to run the notebook.
|
**NOTE**: You should at least have contributor access to your Azure subscription to run the notebook.
|
||||||
@@ -39,7 +39,7 @@ Below are the three execution environments supported by AutoML.
|
|||||||
- Attach the notebook to the cluster.
|
- Attach the notebook to the cluster.
|
||||||
|
|
||||||
<a name="localconda"></a>
|
<a name="localconda"></a>
|
||||||
## Running samples in a Local Conda environment
|
## Setup using a Local Conda environment
|
||||||
|
|
||||||
To run these notebook on your own notebook server, use these installation instructions.
|
To run these notebook on your own notebook server, use these installation instructions.
|
||||||
The instructions below will install everything you need and then start a Jupyter notebook.
|
The instructions below will install everything you need and then start a Jupyter notebook.
|
||||||
@@ -49,11 +49,15 @@ The instructions below will install everything you need and then start a Jupyter
|
|||||||
There's no need to install mini-conda specifically.
|
There's no need to install mini-conda specifically.
|
||||||
|
|
||||||
### 2. Downloading the sample notebooks
|
### 2. Downloading the sample notebooks
|
||||||
- Download the sample notebooks from [GitHub](https://github.com/Azure/MachineLearningNotebooks) as zip and extract the contents to a local directory. The AutoML sample notebooks are in the "automl" folder.
|
- Download the sample notebooks from [GitHub](https://github.com/Azure/MachineLearningNotebooks) as zip and extract the contents to a local directory. The automated ML sample notebooks are in the "automated-machine-learning" folder.
|
||||||
|
|
||||||
### 3. Setup a new conda environment
|
### 3. Setup a new conda environment
|
||||||
The **automl/automl_setup** script creates a new conda environment, installs the necessary packages, configures the widget and starts a jupyter notebook.
|
The **automl_setup** script creates a new conda environment, installs the necessary packages, configures the widget and starts a jupyter notebook. It takes the conda environment name as an optional parameter. The default conda environment name is azure_automl. The exact command depends on the operating system. See the specific sections below for Windows, Mac and Linux. It can take about 10 minutes to execute.
|
||||||
It takes the conda environment name as an optional parameter. The default conda environment name is azure_automl. The exact command depends on the operating system. See the specific sections below for Windows, Mac and Linux. It can take about 10 minutes to execute.
|
|
||||||
|
Packages installed by the **automl_setup** script:
|
||||||
|
<ul><li>python</li><li>nb_conda</li><li>matplotlib</li><li>numpy</li><li>cython</li><li>urllib3</li><li>scipy</li><li>scikit-learn</li><li>pandas</li><li>tensorflow</li><li>py-xgboost</li><li>azureml-sdk</li><li>azureml-widgets</li><li>pandas-ml</li></ul>
|
||||||
|
|
||||||
|
For more details refer to the [automl_env.yml](./automl_env.yml)
|
||||||
## Windows
|
## Windows
|
||||||
Start an **Anaconda Prompt** window, cd to the **how-to-use-azureml/automated-machine-learning** folder where the sample notebooks were extracted and then run:
|
Start an **Anaconda Prompt** window, cd to the **how-to-use-azureml/automated-machine-learning** folder where the sample notebooks were extracted and then run:
|
||||||
```
|
```
|
||||||
@@ -81,7 +85,7 @@ bash automl_setup_linux.sh
|
|||||||
|
|
||||||
### 5. Running Samples
|
### 5. Running Samples
|
||||||
- Please make sure you use the Python [conda env:azure_automl] kernel when trying the sample Notebooks.
|
- Please make sure you use the Python [conda env:azure_automl] kernel when trying the sample Notebooks.
|
||||||
- Follow the instructions in the individual notebooks to explore various features in AutoML
|
- Follow the instructions in the individual notebooks to explore various features in automated ML.
|
||||||
|
|
||||||
### 6. Starting jupyter notebook manually
|
### 6. Starting jupyter notebook manually
|
||||||
To start your Jupyter notebook manually, use:
|
To start your Jupyter notebook manually, use:
|
||||||
@@ -103,22 +107,22 @@ jupyter notebook
|
|||||||
|
|
||||||
- [auto-ml-classification.ipynb](classification/auto-ml-classification.ipynb)
|
- [auto-ml-classification.ipynb](classification/auto-ml-classification.ipynb)
|
||||||
- Dataset: scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits)
|
- Dataset: scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits)
|
||||||
- Simple example of using Auto ML for classification
|
- Simple example of using automated ML for classification
|
||||||
- Uses local compute for training
|
- Uses local compute for training
|
||||||
|
|
||||||
- [auto-ml-regression.ipynb](regression/auto-ml-regression.ipynb)
|
- [auto-ml-regression.ipynb](regression/auto-ml-regression.ipynb)
|
||||||
- Dataset: scikit learn's [diabetes dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html)
|
- Dataset: scikit learn's [diabetes dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html)
|
||||||
- Simple example of using Auto ML for regression
|
- Simple example of using automated ML for regression
|
||||||
- Uses local compute for training
|
- Uses local compute for training
|
||||||
|
|
||||||
- [auto-ml-remote-execution.ipynb](remote-execution/auto-ml-remote-execution.ipynb)
|
- [auto-ml-remote-execution.ipynb](remote-execution/auto-ml-remote-execution.ipynb)
|
||||||
- Dataset: scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits)
|
- Dataset: scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits)
|
||||||
- Example of using Auto ML for classification using a remote linux DSVM for training
|
- Example of using automated ML for classification using a remote linux DSVM for training
|
||||||
- Parallel execution of iterations
|
- Parallel execution of iterations
|
||||||
- Async tracking of progress
|
- Async tracking of progress
|
||||||
- Cancelling individual iterations or entire run
|
- Cancelling individual iterations or entire run
|
||||||
- Retrieving models for any iteration or logged metric
|
- Retrieving models for any iteration or logged metric
|
||||||
- Specify automl settings as kwargs
|
- Specify automated ML settings as kwargs
|
||||||
|
|
||||||
- [auto-ml-remote-amlcompute.ipynb](remote-batchai/auto-ml-remote-amlcompute.ipynb)
|
- [auto-ml-remote-amlcompute.ipynb](remote-batchai/auto-ml-remote-amlcompute.ipynb)
|
||||||
- Dataset: scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits)
|
- Dataset: scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits)
|
||||||
@@ -127,7 +131,7 @@ jupyter notebook
|
|||||||
- Async tracking of progress
|
- Async tracking of progress
|
||||||
- Cancelling individual iterations or entire run
|
- Cancelling individual iterations or entire run
|
||||||
- Retrieving models for any iteration or logged metric
|
- Retrieving models for any iteration or logged metric
|
||||||
- Specify automl settings as kwargs
|
- Specify automated ML settings as kwargs
|
||||||
|
|
||||||
- [auto-ml-remote-attach.ipynb](remote-attach/auto-ml-remote-attach.ipynb)
|
- [auto-ml-remote-attach.ipynb](remote-attach/auto-ml-remote-attach.ipynb)
|
||||||
- Dataset: Scikit learn's [20newsgroup](http://scikit-learn.org/stable/datasets/twenty_newsgroups.html)
|
- Dataset: Scikit learn's [20newsgroup](http://scikit-learn.org/stable/datasets/twenty_newsgroups.html)
|
||||||
@@ -148,8 +152,8 @@ jupyter notebook
|
|||||||
|
|
||||||
- [auto-ml-exploring-previous-runs.ipynb](exploring-previous-runs/auto-ml-exploring-previous-runs.ipynb)
|
- [auto-ml-exploring-previous-runs.ipynb](exploring-previous-runs/auto-ml-exploring-previous-runs.ipynb)
|
||||||
- List all projects for the workspace
|
- List all projects for the workspace
|
||||||
- List all AutoML Runs for a given project
|
- List all automated ML Runs for a given project
|
||||||
- Get details for a AutoML Run. (Automl settings, run widget & all metrics)
|
- Get details for a automated ML Run. (automated ML settings, run widget & all metrics)
|
||||||
- Download fitted pipeline for any iteration
|
- Download fitted pipeline for any iteration
|
||||||
|
|
||||||
- [auto-ml-remote-execution-with-datastore.ipynb](remote-execution-with-datastore/auto-ml-remote-execution-with-datastore.ipynb)
|
- [auto-ml-remote-execution-with-datastore.ipynb](remote-execution-with-datastore/auto-ml-remote-execution-with-datastore.ipynb)
|
||||||
@@ -158,7 +162,7 @@ jupyter notebook
|
|||||||
|
|
||||||
- [auto-ml-classification-with-deployment.ipynb](classification-with-deployment/auto-ml-classification-with-deployment.ipynb)
|
- [auto-ml-classification-with-deployment.ipynb](classification-with-deployment/auto-ml-classification-with-deployment.ipynb)
|
||||||
- Dataset: scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits)
|
- Dataset: scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits)
|
||||||
- Simple example of using Auto ML for classification
|
- Simple example of using automated ML for classification
|
||||||
- Registering the model
|
- Registering the model
|
||||||
- Creating Image and creating aci service
|
- Creating Image and creating aci service
|
||||||
- Testing the aci service
|
- Testing the aci service
|
||||||
@@ -178,16 +182,21 @@ jupyter notebook
|
|||||||
|
|
||||||
- [auto-ml-classification-with-whitelisting.ipynb](classification-with-whitelisting/auto-ml-classification-with-whitelisting.ipynb)
|
- [auto-ml-classification-with-whitelisting.ipynb](classification-with-whitelisting/auto-ml-classification-with-whitelisting.ipynb)
|
||||||
- Dataset: scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits)
|
- Dataset: scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits)
|
||||||
- Simple example of using Auto ML for classification with whitelisting tensorflow models.
|
- Simple example of using automated ML for classification with whitelisting tensorflow models.
|
||||||
- Uses local compute for training
|
- Uses local compute for training
|
||||||
|
|
||||||
- [auto-ml-forecasting-energy-demand.ipynb](forecasting-energy-demand/auto-ml-forecasting-energy-demand.ipynb)
|
- [auto-ml-forecasting-energy-demand.ipynb](forecasting-energy-demand/auto-ml-forecasting-energy-demand.ipynb)
|
||||||
- Dataset: [NYC energy demand data](forecasting-a/nyc_energy.csv)
|
- Dataset: [NYC energy demand data](forecasting-a/nyc_energy.csv)
|
||||||
- Example of using AutoML for training a forecasting model
|
- Example of using automated ML for training a forecasting model
|
||||||
|
|
||||||
- [auto-ml-forecasting-orange-juice-sales.ipynb](forecasting-orange-juice-sales/auto-ml-forecasting-orange-juice-sales.ipynb)
|
- [auto-ml-forecasting-orange-juice-sales.ipynb](forecasting-orange-juice-sales/auto-ml-forecasting-orange-juice-sales.ipynb)
|
||||||
- Dataset: [Dominick's grocery sales of orange juice](forecasting-b/dominicks_OJ.csv)
|
- Dataset: [Dominick's grocery sales of orange juice](forecasting-b/dominicks_OJ.csv)
|
||||||
- Example of training an AutoML forecasting model on multiple time-series
|
- Example of training an automated ML forecasting model on multiple time-series
|
||||||
|
|
||||||
|
- [auto-ml-classification-with-onnx.ipynb](classification-with-onnx/auto-ml-classification-with-onnx.ipynb)
|
||||||
|
- Dataset: scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits)
|
||||||
|
- Simple example of using automated ML for classification with ONNX models
|
||||||
|
- Uses local compute for training
|
||||||
|
|
||||||
<a name="documentation"></a>
|
<a name="documentation"></a>
|
||||||
See [Configure automated machine learning experiments](https://docs.microsoft.com/azure/machine-learning/service/how-to-configure-auto-train) to learn more about the settings and features available for automated machine learning experiments.
|
See [Configure automated machine learning experiments](https://docs.microsoft.com/azure/machine-learning/service/how-to-configure-auto-train) to learn more about the settings and features available for automated machine learning experiments.
|
||||||
@@ -206,10 +215,18 @@ The main code of the file must be indented so that it is under this condition.
|
|||||||
<a name="troubleshooting"></a>
|
<a name="troubleshooting"></a>
|
||||||
# Troubleshooting
|
# Troubleshooting
|
||||||
## automl_setup fails
|
## automl_setup fails
|
||||||
1. On Windows, make sure that you are running automl_setup from an Anaconda Prompt window rather than a regular cmd window. You can launch the "Anaconda Prompt" window by hitting the Start button and typing "Anaconda Prompt". If you don't see the application "Anaconda Prompt", you might not have conda or mini conda installed. In that case, you can install it [here](https://conda.io/miniconda.html)
|
1. On Windows, make sure that you are running automl_setup from an Anaconda Prompt window rather than a regular cmd window. You can launch the "Anaconda Prompt" window by hitting the Start button and typing "Anaconda Prompt". If you don't see the application "Anaconda Prompt", you might not have conda or mini conda installed. In that case, you can install it [here](https://conda.io/miniconda.html)
|
||||||
2. Check that you have conda 64-bit installed rather than 32-bit. You can check this with the command `conda info`. The `platform` should be `win-64` for Windows or `osx-64` for Mac.
|
2. Check that you have conda 64-bit installed rather than 32-bit. You can check this with the command `conda info`. The `platform` should be `win-64` for Windows or `osx-64` for Mac.
|
||||||
3. Check that you have conda 4.4.10 or later. You can check the version with the command `conda -V`. If you have a previous version installed, you can update it using the command: `conda update conda`.
|
3. Check that you have conda 4.4.10 or later. You can check the version with the command `conda -V`. If you have a previous version installed, you can update it using the command: `conda update conda`.
|
||||||
4. Pass a new name as the first parameter to automl_setup so that it creates a new conda environment. You can view existing conda environments using `conda env list` and remove them with `conda env remove -n <environmentname>`.
|
4. On Linux, if the error is `gcc: error trying to exec 'cc1plus': execvp: No such file or directory`, install build essentials using the command `sudo apt-get install build-essential`.
|
||||||
|
5. Pass a new name as the first parameter to automl_setup so that it creates a new conda environment. You can view existing conda environments using `conda env list` and remove them with `conda env remove -n <environmentname>`.
|
||||||
|
|
||||||
|
## automl_setup_linux.sh fails
|
||||||
|
If automl_setup_linux.sh fails on Ubuntu Linux with the error: `unable to execute 'gcc': No such file or directory`
|
||||||
|
1. Make sure that outbound ports 53 and 80 are enabled. On an Azure VM, you can do this from the Azure Portal by selecting the VM and clicking on Networking.
|
||||||
|
2. Run the command: `sudo apt-get update`
|
||||||
|
3. Run the command: `sudo apt-get install build-essential --fix-missing`
|
||||||
|
4. Run `automl_setup_linux.sh` again.
|
||||||
|
|
||||||
## configuration.ipynb fails
|
## configuration.ipynb fails
|
||||||
1) For local conda, make sure that you have successfully run automl_setup first.
|
1) For local conda, make sure that you have successfully run automl_setup first.
|
||||||
@@ -233,13 +250,20 @@ If a sample notebook fails with an error that property, method or library does n
|
|||||||
## Numpy import fails on Windows
|
## Numpy import fails on Windows
|
||||||
Some Windows environments see an error loading numpy with the latest Python version 3.6.8. If you see this issue, try with Python version 3.6.7.
|
Some Windows environments see an error loading numpy with the latest Python version 3.6.8. If you see this issue, try with Python version 3.6.7.
|
||||||
|
|
||||||
|
## Numpy import fails
|
||||||
|
Check the tensorflow version in the automated ml conda environment. Supported versions are < 1.13. Uninstall tensorflow from the environment if version is >= 1.13
|
||||||
|
You may check the version of tensorflow and uninstall as follows
|
||||||
|
1) start a command shell, activate conda environment where automated ml packages are installed
|
||||||
|
2) enter `pip freeze` and look for `tensorflow` , if found, the version listed should be < 1.13
|
||||||
|
3) If the listed version is a not a supported version, `pip uninstall tensorflow` in the command shell and enter y for confirmation.
|
||||||
|
|
||||||
## Remote run: DsvmCompute.create fails
|
## Remote run: DsvmCompute.create fails
|
||||||
There are several reasons why the DsvmCompute.create can fail. The reason is usually in the error message but you have to look at the end of the error message for the detailed reason. Some common reasons are:
|
There are several reasons why the DsvmCompute.create can fail. The reason is usually in the error message but you have to look at the end of the error message for the detailed reason. Some common reasons are:
|
||||||
1) `Compute name is invalid, it should start with a letter, be between 2 and 16 character, and only include letters (a-zA-Z), numbers (0-9) and \'-\'.` Note that underscore is not allowed in the name.
|
1) `Compute name is invalid, it should start with a letter, be between 2 and 16 character, and only include letters (a-zA-Z), numbers (0-9) and \'-\'.` Note that underscore is not allowed in the name.
|
||||||
2) `The requested VM size xxxxx is not available in the current region.` You can select a different region or vm_size.
|
2) `The requested VM size xxxxx is not available in the current region.` You can select a different region or vm_size.
|
||||||
|
|
||||||
## Remote run: Unable to establish SSH connection
|
## Remote run: Unable to establish SSH connection
|
||||||
AutoML uses the SSH protocol to communicate with remote DSVMs. This defaults to port 22. Possible causes for this error are:
|
Automated ML uses the SSH protocol to communicate with remote DSVMs. This defaults to port 22. Possible causes for this error are:
|
||||||
1) The DSVM is not ready for SSH connections. When DSVM creation completes, the DSVM might still not be ready to accept SSH connections. The sample notebooks have a one minute delay to allow for this.
|
1) The DSVM is not ready for SSH connections. When DSVM creation completes, the DSVM might still not be ready to accept SSH connections. The sample notebooks have a one minute delay to allow for this.
|
||||||
2) Your Azure Subscription may restrict the IP address ranges that can access the DSVM on port 22. You can check this in the Azure Portal by selecting the Virtual Machine and then clicking Networking. The Virtual Machine name is the name that you provided in the notebook plus 10 alpha numeric characters to make the name unique. The Inbound Port Rules define what can access the VM on specific ports. Note that there is a priority order. So, a Deny entry with a low priority number will override an Allow entry with a higher priority number.
|
2) Your Azure Subscription may restrict the IP address ranges that can access the DSVM on port 22. You can check this in the Azure Portal by selecting the Virtual Machine and then clicking Networking. The Virtual Machine name is the name that you provided in the notebook plus 10 alpha numeric characters to make the name unique. The Inbound Port Rules define what can access the VM on specific ports. Note that there is a priority order. So, a Deny entry with a low priority number will override an Allow entry with a higher priority number.
|
||||||
|
|
||||||
@@ -250,13 +274,13 @@ This is often an issue with the `get_data` method.
|
|||||||
3) You can get to the error log for the setup iteration by clicking the `Click here to see the run in Azure portal` link, click `Back to Experiment`, click on the highest run number and then click on Logs.
|
3) You can get to the error log for the setup iteration by clicking the `Click here to see the run in Azure portal` link, click `Back to Experiment`, click on the highest run number and then click on Logs.
|
||||||
|
|
||||||
## Remote run: disk full
|
## Remote run: disk full
|
||||||
AutoML creates files under /tmp/azureml_runs for each iteration that it runs. It creates a folder with the iteration id. For example: AutoML_9a038a18-77cc-48f1-80fb-65abdbc33abe_93. Under this, there is a azureml-logs folder, which contains logs. If you run too many iterations on the same DSVM, these files can fill the disk.
|
Automated ML creates files under /tmp/azureml_runs for each iteration that it runs. It creates a folder with the iteration id. For example: AutoML_9a038a18-77cc-48f1-80fb-65abdbc33abe_93. Under this, there is a azureml-logs folder, which contains logs. If you run too many iterations on the same DSVM, these files can fill the disk.
|
||||||
You can delete the files under /tmp/azureml_runs or just delete the VM and create a new one.
|
You can delete the files under /tmp/azureml_runs or just delete the VM and create a new one.
|
||||||
If your get_data downloads files, make sure to delete them or they can use disk space as well.
|
If your get_data downloads files, make sure to delete them or they can use disk space as well.
|
||||||
When using DataStore, it is good to specify an absolute path for the files so that they are downloaded just once. If you specify a relative path, it will download a file for each iteration.
|
When using DataStore, it is good to specify an absolute path for the files so that they are downloaded just once. If you specify a relative path, it will download a file for each iteration.
|
||||||
|
|
||||||
## Remote run: Iterations fail and the log contains "MemoryError"
|
## Remote run: Iterations fail and the log contains "MemoryError"
|
||||||
This can be caused by insufficient memory on the DSVM. AutoML loads all training data into memory. So, the available memory should be more than the training data size.
|
This can be caused by insufficient memory on the DSVM. Automated ML loads all training data into memory. So, the available memory should be more than the training data size.
|
||||||
If you are using a remote DSVM, memory is needed for each concurrent iteration. The max_concurrent_iterations setting specifies the maximum concurrent iterations. For example, if the training data size is 8Gb and max_concurrent_iterations is set to 10, the minimum memory required is at least 80Gb.
|
If you are using a remote DSVM, memory is needed for each concurrent iteration. The max_concurrent_iterations setting specifies the maximum concurrent iterations. For example, if the training data size is 8Gb and max_concurrent_iterations is set to 10, the minimum memory required is at least 80Gb.
|
||||||
To resolve this issue, allocate a DSVM with more memory or reduce the value specified for max_concurrent_iterations.
|
To resolve this issue, allocate a DSVM with more memory or reduce the value specified for max_concurrent_iterations.
|
||||||
|
|
||||||
|
|||||||
21
how-to-use-azureml/automated-machine-learning/automl_env.yml
Normal file
21
how-to-use-azureml/automated-machine-learning/automl_env.yml
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
name: azure_automl
|
||||||
|
dependencies:
|
||||||
|
# The python interpreter version.
|
||||||
|
# Currently Azure ML only supports 3.5.2 and later.
|
||||||
|
- python>=3.5.2,<3.6.8
|
||||||
|
- nb_conda
|
||||||
|
- matplotlib==2.1.0
|
||||||
|
- numpy>=1.11.0,<=1.16.2
|
||||||
|
- cython
|
||||||
|
- urllib3<1.24
|
||||||
|
- scipy>=1.0.0,<=1.1.0
|
||||||
|
- scikit-learn>=0.19.0,<=0.20.3
|
||||||
|
- pandas>=0.22.0,<0.23.0
|
||||||
|
- py-xgboost<=0.80
|
||||||
|
|
||||||
|
- pip:
|
||||||
|
# Required packages for AzureML execution, history, and data preparation.
|
||||||
|
- azureml-sdk[automl,explain]
|
||||||
|
- azureml-widgets
|
||||||
|
- pandas_ml
|
||||||
|
|
||||||
@@ -0,0 +1,51 @@
|
|||||||
|
@echo off
|
||||||
|
set conda_env_name=%1
|
||||||
|
set automl_env_file=%2
|
||||||
|
set options=%3
|
||||||
|
set PIP_NO_WARN_SCRIPT_LOCATION=0
|
||||||
|
|
||||||
|
IF "%conda_env_name%"=="" SET conda_env_name="azure_automl"
|
||||||
|
IF "%automl_env_file%"=="" SET automl_env_file="automl_env.yml"
|
||||||
|
|
||||||
|
IF NOT EXIST %automl_env_file% GOTO YmlMissing
|
||||||
|
|
||||||
|
call conda activate %conda_env_name% 2>nul:
|
||||||
|
|
||||||
|
if not errorlevel 1 (
|
||||||
|
echo Upgrading azureml-sdk[automl,notebooks,explain] in existing conda environment %conda_env_name%
|
||||||
|
call pip install --upgrade azureml-sdk[automl,notebooks,explain]
|
||||||
|
if errorlevel 1 goto ErrorExit
|
||||||
|
) else (
|
||||||
|
call conda env create -f %automl_env_file% -n %conda_env_name%
|
||||||
|
)
|
||||||
|
|
||||||
|
call conda activate %conda_env_name% 2>nul:
|
||||||
|
if errorlevel 1 goto ErrorExit
|
||||||
|
|
||||||
|
call python -m ipykernel install --user --name %conda_env_name% --display-name "Python (%conda_env_name%)"
|
||||||
|
|
||||||
|
REM azureml.widgets is now installed as part of the pip install under the conda env.
|
||||||
|
REM Removing the old user install so that the notebooks will use the latest widget.
|
||||||
|
call jupyter nbextension uninstall --user --py azureml.widgets
|
||||||
|
|
||||||
|
echo.
|
||||||
|
echo.
|
||||||
|
echo ***************************************
|
||||||
|
echo * AutoML setup completed successfully *
|
||||||
|
echo ***************************************
|
||||||
|
IF NOT "%options%"=="nolaunch" (
|
||||||
|
echo.
|
||||||
|
echo Starting jupyter notebook - please run the configuration notebook
|
||||||
|
echo.
|
||||||
|
jupyter notebook --log-level=50 --notebook-dir='..\..'
|
||||||
|
)
|
||||||
|
|
||||||
|
goto End
|
||||||
|
|
||||||
|
:YmlMissing
|
||||||
|
echo File %automl_env_file% not found.
|
||||||
|
|
||||||
|
:ErrorExit
|
||||||
|
echo Install failed
|
||||||
|
|
||||||
|
:End
|
||||||
@@ -0,0 +1,52 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
CONDA_ENV_NAME=$1
|
||||||
|
AUTOML_ENV_FILE=$2
|
||||||
|
OPTIONS=$3
|
||||||
|
PIP_NO_WARN_SCRIPT_LOCATION=0
|
||||||
|
|
||||||
|
if [ "$CONDA_ENV_NAME" == "" ]
|
||||||
|
then
|
||||||
|
CONDA_ENV_NAME="azure_automl"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ "$AUTOML_ENV_FILE" == "" ]
|
||||||
|
then
|
||||||
|
AUTOML_ENV_FILE="automl_env.yml"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ ! -f $AUTOML_ENV_FILE ]; then
|
||||||
|
echo "File $AUTOML_ENV_FILE not found"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
if source activate $CONDA_ENV_NAME 2> /dev/null
|
||||||
|
then
|
||||||
|
echo "Upgrading azureml-sdk[automl,notebooks,explain] in existing conda environment" $CONDA_ENV_NAME
|
||||||
|
pip install --upgrade azureml-sdk[automl,notebooks,explain] &&
|
||||||
|
jupyter nbextension uninstall --user --py azureml.widgets
|
||||||
|
else
|
||||||
|
conda env create -f $AUTOML_ENV_FILE -n $CONDA_ENV_NAME &&
|
||||||
|
source activate $CONDA_ENV_NAME &&
|
||||||
|
python -m ipykernel install --user --name $CONDA_ENV_NAME --display-name "Python ($CONDA_ENV_NAME)" &&
|
||||||
|
jupyter nbextension uninstall --user --py azureml.widgets &&
|
||||||
|
echo "" &&
|
||||||
|
echo "" &&
|
||||||
|
echo "***************************************" &&
|
||||||
|
echo "* AutoML setup completed successfully *" &&
|
||||||
|
echo "***************************************" &&
|
||||||
|
if [ "$OPTIONS" != "nolaunch" ]
|
||||||
|
then
|
||||||
|
echo "" &&
|
||||||
|
echo "Starting jupyter notebook - please run the configuration notebook" &&
|
||||||
|
echo "" &&
|
||||||
|
jupyter notebook --log-level=50 --notebook-dir '../..'
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ $? -gt 0 ]
|
||||||
|
then
|
||||||
|
echo "Installation failed"
|
||||||
|
fi
|
||||||
|
|
||||||
|
|
||||||
@@ -0,0 +1,54 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
CONDA_ENV_NAME=$1
|
||||||
|
AUTOML_ENV_FILE=$2
|
||||||
|
OPTIONS=$3
|
||||||
|
PIP_NO_WARN_SCRIPT_LOCATION=0
|
||||||
|
|
||||||
|
if [ "$CONDA_ENV_NAME" == "" ]
|
||||||
|
then
|
||||||
|
CONDA_ENV_NAME="azure_automl"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ "$AUTOML_ENV_FILE" == "" ]
|
||||||
|
then
|
||||||
|
AUTOML_ENV_FILE="automl_env.yml"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ ! -f $AUTOML_ENV_FILE ]; then
|
||||||
|
echo "File $AUTOML_ENV_FILE not found"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
if source activate $CONDA_ENV_NAME 2> /dev/null
|
||||||
|
then
|
||||||
|
echo "Upgrading azureml-sdk[automl,notebooks,explain] in existing conda environment" $CONDA_ENV_NAME
|
||||||
|
pip install --upgrade azureml-sdk[automl,notebooks,explain] &&
|
||||||
|
jupyter nbextension uninstall --user --py azureml.widgets
|
||||||
|
else
|
||||||
|
conda env create -f $AUTOML_ENV_FILE -n $CONDA_ENV_NAME &&
|
||||||
|
source activate $CONDA_ENV_NAME &&
|
||||||
|
conda install lightgbm -c conda-forge -y &&
|
||||||
|
python -m ipykernel install --user --name $CONDA_ENV_NAME --display-name "Python ($CONDA_ENV_NAME)" &&
|
||||||
|
jupyter nbextension uninstall --user --py azureml.widgets &&
|
||||||
|
echo "" &&
|
||||||
|
echo "" &&
|
||||||
|
echo "***************************************" &&
|
||||||
|
echo "* AutoML setup completed successfully *" &&
|
||||||
|
echo "***************************************" &&
|
||||||
|
if [ "$OPTIONS" != "nolaunch" ]
|
||||||
|
then
|
||||||
|
echo "" &&
|
||||||
|
echo "Starting jupyter notebook - please run the configuration notebook" &&
|
||||||
|
echo "" &&
|
||||||
|
jupyter notebook --log-level=50 --notebook-dir '../..'
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ $? -gt 0 ]
|
||||||
|
then
|
||||||
|
echo "Installation failed"
|
||||||
|
fi
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -139,7 +139,6 @@
|
|||||||
" primary_metric = 'AUC_weighted',\n",
|
" primary_metric = 'AUC_weighted',\n",
|
||||||
" iteration_timeout_minutes = 20,\n",
|
" iteration_timeout_minutes = 20,\n",
|
||||||
" iterations = 10,\n",
|
" iterations = 10,\n",
|
||||||
" n_cross_validations = 2,\n",
|
|
||||||
" verbosity = logging.INFO,\n",
|
" verbosity = logging.INFO,\n",
|
||||||
" X = X_train, \n",
|
" X = X_train, \n",
|
||||||
" y = y_train,\n",
|
" y = y_train,\n",
|
||||||
@@ -263,7 +262,7 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"To ensure the fit results are consistent with the training results, the SDK dependency versions need to be the same as the environment that trains the model. Details about retrieving the versions can be found in notebook [12.auto-ml-retrieve-the-training-sdk-versions](12.auto-ml-retrieve-the-training-sdk-versions.ipynb)."
|
"To ensure the fit results are consistent with the training results, the SDK dependency versions need to be the same as the environment that trains the model. The following cells create a file, myenv.yml, which specifies the dependencies from the run."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -303,7 +302,8 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||||
"\n",
|
"\n",
|
||||||
"myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn'], pip_packages=['azureml-sdk[automl]'])\n",
|
"myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn','py-xgboost<=0.80'],\n",
|
||||||
|
" pip_packages=['azureml-sdk[automl]'])\n",
|
||||||
"\n",
|
"\n",
|
||||||
"conda_env_file_name = 'myenv.yml'\n",
|
"conda_env_file_name = 'myenv.yml'\n",
|
||||||
"myenv.save_to_file('.', conda_env_file_name)"
|
"myenv.save_to_file('.', conda_env_file_name)"
|
||||||
|
|||||||
@@ -0,0 +1,284 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||||
|
"\n",
|
||||||
|
"Licensed under the MIT License."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Automated Machine Learning\n",
|
||||||
|
"_**Classification with Local Compute**_\n",
|
||||||
|
"\n",
|
||||||
|
"## Contents\n",
|
||||||
|
"1. [Introduction](#Introduction)\n",
|
||||||
|
"1. [Setup](#Setup)\n",
|
||||||
|
"1. [Data](#Data)\n",
|
||||||
|
"1. [Train](#Train)\n",
|
||||||
|
"1. [Results](#Results)\n",
|
||||||
|
"1. [Test](#Test)\n",
|
||||||
|
"\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Introduction\n",
|
||||||
|
"\n",
|
||||||
|
"In this example we use the scikit-learn's [digit dataset](http://scikit-learn.org/stable/datasets/index.html#optical-recognition-of-handwritten-digits-dataset) to showcase how you can use AutoML for a simple classification problem.\n",
|
||||||
|
"\n",
|
||||||
|
"Make sure you have executed the [configuration](../../../configuration.ipynb) before running this notebook.\n",
|
||||||
|
"\n",
|
||||||
|
"Please find the ONNX related documentations [here](https://github.com/onnx/onnx).\n",
|
||||||
|
"\n",
|
||||||
|
"In this notebook you will learn how to:\n",
|
||||||
|
"1. Create an `Experiment` in an existing `Workspace`.\n",
|
||||||
|
"2. Configure AutoML using `AutoMLConfig`.\n",
|
||||||
|
"3. Train the model using local compute with ONNX compatible config on.\n",
|
||||||
|
"4. Explore the results and save the ONNX model."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Setup\n",
|
||||||
|
"\n",
|
||||||
|
"As part of the setup you have already created an Azure ML `Workspace` object. For AutoML you will need to create an `Experiment` object, which is a named object in a `Workspace` used to run experiments."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import logging\n",
|
||||||
|
"\n",
|
||||||
|
"from matplotlib import pyplot as plt\n",
|
||||||
|
"import numpy as np\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"from sklearn import datasets\n",
|
||||||
|
"\n",
|
||||||
|
"import azureml.core\n",
|
||||||
|
"from azureml.core.experiment import Experiment\n",
|
||||||
|
"from azureml.core.workspace import Workspace\n",
|
||||||
|
"from azureml.train.automl import AutoMLConfig"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"ws = Workspace.from_config()\n",
|
||||||
|
"\n",
|
||||||
|
"# Choose a name for the experiment and specify the project folder.\n",
|
||||||
|
"experiment_name = 'automl-classification-onnx'\n",
|
||||||
|
"project_folder = './sample_projects/automl-classification-onnx'\n",
|
||||||
|
"\n",
|
||||||
|
"experiment = Experiment(ws, experiment_name)\n",
|
||||||
|
"\n",
|
||||||
|
"output = {}\n",
|
||||||
|
"output['SDK version'] = azureml.core.VERSION\n",
|
||||||
|
"output['Subscription ID'] = ws.subscription_id\n",
|
||||||
|
"output['Workspace Name'] = ws.name\n",
|
||||||
|
"output['Resource Group'] = ws.resource_group\n",
|
||||||
|
"output['Location'] = ws.location\n",
|
||||||
|
"output['Project Directory'] = project_folder\n",
|
||||||
|
"output['Experiment Name'] = experiment.name\n",
|
||||||
|
"pd.set_option('display.max_colwidth', -1)\n",
|
||||||
|
"outputDf = pd.DataFrame(data = output, index = [''])\n",
|
||||||
|
"outputDf.T"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Data\n",
|
||||||
|
"\n",
|
||||||
|
"This uses scikit-learn's [load_digits](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html) method."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"digits = datasets.load_digits()\n",
|
||||||
|
"\n",
|
||||||
|
"# Exclude the first 100 rows from training so that they can be used for test.\n",
|
||||||
|
"X_train = digits.data[100:,:]\n",
|
||||||
|
"y_train = digits.target[100:]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Train with enable ONNX compatible models config on\n",
|
||||||
|
"\n",
|
||||||
|
"Instantiate an `AutoMLConfig` object to specify the settings and data used to run the experiment.\n",
|
||||||
|
"\n",
|
||||||
|
"Set the parameter enable_onnx_compatible_models=True, if you also want to generate the ONNX compatible models. Please note, the forecasting task and TensorFlow models are not ONNX compatible yet.\n",
|
||||||
|
"\n",
|
||||||
|
"|Property|Description|\n",
|
||||||
|
"|-|-|\n",
|
||||||
|
"|**task**|classification or regression|\n",
|
||||||
|
"|**primary_metric**|This is the metric that you want to optimize. Classification supports the following primary metrics: <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>average_precision_score_weighted</i><br><i>norm_macro_recall</i><br><i>precision_score_weighted</i>|\n",
|
||||||
|
"|**iteration_timeout_minutes**|Time limit in minutes for each iteration.|\n",
|
||||||
|
"|**iterations**|Number of iterations. In each iteration AutoML trains a specific pipeline with the data.|\n",
|
||||||
|
"|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n",
|
||||||
|
"|**y**|(sparse) array-like, shape = [n_samples, ], Multi-class targets.|\n",
|
||||||
|
"|**enable_onnx_compatible_models**|Enable the ONNX compatible models in the experiment.|\n",
|
||||||
|
"|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder.|"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"automl_config = AutoMLConfig(task = 'classification',\n",
|
||||||
|
" debug_log = 'automl_errors.log',\n",
|
||||||
|
" primary_metric = 'AUC_weighted',\n",
|
||||||
|
" iteration_timeout_minutes = 60,\n",
|
||||||
|
" iterations = 10,\n",
|
||||||
|
" verbosity = logging.INFO,\n",
|
||||||
|
" X = X_train, \n",
|
||||||
|
" y = y_train,\n",
|
||||||
|
" enable_onnx_compatible_models=True,\n",
|
||||||
|
" path = project_folder)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Call the `submit` method on the experiment object and pass the run configuration. Execution of local runs is synchronous. Depending on the data and the number of iterations this can run for a while.\n",
|
||||||
|
"In this example, we specify `show_output = True` to print currently running iterations to the console."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"local_run = experiment.submit(automl_config, show_output = True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"local_run"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Results"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"#### Widget for Monitoring Runs\n",
|
||||||
|
"\n",
|
||||||
|
"The widget will first report a \"loading\" status while running the first iteration. After completing the first iteration, an auto-updating graph and table will be shown. The widget will refresh once per minute, so you should see the graph update as child runs complete.\n",
|
||||||
|
"\n",
|
||||||
|
"**Note:** The widget displays a link at the bottom. Use this link to open a web interface to explore the individual run details."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.widgets import RunDetails\n",
|
||||||
|
"RunDetails(local_run).show() "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Retrieve the Best ONNX Model\n",
|
||||||
|
"\n",
|
||||||
|
"Below we select the best pipeline from our iterations. The `get_output` method returns the best run and the fitted model. The Model includes the pipeline and any pre-processing. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*.\n",
|
||||||
|
"\n",
|
||||||
|
"Set the parameter return_onnx_model=True to retrieve the best ONNX model, instead of the Python model."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"best_run, onnx_mdl = local_run.get_output(return_onnx_model=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Save the best ONNX model"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.train.automl._vendor.automl.client.core.common.onnx_convert import OnnxConverter\n",
|
||||||
|
"onnx_fl_path = \"./best_model.onnx\"\n",
|
||||||
|
"OnnxConverter.save_onnx_model(onnx_mdl, onnx_fl_path)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "savitam"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3.6",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python36"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.6.6"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
@@ -71,11 +71,17 @@
|
|||||||
"import azureml.core\n",
|
"import azureml.core\n",
|
||||||
"from azureml.core.experiment import Experiment\n",
|
"from azureml.core.experiment import Experiment\n",
|
||||||
"from azureml.core.workspace import Workspace\n",
|
"from azureml.core.workspace import Workspace\n",
|
||||||
"try:\n",
|
"import sys\n",
|
||||||
" import tensorflow as tf1\n",
|
"whitelist_models=[\"LightGBM\"]\n",
|
||||||
"except ImportError:\n",
|
"if \"3.7\" != sys.version[0:3]:\n",
|
||||||
" from pip._internal import main\n",
|
" try:\n",
|
||||||
" main(['install', 'tensorflow>=1.10.0,<=1.12.0'])\n",
|
" import tensorflow as tf1\n",
|
||||||
|
" except ImportError:\n",
|
||||||
|
" from pip._internal import main\n",
|
||||||
|
" main(['install', 'tensorflow>=1.10.0,<=1.12.0'])\n",
|
||||||
|
" logging.getLogger().setLevel(logging.ERROR)\n",
|
||||||
|
" whitelist_models=[\"TensorFlowLinearClassifier\", \"TensorFlowDNN\"]\n",
|
||||||
|
"\n",
|
||||||
"from azureml.train.automl import AutoMLConfig"
|
"from azureml.train.automl import AutoMLConfig"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -160,12 +166,11 @@
|
|||||||
" primary_metric = 'AUC_weighted',\n",
|
" primary_metric = 'AUC_weighted',\n",
|
||||||
" iteration_timeout_minutes = 60,\n",
|
" iteration_timeout_minutes = 60,\n",
|
||||||
" iterations = 10,\n",
|
" iterations = 10,\n",
|
||||||
" n_cross_validations = 3,\n",
|
|
||||||
" verbosity = logging.INFO,\n",
|
" verbosity = logging.INFO,\n",
|
||||||
" X = X_train, \n",
|
" X = X_train, \n",
|
||||||
" y = y_train,\n",
|
" y = y_train,\n",
|
||||||
" enable_tf=True,\n",
|
" enable_tf=True,\n",
|
||||||
" whitelist_models=[\"TensorFlowLinearClassifier\", \"TensorFlowDNN\"],\n",
|
" whitelist_models=whitelist_models,\n",
|
||||||
" path = project_folder)"
|
" path = project_folder)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -72,6 +72,32 @@
|
|||||||
"from azureml.train.automl import AutoMLConfig"
|
"from azureml.train.automl import AutoMLConfig"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Accessing the Azure ML workspace requires authentication with Azure.\n",
|
||||||
|
"\n",
|
||||||
|
"The default authentication is interactive authentication using the default tenant. Executing the `ws = Workspace.from_config()` line in the cell below will prompt for authentication the first time that it is run.\n",
|
||||||
|
"\n",
|
||||||
|
"If you have multiple Azure tenants, you can specify the tenant by replacing the `ws = Workspace.from_config()` line in the cell below with the following:\n",
|
||||||
|
"\n",
|
||||||
|
"```\n",
|
||||||
|
"from azureml.core.authentication import InteractiveLoginAuthentication\n",
|
||||||
|
"auth = InteractiveLoginAuthentication(tenant_id = 'mytenantid')\n",
|
||||||
|
"ws = Workspace.from_config(auth = auth)\n",
|
||||||
|
"```\n",
|
||||||
|
"\n",
|
||||||
|
"If you need to run in an environment where interactive login is not possible, you can use Service Principal authentication by replacing the `ws = Workspace.from_config()` line in the cell below with the following:\n",
|
||||||
|
"\n",
|
||||||
|
"```\n",
|
||||||
|
"from azureml.core.authentication import ServicePrincipalAuthentication\n",
|
||||||
|
"auth = auth = ServicePrincipalAuthentication('mytenantid', 'myappid', 'mypassword')\n",
|
||||||
|
"ws = Workspace.from_config(auth = auth)\n",
|
||||||
|
"```\n",
|
||||||
|
"For more details, see [aka.ms/aml-notebook-auth](http://aka.ms/aml-notebook-auth)"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
@@ -133,12 +159,17 @@
|
|||||||
"|-|-|\n",
|
"|-|-|\n",
|
||||||
"|**task**|classification or regression|\n",
|
"|**task**|classification or regression|\n",
|
||||||
"|**primary_metric**|This is the metric that you want to optimize. Classification supports the following primary metrics: <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>average_precision_score_weighted</i><br><i>norm_macro_recall</i><br><i>precision_score_weighted</i>|\n",
|
"|**primary_metric**|This is the metric that you want to optimize. Classification supports the following primary metrics: <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>average_precision_score_weighted</i><br><i>norm_macro_recall</i><br><i>precision_score_weighted</i>|\n",
|
||||||
"|**iteration_timeout_minutes**|Time limit in minutes for each iteration.|\n",
|
|
||||||
"|**iterations**|Number of iterations. In each iteration AutoML trains a specific pipeline with the data.|\n",
|
|
||||||
"|**n_cross_validations**|Number of cross validation splits.|\n",
|
|
||||||
"|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n",
|
"|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n",
|
||||||
"|**y**|(sparse) array-like, shape = [n_samples, ], Multi-class targets.|\n",
|
"|**y**|(sparse) array-like, shape = [n_samples, ], Multi-class targets.|\n",
|
||||||
"|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder.|"
|
"|**n_cross_validations**|Number of cross validation splits.|\n",
|
||||||
|
"|\n",
|
||||||
|
"\n",
|
||||||
|
"Automated machine learning trains multiple machine learning pipelines. Each pipelines training is known as an iteration.\n",
|
||||||
|
"* You can specify a maximum number of iterations using the `iterations` parameter.\n",
|
||||||
|
"* You can specify a maximum time for the run using the `experiment_timeout_minutes` parameter.\n",
|
||||||
|
"* If you specify neither the `iterations` nor the `experiment_timeout_minutes`, automated ML keeps running iterations while it continues to see improvements in the scores.\n",
|
||||||
|
"\n",
|
||||||
|
"The following example doesn't specify `iterations` or `experiment_timeout_minutes` and so runs until the scores stop improving.\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -148,15 +179,10 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"automl_config = AutoMLConfig(task = 'classification',\n",
|
"automl_config = AutoMLConfig(task = 'classification',\n",
|
||||||
" debug_log = 'automl_errors.log',\n",
|
|
||||||
" primary_metric = 'AUC_weighted',\n",
|
" primary_metric = 'AUC_weighted',\n",
|
||||||
" iteration_timeout_minutes = 60,\n",
|
|
||||||
" iterations = 25,\n",
|
|
||||||
" n_cross_validations = 3,\n",
|
|
||||||
" verbosity = logging.INFO,\n",
|
|
||||||
" X = X_train, \n",
|
" X = X_train, \n",
|
||||||
" y = y_train,\n",
|
" y = y_train,\n",
|
||||||
" path = project_folder)"
|
" n_cross_validations = 3)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -163,8 +163,7 @@
|
|||||||
" \"iterations\" : 2,\n",
|
" \"iterations\" : 2,\n",
|
||||||
" \"primary_metric\" : 'AUC_weighted',\n",
|
" \"primary_metric\" : 'AUC_weighted',\n",
|
||||||
" \"preprocess\" : False,\n",
|
" \"preprocess\" : False,\n",
|
||||||
" \"verbosity\" : logging.INFO,\n",
|
" \"verbosity\" : logging.INFO\n",
|
||||||
" \"n_cross_validations\": 3\n",
|
|
||||||
"}"
|
"}"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -0,0 +1,493 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||||
|
"\n",
|
||||||
|
"Licensed under the MIT License."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Automated Machine Learning\n",
|
||||||
|
"**BikeShare Demand Forecasting**\n",
|
||||||
|
"\n",
|
||||||
|
"## Contents\n",
|
||||||
|
"1. [Introduction](#Introduction)\n",
|
||||||
|
"1. [Setup](#Setup)\n",
|
||||||
|
"1. [Data](#Data)\n",
|
||||||
|
"1. [Train](#Train)\n",
|
||||||
|
"1. [Evaluate](#Evaluate)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Introduction\n",
|
||||||
|
"In this example, we show how AutoML can be used for bike share forecasting.\n",
|
||||||
|
"\n",
|
||||||
|
"The purpose is to demonstrate how to take advantage of the built-in holiday featurization, access the feature names, and further demonstrate how to work with the `forecast` function. Please also look at the additional forecasting notebooks, which document lagging, rolling windows, forecast quantiles, other ways to use the forecast function, and forecaster deployment.\n",
|
||||||
|
"\n",
|
||||||
|
"Make sure you have executed the [configuration](../../../configuration.ipynb) before running this notebook.\n",
|
||||||
|
"\n",
|
||||||
|
"In this notebook you would see\n",
|
||||||
|
"1. Creating an Experiment in an existing Workspace\n",
|
||||||
|
"2. Instantiating AutoMLConfig with new task type \"forecasting\" for timeseries data training, and other timeseries related settings: for this dataset we use the basic one: \"time_column_name\" \n",
|
||||||
|
"3. Training the Model using local compute\n",
|
||||||
|
"4. Exploring the results\n",
|
||||||
|
"5. Viewing the engineered names for featurized data and featurization summary for all raw features\n",
|
||||||
|
"6. Testing the fitted model"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Setup\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import azureml.core\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"import numpy as np\n",
|
||||||
|
"import logging\n",
|
||||||
|
"import warnings\n",
|
||||||
|
"# Squash warning messages for cleaner output in the notebook\n",
|
||||||
|
"warnings.showwarning = lambda *args, **kwargs: None\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"from azureml.core.workspace import Workspace\n",
|
||||||
|
"from azureml.core.experiment import Experiment\n",
|
||||||
|
"from azureml.train.automl import AutoMLConfig\n",
|
||||||
|
"from matplotlib import pyplot as plt\n",
|
||||||
|
"from sklearn.metrics import mean_absolute_error, mean_squared_error"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"As part of the setup you have already created a <b>Workspace</b>. For AutoML you would need to create an <b>Experiment</b>. An <b>Experiment</b> is a named object in a <b>Workspace</b>, which is used to run experiments."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"ws = Workspace.from_config()\n",
|
||||||
|
"\n",
|
||||||
|
"# choose a name for the run history container in the workspace\n",
|
||||||
|
"experiment_name = 'automl-bikeshareforecasting'\n",
|
||||||
|
"# project folder\n",
|
||||||
|
"project_folder = './sample_projects/automl-local-bikeshareforecasting'\n",
|
||||||
|
"\n",
|
||||||
|
"experiment = Experiment(ws, experiment_name)\n",
|
||||||
|
"\n",
|
||||||
|
"output = {}\n",
|
||||||
|
"output['SDK version'] = azureml.core.VERSION\n",
|
||||||
|
"output['Subscription ID'] = ws.subscription_id\n",
|
||||||
|
"output['Workspace'] = ws.name\n",
|
||||||
|
"output['Resource Group'] = ws.resource_group\n",
|
||||||
|
"output['Location'] = ws.location\n",
|
||||||
|
"output['Project Directory'] = project_folder\n",
|
||||||
|
"output['Run History Name'] = experiment_name\n",
|
||||||
|
"pd.set_option('display.max_colwidth', -1)\n",
|
||||||
|
"outputDf = pd.DataFrame(data = output, index = [''])\n",
|
||||||
|
"outputDf.T"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Data\n",
|
||||||
|
"Read bike share demand data from file, and preview data."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"data = pd.read_csv('bike-no.csv', parse_dates=['date'])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Let's set up what we know abou the dataset. \n",
|
||||||
|
"\n",
|
||||||
|
"**Target column** is what we want to forecast.\n",
|
||||||
|
"\n",
|
||||||
|
"**Time column** is the time axis along which to predict.\n",
|
||||||
|
"\n",
|
||||||
|
"**Grain** is another word for an individual time series in your dataset. Grains are identified by values of the columns listed `grain_column_names`, for example \"store\" and \"item\" if your data has multiple time series of sales, one series for each combination of store and item sold.\n",
|
||||||
|
"\n",
|
||||||
|
"This dataset has only one time series. Please see the [orange juice notebook](https://github.com/Azure/MachineLearningNotebooks/tree/master/how-to-use-azureml/automated-machine-learning/forecasting-orange-juice-sales) for an example of a multi-time series dataset."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"target_column_name = 'cnt'\n",
|
||||||
|
"time_column_name = 'date'\n",
|
||||||
|
"grain_column_names = []"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Split the data\n",
|
||||||
|
"\n",
|
||||||
|
"The first split we make is into train and test sets. Note we are splitting on time."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"train = data[data[time_column_name] < '2012-09-01']\n",
|
||||||
|
"test = data[data[time_column_name] >= '2012-09-01']\n",
|
||||||
|
"\n",
|
||||||
|
"X_train = train.copy()\n",
|
||||||
|
"y_train = X_train.pop(target_column_name).values\n",
|
||||||
|
"\n",
|
||||||
|
"X_test = test.copy()\n",
|
||||||
|
"y_test = X_test.pop(target_column_name).values\n",
|
||||||
|
"\n",
|
||||||
|
"print(X_train.shape)\n",
|
||||||
|
"print(y_train.shape)\n",
|
||||||
|
"print(X_test.shape)\n",
|
||||||
|
"print(y_test.shape)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Setting forecaster maximum horizon \n",
|
||||||
|
"\n",
|
||||||
|
"Assuming your test data forms a full and regular time series(regular time intervals and no holes), \n",
|
||||||
|
"the maximum horizon you will need to forecast is the length of the longest grain in your test set."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"if len(grain_column_names) == 0:\n",
|
||||||
|
" max_horizon = len(X_test)\n",
|
||||||
|
"else:\n",
|
||||||
|
" max_horizon = X_test.groupby(grain_column_names)[time_column_name].count().max()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Train\n",
|
||||||
|
"\n",
|
||||||
|
"Instantiate a AutoMLConfig object. This defines the settings and data used to run the experiment.\n",
|
||||||
|
"\n",
|
||||||
|
"|Property|Description|\n",
|
||||||
|
"|-|-|\n",
|
||||||
|
"|**task**|forecasting|\n",
|
||||||
|
"|**primary_metric**|This is the metric that you want to optimize.<br> Forecasting supports the following primary metrics <br><i>spearman_correlation</i><br><i>normalized_root_mean_squared_error</i><br><i>r2_score</i><br><i>normalized_mean_absolute_error</i>\n",
|
||||||
|
"|**iterations**|Number of iterations. In each iteration, Auto ML trains a specific pipeline on the given data|\n",
|
||||||
|
"|**iteration_timeout_minutes**|Time limit in minutes for each iteration.|\n",
|
||||||
|
"|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n",
|
||||||
|
"|**y**|(sparse) array-like, shape = [n_samples, ], targets values.|\n",
|
||||||
|
"|**n_cross_validations**|Number of cross validation splits.|\n",
|
||||||
|
"|**country**|The country used to generate holiday features. These should be ISO 3166 two-letter country codes (i.e. 'US', 'GB').|\n",
|
||||||
|
"|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder. "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"time_column_name = 'date'\n",
|
||||||
|
"automl_settings = {\n",
|
||||||
|
" \"time_column_name\": time_column_name,\n",
|
||||||
|
" # these columns are a breakdown of the total and therefore a leak\n",
|
||||||
|
" \"drop_column_names\": ['casual', 'registered'],\n",
|
||||||
|
" # knowing the country allows Automated ML to bring in holidays\n",
|
||||||
|
" \"country\" : 'US',\n",
|
||||||
|
" \"max_horizon\" : max_horizon,\n",
|
||||||
|
" \"target_lags\": 1 \n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"automl_config = AutoMLConfig(task = 'forecasting', \n",
|
||||||
|
" primary_metric='normalized_root_mean_squared_error',\n",
|
||||||
|
" iterations = 10,\n",
|
||||||
|
" iteration_timeout_minutes = 5,\n",
|
||||||
|
" X = X_train,\n",
|
||||||
|
" y = y_train,\n",
|
||||||
|
" n_cross_validations = 3, \n",
|
||||||
|
" path=project_folder,\n",
|
||||||
|
" verbosity = logging.INFO,\n",
|
||||||
|
" **automl_settings)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"We will now run the experiment, starting with 10 iterations of model search. Experiment can be continued for more iterations if the results are not yet good. You will see the currently running iterations printing to the console."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"local_run = experiment.submit(automl_config, show_output=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Displaying the run objects gives you links to the visual tools in the Azure Portal. Go try them!"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"local_run"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Retrieve the Best Model\n",
|
||||||
|
"Below we select the best pipeline from our iterations. The get_output method on automl_classifier returns the best run and the fitted model for the last fit invocation. There are overloads on get_output that allow you to retrieve the best run and fitted model for any logged metric or a particular iteration."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"best_run, fitted_model = local_run.get_output()\n",
|
||||||
|
"fitted_model.steps"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### View the engineered names for featurized data\n",
|
||||||
|
"\n",
|
||||||
|
"You can accees the engineered feature names generated in time-series featurization. Note that a number of named holiday periods are represented. We recommend that you have at least one year of data when using this feature to ensure that all yearly holidays are captured in the training featurization."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"fitted_model.named_steps['timeseriestransformer'].get_engineered_feature_names()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### View the featurization summary\n",
|
||||||
|
"\n",
|
||||||
|
"You can also see what featurization steps were performed on different raw features in the user data. For each raw feature in the user data, the following information is displayed:\n",
|
||||||
|
"\n",
|
||||||
|
"- Raw feature name\n",
|
||||||
|
"- Number of engineered features formed out of this raw feature\n",
|
||||||
|
"- Type detected\n",
|
||||||
|
"- If feature was dropped\n",
|
||||||
|
"- List of feature transformations for the raw feature"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"fitted_model.named_steps['timeseriestransformer'].get_featurization_summary()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Test the Best Fitted Model\n",
|
||||||
|
"\n",
|
||||||
|
"Predict on training and test set, and calculate residual values.\n",
|
||||||
|
"\n",
|
||||||
|
"We always score on the original dataset whose schema matches the scheme of the training dataset."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"X_test.head()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"y_query = y_test.copy().astype(np.float)\n",
|
||||||
|
"y_query.fill(np.NaN)\n",
|
||||||
|
"y_fcst, X_trans = fitted_model.forecast(X_test, y_query)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"It is a good practice to always align the output explicitly to the input, as the count and order of the rows may have changed during transformations that span multiple rows."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"def align_outputs(y_predicted, X_trans, X_test, y_test, predicted_column_name = 'predicted'):\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" Demonstrates how to get the output aligned to the inputs\n",
|
||||||
|
" using pandas indexes. Helps understand what happened if\n",
|
||||||
|
" the output's shape differs from the input shape, or if\n",
|
||||||
|
" the data got re-sorted by time and grain during forecasting.\n",
|
||||||
|
" \n",
|
||||||
|
" Typical causes of misalignment are:\n",
|
||||||
|
" * we predicted some periods that were missing in actuals -> drop from eval\n",
|
||||||
|
" * model was asked to predict past max_horizon -> increase max horizon\n",
|
||||||
|
" * data at start of X_test was needed for lags -> provide previous periods\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" df_fcst = pd.DataFrame({predicted_column_name : y_predicted})\n",
|
||||||
|
" # y and X outputs are aligned by forecast() function contract\n",
|
||||||
|
" df_fcst.index = X_trans.index\n",
|
||||||
|
" \n",
|
||||||
|
" # align original X_test to y_test \n",
|
||||||
|
" X_test_full = X_test.copy()\n",
|
||||||
|
" X_test_full[target_column_name] = y_test\n",
|
||||||
|
"\n",
|
||||||
|
" # X_test_full's index does not include origin, so reset for merge\n",
|
||||||
|
" df_fcst.reset_index(inplace=True)\n",
|
||||||
|
" X_test_full = X_test_full.reset_index().drop(columns='index')\n",
|
||||||
|
" together = df_fcst.merge(X_test_full, how='right')\n",
|
||||||
|
" \n",
|
||||||
|
" # drop rows where prediction or actuals are nan \n",
|
||||||
|
" # happens because of missing actuals \n",
|
||||||
|
" # or at edges of time due to lags/rolling windows\n",
|
||||||
|
" clean = together[together[[target_column_name, predicted_column_name]].notnull().all(axis=1)]\n",
|
||||||
|
" return(clean)\n",
|
||||||
|
"\n",
|
||||||
|
"df_all = align_outputs(y_fcst, X_trans, X_test, y_test)\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"def MAPE(actual, pred):\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" Calculate mean absolute percentage error.\n",
|
||||||
|
" Remove NA and values where actual is close to zero\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" not_na = ~(np.isnan(actual) | np.isnan(pred))\n",
|
||||||
|
" not_zero = ~np.isclose(actual, 0.0)\n",
|
||||||
|
" actual_safe = actual[not_na & not_zero]\n",
|
||||||
|
" pred_safe = pred[not_na & not_zero]\n",
|
||||||
|
" APE = 100*np.abs((actual_safe - pred_safe)/actual_safe)\n",
|
||||||
|
" return np.mean(APE)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"print(\"Simple forecasting model\")\n",
|
||||||
|
"rmse = np.sqrt(mean_squared_error(df_all[target_column_name], df_all['predicted']))\n",
|
||||||
|
"print(\"[Test Data] \\nRoot Mean squared error: %.2f\" % rmse)\n",
|
||||||
|
"mae = mean_absolute_error(df_all[target_column_name], df_all['predicted'])\n",
|
||||||
|
"print('mean_absolute_error score: %.2f' % mae)\n",
|
||||||
|
"print('MAPE: %.2f' % MAPE(df_all[target_column_name], df_all['predicted']))\n",
|
||||||
|
"\n",
|
||||||
|
"# Plot outputs\n",
|
||||||
|
"%matplotlib notebook\n",
|
||||||
|
"test_pred = plt.scatter(df_all[target_column_name], df_all['predicted'], color='b')\n",
|
||||||
|
"test_test = plt.scatter(y_test, y_test, color='g')\n",
|
||||||
|
"plt.legend((test_pred, test_test), ('prediction', 'truth'), loc='upper left', fontsize=8)\n",
|
||||||
|
"plt.show()"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "xiaga@microsoft.com, tosingli@microsoft.com"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3.6",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python36"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.6.7"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
@@ -0,0 +1,732 @@
|
|||||||
|
instant,date,season,yr,mnth,weekday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
|
||||||
|
1,1/1/2011,1,0,1,6,2,0.344167,0.363625,0.805833,0.160446,331,654,985
|
||||||
|
2,1/2/2011,1,0,1,0,2,0.363478,0.353739,0.696087,0.248539,131,670,801
|
||||||
|
3,1/3/2011,1,0,1,1,1,0.196364,0.189405,0.437273,0.248309,120,1229,1349
|
||||||
|
4,1/4/2011,1,0,1,2,1,0.2,0.212122,0.590435,0.160296,108,1454,1562
|
||||||
|
5,1/5/2011,1,0,1,3,1,0.226957,0.22927,0.436957,0.1869,82,1518,1600
|
||||||
|
6,1/6/2011,1,0,1,4,1,0.204348,0.233209,0.518261,0.0895652,88,1518,1606
|
||||||
|
7,1/7/2011,1,0,1,5,2,0.196522,0.208839,0.498696,0.168726,148,1362,1510
|
||||||
|
8,1/8/2011,1,0,1,6,2,0.165,0.162254,0.535833,0.266804,68,891,959
|
||||||
|
9,1/9/2011,1,0,1,0,1,0.138333,0.116175,0.434167,0.36195,54,768,822
|
||||||
|
10,1/10/2011,1,0,1,1,1,0.150833,0.150888,0.482917,0.223267,41,1280,1321
|
||||||
|
11,1/11/2011,1,0,1,2,2,0.169091,0.191464,0.686364,0.122132,43,1220,1263
|
||||||
|
12,1/12/2011,1,0,1,3,1,0.172727,0.160473,0.599545,0.304627,25,1137,1162
|
||||||
|
13,1/13/2011,1,0,1,4,1,0.165,0.150883,0.470417,0.301,38,1368,1406
|
||||||
|
14,1/14/2011,1,0,1,5,1,0.16087,0.188413,0.537826,0.126548,54,1367,1421
|
||||||
|
15,1/15/2011,1,0,1,6,2,0.233333,0.248112,0.49875,0.157963,222,1026,1248
|
||||||
|
16,1/16/2011,1,0,1,0,1,0.231667,0.234217,0.48375,0.188433,251,953,1204
|
||||||
|
17,1/17/2011,1,0,1,1,2,0.175833,0.176771,0.5375,0.194017,117,883,1000
|
||||||
|
18,1/18/2011,1,0,1,2,2,0.216667,0.232333,0.861667,0.146775,9,674,683
|
||||||
|
19,1/19/2011,1,0,1,3,2,0.292174,0.298422,0.741739,0.208317,78,1572,1650
|
||||||
|
20,1/20/2011,1,0,1,4,2,0.261667,0.25505,0.538333,0.195904,83,1844,1927
|
||||||
|
21,1/21/2011,1,0,1,5,1,0.1775,0.157833,0.457083,0.353242,75,1468,1543
|
||||||
|
22,1/22/2011,1,0,1,6,1,0.0591304,0.0790696,0.4,0.17197,93,888,981
|
||||||
|
23,1/23/2011,1,0,1,0,1,0.0965217,0.0988391,0.436522,0.2466,150,836,986
|
||||||
|
24,1/24/2011,1,0,1,1,1,0.0973913,0.11793,0.491739,0.15833,86,1330,1416
|
||||||
|
25,1/25/2011,1,0,1,2,2,0.223478,0.234526,0.616957,0.129796,186,1799,1985
|
||||||
|
26,1/26/2011,1,0,1,3,3,0.2175,0.2036,0.8625,0.29385,34,472,506
|
||||||
|
27,1/27/2011,1,0,1,4,1,0.195,0.2197,0.6875,0.113837,15,416,431
|
||||||
|
28,1/28/2011,1,0,1,5,2,0.203478,0.223317,0.793043,0.1233,38,1129,1167
|
||||||
|
29,1/29/2011,1,0,1,6,1,0.196522,0.212126,0.651739,0.145365,123,975,1098
|
||||||
|
30,1/30/2011,1,0,1,0,1,0.216522,0.250322,0.722174,0.0739826,140,956,1096
|
||||||
|
31,1/31/2011,1,0,1,1,2,0.180833,0.18625,0.60375,0.187192,42,1459,1501
|
||||||
|
32,2/1/2011,1,0,2,2,2,0.192174,0.23453,0.829565,0.053213,47,1313,1360
|
||||||
|
33,2/2/2011,1,0,2,3,2,0.26,0.254417,0.775417,0.264308,72,1454,1526
|
||||||
|
34,2/3/2011,1,0,2,4,1,0.186957,0.177878,0.437826,0.277752,61,1489,1550
|
||||||
|
35,2/4/2011,1,0,2,5,2,0.211304,0.228587,0.585217,0.127839,88,1620,1708
|
||||||
|
36,2/5/2011,1,0,2,6,2,0.233333,0.243058,0.929167,0.161079,100,905,1005
|
||||||
|
37,2/6/2011,1,0,2,0,1,0.285833,0.291671,0.568333,0.1418,354,1269,1623
|
||||||
|
38,2/7/2011,1,0,2,1,1,0.271667,0.303658,0.738333,0.0454083,120,1592,1712
|
||||||
|
39,2/8/2011,1,0,2,2,1,0.220833,0.198246,0.537917,0.36195,64,1466,1530
|
||||||
|
40,2/9/2011,1,0,2,3,2,0.134783,0.144283,0.494783,0.188839,53,1552,1605
|
||||||
|
41,2/10/2011,1,0,2,4,1,0.144348,0.149548,0.437391,0.221935,47,1491,1538
|
||||||
|
42,2/11/2011,1,0,2,5,1,0.189091,0.213509,0.506364,0.10855,149,1597,1746
|
||||||
|
43,2/12/2011,1,0,2,6,1,0.2225,0.232954,0.544167,0.203367,288,1184,1472
|
||||||
|
44,2/13/2011,1,0,2,0,1,0.316522,0.324113,0.457391,0.260883,397,1192,1589
|
||||||
|
45,2/14/2011,1,0,2,1,1,0.415,0.39835,0.375833,0.417908,208,1705,1913
|
||||||
|
46,2/15/2011,1,0,2,2,1,0.266087,0.254274,0.314348,0.291374,140,1675,1815
|
||||||
|
47,2/16/2011,1,0,2,3,1,0.318261,0.3162,0.423478,0.251791,218,1897,2115
|
||||||
|
48,2/17/2011,1,0,2,4,1,0.435833,0.428658,0.505,0.230104,259,2216,2475
|
||||||
|
49,2/18/2011,1,0,2,5,1,0.521667,0.511983,0.516667,0.264925,579,2348,2927
|
||||||
|
50,2/19/2011,1,0,2,6,1,0.399167,0.391404,0.187917,0.507463,532,1103,1635
|
||||||
|
51,2/20/2011,1,0,2,0,1,0.285217,0.27733,0.407826,0.223235,639,1173,1812
|
||||||
|
52,2/21/2011,1,0,2,1,2,0.303333,0.284075,0.605,0.307846,195,912,1107
|
||||||
|
53,2/22/2011,1,0,2,2,1,0.182222,0.186033,0.577778,0.195683,74,1376,1450
|
||||||
|
54,2/23/2011,1,0,2,3,1,0.221739,0.245717,0.423043,0.094113,139,1778,1917
|
||||||
|
55,2/24/2011,1,0,2,4,2,0.295652,0.289191,0.697391,0.250496,100,1707,1807
|
||||||
|
56,2/25/2011,1,0,2,5,2,0.364348,0.350461,0.712174,0.346539,120,1341,1461
|
||||||
|
57,2/26/2011,1,0,2,6,1,0.2825,0.282192,0.537917,0.186571,424,1545,1969
|
||||||
|
58,2/27/2011,1,0,2,0,1,0.343478,0.351109,0.68,0.125248,694,1708,2402
|
||||||
|
59,2/28/2011,1,0,2,1,2,0.407273,0.400118,0.876364,0.289686,81,1365,1446
|
||||||
|
60,3/1/2011,1,0,3,2,1,0.266667,0.263879,0.535,0.216425,137,1714,1851
|
||||||
|
61,3/2/2011,1,0,3,3,1,0.335,0.320071,0.449583,0.307833,231,1903,2134
|
||||||
|
62,3/3/2011,1,0,3,4,1,0.198333,0.200133,0.318333,0.225754,123,1562,1685
|
||||||
|
63,3/4/2011,1,0,3,5,2,0.261667,0.255679,0.610417,0.203346,214,1730,1944
|
||||||
|
64,3/5/2011,1,0,3,6,2,0.384167,0.378779,0.789167,0.251871,640,1437,2077
|
||||||
|
65,3/6/2011,1,0,3,0,2,0.376522,0.366252,0.948261,0.343287,114,491,605
|
||||||
|
66,3/7/2011,1,0,3,1,1,0.261739,0.238461,0.551304,0.341352,244,1628,1872
|
||||||
|
67,3/8/2011,1,0,3,2,1,0.2925,0.3024,0.420833,0.12065,316,1817,2133
|
||||||
|
68,3/9/2011,1,0,3,3,2,0.295833,0.286608,0.775417,0.22015,191,1700,1891
|
||||||
|
69,3/10/2011,1,0,3,4,3,0.389091,0.385668,0,0.261877,46,577,623
|
||||||
|
70,3/11/2011,1,0,3,5,2,0.316522,0.305,0.649565,0.23297,247,1730,1977
|
||||||
|
71,3/12/2011,1,0,3,6,1,0.329167,0.32575,0.594583,0.220775,724,1408,2132
|
||||||
|
72,3/13/2011,1,0,3,0,1,0.384348,0.380091,0.527391,0.270604,982,1435,2417
|
||||||
|
73,3/14/2011,1,0,3,1,1,0.325217,0.332,0.496957,0.136926,359,1687,2046
|
||||||
|
74,3/15/2011,1,0,3,2,2,0.317391,0.318178,0.655652,0.184309,289,1767,2056
|
||||||
|
75,3/16/2011,1,0,3,3,2,0.365217,0.36693,0.776522,0.203117,321,1871,2192
|
||||||
|
76,3/17/2011,1,0,3,4,1,0.415,0.410333,0.602917,0.209579,424,2320,2744
|
||||||
|
77,3/18/2011,1,0,3,5,1,0.54,0.527009,0.525217,0.231017,884,2355,3239
|
||||||
|
78,3/19/2011,1,0,3,6,1,0.4725,0.466525,0.379167,0.368167,1424,1693,3117
|
||||||
|
79,3/20/2011,1,0,3,0,1,0.3325,0.32575,0.47375,0.207721,1047,1424,2471
|
||||||
|
80,3/21/2011,2,0,3,1,2,0.430435,0.409735,0.737391,0.288783,401,1676,2077
|
||||||
|
81,3/22/2011,2,0,3,2,1,0.441667,0.440642,0.624583,0.22575,460,2243,2703
|
||||||
|
82,3/23/2011,2,0,3,3,2,0.346957,0.337939,0.839565,0.234261,203,1918,2121
|
||||||
|
83,3/24/2011,2,0,3,4,2,0.285,0.270833,0.805833,0.243787,166,1699,1865
|
||||||
|
84,3/25/2011,2,0,3,5,1,0.264167,0.256312,0.495,0.230725,300,1910,2210
|
||||||
|
85,3/26/2011,2,0,3,6,1,0.265833,0.257571,0.394167,0.209571,981,1515,2496
|
||||||
|
86,3/27/2011,2,0,3,0,2,0.253043,0.250339,0.493913,0.1843,472,1221,1693
|
||||||
|
87,3/28/2011,2,0,3,1,1,0.264348,0.257574,0.302174,0.212204,222,1806,2028
|
||||||
|
88,3/29/2011,2,0,3,2,1,0.3025,0.292908,0.314167,0.226996,317,2108,2425
|
||||||
|
89,3/30/2011,2,0,3,3,2,0.3,0.29735,0.646667,0.172888,168,1368,1536
|
||||||
|
90,3/31/2011,2,0,3,4,3,0.268333,0.257575,0.918333,0.217646,179,1506,1685
|
||||||
|
91,4/1/2011,2,0,4,5,2,0.3,0.283454,0.68625,0.258708,307,1920,2227
|
||||||
|
92,4/2/2011,2,0,4,6,2,0.315,0.315637,0.65375,0.197146,898,1354,2252
|
||||||
|
93,4/3/2011,2,0,4,0,1,0.378333,0.378767,0.48,0.182213,1651,1598,3249
|
||||||
|
94,4/4/2011,2,0,4,1,1,0.573333,0.542929,0.42625,0.385571,734,2381,3115
|
||||||
|
95,4/5/2011,2,0,4,2,2,0.414167,0.39835,0.642083,0.388067,167,1628,1795
|
||||||
|
96,4/6/2011,2,0,4,3,1,0.390833,0.387608,0.470833,0.263063,413,2395,2808
|
||||||
|
97,4/7/2011,2,0,4,4,1,0.4375,0.433696,0.602917,0.162312,571,2570,3141
|
||||||
|
98,4/8/2011,2,0,4,5,2,0.335833,0.324479,0.83625,0.226992,172,1299,1471
|
||||||
|
99,4/9/2011,2,0,4,6,2,0.3425,0.341529,0.8775,0.133083,879,1576,2455
|
||||||
|
100,4/10/2011,2,0,4,0,2,0.426667,0.426737,0.8575,0.146767,1188,1707,2895
|
||||||
|
101,4/11/2011,2,0,4,1,2,0.595652,0.565217,0.716956,0.324474,855,2493,3348
|
||||||
|
102,4/12/2011,2,0,4,2,2,0.5025,0.493054,0.739167,0.274879,257,1777,2034
|
||||||
|
103,4/13/2011,2,0,4,3,2,0.4125,0.417283,0.819167,0.250617,209,1953,2162
|
||||||
|
104,4/14/2011,2,0,4,4,1,0.4675,0.462742,0.540417,0.1107,529,2738,3267
|
||||||
|
105,4/15/2011,2,0,4,5,1,0.446667,0.441913,0.67125,0.226375,642,2484,3126
|
||||||
|
106,4/16/2011,2,0,4,6,3,0.430833,0.425492,0.888333,0.340808,121,674,795
|
||||||
|
107,4/17/2011,2,0,4,0,1,0.456667,0.445696,0.479583,0.303496,1558,2186,3744
|
||||||
|
108,4/18/2011,2,0,4,1,1,0.5125,0.503146,0.5425,0.163567,669,2760,3429
|
||||||
|
109,4/19/2011,2,0,4,2,2,0.505833,0.489258,0.665833,0.157971,409,2795,3204
|
||||||
|
110,4/20/2011,2,0,4,3,1,0.595,0.564392,0.614167,0.241925,613,3331,3944
|
||||||
|
111,4/21/2011,2,0,4,4,1,0.459167,0.453892,0.407083,0.325258,745,3444,4189
|
||||||
|
112,4/22/2011,2,0,4,5,2,0.336667,0.321954,0.729583,0.219521,177,1506,1683
|
||||||
|
113,4/23/2011,2,0,4,6,2,0.46,0.450121,0.887917,0.230725,1462,2574,4036
|
||||||
|
114,4/24/2011,2,0,4,0,2,0.581667,0.551763,0.810833,0.192175,1710,2481,4191
|
||||||
|
115,4/25/2011,2,0,4,1,1,0.606667,0.5745,0.776667,0.185333,773,3300,4073
|
||||||
|
116,4/26/2011,2,0,4,2,1,0.631667,0.594083,0.729167,0.3265,678,3722,4400
|
||||||
|
117,4/27/2011,2,0,4,3,2,0.62,0.575142,0.835417,0.3122,547,3325,3872
|
||||||
|
118,4/28/2011,2,0,4,4,2,0.6175,0.578929,0.700833,0.320908,569,3489,4058
|
||||||
|
119,4/29/2011,2,0,4,5,1,0.51,0.497463,0.457083,0.240063,878,3717,4595
|
||||||
|
120,4/30/2011,2,0,4,6,1,0.4725,0.464021,0.503333,0.235075,1965,3347,5312
|
||||||
|
121,5/1/2011,2,0,5,0,2,0.451667,0.448204,0.762083,0.106354,1138,2213,3351
|
||||||
|
122,5/2/2011,2,0,5,1,2,0.549167,0.532833,0.73,0.183454,847,3554,4401
|
||||||
|
123,5/3/2011,2,0,5,2,2,0.616667,0.582079,0.697083,0.342667,603,3848,4451
|
||||||
|
124,5/4/2011,2,0,5,3,2,0.414167,0.40465,0.737083,0.328996,255,2378,2633
|
||||||
|
125,5/5/2011,2,0,5,4,1,0.459167,0.441917,0.444167,0.295392,614,3819,4433
|
||||||
|
126,5/6/2011,2,0,5,5,1,0.479167,0.474117,0.59,0.228246,894,3714,4608
|
||||||
|
127,5/7/2011,2,0,5,6,1,0.52,0.512621,0.54125,0.16045,1612,3102,4714
|
||||||
|
128,5/8/2011,2,0,5,0,1,0.528333,0.518933,0.631667,0.0746375,1401,2932,4333
|
||||||
|
129,5/9/2011,2,0,5,1,1,0.5325,0.525246,0.58875,0.176,664,3698,4362
|
||||||
|
130,5/10/2011,2,0,5,2,1,0.5325,0.522721,0.489167,0.115671,694,4109,4803
|
||||||
|
131,5/11/2011,2,0,5,3,1,0.5425,0.5284,0.632917,0.120642,550,3632,4182
|
||||||
|
132,5/12/2011,2,0,5,4,1,0.535,0.523363,0.7475,0.189667,695,4169,4864
|
||||||
|
133,5/13/2011,2,0,5,5,2,0.5125,0.4943,0.863333,0.179725,692,3413,4105
|
||||||
|
134,5/14/2011,2,0,5,6,2,0.520833,0.500629,0.9225,0.13495,902,2507,3409
|
||||||
|
135,5/15/2011,2,0,5,0,2,0.5625,0.536,0.867083,0.152979,1582,2971,4553
|
||||||
|
136,5/16/2011,2,0,5,1,1,0.5775,0.550512,0.787917,0.126871,773,3185,3958
|
||||||
|
137,5/17/2011,2,0,5,2,2,0.561667,0.538529,0.837917,0.277354,678,3445,4123
|
||||||
|
138,5/18/2011,2,0,5,3,2,0.55,0.527158,0.87,0.201492,536,3319,3855
|
||||||
|
139,5/19/2011,2,0,5,4,2,0.530833,0.510742,0.829583,0.108213,735,3840,4575
|
||||||
|
140,5/20/2011,2,0,5,5,1,0.536667,0.529042,0.719583,0.125013,909,4008,4917
|
||||||
|
141,5/21/2011,2,0,5,6,1,0.6025,0.571975,0.626667,0.12065,2258,3547,5805
|
||||||
|
142,5/22/2011,2,0,5,0,1,0.604167,0.5745,0.749583,0.148008,1576,3084,4660
|
||||||
|
143,5/23/2011,2,0,5,1,2,0.631667,0.590296,0.81,0.233842,836,3438,4274
|
||||||
|
144,5/24/2011,2,0,5,2,2,0.66,0.604813,0.740833,0.207092,659,3833,4492
|
||||||
|
145,5/25/2011,2,0,5,3,1,0.660833,0.615542,0.69625,0.154233,740,4238,4978
|
||||||
|
146,5/26/2011,2,0,5,4,1,0.708333,0.654688,0.6775,0.199642,758,3919,4677
|
||||||
|
147,5/27/2011,2,0,5,5,1,0.681667,0.637008,0.65375,0.240679,871,3808,4679
|
||||||
|
148,5/28/2011,2,0,5,6,1,0.655833,0.612379,0.729583,0.230092,2001,2757,4758
|
||||||
|
149,5/29/2011,2,0,5,0,1,0.6675,0.61555,0.81875,0.213938,2355,2433,4788
|
||||||
|
150,5/30/2011,2,0,5,1,1,0.733333,0.671092,0.685,0.131225,1549,2549,4098
|
||||||
|
151,5/31/2011,2,0,5,2,1,0.775,0.725383,0.636667,0.111329,673,3309,3982
|
||||||
|
152,6/1/2011,2,0,6,3,2,0.764167,0.720967,0.677083,0.207092,513,3461,3974
|
||||||
|
153,6/2/2011,2,0,6,4,1,0.715,0.643942,0.305,0.292287,736,4232,4968
|
||||||
|
154,6/3/2011,2,0,6,5,1,0.62,0.587133,0.354167,0.253121,898,4414,5312
|
||||||
|
155,6/4/2011,2,0,6,6,1,0.635,0.594696,0.45625,0.123142,1869,3473,5342
|
||||||
|
156,6/5/2011,2,0,6,0,2,0.648333,0.616804,0.6525,0.138692,1685,3221,4906
|
||||||
|
157,6/6/2011,2,0,6,1,1,0.678333,0.621858,0.6,0.121896,673,3875,4548
|
||||||
|
158,6/7/2011,2,0,6,2,1,0.7075,0.65595,0.597917,0.187808,763,4070,4833
|
||||||
|
159,6/8/2011,2,0,6,3,1,0.775833,0.727279,0.622083,0.136817,676,3725,4401
|
||||||
|
160,6/9/2011,2,0,6,4,2,0.808333,0.757579,0.568333,0.149883,563,3352,3915
|
||||||
|
161,6/10/2011,2,0,6,5,1,0.755,0.703292,0.605,0.140554,815,3771,4586
|
||||||
|
162,6/11/2011,2,0,6,6,1,0.725,0.678038,0.654583,0.15485,1729,3237,4966
|
||||||
|
163,6/12/2011,2,0,6,0,1,0.6925,0.643325,0.747917,0.163567,1467,2993,4460
|
||||||
|
164,6/13/2011,2,0,6,1,1,0.635,0.601654,0.494583,0.30535,863,4157,5020
|
||||||
|
165,6/14/2011,2,0,6,2,1,0.604167,0.591546,0.507083,0.269283,727,4164,4891
|
||||||
|
166,6/15/2011,2,0,6,3,1,0.626667,0.587754,0.471667,0.167912,769,4411,5180
|
||||||
|
167,6/16/2011,2,0,6,4,2,0.628333,0.595346,0.688333,0.206471,545,3222,3767
|
||||||
|
168,6/17/2011,2,0,6,5,1,0.649167,0.600383,0.735833,0.143029,863,3981,4844
|
||||||
|
169,6/18/2011,2,0,6,6,1,0.696667,0.643954,0.670417,0.119408,1807,3312,5119
|
||||||
|
170,6/19/2011,2,0,6,0,2,0.699167,0.645846,0.666667,0.102,1639,3105,4744
|
||||||
|
171,6/20/2011,2,0,6,1,2,0.635,0.595346,0.74625,0.155475,699,3311,4010
|
||||||
|
172,6/21/2011,3,0,6,2,2,0.680833,0.637646,0.770417,0.171025,774,4061,4835
|
||||||
|
173,6/22/2011,3,0,6,3,1,0.733333,0.693829,0.7075,0.172262,661,3846,4507
|
||||||
|
174,6/23/2011,3,0,6,4,2,0.728333,0.693833,0.703333,0.238804,746,4044,4790
|
||||||
|
175,6/24/2011,3,0,6,5,1,0.724167,0.656583,0.573333,0.222025,969,4022,4991
|
||||||
|
176,6/25/2011,3,0,6,6,1,0.695,0.643313,0.483333,0.209571,1782,3420,5202
|
||||||
|
177,6/26/2011,3,0,6,0,1,0.68,0.637629,0.513333,0.0945333,1920,3385,5305
|
||||||
|
178,6/27/2011,3,0,6,1,2,0.6825,0.637004,0.658333,0.107588,854,3854,4708
|
||||||
|
179,6/28/2011,3,0,6,2,1,0.744167,0.692558,0.634167,0.144283,732,3916,4648
|
||||||
|
180,6/29/2011,3,0,6,3,1,0.728333,0.654688,0.497917,0.261821,848,4377,5225
|
||||||
|
181,6/30/2011,3,0,6,4,1,0.696667,0.637008,0.434167,0.185312,1027,4488,5515
|
||||||
|
182,7/1/2011,3,0,7,5,1,0.7225,0.652162,0.39625,0.102608,1246,4116,5362
|
||||||
|
183,7/2/2011,3,0,7,6,1,0.738333,0.667308,0.444583,0.115062,2204,2915,5119
|
||||||
|
184,7/3/2011,3,0,7,0,2,0.716667,0.668575,0.6825,0.228858,2282,2367,4649
|
||||||
|
185,7/4/2011,3,0,7,1,2,0.726667,0.665417,0.637917,0.0814792,3065,2978,6043
|
||||||
|
186,7/5/2011,3,0,7,2,1,0.746667,0.696338,0.590417,0.126258,1031,3634,4665
|
||||||
|
187,7/6/2011,3,0,7,3,1,0.72,0.685633,0.743333,0.149883,784,3845,4629
|
||||||
|
188,7/7/2011,3,0,7,4,1,0.75,0.686871,0.65125,0.1592,754,3838,4592
|
||||||
|
189,7/8/2011,3,0,7,5,2,0.709167,0.670483,0.757917,0.225129,692,3348,4040
|
||||||
|
190,7/9/2011,3,0,7,6,1,0.733333,0.664158,0.609167,0.167912,1988,3348,5336
|
||||||
|
191,7/10/2011,3,0,7,0,1,0.7475,0.690025,0.578333,0.183471,1743,3138,4881
|
||||||
|
192,7/11/2011,3,0,7,1,1,0.7625,0.729804,0.635833,0.282337,723,3363,4086
|
||||||
|
193,7/12/2011,3,0,7,2,1,0.794167,0.739275,0.559167,0.200254,662,3596,4258
|
||||||
|
194,7/13/2011,3,0,7,3,1,0.746667,0.689404,0.631667,0.146133,748,3594,4342
|
||||||
|
195,7/14/2011,3,0,7,4,1,0.680833,0.635104,0.47625,0.240667,888,4196,5084
|
||||||
|
196,7/15/2011,3,0,7,5,1,0.663333,0.624371,0.59125,0.182833,1318,4220,5538
|
||||||
|
197,7/16/2011,3,0,7,6,1,0.686667,0.638263,0.585,0.208342,2418,3505,5923
|
||||||
|
198,7/17/2011,3,0,7,0,1,0.719167,0.669833,0.604167,0.245033,2006,3296,5302
|
||||||
|
199,7/18/2011,3,0,7,1,1,0.746667,0.703925,0.65125,0.215804,841,3617,4458
|
||||||
|
200,7/19/2011,3,0,7,2,1,0.776667,0.747479,0.650417,0.1306,752,3789,4541
|
||||||
|
201,7/20/2011,3,0,7,3,1,0.768333,0.74685,0.707083,0.113817,644,3688,4332
|
||||||
|
202,7/21/2011,3,0,7,4,2,0.815,0.826371,0.69125,0.222021,632,3152,3784
|
||||||
|
203,7/22/2011,3,0,7,5,1,0.848333,0.840896,0.580417,0.1331,562,2825,3387
|
||||||
|
204,7/23/2011,3,0,7,6,1,0.849167,0.804287,0.5,0.131221,987,2298,3285
|
||||||
|
205,7/24/2011,3,0,7,0,1,0.83,0.794829,0.550833,0.169171,1050,2556,3606
|
||||||
|
206,7/25/2011,3,0,7,1,1,0.743333,0.720958,0.757083,0.0908083,568,3272,3840
|
||||||
|
207,7/26/2011,3,0,7,2,1,0.771667,0.696979,0.540833,0.200258,750,3840,4590
|
||||||
|
208,7/27/2011,3,0,7,3,1,0.775,0.690667,0.402917,0.183463,755,3901,4656
|
||||||
|
209,7/28/2011,3,0,7,4,1,0.779167,0.7399,0.583333,0.178479,606,3784,4390
|
||||||
|
210,7/29/2011,3,0,7,5,1,0.838333,0.785967,0.5425,0.174138,670,3176,3846
|
||||||
|
211,7/30/2011,3,0,7,6,1,0.804167,0.728537,0.465833,0.168537,1559,2916,4475
|
||||||
|
212,7/31/2011,3,0,7,0,1,0.805833,0.729796,0.480833,0.164813,1524,2778,4302
|
||||||
|
213,8/1/2011,3,0,8,1,1,0.771667,0.703292,0.550833,0.156717,729,3537,4266
|
||||||
|
214,8/2/2011,3,0,8,2,1,0.783333,0.707071,0.49125,0.20585,801,4044,4845
|
||||||
|
215,8/3/2011,3,0,8,3,2,0.731667,0.679937,0.6575,0.135583,467,3107,3574
|
||||||
|
216,8/4/2011,3,0,8,4,2,0.71,0.664788,0.7575,0.19715,799,3777,4576
|
||||||
|
217,8/5/2011,3,0,8,5,1,0.710833,0.656567,0.630833,0.184696,1023,3843,4866
|
||||||
|
218,8/6/2011,3,0,8,6,2,0.716667,0.676154,0.755,0.22825,1521,2773,4294
|
||||||
|
219,8/7/2011,3,0,8,0,1,0.7425,0.715292,0.752917,0.201487,1298,2487,3785
|
||||||
|
220,8/8/2011,3,0,8,1,1,0.765,0.703283,0.592083,0.192175,846,3480,4326
|
||||||
|
221,8/9/2011,3,0,8,2,1,0.775,0.724121,0.570417,0.151121,907,3695,4602
|
||||||
|
222,8/10/2011,3,0,8,3,1,0.766667,0.684983,0.424167,0.200258,884,3896,4780
|
||||||
|
223,8/11/2011,3,0,8,4,1,0.7175,0.651521,0.42375,0.164796,812,3980,4792
|
||||||
|
224,8/12/2011,3,0,8,5,1,0.708333,0.654042,0.415,0.125621,1051,3854,4905
|
||||||
|
225,8/13/2011,3,0,8,6,2,0.685833,0.645858,0.729583,0.211454,1504,2646,4150
|
||||||
|
226,8/14/2011,3,0,8,0,2,0.676667,0.624388,0.8175,0.222633,1338,2482,3820
|
||||||
|
227,8/15/2011,3,0,8,1,1,0.665833,0.616167,0.712083,0.208954,775,3563,4338
|
||||||
|
228,8/16/2011,3,0,8,2,1,0.700833,0.645837,0.578333,0.236329,721,4004,4725
|
||||||
|
229,8/17/2011,3,0,8,3,1,0.723333,0.666671,0.575417,0.143667,668,4026,4694
|
||||||
|
230,8/18/2011,3,0,8,4,1,0.711667,0.662258,0.654583,0.233208,639,3166,3805
|
||||||
|
231,8/19/2011,3,0,8,5,2,0.685,0.633221,0.722917,0.139308,797,3356,4153
|
||||||
|
232,8/20/2011,3,0,8,6,1,0.6975,0.648996,0.674167,0.104467,1914,3277,5191
|
||||||
|
233,8/21/2011,3,0,8,0,1,0.710833,0.675525,0.77,0.248754,1249,2624,3873
|
||||||
|
234,8/22/2011,3,0,8,1,1,0.691667,0.638254,0.47,0.27675,833,3925,4758
|
||||||
|
235,8/23/2011,3,0,8,2,1,0.640833,0.606067,0.455417,0.146763,1281,4614,5895
|
||||||
|
236,8/24/2011,3,0,8,3,1,0.673333,0.630692,0.605,0.253108,949,4181,5130
|
||||||
|
237,8/25/2011,3,0,8,4,2,0.684167,0.645854,0.771667,0.210833,435,3107,3542
|
||||||
|
238,8/26/2011,3,0,8,5,1,0.7,0.659733,0.76125,0.0839625,768,3893,4661
|
||||||
|
239,8/27/2011,3,0,8,6,2,0.68,0.635556,0.85,0.375617,226,889,1115
|
||||||
|
240,8/28/2011,3,0,8,0,1,0.707059,0.647959,0.561765,0.304659,1415,2919,4334
|
||||||
|
241,8/29/2011,3,0,8,1,1,0.636667,0.607958,0.554583,0.159825,729,3905,4634
|
||||||
|
242,8/30/2011,3,0,8,2,1,0.639167,0.594704,0.548333,0.125008,775,4429,5204
|
||||||
|
243,8/31/2011,3,0,8,3,1,0.656667,0.611121,0.597917,0.0833333,688,4370,5058
|
||||||
|
244,9/1/2011,3,0,9,4,1,0.655,0.614921,0.639167,0.141796,783,4332,5115
|
||||||
|
245,9/2/2011,3,0,9,5,2,0.643333,0.604808,0.727083,0.139929,875,3852,4727
|
||||||
|
246,9/3/2011,3,0,9,6,1,0.669167,0.633213,0.716667,0.185325,1935,2549,4484
|
||||||
|
247,9/4/2011,3,0,9,0,1,0.709167,0.665429,0.742083,0.206467,2521,2419,4940
|
||||||
|
248,9/5/2011,3,0,9,1,2,0.673333,0.625646,0.790417,0.212696,1236,2115,3351
|
||||||
|
249,9/6/2011,3,0,9,2,3,0.54,0.5152,0.886957,0.343943,204,2506,2710
|
||||||
|
250,9/7/2011,3,0,9,3,3,0.599167,0.544229,0.917083,0.0970208,118,1878,1996
|
||||||
|
251,9/8/2011,3,0,9,4,3,0.633913,0.555361,0.939565,0.192748,153,1689,1842
|
||||||
|
252,9/9/2011,3,0,9,5,2,0.65,0.578946,0.897917,0.124379,417,3127,3544
|
||||||
|
253,9/10/2011,3,0,9,6,1,0.66,0.607962,0.75375,0.153608,1750,3595,5345
|
||||||
|
254,9/11/2011,3,0,9,0,1,0.653333,0.609229,0.71375,0.115054,1633,3413,5046
|
||||||
|
255,9/12/2011,3,0,9,1,1,0.644348,0.60213,0.692174,0.088913,690,4023,4713
|
||||||
|
256,9/13/2011,3,0,9,2,1,0.650833,0.603554,0.7125,0.141804,701,4062,4763
|
||||||
|
257,9/14/2011,3,0,9,3,1,0.673333,0.6269,0.697083,0.1673,647,4138,4785
|
||||||
|
258,9/15/2011,3,0,9,4,2,0.5775,0.553671,0.709167,0.271146,428,3231,3659
|
||||||
|
259,9/16/2011,3,0,9,5,2,0.469167,0.461475,0.590417,0.164183,742,4018,4760
|
||||||
|
260,9/17/2011,3,0,9,6,2,0.491667,0.478512,0.718333,0.189675,1434,3077,4511
|
||||||
|
261,9/18/2011,3,0,9,0,1,0.5075,0.490537,0.695,0.178483,1353,2921,4274
|
||||||
|
262,9/19/2011,3,0,9,1,2,0.549167,0.529675,0.69,0.151742,691,3848,4539
|
||||||
|
263,9/20/2011,3,0,9,2,2,0.561667,0.532217,0.88125,0.134954,438,3203,3641
|
||||||
|
264,9/21/2011,3,0,9,3,2,0.595,0.550533,0.9,0.0964042,539,3813,4352
|
||||||
|
265,9/22/2011,3,0,9,4,2,0.628333,0.554963,0.902083,0.128125,555,4240,4795
|
||||||
|
266,9/23/2011,4,0,9,5,2,0.609167,0.522125,0.9725,0.0783667,258,2137,2395
|
||||||
|
267,9/24/2011,4,0,9,6,2,0.606667,0.564412,0.8625,0.0783833,1776,3647,5423
|
||||||
|
268,9/25/2011,4,0,9,0,2,0.634167,0.572637,0.845,0.0503792,1544,3466,5010
|
||||||
|
269,9/26/2011,4,0,9,1,2,0.649167,0.589042,0.848333,0.1107,684,3946,4630
|
||||||
|
270,9/27/2011,4,0,9,2,2,0.636667,0.574525,0.885417,0.118171,477,3643,4120
|
||||||
|
271,9/28/2011,4,0,9,3,2,0.635,0.575158,0.84875,0.148629,480,3427,3907
|
||||||
|
272,9/29/2011,4,0,9,4,1,0.616667,0.574512,0.699167,0.172883,653,4186,4839
|
||||||
|
273,9/30/2011,4,0,9,5,1,0.564167,0.544829,0.6475,0.206475,830,4372,5202
|
||||||
|
274,10/1/2011,4,0,10,6,2,0.41,0.412863,0.75375,0.292296,480,1949,2429
|
||||||
|
275,10/2/2011,4,0,10,0,2,0.356667,0.345317,0.791667,0.222013,616,2302,2918
|
||||||
|
276,10/3/2011,4,0,10,1,2,0.384167,0.392046,0.760833,0.0833458,330,3240,3570
|
||||||
|
277,10/4/2011,4,0,10,2,1,0.484167,0.472858,0.71,0.205854,486,3970,4456
|
||||||
|
278,10/5/2011,4,0,10,3,1,0.538333,0.527138,0.647917,0.17725,559,4267,4826
|
||||||
|
279,10/6/2011,4,0,10,4,1,0.494167,0.480425,0.620833,0.134954,639,4126,4765
|
||||||
|
280,10/7/2011,4,0,10,5,1,0.510833,0.504404,0.684167,0.0223917,949,4036,4985
|
||||||
|
281,10/8/2011,4,0,10,6,1,0.521667,0.513242,0.70125,0.0454042,2235,3174,5409
|
||||||
|
282,10/9/2011,4,0,10,0,1,0.540833,0.523983,0.7275,0.06345,2397,3114,5511
|
||||||
|
283,10/10/2011,4,0,10,1,1,0.570833,0.542925,0.73375,0.0423042,1514,3603,5117
|
||||||
|
284,10/11/2011,4,0,10,2,2,0.566667,0.546096,0.80875,0.143042,667,3896,4563
|
||||||
|
285,10/12/2011,4,0,10,3,3,0.543333,0.517717,0.90625,0.24815,217,2199,2416
|
||||||
|
286,10/13/2011,4,0,10,4,2,0.589167,0.551804,0.896667,0.141787,290,2623,2913
|
||||||
|
287,10/14/2011,4,0,10,5,2,0.550833,0.529675,0.71625,0.223883,529,3115,3644
|
||||||
|
288,10/15/2011,4,0,10,6,1,0.506667,0.498725,0.483333,0.258083,1899,3318,5217
|
||||||
|
289,10/16/2011,4,0,10,0,1,0.511667,0.503154,0.486667,0.281717,1748,3293,5041
|
||||||
|
290,10/17/2011,4,0,10,1,1,0.534167,0.510725,0.579583,0.175379,713,3857,4570
|
||||||
|
291,10/18/2011,4,0,10,2,2,0.5325,0.522721,0.701667,0.110087,637,4111,4748
|
||||||
|
292,10/19/2011,4,0,10,3,3,0.541739,0.513848,0.895217,0.243339,254,2170,2424
|
||||||
|
293,10/20/2011,4,0,10,4,1,0.475833,0.466525,0.63625,0.422275,471,3724,4195
|
||||||
|
294,10/21/2011,4,0,10,5,1,0.4275,0.423596,0.574167,0.221396,676,3628,4304
|
||||||
|
295,10/22/2011,4,0,10,6,1,0.4225,0.425492,0.629167,0.0926667,1499,2809,4308
|
||||||
|
296,10/23/2011,4,0,10,0,1,0.421667,0.422333,0.74125,0.0995125,1619,2762,4381
|
||||||
|
297,10/24/2011,4,0,10,1,1,0.463333,0.457067,0.772083,0.118792,699,3488,4187
|
||||||
|
298,10/25/2011,4,0,10,2,1,0.471667,0.463375,0.622917,0.166658,695,3992,4687
|
||||||
|
299,10/26/2011,4,0,10,3,2,0.484167,0.472846,0.720417,0.148642,404,3490,3894
|
||||||
|
300,10/27/2011,4,0,10,4,2,0.47,0.457046,0.812917,0.197763,240,2419,2659
|
||||||
|
301,10/28/2011,4,0,10,5,2,0.330833,0.318812,0.585833,0.229479,456,3291,3747
|
||||||
|
302,10/29/2011,4,0,10,6,3,0.254167,0.227913,0.8825,0.351371,57,570,627
|
||||||
|
303,10/30/2011,4,0,10,0,1,0.319167,0.321329,0.62375,0.176617,885,2446,3331
|
||||||
|
304,10/31/2011,4,0,10,1,1,0.34,0.356063,0.703333,0.10635,362,3307,3669
|
||||||
|
305,11/1/2011,4,0,11,2,1,0.400833,0.397088,0.68375,0.135571,410,3658,4068
|
||||||
|
306,11/2/2011,4,0,11,3,1,0.3775,0.390133,0.71875,0.0820917,370,3816,4186
|
||||||
|
307,11/3/2011,4,0,11,4,1,0.408333,0.405921,0.702083,0.136817,318,3656,3974
|
||||||
|
308,11/4/2011,4,0,11,5,2,0.403333,0.403392,0.6225,0.271779,470,3576,4046
|
||||||
|
309,11/5/2011,4,0,11,6,1,0.326667,0.323854,0.519167,0.189062,1156,2770,3926
|
||||||
|
310,11/6/2011,4,0,11,0,1,0.348333,0.362358,0.734583,0.0920542,952,2697,3649
|
||||||
|
311,11/7/2011,4,0,11,1,1,0.395,0.400871,0.75875,0.057225,373,3662,4035
|
||||||
|
312,11/8/2011,4,0,11,2,1,0.408333,0.412246,0.721667,0.0690375,376,3829,4205
|
||||||
|
313,11/9/2011,4,0,11,3,1,0.4,0.409079,0.758333,0.0621958,305,3804,4109
|
||||||
|
314,11/10/2011,4,0,11,4,2,0.38,0.373721,0.813333,0.189067,190,2743,2933
|
||||||
|
315,11/11/2011,4,0,11,5,1,0.324167,0.306817,0.44625,0.314675,440,2928,3368
|
||||||
|
316,11/12/2011,4,0,11,6,1,0.356667,0.357942,0.552917,0.212062,1275,2792,4067
|
||||||
|
317,11/13/2011,4,0,11,0,1,0.440833,0.43055,0.458333,0.281721,1004,2713,3717
|
||||||
|
318,11/14/2011,4,0,11,1,1,0.53,0.524612,0.587083,0.306596,595,3891,4486
|
||||||
|
319,11/15/2011,4,0,11,2,2,0.53,0.507579,0.68875,0.199633,449,3746,4195
|
||||||
|
320,11/16/2011,4,0,11,3,3,0.456667,0.451988,0.93,0.136829,145,1672,1817
|
||||||
|
321,11/17/2011,4,0,11,4,2,0.341667,0.323221,0.575833,0.305362,139,2914,3053
|
||||||
|
322,11/18/2011,4,0,11,5,1,0.274167,0.272721,0.41,0.168533,245,3147,3392
|
||||||
|
323,11/19/2011,4,0,11,6,1,0.329167,0.324483,0.502083,0.224496,943,2720,3663
|
||||||
|
324,11/20/2011,4,0,11,0,2,0.463333,0.457058,0.684583,0.18595,787,2733,3520
|
||||||
|
325,11/21/2011,4,0,11,1,3,0.4475,0.445062,0.91,0.138054,220,2545,2765
|
||||||
|
326,11/22/2011,4,0,11,2,3,0.416667,0.421696,0.9625,0.118792,69,1538,1607
|
||||||
|
327,11/23/2011,4,0,11,3,2,0.440833,0.430537,0.757917,0.335825,112,2454,2566
|
||||||
|
328,11/24/2011,4,0,11,4,1,0.373333,0.372471,0.549167,0.167304,560,935,1495
|
||||||
|
329,11/25/2011,4,0,11,5,1,0.375,0.380671,0.64375,0.0988958,1095,1697,2792
|
||||||
|
330,11/26/2011,4,0,11,6,1,0.375833,0.385087,0.681667,0.0684208,1249,1819,3068
|
||||||
|
331,11/27/2011,4,0,11,0,1,0.459167,0.4558,0.698333,0.208954,810,2261,3071
|
||||||
|
332,11/28/2011,4,0,11,1,1,0.503478,0.490122,0.743043,0.142122,253,3614,3867
|
||||||
|
333,11/29/2011,4,0,11,2,2,0.458333,0.451375,0.830833,0.258092,96,2818,2914
|
||||||
|
334,11/30/2011,4,0,11,3,1,0.325,0.311221,0.613333,0.271158,188,3425,3613
|
||||||
|
335,12/1/2011,4,0,12,4,1,0.3125,0.305554,0.524583,0.220158,182,3545,3727
|
||||||
|
336,12/2/2011,4,0,12,5,1,0.314167,0.331433,0.625833,0.100754,268,3672,3940
|
||||||
|
337,12/3/2011,4,0,12,6,1,0.299167,0.310604,0.612917,0.0957833,706,2908,3614
|
||||||
|
338,12/4/2011,4,0,12,0,1,0.330833,0.3491,0.775833,0.0839583,634,2851,3485
|
||||||
|
339,12/5/2011,4,0,12,1,2,0.385833,0.393925,0.827083,0.0622083,233,3578,3811
|
||||||
|
340,12/6/2011,4,0,12,2,3,0.4625,0.4564,0.949583,0.232583,126,2468,2594
|
||||||
|
341,12/7/2011,4,0,12,3,3,0.41,0.400246,0.970417,0.266175,50,655,705
|
||||||
|
342,12/8/2011,4,0,12,4,1,0.265833,0.256938,0.58,0.240058,150,3172,3322
|
||||||
|
343,12/9/2011,4,0,12,5,1,0.290833,0.317542,0.695833,0.0827167,261,3359,3620
|
||||||
|
344,12/10/2011,4,0,12,6,1,0.275,0.266412,0.5075,0.233221,502,2688,3190
|
||||||
|
345,12/11/2011,4,0,12,0,1,0.220833,0.253154,0.49,0.0665417,377,2366,2743
|
||||||
|
346,12/12/2011,4,0,12,1,1,0.238333,0.270196,0.670833,0.06345,143,3167,3310
|
||||||
|
347,12/13/2011,4,0,12,2,1,0.2825,0.301138,0.59,0.14055,155,3368,3523
|
||||||
|
348,12/14/2011,4,0,12,3,2,0.3175,0.338362,0.66375,0.0609583,178,3562,3740
|
||||||
|
349,12/15/2011,4,0,12,4,2,0.4225,0.412237,0.634167,0.268042,181,3528,3709
|
||||||
|
350,12/16/2011,4,0,12,5,2,0.375,0.359825,0.500417,0.260575,178,3399,3577
|
||||||
|
351,12/17/2011,4,0,12,6,2,0.258333,0.249371,0.560833,0.243167,275,2464,2739
|
||||||
|
352,12/18/2011,4,0,12,0,1,0.238333,0.245579,0.58625,0.169779,220,2211,2431
|
||||||
|
353,12/19/2011,4,0,12,1,1,0.276667,0.280933,0.6375,0.172896,260,3143,3403
|
||||||
|
354,12/20/2011,4,0,12,2,2,0.385833,0.396454,0.595417,0.0615708,216,3534,3750
|
||||||
|
355,12/21/2011,1,0,12,3,2,0.428333,0.428017,0.858333,0.2214,107,2553,2660
|
||||||
|
356,12/22/2011,1,0,12,4,2,0.423333,0.426121,0.7575,0.047275,227,2841,3068
|
||||||
|
357,12/23/2011,1,0,12,5,1,0.373333,0.377513,0.68625,0.274246,163,2046,2209
|
||||||
|
358,12/24/2011,1,0,12,6,1,0.3025,0.299242,0.5425,0.190304,155,856,1011
|
||||||
|
359,12/25/2011,1,0,12,0,1,0.274783,0.279961,0.681304,0.155091,303,451,754
|
||||||
|
360,12/26/2011,1,0,12,1,1,0.321739,0.315535,0.506957,0.239465,430,887,1317
|
||||||
|
361,12/27/2011,1,0,12,2,2,0.325,0.327633,0.7625,0.18845,103,1059,1162
|
||||||
|
362,12/28/2011,1,0,12,3,1,0.29913,0.279974,0.503913,0.293961,255,2047,2302
|
||||||
|
363,12/29/2011,1,0,12,4,1,0.248333,0.263892,0.574167,0.119412,254,2169,2423
|
||||||
|
364,12/30/2011,1,0,12,5,1,0.311667,0.318812,0.636667,0.134337,491,2508,2999
|
||||||
|
365,12/31/2011,1,0,12,6,1,0.41,0.414121,0.615833,0.220154,665,1820,2485
|
||||||
|
366,1/1/2012,1,1,1,0,1,0.37,0.375621,0.6925,0.192167,686,1608,2294
|
||||||
|
367,1/2/2012,1,1,1,1,1,0.273043,0.252304,0.381304,0.329665,244,1707,1951
|
||||||
|
368,1/3/2012,1,1,1,2,1,0.15,0.126275,0.44125,0.365671,89,2147,2236
|
||||||
|
369,1/4/2012,1,1,1,3,2,0.1075,0.119337,0.414583,0.1847,95,2273,2368
|
||||||
|
370,1/5/2012,1,1,1,4,1,0.265833,0.278412,0.524167,0.129987,140,3132,3272
|
||||||
|
371,1/6/2012,1,1,1,5,1,0.334167,0.340267,0.542083,0.167908,307,3791,4098
|
||||||
|
372,1/7/2012,1,1,1,6,1,0.393333,0.390779,0.531667,0.174758,1070,3451,4521
|
||||||
|
373,1/8/2012,1,1,1,0,1,0.3375,0.340258,0.465,0.191542,599,2826,3425
|
||||||
|
374,1/9/2012,1,1,1,1,2,0.224167,0.247479,0.701667,0.0989,106,2270,2376
|
||||||
|
375,1/10/2012,1,1,1,2,1,0.308696,0.318826,0.646522,0.187552,173,3425,3598
|
||||||
|
376,1/11/2012,1,1,1,3,2,0.274167,0.282821,0.8475,0.131221,92,2085,2177
|
||||||
|
377,1/12/2012,1,1,1,4,2,0.3825,0.381938,0.802917,0.180967,269,3828,4097
|
||||||
|
378,1/13/2012,1,1,1,5,1,0.274167,0.249362,0.5075,0.378108,174,3040,3214
|
||||||
|
379,1/14/2012,1,1,1,6,1,0.18,0.183087,0.4575,0.187183,333,2160,2493
|
||||||
|
380,1/15/2012,1,1,1,0,1,0.166667,0.161625,0.419167,0.251258,284,2027,2311
|
||||||
|
381,1/16/2012,1,1,1,1,1,0.19,0.190663,0.5225,0.231358,217,2081,2298
|
||||||
|
382,1/17/2012,1,1,1,2,2,0.373043,0.364278,0.716087,0.34913,127,2808,2935
|
||||||
|
383,1/18/2012,1,1,1,3,1,0.303333,0.275254,0.443333,0.415429,109,3267,3376
|
||||||
|
384,1/19/2012,1,1,1,4,1,0.19,0.190038,0.4975,0.220158,130,3162,3292
|
||||||
|
385,1/20/2012,1,1,1,5,2,0.2175,0.220958,0.45,0.20275,115,3048,3163
|
||||||
|
386,1/21/2012,1,1,1,6,2,0.173333,0.174875,0.83125,0.222642,67,1234,1301
|
||||||
|
387,1/22/2012,1,1,1,0,2,0.1625,0.16225,0.79625,0.199638,196,1781,1977
|
||||||
|
388,1/23/2012,1,1,1,1,2,0.218333,0.243058,0.91125,0.110708,145,2287,2432
|
||||||
|
389,1/24/2012,1,1,1,2,1,0.3425,0.349108,0.835833,0.123767,439,3900,4339
|
||||||
|
390,1/25/2012,1,1,1,3,1,0.294167,0.294821,0.64375,0.161071,467,3803,4270
|
||||||
|
391,1/26/2012,1,1,1,4,2,0.341667,0.35605,0.769583,0.0733958,244,3831,4075
|
||||||
|
392,1/27/2012,1,1,1,5,2,0.425,0.415383,0.74125,0.342667,269,3187,3456
|
||||||
|
393,1/28/2012,1,1,1,6,1,0.315833,0.326379,0.543333,0.210829,775,3248,4023
|
||||||
|
394,1/29/2012,1,1,1,0,1,0.2825,0.272721,0.31125,0.24005,558,2685,3243
|
||||||
|
395,1/30/2012,1,1,1,1,1,0.269167,0.262625,0.400833,0.215792,126,3498,3624
|
||||||
|
396,1/31/2012,1,1,1,2,1,0.39,0.381317,0.416667,0.261817,324,4185,4509
|
||||||
|
397,2/1/2012,1,1,2,3,1,0.469167,0.466538,0.507917,0.189067,304,4275,4579
|
||||||
|
398,2/2/2012,1,1,2,4,2,0.399167,0.398971,0.672917,0.187187,190,3571,3761
|
||||||
|
399,2/3/2012,1,1,2,5,1,0.313333,0.309346,0.526667,0.178496,310,3841,4151
|
||||||
|
400,2/4/2012,1,1,2,6,2,0.264167,0.272725,0.779583,0.121896,384,2448,2832
|
||||||
|
401,2/5/2012,1,1,2,0,2,0.265833,0.264521,0.687917,0.175996,318,2629,2947
|
||||||
|
402,2/6/2012,1,1,2,1,1,0.282609,0.296426,0.622174,0.1538,206,3578,3784
|
||||||
|
403,2/7/2012,1,1,2,2,1,0.354167,0.361104,0.49625,0.147379,199,4176,4375
|
||||||
|
404,2/8/2012,1,1,2,3,2,0.256667,0.266421,0.722917,0.133721,109,2693,2802
|
||||||
|
405,2/9/2012,1,1,2,4,1,0.265,0.261988,0.562083,0.194037,163,3667,3830
|
||||||
|
406,2/10/2012,1,1,2,5,2,0.280833,0.293558,0.54,0.116929,227,3604,3831
|
||||||
|
407,2/11/2012,1,1,2,6,3,0.224167,0.210867,0.73125,0.289796,192,1977,2169
|
||||||
|
408,2/12/2012,1,1,2,0,1,0.1275,0.101658,0.464583,0.409212,73,1456,1529
|
||||||
|
409,2/13/2012,1,1,2,1,1,0.2225,0.227913,0.41125,0.167283,94,3328,3422
|
||||||
|
410,2/14/2012,1,1,2,2,2,0.319167,0.333946,0.50875,0.141179,135,3787,3922
|
||||||
|
411,2/15/2012,1,1,2,3,1,0.348333,0.351629,0.53125,0.1816,141,4028,4169
|
||||||
|
412,2/16/2012,1,1,2,4,2,0.316667,0.330162,0.752917,0.091425,74,2931,3005
|
||||||
|
413,2/17/2012,1,1,2,5,1,0.343333,0.351629,0.634583,0.205846,349,3805,4154
|
||||||
|
414,2/18/2012,1,1,2,6,1,0.346667,0.355425,0.534583,0.190929,1435,2883,4318
|
||||||
|
415,2/19/2012,1,1,2,0,2,0.28,0.265788,0.515833,0.253112,618,2071,2689
|
||||||
|
416,2/20/2012,1,1,2,1,1,0.28,0.273391,0.507826,0.229083,502,2627,3129
|
||||||
|
417,2/21/2012,1,1,2,2,1,0.287826,0.295113,0.594348,0.205717,163,3614,3777
|
||||||
|
418,2/22/2012,1,1,2,3,1,0.395833,0.392667,0.567917,0.234471,394,4379,4773
|
||||||
|
419,2/23/2012,1,1,2,4,1,0.454167,0.444446,0.554583,0.190913,516,4546,5062
|
||||||
|
420,2/24/2012,1,1,2,5,2,0.4075,0.410971,0.7375,0.237567,246,3241,3487
|
||||||
|
421,2/25/2012,1,1,2,6,1,0.290833,0.255675,0.395833,0.421642,317,2415,2732
|
||||||
|
422,2/26/2012,1,1,2,0,1,0.279167,0.268308,0.41,0.205229,515,2874,3389
|
||||||
|
423,2/27/2012,1,1,2,1,1,0.366667,0.357954,0.490833,0.268033,253,4069,4322
|
||||||
|
424,2/28/2012,1,1,2,2,1,0.359167,0.353525,0.395833,0.193417,229,4134,4363
|
||||||
|
425,2/29/2012,1,1,2,3,2,0.344348,0.34847,0.804783,0.179117,65,1769,1834
|
||||||
|
426,3/1/2012,1,1,3,4,1,0.485833,0.475371,0.615417,0.226987,325,4665,4990
|
||||||
|
427,3/2/2012,1,1,3,5,2,0.353333,0.359842,0.657083,0.144904,246,2948,3194
|
||||||
|
428,3/3/2012,1,1,3,6,2,0.414167,0.413492,0.62125,0.161079,956,3110,4066
|
||||||
|
429,3/4/2012,1,1,3,0,1,0.325833,0.303021,0.403333,0.334571,710,2713,3423
|
||||||
|
430,3/5/2012,1,1,3,1,1,0.243333,0.241171,0.50625,0.228858,203,3130,3333
|
||||||
|
431,3/6/2012,1,1,3,2,1,0.258333,0.255042,0.456667,0.200875,221,3735,3956
|
||||||
|
432,3/7/2012,1,1,3,3,1,0.404167,0.3851,0.513333,0.345779,432,4484,4916
|
||||||
|
433,3/8/2012,1,1,3,4,1,0.5275,0.524604,0.5675,0.441563,486,4896,5382
|
||||||
|
434,3/9/2012,1,1,3,5,2,0.410833,0.397083,0.407083,0.4148,447,4122,4569
|
||||||
|
435,3/10/2012,1,1,3,6,1,0.2875,0.277767,0.350417,0.22575,968,3150,4118
|
||||||
|
436,3/11/2012,1,1,3,0,1,0.361739,0.35967,0.476957,0.222587,1658,3253,4911
|
||||||
|
437,3/12/2012,1,1,3,1,1,0.466667,0.459592,0.489167,0.207713,838,4460,5298
|
||||||
|
438,3/13/2012,1,1,3,2,1,0.565,0.542929,0.6175,0.23695,762,5085,5847
|
||||||
|
439,3/14/2012,1,1,3,3,1,0.5725,0.548617,0.507083,0.115062,997,5315,6312
|
||||||
|
440,3/15/2012,1,1,3,4,1,0.5575,0.532825,0.579583,0.149883,1005,5187,6192
|
||||||
|
441,3/16/2012,1,1,3,5,2,0.435833,0.436229,0.842083,0.113192,548,3830,4378
|
||||||
|
442,3/17/2012,1,1,3,6,2,0.514167,0.505046,0.755833,0.110704,3155,4681,7836
|
||||||
|
443,3/18/2012,1,1,3,0,2,0.4725,0.464,0.81,0.126883,2207,3685,5892
|
||||||
|
444,3/19/2012,1,1,3,1,1,0.545,0.532821,0.72875,0.162317,982,5171,6153
|
||||||
|
445,3/20/2012,1,1,3,2,1,0.560833,0.538533,0.807917,0.121271,1051,5042,6093
|
||||||
|
446,3/21/2012,2,1,3,3,2,0.531667,0.513258,0.82125,0.0895583,1122,5108,6230
|
||||||
|
447,3/22/2012,2,1,3,4,1,0.554167,0.531567,0.83125,0.117562,1334,5537,6871
|
||||||
|
448,3/23/2012,2,1,3,5,2,0.601667,0.570067,0.694167,0.1163,2469,5893,8362
|
||||||
|
449,3/24/2012,2,1,3,6,2,0.5025,0.486733,0.885417,0.192783,1033,2339,3372
|
||||||
|
450,3/25/2012,2,1,3,0,2,0.4375,0.437488,0.880833,0.220775,1532,3464,4996
|
||||||
|
451,3/26/2012,2,1,3,1,1,0.445833,0.43875,0.477917,0.386821,795,4763,5558
|
||||||
|
452,3/27/2012,2,1,3,2,1,0.323333,0.315654,0.29,0.187192,531,4571,5102
|
||||||
|
453,3/28/2012,2,1,3,3,1,0.484167,0.47095,0.48125,0.291671,674,5024,5698
|
||||||
|
454,3/29/2012,2,1,3,4,1,0.494167,0.482304,0.439167,0.31965,834,5299,6133
|
||||||
|
455,3/30/2012,2,1,3,5,2,0.37,0.375621,0.580833,0.138067,796,4663,5459
|
||||||
|
456,3/31/2012,2,1,3,6,2,0.424167,0.421708,0.738333,0.250617,2301,3934,6235
|
||||||
|
457,4/1/2012,2,1,4,0,2,0.425833,0.417287,0.67625,0.172267,2347,3694,6041
|
||||||
|
458,4/2/2012,2,1,4,1,1,0.433913,0.427513,0.504348,0.312139,1208,4728,5936
|
||||||
|
459,4/3/2012,2,1,4,2,1,0.466667,0.461483,0.396667,0.100133,1348,5424,6772
|
||||||
|
460,4/4/2012,2,1,4,3,1,0.541667,0.53345,0.469583,0.180975,1058,5378,6436
|
||||||
|
461,4/5/2012,2,1,4,4,1,0.435,0.431163,0.374167,0.219529,1192,5265,6457
|
||||||
|
462,4/6/2012,2,1,4,5,1,0.403333,0.390767,0.377083,0.300388,1807,4653,6460
|
||||||
|
463,4/7/2012,2,1,4,6,1,0.4375,0.426129,0.254167,0.274871,3252,3605,6857
|
||||||
|
464,4/8/2012,2,1,4,0,1,0.5,0.492425,0.275833,0.232596,2230,2939,5169
|
||||||
|
465,4/9/2012,2,1,4,1,1,0.489167,0.476638,0.3175,0.358196,905,4680,5585
|
||||||
|
466,4/10/2012,2,1,4,2,1,0.446667,0.436233,0.435,0.249375,819,5099,5918
|
||||||
|
467,4/11/2012,2,1,4,3,1,0.348696,0.337274,0.469565,0.295274,482,4380,4862
|
||||||
|
468,4/12/2012,2,1,4,4,1,0.3975,0.387604,0.46625,0.290429,663,4746,5409
|
||||||
|
469,4/13/2012,2,1,4,5,1,0.4425,0.431808,0.408333,0.155471,1252,5146,6398
|
||||||
|
470,4/14/2012,2,1,4,6,1,0.495,0.487996,0.502917,0.190917,2795,4665,7460
|
||||||
|
471,4/15/2012,2,1,4,0,1,0.606667,0.573875,0.507917,0.225129,2846,4286,7132
|
||||||
|
472,4/16/2012,2,1,4,1,1,0.664167,0.614925,0.561667,0.284829,1198,5172,6370
|
||||||
|
473,4/17/2012,2,1,4,2,1,0.608333,0.598487,0.390417,0.273629,989,5702,6691
|
||||||
|
474,4/18/2012,2,1,4,3,2,0.463333,0.457038,0.569167,0.167912,347,4020,4367
|
||||||
|
475,4/19/2012,2,1,4,4,1,0.498333,0.493046,0.6125,0.0659292,846,5719,6565
|
||||||
|
476,4/20/2012,2,1,4,5,1,0.526667,0.515775,0.694583,0.149871,1340,5950,7290
|
||||||
|
477,4/21/2012,2,1,4,6,1,0.57,0.542921,0.682917,0.283587,2541,4083,6624
|
||||||
|
478,4/22/2012,2,1,4,0,3,0.396667,0.389504,0.835417,0.344546,120,907,1027
|
||||||
|
479,4/23/2012,2,1,4,1,2,0.321667,0.301125,0.766667,0.303496,195,3019,3214
|
||||||
|
480,4/24/2012,2,1,4,2,1,0.413333,0.405283,0.454167,0.249383,518,5115,5633
|
||||||
|
481,4/25/2012,2,1,4,3,1,0.476667,0.470317,0.427917,0.118792,655,5541,6196
|
||||||
|
482,4/26/2012,2,1,4,4,2,0.498333,0.483583,0.756667,0.176625,475,4551,5026
|
||||||
|
483,4/27/2012,2,1,4,5,1,0.4575,0.452637,0.400833,0.347633,1014,5219,6233
|
||||||
|
484,4/28/2012,2,1,4,6,2,0.376667,0.377504,0.489583,0.129975,1120,3100,4220
|
||||||
|
485,4/29/2012,2,1,4,0,1,0.458333,0.450121,0.587083,0.116908,2229,4075,6304
|
||||||
|
486,4/30/2012,2,1,4,1,2,0.464167,0.457696,0.57,0.171638,665,4907,5572
|
||||||
|
487,5/1/2012,2,1,5,2,2,0.613333,0.577021,0.659583,0.156096,653,5087,5740
|
||||||
|
488,5/2/2012,2,1,5,3,1,0.564167,0.537896,0.797083,0.138058,667,5502,6169
|
||||||
|
489,5/3/2012,2,1,5,4,2,0.56,0.537242,0.768333,0.133696,764,5657,6421
|
||||||
|
490,5/4/2012,2,1,5,5,1,0.6275,0.590917,0.735417,0.162938,1069,5227,6296
|
||||||
|
491,5/5/2012,2,1,5,6,2,0.621667,0.584608,0.756667,0.152992,2496,4387,6883
|
||||||
|
492,5/6/2012,2,1,5,0,2,0.5625,0.546737,0.74,0.149879,2135,4224,6359
|
||||||
|
493,5/7/2012,2,1,5,1,2,0.5375,0.527142,0.664167,0.230721,1008,5265,6273
|
||||||
|
494,5/8/2012,2,1,5,2,2,0.581667,0.557471,0.685833,0.296029,738,4990,5728
|
||||||
|
495,5/9/2012,2,1,5,3,2,0.575,0.553025,0.744167,0.216412,620,4097,4717
|
||||||
|
496,5/10/2012,2,1,5,4,1,0.505833,0.491783,0.552083,0.314063,1026,5546,6572
|
||||||
|
497,5/11/2012,2,1,5,5,1,0.533333,0.520833,0.360417,0.236937,1319,5711,7030
|
||||||
|
498,5/12/2012,2,1,5,6,1,0.564167,0.544817,0.480417,0.123133,2622,4807,7429
|
||||||
|
499,5/13/2012,2,1,5,0,1,0.6125,0.585238,0.57625,0.225117,2172,3946,6118
|
||||||
|
500,5/14/2012,2,1,5,1,2,0.573333,0.5499,0.789583,0.212692,342,2501,2843
|
||||||
|
501,5/15/2012,2,1,5,2,2,0.611667,0.576404,0.794583,0.147392,625,4490,5115
|
||||||
|
502,5/16/2012,2,1,5,3,1,0.636667,0.595975,0.697917,0.122512,991,6433,7424
|
||||||
|
503,5/17/2012,2,1,5,4,1,0.593333,0.572613,0.52,0.229475,1242,6142,7384
|
||||||
|
504,5/18/2012,2,1,5,5,1,0.564167,0.551121,0.523333,0.136817,1521,6118,7639
|
||||||
|
505,5/19/2012,2,1,5,6,1,0.6,0.566908,0.45625,0.083975,3410,4884,8294
|
||||||
|
506,5/20/2012,2,1,5,0,1,0.620833,0.583967,0.530417,0.254367,2704,4425,7129
|
||||||
|
507,5/21/2012,2,1,5,1,2,0.598333,0.565667,0.81125,0.233204,630,3729,4359
|
||||||
|
508,5/22/2012,2,1,5,2,2,0.615,0.580825,0.765833,0.118167,819,5254,6073
|
||||||
|
509,5/23/2012,2,1,5,3,2,0.621667,0.584612,0.774583,0.102,766,4494,5260
|
||||||
|
510,5/24/2012,2,1,5,4,1,0.655,0.6067,0.716667,0.172896,1059,5711,6770
|
||||||
|
511,5/25/2012,2,1,5,5,1,0.68,0.627529,0.747083,0.14055,1417,5317,6734
|
||||||
|
512,5/26/2012,2,1,5,6,1,0.6925,0.642696,0.7325,0.198992,2855,3681,6536
|
||||||
|
513,5/27/2012,2,1,5,0,1,0.69,0.641425,0.697083,0.215171,3283,3308,6591
|
||||||
|
514,5/28/2012,2,1,5,1,1,0.7125,0.6793,0.67625,0.196521,2557,3486,6043
|
||||||
|
515,5/29/2012,2,1,5,2,1,0.7225,0.672992,0.684583,0.2954,880,4863,5743
|
||||||
|
516,5/30/2012,2,1,5,3,2,0.656667,0.611129,0.67,0.134329,745,6110,6855
|
||||||
|
517,5/31/2012,2,1,5,4,1,0.68,0.631329,0.492917,0.195279,1100,6238,7338
|
||||||
|
518,6/1/2012,2,1,6,5,2,0.654167,0.607962,0.755417,0.237563,533,3594,4127
|
||||||
|
519,6/2/2012,2,1,6,6,1,0.583333,0.566288,0.549167,0.186562,2795,5325,8120
|
||||||
|
520,6/3/2012,2,1,6,0,1,0.6025,0.575133,0.493333,0.184087,2494,5147,7641
|
||||||
|
521,6/4/2012,2,1,6,1,1,0.5975,0.578283,0.487083,0.284833,1071,5927,6998
|
||||||
|
522,6/5/2012,2,1,6,2,2,0.540833,0.525892,0.613333,0.209575,968,6033,7001
|
||||||
|
523,6/6/2012,2,1,6,3,1,0.554167,0.542292,0.61125,0.077125,1027,6028,7055
|
||||||
|
524,6/7/2012,2,1,6,4,1,0.6025,0.569442,0.567083,0.15735,1038,6456,7494
|
||||||
|
525,6/8/2012,2,1,6,5,1,0.649167,0.597862,0.467917,0.175383,1488,6248,7736
|
||||||
|
526,6/9/2012,2,1,6,6,1,0.710833,0.648367,0.437083,0.144287,2708,4790,7498
|
||||||
|
527,6/10/2012,2,1,6,0,1,0.726667,0.663517,0.538333,0.133721,2224,4374,6598
|
||||||
|
528,6/11/2012,2,1,6,1,2,0.720833,0.659721,0.587917,0.207713,1017,5647,6664
|
||||||
|
529,6/12/2012,2,1,6,2,2,0.653333,0.597875,0.833333,0.214546,477,4495,4972
|
||||||
|
530,6/13/2012,2,1,6,3,1,0.655833,0.611117,0.582083,0.343279,1173,6248,7421
|
||||||
|
531,6/14/2012,2,1,6,4,1,0.648333,0.624383,0.569583,0.253733,1180,6183,7363
|
||||||
|
532,6/15/2012,2,1,6,5,1,0.639167,0.599754,0.589583,0.176617,1563,6102,7665
|
||||||
|
533,6/16/2012,2,1,6,6,1,0.631667,0.594708,0.504167,0.166667,2963,4739,7702
|
||||||
|
534,6/17/2012,2,1,6,0,1,0.5925,0.571975,0.59875,0.144904,2634,4344,6978
|
||||||
|
535,6/18/2012,2,1,6,1,2,0.568333,0.544842,0.777917,0.174746,653,4446,5099
|
||||||
|
536,6/19/2012,2,1,6,2,1,0.688333,0.654692,0.69,0.148017,968,5857,6825
|
||||||
|
537,6/20/2012,2,1,6,3,1,0.7825,0.720975,0.592083,0.113812,872,5339,6211
|
||||||
|
538,6/21/2012,3,1,6,4,1,0.805833,0.752542,0.567917,0.118787,778,5127,5905
|
||||||
|
539,6/22/2012,3,1,6,5,1,0.7775,0.724121,0.57375,0.182842,964,4859,5823
|
||||||
|
540,6/23/2012,3,1,6,6,1,0.731667,0.652792,0.534583,0.179721,2657,4801,7458
|
||||||
|
541,6/24/2012,3,1,6,0,1,0.743333,0.674254,0.479167,0.145525,2551,4340,6891
|
||||||
|
542,6/25/2012,3,1,6,1,1,0.715833,0.654042,0.504167,0.300383,1139,5640,6779
|
||||||
|
543,6/26/2012,3,1,6,2,1,0.630833,0.594704,0.373333,0.347642,1077,6365,7442
|
||||||
|
544,6/27/2012,3,1,6,3,1,0.6975,0.640792,0.36,0.271775,1077,6258,7335
|
||||||
|
545,6/28/2012,3,1,6,4,1,0.749167,0.675512,0.4225,0.17165,921,5958,6879
|
||||||
|
546,6/29/2012,3,1,6,5,1,0.834167,0.786613,0.48875,0.165417,829,4634,5463
|
||||||
|
547,6/30/2012,3,1,6,6,1,0.765,0.687508,0.60125,0.161071,1455,4232,5687
|
||||||
|
548,7/1/2012,3,1,7,0,1,0.815833,0.750629,0.51875,0.168529,1421,4110,5531
|
||||||
|
549,7/2/2012,3,1,7,1,1,0.781667,0.702038,0.447083,0.195267,904,5323,6227
|
||||||
|
550,7/3/2012,3,1,7,2,1,0.780833,0.70265,0.492083,0.126237,1052,5608,6660
|
||||||
|
551,7/4/2012,3,1,7,3,1,0.789167,0.732337,0.53875,0.13495,2562,4841,7403
|
||||||
|
552,7/5/2012,3,1,7,4,1,0.8275,0.761367,0.457917,0.194029,1405,4836,6241
|
||||||
|
553,7/6/2012,3,1,7,5,1,0.828333,0.752533,0.450833,0.146142,1366,4841,6207
|
||||||
|
554,7/7/2012,3,1,7,6,1,0.861667,0.804913,0.492083,0.163554,1448,3392,4840
|
||||||
|
555,7/8/2012,3,1,7,0,1,0.8225,0.790396,0.57375,0.125629,1203,3469,4672
|
||||||
|
556,7/9/2012,3,1,7,1,2,0.710833,0.654054,0.683333,0.180975,998,5571,6569
|
||||||
|
557,7/10/2012,3,1,7,2,2,0.720833,0.664796,0.6675,0.151737,954,5336,6290
|
||||||
|
558,7/11/2012,3,1,7,3,1,0.716667,0.650271,0.633333,0.151733,975,6289,7264
|
||||||
|
559,7/12/2012,3,1,7,4,1,0.715833,0.654683,0.529583,0.146775,1032,6414,7446
|
||||||
|
560,7/13/2012,3,1,7,5,2,0.731667,0.667933,0.485833,0.08085,1511,5988,7499
|
||||||
|
561,7/14/2012,3,1,7,6,2,0.703333,0.666042,0.699167,0.143679,2355,4614,6969
|
||||||
|
562,7/15/2012,3,1,7,0,1,0.745833,0.705196,0.717917,0.166667,1920,4111,6031
|
||||||
|
563,7/16/2012,3,1,7,1,1,0.763333,0.724125,0.645,0.164187,1088,5742,6830
|
||||||
|
564,7/17/2012,3,1,7,2,1,0.818333,0.755683,0.505833,0.114429,921,5865,6786
|
||||||
|
565,7/18/2012,3,1,7,3,1,0.793333,0.745583,0.577083,0.137442,799,4914,5713
|
||||||
|
566,7/19/2012,3,1,7,4,1,0.77,0.714642,0.600417,0.165429,888,5703,6591
|
||||||
|
567,7/20/2012,3,1,7,5,2,0.665833,0.613025,0.844167,0.208967,747,5123,5870
|
||||||
|
568,7/21/2012,3,1,7,6,3,0.595833,0.549912,0.865417,0.2133,1264,3195,4459
|
||||||
|
569,7/22/2012,3,1,7,0,2,0.6675,0.623125,0.7625,0.0939208,2544,4866,7410
|
||||||
|
570,7/23/2012,3,1,7,1,1,0.741667,0.690017,0.694167,0.138683,1135,5831,6966
|
||||||
|
571,7/24/2012,3,1,7,2,1,0.750833,0.70645,0.655,0.211454,1140,6452,7592
|
||||||
|
572,7/25/2012,3,1,7,3,1,0.724167,0.654054,0.45,0.1648,1383,6790,8173
|
||||||
|
573,7/26/2012,3,1,7,4,1,0.776667,0.739263,0.596667,0.284813,1036,5825,6861
|
||||||
|
574,7/27/2012,3,1,7,5,1,0.781667,0.734217,0.594583,0.152992,1259,5645,6904
|
||||||
|
575,7/28/2012,3,1,7,6,1,0.755833,0.697604,0.613333,0.15735,2234,4451,6685
|
||||||
|
576,7/29/2012,3,1,7,0,1,0.721667,0.667933,0.62375,0.170396,2153,4444,6597
|
||||||
|
577,7/30/2012,3,1,7,1,1,0.730833,0.684987,0.66875,0.153617,1040,6065,7105
|
||||||
|
578,7/31/2012,3,1,7,2,1,0.713333,0.662896,0.704167,0.165425,968,6248,7216
|
||||||
|
579,8/1/2012,3,1,8,3,1,0.7175,0.667308,0.6775,0.141179,1074,6506,7580
|
||||||
|
580,8/2/2012,3,1,8,4,1,0.7525,0.707088,0.659583,0.129354,983,6278,7261
|
||||||
|
581,8/3/2012,3,1,8,5,2,0.765833,0.722867,0.6425,0.215792,1328,5847,7175
|
||||||
|
582,8/4/2012,3,1,8,6,1,0.793333,0.751267,0.613333,0.257458,2345,4479,6824
|
||||||
|
583,8/5/2012,3,1,8,0,1,0.769167,0.731079,0.6525,0.290421,1707,3757,5464
|
||||||
|
584,8/6/2012,3,1,8,1,2,0.7525,0.710246,0.654167,0.129354,1233,5780,7013
|
||||||
|
585,8/7/2012,3,1,8,2,2,0.735833,0.697621,0.70375,0.116908,1278,5995,7273
|
||||||
|
586,8/8/2012,3,1,8,3,2,0.75,0.707717,0.672917,0.1107,1263,6271,7534
|
||||||
|
587,8/9/2012,3,1,8,4,1,0.755833,0.699508,0.620417,0.1561,1196,6090,7286
|
||||||
|
588,8/10/2012,3,1,8,5,2,0.715833,0.667942,0.715833,0.238813,1065,4721,5786
|
||||||
|
589,8/11/2012,3,1,8,6,2,0.6925,0.638267,0.732917,0.206479,2247,4052,6299
|
||||||
|
590,8/12/2012,3,1,8,0,1,0.700833,0.644579,0.530417,0.122512,2182,4362,6544
|
||||||
|
591,8/13/2012,3,1,8,1,1,0.720833,0.662254,0.545417,0.136212,1207,5676,6883
|
||||||
|
592,8/14/2012,3,1,8,2,1,0.726667,0.676779,0.686667,0.169158,1128,5656,6784
|
||||||
|
593,8/15/2012,3,1,8,3,1,0.706667,0.654037,0.619583,0.169771,1198,6149,7347
|
||||||
|
594,8/16/2012,3,1,8,4,1,0.719167,0.654688,0.519167,0.141796,1338,6267,7605
|
||||||
|
595,8/17/2012,3,1,8,5,1,0.723333,0.2424,0.570833,0.231354,1483,5665,7148
|
||||||
|
596,8/18/2012,3,1,8,6,1,0.678333,0.618071,0.603333,0.177867,2827,5038,7865
|
||||||
|
597,8/19/2012,3,1,8,0,2,0.635833,0.603554,0.711667,0.08645,1208,3341,4549
|
||||||
|
598,8/20/2012,3,1,8,1,2,0.635833,0.595967,0.734167,0.129979,1026,5504,6530
|
||||||
|
599,8/21/2012,3,1,8,2,1,0.649167,0.601025,0.67375,0.0727708,1081,5925,7006
|
||||||
|
600,8/22/2012,3,1,8,3,1,0.6675,0.621854,0.677083,0.0702833,1094,6281,7375
|
||||||
|
601,8/23/2012,3,1,8,4,1,0.695833,0.637008,0.635833,0.0845958,1363,6402,7765
|
||||||
|
602,8/24/2012,3,1,8,5,2,0.7025,0.6471,0.615,0.0721458,1325,6257,7582
|
||||||
|
603,8/25/2012,3,1,8,6,2,0.661667,0.618696,0.712917,0.244408,1829,4224,6053
|
||||||
|
604,8/26/2012,3,1,8,0,2,0.653333,0.595996,0.845833,0.228858,1483,3772,5255
|
||||||
|
605,8/27/2012,3,1,8,1,1,0.703333,0.654688,0.730417,0.128733,989,5928,6917
|
||||||
|
606,8/28/2012,3,1,8,2,1,0.728333,0.66605,0.62,0.190925,935,6105,7040
|
||||||
|
607,8/29/2012,3,1,8,3,1,0.685,0.635733,0.552083,0.112562,1177,6520,7697
|
||||||
|
608,8/30/2012,3,1,8,4,1,0.706667,0.652779,0.590417,0.0771167,1172,6541,7713
|
||||||
|
609,8/31/2012,3,1,8,5,1,0.764167,0.6894,0.5875,0.168533,1433,5917,7350
|
||||||
|
610,9/1/2012,3,1,9,6,2,0.753333,0.702654,0.638333,0.113187,2352,3788,6140
|
||||||
|
611,9/2/2012,3,1,9,0,2,0.696667,0.649,0.815,0.0640708,2613,3197,5810
|
||||||
|
612,9/3/2012,3,1,9,1,1,0.7075,0.661629,0.790833,0.151121,1965,4069,6034
|
||||||
|
613,9/4/2012,3,1,9,2,1,0.725833,0.686888,0.755,0.236321,867,5997,6864
|
||||||
|
614,9/5/2012,3,1,9,3,1,0.736667,0.708983,0.74125,0.187808,832,6280,7112
|
||||||
|
615,9/6/2012,3,1,9,4,2,0.696667,0.655329,0.810417,0.142421,611,5592,6203
|
||||||
|
616,9/7/2012,3,1,9,5,1,0.703333,0.657204,0.73625,0.171646,1045,6459,7504
|
||||||
|
617,9/8/2012,3,1,9,6,2,0.659167,0.611121,0.799167,0.281104,1557,4419,5976
|
||||||
|
618,9/9/2012,3,1,9,0,1,0.61,0.578925,0.5475,0.224496,2570,5657,8227
|
||||||
|
619,9/10/2012,3,1,9,1,1,0.583333,0.565654,0.50375,0.258713,1118,6407,7525
|
||||||
|
620,9/11/2012,3,1,9,2,1,0.5775,0.554292,0.52,0.0920542,1070,6697,7767
|
||||||
|
621,9/12/2012,3,1,9,3,1,0.599167,0.570075,0.577083,0.131846,1050,6820,7870
|
||||||
|
622,9/13/2012,3,1,9,4,1,0.6125,0.579558,0.637083,0.0827208,1054,6750,7804
|
||||||
|
623,9/14/2012,3,1,9,5,1,0.633333,0.594083,0.6725,0.103863,1379,6630,8009
|
||||||
|
624,9/15/2012,3,1,9,6,1,0.608333,0.585867,0.501667,0.247521,3160,5554,8714
|
||||||
|
625,9/16/2012,3,1,9,0,1,0.58,0.563125,0.57,0.0901833,2166,5167,7333
|
||||||
|
626,9/17/2012,3,1,9,1,2,0.580833,0.55305,0.734583,0.151742,1022,5847,6869
|
||||||
|
627,9/18/2012,3,1,9,2,2,0.623333,0.565067,0.8725,0.357587,371,3702,4073
|
||||||
|
628,9/19/2012,3,1,9,3,1,0.5525,0.540404,0.536667,0.215175,788,6803,7591
|
||||||
|
629,9/20/2012,3,1,9,4,1,0.546667,0.532192,0.618333,0.118167,939,6781,7720
|
||||||
|
630,9/21/2012,3,1,9,5,1,0.599167,0.571971,0.66875,0.154229,1250,6917,8167
|
||||||
|
631,9/22/2012,3,1,9,6,1,0.65,0.610488,0.646667,0.283583,2512,5883,8395
|
||||||
|
632,9/23/2012,4,1,9,0,1,0.529167,0.518933,0.467083,0.223258,2454,5453,7907
|
||||||
|
633,9/24/2012,4,1,9,1,1,0.514167,0.502513,0.492917,0.142404,1001,6435,7436
|
||||||
|
634,9/25/2012,4,1,9,2,1,0.55,0.544179,0.57,0.236321,845,6693,7538
|
||||||
|
635,9/26/2012,4,1,9,3,1,0.635,0.596613,0.630833,0.2444,787,6946,7733
|
||||||
|
636,9/27/2012,4,1,9,4,2,0.65,0.607975,0.690833,0.134342,751,6642,7393
|
||||||
|
637,9/28/2012,4,1,9,5,2,0.619167,0.585863,0.69,0.164179,1045,6370,7415
|
||||||
|
638,9/29/2012,4,1,9,6,1,0.5425,0.530296,0.542917,0.227604,2589,5966,8555
|
||||||
|
639,9/30/2012,4,1,9,0,1,0.526667,0.517663,0.583333,0.134958,2015,4874,6889
|
||||||
|
640,10/1/2012,4,1,10,1,2,0.520833,0.512,0.649167,0.0908042,763,6015,6778
|
||||||
|
641,10/2/2012,4,1,10,2,3,0.590833,0.542333,0.871667,0.104475,315,4324,4639
|
||||||
|
642,10/3/2012,4,1,10,3,2,0.6575,0.599133,0.79375,0.0665458,728,6844,7572
|
||||||
|
643,10/4/2012,4,1,10,4,2,0.6575,0.607975,0.722917,0.117546,891,6437,7328
|
||||||
|
644,10/5/2012,4,1,10,5,1,0.615,0.580187,0.6275,0.10635,1516,6640,8156
|
||||||
|
645,10/6/2012,4,1,10,6,1,0.554167,0.538521,0.664167,0.268025,3031,4934,7965
|
||||||
|
646,10/7/2012,4,1,10,0,2,0.415833,0.419813,0.708333,0.141162,781,2729,3510
|
||||||
|
647,10/8/2012,4,1,10,1,2,0.383333,0.387608,0.709583,0.189679,874,4604,5478
|
||||||
|
648,10/9/2012,4,1,10,2,2,0.446667,0.438112,0.761667,0.1903,601,5791,6392
|
||||||
|
649,10/10/2012,4,1,10,3,1,0.514167,0.503142,0.630833,0.187821,780,6911,7691
|
||||||
|
650,10/11/2012,4,1,10,4,1,0.435,0.431167,0.463333,0.181596,834,6736,7570
|
||||||
|
651,10/12/2012,4,1,10,5,1,0.4375,0.433071,0.539167,0.235092,1060,6222,7282
|
||||||
|
652,10/13/2012,4,1,10,6,1,0.393333,0.391396,0.494583,0.146142,2252,4857,7109
|
||||||
|
653,10/14/2012,4,1,10,0,1,0.521667,0.508204,0.640417,0.278612,2080,4559,6639
|
||||||
|
654,10/15/2012,4,1,10,1,2,0.561667,0.53915,0.7075,0.296037,760,5115,5875
|
||||||
|
655,10/16/2012,4,1,10,2,1,0.468333,0.460846,0.558333,0.182221,922,6612,7534
|
||||||
|
656,10/17/2012,4,1,10,3,1,0.455833,0.450108,0.692917,0.101371,979,6482,7461
|
||||||
|
657,10/18/2012,4,1,10,4,2,0.5225,0.512625,0.728333,0.236937,1008,6501,7509
|
||||||
|
658,10/19/2012,4,1,10,5,2,0.563333,0.537896,0.815,0.134954,753,4671,5424
|
||||||
|
659,10/20/2012,4,1,10,6,1,0.484167,0.472842,0.572917,0.117537,2806,5284,8090
|
||||||
|
660,10/21/2012,4,1,10,0,1,0.464167,0.456429,0.51,0.166054,2132,4692,6824
|
||||||
|
661,10/22/2012,4,1,10,1,1,0.4875,0.482942,0.568333,0.0814833,830,6228,7058
|
||||||
|
662,10/23/2012,4,1,10,2,1,0.544167,0.530304,0.641667,0.0945458,841,6625,7466
|
||||||
|
663,10/24/2012,4,1,10,3,1,0.5875,0.558721,0.63625,0.0727792,795,6898,7693
|
||||||
|
664,10/25/2012,4,1,10,4,2,0.55,0.529688,0.800417,0.124375,875,6484,7359
|
||||||
|
665,10/26/2012,4,1,10,5,2,0.545833,0.52275,0.807083,0.132467,1182,6262,7444
|
||||||
|
666,10/27/2012,4,1,10,6,2,0.53,0.515133,0.72,0.235692,2643,5209,7852
|
||||||
|
667,10/28/2012,4,1,10,0,2,0.4775,0.467771,0.694583,0.398008,998,3461,4459
|
||||||
|
668,10/29/2012,4,1,10,1,3,0.44,0.4394,0.88,0.3582,2,20,22
|
||||||
|
669,10/30/2012,4,1,10,2,2,0.318182,0.309909,0.825455,0.213009,87,1009,1096
|
||||||
|
670,10/31/2012,4,1,10,3,2,0.3575,0.3611,0.666667,0.166667,419,5147,5566
|
||||||
|
671,11/1/2012,4,1,11,4,2,0.365833,0.369942,0.581667,0.157346,466,5520,5986
|
||||||
|
672,11/2/2012,4,1,11,5,1,0.355,0.356042,0.522083,0.266175,618,5229,5847
|
||||||
|
673,11/3/2012,4,1,11,6,2,0.343333,0.323846,0.49125,0.270529,1029,4109,5138
|
||||||
|
674,11/4/2012,4,1,11,0,1,0.325833,0.329538,0.532917,0.179108,1201,3906,5107
|
||||||
|
675,11/5/2012,4,1,11,1,1,0.319167,0.308075,0.494167,0.236325,378,4881,5259
|
||||||
|
676,11/6/2012,4,1,11,2,1,0.280833,0.281567,0.567083,0.173513,466,5220,5686
|
||||||
|
677,11/7/2012,4,1,11,3,2,0.295833,0.274621,0.5475,0.304108,326,4709,5035
|
||||||
|
678,11/8/2012,4,1,11,4,1,0.352174,0.341891,0.333478,0.347835,340,4975,5315
|
||||||
|
679,11/9/2012,4,1,11,5,1,0.361667,0.355413,0.540833,0.214558,709,5283,5992
|
||||||
|
680,11/10/2012,4,1,11,6,1,0.389167,0.393937,0.645417,0.0578458,2090,4446,6536
|
||||||
|
681,11/11/2012,4,1,11,0,1,0.420833,0.421713,0.659167,0.1275,2290,4562,6852
|
||||||
|
682,11/12/2012,4,1,11,1,1,0.485,0.475383,0.741667,0.173517,1097,5172,6269
|
||||||
|
683,11/13/2012,4,1,11,2,2,0.343333,0.323225,0.662917,0.342046,327,3767,4094
|
||||||
|
684,11/14/2012,4,1,11,3,1,0.289167,0.281563,0.552083,0.199625,373,5122,5495
|
||||||
|
685,11/15/2012,4,1,11,4,2,0.321667,0.324492,0.620417,0.152987,320,5125,5445
|
||||||
|
686,11/16/2012,4,1,11,5,1,0.345,0.347204,0.524583,0.171025,484,5214,5698
|
||||||
|
687,11/17/2012,4,1,11,6,1,0.325,0.326383,0.545417,0.179729,1313,4316,5629
|
||||||
|
688,11/18/2012,4,1,11,0,1,0.3425,0.337746,0.692917,0.227612,922,3747,4669
|
||||||
|
689,11/19/2012,4,1,11,1,2,0.380833,0.375621,0.623333,0.235067,449,5050,5499
|
||||||
|
690,11/20/2012,4,1,11,2,2,0.374167,0.380667,0.685,0.082725,534,5100,5634
|
||||||
|
691,11/21/2012,4,1,11,3,1,0.353333,0.364892,0.61375,0.103246,615,4531,5146
|
||||||
|
692,11/22/2012,4,1,11,4,1,0.34,0.350371,0.580417,0.0528708,955,1470,2425
|
||||||
|
693,11/23/2012,4,1,11,5,1,0.368333,0.378779,0.56875,0.148021,1603,2307,3910
|
||||||
|
694,11/24/2012,4,1,11,6,1,0.278333,0.248742,0.404583,0.376871,532,1745,2277
|
||||||
|
695,11/25/2012,4,1,11,0,1,0.245833,0.257583,0.468333,0.1505,309,2115,2424
|
||||||
|
696,11/26/2012,4,1,11,1,1,0.313333,0.339004,0.535417,0.04665,337,4750,5087
|
||||||
|
697,11/27/2012,4,1,11,2,2,0.291667,0.281558,0.786667,0.237562,123,3836,3959
|
||||||
|
698,11/28/2012,4,1,11,3,1,0.296667,0.289762,0.50625,0.210821,198,5062,5260
|
||||||
|
699,11/29/2012,4,1,11,4,1,0.28087,0.298422,0.555652,0.115522,243,5080,5323
|
||||||
|
700,11/30/2012,4,1,11,5,1,0.298333,0.323867,0.649583,0.0584708,362,5306,5668
|
||||||
|
701,12/1/2012,4,1,12,6,2,0.298333,0.316904,0.806667,0.0597042,951,4240,5191
|
||||||
|
702,12/2/2012,4,1,12,0,2,0.3475,0.359208,0.823333,0.124379,892,3757,4649
|
||||||
|
703,12/3/2012,4,1,12,1,1,0.4525,0.455796,0.7675,0.0827208,555,5679,6234
|
||||||
|
704,12/4/2012,4,1,12,2,1,0.475833,0.469054,0.73375,0.174129,551,6055,6606
|
||||||
|
705,12/5/2012,4,1,12,3,1,0.438333,0.428012,0.485,0.324021,331,5398,5729
|
||||||
|
706,12/6/2012,4,1,12,4,1,0.255833,0.258204,0.50875,0.174754,340,5035,5375
|
||||||
|
707,12/7/2012,4,1,12,5,2,0.320833,0.321958,0.764167,0.1306,349,4659,5008
|
||||||
|
708,12/8/2012,4,1,12,6,2,0.381667,0.389508,0.91125,0.101379,1153,4429,5582
|
||||||
|
709,12/9/2012,4,1,12,0,2,0.384167,0.390146,0.905417,0.157975,441,2787,3228
|
||||||
|
710,12/10/2012,4,1,12,1,2,0.435833,0.435575,0.925,0.190308,329,4841,5170
|
||||||
|
711,12/11/2012,4,1,12,2,2,0.353333,0.338363,0.596667,0.296037,282,5219,5501
|
||||||
|
712,12/12/2012,4,1,12,3,2,0.2975,0.297338,0.538333,0.162937,310,5009,5319
|
||||||
|
713,12/13/2012,4,1,12,4,1,0.295833,0.294188,0.485833,0.174129,425,5107,5532
|
||||||
|
714,12/14/2012,4,1,12,5,1,0.281667,0.294192,0.642917,0.131229,429,5182,5611
|
||||||
|
715,12/15/2012,4,1,12,6,1,0.324167,0.338383,0.650417,0.10635,767,4280,5047
|
||||||
|
716,12/16/2012,4,1,12,0,2,0.3625,0.369938,0.83875,0.100742,538,3248,3786
|
||||||
|
717,12/17/2012,4,1,12,1,2,0.393333,0.4015,0.907083,0.0982583,212,4373,4585
|
||||||
|
718,12/18/2012,4,1,12,2,1,0.410833,0.409708,0.66625,0.221404,433,5124,5557
|
||||||
|
719,12/19/2012,4,1,12,3,1,0.3325,0.342162,0.625417,0.184092,333,4934,5267
|
||||||
|
720,12/20/2012,4,1,12,4,2,0.33,0.335217,0.667917,0.132463,314,3814,4128
|
||||||
|
721,12/21/2012,1,1,12,5,2,0.326667,0.301767,0.556667,0.374383,221,3402,3623
|
||||||
|
722,12/22/2012,1,1,12,6,1,0.265833,0.236113,0.44125,0.407346,205,1544,1749
|
||||||
|
723,12/23/2012,1,1,12,0,1,0.245833,0.259471,0.515417,0.133083,408,1379,1787
|
||||||
|
724,12/24/2012,1,1,12,1,2,0.231304,0.2589,0.791304,0.0772304,174,746,920
|
||||||
|
725,12/25/2012,1,1,12,2,2,0.291304,0.294465,0.734783,0.168726,440,573,1013
|
||||||
|
726,12/26/2012,1,1,12,3,3,0.243333,0.220333,0.823333,0.316546,9,432,441
|
||||||
|
727,12/27/2012,1,1,12,4,2,0.254167,0.226642,0.652917,0.350133,247,1867,2114
|
||||||
|
728,12/28/2012,1,1,12,5,2,0.253333,0.255046,0.59,0.155471,644,2451,3095
|
||||||
|
729,12/29/2012,1,1,12,6,2,0.253333,0.2424,0.752917,0.124383,159,1182,1341
|
||||||
|
730,12/30/2012,1,1,12,0,1,0.255833,0.2317,0.483333,0.350754,364,1432,1796
|
||||||
|
731,12/31/2012,1,1,12,1,2,0.215833,0.223487,0.5775,0.154846,439,2290,2729
|
||||||
|
@@ -37,7 +37,8 @@
|
|||||||
"2. Instantiating AutoMLConfig with new task type \"forecasting\" for timeseries data training, and other timeseries related settings: for this dataset we use the basic one: \"time_column_name\" \n",
|
"2. Instantiating AutoMLConfig with new task type \"forecasting\" for timeseries data training, and other timeseries related settings: for this dataset we use the basic one: \"time_column_name\" \n",
|
||||||
"3. Training the Model using local compute\n",
|
"3. Training the Model using local compute\n",
|
||||||
"4. Exploring the results\n",
|
"4. Exploring the results\n",
|
||||||
"5. Testing the fitted model"
|
"5. Viewing the engineered names for featurized data and featurization summary for all raw features\n",
|
||||||
|
"6. Testing the fitted model"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -122,12 +123,22 @@
|
|||||||
"data.head()"
|
"data.head()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# let's take note of what columns means what in the data\n",
|
||||||
|
"time_column_name = 'timeStamp'\n",
|
||||||
|
"target_column_name = 'demand'"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### Split the data to train and test\n",
|
"### Split the data into train and test sets\n"
|
||||||
"\n"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -136,50 +147,10 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"train = data[data['timeStamp'] < '2017-02-01']\n",
|
"X_train = data[data[time_column_name] < '2017-02-01']\n",
|
||||||
"test = data[data['timeStamp'] >= '2017-02-01']\n"
|
"X_test = data[data[time_column_name] >= '2017-02-01']\n",
|
||||||
]
|
"y_train = X_train.pop(target_column_name).values\n",
|
||||||
},
|
"y_test = X_test.pop(target_column_name).values"
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Prepare the test data, we will feed X_test to the fitted model and get prediction"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"y_test = test.pop('demand').values\n",
|
|
||||||
"X_test = test"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Split the train data to train and valid\n",
|
|
||||||
"\n",
|
|
||||||
"Use one month's data as valid data\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"X_train = train[train['timeStamp'] < '2017-01-01']\n",
|
|
||||||
"X_valid = train[train['timeStamp'] >= '2017-01-01']\n",
|
|
||||||
"y_train = X_train.pop('demand').values\n",
|
|
||||||
"y_valid = X_valid.pop('demand').values\n",
|
|
||||||
"print(X_train.shape)\n",
|
|
||||||
"print(y_train.shape)\n",
|
|
||||||
"print(X_valid.shape)\n",
|
|
||||||
"print(y_valid.shape)"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -198,8 +169,7 @@
|
|||||||
"|**iteration_timeout_minutes**|Time limit in minutes for each iteration.|\n",
|
"|**iteration_timeout_minutes**|Time limit in minutes for each iteration.|\n",
|
||||||
"|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n",
|
"|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n",
|
||||||
"|**y**|(sparse) array-like, shape = [n_samples, ], targets values.|\n",
|
"|**y**|(sparse) array-like, shape = [n_samples, ], targets values.|\n",
|
||||||
"|**X_valid**|Data used to evaluate a model in a iteration. (sparse) array-like, shape = [n_samples, n_features]|\n",
|
"|**n_cross_validations**|Number of cross validation splits.|\n",
|
||||||
"|**y_valid**|Data used to evaluate a model in a iteration. (sparse) array-like, shape = [n_samples, ], targets values.|\n",
|
|
||||||
"|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder. "
|
"|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder. "
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -209,9 +179,8 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"time_column_name = 'timeStamp'\n",
|
|
||||||
"automl_settings = {\n",
|
"automl_settings = {\n",
|
||||||
" \"time_column_name\": time_column_name,\n",
|
" \"time_column_name\": time_column_name \n",
|
||||||
"}\n",
|
"}\n",
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
@@ -222,8 +191,7 @@
|
|||||||
" iteration_timeout_minutes = 5,\n",
|
" iteration_timeout_minutes = 5,\n",
|
||||||
" X = X_train,\n",
|
" X = X_train,\n",
|
||||||
" y = y_train,\n",
|
" y = y_train,\n",
|
||||||
" X_valid = X_valid,\n",
|
" n_cross_validations = 3,\n",
|
||||||
" y_valid = y_valid,\n",
|
|
||||||
" path=project_folder,\n",
|
" path=project_folder,\n",
|
||||||
" verbosity = logging.INFO,\n",
|
" verbosity = logging.INFO,\n",
|
||||||
" **automl_settings)"
|
" **automl_settings)"
|
||||||
@@ -233,7 +201,8 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"You can call the submit method on the experiment object and pass the run configuration. For Local runs the execution is synchronous. Depending on the data and number of iterations this can run for while.\n",
|
"Submitting the configuration will start a new run in this experiment. For local runs, the execution is synchronous. Depending on the data and number of iterations, this can run for a while. Parameters controlling concurrency may speed up the process, depending on your hardware.\n",
|
||||||
|
"\n",
|
||||||
"You will see the currently running iterations printing to the console."
|
"You will see the currently running iterations printing to the console."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -273,13 +242,34 @@
|
|||||||
"fitted_model.steps"
|
"fitted_model.steps"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### View the engineered names for featurized data\n",
|
||||||
|
"Below we display the engineered feature names generated for the featurized data using the time-series featurization."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"fitted_model.named_steps['timeseriestransformer'].get_engineered_feature_names()"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### Test the Best Fitted Model\n",
|
"### Test the Best Fitted Model\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Predict on training and test set, and calculate residual values."
|
"For forecasting, we will use the `forecast` function instead of the `predict` function. There are two reasons for this.\n",
|
||||||
|
"\n",
|
||||||
|
"We need to pass the recent values of the target variable `y`, whereas the scikit-compatible `predict` function only takes the non-target variables `X`. In our case, the test data immediately follows the training data, and we fill the `y` variable with `NaN`. The `NaN` serves as a question mark for the forecaster to fill with the actuals. Using the forecast function will produce forecasts using the shortest possible forecast horizon. The last time at which a definite (non-NaN) value is seen is the _forecast origin_ - the last time when the value of the target is known. \n",
|
||||||
|
"\n",
|
||||||
|
"Using the `predict` method would result in getting predictions for EVERY horizon the forecaster can predict at. This is useful when training and evaluating the performance of the forecaster at various horizons, but the level of detail is excessive for normal use."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -288,15 +278,64 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"y_pred = fitted_model.predict(X_test)\n",
|
"# Replace ALL values in y_pred by NaN. \n",
|
||||||
"y_pred"
|
"# The forecast origin will be at the beginning of the first forecast period\n",
|
||||||
|
"# (which is the same time as the end of the last training period).\n",
|
||||||
|
"y_query = y_test.copy().astype(np.float)\n",
|
||||||
|
"y_query.fill(np.nan)\n",
|
||||||
|
"# The featurized data, aligned to y, will also be returned.\n",
|
||||||
|
"# This contains the assumptions that were made in the forecast\n",
|
||||||
|
"# and helps align the forecast to the original data\n",
|
||||||
|
"y_fcst, X_trans = fitted_model.forecast(X_test, y_query)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# limit the evaluation to data where y_test has actuals\n",
|
||||||
|
"def align_outputs(y_predicted, X_trans, X_test, y_test, predicted_column_name = 'predicted'):\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" Demonstrates how to get the output aligned to the inputs\n",
|
||||||
|
" using pandas indexes. Helps understand what happened if\n",
|
||||||
|
" the output's shape differs from the input shape, or if\n",
|
||||||
|
" the data got re-sorted by time and grain during forecasting.\n",
|
||||||
|
" \n",
|
||||||
|
" Typical causes of misalignment are:\n",
|
||||||
|
" * we predicted some periods that were missing in actuals -> drop from eval\n",
|
||||||
|
" * model was asked to predict past max_horizon -> increase max horizon\n",
|
||||||
|
" * data at start of X_test was needed for lags -> provide previous periods\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" df_fcst = pd.DataFrame({predicted_column_name : y_predicted})\n",
|
||||||
|
" # y and X outputs are aligned by forecast() function contract\n",
|
||||||
|
" df_fcst.index = X_trans.index\n",
|
||||||
|
" \n",
|
||||||
|
" # align original X_test to y_test \n",
|
||||||
|
" X_test_full = X_test.copy()\n",
|
||||||
|
" X_test_full[target_column_name] = y_test\n",
|
||||||
|
"\n",
|
||||||
|
" # X_test_full's does not include origin, so reset for merge\n",
|
||||||
|
" df_fcst.reset_index(inplace=True)\n",
|
||||||
|
" X_test_full = X_test_full.reset_index().drop(columns='index')\n",
|
||||||
|
" together = df_fcst.merge(X_test_full, how='right')\n",
|
||||||
|
" \n",
|
||||||
|
" # drop rows where prediction or actuals are nan \n",
|
||||||
|
" # happens because of missing actuals \n",
|
||||||
|
" # or at edges of time due to lags/rolling windows\n",
|
||||||
|
" clean = together[together[[target_column_name, predicted_column_name]].notnull().all(axis=1)]\n",
|
||||||
|
" return(clean)\n",
|
||||||
|
"\n",
|
||||||
|
"df_all = align_outputs(y_fcst, X_trans, X_test, y_test)\n",
|
||||||
|
"df_all.head()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### Use the Check Data Function to remove the nan values from y_test to avoid error when calculate metrics "
|
"Looking at `X_trans` is also useful to see what featurization happened to the data."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -305,29 +344,14 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"if len(y_test) != len(y_pred):\n",
|
"X_trans"
|
||||||
" raise ValueError(\n",
|
|
||||||
" 'the true values and prediction values do not have equal length.')\n",
|
|
||||||
"elif len(y_test) == 0:\n",
|
|
||||||
" raise ValueError(\n",
|
|
||||||
" 'y_true and y_pred are empty.')\n",
|
|
||||||
"\n",
|
|
||||||
"# if there is any non-numeric element in the y_true or y_pred,\n",
|
|
||||||
"# the ValueError exception will be thrown.\n",
|
|
||||||
"y_test_f = np.array(y_test).astype(float)\n",
|
|
||||||
"y_pred_f = np.array(y_pred).astype(float)\n",
|
|
||||||
"\n",
|
|
||||||
"# remove entries both in y_true and y_pred where at least\n",
|
|
||||||
"# one element in y_true or y_pred is missing\n",
|
|
||||||
"y_test = y_test_f[~(np.isnan(y_test_f) | np.isnan(y_pred_f))]\n",
|
|
||||||
"y_pred = y_pred_f[~(np.isnan(y_test_f) | np.isnan(y_pred_f))]"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### Calculate metrics for the prediction\n"
|
"### Calculate accuracy metrics\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -336,26 +360,180 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"print(\"[Test Data] \\nRoot Mean squared error: %.2f\" % np.sqrt(mean_squared_error(y_test, y_pred)))\n",
|
"def MAPE(actual, pred):\n",
|
||||||
"# Explained variance score: 1 is perfect prediction\n",
|
" \"\"\"\n",
|
||||||
"print('mean_absolute_error score: %.2f' % mean_absolute_error(y_test, y_pred))\n",
|
" Calculate mean absolute percentage error.\n",
|
||||||
"print('R2 score: %.2f' % r2_score(y_test, y_pred))\n",
|
" Remove NA and values where actual is close to zero\n",
|
||||||
"\n",
|
" \"\"\"\n",
|
||||||
"\n",
|
" not_na = ~(np.isnan(actual) | np.isnan(pred))\n",
|
||||||
|
" not_zero = ~np.isclose(actual, 0.0)\n",
|
||||||
|
" actual_safe = actual[not_na & not_zero]\n",
|
||||||
|
" pred_safe = pred[not_na & not_zero]\n",
|
||||||
|
" APE = 100*np.abs((actual_safe - pred_safe)/actual_safe)\n",
|
||||||
|
" return np.mean(APE)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"print(\"Simple forecasting model\")\n",
|
||||||
|
"rmse = np.sqrt(mean_squared_error(df_all[target_column_name], df_all['predicted']))\n",
|
||||||
|
"print(\"[Test Data] \\nRoot Mean squared error: %.2f\" % rmse)\n",
|
||||||
|
"mae = mean_absolute_error(df_all[target_column_name], df_all['predicted'])\n",
|
||||||
|
"print('mean_absolute_error score: %.2f' % mae)\n",
|
||||||
|
"print('MAPE: %.2f' % MAPE(df_all[target_column_name], df_all['predicted']))\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Plot outputs\n",
|
"# Plot outputs\n",
|
||||||
"%matplotlib notebook\n",
|
"%matplotlib notebook\n",
|
||||||
"test_pred = plt.scatter(y_test, y_pred, color='b')\n",
|
"test_pred = plt.scatter(df_all[target_column_name], df_all['predicted'], color='b')\n",
|
||||||
"test_test = plt.scatter(y_test, y_test, color='g')\n",
|
"test_test = plt.scatter(y_test, y_test, color='g')\n",
|
||||||
"plt.legend((test_pred, test_test), ('prediction', 'truth'), loc='upper left', fontsize=8)\n",
|
"plt.legend((test_pred, test_test), ('prediction', 'truth'), loc='upper left', fontsize=8)\n",
|
||||||
"plt.show()"
|
"plt.show()"
|
||||||
]
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"The distribution looks a little heavy tailed: we underestimate the excursions of the extremes. A normal-quantile transform of the target might help, but let's first try using some past data with the lags and rolling window transforms.\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Using lags and rolling window features to improve the forecast"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"We did not use lags in the previous model specification. In effect, the prediction was the result of a simple regression on date, grain and any additional features. This is often a very good prediction as common time series patterns like seasonality and trends can be captured in this manner. Such simple regression is horizon-less: it doesn't matter how far into the future we are predicting, because we are not using past data.\n",
|
||||||
|
"\n",
|
||||||
|
"Now that we configured target lags, that is the previous values of the target variables, and the prediction is no longer horizon-less. We therefore must specify the `max_horizon` that the model will learn to forecast. The `target_lags` keyword specifies how far back we will construct the lags of the target variable, and the `target_rolling_window_size` specifies the size of the rolling window over which we will generate the `max`, `min` and `sum` features."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"automl_settings_lags = {\n",
|
||||||
|
" 'time_column_name': time_column_name,\n",
|
||||||
|
" 'target_lags': 1,\n",
|
||||||
|
" 'target_rolling_window_size': 5,\n",
|
||||||
|
" # you MUST set the max_horizon when using lags and rolling windows\n",
|
||||||
|
" # it is optional when looking-back features are not used \n",
|
||||||
|
" 'max_horizon': len(y_test), # only one grain\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"automl_config_lags = AutoMLConfig(task = 'forecasting',\n",
|
||||||
|
" debug_log = 'automl_nyc_energy_errors.log',\n",
|
||||||
|
" primary_metric='normalized_root_mean_squared_error',\n",
|
||||||
|
" iterations = 10,\n",
|
||||||
|
" iteration_timeout_minutes = 5,\n",
|
||||||
|
" X = X_train,\n",
|
||||||
|
" y = y_train,\n",
|
||||||
|
" n_cross_validations = 3,\n",
|
||||||
|
" path=project_folder,\n",
|
||||||
|
" verbosity = logging.INFO,\n",
|
||||||
|
" **automl_settings_lags)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"local_run_lags = experiment.submit(automl_config_lags, show_output=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"best_run_lags, fitted_model_lags = local_run_lags.get_output()\n",
|
||||||
|
"y_fcst_lags, X_trans_lags = fitted_model_lags.forecast(X_test, y_query)\n",
|
||||||
|
"df_lags = align_outputs(y_fcst_lags, X_trans_lags, X_test, y_test)\n",
|
||||||
|
"df_lags.head()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"X_trans_lags"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"print(\"Forecasting model with lags\")\n",
|
||||||
|
"rmse = np.sqrt(mean_squared_error(df_lags[target_column_name], df_lags['predicted']))\n",
|
||||||
|
"print(\"[Test Data] \\nRoot Mean squared error: %.2f\" % rmse)\n",
|
||||||
|
"mae = mean_absolute_error(df_lags[target_column_name], df_lags['predicted'])\n",
|
||||||
|
"print('mean_absolute_error score: %.2f' % mae)\n",
|
||||||
|
"print('MAPE: %.2f' % MAPE(df_lags[target_column_name], df_lags['predicted']))\n",
|
||||||
|
"\n",
|
||||||
|
"# Plot outputs\n",
|
||||||
|
"%matplotlib notebook\n",
|
||||||
|
"test_pred = plt.scatter(df_lags[target_column_name], df_lags['predicted'], color='b')\n",
|
||||||
|
"test_test = plt.scatter(y_test, y_test, color='g')\n",
|
||||||
|
"plt.legend((test_pred, test_test), ('prediction', 'truth'), loc='upper left', fontsize=8)\n",
|
||||||
|
"plt.show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### What features matter for the forecast?"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.train.automl.automlexplainer import explain_model\n",
|
||||||
|
"\n",
|
||||||
|
"# feature names are everything in the transformed data except the target\n",
|
||||||
|
"features = X_trans.columns[:-1]\n",
|
||||||
|
"expl = explain_model(fitted_model, X_train, X_test, features = features, best_run=best_run_lags, y_train = y_train)\n",
|
||||||
|
"# unpack the tuple\n",
|
||||||
|
"shap_values, expected_values, feat_overall_imp, feat_names, per_class_summary, per_class_imp = expl\n",
|
||||||
|
"best_run_lags"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Please go to the Azure Portal's best run to see the top features chart.\n",
|
||||||
|
"\n",
|
||||||
|
"The informative features make all sorts of intuitive sense. Temperature is a strong driver of heating and cooling demand in NYC. Apart from that, the daily life cycle, expressed by `hour`, and the weekly cycle, expressed by `wday` drives people's energy use habits."
|
||||||
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"authors": [
|
"authors": [
|
||||||
{
|
{
|
||||||
"name": "xiaga"
|
"name": "xiaga, tosingli"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
@@ -373,7 +551,7 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.6.8"
|
"version": "3.6.7"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
|
|||||||
@@ -20,7 +20,9 @@
|
|||||||
"1. [Introduction](#Introduction)\n",
|
"1. [Introduction](#Introduction)\n",
|
||||||
"1. [Setup](#Setup)\n",
|
"1. [Setup](#Setup)\n",
|
||||||
"1. [Data](#Data)\n",
|
"1. [Data](#Data)\n",
|
||||||
"1. [Train](#Train)"
|
"1. [Train](#Train)\n",
|
||||||
|
"1. [Predict](#Predict)\n",
|
||||||
|
"1. [Operationalize](#Operationalize)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -85,9 +87,9 @@
|
|||||||
"ws = Workspace.from_config()\n",
|
"ws = Workspace.from_config()\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# choose a name for the run history container in the workspace\n",
|
"# choose a name for the run history container in the workspace\n",
|
||||||
"experiment_name = 'automl-ojsalesforecasting'\n",
|
"experiment_name = 'automl-ojforecasting'\n",
|
||||||
"# project folder\n",
|
"# project folder\n",
|
||||||
"project_folder = './sample_projects/automl-local-ojsalesforecasting'\n",
|
"project_folder = './sample_projects/automl-local-ojforecasting'\n",
|
||||||
"\n",
|
"\n",
|
||||||
"experiment = Experiment(ws, experiment_name)\n",
|
"experiment = Experiment(ws, experiment_name)\n",
|
||||||
"\n",
|
"\n",
|
||||||
@@ -260,12 +262,12 @@
|
|||||||
" 'time_column_name': time_column_name,\n",
|
" 'time_column_name': time_column_name,\n",
|
||||||
" 'grain_column_names': grain_column_names,\n",
|
" 'grain_column_names': grain_column_names,\n",
|
||||||
" 'drop_column_names': ['logQuantity'],\n",
|
" 'drop_column_names': ['logQuantity'],\n",
|
||||||
" 'max_horizon': n_test_periods\n",
|
" 'max_horizon': n_test_periods # optional\n",
|
||||||
"}\n",
|
"}\n",
|
||||||
"\n",
|
"\n",
|
||||||
"automl_config = AutoMLConfig(task='forecasting',\n",
|
"automl_config = AutoMLConfig(task='forecasting',\n",
|
||||||
" debug_log='automl_oj_sales_errors.log',\n",
|
" debug_log='automl_oj_sales_errors.log',\n",
|
||||||
" primary_metric='normalized_root_mean_squared_error',\n",
|
" primary_metric='normalized_mean_absolute_error',\n",
|
||||||
" iterations=10,\n",
|
" iterations=10,\n",
|
||||||
" X=X_train,\n",
|
" X=X_train,\n",
|
||||||
" y=y_train,\n",
|
" y=y_train,\n",
|
||||||
@@ -293,15 +295,6 @@
|
|||||||
"local_run = experiment.submit(automl_config, show_output=True)"
|
"local_run = experiment.submit(automl_config, show_output=True)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"local_run"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
@@ -324,7 +317,7 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### Make Predictions from the Best Fitted Model\n",
|
"# Predict\n",
|
||||||
"Now that we have retrieved the best pipeline/model, it can be used to make predictions on test data. First, we remove the target values from the test set:"
|
"Now that we have retrieved the best pipeline/model, it can be used to make predictions on test data. First, we remove the target values from the test set:"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -352,7 +345,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"To produce predictions on the test set, we need to know the feature values at all dates in the test set. This requirement is somewhat reasonable for the OJ sales data since the features mainly consist of price, which is usually set in advance, and customer demographics which are approximately constant for each store over the 20 week forecast horizon in the testing data. \n",
|
"To produce predictions on the test set, we need to know the feature values at all dates in the test set. This requirement is somewhat reasonable for the OJ sales data since the features mainly consist of price, which is usually set in advance, and customer demographics which are approximately constant for each store over the 20 week forecast horizon in the testing data. \n",
|
||||||
"\n",
|
"\n",
|
||||||
"The target predictions can be retrieved by calling the `predict` method on the best model:"
|
"We will first create a query `y_query`, which is aligned index-for-index to `X_test`. This is a vector of target values where each `NaN` serves the function of the question mark to be replaced by forecast. Passing definite values in the `y` argument allows the `forecast` function to make predictions on data that does not immediately follow the train data which contains `y`. In each grain, the last time point where the model sees a definite value of `y` is that grain's _forecast origin_."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -361,15 +354,76 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"y_pred = fitted_pipeline.predict(X_test)"
|
"# Replace ALL values in y_pred by NaN.\n",
|
||||||
|
"# The forecast origin will be at the beginning of the first forecast period.\n",
|
||||||
|
"# (Which is the same time as the end of the last training period.)\n",
|
||||||
|
"y_query = y_test.copy().astype(np.float)\n",
|
||||||
|
"y_query.fill(np.nan)\n",
|
||||||
|
"# The featurized data, aligned to y, will also be returned.\n",
|
||||||
|
"# This contains the assumptions that were made in the forecast\n",
|
||||||
|
"# and helps align the forecast to the original data\n",
|
||||||
|
"y_pred, X_trans = fitted_pipeline.forecast(X_test, y_query)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### Calculate evaluation metrics for the prediction\n",
|
"If you are used to scikit pipelines, perhaps you expected `predict(X_test)`. However, forecasting requires a more general interface that also supplies the past target `y` values. Please use `forecast(X,y)` as `predict(X)` is reserved for internal purposes on forecasting models.\n",
|
||||||
"To evaluate the accuracy of the forecast, we'll compare against the actual sales quantities for some select metrics, included the mean absolute percentage error (MAPE)."
|
"\n",
|
||||||
|
"The [energy demand forecasting notebook](https://github.com/Azure/MachineLearningNotebooks/tree/master/how-to-use-azureml/automated-machine-learning/forecasting-energy-demand) demonstrates the use of the forecast function in more detail in the context of using lags and rolling window features. "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Evaluate\n",
|
||||||
|
"\n",
|
||||||
|
"To evaluate the accuracy of the forecast, we'll compare against the actual sales quantities for some select metrics, included the mean absolute percentage error (MAPE). \n",
|
||||||
|
"\n",
|
||||||
|
"It is a good practice to always align the output explicitly to the input, as the count and order of the rows may have changed during transformations that span multiple rows."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"def align_outputs(y_predicted, X_trans, X_test, y_test, predicted_column_name = 'predicted'):\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" Demonstrates how to get the output aligned to the inputs\n",
|
||||||
|
" using pandas indexes. Helps understand what happened if\n",
|
||||||
|
" the output's shape differs from the input shape, or if\n",
|
||||||
|
" the data got re-sorted by time and grain during forecasting.\n",
|
||||||
|
" \n",
|
||||||
|
" Typical causes of misalignment are:\n",
|
||||||
|
" * we predicted some periods that were missing in actuals -> drop from eval\n",
|
||||||
|
" * model was asked to predict past max_horizon -> increase max horizon\n",
|
||||||
|
" * data at start of X_test was needed for lags -> provide previous periods in y\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" \n",
|
||||||
|
" df_fcst = pd.DataFrame({predicted_column_name : y_predicted})\n",
|
||||||
|
" # y and X outputs are aligned by forecast() function contract\n",
|
||||||
|
" df_fcst.index = X_trans.index\n",
|
||||||
|
" \n",
|
||||||
|
" # align original X_test to y_test \n",
|
||||||
|
" X_test_full = X_test.copy()\n",
|
||||||
|
" X_test_full[target_column_name] = y_test\n",
|
||||||
|
"\n",
|
||||||
|
" # X_test_full's index does not include origin, so reset for merge\n",
|
||||||
|
" df_fcst.reset_index(inplace=True)\n",
|
||||||
|
" X_test_full = X_test_full.reset_index().drop(columns='index')\n",
|
||||||
|
" together = df_fcst.merge(X_test_full, how='right')\n",
|
||||||
|
" \n",
|
||||||
|
" # drop rows where prediction or actuals are nan \n",
|
||||||
|
" # happens because of missing actuals \n",
|
||||||
|
" # or at edges of time due to lags/rolling windows\n",
|
||||||
|
" clean = together[together[[target_column_name, predicted_column_name]].notnull().all(axis=1)]\n",
|
||||||
|
" return(clean)\n",
|
||||||
|
"\n",
|
||||||
|
"df_all = align_outputs(y_pred, X_trans, X_test, y_test)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -388,18 +442,392 @@
|
|||||||
" actual_safe = actual[not_na & not_zero]\n",
|
" actual_safe = actual[not_na & not_zero]\n",
|
||||||
" pred_safe = pred[not_na & not_zero]\n",
|
" pred_safe = pred[not_na & not_zero]\n",
|
||||||
" APE = 100*np.abs((actual_safe - pred_safe)/actual_safe)\n",
|
" APE = 100*np.abs((actual_safe - pred_safe)/actual_safe)\n",
|
||||||
" return np.mean(APE)\n",
|
" return np.mean(APE)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"print(\"Simple forecasting model\")\n",
|
||||||
|
"rmse = np.sqrt(mean_squared_error(df_all[target_column_name], df_all['predicted']))\n",
|
||||||
|
"print(\"[Test Data] \\nRoot Mean squared error: %.2f\" % rmse)\n",
|
||||||
|
"mae = mean_absolute_error(df_all[target_column_name], df_all['predicted'])\n",
|
||||||
|
"print('mean_absolute_error score: %.2f' % mae)\n",
|
||||||
|
"print('MAPE: %.2f' % MAPE(df_all[target_column_name], df_all['predicted']))\n",
|
||||||
"\n",
|
"\n",
|
||||||
"print(\"[Test Data] \\nRoot Mean squared error: %.2f\" % np.sqrt(mean_squared_error(y_test, y_pred)))\n",
|
"# Plot outputs\n",
|
||||||
"print('mean_absolute_error score: %.2f' % mean_absolute_error(y_test, y_pred))\n",
|
"import matplotlib.pyplot as plt\n",
|
||||||
"print('MAPE: %.2f' % MAPE(y_test, y_pred))"
|
"\n",
|
||||||
|
"%matplotlib notebook\n",
|
||||||
|
"test_pred = plt.scatter(df_all[target_column_name], df_all['predicted'], color='b')\n",
|
||||||
|
"test_test = plt.scatter(y_test, y_test, color='g')\n",
|
||||||
|
"plt.legend((test_pred, test_test), ('prediction', 'truth'), loc='upper left', fontsize=8)\n",
|
||||||
|
"plt.show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Operationalize"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"_Operationalization_ means getting the model into the cloud so that other can run it after you close the notebook. We will create a docker running on Azure Container Instances with the model."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"description = 'AutoML OJ forecaster'\n",
|
||||||
|
"tags = None\n",
|
||||||
|
"model = local_run.register_model(description = description, tags = tags)\n",
|
||||||
|
"\n",
|
||||||
|
"print(local_run.model_id)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Develop the scoring script\n",
|
||||||
|
"\n",
|
||||||
|
"Serializing and deserializing complex data frames may be tricky. We first develop the `run()` function of the scoring script locally, then write it into a scoring script. It is much easier to debug any quirks of the scoring function without crossing two compute environments. For this exercise, we handle a common quirk of how pandas dataframes serialize time stamp values."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# this is where we test the run function of the scoring script interactively\n",
|
||||||
|
"# before putting it in the scoring script\n",
|
||||||
|
"\n",
|
||||||
|
"timestamp_columns = ['WeekStarting']\n",
|
||||||
|
"\n",
|
||||||
|
"def run(rawdata, test_model = None):\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" Intended to process 'rawdata' string produced by\n",
|
||||||
|
" \n",
|
||||||
|
" {'X': X_test.to_json(), y' : y_test.to_json()}\n",
|
||||||
|
" \n",
|
||||||
|
" Don't convert the X payload to numpy.array, use it as pandas.DataFrame\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" try:\n",
|
||||||
|
" # unpack the data frame with timestamp \n",
|
||||||
|
" rawobj = json.loads(rawdata) # rawobj is now a dict of strings \n",
|
||||||
|
" X_pred = pd.read_json(rawobj['X'], convert_dates=False) # load the pandas DF from a json string\n",
|
||||||
|
" for col in timestamp_columns: # fix timestamps\n",
|
||||||
|
" X_pred[col] = pd.to_datetime(X_pred[col], unit='ms') \n",
|
||||||
|
" \n",
|
||||||
|
" y_pred = np.array(rawobj['y']) # reconstitute numpy array from serialized list\n",
|
||||||
|
" \n",
|
||||||
|
" if test_model is None:\n",
|
||||||
|
" result = model.forecast(X_pred, y_pred) # use the global model from init function\n",
|
||||||
|
" else:\n",
|
||||||
|
" result = test_model.forecast(X_pred, y_pred) # use the model on which we are testing\n",
|
||||||
|
" \n",
|
||||||
|
" except Exception as e:\n",
|
||||||
|
" result = str(e)\n",
|
||||||
|
" return json.dumps({\"error\": result})\n",
|
||||||
|
" \n",
|
||||||
|
" forecast_as_list = result[0].tolist()\n",
|
||||||
|
" index_as_df = result[1].index.to_frame().reset_index(drop=True)\n",
|
||||||
|
" \n",
|
||||||
|
" return json.dumps({\"forecast\": forecast_as_list, # return the minimum over the wire: \n",
|
||||||
|
" \"index\": index_as_df.to_json() # no forecast and its featurized values\n",
|
||||||
|
" })"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# test the run function here before putting in the scoring script\n",
|
||||||
|
"import json\n",
|
||||||
|
"\n",
|
||||||
|
"test_sample = json.dumps({'X': X_test.to_json(), 'y' : y_query.tolist()})\n",
|
||||||
|
"response = run(test_sample, fitted_pipeline)\n",
|
||||||
|
"\n",
|
||||||
|
"# unpack the response, dealing with the timestamp serialization again\n",
|
||||||
|
"res_dict = json.loads(response)\n",
|
||||||
|
"y_fcst_all = pd.read_json(res_dict['index'])\n",
|
||||||
|
"y_fcst_all[time_column_name] = pd.to_datetime(y_fcst_all[time_column_name], unit = 'ms')\n",
|
||||||
|
"y_fcst_all['forecast'] = res_dict['forecast']\n",
|
||||||
|
"y_fcst_all.head()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Now that the function works locally in the notebook, let's write it down into the scoring script. The scoring script is authored by the data scientist. Adjust it to taste, adding inputs, outputs and processing as needed."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"%%writefile score_fcast.py\n",
|
||||||
|
"import pickle\n",
|
||||||
|
"import json\n",
|
||||||
|
"import numpy as np\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"import azureml.train.automl\n",
|
||||||
|
"from sklearn.externals import joblib\n",
|
||||||
|
"from azureml.core.model import Model\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"def init():\n",
|
||||||
|
" global model\n",
|
||||||
|
" model_path = Model.get_model_path(model_name = '<<modelid>>') # this name is model.id of model that we want to deploy\n",
|
||||||
|
" # deserialize the model file back into a sklearn model\n",
|
||||||
|
" model = joblib.load(model_path)\n",
|
||||||
|
"\n",
|
||||||
|
"timestamp_columns = ['WeekStarting']\n",
|
||||||
|
"\n",
|
||||||
|
"def run(rawdata, test_model = None):\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" Intended to process 'rawdata' string produced by\n",
|
||||||
|
" \n",
|
||||||
|
" {'X': X_test.to_json(), y' : y_test.to_json()}\n",
|
||||||
|
" \n",
|
||||||
|
" Don't convert the X payload to numpy.array, use it as pandas.DataFrame\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" try:\n",
|
||||||
|
" # unpack the data frame with timestamp \n",
|
||||||
|
" rawobj = json.loads(rawdata) # rawobj is now a dict of strings \n",
|
||||||
|
" X_pred = pd.read_json(rawobj['X'], convert_dates=False) # load the pandas DF from a json string\n",
|
||||||
|
" for col in timestamp_columns: # fix timestamps\n",
|
||||||
|
" X_pred[col] = pd.to_datetime(X_pred[col], unit='ms') \n",
|
||||||
|
" \n",
|
||||||
|
" y_pred = np.array(rawobj['y']) # reconstitute numpy array from serialized list\n",
|
||||||
|
" \n",
|
||||||
|
" if test_model is None:\n",
|
||||||
|
" result = model.forecast(X_pred, y_pred) # use the global model from init function\n",
|
||||||
|
" else:\n",
|
||||||
|
" result = test_model.forecast(X_pred, y_pred) # use the model on which we are testing\n",
|
||||||
|
" \n",
|
||||||
|
" except Exception as e:\n",
|
||||||
|
" result = str(e)\n",
|
||||||
|
" return json.dumps({\"error\": result})\n",
|
||||||
|
" \n",
|
||||||
|
" # prepare to send over wire as json\n",
|
||||||
|
" forecast_as_list = result[0].tolist()\n",
|
||||||
|
" index_as_df = result[1].index.to_frame().reset_index(drop=True)\n",
|
||||||
|
" \n",
|
||||||
|
" return json.dumps({\"forecast\": forecast_as_list, # return the minimum over the wire: \n",
|
||||||
|
" \"index\": index_as_df.to_json() # no forecast and its featurized values\n",
|
||||||
|
" })"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# get the model\n",
|
||||||
|
"from azureml.train.automl.run import AutoMLRun\n",
|
||||||
|
"\n",
|
||||||
|
"experiment = Experiment(ws, experiment_name)\n",
|
||||||
|
"ml_run = AutoMLRun(experiment = experiment, run_id = local_run.id)\n",
|
||||||
|
"best_iteration = int(str.split(best_run.id,'_')[-1]) # the iteration number is a postfix of the run ID."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# get the best model's dependencies and write them into this file\n",
|
||||||
|
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||||
|
"\n",
|
||||||
|
"conda_env_file_name = 'fcast_env.yml'\n",
|
||||||
|
"\n",
|
||||||
|
"dependencies = ml_run.get_run_sdk_dependencies(iteration = best_iteration)\n",
|
||||||
|
"for p in ['azureml-train-automl', 'azureml-sdk', 'azureml-core']:\n",
|
||||||
|
" print('{}\\t{}'.format(p, dependencies[p]))\n",
|
||||||
|
"\n",
|
||||||
|
"myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn'], pip_packages=['azureml-sdk[automl]'])\n",
|
||||||
|
"\n",
|
||||||
|
"myenv.save_to_file('.', conda_env_file_name)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# this is the script file name we wrote a few cells above\n",
|
||||||
|
"script_file_name = 'score_fcast.py'\n",
|
||||||
|
"\n",
|
||||||
|
"# Substitute the actual version number in the environment file.\n",
|
||||||
|
"# This is not strictly needed in this notebook because the model should have been generated using the current SDK version.\n",
|
||||||
|
"# However, we include this in case this code is used on an experiment from a previous SDK version.\n",
|
||||||
|
"\n",
|
||||||
|
"with open(conda_env_file_name, 'r') as cefr:\n",
|
||||||
|
" content = cefr.read()\n",
|
||||||
|
"\n",
|
||||||
|
"with open(conda_env_file_name, 'w') as cefw:\n",
|
||||||
|
" cefw.write(content.replace(azureml.core.VERSION, dependencies['azureml-sdk']))\n",
|
||||||
|
"\n",
|
||||||
|
"# Substitute the actual model id in the script file.\n",
|
||||||
|
"\n",
|
||||||
|
"with open(script_file_name, 'r') as cefr:\n",
|
||||||
|
" content = cefr.read()\n",
|
||||||
|
"\n",
|
||||||
|
"with open(script_file_name, 'w') as cefw:\n",
|
||||||
|
" cefw.write(content.replace('<<modelid>>', local_run.model_id))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Create a Container Image"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.image import Image, ContainerImage\n",
|
||||||
|
"\n",
|
||||||
|
"image_config = ContainerImage.image_configuration(runtime= \"python\",\n",
|
||||||
|
" execution_script = script_file_name,\n",
|
||||||
|
" conda_file = conda_env_file_name,\n",
|
||||||
|
" tags = {'type': \"automl-forecasting\"},\n",
|
||||||
|
" description = \"Image for automl forecasting sample\")\n",
|
||||||
|
"\n",
|
||||||
|
"image = Image.create(name = \"automl-fcast-image\",\n",
|
||||||
|
" # this is the model object \n",
|
||||||
|
" models = [model],\n",
|
||||||
|
" image_config = image_config, \n",
|
||||||
|
" workspace = ws)\n",
|
||||||
|
"\n",
|
||||||
|
"image.wait_for_creation(show_output = True)\n",
|
||||||
|
"\n",
|
||||||
|
"if image.creation_state == 'Failed':\n",
|
||||||
|
" print(\"Image build log at: \" + image.image_build_log_uri)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Deploy the Image as a Web Service on Azure Container Instance"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.webservice import AciWebservice\n",
|
||||||
|
"\n",
|
||||||
|
"aciconfig = AciWebservice.deploy_configuration(cpu_cores = 1, \n",
|
||||||
|
" memory_gb = 2, \n",
|
||||||
|
" tags = {'type': \"automl-forecasting\"},\n",
|
||||||
|
" description = \"Automl forecasting sample service\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.webservice import Webservice\n",
|
||||||
|
"\n",
|
||||||
|
"aci_service_name = 'automl-forecast-01'\n",
|
||||||
|
"print(aci_service_name)\n",
|
||||||
|
"\n",
|
||||||
|
"aci_service = Webservice.deploy_from_image(deployment_config = aciconfig,\n",
|
||||||
|
" image = image,\n",
|
||||||
|
" name = aci_service_name,\n",
|
||||||
|
" workspace = ws)\n",
|
||||||
|
"aci_service.wait_for_deployment(True)\n",
|
||||||
|
"print(aci_service.state)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Call the service"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# we send the data to the service serialized into a json string\n",
|
||||||
|
"test_sample = json.dumps({'X':X_test.to_json(), 'y' : y_query.tolist()})\n",
|
||||||
|
"response = aci_service.run(input_data = test_sample)\n",
|
||||||
|
"\n",
|
||||||
|
"# translate from networkese to datascientese\n",
|
||||||
|
"try: \n",
|
||||||
|
" res_dict = json.loads(response)\n",
|
||||||
|
" y_fcst_all = pd.read_json(res_dict['index'])\n",
|
||||||
|
" y_fcst_all[time_column_name] = pd.to_datetime(y_fcst_all[time_column_name], unit = 'ms')\n",
|
||||||
|
" y_fcst_all['forecast'] = res_dict['forecast'] \n",
|
||||||
|
"except:\n",
|
||||||
|
" print(res_dict)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"y_fcst_all.head()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Delete the web service if desired"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"serv = Webservice(ws, 'automl-forecast-01')\n",
|
||||||
|
"# serv.delete() # don't do it accidentally"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"authors": [
|
"authors": [
|
||||||
{
|
{
|
||||||
"name": "erwright"
|
"name": "erwright, tosingli"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
@@ -417,7 +845,7 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.6.8"
|
"version": "3.6.7"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
|
|||||||
@@ -37,8 +37,9 @@
|
|||||||
"In this notebook you will learn how to:\n",
|
"In this notebook you will learn how to:\n",
|
||||||
"1. Create an `Experiment` in an existing `Workspace`.\n",
|
"1. Create an `Experiment` in an existing `Workspace`.\n",
|
||||||
"2. Configure AutoML using `AutoMLConfig`.\n",
|
"2. Configure AutoML using `AutoMLConfig`.\n",
|
||||||
"4. Train the model.\n",
|
"3. Train the model.\n",
|
||||||
"5. Explore the results.\n",
|
"4. Explore the results.\n",
|
||||||
|
"5. Viewing the engineered names for featurized data and featurization summary for all raw features.\n",
|
||||||
"6. Test the best fitted model.\n",
|
"6. Test the best fitted model.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"In addition this notebook showcases the following features\n",
|
"In addition this notebook showcases the following features\n",
|
||||||
@@ -154,7 +155,6 @@
|
|||||||
"|**primary_metric**|This is the metric that you want to optimize. Classification supports the following primary metrics: <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>average_precision_score_weighted</i><br><i>norm_macro_recall</i><br><i>precision_score_weighted</i>|\n",
|
"|**primary_metric**|This is the metric that you want to optimize. Classification supports the following primary metrics: <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>average_precision_score_weighted</i><br><i>norm_macro_recall</i><br><i>precision_score_weighted</i>|\n",
|
||||||
"|**iteration_timeout_minutes**|Time limit in minutes for each iteration.|\n",
|
"|**iteration_timeout_minutes**|Time limit in minutes for each iteration.|\n",
|
||||||
"|**iterations**|Number of iterations. In each iteration AutoML trains a specific pipeline with the data.|\n",
|
"|**iterations**|Number of iterations. In each iteration AutoML trains a specific pipeline with the data.|\n",
|
||||||
"|**n_cross_validations**|Number of cross validation splits.|\n",
|
|
||||||
"|**preprocess**|Setting this to *True* enables AutoML to perform preprocessing on the input to handle *missing data*, and to perform some common *feature extraction*.|\n",
|
"|**preprocess**|Setting this to *True* enables AutoML to perform preprocessing on the input to handle *missing data*, and to perform some common *feature extraction*.|\n",
|
||||||
"|**experiment_exit_score**|*double* value indicating the target for *primary_metric*. <br>Once the target is surpassed the run terminates.|\n",
|
"|**experiment_exit_score**|*double* value indicating the target for *primary_metric*. <br>Once the target is surpassed the run terminates.|\n",
|
||||||
"|**blacklist_models**|*List* of *strings* indicating machine learning algorithms for AutoML to avoid in this run.<br><br> Allowed values for **Classification**<br><i>LogisticRegression</i><br><i>SGD</i><br><i>MultinomialNaiveBayes</i><br><i>BernoulliNaiveBayes</i><br><i>SVM</i><br><i>LinearSVM</i><br><i>KNN</i><br><i>DecisionTree</i><br><i>RandomForest</i><br><i>ExtremeRandomTrees</i><br><i>LightGBM</i><br><i>GradientBoosting</i><br><i>TensorFlowDNN</i><br><i>TensorFlowLinearClassifier</i><br><br>Allowed values for **Regression**<br><i>ElasticNet</i><br><i>GradientBoosting</i><br><i>DecisionTree</i><br><i>KNN</i><br><i>LassoLars</i><br><i>SGD</i><br><i>RandomForest</i><br><i>ExtremeRandomTrees</i><br><i>LightGBM</i><br><i>TensorFlowLinearRegressor</i><br><i>TensorFlowDNN</i>|\n",
|
"|**blacklist_models**|*List* of *strings* indicating machine learning algorithms for AutoML to avoid in this run.<br><br> Allowed values for **Classification**<br><i>LogisticRegression</i><br><i>SGD</i><br><i>MultinomialNaiveBayes</i><br><i>BernoulliNaiveBayes</i><br><i>SVM</i><br><i>LinearSVM</i><br><i>KNN</i><br><i>DecisionTree</i><br><i>RandomForest</i><br><i>ExtremeRandomTrees</i><br><i>LightGBM</i><br><i>GradientBoosting</i><br><i>TensorFlowDNN</i><br><i>TensorFlowLinearClassifier</i><br><br>Allowed values for **Regression**<br><i>ElasticNet</i><br><i>GradientBoosting</i><br><i>DecisionTree</i><br><i>KNN</i><br><i>LassoLars</i><br><i>SGD</i><br><i>RandomForest</i><br><i>ExtremeRandomTrees</i><br><i>LightGBM</i><br><i>TensorFlowLinearRegressor</i><br><i>TensorFlowDNN</i>|\n",
|
||||||
@@ -174,7 +174,6 @@
|
|||||||
" primary_metric = 'AUC_weighted',\n",
|
" primary_metric = 'AUC_weighted',\n",
|
||||||
" iteration_timeout_minutes = 60,\n",
|
" iteration_timeout_minutes = 60,\n",
|
||||||
" iterations = 20,\n",
|
" iterations = 20,\n",
|
||||||
" n_cross_validations = 5,\n",
|
|
||||||
" preprocess = True,\n",
|
" preprocess = True,\n",
|
||||||
" experiment_exit_score = 0.9984,\n",
|
" experiment_exit_score = 0.9984,\n",
|
||||||
" blacklist_models = ['KNN','LinearSVM'],\n",
|
" blacklist_models = ['KNN','LinearSVM'],\n",
|
||||||
@@ -318,6 +317,45 @@
|
|||||||
"# best_run, fitted_model = local_run.get_output(iteration = iteration)"
|
"# best_run, fitted_model = local_run.get_output(iteration = iteration)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"#### View the engineered names for featurized data\n",
|
||||||
|
"Below we display the engineered feature names generated for the featurized data using the preprocessing featurization."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"fitted_model.named_steps['datatransformer'].get_engineered_feature_names()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"#### View the featurization summary\n",
|
||||||
|
"Below we display the featurization that was performed on different raw features in the user data. For each raw feature in the user data, the following information is displayed:-\n",
|
||||||
|
"- Raw feature name\n",
|
||||||
|
"- Number of engineered features formed out of this raw feature\n",
|
||||||
|
"- Type detected\n",
|
||||||
|
"- If feature was dropped\n",
|
||||||
|
"- List of feature transformations for the raw feature"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"fitted_model.named_steps['datatransformer'].get_featurization_summary()"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
|
|||||||
@@ -254,7 +254,9 @@
|
|||||||
"3.\toverall_summary: The model level feature importance values sorted in descending order\n",
|
"3.\toverall_summary: The model level feature importance values sorted in descending order\n",
|
||||||
"4.\toverall_imp: The feature names sorted in the same order as in overall_summary\n",
|
"4.\toverall_imp: The feature names sorted in the same order as in overall_summary\n",
|
||||||
"5.\tper_class_summary: The class level feature importance values sorted in descending order. Only available for the classification case\n",
|
"5.\tper_class_summary: The class level feature importance values sorted in descending order. Only available for the classification case\n",
|
||||||
"6.\tper_class_imp: The feature names sorted in the same order as in per_class_summary. Only available for the classification case"
|
"6.\tper_class_imp: The feature names sorted in the same order as in per_class_summary. Only available for the classification case\n",
|
||||||
|
"\n",
|
||||||
|
"Note:- The **retrieve_model_explanation()** API only works in case AutoML has been configured with **'model_explainability'** flag set to **True**. "
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -305,7 +307,7 @@
|
|||||||
"from azureml.train.automl.automlexplainer import explain_model\n",
|
"from azureml.train.automl.automlexplainer import explain_model\n",
|
||||||
"\n",
|
"\n",
|
||||||
"shap_values, expected_values, overall_summary, overall_imp, per_class_summary, per_class_imp = \\\n",
|
"shap_values, expected_values, overall_summary, overall_imp, per_class_summary, per_class_imp = \\\n",
|
||||||
" explain_model(fitted_model, X_train, X_test)"
|
" explain_model(fitted_model, X_train, X_test, features=features)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -40,7 +40,8 @@
|
|||||||
"3. Configure AutoML using `AutoMLConfig`.\n",
|
"3. Configure AutoML using `AutoMLConfig`.\n",
|
||||||
"4. Train the model using the DSVM.\n",
|
"4. Train the model using the DSVM.\n",
|
||||||
"5. Explore the results.\n",
|
"5. Explore the results.\n",
|
||||||
"6. Test the best fitted model.\n",
|
"6. Viewing the engineered names for featurized data and featurization summary for all raw features.\n",
|
||||||
|
"7. Test the best fitted model.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"In addition this notebook showcases the following features\n",
|
"In addition this notebook showcases the following features\n",
|
||||||
"- **Parallel** executions for iterations\n",
|
"- **Parallel** executions for iterations\n",
|
||||||
@@ -110,7 +111,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"### Attach a Remote Linux DSVM\n",
|
"### Attach a Remote Linux DSVM\n",
|
||||||
"To use a remote Docker compute target:\n",
|
"To use a remote Docker compute target:\n",
|
||||||
"1. Create a Linux DSVM in Azure, following these [quick instructions](https://docs.microsoft.com/en-us/azure/machine-learning/desktop-workbench/how-to-create-dsvm-hdi). Make sure you use the Ubuntu flavor (not CentOS). Make sure that disk space is available under `/tmp` because AutoML creates files under `/tmp/azureml_run`s. The DSVM should have more cores than the number of parallel runs that you plan to enable. It should also have at least 4GB per core.\n",
|
"1. Create a Linux DSVM in Azure, following these [instructions](https://docs.microsoft.com/en-us/azure/machine-learning/data-science-virtual-machine/dsvm-ubuntu-intro). Make sure you use the Ubuntu flavor (not CentOS). Make sure that disk space is available under `/tmp` because AutoML creates files under `/tmp/azureml_run`s. The DSVM should have more cores than the number of parallel runs that you plan to enable. It should also have at least 4GB per core.\n",
|
||||||
"2. Enter the IP address, user name and password below.\n",
|
"2. Enter the IP address, user name and password below.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"**Note:** By default, SSH runs on port 22 and you don't need to change the port number below. If you've configured SSH to use a different port, change `dsvm_ssh_port` accordinglyaddress. [Read more](https://docs.microsoft.com/en-us/azure/virtual-machines/troubleshooting/detailed-troubleshoot-ssh-connection) on changing SSH ports for security reasons."
|
"**Note:** By default, SSH runs on port 22 and you don't need to change the port number below. If you've configured SSH to use a different port, change `dsvm_ssh_port` accordinglyaddress. [Read more](https://docs.microsoft.com/en-us/azure/virtual-machines/troubleshooting/detailed-troubleshoot-ssh-connection) on changing SSH ports for security reasons."
|
||||||
@@ -160,6 +161,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"from azureml.core.runconfig import RunConfiguration\n",
|
"from azureml.core.runconfig import RunConfiguration\n",
|
||||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||||
|
"import pkg_resources\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# create a new RunConfig object\n",
|
"# create a new RunConfig object\n",
|
||||||
"conda_run_config = RunConfiguration(framework=\"python\")\n",
|
"conda_run_config = RunConfiguration(framework=\"python\")\n",
|
||||||
@@ -167,7 +169,9 @@
|
|||||||
"# Set compute target to the Linux DSVM\n",
|
"# Set compute target to the Linux DSVM\n",
|
||||||
"conda_run_config.target = dsvm_compute\n",
|
"conda_run_config.target = dsvm_compute\n",
|
||||||
"\n",
|
"\n",
|
||||||
"cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]'], conda_packages=['numpy','py-xgboost<=0.80'])\n",
|
"pandas_dependency = 'pandas==' + pkg_resources.get_distribution(\"pandas\").version\n",
|
||||||
|
"\n",
|
||||||
|
"cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]'], conda_packages=['numpy','py-xgboost<=0.80',pandas_dependency])\n",
|
||||||
"conda_run_config.environment.python.conda_dependencies = cd"
|
"conda_run_config.environment.python.conda_dependencies = cd"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -407,6 +411,45 @@
|
|||||||
"print(fitted_model)"
|
"print(fitted_model)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"#### View the engineered names for featurized data\n",
|
||||||
|
"Below we display the engineered feature names generated for the featurized data using the preprocessing featurization."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"fitted_model.named_steps['datatransformer'].get_engineered_feature_names()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"#### View the featurization summary\n",
|
||||||
|
"Below we display the featurization that was performed on different raw features in the user data. For each raw feature in the user data, the following information is displayed:-\n",
|
||||||
|
"- Raw feature name\n",
|
||||||
|
"- Number of engineered features formed out of this raw feature\n",
|
||||||
|
"- Type detected\n",
|
||||||
|
"- If feature was dropped\n",
|
||||||
|
"- List of feature transformations for the raw feature"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"fitted_model.named_steps['datatransformer'].get_featurization_summary()"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
|
|||||||
@@ -245,6 +245,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"from azureml.core.runconfig import RunConfiguration\n",
|
"from azureml.core.runconfig import RunConfiguration\n",
|
||||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||||
|
"import pkg_resources\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# create a new RunConfig object\n",
|
"# create a new RunConfig object\n",
|
||||||
"conda_run_config = RunConfiguration(framework=\"python\")\n",
|
"conda_run_config = RunConfiguration(framework=\"python\")\n",
|
||||||
@@ -254,7 +255,9 @@
|
|||||||
"# set the data reference of the run coonfiguration\n",
|
"# set the data reference of the run coonfiguration\n",
|
||||||
"conda_run_config.data_references = {ds.name: dr}\n",
|
"conda_run_config.data_references = {ds.name: dr}\n",
|
||||||
"\n",
|
"\n",
|
||||||
"cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]'], conda_packages=['numpy','py-xgboost<=0.80'])\n",
|
"pandas_dependency = 'pandas==' + pkg_resources.get_distribution(\"pandas\").version\n",
|
||||||
|
"\n",
|
||||||
|
"cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]'], conda_packages=['numpy','py-xgboost<=0.80',pandas_dependency])\n",
|
||||||
"conda_run_config.environment.python.conda_dependencies = cd"
|
"conda_run_config.environment.python.conda_dependencies = cd"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -23,7 +23,8 @@
|
|||||||
"3. Configure Automated ML using `AutoMLConfig`.\n",
|
"3. Configure Automated ML using `AutoMLConfig`.\n",
|
||||||
"4. Train the model using Azure Databricks.\n",
|
"4. Train the model using Azure Databricks.\n",
|
||||||
"5. Explore the results.\n",
|
"5. Explore the results.\n",
|
||||||
"6. Test the best fitted model.\n",
|
"6. Viewing the engineered names for featurized data and featurization summary for all raw features.\n",
|
||||||
|
"7. Test the best fitted model.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Before running this notebook, please follow the <a href=\"https://github.com/Azure/MachineLearningNotebooks/tree/master/how-to-use-azureml/azure-databricks\" target=\"_blank\">readme for using Automated ML on Azure Databricks</a> for installing necessary libraries to your cluster."
|
"Before running this notebook, please follow the <a href=\"https://github.com/Azure/MachineLearningNotebooks/tree/master/how-to-use-azureml/azure-databricks\" target=\"_blank\">readme for using Automated ML on Azure Databricks</a> for installing necessary libraries to your cluster."
|
||||||
]
|
]
|
||||||
@@ -556,6 +557,45 @@
|
|||||||
"print(fitted_model)"
|
"print(fitted_model)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"#### View the engineered names for featurized data\n",
|
||||||
|
"Below we display the engineered feature names generated for the featurized data using the preprocessing featurization."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"fitted_model.named_steps['datatransformer'].get_engineered_feature_names()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"#### View the featurization summary\n",
|
||||||
|
"Below we display the featurization that was performed on different raw features in the user data. For each raw feature in the user data, the following information is displayed:-\n",
|
||||||
|
"- Raw feature name\n",
|
||||||
|
"- Number of engineered features formed out of this raw feature\n",
|
||||||
|
"- Type detected\n",
|
||||||
|
"- If feature was dropped\n",
|
||||||
|
"- List of feature transformations for the raw feature"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"fitted_model.named_steps['datatransformer'].get_featurization_summary()"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
|
|||||||
@@ -207,6 +207,7 @@
|
|||||||
"import os\n",
|
"import os\n",
|
||||||
"import random\n",
|
"import random\n",
|
||||||
"import time\n",
|
"import time\n",
|
||||||
|
"import json\n",
|
||||||
"\n",
|
"\n",
|
||||||
"from matplotlib import pyplot as plt\n",
|
"from matplotlib import pyplot as plt\n",
|
||||||
"from matplotlib.pyplot import imshow\n",
|
"from matplotlib.pyplot import imshow\n",
|
||||||
@@ -295,7 +296,7 @@
|
|||||||
" datastore_name = datastore_name, \n",
|
" datastore_name = datastore_name, \n",
|
||||||
" container_name = container_name, \n",
|
" container_name = container_name, \n",
|
||||||
" account_name = account_name,\n",
|
" account_name = account_name,\n",
|
||||||
" overwrite = True\n",
|
" overwrite = True\n",
|
||||||
")"
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -427,7 +428,7 @@
|
|||||||
" debug_log = 'automl_errors.log',\n",
|
" debug_log = 'automl_errors.log',\n",
|
||||||
" primary_metric = 'AUC_weighted',\n",
|
" primary_metric = 'AUC_weighted',\n",
|
||||||
" iteration_timeout_minutes = 10,\n",
|
" iteration_timeout_minutes = 10,\n",
|
||||||
" iterations = 30,\n",
|
" iterations = 5,\n",
|
||||||
" preprocess = True,\n",
|
" preprocess = True,\n",
|
||||||
" n_cross_validations = 10,\n",
|
" n_cross_validations = 10,\n",
|
||||||
" max_concurrent_iterations = 2, #change it based on number of worker nodes\n",
|
" max_concurrent_iterations = 2, #change it based on number of worker nodes\n",
|
||||||
@@ -591,22 +592,21 @@
|
|||||||
"%%writefile score.py\n",
|
"%%writefile score.py\n",
|
||||||
"import pickle\n",
|
"import pickle\n",
|
||||||
"import json\n",
|
"import json\n",
|
||||||
"import numpy\n",
|
"import numpy as np\n",
|
||||||
"import azureml.train.automl\n",
|
"import azureml.train.automl\n",
|
||||||
"from sklearn.externals import joblib\n",
|
"from sklearn.externals import joblib\n",
|
||||||
"from azureml.core.model import Model\n",
|
"from azureml.core.model import Model\n",
|
||||||
"\n",
|
"import pandas as pd\n",
|
||||||
"\n",
|
"\n",
|
||||||
"def init():\n",
|
"def init():\n",
|
||||||
" global model\n",
|
" global model\n",
|
||||||
" model_path = Model.get_model_path(model_name = '<<modelid>>') # this name is model.id of model that we want to deploy\n",
|
" model_path = Model.get_model_path(model_name = '<<model_id>>') # this name is model.id of model that we want to deploy\n",
|
||||||
" # deserialize the model file back into a sklearn model\n",
|
" # deserialize the model file back into a sklearn model\n",
|
||||||
" model = joblib.load(model_path)\n",
|
" model = joblib.load(model_path)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"def run(rawdata):\n",
|
"def run(raw_data):\n",
|
||||||
" try:\n",
|
" try:\n",
|
||||||
" data = json.loads(rawdata)['data']\n",
|
" data = (pd.DataFrame(np.array(json.loads(raw_data)['data']), columns=[str(i) for i in range(0,64)]))\n",
|
||||||
" data = numpy.array(data)\n",
|
|
||||||
" result = model.predict(data)\n",
|
" result = model.predict(data)\n",
|
||||||
" except Exception as e:\n",
|
" except Exception as e:\n",
|
||||||
" result = str(e)\n",
|
" result = str(e)\n",
|
||||||
@@ -614,6 +614,22 @@
|
|||||||
" return json.dumps({\"result\":result.tolist()})"
|
" return json.dumps({\"result\":result.tolist()})"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"#Replace <<model_id>>\n",
|
||||||
|
"content = \"\"\n",
|
||||||
|
"with open(\"score.py\", \"r\") as fo:\n",
|
||||||
|
" content = fo.read()\n",
|
||||||
|
"\n",
|
||||||
|
"new_content = content.replace(\"<<model_id>>\", local_run.model_id)\n",
|
||||||
|
"with open(\"score.py\", \"w\") as fw:\n",
|
||||||
|
" fw.write(new_content)"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
@@ -672,16 +688,19 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"\n",
|
|
||||||
"# this will take 10-15 minutes to finish\n",
|
"# this will take 10-15 minutes to finish\n",
|
||||||
"\n",
|
"\n",
|
||||||
"service_name = \"<<servicename>>\"\n",
|
"import uuid\n",
|
||||||
|
"from azureml.core.image import ContainerImage\n",
|
||||||
|
"\n",
|
||||||
|
"guid = str(uuid.uuid4()).split(\"-\")[0]\n",
|
||||||
|
"service_name = \"myservice-{}\".format(guid)\n",
|
||||||
|
"print(\"Creating service with name: {}\".format(service_name))\n",
|
||||||
"runtime = \"spark-py\" \n",
|
"runtime = \"spark-py\" \n",
|
||||||
"driver_file = \"score.py\"\n",
|
"driver_file = \"score.py\"\n",
|
||||||
"my_conda_file = \"mydeployenv.yml\"\n",
|
"my_conda_file = \"mydeployenv.yml\"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# image creation\n",
|
"# image creation\n",
|
||||||
"from azureml.core.image import ContainerImage\n",
|
|
||||||
"myimage_config = ContainerImage.image_configuration(execution_script = driver_file, \n",
|
"myimage_config = ContainerImage.image_configuration(execution_script = driver_file, \n",
|
||||||
" runtime = runtime, \n",
|
" runtime = runtime, \n",
|
||||||
" conda_file = 'mydeployenv.yml')\n",
|
" conda_file = 'mydeployenv.yml')\n",
|
||||||
@@ -744,18 +763,39 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
"import json\n",
|
||||||
"# Randomly select digits and test.\n",
|
"# Randomly select digits and test.\n",
|
||||||
"for index in np.random.choice(len(y_test), 2, replace = False):\n",
|
"for index in np.random.choice(len(y_test), 2, replace = False):\n",
|
||||||
" print(index)\n",
|
" print(index)\n",
|
||||||
" predicted = fitted_model.predict(X_test[index:index + 1])[0]\n",
|
" test_sample = json.dumps({'data':X_test[index:index + 1].values.tolist()})\n",
|
||||||
|
" predicted = myservice.run(input_data = test_sample)\n",
|
||||||
" label = y_test.values[index]\n",
|
" label = y_test.values[index]\n",
|
||||||
" title = \"Label value = %d Predicted value = %d \" % (label, predicted)\n",
|
" predictedDict = json.loads(predicted)\n",
|
||||||
|
" title = \"Label value = %d Predicted value = %s \" % ( label,predictedDict['result'][0]) \n",
|
||||||
" fig = plt.figure(3, figsize = (5,5))\n",
|
" fig = plt.figure(3, figsize = (5,5))\n",
|
||||||
" ax1 = fig.add_axes((0,0,.8,.8))\n",
|
" ax1 = fig.add_axes((0,0,.8,.8))\n",
|
||||||
" ax1.set_title(title)\n",
|
" ax1.set_title(title)\n",
|
||||||
" plt.imshow(images[index], cmap = plt.cm.gray_r, interpolation = 'nearest')\n",
|
" plt.imshow(images[index], cmap = plt.cm.gray_r, interpolation = 'nearest')\n",
|
||||||
" display(fig)"
|
" display(fig)"
|
||||||
]
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"### Delete the service"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"myservice.delete()"
|
||||||
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
|||||||
55
how-to-use-azureml/azure-hdi/README.md
Normal file
55
how-to-use-azureml/azure-hdi/README.md
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
**Azure HDInsight**
|
||||||
|
|
||||||
|
Azure HDInsight is a fully managed cloud Hadoop & Spark offering the gives
|
||||||
|
optimized open-source analytic clusters for Spark, Hive, MapReduce, HBase,
|
||||||
|
Storm, and Kafka. HDInsight Spark clusters provide kernels that you can use with
|
||||||
|
the Jupyter notebook on [Apache Spark](https://spark.apache.org/) for testing
|
||||||
|
your applications.
|
||||||
|
|
||||||
|
How Azure HDInsight works with Azure Machine Learning service
|
||||||
|
|
||||||
|
- You can train a model using Spark clusters and deploy the model to ACI/AKS
|
||||||
|
from within Azure HDInsight.
|
||||||
|
|
||||||
|
- You can also use [automated machine
|
||||||
|
learning](https://docs.microsoft.com/en-us/azure/machine-learning/service/concept-automated-ml) capabilities
|
||||||
|
integrated within Azure HDInsight.
|
||||||
|
|
||||||
|
You can use Azure HDInsight as a compute target from an [Azure Machine Learning
|
||||||
|
pipeline](https://docs.microsoft.com/en-us/azure/machine-learning/service/concept-ml-pipelines).
|
||||||
|
|
||||||
|
**Set up your HDInsight cluster**
|
||||||
|
|
||||||
|
Create [HDInsight
|
||||||
|
cluster](https://docs.microsoft.com/en-us/azure/hdinsight/hdinsight-hadoop-provision-linux-clusters)
|
||||||
|
|
||||||
|
**Quick create: Basic cluster setup**
|
||||||
|
|
||||||
|
This article walks you through setup in the [Azure
|
||||||
|
portal](https://portal.azure.com/), where you can create an HDInsight cluster
|
||||||
|
using *Quick create* or *Custom*.
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
Follow instructions on the screen to do a basic cluster setup. Details are
|
||||||
|
provided below for:
|
||||||
|
|
||||||
|
- [Resource group
|
||||||
|
name](https://docs.microsoft.com/en-us/azure/hdinsight/hdinsight-hadoop-provision-linux-clusters#resource-group-name)
|
||||||
|
|
||||||
|
- [Cluster types and
|
||||||
|
configuration](https://docs.microsoft.com/en-us/azure/hdinsight/hdinsight-hadoop-provision-linux-clusters#cluster-types)
|
||||||
|
(Cluster must be Spark 2.3 (HDI 3.6) or greater)
|
||||||
|
|
||||||
|
- Cluster login and SSH username
|
||||||
|
|
||||||
|
- [Location](https://docs.microsoft.com/en-us/azure/hdinsight/hdinsight-hadoop-provision-linux-clusters#location)
|
||||||
|
|
||||||
|
**Import the sample HDI notebook in Jupyter**
|
||||||
|
|
||||||
|
**Important links:**
|
||||||
|
|
||||||
|
Create HDI cluster:
|
||||||
|
<https://docs.microsoft.com/en-us/azure/hdinsight/hdinsight-hadoop-provision-linux-clusters>
|
||||||
|
|
||||||
|

|
||||||
@@ -0,0 +1,624 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||||
|
"\n",
|
||||||
|
"Licensed under the MIT License."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Automated ML on Azure HDInsight\n",
|
||||||
|
"\n",
|
||||||
|
"In this example we use the scikit-learn's <a href=\"http://scikit-learn.org/stable/datasets/index.html#optical-recognition-of-handwritten-digits-dataset\" target=\"_blank\">digit dataset</a> to showcase how you can use AutoML for a simple classification problem.\n",
|
||||||
|
"\n",
|
||||||
|
"In this notebook you will learn how to:\n",
|
||||||
|
"1. Create Azure Machine Learning Workspace object and initialize your notebook directory to easily reload this object from a configuration file.\n",
|
||||||
|
"2. Create an `Experiment` in an existing `Workspace`.\n",
|
||||||
|
"3. Configure Automated ML using `AutoMLConfig`.\n",
|
||||||
|
"4. Train the model using Azure HDInsight.\n",
|
||||||
|
"5. Explore the results.\n",
|
||||||
|
"6. Test the best fitted model.\n",
|
||||||
|
"\n",
|
||||||
|
"Before running this notebook, please follow the readme for using Automated ML on Azure HDI for installing necessary libraries to your cluster."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Check the Azure ML Core SDK Version to Validate Your Installation"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import azureml.core\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"from azureml.core.authentication import ServicePrincipalAuthentication\n",
|
||||||
|
"from azureml.core.workspace import Workspace\n",
|
||||||
|
"from azureml.core.experiment import Experiment\n",
|
||||||
|
"from azureml.train.automl import AutoMLConfig\n",
|
||||||
|
"from azureml.train.automl.run import AutoMLRun\n",
|
||||||
|
"import logging\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"SDK Version:\", azureml.core.VERSION)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Initialize an Azure ML Workspace\n",
|
||||||
|
"### What is an Azure ML Workspace and Why Do I Need One?\n",
|
||||||
|
"\n",
|
||||||
|
"An Azure ML workspace is an Azure resource that organizes and coordinates the actions of many other Azure resources to assist in executing and sharing machine learning workflows. In particular, an Azure ML workspace coordinates storage, databases, and compute resources providing added functionality for machine learning experimentation, operationalization, and the monitoring of operationalized models.\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"### What do I Need?\n",
|
||||||
|
"\n",
|
||||||
|
"To create or access an Azure ML workspace, you will need to import the Azure ML library and specify following information:\n",
|
||||||
|
"* A name for your workspace. You can choose one.\n",
|
||||||
|
"* Your subscription id. Use the `id` value from the `az account show` command output above.\n",
|
||||||
|
"* The resource group name. The resource group organizes Azure resources and provides a default region for the resources in the group. The resource group will be created if it doesn't exist. Resource groups can be created and viewed in the [Azure portal](https://portal.azure.com)\n",
|
||||||
|
"* Supported regions include `eastus2`, `eastus`,`westcentralus`, `southeastasia`, `westeurope`, `australiaeast`, `westus2`, `southcentralus`."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import azureml.core\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"from azureml.core.authentication import ServicePrincipalAuthentication\n",
|
||||||
|
"from azureml.core.workspace import Workspace\n",
|
||||||
|
"from azureml.core.experiment import Experiment\n",
|
||||||
|
"from azureml.train.automl import AutoMLConfig\n",
|
||||||
|
"from azureml.train.automl.run import AutoMLRun\n",
|
||||||
|
"import logging\n",
|
||||||
|
"\n",
|
||||||
|
"subscription_id = \"<Your SubscriptionId>\" #you should be owner or contributor\n",
|
||||||
|
"resource_group = \"<Resource group - new or existing>\" #you should be owner or contributor\n",
|
||||||
|
"workspace_name = \"<workspace to be created>\" #your workspace name\n",
|
||||||
|
"workspace_region = \"<azureregion>\" #your region\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"tenant_id = \"<tenant_id>\"\n",
|
||||||
|
"app_id = \"<app_id>\"\n",
|
||||||
|
"app_key = \"<app_key>\"\n",
|
||||||
|
"\n",
|
||||||
|
"auth_sp = ServicePrincipalAuthentication(tenant_id = tenant_id,\n",
|
||||||
|
" service_principal_id = app_id,\n",
|
||||||
|
" service_principal_password = app_key)\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Creating a Workspace\n",
|
||||||
|
"If you already have access to an Azure ML workspace you want to use, you can skip this cell. Otherwise, this cell will create an Azure ML workspace for you in the specified subscription, provided you have the correct permissions for the given `subscription_id`.\n",
|
||||||
|
"\n",
|
||||||
|
"This will fail when:\n",
|
||||||
|
"1. The workspace already exists.\n",
|
||||||
|
"2. You do not have permission to create a workspace in the resource group.\n",
|
||||||
|
"3. You are not a subscription owner or contributor and no Azure ML workspaces have ever been created in this subscription.\n",
|
||||||
|
"\n",
|
||||||
|
"If workspace creation fails for any reason other than already existing, please work with your IT administrator to provide you with the appropriate permissions or to provision the required resources.\n",
|
||||||
|
"\n",
|
||||||
|
"**Note:** Creation of a new workspace can take several minutes."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"##TESTONLY\n",
|
||||||
|
"# Import the Workspace class and check the Azure ML SDK version.\n",
|
||||||
|
"from azureml.core import Workspace\n",
|
||||||
|
"\n",
|
||||||
|
"ws = Workspace.create(name = workspace_name,\n",
|
||||||
|
" subscription_id = subscription_id,\n",
|
||||||
|
" resource_group = resource_group, \n",
|
||||||
|
" location = workspace_region,\n",
|
||||||
|
" auth = auth_sp,\n",
|
||||||
|
" exist_ok=True)\n",
|
||||||
|
"ws.get_details()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Configuring Your Local Environment\n",
|
||||||
|
"You can validate that you have access to the specified workspace and write a configuration file to the default configuration location, `./aml_config/config.json`."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core import Workspace\n",
|
||||||
|
"\n",
|
||||||
|
"ws = Workspace(workspace_name = workspace_name,\n",
|
||||||
|
" subscription_id = subscription_id,\n",
|
||||||
|
" resource_group = resource_group,\n",
|
||||||
|
" auth = auth_sp)\n",
|
||||||
|
"\n",
|
||||||
|
"# Persist the subscription id, resource group name, and workspace name in aml_config/config.json.\n",
|
||||||
|
"ws.write_config()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Create a Folder to Host Sample Projects\n",
|
||||||
|
"Finally, create a folder where all the sample projects will be hosted."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import os\n",
|
||||||
|
"\n",
|
||||||
|
"sample_projects_folder = './sample_projects'\n",
|
||||||
|
"\n",
|
||||||
|
"if not os.path.isdir(sample_projects_folder):\n",
|
||||||
|
" os.mkdir(sample_projects_folder)\n",
|
||||||
|
" \n",
|
||||||
|
"print('Sample projects will be created in {}.'.format(sample_projects_folder))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Create an Experiment\n",
|
||||||
|
"\n",
|
||||||
|
"As part of the setup you have already created an Azure ML `Workspace` object. For Automated ML you will need to create an `Experiment` object, which is a named object in a `Workspace` used to run experiments."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import logging\n",
|
||||||
|
"import os\n",
|
||||||
|
"import random\n",
|
||||||
|
"import time\n",
|
||||||
|
"\n",
|
||||||
|
"from matplotlib import pyplot as plt\n",
|
||||||
|
"from matplotlib.pyplot import imshow\n",
|
||||||
|
"import numpy as np\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"\n",
|
||||||
|
"import azureml.core\n",
|
||||||
|
"from azureml.core.experiment import Experiment\n",
|
||||||
|
"from azureml.core.workspace import Workspace\n",
|
||||||
|
"from azureml.train.automl import AutoMLConfig\n",
|
||||||
|
"from azureml.train.automl.run import AutoMLRun"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Choose a name for the experiment and specify the project folder.\n",
|
||||||
|
"experiment_name = 'automl-local-classification-hdi'\n",
|
||||||
|
"project_folder = './sample_projects/automl-local-classification-hdi'\n",
|
||||||
|
"\n",
|
||||||
|
"experiment = Experiment(ws, experiment_name)\n",
|
||||||
|
"\n",
|
||||||
|
"output = {}\n",
|
||||||
|
"output['SDK version'] = azureml.core.VERSION\n",
|
||||||
|
"output['Subscription ID'] = ws.subscription_id\n",
|
||||||
|
"output['Workspace Name'] = ws.name\n",
|
||||||
|
"output['Resource Group'] = ws.resource_group\n",
|
||||||
|
"output['Location'] = ws.location\n",
|
||||||
|
"output['Project Directory'] = project_folder\n",
|
||||||
|
"output['Experiment Name'] = experiment.name\n",
|
||||||
|
"pd.set_option('display.max_colwidth', -1)\n",
|
||||||
|
"pd.DataFrame(data = output, index = ['']).T"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Diagnostics\n",
|
||||||
|
"\n",
|
||||||
|
"Opt-in diagnostics for better experience, quality, and security of future releases."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.telemetry import set_diagnostics_collection\n",
|
||||||
|
"set_diagnostics_collection(send_diagnostics = True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Registering Datastore"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Datastore is the way to save connection information to a storage service (e.g. Azure Blob, Azure Data Lake, Azure SQL) information to your workspace so you can access them without exposing credentials in your code. The first thing you will need to do is register a datastore, you can refer to our [python SDK documentation](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.datastore.datastore?view=azure-ml-py) on how to register datastores. __Note: for best security practices, please do not check in code that contains registering datastores with secrets into your source control__\n",
|
||||||
|
"\n",
|
||||||
|
"The code below registers a datastore pointing to a publicly readable blob container."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core import Datastore\n",
|
||||||
|
"\n",
|
||||||
|
"datastore_name = 'demo_training'\n",
|
||||||
|
"container_name = 'digits' \n",
|
||||||
|
"account_name = 'automlpublicdatasets'\n",
|
||||||
|
"Datastore.register_azure_blob_container(\n",
|
||||||
|
" workspace = ws, \n",
|
||||||
|
" datastore_name = datastore_name, \n",
|
||||||
|
" container_name = container_name, \n",
|
||||||
|
" account_name = account_name,\n",
|
||||||
|
" overwrite = True\n",
|
||||||
|
")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Below is an example on how to register a private blob container\n",
|
||||||
|
"```python\n",
|
||||||
|
"datastore = Datastore.register_azure_blob_container(\n",
|
||||||
|
" workspace = ws, \n",
|
||||||
|
" datastore_name = 'example_datastore', \n",
|
||||||
|
" container_name = 'example-container', \n",
|
||||||
|
" account_name = 'storageaccount',\n",
|
||||||
|
" account_key = 'accountkey'\n",
|
||||||
|
")\n",
|
||||||
|
"```\n",
|
||||||
|
"The example below shows how to register an Azure Data Lake store. Please make sure you have granted the necessary permissions for the service principal to access the data lake.\n",
|
||||||
|
"```python\n",
|
||||||
|
"datastore = Datastore.register_azure_data_lake(\n",
|
||||||
|
" workspace = ws,\n",
|
||||||
|
" datastore_name = 'example_datastore',\n",
|
||||||
|
" store_name = 'adlsstore',\n",
|
||||||
|
" tenant_id = 'tenant-id-of-service-principal',\n",
|
||||||
|
" client_id = 'client-id-of-service-principal',\n",
|
||||||
|
" client_secret = 'client-secret-of-service-principal'\n",
|
||||||
|
")\n",
|
||||||
|
"```"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Load Training Data Using DataPrep"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Automated ML takes a Dataflow as input.\n",
|
||||||
|
"\n",
|
||||||
|
"If you are familiar with Pandas and have done your data preparation work in Pandas already, you can use the `read_pandas_dataframe` method in dprep to convert the DataFrame to a Dataflow.\n",
|
||||||
|
"```python\n",
|
||||||
|
"df = pd.read_csv(...)\n",
|
||||||
|
"# apply some transforms\n",
|
||||||
|
"dprep.read_pandas_dataframe(df, temp_folder='/path/accessible/by/both/driver/and/worker')\n",
|
||||||
|
"```\n",
|
||||||
|
"\n",
|
||||||
|
"If you just need to ingest data without doing any preparation, you can directly use AzureML Data Prep (Data Prep) to do so. The code below demonstrates this scenario. Data Prep also has data preparation capabilities, we have many [sample notebooks](https://github.com/Microsoft/AMLDataPrepDocs) demonstrating the capabilities.\n",
|
||||||
|
"\n",
|
||||||
|
"You will get the datastore you registered previously and pass it to Data Prep for reading. The data comes from the digits dataset: `sklearn.datasets.load_digits()`. `DataPath` points to a specific location within a datastore. "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import azureml.dataprep as dprep\n",
|
||||||
|
"from azureml.data.datapath import DataPath\n",
|
||||||
|
"\n",
|
||||||
|
"datastore = Datastore.get(workspace = ws, datastore_name = datastore_name)\n",
|
||||||
|
"\n",
|
||||||
|
"X_train = dprep.read_csv(datastore.path('X.csv'))\n",
|
||||||
|
"y_train = dprep.read_csv(datastore.path('y.csv')).to_long(dprep.ColumnSelector(term='.*', use_regex = True))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Review the Data Preparation Result\n",
|
||||||
|
"You can peek the result of a Dataflow at any range using `skip(i)` and `head(j)`. Doing so evaluates only j records for all the steps in the Dataflow, which makes it fast even against large datasets."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"X_train.get_profile()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"y_train.get_profile()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Configure AutoML\n",
|
||||||
|
"\n",
|
||||||
|
"Instantiate an `AutoMLConfig` object to specify the settings and data used to run the experiment.\n",
|
||||||
|
"\n",
|
||||||
|
"|Property|Description|\n",
|
||||||
|
"|-|-|\n",
|
||||||
|
"|**task**|classification or regression|\n",
|
||||||
|
"|**primary_metric**|This is the metric that you want to optimize. Classification supports the following primary metrics: <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>average_precision_score_weighted</i><br><i>norm_macro_recall</i><br><i>precision_score_weighted</i>|\n",
|
||||||
|
"|**primary_metric**|This is the metric that you want to optimize. Regression supports the following primary metrics: <br><i>spearman_correlation</i><br><i>normalized_root_mean_squared_error</i><br><i>r2_score</i><br><i>normalized_mean_absolute_error</i>|\n",
|
||||||
|
"|**iteration_timeout_minutes**|Time limit in minutes for each iteration.|\n",
|
||||||
|
"|**iterations**|Number of iterations. In each iteration AutoML trains a specific pipeline with the data.|\n",
|
||||||
|
"|**n_cross_validations**|Number of cross validation splits.|\n",
|
||||||
|
"|**spark_context**|Spark Context object. for HDInsight, use spark_context=sc|\n",
|
||||||
|
"|**max_concurrent_iterations**|Maximum number of iterations to execute in parallel. This should be <= number of worker nodes in your Azure HDInsight cluster.|\n",
|
||||||
|
"|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n",
|
||||||
|
"|**y**|(sparse) array-like, shape = [n_samples, ], [n_samples, n_classes]<br>Multi-class targets. An indicator matrix turns on multilabel classification. This should be an array of integers.|\n",
|
||||||
|
"|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder.|\n",
|
||||||
|
"|**preprocess**|set this to True to enable pre-processing of data eg. string to numeric using one-hot encoding|\n",
|
||||||
|
"|**exit_score**|Target score for experiment. It is associated with the metric. eg. exit_score=0.995 will exit experiment after that|"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"automl_config = AutoMLConfig(task = 'classification',\n",
|
||||||
|
" debug_log = 'automl_errors.log',\n",
|
||||||
|
" primary_metric = 'AUC_weighted',\n",
|
||||||
|
" iteration_timeout_minutes = 10,\n",
|
||||||
|
" iterations = 3,\n",
|
||||||
|
" preprocess = True,\n",
|
||||||
|
" n_cross_validations = 10,\n",
|
||||||
|
" max_concurrent_iterations = 2, #change it based on number of worker nodes\n",
|
||||||
|
" verbosity = logging.INFO,\n",
|
||||||
|
" spark_context=sc, #HDI /spark related\n",
|
||||||
|
" X = X_train, \n",
|
||||||
|
" y = y_train,\n",
|
||||||
|
" path = project_folder)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Train the Models\n",
|
||||||
|
"\n",
|
||||||
|
"Call the `submit` method on the experiment object and pass the run configuration. Execution of local runs is synchronous. Depending on the data and the number of iterations this can run for a while."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"local_run = experiment.submit(automl_config, show_output = True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Explore the Results"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"The following will show the child runs and waits for the parent run to complete."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"#### Retrieve All Child Runs after the experiment is completed (in portal)\n",
|
||||||
|
"You can also use SDK methods to fetch all the child runs and see individual metrics that we log."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"children = list(local_run.get_children())\n",
|
||||||
|
"metricslist = {}\n",
|
||||||
|
"for run in children:\n",
|
||||||
|
" properties = run.get_properties()\n",
|
||||||
|
" metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)} \n",
|
||||||
|
" metricslist[int(properties['iteration'])] = metrics\n",
|
||||||
|
"\n",
|
||||||
|
"rundata = pd.DataFrame(metricslist).sort_index(1)\n",
|
||||||
|
"rundata"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Retrieve the Best Model after the above run is complete \n",
|
||||||
|
"\n",
|
||||||
|
"Below we select the best pipeline from our iterations. The `get_output` method returns the best run and the fitted model. The Model includes the pipeline and any pre-processing. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"best_run, fitted_model = local_run.get_output()\n",
|
||||||
|
"print(best_run)\n",
|
||||||
|
"print(fitted_model)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"#### Best Model Based on Any Other Metric after the above run is complete based on the child run\n",
|
||||||
|
"Show the run and the model that has the smallest `log_loss` value:"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"lookup_metric = \"log_loss\"\n",
|
||||||
|
"best_run, fitted_model = local_run.get_output(metric = lookup_metric)\n",
|
||||||
|
"print(best_run)\n",
|
||||||
|
"print(fitted_model)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Test the Best Fitted Model\n",
|
||||||
|
"\n",
|
||||||
|
"#### Load Test Data - you can split the dataset beforehand & pass Train dataset to AutoML and use Test dataset to evaluate the best model."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"blob_location = \"https://{}.blob.core.windows.net/{}\".format(account_name, container_name)\n",
|
||||||
|
"X_test = pd.read_csv(\"{}./X_valid.csv\".format(blob_location), header=0)\n",
|
||||||
|
"y_test = pd.read_csv(\"{}/y_valid.csv\".format(blob_location), header=0)\n",
|
||||||
|
"images = pd.read_csv(\"{}/images.csv\".format(blob_location), header=None)\n",
|
||||||
|
"images = np.reshape(images.values, (100,8,8))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"#### Testing Our Best Fitted Model\n",
|
||||||
|
"We will try to predict digits and see how our model works. This is just an example to show you."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Randomly select digits and test.\n",
|
||||||
|
"for index in np.random.choice(len(y_test), 2, replace = False):\n",
|
||||||
|
" print(index)\n",
|
||||||
|
" predicted = fitted_model.predict(X_test[index:index + 1])[0]\n",
|
||||||
|
" label = y_test.values[index]\n",
|
||||||
|
" title = \"Label value = %d Predicted value = %d \" % (label, predicted)\n",
|
||||||
|
" fig = plt.figure(3, figsize = (5,5))\n",
|
||||||
|
" ax1 = fig.add_axes((0,0,.8,.8))\n",
|
||||||
|
" ax1.set_title(title)\n",
|
||||||
|
" plt.imshow(images[index], cmap = plt.cm.gray_r, interpolation = 'nearest')\n",
|
||||||
|
" display(fig)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"When deploying an automated ML trained model, please specify _pippackages=['azureml-sdk[automl]']_ in your CondaDependencies.\n",
|
||||||
|
"\n",
|
||||||
|
"Please refer to only the **Deploy** section in this notebook - <a href=\"https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/classification-with-deployment\" target=\"_blank\">Deployment of Automated ML trained model</a>"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "savitam"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "sasum"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3.6",
|
||||||
|
"language": "Python",
|
||||||
|
"name": "Python36"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "python",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "pyspark3",
|
||||||
|
"pygments_lexer": "python3"
|
||||||
|
},
|
||||||
|
"name": "auto-ml-classification-local-adb",
|
||||||
|
"notebookId": 587284549713154
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 1
|
||||||
|
}
|
||||||
25
how-to-use-azureml/deploy-to-cloud/README.md
Normal file
25
how-to-use-azureml/deploy-to-cloud/README.md
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
# Model Deployment with Azure ML service
|
||||||
|
|
||||||
|
You can use Azure Machine Learning to package, debug, validate and deploy inference containers to a variety of compute targets. This process is known as "MLOps" (ML operationalization).
|
||||||
|
For more information please check out this article: https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-deploy-and-where
|
||||||
|
|
||||||
|
## Get Started
|
||||||
|
To begin, you will need an ML workspace.
|
||||||
|
For more information please check out this article: https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-workspace
|
||||||
|
|
||||||
|
## Deploy to the cloud
|
||||||
|
You can deploy to the cloud using the Azure ML CLI or the Azure ML SDK.
|
||||||
|
|
||||||
|
### Deploy with the CLI
|
||||||
|
```
|
||||||
|
az extension add -n azure-cli-ml
|
||||||
|
az ml folder attach -w myworkspace -g myresourcegroup
|
||||||
|
az ml model register -n sklearn_regression_model.pkl -p sklearn_regression_model.pkl -t model.json
|
||||||
|
az ml model deploy -n acicicd -f model.json --ic inferenceConfig.yml --dc deploymentConfig.yml
|
||||||
|
```
|
||||||
|
|
||||||
|
Here is an [Azure DevOps Pipelines model deployment example](./azure-pipelines-model-deploy.yml)
|
||||||
|
[](https://aidemos.visualstudio.com/azmlcli/_build/latest?definitionId=87&branchName=cli-ga)
|
||||||
|
|
||||||
|
### Deploy from a notebook
|
||||||
|
- Notebook example: [model-register-and-deploy](./model-register-and-deploy.ipynb).
|
||||||
@@ -0,0 +1,50 @@
|
|||||||
|
trigger:
|
||||||
|
- master
|
||||||
|
|
||||||
|
pool:
|
||||||
|
vmImage: 'Ubuntu-16.04'
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- task: DownloadSecureFile@1
|
||||||
|
inputs:
|
||||||
|
name: config.json
|
||||||
|
secureFile: config.json
|
||||||
|
|
||||||
|
- script: cp $(Agent.TempDirectory)/config.json $(Build.SourcesDirectory)
|
||||||
|
|
||||||
|
- task: AzureCLI@1
|
||||||
|
displayName: 'Install the CLI'
|
||||||
|
inputs:
|
||||||
|
azureSubscription: 'azmldemows'
|
||||||
|
scriptLocation: inlineScript
|
||||||
|
inlineScript: 'az extension add -n azure-cli-ml'
|
||||||
|
|
||||||
|
- task: AzureCLI@1
|
||||||
|
displayName: 'Attach folder to workspace'
|
||||||
|
inputs:
|
||||||
|
azureSubscription: 'azmldemows'
|
||||||
|
scriptLocation: inlineScript
|
||||||
|
inlineScript: 'az ml folder attach'
|
||||||
|
|
||||||
|
- task: AzureCLI@1
|
||||||
|
displayName: 'Register model'
|
||||||
|
inputs:
|
||||||
|
azureSubscription: 'azmldemows'
|
||||||
|
scriptLocation: inlineScript
|
||||||
|
inlineScript: 'az ml model register -n sklearn_regression_model.pkl -p sklearn_regression_model.pkl -t model.json'
|
||||||
|
workingDirectory: 'how-to-use-azureml/deploy-to-cloud/'
|
||||||
|
|
||||||
|
- task: AzureCLI@1
|
||||||
|
displayName: 'Deploy model'
|
||||||
|
inputs:
|
||||||
|
azureSubscription: 'azmldemows'
|
||||||
|
scriptLocation: inlineScript
|
||||||
|
inlineScript: 'az ml model deploy -n acicicd -f model.json --ic inferenceConfig.yml --dc deploymentConfig.yml'
|
||||||
|
workingDirectory: 'how-to-use-azureml/deploy-to-cloud/'
|
||||||
|
|
||||||
|
- task: AzureCLI@1
|
||||||
|
displayName: 'Delete deployed service'
|
||||||
|
inputs:
|
||||||
|
azureSubscription: 'azmldemows'
|
||||||
|
scriptLocation: inlineScript
|
||||||
|
inlineScript: 'az ml service delete -n acicicd'
|
||||||
5
how-to-use-azureml/deploy-to-cloud/deploymentConfig.yml
Normal file
5
how-to-use-azureml/deploy-to-cloud/deploymentConfig.yml
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
---
|
||||||
|
containerResourceRequirements:
|
||||||
|
cpu: 1
|
||||||
|
memoryInGB: 1
|
||||||
|
computeType: ACI
|
||||||
7
how-to-use-azureml/deploy-to-cloud/deploymentconfig.json
Normal file
7
how-to-use-azureml/deploy-to-cloud/deploymentconfig.json
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"containerResourceRequirements": {
|
||||||
|
"cpu": 1,
|
||||||
|
"memoryInGB": 1
|
||||||
|
},
|
||||||
|
"computeType": "ACI"
|
||||||
|
}
|
||||||
1
how-to-use-azureml/deploy-to-cloud/helloworld.txt
Normal file
1
how-to-use-azureml/deploy-to-cloud/helloworld.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
RUN echo "this is test"
|
||||||
9
how-to-use-azureml/deploy-to-cloud/inferenceConfig.yml
Normal file
9
how-to-use-azureml/deploy-to-cloud/inferenceConfig.yml
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
entryScript: score.py
|
||||||
|
runtime: python
|
||||||
|
condaFile: myenv.yml
|
||||||
|
extraDockerfileSteps:
|
||||||
|
schemaFile:
|
||||||
|
dependencies:
|
||||||
|
enableGpu: False
|
||||||
|
baseImage:
|
||||||
|
baseImageRegistry:
|
||||||
11
how-to-use-azureml/deploy-to-cloud/inferenceconfig.json
Normal file
11
how-to-use-azureml/deploy-to-cloud/inferenceconfig.json
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
{
|
||||||
|
"entryScript": "score.py",
|
||||||
|
"runtime": "python",
|
||||||
|
"condaFile": "myenv.yml",
|
||||||
|
"extraDockerfileSteps": null,
|
||||||
|
"schemaFile": null,
|
||||||
|
"dependencies": null,
|
||||||
|
"enableGpu": false,
|
||||||
|
"baseImage": null,
|
||||||
|
"baseImageRegistry": null
|
||||||
|
}
|
||||||
@@ -0,0 +1,275 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||||
|
"\n",
|
||||||
|
"Licensed under the MIT License."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Register Model and deploy as Webservice\n",
|
||||||
|
"\n",
|
||||||
|
"This example shows how to deploy a Webservice in step-by-step fashion:\n",
|
||||||
|
"\n",
|
||||||
|
" 1. Register Model\n",
|
||||||
|
" 2. Deploy Model as Webservice"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Prerequisites\n",
|
||||||
|
"Make sure you go through the [configuration](../../../configuration.ipynb) Notebook first if you haven't."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Check core SDK version number\n",
|
||||||
|
"import azureml.core\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"SDK version:\", azureml.core.VERSION)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Initialize Workspace\n",
|
||||||
|
"\n",
|
||||||
|
"Initialize a workspace object from persisted configuration."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"tags": [
|
||||||
|
"create workspace"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core import Workspace\n",
|
||||||
|
"\n",
|
||||||
|
"ws = Workspace.from_config()\n",
|
||||||
|
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Register Model"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"You can add tags and descriptions to your Models. Note you need to have a `sklearn_regression_model.pkl` file in the current directory. This file is generated by the 01 notebook. The below call registers that file as a Model with the same name `sklearn_regression_model.pkl` in the workspace.\n",
|
||||||
|
"\n",
|
||||||
|
"Using tags, you can track useful information such as the name and version of the machine learning library used to train the model. Note that tags must be alphanumeric."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"tags": [
|
||||||
|
"register model from file"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.model import Model\n",
|
||||||
|
"\n",
|
||||||
|
"model = Model.register(model_path = \"sklearn_regression_model.pkl\",\n",
|
||||||
|
" model_name = \"sklearn_regression_model.pkl\",\n",
|
||||||
|
" tags = {'area': \"diabetes\", 'type': \"regression\"},\n",
|
||||||
|
" description = \"Ridge regression model to predict diabetes\",\n",
|
||||||
|
" workspace = ws)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Create Inference Configuration\n",
|
||||||
|
"\n",
|
||||||
|
"There is now support for a source directory, you can upload an entire folder from your local machine as dependencies for the Webservice.\n",
|
||||||
|
"Note: in that case, your entry_script, conda_file, and extra_docker_file_steps paths are relative paths to the source_directory path.\n",
|
||||||
|
"\n",
|
||||||
|
"Sample code for using a source directory:\n",
|
||||||
|
"\n",
|
||||||
|
"```python\n",
|
||||||
|
"inference_config = InferenceConfig(source_directory=\"C:/abc\",\n",
|
||||||
|
" runtime= \"python\", \n",
|
||||||
|
" entry_script=\"x/y/score.py\",\n",
|
||||||
|
" conda_file=\"env/myenv.yml\", \n",
|
||||||
|
" extra_docker_file_steps=\"helloworld.txt\")\n",
|
||||||
|
"```\n",
|
||||||
|
"\n",
|
||||||
|
" - source_directory = holds source path as string, this entire folder gets added in image so its really easy to access any files within this folder or subfolder\n",
|
||||||
|
" - runtime = Which runtime to use for the image. Current supported runtimes are 'spark-py' and 'python\n",
|
||||||
|
" - entry_script = contains logic specific to initializing your model and running predictions\n",
|
||||||
|
" - conda_file = manages conda and python package dependencies.\n",
|
||||||
|
" - extra_docker_file_steps = optional: any extra steps you want to inject into docker file"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"tags": [
|
||||||
|
"create image"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.model import InferenceConfig\n",
|
||||||
|
"\n",
|
||||||
|
"inference_config = InferenceConfig(runtime= \"python\", \n",
|
||||||
|
" entry_script=\"score.py\",\n",
|
||||||
|
" conda_file=\"myenv.yml\", \n",
|
||||||
|
" extra_docker_file_steps=\"helloworld.txt\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Deploy Model as Webservice on Azure Container Instance\n",
|
||||||
|
"\n",
|
||||||
|
"Note that the service creation can take few minutes."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.webservice import AciWebservice, Webservice\n",
|
||||||
|
"from azureml.exceptions import WebserviceException\n",
|
||||||
|
"\n",
|
||||||
|
"deployment_config = AciWebservice.deploy_configuration(cpu_cores = 1, memory_gb = 1)\n",
|
||||||
|
"aci_service_name = 'aciservice1'\n",
|
||||||
|
"\n",
|
||||||
|
"try:\n",
|
||||||
|
" # if you want to get existing service below is the command\n",
|
||||||
|
" # since aci name needs to be unique in subscription deleting existing aci if any\n",
|
||||||
|
" # we use aci_service_name to create azure aci\n",
|
||||||
|
" service = Webservice(ws, name=aci_service_name)\n",
|
||||||
|
" if service:\n",
|
||||||
|
" service.delete()\n",
|
||||||
|
"except WebserviceException as e:\n",
|
||||||
|
" print()\n",
|
||||||
|
"\n",
|
||||||
|
"service = Model.deploy(ws, aci_service_name, [model], inference_config, deployment_config)\n",
|
||||||
|
"\n",
|
||||||
|
"service.wait_for_deployment(True)\n",
|
||||||
|
"print(service.state)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"#### Test web service"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import json\n",
|
||||||
|
"test_sample = json.dumps({'data': [\n",
|
||||||
|
" [1,2,3,4,5,6,7,8,9,10], \n",
|
||||||
|
" [10,9,8,7,6,5,4,3,2,1]\n",
|
||||||
|
"]})\n",
|
||||||
|
"\n",
|
||||||
|
"test_sample_encoded = bytes(test_sample,encoding = 'utf8')\n",
|
||||||
|
"prediction = service.run(input_data=test_sample_encoded)\n",
|
||||||
|
"print(prediction)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"#### Delete ACI to clean up"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"tags": [
|
||||||
|
"deploy service",
|
||||||
|
"aci"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"service.delete()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Model Profiling\n",
|
||||||
|
"\n",
|
||||||
|
"you can also take advantage of profiling feature for model\n",
|
||||||
|
"\n",
|
||||||
|
"```python\n",
|
||||||
|
"\n",
|
||||||
|
"profile = model.profile(ws, \"profilename\", [model], inference_config, test_sample)\n",
|
||||||
|
"profile.wait_for_profiling(True)\n",
|
||||||
|
"profiling_results = profile.get_results()\n",
|
||||||
|
"print(profiling_results)\n",
|
||||||
|
"\n",
|
||||||
|
"```"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "aashishb"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3.6",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python36"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.7.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
8
how-to-use-azureml/deploy-to-cloud/myenv.yml
Normal file
8
how-to-use-azureml/deploy-to-cloud/myenv.yml
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
name: project_environment
|
||||||
|
dependencies:
|
||||||
|
- python=3.6.2
|
||||||
|
- pip:
|
||||||
|
- azureml-defaults
|
||||||
|
- scikit-learn
|
||||||
|
- numpy
|
||||||
|
- inference-schema[numpy-support]
|
||||||
8
how-to-use-azureml/deploy-to-cloud/mylib.py
Normal file
8
how-to-use-azureml/deploy-to-cloud/mylib.py
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
# Copyright (c) Microsoft. All rights reserved.
|
||||||
|
# Licensed under the MIT license.
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
def get_alphas():
|
||||||
|
# list of numbers from 0.0 to 1.0 with a 0.05 interval
|
||||||
|
return np.arange(0.0, 1.0, 0.05)
|
||||||
34
how-to-use-azureml/deploy-to-cloud/score.py
Normal file
34
how-to-use-azureml/deploy-to-cloud/score.py
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
import pickle
|
||||||
|
import json
|
||||||
|
import numpy as np
|
||||||
|
from sklearn.externals import joblib
|
||||||
|
from sklearn.linear_model import Ridge
|
||||||
|
from azureml.core.model import Model
|
||||||
|
|
||||||
|
from inference_schema.schema_decorators import input_schema, output_schema
|
||||||
|
from inference_schema.parameter_types.numpy_parameter_type import NumpyParameterType
|
||||||
|
|
||||||
|
|
||||||
|
def init():
|
||||||
|
global model
|
||||||
|
# note here "sklearn_regression_model.pkl" is the name of the model registered under
|
||||||
|
# this is a different behavior than before when the code is run locally, even though the code is the same.
|
||||||
|
model_path = Model.get_model_path('sklearn_regression_model.pkl')
|
||||||
|
# deserialize the model file back into a sklearn model
|
||||||
|
model = joblib.load(model_path)
|
||||||
|
|
||||||
|
|
||||||
|
input_sample = np.array([[10, 9, 8, 7, 6, 5, 4, 3, 2, 1]])
|
||||||
|
output_sample = np.array([3726.995])
|
||||||
|
|
||||||
|
|
||||||
|
@input_schema('data', NumpyParameterType(input_sample))
|
||||||
|
@output_schema(NumpyParameterType(output_sample))
|
||||||
|
def run(data):
|
||||||
|
try:
|
||||||
|
result = model.predict(data)
|
||||||
|
# you can return any datatype as long as it is JSON-serializable
|
||||||
|
return result.tolist()
|
||||||
|
except Exception as e:
|
||||||
|
error = str(e)
|
||||||
|
return error
|
||||||
BIN
how-to-use-azureml/deploy-to-cloud/sklearn_regression_model.pkl
Normal file
BIN
how-to-use-azureml/deploy-to-cloud/sklearn_regression_model.pkl
Normal file
Binary file not shown.
45
how-to-use-azureml/deploy-to-cloud/train.py
Normal file
45
how-to-use-azureml/deploy-to-cloud/train.py
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
# Copyright (c) Microsoft. All rights reserved.
|
||||||
|
# Licensed under the MIT license.
|
||||||
|
|
||||||
|
from sklearn.datasets import load_diabetes
|
||||||
|
from sklearn.linear_model import Ridge
|
||||||
|
from sklearn.metrics import mean_squared_error
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
from azureml.core.run import Run
|
||||||
|
from sklearn.externals import joblib
|
||||||
|
import os
|
||||||
|
import numpy as np
|
||||||
|
import mylib
|
||||||
|
|
||||||
|
os.makedirs('./outputs', exist_ok=True)
|
||||||
|
|
||||||
|
X, y = load_diabetes(return_X_y=True)
|
||||||
|
|
||||||
|
run = Run.get_context()
|
||||||
|
|
||||||
|
X_train, X_test, y_train, y_test = train_test_split(X, y,
|
||||||
|
test_size=0.2,
|
||||||
|
random_state=0)
|
||||||
|
data = {"train": {"X": X_train, "y": y_train},
|
||||||
|
"test": {"X": X_test, "y": y_test}}
|
||||||
|
|
||||||
|
# list of numbers from 0.0 to 1.0 with a 0.05 interval
|
||||||
|
alphas = mylib.get_alphas()
|
||||||
|
|
||||||
|
for alpha in alphas:
|
||||||
|
# Use Ridge algorithm to create a regression model
|
||||||
|
reg = Ridge(alpha=alpha)
|
||||||
|
reg.fit(data["train"]["X"], data["train"]["y"])
|
||||||
|
|
||||||
|
preds = reg.predict(data["test"]["X"])
|
||||||
|
mse = mean_squared_error(preds, data["test"]["y"])
|
||||||
|
run.log('alpha', alpha)
|
||||||
|
run.log('mse', mse)
|
||||||
|
|
||||||
|
model_file_name = 'ridge_{0:.2f}.pkl'.format(alpha)
|
||||||
|
# save model in the outputs folder so it automatically get uploaded
|
||||||
|
with open(model_file_name, "wb") as file:
|
||||||
|
joblib.dump(value=reg, filename=os.path.join('./outputs/',
|
||||||
|
model_file_name))
|
||||||
|
|
||||||
|
print('alpha is {0:.2f}, and mse is {1:0.2f}'.format(alpha, mse))
|
||||||
12
how-to-use-azureml/deploy-to-local/README.md
Normal file
12
how-to-use-azureml/deploy-to-local/README.md
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
# Model Deployment with Azure ML service
|
||||||
|
You can use Azure Machine Learning to package, debug, validate and deploy inference containers to a variety of compute targets. This process is known as "MLOps" (ML operationalization).
|
||||||
|
For more information please check out this article: https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-deploy-and-where
|
||||||
|
|
||||||
|
## Get Started
|
||||||
|
To begin, you will need an ML workspace.
|
||||||
|
For more information please check out this article: https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-workspace
|
||||||
|
|
||||||
|
## Deploy locally
|
||||||
|
You can deploy a model locally for testing & debugging using the Azure ML CLI or the Azure ML SDK.
|
||||||
|
- CLI example: https://aka.ms/azmlcli
|
||||||
|
- Notebook example: [register-model-deploy-local](./register-model-deploy-local.ipynb).
|
||||||
BIN
how-to-use-azureml/deploy-to-local/dockerSharedDrive.JPG
Normal file
BIN
how-to-use-azureml/deploy-to-local/dockerSharedDrive.JPG
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 22 KiB |
1
how-to-use-azureml/deploy-to-local/helloworld.txt
Normal file
1
how-to-use-azureml/deploy-to-local/helloworld.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
RUN echo "this is test"
|
||||||
8
how-to-use-azureml/deploy-to-local/myenv.yml
Normal file
8
how-to-use-azureml/deploy-to-local/myenv.yml
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
name: project_environment
|
||||||
|
dependencies:
|
||||||
|
- python=3.6.2
|
||||||
|
- pip:
|
||||||
|
- azureml-defaults
|
||||||
|
- scikit-learn
|
||||||
|
- numpy
|
||||||
|
- inference-schema[numpy-support]
|
||||||
@@ -0,0 +1,487 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||||
|
"\n",
|
||||||
|
"Licensed under the MIT License."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Register model and deploy locally with advanced usages\n",
|
||||||
|
"\n",
|
||||||
|
"This example shows how to deploy a web service in step-by-step fashion:\n",
|
||||||
|
"\n",
|
||||||
|
" 1. Register model\n",
|
||||||
|
" 2. Deploy the image as a web service in a local Docker container.\n",
|
||||||
|
" 3. Quickly test changes to your entry script by reloading the local service.\n",
|
||||||
|
" 4. Optionally, you can also make changes to model, conda or extra_docker_file_steps and update local service"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Prerequisites\n",
|
||||||
|
"Make sure you go through the [configuration](../../../configuration.ipynb) Notebook first if you haven't."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Check core SDK version number\n",
|
||||||
|
"import azureml.core\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"SDK version:\", azureml.core.VERSION)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Initialize Workspace\n",
|
||||||
|
"\n",
|
||||||
|
"Initialize a workspace object from persisted configuration."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"tags": [
|
||||||
|
"create workspace"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core import Workspace\n",
|
||||||
|
"\n",
|
||||||
|
"ws = Workspace.from_config()\n",
|
||||||
|
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Register Model"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"You can add tags and descriptions to your models. we are using `sklearn_regression_model.pkl` file in the current directory as a model with the same name `sklearn_regression_model.pkl` in the workspace.\n",
|
||||||
|
"\n",
|
||||||
|
"Using tags, you can track useful information such as the name and version of the machine learning library used to train the model, framework, category, target customer etc. Note that tags must be alphanumeric."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"tags": [
|
||||||
|
"register model from file"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.model import Model\n",
|
||||||
|
"\n",
|
||||||
|
"model = Model.register(model_path = \"sklearn_regression_model.pkl\",\n",
|
||||||
|
" model_name = \"sklearn_regression_model.pkl\",\n",
|
||||||
|
" tags = {'area': \"diabetes\", 'type': \"regression\"},\n",
|
||||||
|
" description = \"Ridge regression model to predict diabetes\",\n",
|
||||||
|
" workspace = ws)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Manage your dependencies in a folder"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import os\n",
|
||||||
|
"\n",
|
||||||
|
"source_directory = \"C:/abc\"\n",
|
||||||
|
"\n",
|
||||||
|
"os.makedirs(source_directory, exist_ok = True)\n",
|
||||||
|
"os.makedirs(\"C:/abc/x/y\", exist_ok = True)\n",
|
||||||
|
"os.makedirs(\"C:/abc/env\", exist_ok = True)\n",
|
||||||
|
"os.makedirs(\"C:/abc/dockerstep\", exist_ok = True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Show `score.py`. Note that the `sklearn_regression_model.pkl` in the `get_model_path` call is referring to a model named `sklearn_regression_model.pkl` registered under the workspace. It is NOT referencing the local file."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"%%writefile C:/abc/x/y/score.py\n",
|
||||||
|
"import pickle\n",
|
||||||
|
"import json\n",
|
||||||
|
"import numpy as np\n",
|
||||||
|
"from sklearn.externals import joblib\n",
|
||||||
|
"from sklearn.linear_model import Ridge\n",
|
||||||
|
"from azureml.core.model import Model\n",
|
||||||
|
"\n",
|
||||||
|
"from inference_schema.schema_decorators import input_schema, output_schema\n",
|
||||||
|
"from inference_schema.parameter_types.numpy_parameter_type import NumpyParameterType\n",
|
||||||
|
"\n",
|
||||||
|
"def init():\n",
|
||||||
|
" global model\n",
|
||||||
|
" # note here \"sklearn_regression_model.pkl\" is the name of the model registered under\n",
|
||||||
|
" # this is a different behavior than before when the code is run locally, even though the code is the same.\n",
|
||||||
|
" model_path = Model.get_model_path('sklearn_regression_model.pkl')\n",
|
||||||
|
" # deserialize the model file back into a sklearn model\n",
|
||||||
|
" model = joblib.load(model_path)\n",
|
||||||
|
" global name\n",
|
||||||
|
" # note here, entire source directory on inference config gets added into image\n",
|
||||||
|
" # bellow is the example how you can use any extra files in image\n",
|
||||||
|
" with open('./abc/extradata.json') as json_file: \n",
|
||||||
|
" data = json.load(json_file)\n",
|
||||||
|
" name = data[\"people\"][0][\"name\"]\n",
|
||||||
|
"\n",
|
||||||
|
"input_sample = np.array([[10,9,8,7,6,5,4,3,2,1]])\n",
|
||||||
|
"output_sample = np.array([3726.995])\n",
|
||||||
|
"\n",
|
||||||
|
"@input_schema('data', NumpyParameterType(input_sample))\n",
|
||||||
|
"@output_schema(NumpyParameterType(output_sample))\n",
|
||||||
|
"def run(data):\n",
|
||||||
|
" try:\n",
|
||||||
|
" result = model.predict(data)\n",
|
||||||
|
" # you can return any datatype as long as it is JSON-serializable\n",
|
||||||
|
" return \"Hello \" + name + \" here is your result = \" + str(result)\n",
|
||||||
|
" except Exception as e:\n",
|
||||||
|
" error = str(e)\n",
|
||||||
|
" return error"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"%%writefile C:/abc/env/myenv.yml\n",
|
||||||
|
"name: project_environment\n",
|
||||||
|
"dependencies:\n",
|
||||||
|
" - python=3.6.2\n",
|
||||||
|
" - pip:\n",
|
||||||
|
" - azureml-defaults\n",
|
||||||
|
" - scikit-learn\n",
|
||||||
|
" - numpy\n",
|
||||||
|
" - inference-schema[numpy-support]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"%%writefile C:/abc/dockerstep/customDockerStep.txt\n",
|
||||||
|
"RUN echo \"this is test\""
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"%%writefile C:/abc/extradata.json\n",
|
||||||
|
"{\n",
|
||||||
|
" \"people\": [\n",
|
||||||
|
" {\n",
|
||||||
|
" \"website\": \"microsoft.com\", \n",
|
||||||
|
" \"from\": \"Seattle\", \n",
|
||||||
|
" \"name\": \"Mrudula\"\n",
|
||||||
|
" }\n",
|
||||||
|
" ]\n",
|
||||||
|
"}"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Create Inference Configuration\n",
|
||||||
|
"\n",
|
||||||
|
" - source_directory = holds source path as string, this entire folder gets added in image so its really easy to access any files within this folder or subfolder\n",
|
||||||
|
" - runtime = Which runtime to use for the image. Current supported runtimes are 'spark-py' and 'python\n",
|
||||||
|
" - entry_script = contains logic specific to initializing your model and running predictions\n",
|
||||||
|
" - conda_file = manages conda and python package dependencies.\n",
|
||||||
|
" - extra_docker_file_steps = optional: any extra steps you want to inject into docker file"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.model import InferenceConfig\n",
|
||||||
|
"\n",
|
||||||
|
"inference_config = InferenceConfig(source_directory=\"C:/abc\",\n",
|
||||||
|
" runtime= \"python\", \n",
|
||||||
|
" entry_script=\"x/y/score.py\",\n",
|
||||||
|
" conda_file=\"env/myenv.yml\", \n",
|
||||||
|
" extra_docker_file_steps=\"dockerstep/customDockerStep.txt\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Deploy Model as a Local Docker Web Service\n",
|
||||||
|
"\n",
|
||||||
|
"*Make sure you have Docker installed and running.*\n",
|
||||||
|
"\n",
|
||||||
|
"Note that the service creation can take few minutes.\n",
|
||||||
|
"\n",
|
||||||
|
"NOTE:\n",
|
||||||
|
"\n",
|
||||||
|
"we require docker running with linux container. If you are running Docker for Windows, you need to ensure the Linux Engine is running\n",
|
||||||
|
"\n",
|
||||||
|
" powershell command to switch to linux engine\n",
|
||||||
|
" & 'C:\\Program Files\\Docker\\Docker\\DockerCli.exe' -SwitchLinuxEngine\n",
|
||||||
|
"\n",
|
||||||
|
"and c drive is shared https://docs.docker.com/docker-for-windows/#shared-drives\n",
|
||||||
|
"sometimes you have to reshare c drive as docker \n",
|
||||||
|
"\n",
|
||||||
|
"<img src=\"./dockerSharedDrive.JPG\" align=\"left\"/>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"tags": [
|
||||||
|
"deploy service",
|
||||||
|
"aci"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.webservice import LocalWebservice\n",
|
||||||
|
"\n",
|
||||||
|
"#this is optional, if not provided we choose random port\n",
|
||||||
|
"deployment_config = LocalWebservice.deploy_configuration(port=6789)\n",
|
||||||
|
"\n",
|
||||||
|
"local_service = Model.deploy(ws, \"test\", [model], inference_config, deployment_config)\n",
|
||||||
|
"\n",
|
||||||
|
"local_service.wait_for_deployment()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"print('Local service port: {}'.format(local_service.port))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Check Status and Get Container Logs\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"print(local_service.get_logs())"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Test Web Service"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Call the web service with some input data to get a prediction."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import json\n",
|
||||||
|
"\n",
|
||||||
|
"sample_input = json.dumps({\n",
|
||||||
|
" 'data': [\n",
|
||||||
|
" [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],\n",
|
||||||
|
" [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]\n",
|
||||||
|
" ]\n",
|
||||||
|
"})\n",
|
||||||
|
"\n",
|
||||||
|
"sample_input = bytes(sample_input, encoding='utf-8')\n",
|
||||||
|
"\n",
|
||||||
|
"print(local_service.run(input_data=sample_input))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Reload Service\n",
|
||||||
|
"\n",
|
||||||
|
"You can update your score.py file and then call `reload()` to quickly restart the service. This will only reload your execution script and dependency files, it will not rebuild the underlying Docker image. As a result, `reload()` is fast, but if you do need to rebuild the image -- to add a new Conda or pip package, for instance -- you will have to call `update()`, instead (see below)."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"%%writefile C:/abc/x/y/score.py\n",
|
||||||
|
"import pickle\n",
|
||||||
|
"import json\n",
|
||||||
|
"import numpy as np\n",
|
||||||
|
"from sklearn.externals import joblib\n",
|
||||||
|
"from sklearn.linear_model import Ridge\n",
|
||||||
|
"from azureml.core.model import Model\n",
|
||||||
|
"\n",
|
||||||
|
"from inference_schema.schema_decorators import input_schema, output_schema\n",
|
||||||
|
"from inference_schema.parameter_types.numpy_parameter_type import NumpyParameterType\n",
|
||||||
|
"\n",
|
||||||
|
"def init():\n",
|
||||||
|
" global model\n",
|
||||||
|
" # note here \"sklearn_regression_model.pkl\" is the name of the model registered under\n",
|
||||||
|
" # this is a different behavior than before when the code is run locally, even though the code is the same.\n",
|
||||||
|
" model_path = Model.get_model_path('sklearn_regression_model.pkl')\n",
|
||||||
|
" # deserialize the model file back into a sklearn model\n",
|
||||||
|
" model = joblib.load(model_path)\n",
|
||||||
|
" global name, from_location\n",
|
||||||
|
" # note here, entire source directory on inference config gets added into image\n",
|
||||||
|
" # bellow is the example how you can use any extra files in image\n",
|
||||||
|
" with open('./abc/extradata.json') as json_file: \n",
|
||||||
|
" data = json.load(json_file)\n",
|
||||||
|
" name = data[\"people\"][0][\"name\"]\n",
|
||||||
|
" from_location = data[\"people\"][0][\"from\"]\n",
|
||||||
|
"\n",
|
||||||
|
"input_sample = np.array([[10,9,8,7,6,5,4,3,2,1]])\n",
|
||||||
|
"output_sample = np.array([3726.995])\n",
|
||||||
|
"\n",
|
||||||
|
"@input_schema('data', NumpyParameterType(input_sample))\n",
|
||||||
|
"@output_schema(NumpyParameterType(output_sample))\n",
|
||||||
|
"def run(data):\n",
|
||||||
|
" try:\n",
|
||||||
|
" result = model.predict(data)\n",
|
||||||
|
" # you can return any datatype as long as it is JSON-serializable\n",
|
||||||
|
" return \"Hello \" + name + \" from \" + from_location + \" here is your result = \" + str(result)\n",
|
||||||
|
" except Exception as e:\n",
|
||||||
|
" error = str(e)\n",
|
||||||
|
" return error"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"local_service.reload()\n",
|
||||||
|
"print(\"--------------------------------------------------------------\")\n",
|
||||||
|
"\n",
|
||||||
|
"# after reload now if you call run this will return updated return message\n",
|
||||||
|
"\n",
|
||||||
|
"print(local_service.run(input_data=sample_input))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Update Service\n",
|
||||||
|
"\n",
|
||||||
|
"If you want to change your model(s), Conda dependencies, or deployment configuration, call `update()` to rebuild the Docker image.\n",
|
||||||
|
"\n",
|
||||||
|
"```python\n",
|
||||||
|
"\n",
|
||||||
|
"local_service.update(models = [SomeOtherModelObject],\n",
|
||||||
|
" deployment_config = local_config,\n",
|
||||||
|
" inference_config = inference_config)\n",
|
||||||
|
"```"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Delete Service"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"local_service.delete()"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "raymondl"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3.6",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python36"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.7.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
@@ -0,0 +1,342 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||||
|
"\n",
|
||||||
|
"Licensed under the MIT License."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Register model and deploy locally\n",
|
||||||
|
"\n",
|
||||||
|
"This example shows how to deploy a web service in step-by-step fashion:\n",
|
||||||
|
"\n",
|
||||||
|
" 1. Register model\n",
|
||||||
|
" 2. Deploy the image as a web service in a local Docker container.\n",
|
||||||
|
" 3. Quickly test changes to your entry script by reloading the local service.\n",
|
||||||
|
" 4. Optionally, you can also make changes to model, conda or extra_docker_file_steps and update local service"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Prerequisites\n",
|
||||||
|
"Make sure you go through the [configuration](../../../configuration.ipynb) Notebook first if you haven't."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Check core SDK version number\n",
|
||||||
|
"import azureml.core\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"SDK version:\", azureml.core.VERSION)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Initialize Workspace\n",
|
||||||
|
"\n",
|
||||||
|
"Initialize a workspace object from persisted configuration."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core import Workspace\n",
|
||||||
|
"\n",
|
||||||
|
"ws = Workspace.from_config()\n",
|
||||||
|
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Register Model"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"You can add tags and descriptions to your models. we are using `sklearn_regression_model.pkl` file in the current directory as a model with the same name `sklearn_regression_model.pkl` in the workspace.\n",
|
||||||
|
"\n",
|
||||||
|
"Using tags, you can track useful information such as the name and version of the machine learning library used to train the model, framework, category, target customer etc. Note that tags must be alphanumeric."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"tags": [
|
||||||
|
"register model from file"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.model import Model\n",
|
||||||
|
"\n",
|
||||||
|
"model = Model.register(model_path = \"sklearn_regression_model.pkl\",\n",
|
||||||
|
" model_name = \"sklearn_regression_model.pkl\",\n",
|
||||||
|
" tags = {'area': \"diabetes\", 'type': \"regression\"},\n",
|
||||||
|
" description = \"Ridge regression model to predict diabetes\",\n",
|
||||||
|
" workspace = ws)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Create Inference Configuration"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.model import InferenceConfig\n",
|
||||||
|
"\n",
|
||||||
|
"inference_config = InferenceConfig(runtime= \"python\", \n",
|
||||||
|
" entry_script=\"score.py\",\n",
|
||||||
|
" conda_file=\"myenv.yml\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Deploy Model as a Local Docker Web Service\n",
|
||||||
|
"\n",
|
||||||
|
"*Make sure you have Docker installed and running.*\n",
|
||||||
|
"\n",
|
||||||
|
"Note that the service creation can take few minutes.\n",
|
||||||
|
"\n",
|
||||||
|
"NOTE:\n",
|
||||||
|
"\n",
|
||||||
|
"we require docker running with linux container. If you are running Docker for Windows, you need to ensure the Linux Engine is running\n",
|
||||||
|
"\n",
|
||||||
|
" powershell command to switch to linux engine\n",
|
||||||
|
" & 'C:\\Program Files\\Docker\\Docker\\DockerCli.exe' -SwitchLinuxEngine\n",
|
||||||
|
"\n",
|
||||||
|
"and c drive is shared https://docs.docker.com/docker-for-windows/#shared-drives\n",
|
||||||
|
"sometimes you have to reshare c drive as docker \n",
|
||||||
|
"\n",
|
||||||
|
"<img src=\"./dockerSharedDrive.JPG\" align=\"left\"/>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.webservice import LocalWebservice\n",
|
||||||
|
"\n",
|
||||||
|
"#this is optional, if not provided we choose random port\n",
|
||||||
|
"deployment_config = LocalWebservice.deploy_configuration(port=6789)\n",
|
||||||
|
"\n",
|
||||||
|
"local_service = Model.deploy(ws, \"test\", [model], inference_config, deployment_config)\n",
|
||||||
|
"\n",
|
||||||
|
"local_service.wait_for_deployment()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"print('Local service port: {}'.format(local_service.port))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Check Status and Get Container Logs\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"print(local_service.get_logs())"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Test Web Service"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Call the web service with some input data to get a prediction."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import json\n",
|
||||||
|
"\n",
|
||||||
|
"sample_input = json.dumps({\n",
|
||||||
|
" 'data': [\n",
|
||||||
|
" [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],\n",
|
||||||
|
" [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]\n",
|
||||||
|
" ]\n",
|
||||||
|
"})\n",
|
||||||
|
"\n",
|
||||||
|
"sample_input = bytes(sample_input, encoding='utf-8')\n",
|
||||||
|
"\n",
|
||||||
|
"print(local_service.run(input_data=sample_input))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Reload Service\n",
|
||||||
|
"\n",
|
||||||
|
"You can update your score.py file and then call `reload()` to quickly restart the service. This will only reload your execution script and dependency files, it will not rebuild the underlying Docker image. As a result, `reload()` is fast, but if you do need to rebuild the image -- to add a new Conda or pip package, for instance -- you will have to call `update()`, instead (see below)."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"%%writefile score.py\n",
|
||||||
|
"import pickle\n",
|
||||||
|
"import json\n",
|
||||||
|
"import numpy as np\n",
|
||||||
|
"from sklearn.externals import joblib\n",
|
||||||
|
"from sklearn.linear_model import Ridge\n",
|
||||||
|
"from azureml.core.model import Model\n",
|
||||||
|
"\n",
|
||||||
|
"from inference_schema.schema_decorators import input_schema, output_schema\n",
|
||||||
|
"from inference_schema.parameter_types.numpy_parameter_type import NumpyParameterType\n",
|
||||||
|
"\n",
|
||||||
|
"def init():\n",
|
||||||
|
" global model\n",
|
||||||
|
" # note here \"sklearn_regression_model.pkl\" is the name of the model registered under\n",
|
||||||
|
" # this is a different behavior than before when the code is run locally, even though the code is the same.\n",
|
||||||
|
" model_path = Model.get_model_path('sklearn_regression_model.pkl')\n",
|
||||||
|
" # deserialize the model file back into a sklearn model\n",
|
||||||
|
" model = joblib.load(model_path)\n",
|
||||||
|
"\n",
|
||||||
|
"input_sample = np.array([[10,9,8,7,6,5,4,3,2,1]])\n",
|
||||||
|
"output_sample = np.array([3726.995])\n",
|
||||||
|
"\n",
|
||||||
|
"@input_schema('data', NumpyParameterType(input_sample))\n",
|
||||||
|
"@output_schema(NumpyParameterType(output_sample))\n",
|
||||||
|
"def run(data):\n",
|
||||||
|
" try:\n",
|
||||||
|
" result = model.predict(data)\n",
|
||||||
|
" # you can return any datatype as long as it is JSON-serializable\n",
|
||||||
|
" return 'hello from updated score.py'\n",
|
||||||
|
" except Exception as e:\n",
|
||||||
|
" error = str(e)\n",
|
||||||
|
" return error"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"local_service.reload()\n",
|
||||||
|
"print(\"--------------------------------------------------------------\")\n",
|
||||||
|
"\n",
|
||||||
|
"# after reload now if you call run this will return updated return message\n",
|
||||||
|
"\n",
|
||||||
|
"print(local_service.run(input_data=sample_input))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Update Service\n",
|
||||||
|
"\n",
|
||||||
|
"If you want to change your model(s), Conda dependencies, or deployment configuration, call `update()` to rebuild the Docker image.\n",
|
||||||
|
"\n",
|
||||||
|
"```python\n",
|
||||||
|
"\n",
|
||||||
|
"local_service.update(models = [SomeOtherModelObject],\n",
|
||||||
|
" deployment_config = local_config,\n",
|
||||||
|
" inference_config = inference_config)\n",
|
||||||
|
"```"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Delete Service"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"local_service.delete()"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "raymondl"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3.6",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python36"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.7.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
34
how-to-use-azureml/deploy-to-local/score.py
Normal file
34
how-to-use-azureml/deploy-to-local/score.py
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
import pickle
|
||||||
|
import json
|
||||||
|
import numpy as np
|
||||||
|
from sklearn.externals import joblib
|
||||||
|
from sklearn.linear_model import Ridge
|
||||||
|
from azureml.core.model import Model
|
||||||
|
|
||||||
|
from inference_schema.schema_decorators import input_schema, output_schema
|
||||||
|
from inference_schema.parameter_types.numpy_parameter_type import NumpyParameterType
|
||||||
|
|
||||||
|
|
||||||
|
def init():
|
||||||
|
global model
|
||||||
|
# note here "sklearn_regression_model.pkl" is the name of the model registered under
|
||||||
|
# this is a different behavior than before when the code is run locally, even though the code is the same.
|
||||||
|
model_path = Model.get_model_path('sklearn_regression_model.pkl')
|
||||||
|
# deserialize the model file back into a sklearn model
|
||||||
|
model = joblib.load(model_path)
|
||||||
|
|
||||||
|
|
||||||
|
input_sample = np.array([[10, 9, 8, 7, 6, 5, 4, 3, 2, 1]])
|
||||||
|
output_sample = np.array([3726.995])
|
||||||
|
|
||||||
|
|
||||||
|
@input_schema('data', NumpyParameterType(input_sample))
|
||||||
|
@output_schema(NumpyParameterType(output_sample))
|
||||||
|
def run(data):
|
||||||
|
try:
|
||||||
|
result = model.predict(data)
|
||||||
|
# you can return any datatype as long as it is JSON-serializable
|
||||||
|
return result.tolist()
|
||||||
|
except Exception as e:
|
||||||
|
error = str(e)
|
||||||
|
return error
|
||||||
BIN
how-to-use-azureml/deploy-to-local/sklearn_regression_model.pkl
Normal file
BIN
how-to-use-azureml/deploy-to-local/sklearn_regression_model.pkl
Normal file
Binary file not shown.
@@ -6,15 +6,18 @@ These tutorials show how to create and deploy Open Neural Network eXchange ([ONN
|
|||||||
|
|
||||||
0. [Configure your Azure Machine Learning Workspace](../../../configuration.ipynb)
|
0. [Configure your Azure Machine Learning Workspace](../../../configuration.ipynb)
|
||||||
|
|
||||||
#### Obtain models from the [ONNX Model Zoo](https://github.com/onnx/models) and deploy with ONNX Runtime Inference
|
#### Obtain pretrained models from the [ONNX Model Zoo](https://github.com/onnx/models) and deploy with ONNX Runtime
|
||||||
1. [Handwritten Digit Classification (MNIST)](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/deployment/onnx/onnx-inference-mnist-deploy.ipynb)
|
1. [MNIST - Handwritten Digit Classification with ONNX Runtime](onnx-inference-mnist-deploy.ipynb)
|
||||||
2. [Facial Expression Recognition (Emotion FER+)](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/deployment/onnx/onnx-inference-facial-expression-recognition-deploy.ipynb)
|
2. [Emotion FER+ - Facial Expression Recognition with ONNX Runtime](onnx-inference-facial-expression-recognition-deploy.ipynb)
|
||||||
|
|
||||||
|
#### Train model on Azure ML, convert to ONNX, and deploy with ONNX Runtime
|
||||||
|
3. [MNIST - Train using PyTorch and deploy with ONNX Runtime](onnx-train-pytorch-aml-deploy-mnist.ipynb)
|
||||||
|
|
||||||
#### Demo Notebooks from Microsoft Ignite 2018
|
#### Demo Notebooks from Microsoft Ignite 2018
|
||||||
Note that the following notebooks do not have evaluation sections for the models since they were deployed as part of a live demo. You can find the respective pre-processing and post-processing code linked from the ONNX Model Zoo Github pages ([ResNet](https://github.com/onnx/models/tree/master/models/image_classification/resnet), [TinyYoloV2](https://github.com/onnx/models/tree/master/tiny_yolov2)), or experiment with the ONNX models by [running them in the browser](https://microsoft.github.io/onnxjs-demo/#/).
|
Note that the following notebooks do not have evaluation sections for the models since they were deployed as part of a live demo. You can find the respective pre-processing and post-processing code linked from the ONNX Model Zoo Github pages ([ResNet](https://github.com/onnx/models/tree/master/models/image_classification/resnet), [TinyYoloV2](https://github.com/onnx/models/tree/master/tiny_yolov2)), or experiment with the ONNX models by [running them in the browser](https://microsoft.github.io/onnxjs-demo/#/).
|
||||||
|
|
||||||
3. [Image Recognition (ResNet50)](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/deployment/onnx/onnx-modelzoo-aml-deploy-resnet50.ipynb)
|
4. [ResNet50 - Image Recognition with ONNX Runtime](onnx-modelzoo-aml-deploy-resnet50.ipynb)
|
||||||
4. [Convert Core ML Model to ONNX and deploy - Real Time Object Detection (TinyYOLO)](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/deployment/onnx/onnx-convert-aml-deploy-tinyyolo.ipynb)
|
5. [TinyYoloV2 - Convert from CoreML and deploy with ONNX Runtime](onnx-convert-aml-deploy-tinyyolo.ipynb)
|
||||||
|
|
||||||
## Documentation
|
## Documentation
|
||||||
- [ONNX Runtime Python API Documentation](http://aka.ms/onnxruntime-python)
|
- [ONNX Runtime Python API Documentation](http://aka.ms/onnxruntime-python)
|
||||||
|
|||||||
124
how-to-use-azureml/deployment/onnx/mnist.py
Normal file
124
how-to-use-azureml/deployment/onnx/mnist.py
Normal file
@@ -0,0 +1,124 @@
|
|||||||
|
# This is a modified version of https://github.com/pytorch/examples/blob/master/mnist/main.py which is
|
||||||
|
# licensed under BSD 3-Clause (https://github.com/pytorch/examples/blob/master/LICENSE)
|
||||||
|
|
||||||
|
from __future__ import print_function
|
||||||
|
import argparse
|
||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
import torch.nn.functional as F
|
||||||
|
import torch.optim as optim
|
||||||
|
from torchvision import datasets, transforms
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
class Net(nn.Module):
|
||||||
|
def __init__(self):
|
||||||
|
super(Net, self).__init__()
|
||||||
|
self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
|
||||||
|
self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
|
||||||
|
self.conv2_drop = nn.Dropout2d()
|
||||||
|
self.fc1 = nn.Linear(320, 50)
|
||||||
|
self.fc2 = nn.Linear(50, 10)
|
||||||
|
|
||||||
|
def forward(self, x):
|
||||||
|
x = F.relu(F.max_pool2d(self.conv1(x), 2))
|
||||||
|
x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
|
||||||
|
x = x.view(-1, 320)
|
||||||
|
x = F.relu(self.fc1(x))
|
||||||
|
x = F.dropout(x, training=self.training)
|
||||||
|
x = self.fc2(x)
|
||||||
|
return F.log_softmax(x, dim=1)
|
||||||
|
|
||||||
|
|
||||||
|
def train(args, model, device, train_loader, optimizer, epoch, output_dir):
|
||||||
|
model.train()
|
||||||
|
for batch_idx, (data, target) in enumerate(train_loader):
|
||||||
|
data, target = data.to(device), target.to(device)
|
||||||
|
optimizer.zero_grad()
|
||||||
|
output = model(data)
|
||||||
|
loss = F.nll_loss(output, target)
|
||||||
|
loss.backward()
|
||||||
|
optimizer.step()
|
||||||
|
if batch_idx % args.log_interval == 0:
|
||||||
|
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
|
||||||
|
epoch, batch_idx * len(data), len(train_loader.dataset),
|
||||||
|
100. * batch_idx / len(train_loader), loss.item()))
|
||||||
|
|
||||||
|
|
||||||
|
def test(args, model, device, test_loader):
|
||||||
|
model.eval()
|
||||||
|
test_loss = 0
|
||||||
|
correct = 0
|
||||||
|
with torch.no_grad():
|
||||||
|
for data, target in test_loader:
|
||||||
|
data, target = data.to(device), target.to(device)
|
||||||
|
output = model(data)
|
||||||
|
test_loss += F.nll_loss(output, target, size_average=False, reduce=True).item() # sum up batch loss
|
||||||
|
pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability
|
||||||
|
correct += pred.eq(target.view_as(pred)).sum().item()
|
||||||
|
|
||||||
|
test_loss /= len(test_loader.dataset)
|
||||||
|
print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
|
||||||
|
test_loss, correct, len(test_loader.dataset),
|
||||||
|
100. * correct / len(test_loader.dataset)))
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
# Training settings
|
||||||
|
parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
|
||||||
|
parser.add_argument('--batch-size', type=int, default=64, metavar='N',
|
||||||
|
help='input batch size for training (default: 64)')
|
||||||
|
parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
|
||||||
|
help='input batch size for testing (default: 1000)')
|
||||||
|
parser.add_argument('--epochs', type=int, default=5, metavar='N',
|
||||||
|
help='number of epochs to train (default: 5)')
|
||||||
|
parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
|
||||||
|
help='learning rate (default: 0.01)')
|
||||||
|
parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
|
||||||
|
help='SGD momentum (default: 0.5)')
|
||||||
|
parser.add_argument('--no-cuda', action='store_true', default=False,
|
||||||
|
help='disables CUDA training')
|
||||||
|
parser.add_argument('--seed', type=int, default=1, metavar='S',
|
||||||
|
help='random seed (default: 1)')
|
||||||
|
parser.add_argument('--log-interval', type=int, default=10, metavar='N',
|
||||||
|
help='how many batches to wait before logging training status')
|
||||||
|
parser.add_argument('--output-dir', type=str, default='outputs')
|
||||||
|
args = parser.parse_args()
|
||||||
|
use_cuda = not args.no_cuda and torch.cuda.is_available()
|
||||||
|
|
||||||
|
torch.manual_seed(args.seed)
|
||||||
|
|
||||||
|
device = torch.device("cuda" if use_cuda else "cpu")
|
||||||
|
|
||||||
|
output_dir = args.output_dir
|
||||||
|
os.makedirs(output_dir, exist_ok=True)
|
||||||
|
|
||||||
|
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
|
||||||
|
train_loader = torch.utils.data.DataLoader(
|
||||||
|
datasets.MNIST('data', train=True, download=True,
|
||||||
|
transform=transforms.Compose([transforms.ToTensor(),
|
||||||
|
transforms.Normalize((0.1307,), (0.3081,))])
|
||||||
|
),
|
||||||
|
batch_size=args.batch_size, shuffle=True, **kwargs)
|
||||||
|
test_loader = torch.utils.data.DataLoader(
|
||||||
|
datasets.MNIST('data', train=False,
|
||||||
|
transform=transforms.Compose([transforms.ToTensor(),
|
||||||
|
transforms.Normalize((0.1307,), (0.3081,))])
|
||||||
|
),
|
||||||
|
batch_size=args.test_batch_size, shuffle=True, **kwargs)
|
||||||
|
|
||||||
|
model = Net().to(device)
|
||||||
|
optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)
|
||||||
|
|
||||||
|
for epoch in range(1, args.epochs + 1):
|
||||||
|
train(args, model, device, train_loader, optimizer, epoch, output_dir)
|
||||||
|
test(args, model, device, test_loader)
|
||||||
|
|
||||||
|
# save model
|
||||||
|
dummy_input = torch.randn(1, 1, 28, 28, device=device)
|
||||||
|
model_path = os.path.join(output_dir, 'mnist.onnx')
|
||||||
|
torch.onnx.export(model, dummy_input, model_path)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
||||||
File diff suppressed because one or more lines are too long
@@ -167,6 +167,31 @@
|
|||||||
"image.wait_for_creation(show_output = True)"
|
"image.wait_for_creation(show_output = True)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"#### Use a custom Docker image\n",
|
||||||
|
"\n",
|
||||||
|
"You can also specify a custom Docker image to be used as base image if you don't want to use the default base image provided by Azure ML. Please make sure the custom Docker image has Ubuntu >= 16.04, Conda >= 4.5.\\* and Python(3.5.\\* or 3.6.\\*).\n",
|
||||||
|
"\n",
|
||||||
|
"Only Supported for `ContainerImage`(from azureml.core.image) with `python` runtime.\n",
|
||||||
|
"```python\n",
|
||||||
|
"# use an image available in public Container Registry without authentication\n",
|
||||||
|
"image_config.base_image = \"mcr.microsoft.com/azureml/o16n-sample-user-base/ubuntu-miniconda\"\n",
|
||||||
|
"\n",
|
||||||
|
"# or, use an image available in a private Container Registry\n",
|
||||||
|
"image_config.base_image = \"myregistry.azurecr.io/mycustomimage:1.0\"\n",
|
||||||
|
"image_config.base_image_registry.address = \"myregistry.azurecr.io\"\n",
|
||||||
|
"image_config.base_image_registry.username = \"username\"\n",
|
||||||
|
"image_config.base_image_registry.password = \"password\"\n",
|
||||||
|
"\n",
|
||||||
|
"# or, use an image built during training.\n",
|
||||||
|
"image_config.base_image = run.properties[\"AzureML.DerivedImageName\"]\n",
|
||||||
|
"```\n",
|
||||||
|
"You can get the address of training image from the properties of a Run object. Only new runs submitted with azureml-sdk>=1.0.22 to AMLCompute targets will have the 'AzureML.DerivedImageName' property. Instructions on how to get a Run can be found in [manage-runs](../../training/manage-runs/manage-runs.ipynb). \n"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
@@ -191,6 +216,56 @@
|
|||||||
" provisioning_configuration = prov_config)"
|
" provisioning_configuration = prov_config)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Create AKS Cluster in an existing virtual network (optional)\n",
|
||||||
|
"See code snippet below. Check the documentation [here](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-enable-virtual-network#use-azure-kubernetes-service) for more details."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"'''\n",
|
||||||
|
"from azureml.core.compute import ComputeTarget, AksCompute\n",
|
||||||
|
"\n",
|
||||||
|
"# Create the compute configuration and set virtual network information\n",
|
||||||
|
"config = AksCompute.provisioning_configuration(location=\"eastus2\")\n",
|
||||||
|
"config.vnet_resourcegroup_name = \"mygroup\"\n",
|
||||||
|
"config.vnet_name = \"mynetwork\"\n",
|
||||||
|
"config.subnet_name = \"default\"\n",
|
||||||
|
"config.service_cidr = \"10.0.0.0/16\"\n",
|
||||||
|
"config.dns_service_ip = \"10.0.0.10\"\n",
|
||||||
|
"config.docker_bridge_cidr = \"172.17.0.1/16\"\n",
|
||||||
|
"\n",
|
||||||
|
"# Create the compute target\n",
|
||||||
|
"aks_target = ComputeTarget.create(workspace = ws,\n",
|
||||||
|
" name = \"myaks\",\n",
|
||||||
|
" provisioning_configuration = config)\n",
|
||||||
|
"'''"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Enable SSL on the AKS Cluster (optional)\n",
|
||||||
|
"See code snippet below. Check the documentation [here](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-secure-web-service) for more details"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# provisioning_config = AksCompute.provisioning_configuration(ssl_cert_pem_file=\"cert.pem\", ssl_key_pem_file=\"key.pem\", ssl_cname=\"www.contoso.com\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
@@ -270,8 +345,9 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"# Test the web service\n",
|
"# Test the web service using run method\n",
|
||||||
"We test the web sevice by passing data."
|
"We test the web sevice by passing data.\n",
|
||||||
|
"Run() method retrieves API keys behind the scenes to make sure that call is authenticated."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -293,6 +369,57 @@
|
|||||||
"print(prediction)"
|
"print(prediction)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Test the web service using raw HTTP request (optional)\n",
|
||||||
|
"Alternatively you can construct a raw HTTP request and send it to the service. In this case you need to explicitly pass the HTTP header. This process is shown in the next 2 cells."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# retreive the API keys. AML generates two keys.\n",
|
||||||
|
"'''\n",
|
||||||
|
"key1, Key2 = aks_service.get_keys()\n",
|
||||||
|
"print(key1)\n",
|
||||||
|
"'''"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# construct raw HTTP request and send to the service\n",
|
||||||
|
"'''\n",
|
||||||
|
"%%time\n",
|
||||||
|
"\n",
|
||||||
|
"import requests\n",
|
||||||
|
"\n",
|
||||||
|
"import json\n",
|
||||||
|
"\n",
|
||||||
|
"test_sample = json.dumps({'data': [\n",
|
||||||
|
" [1,2,3,4,5,6,7,8,9,10], \n",
|
||||||
|
" [10,9,8,7,6,5,4,3,2,1]\n",
|
||||||
|
"]})\n",
|
||||||
|
"test_sample = bytes(test_sample,encoding = 'utf8')\n",
|
||||||
|
"\n",
|
||||||
|
"# Don't forget to add key to the HTTP header.\n",
|
||||||
|
"headers = {'Content-Type':'application/json', 'Authorization': 'Bearer ' + key1}\n",
|
||||||
|
"\n",
|
||||||
|
"resp = requests.post(aks_service.scoring_uri, test_sample, headers=headers)\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"prediction:\", resp.text)\n",
|
||||||
|
"'''"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
@@ -317,7 +444,7 @@
|
|||||||
"metadata": {
|
"metadata": {
|
||||||
"authors": [
|
"authors": [
|
||||||
{
|
{
|
||||||
"name": "raymondl"
|
"name": "aashishb"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
|
|||||||
@@ -261,6 +261,31 @@
|
|||||||
"image.wait_for_creation(show_output = True)"
|
"image.wait_for_creation(show_output = True)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"#### Use a custom Docker image\n",
|
||||||
|
"\n",
|
||||||
|
"You can also specify a custom Docker image to be used as base image if you don't want to use the default base image provided by Azure ML. Please make sure the custom Docker image has Ubuntu >= 16.04, Conda >= 4.5.\\* and Python(3.5.\\* or 3.6.\\*).\n",
|
||||||
|
"\n",
|
||||||
|
"Only Supported for `ContainerImage`(from azureml.core.image) with `python` runtime.\n",
|
||||||
|
"```python\n",
|
||||||
|
"# use an image available in public Container Registry without authentication\n",
|
||||||
|
"image_config.base_image = \"mcr.microsoft.com/azureml/o16n-sample-user-base/ubuntu-miniconda\"\n",
|
||||||
|
"\n",
|
||||||
|
"# or, use an image available in a private Container Registry\n",
|
||||||
|
"image_config.base_image = \"myregistry.azurecr.io/mycustomimage:1.0\"\n",
|
||||||
|
"image_config.base_image_registry.address = \"myregistry.azurecr.io\"\n",
|
||||||
|
"image_config.base_image_registry.username = \"username\"\n",
|
||||||
|
"image_config.base_image_registry.password = \"password\"\n",
|
||||||
|
"\n",
|
||||||
|
"# or, use an image built during training.\n",
|
||||||
|
"image_config.base_image = run.properties[\"AzureML.DerivedImageName\"]\n",
|
||||||
|
"```\n",
|
||||||
|
"You can get the address of training image from the properties of a Run object. Only new runs submitted with azureml-sdk>=1.0.22 to AMLCompute targets will have the 'AzureML.DerivedImageName' property. Instructions on how to get a Run can be found in [manage-runs](../../training/manage-runs/manage-runs.ipynb). \n"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
@@ -395,7 +420,7 @@
|
|||||||
"metadata": {
|
"metadata": {
|
||||||
"authors": [
|
"authors": [
|
||||||
{
|
{
|
||||||
"name": "raymondl"
|
"name": "aashishb"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
|
|||||||
@@ -38,18 +38,19 @@ In this directory, there are two types of notebooks:
|
|||||||
|
|
||||||
* The first type of notebooks will introduce you to core Azure Machine Learning Pipelines features. These notebooks below belong in this category, and are designed to go in sequence; they're all located in the "intro-to-pipelines" folder:
|
* The first type of notebooks will introduce you to core Azure Machine Learning Pipelines features. These notebooks below belong in this category, and are designed to go in sequence; they're all located in the "intro-to-pipelines" folder:
|
||||||
|
|
||||||
1. [aml-pipelines-getting-started.ipynb](https://aka.ms/pl-get-started)
|
1. [aml-pipelines-getting-started.ipynb](https://aka.ms/pl-get-started): Start with this notebook to understand the concepts of using Azure Machine Learning Pipelines. This notebook will show you how to runs steps in parallel and in sequence.
|
||||||
2. [aml-pipelines-with-data-dependency-steps.ipynb](https://aka.ms/pl-data-dep)
|
2. [aml-pipelines-with-data-dependency-steps.ipynb](https://aka.ms/pl-data-dep): This notebooks shows how to connect steps in your pipeline using data. Data produced by one step is used by subsequent steps to force an explicit dependency between steps.
|
||||||
3. [aml-pipelines-publish-and-run-using-rest-endpoint.ipynb](https://aka.ms/pl-pub-rep)
|
3. [aml-pipelines-publish-and-run-using-rest-endpoint.ipynb](https://aka.ms/pl-pub-rep): Once you are satisfied with your iterative runs in, you could publish your pipeline to get a REST endpoint which could be invoked from non-Pythons clients as well.
|
||||||
4. [aml-pipelines-data-transfer.ipynb](https://aka.ms/pl-data-trans)
|
4. [aml-pipelines-data-transfer.ipynb](https://aka.ms/pl-data-trans): This notebook shows how you transfer data between supported datastores.
|
||||||
5. [aml-pipelines-use-databricks-as-compute-target.ipynb](https://aka.ms/pl-databricks)
|
5. [aml-pipelines-use-databricks-as-compute-target.ipynb](https://aka.ms/pl-databricks): This notebooks shows how you can use Pipelines to send your compute payload to Azure Databricks.
|
||||||
6. [aml-pipelines-use-adla-as-compute-target.ipynb](https://aka.ms/pl-adla)
|
6. [aml-pipelines-use-adla-as-compute-target.ipynb](https://aka.ms/pl-adla): This notebook shows how you can use Azure Data Lake Analytics (ADLA) as a compute target.
|
||||||
7. [aml-pipelines-parameter-tuning-with-hyperdrive.ipynb](https://aka.ms/pl-hyperdrive)
|
7. [aml-pipelines-how-to-use-estimatorstep.ipynb](https://aka.ms/pl-estimator): This notebook shows how to use the EstimatorStep.
|
||||||
8. [aml-pipelines-how-to-use-azurebatch-to-run-a-windows-executable.ipynb](https://aka.ms/pl-azbatch)
|
7. [aml-pipelines-parameter-tuning-with-hyperdrive.ipynb](https://aka.ms/pl-hyperdrive): HyperDriveStep in Pipelines shows how you can do hyper parameter tuning using Pipelines.
|
||||||
9. [aml-pipelines-setup-schedule-for-a-published-pipeline.ipynb](https://aka.ms/pl-schedule)
|
8. [aml-pipelines-how-to-use-azurebatch-to-run-a-windows-executable.ipynb](https://aka.ms/pl-azbatch): AzureBatchStep can be used to run your custom code in AzureBatch cluster.
|
||||||
10. [aml-pipelines-with-automated-machine-learning-step.ipynb](https://aka.ms/pl-automl)
|
9. [aml-pipelines-setup-schedule-for-a-published-pipeline.ipynb](https://aka.ms/pl-schedule): Once you publish a Pipeline, you can schedule it to trigger based on an interval or on data change in a defined datastore.
|
||||||
|
10. [aml-pipelines-with-automated-machine-learning-step.ipynb](https://aka.ms/pl-automl): AutoMLStep in Pipelines shows how you can do automated machine learning using Pipelines.
|
||||||
|
|
||||||
* The second type of notebooks illustrate more sophisticated scenarios, and are independent of each other. These notebooks include:
|
* The second type of notebooks illustrate more sophisticated scenarios, and are independent of each other. These notebooks include:
|
||||||
|
|
||||||
1. [pipeline-batch-scoring.ipynb](https://aka.ms/pl-batch-score)
|
1. [pipeline-batch-scoring.ipynb](https://aka.ms/pl-batch-score): This notebook demonstrates how to run a batch scoring job using Azure Machine Learning pipelines.
|
||||||
2. [pipeline-style-transfer.ipynb](https://aka.ms/pl-style-trans)
|
2. [pipeline-style-transfer.ipynb](https://aka.ms/pl-style-trans)
|
||||||
|
|||||||
@@ -0,0 +1,15 @@
|
|||||||
|
# Introduction to Azure Machine Learning Pipelines
|
||||||
|
|
||||||
|
The following notebooks provide an introduction to a concept in Azure Machine Learning Pipelines. They will introduce you to core Azure Machine Learning Pipelines features.
|
||||||
|
These notebooks below are designed to go in sequence.
|
||||||
|
|
||||||
|
1. [aml-pipelines-getting-started.ipynb](https://aka.ms/pl-get-started): Start with this notebook to understand the concepts of using Azure Machine Learning Pipelines. This notebook will show you how to runs steps in parallel and in sequence.
|
||||||
|
2. [aml-pipelines-with-data-dependency-steps.ipynb](https://aka.ms/pl-data-dep): This notebooks shows how to connect steps in your pipeline using data. Data produced by one step is used by subsequent steps to force an explicit dependency between steps.
|
||||||
|
3. [aml-pipelines-publish-and-run-using-rest-endpoint.ipynb](https://aka.ms/pl-pub-rep): Once you are satisfied with your iterative runs in, you could publish your pipeline to get a REST endpoint which could be invoked from non-Pythons clients as well.
|
||||||
|
4. [aml-pipelines-data-transfer.ipynb](https://aka.ms/pl-data-trans): This notebook shows how you transfer data between supported datastores.
|
||||||
|
5. [aml-pipelines-use-databricks-as-compute-target.ipynb](https://aka.ms/pl-databricks): This notebooks shows how you can use Pipelines to send your compute payload to Azure Databricks.
|
||||||
|
6. [aml-pipelines-use-adla-as-compute-target.ipynb](https://aka.ms/pl-adla): This notebook shows how you can use Azure Data Lake Analytics (ADLA) as a compute target.
|
||||||
|
7. [aml-pipelines-how-to-use-estimatorstep.ipynb](https://aka.ms/pl-estimator): This notebook shows how to use the EstimatorStep.
|
||||||
|
8. [aml-pipelines-parameter-tuning-with-hyperdrive.ipynb](https://aka.ms/pl-hyperdrive): HyperDriveStep in Pipelines shows how you can do hyper parameter tuning using Pipelines.
|
||||||
|
9. [aml-pipelines-how-to-use-azurebatch-to-run-a-windows-executable.ipynb](https://aka.ms/pl-azbatch): AzureBatchStep can be used to run your custom code in AzureBatch cluster.
|
||||||
|
10. [aml-pipelines-setup-schedule-for-a-published-pipeline.ipynb](https://aka.ms/pl-schedule): Once you publish a Pipeline, you can schedule it to trigger based on an interval or on data change in a defined datastore.
|
||||||
@@ -141,7 +141,7 @@
|
|||||||
" print(\"registered blob datastore with name: %s\" % blob_datastore_name)\n",
|
" print(\"registered blob datastore with name: %s\" % blob_datastore_name)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# CLI:\n",
|
"# CLI:\n",
|
||||||
"# az ml datastore register-blob -n <datastore-name> -a <account-name> -c <container-name> -k <account-key> [-t <sas-token>]"
|
"# az ml datastore attach-blob -n <datastore-name> -a <account-name> -c <container-name> -k <account-key> [-t <sas-token>]"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -303,7 +303,7 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"The following code will create a PythonScriptStep to be executed in the Azure Machine Learning Compute we created above using train.py, one of the files already made available in the project folder.\n",
|
"The following code will create a PythonScriptStep to be executed in the Azure Machine Learning Compute we created above using train.py, one of the files already made available in the project folder.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"A **PythonScriptStep** is a basic, built-in step to run a Python Script on a compute target. It takes a script name and optionally other parameters like arguments for the script, compute target, inputs and outputs. If no compute target is specified, default compute target for the workspace is used."
|
"A **PythonScriptStep** is a basic, built-in step to run a Python Script on a compute target. It takes a script name and optionally other parameters like arguments for the script, compute target, inputs and outputs. If no compute target is specified, default compute target for the workspace is used. You can also use a [**RunConfiguration**](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.runconfiguration?view=azure-ml-py) to specify requirements for the PythonScriptStep, such as conda dependencies and docker image."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -369,10 +369,34 @@
|
|||||||
" compute_target=aml_compute, \n",
|
" compute_target=aml_compute, \n",
|
||||||
" source_directory=project_folder)\n",
|
" source_directory=project_folder)\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"# Use a RunConfiguration to specify some additional requirements for this step.\n",
|
||||||
|
"from azureml.core.runconfig import RunConfiguration\n",
|
||||||
|
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||||
|
"from azureml.core.runconfig import DEFAULT_CPU_IMAGE\n",
|
||||||
|
"\n",
|
||||||
|
"# create a new runconfig object\n",
|
||||||
|
"run_config = RunConfiguration()\n",
|
||||||
|
"\n",
|
||||||
|
"# enable Docker \n",
|
||||||
|
"run_config.environment.docker.enabled = True\n",
|
||||||
|
"\n",
|
||||||
|
"# set Docker base image to the default CPU-based image\n",
|
||||||
|
"run_config.environment.docker.base_image = DEFAULT_CPU_IMAGE\n",
|
||||||
|
"\n",
|
||||||
|
"# use conda_dependencies.yml to create a conda environment in the Docker image for execution\n",
|
||||||
|
"run_config.environment.python.user_managed_dependencies = False\n",
|
||||||
|
"\n",
|
||||||
|
"# auto-prepare the Docker image when used for execution (if it is not already prepared)\n",
|
||||||
|
"run_config.auto_prepare_environment = True\n",
|
||||||
|
"\n",
|
||||||
|
"# specify CondaDependencies obj\n",
|
||||||
|
"run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])\n",
|
||||||
|
"\n",
|
||||||
"step3 = PythonScriptStep(name=\"extract_step\",\n",
|
"step3 = PythonScriptStep(name=\"extract_step\",\n",
|
||||||
" script_name=\"extract.py\", \n",
|
" script_name=\"extract.py\", \n",
|
||||||
" compute_target=aml_compute, \n",
|
" compute_target=aml_compute, \n",
|
||||||
" source_directory=project_folder)\n",
|
" source_directory=project_folder,\n",
|
||||||
|
" runconfig=run_config)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# list of steps to run\n",
|
"# list of steps to run\n",
|
||||||
"steps = [step1, step2, step3]\n",
|
"steps = [step1, step2, step3]\n",
|
||||||
|
|||||||
@@ -0,0 +1,281 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||||
|
"\n",
|
||||||
|
"Licensed under the MIT License."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# How to use EstimatorStep in AML Pipeline\n",
|
||||||
|
"\n",
|
||||||
|
"This notebook shows how to use the EstimatorStep with Azure Machine Learning Pipelines. Estimator is a convenient object in Azure Machine Learning that wraps run configuration information to help simplify the tasks of specifying how a script is executed.\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"## Prerequisite:\n",
|
||||||
|
"* Understand the [architecture and terms](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture) introduced by Azure Machine Learning\n",
|
||||||
|
"* Go through the [configuration notebook](../../../configuration.ipynb) to:\n",
|
||||||
|
" * install the AML SDK\n",
|
||||||
|
" * create a workspace and its configuration file (`config.json`)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Let's get started. First let's import some Python libraries."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import azureml.core\n",
|
||||||
|
"# check core SDK version number\n",
|
||||||
|
"print(\"Azure ML SDK Version: \", azureml.core.VERSION)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Initialize workspace\n",
|
||||||
|
"Initialize a [Workspace](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#workspace) object from the existing workspace you created in the Prerequisites step. `Workspace.from_config()` creates a workspace object from the details stored in `config.json`."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core import Workspace\n",
|
||||||
|
"ws = Workspace.from_config()\n",
|
||||||
|
"print('Workspace name: ' + ws.name, \n",
|
||||||
|
" 'Azure region: ' + ws.location, \n",
|
||||||
|
" 'Subscription id: ' + ws.subscription_id, \n",
|
||||||
|
" 'Resource group: ' + ws.resource_group, sep = '\\n')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Create or Attach existing AmlCompute\n",
|
||||||
|
"You will need to create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for training your model. In this tutorial, you create `AmlCompute` as your training compute resource."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"If we could not find the cluster with the given name, then we will create a new cluster here. We will create an `AmlCompute` cluster of `STANDARD_NC6` GPU VMs. This process is broken down into 3 steps:\n",
|
||||||
|
"1. create the configuration (this step is local and only takes a second)\n",
|
||||||
|
"2. create the cluster (this step will take about **20 seconds**)\n",
|
||||||
|
"3. provision the VMs to bring the cluster to the initial size (of 1 in this case). This step will take about **3-5 minutes** and is providing only sparse output in the process. Please make sure to wait until the call returns before moving to the next cell"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
|
||||||
|
"from azureml.core.compute_target import ComputeTargetException\n",
|
||||||
|
"\n",
|
||||||
|
"# choose a name for your cluster\n",
|
||||||
|
"cluster_name = \"cpucluster\"\n",
|
||||||
|
"\n",
|
||||||
|
"try:\n",
|
||||||
|
" cpu_cluster = ComputeTarget(workspace=ws, name=cluster_name)\n",
|
||||||
|
" print('Found existing compute target')\n",
|
||||||
|
"except ComputeTargetException:\n",
|
||||||
|
" print('Creating a new compute target...')\n",
|
||||||
|
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6', max_nodes=4)\n",
|
||||||
|
"\n",
|
||||||
|
" # create the cluster\n",
|
||||||
|
" cpu_cluster = ComputeTarget.create(ws, cluster_name, compute_config)\n",
|
||||||
|
"\n",
|
||||||
|
" # can poll for a minimum number of nodes and for a specific timeout. \n",
|
||||||
|
" # if no min node count is provided it uses the scale settings for the cluster\n",
|
||||||
|
" cpu_cluster.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)\n",
|
||||||
|
"\n",
|
||||||
|
"# use get_status() to get a detailed status for the current cluster. \n",
|
||||||
|
"print(cpu_cluster.get_status().serialize())"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Now that you have created the compute target, let's see what the workspace's `compute_targets` property returns. You should now see one entry named 'cpucluster' of type `AmlCompute`."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Use a simple script\n",
|
||||||
|
"We have already created a simple \"hello world\" script. This is the script that we will submit through the estimator pattern. It prints a hello-world message, and if Azure ML SDK is installed, it will also logs an array of values ([Fibonacci numbers](https://en.wikipedia.org/wiki/Fibonacci_number))."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Build an Estimator object\n",
|
||||||
|
"Estimator by default will attempt to use Docker-based execution. You can also enable Docker and let estimator pick the default CPU image supplied by Azure ML for execution. You can target an AmlCompute cluster (or any other supported compute target types). You can also customize the conda environment by adding conda and/or pip packages.\n",
|
||||||
|
"\n",
|
||||||
|
"> Note: The arguments to the entry script used in the Estimator object should be specified as *list* using\n",
|
||||||
|
" 'estimator_entry_script_arguments' parameter when instantiating EstimatorStep. Estimator object's parameter\n",
|
||||||
|
" 'script_params' accepts a dictionary. However 'estimator_entry_script_arguments' parameter expects arguments as\n",
|
||||||
|
" a list.\n",
|
||||||
|
"\n",
|
||||||
|
"> Estimator object initialization involves specifying a list of DataReference objects in its 'inputs' parameter.\n",
|
||||||
|
" In Pipelines, a step can take another step's output or DataReferences as input. So when creating an EstimatorStep,\n",
|
||||||
|
" the parameters 'inputs' and 'outputs' need to be set explicitly and that will override 'inputs' parameter\n",
|
||||||
|
" specified in the Estimator object."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core import Datastore\n",
|
||||||
|
"from azureml.data.data_reference import DataReference\n",
|
||||||
|
"from azureml.pipeline.core import PipelineData\n",
|
||||||
|
"\n",
|
||||||
|
"def_blob_store = Datastore(ws, \"workspaceblobstore\")\n",
|
||||||
|
"\n",
|
||||||
|
"input_data = DataReference(\n",
|
||||||
|
" datastore=def_blob_store,\n",
|
||||||
|
" data_reference_name=\"input_data\",\n",
|
||||||
|
" path_on_datastore=\"20newsgroups/20news.pkl\")\n",
|
||||||
|
"\n",
|
||||||
|
"output = PipelineData(\"output\", datastore=def_blob_store)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.train.estimator import Estimator\n",
|
||||||
|
"\n",
|
||||||
|
"est = Estimator(source_directory='.', \n",
|
||||||
|
" compute_target=cpu_cluster, \n",
|
||||||
|
" entry_script='dummy_train.py', \n",
|
||||||
|
" conda_packages=['scikit-learn'])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Create an EstimatorStep\n",
|
||||||
|
"[EstimatorStep](https://docs.microsoft.com/en-us/python/api/azureml-pipeline-steps/azureml.pipeline.steps.estimator_step.estimatorstep?view=azure-ml-py) adds a step to run Estimator in a Pipeline.\n",
|
||||||
|
"\n",
|
||||||
|
"- **name:** Name of the step\n",
|
||||||
|
"- **estimator:** Estimator object\n",
|
||||||
|
"- **estimator_entry_script_arguments:** \n",
|
||||||
|
"- **runconfig_pipeline_params:** Override runconfig properties at runtime using key-value pairs each with name of the runconfig property and PipelineParameter for that property\n",
|
||||||
|
"- **inputs:** Inputs\n",
|
||||||
|
"- **outputs:** Output is list of PipelineData\n",
|
||||||
|
"- **compute_target:** Compute target to use \n",
|
||||||
|
"- **allow_reuse:** Whether the step should reuse previous results when run with the same settings/inputs. If this is false, a new run will always be generated for this step during pipeline execution.\n",
|
||||||
|
"- **version:** Optional version tag to denote a change in functionality for the step"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.pipeline.steps import EstimatorStep\n",
|
||||||
|
"\n",
|
||||||
|
"est_step = EstimatorStep(name=\"Estimator_Train\", \n",
|
||||||
|
" estimator=est, \n",
|
||||||
|
" estimator_entry_script_arguments=[\"--datadir\", input_data, \"--output\", output],\n",
|
||||||
|
" runconfig_pipeline_params=None, \n",
|
||||||
|
" inputs=[input_data], \n",
|
||||||
|
" outputs=[output], \n",
|
||||||
|
" compute_target=cpu_cluster)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Build and Submit the Experiment"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.pipeline.core import Pipeline\n",
|
||||||
|
"from azureml.core import Experiment\n",
|
||||||
|
"pipeline = Pipeline(workspace=ws, steps=[est_step])\n",
|
||||||
|
"pipeline_run = Experiment(ws, 'Estimator_sample').submit(pipeline)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## View Run Details"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.widgets import RunDetails\n",
|
||||||
|
"RunDetails(pipeline_run).show()"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "sanpil"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3.6",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python36"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.6.7"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
@@ -36,7 +36,7 @@
|
|||||||
"from azureml.exceptions import ComputeTargetException\n",
|
"from azureml.exceptions import ComputeTargetException\n",
|
||||||
"from azureml.data.data_reference import DataReference\n",
|
"from azureml.data.data_reference import DataReference\n",
|
||||||
"from azureml.pipeline.steps import HyperDriveStep\n",
|
"from azureml.pipeline.steps import HyperDriveStep\n",
|
||||||
"from azureml.pipeline.core import Pipeline\n",
|
"from azureml.pipeline.core import Pipeline, PipelineData\n",
|
||||||
"from azureml.train.dnn import TensorFlow\n",
|
"from azureml.train.dnn import TensorFlow\n",
|
||||||
"from azureml.train.hyperdrive import *\n",
|
"from azureml.train.hyperdrive import *\n",
|
||||||
"\n",
|
"\n",
|
||||||
@@ -310,11 +310,17 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
"metrics_output_name = 'metrics_output'\n",
|
||||||
|
"metirics_data = PipelineData(name='metrics_data',\n",
|
||||||
|
" datastore=ds,\n",
|
||||||
|
" pipeline_output_name=metrics_output_name)\n",
|
||||||
|
"\n",
|
||||||
"hd_step = HyperDriveStep(\n",
|
"hd_step = HyperDriveStep(\n",
|
||||||
" name=\"hyperdrive_module\",\n",
|
" name=\"hyperdrive_module\",\n",
|
||||||
" hyperdrive_run_config=hd_config,\n",
|
" hyperdrive_run_config=hd_config,\n",
|
||||||
" estimator_entry_script_arguments=['--data-folder', data_folder],\n",
|
" estimator_entry_script_arguments=['--data-folder', data_folder],\n",
|
||||||
" inputs=[data_folder])"
|
" inputs=[data_folder],\n",
|
||||||
|
" metrics_output=metirics_data)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -366,6 +372,40 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"pipeline_run.wait_for_completion()"
|
"pipeline_run.wait_for_completion()"
|
||||||
]
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Retrieve the metrics\n",
|
||||||
|
"Outputs of above run can be used as inputs of other steps in pipeline. In this tutorial, we will show the result metrics."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"metrics_output = pipeline_run.get_pipeline_output(metrics_output_name)\n",
|
||||||
|
"num_file_downloaded = metrics_output.download('.', show_progress=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"import json\n",
|
||||||
|
"with open(metrics_output._path_on_datastore) as f: \n",
|
||||||
|
" metrics_output_result = f.read()\n",
|
||||||
|
" \n",
|
||||||
|
"deserialized_metrics_output = json.loads(metrics_output_result)\n",
|
||||||
|
"df = pd.DataFrame(deserialized_metrics_output)\n",
|
||||||
|
"df"
|
||||||
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
|||||||
@@ -33,7 +33,7 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"import azureml.core\n",
|
"import azureml.core\n",
|
||||||
"from azureml.core import Workspace, Datastore\n",
|
"from azureml.core import Workspace, Datastore, Experiment\n",
|
||||||
"from azureml.core.compute import AmlCompute\n",
|
"from azureml.core.compute import AmlCompute\n",
|
||||||
"from azureml.core.compute import ComputeTarget\n",
|
"from azureml.core.compute import ComputeTarget\n",
|
||||||
"\n",
|
"\n",
|
||||||
@@ -55,10 +55,7 @@
|
|||||||
"print(\"Default datastore's name: {}\".format(def_file_store.name))\n",
|
"print(\"Default datastore's name: {}\".format(def_file_store.name))\n",
|
||||||
"\n",
|
"\n",
|
||||||
"def_blob_store = Datastore(ws, \"workspaceblobstore\")\n",
|
"def_blob_store = Datastore(ws, \"workspaceblobstore\")\n",
|
||||||
"print(\"Blobstore's name: {}\".format(def_blob_store.name))\n",
|
"print(\"Blobstore's name: {}\".format(def_blob_store.name))"
|
||||||
"\n",
|
|
||||||
"# project folder\n",
|
|
||||||
"project_folder = '.'"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -160,7 +157,7 @@
|
|||||||
" inputs=[blob_input_data],\n",
|
" inputs=[blob_input_data],\n",
|
||||||
" outputs=[processed_data1],\n",
|
" outputs=[processed_data1],\n",
|
||||||
" compute_target=aml_compute, \n",
|
" compute_target=aml_compute, \n",
|
||||||
" source_directory=project_folder\n",
|
" source_directory='.'\n",
|
||||||
")\n",
|
")\n",
|
||||||
"print(\"trainStep created\")"
|
"print(\"trainStep created\")"
|
||||||
]
|
]
|
||||||
@@ -191,7 +188,7 @@
|
|||||||
" inputs=[processed_data1],\n",
|
" inputs=[processed_data1],\n",
|
||||||
" outputs=[processed_data2],\n",
|
" outputs=[processed_data2],\n",
|
||||||
" compute_target=aml_compute, \n",
|
" compute_target=aml_compute, \n",
|
||||||
" source_directory=project_folder)\n",
|
" source_directory='.')\n",
|
||||||
"print(\"extractStep created\")"
|
"print(\"extractStep created\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -252,7 +249,7 @@
|
|||||||
" inputs=[processed_data1, processed_data2],\n",
|
" inputs=[processed_data1, processed_data2],\n",
|
||||||
" outputs=[processed_data3], \n",
|
" outputs=[processed_data3], \n",
|
||||||
" compute_target=aml_compute, \n",
|
" compute_target=aml_compute, \n",
|
||||||
" source_directory=project_folder)\n",
|
" source_directory='.')\n",
|
||||||
"print(\"compareStep created\")"
|
"print(\"compareStep created\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -270,10 +267,7 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"pipeline1 = Pipeline(workspace=ws, steps=[compareStep])\n",
|
"pipeline1 = Pipeline(workspace=ws, steps=[compareStep])\n",
|
||||||
"print (\"Pipeline is built\")\n",
|
"print (\"Pipeline is built\")"
|
||||||
"\n",
|
|
||||||
"pipeline1.validate()\n",
|
|
||||||
"print(\"Simple validation complete\") "
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -290,10 +284,38 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"published_pipeline1 = pipeline1.publish(name=\"My_New_Pipeline\", description=\"My Published Pipeline Description\")\n",
|
"published_pipeline1 = pipeline1.publish(name=\"My_New_Pipeline\", description=\"My Published Pipeline Description\", continue_on_step_failure=True)\n",
|
||||||
"published_pipeline1"
|
"published_pipeline1"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Note: the continue_on_step_failure parameter specifies whether the execution of steps in the Pipeline will continue if one step fails. The default value is False, meaning when one step fails, the Pipeline execution will stop, canceling any running steps."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Publish the pipeline from a submitted PipelineRun\n",
|
||||||
|
"It is also possible to publish a pipeline from a submitted PipelineRun"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# submit a pipeline run\n",
|
||||||
|
"pipeline_run1 = Experiment(ws, 'Pipeline_experiment').submit(pipeline1)\n",
|
||||||
|
"# publish a pipeline from the submitted pipeline run\n",
|
||||||
|
"published_pipeline2 = pipeline_run1.publish_pipeline(name=\"My_New_Pipeline2\", description=\"My Published Pipeline Description\", version=\"0.1\", continue_on_step_failure=True)\n",
|
||||||
|
"published_pipeline2"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
@@ -325,7 +347,8 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### Run published pipeline using its REST endpoint"
|
"### Run published pipeline using its REST endpoint\n",
|
||||||
|
"[This notebook](https://aka.ms/pl-restep-auth) shows how to authenticate to AML workspace."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -107,15 +107,11 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"from azureml.pipeline.steps import PythonScriptStep\n",
|
"from azureml.pipeline.steps import PythonScriptStep\n",
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
|
||||||
"# project folder\n",
|
|
||||||
"project_folder = 'scripts'\n",
|
|
||||||
"\n",
|
|
||||||
"trainStep = PythonScriptStep(\n",
|
"trainStep = PythonScriptStep(\n",
|
||||||
" name=\"Training_Step\",\n",
|
" name=\"Training_Step\",\n",
|
||||||
" script_name=\"train.py\", \n",
|
" script_name=\"train.py\", \n",
|
||||||
" compute_target=aml_compute_target, \n",
|
" compute_target=aml_compute_target, \n",
|
||||||
" source_directory=project_folder\n",
|
" source_directory='.'\n",
|
||||||
")\n",
|
")\n",
|
||||||
"print(\"TrainStep created\")"
|
"print(\"TrainStep created\")"
|
||||||
]
|
]
|
||||||
@@ -136,9 +132,7 @@
|
|||||||
"from azureml.pipeline.core import Pipeline\n",
|
"from azureml.pipeline.core import Pipeline\n",
|
||||||
"\n",
|
"\n",
|
||||||
"pipeline1 = Pipeline(workspace=ws, steps=[trainStep])\n",
|
"pipeline1 = Pipeline(workspace=ws, steps=[trainStep])\n",
|
||||||
"print (\"Pipeline is built\")\n",
|
"print (\"Pipeline is built\")"
|
||||||
"\n",
|
|
||||||
"pipeline1.validate()"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -255,11 +249,12 @@
|
|||||||
"schedules = Schedule.get_all(ws, pipeline_id=pub_pipeline_id)\n",
|
"schedules = Schedule.get_all(ws, pipeline_id=pub_pipeline_id)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# We will iterate through the list of schedules and \n",
|
"# We will iterate through the list of schedules and \n",
|
||||||
"# use the last ID in the list for further operations: \n",
|
"# use the last recurrence schedule in the list for further operations: \n",
|
||||||
"print(\"Found these schedules for the pipeline id {}:\".format(pub_pipeline_id))\n",
|
"print(\"Found these schedules for the pipeline id {}:\".format(pub_pipeline_id))\n",
|
||||||
"for schedule in schedules: \n",
|
"for schedule in schedules: \n",
|
||||||
" print(schedule.id)\n",
|
" print(schedule.id)\n",
|
||||||
" schedule_id = schedule.id\n",
|
" if schedule.recurrence is not None:\n",
|
||||||
|
" schedule_id = schedule.id\n",
|
||||||
"\n",
|
"\n",
|
||||||
"print(\"Schedule id to be used for schedule operations: {}\".format(schedule_id))"
|
"print(\"Schedule id to be used for schedule operations: {}\".format(schedule_id))"
|
||||||
]
|
]
|
||||||
@@ -380,7 +375,8 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### Create a schedule for the pipeline using a Datastore\n",
|
"### Create a schedule for the pipeline using a Datastore\n",
|
||||||
"This schedule will run when additions or modifications are made to Blobs in the Datastore container.\n",
|
"This schedule will run when additions or modifications are made to Blobs in the Datastore.\n",
|
||||||
|
"By default, the Datastore container is monitored for changes. Use the path_on_datastore parameter to instead specify a path on the Datastore to monitor for changes. Changes made to subfolders in the container/path will not trigger the schedule.\n",
|
||||||
"Note: Only Blob Datastores are supported."
|
"Note: Only Blob Datastores are supported."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -400,6 +396,7 @@
|
|||||||
" datastore=datastore,\n",
|
" datastore=datastore,\n",
|
||||||
" wait_for_provisioning=True,\n",
|
" wait_for_provisioning=True,\n",
|
||||||
" description=\"Schedule Run\")\n",
|
" description=\"Schedule Run\")\n",
|
||||||
|
" #path_on_datastore=\"file/path\") use path_on_datastore to specify a specific folder to monitor for changes.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# You may want to make sure that the schedule is provisioned properly\n",
|
"# You may want to make sure that the schedule is provisioned properly\n",
|
||||||
"# before making any further changes to the schedule\n",
|
"# before making any further changes to the schedule\n",
|
||||||
|
|||||||
@@ -168,7 +168,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## Data Connections with Inputs and Outputs\n",
|
"## Data Connections with Inputs and Outputs\n",
|
||||||
"The DatabricksStep supports Azure Bloband ADLS for inputs and outputs. You also will need to define a [Secrets](https://docs.azuredatabricks.net/user-guide/secrets/index.html) scope to enable authentication to external data sources such as Blob and ADLS from Databricks.\n",
|
"The DatabricksStep supports Azure Blob and ADLS for inputs and outputs. You also will need to define a [Secrets](https://docs.azuredatabricks.net/user-guide/secrets/index.html) scope to enable authentication to external data sources such as Blob and ADLS from Databricks.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"- Databricks documentation on [Azure Blob](https://docs.azuredatabricks.net/spark/latest/data-sources/azure/azure-storage.html)\n",
|
"- Databricks documentation on [Azure Blob](https://docs.azuredatabricks.net/spark/latest/data-sources/azure/azure-storage.html)\n",
|
||||||
"- Databricks documentation on [ADLS](https://docs.databricks.com/spark/latest/data-sources/azure/azure-datalake.html)\n",
|
"- Databricks documentation on [ADLS](https://docs.databricks.com/spark/latest/data-sources/azure/azure-datalake.html)\n",
|
||||||
|
|||||||
@@ -0,0 +1,517 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Copyright (c) Microsoft Corporation. All rights reserved. \n",
|
||||||
|
"Licensed under the MIT License."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Azure Machine Learning Pipeline with AutoMLStep\n",
|
||||||
|
"This notebook demonstrates the use of AutoMLStep in Azure Machine Learning Pipeline."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Introduction\n",
|
||||||
|
"In this example we use the scikit-learn's [digit dataset](http://scikit-learn.org/stable/datasets/index.html#optical-recognition-of-handwritten-digits-dataset) to showcase how you can use AutoML for a simple classification problem.\n",
|
||||||
|
"\n",
|
||||||
|
"Make sure you have executed the [configuration](../../../configuration.ipynb) before running this notebook.\n",
|
||||||
|
"\n",
|
||||||
|
"In this notebook you would see\n",
|
||||||
|
"1. Create an `Experiment` in an existing `Workspace`.\n",
|
||||||
|
"2. Create or Attach existing AmlCompute to a workspace.\n",
|
||||||
|
"3. Configure AutoML using `AutoMLConfig`.\n",
|
||||||
|
"4. Use AutoMLStep\n",
|
||||||
|
"5. Train the model using AmlCompute\n",
|
||||||
|
"6. Explore the results.\n",
|
||||||
|
"7. Test the best fitted model.\n",
|
||||||
|
"\n",
|
||||||
|
"In addition this notebook showcases the following features\n",
|
||||||
|
"- **Parallel** executions for iterations\n",
|
||||||
|
"- **Asynchronous** tracking of progress\n",
|
||||||
|
"- Retrieving models for any iteration or logged metric\n",
|
||||||
|
"- Specifying AutoML settings as `**kwargs`"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Azure Machine Learning and Pipeline SDK-specific imports"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import logging\n",
|
||||||
|
"import os\n",
|
||||||
|
"import csv\n",
|
||||||
|
"\n",
|
||||||
|
"from matplotlib import pyplot as plt\n",
|
||||||
|
"import numpy as np\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"from sklearn import datasets\n",
|
||||||
|
"\n",
|
||||||
|
"import azureml.core\n",
|
||||||
|
"from azureml.core.experiment import Experiment\n",
|
||||||
|
"from azureml.core.workspace import Workspace\n",
|
||||||
|
"from azureml.train.automl import AutoMLConfig\n",
|
||||||
|
"from azureml.core.compute import AmlCompute\n",
|
||||||
|
"from azureml.core.compute import ComputeTarget\n",
|
||||||
|
"from azureml.core.runconfig import RunConfiguration\n",
|
||||||
|
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||||
|
"\n",
|
||||||
|
"from azureml.train.automl import AutoMLStep\n",
|
||||||
|
"\n",
|
||||||
|
"# Check core SDK version number\n",
|
||||||
|
"print(\"SDK version:\", azureml.core.VERSION)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Initialize Workspace\n",
|
||||||
|
"Initialize a workspace object from persisted configuration. Make sure the config file is present at .\\config.json"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"ws = Workspace.from_config()\n",
|
||||||
|
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Create an Azure ML experiment\n",
|
||||||
|
"Let's create an experiment named \"automl-classification\" and a folder to hold the training scripts. The script runs will be recorded under the experiment in Azure.\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Choose a name for the run history container in the workspace.\n",
|
||||||
|
"experiment_name = 'automlstep-classification'\n",
|
||||||
|
"project_folder = './project'\n",
|
||||||
|
"\n",
|
||||||
|
"experiment = Experiment(ws, experiment_name)\n",
|
||||||
|
"experiment"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Create or Attach existing AmlCompute\n",
|
||||||
|
"You will need to create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for your AutoML run. In this tutorial, you create `AmlCompute` as your training compute resource.\n",
|
||||||
|
"\n",
|
||||||
|
"**Creation of AmlCompute takes approximately 5 minutes.** If the AmlCompute with that name is already in your workspace this code will skip the creation process."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Choose a name for your cluster.\n",
|
||||||
|
"amlcompute_cluster_name = \"cpucluster\"\n",
|
||||||
|
"\n",
|
||||||
|
"found = False\n",
|
||||||
|
"# Check if this compute target already exists in the workspace.\n",
|
||||||
|
"cts = ws.compute_targets\n",
|
||||||
|
"if amlcompute_cluster_name in cts and cts[amlcompute_cluster_name].type == 'AmlCompute':\n",
|
||||||
|
" found = True\n",
|
||||||
|
" print('Found existing compute target.')\n",
|
||||||
|
" compute_target = cts[amlcompute_cluster_name]\n",
|
||||||
|
" \n",
|
||||||
|
"if not found:\n",
|
||||||
|
" print('Creating a new compute target...')\n",
|
||||||
|
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\", # for GPU, use \"STANDARD_NC6\"\n",
|
||||||
|
" #vm_priority = 'lowpriority', # optional\n",
|
||||||
|
" max_nodes = 4)\n",
|
||||||
|
"\n",
|
||||||
|
" # Create the cluster.\n",
|
||||||
|
" compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)\n",
|
||||||
|
" \n",
|
||||||
|
" # Can poll for a minimum number of nodes and for a specific timeout.\n",
|
||||||
|
" # If no min_node_count is provided, it will use the scale settings for the cluster.\n",
|
||||||
|
" compute_target.wait_for_completion(show_output = True, min_node_count = 1, timeout_in_minutes = 10)\n",
|
||||||
|
" \n",
|
||||||
|
" # For a more detailed view of current AmlCompute status, use get_status()."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Prepare and Point to Data\n",
|
||||||
|
"For remote executions, you need to make the data accessible from the remote compute.\n",
|
||||||
|
"This can be done by uploading the data to DataStore.\n",
|
||||||
|
"In this example, we upload scikit-learn's [load_digits](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html) data."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"data_train = datasets.load_digits()\n",
|
||||||
|
"\n",
|
||||||
|
"if not os.path.isdir('data'):\n",
|
||||||
|
" os.mkdir('data')\n",
|
||||||
|
" \n",
|
||||||
|
"if not os.path.exists(project_folder):\n",
|
||||||
|
" os.makedirs(project_folder)\n",
|
||||||
|
" \n",
|
||||||
|
"pd.DataFrame(data_train.data).to_csv(\"data/X_train.tsv\", index=False, header=False, quoting=csv.QUOTE_ALL, sep=\"\\t\")\n",
|
||||||
|
"pd.DataFrame(data_train.target).to_csv(\"data/y_train.tsv\", index=False, header=False, sep=\"\\t\")\n",
|
||||||
|
"\n",
|
||||||
|
"ds = ws.get_default_datastore()\n",
|
||||||
|
"ds.upload(src_dir='./data', target_path='bai_data', overwrite=True, show_progress=True)\n",
|
||||||
|
"\n",
|
||||||
|
"from azureml.data.data_reference import DataReference \n",
|
||||||
|
"input_data = DataReference(datastore=ds, \n",
|
||||||
|
" data_reference_name=\"input_data_reference\",\n",
|
||||||
|
" path_on_datastore='bai_data',\n",
|
||||||
|
" mode='download',\n",
|
||||||
|
" path_on_compute='/tmp/azureml_runs',\n",
|
||||||
|
" overwrite=False)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# create a new RunConfig object\n",
|
||||||
|
"conda_run_config = RunConfiguration(framework=\"python\")\n",
|
||||||
|
"\n",
|
||||||
|
"# Set compute target to AmlCompute\n",
|
||||||
|
"#conda_run_config.target = compute_target\n",
|
||||||
|
"\n",
|
||||||
|
"conda_run_config.environment.docker.enabled = True\n",
|
||||||
|
"conda_run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n",
|
||||||
|
"\n",
|
||||||
|
"cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]'], \n",
|
||||||
|
" conda_packages=['numpy', 'py-xgboost'], \n",
|
||||||
|
" pin_sdk_version=False)\n",
|
||||||
|
"conda_run_config.environment.python.conda_dependencies = cd\n",
|
||||||
|
"\n",
|
||||||
|
"print('run config is ready')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"%%writefile $project_folder/get_data.py\n",
|
||||||
|
"\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"\n",
|
||||||
|
"def get_data():\n",
|
||||||
|
" X_train = pd.read_csv(\"/tmp/azureml_runs/bai_data/X_train.tsv\", delimiter=\"\\t\", header=None, quotechar='\"')\n",
|
||||||
|
" y_train = pd.read_csv(\"/tmp/azureml_runs/bai_data/y_train.tsv\", delimiter=\"\\t\", header=None, quotechar='\"')\n",
|
||||||
|
"\n",
|
||||||
|
" return { \"X\" : X_train.values, \"y\" : y_train[0].values }\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Set up AutoMLConfig for Training\n",
|
||||||
|
"\n",
|
||||||
|
"You can specify `automl_settings` as `**kwargs` as well. Also note that you can use a `get_data()` function for local excutions too.\n",
|
||||||
|
"\n",
|
||||||
|
"**Note:** When using AmlCompute, you can't pass Numpy arrays directly to the fit method.\n",
|
||||||
|
"\n",
|
||||||
|
"|Property|Description|\n",
|
||||||
|
"|-|-|\n",
|
||||||
|
"|**primary_metric**|This is the metric that you want to optimize. Classification supports the following primary metrics: <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>average_precision_score_weighted</i><br><i>norm_macro_recall</i><br><i>precision_score_weighted</i>|\n",
|
||||||
|
"|**iteration_timeout_minutes**|Time limit in minutes for each iteration.|\n",
|
||||||
|
"|**iterations**|Number of iterations. In each iteration AutoML trains a specific pipeline with the data.|\n",
|
||||||
|
"|**n_cross_validations**|Number of cross validation splits.|\n",
|
||||||
|
"|**max_concurrent_iterations**|Maximum number of iterations that would be executed in parallel. This should be less than the number of cores on the DSVM.|"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"automl_settings = {\n",
|
||||||
|
" \"iteration_timeout_minutes\": 5,\n",
|
||||||
|
" \"iterations\": 20,\n",
|
||||||
|
" \"n_cross_validations\": 5,\n",
|
||||||
|
" \"primary_metric\": 'AUC_weighted',\n",
|
||||||
|
" \"preprocess\": False,\n",
|
||||||
|
" \"max_concurrent_iterations\": 3,\n",
|
||||||
|
" \"verbosity\": logging.INFO\n",
|
||||||
|
"}\n",
|
||||||
|
"automl_config = AutoMLConfig(task = 'classification',\n",
|
||||||
|
" debug_log = 'automl_errors.log',\n",
|
||||||
|
" path = project_folder,\n",
|
||||||
|
" compute_target=compute_target,\n",
|
||||||
|
" run_configuration=conda_run_config,\n",
|
||||||
|
" data_script = project_folder + \"/get_data.py\",\n",
|
||||||
|
" **automl_settings\n",
|
||||||
|
" )"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Call the `submit` method on the experiment object and pass the run configuration. For remote runs the execution is asynchronous, so you will see the iterations get populated as they complete. You can interact with the widgets and models even when the experiment is running to retrieve the best model up to that point. Once you are satisfied with the model, you can cancel a particular iteration or the whole run.\n",
|
||||||
|
"In this example, we specify `show_output = False` to suppress console output while the run is in progress."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Define AutoMLStep"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.pipeline.core import PipelineData, TrainingOutput\n",
|
||||||
|
"\n",
|
||||||
|
"metrics_output_name = 'metrics_output'\n",
|
||||||
|
"best_model_output_name = 'best_model_output'\n",
|
||||||
|
"\n",
|
||||||
|
"metirics_data = PipelineData(name='metrics_data',\n",
|
||||||
|
" datastore=ds,\n",
|
||||||
|
" pipeline_output_name=metrics_output_name,\n",
|
||||||
|
" training_output=TrainingOutput(type='Metrics'))\n",
|
||||||
|
"model_data = PipelineData(name='model_data',\n",
|
||||||
|
" datastore=ds,\n",
|
||||||
|
" pipeline_output_name=best_model_output_name,\n",
|
||||||
|
" training_output=TrainingOutput(type='Model'))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"automl_step = AutoMLStep(\n",
|
||||||
|
" name='automl_module',\n",
|
||||||
|
" experiment=experiment,\n",
|
||||||
|
" automl_config=automl_config,\n",
|
||||||
|
" inputs=[input_data],\n",
|
||||||
|
" outputs=[metirics_data, model_data],\n",
|
||||||
|
" allow_reuse=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.pipeline.core import Pipeline\n",
|
||||||
|
"pipeline = Pipeline(\n",
|
||||||
|
" description=\"pipeline_with_automlstep\",\n",
|
||||||
|
" workspace=ws, \n",
|
||||||
|
" steps=[automl_step])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"pipeline_run = experiment.submit(pipeline)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.widgets import RunDetails\n",
|
||||||
|
"RunDetails(pipeline_run).show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"pipeline_run.wait_for_completion()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Examine Results\n",
|
||||||
|
"\n",
|
||||||
|
"### Retrieve the metrics of all child runs\n",
|
||||||
|
"Outputs of above run can be used as inputs of other steps in pipeline. In this tutorial, we will examine the outputs by retrieve output data and running some tests."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"metrics_output = pipeline_run.get_pipeline_output(metrics_output_name)\n",
|
||||||
|
"num_file_downloaded = metrics_output.download('.', show_progress=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import json\n",
|
||||||
|
"with open(metrics_output._path_on_datastore) as f: \n",
|
||||||
|
" metrics_output_result = f.read()\n",
|
||||||
|
" \n",
|
||||||
|
"deserialized_metrics_output = json.loads(metrics_output_result)\n",
|
||||||
|
"df = pd.DataFrame(deserialized_metrics_output)\n",
|
||||||
|
"df"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Retrieve the Best Model"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"best_model_output = pipeline_run.get_pipeline_output(best_model_output_name)\n",
|
||||||
|
"num_file_downloaded = best_model_output.download('.', show_progress=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
" import pickle\n",
|
||||||
|
"\n",
|
||||||
|
" with open(best_model_output._path_on_datastore, \"rb\" ) as f:\n",
|
||||||
|
" best_model = pickle.load(f)\n",
|
||||||
|
" best_model"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Test the Model\n",
|
||||||
|
"#### Load Test Data"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"digits = datasets.load_digits()\n",
|
||||||
|
"X_test = digits.data[:10, :]\n",
|
||||||
|
"y_test = digits.target[:10]\n",
|
||||||
|
"images = digits.images[:10]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"#### Testing Best Model"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Randomly select digits and test.\n",
|
||||||
|
"for index in np.random.choice(len(y_test), 3, replace = False):\n",
|
||||||
|
" print(index)\n",
|
||||||
|
" predicted = best_model.predict(X_test[index:index + 1])[0]\n",
|
||||||
|
" label = y_test[index]\n",
|
||||||
|
" title = \"Label value = %d Predicted value = %d \" % (label, predicted)\n",
|
||||||
|
" fig = plt.figure(1, figsize=(3,3))\n",
|
||||||
|
" ax1 = fig.add_axes((0,0,.8,.8))\n",
|
||||||
|
" ax1.set_title(title)\n",
|
||||||
|
" plt.imshow(images[index], cmap = plt.cm.gray_r, interpolation = 'nearest')\n",
|
||||||
|
" plt.show()"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "sanpil"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3.6",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python36"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.6.7"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
@@ -83,10 +83,10 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# project folder\n",
|
"# source directory\n",
|
||||||
"project_folder = '.'\n",
|
"source_directory = '.'\n",
|
||||||
" \n",
|
" \n",
|
||||||
"print('Sample projects will be created in {}.'.format(project_folder))"
|
"print('Sample scripts will be created in {} directory.'.format(source_directory))"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -259,6 +259,44 @@
|
|||||||
"**Open `train.py` in the local machine and examine the arguments, inputs, and outputs for the script. That will give you a good sense of why the script argument names used below are important.** "
|
"**Open `train.py` in the local machine and examine the arguments, inputs, and outputs for the script. That will give you a good sense of why the script argument names used below are important.** "
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"#### Specify conda dependencies and a base docker image through a RunConfiguration\n",
|
||||||
|
"\n",
|
||||||
|
"This step uses a docker image and scikit-learn, use a [**RunConfiguration**](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.runconfiguration?view=azure-ml-py) to specify these requirements and use when creating the PythonScriptStep. "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.runconfig import RunConfiguration\n",
|
||||||
|
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||||
|
"from azureml.core.runconfig import DEFAULT_CPU_IMAGE\n",
|
||||||
|
"\n",
|
||||||
|
"# create a new runconfig object\n",
|
||||||
|
"run_config = RunConfiguration()\n",
|
||||||
|
"\n",
|
||||||
|
"# enable Docker \n",
|
||||||
|
"run_config.environment.docker.enabled = True\n",
|
||||||
|
"\n",
|
||||||
|
"# set Docker base image to the default CPU-based image\n",
|
||||||
|
"run_config.environment.docker.base_image = DEFAULT_CPU_IMAGE\n",
|
||||||
|
"\n",
|
||||||
|
"# use conda_dependencies.yml to create a conda environment in the Docker image for execution\n",
|
||||||
|
"run_config.environment.python.user_managed_dependencies = False\n",
|
||||||
|
"\n",
|
||||||
|
"# auto-prepare the Docker image when used for execution (if it is not already prepared)\n",
|
||||||
|
"run_config.auto_prepare_environment = True\n",
|
||||||
|
"\n",
|
||||||
|
"# specify CondaDependencies obj\n",
|
||||||
|
"run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
@@ -273,7 +311,8 @@
|
|||||||
" inputs=[blob_input_data],\n",
|
" inputs=[blob_input_data],\n",
|
||||||
" outputs=[processed_data1],\n",
|
" outputs=[processed_data1],\n",
|
||||||
" compute_target=aml_compute, \n",
|
" compute_target=aml_compute, \n",
|
||||||
" source_directory=project_folder\n",
|
" source_directory=source_directory,\n",
|
||||||
|
" runconfig=run_config\n",
|
||||||
")\n",
|
")\n",
|
||||||
"print(\"trainStep created\")"
|
"print(\"trainStep created\")"
|
||||||
]
|
]
|
||||||
@@ -304,7 +343,7 @@
|
|||||||
" inputs=[processed_data1],\n",
|
" inputs=[processed_data1],\n",
|
||||||
" outputs=[processed_data2],\n",
|
" outputs=[processed_data2],\n",
|
||||||
" compute_target=aml_compute, \n",
|
" compute_target=aml_compute, \n",
|
||||||
" source_directory=project_folder)\n",
|
" source_directory=source_directory)\n",
|
||||||
"print(\"extractStep created\")"
|
"print(\"extractStep created\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -312,8 +351,10 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"#### Define a Step that consumes multiple intermediate data and produces intermediate data\n",
|
"#### Define a Step that consumes intermediate data and existing data and produces intermediate data\n",
|
||||||
"In this step, we define a step that consumes multiple intermediate data and produces intermediate data.\n",
|
"In this step, we define a step that consumes multiple data types and produces intermediate data.\n",
|
||||||
|
"\n",
|
||||||
|
"This step uses the output generated from the previous step as well as existing data on a DataStore. The location of the existing data is specified using a [**PipelineParameter**](https://docs.microsoft.com/en-us/python/api/azureml-pipeline-core/azureml.pipeline.core.pipelineparameter?view=azure-ml-py) and a [**DataPath**](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.data.datapath.datapath?view=azure-ml-py). Using a PipelineParameter enables easy modification of the data location when the Pipeline is published and resubmitted.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"**Open `compare.py` in the local machine and examine the arguments, inputs, and outputs for the script. That will give you a good sense of why the script argument names used below are important.**"
|
"**Open `compare.py` in the local machine and examine the arguments, inputs, and outputs for the script. That will give you a good sense of why the script argument names used below are important.**"
|
||||||
]
|
]
|
||||||
@@ -324,16 +365,31 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# Now define step6 that takes two inputs (both intermediate data), and produce an output\n",
|
"# Reference the data uploaded to blob storage using a PipelineParameter and a DataPath\n",
|
||||||
|
"from azureml.pipeline.core import PipelineParameter\n",
|
||||||
|
"from azureml.data.datapath import DataPath, DataPathComputeBinding\n",
|
||||||
|
"\n",
|
||||||
|
"datapath = DataPath(datastore=def_blob_store, path_on_datastore='20newsgroups/20news.pkl')\n",
|
||||||
|
"datapath_param = PipelineParameter(name=\"compare_data\", default_value=datapath)\n",
|
||||||
|
"data_parameter1 = (datapath_param, DataPathComputeBinding(mode='mount'))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Now define the compare step which takes two inputs and produces an output\n",
|
||||||
"processed_data3 = PipelineData(\"processed_data3\", datastore=def_blob_store)\n",
|
"processed_data3 = PipelineData(\"processed_data3\", datastore=def_blob_store)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"compareStep = PythonScriptStep(\n",
|
"compareStep = PythonScriptStep(\n",
|
||||||
" script_name=\"compare.py\",\n",
|
" script_name=\"compare.py\",\n",
|
||||||
" arguments=[\"--compare_data1\", processed_data1, \"--compare_data2\", processed_data2, \"--output_compare\", processed_data3],\n",
|
" arguments=[\"--compare_data1\", data_parameter1, \"--compare_data2\", processed_data2, \"--output_compare\", processed_data3],\n",
|
||||||
" inputs=[processed_data1, processed_data2],\n",
|
" inputs=[data_parameter1, processed_data2],\n",
|
||||||
" outputs=[processed_data3], \n",
|
" outputs=[processed_data3], \n",
|
||||||
" compute_target=aml_compute, \n",
|
" compute_target=aml_compute, \n",
|
||||||
" source_directory=project_folder)\n",
|
" source_directory=source_directory)\n",
|
||||||
"print(\"compareStep created\")"
|
"print(\"compareStep created\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -351,10 +407,7 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"pipeline1 = Pipeline(workspace=ws, steps=[compareStep])\n",
|
"pipeline1 = Pipeline(workspace=ws, steps=[compareStep])\n",
|
||||||
"print (\"Pipeline is built\")\n",
|
"print (\"Pipeline is built\")"
|
||||||
"\n",
|
|
||||||
"pipeline1.validate()\n",
|
|
||||||
"print(\"Simple validation complete\") "
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -0,0 +1,30 @@
|
|||||||
|
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||||
|
# Licensed under the MIT License.
|
||||||
|
import argparse
|
||||||
|
import os
|
||||||
|
|
||||||
|
print("*********************************************************")
|
||||||
|
print("Hello Azure ML!")
|
||||||
|
|
||||||
|
parser = argparse.ArgumentParser()
|
||||||
|
parser.add_argument('--datadir', type=str, help="data directory")
|
||||||
|
parser.add_argument('--output', type=str, help="output")
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
print("Argument 1: %s" % args.datadir)
|
||||||
|
print("Argument 2: %s" % args.output)
|
||||||
|
|
||||||
|
if not (args.output is None):
|
||||||
|
os.makedirs(args.output, exist_ok=True)
|
||||||
|
print("%s created" % args.output)
|
||||||
|
|
||||||
|
try:
|
||||||
|
from azureml.core import Run
|
||||||
|
run = Run.get_context()
|
||||||
|
print("Log Fibonacci numbers.")
|
||||||
|
run.log_list('Fibonacci numbers', [0, 1, 1, 2, 3, 5, 8, 13, 21, 34])
|
||||||
|
run.complete()
|
||||||
|
except:
|
||||||
|
print("Warning: you need to install Azure ML SDK in order to log metrics.")
|
||||||
|
|
||||||
|
print("*********************************************************")
|
||||||
@@ -508,7 +508,8 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### Get AAD token"
|
"### Get AAD token\n",
|
||||||
|
"[This notebook](https://aka.ms/pl-restep-auth) shows how to authenticate to AML workspace."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -492,7 +492,8 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## Get AAD token"
|
"## Get AAD token\n",
|
||||||
|
"[This notebook](https://aka.ms/pl-restep-auth) shows how to authenticate to AML workspace."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -1,253 +1,253 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Licensed under the MIT License.\n",
|
"Licensed under the MIT License.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"## Authentication in Azure Machine Learning\n",
|
"## Authentication in Azure Machine Learning\n",
|
||||||
"\n",
|
"\n",
|
||||||
"This notebook shows you how to authenticate to your Azure ML Workspace using\n",
|
"This notebook shows you how to authenticate to your Azure ML Workspace using\n",
|
||||||
"\n",
|
"\n",
|
||||||
" 1. Interactive Login Authentication\n",
|
" 1. Interactive Login Authentication\n",
|
||||||
" 2. Azure CLI Authentication\n",
|
" 2. Azure CLI Authentication\n",
|
||||||
" 3. Service Principal Authentication\n",
|
" 3. Service Principal Authentication\n",
|
||||||
" \n",
|
" \n",
|
||||||
"The interactive authentication is suitable for local experimentation on your own computer. Azure CLI authentication is suitable if you are already using Azure CLI for managing Azure resources, and want to sign in only once. The Service Principal authentication is suitable for automated workflows, for example as part of Azure Devops build."
|
"The interactive authentication is suitable for local experimentation on your own computer. Azure CLI authentication is suitable if you are already using Azure CLI for managing Azure resources, and want to sign in only once. The Service Principal authentication is suitable for automated workflows, for example as part of Azure Devops build."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from azureml.core import Workspace"
|
"from azureml.core import Workspace"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### Interactive Authentication\n",
|
"### Interactive Authentication\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Interactive authentication is the default mode when using Azure ML SDK.\n",
|
"Interactive authentication is the default mode when using Azure ML SDK.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"When you connect to your workspace using workspace.from_config, you will get an interactive login dialog."
|
"When you connect to your workspace using workspace.from_config, you will get an interactive login dialog."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"ws = Workspace.from_config()"
|
"ws = Workspace.from_config()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"Also, if you explicitly specify the subscription ID, resource group and resource group, you will get the dialog."
|
"Also, if you explicitly specify the subscription ID, resource group and resource group, you will get the dialog."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"ws = Workspace(subscription_id=\"my-subscription-id\",\n",
|
"ws = Workspace(subscription_id=\"my-subscription-id\",\n",
|
||||||
" resource_group=\"my-ml-rg\",\n",
|
" resource_group=\"my-ml-rg\",\n",
|
||||||
" workspace_name=\"my-ml-workspace\")"
|
" workspace_name=\"my-ml-workspace\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"Note the user you're authenticated as must have access to the subscription and resource group. If you receive an error\n",
|
"Note the user you're authenticated as must have access to the subscription and resource group. If you receive an error\n",
|
||||||
"\n",
|
"\n",
|
||||||
"```\n",
|
"```\n",
|
||||||
"AuthenticationException: You don't have access to xxxxxx-xxxx-xxx-xxx-xxxxxxxxxx subscription. All the subscriptions that you have access to = ...\n",
|
"AuthenticationException: You don't have access to xxxxxx-xxxx-xxx-xxx-xxxxxxxxxx subscription. All the subscriptions that you have access to = ...\n",
|
||||||
"```\n",
|
"```\n",
|
||||||
"\n",
|
"\n",
|
||||||
"check that the you used correct login and entered the correct subscription ID."
|
"check that the you used correct login and entered the correct subscription ID."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"In some cases, you may see a version of the error message containing text: ```All the subscriptions that you have access to = []```\n",
|
"In some cases, you may see a version of the error message containing text: ```All the subscriptions that you have access to = []```\n",
|
||||||
"\n",
|
"\n",
|
||||||
"In such a case, you may have to specify the tenant ID of the Azure Active Directory you're using. An example would be accessing a subscription as a guest to a tenant that is not your default. You specify the tenant by explicitly instantiating _InteractiveLoginAuthentication_ with tenant ID as argument ([see instructions how to obtain tenant Id](#get-tenant-id))."
|
"In such a case, you may have to specify the tenant ID of the Azure Active Directory you're using. An example would be accessing a subscription as a guest to a tenant that is not your default. You specify the tenant by explicitly instantiating _InteractiveLoginAuthentication_ with tenant ID as argument ([see instructions how to obtain tenant Id](#get-tenant-id))."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from azureml.core.authentication import InteractiveLoginAuthentication\n",
|
"from azureml.core.authentication import InteractiveLoginAuthentication\n",
|
||||||
"\n",
|
"\n",
|
||||||
"interactive_auth = InteractiveLoginAuthentication(tenant_id=\"my-tenant-id\")\n",
|
"interactive_auth = InteractiveLoginAuthentication(tenant_id=\"my-tenant-id\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"ws = Workspace(subscription_id=\"my-subscription-id\",\n",
|
"ws = Workspace(subscription_id=\"my-subscription-id\",\n",
|
||||||
" resource_group=\"my-ml-rg\",\n",
|
" resource_group=\"my-ml-rg\",\n",
|
||||||
" workspace_name=\"my-ml-workspace\",\n",
|
" workspace_name=\"my-ml-workspace\",\n",
|
||||||
" auth=interactive_auth)"
|
" auth=interactive_auth)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### Azure CLI Authentication\n",
|
"### Azure CLI Authentication\n",
|
||||||
"\n",
|
"\n",
|
||||||
"If you have installed azure-cli package, and used ```az login``` command to log in to your Azure Subscription, you can use _AzureCliAuthentication_ class.\n",
|
"If you have installed azure-cli package, and used ```az login``` command to log in to your Azure Subscription, you can use _AzureCliAuthentication_ class.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Note that interactive authentication described above won't use existing Azure CLI auth tokens. "
|
"Note that interactive authentication described above won't use existing Azure CLI auth tokens. "
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from azureml.core.authentication import AzureCliAuthentication\n",
|
"from azureml.core.authentication import AzureCliAuthentication\n",
|
||||||
"\n",
|
"\n",
|
||||||
"cli_auth = AzureCliAuthentication()\n",
|
"cli_auth = AzureCliAuthentication()\n",
|
||||||
"\n",
|
"\n",
|
||||||
"ws = Workspace(subscription_id=\"my-subscription-id\",\n",
|
"ws = Workspace(subscription_id=\"my-subscription-id\",\n",
|
||||||
" resource_group=\"my-ml-rg\",\n",
|
" resource_group=\"my-ml-rg\",\n",
|
||||||
" workspace_name=\"my-ml-workspace\",\n",
|
" workspace_name=\"my-ml-workspace\",\n",
|
||||||
" auth=cli_auth)\n",
|
" auth=cli_auth)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"print(\"Found workspace {} at location {}\".format(ws.name, ws.location))"
|
"print(\"Found workspace {} at location {}\".format(ws.name, ws.location))"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### Service Principal Authentication\n",
|
"### Service Principal Authentication\n",
|
||||||
"\n",
|
"\n",
|
||||||
"When setting up a machine learning workflow as an automated process, we recommend using Service Principal Authentication. This approach decouples the authentication from any specific user login, and allows managed access control.\n",
|
"When setting up a machine learning workflow as an automated process, we recommend using Service Principal Authentication. This approach decouples the authentication from any specific user login, and allows managed access control.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Note that you must have administrator privileges over the Azure subscription to complete these steps.\n",
|
"Note that you must have administrator privileges over the Azure subscription to complete these steps.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"The first step is to create a service principal. First, go to [Azure Portal](https://portal.azure.com), select **Azure Active Directory** and **App Registrations**. Then select **+New application registration**, give your service principal a name, for example _my-svc-principal_. You can leave application type as is, and specify a dummy value for Sign-on URL, such as _https://invalid_.\n",
|
"The first step is to create a service principal. First, go to [Azure Portal](https://portal.azure.com), select **Azure Active Directory** and **App Registrations**. Then select **+New application registration**, give your service principal a name, for example _my-svc-principal_. You can leave application type as is, and specify a dummy value for Sign-on URL, such as _https://invalid_.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Then click **Create**.\n",
|
"Then click **Create**.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"![service principal creation]<img src=\"images/svc-pr-1.PNG\">"
|
"![service principal creation]<img src=\"images/svc-pr-1.PNG\">"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"The next step is to obtain the _Application ID_ (also called username) and create _password_ for the service principal.\n",
|
"The next step is to obtain the _Application ID_ (also called username) and create _password_ for the service principal.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"From the page for your newly created service principal, copy the _Application ID_. Then select **Settings** and **Keys**, write a description for your key, and select duration. Then click **Save**, and copy the _password_ to a secure location.\n",
|
"From the page for your newly created service principal, copy the _Application ID_. Then select **Settings** and **Keys**, write a description for your key, and select duration. Then click **Save**, and copy the _password_ to a secure location.\n",
|
||||||
"\n",
|
"\n",
|
||||||
""
|
""
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"<a id =\"get-tenant-id\"></a>\n",
|
"<a id =\"get-tenant-id\"></a>\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Also, you need to obtain the tenant ID of your Azure subscription. Go back to **Azure Active Directory**, select **Properties** and copy _Directory ID_.\n",
|
"Also, you need to obtain the tenant ID of your Azure subscription. Go back to **Azure Active Directory**, select **Properties** and copy _Directory ID_.\n",
|
||||||
"\n",
|
"\n",
|
||||||
""
|
""
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"Finally, you need to give the service principal permissions to access your workspace. Navigate to **Resource Groups**, to the resource group for your Machine Learning Workspace. \n",
|
"Finally, you need to give the service principal permissions to access your workspace. Navigate to **Resource Groups**, to the resource group for your Machine Learning Workspace. \n",
|
||||||
"\n",
|
"\n",
|
||||||
"Then select **Access Control (IAM)** and **Add a role assignment**. For _Role_, specify which level of access you need to grant, for example _Contributor_. Start entering your service principal name and once it is found, select it, and click **Save**.\n",
|
"Then select **Access Control (IAM)** and **Add a role assignment**. For _Role_, specify which level of access you need to grant, for example _Contributor_. Start entering your service principal name and once it is found, select it, and click **Save**.\n",
|
||||||
"\n",
|
"\n",
|
||||||
""
|
""
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"Now you are ready to use the service principal authentication. For example, to connect to your Workspace, see code below and enter your own values for tenant ID, application ID, subscription ID, resource group and workspace.\n",
|
"Now you are ready to use the service principal authentication. For example, to connect to your Workspace, see code below and enter your own values for tenant ID, application ID, subscription ID, resource group and workspace.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"**We strongly recommended that you do not insert the secret password to code**. Instead, you can use environment variables to pass it to your code, for example through Azure Key Vault, or through secret build variables in Azure DevOps. For local testing, you can for example use following PowerShell command to set the environment variable.\n",
|
"**We strongly recommended that you do not insert the secret password to code**. Instead, you can use environment variables to pass it to your code, for example through Azure Key Vault, or through secret build variables in Azure DevOps. For local testing, you can for example use following PowerShell command to set the environment variable.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"```\n",
|
"```\n",
|
||||||
"$env:AZUREML_PASSWORD = \"my-password\"\n",
|
"$env:AZUREML_PASSWORD = \"my-password\"\n",
|
||||||
"```"
|
"```"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"import os\n",
|
"import os\n",
|
||||||
"from azureml.core.authentication import ServicePrincipalAuthentication\n",
|
"from azureml.core.authentication import ServicePrincipalAuthentication\n",
|
||||||
"\n",
|
"\n",
|
||||||
"svc_pr_password = os.environ.get(\"AZUREML_PASSWORD\")\n",
|
"svc_pr_password = os.environ.get(\"AZUREML_PASSWORD\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"svc_pr = ServicePrincipalAuthentication(\n",
|
"svc_pr = ServicePrincipalAuthentication(\n",
|
||||||
" tenant_id=\"my-tenant-id\",\n",
|
" tenant_id=\"my-tenant-id\",\n",
|
||||||
" service_principal_id=\"my-application-id\",\n",
|
" service_principal_id=\"my-application-id\",\n",
|
||||||
" service_principal_password=svc_pr_password)\n",
|
" service_principal_password=svc_pr_password)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"ws = Workspace(\n",
|
"ws = Workspace(\n",
|
||||||
" subscription_id=\"my-subscription-id\",\n",
|
" subscription_id=\"my-subscription-id\",\n",
|
||||||
" resource_group=\"my-ml-rg\",\n",
|
" resource_group=\"my-ml-rg\",\n",
|
||||||
" workspace_name=\"my-ml-workspace\",\n",
|
" workspace_name=\"my-ml-workspace\",\n",
|
||||||
" auth=svc_pr\n",
|
" auth=svc_pr\n",
|
||||||
" )\n",
|
" )\n",
|
||||||
"\n",
|
"\n",
|
||||||
"print(\"Found workspace {} at location {}\".format(ws.name, ws.location))"
|
"print(\"Found workspace {} at location {}\".format(ws.name, ws.location))"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"authors": [
|
|
||||||
{
|
|
||||||
"name": "roastala"
|
|
||||||
}
|
|
||||||
],
|
],
|
||||||
"kernelspec": {
|
"metadata": {
|
||||||
"display_name": "Python 3.6",
|
"authors": [
|
||||||
"language": "python",
|
{
|
||||||
"name": "python36"
|
"name": "roastala"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3.6",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python36"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.6.5"
|
||||||
|
}
|
||||||
},
|
},
|
||||||
"language_info": {
|
"nbformat": 4,
|
||||||
"codemirror_mode": {
|
"nbformat_minor": 2
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.6.5"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 2
|
|
||||||
}
|
}
|
||||||
@@ -220,14 +220,14 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
"from azureml.core.runconfig import MpiConfiguration\n",
|
||||||
"from azureml.train.dnn import Chainer\n",
|
"from azureml.train.dnn import Chainer\n",
|
||||||
"\n",
|
"\n",
|
||||||
"estimator = Chainer(source_directory=project_folder,\n",
|
"estimator = Chainer(source_directory=project_folder,\n",
|
||||||
" compute_target=compute_target,\n",
|
" compute_target=compute_target,\n",
|
||||||
" entry_script='train_mnist.py',\n",
|
" entry_script='train_mnist.py',\n",
|
||||||
" node_count=2,\n",
|
" node_count=2,\n",
|
||||||
" process_count_per_node=1,\n",
|
" distributed_training=MpiConfiguration(),\n",
|
||||||
" distributed_backend='mpi',\n",
|
|
||||||
" use_gpu=True)"
|
" use_gpu=True)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -233,14 +233,14 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
"from azureml.core.runconfig import MpiConfiguration\n",
|
||||||
"from azureml.train.dnn import PyTorch\n",
|
"from azureml.train.dnn import PyTorch\n",
|
||||||
"\n",
|
"\n",
|
||||||
"estimator = PyTorch(source_directory=project_folder,\n",
|
"estimator = PyTorch(source_directory=project_folder,\n",
|
||||||
" compute_target=compute_target,\n",
|
" compute_target=compute_target,\n",
|
||||||
" entry_script='pytorch_horovod_mnist.py',\n",
|
" entry_script='pytorch_horovod_mnist.py',\n",
|
||||||
" node_count=2,\n",
|
" node_count=2,\n",
|
||||||
" process_count_per_node=1,\n",
|
" distributed_training=MpiConfiguration(),\n",
|
||||||
" distributed_backend='mpi',\n",
|
|
||||||
" use_gpu=True)"
|
" use_gpu=True)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -285,7 +285,9 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### Create a TensorFlow estimator\n",
|
"### Create a TensorFlow estimator\n",
|
||||||
"The AML SDK's TensorFlow estimator enables you to easily submit TensorFlow training jobs for both single-node and distributed runs. For more information on the TensorFlow estimator, refer [here](https://docs.microsoft.com/azure/machine-learning/service/how-to-train-tensorflow)."
|
"The AML SDK's TensorFlow estimator enables you to easily submit TensorFlow training jobs for both single-node and distributed runs. For more information on the TensorFlow estimator, refer [here](https://docs.microsoft.com/azure/machine-learning/service/how-to-train-tensorflow).\n",
|
||||||
|
"\n",
|
||||||
|
"The TensorFlow estimator also takes a `framework_version` parameter -- if no version is provided, the estimator will default to the latest version supported by AzureML. Use `TensorFlow.get_supported_versions()` to get a list of all versions supported by your current SDK version or see the [SDK documentation](https://docs.microsoft.com/en-us/python/api/azureml-train-core/azureml.train.dnn?view=azure-ml-py) for the versions supported in the most current release."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -294,6 +296,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
"from azureml.core.runconfig import MpiConfiguration\n",
|
||||||
"from azureml.train.dnn import TensorFlow\n",
|
"from azureml.train.dnn import TensorFlow\n",
|
||||||
"\n",
|
"\n",
|
||||||
"script_params={\n",
|
"script_params={\n",
|
||||||
@@ -305,9 +308,8 @@
|
|||||||
" script_params=script_params,\n",
|
" script_params=script_params,\n",
|
||||||
" entry_script='tf_horovod_word2vec.py',\n",
|
" entry_script='tf_horovod_word2vec.py',\n",
|
||||||
" node_count=2,\n",
|
" node_count=2,\n",
|
||||||
" process_count_per_node=1,\n",
|
" distributed_training=MpiConfiguration(),\n",
|
||||||
" distributed_backend='mpi',\n",
|
" framework_version='1.13')"
|
||||||
" use_gpu=True)"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -26,7 +26,7 @@
|
|||||||
"* Go through the [configuration notebook](../../../configuration.ipynb) to:\n",
|
"* Go through the [configuration notebook](../../../configuration.ipynb) to:\n",
|
||||||
" * install the AML SDK\n",
|
" * install the AML SDK\n",
|
||||||
" * create a workspace and its configuration file (`config.json`)\n",
|
" * create a workspace and its configuration file (`config.json`)\n",
|
||||||
"* Review the [tutorial](https://aka.ms/aml-notebook-hyperdrive) on single-node TensorFlow training using the SDK"
|
"* Review the [tutorial](../train-hyperparameter-tune-deploy-with-tensorflow/train-hyperparameter-tune-deploy-with-tensorflow.ipynb) on single-node TensorFlow training using the SDK"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -208,6 +208,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
"from azureml.core.runconfig import TensorflowConfiguration\n",
|
||||||
"from azureml.train.dnn import TensorFlow\n",
|
"from azureml.train.dnn import TensorFlow\n",
|
||||||
"\n",
|
"\n",
|
||||||
"script_params={\n",
|
"script_params={\n",
|
||||||
@@ -215,14 +216,15 @@
|
|||||||
" '--train_steps': 500\n",
|
" '--train_steps': 500\n",
|
||||||
"}\n",
|
"}\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"distributed_training = TensorflowConfiguration()\n",
|
||||||
|
"distributed_training.worker_count = 2\n",
|
||||||
|
"\n",
|
||||||
"estimator = TensorFlow(source_directory=project_folder,\n",
|
"estimator = TensorFlow(source_directory=project_folder,\n",
|
||||||
" compute_target=compute_target,\n",
|
" compute_target=compute_target,\n",
|
||||||
" script_params=script_params,\n",
|
" script_params=script_params,\n",
|
||||||
" entry_script='tf_mnist_replica.py',\n",
|
" entry_script='tf_mnist_replica.py',\n",
|
||||||
" node_count=2,\n",
|
" node_count=2,\n",
|
||||||
" worker_count=2,\n",
|
" distributed_training=distributed_training,\n",
|
||||||
" parameter_server_count=1, \n",
|
|
||||||
" distributed_backend='ps',\n",
|
|
||||||
" use_gpu=True)"
|
" use_gpu=True)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -291,7 +291,7 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# use a custom Docker image\n",
|
"# use a custom Docker image\n",
|
||||||
"from azureml.core.runconfig import ContainerRegistry\n",
|
"from azureml.core.container_registry import ContainerRegistry\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# this is an image available in Docker Hub\n",
|
"# this is an image available in Docker Hub\n",
|
||||||
"image_name = 'continuumio/miniconda3'\n",
|
"image_name = 'continuumio/miniconda3'\n",
|
||||||
@@ -309,7 +309,8 @@
|
|||||||
"est = Estimator(source_directory='.', compute_target='local', \n",
|
"est = Estimator(source_directory='.', compute_target='local', \n",
|
||||||
" entry_script='dummy_train.py',\n",
|
" entry_script='dummy_train.py',\n",
|
||||||
" custom_docker_image=image_name,\n",
|
" custom_docker_image=image_name,\n",
|
||||||
" image_registry_details=image_registry_details,\n",
|
" # uncomment below line to use your private ACR\n",
|
||||||
|
" #image_registry_details=image_registry_details,\n",
|
||||||
" user_managed=user_managed_dependencies\n",
|
" user_managed=user_managed_dependencies\n",
|
||||||
" )\n",
|
" )\n",
|
||||||
"\n",
|
"\n",
|
||||||
@@ -336,7 +337,7 @@
|
|||||||
"metadata": {
|
"metadata": {
|
||||||
"authors": [
|
"authors": [
|
||||||
{
|
{
|
||||||
"name": "minxia"
|
"name": "maxluk"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
@@ -356,7 +357,7 @@
|
|||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.6.8"
|
"version": "3.6.8"
|
||||||
},
|
},
|
||||||
"msauthor": "haining"
|
"msauthor": "minxia"
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
"nbformat_minor": 2
|
"nbformat_minor": 2
|
||||||
|
|||||||
@@ -396,7 +396,7 @@
|
|||||||
"est = TensorFlow(source_directory=script_folder,\n",
|
"est = TensorFlow(source_directory=script_folder,\n",
|
||||||
" script_params=script_params,\n",
|
" script_params=script_params,\n",
|
||||||
" compute_target=compute_target, \n",
|
" compute_target=compute_target, \n",
|
||||||
" conda_packages=['keras', 'matplotlib'],\n",
|
" pip_packages=['keras', 'matplotlib'],\n",
|
||||||
" entry_script='keras_mnist.py', \n",
|
" entry_script='keras_mnist.py', \n",
|
||||||
" use_gpu=True)"
|
" use_gpu=True)"
|
||||||
]
|
]
|
||||||
@@ -792,7 +792,7 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"best_run = hdr.get_best_run_by_primary_metric()\n",
|
"best_run = hdr.get_best_run_by_primary_metric()\n",
|
||||||
"print(best_run.get_details()['runDefinition']['Arguments'])"
|
"print(best_run.get_details()['runDefinition']['arguments'])"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -1144,7 +1144,7 @@
|
|||||||
"metadata": {
|
"metadata": {
|
||||||
"authors": [
|
"authors": [
|
||||||
{
|
{
|
||||||
"name": "haining"
|
"name": "maxluk"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
@@ -1164,7 +1164,7 @@
|
|||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.6.7"
|
"version": "3.6.7"
|
||||||
},
|
},
|
||||||
"msauthor": "haining"
|
"msauthor": "maxluk"
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
"nbformat_minor": 2
|
"nbformat_minor": 2
|
||||||
|
|||||||
@@ -396,7 +396,10 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"## Create TensorFlow estimator\n",
|
"## Create TensorFlow estimator\n",
|
||||||
"Next, we construct an `azureml.train.dnn.TensorFlow` estimator object, use the Batch AI cluster as compute target, and pass the mount-point of the datastore to the training code as a parameter.\n",
|
"Next, we construct an `azureml.train.dnn.TensorFlow` estimator object, use the Batch AI cluster as compute target, and pass the mount-point of the datastore to the training code as a parameter.\n",
|
||||||
"The TensorFlow estimator is providing a simple way of launching a TensorFlow training job on a compute target. It will automatically provide a docker image that has TensorFlow installed -- if additional pip or conda packages are required, their names can be passed in via the `pip_packages` and `conda_packages` arguments and they will be included in the resulting docker."
|
"\n",
|
||||||
|
"The TensorFlow estimator is providing a simple way of launching a TensorFlow training job on a compute target. It will automatically provide a docker image that has TensorFlow installed -- if additional pip or conda packages are required, their names can be passed in via the `pip_packages` and `conda_packages` arguments and they will be included in the resulting docker.\n",
|
||||||
|
"\n",
|
||||||
|
"The TensorFlow estimator also takes a `framework_version` parameter -- if no version is provided, the estimator will default to the latest version supported by AzureML. Use `TensorFlow.get_supported_versions()` to get a list of all versions supported by your current SDK version or see the [SDK documentation](https://docs.microsoft.com/en-us/python/api/azureml-train-core/azureml.train.dnn?view=azure-ml-py) for the versions supported in the most current release."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -419,7 +422,8 @@
|
|||||||
" script_params=script_params,\n",
|
" script_params=script_params,\n",
|
||||||
" compute_target=compute_target,\n",
|
" compute_target=compute_target,\n",
|
||||||
" entry_script='tf_mnist.py', \n",
|
" entry_script='tf_mnist.py', \n",
|
||||||
" use_gpu=True)"
|
" use_gpu=True, \n",
|
||||||
|
" framework_version='1.13')"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -1158,7 +1162,7 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.6.8"
|
"version": "3.6.6"
|
||||||
},
|
},
|
||||||
"msauthor": "minxia"
|
"msauthor": "minxia"
|
||||||
},
|
},
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -319,9 +319,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### Submit script to run in the system-managed environment\n",
|
"### Submit script to run in the system-managed environment\n",
|
||||||
"A new conda environment is built based on the conda dependencies object. If you are running this for the first time, this might take up to 5 mninutes. But this conda environment is reused so long as you don't change the conda dependencies.\n",
|
"A new conda environment is built based on the conda dependencies object. If you are running this for the first time, this might take up to 5 minutes. But this conda environment is reused so long as you don't change the conda dependencies."
|
||||||
"\n",
|
|
||||||
"\n"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -332,9 +330,9 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"import subprocess\n",
|
"import subprocess\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Check if Docker is installed and Linux containers are enables\n",
|
"# Check if Docker is installed and Linux containers are enabled\n",
|
||||||
"if subprocess.run(\"docker -v\", shell=True) == 0:\n",
|
"if subprocess.run(\"docker -v\", shell=True).returncode == 0:\n",
|
||||||
" out = subprocess.check_output(\"docker system info\", shell=True, encoding=\"ascii\").split(\"\\n\")\n",
|
" out = subprocess.check_output(\"docker system info\", shell=True).decode('ascii')\n",
|
||||||
" if not \"OSType: linux\" in out:\n",
|
" if not \"OSType: linux\" in out:\n",
|
||||||
" print(\"Switch Docker engine to use Linux containers.\")\n",
|
" print(\"Switch Docker engine to use Linux containers.\")\n",
|
||||||
" else:\n",
|
" else:\n",
|
||||||
@@ -435,6 +433,29 @@
|
|||||||
"))"
|
"))"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Let's compare it to the others"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"%matplotlib inline\n",
|
||||||
|
"\n",
|
||||||
|
"import matplotlib\n",
|
||||||
|
"import matplotlib.pyplot as plt\n",
|
||||||
|
"\n",
|
||||||
|
"plt.plot(metrics['alpha'], metrics['mse'], marker='o')\n",
|
||||||
|
"plt.ylabel(\"MSE\")\n",
|
||||||
|
"plt.xlabel(\"Alpha\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
@@ -455,7 +476,7 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"We know the model `ridge_0.40.pkl` is the best performing model from the eariler queries. So let's register it with the workspace."
|
"We know the model `ridge_0.40.pkl` is the best performing model from the earlier queries. So let's register it with the workspace."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -488,7 +509,7 @@
|
|||||||
"metadata": {
|
"metadata": {
|
||||||
"authors": [
|
"authors": [
|
||||||
{
|
{
|
||||||
"name": "haining"
|
"name": "roastala"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
@@ -506,7 +527,7 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.6.6"
|
"version": "3.6.8"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
|
|||||||
@@ -110,7 +110,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## Upload data files into datastore\n",
|
"## Upload data files into datastore\n",
|
||||||
"Every workspace comes with a default datastore (and you can register more) which is backed by the Azure blob storage account associated with the workspace. We can use it to transfer data from local to the cloud, and access it from the compute target."
|
"Every workspace comes with a default [datastore](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-access-data) (and you can register more) which is backed by the Azure blob storage account associated with the workspace. We can use it to transfer data from local to the cloud, and access it from the compute target."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -236,7 +236,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## Configure & Run\n",
|
"## Configure & Run\n",
|
||||||
"First let's create a `DataReferenceConfiguration` object to inform the system what data folder to download to the copmute target."
|
"First let's create a `DataReferenceConfiguration` object to inform the system what data folder to download to the compute target."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -615,7 +615,7 @@
|
|||||||
"metadata": {
|
"metadata": {
|
||||||
"authors": [
|
"authors": [
|
||||||
{
|
{
|
||||||
"name": "haining"
|
"name": "roastala"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
|
|||||||
@@ -673,7 +673,7 @@
|
|||||||
"metadata": {
|
"metadata": {
|
||||||
"authors": [
|
"authors": [
|
||||||
{
|
{
|
||||||
"name": "haining"
|
"name": "roastala"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
|
|||||||
201
how-to-use-azureml/work-with-data/dataprep/README.md
Normal file
201
how-to-use-azureml/work-with-data/dataprep/README.md
Normal file
@@ -0,0 +1,201 @@
|
|||||||
|
# Azure Machine Learning Data Prep SDK
|
||||||
|
|
||||||
|
The Azure Machine Learning Data Prep SDK helps data scientists explore, cleanse and transform data for machine learning workflows in any Python environment.
|
||||||
|
|
||||||
|
Key benefits to the SDK:
|
||||||
|
- Cross-platform functionality. Write with a single SDK and run it on Windows, macOS, or Linux.
|
||||||
|
- Intelligent transformations powered by AI, including grouping similar values to their canonical form and deriving columns by examples without custom code.
|
||||||
|
- Capability to work with large, multiple files of different schema.
|
||||||
|
- Scalability on a single machine by streaming data during processing rather than loading into memory.
|
||||||
|
- Seamless integration with other Azure Machine Learning services. You can simply pass your prepared data file into `AutoMLConfig` object for automated machine learning training.
|
||||||
|
|
||||||
|
You will find in this repo:
|
||||||
|
- [Getting Started Tutorial](tutorials/getting-started/getting-started.ipynb) for a quick introduction to the main features of Data Prep SDK.
|
||||||
|
- [Case Study Notebooks](case-studies/new-york-taxi) that present an end-to-end data preparation tutorial where users start with small dataset, profile data with statistics summary, cleanse and perform feature engineering. All transformation steps are saved in a dataflow object. Users can easily reapply the same steps on the full dataset, and run it on Spark.
|
||||||
|
- [How-To Guide Notebooks](how-to-guides) for more in-depth sample code at feature level.
|
||||||
|
|
||||||
|
## Installation
|
||||||
|
Here are the [SDK installation steps](https://docs.microsoft.com/python/api/overview/azure/dataprep/intro?view=azure-dataprep-py#install).
|
||||||
|
|
||||||
|
## Documentation
|
||||||
|
Here is more information on how to use the new Data Prep SDK:
|
||||||
|
- [SDK overview and API reference docs](http://aka.ms/data-prep-sdk) that show different classes, methods, and function parameters for the SDK.
|
||||||
|
- [Tutorial: Prep NYC taxi data](https://docs.microsoft.com/azure/machine-learning/service/tutorial-data-prep) for regression modeling and then run automated machine learning to build the model.
|
||||||
|
- [How to load data](https://docs.microsoft.com/azure/machine-learning/service/how-to-load-data) is an overview guide on how to load data using the Data Prep SDK.
|
||||||
|
- [How to transform data](https://docs.microsoft.com/azure/machine-learning/service/how-to-transform-data) is an overview guide on how to transform data.
|
||||||
|
- [How to write data](https://docs.microsoft.com/azure/machine-learning/service/how-to-write-data) is an overview guide on how to write data to different storage locations.
|
||||||
|
|
||||||
|
## Support
|
||||||
|
|
||||||
|
If you have any questions or feedback, send us an email at: [askamldataprep@microsoft.com](mailto:askamldataprep@microsoft.com).
|
||||||
|
|
||||||
|
## Release Notes
|
||||||
|
|
||||||
|
### 2019-04-08 (version 1.1.1)
|
||||||
|
|
||||||
|
New features
|
||||||
|
- You can read multiple Datastore/DataPath/DataReference sources using read_* transforms.
|
||||||
|
- You can perform the following operations on columns to create a new column: division, floor, modulo, power, length.
|
||||||
|
- Data Prep is now part of the Azure ML diagnostics suite and will log diagnostic information by default.
|
||||||
|
- To turn this off, set this environment variable to true: DISABLE_DPREP_LOGGER
|
||||||
|
|
||||||
|
Bug fixes and improvements
|
||||||
|
- Improved code documentation for commonly used classes and functions.
|
||||||
|
- Fixed a bug in auto_read_file that failed to read Excel files.
|
||||||
|
- Added option to overwrite the folder in read_pandas_dataframe.
|
||||||
|
- Improved performance of dotnetcore2 dependency installation, and added support for Fedora 27/28 and Ubuntu 1804.
|
||||||
|
- Improved the performance of reading from Azure Blobs.
|
||||||
|
- Column type detection now supports columns of type Long.
|
||||||
|
- Fixed a bug where some date values were being displayed as timestamps instead of Python datetime objects.
|
||||||
|
- Fixed a bug where some type counts were being displayed as doubles instead of integers.
|
||||||
|
|
||||||
|
### 2019-03-25 (version 1.1.0)
|
||||||
|
|
||||||
|
Breaking changes
|
||||||
|
- The concept of the Data Prep Package has been deprecated and is no longer supported. Instead of persisting multiple Dataflows in one Package, you can persist Dataflows individually.
|
||||||
|
- How-to guide: [Opening and Saving Dataflows notebook](https://aka.ms/aml-data-prep-open-save-dataflows-nb)
|
||||||
|
|
||||||
|
New features
|
||||||
|
- Data Prep can now recognize columns that match a particular Semantic Type, and split accordingly. The STypes currently supported include: email address, geographic coordinates (latitude & longitude), IPv4 and IPv6 addresses, US phone number, and US zip code.
|
||||||
|
- How-to guide: [Semantic Types notebook](https://aka.ms/aml-data-prep-semantic-types-nb)
|
||||||
|
- Data Prep now supports the following operations to generate a resultant column from two numeric columns: subtract, multiply, divide, and modulo.
|
||||||
|
- You can call `verify_has_data()` on a Dataflow to check whether the Dataflow would produce records if executed.
|
||||||
|
|
||||||
|
Bug fixes and improvements
|
||||||
|
- You can now specify the number of bins to use in a histogram for numeric column profiles.
|
||||||
|
- The `read_pandas_dataframe` transform now requires the DataFrame to have string- or byte- typed column names.
|
||||||
|
- Fixed a bug in the `fill_nulls` transform, where values were not correctly filled in if the column was missing.
|
||||||
|
|
||||||
|
### 2019-03-11 (version 1.0.17)
|
||||||
|
|
||||||
|
New features
|
||||||
|
- Now supports adding two numeric columns to generate a resultant column using the expression language.
|
||||||
|
|
||||||
|
Bug fixes and improvements
|
||||||
|
- Improved the documentation and parameter checking for random_split.
|
||||||
|
|
||||||
|
### 2019-02-27 (version 1.0.16)
|
||||||
|
|
||||||
|
Bug fix
|
||||||
|
- Fixed a Service Principal authentication issue that was caused by an API change.
|
||||||
|
|
||||||
|
### 2019-02-25 (version 1.0.15)
|
||||||
|
|
||||||
|
New features
|
||||||
|
- Data Prep now supports writing file streams from a dataflow. Also provides the ability to manipulate the file stream names to create new file names.
|
||||||
|
- How-to guide: [Working With File Streams notebook](https://aka.ms/aml-data-prep-file-stream-nb)
|
||||||
|
|
||||||
|
Bug fixes and improvements
|
||||||
|
- Improved performance of t-Digest on large data sets.
|
||||||
|
- Data Prep now supports reading data from a DataPath.
|
||||||
|
- One hot encoding now works on boolean and numeric columns.
|
||||||
|
- Other miscellaneous bug fixes.
|
||||||
|
|
||||||
|
### 2019-02-11 (version 1.0.12)
|
||||||
|
|
||||||
|
New features
|
||||||
|
- Data Prep now supports reading from an Azure SQL database using Datastore.
|
||||||
|
|
||||||
|
Changes
|
||||||
|
- Significantly improved the memory performance of certain operations on large data.
|
||||||
|
- `read_pandas_dataframe()` now requires `temp_folder` to be specified.
|
||||||
|
- The `name` property on `ColumnProfile` has been deprecated - use `column_name` instead.
|
||||||
|
|
||||||
|
### 2019-01-28 (version 1.0.8)
|
||||||
|
|
||||||
|
Bug fixes
|
||||||
|
- Significantly improved the performance of getting data profiles.
|
||||||
|
- Fixed minor bugs related to error reporting.
|
||||||
|
|
||||||
|
### 2019-01-14 (version 1.0.7)
|
||||||
|
|
||||||
|
New features
|
||||||
|
- Datastore improvements (documented in [Datastore how-to-guide](https://aka.ms/aml-data-prep-datastore-nb))
|
||||||
|
- Added ability to read from and write to Azure File Share and ADLS Datastores in scale-up.
|
||||||
|
- When using Datastores, Data Prep now supports using service principal authentication instead of interactive authentication.
|
||||||
|
- Added support for wasb and wasbs urls.
|
||||||
|
|
||||||
|
### 2019-01-09 (version 1.0.6)
|
||||||
|
|
||||||
|
Bug fixes
|
||||||
|
- Fixed bug with reading from public readable Azure Blob containers on Spark.
|
||||||
|
|
||||||
|
### 2018-12-19 (version 1.0.4)
|
||||||
|
|
||||||
|
New features
|
||||||
|
- `to_bool` function now allows mismatched values to be converted to Error values. This is the new default mismatch behavior for `to_bool` and `set_column_types`, whereas the previous default behavior was to convert mismatched values to False.
|
||||||
|
- When calling `to_pandas_dataframe`, there is a new option to interpret null/missing values in numeric columns as NaN.
|
||||||
|
- Added ability to check the return type of some expressions to ensure type consistency and fail early.
|
||||||
|
- You can now call `parse_json` to parse values in a column as JSON objects and expand them into multiple columns.
|
||||||
|
|
||||||
|
Bug fixes
|
||||||
|
- Fixed a bug that crashed `set_column_types` in Python 3.5.2.
|
||||||
|
- Fixed a bug that crashed when connecting to Datastore using an AML image.
|
||||||
|
|
||||||
|
### 2018-12-07 (version 0.5.3)
|
||||||
|
|
||||||
|
Fixed missing dependency issue for .NET Core2 on Ubuntu 16.
|
||||||
|
|
||||||
|
### 2018-12-03 (version 0.5.2)
|
||||||
|
|
||||||
|
Breaking changes
|
||||||
|
- `SummaryFunction.N` was renamed to `SummaryFunction.Count`.
|
||||||
|
|
||||||
|
Bug fixes
|
||||||
|
- Use latest AML Run Token when reading from and writing to datastores on remote runs. Previously, if the AML Run Token is updated in Python, the Data Prep runtime will not be updated with the updated AML Run Token.
|
||||||
|
- Additional clearer error messages
|
||||||
|
- to_spark_dataframe() will no longer crash when Spark uses Kryo serialization
|
||||||
|
- Value Count Inspector can now show more than 1000 unique values
|
||||||
|
- Random Split no longer fails if the original Dataflow doesn’t have a name
|
||||||
|
|
||||||
|
### 2018-11-19 (version 0.5.0)
|
||||||
|
|
||||||
|
New features
|
||||||
|
- Created a new DataPrep CLI to execute DataPrep packages and view the data profile for a dataset or dataflow
|
||||||
|
- Redesigned SetColumnType API to improve usability
|
||||||
|
- Renamed smart_read_file to auto_read_file
|
||||||
|
- Now includes skew and kurtosis in the Data Profile
|
||||||
|
- Can sample with stratified sampling
|
||||||
|
- Can read from zip files that contain CSV files
|
||||||
|
- Can split datasets row-wise with Random Split (e.g. into test-train sets)
|
||||||
|
- Can get all the column data types from a dataflow or a data profile by calling .dtypes
|
||||||
|
- Can get the row count from a dataflow or a data profile by calling .row_count
|
||||||
|
|
||||||
|
Bug fixes
|
||||||
|
- Fixed long to double conversion
|
||||||
|
- Fixed assert after any add column
|
||||||
|
- Fixed an issue with FuzzyGrouping, where it would not detect groups in some cases
|
||||||
|
- Fixed sort function to respect multi-column sort order
|
||||||
|
- Fixed and/or expressions to be similar to how Pandas handles them
|
||||||
|
- Fixed reading from dbfs path.
|
||||||
|
- Made error messages more understandable
|
||||||
|
- Now no longer fails when reading on remote compute target using AML token
|
||||||
|
- Now no longer fails on Linux DSVM
|
||||||
|
- Now no longer crashes when non-string values are in string predicates
|
||||||
|
- Now handles assertion errors when Dataflow should fail correctly
|
||||||
|
- Now supports dbutils mounted storage locations on Azure Databricks
|
||||||
|
|
||||||
|
### 2018-11-05 (version 0.4.0)
|
||||||
|
|
||||||
|
New features
|
||||||
|
- Type Count added to Data Profile
|
||||||
|
- Value Count and Histogram is now available
|
||||||
|
- More percentiles in Data Profile
|
||||||
|
- The Median is available in Summarize
|
||||||
|
- Python 3.7 is now supported
|
||||||
|
- When you save a dataflow that contains datastores to a Data Prep package, the datastore information will be persisted as part of the Data Prep package
|
||||||
|
- Writing to datastore is now supported
|
||||||
|
|
||||||
|
Bug fixes
|
||||||
|
- 64bit unsigned integer overflows are now handled properly on Linux
|
||||||
|
- Fixed incorrect text label for plain text files in smart_read
|
||||||
|
- String column type now shows up in metrics view
|
||||||
|
- Type count now is fixed to show ValueKinds mapped to single FieldType instead of individual ones
|
||||||
|
- Write_to_csv no longer fails when path is provided as a string
|
||||||
|
- When using Replace, leaving “find” blank will no longer fail
|
||||||
|
|
||||||
|
## Datasets License Information
|
||||||
|
|
||||||
|
IMPORTANT: Please read the notice and find out more about this NYC Taxi and Limousine Commission dataset here: http://www.nyc.gov/html/tlc/html/about/trip_record_data.shtml
|
||||||
|
|
||||||
|
IMPORTANT: Please read the notice and find out more about this Chicago Police Department dataset here: https://catalog.data.gov/dataset/crimes-2001-to-present-398a4
|
||||||
@@ -0,0 +1,508 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Cleaning up New York Taxi Cab data\n",
|
||||||
|
"Copyright (c) Microsoft Corporation. All rights reserved.<br>\n",
|
||||||
|
"Licensed under the MIT License."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Let's use DataPrep to clean and featurize the data which can then be used to predict taxi trip duration. We will not use the For Hire Vehicle (FHV) datasets as they are not really taxi rides and they don't provide drop-off time and geo-coordinates."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from IPython.display import display\n",
|
||||||
|
"from os import path\n",
|
||||||
|
"from tempfile import mkdtemp\n",
|
||||||
|
"\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"import azureml.dataprep as dprep"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Let's take a quick peek at yellow cab data and green cab data to see what the data looks like. DataPrep supports globing, so you will notice below that we have added a `*` in the path.\n",
|
||||||
|
"\n",
|
||||||
|
"*We are using a small sample of the taxi data for this demo. You can find a bigger sample ~6GB by changing \"green-small\" to \"green-sample\" and \"yellow-small\" to \"yellow-sample\" in the paths below.*"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"pd.set_option('display.max_columns', None)\n",
|
||||||
|
"\n",
|
||||||
|
"cache_location = mkdtemp()\n",
|
||||||
|
"dataset_root = \"https://dprepdata.blob.core.windows.net/demo\"\n",
|
||||||
|
"\n",
|
||||||
|
"green_path = \"/\".join([dataset_root, \"green-small/*\"])\n",
|
||||||
|
"yellow_path = \"/\".join([dataset_root, \"yellow-small/*\"])\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"Retrieving data from the following two sources:\")\n",
|
||||||
|
"print(green_path)\n",
|
||||||
|
"print(yellow_path)\n",
|
||||||
|
"\n",
|
||||||
|
"green_df = dprep.read_csv(path=green_path, header=dprep.PromoteHeadersMode.GROUPED)\n",
|
||||||
|
"yellow_df = dprep.auto_read_file(path=yellow_path)\n",
|
||||||
|
"\n",
|
||||||
|
"display(green_df.head(5))\n",
|
||||||
|
"display(yellow_df.head(5))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Data Cleanup"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Let's define some shortcut transforms that will apply to all Dataflows."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"all_columns = dprep.ColumnSelector(term=\".*\", use_regex=True)\n",
|
||||||
|
"drop_if_all_null = [all_columns, dprep.ColumnRelationship(dprep.ColumnRelationship.ALL)]\n",
|
||||||
|
"useful_columns = [\n",
|
||||||
|
" \"cost\", \"distance\"\"distance\", \"dropoff_datetime\", \"dropoff_latitude\", \"dropoff_longitude\",\n",
|
||||||
|
" \"passengers\", \"pickup_datetime\", \"pickup_latitude\", \"pickup_longitude\", \"store_forward\", \"vendor\"\n",
|
||||||
|
"]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Let's first work with the green taxi data and get it into a good shape that then can be combined with the yellow taxi data."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"tmp_df = (green_df\n",
|
||||||
|
" .replace_na(columns=all_columns)\n",
|
||||||
|
" .drop_nulls(*drop_if_all_null)\n",
|
||||||
|
" .rename_columns(column_pairs={\n",
|
||||||
|
" \"VendorID\": \"vendor\",\n",
|
||||||
|
" \"lpep_pickup_datetime\": \"pickup_datetime\",\n",
|
||||||
|
" \"Lpep_dropoff_datetime\": \"dropoff_datetime\",\n",
|
||||||
|
" \"lpep_dropoff_datetime\": \"dropoff_datetime\",\n",
|
||||||
|
" \"Store_and_fwd_flag\": \"store_forward\",\n",
|
||||||
|
" \"store_and_fwd_flag\": \"store_forward\",\n",
|
||||||
|
" \"Pickup_longitude\": \"pickup_longitude\",\n",
|
||||||
|
" \"Pickup_latitude\": \"pickup_latitude\",\n",
|
||||||
|
" \"Dropoff_longitude\": \"dropoff_longitude\",\n",
|
||||||
|
" \"Dropoff_latitude\": \"dropoff_latitude\",\n",
|
||||||
|
" \"Passenger_count\": \"passengers\",\n",
|
||||||
|
" \"Fare_amount\": \"cost\",\n",
|
||||||
|
" \"Trip_distance\": \"distance\"\n",
|
||||||
|
" })\n",
|
||||||
|
" .keep_columns(columns=useful_columns))\n",
|
||||||
|
"tmp_df.head(5)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"green_df = tmp_df"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Let's do the same thing to yellow taxi data."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"tmp_df = (yellow_df\n",
|
||||||
|
" .replace_na(columns=all_columns)\n",
|
||||||
|
" .drop_nulls(*drop_if_all_null)\n",
|
||||||
|
" .rename_columns(column_pairs={\n",
|
||||||
|
" \"vendor_name\": \"vendor\",\n",
|
||||||
|
" \"VendorID\": \"vendor\",\n",
|
||||||
|
" \"vendor_id\": \"vendor\",\n",
|
||||||
|
" \"Trip_Pickup_DateTime\": \"pickup_datetime\",\n",
|
||||||
|
" \"tpep_pickup_datetime\": \"pickup_datetime\",\n",
|
||||||
|
" \"Trip_Dropoff_DateTime\": \"dropoff_datetime\",\n",
|
||||||
|
" \"tpep_dropoff_datetime\": \"dropoff_datetime\",\n",
|
||||||
|
" \"store_and_forward\": \"store_forward\",\n",
|
||||||
|
" \"store_and_fwd_flag\": \"store_forward\",\n",
|
||||||
|
" \"Start_Lon\": \"pickup_longitude\",\n",
|
||||||
|
" \"Start_Lat\": \"pickup_latitude\",\n",
|
||||||
|
" \"End_Lon\": \"dropoff_longitude\",\n",
|
||||||
|
" \"End_Lat\": \"dropoff_latitude\",\n",
|
||||||
|
" \"Passenger_Count\": \"passengers\",\n",
|
||||||
|
" \"passenger_count\": \"passengers\",\n",
|
||||||
|
" \"Fare_Amt\": \"cost\",\n",
|
||||||
|
" \"fare_amount\": \"cost\",\n",
|
||||||
|
" \"Trip_Distance\": \"distance\",\n",
|
||||||
|
" \"trip_distance\": \"distance\"\n",
|
||||||
|
" })\n",
|
||||||
|
" .keep_columns(columns=useful_columns))\n",
|
||||||
|
"tmp_df.head(5)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"yellow_df = tmp_df"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Let's now append the rows from the `yellow_df` to `green_df`."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"combined_df = green_df.append_rows(dataflows=[yellow_df])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Let's take a look at the pickup and drop-off coordinates' data profile to see how the data is distributed."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"decimal_type = dprep.TypeConverter(data_type=dprep.FieldType.DECIMAL)\n",
|
||||||
|
"combined_df = combined_df.set_column_types(type_conversions={\n",
|
||||||
|
" \"pickup_longitude\": decimal_type,\n",
|
||||||
|
" \"pickup_latitude\": decimal_type,\n",
|
||||||
|
" \"dropoff_longitude\": decimal_type,\n",
|
||||||
|
" \"dropoff_latitude\": decimal_type\n",
|
||||||
|
"})\n",
|
||||||
|
"combined_df.keep_columns(columns=[\n",
|
||||||
|
" \"pickup_longitude\", \"pickup_latitude\", \n",
|
||||||
|
" \"dropoff_longitude\", \"dropoff_latitude\"\n",
|
||||||
|
"]).get_profile()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"From the data profile, we can see that there are coordinates that are missing and coordinates that are not in New York. Let's filter out coordinates not in the [city border](https://mapmakerapp.com?map=5b60a055a191245990310739f658)."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"tmp_df = (combined_df\n",
|
||||||
|
" .drop_nulls(\n",
|
||||||
|
" columns=[\"pickup_longitude\", \"pickup_latitude\", \"dropoff_longitude\", \"dropoff_latitude\"],\n",
|
||||||
|
" column_relationship=dprep.ColumnRelationship(dprep.ColumnRelationship.ANY)\n",
|
||||||
|
" ) \n",
|
||||||
|
" .filter(dprep.f_and(\n",
|
||||||
|
" dprep.col(\"pickup_longitude\") <= -73.72,\n",
|
||||||
|
" dprep.col(\"pickup_longitude\") >= -74.09,\n",
|
||||||
|
" dprep.col(\"pickup_latitude\") <= 40.88,\n",
|
||||||
|
" dprep.col(\"pickup_latitude\") >= 40.53,\n",
|
||||||
|
" dprep.col(\"dropoff_longitude\") <= -73.72,\n",
|
||||||
|
" dprep.col(\"dropoff_longitude\") >= -74.09,\n",
|
||||||
|
" dprep.col(\"dropoff_latitude\") <= 40.88,\n",
|
||||||
|
" dprep.col(\"dropoff_latitude\") >= 40.53\n",
|
||||||
|
" )))\n",
|
||||||
|
"tmp_df.keep_columns(columns=[\n",
|
||||||
|
" \"pickup_longitude\", \"pickup_latitude\", \n",
|
||||||
|
" \"dropoff_longitude\", \"dropoff_latitude\"\n",
|
||||||
|
"]).get_profile()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"combined_df = tmp_df"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Let's take a look at the data profile for the `store_forward` column."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"combined_df.keep_columns(columns='store_forward').get_profile()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"From the data profile of `store_forward` above, we can see that the data is inconsistent and there are missing values. Let's fix them."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"combined_df = combined_df.replace(columns=\"store_forward\", find=\"0\", replace_with=\"N\").fill_nulls(\"store_forward\", \"N\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Let's now split the pick up and drop off datetimes into a date column and a time column. We will use `split_column_by_example` to perform the split. If the `example` parameter of `split_column_by_example` is omitted, we will automatically try to figure out where to split based on the data."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"tmp_df = (combined_df\n",
|
||||||
|
" .split_column_by_example(source_column=\"pickup_datetime\")\n",
|
||||||
|
" .split_column_by_example(source_column=\"dropoff_datetime\"))\n",
|
||||||
|
"tmp_df.head(5)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"combined_df = tmp_df"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Let's rename the columns generated by `split_column_by_example` into meaningful names."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"tmp_df = (combined_df\n",
|
||||||
|
" .rename_columns(column_pairs={\n",
|
||||||
|
" \"pickup_datetime_1\": \"pickup_date\",\n",
|
||||||
|
" \"pickup_datetime_2\": \"pickup_time\",\n",
|
||||||
|
" \"dropoff_datetime_1\": \"dropoff_date\",\n",
|
||||||
|
" \"dropoff_datetime_2\": \"dropoff_time\"\n",
|
||||||
|
" }))\n",
|
||||||
|
"tmp_df.head(5)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"combined_df = tmp_df"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Feature Engineering"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Datetime features"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Let's split the pickup and drop-off date further into day of week, day of month, and month. For pickup and drop-off time columns, we will split it into hour, minute, and second."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"tmp_df = (combined_df\n",
|
||||||
|
" .derive_column_by_example(\n",
|
||||||
|
" source_columns=\"pickup_date\", \n",
|
||||||
|
" new_column_name=\"pickup_weekday\", \n",
|
||||||
|
" example_data=[(\"2009-01-04\", \"Sunday\"), (\"2013-08-22\", \"Thursday\")]\n",
|
||||||
|
" )\n",
|
||||||
|
" .derive_column_by_example(\n",
|
||||||
|
" source_columns=\"dropoff_date\",\n",
|
||||||
|
" new_column_name=\"dropoff_weekday\",\n",
|
||||||
|
" example_data=[(\"2013-08-22\", \"Thursday\"), (\"2013-11-03\", \"Sunday\")]\n",
|
||||||
|
" )\n",
|
||||||
|
" .split_column_by_example(source_column=\"pickup_date\")\n",
|
||||||
|
" .split_column_by_example(source_column=\"pickup_time\")\n",
|
||||||
|
" .split_column_by_example(source_column=\"dropoff_date\")\n",
|
||||||
|
" .split_column_by_example(source_column=\"dropoff_time\")\n",
|
||||||
|
" .split_column_by_example(source_column=\"pickup_time_1\")\n",
|
||||||
|
" .split_column_by_example(source_column=\"dropoff_time_1\")\n",
|
||||||
|
" .drop_columns(columns=[\n",
|
||||||
|
" \"pickup_date\", \"pickup_time\", \"dropoff_date\", \"dropoff_time\", \n",
|
||||||
|
" \"pickup_date_1\", \"dropoff_date_1\", \"pickup_time_1\", \"dropoff_time_1\"\n",
|
||||||
|
" ])\n",
|
||||||
|
" .rename_columns(column_pairs={\n",
|
||||||
|
" \"pickup_date_2\": \"pickup_month\",\n",
|
||||||
|
" \"pickup_date_3\": \"pickup_monthday\",\n",
|
||||||
|
" \"pickup_time_1_1\": \"pickup_hour\",\n",
|
||||||
|
" \"pickup_time_1_2\": \"pickup_minute\",\n",
|
||||||
|
" \"pickup_time_2\": \"pickup_second\",\n",
|
||||||
|
" \"dropoff_date_2\": \"dropoff_month\",\n",
|
||||||
|
" \"dropoff_date_3\": \"dropoff_monthday\",\n",
|
||||||
|
" \"dropoff_time_1_1\": \"dropoff_hour\",\n",
|
||||||
|
" \"dropoff_time_1_2\": \"dropoff_minute\",\n",
|
||||||
|
" \"dropoff_time_2\": \"dropoff_second\"\n",
|
||||||
|
" }))\n",
|
||||||
|
"tmp_df.head(5)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"combined_df = tmp_df"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"From the data above, we can see that the pickup and drop-off date and time components produced from the transforms above looks good. Let's drop the `pickup_datetime` and `dropoff_datetime` columns as they are no longer needed."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"tmp_df = combined_df.drop_columns(columns=[\"pickup_datetime\", \"dropoff_datetime\"])\n",
|
||||||
|
"tmp_df.head(5)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"combined_df = tmp_df"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Let's now save the transformation steps into a DataPrep package so we can use it to to run on spark."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"dflow_path = path.join(mkdtemp(), \"new_york_taxi.dprep\")\n",
|
||||||
|
"combined_df.save(file_path=dflow_path)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "sihhu"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3.6",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python36"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.6.4"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
@@ -0,0 +1,129 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Scale-Out Data Preparation\n",
|
||||||
|
"Copyright (c) Microsoft Corporation. All rights reserved.<br>\n",
|
||||||
|
"Licensed under the MIT License."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Once we are done with preparing and featurizing the data locally, we can run the same steps on the full dataset in scale-out mode. The new york taxi cab data is about 300GB in total, which is perfect for scale-out. Let's start by downloading the package we saved earlier to disk. Feel free to run the `new_york_taxi_cab.ipynb` notebook to generate the package yourself, in which case you may comment out the download code and set the `package_path` to where the package is saved."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from tempfile import mkdtemp\n",
|
||||||
|
"from os import path\n",
|
||||||
|
"from urllib.request import urlretrieve\n",
|
||||||
|
"\n",
|
||||||
|
"dflow_root = mkdtemp()\n",
|
||||||
|
"dflow_path = path.join(dflow_root, \"new_york_taxi.dprep\")\n",
|
||||||
|
"print(\"Downloading Dataflow to: {}\".format(dflow_path))\n",
|
||||||
|
"urlretrieve(\"https://dprepdata.blob.core.windows.net/demo/new_york_taxi_v2.dprep\", dflow_path)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Let's load the package we just downloaded."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import azureml.dataprep as dprep\n",
|
||||||
|
"\n",
|
||||||
|
"df = dprep.Dataflow.open(dflow_path)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Let's replace the datasources with the full dataset."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from uuid import uuid4\n",
|
||||||
|
"\n",
|
||||||
|
"other_step = df._get_steps()[7].arguments['dataflows'][0]['anonymousSteps'][0]\n",
|
||||||
|
"other_step['id'] = str(uuid4())\n",
|
||||||
|
"other_step['arguments']['path']['target'] = 1\n",
|
||||||
|
"other_step['arguments']['path']['resourceDetails'][0]['path'] = 'https://wranglewestus.blob.core.windows.net/nyctaxi/yellow_tripdata*'"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"green_dsource = dprep.BlobDataSource(\"https://wranglewestus.blob.core.windows.net/nyctaxi/green_tripdata*\")\n",
|
||||||
|
"df = df.replace_datasource(green_dsource)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Once we have replaced the datasource, we can now run the same steps on the full dataset. We will print the first 5 rows of the spark DataFrame. Since we are running on the full dataset, this might take a little while depending on your spark cluster's size."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"spark_df = df.take(5).to_pandas_dataframe()\n",
|
||||||
|
"spark_df.head(5)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "sihhu"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3.6",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python36"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.6.4"
|
||||||
|
},
|
||||||
|
"skip_execute_as_test": true
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
@@ -0,0 +1,45 @@
|
|||||||
|
-----BEGIN PRIVATE KEY-----
|
||||||
|
MIIEvwIBADANBgkqhkiG9w0BAQEFAASCBKkwggSlAgEAAoIBAQDmkkyF0BwipZow
|
||||||
|
Wd1AMkRkySx0y079JPxpsYhv4i1xXKdoa9bpFqwoXmJpeQM1JWnU4UeZzFeM86qK
|
||||||
|
AhQvL4KV4kibcP2ENvu2NKFEdotO3uxPJ+6GlcYwMYzy+tUj008KnnRZfTrR78sJ
|
||||||
|
tIl3C6lnVL0ICihksG59P1sskRq3PvOjXLAdEZalwDjZ4ZPoNDZdj6nUjB2l8zqu
|
||||||
|
pKAt5mR+bJ9Sox4yrDuNhMmFt5QsRDRe3wUqdV+C9OCWHmjlmsjrYw7p9YmjBDvC
|
||||||
|
5U7mF0Mk/XeYFzj0pkXKQVqBL6xqig+q5ob0szYfg19iDeFhS3iIsRcJGEnRVW/A
|
||||||
|
NpsBZyKrAgMBAAECggEBANlvP8C1F8NInhZYuIAwpzTQTh86Fxw8g9h8dijkh2wv
|
||||||
|
LyQXBk07d1B+aZoDZ5X32UzKwcX04N9obfvFqBkzWZdVFJmZvUmwvEEActBoZkkT
|
||||||
|
io+/HX5HweVy5PPCvbsSK6jc8uXtZcnSs4tMeJIOKkvqqnTpd1w00Y1FcQqfMC16
|
||||||
|
4p7o8wbt6OFoFAYqcxeVYVwDzCTLZD3+iJaqmntkBkoDndJy52yXQmMq5z1wbQVp
|
||||||
|
BL6+L9nTvmouy64jiHVSKOx8nnWThYfHsXoPv+rYywjeuK/v3hyaTAwogs36ooEn
|
||||||
|
SnuTBRvJcumN9Q0XIVlxKMVBcGyyAP+0yNKGz5NQgdECgYEA/I/Uq1E3epPJgEWR
|
||||||
|
Bub+LpCgwtrw/lgKncb/Q/AiE9qoXobUe4KNU8aGaNMb7uVNLckY7cOluLS6SQb3
|
||||||
|
Mzwk2Jl0G3vk8rW46tZWvSYB8+zAR2Rz7seUOT9SE5OmvwpnHrnp3nRr1vvVd2bp
|
||||||
|
Q/ypwMLrwWQN51Kr+oTS74bUbrkCgYEA6bXVIUyao7z2Q3qAr6h+6JEWDbkJA7hJ
|
||||||
|
BjHIOXvxd1tMoJJX+X9+IE/2XoJaUkGCb0vrM/hi1cyQFmS4Or/J6IWSZu8oBpDr
|
||||||
|
EBmIK3PF1nrzNvWD28wM46c6ScehyWSm/u4bJWSm9liTX3dv5Kpa6ym7yLKc3c0B
|
||||||
|
ECpSJM+5SoMCgYEAq585Tukzn/IJPUcIk/4nv5C8DW0l0lAVdr2g/JOTNJajTwik
|
||||||
|
HwHJ86G1+Elsc9wRpAlBDWCjnm4BIFrBZGl8SEuOoJaCL4PZEotwCbxoG09IIbtb
|
||||||
|
JGkuifBDX9Y3ux3gkPqYt3e5SC99EVQ3MuHgoIJUHehVolmFUAkuJWIjvNECgYEA
|
||||||
|
5pU0VspRuELzZdgzpxvDOooLDDcHodfslGQBfFXBA1Xc4IACtHMJaa/7D3vkyUtA
|
||||||
|
+bYZtQjX2sEdWDq/WZdoCjXfIBfNkczhXt0R8G0lQFvGIu9QzUchYGrZo3mHMkBQ
|
||||||
|
Uy1xMw9/e4YgwQwCJcW+Nk7Sq00uX9enuN9IdHFOCykCgYAqAGMK6CH1tlpjvHrf
|
||||||
|
k+ZhigYxTXBlsVVvK1BIGGaiwzDpn65zeQp4aLOjSZkI1LuRi3tfTiZ321jRd64J
|
||||||
|
4lGk5Jurqv5grDmxROX/U50wEYbI9ncu/thU7syUdxDiqxHPI2RMG50mRcm3a55p
|
||||||
|
ZCNSqkMlcXyA0U1z8C1ILNUsbA==
|
||||||
|
-----END PRIVATE KEY-----
|
||||||
|
-----BEGIN CERTIFICATE-----
|
||||||
|
MIICoTCCAYkCAgPoMA0GCSqGSIb3DQEBBQUAMBQxEjAQBgNVBAMMCUNMSS1Mb2dp
|
||||||
|
bjAiGA8yMDE4MDcxMzIzMjA0N1oYDzIwMTkwNzEzMjMyMDQ5WjAUMRIwEAYDVQQD
|
||||||
|
DAlDTEktTG9naW4wggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDmkkyF
|
||||||
|
0BwipZowWd1AMkRkySx0y079JPxpsYhv4i1xXKdoa9bpFqwoXmJpeQM1JWnU4UeZ
|
||||||
|
zFeM86qKAhQvL4KV4kibcP2ENvu2NKFEdotO3uxPJ+6GlcYwMYzy+tUj008KnnRZ
|
||||||
|
fTrR78sJtIl3C6lnVL0ICihksG59P1sskRq3PvOjXLAdEZalwDjZ4ZPoNDZdj6nU
|
||||||
|
jB2l8zqupKAt5mR+bJ9Sox4yrDuNhMmFt5QsRDRe3wUqdV+C9OCWHmjlmsjrYw7p
|
||||||
|
9YmjBDvC5U7mF0Mk/XeYFzj0pkXKQVqBL6xqig+q5ob0szYfg19iDeFhS3iIsRcJ
|
||||||
|
GEnRVW/ANpsBZyKrAgMBAAEwDQYJKoZIhvcNAQEFBQADggEBAI4VlaFb9NsXMLdT
|
||||||
|
Cw5/pk0Xo2Qi6483RGTy8vzrw88IE7f3juB/JWG+rayjtW5bBRx2fae4/ZIdZ4zg
|
||||||
|
N2FDKn2PQPAc9m9pcKyUKUvWOC8ixSkrUmeQew0l1AXU0hsPSlJ7/7ZK4efoyB47
|
||||||
|
hj71fsyKdyKbisZDcUFBq/S8PazdPF0YOD1W/4A2tW0cSMg+jmFWynuUTdWt3SU8
|
||||||
|
CwBGqdiSKT5faJuYwIWnRXDEQS3ObRn1OFEfFdd4d2sxjxydWKRgnINnGlBdiFAT
|
||||||
|
KzCozVr+75cO2ErH6x5C0hLQGG5BxXbaijyxyvaRNokTMVVv6OaDEnjzCGfJ72Yf
|
||||||
|
2wgitNc=
|
||||||
|
-----END CERTIFICATE-----
|
||||||
@@ -0,0 +1,54 @@
|
|||||||
|
"Retrieved from https://en.wikipedia.org/wiki/Chicago_City_Council on November 6, 2018"
|
||||||
|
|
||||||
|
|
||||||
|
Ward,Name,Took Office,Party
|
||||||
|
1,Proco Joe Moreno,2010*,Dem
|
||||||
|
2,Brian Hopkins,2015,Dem
|
||||||
|
3,Pat Dowell,2007,Dem
|
||||||
|
4,Sophia King,2016*,Dem
|
||||||
|
5,Leslie Hairston,1999,Dem
|
||||||
|
6,Roderick Sawyer,2011,Dem
|
||||||
|
7,Gregory Mitchell,2015,Dem
|
||||||
|
8,Michelle A. Harris,2006*,Dem
|
||||||
|
9,Anthony Beale,1999,Dem
|
||||||
|
10,Susie Sadlowski Garza,2015,Dem
|
||||||
|
11,Patrick Daley Thompson,2015,Dem
|
||||||
|
12,George Cardenas,2003,Dem
|
||||||
|
13,Marty Quinn,2011,Dem
|
||||||
|
14,Edward M. Burke,1969,Dem
|
||||||
|
15,Raymond Lopez,2015,Dem
|
||||||
|
16,Toni Foulkes,2007,Dem
|
||||||
|
17,David H. Moore,2015,Dem
|
||||||
|
18,Derrick Curtis,2015,Dem
|
||||||
|
19,Matthew O'Shea,2011,Dem
|
||||||
|
20,Willie Cochran,2007,Dem
|
||||||
|
21,Howard Brookins Jr.,2003,Dem
|
||||||
|
22,Ricardo Muñoz,1993*,Dem
|
||||||
|
23,Silvana Tabares,2018*,Dem
|
||||||
|
24,"Michael Scott, Jr.",2015,Dem
|
||||||
|
25,Daniel Solis,1996*,Dem
|
||||||
|
26,Roberto Maldonado,2009*,Dem
|
||||||
|
27,"Walter Burnett, Jr.",1995,Dem
|
||||||
|
28,Jason Ervin,2011*,Dem
|
||||||
|
29,Chris Taliaferro,2015,Dem
|
||||||
|
30,Ariel Reboyras,2003,Dem
|
||||||
|
31,Milly Santiago,2015,Dem
|
||||||
|
32,Scott Waguespack,2007,Dem
|
||||||
|
33,Deb Mell,2013*,Dem
|
||||||
|
34,Carrie Austin,1994*,Dem
|
||||||
|
35,Carlos Ramirez-Rosa,2015,Dem
|
||||||
|
36,Gilbert Villegas,2015,Dem
|
||||||
|
37,Emma Mitts,2000*,Dem
|
||||||
|
38,Nicholas Sposato,2011,Ind
|
||||||
|
39,Margaret Laurino,1994*,Dem
|
||||||
|
40,Patrick J. O'Connor,1983,Dem
|
||||||
|
41,Anthony Napolitano,2015,Rep
|
||||||
|
42,Brendan Reilly,2007,Dem
|
||||||
|
43,Michele Smith,2011,Dem
|
||||||
|
44,Thomas M. Tunney,2002*,Dem
|
||||||
|
45,John Arena,2011,Dem
|
||||||
|
46,James Cappleman,2011,Dem
|
||||||
|
47,Ameya Pawar,2011,Dem
|
||||||
|
48,Harry Osterman,2011,Dem
|
||||||
|
49,Joe Moore,1991,Dem
|
||||||
|
50,Debra Silverstein,2011,Dem
|
||||||
|
@@ -0,0 +1,15 @@
|
|||||||
|
File updated 11/2/2018
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
ID|Case Number|Date|Block|IUCR|Primary Type|Description|Location Description|Arrest|Domestic|Beat|District|Ward|Community Area|FBI Code|X Coordinate|Y Coordinate|Year|Updated On|Latitude|Longitude|Location
|
||||||
|
10140490|HY329907|07/05/2015 11:50:00 PM|050XX N NEWLAND AVE|0820|THEFT|$500 AND UNDER|STREET|false|false|1613|016|41|10|06|1129230|1933315|2015|07/12/2015 12:42:46 PM|41.973309466|-87.800174996|(41.973309466, -87.800174996)
|
||||||
|
10139776|HY329265|07/05/2015 11:30:00 PM|011XX W MORSE AVE|0460|BATTERY|SIMPLE|STREET|false|true|2431|024|49|1|08B|1167370|1946271|2015|07/12/2015 12:42:46 PM|42.008124017|-87.65955018|(42.008124017, -87.65955018)
|
||||||
|
10140270|HY329253|07/05/2015 11:20:00 PM|121XX S FRONT AVE|0486|BATTERY|DOMESTIC BATTERY SIMPLE|STREET|false|true|0532||9|53|08B|||2015|07/12/2015 12:42:46 PM|||
|
||||||
|
10139885|HY329308|07/05/2015 11:19:00 PM|051XX W DIVISION ST|0610|BURGLARY|FORCIBLE ENTRY|SMALL RETAIL STORE|false|false|1531|015|37|25|05|1141721|1907465|2015|07/12/2015 12:42:46 PM|41.902152027|-87.754883404|(41.902152027, -87.754883404)
|
||||||
|
10140379|HY329556|07/05/2015 11:00:00 PM|012XX W LAKE ST|0930|MOTOR VEHICLE THEFT|THEFT/RECOVERY: AUTOMOBILE|STREET|false|false|1215|012|27|28|07|1168413|1901632|2015|07/12/2015 12:42:46 PM|41.885610142|-87.657008701|(41.885610142, -87.657008701)
|
||||||
|
10140868|HY330421|07/05/2015 10:54:00 PM|118XX S PEORIA ST|1320|CRIMINAL DAMAGE|TO VEHICLE|VEHICLE NON-COMMERCIAL|false|false|0524|005|34|53|14|1172409|1826485|2015|07/12/2015 12:42:46 PM|41.6793109|-87.644545209|(41.6793109, -87.644545209)
|
||||||
|
10139762|HY329232|07/05/2015 10:42:00 PM|026XX W 37TH PL|1020|ARSON|BY FIRE|VACANT LOT/LAND|false|false|0911|009|12|58|09|1159436|1879658|2015|07/12/2015 12:42:46 PM|41.825500607|-87.690578042|(41.825500607, -87.690578042)
|
||||||
|
10139722|HY329228|07/05/2015 10:30:00 PM|016XX S CENTRAL PARK AVE|1811|NARCOTICS|POSS: CANNABIS 30GMS OR LESS|ALLEY|true|false|1021|010|24|29|18|1152687|1891389|2015|07/12/2015 12:42:46 PM|41.857827814|-87.715028789|(41.857827814, -87.715028789)
|
||||||
|
10139774|HY329209|07/05/2015 10:15:00 PM|048XX N ASHLAND AVE|1310|CRIMINAL DAMAGE|TO PROPERTY|APARTMENT|false|false|2032|020|46|3|14|1164821|1932394|2015|07/12/2015 12:42:46 PM|41.970099796|-87.669324377|(41.970099796, -87.669324377)
|
||||||
|
10139697|HY329177|07/05/2015 10:10:00 PM|058XX S ARTESIAN AVE|1320|CRIMINAL DAMAGE|TO VEHICLE|ALLEY|false|false|0824|008|16|63|14|1160997|1865851|2015|07/12/2015 12:42:46 PM|41.787580282|-87.685233078|(41.787580282, -87.685233078)
|
||||||
|
@@ -0,0 +1,11 @@
|
|||||||
|
ID,Case Number,Date,Block,IUCR,Primary Type,Description,Location Description,Arrest,Domestic,Beat,District,Ward,Community Area,FBI Code,X Coordinate,Y Coordinate,Year,Updated On,Latitude,Longitude,Location
|
||||||
|
10498554,HZ239907,4/4/2016 23:56,007XX E 111TH ST,1153,DECEPTIVE PRACTICE,FINANCIAL IDENTITY THEFT OVER $ 300,OTHER,FALSE,FALSE,531,5,9,50,11,1183356,1831503,2016,5/11/2016 15:48,41.69283384,-87.60431945,"(41.692833841, -87.60431945)"
|
||||||
|
10516598,HZ258664,4/15/2016 17:00,082XX S MARSHFIELD AVE,890,THEFT,FROM BUILDING,RESIDENCE,FALSE,FALSE,614,6,21,71,6,1166776,1850053,2016,5/12/2016 15:48,41.74410697,-87.66449429,"(41.744106973, -87.664494285)"
|
||||||
|
10519196,HZ261252,4/15/2016 10:00,104XX S SACRAMENTO AVE,1154,DECEPTIVE PRACTICE,FINANCIAL IDENTITY THEFT $300 AND UNDER,RESIDENCE,FALSE,FALSE,2211,22,19,74,11,,,2016,5/12/2016 15:50,,,
|
||||||
|
10519591,HZ261534,4/15/2016 9:00,113XX S PRAIRIE AVE,1120,DECEPTIVE PRACTICE,FORGERY,RESIDENCE,FALSE,FALSE,531,5,9,49,10,,,2016,5/13/2016 15:51,,,
|
||||||
|
10534446,HZ277630,4/15/2016 10:00,055XX N KEDZIE AVE,890,THEFT,FROM BUILDING,"SCHOOL, PUBLIC, BUILDING",FALSE,FALSE,1712,17,40,13,6,,,2016,5/25/2016 15:59,,,
|
||||||
|
10535059,HZ278872,4/15/2016 4:30,004XX S KILBOURN AVE,810,THEFT,OVER $500,RESIDENCE,FALSE,FALSE,1131,11,24,26,6,,,2016,5/25/2016 15:59,,,
|
||||||
|
10499802,HZ240778,4/15/2016 10:00,010XX N MILWAUKEE AVE,1152,DECEPTIVE PRACTICE,ILLEGAL USE CASH CARD,RESIDENCE,FALSE,FALSE,1213,12,27,24,11,,,2016,5/27/2016 15:45,,,
|
||||||
|
10522293,HZ264802,4/15/2016 16:00,019XX W DIVISION ST,1110,DECEPTIVE PRACTICE,BOGUS CHECK,RESTAURANT,FALSE,FALSE,1424,14,1,24,11,1163094,1908003,2016,5/16/2016 15:48,41.90320604,-87.67636193,"(41.903206037, -87.676361925)"
|
||||||
|
10523111,HZ265911,4/15/2016 8:00,061XX N SHERIDAN RD,1153,DECEPTIVE PRACTICE,FINANCIAL IDENTITY THEFT OVER $ 300,RESIDENCE,FALSE,FALSE,2433,24,48,77,11,,,2016,5/16/2016 15:50,,,
|
||||||
|
10525877,HZ268138,4/15/2016 15:00,023XX W EASTWOOD AVE,1153,DECEPTIVE PRACTICE,FINANCIAL IDENTITY THEFT OVER $ 300,,FALSE,FALSE,1911,19,47,4,11,,,2016,5/18/2016 15:50,,,
|
||||||
|
@@ -0,0 +1,11 @@
|
|||||||
|
ID,Case Number,Date,Block,IUCR,Primary Type,Description,Location Description,Arrest,Domestic,Beat,District,Ward,Community Area,FBI Code,X Coordinate,Y Coordinate,Year,Updated On,Latitude,Longitude,Location
|
||||||
|
10378283,HZ114126,1/10/2016 11:00,033XX W IRVING PARK RD,610,BURGLARY,FORCIBLE ENTRY,RESIDENCE-GARAGE,TRUE,FALSE,1724,17,33,16,5,1153593,1926401,2016,5/22/2016 15:51,41.95388599,-87.71077048,"(41.95388599, -87.710770479)"
|
||||||
|
10382154,HZ118288,1/10/2016 21:00,055XX S FRANCISCO AVE,1754,OFFENSE INVOLVING CHILDREN,AGG SEX ASSLT OF CHILD FAM MBR,RESIDENCE,FALSE,TRUE,824,8,14,63,2,1157983,1867874,2016,6/1/2016 15:51,41.79319349,-87.69622926,"(41.793193489, -87.696229255)"
|
||||||
|
10374287,HZ110730,1/10/2016 11:50,043XX W ARMITAGE AVE,5002,OTHER OFFENSE,OTHER VEHICLE OFFENSE,STREET,FALSE,TRUE,2522,25,30,20,26,1146917,1912931,2016,6/7/2016 15:55,41.91705356,-87.73565764,"(41.917053561, -87.735657637)"
|
||||||
|
10374662,HZ110403,1/10/2016 1:30,073XX S CLAREMONT AVE,497,BATTERY,AGGRAVATED DOMESTIC BATTERY: OTHER DANG WEAPON,STREET,FALSE,TRUE,835,8,18,66,04B,1162007,1855951,2016,2/4/2016 15:44,41.76039236,-87.68180481,"(41.760392356, -87.681804812)"
|
||||||
|
10374720,HZ110836,1/10/2016 7:30,079XX S RHODES AVE,890,THEFT,FROM BUILDING,OTHER,FALSE,FALSE,624,6,6,44,6,1181279,1852568,2016,2/4/2016 15:44,41.75068679,-87.61127681,"(41.75068679, -87.611276811)"
|
||||||
|
10375178,HZ110832,1/10/2016 14:20,057XX S KEDZIE AVE,460,BATTERY,SIMPLE,RESTAURANT,FALSE,FALSE,824,8,14,63,08B,1156029,1866379,2016,2/4/2016 15:44,41.78913051,-87.7034346,"(41.78913051, -87.703434602)"
|
||||||
|
10398695,HZ135279,1/10/2016 23:00,031XX S PARNELL AVE,620,BURGLARY,UNLAWFUL ENTRY,RESIDENCE-GARAGE,FALSE,FALSE,915,9,11,60,5,1173138,1884117,2016,2/4/2016 15:44,41.8374442,-87.64017699,"(41.837444199, -87.640176991)"
|
||||||
|
10402270,HZ138745,1/10/2016 11:00,051XX S ELIZABETH ST,620,BURGLARY,UNLAWFUL ENTRY,APARTMENT,FALSE,FALSE,934,9,16,61,5,,,2016,2/4/2016 6:53,,,
|
||||||
|
10380619,HZ116583,1/10/2016 9:41,091XX S PAXTON AVE,4387,OTHER OFFENSE,VIOLATE ORDER OF PROTECTION,RESIDENCE,TRUE,TRUE,413,4,7,48,26,1192434,1844707,2016,2/2/2016 15:56,41.72885134,-87.57065553,"(41.728851343, -87.570655525)"
|
||||||
|
10400131,HZ136171,1/10/2016 18:00,0000X W TERMINAL ST,810,THEFT,OVER $500,AIRPORT BUILDING NON-TERMINAL - SECURE AREA,FALSE,FALSE,1651,16,41,76,6,,,2016,2/2/2016 15:58,,,
|
||||||
|
204
how-to-use-azureml/work-with-data/dataprep/data/crime.dprep
Normal file
204
how-to-use-azureml/work-with-data/dataprep/data/crime.dprep
Normal file
@@ -0,0 +1,204 @@
|
|||||||
|
{
|
||||||
|
"id": "75637565-60ad-4baa-87d3-396a7930cfe7",
|
||||||
|
"blocks": [
|
||||||
|
{
|
||||||
|
"id": "ba5a8061-129e-4618-953a-ce3e89c8f2cb",
|
||||||
|
"type": "Microsoft.DPrep.GetFilesBlock",
|
||||||
|
"arguments": {
|
||||||
|
"path": {
|
||||||
|
"target": 0,
|
||||||
|
"resourceDetails": [
|
||||||
|
{
|
||||||
|
"path": "./crime-spring.csv"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"isEnabled": true,
|
||||||
|
"name": null,
|
||||||
|
"annotation": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "1b345643-6b60-4ca1-99f9-2a64ae932a23",
|
||||||
|
"type": "Microsoft.DPrep.ParseDelimitedBlock",
|
||||||
|
"arguments": {
|
||||||
|
"columnHeadersMode": 1,
|
||||||
|
"fileEncoding": 0,
|
||||||
|
"handleQuotedLineBreaks": false,
|
||||||
|
"preview": false,
|
||||||
|
"separator": ",",
|
||||||
|
"skipRowsMode": 0
|
||||||
|
},
|
||||||
|
"isEnabled": true,
|
||||||
|
"name": null,
|
||||||
|
"annotation": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "12cf73a2-1487-4915-bfa7-c86be7de08c0",
|
||||||
|
"type": "Microsoft.DPrep.SetColumnTypesBlock",
|
||||||
|
"arguments": {
|
||||||
|
"columnConversion": [
|
||||||
|
{
|
||||||
|
"column": {
|
||||||
|
"type": 2,
|
||||||
|
"details": {
|
||||||
|
"selectedColumn": "ID"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"typeProperty": 3
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"column": {
|
||||||
|
"type": 2,
|
||||||
|
"details": {
|
||||||
|
"selectedColumn": "IUCR"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"typeProperty": 3
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"column": {
|
||||||
|
"type": 2,
|
||||||
|
"details": {
|
||||||
|
"selectedColumn": "Domestic"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"typeProperty": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"column": {
|
||||||
|
"type": 2,
|
||||||
|
"details": {
|
||||||
|
"selectedColumn": "Beat"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"typeProperty": 3
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"column": {
|
||||||
|
"type": 2,
|
||||||
|
"details": {
|
||||||
|
"selectedColumn": "District"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"typeProperty": 3
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"column": {
|
||||||
|
"type": 2,
|
||||||
|
"details": {
|
||||||
|
"selectedColumn": "Ward"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"typeProperty": 3
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"column": {
|
||||||
|
"type": 2,
|
||||||
|
"details": {
|
||||||
|
"selectedColumn": "Community Area"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"typeProperty": 3
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"column": {
|
||||||
|
"type": 2,
|
||||||
|
"details": {
|
||||||
|
"selectedColumn": "Year"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"typeProperty": 3
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"column": {
|
||||||
|
"type": 2,
|
||||||
|
"details": {
|
||||||
|
"selectedColumn": "Longitude"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"typeProperty": 3
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"column": {
|
||||||
|
"type": 2,
|
||||||
|
"details": {
|
||||||
|
"selectedColumn": "Arrest"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"typeProperty": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"column": {
|
||||||
|
"type": 2,
|
||||||
|
"details": {
|
||||||
|
"selectedColumn": "X Coordinate"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"typeProperty": 3
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"column": {
|
||||||
|
"type": 2,
|
||||||
|
"details": {
|
||||||
|
"selectedColumn": "Updated On"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"typeArguments": {
|
||||||
|
"dateTimeFormats": [
|
||||||
|
"%m/%d/%Y %I:%M:%S %p"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"typeProperty": 4
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"column": {
|
||||||
|
"type": 2,
|
||||||
|
"details": {
|
||||||
|
"selectedColumn": "Date"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"typeArguments": {
|
||||||
|
"dateTimeFormats": [
|
||||||
|
"%m/%d/%Y %I:%M:%S %p"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"typeProperty": 4
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"column": {
|
||||||
|
"type": 2,
|
||||||
|
"details": {
|
||||||
|
"selectedColumn": "Y Coordinate"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"typeProperty": 3
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"column": {
|
||||||
|
"type": 2,
|
||||||
|
"details": {
|
||||||
|
"selectedColumn": "Latitude"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"typeProperty": 3
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"isEnabled": true,
|
||||||
|
"name": null,
|
||||||
|
"annotation": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "dfd62543-9285-412b-a930-0aeaaffde699",
|
||||||
|
"type": "Microsoft.DPrep.HandlePathColumnBlock",
|
||||||
|
"arguments": {
|
||||||
|
"pathColumnOperation": 0
|
||||||
|
},
|
||||||
|
"isEnabled": true,
|
||||||
|
"name": null,
|
||||||
|
"annotation": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"inspectors": []
|
||||||
|
}
|
||||||
BIN
how-to-use-azureml/work-with-data/dataprep/data/crime.parquet
Normal file
BIN
how-to-use-azureml/work-with-data/dataprep/data/crime.parquet
Normal file
Binary file not shown.
10
how-to-use-azureml/work-with-data/dataprep/data/crime.txt
Normal file
10
how-to-use-azureml/work-with-data/dataprep/data/crime.txt
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
10140490 HY329907 7/5/2015 23:50 050XX N NEWLAND AVE 820 THEFT
|
||||||
|
10139776 HY329265 7/5/2015 23:30 011XX W MORSE AVE 460 BATTERY
|
||||||
|
10140270 HY329253 7/5/2015 23:20 121XX S FRONT AVE 486 BATTERY
|
||||||
|
10139885 HY329308 7/5/2015 23:19 051XX W DIVISION ST 610 BURGLARY
|
||||||
|
10140379 HY329556 7/5/2015 23:00 012XX W LAKE ST 930 MOTOR VEHICLE THEFT
|
||||||
|
10140868 HY330421 7/5/2015 22:54 118XX S PEORIA ST 1320 CRIMINAL DAMAGE
|
||||||
|
10139762 HY329232 7/5/2015 22:42 026XX W 37TH PL 1020 ARSON
|
||||||
|
10139722 HY329228 7/5/2015 22:30 016XX S CENTRAL PARK AVE 1811 NARCOTICS
|
||||||
|
10139774 HY329209 7/5/2015 22:15 048XX N ASHLAND AVE 1310 CRIMINAL DAMAGE
|
||||||
|
10139697 HY329177 7/5/2015 22:10 058XX S ARTESIAN AVE 1320 CRIMINAL DAMAGE
|
||||||
BIN
how-to-use-azureml/work-with-data/dataprep/data/crime.xlsx
Normal file
BIN
how-to-use-azureml/work-with-data/dataprep/data/crime.xlsx
Normal file
Binary file not shown.
BIN
how-to-use-azureml/work-with-data/dataprep/data/crime.zip
Normal file
BIN
how-to-use-azureml/work-with-data/dataprep/data/crime.zip
Normal file
Binary file not shown.
@@ -0,0 +1,12 @@
|
|||||||
|
ID,Case Number,Date,Block,IUCR,Primary Type,Description,Location Description,Arrest,Domestic,Beat,District,Ward,Community Area,FBI Code,X Coordinate,Y Coordinate,Year,Updated On,Latitude,Longitude,Location
|
||||||
|
ID,Case Number,Date,Block,IUCR,Primary Type,Description,Location Description,Arrest,Domestic,Beat,District,Ward,Community Area,FBI Code,X Coordinate,Y Coordinate,Year,Updated On,Latitude,Longitude,Location
|
||||||
|
10498554,HZ239907,4/15/2016 23:56,007XX E 111TH ST,1153,DECEPTIVE PRACTICE,FINANCIAL IDENTITY THEFT OVER $ 300,OTHER,FALSE,FALSE,531,5,9,50,11,1183356,1831503,2016,5/11/2016 15:48,41.69283384,-87.60431945,"(41.692833841, -87.60431945)"
|
||||||
|
10516598,HZ258664,4/15/2016 17:00,082XX S MARSHFIELD AVE,890,THEFT,FROM BUILDING,RESIDENCE,FALSE,FALSE,614,6,21,71,6,1166776,1850053,2016,5/12/2016 15:48,41.74410697,-87.66449429,"(41.744106973, -87.664494285)"
|
||||||
|
10519196,HZ261252,4/15/2016 10:00,104XX S SACRAMENTO AVE,1154,DECEPTIVE PRACTICE,FINANCIAL IDENTITY THEFT $300 AND UNDER,RESIDENCE,FALSE,FALSE,2211,22,19,74,11,,,2016,5/12/2016 15:50,,,
|
||||||
|
10519591,HZ261534,4/15/2016 9:00,113XX S PRAIRIE AVE,1120,DECEPTIVE PRACTICE,FORGERY,RESIDENCE,FALSE,FALSE,531,5,9,49,10,,,2016,5/13/2016 15:51,,,
|
||||||
|
10534446,HZ277630,4/15/2016 10:00,055XX N KEDZIE AVE,890,THEFT,FROM BUILDING,"SCHOOL, PUBLIC, BUILDING",FALSE,FALSE,1712,17,40,13,6,,,2016,5/25/2016 15:59,,,
|
||||||
|
10535059,HZ278872,4/15/2016 4:30,004XX S KILBOURN AVE,810,THEFT,OVER $500,RESIDENCE,FALSE,FALSE,1131,11,24,26,6,,,2016,5/25/2016 15:59,,,
|
||||||
|
10499802,HZ240778,4/15/2016 10:00,010XX N MILWAUKEE AVE,1152,DECEPTIVE PRACTICE,ILLEGAL USE CASH CARD,RESIDENCE,FALSE,FALSE,1213,12,27,24,11,,,2016,5/27/2016 15:45,,,
|
||||||
|
10522293,HZ264802,4/15/2016 16:00,019XX W DIVISION ST,1110,DECEPTIVE PRACTICE,BOGUS CHECK,RESTAURANT,FALSE,FALSE,1424,14,1,24,11,1163094,1908003,2016,5/16/2016 15:48,41.90320604,-87.67636193,"(41.903206037, -87.676361925)"
|
||||||
|
10523111,HZ265911,4/15/2016 8:00,061XX N SHERIDAN RD,1153,DECEPTIVE PRACTICE,FINANCIAL IDENTITY THEFT OVER $ 300,RESIDENCE,FALSE,FALSE,2433,24,48,77,11,,,2016,5/16/2016 15:50,,,
|
||||||
|
10525877,HZ268138,4/15/2016 15:00,023XX W EASTWOOD AVE,1153,DECEPTIVE PRACTICE,FINANCIAL IDENTITY THEFT OVER $ 300,,FALSE,FALSE,1911,19,47,4,11,,,2016,5/18/2016 15:50,,,
|
||||||
|
@@ -0,0 +1,10 @@
|
|||||||
|
10140490 HY329907 7/5/2015 23:50 050XX N NEWLAND AVE 820 THEFT
|
||||||
|
10139776 HY329265 7/5/2015 23:30 011XX W MORSE AVE 460 BATTERY
|
||||||
|
10140270 HY329253 7/5/2015 23:20 121XX S FRONT AVE 486 BATTERY
|
||||||
|
10139885 HY329308 7/5/2015 23:19 051XX W DIVISION ST 610 BURGLARY
|
||||||
|
10140379 HY329556 7/5/2015 23:00 012XX W LAKE ST 930 MOTOR VEHICLE THEFT
|
||||||
|
10140868 HY330421 7/5/2015 22:54 118XX S PEORIA ST 1320 CRIMINAL DAMAGE
|
||||||
|
10139762 HY329232 7/5/2015 22:42 026XX W 37TH PL 1020 ARSON
|
||||||
|
10139722 HY329228 7/5/2015 22:30 016XX S CENTRAL PARK AVE 1811 NARCOTICS
|
||||||
|
10139774 HY329209 7/5/2015 22:15 048XX N ASHLAND AVE 1310 CRIMINAL DAMAGE
|
||||||
|
10139697 HY329177 7/5/2015 22:10 058XX S ARTESIAN AVE 1320 CRIMINAL DAMAGE
|
||||||
@@ -0,0 +1,11 @@
|
|||||||
|
ID |CaseNumber| |Completed|
|
||||||
|
10140490 |HY329907| |Y|
|
||||||
|
10139776 |HY329265| |Y|
|
||||||
|
10140270 |HY329253| |N|
|
||||||
|
10139885 |HY329308| |Y|
|
||||||
|
10140379 |HY329556| |N|
|
||||||
|
10140868 |HY330421| |N|
|
||||||
|
10139762 |HY329232| |N|
|
||||||
|
10139722 |HY329228| |Y|
|
||||||
|
10139774 |HY329209| |N|
|
||||||
|
10139697 |HY329177| |N|
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user