mirror of
https://github.com/Azure/MachineLearningNotebooks.git
synced 2025-12-20 09:37:04 -05:00
Compare commits
82 Commits
azureml-sd
...
cli-ga
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
01e188d61f | ||
|
|
7a754faa7e | ||
|
|
78f8ec3d24 | ||
|
|
6120c6897f | ||
|
|
8c4168dfa1 | ||
|
|
8580dcc01b | ||
|
|
5443b05152 | ||
|
|
afec974f09 | ||
|
|
d6b558f88d | ||
|
|
70c021eb69 | ||
|
|
550abc7167 | ||
|
|
6c12043732 | ||
|
|
2f3f1ff756 | ||
|
|
05f056a692 | ||
|
|
8e559ef577 | ||
|
|
a8e4fc2f9a | ||
|
|
f5e4df0864 | ||
|
|
13d0d9baf1 | ||
|
|
845e9d653e | ||
|
|
639ef81636 | ||
|
|
60158bf41a | ||
|
|
8dbbb01b8a | ||
|
|
6e6b2b0c48 | ||
|
|
85f5721bf8 | ||
|
|
6a7dd741e7 | ||
|
|
314218fc89 | ||
|
|
b50d2725c7 | ||
|
|
9a2f448792 | ||
|
|
dd620f19fd | ||
|
|
8116d31da4 | ||
|
|
ef29dc1fa5 | ||
|
|
97b345cb33 | ||
|
|
282250e670 | ||
|
|
acef60c5b3 | ||
|
|
bfb444eb15 | ||
|
|
6277659bf2 | ||
|
|
1645e12712 | ||
|
|
cc4a32e70b | ||
|
|
997a35aed5 | ||
|
|
dd6317a4a0 | ||
|
|
82d8353d54 | ||
|
|
59a01c17a0 | ||
|
|
e31e1d9af3 | ||
|
|
d38b9db255 | ||
|
|
761ad88c93 | ||
|
|
644729e5db | ||
|
|
e2b1b3fcaa | ||
|
|
dc692589a9 | ||
|
|
624b4595b5 | ||
|
|
0ed85c33c2 | ||
|
|
5b01de605f | ||
|
|
c351ac988a | ||
|
|
759ec3934c | ||
|
|
b499b88a85 | ||
|
|
5f4edac3c1 | ||
|
|
edfce0d936 | ||
|
|
1516c7fc24 | ||
|
|
389fb668ce | ||
|
|
647d5e72a5 | ||
|
|
43ac4c84bb | ||
|
|
8a1a82b50a | ||
|
|
72f386298c | ||
|
|
41d697e298 | ||
|
|
c3ce932029 | ||
|
|
a956162114 | ||
|
|
cb5a178e40 | ||
|
|
d81c336c59 | ||
|
|
4244a24d81 | ||
|
|
3b488555e5 | ||
|
|
6abc478f33 | ||
|
|
666c2579eb | ||
|
|
5af3aa4231 | ||
|
|
e48d828ab0 | ||
|
|
44aa636c21 | ||
|
|
4678f9adc3 | ||
|
|
5bf85edade | ||
|
|
94f381e884 | ||
|
|
ea1b7599c3 | ||
|
|
6b8a6befde | ||
|
|
c1511b7b74 | ||
|
|
8f007a3333 | ||
|
|
18a11bbd8d |
29
Dockerfiles/1.0.21/Dockerfile
Normal file
29
Dockerfiles/1.0.21/Dockerfile
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
FROM continuumio/miniconda:4.5.11

# install git; clean the apt lists in the same layer so they never persist in
# the image. NOTE(review): the blanket `apt-get upgrade` was dropped (hadolint
# DL3005) — pick up OS security fixes by bumping the base image tag instead.
RUN apt-get update && \
    apt-get install -y --no-install-recommends git && \
    rm -rf /var/lib/apt/lists/*

# create a new conda environment named azureml (conda package names are lowercase)
RUN conda create -n azureml -y -q python=3.6

# install additional packages used by the sample notebooks. this is optional
RUN ["/bin/bash", "-c", "source activate azureml && conda install -y tqdm cython matplotlib scikit-learn"]

# install azureml-sdk components, pinned to match the notebook branch cloned below
RUN ["/bin/bash", "-c", "source activate azureml && pip install --no-cache-dir azureml-sdk[notebooks]==1.0.21"]

# clone the matching release branch of the Azure ML sample notebooks
WORKDIR /home
RUN git clone -b "azureml-sdk-1.0.21" --single-branch https://github.com/Azure/MachineLearningNotebooks.git

# generate the jupyter configuration file (mkdir -p is idempotent)
RUN ["/bin/bash", "-c", "source activate azureml && mkdir -p ~/.jupyter && cd ~/.jupyter && jupyter notebook --generate-config"]

# set an empty token for Jupyter to remove authentication.
# this is NOT recommended for production environments
RUN echo "c.NotebookApp.token = ''" >> ~/.jupyter/jupyter_notebook_config.py

# document that the notebook server listens on port 8887 (EXPOSE does not publish)
EXPOSE 8887

# start the Jupyter notebook server on port 8887 when the container starts.
# exec form so the shell wrapper is explicit and overridable per-argument.
CMD ["/bin/bash", "-c", "cd /home/MachineLearningNotebooks && source activate azureml && jupyter notebook --port 8887 --no-browser --ip 0.0.0.0 --allow-root"]
29
Dockerfiles/1.0.23/Dockerfile
Normal file
29
Dockerfiles/1.0.23/Dockerfile
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
FROM continuumio/miniconda:4.5.11

# install git; clean the apt lists in the same layer so they never persist in
# the image. NOTE(review): the blanket `apt-get upgrade` was dropped (hadolint
# DL3005) — pick up OS security fixes by bumping the base image tag instead.
RUN apt-get update && \
    apt-get install -y --no-install-recommends git && \
    rm -rf /var/lib/apt/lists/*

# create a new conda environment named azureml (conda package names are lowercase)
RUN conda create -n azureml -y -q python=3.6

# install additional packages used by the sample notebooks. this is optional
RUN ["/bin/bash", "-c", "source activate azureml && conda install -y tqdm cython matplotlib scikit-learn"]

# install azureml-sdk components, pinned to match the notebook branch cloned below
RUN ["/bin/bash", "-c", "source activate azureml && pip install --no-cache-dir azureml-sdk[notebooks]==1.0.23"]

# clone the matching release branch of the Azure ML sample notebooks
WORKDIR /home
RUN git clone -b "azureml-sdk-1.0.23" --single-branch https://github.com/Azure/MachineLearningNotebooks.git

# generate the jupyter configuration file (mkdir -p is idempotent)
RUN ["/bin/bash", "-c", "source activate azureml && mkdir -p ~/.jupyter && cd ~/.jupyter && jupyter notebook --generate-config"]

# set an empty token for Jupyter to remove authentication.
# this is NOT recommended for production environments
RUN echo "c.NotebookApp.token = ''" >> ~/.jupyter/jupyter_notebook_config.py

# document that the notebook server listens on port 8887 (EXPOSE does not publish)
EXPOSE 8887

# start the Jupyter notebook server on port 8887 when the container starts.
# exec form so the shell wrapper is explicit and overridable per-argument.
CMD ["/bin/bash", "-c", "cd /home/MachineLearningNotebooks && source activate azureml && jupyter notebook --port 8887 --no-browser --ip 0.0.0.0 --allow-root"]
29
Dockerfiles/1.0.30/Dockerfile
Normal file
29
Dockerfiles/1.0.30/Dockerfile
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
FROM continuumio/miniconda:4.5.11

# install git; clean the apt lists in the same layer so they never persist in
# the image. NOTE(review): the blanket `apt-get upgrade` was dropped (hadolint
# DL3005) — pick up OS security fixes by bumping the base image tag instead.
RUN apt-get update && \
    apt-get install -y --no-install-recommends git && \
    rm -rf /var/lib/apt/lists/*

# create a new conda environment named azureml (conda package names are lowercase)
RUN conda create -n azureml -y -q python=3.6

# install additional packages used by the sample notebooks. this is optional
RUN ["/bin/bash", "-c", "source activate azureml && conda install -y tqdm cython matplotlib scikit-learn"]

# install azureml-sdk components, pinned to match the notebook branch cloned below
RUN ["/bin/bash", "-c", "source activate azureml && pip install --no-cache-dir azureml-sdk[notebooks]==1.0.30"]

# clone the matching release branch of the Azure ML sample notebooks
WORKDIR /home
RUN git clone -b "azureml-sdk-1.0.30" --single-branch https://github.com/Azure/MachineLearningNotebooks.git

# generate the jupyter configuration file (mkdir -p is idempotent)
RUN ["/bin/bash", "-c", "source activate azureml && mkdir -p ~/.jupyter && cd ~/.jupyter && jupyter notebook --generate-config"]

# set an empty token for Jupyter to remove authentication.
# this is NOT recommended for production environments
RUN echo "c.NotebookApp.token = ''" >> ~/.jupyter/jupyter_notebook_config.py

# document that the notebook server listens on port 8887 (EXPOSE does not publish)
EXPOSE 8887

# start the Jupyter notebook server on port 8887 when the container starts.
# exec form so the shell wrapper is explicit and overridable per-argument.
CMD ["/bin/bash", "-c", "cd /home/MachineLearningNotebooks && source activate azureml && jupyter notebook --port 8887 --no-browser --ip 0.0.0.0 --allow-root"]
29
Dockerfiles/1.0.33/Dockerfile
Normal file
29
Dockerfiles/1.0.33/Dockerfile
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
FROM continuumio/miniconda:4.5.11

# install git; clean the apt lists in the same layer so they never persist in
# the image. NOTE(review): the blanket `apt-get upgrade` was dropped (hadolint
# DL3005) — pick up OS security fixes by bumping the base image tag instead.
RUN apt-get update && \
    apt-get install -y --no-install-recommends git && \
    rm -rf /var/lib/apt/lists/*

# create a new conda environment named azureml (conda package names are lowercase)
RUN conda create -n azureml -y -q python=3.6

# install additional packages used by the sample notebooks. this is optional
RUN ["/bin/bash", "-c", "source activate azureml && conda install -y tqdm cython matplotlib scikit-learn"]

# install azureml-sdk components, pinned to match the notebook branch cloned below
RUN ["/bin/bash", "-c", "source activate azureml && pip install --no-cache-dir azureml-sdk[notebooks]==1.0.33"]

# clone the matching release branch of the Azure ML sample notebooks
WORKDIR /home
RUN git clone -b "azureml-sdk-1.0.33" --single-branch https://github.com/Azure/MachineLearningNotebooks.git

# generate the jupyter configuration file (mkdir -p is idempotent)
RUN ["/bin/bash", "-c", "source activate azureml && mkdir -p ~/.jupyter && cd ~/.jupyter && jupyter notebook --generate-config"]

# set an empty token for Jupyter to remove authentication.
# this is NOT recommended for production environments
RUN echo "c.NotebookApp.token = ''" >> ~/.jupyter/jupyter_notebook_config.py

# document that the notebook server listens on port 8887 (EXPOSE does not publish)
EXPOSE 8887

# start the Jupyter notebook server on port 8887 when the container starts.
# exec form so the shell wrapper is explicit and overridable per-argument.
CMD ["/bin/bash", "-c", "cd /home/MachineLearningNotebooks && source activate azureml && jupyter notebook --port 8887 --no-browser --ip 0.0.0.0 --allow-root"]
17
NBSETUP.md
17
NBSETUP.md
@@ -1,6 +1,4 @@
|
|||||||
# Setting up environment
|
# Set up your notebook environment for Azure Machine Learning
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
To run the notebooks in this repository use one of the following options.
|
To run the notebooks in this repository use one of the following options.
|
||||||
|
|
||||||
@@ -12,9 +10,7 @@ Azure Notebooks is a hosted Jupyter-based notebook service in the Azure cloud. A
|
|||||||
1. Follow the instructions in the [Configuration](configuration.ipynb) notebook to create and connect to a workspace
|
1. Follow the instructions in the [Configuration](configuration.ipynb) notebook to create and connect to a workspace
|
||||||
1. Open one of the sample notebooks
|
1. Open one of the sample notebooks
|
||||||
|
|
||||||
**Make sure the Azure Notebook kernel is set to `Python 3.6`** when you open a notebook
|
**Make sure the Azure Notebook kernel is set to `Python 3.6`** when you open a notebook by choosing Kernel > Change Kernel > Python 3.6 from the menus.
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||
## **Option 2: Use your own notebook server**
|
## **Option 2: Use your own notebook server**
|
||||||
|
|
||||||
@@ -31,9 +27,6 @@ git clone https://github.com/Azure/MachineLearningNotebooks.git
|
|||||||
# install the base SDK and a Jupyter notebook server
|
# install the base SDK and a Jupyter notebook server
|
||||||
pip install azureml-sdk[notebooks]
|
pip install azureml-sdk[notebooks]
|
||||||
|
|
||||||
# install the data prep component
|
|
||||||
pip install azureml-dataprep
|
|
||||||
|
|
||||||
# install model explainability component
|
# install model explainability component
|
||||||
pip install azureml-sdk[explain]
|
pip install azureml-sdk[explain]
|
||||||
|
|
||||||
@@ -58,8 +51,7 @@ Please make sure you start with the [Configuration](configuration.ipynb) noteboo
|
|||||||
|
|
||||||
### Video walkthrough:
|
### Video walkthrough:
|
||||||
|
|
||||||
[](https://youtu.be/VIsXeTuW3FU)
|
[!VIDEO https://youtu.be/VIsXeTuW3FU]
|
||||||
|
|
||||||
|
|
||||||
## **Option 3: Use Docker**
|
## **Option 3: Use Docker**
|
||||||
|
|
||||||
@@ -90,9 +82,6 @@ Now you can point your browser to http://localhost:8887. We recommend that you s
|
|||||||
If you need additional Azure ML SDK components, you can either modify the Docker files before you build the Docker images to add additional steps, or install them through command line in the live container after you build the Docker image. For example:
|
If you need additional Azure ML SDK components, you can either modify the Docker files before you build the Docker images to add additional steps, or install them through command line in the live container after you build the Docker image. For example:
|
||||||
|
|
||||||
```sh
|
```sh
|
||||||
# install dataprep components
|
|
||||||
pip install azureml-dataprep
|
|
||||||
|
|
||||||
# install the core SDK and automated ml components
|
# install the core SDK and automated ml components
|
||||||
pip install azureml-sdk[automl]
|
pip install azureml-sdk[automl]
|
||||||
|
|
||||||
|
|||||||
@@ -96,7 +96,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"import azureml.core\n",
|
"import azureml.core\n",
|
||||||
"\n",
|
"\n",
|
||||||
"print(\"This notebook was created using version 1.0.21 of the Azure ML SDK\")\n",
|
"print(\"This notebook was created using version 1.0.23 of the Azure ML SDK\")\n",
|
||||||
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -287,6 +287,8 @@ Notice how the parameters are modified when using the CPU-only mode.
|
|||||||
|
|
||||||
The outputs of the script can be observed in the master notebook as the script is executed
|
The outputs of the script can be observed in the master notebook as the script is executed
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -15,3 +15,6 @@ As a pre-requisite, run the [configuration Notebook](../configuration.ipynb) not
|
|||||||
* [enable-app-insights-in-production-service](./deployment/enable-app-insights-in-production-service) Learn how to use App Insights with production web service.
|
* [enable-app-insights-in-production-service](./deployment/enable-app-insights-in-production-service) Learn how to use App Insights with production web service.
|
||||||
|
|
||||||
Find quickstarts, end-to-end tutorials, and how-tos on the [official documentation site for Azure Machine Learning service](https://docs.microsoft.com/en-us/azure/machine-learning/service/).
|
Find quickstarts, end-to-end tutorials, and how-tos on the [official documentation site for Azure Machine Learning service](https://docs.microsoft.com/en-us/azure/machine-learning/service/).
|
||||||
|
|
||||||
|
|
||||||
|

|
||||||
|
|||||||
@@ -1,8 +1,8 @@
|
|||||||
# Table of Contents
|
# Table of Contents
|
||||||
1. [Automated ML Introduction](#introduction)
|
1. [Automated ML Introduction](#introduction)
|
||||||
1. [Running samples in Azure Notebooks](#jupyter)
|
1. [Setup using Azure Notebooks](#jupyter)
|
||||||
1. [Running samples in Azure Databricks](#databricks)
|
1. [Setup using Azure Databricks](#databricks)
|
||||||
1. [Running samples in a Local Conda environment](#localconda)
|
1. [Setup using a Local Conda environment](#localconda)
|
||||||
1. [Automated ML SDK Sample Notebooks](#samples)
|
1. [Automated ML SDK Sample Notebooks](#samples)
|
||||||
1. [Documentation](#documentation)
|
1. [Documentation](#documentation)
|
||||||
1. [Running using python command](#pythoncommand)
|
1. [Running using python command](#pythoncommand)
|
||||||
@@ -13,15 +13,15 @@
|
|||||||
Automated machine learning (automated ML) builds high quality machine learning models for you by automating model and hyperparameter selection. Bring a labelled dataset that you want to build a model for, automated ML will give you a high quality machine learning model that you can use for predictions.
|
Automated machine learning (automated ML) builds high quality machine learning models for you by automating model and hyperparameter selection. Bring a labelled dataset that you want to build a model for, automated ML will give you a high quality machine learning model that you can use for predictions.
|
||||||
|
|
||||||
|
|
||||||
If you are new to Data Science, AutoML will help you get jumpstarted by simplifying machine learning model building. It abstracts you from needing to perform model selection, hyperparameter selection and in one step creates a high quality trained model for you to use.
|
If you are new to Data Science, automated ML will help you get jumpstarted by simplifying machine learning model building. It abstracts you from needing to perform model selection, hyperparameter selection and in one step creates a high quality trained model for you to use.
|
||||||
|
|
||||||
If you are an experienced data scientist, AutoML will help increase your productivity by intelligently performing the model and hyperparameter selection for your training and generates high quality models much quicker than manually specifying several combinations of the parameters and running training jobs. AutoML provides visibility and access to all the training jobs and the performance characteristics of the models to help you further tune the pipeline if you desire.
|
If you are an experienced data scientist, automated ML will help increase your productivity by intelligently performing the model and hyperparameter selection for your training and generates high quality models much quicker than manually specifying several combinations of the parameters and running training jobs. Automated ML provides visibility and access to all the training jobs and the performance characteristics of the models to help you further tune the pipeline if you desire.
|
||||||
|
|
||||||
Below are the three execution environments supported by AutoML.
|
Below are the three execution environments supported by automated ML.
|
||||||
|
|
||||||
|
|
||||||
<a name="jupyter"></a>
|
<a name="jupyter"></a>
|
||||||
## Running samples in Azure Notebooks - Jupyter based notebooks in the Azure cloud
|
## Setup using Azure Notebooks - Jupyter based notebooks in the Azure cloud
|
||||||
|
|
||||||
1. [](https://aka.ms/aml-clone-azure-notebooks)
|
1. [](https://aka.ms/aml-clone-azure-notebooks)
|
||||||
[Import sample notebooks ](https://aka.ms/aml-clone-azure-notebooks) into Azure Notebooks.
|
[Import sample notebooks ](https://aka.ms/aml-clone-azure-notebooks) into Azure Notebooks.
|
||||||
@@ -29,7 +29,7 @@ Below are the three execution environments supported by AutoML.
|
|||||||
1. Open one of the sample notebooks.
|
1. Open one of the sample notebooks.
|
||||||
|
|
||||||
<a name="databricks"></a>
|
<a name="databricks"></a>
|
||||||
## Running samples in Azure Databricks
|
## Setup using Azure Databricks
|
||||||
|
|
||||||
**NOTE**: Please create your Azure Databricks cluster as v4.x (high concurrency preferred) with **Python 3** (dropdown).
|
**NOTE**: Please create your Azure Databricks cluster as v4.x (high concurrency preferred) with **Python 3** (dropdown).
|
||||||
**NOTE**: You should at least have contributor access to your Azure subscription to run the notebook.
|
**NOTE**: You should at least have contributor access to your Azure subscription to run the notebook.
|
||||||
@@ -39,7 +39,7 @@ Below are the three execution environments supported by AutoML.
|
|||||||
- Attach the notebook to the cluster.
|
- Attach the notebook to the cluster.
|
||||||
|
|
||||||
<a name="localconda"></a>
|
<a name="localconda"></a>
|
||||||
## Running samples in a Local Conda environment
|
## Setup using a Local Conda environment
|
||||||
|
|
||||||
To run these notebook on your own notebook server, use these installation instructions.
|
To run these notebook on your own notebook server, use these installation instructions.
|
||||||
The instructions below will install everything you need and then start a Jupyter notebook.
|
The instructions below will install everything you need and then start a Jupyter notebook.
|
||||||
@@ -49,11 +49,15 @@ The instructions below will install everything you need and then start a Jupyter
|
|||||||
There's no need to install mini-conda specifically.
|
There's no need to install mini-conda specifically.
|
||||||
|
|
||||||
### 2. Downloading the sample notebooks
|
### 2. Downloading the sample notebooks
|
||||||
- Download the sample notebooks from [GitHub](https://github.com/Azure/MachineLearningNotebooks) as zip and extract the contents to a local directory. The AutoML sample notebooks are in the "automl" folder.
|
- Download the sample notebooks from [GitHub](https://github.com/Azure/MachineLearningNotebooks) as zip and extract the contents to a local directory. The automated ML sample notebooks are in the "automated-machine-learning" folder.
|
||||||
|
|
||||||
### 3. Setup a new conda environment
|
### 3. Setup a new conda environment
|
||||||
The **automl/automl_setup** script creates a new conda environment, installs the necessary packages, configures the widget and starts a jupyter notebook.
|
The **automl_setup** script creates a new conda environment, installs the necessary packages, configures the widget and starts a jupyter notebook. It takes the conda environment name as an optional parameter. The default conda environment name is azure_automl. The exact command depends on the operating system. See the specific sections below for Windows, Mac and Linux. It can take about 10 minutes to execute.
|
||||||
It takes the conda environment name as an optional parameter. The default conda environment name is azure_automl. The exact command depends on the operating system. See the specific sections below for Windows, Mac and Linux. It can take about 10 minutes to execute.
|
|
||||||
|
Packages installed by the **automl_setup** script:
|
||||||
|
<ul><li>python</li><li>nb_conda</li><li>matplotlib</li><li>numpy</li><li>cython</li><li>urllib3</li><li>scipy</li><li>scikit-learn</li><li>pandas</li><li>tensorflow</li><li>py-xgboost</li><li>azureml-sdk</li><li>azureml-widgets</li><li>pandas-ml</li></ul>
|
||||||
|
|
||||||
|
For more details refer to the [automl_env.yml](./automl_env.yml)
|
||||||
## Windows
|
## Windows
|
||||||
Start an **Anaconda Prompt** window, cd to the **how-to-use-azureml/automated-machine-learning** folder where the sample notebooks were extracted and then run:
|
Start an **Anaconda Prompt** window, cd to the **how-to-use-azureml/automated-machine-learning** folder where the sample notebooks were extracted and then run:
|
||||||
```
|
```
|
||||||
@@ -81,7 +85,7 @@ bash automl_setup_linux.sh
|
|||||||
|
|
||||||
### 5. Running Samples
|
### 5. Running Samples
|
||||||
- Please make sure you use the Python [conda env:azure_automl] kernel when trying the sample Notebooks.
|
- Please make sure you use the Python [conda env:azure_automl] kernel when trying the sample Notebooks.
|
||||||
- Follow the instructions in the individual notebooks to explore various features in AutoML
|
- Follow the instructions in the individual notebooks to explore various features in automated ML.
|
||||||
|
|
||||||
### 6. Starting jupyter notebook manually
|
### 6. Starting jupyter notebook manually
|
||||||
To start your Jupyter notebook manually, use:
|
To start your Jupyter notebook manually, use:
|
||||||
@@ -103,22 +107,22 @@ jupyter notebook
|
|||||||
|
|
||||||
- [auto-ml-classification.ipynb](classification/auto-ml-classification.ipynb)
|
- [auto-ml-classification.ipynb](classification/auto-ml-classification.ipynb)
|
||||||
- Dataset: scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits)
|
- Dataset: scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits)
|
||||||
- Simple example of using Auto ML for classification
|
- Simple example of using automated ML for classification
|
||||||
- Uses local compute for training
|
- Uses local compute for training
|
||||||
|
|
||||||
- [auto-ml-regression.ipynb](regression/auto-ml-regression.ipynb)
|
- [auto-ml-regression.ipynb](regression/auto-ml-regression.ipynb)
|
||||||
- Dataset: scikit learn's [diabetes dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html)
|
- Dataset: scikit learn's [diabetes dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html)
|
||||||
- Simple example of using Auto ML for regression
|
- Simple example of using automated ML for regression
|
||||||
- Uses local compute for training
|
- Uses local compute for training
|
||||||
|
|
||||||
- [auto-ml-remote-execution.ipynb](remote-execution/auto-ml-remote-execution.ipynb)
|
- [auto-ml-remote-execution.ipynb](remote-execution/auto-ml-remote-execution.ipynb)
|
||||||
- Dataset: scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits)
|
- Dataset: scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits)
|
||||||
- Example of using Auto ML for classification using a remote linux DSVM for training
|
- Example of using automated ML for classification using a remote linux DSVM for training
|
||||||
- Parallel execution of iterations
|
- Parallel execution of iterations
|
||||||
- Async tracking of progress
|
- Async tracking of progress
|
||||||
- Cancelling individual iterations or entire run
|
- Cancelling individual iterations or entire run
|
||||||
- Retrieving models for any iteration or logged metric
|
- Retrieving models for any iteration or logged metric
|
||||||
- Specify automl settings as kwargs
|
- Specify automated ML settings as kwargs
|
||||||
|
|
||||||
- [auto-ml-remote-amlcompute.ipynb](remote-batchai/auto-ml-remote-amlcompute.ipynb)
|
- [auto-ml-remote-amlcompute.ipynb](remote-batchai/auto-ml-remote-amlcompute.ipynb)
|
||||||
- Dataset: scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits)
|
- Dataset: scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits)
|
||||||
@@ -127,7 +131,7 @@ jupyter notebook
|
|||||||
- Async tracking of progress
|
- Async tracking of progress
|
||||||
- Cancelling individual iterations or entire run
|
- Cancelling individual iterations or entire run
|
||||||
- Retrieving models for any iteration or logged metric
|
- Retrieving models for any iteration or logged metric
|
||||||
- Specify automl settings as kwargs
|
- Specify automated ML settings as kwargs
|
||||||
|
|
||||||
- [auto-ml-remote-attach.ipynb](remote-attach/auto-ml-remote-attach.ipynb)
|
- [auto-ml-remote-attach.ipynb](remote-attach/auto-ml-remote-attach.ipynb)
|
||||||
- Dataset: Scikit learn's [20newsgroup](http://scikit-learn.org/stable/datasets/twenty_newsgroups.html)
|
- Dataset: Scikit learn's [20newsgroup](http://scikit-learn.org/stable/datasets/twenty_newsgroups.html)
|
||||||
@@ -148,8 +152,8 @@ jupyter notebook
|
|||||||
|
|
||||||
- [auto-ml-exploring-previous-runs.ipynb](exploring-previous-runs/auto-ml-exploring-previous-runs.ipynb)
|
- [auto-ml-exploring-previous-runs.ipynb](exploring-previous-runs/auto-ml-exploring-previous-runs.ipynb)
|
||||||
- List all projects for the workspace
|
- List all projects for the workspace
|
||||||
- List all AutoML Runs for a given project
|
- List all automated ML Runs for a given project
|
||||||
- Get details for a AutoML Run. (Automl settings, run widget & all metrics)
|
- Get details for a automated ML Run. (automated ML settings, run widget & all metrics)
|
||||||
- Download fitted pipeline for any iteration
|
- Download fitted pipeline for any iteration
|
||||||
|
|
||||||
- [auto-ml-remote-execution-with-datastore.ipynb](remote-execution-with-datastore/auto-ml-remote-execution-with-datastore.ipynb)
|
- [auto-ml-remote-execution-with-datastore.ipynb](remote-execution-with-datastore/auto-ml-remote-execution-with-datastore.ipynb)
|
||||||
@@ -158,7 +162,7 @@ jupyter notebook
|
|||||||
|
|
||||||
- [auto-ml-classification-with-deployment.ipynb](classification-with-deployment/auto-ml-classification-with-deployment.ipynb)
|
- [auto-ml-classification-with-deployment.ipynb](classification-with-deployment/auto-ml-classification-with-deployment.ipynb)
|
||||||
- Dataset: scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits)
|
- Dataset: scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits)
|
||||||
- Simple example of using Auto ML for classification
|
- Simple example of using automated ML for classification
|
||||||
- Registering the model
|
- Registering the model
|
||||||
- Creating Image and creating aci service
|
- Creating Image and creating aci service
|
||||||
- Testing the aci service
|
- Testing the aci service
|
||||||
@@ -178,16 +182,21 @@ jupyter notebook
|
|||||||
|
|
||||||
- [auto-ml-classification-with-whitelisting.ipynb](classification-with-whitelisting/auto-ml-classification-with-whitelisting.ipynb)
|
- [auto-ml-classification-with-whitelisting.ipynb](classification-with-whitelisting/auto-ml-classification-with-whitelisting.ipynb)
|
||||||
- Dataset: scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits)
|
- Dataset: scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits)
|
||||||
- Simple example of using Auto ML for classification with whitelisting tensorflow models.
|
- Simple example of using automated ML for classification with whitelisting tensorflow models.
|
||||||
- Uses local compute for training
|
- Uses local compute for training
|
||||||
|
|
||||||
- [auto-ml-forecasting-energy-demand.ipynb](forecasting-energy-demand/auto-ml-forecasting-energy-demand.ipynb)
|
- [auto-ml-forecasting-energy-demand.ipynb](forecasting-energy-demand/auto-ml-forecasting-energy-demand.ipynb)
|
||||||
- Dataset: [NYC energy demand data](forecasting-a/nyc_energy.csv)
|
- Dataset: [NYC energy demand data](forecasting-a/nyc_energy.csv)
|
||||||
- Example of using AutoML for training a forecasting model
|
- Example of using automated ML for training a forecasting model
|
||||||
|
|
||||||
- [auto-ml-forecasting-orange-juice-sales.ipynb](forecasting-orange-juice-sales/auto-ml-forecasting-orange-juice-sales.ipynb)
|
- [auto-ml-forecasting-orange-juice-sales.ipynb](forecasting-orange-juice-sales/auto-ml-forecasting-orange-juice-sales.ipynb)
|
||||||
- Dataset: [Dominick's grocery sales of orange juice](forecasting-b/dominicks_OJ.csv)
|
- Dataset: [Dominick's grocery sales of orange juice](forecasting-b/dominicks_OJ.csv)
|
||||||
- Example of training an AutoML forecasting model on multiple time-series
|
- Example of training an automated ML forecasting model on multiple time-series
|
||||||
|
|
||||||
|
- [auto-ml-classification-with-onnx.ipynb](classification-with-onnx/auto-ml-classification-with-onnx.ipynb)
|
||||||
|
- Dataset: scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits)
|
||||||
|
- Simple example of using automated ML for classification with ONNX models
|
||||||
|
- Uses local compute for training
|
||||||
|
|
||||||
<a name="documentation"></a>
|
<a name="documentation"></a>
|
||||||
See [Configure automated machine learning experiments](https://docs.microsoft.com/azure/machine-learning/service/how-to-configure-auto-train) to learn more about the settings and features available for automated machine learning experiments.
|
See [Configure automated machine learning experiments](https://docs.microsoft.com/azure/machine-learning/service/how-to-configure-auto-train) to learn more about the settings and features available for automated machine learning experiments.
|
||||||
@@ -206,10 +215,18 @@ The main code of the file must be indented so that it is under this condition.
|
|||||||
<a name="troubleshooting"></a>
|
<a name="troubleshooting"></a>
|
||||||
# Troubleshooting
|
# Troubleshooting
|
||||||
## automl_setup fails
|
## automl_setup fails
|
||||||
1. On Windows, make sure that you are running automl_setup from an Anaconda Prompt window rather than a regular cmd window. You can launch the "Anaconda Prompt" window by hitting the Start button and typing "Anaconda Prompt". If you don't see the application "Anaconda Prompt", you might not have conda or mini conda installed. In that case, you can install it [here](https://conda.io/miniconda.html)
|
1. On Windows, make sure that you are running automl_setup from an Anaconda Prompt window rather than a regular cmd window. You can launch the "Anaconda Prompt" window by hitting the Start button and typing "Anaconda Prompt". If you don't see the application "Anaconda Prompt", you might not have conda or mini conda installed. In that case, you can install it [here](https://conda.io/miniconda.html)
|
||||||
2. Check that you have conda 64-bit installed rather than 32-bit. You can check this with the command `conda info`. The `platform` should be `win-64` for Windows or `osx-64` for Mac.
|
2. Check that you have conda 64-bit installed rather than 32-bit. You can check this with the command `conda info`. The `platform` should be `win-64` for Windows or `osx-64` for Mac.
|
||||||
3. Check that you have conda 4.4.10 or later. You can check the version with the command `conda -V`. If you have a previous version installed, you can update it using the command: `conda update conda`.
|
3. Check that you have conda 4.4.10 or later. You can check the version with the command `conda -V`. If you have a previous version installed, you can update it using the command: `conda update conda`.
|
||||||
4. Pass a new name as the first parameter to automl_setup so that it creates a new conda environment. You can view existing conda environments using `conda env list` and remove them with `conda env remove -n <environmentname>`.
|
4. On Linux, if the error is `gcc: error trying to exec 'cc1plus': execvp: No such file or directory`, install build essentials using the command `sudo apt-get install build-essential`.
|
||||||
|
5. Pass a new name as the first parameter to automl_setup so that it creates a new conda environment. You can view existing conda environments using `conda env list` and remove them with `conda env remove -n <environmentname>`.
|
||||||
|
|
||||||
|
## automl_setup_linux.sh fails
|
||||||
|
If automl_setup_linux.sh fails on Ubuntu Linux with the error: `unable to execute 'gcc': No such file or directory`
|
||||||
|
1. Make sure that outbound ports 53 and 80 are enabled. On an Azure VM, you can do this from the Azure Portal by selecting the VM and clicking on Networking.
|
||||||
|
2. Run the command: `sudo apt-get update`
|
||||||
|
3. Run the command: `sudo apt-get install build-essential --fix-missing`
|
||||||
|
4. Run `automl_setup_linux.sh` again.
|
||||||
|
|
||||||
## configuration.ipynb fails
|
## configuration.ipynb fails
|
||||||
1) For local conda, make sure that you have successfully run automl_setup first.
|
1) For local conda, make sure that you have successfully run automl_setup first.
|
||||||
@@ -233,13 +250,20 @@ If a sample notebook fails with an error that property, method or library does n
|
|||||||
## Numpy import fails on Windows
|
## Numpy import fails on Windows
|
||||||
Some Windows environments see an error loading numpy with the latest Python version 3.6.8. If you see this issue, try with Python version 3.6.7.
|
Some Windows environments see an error loading numpy with the latest Python version 3.6.8. If you see this issue, try with Python version 3.6.7.
|
||||||
|
|
||||||
|
## Numpy import fails
|
||||||
|
Check the tensorflow version in the automated ml conda environment. Supported versions are < 1.13. Uninstall tensorflow from the environment if version is >= 1.13
|
||||||
|
You may check the version of tensorflow and uninstall as follows
|
||||||
|
1) start a command shell, activate conda environment where automated ml packages are installed
|
||||||
|
2) enter `pip freeze` and look for `tensorflow` , if found, the version listed should be < 1.13
|
||||||
|
3) If the listed version is a not a supported version, `pip uninstall tensorflow` in the command shell and enter y for confirmation.
|
||||||
|
|
||||||
## Remote run: DsvmCompute.create fails
|
## Remote run: DsvmCompute.create fails
|
||||||
There are several reasons why the DsvmCompute.create can fail. The reason is usually in the error message but you have to look at the end of the error message for the detailed reason. Some common reasons are:
|
There are several reasons why the DsvmCompute.create can fail. The reason is usually in the error message but you have to look at the end of the error message for the detailed reason. Some common reasons are:
|
||||||
1) `Compute name is invalid, it should start with a letter, be between 2 and 16 character, and only include letters (a-zA-Z), numbers (0-9) and \'-\'.` Note that underscore is not allowed in the name.
|
1) `Compute name is invalid, it should start with a letter, be between 2 and 16 character, and only include letters (a-zA-Z), numbers (0-9) and \'-\'.` Note that underscore is not allowed in the name.
|
||||||
2) `The requested VM size xxxxx is not available in the current region.` You can select a different region or vm_size.
|
2) `The requested VM size xxxxx is not available in the current region.` You can select a different region or vm_size.
|
||||||
|
|
||||||
## Remote run: Unable to establish SSH connection
|
## Remote run: Unable to establish SSH connection
|
||||||
AutoML uses the SSH protocol to communicate with remote DSVMs. This defaults to port 22. Possible causes for this error are:
|
Automated ML uses the SSH protocol to communicate with remote DSVMs. This defaults to port 22. Possible causes for this error are:
|
||||||
1) The DSVM is not ready for SSH connections. When DSVM creation completes, the DSVM might still not be ready to accept SSH connections. The sample notebooks have a one minute delay to allow for this.
|
1) The DSVM is not ready for SSH connections. When DSVM creation completes, the DSVM might still not be ready to accept SSH connections. The sample notebooks have a one minute delay to allow for this.
|
||||||
2) Your Azure Subscription may restrict the IP address ranges that can access the DSVM on port 22. You can check this in the Azure Portal by selecting the Virtual Machine and then clicking Networking. The Virtual Machine name is the name that you provided in the notebook plus 10 alpha numeric characters to make the name unique. The Inbound Port Rules define what can access the VM on specific ports. Note that there is a priority order. So, a Deny entry with a low priority number will override an Allow entry with a higher priority number.
|
2) Your Azure Subscription may restrict the IP address ranges that can access the DSVM on port 22. You can check this in the Azure Portal by selecting the Virtual Machine and then clicking Networking. The Virtual Machine name is the name that you provided in the notebook plus 10 alpha numeric characters to make the name unique. The Inbound Port Rules define what can access the VM on specific ports. Note that there is a priority order. So, a Deny entry with a low priority number will override an Allow entry with a higher priority number.
|
||||||
|
|
||||||
@@ -250,13 +274,13 @@ This is often an issue with the `get_data` method.
|
|||||||
3) You can get to the error log for the setup iteration by clicking the `Click here to see the run in Azure portal` link, click `Back to Experiment`, click on the highest run number and then click on Logs.
|
3) You can get to the error log for the setup iteration by clicking the `Click here to see the run in Azure portal` link, click `Back to Experiment`, click on the highest run number and then click on Logs.
|
||||||
|
|
||||||
## Remote run: disk full
|
## Remote run: disk full
|
||||||
AutoML creates files under /tmp/azureml_runs for each iteration that it runs. It creates a folder with the iteration id. For example: AutoML_9a038a18-77cc-48f1-80fb-65abdbc33abe_93. Under this, there is a azureml-logs folder, which contains logs. If you run too many iterations on the same DSVM, these files can fill the disk.
|
Automated ML creates files under /tmp/azureml_runs for each iteration that it runs. It creates a folder with the iteration id. For example: AutoML_9a038a18-77cc-48f1-80fb-65abdbc33abe_93. Under this, there is a azureml-logs folder, which contains logs. If you run too many iterations on the same DSVM, these files can fill the disk.
|
||||||
You can delete the files under /tmp/azureml_runs or just delete the VM and create a new one.
|
You can delete the files under /tmp/azureml_runs or just delete the VM and create a new one.
|
||||||
If your get_data downloads files, make sure to delete them or they can use disk space as well.
|
If your get_data downloads files, make sure to delete them or they can use disk space as well.
|
||||||
When using DataStore, it is good to specify an absolute path for the files so that they are downloaded just once. If you specify a relative path, it will download a file for each iteration.
|
When using DataStore, it is good to specify an absolute path for the files so that they are downloaded just once. If you specify a relative path, it will download a file for each iteration.
|
||||||
|
|
||||||
## Remote run: Iterations fail and the log contains "MemoryError"
|
## Remote run: Iterations fail and the log contains "MemoryError"
|
||||||
This can be caused by insufficient memory on the DSVM. AutoML loads all training data into memory. So, the available memory should be more than the training data size.
|
This can be caused by insufficient memory on the DSVM. Automated ML loads all training data into memory. So, the available memory should be more than the training data size.
|
||||||
If you are using a remote DSVM, memory is needed for each concurrent iteration. The max_concurrent_iterations setting specifies the maximum concurrent iterations. For example, if the training data size is 8Gb and max_concurrent_iterations is set to 10, the minimum memory required is at least 80Gb.
|
If you are using a remote DSVM, memory is needed for each concurrent iteration. The max_concurrent_iterations setting specifies the maximum concurrent iterations. For example, if the training data size is 8Gb and max_concurrent_iterations is set to 10, the minimum memory required is at least 80Gb.
|
||||||
To resolve this issue, allocate a DSVM with more memory or reduce the value specified for max_concurrent_iterations.
|
To resolve this issue, allocate a DSVM with more memory or reduce the value specified for max_concurrent_iterations.
|
||||||
|
|
||||||
|
|||||||
21
how-to-use-azureml/automated-machine-learning/automl_env.yml
Normal file
21
how-to-use-azureml/automated-machine-learning/automl_env.yml
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
name: azure_automl
|
||||||
|
dependencies:
|
||||||
|
# The python interpreter version.
|
||||||
|
# Currently Azure ML only supports 3.5.2 and later.
|
||||||
|
- python>=3.5.2,<3.6.8
|
||||||
|
- nb_conda
|
||||||
|
- matplotlib==2.1.0
|
||||||
|
- numpy>=1.11.0,<=1.16.2
|
||||||
|
- cython
|
||||||
|
- urllib3<1.24
|
||||||
|
- scipy>=1.0.0,<=1.1.0
|
||||||
|
- scikit-learn>=0.19.0,<=0.20.3
|
||||||
|
- pandas>=0.22.0,<0.23.0
|
||||||
|
- py-xgboost<=0.80
|
||||||
|
|
||||||
|
- pip:
|
||||||
|
# Required packages for AzureML execution, history, and data preparation.
|
||||||
|
- azureml-sdk[automl,explain]
|
||||||
|
- azureml-widgets
|
||||||
|
- pandas_ml
|
||||||
|
|
||||||
@@ -0,0 +1,51 @@
|
|||||||
|
@echo off
|
||||||
|
set conda_env_name=%1
|
||||||
|
set automl_env_file=%2
|
||||||
|
set options=%3
|
||||||
|
set PIP_NO_WARN_SCRIPT_LOCATION=0
|
||||||
|
|
||||||
|
IF "%conda_env_name%"=="" SET conda_env_name="azure_automl"
|
||||||
|
IF "%automl_env_file%"=="" SET automl_env_file="automl_env.yml"
|
||||||
|
|
||||||
|
IF NOT EXIST %automl_env_file% GOTO YmlMissing
|
||||||
|
|
||||||
|
call conda activate %conda_env_name% 2>nul:
|
||||||
|
|
||||||
|
if not errorlevel 1 (
|
||||||
|
echo Upgrading azureml-sdk[automl,notebooks,explain] in existing conda environment %conda_env_name%
|
||||||
|
call pip install --upgrade azureml-sdk[automl,notebooks,explain]
|
||||||
|
if errorlevel 1 goto ErrorExit
|
||||||
|
) else (
|
||||||
|
call conda env create -f %automl_env_file% -n %conda_env_name%
|
||||||
|
)
|
||||||
|
|
||||||
|
call conda activate %conda_env_name% 2>nul:
|
||||||
|
if errorlevel 1 goto ErrorExit
|
||||||
|
|
||||||
|
call python -m ipykernel install --user --name %conda_env_name% --display-name "Python (%conda_env_name%)"
|
||||||
|
|
||||||
|
REM azureml.widgets is now installed as part of the pip install under the conda env.
|
||||||
|
REM Removing the old user install so that the notebooks will use the latest widget.
|
||||||
|
call jupyter nbextension uninstall --user --py azureml.widgets
|
||||||
|
|
||||||
|
echo.
|
||||||
|
echo.
|
||||||
|
echo ***************************************
|
||||||
|
echo * AutoML setup completed successfully *
|
||||||
|
echo ***************************************
|
||||||
|
IF NOT "%options%"=="nolaunch" (
|
||||||
|
echo.
|
||||||
|
echo Starting jupyter notebook - please run the configuration notebook
|
||||||
|
echo.
|
||||||
|
jupyter notebook --log-level=50 --notebook-dir='..\..'
|
||||||
|
)
|
||||||
|
|
||||||
|
goto End
|
||||||
|
|
||||||
|
:YmlMissing
|
||||||
|
echo File %automl_env_file% not found.
|
||||||
|
|
||||||
|
:ErrorExit
|
||||||
|
echo Install failed
|
||||||
|
|
||||||
|
:End
|
||||||
@@ -0,0 +1,52 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
CONDA_ENV_NAME=$1
|
||||||
|
AUTOML_ENV_FILE=$2
|
||||||
|
OPTIONS=$3
|
||||||
|
PIP_NO_WARN_SCRIPT_LOCATION=0
|
||||||
|
|
||||||
|
if [ "$CONDA_ENV_NAME" == "" ]
|
||||||
|
then
|
||||||
|
CONDA_ENV_NAME="azure_automl"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ "$AUTOML_ENV_FILE" == "" ]
|
||||||
|
then
|
||||||
|
AUTOML_ENV_FILE="automl_env.yml"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ ! -f $AUTOML_ENV_FILE ]; then
|
||||||
|
echo "File $AUTOML_ENV_FILE not found"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
if source activate $CONDA_ENV_NAME 2> /dev/null
|
||||||
|
then
|
||||||
|
echo "Upgrading azureml-sdk[automl,notebooks,explain] in existing conda environment" $CONDA_ENV_NAME
|
||||||
|
pip install --upgrade azureml-sdk[automl,notebooks,explain] &&
|
||||||
|
jupyter nbextension uninstall --user --py azureml.widgets
|
||||||
|
else
|
||||||
|
conda env create -f $AUTOML_ENV_FILE -n $CONDA_ENV_NAME &&
|
||||||
|
source activate $CONDA_ENV_NAME &&
|
||||||
|
python -m ipykernel install --user --name $CONDA_ENV_NAME --display-name "Python ($CONDA_ENV_NAME)" &&
|
||||||
|
jupyter nbextension uninstall --user --py azureml.widgets &&
|
||||||
|
echo "" &&
|
||||||
|
echo "" &&
|
||||||
|
echo "***************************************" &&
|
||||||
|
echo "* AutoML setup completed successfully *" &&
|
||||||
|
echo "***************************************" &&
|
||||||
|
if [ "$OPTIONS" != "nolaunch" ]
|
||||||
|
then
|
||||||
|
echo "" &&
|
||||||
|
echo "Starting jupyter notebook - please run the configuration notebook" &&
|
||||||
|
echo "" &&
|
||||||
|
jupyter notebook --log-level=50 --notebook-dir '../..'
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ $? -gt 0 ]
|
||||||
|
then
|
||||||
|
echo "Installation failed"
|
||||||
|
fi
|
||||||
|
|
||||||
|
|
||||||
@@ -0,0 +1,54 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
CONDA_ENV_NAME=$1
|
||||||
|
AUTOML_ENV_FILE=$2
|
||||||
|
OPTIONS=$3
|
||||||
|
PIP_NO_WARN_SCRIPT_LOCATION=0
|
||||||
|
|
||||||
|
if [ "$CONDA_ENV_NAME" == "" ]
|
||||||
|
then
|
||||||
|
CONDA_ENV_NAME="azure_automl"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ "$AUTOML_ENV_FILE" == "" ]
|
||||||
|
then
|
||||||
|
AUTOML_ENV_FILE="automl_env.yml"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ ! -f $AUTOML_ENV_FILE ]; then
|
||||||
|
echo "File $AUTOML_ENV_FILE not found"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
if source activate $CONDA_ENV_NAME 2> /dev/null
|
||||||
|
then
|
||||||
|
echo "Upgrading azureml-sdk[automl,notebooks,explain] in existing conda environment" $CONDA_ENV_NAME
|
||||||
|
pip install --upgrade azureml-sdk[automl,notebooks,explain] &&
|
||||||
|
jupyter nbextension uninstall --user --py azureml.widgets
|
||||||
|
else
|
||||||
|
conda env create -f $AUTOML_ENV_FILE -n $CONDA_ENV_NAME &&
|
||||||
|
source activate $CONDA_ENV_NAME &&
|
||||||
|
conda install lightgbm -c conda-forge -y &&
|
||||||
|
python -m ipykernel install --user --name $CONDA_ENV_NAME --display-name "Python ($CONDA_ENV_NAME)" &&
|
||||||
|
jupyter nbextension uninstall --user --py azureml.widgets &&
|
||||||
|
echo "" &&
|
||||||
|
echo "" &&
|
||||||
|
echo "***************************************" &&
|
||||||
|
echo "* AutoML setup completed successfully *" &&
|
||||||
|
echo "***************************************" &&
|
||||||
|
if [ "$OPTIONS" != "nolaunch" ]
|
||||||
|
then
|
||||||
|
echo "" &&
|
||||||
|
echo "Starting jupyter notebook - please run the configuration notebook" &&
|
||||||
|
echo "" &&
|
||||||
|
jupyter notebook --log-level=50 --notebook-dir '../..'
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ $? -gt 0 ]
|
||||||
|
then
|
||||||
|
echo "Installation failed"
|
||||||
|
fi
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -139,7 +139,6 @@
|
|||||||
" primary_metric = 'AUC_weighted',\n",
|
" primary_metric = 'AUC_weighted',\n",
|
||||||
" iteration_timeout_minutes = 20,\n",
|
" iteration_timeout_minutes = 20,\n",
|
||||||
" iterations = 10,\n",
|
" iterations = 10,\n",
|
||||||
" n_cross_validations = 2,\n",
|
|
||||||
" verbosity = logging.INFO,\n",
|
" verbosity = logging.INFO,\n",
|
||||||
" X = X_train, \n",
|
" X = X_train, \n",
|
||||||
" y = y_train,\n",
|
" y = y_train,\n",
|
||||||
@@ -263,7 +262,7 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"To ensure the fit results are consistent with the training results, the SDK dependency versions need to be the same as the environment that trains the model. Details about retrieving the versions can be found in notebook [12.auto-ml-retrieve-the-training-sdk-versions](12.auto-ml-retrieve-the-training-sdk-versions.ipynb)."
|
"To ensure the fit results are consistent with the training results, the SDK dependency versions need to be the same as the environment that trains the model. The following cells create a file, myenv.yml, which specifies the dependencies from the run."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -303,7 +302,8 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||||
"\n",
|
"\n",
|
||||||
"myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn'], pip_packages=['azureml-sdk[automl]'])\n",
|
"myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn','py-xgboost<=0.80'],\n",
|
||||||
|
" pip_packages=['azureml-sdk[automl]'])\n",
|
||||||
"\n",
|
"\n",
|
||||||
"conda_env_file_name = 'myenv.yml'\n",
|
"conda_env_file_name = 'myenv.yml'\n",
|
||||||
"myenv.save_to_file('.', conda_env_file_name)"
|
"myenv.save_to_file('.', conda_env_file_name)"
|
||||||
|
|||||||
@@ -0,0 +1,284 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||||
|
"\n",
|
||||||
|
"Licensed under the MIT License."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Automated Machine Learning\n",
|
||||||
|
"_**Classification with Local Compute**_\n",
|
||||||
|
"\n",
|
||||||
|
"## Contents\n",
|
||||||
|
"1. [Introduction](#Introduction)\n",
|
||||||
|
"1. [Setup](#Setup)\n",
|
||||||
|
"1. [Data](#Data)\n",
|
||||||
|
"1. [Train](#Train)\n",
|
||||||
|
"1. [Results](#Results)\n",
|
||||||
|
"1. [Test](#Test)\n",
|
||||||
|
"\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Introduction\n",
|
||||||
|
"\n",
|
||||||
|
"In this example we use the scikit-learn's [digit dataset](http://scikit-learn.org/stable/datasets/index.html#optical-recognition-of-handwritten-digits-dataset) to showcase how you can use AutoML for a simple classification problem.\n",
|
||||||
|
"\n",
|
||||||
|
"Make sure you have executed the [configuration](../../../configuration.ipynb) before running this notebook.\n",
|
||||||
|
"\n",
|
||||||
|
"Please find the ONNX related documentations [here](https://github.com/onnx/onnx).\n",
|
||||||
|
"\n",
|
||||||
|
"In this notebook you will learn how to:\n",
|
||||||
|
"1. Create an `Experiment` in an existing `Workspace`.\n",
|
||||||
|
"2. Configure AutoML using `AutoMLConfig`.\n",
|
||||||
|
"3. Train the model using local compute with ONNX compatible config on.\n",
|
||||||
|
"4. Explore the results and save the ONNX model."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Setup\n",
|
||||||
|
"\n",
|
||||||
|
"As part of the setup you have already created an Azure ML `Workspace` object. For AutoML you will need to create an `Experiment` object, which is a named object in a `Workspace` used to run experiments."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import logging\n",
|
||||||
|
"\n",
|
||||||
|
"from matplotlib import pyplot as plt\n",
|
||||||
|
"import numpy as np\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"from sklearn import datasets\n",
|
||||||
|
"\n",
|
||||||
|
"import azureml.core\n",
|
||||||
|
"from azureml.core.experiment import Experiment\n",
|
||||||
|
"from azureml.core.workspace import Workspace\n",
|
||||||
|
"from azureml.train.automl import AutoMLConfig"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"ws = Workspace.from_config()\n",
|
||||||
|
"\n",
|
||||||
|
"# Choose a name for the experiment and specify the project folder.\n",
|
||||||
|
"experiment_name = 'automl-classification-onnx'\n",
|
||||||
|
"project_folder = './sample_projects/automl-classification-onnx'\n",
|
||||||
|
"\n",
|
||||||
|
"experiment = Experiment(ws, experiment_name)\n",
|
||||||
|
"\n",
|
||||||
|
"output = {}\n",
|
||||||
|
"output['SDK version'] = azureml.core.VERSION\n",
|
||||||
|
"output['Subscription ID'] = ws.subscription_id\n",
|
||||||
|
"output['Workspace Name'] = ws.name\n",
|
||||||
|
"output['Resource Group'] = ws.resource_group\n",
|
||||||
|
"output['Location'] = ws.location\n",
|
||||||
|
"output['Project Directory'] = project_folder\n",
|
||||||
|
"output['Experiment Name'] = experiment.name\n",
|
||||||
|
"pd.set_option('display.max_colwidth', -1)\n",
|
||||||
|
"outputDf = pd.DataFrame(data = output, index = [''])\n",
|
||||||
|
"outputDf.T"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Data\n",
|
||||||
|
"\n",
|
||||||
|
"This uses scikit-learn's [load_digits](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html) method."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"digits = datasets.load_digits()\n",
|
||||||
|
"\n",
|
||||||
|
"# Exclude the first 100 rows from training so that they can be used for test.\n",
|
||||||
|
"X_train = digits.data[100:,:]\n",
|
||||||
|
"y_train = digits.target[100:]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Train with enable ONNX compatible models config on\n",
|
||||||
|
"\n",
|
||||||
|
"Instantiate an `AutoMLConfig` object to specify the settings and data used to run the experiment.\n",
|
||||||
|
"\n",
|
||||||
|
"Set the parameter enable_onnx_compatible_models=True, if you also want to generate the ONNX compatible models. Please note, the forecasting task and TensorFlow models are not ONNX compatible yet.\n",
|
||||||
|
"\n",
|
||||||
|
"|Property|Description|\n",
|
||||||
|
"|-|-|\n",
|
||||||
|
"|**task**|classification or regression|\n",
|
||||||
|
"|**primary_metric**|This is the metric that you want to optimize. Classification supports the following primary metrics: <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>average_precision_score_weighted</i><br><i>norm_macro_recall</i><br><i>precision_score_weighted</i>|\n",
|
||||||
|
"|**iteration_timeout_minutes**|Time limit in minutes for each iteration.|\n",
|
||||||
|
"|**iterations**|Number of iterations. In each iteration AutoML trains a specific pipeline with the data.|\n",
|
||||||
|
"|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n",
|
||||||
|
"|**y**|(sparse) array-like, shape = [n_samples, ], Multi-class targets.|\n",
|
||||||
|
"|**enable_onnx_compatible_models**|Enable the ONNX compatible models in the experiment.|\n",
|
||||||
|
"|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder.|"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"automl_config = AutoMLConfig(task = 'classification',\n",
|
||||||
|
" debug_log = 'automl_errors.log',\n",
|
||||||
|
" primary_metric = 'AUC_weighted',\n",
|
||||||
|
" iteration_timeout_minutes = 60,\n",
|
||||||
|
" iterations = 10,\n",
|
||||||
|
" verbosity = logging.INFO,\n",
|
||||||
|
" X = X_train, \n",
|
||||||
|
" y = y_train,\n",
|
||||||
|
" enable_onnx_compatible_models=True,\n",
|
||||||
|
" path = project_folder)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Call the `submit` method on the experiment object and pass the run configuration. Execution of local runs is synchronous. Depending on the data and the number of iterations this can run for a while.\n",
|
||||||
|
"In this example, we specify `show_output = True` to print currently running iterations to the console."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"local_run = experiment.submit(automl_config, show_output = True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"local_run"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Results"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"#### Widget for Monitoring Runs\n",
|
||||||
|
"\n",
|
||||||
|
"The widget will first report a \"loading\" status while running the first iteration. After completing the first iteration, an auto-updating graph and table will be shown. The widget will refresh once per minute, so you should see the graph update as child runs complete.\n",
|
||||||
|
"\n",
|
||||||
|
"**Note:** The widget displays a link at the bottom. Use this link to open a web interface to explore the individual run details."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.widgets import RunDetails\n",
|
||||||
|
"RunDetails(local_run).show() "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Retrieve the Best ONNX Model\n",
|
||||||
|
"\n",
|
||||||
|
"Below we select the best pipeline from our iterations. The `get_output` method returns the best run and the fitted model. The Model includes the pipeline and any pre-processing. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*.\n",
|
||||||
|
"\n",
|
||||||
|
"Set the parameter return_onnx_model=True to retrieve the best ONNX model, instead of the Python model."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"best_run, onnx_mdl = local_run.get_output(return_onnx_model=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Save the best ONNX model"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.train.automl._vendor.automl.client.core.common.onnx_convert import OnnxConverter\n",
|
||||||
|
"onnx_fl_path = \"./best_model.onnx\"\n",
|
||||||
|
"OnnxConverter.save_onnx_model(onnx_mdl, onnx_fl_path)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "savitam"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3.6",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python36"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.6.6"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
@@ -71,11 +71,17 @@
|
|||||||
"import azureml.core\n",
|
"import azureml.core\n",
|
||||||
"from azureml.core.experiment import Experiment\n",
|
"from azureml.core.experiment import Experiment\n",
|
||||||
"from azureml.core.workspace import Workspace\n",
|
"from azureml.core.workspace import Workspace\n",
|
||||||
"try:\n",
|
"import sys\n",
|
||||||
" import tensorflow as tf1\n",
|
"whitelist_models=[\"LightGBM\"]\n",
|
||||||
"except ImportError:\n",
|
"if \"3.7\" != sys.version[0:3]:\n",
|
||||||
" from pip._internal import main\n",
|
" try:\n",
|
||||||
" main(['install', 'tensorflow>=1.10.0,<=1.12.0'])\n",
|
" import tensorflow as tf1\n",
|
||||||
|
" except ImportError:\n",
|
||||||
|
" from pip._internal import main\n",
|
||||||
|
" main(['install', 'tensorflow>=1.10.0,<=1.12.0'])\n",
|
||||||
|
" logging.getLogger().setLevel(logging.ERROR)\n",
|
||||||
|
" whitelist_models=[\"TensorFlowLinearClassifier\", \"TensorFlowDNN\"]\n",
|
||||||
|
"\n",
|
||||||
"from azureml.train.automl import AutoMLConfig"
|
"from azureml.train.automl import AutoMLConfig"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -160,12 +166,11 @@
|
|||||||
" primary_metric = 'AUC_weighted',\n",
|
" primary_metric = 'AUC_weighted',\n",
|
||||||
" iteration_timeout_minutes = 60,\n",
|
" iteration_timeout_minutes = 60,\n",
|
||||||
" iterations = 10,\n",
|
" iterations = 10,\n",
|
||||||
" n_cross_validations = 3,\n",
|
|
||||||
" verbosity = logging.INFO,\n",
|
" verbosity = logging.INFO,\n",
|
||||||
" X = X_train, \n",
|
" X = X_train, \n",
|
||||||
" y = y_train,\n",
|
" y = y_train,\n",
|
||||||
" enable_tf=True,\n",
|
" enable_tf=True,\n",
|
||||||
" whitelist_models=[\"TensorFlowLinearClassifier\", \"TensorFlowDNN\"],\n",
|
" whitelist_models=whitelist_models,\n",
|
||||||
" path = project_folder)"
|
" path = project_folder)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -72,6 +72,32 @@
|
|||||||
"from azureml.train.automl import AutoMLConfig"
|
"from azureml.train.automl import AutoMLConfig"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Accessing the Azure ML workspace requires authentication with Azure.\n",
|
||||||
|
"\n",
|
||||||
|
"The default authentication is interactive authentication using the default tenant. Executing the `ws = Workspace.from_config()` line in the cell below will prompt for authentication the first time that it is run.\n",
|
||||||
|
"\n",
|
||||||
|
"If you have multiple Azure tenants, you can specify the tenant by replacing the `ws = Workspace.from_config()` line in the cell below with the following:\n",
|
||||||
|
"\n",
|
||||||
|
"```\n",
|
||||||
|
"from azureml.core.authentication import InteractiveLoginAuthentication\n",
|
||||||
|
"auth = InteractiveLoginAuthentication(tenant_id = 'mytenantid')\n",
|
||||||
|
"ws = Workspace.from_config(auth = auth)\n",
|
||||||
|
"```\n",
|
||||||
|
"\n",
|
||||||
|
"If you need to run in an environment where interactive login is not possible, you can use Service Principal authentication by replacing the `ws = Workspace.from_config()` line in the cell below with the following:\n",
|
||||||
|
"\n",
|
||||||
|
"```\n",
|
||||||
|
"from azureml.core.authentication import ServicePrincipalAuthentication\n",
|
||||||
|
"auth = auth = ServicePrincipalAuthentication('mytenantid', 'myappid', 'mypassword')\n",
|
||||||
|
"ws = Workspace.from_config(auth = auth)\n",
|
||||||
|
"```\n",
|
||||||
|
"For more details, see [aka.ms/aml-notebook-auth](http://aka.ms/aml-notebook-auth)"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
@@ -133,12 +159,17 @@
|
|||||||
"|-|-|\n",
|
"|-|-|\n",
|
||||||
"|**task**|classification or regression|\n",
|
"|**task**|classification or regression|\n",
|
||||||
"|**primary_metric**|This is the metric that you want to optimize. Classification supports the following primary metrics: <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>average_precision_score_weighted</i><br><i>norm_macro_recall</i><br><i>precision_score_weighted</i>|\n",
|
"|**primary_metric**|This is the metric that you want to optimize. Classification supports the following primary metrics: <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>average_precision_score_weighted</i><br><i>norm_macro_recall</i><br><i>precision_score_weighted</i>|\n",
|
||||||
"|**iteration_timeout_minutes**|Time limit in minutes for each iteration.|\n",
|
|
||||||
"|**iterations**|Number of iterations. In each iteration AutoML trains a specific pipeline with the data.|\n",
|
|
||||||
"|**n_cross_validations**|Number of cross validation splits.|\n",
|
|
||||||
"|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n",
|
"|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n",
|
||||||
"|**y**|(sparse) array-like, shape = [n_samples, ], Multi-class targets.|\n",
|
"|**y**|(sparse) array-like, shape = [n_samples, ], Multi-class targets.|\n",
|
||||||
"|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder.|"
|
"|**n_cross_validations**|Number of cross validation splits.|\n",
|
||||||
|
"|\n",
|
||||||
|
"\n",
|
||||||
|
"Automated machine learning trains multiple machine learning pipelines. Each pipelines training is known as an iteration.\n",
|
||||||
|
"* You can specify a maximum number of iterations using the `iterations` parameter.\n",
|
||||||
|
"* You can specify a maximum time for the run using the `experiment_timeout_minutes` parameter.\n",
|
||||||
|
"* If you specify neither the `iterations` nor the `experiment_timeout_minutes`, automated ML keeps running iterations while it continues to see improvements in the scores.\n",
|
||||||
|
"\n",
|
||||||
|
"The following example doesn't specify `iterations` or `experiment_timeout_minutes` and so runs until the scores stop improving.\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -148,15 +179,10 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"automl_config = AutoMLConfig(task = 'classification',\n",
|
"automl_config = AutoMLConfig(task = 'classification',\n",
|
||||||
" debug_log = 'automl_errors.log',\n",
|
|
||||||
" primary_metric = 'AUC_weighted',\n",
|
" primary_metric = 'AUC_weighted',\n",
|
||||||
" iteration_timeout_minutes = 60,\n",
|
|
||||||
" iterations = 25,\n",
|
|
||||||
" n_cross_validations = 3,\n",
|
|
||||||
" verbosity = logging.INFO,\n",
|
|
||||||
" X = X_train, \n",
|
" X = X_train, \n",
|
||||||
" y = y_train,\n",
|
" y = y_train,\n",
|
||||||
" path = project_folder)"
|
" n_cross_validations = 3)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -163,8 +163,7 @@
|
|||||||
" \"iterations\" : 2,\n",
|
" \"iterations\" : 2,\n",
|
||||||
" \"primary_metric\" : 'AUC_weighted',\n",
|
" \"primary_metric\" : 'AUC_weighted',\n",
|
||||||
" \"preprocess\" : False,\n",
|
" \"preprocess\" : False,\n",
|
||||||
" \"verbosity\" : logging.INFO,\n",
|
" \"verbosity\" : logging.INFO\n",
|
||||||
" \"n_cross_validations\": 3\n",
|
|
||||||
"}"
|
"}"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -0,0 +1,493 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||||
|
"\n",
|
||||||
|
"Licensed under the MIT License."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Automated Machine Learning\n",
|
||||||
|
"**BikeShare Demand Forecasting**\n",
|
||||||
|
"\n",
|
||||||
|
"## Contents\n",
|
||||||
|
"1. [Introduction](#Introduction)\n",
|
||||||
|
"1. [Setup](#Setup)\n",
|
||||||
|
"1. [Data](#Data)\n",
|
||||||
|
"1. [Train](#Train)\n",
|
||||||
|
"1. [Evaluate](#Evaluate)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Introduction\n",
|
||||||
|
"In this example, we show how AutoML can be used for bike share forecasting.\n",
|
||||||
|
"\n",
|
||||||
|
"The purpose is to demonstrate how to take advantage of the built-in holiday featurization, access the feature names, and further demonstrate how to work with the `forecast` function. Please also look at the additional forecasting notebooks, which document lagging, rolling windows, forecast quantiles, other ways to use the forecast function, and forecaster deployment.\n",
|
||||||
|
"\n",
|
||||||
|
"Make sure you have executed the [configuration](../../../configuration.ipynb) before running this notebook.\n",
|
||||||
|
"\n",
|
||||||
|
"In this notebook you would see\n",
|
||||||
|
"1. Creating an Experiment in an existing Workspace\n",
|
||||||
|
"2. Instantiating AutoMLConfig with new task type \"forecasting\" for timeseries data training, and other timeseries related settings: for this dataset we use the basic one: \"time_column_name\" \n",
|
||||||
|
"3. Training the Model using local compute\n",
|
||||||
|
"4. Exploring the results\n",
|
||||||
|
"5. Viewing the engineered names for featurized data and featurization summary for all raw features\n",
|
||||||
|
"6. Testing the fitted model"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Setup\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import azureml.core\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"import numpy as np\n",
|
||||||
|
"import logging\n",
|
||||||
|
"import warnings\n",
|
||||||
|
"# Squash warning messages for cleaner output in the notebook\n",
|
||||||
|
"warnings.showwarning = lambda *args, **kwargs: None\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"from azureml.core.workspace import Workspace\n",
|
||||||
|
"from azureml.core.experiment import Experiment\n",
|
||||||
|
"from azureml.train.automl import AutoMLConfig\n",
|
||||||
|
"from matplotlib import pyplot as plt\n",
|
||||||
|
"from sklearn.metrics import mean_absolute_error, mean_squared_error"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"As part of the setup you have already created a <b>Workspace</b>. For AutoML you would need to create an <b>Experiment</b>. An <b>Experiment</b> is a named object in a <b>Workspace</b>, which is used to run experiments."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"ws = Workspace.from_config()\n",
|
||||||
|
"\n",
|
||||||
|
"# choose a name for the run history container in the workspace\n",
|
||||||
|
"experiment_name = 'automl-bikeshareforecasting'\n",
|
||||||
|
"# project folder\n",
|
||||||
|
"project_folder = './sample_projects/automl-local-bikeshareforecasting'\n",
|
||||||
|
"\n",
|
||||||
|
"experiment = Experiment(ws, experiment_name)\n",
|
||||||
|
"\n",
|
||||||
|
"output = {}\n",
|
||||||
|
"output['SDK version'] = azureml.core.VERSION\n",
|
||||||
|
"output['Subscription ID'] = ws.subscription_id\n",
|
||||||
|
"output['Workspace'] = ws.name\n",
|
||||||
|
"output['Resource Group'] = ws.resource_group\n",
|
||||||
|
"output['Location'] = ws.location\n",
|
||||||
|
"output['Project Directory'] = project_folder\n",
|
||||||
|
"output['Run History Name'] = experiment_name\n",
|
||||||
|
"pd.set_option('display.max_colwidth', -1)\n",
|
||||||
|
"outputDf = pd.DataFrame(data = output, index = [''])\n",
|
||||||
|
"outputDf.T"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Data\n",
|
||||||
|
"Read bike share demand data from file, and preview data."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"data = pd.read_csv('bike-no.csv', parse_dates=['date'])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Let's set up what we know abou the dataset. \n",
|
||||||
|
"\n",
|
||||||
|
"**Target column** is what we want to forecast.\n",
|
||||||
|
"\n",
|
||||||
|
"**Time column** is the time axis along which to predict.\n",
|
||||||
|
"\n",
|
||||||
|
"**Grain** is another word for an individual time series in your dataset. Grains are identified by values of the columns listed `grain_column_names`, for example \"store\" and \"item\" if your data has multiple time series of sales, one series for each combination of store and item sold.\n",
|
||||||
|
"\n",
|
||||||
|
"This dataset has only one time series. Please see the [orange juice notebook](https://github.com/Azure/MachineLearningNotebooks/tree/master/how-to-use-azureml/automated-machine-learning/forecasting-orange-juice-sales) for an example of a multi-time series dataset."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"target_column_name = 'cnt'\n",
|
||||||
|
"time_column_name = 'date'\n",
|
||||||
|
"grain_column_names = []"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Split the data\n",
|
||||||
|
"\n",
|
||||||
|
"The first split we make is into train and test sets. Note we are splitting on time."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"train = data[data[time_column_name] < '2012-09-01']\n",
|
||||||
|
"test = data[data[time_column_name] >= '2012-09-01']\n",
|
||||||
|
"\n",
|
||||||
|
"X_train = train.copy()\n",
|
||||||
|
"y_train = X_train.pop(target_column_name).values\n",
|
||||||
|
"\n",
|
||||||
|
"X_test = test.copy()\n",
|
||||||
|
"y_test = X_test.pop(target_column_name).values\n",
|
||||||
|
"\n",
|
||||||
|
"print(X_train.shape)\n",
|
||||||
|
"print(y_train.shape)\n",
|
||||||
|
"print(X_test.shape)\n",
|
||||||
|
"print(y_test.shape)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Setting forecaster maximum horizon \n",
|
||||||
|
"\n",
|
||||||
|
"Assuming your test data forms a full and regular time series(regular time intervals and no holes), \n",
|
||||||
|
"the maximum horizon you will need to forecast is the length of the longest grain in your test set."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"if len(grain_column_names) == 0:\n",
|
||||||
|
" max_horizon = len(X_test)\n",
|
||||||
|
"else:\n",
|
||||||
|
" max_horizon = X_test.groupby(grain_column_names)[time_column_name].count().max()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Train\n",
|
||||||
|
"\n",
|
||||||
|
"Instantiate a AutoMLConfig object. This defines the settings and data used to run the experiment.\n",
|
||||||
|
"\n",
|
||||||
|
"|Property|Description|\n",
|
||||||
|
"|-|-|\n",
|
||||||
|
"|**task**|forecasting|\n",
|
||||||
|
"|**primary_metric**|This is the metric that you want to optimize.<br> Forecasting supports the following primary metrics <br><i>spearman_correlation</i><br><i>normalized_root_mean_squared_error</i><br><i>r2_score</i><br><i>normalized_mean_absolute_error</i>\n",
|
||||||
|
"|**iterations**|Number of iterations. In each iteration, Auto ML trains a specific pipeline on the given data|\n",
|
||||||
|
"|**iteration_timeout_minutes**|Time limit in minutes for each iteration.|\n",
|
||||||
|
"|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n",
|
||||||
|
"|**y**|(sparse) array-like, shape = [n_samples, ], targets values.|\n",
|
||||||
|
"|**n_cross_validations**|Number of cross validation splits.|\n",
|
||||||
|
"|**country**|The country used to generate holiday features. These should be ISO 3166 two-letter country codes (i.e. 'US', 'GB').|\n",
|
||||||
|
"|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder. "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"time_column_name = 'date'\n",
|
||||||
|
"automl_settings = {\n",
|
||||||
|
" \"time_column_name\": time_column_name,\n",
|
||||||
|
" # these columns are a breakdown of the total and therefore a leak\n",
|
||||||
|
" \"drop_column_names\": ['casual', 'registered'],\n",
|
||||||
|
" # knowing the country allows Automated ML to bring in holidays\n",
|
||||||
|
" \"country\" : 'US',\n",
|
||||||
|
" \"max_horizon\" : max_horizon,\n",
|
||||||
|
" \"target_lags\": 1 \n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"automl_config = AutoMLConfig(task = 'forecasting', \n",
|
||||||
|
" primary_metric='normalized_root_mean_squared_error',\n",
|
||||||
|
" iterations = 10,\n",
|
||||||
|
" iteration_timeout_minutes = 5,\n",
|
||||||
|
" X = X_train,\n",
|
||||||
|
" y = y_train,\n",
|
||||||
|
" n_cross_validations = 3, \n",
|
||||||
|
" path=project_folder,\n",
|
||||||
|
" verbosity = logging.INFO,\n",
|
||||||
|
" **automl_settings)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"We will now run the experiment, starting with 10 iterations of model search. Experiment can be continued for more iterations if the results are not yet good. You will see the currently running iterations printing to the console."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"local_run = experiment.submit(automl_config, show_output=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Displaying the run objects gives you links to the visual tools in the Azure Portal. Go try them!"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"local_run"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Retrieve the Best Model\n",
|
||||||
|
"Below we select the best pipeline from our iterations. The get_output method on automl_classifier returns the best run and the fitted model for the last fit invocation. There are overloads on get_output that allow you to retrieve the best run and fitted model for any logged metric or a particular iteration."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"best_run, fitted_model = local_run.get_output()\n",
|
||||||
|
"fitted_model.steps"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### View the engineered names for featurized data\n",
|
||||||
|
"\n",
|
||||||
|
"You can accees the engineered feature names generated in time-series featurization. Note that a number of named holiday periods are represented. We recommend that you have at least one year of data when using this feature to ensure that all yearly holidays are captured in the training featurization."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"fitted_model.named_steps['timeseriestransformer'].get_engineered_feature_names()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### View the featurization summary\n",
|
||||||
|
"\n",
|
||||||
|
"You can also see what featurization steps were performed on different raw features in the user data. For each raw feature in the user data, the following information is displayed:\n",
|
||||||
|
"\n",
|
||||||
|
"- Raw feature name\n",
|
||||||
|
"- Number of engineered features formed out of this raw feature\n",
|
||||||
|
"- Type detected\n",
|
||||||
|
"- If feature was dropped\n",
|
||||||
|
"- List of feature transformations for the raw feature"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"fitted_model.named_steps['timeseriestransformer'].get_featurization_summary()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Test the Best Fitted Model\n",
|
||||||
|
"\n",
|
||||||
|
"Predict on training and test set, and calculate residual values.\n",
|
||||||
|
"\n",
|
||||||
|
"We always score on the original dataset whose schema matches the scheme of the training dataset."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"X_test.head()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"y_query = y_test.copy().astype(np.float)\n",
|
||||||
|
"y_query.fill(np.NaN)\n",
|
||||||
|
"y_fcst, X_trans = fitted_model.forecast(X_test, y_query)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"It is a good practice to always align the output explicitly to the input, as the count and order of the rows may have changed during transformations that span multiple rows."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"def align_outputs(y_predicted, X_trans, X_test, y_test, predicted_column_name = 'predicted'):\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" Demonstrates how to get the output aligned to the inputs\n",
|
||||||
|
" using pandas indexes. Helps understand what happened if\n",
|
||||||
|
" the output's shape differs from the input shape, or if\n",
|
||||||
|
" the data got re-sorted by time and grain during forecasting.\n",
|
||||||
|
" \n",
|
||||||
|
" Typical causes of misalignment are:\n",
|
||||||
|
" * we predicted some periods that were missing in actuals -> drop from eval\n",
|
||||||
|
" * model was asked to predict past max_horizon -> increase max horizon\n",
|
||||||
|
" * data at start of X_test was needed for lags -> provide previous periods\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" df_fcst = pd.DataFrame({predicted_column_name : y_predicted})\n",
|
||||||
|
" # y and X outputs are aligned by forecast() function contract\n",
|
||||||
|
" df_fcst.index = X_trans.index\n",
|
||||||
|
" \n",
|
||||||
|
" # align original X_test to y_test \n",
|
||||||
|
" X_test_full = X_test.copy()\n",
|
||||||
|
" X_test_full[target_column_name] = y_test\n",
|
||||||
|
"\n",
|
||||||
|
" # X_test_full's index does not include origin, so reset for merge\n",
|
||||||
|
" df_fcst.reset_index(inplace=True)\n",
|
||||||
|
" X_test_full = X_test_full.reset_index().drop(columns='index')\n",
|
||||||
|
" together = df_fcst.merge(X_test_full, how='right')\n",
|
||||||
|
" \n",
|
||||||
|
" # drop rows where prediction or actuals are nan \n",
|
||||||
|
" # happens because of missing actuals \n",
|
||||||
|
" # or at edges of time due to lags/rolling windows\n",
|
||||||
|
" clean = together[together[[target_column_name, predicted_column_name]].notnull().all(axis=1)]\n",
|
||||||
|
" return(clean)\n",
|
||||||
|
"\n",
|
||||||
|
"df_all = align_outputs(y_fcst, X_trans, X_test, y_test)\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"def MAPE(actual, pred):\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" Calculate mean absolute percentage error.\n",
|
||||||
|
" Remove NA and values where actual is close to zero\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" not_na = ~(np.isnan(actual) | np.isnan(pred))\n",
|
||||||
|
" not_zero = ~np.isclose(actual, 0.0)\n",
|
||||||
|
" actual_safe = actual[not_na & not_zero]\n",
|
||||||
|
" pred_safe = pred[not_na & not_zero]\n",
|
||||||
|
" APE = 100*np.abs((actual_safe - pred_safe)/actual_safe)\n",
|
||||||
|
" return np.mean(APE)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"print(\"Simple forecasting model\")\n",
|
||||||
|
"rmse = np.sqrt(mean_squared_error(df_all[target_column_name], df_all['predicted']))\n",
|
||||||
|
"print(\"[Test Data] \\nRoot Mean squared error: %.2f\" % rmse)\n",
|
||||||
|
"mae = mean_absolute_error(df_all[target_column_name], df_all['predicted'])\n",
|
||||||
|
"print('mean_absolute_error score: %.2f' % mae)\n",
|
||||||
|
"print('MAPE: %.2f' % MAPE(df_all[target_column_name], df_all['predicted']))\n",
|
||||||
|
"\n",
|
||||||
|
"# Plot outputs\n",
|
||||||
|
"%matplotlib notebook\n",
|
||||||
|
"test_pred = plt.scatter(df_all[target_column_name], df_all['predicted'], color='b')\n",
|
||||||
|
"test_test = plt.scatter(y_test, y_test, color='g')\n",
|
||||||
|
"plt.legend((test_pred, test_test), ('prediction', 'truth'), loc='upper left', fontsize=8)\n",
|
||||||
|
"plt.show()"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "xiaga@microsoft.com, tosingli@microsoft.com"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3.6",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python36"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.6.7"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
@@ -0,0 +1,732 @@
|
|||||||
|
instant,date,season,yr,mnth,weekday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
|
||||||
|
1,1/1/2011,1,0,1,6,2,0.344167,0.363625,0.805833,0.160446,331,654,985
|
||||||
|
2,1/2/2011,1,0,1,0,2,0.363478,0.353739,0.696087,0.248539,131,670,801
|
||||||
|
3,1/3/2011,1,0,1,1,1,0.196364,0.189405,0.437273,0.248309,120,1229,1349
|
||||||
|
4,1/4/2011,1,0,1,2,1,0.2,0.212122,0.590435,0.160296,108,1454,1562
|
||||||
|
5,1/5/2011,1,0,1,3,1,0.226957,0.22927,0.436957,0.1869,82,1518,1600
|
||||||
|
6,1/6/2011,1,0,1,4,1,0.204348,0.233209,0.518261,0.0895652,88,1518,1606
|
||||||
|
7,1/7/2011,1,0,1,5,2,0.196522,0.208839,0.498696,0.168726,148,1362,1510
|
||||||
|
8,1/8/2011,1,0,1,6,2,0.165,0.162254,0.535833,0.266804,68,891,959
|
||||||
|
9,1/9/2011,1,0,1,0,1,0.138333,0.116175,0.434167,0.36195,54,768,822
|
||||||
|
10,1/10/2011,1,0,1,1,1,0.150833,0.150888,0.482917,0.223267,41,1280,1321
|
||||||
|
11,1/11/2011,1,0,1,2,2,0.169091,0.191464,0.686364,0.122132,43,1220,1263
|
||||||
|
12,1/12/2011,1,0,1,3,1,0.172727,0.160473,0.599545,0.304627,25,1137,1162
|
||||||
|
13,1/13/2011,1,0,1,4,1,0.165,0.150883,0.470417,0.301,38,1368,1406
|
||||||
|
14,1/14/2011,1,0,1,5,1,0.16087,0.188413,0.537826,0.126548,54,1367,1421
|
||||||
|
15,1/15/2011,1,0,1,6,2,0.233333,0.248112,0.49875,0.157963,222,1026,1248
|
||||||
|
16,1/16/2011,1,0,1,0,1,0.231667,0.234217,0.48375,0.188433,251,953,1204
|
||||||
|
17,1/17/2011,1,0,1,1,2,0.175833,0.176771,0.5375,0.194017,117,883,1000
|
||||||
|
18,1/18/2011,1,0,1,2,2,0.216667,0.232333,0.861667,0.146775,9,674,683
|
||||||
|
19,1/19/2011,1,0,1,3,2,0.292174,0.298422,0.741739,0.208317,78,1572,1650
|
||||||
|
20,1/20/2011,1,0,1,4,2,0.261667,0.25505,0.538333,0.195904,83,1844,1927
|
||||||
|
21,1/21/2011,1,0,1,5,1,0.1775,0.157833,0.457083,0.353242,75,1468,1543
|
||||||
|
22,1/22/2011,1,0,1,6,1,0.0591304,0.0790696,0.4,0.17197,93,888,981
|
||||||
|
23,1/23/2011,1,0,1,0,1,0.0965217,0.0988391,0.436522,0.2466,150,836,986
|
||||||
|
24,1/24/2011,1,0,1,1,1,0.0973913,0.11793,0.491739,0.15833,86,1330,1416
|
||||||
|
25,1/25/2011,1,0,1,2,2,0.223478,0.234526,0.616957,0.129796,186,1799,1985
|
||||||
|
26,1/26/2011,1,0,1,3,3,0.2175,0.2036,0.8625,0.29385,34,472,506
|
||||||
|
27,1/27/2011,1,0,1,4,1,0.195,0.2197,0.6875,0.113837,15,416,431
|
||||||
|
28,1/28/2011,1,0,1,5,2,0.203478,0.223317,0.793043,0.1233,38,1129,1167
|
||||||
|
29,1/29/2011,1,0,1,6,1,0.196522,0.212126,0.651739,0.145365,123,975,1098
|
||||||
|
30,1/30/2011,1,0,1,0,1,0.216522,0.250322,0.722174,0.0739826,140,956,1096
|
||||||
|
31,1/31/2011,1,0,1,1,2,0.180833,0.18625,0.60375,0.187192,42,1459,1501
|
||||||
|
32,2/1/2011,1,0,2,2,2,0.192174,0.23453,0.829565,0.053213,47,1313,1360
|
||||||
|
33,2/2/2011,1,0,2,3,2,0.26,0.254417,0.775417,0.264308,72,1454,1526
|
||||||
|
34,2/3/2011,1,0,2,4,1,0.186957,0.177878,0.437826,0.277752,61,1489,1550
|
||||||
|
35,2/4/2011,1,0,2,5,2,0.211304,0.228587,0.585217,0.127839,88,1620,1708
|
||||||
|
36,2/5/2011,1,0,2,6,2,0.233333,0.243058,0.929167,0.161079,100,905,1005
|
||||||
|
37,2/6/2011,1,0,2,0,1,0.285833,0.291671,0.568333,0.1418,354,1269,1623
|
||||||
|
38,2/7/2011,1,0,2,1,1,0.271667,0.303658,0.738333,0.0454083,120,1592,1712
|
||||||
|
39,2/8/2011,1,0,2,2,1,0.220833,0.198246,0.537917,0.36195,64,1466,1530
|
||||||
|
40,2/9/2011,1,0,2,3,2,0.134783,0.144283,0.494783,0.188839,53,1552,1605
|
||||||
|
41,2/10/2011,1,0,2,4,1,0.144348,0.149548,0.437391,0.221935,47,1491,1538
|
||||||
|
42,2/11/2011,1,0,2,5,1,0.189091,0.213509,0.506364,0.10855,149,1597,1746
|
||||||
|
43,2/12/2011,1,0,2,6,1,0.2225,0.232954,0.544167,0.203367,288,1184,1472
|
||||||
|
44,2/13/2011,1,0,2,0,1,0.316522,0.324113,0.457391,0.260883,397,1192,1589
|
||||||
|
45,2/14/2011,1,0,2,1,1,0.415,0.39835,0.375833,0.417908,208,1705,1913
|
||||||
|
46,2/15/2011,1,0,2,2,1,0.266087,0.254274,0.314348,0.291374,140,1675,1815
|
||||||
|
47,2/16/2011,1,0,2,3,1,0.318261,0.3162,0.423478,0.251791,218,1897,2115
|
||||||
|
48,2/17/2011,1,0,2,4,1,0.435833,0.428658,0.505,0.230104,259,2216,2475
|
||||||
|
49,2/18/2011,1,0,2,5,1,0.521667,0.511983,0.516667,0.264925,579,2348,2927
|
||||||
|
50,2/19/2011,1,0,2,6,1,0.399167,0.391404,0.187917,0.507463,532,1103,1635
|
||||||
|
51,2/20/2011,1,0,2,0,1,0.285217,0.27733,0.407826,0.223235,639,1173,1812
|
||||||
|
52,2/21/2011,1,0,2,1,2,0.303333,0.284075,0.605,0.307846,195,912,1107
|
||||||
|
53,2/22/2011,1,0,2,2,1,0.182222,0.186033,0.577778,0.195683,74,1376,1450
|
||||||
|
54,2/23/2011,1,0,2,3,1,0.221739,0.245717,0.423043,0.094113,139,1778,1917
|
||||||
|
55,2/24/2011,1,0,2,4,2,0.295652,0.289191,0.697391,0.250496,100,1707,1807
|
||||||
|
56,2/25/2011,1,0,2,5,2,0.364348,0.350461,0.712174,0.346539,120,1341,1461
|
||||||
|
57,2/26/2011,1,0,2,6,1,0.2825,0.282192,0.537917,0.186571,424,1545,1969
|
||||||
|
58,2/27/2011,1,0,2,0,1,0.343478,0.351109,0.68,0.125248,694,1708,2402
|
||||||
|
59,2/28/2011,1,0,2,1,2,0.407273,0.400118,0.876364,0.289686,81,1365,1446
|
||||||
|
60,3/1/2011,1,0,3,2,1,0.266667,0.263879,0.535,0.216425,137,1714,1851
|
||||||
|
61,3/2/2011,1,0,3,3,1,0.335,0.320071,0.449583,0.307833,231,1903,2134
|
||||||
|
62,3/3/2011,1,0,3,4,1,0.198333,0.200133,0.318333,0.225754,123,1562,1685
|
||||||
|
63,3/4/2011,1,0,3,5,2,0.261667,0.255679,0.610417,0.203346,214,1730,1944
|
||||||
|
64,3/5/2011,1,0,3,6,2,0.384167,0.378779,0.789167,0.251871,640,1437,2077
|
||||||
|
65,3/6/2011,1,0,3,0,2,0.376522,0.366252,0.948261,0.343287,114,491,605
|
||||||
|
66,3/7/2011,1,0,3,1,1,0.261739,0.238461,0.551304,0.341352,244,1628,1872
|
||||||
|
67,3/8/2011,1,0,3,2,1,0.2925,0.3024,0.420833,0.12065,316,1817,2133
|
||||||
|
68,3/9/2011,1,0,3,3,2,0.295833,0.286608,0.775417,0.22015,191,1700,1891
|
||||||
|
69,3/10/2011,1,0,3,4,3,0.389091,0.385668,0,0.261877,46,577,623
|
||||||
|
70,3/11/2011,1,0,3,5,2,0.316522,0.305,0.649565,0.23297,247,1730,1977
|
||||||
|
71,3/12/2011,1,0,3,6,1,0.329167,0.32575,0.594583,0.220775,724,1408,2132
|
||||||
|
72,3/13/2011,1,0,3,0,1,0.384348,0.380091,0.527391,0.270604,982,1435,2417
|
||||||
|
73,3/14/2011,1,0,3,1,1,0.325217,0.332,0.496957,0.136926,359,1687,2046
|
||||||
|
74,3/15/2011,1,0,3,2,2,0.317391,0.318178,0.655652,0.184309,289,1767,2056
|
||||||
|
75,3/16/2011,1,0,3,3,2,0.365217,0.36693,0.776522,0.203117,321,1871,2192
|
||||||
|
76,3/17/2011,1,0,3,4,1,0.415,0.410333,0.602917,0.209579,424,2320,2744
|
||||||
|
77,3/18/2011,1,0,3,5,1,0.54,0.527009,0.525217,0.231017,884,2355,3239
|
||||||
|
78,3/19/2011,1,0,3,6,1,0.4725,0.466525,0.379167,0.368167,1424,1693,3117
|
||||||
|
79,3/20/2011,1,0,3,0,1,0.3325,0.32575,0.47375,0.207721,1047,1424,2471
|
||||||
|
80,3/21/2011,2,0,3,1,2,0.430435,0.409735,0.737391,0.288783,401,1676,2077
|
||||||
|
81,3/22/2011,2,0,3,2,1,0.441667,0.440642,0.624583,0.22575,460,2243,2703
|
||||||
|
82,3/23/2011,2,0,3,3,2,0.346957,0.337939,0.839565,0.234261,203,1918,2121
|
||||||
|
83,3/24/2011,2,0,3,4,2,0.285,0.270833,0.805833,0.243787,166,1699,1865
|
||||||
|
84,3/25/2011,2,0,3,5,1,0.264167,0.256312,0.495,0.230725,300,1910,2210
|
||||||
|
85,3/26/2011,2,0,3,6,1,0.265833,0.257571,0.394167,0.209571,981,1515,2496
|
||||||
|
86,3/27/2011,2,0,3,0,2,0.253043,0.250339,0.493913,0.1843,472,1221,1693
|
||||||
|
87,3/28/2011,2,0,3,1,1,0.264348,0.257574,0.302174,0.212204,222,1806,2028
|
||||||
|
88,3/29/2011,2,0,3,2,1,0.3025,0.292908,0.314167,0.226996,317,2108,2425
|
||||||
|
89,3/30/2011,2,0,3,3,2,0.3,0.29735,0.646667,0.172888,168,1368,1536
|
||||||
|
90,3/31/2011,2,0,3,4,3,0.268333,0.257575,0.918333,0.217646,179,1506,1685
|
||||||
|
91,4/1/2011,2,0,4,5,2,0.3,0.283454,0.68625,0.258708,307,1920,2227
|
||||||
|
92,4/2/2011,2,0,4,6,2,0.315,0.315637,0.65375,0.197146,898,1354,2252
|
||||||
|
93,4/3/2011,2,0,4,0,1,0.378333,0.378767,0.48,0.182213,1651,1598,3249
|
||||||
|
94,4/4/2011,2,0,4,1,1,0.573333,0.542929,0.42625,0.385571,734,2381,3115
|
||||||
|
95,4/5/2011,2,0,4,2,2,0.414167,0.39835,0.642083,0.388067,167,1628,1795
|
||||||
|
96,4/6/2011,2,0,4,3,1,0.390833,0.387608,0.470833,0.263063,413,2395,2808
|
||||||
|
97,4/7/2011,2,0,4,4,1,0.4375,0.433696,0.602917,0.162312,571,2570,3141
|
||||||
|
98,4/8/2011,2,0,4,5,2,0.335833,0.324479,0.83625,0.226992,172,1299,1471
|
||||||
|
99,4/9/2011,2,0,4,6,2,0.3425,0.341529,0.8775,0.133083,879,1576,2455
|
||||||
|
100,4/10/2011,2,0,4,0,2,0.426667,0.426737,0.8575,0.146767,1188,1707,2895
|
||||||
|
101,4/11/2011,2,0,4,1,2,0.595652,0.565217,0.716956,0.324474,855,2493,3348
|
||||||
|
102,4/12/2011,2,0,4,2,2,0.5025,0.493054,0.739167,0.274879,257,1777,2034
|
||||||
|
103,4/13/2011,2,0,4,3,2,0.4125,0.417283,0.819167,0.250617,209,1953,2162
|
||||||
|
104,4/14/2011,2,0,4,4,1,0.4675,0.462742,0.540417,0.1107,529,2738,3267
|
||||||
|
105,4/15/2011,2,0,4,5,1,0.446667,0.441913,0.67125,0.226375,642,2484,3126
|
||||||
|
106,4/16/2011,2,0,4,6,3,0.430833,0.425492,0.888333,0.340808,121,674,795
|
||||||
|
107,4/17/2011,2,0,4,0,1,0.456667,0.445696,0.479583,0.303496,1558,2186,3744
|
||||||
|
108,4/18/2011,2,0,4,1,1,0.5125,0.503146,0.5425,0.163567,669,2760,3429
|
||||||
|
109,4/19/2011,2,0,4,2,2,0.505833,0.489258,0.665833,0.157971,409,2795,3204
|
||||||
|
110,4/20/2011,2,0,4,3,1,0.595,0.564392,0.614167,0.241925,613,3331,3944
|
||||||
|
111,4/21/2011,2,0,4,4,1,0.459167,0.453892,0.407083,0.325258,745,3444,4189
|
||||||
|
112,4/22/2011,2,0,4,5,2,0.336667,0.321954,0.729583,0.219521,177,1506,1683
|
||||||
|
113,4/23/2011,2,0,4,6,2,0.46,0.450121,0.887917,0.230725,1462,2574,4036
|
||||||
|
114,4/24/2011,2,0,4,0,2,0.581667,0.551763,0.810833,0.192175,1710,2481,4191
|
||||||
|
115,4/25/2011,2,0,4,1,1,0.606667,0.5745,0.776667,0.185333,773,3300,4073
|
||||||
|
116,4/26/2011,2,0,4,2,1,0.631667,0.594083,0.729167,0.3265,678,3722,4400
|
||||||
|
117,4/27/2011,2,0,4,3,2,0.62,0.575142,0.835417,0.3122,547,3325,3872
|
||||||
|
118,4/28/2011,2,0,4,4,2,0.6175,0.578929,0.700833,0.320908,569,3489,4058
|
||||||
|
119,4/29/2011,2,0,4,5,1,0.51,0.497463,0.457083,0.240063,878,3717,4595
|
||||||
|
120,4/30/2011,2,0,4,6,1,0.4725,0.464021,0.503333,0.235075,1965,3347,5312
|
||||||
|
121,5/1/2011,2,0,5,0,2,0.451667,0.448204,0.762083,0.106354,1138,2213,3351
|
||||||
|
122,5/2/2011,2,0,5,1,2,0.549167,0.532833,0.73,0.183454,847,3554,4401
|
||||||
|
123,5/3/2011,2,0,5,2,2,0.616667,0.582079,0.697083,0.342667,603,3848,4451
|
||||||
|
124,5/4/2011,2,0,5,3,2,0.414167,0.40465,0.737083,0.328996,255,2378,2633
|
||||||
|
125,5/5/2011,2,0,5,4,1,0.459167,0.441917,0.444167,0.295392,614,3819,4433
|
||||||
|
126,5/6/2011,2,0,5,5,1,0.479167,0.474117,0.59,0.228246,894,3714,4608
|
||||||
|
127,5/7/2011,2,0,5,6,1,0.52,0.512621,0.54125,0.16045,1612,3102,4714
|
||||||
|
128,5/8/2011,2,0,5,0,1,0.528333,0.518933,0.631667,0.0746375,1401,2932,4333
|
||||||
|
129,5/9/2011,2,0,5,1,1,0.5325,0.525246,0.58875,0.176,664,3698,4362
|
||||||
|
130,5/10/2011,2,0,5,2,1,0.5325,0.522721,0.489167,0.115671,694,4109,4803
|
||||||
|
131,5/11/2011,2,0,5,3,1,0.5425,0.5284,0.632917,0.120642,550,3632,4182
|
||||||
|
132,5/12/2011,2,0,5,4,1,0.535,0.523363,0.7475,0.189667,695,4169,4864
|
||||||
|
133,5/13/2011,2,0,5,5,2,0.5125,0.4943,0.863333,0.179725,692,3413,4105
|
||||||
|
134,5/14/2011,2,0,5,6,2,0.520833,0.500629,0.9225,0.13495,902,2507,3409
|
||||||
|
135,5/15/2011,2,0,5,0,2,0.5625,0.536,0.867083,0.152979,1582,2971,4553
|
||||||
|
136,5/16/2011,2,0,5,1,1,0.5775,0.550512,0.787917,0.126871,773,3185,3958
|
||||||
|
137,5/17/2011,2,0,5,2,2,0.561667,0.538529,0.837917,0.277354,678,3445,4123
|
||||||
|
138,5/18/2011,2,0,5,3,2,0.55,0.527158,0.87,0.201492,536,3319,3855
|
||||||
|
139,5/19/2011,2,0,5,4,2,0.530833,0.510742,0.829583,0.108213,735,3840,4575
|
||||||
|
140,5/20/2011,2,0,5,5,1,0.536667,0.529042,0.719583,0.125013,909,4008,4917
|
||||||
|
141,5/21/2011,2,0,5,6,1,0.6025,0.571975,0.626667,0.12065,2258,3547,5805
|
||||||
|
142,5/22/2011,2,0,5,0,1,0.604167,0.5745,0.749583,0.148008,1576,3084,4660
|
||||||
|
143,5/23/2011,2,0,5,1,2,0.631667,0.590296,0.81,0.233842,836,3438,4274
|
||||||
|
144,5/24/2011,2,0,5,2,2,0.66,0.604813,0.740833,0.207092,659,3833,4492
|
||||||
|
145,5/25/2011,2,0,5,3,1,0.660833,0.615542,0.69625,0.154233,740,4238,4978
|
||||||
|
146,5/26/2011,2,0,5,4,1,0.708333,0.654688,0.6775,0.199642,758,3919,4677
|
||||||
|
147,5/27/2011,2,0,5,5,1,0.681667,0.637008,0.65375,0.240679,871,3808,4679
|
||||||
|
148,5/28/2011,2,0,5,6,1,0.655833,0.612379,0.729583,0.230092,2001,2757,4758
|
||||||
|
149,5/29/2011,2,0,5,0,1,0.6675,0.61555,0.81875,0.213938,2355,2433,4788
|
||||||
|
150,5/30/2011,2,0,5,1,1,0.733333,0.671092,0.685,0.131225,1549,2549,4098
|
||||||
|
151,5/31/2011,2,0,5,2,1,0.775,0.725383,0.636667,0.111329,673,3309,3982
|
||||||
|
152,6/1/2011,2,0,6,3,2,0.764167,0.720967,0.677083,0.207092,513,3461,3974
|
||||||
|
153,6/2/2011,2,0,6,4,1,0.715,0.643942,0.305,0.292287,736,4232,4968
|
||||||
|
154,6/3/2011,2,0,6,5,1,0.62,0.587133,0.354167,0.253121,898,4414,5312
|
||||||
|
155,6/4/2011,2,0,6,6,1,0.635,0.594696,0.45625,0.123142,1869,3473,5342
|
||||||
|
156,6/5/2011,2,0,6,0,2,0.648333,0.616804,0.6525,0.138692,1685,3221,4906
|
||||||
|
157,6/6/2011,2,0,6,1,1,0.678333,0.621858,0.6,0.121896,673,3875,4548
|
||||||
|
158,6/7/2011,2,0,6,2,1,0.7075,0.65595,0.597917,0.187808,763,4070,4833
|
||||||
|
159,6/8/2011,2,0,6,3,1,0.775833,0.727279,0.622083,0.136817,676,3725,4401
|
||||||
|
160,6/9/2011,2,0,6,4,2,0.808333,0.757579,0.568333,0.149883,563,3352,3915
|
||||||
|
161,6/10/2011,2,0,6,5,1,0.755,0.703292,0.605,0.140554,815,3771,4586
|
||||||
|
162,6/11/2011,2,0,6,6,1,0.725,0.678038,0.654583,0.15485,1729,3237,4966
|
||||||
|
163,6/12/2011,2,0,6,0,1,0.6925,0.643325,0.747917,0.163567,1467,2993,4460
|
||||||
|
164,6/13/2011,2,0,6,1,1,0.635,0.601654,0.494583,0.30535,863,4157,5020
|
||||||
|
165,6/14/2011,2,0,6,2,1,0.604167,0.591546,0.507083,0.269283,727,4164,4891
|
||||||
|
166,6/15/2011,2,0,6,3,1,0.626667,0.587754,0.471667,0.167912,769,4411,5180
|
||||||
|
167,6/16/2011,2,0,6,4,2,0.628333,0.595346,0.688333,0.206471,545,3222,3767
|
||||||
|
168,6/17/2011,2,0,6,5,1,0.649167,0.600383,0.735833,0.143029,863,3981,4844
|
||||||
|
169,6/18/2011,2,0,6,6,1,0.696667,0.643954,0.670417,0.119408,1807,3312,5119
|
||||||
|
170,6/19/2011,2,0,6,0,2,0.699167,0.645846,0.666667,0.102,1639,3105,4744
|
||||||
|
171,6/20/2011,2,0,6,1,2,0.635,0.595346,0.74625,0.155475,699,3311,4010
|
||||||
|
172,6/21/2011,3,0,6,2,2,0.680833,0.637646,0.770417,0.171025,774,4061,4835
|
||||||
|
173,6/22/2011,3,0,6,3,1,0.733333,0.693829,0.7075,0.172262,661,3846,4507
|
||||||
|
174,6/23/2011,3,0,6,4,2,0.728333,0.693833,0.703333,0.238804,746,4044,4790
|
||||||
|
175,6/24/2011,3,0,6,5,1,0.724167,0.656583,0.573333,0.222025,969,4022,4991
|
||||||
|
176,6/25/2011,3,0,6,6,1,0.695,0.643313,0.483333,0.209571,1782,3420,5202
|
||||||
|
177,6/26/2011,3,0,6,0,1,0.68,0.637629,0.513333,0.0945333,1920,3385,5305
|
||||||
|
178,6/27/2011,3,0,6,1,2,0.6825,0.637004,0.658333,0.107588,854,3854,4708
|
||||||
|
179,6/28/2011,3,0,6,2,1,0.744167,0.692558,0.634167,0.144283,732,3916,4648
|
||||||
|
180,6/29/2011,3,0,6,3,1,0.728333,0.654688,0.497917,0.261821,848,4377,5225
|
||||||
|
181,6/30/2011,3,0,6,4,1,0.696667,0.637008,0.434167,0.185312,1027,4488,5515
|
||||||
|
182,7/1/2011,3,0,7,5,1,0.7225,0.652162,0.39625,0.102608,1246,4116,5362
|
||||||
|
183,7/2/2011,3,0,7,6,1,0.738333,0.667308,0.444583,0.115062,2204,2915,5119
|
||||||
|
184,7/3/2011,3,0,7,0,2,0.716667,0.668575,0.6825,0.228858,2282,2367,4649
|
||||||
|
185,7/4/2011,3,0,7,1,2,0.726667,0.665417,0.637917,0.0814792,3065,2978,6043
|
||||||
|
186,7/5/2011,3,0,7,2,1,0.746667,0.696338,0.590417,0.126258,1031,3634,4665
|
||||||
|
187,7/6/2011,3,0,7,3,1,0.72,0.685633,0.743333,0.149883,784,3845,4629
|
||||||
|
188,7/7/2011,3,0,7,4,1,0.75,0.686871,0.65125,0.1592,754,3838,4592
|
||||||
|
189,7/8/2011,3,0,7,5,2,0.709167,0.670483,0.757917,0.225129,692,3348,4040
|
||||||
|
190,7/9/2011,3,0,7,6,1,0.733333,0.664158,0.609167,0.167912,1988,3348,5336
|
||||||
|
191,7/10/2011,3,0,7,0,1,0.7475,0.690025,0.578333,0.183471,1743,3138,4881
|
||||||
|
192,7/11/2011,3,0,7,1,1,0.7625,0.729804,0.635833,0.282337,723,3363,4086
|
||||||
|
193,7/12/2011,3,0,7,2,1,0.794167,0.739275,0.559167,0.200254,662,3596,4258
|
||||||
|
194,7/13/2011,3,0,7,3,1,0.746667,0.689404,0.631667,0.146133,748,3594,4342
|
||||||
|
195,7/14/2011,3,0,7,4,1,0.680833,0.635104,0.47625,0.240667,888,4196,5084
|
||||||
|
196,7/15/2011,3,0,7,5,1,0.663333,0.624371,0.59125,0.182833,1318,4220,5538
|
||||||
|
197,7/16/2011,3,0,7,6,1,0.686667,0.638263,0.585,0.208342,2418,3505,5923
|
||||||
|
198,7/17/2011,3,0,7,0,1,0.719167,0.669833,0.604167,0.245033,2006,3296,5302
|
||||||
|
199,7/18/2011,3,0,7,1,1,0.746667,0.703925,0.65125,0.215804,841,3617,4458
|
||||||
|
200,7/19/2011,3,0,7,2,1,0.776667,0.747479,0.650417,0.1306,752,3789,4541
|
||||||
|
201,7/20/2011,3,0,7,3,1,0.768333,0.74685,0.707083,0.113817,644,3688,4332
|
||||||
|
202,7/21/2011,3,0,7,4,2,0.815,0.826371,0.69125,0.222021,632,3152,3784
|
||||||
|
203,7/22/2011,3,0,7,5,1,0.848333,0.840896,0.580417,0.1331,562,2825,3387
|
||||||
|
204,7/23/2011,3,0,7,6,1,0.849167,0.804287,0.5,0.131221,987,2298,3285
|
||||||
|
205,7/24/2011,3,0,7,0,1,0.83,0.794829,0.550833,0.169171,1050,2556,3606
|
||||||
|
206,7/25/2011,3,0,7,1,1,0.743333,0.720958,0.757083,0.0908083,568,3272,3840
|
||||||
|
207,7/26/2011,3,0,7,2,1,0.771667,0.696979,0.540833,0.200258,750,3840,4590
|
||||||
|
208,7/27/2011,3,0,7,3,1,0.775,0.690667,0.402917,0.183463,755,3901,4656
|
||||||
|
209,7/28/2011,3,0,7,4,1,0.779167,0.7399,0.583333,0.178479,606,3784,4390
|
||||||
|
210,7/29/2011,3,0,7,5,1,0.838333,0.785967,0.5425,0.174138,670,3176,3846
|
||||||
|
211,7/30/2011,3,0,7,6,1,0.804167,0.728537,0.465833,0.168537,1559,2916,4475
|
||||||
|
212,7/31/2011,3,0,7,0,1,0.805833,0.729796,0.480833,0.164813,1524,2778,4302
|
||||||
|
213,8/1/2011,3,0,8,1,1,0.771667,0.703292,0.550833,0.156717,729,3537,4266
|
||||||
|
214,8/2/2011,3,0,8,2,1,0.783333,0.707071,0.49125,0.20585,801,4044,4845
|
||||||
|
215,8/3/2011,3,0,8,3,2,0.731667,0.679937,0.6575,0.135583,467,3107,3574
|
||||||
|
216,8/4/2011,3,0,8,4,2,0.71,0.664788,0.7575,0.19715,799,3777,4576
|
||||||
|
217,8/5/2011,3,0,8,5,1,0.710833,0.656567,0.630833,0.184696,1023,3843,4866
|
||||||
|
218,8/6/2011,3,0,8,6,2,0.716667,0.676154,0.755,0.22825,1521,2773,4294
|
||||||
|
219,8/7/2011,3,0,8,0,1,0.7425,0.715292,0.752917,0.201487,1298,2487,3785
|
||||||
|
220,8/8/2011,3,0,8,1,1,0.765,0.703283,0.592083,0.192175,846,3480,4326
|
||||||
|
221,8/9/2011,3,0,8,2,1,0.775,0.724121,0.570417,0.151121,907,3695,4602
|
||||||
|
222,8/10/2011,3,0,8,3,1,0.766667,0.684983,0.424167,0.200258,884,3896,4780
|
||||||
|
223,8/11/2011,3,0,8,4,1,0.7175,0.651521,0.42375,0.164796,812,3980,4792
|
||||||
|
224,8/12/2011,3,0,8,5,1,0.708333,0.654042,0.415,0.125621,1051,3854,4905
|
||||||
|
225,8/13/2011,3,0,8,6,2,0.685833,0.645858,0.729583,0.211454,1504,2646,4150
|
||||||
|
226,8/14/2011,3,0,8,0,2,0.676667,0.624388,0.8175,0.222633,1338,2482,3820
|
||||||
|
227,8/15/2011,3,0,8,1,1,0.665833,0.616167,0.712083,0.208954,775,3563,4338
|
||||||
|
228,8/16/2011,3,0,8,2,1,0.700833,0.645837,0.578333,0.236329,721,4004,4725
|
||||||
|
229,8/17/2011,3,0,8,3,1,0.723333,0.666671,0.575417,0.143667,668,4026,4694
|
||||||
|
230,8/18/2011,3,0,8,4,1,0.711667,0.662258,0.654583,0.233208,639,3166,3805
|
||||||
|
231,8/19/2011,3,0,8,5,2,0.685,0.633221,0.722917,0.139308,797,3356,4153
|
||||||
|
232,8/20/2011,3,0,8,6,1,0.6975,0.648996,0.674167,0.104467,1914,3277,5191
|
||||||
|
233,8/21/2011,3,0,8,0,1,0.710833,0.675525,0.77,0.248754,1249,2624,3873
|
||||||
|
234,8/22/2011,3,0,8,1,1,0.691667,0.638254,0.47,0.27675,833,3925,4758
|
||||||
|
235,8/23/2011,3,0,8,2,1,0.640833,0.606067,0.455417,0.146763,1281,4614,5895
|
||||||
|
236,8/24/2011,3,0,8,3,1,0.673333,0.630692,0.605,0.253108,949,4181,5130
|
||||||
|
237,8/25/2011,3,0,8,4,2,0.684167,0.645854,0.771667,0.210833,435,3107,3542
|
||||||
|
238,8/26/2011,3,0,8,5,1,0.7,0.659733,0.76125,0.0839625,768,3893,4661
|
||||||
|
239,8/27/2011,3,0,8,6,2,0.68,0.635556,0.85,0.375617,226,889,1115
|
||||||
|
240,8/28/2011,3,0,8,0,1,0.707059,0.647959,0.561765,0.304659,1415,2919,4334
|
||||||
|
241,8/29/2011,3,0,8,1,1,0.636667,0.607958,0.554583,0.159825,729,3905,4634
|
||||||
|
242,8/30/2011,3,0,8,2,1,0.639167,0.594704,0.548333,0.125008,775,4429,5204
|
||||||
|
243,8/31/2011,3,0,8,3,1,0.656667,0.611121,0.597917,0.0833333,688,4370,5058
|
||||||
|
244,9/1/2011,3,0,9,4,1,0.655,0.614921,0.639167,0.141796,783,4332,5115
|
||||||
|
245,9/2/2011,3,0,9,5,2,0.643333,0.604808,0.727083,0.139929,875,3852,4727
|
||||||
|
246,9/3/2011,3,0,9,6,1,0.669167,0.633213,0.716667,0.185325,1935,2549,4484
|
||||||
|
247,9/4/2011,3,0,9,0,1,0.709167,0.665429,0.742083,0.206467,2521,2419,4940
|
||||||
|
248,9/5/2011,3,0,9,1,2,0.673333,0.625646,0.790417,0.212696,1236,2115,3351
|
||||||
|
249,9/6/2011,3,0,9,2,3,0.54,0.5152,0.886957,0.343943,204,2506,2710
|
||||||
|
250,9/7/2011,3,0,9,3,3,0.599167,0.544229,0.917083,0.0970208,118,1878,1996
|
||||||
|
251,9/8/2011,3,0,9,4,3,0.633913,0.555361,0.939565,0.192748,153,1689,1842
|
||||||
|
252,9/9/2011,3,0,9,5,2,0.65,0.578946,0.897917,0.124379,417,3127,3544
|
||||||
|
253,9/10/2011,3,0,9,6,1,0.66,0.607962,0.75375,0.153608,1750,3595,5345
|
||||||
|
254,9/11/2011,3,0,9,0,1,0.653333,0.609229,0.71375,0.115054,1633,3413,5046
|
||||||
|
255,9/12/2011,3,0,9,1,1,0.644348,0.60213,0.692174,0.088913,690,4023,4713
|
||||||
|
256,9/13/2011,3,0,9,2,1,0.650833,0.603554,0.7125,0.141804,701,4062,4763
|
||||||
|
257,9/14/2011,3,0,9,3,1,0.673333,0.6269,0.697083,0.1673,647,4138,4785
|
||||||
|
258,9/15/2011,3,0,9,4,2,0.5775,0.553671,0.709167,0.271146,428,3231,3659
|
||||||
|
259,9/16/2011,3,0,9,5,2,0.469167,0.461475,0.590417,0.164183,742,4018,4760
|
||||||
|
260,9/17/2011,3,0,9,6,2,0.491667,0.478512,0.718333,0.189675,1434,3077,4511
|
||||||
|
261,9/18/2011,3,0,9,0,1,0.5075,0.490537,0.695,0.178483,1353,2921,4274
|
||||||
|
262,9/19/2011,3,0,9,1,2,0.549167,0.529675,0.69,0.151742,691,3848,4539
|
||||||
|
263,9/20/2011,3,0,9,2,2,0.561667,0.532217,0.88125,0.134954,438,3203,3641
|
||||||
|
264,9/21/2011,3,0,9,3,2,0.595,0.550533,0.9,0.0964042,539,3813,4352
|
||||||
|
265,9/22/2011,3,0,9,4,2,0.628333,0.554963,0.902083,0.128125,555,4240,4795
|
||||||
|
266,9/23/2011,4,0,9,5,2,0.609167,0.522125,0.9725,0.0783667,258,2137,2395
|
||||||
|
267,9/24/2011,4,0,9,6,2,0.606667,0.564412,0.8625,0.0783833,1776,3647,5423
|
||||||
|
268,9/25/2011,4,0,9,0,2,0.634167,0.572637,0.845,0.0503792,1544,3466,5010
|
||||||
|
269,9/26/2011,4,0,9,1,2,0.649167,0.589042,0.848333,0.1107,684,3946,4630
|
||||||
|
270,9/27/2011,4,0,9,2,2,0.636667,0.574525,0.885417,0.118171,477,3643,4120
|
||||||
|
271,9/28/2011,4,0,9,3,2,0.635,0.575158,0.84875,0.148629,480,3427,3907
|
||||||
|
272,9/29/2011,4,0,9,4,1,0.616667,0.574512,0.699167,0.172883,653,4186,4839
|
||||||
|
273,9/30/2011,4,0,9,5,1,0.564167,0.544829,0.6475,0.206475,830,4372,5202
|
||||||
|
274,10/1/2011,4,0,10,6,2,0.41,0.412863,0.75375,0.292296,480,1949,2429
|
||||||
|
275,10/2/2011,4,0,10,0,2,0.356667,0.345317,0.791667,0.222013,616,2302,2918
|
||||||
|
276,10/3/2011,4,0,10,1,2,0.384167,0.392046,0.760833,0.0833458,330,3240,3570
|
||||||
|
277,10/4/2011,4,0,10,2,1,0.484167,0.472858,0.71,0.205854,486,3970,4456
|
||||||
|
278,10/5/2011,4,0,10,3,1,0.538333,0.527138,0.647917,0.17725,559,4267,4826
|
||||||
|
279,10/6/2011,4,0,10,4,1,0.494167,0.480425,0.620833,0.134954,639,4126,4765
|
||||||
|
280,10/7/2011,4,0,10,5,1,0.510833,0.504404,0.684167,0.0223917,949,4036,4985
|
||||||
|
281,10/8/2011,4,0,10,6,1,0.521667,0.513242,0.70125,0.0454042,2235,3174,5409
|
||||||
|
282,10/9/2011,4,0,10,0,1,0.540833,0.523983,0.7275,0.06345,2397,3114,5511
|
||||||
|
283,10/10/2011,4,0,10,1,1,0.570833,0.542925,0.73375,0.0423042,1514,3603,5117
|
||||||
|
284,10/11/2011,4,0,10,2,2,0.566667,0.546096,0.80875,0.143042,667,3896,4563
|
||||||
|
285,10/12/2011,4,0,10,3,3,0.543333,0.517717,0.90625,0.24815,217,2199,2416
|
||||||
|
286,10/13/2011,4,0,10,4,2,0.589167,0.551804,0.896667,0.141787,290,2623,2913
|
||||||
|
287,10/14/2011,4,0,10,5,2,0.550833,0.529675,0.71625,0.223883,529,3115,3644
|
||||||
|
288,10/15/2011,4,0,10,6,1,0.506667,0.498725,0.483333,0.258083,1899,3318,5217
|
||||||
|
289,10/16/2011,4,0,10,0,1,0.511667,0.503154,0.486667,0.281717,1748,3293,5041
|
||||||
|
290,10/17/2011,4,0,10,1,1,0.534167,0.510725,0.579583,0.175379,713,3857,4570
|
||||||
|
291,10/18/2011,4,0,10,2,2,0.5325,0.522721,0.701667,0.110087,637,4111,4748
|
||||||
|
292,10/19/2011,4,0,10,3,3,0.541739,0.513848,0.895217,0.243339,254,2170,2424
|
||||||
|
293,10/20/2011,4,0,10,4,1,0.475833,0.466525,0.63625,0.422275,471,3724,4195
|
||||||
|
294,10/21/2011,4,0,10,5,1,0.4275,0.423596,0.574167,0.221396,676,3628,4304
|
||||||
|
295,10/22/2011,4,0,10,6,1,0.4225,0.425492,0.629167,0.0926667,1499,2809,4308
|
||||||
|
296,10/23/2011,4,0,10,0,1,0.421667,0.422333,0.74125,0.0995125,1619,2762,4381
|
||||||
|
297,10/24/2011,4,0,10,1,1,0.463333,0.457067,0.772083,0.118792,699,3488,4187
|
||||||
|
298,10/25/2011,4,0,10,2,1,0.471667,0.463375,0.622917,0.166658,695,3992,4687
|
||||||
|
299,10/26/2011,4,0,10,3,2,0.484167,0.472846,0.720417,0.148642,404,3490,3894
|
||||||
|
300,10/27/2011,4,0,10,4,2,0.47,0.457046,0.812917,0.197763,240,2419,2659
|
||||||
|
301,10/28/2011,4,0,10,5,2,0.330833,0.318812,0.585833,0.229479,456,3291,3747
|
||||||
|
302,10/29/2011,4,0,10,6,3,0.254167,0.227913,0.8825,0.351371,57,570,627
|
||||||
|
303,10/30/2011,4,0,10,0,1,0.319167,0.321329,0.62375,0.176617,885,2446,3331
|
||||||
|
304,10/31/2011,4,0,10,1,1,0.34,0.356063,0.703333,0.10635,362,3307,3669
|
||||||
|
305,11/1/2011,4,0,11,2,1,0.400833,0.397088,0.68375,0.135571,410,3658,4068
|
||||||
|
306,11/2/2011,4,0,11,3,1,0.3775,0.390133,0.71875,0.0820917,370,3816,4186
|
||||||
|
307,11/3/2011,4,0,11,4,1,0.408333,0.405921,0.702083,0.136817,318,3656,3974
|
||||||
|
308,11/4/2011,4,0,11,5,2,0.403333,0.403392,0.6225,0.271779,470,3576,4046
|
||||||
|
309,11/5/2011,4,0,11,6,1,0.326667,0.323854,0.519167,0.189062,1156,2770,3926
|
||||||
|
310,11/6/2011,4,0,11,0,1,0.348333,0.362358,0.734583,0.0920542,952,2697,3649
|
||||||
|
311,11/7/2011,4,0,11,1,1,0.395,0.400871,0.75875,0.057225,373,3662,4035
|
||||||
|
312,11/8/2011,4,0,11,2,1,0.408333,0.412246,0.721667,0.0690375,376,3829,4205
|
||||||
|
313,11/9/2011,4,0,11,3,1,0.4,0.409079,0.758333,0.0621958,305,3804,4109
|
||||||
|
314,11/10/2011,4,0,11,4,2,0.38,0.373721,0.813333,0.189067,190,2743,2933
|
||||||
|
315,11/11/2011,4,0,11,5,1,0.324167,0.306817,0.44625,0.314675,440,2928,3368
|
||||||
|
316,11/12/2011,4,0,11,6,1,0.356667,0.357942,0.552917,0.212062,1275,2792,4067
|
||||||
|
317,11/13/2011,4,0,11,0,1,0.440833,0.43055,0.458333,0.281721,1004,2713,3717
|
||||||
|
318,11/14/2011,4,0,11,1,1,0.53,0.524612,0.587083,0.306596,595,3891,4486
|
||||||
|
319,11/15/2011,4,0,11,2,2,0.53,0.507579,0.68875,0.199633,449,3746,4195
|
||||||
|
320,11/16/2011,4,0,11,3,3,0.456667,0.451988,0.93,0.136829,145,1672,1817
|
||||||
|
321,11/17/2011,4,0,11,4,2,0.341667,0.323221,0.575833,0.305362,139,2914,3053
|
||||||
|
322,11/18/2011,4,0,11,5,1,0.274167,0.272721,0.41,0.168533,245,3147,3392
|
||||||
|
323,11/19/2011,4,0,11,6,1,0.329167,0.324483,0.502083,0.224496,943,2720,3663
|
||||||
|
324,11/20/2011,4,0,11,0,2,0.463333,0.457058,0.684583,0.18595,787,2733,3520
|
||||||
|
325,11/21/2011,4,0,11,1,3,0.4475,0.445062,0.91,0.138054,220,2545,2765
|
||||||
|
326,11/22/2011,4,0,11,2,3,0.416667,0.421696,0.9625,0.118792,69,1538,1607
|
||||||
|
327,11/23/2011,4,0,11,3,2,0.440833,0.430537,0.757917,0.335825,112,2454,2566
|
||||||
|
328,11/24/2011,4,0,11,4,1,0.373333,0.372471,0.549167,0.167304,560,935,1495
|
||||||
|
329,11/25/2011,4,0,11,5,1,0.375,0.380671,0.64375,0.0988958,1095,1697,2792
|
||||||
|
330,11/26/2011,4,0,11,6,1,0.375833,0.385087,0.681667,0.0684208,1249,1819,3068
|
||||||
|
331,11/27/2011,4,0,11,0,1,0.459167,0.4558,0.698333,0.208954,810,2261,3071
|
||||||
|
332,11/28/2011,4,0,11,1,1,0.503478,0.490122,0.743043,0.142122,253,3614,3867
|
||||||
|
333,11/29/2011,4,0,11,2,2,0.458333,0.451375,0.830833,0.258092,96,2818,2914
|
||||||
|
334,11/30/2011,4,0,11,3,1,0.325,0.311221,0.613333,0.271158,188,3425,3613
|
||||||
|
335,12/1/2011,4,0,12,4,1,0.3125,0.305554,0.524583,0.220158,182,3545,3727
|
||||||
|
336,12/2/2011,4,0,12,5,1,0.314167,0.331433,0.625833,0.100754,268,3672,3940
|
||||||
|
337,12/3/2011,4,0,12,6,1,0.299167,0.310604,0.612917,0.0957833,706,2908,3614
|
||||||
|
338,12/4/2011,4,0,12,0,1,0.330833,0.3491,0.775833,0.0839583,634,2851,3485
|
||||||
|
339,12/5/2011,4,0,12,1,2,0.385833,0.393925,0.827083,0.0622083,233,3578,3811
|
||||||
|
340,12/6/2011,4,0,12,2,3,0.4625,0.4564,0.949583,0.232583,126,2468,2594
|
||||||
|
341,12/7/2011,4,0,12,3,3,0.41,0.400246,0.970417,0.266175,50,655,705
|
||||||
|
342,12/8/2011,4,0,12,4,1,0.265833,0.256938,0.58,0.240058,150,3172,3322
|
||||||
|
343,12/9/2011,4,0,12,5,1,0.290833,0.317542,0.695833,0.0827167,261,3359,3620
|
||||||
|
344,12/10/2011,4,0,12,6,1,0.275,0.266412,0.5075,0.233221,502,2688,3190
|
||||||
|
345,12/11/2011,4,0,12,0,1,0.220833,0.253154,0.49,0.0665417,377,2366,2743
|
||||||
|
346,12/12/2011,4,0,12,1,1,0.238333,0.270196,0.670833,0.06345,143,3167,3310
|
||||||
|
347,12/13/2011,4,0,12,2,1,0.2825,0.301138,0.59,0.14055,155,3368,3523
|
||||||
|
348,12/14/2011,4,0,12,3,2,0.3175,0.338362,0.66375,0.0609583,178,3562,3740
|
||||||
|
349,12/15/2011,4,0,12,4,2,0.4225,0.412237,0.634167,0.268042,181,3528,3709
|
||||||
|
350,12/16/2011,4,0,12,5,2,0.375,0.359825,0.500417,0.260575,178,3399,3577
|
||||||
|
351,12/17/2011,4,0,12,6,2,0.258333,0.249371,0.560833,0.243167,275,2464,2739
|
||||||
|
352,12/18/2011,4,0,12,0,1,0.238333,0.245579,0.58625,0.169779,220,2211,2431
|
||||||
|
353,12/19/2011,4,0,12,1,1,0.276667,0.280933,0.6375,0.172896,260,3143,3403
|
||||||
|
354,12/20/2011,4,0,12,2,2,0.385833,0.396454,0.595417,0.0615708,216,3534,3750
|
||||||
|
355,12/21/2011,1,0,12,3,2,0.428333,0.428017,0.858333,0.2214,107,2553,2660
|
||||||
|
356,12/22/2011,1,0,12,4,2,0.423333,0.426121,0.7575,0.047275,227,2841,3068
|
||||||
|
357,12/23/2011,1,0,12,5,1,0.373333,0.377513,0.68625,0.274246,163,2046,2209
|
||||||
|
358,12/24/2011,1,0,12,6,1,0.3025,0.299242,0.5425,0.190304,155,856,1011
|
||||||
|
359,12/25/2011,1,0,12,0,1,0.274783,0.279961,0.681304,0.155091,303,451,754
|
||||||
|
360,12/26/2011,1,0,12,1,1,0.321739,0.315535,0.506957,0.239465,430,887,1317
|
||||||
|
361,12/27/2011,1,0,12,2,2,0.325,0.327633,0.7625,0.18845,103,1059,1162
|
||||||
|
362,12/28/2011,1,0,12,3,1,0.29913,0.279974,0.503913,0.293961,255,2047,2302
|
||||||
|
363,12/29/2011,1,0,12,4,1,0.248333,0.263892,0.574167,0.119412,254,2169,2423
|
||||||
|
364,12/30/2011,1,0,12,5,1,0.311667,0.318812,0.636667,0.134337,491,2508,2999
|
||||||
|
365,12/31/2011,1,0,12,6,1,0.41,0.414121,0.615833,0.220154,665,1820,2485
|
||||||
|
366,1/1/2012,1,1,1,0,1,0.37,0.375621,0.6925,0.192167,686,1608,2294
|
||||||
|
367,1/2/2012,1,1,1,1,1,0.273043,0.252304,0.381304,0.329665,244,1707,1951
|
||||||
|
368,1/3/2012,1,1,1,2,1,0.15,0.126275,0.44125,0.365671,89,2147,2236
|
||||||
|
369,1/4/2012,1,1,1,3,2,0.1075,0.119337,0.414583,0.1847,95,2273,2368
|
||||||
|
370,1/5/2012,1,1,1,4,1,0.265833,0.278412,0.524167,0.129987,140,3132,3272
|
||||||
|
371,1/6/2012,1,1,1,5,1,0.334167,0.340267,0.542083,0.167908,307,3791,4098
|
||||||
|
372,1/7/2012,1,1,1,6,1,0.393333,0.390779,0.531667,0.174758,1070,3451,4521
|
||||||
|
373,1/8/2012,1,1,1,0,1,0.3375,0.340258,0.465,0.191542,599,2826,3425
|
||||||
|
374,1/9/2012,1,1,1,1,2,0.224167,0.247479,0.701667,0.0989,106,2270,2376
|
||||||
|
375,1/10/2012,1,1,1,2,1,0.308696,0.318826,0.646522,0.187552,173,3425,3598
|
||||||
|
376,1/11/2012,1,1,1,3,2,0.274167,0.282821,0.8475,0.131221,92,2085,2177
|
||||||
|
377,1/12/2012,1,1,1,4,2,0.3825,0.381938,0.802917,0.180967,269,3828,4097
|
||||||
|
378,1/13/2012,1,1,1,5,1,0.274167,0.249362,0.5075,0.378108,174,3040,3214
|
||||||
|
379,1/14/2012,1,1,1,6,1,0.18,0.183087,0.4575,0.187183,333,2160,2493
|
||||||
|
380,1/15/2012,1,1,1,0,1,0.166667,0.161625,0.419167,0.251258,284,2027,2311
|
||||||
|
381,1/16/2012,1,1,1,1,1,0.19,0.190663,0.5225,0.231358,217,2081,2298
|
||||||
|
382,1/17/2012,1,1,1,2,2,0.373043,0.364278,0.716087,0.34913,127,2808,2935
|
||||||
|
383,1/18/2012,1,1,1,3,1,0.303333,0.275254,0.443333,0.415429,109,3267,3376
|
||||||
|
384,1/19/2012,1,1,1,4,1,0.19,0.190038,0.4975,0.220158,130,3162,3292
|
||||||
|
385,1/20/2012,1,1,1,5,2,0.2175,0.220958,0.45,0.20275,115,3048,3163
|
||||||
|
386,1/21/2012,1,1,1,6,2,0.173333,0.174875,0.83125,0.222642,67,1234,1301
|
||||||
|
387,1/22/2012,1,1,1,0,2,0.1625,0.16225,0.79625,0.199638,196,1781,1977
|
||||||
|
388,1/23/2012,1,1,1,1,2,0.218333,0.243058,0.91125,0.110708,145,2287,2432
|
||||||
|
389,1/24/2012,1,1,1,2,1,0.3425,0.349108,0.835833,0.123767,439,3900,4339
|
||||||
|
390,1/25/2012,1,1,1,3,1,0.294167,0.294821,0.64375,0.161071,467,3803,4270
|
||||||
|
391,1/26/2012,1,1,1,4,2,0.341667,0.35605,0.769583,0.0733958,244,3831,4075
|
||||||
|
392,1/27/2012,1,1,1,5,2,0.425,0.415383,0.74125,0.342667,269,3187,3456
|
||||||
|
393,1/28/2012,1,1,1,6,1,0.315833,0.326379,0.543333,0.210829,775,3248,4023
|
||||||
|
394,1/29/2012,1,1,1,0,1,0.2825,0.272721,0.31125,0.24005,558,2685,3243
|
||||||
|
395,1/30/2012,1,1,1,1,1,0.269167,0.262625,0.400833,0.215792,126,3498,3624
|
||||||
|
396,1/31/2012,1,1,1,2,1,0.39,0.381317,0.416667,0.261817,324,4185,4509
|
||||||
|
397,2/1/2012,1,1,2,3,1,0.469167,0.466538,0.507917,0.189067,304,4275,4579
|
||||||
|
398,2/2/2012,1,1,2,4,2,0.399167,0.398971,0.672917,0.187187,190,3571,3761
|
||||||
|
399,2/3/2012,1,1,2,5,1,0.313333,0.309346,0.526667,0.178496,310,3841,4151
|
||||||
|
400,2/4/2012,1,1,2,6,2,0.264167,0.272725,0.779583,0.121896,384,2448,2832
|
||||||
|
401,2/5/2012,1,1,2,0,2,0.265833,0.264521,0.687917,0.175996,318,2629,2947
|
||||||
|
402,2/6/2012,1,1,2,1,1,0.282609,0.296426,0.622174,0.1538,206,3578,3784
|
||||||
|
403,2/7/2012,1,1,2,2,1,0.354167,0.361104,0.49625,0.147379,199,4176,4375
|
||||||
|
404,2/8/2012,1,1,2,3,2,0.256667,0.266421,0.722917,0.133721,109,2693,2802
|
||||||
|
405,2/9/2012,1,1,2,4,1,0.265,0.261988,0.562083,0.194037,163,3667,3830
|
||||||
|
406,2/10/2012,1,1,2,5,2,0.280833,0.293558,0.54,0.116929,227,3604,3831
|
||||||
|
407,2/11/2012,1,1,2,6,3,0.224167,0.210867,0.73125,0.289796,192,1977,2169
|
||||||
|
408,2/12/2012,1,1,2,0,1,0.1275,0.101658,0.464583,0.409212,73,1456,1529
|
||||||
|
409,2/13/2012,1,1,2,1,1,0.2225,0.227913,0.41125,0.167283,94,3328,3422
|
||||||
|
410,2/14/2012,1,1,2,2,2,0.319167,0.333946,0.50875,0.141179,135,3787,3922
|
||||||
|
411,2/15/2012,1,1,2,3,1,0.348333,0.351629,0.53125,0.1816,141,4028,4169
|
||||||
|
412,2/16/2012,1,1,2,4,2,0.316667,0.330162,0.752917,0.091425,74,2931,3005
|
||||||
|
413,2/17/2012,1,1,2,5,1,0.343333,0.351629,0.634583,0.205846,349,3805,4154
|
||||||
|
414,2/18/2012,1,1,2,6,1,0.346667,0.355425,0.534583,0.190929,1435,2883,4318
|
||||||
|
415,2/19/2012,1,1,2,0,2,0.28,0.265788,0.515833,0.253112,618,2071,2689
|
||||||
|
416,2/20/2012,1,1,2,1,1,0.28,0.273391,0.507826,0.229083,502,2627,3129
|
||||||
|
417,2/21/2012,1,1,2,2,1,0.287826,0.295113,0.594348,0.205717,163,3614,3777
|
||||||
|
418,2/22/2012,1,1,2,3,1,0.395833,0.392667,0.567917,0.234471,394,4379,4773
|
||||||
|
419,2/23/2012,1,1,2,4,1,0.454167,0.444446,0.554583,0.190913,516,4546,5062
|
||||||
|
420,2/24/2012,1,1,2,5,2,0.4075,0.410971,0.7375,0.237567,246,3241,3487
|
||||||
|
421,2/25/2012,1,1,2,6,1,0.290833,0.255675,0.395833,0.421642,317,2415,2732
|
||||||
|
422,2/26/2012,1,1,2,0,1,0.279167,0.268308,0.41,0.205229,515,2874,3389
|
||||||
|
423,2/27/2012,1,1,2,1,1,0.366667,0.357954,0.490833,0.268033,253,4069,4322
|
||||||
|
424,2/28/2012,1,1,2,2,1,0.359167,0.353525,0.395833,0.193417,229,4134,4363
|
||||||
|
425,2/29/2012,1,1,2,3,2,0.344348,0.34847,0.804783,0.179117,65,1769,1834
|
||||||
|
426,3/1/2012,1,1,3,4,1,0.485833,0.475371,0.615417,0.226987,325,4665,4990
|
||||||
|
427,3/2/2012,1,1,3,5,2,0.353333,0.359842,0.657083,0.144904,246,2948,3194
|
||||||
|
428,3/3/2012,1,1,3,6,2,0.414167,0.413492,0.62125,0.161079,956,3110,4066
|
||||||
|
429,3/4/2012,1,1,3,0,1,0.325833,0.303021,0.403333,0.334571,710,2713,3423
|
||||||
|
430,3/5/2012,1,1,3,1,1,0.243333,0.241171,0.50625,0.228858,203,3130,3333
|
||||||
|
431,3/6/2012,1,1,3,2,1,0.258333,0.255042,0.456667,0.200875,221,3735,3956
|
||||||
|
432,3/7/2012,1,1,3,3,1,0.404167,0.3851,0.513333,0.345779,432,4484,4916
|
||||||
|
433,3/8/2012,1,1,3,4,1,0.5275,0.524604,0.5675,0.441563,486,4896,5382
|
||||||
|
434,3/9/2012,1,1,3,5,2,0.410833,0.397083,0.407083,0.4148,447,4122,4569
|
||||||
|
435,3/10/2012,1,1,3,6,1,0.2875,0.277767,0.350417,0.22575,968,3150,4118
|
||||||
|
436,3/11/2012,1,1,3,0,1,0.361739,0.35967,0.476957,0.222587,1658,3253,4911
|
||||||
|
437,3/12/2012,1,1,3,1,1,0.466667,0.459592,0.489167,0.207713,838,4460,5298
|
||||||
|
438,3/13/2012,1,1,3,2,1,0.565,0.542929,0.6175,0.23695,762,5085,5847
|
||||||
|
439,3/14/2012,1,1,3,3,1,0.5725,0.548617,0.507083,0.115062,997,5315,6312
|
||||||
|
440,3/15/2012,1,1,3,4,1,0.5575,0.532825,0.579583,0.149883,1005,5187,6192
|
||||||
|
441,3/16/2012,1,1,3,5,2,0.435833,0.436229,0.842083,0.113192,548,3830,4378
|
||||||
|
442,3/17/2012,1,1,3,6,2,0.514167,0.505046,0.755833,0.110704,3155,4681,7836
|
||||||
|
443,3/18/2012,1,1,3,0,2,0.4725,0.464,0.81,0.126883,2207,3685,5892
|
||||||
|
444,3/19/2012,1,1,3,1,1,0.545,0.532821,0.72875,0.162317,982,5171,6153
|
||||||
|
445,3/20/2012,1,1,3,2,1,0.560833,0.538533,0.807917,0.121271,1051,5042,6093
|
||||||
|
446,3/21/2012,2,1,3,3,2,0.531667,0.513258,0.82125,0.0895583,1122,5108,6230
|
||||||
|
447,3/22/2012,2,1,3,4,1,0.554167,0.531567,0.83125,0.117562,1334,5537,6871
|
||||||
|
448,3/23/2012,2,1,3,5,2,0.601667,0.570067,0.694167,0.1163,2469,5893,8362
|
||||||
|
449,3/24/2012,2,1,3,6,2,0.5025,0.486733,0.885417,0.192783,1033,2339,3372
|
||||||
|
450,3/25/2012,2,1,3,0,2,0.4375,0.437488,0.880833,0.220775,1532,3464,4996
|
||||||
|
451,3/26/2012,2,1,3,1,1,0.445833,0.43875,0.477917,0.386821,795,4763,5558
|
||||||
|
452,3/27/2012,2,1,3,2,1,0.323333,0.315654,0.29,0.187192,531,4571,5102
|
||||||
|
453,3/28/2012,2,1,3,3,1,0.484167,0.47095,0.48125,0.291671,674,5024,5698
|
||||||
|
454,3/29/2012,2,1,3,4,1,0.494167,0.482304,0.439167,0.31965,834,5299,6133
|
||||||
|
455,3/30/2012,2,1,3,5,2,0.37,0.375621,0.580833,0.138067,796,4663,5459
|
||||||
|
456,3/31/2012,2,1,3,6,2,0.424167,0.421708,0.738333,0.250617,2301,3934,6235
|
||||||
|
457,4/1/2012,2,1,4,0,2,0.425833,0.417287,0.67625,0.172267,2347,3694,6041
|
||||||
|
458,4/2/2012,2,1,4,1,1,0.433913,0.427513,0.504348,0.312139,1208,4728,5936
|
||||||
|
459,4/3/2012,2,1,4,2,1,0.466667,0.461483,0.396667,0.100133,1348,5424,6772
|
||||||
|
460,4/4/2012,2,1,4,3,1,0.541667,0.53345,0.469583,0.180975,1058,5378,6436
|
||||||
|
461,4/5/2012,2,1,4,4,1,0.435,0.431163,0.374167,0.219529,1192,5265,6457
|
||||||
|
462,4/6/2012,2,1,4,5,1,0.403333,0.390767,0.377083,0.300388,1807,4653,6460
|
||||||
|
463,4/7/2012,2,1,4,6,1,0.4375,0.426129,0.254167,0.274871,3252,3605,6857
|
||||||
|
464,4/8/2012,2,1,4,0,1,0.5,0.492425,0.275833,0.232596,2230,2939,5169
|
||||||
|
465,4/9/2012,2,1,4,1,1,0.489167,0.476638,0.3175,0.358196,905,4680,5585
|
||||||
|
466,4/10/2012,2,1,4,2,1,0.446667,0.436233,0.435,0.249375,819,5099,5918
|
||||||
|
467,4/11/2012,2,1,4,3,1,0.348696,0.337274,0.469565,0.295274,482,4380,4862
|
||||||
|
468,4/12/2012,2,1,4,4,1,0.3975,0.387604,0.46625,0.290429,663,4746,5409
|
||||||
|
469,4/13/2012,2,1,4,5,1,0.4425,0.431808,0.408333,0.155471,1252,5146,6398
|
||||||
|
470,4/14/2012,2,1,4,6,1,0.495,0.487996,0.502917,0.190917,2795,4665,7460
|
||||||
|
471,4/15/2012,2,1,4,0,1,0.606667,0.573875,0.507917,0.225129,2846,4286,7132
|
||||||
|
472,4/16/2012,2,1,4,1,1,0.664167,0.614925,0.561667,0.284829,1198,5172,6370
|
||||||
|
473,4/17/2012,2,1,4,2,1,0.608333,0.598487,0.390417,0.273629,989,5702,6691
|
||||||
|
474,4/18/2012,2,1,4,3,2,0.463333,0.457038,0.569167,0.167912,347,4020,4367
|
||||||
|
475,4/19/2012,2,1,4,4,1,0.498333,0.493046,0.6125,0.0659292,846,5719,6565
|
||||||
|
476,4/20/2012,2,1,4,5,1,0.526667,0.515775,0.694583,0.149871,1340,5950,7290
|
||||||
|
477,4/21/2012,2,1,4,6,1,0.57,0.542921,0.682917,0.283587,2541,4083,6624
|
||||||
|
478,4/22/2012,2,1,4,0,3,0.396667,0.389504,0.835417,0.344546,120,907,1027
|
||||||
|
479,4/23/2012,2,1,4,1,2,0.321667,0.301125,0.766667,0.303496,195,3019,3214
|
||||||
|
480,4/24/2012,2,1,4,2,1,0.413333,0.405283,0.454167,0.249383,518,5115,5633
|
||||||
|
481,4/25/2012,2,1,4,3,1,0.476667,0.470317,0.427917,0.118792,655,5541,6196
|
||||||
|
482,4/26/2012,2,1,4,4,2,0.498333,0.483583,0.756667,0.176625,475,4551,5026
|
||||||
|
483,4/27/2012,2,1,4,5,1,0.4575,0.452637,0.400833,0.347633,1014,5219,6233
|
||||||
|
484,4/28/2012,2,1,4,6,2,0.376667,0.377504,0.489583,0.129975,1120,3100,4220
|
||||||
|
485,4/29/2012,2,1,4,0,1,0.458333,0.450121,0.587083,0.116908,2229,4075,6304
|
||||||
|
486,4/30/2012,2,1,4,1,2,0.464167,0.457696,0.57,0.171638,665,4907,5572
|
||||||
|
487,5/1/2012,2,1,5,2,2,0.613333,0.577021,0.659583,0.156096,653,5087,5740
|
||||||
|
488,5/2/2012,2,1,5,3,1,0.564167,0.537896,0.797083,0.138058,667,5502,6169
|
||||||
|
489,5/3/2012,2,1,5,4,2,0.56,0.537242,0.768333,0.133696,764,5657,6421
|
||||||
|
490,5/4/2012,2,1,5,5,1,0.6275,0.590917,0.735417,0.162938,1069,5227,6296
|
||||||
|
491,5/5/2012,2,1,5,6,2,0.621667,0.584608,0.756667,0.152992,2496,4387,6883
|
||||||
|
492,5/6/2012,2,1,5,0,2,0.5625,0.546737,0.74,0.149879,2135,4224,6359
|
||||||
|
493,5/7/2012,2,1,5,1,2,0.5375,0.527142,0.664167,0.230721,1008,5265,6273
|
||||||
|
494,5/8/2012,2,1,5,2,2,0.581667,0.557471,0.685833,0.296029,738,4990,5728
|
||||||
|
495,5/9/2012,2,1,5,3,2,0.575,0.553025,0.744167,0.216412,620,4097,4717
|
||||||
|
496,5/10/2012,2,1,5,4,1,0.505833,0.491783,0.552083,0.314063,1026,5546,6572
|
||||||
|
497,5/11/2012,2,1,5,5,1,0.533333,0.520833,0.360417,0.236937,1319,5711,7030
|
||||||
|
498,5/12/2012,2,1,5,6,1,0.564167,0.544817,0.480417,0.123133,2622,4807,7429
|
||||||
|
499,5/13/2012,2,1,5,0,1,0.6125,0.585238,0.57625,0.225117,2172,3946,6118
|
||||||
|
500,5/14/2012,2,1,5,1,2,0.573333,0.5499,0.789583,0.212692,342,2501,2843
|
||||||
|
501,5/15/2012,2,1,5,2,2,0.611667,0.576404,0.794583,0.147392,625,4490,5115
|
||||||
|
502,5/16/2012,2,1,5,3,1,0.636667,0.595975,0.697917,0.122512,991,6433,7424
|
||||||
|
503,5/17/2012,2,1,5,4,1,0.593333,0.572613,0.52,0.229475,1242,6142,7384
|
||||||
|
504,5/18/2012,2,1,5,5,1,0.564167,0.551121,0.523333,0.136817,1521,6118,7639
|
||||||
|
505,5/19/2012,2,1,5,6,1,0.6,0.566908,0.45625,0.083975,3410,4884,8294
|
||||||
|
506,5/20/2012,2,1,5,0,1,0.620833,0.583967,0.530417,0.254367,2704,4425,7129
|
||||||
|
507,5/21/2012,2,1,5,1,2,0.598333,0.565667,0.81125,0.233204,630,3729,4359
|
||||||
|
508,5/22/2012,2,1,5,2,2,0.615,0.580825,0.765833,0.118167,819,5254,6073
|
||||||
|
509,5/23/2012,2,1,5,3,2,0.621667,0.584612,0.774583,0.102,766,4494,5260
|
||||||
|
510,5/24/2012,2,1,5,4,1,0.655,0.6067,0.716667,0.172896,1059,5711,6770
|
||||||
|
511,5/25/2012,2,1,5,5,1,0.68,0.627529,0.747083,0.14055,1417,5317,6734
|
||||||
|
512,5/26/2012,2,1,5,6,1,0.6925,0.642696,0.7325,0.198992,2855,3681,6536
|
||||||
|
513,5/27/2012,2,1,5,0,1,0.69,0.641425,0.697083,0.215171,3283,3308,6591
|
||||||
|
514,5/28/2012,2,1,5,1,1,0.7125,0.6793,0.67625,0.196521,2557,3486,6043
|
||||||
|
515,5/29/2012,2,1,5,2,1,0.7225,0.672992,0.684583,0.2954,880,4863,5743
|
||||||
|
516,5/30/2012,2,1,5,3,2,0.656667,0.611129,0.67,0.134329,745,6110,6855
|
||||||
|
517,5/31/2012,2,1,5,4,1,0.68,0.631329,0.492917,0.195279,1100,6238,7338
|
||||||
|
518,6/1/2012,2,1,6,5,2,0.654167,0.607962,0.755417,0.237563,533,3594,4127
|
||||||
|
519,6/2/2012,2,1,6,6,1,0.583333,0.566288,0.549167,0.186562,2795,5325,8120
|
||||||
|
520,6/3/2012,2,1,6,0,1,0.6025,0.575133,0.493333,0.184087,2494,5147,7641
|
||||||
|
521,6/4/2012,2,1,6,1,1,0.5975,0.578283,0.487083,0.284833,1071,5927,6998
|
||||||
|
522,6/5/2012,2,1,6,2,2,0.540833,0.525892,0.613333,0.209575,968,6033,7001
|
||||||
|
523,6/6/2012,2,1,6,3,1,0.554167,0.542292,0.61125,0.077125,1027,6028,7055
|
||||||
|
524,6/7/2012,2,1,6,4,1,0.6025,0.569442,0.567083,0.15735,1038,6456,7494
|
||||||
|
525,6/8/2012,2,1,6,5,1,0.649167,0.597862,0.467917,0.175383,1488,6248,7736
|
||||||
|
526,6/9/2012,2,1,6,6,1,0.710833,0.648367,0.437083,0.144287,2708,4790,7498
|
||||||
|
527,6/10/2012,2,1,6,0,1,0.726667,0.663517,0.538333,0.133721,2224,4374,6598
|
||||||
|
528,6/11/2012,2,1,6,1,2,0.720833,0.659721,0.587917,0.207713,1017,5647,6664
|
||||||
|
529,6/12/2012,2,1,6,2,2,0.653333,0.597875,0.833333,0.214546,477,4495,4972
|
||||||
|
530,6/13/2012,2,1,6,3,1,0.655833,0.611117,0.582083,0.343279,1173,6248,7421
|
||||||
|
531,6/14/2012,2,1,6,4,1,0.648333,0.624383,0.569583,0.253733,1180,6183,7363
|
||||||
|
532,6/15/2012,2,1,6,5,1,0.639167,0.599754,0.589583,0.176617,1563,6102,7665
|
||||||
|
533,6/16/2012,2,1,6,6,1,0.631667,0.594708,0.504167,0.166667,2963,4739,7702
|
||||||
|
534,6/17/2012,2,1,6,0,1,0.5925,0.571975,0.59875,0.144904,2634,4344,6978
|
||||||
|
535,6/18/2012,2,1,6,1,2,0.568333,0.544842,0.777917,0.174746,653,4446,5099
|
||||||
|
536,6/19/2012,2,1,6,2,1,0.688333,0.654692,0.69,0.148017,968,5857,6825
|
||||||
|
537,6/20/2012,2,1,6,3,1,0.7825,0.720975,0.592083,0.113812,872,5339,6211
|
||||||
|
538,6/21/2012,3,1,6,4,1,0.805833,0.752542,0.567917,0.118787,778,5127,5905
|
||||||
|
539,6/22/2012,3,1,6,5,1,0.7775,0.724121,0.57375,0.182842,964,4859,5823
|
||||||
|
540,6/23/2012,3,1,6,6,1,0.731667,0.652792,0.534583,0.179721,2657,4801,7458
|
||||||
|
541,6/24/2012,3,1,6,0,1,0.743333,0.674254,0.479167,0.145525,2551,4340,6891
|
||||||
|
542,6/25/2012,3,1,6,1,1,0.715833,0.654042,0.504167,0.300383,1139,5640,6779
|
||||||
|
543,6/26/2012,3,1,6,2,1,0.630833,0.594704,0.373333,0.347642,1077,6365,7442
|
||||||
|
544,6/27/2012,3,1,6,3,1,0.6975,0.640792,0.36,0.271775,1077,6258,7335
|
||||||
|
545,6/28/2012,3,1,6,4,1,0.749167,0.675512,0.4225,0.17165,921,5958,6879
|
||||||
|
546,6/29/2012,3,1,6,5,1,0.834167,0.786613,0.48875,0.165417,829,4634,5463
|
||||||
|
547,6/30/2012,3,1,6,6,1,0.765,0.687508,0.60125,0.161071,1455,4232,5687
|
||||||
|
548,7/1/2012,3,1,7,0,1,0.815833,0.750629,0.51875,0.168529,1421,4110,5531
|
||||||
|
549,7/2/2012,3,1,7,1,1,0.781667,0.702038,0.447083,0.195267,904,5323,6227
|
||||||
|
550,7/3/2012,3,1,7,2,1,0.780833,0.70265,0.492083,0.126237,1052,5608,6660
|
||||||
|
551,7/4/2012,3,1,7,3,1,0.789167,0.732337,0.53875,0.13495,2562,4841,7403
|
||||||
|
552,7/5/2012,3,1,7,4,1,0.8275,0.761367,0.457917,0.194029,1405,4836,6241
|
||||||
|
553,7/6/2012,3,1,7,5,1,0.828333,0.752533,0.450833,0.146142,1366,4841,6207
|
||||||
|
554,7/7/2012,3,1,7,6,1,0.861667,0.804913,0.492083,0.163554,1448,3392,4840
|
||||||
|
555,7/8/2012,3,1,7,0,1,0.8225,0.790396,0.57375,0.125629,1203,3469,4672
|
||||||
|
556,7/9/2012,3,1,7,1,2,0.710833,0.654054,0.683333,0.180975,998,5571,6569
|
||||||
|
557,7/10/2012,3,1,7,2,2,0.720833,0.664796,0.6675,0.151737,954,5336,6290
|
||||||
|
558,7/11/2012,3,1,7,3,1,0.716667,0.650271,0.633333,0.151733,975,6289,7264
|
||||||
|
559,7/12/2012,3,1,7,4,1,0.715833,0.654683,0.529583,0.146775,1032,6414,7446
|
||||||
|
560,7/13/2012,3,1,7,5,2,0.731667,0.667933,0.485833,0.08085,1511,5988,7499
|
||||||
|
561,7/14/2012,3,1,7,6,2,0.703333,0.666042,0.699167,0.143679,2355,4614,6969
|
||||||
|
562,7/15/2012,3,1,7,0,1,0.745833,0.705196,0.717917,0.166667,1920,4111,6031
|
||||||
|
563,7/16/2012,3,1,7,1,1,0.763333,0.724125,0.645,0.164187,1088,5742,6830
|
||||||
|
564,7/17/2012,3,1,7,2,1,0.818333,0.755683,0.505833,0.114429,921,5865,6786
|
||||||
|
565,7/18/2012,3,1,7,3,1,0.793333,0.745583,0.577083,0.137442,799,4914,5713
|
||||||
|
566,7/19/2012,3,1,7,4,1,0.77,0.714642,0.600417,0.165429,888,5703,6591
|
||||||
|
567,7/20/2012,3,1,7,5,2,0.665833,0.613025,0.844167,0.208967,747,5123,5870
|
||||||
|
568,7/21/2012,3,1,7,6,3,0.595833,0.549912,0.865417,0.2133,1264,3195,4459
|
||||||
|
569,7/22/2012,3,1,7,0,2,0.6675,0.623125,0.7625,0.0939208,2544,4866,7410
|
||||||
|
570,7/23/2012,3,1,7,1,1,0.741667,0.690017,0.694167,0.138683,1135,5831,6966
|
||||||
|
571,7/24/2012,3,1,7,2,1,0.750833,0.70645,0.655,0.211454,1140,6452,7592
|
||||||
|
572,7/25/2012,3,1,7,3,1,0.724167,0.654054,0.45,0.1648,1383,6790,8173
|
||||||
|
573,7/26/2012,3,1,7,4,1,0.776667,0.739263,0.596667,0.284813,1036,5825,6861
|
||||||
|
574,7/27/2012,3,1,7,5,1,0.781667,0.734217,0.594583,0.152992,1259,5645,6904
|
||||||
|
575,7/28/2012,3,1,7,6,1,0.755833,0.697604,0.613333,0.15735,2234,4451,6685
|
||||||
|
576,7/29/2012,3,1,7,0,1,0.721667,0.667933,0.62375,0.170396,2153,4444,6597
|
||||||
|
577,7/30/2012,3,1,7,1,1,0.730833,0.684987,0.66875,0.153617,1040,6065,7105
|
||||||
|
578,7/31/2012,3,1,7,2,1,0.713333,0.662896,0.704167,0.165425,968,6248,7216
|
||||||
|
579,8/1/2012,3,1,8,3,1,0.7175,0.667308,0.6775,0.141179,1074,6506,7580
|
||||||
|
580,8/2/2012,3,1,8,4,1,0.7525,0.707088,0.659583,0.129354,983,6278,7261
|
||||||
|
581,8/3/2012,3,1,8,5,2,0.765833,0.722867,0.6425,0.215792,1328,5847,7175
|
||||||
|
582,8/4/2012,3,1,8,6,1,0.793333,0.751267,0.613333,0.257458,2345,4479,6824
|
||||||
|
583,8/5/2012,3,1,8,0,1,0.769167,0.731079,0.6525,0.290421,1707,3757,5464
|
||||||
|
584,8/6/2012,3,1,8,1,2,0.7525,0.710246,0.654167,0.129354,1233,5780,7013
|
||||||
|
585,8/7/2012,3,1,8,2,2,0.735833,0.697621,0.70375,0.116908,1278,5995,7273
|
||||||
|
586,8/8/2012,3,1,8,3,2,0.75,0.707717,0.672917,0.1107,1263,6271,7534
|
||||||
|
587,8/9/2012,3,1,8,4,1,0.755833,0.699508,0.620417,0.1561,1196,6090,7286
|
||||||
|
588,8/10/2012,3,1,8,5,2,0.715833,0.667942,0.715833,0.238813,1065,4721,5786
|
||||||
|
589,8/11/2012,3,1,8,6,2,0.6925,0.638267,0.732917,0.206479,2247,4052,6299
|
||||||
|
590,8/12/2012,3,1,8,0,1,0.700833,0.644579,0.530417,0.122512,2182,4362,6544
|
||||||
|
591,8/13/2012,3,1,8,1,1,0.720833,0.662254,0.545417,0.136212,1207,5676,6883
|
||||||
|
592,8/14/2012,3,1,8,2,1,0.726667,0.676779,0.686667,0.169158,1128,5656,6784
|
||||||
|
593,8/15/2012,3,1,8,3,1,0.706667,0.654037,0.619583,0.169771,1198,6149,7347
|
||||||
|
594,8/16/2012,3,1,8,4,1,0.719167,0.654688,0.519167,0.141796,1338,6267,7605
|
||||||
|
595,8/17/2012,3,1,8,5,1,0.723333,0.2424,0.570833,0.231354,1483,5665,7148
|
||||||
|
596,8/18/2012,3,1,8,6,1,0.678333,0.618071,0.603333,0.177867,2827,5038,7865
|
||||||
|
597,8/19/2012,3,1,8,0,2,0.635833,0.603554,0.711667,0.08645,1208,3341,4549
|
||||||
|
598,8/20/2012,3,1,8,1,2,0.635833,0.595967,0.734167,0.129979,1026,5504,6530
|
||||||
|
599,8/21/2012,3,1,8,2,1,0.649167,0.601025,0.67375,0.0727708,1081,5925,7006
|
||||||
|
600,8/22/2012,3,1,8,3,1,0.6675,0.621854,0.677083,0.0702833,1094,6281,7375
|
||||||
|
601,8/23/2012,3,1,8,4,1,0.695833,0.637008,0.635833,0.0845958,1363,6402,7765
|
||||||
|
602,8/24/2012,3,1,8,5,2,0.7025,0.6471,0.615,0.0721458,1325,6257,7582
|
||||||
|
603,8/25/2012,3,1,8,6,2,0.661667,0.618696,0.712917,0.244408,1829,4224,6053
|
||||||
|
604,8/26/2012,3,1,8,0,2,0.653333,0.595996,0.845833,0.228858,1483,3772,5255
|
||||||
|
605,8/27/2012,3,1,8,1,1,0.703333,0.654688,0.730417,0.128733,989,5928,6917
|
||||||
|
606,8/28/2012,3,1,8,2,1,0.728333,0.66605,0.62,0.190925,935,6105,7040
|
||||||
|
607,8/29/2012,3,1,8,3,1,0.685,0.635733,0.552083,0.112562,1177,6520,7697
|
||||||
|
608,8/30/2012,3,1,8,4,1,0.706667,0.652779,0.590417,0.0771167,1172,6541,7713
|
||||||
|
609,8/31/2012,3,1,8,5,1,0.764167,0.6894,0.5875,0.168533,1433,5917,7350
|
||||||
|
610,9/1/2012,3,1,9,6,2,0.753333,0.702654,0.638333,0.113187,2352,3788,6140
|
||||||
|
611,9/2/2012,3,1,9,0,2,0.696667,0.649,0.815,0.0640708,2613,3197,5810
|
||||||
|
612,9/3/2012,3,1,9,1,1,0.7075,0.661629,0.790833,0.151121,1965,4069,6034
|
||||||
|
613,9/4/2012,3,1,9,2,1,0.725833,0.686888,0.755,0.236321,867,5997,6864
|
||||||
|
614,9/5/2012,3,1,9,3,1,0.736667,0.708983,0.74125,0.187808,832,6280,7112
|
||||||
|
615,9/6/2012,3,1,9,4,2,0.696667,0.655329,0.810417,0.142421,611,5592,6203
|
||||||
|
616,9/7/2012,3,1,9,5,1,0.703333,0.657204,0.73625,0.171646,1045,6459,7504
|
||||||
|
617,9/8/2012,3,1,9,6,2,0.659167,0.611121,0.799167,0.281104,1557,4419,5976
|
||||||
|
618,9/9/2012,3,1,9,0,1,0.61,0.578925,0.5475,0.224496,2570,5657,8227
|
||||||
|
619,9/10/2012,3,1,9,1,1,0.583333,0.565654,0.50375,0.258713,1118,6407,7525
|
||||||
|
620,9/11/2012,3,1,9,2,1,0.5775,0.554292,0.52,0.0920542,1070,6697,7767
|
||||||
|
621,9/12/2012,3,1,9,3,1,0.599167,0.570075,0.577083,0.131846,1050,6820,7870
|
||||||
|
622,9/13/2012,3,1,9,4,1,0.6125,0.579558,0.637083,0.0827208,1054,6750,7804
|
||||||
|
623,9/14/2012,3,1,9,5,1,0.633333,0.594083,0.6725,0.103863,1379,6630,8009
|
||||||
|
624,9/15/2012,3,1,9,6,1,0.608333,0.585867,0.501667,0.247521,3160,5554,8714
|
||||||
|
625,9/16/2012,3,1,9,0,1,0.58,0.563125,0.57,0.0901833,2166,5167,7333
|
||||||
|
626,9/17/2012,3,1,9,1,2,0.580833,0.55305,0.734583,0.151742,1022,5847,6869
|
||||||
|
627,9/18/2012,3,1,9,2,2,0.623333,0.565067,0.8725,0.357587,371,3702,4073
|
||||||
|
628,9/19/2012,3,1,9,3,1,0.5525,0.540404,0.536667,0.215175,788,6803,7591
|
||||||
|
629,9/20/2012,3,1,9,4,1,0.546667,0.532192,0.618333,0.118167,939,6781,7720
|
||||||
|
630,9/21/2012,3,1,9,5,1,0.599167,0.571971,0.66875,0.154229,1250,6917,8167
|
||||||
|
631,9/22/2012,3,1,9,6,1,0.65,0.610488,0.646667,0.283583,2512,5883,8395
|
||||||
|
632,9/23/2012,4,1,9,0,1,0.529167,0.518933,0.467083,0.223258,2454,5453,7907
|
||||||
|
633,9/24/2012,4,1,9,1,1,0.514167,0.502513,0.492917,0.142404,1001,6435,7436
|
||||||
|
634,9/25/2012,4,1,9,2,1,0.55,0.544179,0.57,0.236321,845,6693,7538
|
||||||
|
635,9/26/2012,4,1,9,3,1,0.635,0.596613,0.630833,0.2444,787,6946,7733
|
||||||
|
636,9/27/2012,4,1,9,4,2,0.65,0.607975,0.690833,0.134342,751,6642,7393
|
||||||
|
637,9/28/2012,4,1,9,5,2,0.619167,0.585863,0.69,0.164179,1045,6370,7415
|
||||||
|
638,9/29/2012,4,1,9,6,1,0.5425,0.530296,0.542917,0.227604,2589,5966,8555
|
||||||
|
639,9/30/2012,4,1,9,0,1,0.526667,0.517663,0.583333,0.134958,2015,4874,6889
|
||||||
|
640,10/1/2012,4,1,10,1,2,0.520833,0.512,0.649167,0.0908042,763,6015,6778
|
||||||
|
641,10/2/2012,4,1,10,2,3,0.590833,0.542333,0.871667,0.104475,315,4324,4639
|
||||||
|
642,10/3/2012,4,1,10,3,2,0.6575,0.599133,0.79375,0.0665458,728,6844,7572
|
||||||
|
643,10/4/2012,4,1,10,4,2,0.6575,0.607975,0.722917,0.117546,891,6437,7328
|
||||||
|
644,10/5/2012,4,1,10,5,1,0.615,0.580187,0.6275,0.10635,1516,6640,8156
|
||||||
|
645,10/6/2012,4,1,10,6,1,0.554167,0.538521,0.664167,0.268025,3031,4934,7965
|
||||||
|
646,10/7/2012,4,1,10,0,2,0.415833,0.419813,0.708333,0.141162,781,2729,3510
|
||||||
|
647,10/8/2012,4,1,10,1,2,0.383333,0.387608,0.709583,0.189679,874,4604,5478
|
||||||
|
648,10/9/2012,4,1,10,2,2,0.446667,0.438112,0.761667,0.1903,601,5791,6392
|
||||||
|
649,10/10/2012,4,1,10,3,1,0.514167,0.503142,0.630833,0.187821,780,6911,7691
|
||||||
|
650,10/11/2012,4,1,10,4,1,0.435,0.431167,0.463333,0.181596,834,6736,7570
|
||||||
|
651,10/12/2012,4,1,10,5,1,0.4375,0.433071,0.539167,0.235092,1060,6222,7282
|
||||||
|
652,10/13/2012,4,1,10,6,1,0.393333,0.391396,0.494583,0.146142,2252,4857,7109
|
||||||
|
653,10/14/2012,4,1,10,0,1,0.521667,0.508204,0.640417,0.278612,2080,4559,6639
|
||||||
|
654,10/15/2012,4,1,10,1,2,0.561667,0.53915,0.7075,0.296037,760,5115,5875
|
||||||
|
655,10/16/2012,4,1,10,2,1,0.468333,0.460846,0.558333,0.182221,922,6612,7534
|
||||||
|
656,10/17/2012,4,1,10,3,1,0.455833,0.450108,0.692917,0.101371,979,6482,7461
|
||||||
|
657,10/18/2012,4,1,10,4,2,0.5225,0.512625,0.728333,0.236937,1008,6501,7509
|
||||||
|
658,10/19/2012,4,1,10,5,2,0.563333,0.537896,0.815,0.134954,753,4671,5424
|
||||||
|
659,10/20/2012,4,1,10,6,1,0.484167,0.472842,0.572917,0.117537,2806,5284,8090
|
||||||
|
660,10/21/2012,4,1,10,0,1,0.464167,0.456429,0.51,0.166054,2132,4692,6824
|
||||||
|
661,10/22/2012,4,1,10,1,1,0.4875,0.482942,0.568333,0.0814833,830,6228,7058
|
||||||
|
662,10/23/2012,4,1,10,2,1,0.544167,0.530304,0.641667,0.0945458,841,6625,7466
|
||||||
|
663,10/24/2012,4,1,10,3,1,0.5875,0.558721,0.63625,0.0727792,795,6898,7693
|
||||||
|
664,10/25/2012,4,1,10,4,2,0.55,0.529688,0.800417,0.124375,875,6484,7359
|
||||||
|
665,10/26/2012,4,1,10,5,2,0.545833,0.52275,0.807083,0.132467,1182,6262,7444
|
||||||
|
666,10/27/2012,4,1,10,6,2,0.53,0.515133,0.72,0.235692,2643,5209,7852
|
||||||
|
667,10/28/2012,4,1,10,0,2,0.4775,0.467771,0.694583,0.398008,998,3461,4459
|
||||||
|
668,10/29/2012,4,1,10,1,3,0.44,0.4394,0.88,0.3582,2,20,22
|
||||||
|
669,10/30/2012,4,1,10,2,2,0.318182,0.309909,0.825455,0.213009,87,1009,1096
|
||||||
|
670,10/31/2012,4,1,10,3,2,0.3575,0.3611,0.666667,0.166667,419,5147,5566
|
||||||
|
671,11/1/2012,4,1,11,4,2,0.365833,0.369942,0.581667,0.157346,466,5520,5986
|
||||||
|
672,11/2/2012,4,1,11,5,1,0.355,0.356042,0.522083,0.266175,618,5229,5847
|
||||||
|
673,11/3/2012,4,1,11,6,2,0.343333,0.323846,0.49125,0.270529,1029,4109,5138
|
||||||
|
674,11/4/2012,4,1,11,0,1,0.325833,0.329538,0.532917,0.179108,1201,3906,5107
|
||||||
|
675,11/5/2012,4,1,11,1,1,0.319167,0.308075,0.494167,0.236325,378,4881,5259
|
||||||
|
676,11/6/2012,4,1,11,2,1,0.280833,0.281567,0.567083,0.173513,466,5220,5686
|
||||||
|
677,11/7/2012,4,1,11,3,2,0.295833,0.274621,0.5475,0.304108,326,4709,5035
|
||||||
|
678,11/8/2012,4,1,11,4,1,0.352174,0.341891,0.333478,0.347835,340,4975,5315
|
||||||
|
679,11/9/2012,4,1,11,5,1,0.361667,0.355413,0.540833,0.214558,709,5283,5992
|
||||||
|
680,11/10/2012,4,1,11,6,1,0.389167,0.393937,0.645417,0.0578458,2090,4446,6536
|
||||||
|
681,11/11/2012,4,1,11,0,1,0.420833,0.421713,0.659167,0.1275,2290,4562,6852
|
||||||
|
682,11/12/2012,4,1,11,1,1,0.485,0.475383,0.741667,0.173517,1097,5172,6269
|
||||||
|
683,11/13/2012,4,1,11,2,2,0.343333,0.323225,0.662917,0.342046,327,3767,4094
|
||||||
|
684,11/14/2012,4,1,11,3,1,0.289167,0.281563,0.552083,0.199625,373,5122,5495
|
||||||
|
685,11/15/2012,4,1,11,4,2,0.321667,0.324492,0.620417,0.152987,320,5125,5445
|
||||||
|
686,11/16/2012,4,1,11,5,1,0.345,0.347204,0.524583,0.171025,484,5214,5698
|
||||||
|
687,11/17/2012,4,1,11,6,1,0.325,0.326383,0.545417,0.179729,1313,4316,5629
|
||||||
|
688,11/18/2012,4,1,11,0,1,0.3425,0.337746,0.692917,0.227612,922,3747,4669
|
||||||
|
689,11/19/2012,4,1,11,1,2,0.380833,0.375621,0.623333,0.235067,449,5050,5499
|
||||||
|
690,11/20/2012,4,1,11,2,2,0.374167,0.380667,0.685,0.082725,534,5100,5634
|
||||||
|
691,11/21/2012,4,1,11,3,1,0.353333,0.364892,0.61375,0.103246,615,4531,5146
|
||||||
|
692,11/22/2012,4,1,11,4,1,0.34,0.350371,0.580417,0.0528708,955,1470,2425
|
||||||
|
693,11/23/2012,4,1,11,5,1,0.368333,0.378779,0.56875,0.148021,1603,2307,3910
|
||||||
|
694,11/24/2012,4,1,11,6,1,0.278333,0.248742,0.404583,0.376871,532,1745,2277
|
||||||
|
695,11/25/2012,4,1,11,0,1,0.245833,0.257583,0.468333,0.1505,309,2115,2424
|
||||||
|
696,11/26/2012,4,1,11,1,1,0.313333,0.339004,0.535417,0.04665,337,4750,5087
|
||||||
|
697,11/27/2012,4,1,11,2,2,0.291667,0.281558,0.786667,0.237562,123,3836,3959
|
||||||
|
698,11/28/2012,4,1,11,3,1,0.296667,0.289762,0.50625,0.210821,198,5062,5260
|
||||||
|
699,11/29/2012,4,1,11,4,1,0.28087,0.298422,0.555652,0.115522,243,5080,5323
|
||||||
|
700,11/30/2012,4,1,11,5,1,0.298333,0.323867,0.649583,0.0584708,362,5306,5668
|
||||||
|
701,12/1/2012,4,1,12,6,2,0.298333,0.316904,0.806667,0.0597042,951,4240,5191
|
||||||
|
702,12/2/2012,4,1,12,0,2,0.3475,0.359208,0.823333,0.124379,892,3757,4649
|
||||||
|
703,12/3/2012,4,1,12,1,1,0.4525,0.455796,0.7675,0.0827208,555,5679,6234
|
||||||
|
704,12/4/2012,4,1,12,2,1,0.475833,0.469054,0.73375,0.174129,551,6055,6606
|
||||||
|
705,12/5/2012,4,1,12,3,1,0.438333,0.428012,0.485,0.324021,331,5398,5729
|
||||||
|
706,12/6/2012,4,1,12,4,1,0.255833,0.258204,0.50875,0.174754,340,5035,5375
|
||||||
|
707,12/7/2012,4,1,12,5,2,0.320833,0.321958,0.764167,0.1306,349,4659,5008
|
||||||
|
708,12/8/2012,4,1,12,6,2,0.381667,0.389508,0.91125,0.101379,1153,4429,5582
|
||||||
|
709,12/9/2012,4,1,12,0,2,0.384167,0.390146,0.905417,0.157975,441,2787,3228
|
||||||
|
710,12/10/2012,4,1,12,1,2,0.435833,0.435575,0.925,0.190308,329,4841,5170
|
||||||
|
711,12/11/2012,4,1,12,2,2,0.353333,0.338363,0.596667,0.296037,282,5219,5501
|
||||||
|
712,12/12/2012,4,1,12,3,2,0.2975,0.297338,0.538333,0.162937,310,5009,5319
|
||||||
|
713,12/13/2012,4,1,12,4,1,0.295833,0.294188,0.485833,0.174129,425,5107,5532
|
||||||
|
714,12/14/2012,4,1,12,5,1,0.281667,0.294192,0.642917,0.131229,429,5182,5611
|
||||||
|
715,12/15/2012,4,1,12,6,1,0.324167,0.338383,0.650417,0.10635,767,4280,5047
|
||||||
|
716,12/16/2012,4,1,12,0,2,0.3625,0.369938,0.83875,0.100742,538,3248,3786
|
||||||
|
717,12/17/2012,4,1,12,1,2,0.393333,0.4015,0.907083,0.0982583,212,4373,4585
|
||||||
|
718,12/18/2012,4,1,12,2,1,0.410833,0.409708,0.66625,0.221404,433,5124,5557
|
||||||
|
719,12/19/2012,4,1,12,3,1,0.3325,0.342162,0.625417,0.184092,333,4934,5267
|
||||||
|
720,12/20/2012,4,1,12,4,2,0.33,0.335217,0.667917,0.132463,314,3814,4128
|
||||||
|
721,12/21/2012,1,1,12,5,2,0.326667,0.301767,0.556667,0.374383,221,3402,3623
|
||||||
|
722,12/22/2012,1,1,12,6,1,0.265833,0.236113,0.44125,0.407346,205,1544,1749
|
||||||
|
723,12/23/2012,1,1,12,0,1,0.245833,0.259471,0.515417,0.133083,408,1379,1787
|
||||||
|
724,12/24/2012,1,1,12,1,2,0.231304,0.2589,0.791304,0.0772304,174,746,920
|
||||||
|
725,12/25/2012,1,1,12,2,2,0.291304,0.294465,0.734783,0.168726,440,573,1013
|
||||||
|
726,12/26/2012,1,1,12,3,3,0.243333,0.220333,0.823333,0.316546,9,432,441
|
||||||
|
727,12/27/2012,1,1,12,4,2,0.254167,0.226642,0.652917,0.350133,247,1867,2114
|
||||||
|
728,12/28/2012,1,1,12,5,2,0.253333,0.255046,0.59,0.155471,644,2451,3095
|
||||||
|
729,12/29/2012,1,1,12,6,2,0.253333,0.2424,0.752917,0.124383,159,1182,1341
|
||||||
|
730,12/30/2012,1,1,12,0,1,0.255833,0.2317,0.483333,0.350754,364,1432,1796
|
||||||
|
731,12/31/2012,1,1,12,1,2,0.215833,0.223487,0.5775,0.154846,439,2290,2729
|
||||||
|
@@ -37,7 +37,8 @@
|
|||||||
"2. Instantiating AutoMLConfig with new task type \"forecasting\" for timeseries data training, and other timeseries related settings: for this dataset we use the basic one: \"time_column_name\" \n",
|
"2. Instantiating AutoMLConfig with new task type \"forecasting\" for timeseries data training, and other timeseries related settings: for this dataset we use the basic one: \"time_column_name\" \n",
|
||||||
"3. Training the Model using local compute\n",
|
"3. Training the Model using local compute\n",
|
||||||
"4. Exploring the results\n",
|
"4. Exploring the results\n",
|
||||||
"5. Testing the fitted model"
|
"5. Viewing the engineered names for featurized data and featurization summary for all raw features\n",
|
||||||
|
"6. Testing the fitted model"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -122,12 +123,22 @@
|
|||||||
"data.head()"
|
"data.head()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# let's take note of what columns means what in the data\n",
|
||||||
|
"time_column_name = 'timeStamp'\n",
|
||||||
|
"target_column_name = 'demand'"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### Split the data to train and test\n",
|
"### Split the data into train and test sets\n"
|
||||||
"\n"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -136,50 +147,10 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"train = data[data['timeStamp'] < '2017-02-01']\n",
|
"X_train = data[data[time_column_name] < '2017-02-01']\n",
|
||||||
"test = data[data['timeStamp'] >= '2017-02-01']\n"
|
"X_test = data[data[time_column_name] >= '2017-02-01']\n",
|
||||||
]
|
"y_train = X_train.pop(target_column_name).values\n",
|
||||||
},
|
"y_test = X_test.pop(target_column_name).values"
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Prepare the test data, we will feed X_test to the fitted model and get prediction"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"y_test = test.pop('demand').values\n",
|
|
||||||
"X_test = test"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Split the train data to train and valid\n",
|
|
||||||
"\n",
|
|
||||||
"Use one month's data as valid data\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"X_train = train[train['timeStamp'] < '2017-01-01']\n",
|
|
||||||
"X_valid = train[train['timeStamp'] >= '2017-01-01']\n",
|
|
||||||
"y_train = X_train.pop('demand').values\n",
|
|
||||||
"y_valid = X_valid.pop('demand').values\n",
|
|
||||||
"print(X_train.shape)\n",
|
|
||||||
"print(y_train.shape)\n",
|
|
||||||
"print(X_valid.shape)\n",
|
|
||||||
"print(y_valid.shape)"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -198,8 +169,7 @@
|
|||||||
"|**iteration_timeout_minutes**|Time limit in minutes for each iteration.|\n",
|
"|**iteration_timeout_minutes**|Time limit in minutes for each iteration.|\n",
|
||||||
"|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n",
|
"|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n",
|
||||||
"|**y**|(sparse) array-like, shape = [n_samples, ], targets values.|\n",
|
"|**y**|(sparse) array-like, shape = [n_samples, ], targets values.|\n",
|
||||||
"|**X_valid**|Data used to evaluate a model in a iteration. (sparse) array-like, shape = [n_samples, n_features]|\n",
|
"|**n_cross_validations**|Number of cross validation splits.|\n",
|
||||||
"|**y_valid**|Data used to evaluate a model in a iteration. (sparse) array-like, shape = [n_samples, ], targets values.|\n",
|
|
||||||
"|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder. "
|
"|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder. "
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -209,9 +179,8 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"time_column_name = 'timeStamp'\n",
|
|
||||||
"automl_settings = {\n",
|
"automl_settings = {\n",
|
||||||
" \"time_column_name\": time_column_name,\n",
|
" \"time_column_name\": time_column_name \n",
|
||||||
"}\n",
|
"}\n",
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
@@ -222,8 +191,7 @@
|
|||||||
" iteration_timeout_minutes = 5,\n",
|
" iteration_timeout_minutes = 5,\n",
|
||||||
" X = X_train,\n",
|
" X = X_train,\n",
|
||||||
" y = y_train,\n",
|
" y = y_train,\n",
|
||||||
" X_valid = X_valid,\n",
|
" n_cross_validations = 3,\n",
|
||||||
" y_valid = y_valid,\n",
|
|
||||||
" path=project_folder,\n",
|
" path=project_folder,\n",
|
||||||
" verbosity = logging.INFO,\n",
|
" verbosity = logging.INFO,\n",
|
||||||
" **automl_settings)"
|
" **automl_settings)"
|
||||||
@@ -233,7 +201,8 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"You can call the submit method on the experiment object and pass the run configuration. For Local runs the execution is synchronous. Depending on the data and number of iterations this can run for while.\n",
|
"Submitting the configuration will start a new run in this experiment. For local runs, the execution is synchronous. Depending on the data and number of iterations, this can run for a while. Parameters controlling concurrency may speed up the process, depending on your hardware.\n",
|
||||||
|
"\n",
|
||||||
"You will see the currently running iterations printing to the console."
|
"You will see the currently running iterations printing to the console."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -273,13 +242,34 @@
|
|||||||
"fitted_model.steps"
|
"fitted_model.steps"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### View the engineered names for featurized data\n",
|
||||||
|
"Below we display the engineered feature names generated for the featurized data using the time-series featurization."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"fitted_model.named_steps['timeseriestransformer'].get_engineered_feature_names()"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### Test the Best Fitted Model\n",
|
"### Test the Best Fitted Model\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Predict on training and test set, and calculate residual values."
|
"For forecasting, we will use the `forecast` function instead of the `predict` function. There are two reasons for this.\n",
|
||||||
|
"\n",
|
||||||
|
"We need to pass the recent values of the target variable `y`, whereas the scikit-compatible `predict` function only takes the non-target variables `X`. In our case, the test data immediately follows the training data, and we fill the `y` variable with `NaN`. The `NaN` serves as a question mark for the forecaster to fill with the actuals. Using the forecast function will produce forecasts using the shortest possible forecast horizon. The last time at which a definite (non-NaN) value is seen is the _forecast origin_ - the last time when the value of the target is known. \n",
|
||||||
|
"\n",
|
||||||
|
"Using the `predict` method would result in getting predictions for EVERY horizon the forecaster can predict at. This is useful when training and evaluating the performance of the forecaster at various horizons, but the level of detail is excessive for normal use."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -288,15 +278,64 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"y_pred = fitted_model.predict(X_test)\n",
|
"# Replace ALL values in y_pred by NaN. \n",
|
||||||
"y_pred"
|
"# The forecast origin will be at the beginning of the first forecast period\n",
|
||||||
|
"# (which is the same time as the end of the last training period).\n",
|
||||||
|
"y_query = y_test.copy().astype(np.float)\n",
|
||||||
|
"y_query.fill(np.nan)\n",
|
||||||
|
"# The featurized data, aligned to y, will also be returned.\n",
|
||||||
|
"# This contains the assumptions that were made in the forecast\n",
|
||||||
|
"# and helps align the forecast to the original data\n",
|
||||||
|
"y_fcst, X_trans = fitted_model.forecast(X_test, y_query)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# limit the evaluation to data where y_test has actuals\n",
|
||||||
|
"def align_outputs(y_predicted, X_trans, X_test, y_test, predicted_column_name = 'predicted'):\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" Demonstrates how to get the output aligned to the inputs\n",
|
||||||
|
" using pandas indexes. Helps understand what happened if\n",
|
||||||
|
" the output's shape differs from the input shape, or if\n",
|
||||||
|
" the data got re-sorted by time and grain during forecasting.\n",
|
||||||
|
" \n",
|
||||||
|
" Typical causes of misalignment are:\n",
|
||||||
|
" * we predicted some periods that were missing in actuals -> drop from eval\n",
|
||||||
|
" * model was asked to predict past max_horizon -> increase max horizon\n",
|
||||||
|
" * data at start of X_test was needed for lags -> provide previous periods\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" df_fcst = pd.DataFrame({predicted_column_name : y_predicted})\n",
|
||||||
|
" # y and X outputs are aligned by forecast() function contract\n",
|
||||||
|
" df_fcst.index = X_trans.index\n",
|
||||||
|
" \n",
|
||||||
|
" # align original X_test to y_test \n",
|
||||||
|
" X_test_full = X_test.copy()\n",
|
||||||
|
" X_test_full[target_column_name] = y_test\n",
|
||||||
|
"\n",
|
||||||
|
" # X_test_full's does not include origin, so reset for merge\n",
|
||||||
|
" df_fcst.reset_index(inplace=True)\n",
|
||||||
|
" X_test_full = X_test_full.reset_index().drop(columns='index')\n",
|
||||||
|
" together = df_fcst.merge(X_test_full, how='right')\n",
|
||||||
|
" \n",
|
||||||
|
" # drop rows where prediction or actuals are nan \n",
|
||||||
|
" # happens because of missing actuals \n",
|
||||||
|
" # or at edges of time due to lags/rolling windows\n",
|
||||||
|
" clean = together[together[[target_column_name, predicted_column_name]].notnull().all(axis=1)]\n",
|
||||||
|
" return(clean)\n",
|
||||||
|
"\n",
|
||||||
|
"df_all = align_outputs(y_fcst, X_trans, X_test, y_test)\n",
|
||||||
|
"df_all.head()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### Use the Check Data Function to remove the nan values from y_test to avoid error when calculate metrics "
|
"Looking at `X_trans` is also useful to see what featurization happened to the data."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -305,29 +344,14 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"if len(y_test) != len(y_pred):\n",
|
"X_trans"
|
||||||
" raise ValueError(\n",
|
|
||||||
" 'the true values and prediction values do not have equal length.')\n",
|
|
||||||
"elif len(y_test) == 0:\n",
|
|
||||||
" raise ValueError(\n",
|
|
||||||
" 'y_true and y_pred are empty.')\n",
|
|
||||||
"\n",
|
|
||||||
"# if there is any non-numeric element in the y_true or y_pred,\n",
|
|
||||||
"# the ValueError exception will be thrown.\n",
|
|
||||||
"y_test_f = np.array(y_test).astype(float)\n",
|
|
||||||
"y_pred_f = np.array(y_pred).astype(float)\n",
|
|
||||||
"\n",
|
|
||||||
"# remove entries both in y_true and y_pred where at least\n",
|
|
||||||
"# one element in y_true or y_pred is missing\n",
|
|
||||||
"y_test = y_test_f[~(np.isnan(y_test_f) | np.isnan(y_pred_f))]\n",
|
|
||||||
"y_pred = y_pred_f[~(np.isnan(y_test_f) | np.isnan(y_pred_f))]"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### Calculate metrics for the prediction\n"
|
"### Calculate accuracy metrics\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -336,26 +360,180 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"print(\"[Test Data] \\nRoot Mean squared error: %.2f\" % np.sqrt(mean_squared_error(y_test, y_pred)))\n",
|
"def MAPE(actual, pred):\n",
|
||||||
"# Explained variance score: 1 is perfect prediction\n",
|
" \"\"\"\n",
|
||||||
"print('mean_absolute_error score: %.2f' % mean_absolute_error(y_test, y_pred))\n",
|
" Calculate mean absolute percentage error.\n",
|
||||||
"print('R2 score: %.2f' % r2_score(y_test, y_pred))\n",
|
" Remove NA and values where actual is close to zero\n",
|
||||||
"\n",
|
" \"\"\"\n",
|
||||||
"\n",
|
" not_na = ~(np.isnan(actual) | np.isnan(pred))\n",
|
||||||
|
" not_zero = ~np.isclose(actual, 0.0)\n",
|
||||||
|
" actual_safe = actual[not_na & not_zero]\n",
|
||||||
|
" pred_safe = pred[not_na & not_zero]\n",
|
||||||
|
" APE = 100*np.abs((actual_safe - pred_safe)/actual_safe)\n",
|
||||||
|
" return np.mean(APE)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"print(\"Simple forecasting model\")\n",
|
||||||
|
"rmse = np.sqrt(mean_squared_error(df_all[target_column_name], df_all['predicted']))\n",
|
||||||
|
"print(\"[Test Data] \\nRoot Mean squared error: %.2f\" % rmse)\n",
|
||||||
|
"mae = mean_absolute_error(df_all[target_column_name], df_all['predicted'])\n",
|
||||||
|
"print('mean_absolute_error score: %.2f' % mae)\n",
|
||||||
|
"print('MAPE: %.2f' % MAPE(df_all[target_column_name], df_all['predicted']))\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Plot outputs\n",
|
"# Plot outputs\n",
|
||||||
"%matplotlib notebook\n",
|
"%matplotlib notebook\n",
|
||||||
"test_pred = plt.scatter(y_test, y_pred, color='b')\n",
|
"test_pred = plt.scatter(df_all[target_column_name], df_all['predicted'], color='b')\n",
|
||||||
"test_test = plt.scatter(y_test, y_test, color='g')\n",
|
"test_test = plt.scatter(y_test, y_test, color='g')\n",
|
||||||
"plt.legend((test_pred, test_test), ('prediction', 'truth'), loc='upper left', fontsize=8)\n",
|
"plt.legend((test_pred, test_test), ('prediction', 'truth'), loc='upper left', fontsize=8)\n",
|
||||||
"plt.show()"
|
"plt.show()"
|
||||||
]
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"The distribution looks a little heavy tailed: we underestimate the excursions of the extremes. A normal-quantile transform of the target might help, but let's first try using some past data with the lags and rolling window transforms.\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Using lags and rolling window features to improve the forecast"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"We did not use lags in the previous model specification. In effect, the prediction was the result of a simple regression on date, grain and any additional features. This is often a very good prediction as common time series patterns like seasonality and trends can be captured in this manner. Such simple regression is horizon-less: it doesn't matter how far into the future we are predicting, because we are not using past data.\n",
|
||||||
|
"\n",
|
||||||
|
"Now that we configured target lags, that is the previous values of the target variables, and the prediction is no longer horizon-less. We therefore must specify the `max_horizon` that the model will learn to forecast. The `target_lags` keyword specifies how far back we will construct the lags of the target variable, and the `target_rolling_window_size` specifies the size of the rolling window over which we will generate the `max`, `min` and `sum` features."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"automl_settings_lags = {\n",
|
||||||
|
" 'time_column_name': time_column_name,\n",
|
||||||
|
" 'target_lags': 1,\n",
|
||||||
|
" 'target_rolling_window_size': 5,\n",
|
||||||
|
" # you MUST set the max_horizon when using lags and rolling windows\n",
|
||||||
|
" # it is optional when looking-back features are not used \n",
|
||||||
|
" 'max_horizon': len(y_test), # only one grain\n",
|
||||||
|
"}\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"automl_config_lags = AutoMLConfig(task = 'forecasting',\n",
|
||||||
|
" debug_log = 'automl_nyc_energy_errors.log',\n",
|
||||||
|
" primary_metric='normalized_root_mean_squared_error',\n",
|
||||||
|
" iterations = 10,\n",
|
||||||
|
" iteration_timeout_minutes = 5,\n",
|
||||||
|
" X = X_train,\n",
|
||||||
|
" y = y_train,\n",
|
||||||
|
" n_cross_validations = 3,\n",
|
||||||
|
" path=project_folder,\n",
|
||||||
|
" verbosity = logging.INFO,\n",
|
||||||
|
" **automl_settings_lags)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"local_run_lags = experiment.submit(automl_config_lags, show_output=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"best_run_lags, fitted_model_lags = local_run_lags.get_output()\n",
|
||||||
|
"y_fcst_lags, X_trans_lags = fitted_model_lags.forecast(X_test, y_query)\n",
|
||||||
|
"df_lags = align_outputs(y_fcst_lags, X_trans_lags, X_test, y_test)\n",
|
||||||
|
"df_lags.head()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"X_trans_lags"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"print(\"Forecasting model with lags\")\n",
|
||||||
|
"rmse = np.sqrt(mean_squared_error(df_lags[target_column_name], df_lags['predicted']))\n",
|
||||||
|
"print(\"[Test Data] \\nRoot Mean squared error: %.2f\" % rmse)\n",
|
||||||
|
"mae = mean_absolute_error(df_lags[target_column_name], df_lags['predicted'])\n",
|
||||||
|
"print('mean_absolute_error score: %.2f' % mae)\n",
|
||||||
|
"print('MAPE: %.2f' % MAPE(df_lags[target_column_name], df_lags['predicted']))\n",
|
||||||
|
"\n",
|
||||||
|
"# Plot outputs\n",
|
||||||
|
"%matplotlib notebook\n",
|
||||||
|
"test_pred = plt.scatter(df_lags[target_column_name], df_lags['predicted'], color='b')\n",
|
||||||
|
"test_test = plt.scatter(y_test, y_test, color='g')\n",
|
||||||
|
"plt.legend((test_pred, test_test), ('prediction', 'truth'), loc='upper left', fontsize=8)\n",
|
||||||
|
"plt.show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### What features matter for the forecast?"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.train.automl.automlexplainer import explain_model\n",
|
||||||
|
"\n",
|
||||||
|
"# feature names are everything in the transformed data except the target\n",
|
||||||
|
"features = X_trans.columns[:-1]\n",
|
||||||
|
"expl = explain_model(fitted_model, X_train, X_test, features = features, best_run=best_run_lags, y_train = y_train)\n",
|
||||||
|
"# unpack the tuple\n",
|
||||||
|
"shap_values, expected_values, feat_overall_imp, feat_names, per_class_summary, per_class_imp = expl\n",
|
||||||
|
"best_run_lags"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Please go to the Azure Portal's best run to see the top features chart.\n",
|
||||||
|
"\n",
|
||||||
|
"The informative features make all sorts of intuitive sense. Temperature is a strong driver of heating and cooling demand in NYC. Apart from that, the daily life cycle, expressed by `hour`, and the weekly cycle, expressed by `wday` drives people's energy use habits."
|
||||||
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"authors": [
|
"authors": [
|
||||||
{
|
{
|
||||||
"name": "xiaga"
|
"name": "xiaga, tosingli"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
@@ -373,7 +551,7 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.6.8"
|
"version": "3.6.7"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
|
|||||||
@@ -20,7 +20,9 @@
|
|||||||
"1. [Introduction](#Introduction)\n",
|
"1. [Introduction](#Introduction)\n",
|
||||||
"1. [Setup](#Setup)\n",
|
"1. [Setup](#Setup)\n",
|
||||||
"1. [Data](#Data)\n",
|
"1. [Data](#Data)\n",
|
||||||
"1. [Train](#Train)"
|
"1. [Train](#Train)\n",
|
||||||
|
"1. [Predict](#Predict)\n",
|
||||||
|
"1. [Operationalize](#Operationalize)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -85,9 +87,9 @@
|
|||||||
"ws = Workspace.from_config()\n",
|
"ws = Workspace.from_config()\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# choose a name for the run history container in the workspace\n",
|
"# choose a name for the run history container in the workspace\n",
|
||||||
"experiment_name = 'automl-ojsalesforecasting'\n",
|
"experiment_name = 'automl-ojforecasting'\n",
|
||||||
"# project folder\n",
|
"# project folder\n",
|
||||||
"project_folder = './sample_projects/automl-local-ojsalesforecasting'\n",
|
"project_folder = './sample_projects/automl-local-ojforecasting'\n",
|
||||||
"\n",
|
"\n",
|
||||||
"experiment = Experiment(ws, experiment_name)\n",
|
"experiment = Experiment(ws, experiment_name)\n",
|
||||||
"\n",
|
"\n",
|
||||||
@@ -260,12 +262,12 @@
|
|||||||
" 'time_column_name': time_column_name,\n",
|
" 'time_column_name': time_column_name,\n",
|
||||||
" 'grain_column_names': grain_column_names,\n",
|
" 'grain_column_names': grain_column_names,\n",
|
||||||
" 'drop_column_names': ['logQuantity'],\n",
|
" 'drop_column_names': ['logQuantity'],\n",
|
||||||
" 'max_horizon': n_test_periods\n",
|
" 'max_horizon': n_test_periods # optional\n",
|
||||||
"}\n",
|
"}\n",
|
||||||
"\n",
|
"\n",
|
||||||
"automl_config = AutoMLConfig(task='forecasting',\n",
|
"automl_config = AutoMLConfig(task='forecasting',\n",
|
||||||
" debug_log='automl_oj_sales_errors.log',\n",
|
" debug_log='automl_oj_sales_errors.log',\n",
|
||||||
" primary_metric='normalized_root_mean_squared_error',\n",
|
" primary_metric='normalized_mean_absolute_error',\n",
|
||||||
" iterations=10,\n",
|
" iterations=10,\n",
|
||||||
" X=X_train,\n",
|
" X=X_train,\n",
|
||||||
" y=y_train,\n",
|
" y=y_train,\n",
|
||||||
@@ -293,15 +295,6 @@
|
|||||||
"local_run = experiment.submit(automl_config, show_output=True)"
|
"local_run = experiment.submit(automl_config, show_output=True)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"local_run"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
@@ -324,7 +317,7 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### Make Predictions from the Best Fitted Model\n",
|
"# Predict\n",
|
||||||
"Now that we have retrieved the best pipeline/model, it can be used to make predictions on test data. First, we remove the target values from the test set:"
|
"Now that we have retrieved the best pipeline/model, it can be used to make predictions on test data. First, we remove the target values from the test set:"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -352,7 +345,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"To produce predictions on the test set, we need to know the feature values at all dates in the test set. This requirement is somewhat reasonable for the OJ sales data since the features mainly consist of price, which is usually set in advance, and customer demographics which are approximately constant for each store over the 20 week forecast horizon in the testing data. \n",
|
"To produce predictions on the test set, we need to know the feature values at all dates in the test set. This requirement is somewhat reasonable for the OJ sales data since the features mainly consist of price, which is usually set in advance, and customer demographics which are approximately constant for each store over the 20 week forecast horizon in the testing data. \n",
|
||||||
"\n",
|
"\n",
|
||||||
"The target predictions can be retrieved by calling the `predict` method on the best model:"
|
"We will first create a query `y_query`, which is aligned index-for-index to `X_test`. This is a vector of target values where each `NaN` serves the function of the question mark to be replaced by forecast. Passing definite values in the `y` argument allows the `forecast` function to make predictions on data that does not immediately follow the train data which contains `y`. In each grain, the last time point where the model sees a definite value of `y` is that grain's _forecast origin_."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -361,15 +354,76 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"y_pred = fitted_pipeline.predict(X_test)"
|
"# Replace ALL values in y_pred by NaN.\n",
|
||||||
|
"# The forecast origin will be at the beginning of the first forecast period.\n",
|
||||||
|
"# (Which is the same time as the end of the last training period.)\n",
|
||||||
|
"y_query = y_test.copy().astype(np.float)\n",
|
||||||
|
"y_query.fill(np.nan)\n",
|
||||||
|
"# The featurized data, aligned to y, will also be returned.\n",
|
||||||
|
"# This contains the assumptions that were made in the forecast\n",
|
||||||
|
"# and helps align the forecast to the original data\n",
|
||||||
|
"y_pred, X_trans = fitted_pipeline.forecast(X_test, y_query)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### Calculate evaluation metrics for the prediction\n",
|
"If you are used to scikit pipelines, perhaps you expected `predict(X_test)`. However, forecasting requires a more general interface that also supplies the past target `y` values. Please use `forecast(X,y)` as `predict(X)` is reserved for internal purposes on forecasting models.\n",
|
||||||
"To evaluate the accuracy of the forecast, we'll compare against the actual sales quantities for some select metrics, included the mean absolute percentage error (MAPE)."
|
"\n",
|
||||||
|
"The [energy demand forecasting notebook](https://github.com/Azure/MachineLearningNotebooks/tree/master/how-to-use-azureml/automated-machine-learning/forecasting-energy-demand) demonstrates the use of the forecast function in more detail in the context of using lags and rolling window features. "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Evaluate\n",
|
||||||
|
"\n",
|
||||||
|
"To evaluate the accuracy of the forecast, we'll compare against the actual sales quantities for some select metrics, included the mean absolute percentage error (MAPE). \n",
|
||||||
|
"\n",
|
||||||
|
"It is a good practice to always align the output explicitly to the input, as the count and order of the rows may have changed during transformations that span multiple rows."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"def align_outputs(y_predicted, X_trans, X_test, y_test, predicted_column_name = 'predicted'):\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" Demonstrates how to get the output aligned to the inputs\n",
|
||||||
|
" using pandas indexes. Helps understand what happened if\n",
|
||||||
|
" the output's shape differs from the input shape, or if\n",
|
||||||
|
" the data got re-sorted by time and grain during forecasting.\n",
|
||||||
|
" \n",
|
||||||
|
" Typical causes of misalignment are:\n",
|
||||||
|
" * we predicted some periods that were missing in actuals -> drop from eval\n",
|
||||||
|
" * model was asked to predict past max_horizon -> increase max horizon\n",
|
||||||
|
" * data at start of X_test was needed for lags -> provide previous periods in y\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" \n",
|
||||||
|
" df_fcst = pd.DataFrame({predicted_column_name : y_predicted})\n",
|
||||||
|
" # y and X outputs are aligned by forecast() function contract\n",
|
||||||
|
" df_fcst.index = X_trans.index\n",
|
||||||
|
" \n",
|
||||||
|
" # align original X_test to y_test \n",
|
||||||
|
" X_test_full = X_test.copy()\n",
|
||||||
|
" X_test_full[target_column_name] = y_test\n",
|
||||||
|
"\n",
|
||||||
|
" # X_test_full's index does not include origin, so reset for merge\n",
|
||||||
|
" df_fcst.reset_index(inplace=True)\n",
|
||||||
|
" X_test_full = X_test_full.reset_index().drop(columns='index')\n",
|
||||||
|
" together = df_fcst.merge(X_test_full, how='right')\n",
|
||||||
|
" \n",
|
||||||
|
" # drop rows where prediction or actuals are nan \n",
|
||||||
|
" # happens because of missing actuals \n",
|
||||||
|
" # or at edges of time due to lags/rolling windows\n",
|
||||||
|
" clean = together[together[[target_column_name, predicted_column_name]].notnull().all(axis=1)]\n",
|
||||||
|
" return(clean)\n",
|
||||||
|
"\n",
|
||||||
|
"df_all = align_outputs(y_pred, X_trans, X_test, y_test)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -388,18 +442,392 @@
|
|||||||
" actual_safe = actual[not_na & not_zero]\n",
|
" actual_safe = actual[not_na & not_zero]\n",
|
||||||
" pred_safe = pred[not_na & not_zero]\n",
|
" pred_safe = pred[not_na & not_zero]\n",
|
||||||
" APE = 100*np.abs((actual_safe - pred_safe)/actual_safe)\n",
|
" APE = 100*np.abs((actual_safe - pred_safe)/actual_safe)\n",
|
||||||
" return np.mean(APE)\n",
|
" return np.mean(APE)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"print(\"Simple forecasting model\")\n",
|
||||||
|
"rmse = np.sqrt(mean_squared_error(df_all[target_column_name], df_all['predicted']))\n",
|
||||||
|
"print(\"[Test Data] \\nRoot Mean squared error: %.2f\" % rmse)\n",
|
||||||
|
"mae = mean_absolute_error(df_all[target_column_name], df_all['predicted'])\n",
|
||||||
|
"print('mean_absolute_error score: %.2f' % mae)\n",
|
||||||
|
"print('MAPE: %.2f' % MAPE(df_all[target_column_name], df_all['predicted']))\n",
|
||||||
"\n",
|
"\n",
|
||||||
"print(\"[Test Data] \\nRoot Mean squared error: %.2f\" % np.sqrt(mean_squared_error(y_test, y_pred)))\n",
|
"# Plot outputs\n",
|
||||||
"print('mean_absolute_error score: %.2f' % mean_absolute_error(y_test, y_pred))\n",
|
"import matplotlib.pyplot as plt\n",
|
||||||
"print('MAPE: %.2f' % MAPE(y_test, y_pred))"
|
"\n",
|
||||||
|
"%matplotlib notebook\n",
|
||||||
|
"test_pred = plt.scatter(df_all[target_column_name], df_all['predicted'], color='b')\n",
|
||||||
|
"test_test = plt.scatter(y_test, y_test, color='g')\n",
|
||||||
|
"plt.legend((test_pred, test_test), ('prediction', 'truth'), loc='upper left', fontsize=8)\n",
|
||||||
|
"plt.show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Operationalize"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"_Operationalization_ means getting the model into the cloud so that other can run it after you close the notebook. We will create a docker running on Azure Container Instances with the model."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"description = 'AutoML OJ forecaster'\n",
|
||||||
|
"tags = None\n",
|
||||||
|
"model = local_run.register_model(description = description, tags = tags)\n",
|
||||||
|
"\n",
|
||||||
|
"print(local_run.model_id)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Develop the scoring script\n",
|
||||||
|
"\n",
|
||||||
|
"Serializing and deserializing complex data frames may be tricky. We first develop the `run()` function of the scoring script locally, then write it into a scoring script. It is much easier to debug any quirks of the scoring function without crossing two compute environments. For this exercise, we handle a common quirk of how pandas dataframes serialize time stamp values."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# this is where we test the run function of the scoring script interactively\n",
|
||||||
|
"# before putting it in the scoring script\n",
|
||||||
|
"\n",
|
||||||
|
"timestamp_columns = ['WeekStarting']\n",
|
||||||
|
"\n",
|
||||||
|
"def run(rawdata, test_model = None):\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" Intended to process 'rawdata' string produced by\n",
|
||||||
|
" \n",
|
||||||
|
" {'X': X_test.to_json(), y' : y_test.to_json()}\n",
|
||||||
|
" \n",
|
||||||
|
" Don't convert the X payload to numpy.array, use it as pandas.DataFrame\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" try:\n",
|
||||||
|
" # unpack the data frame with timestamp \n",
|
||||||
|
" rawobj = json.loads(rawdata) # rawobj is now a dict of strings \n",
|
||||||
|
" X_pred = pd.read_json(rawobj['X'], convert_dates=False) # load the pandas DF from a json string\n",
|
||||||
|
" for col in timestamp_columns: # fix timestamps\n",
|
||||||
|
" X_pred[col] = pd.to_datetime(X_pred[col], unit='ms') \n",
|
||||||
|
" \n",
|
||||||
|
" y_pred = np.array(rawobj['y']) # reconstitute numpy array from serialized list\n",
|
||||||
|
" \n",
|
||||||
|
" if test_model is None:\n",
|
||||||
|
" result = model.forecast(X_pred, y_pred) # use the global model from init function\n",
|
||||||
|
" else:\n",
|
||||||
|
" result = test_model.forecast(X_pred, y_pred) # use the model on which we are testing\n",
|
||||||
|
" \n",
|
||||||
|
" except Exception as e:\n",
|
||||||
|
" result = str(e)\n",
|
||||||
|
" return json.dumps({\"error\": result})\n",
|
||||||
|
" \n",
|
||||||
|
" forecast_as_list = result[0].tolist()\n",
|
||||||
|
" index_as_df = result[1].index.to_frame().reset_index(drop=True)\n",
|
||||||
|
" \n",
|
||||||
|
" return json.dumps({\"forecast\": forecast_as_list, # return the minimum over the wire: \n",
|
||||||
|
" \"index\": index_as_df.to_json() # no forecast and its featurized values\n",
|
||||||
|
" })"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# test the run function here before putting in the scoring script\n",
|
||||||
|
"import json\n",
|
||||||
|
"\n",
|
||||||
|
"test_sample = json.dumps({'X': X_test.to_json(), 'y' : y_query.tolist()})\n",
|
||||||
|
"response = run(test_sample, fitted_pipeline)\n",
|
||||||
|
"\n",
|
||||||
|
"# unpack the response, dealing with the timestamp serialization again\n",
|
||||||
|
"res_dict = json.loads(response)\n",
|
||||||
|
"y_fcst_all = pd.read_json(res_dict['index'])\n",
|
||||||
|
"y_fcst_all[time_column_name] = pd.to_datetime(y_fcst_all[time_column_name], unit = 'ms')\n",
|
||||||
|
"y_fcst_all['forecast'] = res_dict['forecast']\n",
|
||||||
|
"y_fcst_all.head()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Now that the function works locally in the notebook, let's write it down into the scoring script. The scoring script is authored by the data scientist. Adjust it to taste, adding inputs, outputs and processing as needed."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"%%writefile score_fcast.py\n",
|
||||||
|
"import pickle\n",
|
||||||
|
"import json\n",
|
||||||
|
"import numpy as np\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"import azureml.train.automl\n",
|
||||||
|
"from sklearn.externals import joblib\n",
|
||||||
|
"from azureml.core.model import Model\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"def init():\n",
|
||||||
|
" global model\n",
|
||||||
|
" model_path = Model.get_model_path(model_name = '<<modelid>>') # this name is model.id of model that we want to deploy\n",
|
||||||
|
" # deserialize the model file back into a sklearn model\n",
|
||||||
|
" model = joblib.load(model_path)\n",
|
||||||
|
"\n",
|
||||||
|
"timestamp_columns = ['WeekStarting']\n",
|
||||||
|
"\n",
|
||||||
|
"def run(rawdata, test_model = None):\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" Intended to process 'rawdata' string produced by\n",
|
||||||
|
" \n",
|
||||||
|
" {'X': X_test.to_json(), y' : y_test.to_json()}\n",
|
||||||
|
" \n",
|
||||||
|
" Don't convert the X payload to numpy.array, use it as pandas.DataFrame\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" try:\n",
|
||||||
|
" # unpack the data frame with timestamp \n",
|
||||||
|
" rawobj = json.loads(rawdata) # rawobj is now a dict of strings \n",
|
||||||
|
" X_pred = pd.read_json(rawobj['X'], convert_dates=False) # load the pandas DF from a json string\n",
|
||||||
|
" for col in timestamp_columns: # fix timestamps\n",
|
||||||
|
" X_pred[col] = pd.to_datetime(X_pred[col], unit='ms') \n",
|
||||||
|
" \n",
|
||||||
|
" y_pred = np.array(rawobj['y']) # reconstitute numpy array from serialized list\n",
|
||||||
|
" \n",
|
||||||
|
" if test_model is None:\n",
|
||||||
|
" result = model.forecast(X_pred, y_pred) # use the global model from init function\n",
|
||||||
|
" else:\n",
|
||||||
|
" result = test_model.forecast(X_pred, y_pred) # use the model on which we are testing\n",
|
||||||
|
" \n",
|
||||||
|
" except Exception as e:\n",
|
||||||
|
" result = str(e)\n",
|
||||||
|
" return json.dumps({\"error\": result})\n",
|
||||||
|
" \n",
|
||||||
|
" # prepare to send over wire as json\n",
|
||||||
|
" forecast_as_list = result[0].tolist()\n",
|
||||||
|
" index_as_df = result[1].index.to_frame().reset_index(drop=True)\n",
|
||||||
|
" \n",
|
||||||
|
" return json.dumps({\"forecast\": forecast_as_list, # return the minimum over the wire: \n",
|
||||||
|
" \"index\": index_as_df.to_json() # no forecast and its featurized values\n",
|
||||||
|
" })"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# get the model\n",
|
||||||
|
"from azureml.train.automl.run import AutoMLRun\n",
|
||||||
|
"\n",
|
||||||
|
"experiment = Experiment(ws, experiment_name)\n",
|
||||||
|
"ml_run = AutoMLRun(experiment = experiment, run_id = local_run.id)\n",
|
||||||
|
"best_iteration = int(str.split(best_run.id,'_')[-1]) # the iteration number is a postfix of the run ID."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# get the best model's dependencies and write them into this file\n",
|
||||||
|
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||||
|
"\n",
|
||||||
|
"conda_env_file_name = 'fcast_env.yml'\n",
|
||||||
|
"\n",
|
||||||
|
"dependencies = ml_run.get_run_sdk_dependencies(iteration = best_iteration)\n",
|
||||||
|
"for p in ['azureml-train-automl', 'azureml-sdk', 'azureml-core']:\n",
|
||||||
|
" print('{}\\t{}'.format(p, dependencies[p]))\n",
|
||||||
|
"\n",
|
||||||
|
"myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn'], pip_packages=['azureml-sdk[automl]'])\n",
|
||||||
|
"\n",
|
||||||
|
"myenv.save_to_file('.', conda_env_file_name)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# this is the script file name we wrote a few cells above\n",
|
||||||
|
"script_file_name = 'score_fcast.py'\n",
|
||||||
|
"\n",
|
||||||
|
"# Substitute the actual version number in the environment file.\n",
|
||||||
|
"# This is not strictly needed in this notebook because the model should have been generated using the current SDK version.\n",
|
||||||
|
"# However, we include this in case this code is used on an experiment from a previous SDK version.\n",
|
||||||
|
"\n",
|
||||||
|
"with open(conda_env_file_name, 'r') as cefr:\n",
|
||||||
|
" content = cefr.read()\n",
|
||||||
|
"\n",
|
||||||
|
"with open(conda_env_file_name, 'w') as cefw:\n",
|
||||||
|
" cefw.write(content.replace(azureml.core.VERSION, dependencies['azureml-sdk']))\n",
|
||||||
|
"\n",
|
||||||
|
"# Substitute the actual model id in the script file.\n",
|
||||||
|
"\n",
|
||||||
|
"with open(script_file_name, 'r') as cefr:\n",
|
||||||
|
" content = cefr.read()\n",
|
||||||
|
"\n",
|
||||||
|
"with open(script_file_name, 'w') as cefw:\n",
|
||||||
|
" cefw.write(content.replace('<<modelid>>', local_run.model_id))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Create a Container Image"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.image import Image, ContainerImage\n",
|
||||||
|
"\n",
|
||||||
|
"image_config = ContainerImage.image_configuration(runtime= \"python\",\n",
|
||||||
|
" execution_script = script_file_name,\n",
|
||||||
|
" conda_file = conda_env_file_name,\n",
|
||||||
|
" tags = {'type': \"automl-forecasting\"},\n",
|
||||||
|
" description = \"Image for automl forecasting sample\")\n",
|
||||||
|
"\n",
|
||||||
|
"image = Image.create(name = \"automl-fcast-image\",\n",
|
||||||
|
" # this is the model object \n",
|
||||||
|
" models = [model],\n",
|
||||||
|
" image_config = image_config, \n",
|
||||||
|
" workspace = ws)\n",
|
||||||
|
"\n",
|
||||||
|
"image.wait_for_creation(show_output = True)\n",
|
||||||
|
"\n",
|
||||||
|
"if image.creation_state == 'Failed':\n",
|
||||||
|
" print(\"Image build log at: \" + image.image_build_log_uri)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Deploy the Image as a Web Service on Azure Container Instance"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.webservice import AciWebservice\n",
|
||||||
|
"\n",
|
||||||
|
"aciconfig = AciWebservice.deploy_configuration(cpu_cores = 1, \n",
|
||||||
|
" memory_gb = 2, \n",
|
||||||
|
" tags = {'type': \"automl-forecasting\"},\n",
|
||||||
|
" description = \"Automl forecasting sample service\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.webservice import Webservice\n",
|
||||||
|
"\n",
|
||||||
|
"aci_service_name = 'automl-forecast-01'\n",
|
||||||
|
"print(aci_service_name)\n",
|
||||||
|
"\n",
|
||||||
|
"aci_service = Webservice.deploy_from_image(deployment_config = aciconfig,\n",
|
||||||
|
" image = image,\n",
|
||||||
|
" name = aci_service_name,\n",
|
||||||
|
" workspace = ws)\n",
|
||||||
|
"aci_service.wait_for_deployment(True)\n",
|
||||||
|
"print(aci_service.state)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Call the service"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# we send the data to the service serialized into a json string\n",
|
||||||
|
"test_sample = json.dumps({'X':X_test.to_json(), 'y' : y_query.tolist()})\n",
|
||||||
|
"response = aci_service.run(input_data = test_sample)\n",
|
||||||
|
"\n",
|
||||||
|
"# translate from networkese to datascientese\n",
|
||||||
|
"try: \n",
|
||||||
|
" res_dict = json.loads(response)\n",
|
||||||
|
" y_fcst_all = pd.read_json(res_dict['index'])\n",
|
||||||
|
" y_fcst_all[time_column_name] = pd.to_datetime(y_fcst_all[time_column_name], unit = 'ms')\n",
|
||||||
|
" y_fcst_all['forecast'] = res_dict['forecast'] \n",
|
||||||
|
"except:\n",
|
||||||
|
" print(res_dict)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"y_fcst_all.head()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Delete the web service if desired"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"serv = Webservice(ws, 'automl-forecast-01')\n",
|
||||||
|
"# serv.delete() # don't do it accidentally"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"authors": [
|
"authors": [
|
||||||
{
|
{
|
||||||
"name": "erwright"
|
"name": "erwright, tosingli"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
@@ -417,7 +845,7 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.6.8"
|
"version": "3.6.7"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
|
|||||||
@@ -37,8 +37,9 @@
|
|||||||
"In this notebook you will learn how to:\n",
|
"In this notebook you will learn how to:\n",
|
||||||
"1. Create an `Experiment` in an existing `Workspace`.\n",
|
"1. Create an `Experiment` in an existing `Workspace`.\n",
|
||||||
"2. Configure AutoML using `AutoMLConfig`.\n",
|
"2. Configure AutoML using `AutoMLConfig`.\n",
|
||||||
"4. Train the model.\n",
|
"3. Train the model.\n",
|
||||||
"5. Explore the results.\n",
|
"4. Explore the results.\n",
|
||||||
|
"5. Viewing the engineered names for featurized data and featurization summary for all raw features.\n",
|
||||||
"6. Test the best fitted model.\n",
|
"6. Test the best fitted model.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"In addition this notebook showcases the following features\n",
|
"In addition this notebook showcases the following features\n",
|
||||||
@@ -154,7 +155,6 @@
|
|||||||
"|**primary_metric**|This is the metric that you want to optimize. Classification supports the following primary metrics: <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>average_precision_score_weighted</i><br><i>norm_macro_recall</i><br><i>precision_score_weighted</i>|\n",
|
"|**primary_metric**|This is the metric that you want to optimize. Classification supports the following primary metrics: <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>average_precision_score_weighted</i><br><i>norm_macro_recall</i><br><i>precision_score_weighted</i>|\n",
|
||||||
"|**iteration_timeout_minutes**|Time limit in minutes for each iteration.|\n",
|
"|**iteration_timeout_minutes**|Time limit in minutes for each iteration.|\n",
|
||||||
"|**iterations**|Number of iterations. In each iteration AutoML trains a specific pipeline with the data.|\n",
|
"|**iterations**|Number of iterations. In each iteration AutoML trains a specific pipeline with the data.|\n",
|
||||||
"|**n_cross_validations**|Number of cross validation splits.|\n",
|
|
||||||
"|**preprocess**|Setting this to *True* enables AutoML to perform preprocessing on the input to handle *missing data*, and to perform some common *feature extraction*.|\n",
|
"|**preprocess**|Setting this to *True* enables AutoML to perform preprocessing on the input to handle *missing data*, and to perform some common *feature extraction*.|\n",
|
||||||
"|**experiment_exit_score**|*double* value indicating the target for *primary_metric*. <br>Once the target is surpassed the run terminates.|\n",
|
"|**experiment_exit_score**|*double* value indicating the target for *primary_metric*. <br>Once the target is surpassed the run terminates.|\n",
|
||||||
"|**blacklist_models**|*List* of *strings* indicating machine learning algorithms for AutoML to avoid in this run.<br><br> Allowed values for **Classification**<br><i>LogisticRegression</i><br><i>SGD</i><br><i>MultinomialNaiveBayes</i><br><i>BernoulliNaiveBayes</i><br><i>SVM</i><br><i>LinearSVM</i><br><i>KNN</i><br><i>DecisionTree</i><br><i>RandomForest</i><br><i>ExtremeRandomTrees</i><br><i>LightGBM</i><br><i>GradientBoosting</i><br><i>TensorFlowDNN</i><br><i>TensorFlowLinearClassifier</i><br><br>Allowed values for **Regression**<br><i>ElasticNet</i><br><i>GradientBoosting</i><br><i>DecisionTree</i><br><i>KNN</i><br><i>LassoLars</i><br><i>SGD</i><br><i>RandomForest</i><br><i>ExtremeRandomTrees</i><br><i>LightGBM</i><br><i>TensorFlowLinearRegressor</i><br><i>TensorFlowDNN</i>|\n",
|
"|**blacklist_models**|*List* of *strings* indicating machine learning algorithms for AutoML to avoid in this run.<br><br> Allowed values for **Classification**<br><i>LogisticRegression</i><br><i>SGD</i><br><i>MultinomialNaiveBayes</i><br><i>BernoulliNaiveBayes</i><br><i>SVM</i><br><i>LinearSVM</i><br><i>KNN</i><br><i>DecisionTree</i><br><i>RandomForest</i><br><i>ExtremeRandomTrees</i><br><i>LightGBM</i><br><i>GradientBoosting</i><br><i>TensorFlowDNN</i><br><i>TensorFlowLinearClassifier</i><br><br>Allowed values for **Regression**<br><i>ElasticNet</i><br><i>GradientBoosting</i><br><i>DecisionTree</i><br><i>KNN</i><br><i>LassoLars</i><br><i>SGD</i><br><i>RandomForest</i><br><i>ExtremeRandomTrees</i><br><i>LightGBM</i><br><i>TensorFlowLinearRegressor</i><br><i>TensorFlowDNN</i>|\n",
|
||||||
@@ -174,7 +174,6 @@
|
|||||||
" primary_metric = 'AUC_weighted',\n",
|
" primary_metric = 'AUC_weighted',\n",
|
||||||
" iteration_timeout_minutes = 60,\n",
|
" iteration_timeout_minutes = 60,\n",
|
||||||
" iterations = 20,\n",
|
" iterations = 20,\n",
|
||||||
" n_cross_validations = 5,\n",
|
|
||||||
" preprocess = True,\n",
|
" preprocess = True,\n",
|
||||||
" experiment_exit_score = 0.9984,\n",
|
" experiment_exit_score = 0.9984,\n",
|
||||||
" blacklist_models = ['KNN','LinearSVM'],\n",
|
" blacklist_models = ['KNN','LinearSVM'],\n",
|
||||||
@@ -318,6 +317,45 @@
|
|||||||
"# best_run, fitted_model = local_run.get_output(iteration = iteration)"
|
"# best_run, fitted_model = local_run.get_output(iteration = iteration)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"#### View the engineered names for featurized data\n",
|
||||||
|
"Below we display the engineered feature names generated for the featurized data using the preprocessing featurization."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"fitted_model.named_steps['datatransformer'].get_engineered_feature_names()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"#### View the featurization summary\n",
|
||||||
|
"Below we display the featurization that was performed on different raw features in the user data. For each raw feature in the user data, the following information is displayed:-\n",
|
||||||
|
"- Raw feature name\n",
|
||||||
|
"- Number of engineered features formed out of this raw feature\n",
|
||||||
|
"- Type detected\n",
|
||||||
|
"- If feature was dropped\n",
|
||||||
|
"- List of feature transformations for the raw feature"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"fitted_model.named_steps['datatransformer'].get_featurization_summary()"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
|
|||||||
@@ -254,7 +254,9 @@
|
|||||||
"3.\toverall_summary: The model level feature importance values sorted in descending order\n",
|
"3.\toverall_summary: The model level feature importance values sorted in descending order\n",
|
||||||
"4.\toverall_imp: The feature names sorted in the same order as in overall_summary\n",
|
"4.\toverall_imp: The feature names sorted in the same order as in overall_summary\n",
|
||||||
"5.\tper_class_summary: The class level feature importance values sorted in descending order. Only available for the classification case\n",
|
"5.\tper_class_summary: The class level feature importance values sorted in descending order. Only available for the classification case\n",
|
||||||
"6.\tper_class_imp: The feature names sorted in the same order as in per_class_summary. Only available for the classification case"
|
"6.\tper_class_imp: The feature names sorted in the same order as in per_class_summary. Only available for the classification case\n",
|
||||||
|
"\n",
|
||||||
|
"Note:- The **retrieve_model_explanation()** API only works in case AutoML has been configured with **'model_explainability'** flag set to **True**. "
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -305,7 +307,7 @@
|
|||||||
"from azureml.train.automl.automlexplainer import explain_model\n",
|
"from azureml.train.automl.automlexplainer import explain_model\n",
|
||||||
"\n",
|
"\n",
|
||||||
"shap_values, expected_values, overall_summary, overall_imp, per_class_summary, per_class_imp = \\\n",
|
"shap_values, expected_values, overall_summary, overall_imp, per_class_summary, per_class_imp = \\\n",
|
||||||
" explain_model(fitted_model, X_train, X_test)"
|
" explain_model(fitted_model, X_train, X_test, features=features)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -40,7 +40,8 @@
|
|||||||
"3. Configure AutoML using `AutoMLConfig`.\n",
|
"3. Configure AutoML using `AutoMLConfig`.\n",
|
||||||
"4. Train the model using the DSVM.\n",
|
"4. Train the model using the DSVM.\n",
|
||||||
"5. Explore the results.\n",
|
"5. Explore the results.\n",
|
||||||
"6. Test the best fitted model.\n",
|
"6. Viewing the engineered names for featurized data and featurization summary for all raw features.\n",
|
||||||
|
"7. Test the best fitted model.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"In addition this notebook showcases the following features\n",
|
"In addition this notebook showcases the following features\n",
|
||||||
"- **Parallel** executions for iterations\n",
|
"- **Parallel** executions for iterations\n",
|
||||||
@@ -110,7 +111,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"### Attach a Remote Linux DSVM\n",
|
"### Attach a Remote Linux DSVM\n",
|
||||||
"To use a remote Docker compute target:\n",
|
"To use a remote Docker compute target:\n",
|
||||||
"1. Create a Linux DSVM in Azure, following these [quick instructions](https://docs.microsoft.com/en-us/azure/machine-learning/desktop-workbench/how-to-create-dsvm-hdi). Make sure you use the Ubuntu flavor (not CentOS). Make sure that disk space is available under `/tmp` because AutoML creates files under `/tmp/azureml_run`s. The DSVM should have more cores than the number of parallel runs that you plan to enable. It should also have at least 4GB per core.\n",
|
"1. Create a Linux DSVM in Azure, following these [instructions](https://docs.microsoft.com/en-us/azure/machine-learning/data-science-virtual-machine/dsvm-ubuntu-intro). Make sure you use the Ubuntu flavor (not CentOS). Make sure that disk space is available under `/tmp` because AutoML creates files under `/tmp/azureml_run`s. The DSVM should have more cores than the number of parallel runs that you plan to enable. It should also have at least 4GB per core.\n",
|
||||||
"2. Enter the IP address, user name and password below.\n",
|
"2. Enter the IP address, user name and password below.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"**Note:** By default, SSH runs on port 22 and you don't need to change the port number below. If you've configured SSH to use a different port, change `dsvm_ssh_port` accordinglyaddress. [Read more](https://docs.microsoft.com/en-us/azure/virtual-machines/troubleshooting/detailed-troubleshoot-ssh-connection) on changing SSH ports for security reasons."
|
"**Note:** By default, SSH runs on port 22 and you don't need to change the port number below. If you've configured SSH to use a different port, change `dsvm_ssh_port` accordinglyaddress. [Read more](https://docs.microsoft.com/en-us/azure/virtual-machines/troubleshooting/detailed-troubleshoot-ssh-connection) on changing SSH ports for security reasons."
|
||||||
@@ -160,6 +161,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"from azureml.core.runconfig import RunConfiguration\n",
|
"from azureml.core.runconfig import RunConfiguration\n",
|
||||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||||
|
"import pkg_resources\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# create a new RunConfig object\n",
|
"# create a new RunConfig object\n",
|
||||||
"conda_run_config = RunConfiguration(framework=\"python\")\n",
|
"conda_run_config = RunConfiguration(framework=\"python\")\n",
|
||||||
@@ -167,7 +169,9 @@
|
|||||||
"# Set compute target to the Linux DSVM\n",
|
"# Set compute target to the Linux DSVM\n",
|
||||||
"conda_run_config.target = dsvm_compute\n",
|
"conda_run_config.target = dsvm_compute\n",
|
||||||
"\n",
|
"\n",
|
||||||
"cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]'], conda_packages=['numpy','py-xgboost<=0.80'])\n",
|
"pandas_dependency = 'pandas==' + pkg_resources.get_distribution(\"pandas\").version\n",
|
||||||
|
"\n",
|
||||||
|
"cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]'], conda_packages=['numpy','py-xgboost<=0.80',pandas_dependency])\n",
|
||||||
"conda_run_config.environment.python.conda_dependencies = cd"
|
"conda_run_config.environment.python.conda_dependencies = cd"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -407,6 +411,45 @@
|
|||||||
"print(fitted_model)"
|
"print(fitted_model)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"#### View the engineered names for featurized data\n",
|
||||||
|
"Below we display the engineered feature names generated for the featurized data using the preprocessing featurization."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"fitted_model.named_steps['datatransformer'].get_engineered_feature_names()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"#### View the featurization summary\n",
|
||||||
|
"Below we display the featurization that was performed on different raw features in the user data. For each raw feature in the user data, the following information is displayed:-\n",
|
||||||
|
"- Raw feature name\n",
|
||||||
|
"- Number of engineered features formed out of this raw feature\n",
|
||||||
|
"- Type detected\n",
|
||||||
|
"- If feature was dropped\n",
|
||||||
|
"- List of feature transformations for the raw feature"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"fitted_model.named_steps['datatransformer'].get_featurization_summary()"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
|
|||||||
@@ -245,6 +245,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"from azureml.core.runconfig import RunConfiguration\n",
|
"from azureml.core.runconfig import RunConfiguration\n",
|
||||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||||
|
"import pkg_resources\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# create a new RunConfig object\n",
|
"# create a new RunConfig object\n",
|
||||||
"conda_run_config = RunConfiguration(framework=\"python\")\n",
|
"conda_run_config = RunConfiguration(framework=\"python\")\n",
|
||||||
@@ -254,7 +255,9 @@
|
|||||||
"# set the data reference of the run coonfiguration\n",
|
"# set the data reference of the run coonfiguration\n",
|
||||||
"conda_run_config.data_references = {ds.name: dr}\n",
|
"conda_run_config.data_references = {ds.name: dr}\n",
|
||||||
"\n",
|
"\n",
|
||||||
"cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]'], conda_packages=['numpy','py-xgboost<=0.80'])\n",
|
"pandas_dependency = 'pandas==' + pkg_resources.get_distribution(\"pandas\").version\n",
|
||||||
|
"\n",
|
||||||
|
"cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]'], conda_packages=['numpy','py-xgboost<=0.80',pandas_dependency])\n",
|
||||||
"conda_run_config.environment.python.conda_dependencies = cd"
|
"conda_run_config.environment.python.conda_dependencies = cd"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -23,7 +23,8 @@
|
|||||||
"3. Configure Automated ML using `AutoMLConfig`.\n",
|
"3. Configure Automated ML using `AutoMLConfig`.\n",
|
||||||
"4. Train the model using Azure Databricks.\n",
|
"4. Train the model using Azure Databricks.\n",
|
||||||
"5. Explore the results.\n",
|
"5. Explore the results.\n",
|
||||||
"6. Test the best fitted model.\n",
|
"6. Viewing the engineered names for featurized data and featurization summary for all raw features.\n",
|
||||||
|
"7. Test the best fitted model.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Before running this notebook, please follow the <a href=\"https://github.com/Azure/MachineLearningNotebooks/tree/master/how-to-use-azureml/azure-databricks\" target=\"_blank\">readme for using Automated ML on Azure Databricks</a> for installing necessary libraries to your cluster."
|
"Before running this notebook, please follow the <a href=\"https://github.com/Azure/MachineLearningNotebooks/tree/master/how-to-use-azureml/azure-databricks\" target=\"_blank\">readme for using Automated ML on Azure Databricks</a> for installing necessary libraries to your cluster."
|
||||||
]
|
]
|
||||||
@@ -556,6 +557,45 @@
|
|||||||
"print(fitted_model)"
|
"print(fitted_model)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"#### View the engineered names for featurized data\n",
|
||||||
|
"Below we display the engineered feature names generated for the featurized data using the preprocessing featurization."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"fitted_model.named_steps['datatransformer'].get_engineered_feature_names()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"#### View the featurization summary\n",
|
||||||
|
"Below we display the featurization that was performed on different raw features in the user data. For each raw feature in the user data, the following information is displayed:-\n",
|
||||||
|
"- Raw feature name\n",
|
||||||
|
"- Number of engineered features formed out of this raw feature\n",
|
||||||
|
"- Type detected\n",
|
||||||
|
"- If feature was dropped\n",
|
||||||
|
"- List of feature transformations for the raw feature"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"fitted_model.named_steps['datatransformer'].get_featurization_summary()"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
|
|||||||
@@ -207,6 +207,7 @@
|
|||||||
"import os\n",
|
"import os\n",
|
||||||
"import random\n",
|
"import random\n",
|
||||||
"import time\n",
|
"import time\n",
|
||||||
|
"import json\n",
|
||||||
"\n",
|
"\n",
|
||||||
"from matplotlib import pyplot as plt\n",
|
"from matplotlib import pyplot as plt\n",
|
||||||
"from matplotlib.pyplot import imshow\n",
|
"from matplotlib.pyplot import imshow\n",
|
||||||
@@ -295,7 +296,7 @@
|
|||||||
" datastore_name = datastore_name, \n",
|
" datastore_name = datastore_name, \n",
|
||||||
" container_name = container_name, \n",
|
" container_name = container_name, \n",
|
||||||
" account_name = account_name,\n",
|
" account_name = account_name,\n",
|
||||||
" overwrite = True\n",
|
" overwrite = True\n",
|
||||||
")"
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -427,7 +428,7 @@
|
|||||||
" debug_log = 'automl_errors.log',\n",
|
" debug_log = 'automl_errors.log',\n",
|
||||||
" primary_metric = 'AUC_weighted',\n",
|
" primary_metric = 'AUC_weighted',\n",
|
||||||
" iteration_timeout_minutes = 10,\n",
|
" iteration_timeout_minutes = 10,\n",
|
||||||
" iterations = 30,\n",
|
" iterations = 5,\n",
|
||||||
" preprocess = True,\n",
|
" preprocess = True,\n",
|
||||||
" n_cross_validations = 10,\n",
|
" n_cross_validations = 10,\n",
|
||||||
" max_concurrent_iterations = 2, #change it based on number of worker nodes\n",
|
" max_concurrent_iterations = 2, #change it based on number of worker nodes\n",
|
||||||
@@ -591,22 +592,21 @@
|
|||||||
"%%writefile score.py\n",
|
"%%writefile score.py\n",
|
||||||
"import pickle\n",
|
"import pickle\n",
|
||||||
"import json\n",
|
"import json\n",
|
||||||
"import numpy\n",
|
"import numpy as np\n",
|
||||||
"import azureml.train.automl\n",
|
"import azureml.train.automl\n",
|
||||||
"from sklearn.externals import joblib\n",
|
"from sklearn.externals import joblib\n",
|
||||||
"from azureml.core.model import Model\n",
|
"from azureml.core.model import Model\n",
|
||||||
"\n",
|
"import pandas as pd\n",
|
||||||
"\n",
|
"\n",
|
||||||
"def init():\n",
|
"def init():\n",
|
||||||
" global model\n",
|
" global model\n",
|
||||||
" model_path = Model.get_model_path(model_name = '<<modelid>>') # this name is model.id of model that we want to deploy\n",
|
" model_path = Model.get_model_path(model_name = '<<model_id>>') # this name is model.id of model that we want to deploy\n",
|
||||||
" # deserialize the model file back into a sklearn model\n",
|
" # deserialize the model file back into a sklearn model\n",
|
||||||
" model = joblib.load(model_path)\n",
|
" model = joblib.load(model_path)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"def run(rawdata):\n",
|
"def run(raw_data):\n",
|
||||||
" try:\n",
|
" try:\n",
|
||||||
" data = json.loads(rawdata)['data']\n",
|
" data = (pd.DataFrame(np.array(json.loads(raw_data)['data']), columns=[str(i) for i in range(0,64)]))\n",
|
||||||
" data = numpy.array(data)\n",
|
|
||||||
" result = model.predict(data)\n",
|
" result = model.predict(data)\n",
|
||||||
" except Exception as e:\n",
|
" except Exception as e:\n",
|
||||||
" result = str(e)\n",
|
" result = str(e)\n",
|
||||||
@@ -614,6 +614,22 @@
|
|||||||
" return json.dumps({\"result\":result.tolist()})"
|
" return json.dumps({\"result\":result.tolist()})"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"#Replace <<model_id>>\n",
|
||||||
|
"content = \"\"\n",
|
||||||
|
"with open(\"score.py\", \"r\") as fo:\n",
|
||||||
|
" content = fo.read()\n",
|
||||||
|
"\n",
|
||||||
|
"new_content = content.replace(\"<<model_id>>\", local_run.model_id)\n",
|
||||||
|
"with open(\"score.py\", \"w\") as fw:\n",
|
||||||
|
" fw.write(new_content)"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
@@ -672,16 +688,19 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"\n",
|
|
||||||
"# this will take 10-15 minutes to finish\n",
|
"# this will take 10-15 minutes to finish\n",
|
||||||
"\n",
|
"\n",
|
||||||
"service_name = \"<<servicename>>\"\n",
|
"import uuid\n",
|
||||||
|
"from azureml.core.image import ContainerImage\n",
|
||||||
|
"\n",
|
||||||
|
"guid = str(uuid.uuid4()).split(\"-\")[0]\n",
|
||||||
|
"service_name = \"myservice-{}\".format(guid)\n",
|
||||||
|
"print(\"Creating service with name: {}\".format(service_name))\n",
|
||||||
"runtime = \"spark-py\" \n",
|
"runtime = \"spark-py\" \n",
|
||||||
"driver_file = \"score.py\"\n",
|
"driver_file = \"score.py\"\n",
|
||||||
"my_conda_file = \"mydeployenv.yml\"\n",
|
"my_conda_file = \"mydeployenv.yml\"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# image creation\n",
|
"# image creation\n",
|
||||||
"from azureml.core.image import ContainerImage\n",
|
|
||||||
"myimage_config = ContainerImage.image_configuration(execution_script = driver_file, \n",
|
"myimage_config = ContainerImage.image_configuration(execution_script = driver_file, \n",
|
||||||
" runtime = runtime, \n",
|
" runtime = runtime, \n",
|
||||||
" conda_file = 'mydeployenv.yml')\n",
|
" conda_file = 'mydeployenv.yml')\n",
|
||||||
@@ -744,18 +763,39 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
"import json\n",
|
||||||
"# Randomly select digits and test.\n",
|
"# Randomly select digits and test.\n",
|
||||||
"for index in np.random.choice(len(y_test), 2, replace = False):\n",
|
"for index in np.random.choice(len(y_test), 2, replace = False):\n",
|
||||||
" print(index)\n",
|
" print(index)\n",
|
||||||
" predicted = fitted_model.predict(X_test[index:index + 1])[0]\n",
|
" test_sample = json.dumps({'data':X_test[index:index + 1].values.tolist()})\n",
|
||||||
|
" predicted = myservice.run(input_data = test_sample)\n",
|
||||||
" label = y_test.values[index]\n",
|
" label = y_test.values[index]\n",
|
||||||
" title = \"Label value = %d Predicted value = %d \" % (label, predicted)\n",
|
" predictedDict = json.loads(predicted)\n",
|
||||||
|
" title = \"Label value = %d Predicted value = %s \" % ( label,predictedDict['result'][0]) \n",
|
||||||
" fig = plt.figure(3, figsize = (5,5))\n",
|
" fig = plt.figure(3, figsize = (5,5))\n",
|
||||||
" ax1 = fig.add_axes((0,0,.8,.8))\n",
|
" ax1 = fig.add_axes((0,0,.8,.8))\n",
|
||||||
" ax1.set_title(title)\n",
|
" ax1.set_title(title)\n",
|
||||||
" plt.imshow(images[index], cmap = plt.cm.gray_r, interpolation = 'nearest')\n",
|
" plt.imshow(images[index], cmap = plt.cm.gray_r, interpolation = 'nearest')\n",
|
||||||
" display(fig)"
|
" display(fig)"
|
||||||
]
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"### Delete the service"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"myservice.delete()"
|
||||||
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
|||||||
55
how-to-use-azureml/azure-hdi/README.md
Normal file
55
how-to-use-azureml/azure-hdi/README.md
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
**Azure HDInsight**
|
||||||
|
|
||||||
|
Azure HDInsight is a fully managed cloud Hadoop & Spark offering the gives
|
||||||
|
optimized open-source analytic clusters for Spark, Hive, MapReduce, HBase,
|
||||||
|
Storm, and Kafka. HDInsight Spark clusters provide kernels that you can use with
|
||||||
|
the Jupyter notebook on [Apache Spark](https://spark.apache.org/) for testing
|
||||||
|
your applications.
|
||||||
|
|
||||||
|
How Azure HDInsight works with Azure Machine Learning service
|
||||||
|
|
||||||
|
- You can train a model using Spark clusters and deploy the model to ACI/AKS
|
||||||
|
from within Azure HDInsight.
|
||||||
|
|
||||||
|
- You can also use [automated machine
|
||||||
|
learning](https://docs.microsoft.com/en-us/azure/machine-learning/service/concept-automated-ml) capabilities
|
||||||
|
integrated within Azure HDInsight.
|
||||||
|
|
||||||
|
You can use Azure HDInsight as a compute target from an [Azure Machine Learning
|
||||||
|
pipeline](https://docs.microsoft.com/en-us/azure/machine-learning/service/concept-ml-pipelines).
|
||||||
|
|
||||||
|
**Set up your HDInsight cluster**
|
||||||
|
|
||||||
|
Create [HDInsight
|
||||||
|
cluster](https://docs.microsoft.com/en-us/azure/hdinsight/hdinsight-hadoop-provision-linux-clusters)
|
||||||
|
|
||||||
|
**Quick create: Basic cluster setup**
|
||||||
|
|
||||||
|
This article walks you through setup in the [Azure
|
||||||
|
portal](https://portal.azure.com/), where you can create an HDInsight cluster
|
||||||
|
using *Quick create* or *Custom*.
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
Follow instructions on the screen to do a basic cluster setup. Details are
|
||||||
|
provided below for:
|
||||||
|
|
||||||
|
- [Resource group
|
||||||
|
name](https://docs.microsoft.com/en-us/azure/hdinsight/hdinsight-hadoop-provision-linux-clusters#resource-group-name)
|
||||||
|
|
||||||
|
- [Cluster types and
|
||||||
|
configuration](https://docs.microsoft.com/en-us/azure/hdinsight/hdinsight-hadoop-provision-linux-clusters#cluster-types)
|
||||||
|
(Cluster must be Spark 2.3 (HDI 3.6) or greater)
|
||||||
|
|
||||||
|
- Cluster login and SSH username
|
||||||
|
|
||||||
|
- [Location](https://docs.microsoft.com/en-us/azure/hdinsight/hdinsight-hadoop-provision-linux-clusters#location)
|
||||||
|
|
||||||
|
**Import the sample HDI notebook in Jupyter**
|
||||||
|
|
||||||
|
**Important links:**
|
||||||
|
|
||||||
|
Create HDI cluster:
|
||||||
|
<https://docs.microsoft.com/en-us/azure/hdinsight/hdinsight-hadoop-provision-linux-clusters>
|
||||||
|
|
||||||
|

|
||||||
@@ -0,0 +1,624 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||||
|
"\n",
|
||||||
|
"Licensed under the MIT License."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Automated ML on Azure HDInsight\n",
|
||||||
|
"\n",
|
||||||
|
"In this example we use the scikit-learn's <a href=\"http://scikit-learn.org/stable/datasets/index.html#optical-recognition-of-handwritten-digits-dataset\" target=\"_blank\">digit dataset</a> to showcase how you can use AutoML for a simple classification problem.\n",
|
||||||
|
"\n",
|
||||||
|
"In this notebook you will learn how to:\n",
|
||||||
|
"1. Create Azure Machine Learning Workspace object and initialize your notebook directory to easily reload this object from a configuration file.\n",
|
||||||
|
"2. Create an `Experiment` in an existing `Workspace`.\n",
|
||||||
|
"3. Configure Automated ML using `AutoMLConfig`.\n",
|
||||||
|
"4. Train the model using Azure HDInsight.\n",
|
||||||
|
"5. Explore the results.\n",
|
||||||
|
"6. Test the best fitted model.\n",
|
||||||
|
"\n",
|
||||||
|
"Before running this notebook, please follow the readme for using Automated ML on Azure HDI for installing necessary libraries to your cluster."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Check the Azure ML Core SDK Version to Validate Your Installation"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import azureml.core\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"from azureml.core.authentication import ServicePrincipalAuthentication\n",
|
||||||
|
"from azureml.core.workspace import Workspace\n",
|
||||||
|
"from azureml.core.experiment import Experiment\n",
|
||||||
|
"from azureml.train.automl import AutoMLConfig\n",
|
||||||
|
"from azureml.train.automl.run import AutoMLRun\n",
|
||||||
|
"import logging\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"SDK Version:\", azureml.core.VERSION)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Initialize an Azure ML Workspace\n",
|
||||||
|
"### What is an Azure ML Workspace and Why Do I Need One?\n",
|
||||||
|
"\n",
|
||||||
|
"An Azure ML workspace is an Azure resource that organizes and coordinates the actions of many other Azure resources to assist in executing and sharing machine learning workflows. In particular, an Azure ML workspace coordinates storage, databases, and compute resources providing added functionality for machine learning experimentation, operationalization, and the monitoring of operationalized models.\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"### What do I Need?\n",
|
||||||
|
"\n",
|
||||||
|
"To create or access an Azure ML workspace, you will need to import the Azure ML library and specify following information:\n",
|
||||||
|
"* A name for your workspace. You can choose one.\n",
|
||||||
|
"* Your subscription id. Use the `id` value from the `az account show` command output above.\n",
|
||||||
|
"* The resource group name. The resource group organizes Azure resources and provides a default region for the resources in the group. The resource group will be created if it doesn't exist. Resource groups can be created and viewed in the [Azure portal](https://portal.azure.com)\n",
|
||||||
|
"* Supported regions include `eastus2`, `eastus`,`westcentralus`, `southeastasia`, `westeurope`, `australiaeast`, `westus2`, `southcentralus`."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import azureml.core\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"from azureml.core.authentication import ServicePrincipalAuthentication\n",
|
||||||
|
"from azureml.core.workspace import Workspace\n",
|
||||||
|
"from azureml.core.experiment import Experiment\n",
|
||||||
|
"from azureml.train.automl import AutoMLConfig\n",
|
||||||
|
"from azureml.train.automl.run import AutoMLRun\n",
|
||||||
|
"import logging\n",
|
||||||
|
"\n",
|
||||||
|
"subscription_id = \"<Your SubscriptionId>\" #you should be owner or contributor\n",
|
||||||
|
"resource_group = \"<Resource group - new or existing>\" #you should be owner or contributor\n",
|
||||||
|
"workspace_name = \"<workspace to be created>\" #your workspace name\n",
|
||||||
|
"workspace_region = \"<azureregion>\" #your region\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"tenant_id = \"<tenant_id>\"\n",
|
||||||
|
"app_id = \"<app_id>\"\n",
|
||||||
|
"app_key = \"<app_key>\"\n",
|
||||||
|
"\n",
|
||||||
|
"auth_sp = ServicePrincipalAuthentication(tenant_id = tenant_id,\n",
|
||||||
|
" service_principal_id = app_id,\n",
|
||||||
|
" service_principal_password = app_key)\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Creating a Workspace\n",
|
||||||
|
"If you already have access to an Azure ML workspace you want to use, you can skip this cell. Otherwise, this cell will create an Azure ML workspace for you in the specified subscription, provided you have the correct permissions for the given `subscription_id`.\n",
|
||||||
|
"\n",
|
||||||
|
"This will fail when:\n",
|
||||||
|
"1. The workspace already exists.\n",
|
||||||
|
"2. You do not have permission to create a workspace in the resource group.\n",
|
||||||
|
"3. You are not a subscription owner or contributor and no Azure ML workspaces have ever been created in this subscription.\n",
|
||||||
|
"\n",
|
||||||
|
"If workspace creation fails for any reason other than already existing, please work with your IT administrator to provide you with the appropriate permissions or to provision the required resources.\n",
|
||||||
|
"\n",
|
||||||
|
"**Note:** Creation of a new workspace can take several minutes."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"##TESTONLY\n",
|
||||||
|
"# Import the Workspace class and check the Azure ML SDK version.\n",
|
||||||
|
"from azureml.core import Workspace\n",
|
||||||
|
"\n",
|
||||||
|
"ws = Workspace.create(name = workspace_name,\n",
|
||||||
|
" subscription_id = subscription_id,\n",
|
||||||
|
" resource_group = resource_group, \n",
|
||||||
|
" location = workspace_region,\n",
|
||||||
|
" auth = auth_sp,\n",
|
||||||
|
" exist_ok=True)\n",
|
||||||
|
"ws.get_details()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Configuring Your Local Environment\n",
|
||||||
|
"You can validate that you have access to the specified workspace and write a configuration file to the default configuration location, `./aml_config/config.json`."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core import Workspace\n",
|
||||||
|
"\n",
|
||||||
|
"ws = Workspace(workspace_name = workspace_name,\n",
|
||||||
|
" subscription_id = subscription_id,\n",
|
||||||
|
" resource_group = resource_group,\n",
|
||||||
|
" auth = auth_sp)\n",
|
||||||
|
"\n",
|
||||||
|
"# Persist the subscription id, resource group name, and workspace name in aml_config/config.json.\n",
|
||||||
|
"ws.write_config()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Create a Folder to Host Sample Projects\n",
|
||||||
|
"Finally, create a folder where all the sample projects will be hosted."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import os\n",
|
||||||
|
"\n",
|
||||||
|
"sample_projects_folder = './sample_projects'\n",
|
||||||
|
"\n",
|
||||||
|
"if not os.path.isdir(sample_projects_folder):\n",
|
||||||
|
" os.mkdir(sample_projects_folder)\n",
|
||||||
|
" \n",
|
||||||
|
"print('Sample projects will be created in {}.'.format(sample_projects_folder))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Create an Experiment\n",
|
||||||
|
"\n",
|
||||||
|
"As part of the setup you have already created an Azure ML `Workspace` object. For Automated ML you will need to create an `Experiment` object, which is a named object in a `Workspace` used to run experiments."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import logging\n",
|
||||||
|
"import os\n",
|
||||||
|
"import random\n",
|
||||||
|
"import time\n",
|
||||||
|
"\n",
|
||||||
|
"from matplotlib import pyplot as plt\n",
|
||||||
|
"from matplotlib.pyplot import imshow\n",
|
||||||
|
"import numpy as np\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"\n",
|
||||||
|
"import azureml.core\n",
|
||||||
|
"from azureml.core.experiment import Experiment\n",
|
||||||
|
"from azureml.core.workspace import Workspace\n",
|
||||||
|
"from azureml.train.automl import AutoMLConfig\n",
|
||||||
|
"from azureml.train.automl.run import AutoMLRun"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Choose a name for the experiment and specify the project folder.\n",
|
||||||
|
"experiment_name = 'automl-local-classification-hdi'\n",
|
||||||
|
"project_folder = './sample_projects/automl-local-classification-hdi'\n",
|
||||||
|
"\n",
|
||||||
|
"experiment = Experiment(ws, experiment_name)\n",
|
||||||
|
"\n",
|
||||||
|
"output = {}\n",
|
||||||
|
"output['SDK version'] = azureml.core.VERSION\n",
|
||||||
|
"output['Subscription ID'] = ws.subscription_id\n",
|
||||||
|
"output['Workspace Name'] = ws.name\n",
|
||||||
|
"output['Resource Group'] = ws.resource_group\n",
|
||||||
|
"output['Location'] = ws.location\n",
|
||||||
|
"output['Project Directory'] = project_folder\n",
|
||||||
|
"output['Experiment Name'] = experiment.name\n",
|
||||||
|
"pd.set_option('display.max_colwidth', -1)\n",
|
||||||
|
"pd.DataFrame(data = output, index = ['']).T"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Diagnostics\n",
|
||||||
|
"\n",
|
||||||
|
"Opt-in diagnostics for better experience, quality, and security of future releases."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.telemetry import set_diagnostics_collection\n",
|
||||||
|
"set_diagnostics_collection(send_diagnostics = True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Registering Datastore"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Datastore is the way to save connection information to a storage service (e.g. Azure Blob, Azure Data Lake, Azure SQL) information to your workspace so you can access them without exposing credentials in your code. The first thing you will need to do is register a datastore, you can refer to our [python SDK documentation](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.datastore.datastore?view=azure-ml-py) on how to register datastores. __Note: for best security practices, please do not check in code that contains registering datastores with secrets into your source control__\n",
|
||||||
|
"\n",
|
||||||
|
"The code below registers a datastore pointing to a publicly readable blob container."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core import Datastore\n",
|
||||||
|
"\n",
|
||||||
|
"datastore_name = 'demo_training'\n",
|
||||||
|
"container_name = 'digits' \n",
|
||||||
|
"account_name = 'automlpublicdatasets'\n",
|
||||||
|
"Datastore.register_azure_blob_container(\n",
|
||||||
|
" workspace = ws, \n",
|
||||||
|
" datastore_name = datastore_name, \n",
|
||||||
|
" container_name = container_name, \n",
|
||||||
|
" account_name = account_name,\n",
|
||||||
|
" overwrite = True\n",
|
||||||
|
")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Below is an example on how to register a private blob container\n",
|
||||||
|
"```python\n",
|
||||||
|
"datastore = Datastore.register_azure_blob_container(\n",
|
||||||
|
" workspace = ws, \n",
|
||||||
|
" datastore_name = 'example_datastore', \n",
|
||||||
|
" container_name = 'example-container', \n",
|
||||||
|
" account_name = 'storageaccount',\n",
|
||||||
|
" account_key = 'accountkey'\n",
|
||||||
|
")\n",
|
||||||
|
"```\n",
|
||||||
|
"The example below shows how to register an Azure Data Lake store. Please make sure you have granted the necessary permissions for the service principal to access the data lake.\n",
|
||||||
|
"```python\n",
|
||||||
|
"datastore = Datastore.register_azure_data_lake(\n",
|
||||||
|
" workspace = ws,\n",
|
||||||
|
" datastore_name = 'example_datastore',\n",
|
||||||
|
" store_name = 'adlsstore',\n",
|
||||||
|
" tenant_id = 'tenant-id-of-service-principal',\n",
|
||||||
|
" client_id = 'client-id-of-service-principal',\n",
|
||||||
|
" client_secret = 'client-secret-of-service-principal'\n",
|
||||||
|
")\n",
|
||||||
|
"```"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Load Training Data Using DataPrep"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Automated ML takes a Dataflow as input.\n",
|
||||||
|
"\n",
|
||||||
|
"If you are familiar with Pandas and have done your data preparation work in Pandas already, you can use the `read_pandas_dataframe` method in dprep to convert the DataFrame to a Dataflow.\n",
|
||||||
|
"```python\n",
|
||||||
|
"df = pd.read_csv(...)\n",
|
||||||
|
"# apply some transforms\n",
|
||||||
|
"dprep.read_pandas_dataframe(df, temp_folder='/path/accessible/by/both/driver/and/worker')\n",
|
||||||
|
"```\n",
|
||||||
|
"\n",
|
||||||
|
"If you just need to ingest data without doing any preparation, you can directly use AzureML Data Prep (Data Prep) to do so. The code below demonstrates this scenario. Data Prep also has data preparation capabilities, we have many [sample notebooks](https://github.com/Microsoft/AMLDataPrepDocs) demonstrating the capabilities.\n",
|
||||||
|
"\n",
|
||||||
|
"You will get the datastore you registered previously and pass it to Data Prep for reading. The data comes from the digits dataset: `sklearn.datasets.load_digits()`. `DataPath` points to a specific location within a datastore. "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import azureml.dataprep as dprep\n",
|
||||||
|
"from azureml.data.datapath import DataPath\n",
|
||||||
|
"\n",
|
||||||
|
"datastore = Datastore.get(workspace = ws, datastore_name = datastore_name)\n",
|
||||||
|
"\n",
|
||||||
|
"X_train = dprep.read_csv(datastore.path('X.csv'))\n",
|
||||||
|
"y_train = dprep.read_csv(datastore.path('y.csv')).to_long(dprep.ColumnSelector(term='.*', use_regex = True))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Review the Data Preparation Result\n",
|
||||||
|
"You can peek the result of a Dataflow at any range using `skip(i)` and `head(j)`. Doing so evaluates only j records for all the steps in the Dataflow, which makes it fast even against large datasets."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"X_train.get_profile()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"y_train.get_profile()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Configure AutoML\n",
|
||||||
|
"\n",
|
||||||
|
"Instantiate an `AutoMLConfig` object to specify the settings and data used to run the experiment.\n",
|
||||||
|
"\n",
|
||||||
|
"|Property|Description|\n",
|
||||||
|
"|-|-|\n",
|
||||||
|
"|**task**|classification or regression|\n",
|
||||||
|
"|**primary_metric**|This is the metric that you want to optimize. Classification supports the following primary metrics: <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>average_precision_score_weighted</i><br><i>norm_macro_recall</i><br><i>precision_score_weighted</i>|\n",
|
||||||
|
"|**primary_metric**|This is the metric that you want to optimize. Regression supports the following primary metrics: <br><i>spearman_correlation</i><br><i>normalized_root_mean_squared_error</i><br><i>r2_score</i><br><i>normalized_mean_absolute_error</i>|\n",
|
||||||
|
"|**iteration_timeout_minutes**|Time limit in minutes for each iteration.|\n",
|
||||||
|
"|**iterations**|Number of iterations. In each iteration AutoML trains a specific pipeline with the data.|\n",
|
||||||
|
"|**n_cross_validations**|Number of cross validation splits.|\n",
|
||||||
|
"|**spark_context**|Spark Context object. for HDInsight, use spark_context=sc|\n",
|
||||||
|
"|**max_concurrent_iterations**|Maximum number of iterations to execute in parallel. This should be <= number of worker nodes in your Azure HDInsight cluster.|\n",
|
||||||
|
"|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n",
|
||||||
|
"|**y**|(sparse) array-like, shape = [n_samples, ], [n_samples, n_classes]<br>Multi-class targets. An indicator matrix turns on multilabel classification. This should be an array of integers.|\n",
|
||||||
|
"|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder.|\n",
|
||||||
|
"|**preprocess**|set this to True to enable pre-processing of data eg. string to numeric using one-hot encoding|\n",
|
||||||
|
"|**exit_score**|Target score for experiment. It is associated with the metric. eg. exit_score=0.995 will exit experiment after that|"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"automl_config = AutoMLConfig(task = 'classification',\n",
|
||||||
|
" debug_log = 'automl_errors.log',\n",
|
||||||
|
" primary_metric = 'AUC_weighted',\n",
|
||||||
|
" iteration_timeout_minutes = 10,\n",
|
||||||
|
" iterations = 3,\n",
|
||||||
|
" preprocess = True,\n",
|
||||||
|
" n_cross_validations = 10,\n",
|
||||||
|
" max_concurrent_iterations = 2, #change it based on number of worker nodes\n",
|
||||||
|
" verbosity = logging.INFO,\n",
|
||||||
|
" spark_context=sc, #HDI /spark related\n",
|
||||||
|
" X = X_train, \n",
|
||||||
|
" y = y_train,\n",
|
||||||
|
" path = project_folder)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Train the Models\n",
|
||||||
|
"\n",
|
||||||
|
"Call the `submit` method on the experiment object and pass the run configuration. Execution of local runs is synchronous. Depending on the data and the number of iterations this can run for a while."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"local_run = experiment.submit(automl_config, show_output = True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Explore the Results"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"The following will show the child runs and waits for the parent run to complete."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"#### Retrieve All Child Runs after the experiment is completed (in portal)\n",
|
||||||
|
"You can also use SDK methods to fetch all the child runs and see individual metrics that we log."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"children = list(local_run.get_children())\n",
|
||||||
|
"metricslist = {}\n",
|
||||||
|
"for run in children:\n",
|
||||||
|
" properties = run.get_properties()\n",
|
||||||
|
" metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)} \n",
|
||||||
|
" metricslist[int(properties['iteration'])] = metrics\n",
|
||||||
|
"\n",
|
||||||
|
"rundata = pd.DataFrame(metricslist).sort_index(1)\n",
|
||||||
|
"rundata"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Retrieve the Best Model after the above run is complete \n",
|
||||||
|
"\n",
|
||||||
|
"Below we select the best pipeline from our iterations. The `get_output` method returns the best run and the fitted model. The Model includes the pipeline and any pre-processing. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"best_run, fitted_model = local_run.get_output()\n",
|
||||||
|
"print(best_run)\n",
|
||||||
|
"print(fitted_model)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"#### Best Model Based on Any Other Metric after the above run is complete based on the child run\n",
|
||||||
|
"Show the run and the model that has the smallest `log_loss` value:"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"lookup_metric = \"log_loss\"\n",
|
||||||
|
"best_run, fitted_model = local_run.get_output(metric = lookup_metric)\n",
|
||||||
|
"print(best_run)\n",
|
||||||
|
"print(fitted_model)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Test the Best Fitted Model\n",
|
||||||
|
"\n",
|
||||||
|
"#### Load Test Data - you can split the dataset beforehand & pass Train dataset to AutoML and use Test dataset to evaluate the best model."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"blob_location = \"https://{}.blob.core.windows.net/{}\".format(account_name, container_name)\n",
|
||||||
|
"X_test = pd.read_csv(\"{}./X_valid.csv\".format(blob_location), header=0)\n",
|
||||||
|
"y_test = pd.read_csv(\"{}/y_valid.csv\".format(blob_location), header=0)\n",
|
||||||
|
"images = pd.read_csv(\"{}/images.csv\".format(blob_location), header=None)\n",
|
||||||
|
"images = np.reshape(images.values, (100,8,8))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"#### Testing Our Best Fitted Model\n",
|
||||||
|
"We will try to predict digits and see how our model works. This is just an example to show you."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Randomly select digits and test.\n",
|
||||||
|
"for index in np.random.choice(len(y_test), 2, replace = False):\n",
|
||||||
|
" print(index)\n",
|
||||||
|
" predicted = fitted_model.predict(X_test[index:index + 1])[0]\n",
|
||||||
|
" label = y_test.values[index]\n",
|
||||||
|
" title = \"Label value = %d Predicted value = %d \" % (label, predicted)\n",
|
||||||
|
" fig = plt.figure(3, figsize = (5,5))\n",
|
||||||
|
" ax1 = fig.add_axes((0,0,.8,.8))\n",
|
||||||
|
" ax1.set_title(title)\n",
|
||||||
|
" plt.imshow(images[index], cmap = plt.cm.gray_r, interpolation = 'nearest')\n",
|
||||||
|
" display(fig)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"When deploying an automated ML trained model, please specify _pippackages=['azureml-sdk[automl]']_ in your CondaDependencies.\n",
|
||||||
|
"\n",
|
||||||
|
"Please refer to only the **Deploy** section in this notebook - <a href=\"https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/classification-with-deployment\" target=\"_blank\">Deployment of Automated ML trained model</a>"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "savitam"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "sasum"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3.6",
|
||||||
|
"language": "Python",
|
||||||
|
"name": "Python36"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "python",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "pyspark3",
|
||||||
|
"pygments_lexer": "python3"
|
||||||
|
},
|
||||||
|
"name": "auto-ml-classification-local-adb",
|
||||||
|
"notebookId": 587284549713154
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 1
|
||||||
|
}
|
||||||
25
how-to-use-azureml/deploy-to-cloud/README.md
Normal file
25
how-to-use-azureml/deploy-to-cloud/README.md
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
# Model Deployment with Azure ML service
|
||||||
|
|
||||||
|
You can use Azure Machine Learning to package, debug, validate and deploy inference containers to a variety of compute targets. This process is known as "MLOps" (ML operationalization).
|
||||||
|
For more information please check out this article: https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-deploy-and-where
|
||||||
|
|
||||||
|
## Get Started
|
||||||
|
To begin, you will need an ML workspace.
|
||||||
|
For more information please check out this article: https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-workspace
|
||||||
|
|
||||||
|
## Deploy to the cloud
|
||||||
|
You can deploy to the cloud using the Azure ML CLI or the Azure ML SDK.
|
||||||
|
|
||||||
|
### Deploy with the CLI
|
||||||
|
```
|
||||||
|
az extension add -n azure-cli-ml
|
||||||
|
az ml folder attach -w myworkspace -g myresourcegroup
|
||||||
|
az ml model register -n sklearn_regression_model.pkl -p sklearn_regression_model.pkl -t model.json
|
||||||
|
az ml model deploy -n acicicd -f model.json --ic inferenceConfig.yml --dc deploymentConfig.yml
|
||||||
|
```
|
||||||
|
|
||||||
|
Here is an [Azure DevOps Pipelines model deployment example](./azure-pipelines-model-deploy.yml)
|
||||||
|
[](https://aidemos.visualstudio.com/azmlcli/_build/latest?definitionId=87&branchName=cli-ga)
|
||||||
|
|
||||||
|
### Deploy from a notebook
|
||||||
|
- Notebook example: [model-register-and-deploy](./model-register-and-deploy.ipynb).
|
||||||
@@ -0,0 +1,50 @@
|
|||||||
|
trigger:
|
||||||
|
- master
|
||||||
|
|
||||||
|
pool:
|
||||||
|
vmImage: 'Ubuntu-16.04'
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- task: DownloadSecureFile@1
|
||||||
|
inputs:
|
||||||
|
name: config.json
|
||||||
|
secureFile: config.json
|
||||||
|
|
||||||
|
- script: cp $(Agent.TempDirectory)/config.json $(Build.SourcesDirectory)
|
||||||
|
|
||||||
|
- task: AzureCLI@1
|
||||||
|
displayName: 'Install the CLI'
|
||||||
|
inputs:
|
||||||
|
azureSubscription: 'azmldemows'
|
||||||
|
scriptLocation: inlineScript
|
||||||
|
inlineScript: 'az extension add -n azure-cli-ml'
|
||||||
|
|
||||||
|
- task: AzureCLI@1
|
||||||
|
displayName: 'Attach folder to workspace'
|
||||||
|
inputs:
|
||||||
|
azureSubscription: 'azmldemows'
|
||||||
|
scriptLocation: inlineScript
|
||||||
|
inlineScript: 'az ml folder attach'
|
||||||
|
|
||||||
|
- task: AzureCLI@1
|
||||||
|
displayName: 'Register model'
|
||||||
|
inputs:
|
||||||
|
azureSubscription: 'azmldemows'
|
||||||
|
scriptLocation: inlineScript
|
||||||
|
inlineScript: 'az ml model register -n sklearn_regression_model.pkl -p sklearn_regression_model.pkl -t model.json'
|
||||||
|
workingDirectory: 'how-to-use-azureml/deploy-to-cloud/'
|
||||||
|
|
||||||
|
- task: AzureCLI@1
|
||||||
|
displayName: 'Deploy model'
|
||||||
|
inputs:
|
||||||
|
azureSubscription: 'azmldemows'
|
||||||
|
scriptLocation: inlineScript
|
||||||
|
inlineScript: 'az ml model deploy -n acicicd -f model.json --ic inferenceConfig.yml --dc deploymentConfig.yml'
|
||||||
|
workingDirectory: 'how-to-use-azureml/deploy-to-cloud/'
|
||||||
|
|
||||||
|
- task: AzureCLI@1
|
||||||
|
displayName: 'Delete deployed service'
|
||||||
|
inputs:
|
||||||
|
azureSubscription: 'azmldemows'
|
||||||
|
scriptLocation: inlineScript
|
||||||
|
inlineScript: 'az ml service delete -n acicicd'
|
||||||
5
how-to-use-azureml/deploy-to-cloud/deploymentConfig.yml
Normal file
5
how-to-use-azureml/deploy-to-cloud/deploymentConfig.yml
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
---
|
||||||
|
containerResourceRequirements:
|
||||||
|
cpu: 1
|
||||||
|
memoryInGB: 1
|
||||||
|
computeType: ACI
|
||||||
7
how-to-use-azureml/deploy-to-cloud/deploymentconfig.json
Normal file
7
how-to-use-azureml/deploy-to-cloud/deploymentconfig.json
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"containerResourceRequirements": {
|
||||||
|
"cpu": 1,
|
||||||
|
"memoryInGB": 1
|
||||||
|
},
|
||||||
|
"computeType": "ACI"
|
||||||
|
}
|
||||||
1
how-to-use-azureml/deploy-to-cloud/helloworld.txt
Normal file
1
how-to-use-azureml/deploy-to-cloud/helloworld.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
RUN echo "this is test"
|
||||||
9
how-to-use-azureml/deploy-to-cloud/inferenceConfig.yml
Normal file
9
how-to-use-azureml/deploy-to-cloud/inferenceConfig.yml
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
entryScript: score.py
|
||||||
|
runtime: python
|
||||||
|
condaFile: myenv.yml
|
||||||
|
extraDockerfileSteps:
|
||||||
|
schemaFile:
|
||||||
|
dependencies:
|
||||||
|
enableGpu: False
|
||||||
|
baseImage:
|
||||||
|
baseImageRegistry:
|
||||||
11
how-to-use-azureml/deploy-to-cloud/inferenceconfig.json
Normal file
11
how-to-use-azureml/deploy-to-cloud/inferenceconfig.json
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
{
|
||||||
|
"entryScript": "score.py",
|
||||||
|
"runtime": "python",
|
||||||
|
"condaFile": "myenv.yml",
|
||||||
|
"extraDockerfileSteps": null,
|
||||||
|
"schemaFile": null,
|
||||||
|
"dependencies": null,
|
||||||
|
"enableGpu": false,
|
||||||
|
"baseImage": null,
|
||||||
|
"baseImageRegistry": null
|
||||||
|
}
|
||||||
@@ -0,0 +1,275 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||||
|
"\n",
|
||||||
|
"Licensed under the MIT License."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Register Model and deploy as Webservice\n",
|
||||||
|
"\n",
|
||||||
|
"This example shows how to deploy a Webservice in step-by-step fashion:\n",
|
||||||
|
"\n",
|
||||||
|
" 1. Register Model\n",
|
||||||
|
" 2. Deploy Model as Webservice"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Prerequisites\n",
|
||||||
|
"Make sure you go through the [configuration](../../../configuration.ipynb) Notebook first if you haven't."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Check core SDK version number\n",
|
||||||
|
"import azureml.core\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"SDK version:\", azureml.core.VERSION)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Initialize Workspace\n",
|
||||||
|
"\n",
|
||||||
|
"Initialize a workspace object from persisted configuration."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"tags": [
|
||||||
|
"create workspace"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core import Workspace\n",
|
||||||
|
"\n",
|
||||||
|
"ws = Workspace.from_config()\n",
|
||||||
|
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Register Model"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"You can add tags and descriptions to your Models. Note you need to have a `sklearn_regression_model.pkl` file in the current directory. This file is generated by the 01 notebook. The below call registers that file as a Model with the same name `sklearn_regression_model.pkl` in the workspace.\n",
|
||||||
|
"\n",
|
||||||
|
"Using tags, you can track useful information such as the name and version of the machine learning library used to train the model. Note that tags must be alphanumeric."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"tags": [
|
||||||
|
"register model from file"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.model import Model\n",
|
||||||
|
"\n",
|
||||||
|
"model = Model.register(model_path = \"sklearn_regression_model.pkl\",\n",
|
||||||
|
" model_name = \"sklearn_regression_model.pkl\",\n",
|
||||||
|
" tags = {'area': \"diabetes\", 'type': \"regression\"},\n",
|
||||||
|
" description = \"Ridge regression model to predict diabetes\",\n",
|
||||||
|
" workspace = ws)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Create Inference Configuration\n",
|
||||||
|
"\n",
|
||||||
|
"There is now support for a source directory, you can upload an entire folder from your local machine as dependencies for the Webservice.\n",
|
||||||
|
"Note: in that case, your entry_script, conda_file, and extra_docker_file_steps paths are relative paths to the source_directory path.\n",
|
||||||
|
"\n",
|
||||||
|
"Sample code for using a source directory:\n",
|
||||||
|
"\n",
|
||||||
|
"```python\n",
|
||||||
|
"inference_config = InferenceConfig(source_directory=\"C:/abc\",\n",
|
||||||
|
" runtime= \"python\", \n",
|
||||||
|
" entry_script=\"x/y/score.py\",\n",
|
||||||
|
" conda_file=\"env/myenv.yml\", \n",
|
||||||
|
" extra_docker_file_steps=\"helloworld.txt\")\n",
|
||||||
|
"```\n",
|
||||||
|
"\n",
|
||||||
|
" - source_directory = holds source path as string, this entire folder gets added in image so its really easy to access any files within this folder or subfolder\n",
|
||||||
|
" - runtime = Which runtime to use for the image. Current supported runtimes are 'spark-py' and 'python\n",
|
||||||
|
" - entry_script = contains logic specific to initializing your model and running predictions\n",
|
||||||
|
" - conda_file = manages conda and python package dependencies.\n",
|
||||||
|
" - extra_docker_file_steps = optional: any extra steps you want to inject into docker file"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"tags": [
|
||||||
|
"create image"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.model import InferenceConfig\n",
|
||||||
|
"\n",
|
||||||
|
"inference_config = InferenceConfig(runtime= \"python\", \n",
|
||||||
|
" entry_script=\"score.py\",\n",
|
||||||
|
" conda_file=\"myenv.yml\", \n",
|
||||||
|
" extra_docker_file_steps=\"helloworld.txt\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Deploy Model as Webservice on Azure Container Instance\n",
|
||||||
|
"\n",
|
||||||
|
"Note that the service creation can take few minutes."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.webservice import AciWebservice, Webservice\n",
|
||||||
|
"from azureml.exceptions import WebserviceException\n",
|
||||||
|
"\n",
|
||||||
|
"deployment_config = AciWebservice.deploy_configuration(cpu_cores = 1, memory_gb = 1)\n",
|
||||||
|
"aci_service_name = 'aciservice1'\n",
|
||||||
|
"\n",
|
||||||
|
"try:\n",
|
||||||
|
" # if you want to get existing service below is the command\n",
|
||||||
|
" # since aci name needs to be unique in subscription deleting existing aci if any\n",
|
||||||
|
" # we use aci_service_name to create azure aci\n",
|
||||||
|
" service = Webservice(ws, name=aci_service_name)\n",
|
||||||
|
" if service:\n",
|
||||||
|
" service.delete()\n",
|
||||||
|
"except WebserviceException as e:\n",
|
||||||
|
" print()\n",
|
||||||
|
"\n",
|
||||||
|
"service = Model.deploy(ws, aci_service_name, [model], inference_config, deployment_config)\n",
|
||||||
|
"\n",
|
||||||
|
"service.wait_for_deployment(True)\n",
|
||||||
|
"print(service.state)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"#### Test web service"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import json\n",
|
||||||
|
"test_sample = json.dumps({'data': [\n",
|
||||||
|
" [1,2,3,4,5,6,7,8,9,10], \n",
|
||||||
|
" [10,9,8,7,6,5,4,3,2,1]\n",
|
||||||
|
"]})\n",
|
||||||
|
"\n",
|
||||||
|
"test_sample_encoded = bytes(test_sample,encoding = 'utf8')\n",
|
||||||
|
"prediction = service.run(input_data=test_sample_encoded)\n",
|
||||||
|
"print(prediction)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"#### Delete ACI to clean up"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"tags": [
|
||||||
|
"deploy service",
|
||||||
|
"aci"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"service.delete()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Model Profiling\n",
|
||||||
|
"\n",
|
||||||
|
"you can also take advantage of profiling feature for model\n",
|
||||||
|
"\n",
|
||||||
|
"```python\n",
|
||||||
|
"\n",
|
||||||
|
"profile = model.profile(ws, \"profilename\", [model], inference_config, test_sample)\n",
|
||||||
|
"profile.wait_for_profiling(True)\n",
|
||||||
|
"profiling_results = profile.get_results()\n",
|
||||||
|
"print(profiling_results)\n",
|
||||||
|
"\n",
|
||||||
|
"```"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "aashishb"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3.6",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python36"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.7.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
8
how-to-use-azureml/deploy-to-cloud/myenv.yml
Normal file
8
how-to-use-azureml/deploy-to-cloud/myenv.yml
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
name: project_environment
|
||||||
|
dependencies:
|
||||||
|
- python=3.6.2
|
||||||
|
- pip:
|
||||||
|
- azureml-defaults
|
||||||
|
- scikit-learn
|
||||||
|
- numpy
|
||||||
|
- inference-schema[numpy-support]
|
||||||
8
how-to-use-azureml/deploy-to-cloud/mylib.py
Normal file
8
how-to-use-azureml/deploy-to-cloud/mylib.py
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
# Copyright (c) Microsoft. All rights reserved.
|
||||||
|
# Licensed under the MIT license.
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
def get_alphas():
|
||||||
|
# list of numbers from 0.0 to 1.0 with a 0.05 interval
|
||||||
|
return np.arange(0.0, 1.0, 0.05)
|
||||||
34
how-to-use-azureml/deploy-to-cloud/score.py
Normal file
34
how-to-use-azureml/deploy-to-cloud/score.py
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
import pickle
|
||||||
|
import json
|
||||||
|
import numpy as np
|
||||||
|
from sklearn.externals import joblib
|
||||||
|
from sklearn.linear_model import Ridge
|
||||||
|
from azureml.core.model import Model
|
||||||
|
|
||||||
|
from inference_schema.schema_decorators import input_schema, output_schema
|
||||||
|
from inference_schema.parameter_types.numpy_parameter_type import NumpyParameterType
|
||||||
|
|
||||||
|
|
||||||
|
def init():
|
||||||
|
global model
|
||||||
|
# note here "sklearn_regression_model.pkl" is the name of the model registered under
|
||||||
|
# this is a different behavior than before when the code is run locally, even though the code is the same.
|
||||||
|
model_path = Model.get_model_path('sklearn_regression_model.pkl')
|
||||||
|
# deserialize the model file back into a sklearn model
|
||||||
|
model = joblib.load(model_path)
|
||||||
|
|
||||||
|
|
||||||
|
input_sample = np.array([[10, 9, 8, 7, 6, 5, 4, 3, 2, 1]])
|
||||||
|
output_sample = np.array([3726.995])
|
||||||
|
|
||||||
|
|
||||||
|
@input_schema('data', NumpyParameterType(input_sample))
|
||||||
|
@output_schema(NumpyParameterType(output_sample))
|
||||||
|
def run(data):
|
||||||
|
try:
|
||||||
|
result = model.predict(data)
|
||||||
|
# you can return any datatype as long as it is JSON-serializable
|
||||||
|
return result.tolist()
|
||||||
|
except Exception as e:
|
||||||
|
error = str(e)
|
||||||
|
return error
|
||||||
BIN
how-to-use-azureml/deploy-to-cloud/sklearn_regression_model.pkl
Normal file
BIN
how-to-use-azureml/deploy-to-cloud/sklearn_regression_model.pkl
Normal file
Binary file not shown.
45
how-to-use-azureml/deploy-to-cloud/train.py
Normal file
45
how-to-use-azureml/deploy-to-cloud/train.py
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
# Copyright (c) Microsoft. All rights reserved.
|
||||||
|
# Licensed under the MIT license.
|
||||||
|
|
||||||
|
from sklearn.datasets import load_diabetes
|
||||||
|
from sklearn.linear_model import Ridge
|
||||||
|
from sklearn.metrics import mean_squared_error
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
from azureml.core.run import Run
|
||||||
|
from sklearn.externals import joblib
|
||||||
|
import os
|
||||||
|
import numpy as np
|
||||||
|
import mylib
|
||||||
|
|
||||||
|
os.makedirs('./outputs', exist_ok=True)
|
||||||
|
|
||||||
|
X, y = load_diabetes(return_X_y=True)
|
||||||
|
|
||||||
|
run = Run.get_context()
|
||||||
|
|
||||||
|
X_train, X_test, y_train, y_test = train_test_split(X, y,
|
||||||
|
test_size=0.2,
|
||||||
|
random_state=0)
|
||||||
|
data = {"train": {"X": X_train, "y": y_train},
|
||||||
|
"test": {"X": X_test, "y": y_test}}
|
||||||
|
|
||||||
|
# list of numbers from 0.0 to 1.0 with a 0.05 interval
|
||||||
|
alphas = mylib.get_alphas()
|
||||||
|
|
||||||
|
for alpha in alphas:
|
||||||
|
# Use Ridge algorithm to create a regression model
|
||||||
|
reg = Ridge(alpha=alpha)
|
||||||
|
reg.fit(data["train"]["X"], data["train"]["y"])
|
||||||
|
|
||||||
|
preds = reg.predict(data["test"]["X"])
|
||||||
|
mse = mean_squared_error(preds, data["test"]["y"])
|
||||||
|
run.log('alpha', alpha)
|
||||||
|
run.log('mse', mse)
|
||||||
|
|
||||||
|
model_file_name = 'ridge_{0:.2f}.pkl'.format(alpha)
|
||||||
|
# save model in the outputs folder so it automatically get uploaded
|
||||||
|
with open(model_file_name, "wb") as file:
|
||||||
|
joblib.dump(value=reg, filename=os.path.join('./outputs/',
|
||||||
|
model_file_name))
|
||||||
|
|
||||||
|
print('alpha is {0:.2f}, and mse is {1:0.2f}'.format(alpha, mse))
|
||||||
12
how-to-use-azureml/deploy-to-local/README.md
Normal file
12
how-to-use-azureml/deploy-to-local/README.md
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
# Model Deployment with Azure ML service
|
||||||
|
You can use Azure Machine Learning to package, debug, validate and deploy inference containers to a variety of compute targets. This process is known as "MLOps" (ML operationalization).
|
||||||
|
For more information please check out this article: https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-deploy-and-where
|
||||||
|
|
||||||
|
## Get Started
|
||||||
|
To begin, you will need an ML workspace.
|
||||||
|
For more information please check out this article: https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-workspace
|
||||||
|
|
||||||
|
## Deploy locally
|
||||||
|
You can deploy a model locally for testing & debugging using the Azure ML CLI or the Azure ML SDK.
|
||||||
|
- CLI example: https://aka.ms/azmlcli
|
||||||
|
- Notebook example: [register-model-deploy-local](./register-model-deploy-local.ipynb).
|
||||||
BIN
how-to-use-azureml/deploy-to-local/dockerSharedDrive.JPG
Normal file
BIN
how-to-use-azureml/deploy-to-local/dockerSharedDrive.JPG
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 22 KiB |
1
how-to-use-azureml/deploy-to-local/helloworld.txt
Normal file
1
how-to-use-azureml/deploy-to-local/helloworld.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
RUN echo "this is test"
|
||||||
8
how-to-use-azureml/deploy-to-local/myenv.yml
Normal file
8
how-to-use-azureml/deploy-to-local/myenv.yml
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
name: project_environment
|
||||||
|
dependencies:
|
||||||
|
- python=3.6.2
|
||||||
|
- pip:
|
||||||
|
- azureml-defaults
|
||||||
|
- scikit-learn
|
||||||
|
- numpy
|
||||||
|
- inference-schema[numpy-support]
|
||||||
@@ -0,0 +1,487 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||||
|
"\n",
|
||||||
|
"Licensed under the MIT License."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Register model and deploy locally with advanced usages\n",
|
||||||
|
"\n",
|
||||||
|
"This example shows how to deploy a web service in step-by-step fashion:\n",
|
||||||
|
"\n",
|
||||||
|
" 1. Register model\n",
|
||||||
|
" 2. Deploy the image as a web service in a local Docker container.\n",
|
||||||
|
" 3. Quickly test changes to your entry script by reloading the local service.\n",
|
||||||
|
" 4. Optionally, you can also make changes to model, conda or extra_docker_file_steps and update local service"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Prerequisites\n",
|
||||||
|
"Make sure you go through the [configuration](../../../configuration.ipynb) Notebook first if you haven't."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Check core SDK version number\n",
|
||||||
|
"import azureml.core\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"SDK version:\", azureml.core.VERSION)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Initialize Workspace\n",
|
||||||
|
"\n",
|
||||||
|
"Initialize a workspace object from persisted configuration."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"tags": [
|
||||||
|
"create workspace"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core import Workspace\n",
|
||||||
|
"\n",
|
||||||
|
"ws = Workspace.from_config()\n",
|
||||||
|
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Register Model"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"You can add tags and descriptions to your models. we are using `sklearn_regression_model.pkl` file in the current directory as a model with the same name `sklearn_regression_model.pkl` in the workspace.\n",
|
||||||
|
"\n",
|
||||||
|
"Using tags, you can track useful information such as the name and version of the machine learning library used to train the model, framework, category, target customer etc. Note that tags must be alphanumeric."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"tags": [
|
||||||
|
"register model from file"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.model import Model\n",
|
||||||
|
"\n",
|
||||||
|
"model = Model.register(model_path = \"sklearn_regression_model.pkl\",\n",
|
||||||
|
" model_name = \"sklearn_regression_model.pkl\",\n",
|
||||||
|
" tags = {'area': \"diabetes\", 'type': \"regression\"},\n",
|
||||||
|
" description = \"Ridge regression model to predict diabetes\",\n",
|
||||||
|
" workspace = ws)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Manage your dependencies in a folder"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import os\n",
|
||||||
|
"\n",
|
||||||
|
"source_directory = \"C:/abc\"\n",
|
||||||
|
"\n",
|
||||||
|
"os.makedirs(source_directory, exist_ok = True)\n",
|
||||||
|
"os.makedirs(\"C:/abc/x/y\", exist_ok = True)\n",
|
||||||
|
"os.makedirs(\"C:/abc/env\", exist_ok = True)\n",
|
||||||
|
"os.makedirs(\"C:/abc/dockerstep\", exist_ok = True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Show `score.py`. Note that the `sklearn_regression_model.pkl` in the `get_model_path` call is referring to a model named `sklearn_regression_model.pkl` registered under the workspace. It is NOT referencing the local file."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"%%writefile C:/abc/x/y/score.py\n",
|
||||||
|
"import pickle\n",
|
||||||
|
"import json\n",
|
||||||
|
"import numpy as np\n",
|
||||||
|
"from sklearn.externals import joblib\n",
|
||||||
|
"from sklearn.linear_model import Ridge\n",
|
||||||
|
"from azureml.core.model import Model\n",
|
||||||
|
"\n",
|
||||||
|
"from inference_schema.schema_decorators import input_schema, output_schema\n",
|
||||||
|
"from inference_schema.parameter_types.numpy_parameter_type import NumpyParameterType\n",
|
||||||
|
"\n",
|
||||||
|
"def init():\n",
|
||||||
|
" global model\n",
|
||||||
|
" # note here \"sklearn_regression_model.pkl\" is the name of the model registered under\n",
|
||||||
|
" # this is a different behavior than before when the code is run locally, even though the code is the same.\n",
|
||||||
|
" model_path = Model.get_model_path('sklearn_regression_model.pkl')\n",
|
||||||
|
" # deserialize the model file back into a sklearn model\n",
|
||||||
|
" model = joblib.load(model_path)\n",
|
||||||
|
" global name\n",
|
||||||
|
" # note here, entire source directory on inference config gets added into image\n",
|
||||||
|
" # bellow is the example how you can use any extra files in image\n",
|
||||||
|
" with open('./abc/extradata.json') as json_file: \n",
|
||||||
|
" data = json.load(json_file)\n",
|
||||||
|
" name = data[\"people\"][0][\"name\"]\n",
|
||||||
|
"\n",
|
||||||
|
"input_sample = np.array([[10,9,8,7,6,5,4,3,2,1]])\n",
|
||||||
|
"output_sample = np.array([3726.995])\n",
|
||||||
|
"\n",
|
||||||
|
"@input_schema('data', NumpyParameterType(input_sample))\n",
|
||||||
|
"@output_schema(NumpyParameterType(output_sample))\n",
|
||||||
|
"def run(data):\n",
|
||||||
|
" try:\n",
|
||||||
|
" result = model.predict(data)\n",
|
||||||
|
" # you can return any datatype as long as it is JSON-serializable\n",
|
||||||
|
" return \"Hello \" + name + \" here is your result = \" + str(result)\n",
|
||||||
|
" except Exception as e:\n",
|
||||||
|
" error = str(e)\n",
|
||||||
|
" return error"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"%%writefile C:/abc/env/myenv.yml\n",
|
||||||
|
"name: project_environment\n",
|
||||||
|
"dependencies:\n",
|
||||||
|
" - python=3.6.2\n",
|
||||||
|
" - pip:\n",
|
||||||
|
" - azureml-defaults\n",
|
||||||
|
" - scikit-learn\n",
|
||||||
|
" - numpy\n",
|
||||||
|
" - inference-schema[numpy-support]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"%%writefile C:/abc/dockerstep/customDockerStep.txt\n",
|
||||||
|
"RUN echo \"this is test\""
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"%%writefile C:/abc/extradata.json\n",
|
||||||
|
"{\n",
|
||||||
|
" \"people\": [\n",
|
||||||
|
" {\n",
|
||||||
|
" \"website\": \"microsoft.com\", \n",
|
||||||
|
" \"from\": \"Seattle\", \n",
|
||||||
|
" \"name\": \"Mrudula\"\n",
|
||||||
|
" }\n",
|
||||||
|
" ]\n",
|
||||||
|
"}"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Create Inference Configuration\n",
|
||||||
|
"\n",
|
||||||
|
" - source_directory = holds source path as string, this entire folder gets added in image so its really easy to access any files within this folder or subfolder\n",
|
||||||
|
" - runtime = Which runtime to use for the image. Current supported runtimes are 'spark-py' and 'python\n",
|
||||||
|
" - entry_script = contains logic specific to initializing your model and running predictions\n",
|
||||||
|
" - conda_file = manages conda and python package dependencies.\n",
|
||||||
|
" - extra_docker_file_steps = optional: any extra steps you want to inject into docker file"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.model import InferenceConfig\n",
|
||||||
|
"\n",
|
||||||
|
"inference_config = InferenceConfig(source_directory=\"C:/abc\",\n",
|
||||||
|
" runtime= \"python\", \n",
|
||||||
|
" entry_script=\"x/y/score.py\",\n",
|
||||||
|
" conda_file=\"env/myenv.yml\", \n",
|
||||||
|
" extra_docker_file_steps=\"dockerstep/customDockerStep.txt\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Deploy Model as a Local Docker Web Service\n",
|
||||||
|
"\n",
|
||||||
|
"*Make sure you have Docker installed and running.*\n",
|
||||||
|
"\n",
|
||||||
|
"Note that the service creation can take few minutes.\n",
|
||||||
|
"\n",
|
||||||
|
"NOTE:\n",
|
||||||
|
"\n",
|
||||||
|
"we require docker running with linux container. If you are running Docker for Windows, you need to ensure the Linux Engine is running\n",
|
||||||
|
"\n",
|
||||||
|
" powershell command to switch to linux engine\n",
|
||||||
|
" & 'C:\\Program Files\\Docker\\Docker\\DockerCli.exe' -SwitchLinuxEngine\n",
|
||||||
|
"\n",
|
||||||
|
"and c drive is shared https://docs.docker.com/docker-for-windows/#shared-drives\n",
|
||||||
|
"sometimes you have to reshare c drive as docker \n",
|
||||||
|
"\n",
|
||||||
|
"<img src=\"./dockerSharedDrive.JPG\" align=\"left\"/>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"tags": [
|
||||||
|
"deploy service",
|
||||||
|
"aci"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.webservice import LocalWebservice\n",
|
||||||
|
"\n",
|
||||||
|
"#this is optional, if not provided we choose random port\n",
|
||||||
|
"deployment_config = LocalWebservice.deploy_configuration(port=6789)\n",
|
||||||
|
"\n",
|
||||||
|
"local_service = Model.deploy(ws, \"test\", [model], inference_config, deployment_config)\n",
|
||||||
|
"\n",
|
||||||
|
"local_service.wait_for_deployment()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"print('Local service port: {}'.format(local_service.port))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Check Status and Get Container Logs\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"print(local_service.get_logs())"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Test Web Service"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Call the web service with some input data to get a prediction."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import json\n",
|
||||||
|
"\n",
|
||||||
|
"sample_input = json.dumps({\n",
|
||||||
|
" 'data': [\n",
|
||||||
|
" [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],\n",
|
||||||
|
" [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]\n",
|
||||||
|
" ]\n",
|
||||||
|
"})\n",
|
||||||
|
"\n",
|
||||||
|
"sample_input = bytes(sample_input, encoding='utf-8')\n",
|
||||||
|
"\n",
|
||||||
|
"print(local_service.run(input_data=sample_input))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Reload Service\n",
|
||||||
|
"\n",
|
||||||
|
"You can update your score.py file and then call `reload()` to quickly restart the service. This will only reload your execution script and dependency files, it will not rebuild the underlying Docker image. As a result, `reload()` is fast, but if you do need to rebuild the image -- to add a new Conda or pip package, for instance -- you will have to call `update()`, instead (see below)."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"%%writefile C:/abc/x/y/score.py\n",
|
||||||
|
"import pickle\n",
|
||||||
|
"import json\n",
|
||||||
|
"import numpy as np\n",
|
||||||
|
"from sklearn.externals import joblib\n",
|
||||||
|
"from sklearn.linear_model import Ridge\n",
|
||||||
|
"from azureml.core.model import Model\n",
|
||||||
|
"\n",
|
||||||
|
"from inference_schema.schema_decorators import input_schema, output_schema\n",
|
||||||
|
"from inference_schema.parameter_types.numpy_parameter_type import NumpyParameterType\n",
|
||||||
|
"\n",
|
||||||
|
"def init():\n",
|
||||||
|
" global model\n",
|
||||||
|
" # note here \"sklearn_regression_model.pkl\" is the name of the model registered under\n",
|
||||||
|
" # this is a different behavior than before when the code is run locally, even though the code is the same.\n",
|
||||||
|
" model_path = Model.get_model_path('sklearn_regression_model.pkl')\n",
|
||||||
|
" # deserialize the model file back into a sklearn model\n",
|
||||||
|
" model = joblib.load(model_path)\n",
|
||||||
|
" global name, from_location\n",
|
||||||
|
" # note here, entire source directory on inference config gets added into image\n",
|
||||||
|
" # bellow is the example how you can use any extra files in image\n",
|
||||||
|
" with open('./abc/extradata.json') as json_file: \n",
|
||||||
|
" data = json.load(json_file)\n",
|
||||||
|
" name = data[\"people\"][0][\"name\"]\n",
|
||||||
|
" from_location = data[\"people\"][0][\"from\"]\n",
|
||||||
|
"\n",
|
||||||
|
"input_sample = np.array([[10,9,8,7,6,5,4,3,2,1]])\n",
|
||||||
|
"output_sample = np.array([3726.995])\n",
|
||||||
|
"\n",
|
||||||
|
"@input_schema('data', NumpyParameterType(input_sample))\n",
|
||||||
|
"@output_schema(NumpyParameterType(output_sample))\n",
|
||||||
|
"def run(data):\n",
|
||||||
|
" try:\n",
|
||||||
|
" result = model.predict(data)\n",
|
||||||
|
" # you can return any datatype as long as it is JSON-serializable\n",
|
||||||
|
" return \"Hello \" + name + \" from \" + from_location + \" here is your result = \" + str(result)\n",
|
||||||
|
" except Exception as e:\n",
|
||||||
|
" error = str(e)\n",
|
||||||
|
" return error"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"local_service.reload()\n",
|
||||||
|
"print(\"--------------------------------------------------------------\")\n",
|
||||||
|
"\n",
|
||||||
|
"# after reload now if you call run this will return updated return message\n",
|
||||||
|
"\n",
|
||||||
|
"print(local_service.run(input_data=sample_input))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Update Service\n",
|
||||||
|
"\n",
|
||||||
|
"If you want to change your model(s), Conda dependencies, or deployment configuration, call `update()` to rebuild the Docker image.\n",
|
||||||
|
"\n",
|
||||||
|
"```python\n",
|
||||||
|
"\n",
|
||||||
|
"local_service.update(models = [SomeOtherModelObject],\n",
|
||||||
|
" deployment_config = local_config,\n",
|
||||||
|
" inference_config = inference_config)\n",
|
||||||
|
"```"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Delete Service"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"local_service.delete()"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "raymondl"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3.6",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python36"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.7.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
@@ -0,0 +1,342 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||||
|
"\n",
|
||||||
|
"Licensed under the MIT License."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Register model and deploy locally\n",
|
||||||
|
"\n",
|
||||||
|
"This example shows how to deploy a web service in step-by-step fashion:\n",
|
||||||
|
"\n",
|
||||||
|
" 1. Register model\n",
|
||||||
|
" 2. Deploy the image as a web service in a local Docker container.\n",
|
||||||
|
" 3. Quickly test changes to your entry script by reloading the local service.\n",
|
||||||
|
" 4. Optionally, you can also make changes to model, conda or extra_docker_file_steps and update local service"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Prerequisites\n",
|
||||||
|
"Make sure you go through the [configuration](../../../configuration.ipynb) Notebook first if you haven't."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Check core SDK version number\n",
|
||||||
|
"import azureml.core\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"SDK version:\", azureml.core.VERSION)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Initialize Workspace\n",
|
||||||
|
"\n",
|
||||||
|
"Initialize a workspace object from persisted configuration."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core import Workspace\n",
|
||||||
|
"\n",
|
||||||
|
"ws = Workspace.from_config()\n",
|
||||||
|
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Register Model"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"You can add tags and descriptions to your models. we are using `sklearn_regression_model.pkl` file in the current directory as a model with the same name `sklearn_regression_model.pkl` in the workspace.\n",
|
||||||
|
"\n",
|
||||||
|
"Using tags, you can track useful information such as the name and version of the machine learning library used to train the model, framework, category, target customer etc. Note that tags must be alphanumeric."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"tags": [
|
||||||
|
"register model from file"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.model import Model\n",
|
||||||
|
"\n",
|
||||||
|
"model = Model.register(model_path = \"sklearn_regression_model.pkl\",\n",
|
||||||
|
" model_name = \"sklearn_regression_model.pkl\",\n",
|
||||||
|
" tags = {'area': \"diabetes\", 'type': \"regression\"},\n",
|
||||||
|
" description = \"Ridge regression model to predict diabetes\",\n",
|
||||||
|
" workspace = ws)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Create Inference Configuration"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.model import InferenceConfig\n",
|
||||||
|
"\n",
|
||||||
|
"inference_config = InferenceConfig(runtime= \"python\", \n",
|
||||||
|
" entry_script=\"score.py\",\n",
|
||||||
|
" conda_file=\"myenv.yml\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Deploy Model as a Local Docker Web Service\n",
|
||||||
|
"\n",
|
||||||
|
"*Make sure you have Docker installed and running.*\n",
|
||||||
|
"\n",
|
||||||
|
"Note that the service creation can take few minutes.\n",
|
||||||
|
"\n",
|
||||||
|
"NOTE:\n",
|
||||||
|
"\n",
|
||||||
|
"we require docker running with linux container. If you are running Docker for Windows, you need to ensure the Linux Engine is running\n",
|
||||||
|
"\n",
|
||||||
|
" powershell command to switch to linux engine\n",
|
||||||
|
" & 'C:\\Program Files\\Docker\\Docker\\DockerCli.exe' -SwitchLinuxEngine\n",
|
||||||
|
"\n",
|
||||||
|
"and c drive is shared https://docs.docker.com/docker-for-windows/#shared-drives\n",
|
||||||
|
"sometimes you have to reshare c drive as docker \n",
|
||||||
|
"\n",
|
||||||
|
"<img src=\"./dockerSharedDrive.JPG\" align=\"left\"/>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.webservice import LocalWebservice\n",
|
||||||
|
"\n",
|
||||||
|
"#this is optional, if not provided we choose random port\n",
|
||||||
|
"deployment_config = LocalWebservice.deploy_configuration(port=6789)\n",
|
||||||
|
"\n",
|
||||||
|
"local_service = Model.deploy(ws, \"test\", [model], inference_config, deployment_config)\n",
|
||||||
|
"\n",
|
||||||
|
"local_service.wait_for_deployment()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"print('Local service port: {}'.format(local_service.port))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Check Status and Get Container Logs\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"print(local_service.get_logs())"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Test Web Service"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Call the web service with some input data to get a prediction."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import json\n",
|
||||||
|
"\n",
|
||||||
|
"sample_input = json.dumps({\n",
|
||||||
|
" 'data': [\n",
|
||||||
|
" [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],\n",
|
||||||
|
" [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]\n",
|
||||||
|
" ]\n",
|
||||||
|
"})\n",
|
||||||
|
"\n",
|
||||||
|
"sample_input = bytes(sample_input, encoding='utf-8')\n",
|
||||||
|
"\n",
|
||||||
|
"print(local_service.run(input_data=sample_input))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Reload Service\n",
|
||||||
|
"\n",
|
||||||
|
"You can update your score.py file and then call `reload()` to quickly restart the service. This will only reload your execution script and dependency files, it will not rebuild the underlying Docker image. As a result, `reload()` is fast, but if you do need to rebuild the image -- to add a new Conda or pip package, for instance -- you will have to call `update()`, instead (see below)."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"%%writefile score.py\n",
|
||||||
|
"import pickle\n",
|
||||||
|
"import json\n",
|
||||||
|
"import numpy as np\n",
|
||||||
|
"from sklearn.externals import joblib\n",
|
||||||
|
"from sklearn.linear_model import Ridge\n",
|
||||||
|
"from azureml.core.model import Model\n",
|
||||||
|
"\n",
|
||||||
|
"from inference_schema.schema_decorators import input_schema, output_schema\n",
|
||||||
|
"from inference_schema.parameter_types.numpy_parameter_type import NumpyParameterType\n",
|
||||||
|
"\n",
|
||||||
|
"def init():\n",
|
||||||
|
" global model\n",
|
||||||
|
" # note here \"sklearn_regression_model.pkl\" is the name of the model registered under\n",
|
||||||
|
" # this is a different behavior than before when the code is run locally, even though the code is the same.\n",
|
||||||
|
" model_path = Model.get_model_path('sklearn_regression_model.pkl')\n",
|
||||||
|
" # deserialize the model file back into a sklearn model\n",
|
||||||
|
" model = joblib.load(model_path)\n",
|
||||||
|
"\n",
|
||||||
|
"input_sample = np.array([[10,9,8,7,6,5,4,3,2,1]])\n",
|
||||||
|
"output_sample = np.array([3726.995])\n",
|
||||||
|
"\n",
|
||||||
|
"@input_schema('data', NumpyParameterType(input_sample))\n",
|
||||||
|
"@output_schema(NumpyParameterType(output_sample))\n",
|
||||||
|
"def run(data):\n",
|
||||||
|
" try:\n",
|
||||||
|
" result = model.predict(data)\n",
|
||||||
|
" # you can return any datatype as long as it is JSON-serializable\n",
|
||||||
|
" return 'hello from updated score.py'\n",
|
||||||
|
" except Exception as e:\n",
|
||||||
|
" error = str(e)\n",
|
||||||
|
" return error"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"local_service.reload()\n",
|
||||||
|
"print(\"--------------------------------------------------------------\")\n",
|
||||||
|
"\n",
|
||||||
|
"# after reload now if you call run this will return updated return message\n",
|
||||||
|
"\n",
|
||||||
|
"print(local_service.run(input_data=sample_input))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Update Service\n",
|
||||||
|
"\n",
|
||||||
|
"If you want to change your model(s), Conda dependencies, or deployment configuration, call `update()` to rebuild the Docker image.\n",
|
||||||
|
"\n",
|
||||||
|
"```python\n",
|
||||||
|
"\n",
|
||||||
|
"local_service.update(models = [SomeOtherModelObject],\n",
|
||||||
|
" deployment_config = local_config,\n",
|
||||||
|
" inference_config = inference_config)\n",
|
||||||
|
"```"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Delete Service"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"local_service.delete()"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "raymondl"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3.6",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python36"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.7.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
34
how-to-use-azureml/deploy-to-local/score.py
Normal file
34
how-to-use-azureml/deploy-to-local/score.py
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
import pickle
|
||||||
|
import json
|
||||||
|
import numpy as np
|
||||||
|
from sklearn.externals import joblib
|
||||||
|
from sklearn.linear_model import Ridge
|
||||||
|
from azureml.core.model import Model
|
||||||
|
|
||||||
|
from inference_schema.schema_decorators import input_schema, output_schema
|
||||||
|
from inference_schema.parameter_types.numpy_parameter_type import NumpyParameterType
|
||||||
|
|
||||||
|
|
||||||
|
def init():
|
||||||
|
global model
|
||||||
|
# note here "sklearn_regression_model.pkl" is the name of the model registered under
|
||||||
|
# this is a different behavior than before when the code is run locally, even though the code is the same.
|
||||||
|
model_path = Model.get_model_path('sklearn_regression_model.pkl')
|
||||||
|
# deserialize the model file back into a sklearn model
|
||||||
|
model = joblib.load(model_path)
|
||||||
|
|
||||||
|
|
||||||
|
input_sample = np.array([[10, 9, 8, 7, 6, 5, 4, 3, 2, 1]])
|
||||||
|
output_sample = np.array([3726.995])
|
||||||
|
|
||||||
|
|
||||||
|
@input_schema('data', NumpyParameterType(input_sample))
|
||||||
|
@output_schema(NumpyParameterType(output_sample))
|
||||||
|
def run(data):
|
||||||
|
try:
|
||||||
|
result = model.predict(data)
|
||||||
|
# you can return any datatype as long as it is JSON-serializable
|
||||||
|
return result.tolist()
|
||||||
|
except Exception as e:
|
||||||
|
error = str(e)
|
||||||
|
return error
|
||||||
BIN
how-to-use-azureml/deploy-to-local/sklearn_regression_model.pkl
Normal file
BIN
how-to-use-azureml/deploy-to-local/sklearn_regression_model.pkl
Normal file
Binary file not shown.
@@ -6,15 +6,18 @@ These tutorials show how to create and deploy Open Neural Network eXchange ([ONN
|
|||||||
|
|
||||||
0. [Configure your Azure Machine Learning Workspace](../../../configuration.ipynb)
|
0. [Configure your Azure Machine Learning Workspace](../../../configuration.ipynb)
|
||||||
|
|
||||||
#### Obtain models from the [ONNX Model Zoo](https://github.com/onnx/models) and deploy with ONNX Runtime Inference
|
#### Obtain pretrained models from the [ONNX Model Zoo](https://github.com/onnx/models) and deploy with ONNX Runtime
|
||||||
1. [Handwritten Digit Classification (MNIST)](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/deployment/onnx/onnx-inference-mnist-deploy.ipynb)
|
1. [MNIST - Handwritten Digit Classification with ONNX Runtime](onnx-inference-mnist-deploy.ipynb)
|
||||||
2. [Facial Expression Recognition (Emotion FER+)](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/deployment/onnx/onnx-inference-facial-expression-recognition-deploy.ipynb)
|
2. [Emotion FER+ - Facial Expression Recognition with ONNX Runtime](onnx-inference-facial-expression-recognition-deploy.ipynb)
|
||||||
|
|
||||||
|
#### Train model on Azure ML, convert to ONNX, and deploy with ONNX Runtime
|
||||||
|
3. [MNIST - Train using PyTorch and deploy with ONNX Runtime](onnx-train-pytorch-aml-deploy-mnist.ipynb)
|
||||||
|
|
||||||
#### Demo Notebooks from Microsoft Ignite 2018
|
#### Demo Notebooks from Microsoft Ignite 2018
|
||||||
Note that the following notebooks do not have evaluation sections for the models since they were deployed as part of a live demo. You can find the respective pre-processing and post-processing code linked from the ONNX Model Zoo Github pages ([ResNet](https://github.com/onnx/models/tree/master/models/image_classification/resnet), [TinyYoloV2](https://github.com/onnx/models/tree/master/tiny_yolov2)), or experiment with the ONNX models by [running them in the browser](https://microsoft.github.io/onnxjs-demo/#/).
|
Note that the following notebooks do not have evaluation sections for the models since they were deployed as part of a live demo. You can find the respective pre-processing and post-processing code linked from the ONNX Model Zoo Github pages ([ResNet](https://github.com/onnx/models/tree/master/models/image_classification/resnet), [TinyYoloV2](https://github.com/onnx/models/tree/master/tiny_yolov2)), or experiment with the ONNX models by [running them in the browser](https://microsoft.github.io/onnxjs-demo/#/).
|
||||||
|
|
||||||
3. [Image Recognition (ResNet50)](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/deployment/onnx/onnx-modelzoo-aml-deploy-resnet50.ipynb)
|
4. [ResNet50 - Image Recognition with ONNX Runtime](onnx-modelzoo-aml-deploy-resnet50.ipynb)
|
||||||
4. [Convert Core ML Model to ONNX and deploy - Real Time Object Detection (TinyYOLO)](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/deployment/onnx/onnx-convert-aml-deploy-tinyyolo.ipynb)
|
5. [TinyYoloV2 - Convert from CoreML and deploy with ONNX Runtime](onnx-convert-aml-deploy-tinyyolo.ipynb)
|
||||||
|
|
||||||
## Documentation
|
## Documentation
|
||||||
- [ONNX Runtime Python API Documentation](http://aka.ms/onnxruntime-python)
|
- [ONNX Runtime Python API Documentation](http://aka.ms/onnxruntime-python)
|
||||||
|
|||||||
124
how-to-use-azureml/deployment/onnx/mnist.py
Normal file
124
how-to-use-azureml/deployment/onnx/mnist.py
Normal file
@@ -0,0 +1,124 @@
|
|||||||
|
# This is a modified version of https://github.com/pytorch/examples/blob/master/mnist/main.py which is
|
||||||
|
# licensed under BSD 3-Clause (https://github.com/pytorch/examples/blob/master/LICENSE)
|
||||||
|
|
||||||
|
from __future__ import print_function
|
||||||
|
import argparse
|
||||||
|
import torch
|
||||||
|
import torch.nn as nn
|
||||||
|
import torch.nn.functional as F
|
||||||
|
import torch.optim as optim
|
||||||
|
from torchvision import datasets, transforms
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
class Net(nn.Module):
|
||||||
|
def __init__(self):
|
||||||
|
super(Net, self).__init__()
|
||||||
|
self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
|
||||||
|
self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
|
||||||
|
self.conv2_drop = nn.Dropout2d()
|
||||||
|
self.fc1 = nn.Linear(320, 50)
|
||||||
|
self.fc2 = nn.Linear(50, 10)
|
||||||
|
|
||||||
|
def forward(self, x):
|
||||||
|
x = F.relu(F.max_pool2d(self.conv1(x), 2))
|
||||||
|
x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
|
||||||
|
x = x.view(-1, 320)
|
||||||
|
x = F.relu(self.fc1(x))
|
||||||
|
x = F.dropout(x, training=self.training)
|
||||||
|
x = self.fc2(x)
|
||||||
|
return F.log_softmax(x, dim=1)
|
||||||
|
|
||||||
|
|
||||||
|
def train(args, model, device, train_loader, optimizer, epoch, output_dir):
|
||||||
|
model.train()
|
||||||
|
for batch_idx, (data, target) in enumerate(train_loader):
|
||||||
|
data, target = data.to(device), target.to(device)
|
||||||
|
optimizer.zero_grad()
|
||||||
|
output = model(data)
|
||||||
|
loss = F.nll_loss(output, target)
|
||||||
|
loss.backward()
|
||||||
|
optimizer.step()
|
||||||
|
if batch_idx % args.log_interval == 0:
|
||||||
|
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
|
||||||
|
epoch, batch_idx * len(data), len(train_loader.dataset),
|
||||||
|
100. * batch_idx / len(train_loader), loss.item()))
|
||||||
|
|
||||||
|
|
||||||
|
def test(args, model, device, test_loader):
|
||||||
|
model.eval()
|
||||||
|
test_loss = 0
|
||||||
|
correct = 0
|
||||||
|
with torch.no_grad():
|
||||||
|
for data, target in test_loader:
|
||||||
|
data, target = data.to(device), target.to(device)
|
||||||
|
output = model(data)
|
||||||
|
test_loss += F.nll_loss(output, target, size_average=False, reduce=True).item() # sum up batch loss
|
||||||
|
pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability
|
||||||
|
correct += pred.eq(target.view_as(pred)).sum().item()
|
||||||
|
|
||||||
|
test_loss /= len(test_loader.dataset)
|
||||||
|
print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
|
||||||
|
test_loss, correct, len(test_loader.dataset),
|
||||||
|
100. * correct / len(test_loader.dataset)))
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
# Training settings
|
||||||
|
parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
|
||||||
|
parser.add_argument('--batch-size', type=int, default=64, metavar='N',
|
||||||
|
help='input batch size for training (default: 64)')
|
||||||
|
parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
|
||||||
|
help='input batch size for testing (default: 1000)')
|
||||||
|
parser.add_argument('--epochs', type=int, default=5, metavar='N',
|
||||||
|
help='number of epochs to train (default: 5)')
|
||||||
|
parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
|
||||||
|
help='learning rate (default: 0.01)')
|
||||||
|
parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
|
||||||
|
help='SGD momentum (default: 0.5)')
|
||||||
|
parser.add_argument('--no-cuda', action='store_true', default=False,
|
||||||
|
help='disables CUDA training')
|
||||||
|
parser.add_argument('--seed', type=int, default=1, metavar='S',
|
||||||
|
help='random seed (default: 1)')
|
||||||
|
parser.add_argument('--log-interval', type=int, default=10, metavar='N',
|
||||||
|
help='how many batches to wait before logging training status')
|
||||||
|
parser.add_argument('--output-dir', type=str, default='outputs')
|
||||||
|
args = parser.parse_args()
|
||||||
|
use_cuda = not args.no_cuda and torch.cuda.is_available()
|
||||||
|
|
||||||
|
torch.manual_seed(args.seed)
|
||||||
|
|
||||||
|
device = torch.device("cuda" if use_cuda else "cpu")
|
||||||
|
|
||||||
|
output_dir = args.output_dir
|
||||||
|
os.makedirs(output_dir, exist_ok=True)
|
||||||
|
|
||||||
|
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
|
||||||
|
train_loader = torch.utils.data.DataLoader(
|
||||||
|
datasets.MNIST('data', train=True, download=True,
|
||||||
|
transform=transforms.Compose([transforms.ToTensor(),
|
||||||
|
transforms.Normalize((0.1307,), (0.3081,))])
|
||||||
|
),
|
||||||
|
batch_size=args.batch_size, shuffle=True, **kwargs)
|
||||||
|
test_loader = torch.utils.data.DataLoader(
|
||||||
|
datasets.MNIST('data', train=False,
|
||||||
|
transform=transforms.Compose([transforms.ToTensor(),
|
||||||
|
transforms.Normalize((0.1307,), (0.3081,))])
|
||||||
|
),
|
||||||
|
batch_size=args.test_batch_size, shuffle=True, **kwargs)
|
||||||
|
|
||||||
|
model = Net().to(device)
|
||||||
|
optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)
|
||||||
|
|
||||||
|
for epoch in range(1, args.epochs + 1):
|
||||||
|
train(args, model, device, train_loader, optimizer, epoch, output_dir)
|
||||||
|
test(args, model, device, test_loader)
|
||||||
|
|
||||||
|
# save model
|
||||||
|
dummy_input = torch.randn(1, 1, 28, 28, device=device)
|
||||||
|
model_path = os.path.join(output_dir, 'mnist.onnx')
|
||||||
|
torch.onnx.export(model, dummy_input, model_path)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
||||||
File diff suppressed because one or more lines are too long
@@ -167,6 +167,31 @@
|
|||||||
"image.wait_for_creation(show_output = True)"
|
"image.wait_for_creation(show_output = True)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"#### Use a custom Docker image\n",
|
||||||
|
"\n",
|
||||||
|
"You can also specify a custom Docker image to be used as base image if you don't want to use the default base image provided by Azure ML. Please make sure the custom Docker image has Ubuntu >= 16.04, Conda >= 4.5.\\* and Python(3.5.\\* or 3.6.\\*).\n",
|
||||||
|
"\n",
|
||||||
|
"Only Supported for `ContainerImage`(from azureml.core.image) with `python` runtime.\n",
|
||||||
|
"```python\n",
|
||||||
|
"# use an image available in public Container Registry without authentication\n",
|
||||||
|
"image_config.base_image = \"mcr.microsoft.com/azureml/o16n-sample-user-base/ubuntu-miniconda\"\n",
|
||||||
|
"\n",
|
||||||
|
"# or, use an image available in a private Container Registry\n",
|
||||||
|
"image_config.base_image = \"myregistry.azurecr.io/mycustomimage:1.0\"\n",
|
||||||
|
"image_config.base_image_registry.address = \"myregistry.azurecr.io\"\n",
|
||||||
|
"image_config.base_image_registry.username = \"username\"\n",
|
||||||
|
"image_config.base_image_registry.password = \"password\"\n",
|
||||||
|
"\n",
|
||||||
|
"# or, use an image built during training.\n",
|
||||||
|
"image_config.base_image = run.properties[\"AzureML.DerivedImageName\"]\n",
|
||||||
|
"```\n",
|
||||||
|
"You can get the address of training image from the properties of a Run object. Only new runs submitted with azureml-sdk>=1.0.22 to AMLCompute targets will have the 'AzureML.DerivedImageName' property. Instructions on how to get a Run can be found in [manage-runs](../../training/manage-runs/manage-runs.ipynb). \n"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
@@ -191,6 +216,56 @@
|
|||||||
" provisioning_configuration = prov_config)"
|
" provisioning_configuration = prov_config)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Create AKS Cluster in an existing virtual network (optional)\n",
|
||||||
|
"See code snippet below. Check the documentation [here](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-enable-virtual-network#use-azure-kubernetes-service) for more details."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"'''\n",
|
||||||
|
"from azureml.core.compute import ComputeTarget, AksCompute\n",
|
||||||
|
"\n",
|
||||||
|
"# Create the compute configuration and set virtual network information\n",
|
||||||
|
"config = AksCompute.provisioning_configuration(location=\"eastus2\")\n",
|
||||||
|
"config.vnet_resourcegroup_name = \"mygroup\"\n",
|
||||||
|
"config.vnet_name = \"mynetwork\"\n",
|
||||||
|
"config.subnet_name = \"default\"\n",
|
||||||
|
"config.service_cidr = \"10.0.0.0/16\"\n",
|
||||||
|
"config.dns_service_ip = \"10.0.0.10\"\n",
|
||||||
|
"config.docker_bridge_cidr = \"172.17.0.1/16\"\n",
|
||||||
|
"\n",
|
||||||
|
"# Create the compute target\n",
|
||||||
|
"aks_target = ComputeTarget.create(workspace = ws,\n",
|
||||||
|
" name = \"myaks\",\n",
|
||||||
|
" provisioning_configuration = config)\n",
|
||||||
|
"'''"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Enable SSL on the AKS Cluster (optional)\n",
|
||||||
|
"See code snippet below. Check the documentation [here](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-secure-web-service) for more details"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# provisioning_config = AksCompute.provisioning_configuration(ssl_cert_pem_file=\"cert.pem\", ssl_key_pem_file=\"key.pem\", ssl_cname=\"www.contoso.com\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
@@ -270,8 +345,9 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"# Test the web service\n",
|
"# Test the web service using run method\n",
|
||||||
"We test the web sevice by passing data."
|
"We test the web sevice by passing data.\n",
|
||||||
|
"Run() method retrieves API keys behind the scenes to make sure that call is authenticated."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -293,6 +369,57 @@
|
|||||||
"print(prediction)"
|
"print(prediction)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Test the web service using raw HTTP request (optional)\n",
|
||||||
|
"Alternatively you can construct a raw HTTP request and send it to the service. In this case you need to explicitly pass the HTTP header. This process is shown in the next 2 cells."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# retreive the API keys. AML generates two keys.\n",
|
||||||
|
"'''\n",
|
||||||
|
"key1, Key2 = aks_service.get_keys()\n",
|
||||||
|
"print(key1)\n",
|
||||||
|
"'''"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# construct raw HTTP request and send to the service\n",
|
||||||
|
"'''\n",
|
||||||
|
"%%time\n",
|
||||||
|
"\n",
|
||||||
|
"import requests\n",
|
||||||
|
"\n",
|
||||||
|
"import json\n",
|
||||||
|
"\n",
|
||||||
|
"test_sample = json.dumps({'data': [\n",
|
||||||
|
" [1,2,3,4,5,6,7,8,9,10], \n",
|
||||||
|
" [10,9,8,7,6,5,4,3,2,1]\n",
|
||||||
|
"]})\n",
|
||||||
|
"test_sample = bytes(test_sample,encoding = 'utf8')\n",
|
||||||
|
"\n",
|
||||||
|
"# Don't forget to add key to the HTTP header.\n",
|
||||||
|
"headers = {'Content-Type':'application/json', 'Authorization': 'Bearer ' + key1}\n",
|
||||||
|
"\n",
|
||||||
|
"resp = requests.post(aks_service.scoring_uri, test_sample, headers=headers)\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"prediction:\", resp.text)\n",
|
||||||
|
"'''"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
@@ -317,7 +444,7 @@
|
|||||||
"metadata": {
|
"metadata": {
|
||||||
"authors": [
|
"authors": [
|
||||||
{
|
{
|
||||||
"name": "raymondl"
|
"name": "aashishb"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
|
|||||||
@@ -261,6 +261,31 @@
|
|||||||
"image.wait_for_creation(show_output = True)"
|
"image.wait_for_creation(show_output = True)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"#### Use a custom Docker image\n",
|
||||||
|
"\n",
|
||||||
|
"You can also specify a custom Docker image to be used as base image if you don't want to use the default base image provided by Azure ML. Please make sure the custom Docker image has Ubuntu >= 16.04, Conda >= 4.5.\\* and Python(3.5.\\* or 3.6.\\*).\n",
|
||||||
|
"\n",
|
||||||
|
"Only Supported for `ContainerImage`(from azureml.core.image) with `python` runtime.\n",
|
||||||
|
"```python\n",
|
||||||
|
"# use an image available in public Container Registry without authentication\n",
|
||||||
|
"image_config.base_image = \"mcr.microsoft.com/azureml/o16n-sample-user-base/ubuntu-miniconda\"\n",
|
||||||
|
"\n",
|
||||||
|
"# or, use an image available in a private Container Registry\n",
|
||||||
|
"image_config.base_image = \"myregistry.azurecr.io/mycustomimage:1.0\"\n",
|
||||||
|
"image_config.base_image_registry.address = \"myregistry.azurecr.io\"\n",
|
||||||
|
"image_config.base_image_registry.username = \"username\"\n",
|
||||||
|
"image_config.base_image_registry.password = \"password\"\n",
|
||||||
|
"\n",
|
||||||
|
"# or, use an image built during training.\n",
|
||||||
|
"image_config.base_image = run.properties[\"AzureML.DerivedImageName\"]\n",
|
||||||
|
"```\n",
|
||||||
|
"You can get the address of training image from the properties of a Run object. Only new runs submitted with azureml-sdk>=1.0.22 to AMLCompute targets will have the 'AzureML.DerivedImageName' property. Instructions on how to get a Run can be found in [manage-runs](../../training/manage-runs/manage-runs.ipynb). \n"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
@@ -395,7 +420,7 @@
|
|||||||
"metadata": {
|
"metadata": {
|
||||||
"authors": [
|
"authors": [
|
||||||
{
|
{
|
||||||
"name": "raymondl"
|
"name": "aashishb"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
|
|||||||
@@ -38,18 +38,19 @@ In this directory, there are two types of notebooks:
|
|||||||
|
|
||||||
* The first type of notebooks will introduce you to core Azure Machine Learning Pipelines features. These notebooks below belong in this category, and are designed to go in sequence; they're all located in the "intro-to-pipelines" folder:
|
* The first type of notebooks will introduce you to core Azure Machine Learning Pipelines features. These notebooks below belong in this category, and are designed to go in sequence; they're all located in the "intro-to-pipelines" folder:
|
||||||
|
|
||||||
1. [aml-pipelines-getting-started.ipynb](https://aka.ms/pl-get-started)
|
1. [aml-pipelines-getting-started.ipynb](https://aka.ms/pl-get-started): Start with this notebook to understand the concepts of using Azure Machine Learning Pipelines. This notebook will show you how to runs steps in parallel and in sequence.
|
||||||
2. [aml-pipelines-with-data-dependency-steps.ipynb](https://aka.ms/pl-data-dep)
|
2. [aml-pipelines-with-data-dependency-steps.ipynb](https://aka.ms/pl-data-dep): This notebooks shows how to connect steps in your pipeline using data. Data produced by one step is used by subsequent steps to force an explicit dependency between steps.
|
||||||
3. [aml-pipelines-publish-and-run-using-rest-endpoint.ipynb](https://aka.ms/pl-pub-rep)
|
3. [aml-pipelines-publish-and-run-using-rest-endpoint.ipynb](https://aka.ms/pl-pub-rep): Once you are satisfied with your iterative runs in, you could publish your pipeline to get a REST endpoint which could be invoked from non-Pythons clients as well.
|
||||||
4. [aml-pipelines-data-transfer.ipynb](https://aka.ms/pl-data-trans)
|
4. [aml-pipelines-data-transfer.ipynb](https://aka.ms/pl-data-trans): This notebook shows how you transfer data between supported datastores.
|
||||||
5. [aml-pipelines-use-databricks-as-compute-target.ipynb](https://aka.ms/pl-databricks)
|
5. [aml-pipelines-use-databricks-as-compute-target.ipynb](https://aka.ms/pl-databricks): This notebooks shows how you can use Pipelines to send your compute payload to Azure Databricks.
|
||||||
6. [aml-pipelines-use-adla-as-compute-target.ipynb](https://aka.ms/pl-adla)
|
6. [aml-pipelines-use-adla-as-compute-target.ipynb](https://aka.ms/pl-adla): This notebook shows how you can use Azure Data Lake Analytics (ADLA) as a compute target.
|
||||||
7. [aml-pipelines-parameter-tuning-with-hyperdrive.ipynb](https://aka.ms/pl-hyperdrive)
|
7. [aml-pipelines-how-to-use-estimatorstep.ipynb](https://aka.ms/pl-estimator): This notebook shows how to use the EstimatorStep.
|
||||||
8. [aml-pipelines-how-to-use-azurebatch-to-run-a-windows-executable.ipynb](https://aka.ms/pl-azbatch)
|
7. [aml-pipelines-parameter-tuning-with-hyperdrive.ipynb](https://aka.ms/pl-hyperdrive): HyperDriveStep in Pipelines shows how you can do hyper parameter tuning using Pipelines.
|
||||||
9. [aml-pipelines-setup-schedule-for-a-published-pipeline.ipynb](https://aka.ms/pl-schedule)
|
8. [aml-pipelines-how-to-use-azurebatch-to-run-a-windows-executable.ipynb](https://aka.ms/pl-azbatch): AzureBatchStep can be used to run your custom code in AzureBatch cluster.
|
||||||
10. [aml-pipelines-with-automated-machine-learning-step.ipynb](https://aka.ms/pl-automl)
|
9. [aml-pipelines-setup-schedule-for-a-published-pipeline.ipynb](https://aka.ms/pl-schedule): Once you publish a Pipeline, you can schedule it to trigger based on an interval or on data change in a defined datastore.
|
||||||
|
10. [aml-pipelines-with-automated-machine-learning-step.ipynb](https://aka.ms/pl-automl): AutoMLStep in Pipelines shows how you can do automated machine learning using Pipelines.
|
||||||
|
|
||||||
* The second type of notebooks illustrate more sophisticated scenarios, and are independent of each other. These notebooks include:
|
* The second type of notebooks illustrate more sophisticated scenarios, and are independent of each other. These notebooks include:
|
||||||
|
|
||||||
1. [pipeline-batch-scoring.ipynb](https://aka.ms/pl-batch-score)
|
1. [pipeline-batch-scoring.ipynb](https://aka.ms/pl-batch-score): This notebook demonstrates how to run a batch scoring job using Azure Machine Learning pipelines.
|
||||||
2. [pipeline-style-transfer.ipynb](https://aka.ms/pl-style-trans)
|
2. [pipeline-style-transfer.ipynb](https://aka.ms/pl-style-trans)
|
||||||
|
|||||||
@@ -0,0 +1,15 @@
|
|||||||
|
# Introduction to Azure Machine Learning Pipelines
|
||||||
|
|
||||||
|
The following notebooks provide an introduction to a concept in Azure Machine Learning Pipelines. They will introduce you to core Azure Machine Learning Pipelines features.
|
||||||
|
These notebooks below are designed to go in sequence.
|
||||||
|
|
||||||
|
1. [aml-pipelines-getting-started.ipynb](https://aka.ms/pl-get-started): Start with this notebook to understand the concepts of using Azure Machine Learning Pipelines. This notebook will show you how to runs steps in parallel and in sequence.
|
||||||
|
2. [aml-pipelines-with-data-dependency-steps.ipynb](https://aka.ms/pl-data-dep): This notebooks shows how to connect steps in your pipeline using data. Data produced by one step is used by subsequent steps to force an explicit dependency between steps.
|
||||||
|
3. [aml-pipelines-publish-and-run-using-rest-endpoint.ipynb](https://aka.ms/pl-pub-rep): Once you are satisfied with your iterative runs in, you could publish your pipeline to get a REST endpoint which could be invoked from non-Pythons clients as well.
|
||||||
|
4. [aml-pipelines-data-transfer.ipynb](https://aka.ms/pl-data-trans): This notebook shows how you transfer data between supported datastores.
|
||||||
|
5. [aml-pipelines-use-databricks-as-compute-target.ipynb](https://aka.ms/pl-databricks): This notebooks shows how you can use Pipelines to send your compute payload to Azure Databricks.
|
||||||
|
6. [aml-pipelines-use-adla-as-compute-target.ipynb](https://aka.ms/pl-adla): This notebook shows how you can use Azure Data Lake Analytics (ADLA) as a compute target.
|
||||||
|
7. [aml-pipelines-how-to-use-estimatorstep.ipynb](https://aka.ms/pl-estimator): This notebook shows how to use the EstimatorStep.
|
||||||
|
8. [aml-pipelines-parameter-tuning-with-hyperdrive.ipynb](https://aka.ms/pl-hyperdrive): HyperDriveStep in Pipelines shows how you can do hyper parameter tuning using Pipelines.
|
||||||
|
9. [aml-pipelines-how-to-use-azurebatch-to-run-a-windows-executable.ipynb](https://aka.ms/pl-azbatch): AzureBatchStep can be used to run your custom code in AzureBatch cluster.
|
||||||
|
10. [aml-pipelines-setup-schedule-for-a-published-pipeline.ipynb](https://aka.ms/pl-schedule): Once you publish a Pipeline, you can schedule it to trigger based on an interval or on data change in a defined datastore.
|
||||||
@@ -141,7 +141,7 @@
|
|||||||
" print(\"registered blob datastore with name: %s\" % blob_datastore_name)\n",
|
" print(\"registered blob datastore with name: %s\" % blob_datastore_name)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# CLI:\n",
|
"# CLI:\n",
|
||||||
"# az ml datastore register-blob -n <datastore-name> -a <account-name> -c <container-name> -k <account-key> [-t <sas-token>]"
|
"# az ml datastore attach-blob -n <datastore-name> -a <account-name> -c <container-name> -k <account-key> [-t <sas-token>]"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -303,7 +303,7 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"The following code will create a PythonScriptStep to be executed in the Azure Machine Learning Compute we created above using train.py, one of the files already made available in the project folder.\n",
|
"The following code will create a PythonScriptStep to be executed in the Azure Machine Learning Compute we created above using train.py, one of the files already made available in the project folder.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"A **PythonScriptStep** is a basic, built-in step to run a Python Script on a compute target. It takes a script name and optionally other parameters like arguments for the script, compute target, inputs and outputs. If no compute target is specified, default compute target for the workspace is used."
|
"A **PythonScriptStep** is a basic, built-in step to run a Python Script on a compute target. It takes a script name and optionally other parameters like arguments for the script, compute target, inputs and outputs. If no compute target is specified, default compute target for the workspace is used. You can also use a [**RunConfiguration**](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.runconfiguration?view=azure-ml-py) to specify requirements for the PythonScriptStep, such as conda dependencies and docker image."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -369,10 +369,34 @@
|
|||||||
" compute_target=aml_compute, \n",
|
" compute_target=aml_compute, \n",
|
||||||
" source_directory=project_folder)\n",
|
" source_directory=project_folder)\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"# Use a RunConfiguration to specify some additional requirements for this step.\n",
|
||||||
|
"from azureml.core.runconfig import RunConfiguration\n",
|
||||||
|
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||||
|
"from azureml.core.runconfig import DEFAULT_CPU_IMAGE\n",
|
||||||
|
"\n",
|
||||||
|
"# create a new runconfig object\n",
|
||||||
|
"run_config = RunConfiguration()\n",
|
||||||
|
"\n",
|
||||||
|
"# enable Docker \n",
|
||||||
|
"run_config.environment.docker.enabled = True\n",
|
||||||
|
"\n",
|
||||||
|
"# set Docker base image to the default CPU-based image\n",
|
||||||
|
"run_config.environment.docker.base_image = DEFAULT_CPU_IMAGE\n",
|
||||||
|
"\n",
|
||||||
|
"# use conda_dependencies.yml to create a conda environment in the Docker image for execution\n",
|
||||||
|
"run_config.environment.python.user_managed_dependencies = False\n",
|
||||||
|
"\n",
|
||||||
|
"# auto-prepare the Docker image when used for execution (if it is not already prepared)\n",
|
||||||
|
"run_config.auto_prepare_environment = True\n",
|
||||||
|
"\n",
|
||||||
|
"# specify CondaDependencies obj\n",
|
||||||
|
"run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])\n",
|
||||||
|
"\n",
|
||||||
"step3 = PythonScriptStep(name=\"extract_step\",\n",
|
"step3 = PythonScriptStep(name=\"extract_step\",\n",
|
||||||
" script_name=\"extract.py\", \n",
|
" script_name=\"extract.py\", \n",
|
||||||
" compute_target=aml_compute, \n",
|
" compute_target=aml_compute, \n",
|
||||||
" source_directory=project_folder)\n",
|
" source_directory=project_folder,\n",
|
||||||
|
" runconfig=run_config)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# list of steps to run\n",
|
"# list of steps to run\n",
|
||||||
"steps = [step1, step2, step3]\n",
|
"steps = [step1, step2, step3]\n",
|
||||||
|
|||||||
@@ -0,0 +1,281 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||||
|
"\n",
|
||||||
|
"Licensed under the MIT License."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# How to use EstimatorStep in AML Pipeline\n",
|
||||||
|
"\n",
|
||||||
|
"This notebook shows how to use the EstimatorStep with Azure Machine Learning Pipelines. Estimator is a convenient object in Azure Machine Learning that wraps run configuration information to help simplify the tasks of specifying how a script is executed.\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"## Prerequisite:\n",
|
||||||
|
"* Understand the [architecture and terms](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture) introduced by Azure Machine Learning\n",
|
||||||
|
"* Go through the [configuration notebook](../../../configuration.ipynb) to:\n",
|
||||||
|
" * install the AML SDK\n",
|
||||||
|
" * create a workspace and its configuration file (`config.json`)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Let's get started. First let's import some Python libraries."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import azureml.core\n",
|
||||||
|
"# check core SDK version number\n",
|
||||||
|
"print(\"Azure ML SDK Version: \", azureml.core.VERSION)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Initialize workspace\n",
|
||||||
|
"Initialize a [Workspace](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#workspace) object from the existing workspace you created in the Prerequisites step. `Workspace.from_config()` creates a workspace object from the details stored in `config.json`."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core import Workspace\n",
|
||||||
|
"ws = Workspace.from_config()\n",
|
||||||
|
"print('Workspace name: ' + ws.name, \n",
|
||||||
|
" 'Azure region: ' + ws.location, \n",
|
||||||
|
" 'Subscription id: ' + ws.subscription_id, \n",
|
||||||
|
" 'Resource group: ' + ws.resource_group, sep = '\\n')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Create or Attach existing AmlCompute\n",
|
||||||
|
"You will need to create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for training your model. In this tutorial, you create `AmlCompute` as your training compute resource."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"If we could not find the cluster with the given name, then we will create a new cluster here. We will create an `AmlCompute` cluster of `STANDARD_NC6` GPU VMs. This process is broken down into 3 steps:\n",
|
||||||
|
"1. create the configuration (this step is local and only takes a second)\n",
|
||||||
|
"2. create the cluster (this step will take about **20 seconds**)\n",
|
||||||
|
"3. provision the VMs to bring the cluster to the initial size (of 1 in this case). This step will take about **3-5 minutes** and is providing only sparse output in the process. Please make sure to wait until the call returns before moving to the next cell"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
|
||||||
|
"from azureml.core.compute_target import ComputeTargetException\n",
|
||||||
|
"\n",
|
||||||
|
"# choose a name for your cluster\n",
|
||||||
|
"cluster_name = \"cpucluster\"\n",
|
||||||
|
"\n",
|
||||||
|
"try:\n",
|
||||||
|
" cpu_cluster = ComputeTarget(workspace=ws, name=cluster_name)\n",
|
||||||
|
" print('Found existing compute target')\n",
|
||||||
|
"except ComputeTargetException:\n",
|
||||||
|
" print('Creating a new compute target...')\n",
|
||||||
|
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6', max_nodes=4)\n",
|
||||||
|
"\n",
|
||||||
|
" # create the cluster\n",
|
||||||
|
" cpu_cluster = ComputeTarget.create(ws, cluster_name, compute_config)\n",
|
||||||
|
"\n",
|
||||||
|
" # can poll for a minimum number of nodes and for a specific timeout. \n",
|
||||||
|
" # if no min node count is provided it uses the scale settings for the cluster\n",
|
||||||
|
" cpu_cluster.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)\n",
|
||||||
|
"\n",
|
||||||
|
"# use get_status() to get a detailed status for the current cluster. \n",
|
||||||
|
"print(cpu_cluster.get_status().serialize())"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Now that you have created the compute target, let's see what the workspace's `compute_targets` property returns. You should now see one entry named 'cpucluster' of type `AmlCompute`."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Use a simple script\n",
|
||||||
|
"We have already created a simple \"hello world\" script. This is the script that we will submit through the estimator pattern. It prints a hello-world message, and if Azure ML SDK is installed, it will also logs an array of values ([Fibonacci numbers](https://en.wikipedia.org/wiki/Fibonacci_number))."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Build an Estimator object\n",
|
||||||
|
"Estimator by default will attempt to use Docker-based execution. You can also enable Docker and let estimator pick the default CPU image supplied by Azure ML for execution. You can target an AmlCompute cluster (or any other supported compute target types). You can also customize the conda environment by adding conda and/or pip packages.\n",
|
||||||
|
"\n",
|
||||||
|
"> Note: The arguments to the entry script used in the Estimator object should be specified as *list* using\n",
|
||||||
|
" 'estimator_entry_script_arguments' parameter when instantiating EstimatorStep. Estimator object's parameter\n",
|
||||||
|
" 'script_params' accepts a dictionary. However 'estimator_entry_script_arguments' parameter expects arguments as\n",
|
||||||
|
" a list.\n",
|
||||||
|
"\n",
|
||||||
|
"> Estimator object initialization involves specifying a list of DataReference objects in its 'inputs' parameter.\n",
|
||||||
|
" In Pipelines, a step can take another step's output or DataReferences as input. So when creating an EstimatorStep,\n",
|
||||||
|
" the parameters 'inputs' and 'outputs' need to be set explicitly and that will override 'inputs' parameter\n",
|
||||||
|
" specified in the Estimator object."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core import Datastore\n",
|
||||||
|
"from azureml.data.data_reference import DataReference\n",
|
||||||
|
"from azureml.pipeline.core import PipelineData\n",
|
||||||
|
"\n",
|
||||||
|
"def_blob_store = Datastore(ws, \"workspaceblobstore\")\n",
|
||||||
|
"\n",
|
||||||
|
"input_data = DataReference(\n",
|
||||||
|
" datastore=def_blob_store,\n",
|
||||||
|
" data_reference_name=\"input_data\",\n",
|
||||||
|
" path_on_datastore=\"20newsgroups/20news.pkl\")\n",
|
||||||
|
"\n",
|
||||||
|
"output = PipelineData(\"output\", datastore=def_blob_store)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.train.estimator import Estimator\n",
|
||||||
|
"\n",
|
||||||
|
"est = Estimator(source_directory='.', \n",
|
||||||
|
" compute_target=cpu_cluster, \n",
|
||||||
|
" entry_script='dummy_train.py', \n",
|
||||||
|
" conda_packages=['scikit-learn'])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Create an EstimatorStep\n",
|
||||||
|
"[EstimatorStep](https://docs.microsoft.com/en-us/python/api/azureml-pipeline-steps/azureml.pipeline.steps.estimator_step.estimatorstep?view=azure-ml-py) adds a step to run Estimator in a Pipeline.\n",
|
||||||
|
"\n",
|
||||||
|
"- **name:** Name of the step\n",
|
||||||
|
"- **estimator:** Estimator object\n",
|
||||||
|
"- **estimator_entry_script_arguments:** \n",
|
||||||
|
"- **runconfig_pipeline_params:** Override runconfig properties at runtime using key-value pairs each with name of the runconfig property and PipelineParameter for that property\n",
|
||||||
|
"- **inputs:** Inputs\n",
|
||||||
|
"- **outputs:** Output is list of PipelineData\n",
|
||||||
|
"- **compute_target:** Compute target to use \n",
|
||||||
|
"- **allow_reuse:** Whether the step should reuse previous results when run with the same settings/inputs. If this is false, a new run will always be generated for this step during pipeline execution.\n",
|
||||||
|
"- **version:** Optional version tag to denote a change in functionality for the step"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.pipeline.steps import EstimatorStep\n",
|
||||||
|
"\n",
|
||||||
|
"est_step = EstimatorStep(name=\"Estimator_Train\", \n",
|
||||||
|
" estimator=est, \n",
|
||||||
|
" estimator_entry_script_arguments=[\"--datadir\", input_data, \"--output\", output],\n",
|
||||||
|
" runconfig_pipeline_params=None, \n",
|
||||||
|
" inputs=[input_data], \n",
|
||||||
|
" outputs=[output], \n",
|
||||||
|
" compute_target=cpu_cluster)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Build and Submit the Experiment"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.pipeline.core import Pipeline\n",
|
||||||
|
"from azureml.core import Experiment\n",
|
||||||
|
"pipeline = Pipeline(workspace=ws, steps=[est_step])\n",
|
||||||
|
"pipeline_run = Experiment(ws, 'Estimator_sample').submit(pipeline)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## View Run Details"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.widgets import RunDetails\n",
|
||||||
|
"RunDetails(pipeline_run).show()"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "sanpil"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3.6",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python36"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.6.7"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
@@ -36,7 +36,7 @@
|
|||||||
"from azureml.exceptions import ComputeTargetException\n",
|
"from azureml.exceptions import ComputeTargetException\n",
|
||||||
"from azureml.data.data_reference import DataReference\n",
|
"from azureml.data.data_reference import DataReference\n",
|
||||||
"from azureml.pipeline.steps import HyperDriveStep\n",
|
"from azureml.pipeline.steps import HyperDriveStep\n",
|
||||||
"from azureml.pipeline.core import Pipeline\n",
|
"from azureml.pipeline.core import Pipeline, PipelineData\n",
|
||||||
"from azureml.train.dnn import TensorFlow\n",
|
"from azureml.train.dnn import TensorFlow\n",
|
||||||
"from azureml.train.hyperdrive import *\n",
|
"from azureml.train.hyperdrive import *\n",
|
||||||
"\n",
|
"\n",
|
||||||
@@ -310,11 +310,17 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
"metrics_output_name = 'metrics_output'\n",
|
||||||
|
"metirics_data = PipelineData(name='metrics_data',\n",
|
||||||
|
" datastore=ds,\n",
|
||||||
|
" pipeline_output_name=metrics_output_name)\n",
|
||||||
|
"\n",
|
||||||
"hd_step = HyperDriveStep(\n",
|
"hd_step = HyperDriveStep(\n",
|
||||||
" name=\"hyperdrive_module\",\n",
|
" name=\"hyperdrive_module\",\n",
|
||||||
" hyperdrive_run_config=hd_config,\n",
|
" hyperdrive_run_config=hd_config,\n",
|
||||||
" estimator_entry_script_arguments=['--data-folder', data_folder],\n",
|
" estimator_entry_script_arguments=['--data-folder', data_folder],\n",
|
||||||
" inputs=[data_folder])"
|
" inputs=[data_folder],\n",
|
||||||
|
" metrics_output=metirics_data)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -366,6 +372,40 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"pipeline_run.wait_for_completion()"
|
"pipeline_run.wait_for_completion()"
|
||||||
]
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Retrieve the metrics\n",
|
||||||
|
"Outputs of above run can be used as inputs of other steps in pipeline. In this tutorial, we will show the result metrics."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"metrics_output = pipeline_run.get_pipeline_output(metrics_output_name)\n",
|
||||||
|
"num_file_downloaded = metrics_output.download('.', show_progress=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"import json\n",
|
||||||
|
"with open(metrics_output._path_on_datastore) as f: \n",
|
||||||
|
" metrics_output_result = f.read()\n",
|
||||||
|
" \n",
|
||||||
|
"deserialized_metrics_output = json.loads(metrics_output_result)\n",
|
||||||
|
"df = pd.DataFrame(deserialized_metrics_output)\n",
|
||||||
|
"df"
|
||||||
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
|||||||
@@ -33,7 +33,7 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"import azureml.core\n",
|
"import azureml.core\n",
|
||||||
"from azureml.core import Workspace, Datastore\n",
|
"from azureml.core import Workspace, Datastore, Experiment\n",
|
||||||
"from azureml.core.compute import AmlCompute\n",
|
"from azureml.core.compute import AmlCompute\n",
|
||||||
"from azureml.core.compute import ComputeTarget\n",
|
"from azureml.core.compute import ComputeTarget\n",
|
||||||
"\n",
|
"\n",
|
||||||
@@ -55,10 +55,7 @@
|
|||||||
"print(\"Default datastore's name: {}\".format(def_file_store.name))\n",
|
"print(\"Default datastore's name: {}\".format(def_file_store.name))\n",
|
||||||
"\n",
|
"\n",
|
||||||
"def_blob_store = Datastore(ws, \"workspaceblobstore\")\n",
|
"def_blob_store = Datastore(ws, \"workspaceblobstore\")\n",
|
||||||
"print(\"Blobstore's name: {}\".format(def_blob_store.name))\n",
|
"print(\"Blobstore's name: {}\".format(def_blob_store.name))"
|
||||||
"\n",
|
|
||||||
"# project folder\n",
|
|
||||||
"project_folder = '.'"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -160,7 +157,7 @@
|
|||||||
" inputs=[blob_input_data],\n",
|
" inputs=[blob_input_data],\n",
|
||||||
" outputs=[processed_data1],\n",
|
" outputs=[processed_data1],\n",
|
||||||
" compute_target=aml_compute, \n",
|
" compute_target=aml_compute, \n",
|
||||||
" source_directory=project_folder\n",
|
" source_directory='.'\n",
|
||||||
")\n",
|
")\n",
|
||||||
"print(\"trainStep created\")"
|
"print(\"trainStep created\")"
|
||||||
]
|
]
|
||||||
@@ -191,7 +188,7 @@
|
|||||||
" inputs=[processed_data1],\n",
|
" inputs=[processed_data1],\n",
|
||||||
" outputs=[processed_data2],\n",
|
" outputs=[processed_data2],\n",
|
||||||
" compute_target=aml_compute, \n",
|
" compute_target=aml_compute, \n",
|
||||||
" source_directory=project_folder)\n",
|
" source_directory='.')\n",
|
||||||
"print(\"extractStep created\")"
|
"print(\"extractStep created\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -252,7 +249,7 @@
|
|||||||
" inputs=[processed_data1, processed_data2],\n",
|
" inputs=[processed_data1, processed_data2],\n",
|
||||||
" outputs=[processed_data3], \n",
|
" outputs=[processed_data3], \n",
|
||||||
" compute_target=aml_compute, \n",
|
" compute_target=aml_compute, \n",
|
||||||
" source_directory=project_folder)\n",
|
" source_directory='.')\n",
|
||||||
"print(\"compareStep created\")"
|
"print(\"compareStep created\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -270,10 +267,7 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"pipeline1 = Pipeline(workspace=ws, steps=[compareStep])\n",
|
"pipeline1 = Pipeline(workspace=ws, steps=[compareStep])\n",
|
||||||
"print (\"Pipeline is built\")\n",
|
"print (\"Pipeline is built\")"
|
||||||
"\n",
|
|
||||||
"pipeline1.validate()\n",
|
|
||||||
"print(\"Simple validation complete\") "
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -290,10 +284,38 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"published_pipeline1 = pipeline1.publish(name=\"My_New_Pipeline\", description=\"My Published Pipeline Description\")\n",
|
"published_pipeline1 = pipeline1.publish(name=\"My_New_Pipeline\", description=\"My Published Pipeline Description\", continue_on_step_failure=True)\n",
|
||||||
"published_pipeline1"
|
"published_pipeline1"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Note: the continue_on_step_failure parameter specifies whether the execution of steps in the Pipeline will continue if one step fails. The default value is False, meaning when one step fails, the Pipeline execution will stop, canceling any running steps."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Publish the pipeline from a submitted PipelineRun\n",
|
||||||
|
"It is also possible to publish a pipeline from a submitted PipelineRun"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# submit a pipeline run\n",
|
||||||
|
"pipeline_run1 = Experiment(ws, 'Pipeline_experiment').submit(pipeline1)\n",
|
||||||
|
"# publish a pipeline from the submitted pipeline run\n",
|
||||||
|
"published_pipeline2 = pipeline_run1.publish_pipeline(name=\"My_New_Pipeline2\", description=\"My Published Pipeline Description\", version=\"0.1\", continue_on_step_failure=True)\n",
|
||||||
|
"published_pipeline2"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
@@ -325,7 +347,8 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### Run published pipeline using its REST endpoint"
|
"### Run published pipeline using its REST endpoint\n",
|
||||||
|
"[This notebook](https://aka.ms/pl-restep-auth) shows how to authenticate to AML workspace."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -107,15 +107,11 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"from azureml.pipeline.steps import PythonScriptStep\n",
|
"from azureml.pipeline.steps import PythonScriptStep\n",
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
|
||||||
"# project folder\n",
|
|
||||||
"project_folder = 'scripts'\n",
|
|
||||||
"\n",
|
|
||||||
"trainStep = PythonScriptStep(\n",
|
"trainStep = PythonScriptStep(\n",
|
||||||
" name=\"Training_Step\",\n",
|
" name=\"Training_Step\",\n",
|
||||||
" script_name=\"train.py\", \n",
|
" script_name=\"train.py\", \n",
|
||||||
" compute_target=aml_compute_target, \n",
|
" compute_target=aml_compute_target, \n",
|
||||||
" source_directory=project_folder\n",
|
" source_directory='.'\n",
|
||||||
")\n",
|
")\n",
|
||||||
"print(\"TrainStep created\")"
|
"print(\"TrainStep created\")"
|
||||||
]
|
]
|
||||||
@@ -136,9 +132,7 @@
|
|||||||
"from azureml.pipeline.core import Pipeline\n",
|
"from azureml.pipeline.core import Pipeline\n",
|
||||||
"\n",
|
"\n",
|
||||||
"pipeline1 = Pipeline(workspace=ws, steps=[trainStep])\n",
|
"pipeline1 = Pipeline(workspace=ws, steps=[trainStep])\n",
|
||||||
"print (\"Pipeline is built\")\n",
|
"print (\"Pipeline is built\")"
|
||||||
"\n",
|
|
||||||
"pipeline1.validate()"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -255,11 +249,12 @@
|
|||||||
"schedules = Schedule.get_all(ws, pipeline_id=pub_pipeline_id)\n",
|
"schedules = Schedule.get_all(ws, pipeline_id=pub_pipeline_id)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# We will iterate through the list of schedules and \n",
|
"# We will iterate through the list of schedules and \n",
|
||||||
"# use the last ID in the list for further operations: \n",
|
"# use the last recurrence schedule in the list for further operations: \n",
|
||||||
"print(\"Found these schedules for the pipeline id {}:\".format(pub_pipeline_id))\n",
|
"print(\"Found these schedules for the pipeline id {}:\".format(pub_pipeline_id))\n",
|
||||||
"for schedule in schedules: \n",
|
"for schedule in schedules: \n",
|
||||||
" print(schedule.id)\n",
|
" print(schedule.id)\n",
|
||||||
" schedule_id = schedule.id\n",
|
" if schedule.recurrence is not None:\n",
|
||||||
|
" schedule_id = schedule.id\n",
|
||||||
"\n",
|
"\n",
|
||||||
"print(\"Schedule id to be used for schedule operations: {}\".format(schedule_id))"
|
"print(\"Schedule id to be used for schedule operations: {}\".format(schedule_id))"
|
||||||
]
|
]
|
||||||
@@ -380,7 +375,8 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### Create a schedule for the pipeline using a Datastore\n",
|
"### Create a schedule for the pipeline using a Datastore\n",
|
||||||
"This schedule will run when additions or modifications are made to Blobs in the Datastore container.\n",
|
"This schedule will run when additions or modifications are made to Blobs in the Datastore.\n",
|
||||||
|
"By default, the Datastore container is monitored for changes. Use the path_on_datastore parameter to instead specify a path on the Datastore to monitor for changes. Changes made to subfolders in the container/path will not trigger the schedule.\n",
|
||||||
"Note: Only Blob Datastores are supported."
|
"Note: Only Blob Datastores are supported."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -400,6 +396,7 @@
|
|||||||
" datastore=datastore,\n",
|
" datastore=datastore,\n",
|
||||||
" wait_for_provisioning=True,\n",
|
" wait_for_provisioning=True,\n",
|
||||||
" description=\"Schedule Run\")\n",
|
" description=\"Schedule Run\")\n",
|
||||||
|
" #path_on_datastore=\"file/path\") use path_on_datastore to specify a specific folder to monitor for changes.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# You may want to make sure that the schedule is provisioned properly\n",
|
"# You may want to make sure that the schedule is provisioned properly\n",
|
||||||
"# before making any further changes to the schedule\n",
|
"# before making any further changes to the schedule\n",
|
||||||
|
|||||||
@@ -168,7 +168,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## Data Connections with Inputs and Outputs\n",
|
"## Data Connections with Inputs and Outputs\n",
|
||||||
"The DatabricksStep supports Azure Bloband ADLS for inputs and outputs. You also will need to define a [Secrets](https://docs.azuredatabricks.net/user-guide/secrets/index.html) scope to enable authentication to external data sources such as Blob and ADLS from Databricks.\n",
|
"The DatabricksStep supports Azure Blob and ADLS for inputs and outputs. You also will need to define a [Secrets](https://docs.azuredatabricks.net/user-guide/secrets/index.html) scope to enable authentication to external data sources such as Blob and ADLS from Databricks.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"- Databricks documentation on [Azure Blob](https://docs.azuredatabricks.net/spark/latest/data-sources/azure/azure-storage.html)\n",
|
"- Databricks documentation on [Azure Blob](https://docs.azuredatabricks.net/spark/latest/data-sources/azure/azure-storage.html)\n",
|
||||||
"- Databricks documentation on [ADLS](https://docs.databricks.com/spark/latest/data-sources/azure/azure-datalake.html)\n",
|
"- Databricks documentation on [ADLS](https://docs.databricks.com/spark/latest/data-sources/azure/azure-datalake.html)\n",
|
||||||
|
|||||||
@@ -0,0 +1,517 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Copyright (c) Microsoft Corporation. All rights reserved. \n",
|
||||||
|
"Licensed under the MIT License."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Azure Machine Learning Pipeline with AutoMLStep\n",
|
||||||
|
"This notebook demonstrates the use of AutoMLStep in Azure Machine Learning Pipeline."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Introduction\n",
|
||||||
|
"In this example we use the scikit-learn's [digit dataset](http://scikit-learn.org/stable/datasets/index.html#optical-recognition-of-handwritten-digits-dataset) to showcase how you can use AutoML for a simple classification problem.\n",
|
||||||
|
"\n",
|
||||||
|
"Make sure you have executed the [configuration](../../../configuration.ipynb) before running this notebook.\n",
|
||||||
|
"\n",
|
||||||
|
"In this notebook you would see\n",
|
||||||
|
"1. Create an `Experiment` in an existing `Workspace`.\n",
|
||||||
|
"2. Create or Attach existing AmlCompute to a workspace.\n",
|
||||||
|
"3. Configure AutoML using `AutoMLConfig`.\n",
|
||||||
|
"4. Use AutoMLStep\n",
|
||||||
|
"5. Train the model using AmlCompute\n",
|
||||||
|
"6. Explore the results.\n",
|
||||||
|
"7. Test the best fitted model.\n",
|
||||||
|
"\n",
|
||||||
|
"In addition this notebook showcases the following features\n",
|
||||||
|
"- **Parallel** executions for iterations\n",
|
||||||
|
"- **Asynchronous** tracking of progress\n",
|
||||||
|
"- Retrieving models for any iteration or logged metric\n",
|
||||||
|
"- Specifying AutoML settings as `**kwargs`"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Azure Machine Learning and Pipeline SDK-specific imports"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import logging\n",
|
||||||
|
"import os\n",
|
||||||
|
"import csv\n",
|
||||||
|
"\n",
|
||||||
|
"from matplotlib import pyplot as plt\n",
|
||||||
|
"import numpy as np\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"from sklearn import datasets\n",
|
||||||
|
"\n",
|
||||||
|
"import azureml.core\n",
|
||||||
|
"from azureml.core.experiment import Experiment\n",
|
||||||
|
"from azureml.core.workspace import Workspace\n",
|
||||||
|
"from azureml.train.automl import AutoMLConfig\n",
|
||||||
|
"from azureml.core.compute import AmlCompute\n",
|
||||||
|
"from azureml.core.compute import ComputeTarget\n",
|
||||||
|
"from azureml.core.runconfig import RunConfiguration\n",
|
||||||
|
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||||
|
"\n",
|
||||||
|
"from azureml.train.automl import AutoMLStep\n",
|
||||||
|
"\n",
|
||||||
|
"# Check core SDK version number\n",
|
||||||
|
"print(\"SDK version:\", azureml.core.VERSION)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Initialize Workspace\n",
|
||||||
|
"Initialize a workspace object from persisted configuration. Make sure the config file is present at .\\config.json"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"ws = Workspace.from_config()\n",
|
||||||
|
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Create an Azure ML experiment\n",
|
||||||
|
"Let's create an experiment named \"automl-classification\" and a folder to hold the training scripts. The script runs will be recorded under the experiment in Azure.\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Choose a name for the run history container in the workspace.\n",
|
||||||
|
"experiment_name = 'automlstep-classification'\n",
|
||||||
|
"project_folder = './project'\n",
|
||||||
|
"\n",
|
||||||
|
"experiment = Experiment(ws, experiment_name)\n",
|
||||||
|
"experiment"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Create or Attach existing AmlCompute\n",
|
||||||
|
"You will need to create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for your AutoML run. In this tutorial, you create `AmlCompute` as your training compute resource.\n",
|
||||||
|
"\n",
|
||||||
|
"**Creation of AmlCompute takes approximately 5 minutes.** If the AmlCompute with that name is already in your workspace this code will skip the creation process."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Choose a name for your cluster.\n",
|
||||||
|
"amlcompute_cluster_name = \"cpucluster\"\n",
|
||||||
|
"\n",
|
||||||
|
"found = False\n",
|
||||||
|
"# Check if this compute target already exists in the workspace.\n",
|
||||||
|
"cts = ws.compute_targets\n",
|
||||||
|
"if amlcompute_cluster_name in cts and cts[amlcompute_cluster_name].type == 'AmlCompute':\n",
|
||||||
|
" found = True\n",
|
||||||
|
" print('Found existing compute target.')\n",
|
||||||
|
" compute_target = cts[amlcompute_cluster_name]\n",
|
||||||
|
" \n",
|
||||||
|
"if not found:\n",
|
||||||
|
" print('Creating a new compute target...')\n",
|
||||||
|
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\", # for GPU, use \"STANDARD_NC6\"\n",
|
||||||
|
" #vm_priority = 'lowpriority', # optional\n",
|
||||||
|
" max_nodes = 4)\n",
|
||||||
|
"\n",
|
||||||
|
" # Create the cluster.\n",
|
||||||
|
" compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)\n",
|
||||||
|
" \n",
|
||||||
|
" # Can poll for a minimum number of nodes and for a specific timeout.\n",
|
||||||
|
" # If no min_node_count is provided, it will use the scale settings for the cluster.\n",
|
||||||
|
" compute_target.wait_for_completion(show_output = True, min_node_count = 1, timeout_in_minutes = 10)\n",
|
||||||
|
" \n",
|
||||||
|
" # For a more detailed view of current AmlCompute status, use get_status()."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Prepare and Point to Data\n",
|
||||||
|
"For remote executions, you need to make the data accessible from the remote compute.\n",
|
||||||
|
"This can be done by uploading the data to DataStore.\n",
|
||||||
|
"In this example, we upload scikit-learn's [load_digits](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html) data."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"data_train = datasets.load_digits()\n",
|
||||||
|
"\n",
|
||||||
|
"if not os.path.isdir('data'):\n",
|
||||||
|
" os.mkdir('data')\n",
|
||||||
|
" \n",
|
||||||
|
"if not os.path.exists(project_folder):\n",
|
||||||
|
" os.makedirs(project_folder)\n",
|
||||||
|
" \n",
|
||||||
|
"pd.DataFrame(data_train.data).to_csv(\"data/X_train.tsv\", index=False, header=False, quoting=csv.QUOTE_ALL, sep=\"\\t\")\n",
|
||||||
|
"pd.DataFrame(data_train.target).to_csv(\"data/y_train.tsv\", index=False, header=False, sep=\"\\t\")\n",
|
||||||
|
"\n",
|
||||||
|
"ds = ws.get_default_datastore()\n",
|
||||||
|
"ds.upload(src_dir='./data', target_path='bai_data', overwrite=True, show_progress=True)\n",
|
||||||
|
"\n",
|
||||||
|
"from azureml.data.data_reference import DataReference \n",
|
||||||
|
"input_data = DataReference(datastore=ds, \n",
|
||||||
|
" data_reference_name=\"input_data_reference\",\n",
|
||||||
|
" path_on_datastore='bai_data',\n",
|
||||||
|
" mode='download',\n",
|
||||||
|
" path_on_compute='/tmp/azureml_runs',\n",
|
||||||
|
" overwrite=False)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# create a new RunConfig object\n",
|
||||||
|
"conda_run_config = RunConfiguration(framework=\"python\")\n",
|
||||||
|
"\n",
|
||||||
|
"# Set compute target to AmlCompute\n",
|
||||||
|
"#conda_run_config.target = compute_target\n",
|
||||||
|
"\n",
|
||||||
|
"conda_run_config.environment.docker.enabled = True\n",
|
||||||
|
"conda_run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n",
|
||||||
|
"\n",
|
||||||
|
"cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]'], \n",
|
||||||
|
" conda_packages=['numpy', 'py-xgboost'], \n",
|
||||||
|
" pin_sdk_version=False)\n",
|
||||||
|
"conda_run_config.environment.python.conda_dependencies = cd\n",
|
||||||
|
"\n",
|
||||||
|
"print('run config is ready')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"%%writefile $project_folder/get_data.py\n",
|
||||||
|
"\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"\n",
|
||||||
|
"def get_data():\n",
|
||||||
|
" X_train = pd.read_csv(\"/tmp/azureml_runs/bai_data/X_train.tsv\", delimiter=\"\\t\", header=None, quotechar='\"')\n",
|
||||||
|
" y_train = pd.read_csv(\"/tmp/azureml_runs/bai_data/y_train.tsv\", delimiter=\"\\t\", header=None, quotechar='\"')\n",
|
||||||
|
"\n",
|
||||||
|
" return { \"X\" : X_train.values, \"y\" : y_train[0].values }\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Set up AutoMLConfig for Training\n",
|
||||||
|
"\n",
|
||||||
|
"You can specify `automl_settings` as `**kwargs` as well. Also note that you can use a `get_data()` function for local excutions too.\n",
|
||||||
|
"\n",
|
||||||
|
"**Note:** When using AmlCompute, you can't pass Numpy arrays directly to the fit method.\n",
|
||||||
|
"\n",
|
||||||
|
"|Property|Description|\n",
|
||||||
|
"|-|-|\n",
|
||||||
|
"|**primary_metric**|This is the metric that you want to optimize. Classification supports the following primary metrics: <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>average_precision_score_weighted</i><br><i>norm_macro_recall</i><br><i>precision_score_weighted</i>|\n",
|
||||||
|
"|**iteration_timeout_minutes**|Time limit in minutes for each iteration.|\n",
|
||||||
|
"|**iterations**|Number of iterations. In each iteration AutoML trains a specific pipeline with the data.|\n",
|
||||||
|
"|**n_cross_validations**|Number of cross validation splits.|\n",
|
||||||
|
"|**max_concurrent_iterations**|Maximum number of iterations that would be executed in parallel. This should be less than the number of cores on the DSVM.|"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"automl_settings = {\n",
|
||||||
|
" \"iteration_timeout_minutes\": 5,\n",
|
||||||
|
" \"iterations\": 20,\n",
|
||||||
|
" \"n_cross_validations\": 5,\n",
|
||||||
|
" \"primary_metric\": 'AUC_weighted',\n",
|
||||||
|
" \"preprocess\": False,\n",
|
||||||
|
" \"max_concurrent_iterations\": 3,\n",
|
||||||
|
" \"verbosity\": logging.INFO\n",
|
||||||
|
"}\n",
|
||||||
|
"automl_config = AutoMLConfig(task = 'classification',\n",
|
||||||
|
" debug_log = 'automl_errors.log',\n",
|
||||||
|
" path = project_folder,\n",
|
||||||
|
" compute_target=compute_target,\n",
|
||||||
|
" run_configuration=conda_run_config,\n",
|
||||||
|
" data_script = project_folder + \"/get_data.py\",\n",
|
||||||
|
" **automl_settings\n",
|
||||||
|
" )"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Call the `submit` method on the experiment object and pass the run configuration. For remote runs the execution is asynchronous, so you will see the iterations get populated as they complete. You can interact with the widgets and models even when the experiment is running to retrieve the best model up to that point. Once you are satisfied with the model, you can cancel a particular iteration or the whole run.\n",
|
||||||
|
"In this example, we specify `show_output = False` to suppress console output while the run is in progress."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Define AutoMLStep"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.pipeline.core import PipelineData, TrainingOutput\n",
|
||||||
|
"\n",
|
||||||
|
"metrics_output_name = 'metrics_output'\n",
|
||||||
|
"best_model_output_name = 'best_model_output'\n",
|
||||||
|
"\n",
|
||||||
|
"metirics_data = PipelineData(name='metrics_data',\n",
|
||||||
|
" datastore=ds,\n",
|
||||||
|
" pipeline_output_name=metrics_output_name,\n",
|
||||||
|
" training_output=TrainingOutput(type='Metrics'))\n",
|
||||||
|
"model_data = PipelineData(name='model_data',\n",
|
||||||
|
" datastore=ds,\n",
|
||||||
|
" pipeline_output_name=best_model_output_name,\n",
|
||||||
|
" training_output=TrainingOutput(type='Model'))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"automl_step = AutoMLStep(\n",
|
||||||
|
" name='automl_module',\n",
|
||||||
|
" experiment=experiment,\n",
|
||||||
|
" automl_config=automl_config,\n",
|
||||||
|
" inputs=[input_data],\n",
|
||||||
|
" outputs=[metirics_data, model_data],\n",
|
||||||
|
" allow_reuse=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.pipeline.core import Pipeline\n",
|
||||||
|
"pipeline = Pipeline(\n",
|
||||||
|
" description=\"pipeline_with_automlstep\",\n",
|
||||||
|
" workspace=ws, \n",
|
||||||
|
" steps=[automl_step])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"pipeline_run = experiment.submit(pipeline)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.widgets import RunDetails\n",
|
||||||
|
"RunDetails(pipeline_run).show()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"pipeline_run.wait_for_completion()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Examine Results\n",
|
||||||
|
"\n",
|
||||||
|
"### Retrieve the metrics of all child runs\n",
|
||||||
|
"Outputs of above run can be used as inputs of other steps in pipeline. In this tutorial, we will examine the outputs by retrieve output data and running some tests."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"metrics_output = pipeline_run.get_pipeline_output(metrics_output_name)\n",
|
||||||
|
"num_file_downloaded = metrics_output.download('.', show_progress=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import json\n",
|
||||||
|
"with open(metrics_output._path_on_datastore) as f: \n",
|
||||||
|
" metrics_output_result = f.read()\n",
|
||||||
|
" \n",
|
||||||
|
"deserialized_metrics_output = json.loads(metrics_output_result)\n",
|
||||||
|
"df = pd.DataFrame(deserialized_metrics_output)\n",
|
||||||
|
"df"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Retrieve the Best Model"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"best_model_output = pipeline_run.get_pipeline_output(best_model_output_name)\n",
|
||||||
|
"num_file_downloaded = best_model_output.download('.', show_progress=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
" import pickle\n",
|
||||||
|
"\n",
|
||||||
|
" with open(best_model_output._path_on_datastore, \"rb\" ) as f:\n",
|
||||||
|
" best_model = pickle.load(f)\n",
|
||||||
|
" best_model"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Test the Model\n",
|
||||||
|
"#### Load Test Data"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"digits = datasets.load_digits()\n",
|
||||||
|
"X_test = digits.data[:10, :]\n",
|
||||||
|
"y_test = digits.target[:10]\n",
|
||||||
|
"images = digits.images[:10]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"#### Testing Best Model"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Randomly select digits and test.\n",
|
||||||
|
"for index in np.random.choice(len(y_test), 3, replace = False):\n",
|
||||||
|
" print(index)\n",
|
||||||
|
" predicted = best_model.predict(X_test[index:index + 1])[0]\n",
|
||||||
|
" label = y_test[index]\n",
|
||||||
|
" title = \"Label value = %d Predicted value = %d \" % (label, predicted)\n",
|
||||||
|
" fig = plt.figure(1, figsize=(3,3))\n",
|
||||||
|
" ax1 = fig.add_axes((0,0,.8,.8))\n",
|
||||||
|
" ax1.set_title(title)\n",
|
||||||
|
" plt.imshow(images[index], cmap = plt.cm.gray_r, interpolation = 'nearest')\n",
|
||||||
|
" plt.show()"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "sanpil"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3.6",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python36"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.6.7"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
@@ -83,10 +83,10 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# project folder\n",
|
"# source directory\n",
|
||||||
"project_folder = '.'\n",
|
"source_directory = '.'\n",
|
||||||
" \n",
|
" \n",
|
||||||
"print('Sample projects will be created in {}.'.format(project_folder))"
|
"print('Sample scripts will be created in {} directory.'.format(source_directory))"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -259,6 +259,44 @@
|
|||||||
"**Open `train.py` in the local machine and examine the arguments, inputs, and outputs for the script. That will give you a good sense of why the script argument names used below are important.** "
|
"**Open `train.py` in the local machine and examine the arguments, inputs, and outputs for the script. That will give you a good sense of why the script argument names used below are important.** "
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"#### Specify conda dependencies and a base docker image through a RunConfiguration\n",
|
||||||
|
"\n",
|
||||||
|
"This step uses a docker image and scikit-learn, use a [**RunConfiguration**](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.runconfiguration?view=azure-ml-py) to specify these requirements and use when creating the PythonScriptStep. "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.runconfig import RunConfiguration\n",
|
||||||
|
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||||
|
"from azureml.core.runconfig import DEFAULT_CPU_IMAGE\n",
|
||||||
|
"\n",
|
||||||
|
"# create a new runconfig object\n",
|
||||||
|
"run_config = RunConfiguration()\n",
|
||||||
|
"\n",
|
||||||
|
"# enable Docker \n",
|
||||||
|
"run_config.environment.docker.enabled = True\n",
|
||||||
|
"\n",
|
||||||
|
"# set Docker base image to the default CPU-based image\n",
|
||||||
|
"run_config.environment.docker.base_image = DEFAULT_CPU_IMAGE\n",
|
||||||
|
"\n",
|
||||||
|
"# use conda_dependencies.yml to create a conda environment in the Docker image for execution\n",
|
||||||
|
"run_config.environment.python.user_managed_dependencies = False\n",
|
||||||
|
"\n",
|
||||||
|
"# auto-prepare the Docker image when used for execution (if it is not already prepared)\n",
|
||||||
|
"run_config.auto_prepare_environment = True\n",
|
||||||
|
"\n",
|
||||||
|
"# specify CondaDependencies obj\n",
|
||||||
|
"run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
@@ -273,7 +311,8 @@
|
|||||||
" inputs=[blob_input_data],\n",
|
" inputs=[blob_input_data],\n",
|
||||||
" outputs=[processed_data1],\n",
|
" outputs=[processed_data1],\n",
|
||||||
" compute_target=aml_compute, \n",
|
" compute_target=aml_compute, \n",
|
||||||
" source_directory=project_folder\n",
|
" source_directory=source_directory,\n",
|
||||||
|
" runconfig=run_config\n",
|
||||||
")\n",
|
")\n",
|
||||||
"print(\"trainStep created\")"
|
"print(\"trainStep created\")"
|
||||||
]
|
]
|
||||||
@@ -304,7 +343,7 @@
|
|||||||
" inputs=[processed_data1],\n",
|
" inputs=[processed_data1],\n",
|
||||||
" outputs=[processed_data2],\n",
|
" outputs=[processed_data2],\n",
|
||||||
" compute_target=aml_compute, \n",
|
" compute_target=aml_compute, \n",
|
||||||
" source_directory=project_folder)\n",
|
" source_directory=source_directory)\n",
|
||||||
"print(\"extractStep created\")"
|
"print(\"extractStep created\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -312,8 +351,10 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"#### Define a Step that consumes multiple intermediate data and produces intermediate data\n",
|
"#### Define a Step that consumes intermediate data and existing data and produces intermediate data\n",
|
||||||
"In this step, we define a step that consumes multiple intermediate data and produces intermediate data.\n",
|
"In this step, we define a step that consumes multiple data types and produces intermediate data.\n",
|
||||||
|
"\n",
|
||||||
|
"This step uses the output generated from the previous step as well as existing data on a DataStore. The location of the existing data is specified using a [**PipelineParameter**](https://docs.microsoft.com/en-us/python/api/azureml-pipeline-core/azureml.pipeline.core.pipelineparameter?view=azure-ml-py) and a [**DataPath**](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.data.datapath.datapath?view=azure-ml-py). Using a PipelineParameter enables easy modification of the data location when the Pipeline is published and resubmitted.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"**Open `compare.py` in the local machine and examine the arguments, inputs, and outputs for the script. That will give you a good sense of why the script argument names used below are important.**"
|
"**Open `compare.py` in the local machine and examine the arguments, inputs, and outputs for the script. That will give you a good sense of why the script argument names used below are important.**"
|
||||||
]
|
]
|
||||||
@@ -324,16 +365,31 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# Now define step6 that takes two inputs (both intermediate data), and produce an output\n",
|
"# Reference the data uploaded to blob storage using a PipelineParameter and a DataPath\n",
|
||||||
|
"from azureml.pipeline.core import PipelineParameter\n",
|
||||||
|
"from azureml.data.datapath import DataPath, DataPathComputeBinding\n",
|
||||||
|
"\n",
|
||||||
|
"datapath = DataPath(datastore=def_blob_store, path_on_datastore='20newsgroups/20news.pkl')\n",
|
||||||
|
"datapath_param = PipelineParameter(name=\"compare_data\", default_value=datapath)\n",
|
||||||
|
"data_parameter1 = (datapath_param, DataPathComputeBinding(mode='mount'))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Now define the compare step which takes two inputs and produces an output\n",
|
||||||
"processed_data3 = PipelineData(\"processed_data3\", datastore=def_blob_store)\n",
|
"processed_data3 = PipelineData(\"processed_data3\", datastore=def_blob_store)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"compareStep = PythonScriptStep(\n",
|
"compareStep = PythonScriptStep(\n",
|
||||||
" script_name=\"compare.py\",\n",
|
" script_name=\"compare.py\",\n",
|
||||||
" arguments=[\"--compare_data1\", processed_data1, \"--compare_data2\", processed_data2, \"--output_compare\", processed_data3],\n",
|
" arguments=[\"--compare_data1\", data_parameter1, \"--compare_data2\", processed_data2, \"--output_compare\", processed_data3],\n",
|
||||||
" inputs=[processed_data1, processed_data2],\n",
|
" inputs=[data_parameter1, processed_data2],\n",
|
||||||
" outputs=[processed_data3], \n",
|
" outputs=[processed_data3], \n",
|
||||||
" compute_target=aml_compute, \n",
|
" compute_target=aml_compute, \n",
|
||||||
" source_directory=project_folder)\n",
|
" source_directory=source_directory)\n",
|
||||||
"print(\"compareStep created\")"
|
"print(\"compareStep created\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -351,10 +407,7 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"pipeline1 = Pipeline(workspace=ws, steps=[compareStep])\n",
|
"pipeline1 = Pipeline(workspace=ws, steps=[compareStep])\n",
|
||||||
"print (\"Pipeline is built\")\n",
|
"print (\"Pipeline is built\")"
|
||||||
"\n",
|
|
||||||
"pipeline1.validate()\n",
|
|
||||||
"print(\"Simple validation complete\") "
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -0,0 +1,30 @@
|
|||||||
|
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||||
|
# Licensed under the MIT License.
|
||||||
|
import argparse
|
||||||
|
import os
|
||||||
|
|
||||||
|
print("*********************************************************")
|
||||||
|
print("Hello Azure ML!")
|
||||||
|
|
||||||
|
parser = argparse.ArgumentParser()
|
||||||
|
parser.add_argument('--datadir', type=str, help="data directory")
|
||||||
|
parser.add_argument('--output', type=str, help="output")
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
print("Argument 1: %s" % args.datadir)
|
||||||
|
print("Argument 2: %s" % args.output)
|
||||||
|
|
||||||
|
if not (args.output is None):
|
||||||
|
os.makedirs(args.output, exist_ok=True)
|
||||||
|
print("%s created" % args.output)
|
||||||
|
|
||||||
|
try:
|
||||||
|
from azureml.core import Run
|
||||||
|
run = Run.get_context()
|
||||||
|
print("Log Fibonacci numbers.")
|
||||||
|
run.log_list('Fibonacci numbers', [0, 1, 1, 2, 3, 5, 8, 13, 21, 34])
|
||||||
|
run.complete()
|
||||||
|
except:
|
||||||
|
print("Warning: you need to install Azure ML SDK in order to log metrics.")
|
||||||
|
|
||||||
|
print("*********************************************************")
|
||||||
@@ -508,7 +508,8 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### Get AAD token"
|
"### Get AAD token\n",
|
||||||
|
"[This notebook](https://aka.ms/pl-restep-auth) shows how to authenticate to AML workspace."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -492,7 +492,8 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## Get AAD token"
|
"## Get AAD token\n",
|
||||||
|
"[This notebook](https://aka.ms/pl-restep-auth) shows how to authenticate to AML workspace."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -1,253 +1,253 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Licensed under the MIT License.\n",
|
"Licensed under the MIT License.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"## Authentication in Azure Machine Learning\n",
|
"## Authentication in Azure Machine Learning\n",
|
||||||
"\n",
|
"\n",
|
||||||
"This notebook shows you how to authenticate to your Azure ML Workspace using\n",
|
"This notebook shows you how to authenticate to your Azure ML Workspace using\n",
|
||||||
"\n",
|
"\n",
|
||||||
" 1. Interactive Login Authentication\n",
|
" 1. Interactive Login Authentication\n",
|
||||||
" 2. Azure CLI Authentication\n",
|
" 2. Azure CLI Authentication\n",
|
||||||
" 3. Service Principal Authentication\n",
|
" 3. Service Principal Authentication\n",
|
||||||
" \n",
|
" \n",
|
||||||
"The interactive authentication is suitable for local experimentation on your own computer. Azure CLI authentication is suitable if you are already using Azure CLI for managing Azure resources, and want to sign in only once. The Service Principal authentication is suitable for automated workflows, for example as part of Azure Devops build."
|
"The interactive authentication is suitable for local experimentation on your own computer. Azure CLI authentication is suitable if you are already using Azure CLI for managing Azure resources, and want to sign in only once. The Service Principal authentication is suitable for automated workflows, for example as part of Azure Devops build."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from azureml.core import Workspace"
|
"from azureml.core import Workspace"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### Interactive Authentication\n",
|
"### Interactive Authentication\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Interactive authentication is the default mode when using Azure ML SDK.\n",
|
"Interactive authentication is the default mode when using Azure ML SDK.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"When you connect to your workspace using workspace.from_config, you will get an interactive login dialog."
|
"When you connect to your workspace using workspace.from_config, you will get an interactive login dialog."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"ws = Workspace.from_config()"
|
"ws = Workspace.from_config()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"Also, if you explicitly specify the subscription ID, resource group and resource group, you will get the dialog."
|
"Also, if you explicitly specify the subscription ID, resource group and resource group, you will get the dialog."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"ws = Workspace(subscription_id=\"my-subscription-id\",\n",
|
"ws = Workspace(subscription_id=\"my-subscription-id\",\n",
|
||||||
" resource_group=\"my-ml-rg\",\n",
|
" resource_group=\"my-ml-rg\",\n",
|
||||||
" workspace_name=\"my-ml-workspace\")"
|
" workspace_name=\"my-ml-workspace\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"Note the user you're authenticated as must have access to the subscription and resource group. If you receive an error\n",
|
"Note the user you're authenticated as must have access to the subscription and resource group. If you receive an error\n",
|
||||||
"\n",
|
"\n",
|
||||||
"```\n",
|
"```\n",
|
||||||
"AuthenticationException: You don't have access to xxxxxx-xxxx-xxx-xxx-xxxxxxxxxx subscription. All the subscriptions that you have access to = ...\n",
|
"AuthenticationException: You don't have access to xxxxxx-xxxx-xxx-xxx-xxxxxxxxxx subscription. All the subscriptions that you have access to = ...\n",
|
||||||
"```\n",
|
"```\n",
|
||||||
"\n",
|
"\n",
|
||||||
"check that the you used correct login and entered the correct subscription ID."
|
"check that the you used correct login and entered the correct subscription ID."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"In some cases, you may see a version of the error message containing text: ```All the subscriptions that you have access to = []```\n",
|
"In some cases, you may see a version of the error message containing text: ```All the subscriptions that you have access to = []```\n",
|
||||||
"\n",
|
"\n",
|
||||||
"In such a case, you may have to specify the tenant ID of the Azure Active Directory you're using. An example would be accessing a subscription as a guest to a tenant that is not your default. You specify the tenant by explicitly instantiating _InteractiveLoginAuthentication_ with tenant ID as argument ([see instructions how to obtain tenant Id](#get-tenant-id))."
|
"In such a case, you may have to specify the tenant ID of the Azure Active Directory you're using. An example would be accessing a subscription as a guest to a tenant that is not your default. You specify the tenant by explicitly instantiating _InteractiveLoginAuthentication_ with tenant ID as argument ([see instructions how to obtain tenant Id](#get-tenant-id))."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from azureml.core.authentication import InteractiveLoginAuthentication\n",
|
"from azureml.core.authentication import InteractiveLoginAuthentication\n",
|
||||||
"\n",
|
"\n",
|
||||||
"interactive_auth = InteractiveLoginAuthentication(tenant_id=\"my-tenant-id\")\n",
|
"interactive_auth = InteractiveLoginAuthentication(tenant_id=\"my-tenant-id\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"ws = Workspace(subscription_id=\"my-subscription-id\",\n",
|
"ws = Workspace(subscription_id=\"my-subscription-id\",\n",
|
||||||
" resource_group=\"my-ml-rg\",\n",
|
" resource_group=\"my-ml-rg\",\n",
|
||||||
" workspace_name=\"my-ml-workspace\",\n",
|
" workspace_name=\"my-ml-workspace\",\n",
|
||||||
" auth=interactive_auth)"
|
" auth=interactive_auth)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### Azure CLI Authentication\n",
|
"### Azure CLI Authentication\n",
|
||||||
"\n",
|
"\n",
|
||||||
"If you have installed azure-cli package, and used ```az login``` command to log in to your Azure Subscription, you can use _AzureCliAuthentication_ class.\n",
|
"If you have installed azure-cli package, and used ```az login``` command to log in to your Azure Subscription, you can use _AzureCliAuthentication_ class.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Note that interactive authentication described above won't use existing Azure CLI auth tokens. "
|
"Note that interactive authentication described above won't use existing Azure CLI auth tokens. "
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from azureml.core.authentication import AzureCliAuthentication\n",
|
"from azureml.core.authentication import AzureCliAuthentication\n",
|
||||||
"\n",
|
"\n",
|
||||||
"cli_auth = AzureCliAuthentication()\n",
|
"cli_auth = AzureCliAuthentication()\n",
|
||||||
"\n",
|
"\n",
|
||||||
"ws = Workspace(subscription_id=\"my-subscription-id\",\n",
|
"ws = Workspace(subscription_id=\"my-subscription-id\",\n",
|
||||||
" resource_group=\"my-ml-rg\",\n",
|
" resource_group=\"my-ml-rg\",\n",
|
||||||
" workspace_name=\"my-ml-workspace\",\n",
|
" workspace_name=\"my-ml-workspace\",\n",
|
||||||
" auth=cli_auth)\n",
|
" auth=cli_auth)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"print(\"Found workspace {} at location {}\".format(ws.name, ws.location))"
|
"print(\"Found workspace {} at location {}\".format(ws.name, ws.location))"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### Service Principal Authentication\n",
|
"### Service Principal Authentication\n",
|
||||||
"\n",
|
"\n",
|
||||||
"When setting up a machine learning workflow as an automated process, we recommend using Service Principal Authentication. This approach decouples the authentication from any specific user login, and allows managed access control.\n",
|
"When setting up a machine learning workflow as an automated process, we recommend using Service Principal Authentication. This approach decouples the authentication from any specific user login, and allows managed access control.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Note that you must have administrator privileges over the Azure subscription to complete these steps.\n",
|
"Note that you must have administrator privileges over the Azure subscription to complete these steps.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"The first step is to create a service principal. First, go to [Azure Portal](https://portal.azure.com), select **Azure Active Directory** and **App Registrations**. Then select **+New application registration**, give your service principal a name, for example _my-svc-principal_. You can leave application type as is, and specify a dummy value for Sign-on URL, such as _https://invalid_.\n",
|
"The first step is to create a service principal. First, go to [Azure Portal](https://portal.azure.com), select **Azure Active Directory** and **App Registrations**. Then select **+New application registration**, give your service principal a name, for example _my-svc-principal_. You can leave application type as is, and specify a dummy value for Sign-on URL, such as _https://invalid_.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Then click **Create**.\n",
|
"Then click **Create**.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"![service principal creation]<img src=\"images/svc-pr-1.PNG\">"
|
"![service principal creation]<img src=\"images/svc-pr-1.PNG\">"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"The next step is to obtain the _Application ID_ (also called username) and create _password_ for the service principal.\n",
|
"The next step is to obtain the _Application ID_ (also called username) and create _password_ for the service principal.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"From the page for your newly created service principal, copy the _Application ID_. Then select **Settings** and **Keys**, write a description for your key, and select duration. Then click **Save**, and copy the _password_ to a secure location.\n",
|
"From the page for your newly created service principal, copy the _Application ID_. Then select **Settings** and **Keys**, write a description for your key, and select duration. Then click **Save**, and copy the _password_ to a secure location.\n",
|
||||||
"\n",
|
"\n",
|
||||||
""
|
""
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"<a id =\"get-tenant-id\"></a>\n",
|
"<a id =\"get-tenant-id\"></a>\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Also, you need to obtain the tenant ID of your Azure subscription. Go back to **Azure Active Directory**, select **Properties** and copy _Directory ID_.\n",
|
"Also, you need to obtain the tenant ID of your Azure subscription. Go back to **Azure Active Directory**, select **Properties** and copy _Directory ID_.\n",
|
||||||
"\n",
|
"\n",
|
||||||
""
|
""
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"Finally, you need to give the service principal permissions to access your workspace. Navigate to **Resource Groups**, to the resource group for your Machine Learning Workspace. \n",
|
"Finally, you need to give the service principal permissions to access your workspace. Navigate to **Resource Groups**, to the resource group for your Machine Learning Workspace. \n",
|
||||||
"\n",
|
"\n",
|
||||||
"Then select **Access Control (IAM)** and **Add a role assignment**. For _Role_, specify which level of access you need to grant, for example _Contributor_. Start entering your service principal name and once it is found, select it, and click **Save**.\n",
|
"Then select **Access Control (IAM)** and **Add a role assignment**. For _Role_, specify which level of access you need to grant, for example _Contributor_. Start entering your service principal name and once it is found, select it, and click **Save**.\n",
|
||||||
"\n",
|
"\n",
|
||||||
""
|
""
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"Now you are ready to use the service principal authentication. For example, to connect to your Workspace, see code below and enter your own values for tenant ID, application ID, subscription ID, resource group and workspace.\n",
|
"Now you are ready to use the service principal authentication. For example, to connect to your Workspace, see code below and enter your own values for tenant ID, application ID, subscription ID, resource group and workspace.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"**We strongly recommended that you do not insert the secret password to code**. Instead, you can use environment variables to pass it to your code, for example through Azure Key Vault, or through secret build variables in Azure DevOps. For local testing, you can for example use following PowerShell command to set the environment variable.\n",
|
"**We strongly recommended that you do not insert the secret password to code**. Instead, you can use environment variables to pass it to your code, for example through Azure Key Vault, or through secret build variables in Azure DevOps. For local testing, you can for example use following PowerShell command to set the environment variable.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"```\n",
|
"```\n",
|
||||||
"$env:AZUREML_PASSWORD = \"my-password\"\n",
|
"$env:AZUREML_PASSWORD = \"my-password\"\n",
|
||||||
"```"
|
"```"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"import os\n",
|
"import os\n",
|
||||||
"from azureml.core.authentication import ServicePrincipalAuthentication\n",
|
"from azureml.core.authentication import ServicePrincipalAuthentication\n",
|
||||||
"\n",
|
"\n",
|
||||||
"svc_pr_password = os.environ.get(\"AZUREML_PASSWORD\")\n",
|
"svc_pr_password = os.environ.get(\"AZUREML_PASSWORD\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"svc_pr = ServicePrincipalAuthentication(\n",
|
"svc_pr = ServicePrincipalAuthentication(\n",
|
||||||
" tenant_id=\"my-tenant-id\",\n",
|
" tenant_id=\"my-tenant-id\",\n",
|
||||||
" service_principal_id=\"my-application-id\",\n",
|
" service_principal_id=\"my-application-id\",\n",
|
||||||
" service_principal_password=svc_pr_password)\n",
|
" service_principal_password=svc_pr_password)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"ws = Workspace(\n",
|
"ws = Workspace(\n",
|
||||||
" subscription_id=\"my-subscription-id\",\n",
|
" subscription_id=\"my-subscription-id\",\n",
|
||||||
" resource_group=\"my-ml-rg\",\n",
|
" resource_group=\"my-ml-rg\",\n",
|
||||||
" workspace_name=\"my-ml-workspace\",\n",
|
" workspace_name=\"my-ml-workspace\",\n",
|
||||||
" auth=svc_pr\n",
|
" auth=svc_pr\n",
|
||||||
" )\n",
|
" )\n",
|
||||||
"\n",
|
"\n",
|
||||||
"print(\"Found workspace {} at location {}\".format(ws.name, ws.location))"
|
"print(\"Found workspace {} at location {}\".format(ws.name, ws.location))"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"authors": [
|
|
||||||
{
|
|
||||||
"name": "roastala"
|
|
||||||
}
|
|
||||||
],
|
],
|
||||||
"kernelspec": {
|
"metadata": {
|
||||||
"display_name": "Python 3.6",
|
"authors": [
|
||||||
"language": "python",
|
{
|
||||||
"name": "python36"
|
"name": "roastala"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3.6",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python36"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.6.5"
|
||||||
|
}
|
||||||
},
|
},
|
||||||
"language_info": {
|
"nbformat": 4,
|
||||||
"codemirror_mode": {
|
"nbformat_minor": 2
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.6.5"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 2
|
|
||||||
}
|
}
|
||||||
@@ -220,14 +220,14 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
"from azureml.core.runconfig import MpiConfiguration\n",
|
||||||
"from azureml.train.dnn import Chainer\n",
|
"from azureml.train.dnn import Chainer\n",
|
||||||
"\n",
|
"\n",
|
||||||
"estimator = Chainer(source_directory=project_folder,\n",
|
"estimator = Chainer(source_directory=project_folder,\n",
|
||||||
" compute_target=compute_target,\n",
|
" compute_target=compute_target,\n",
|
||||||
" entry_script='train_mnist.py',\n",
|
" entry_script='train_mnist.py',\n",
|
||||||
" node_count=2,\n",
|
" node_count=2,\n",
|
||||||
" process_count_per_node=1,\n",
|
" distributed_training=MpiConfiguration(),\n",
|
||||||
" distributed_backend='mpi',\n",
|
|
||||||
" use_gpu=True)"
|
" use_gpu=True)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -233,14 +233,14 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
"from azureml.core.runconfig import MpiConfiguration\n",
|
||||||
"from azureml.train.dnn import PyTorch\n",
|
"from azureml.train.dnn import PyTorch\n",
|
||||||
"\n",
|
"\n",
|
||||||
"estimator = PyTorch(source_directory=project_folder,\n",
|
"estimator = PyTorch(source_directory=project_folder,\n",
|
||||||
" compute_target=compute_target,\n",
|
" compute_target=compute_target,\n",
|
||||||
" entry_script='pytorch_horovod_mnist.py',\n",
|
" entry_script='pytorch_horovod_mnist.py',\n",
|
||||||
" node_count=2,\n",
|
" node_count=2,\n",
|
||||||
" process_count_per_node=1,\n",
|
" distributed_training=MpiConfiguration(),\n",
|
||||||
" distributed_backend='mpi',\n",
|
|
||||||
" use_gpu=True)"
|
" use_gpu=True)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -285,7 +285,9 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### Create a TensorFlow estimator\n",
|
"### Create a TensorFlow estimator\n",
|
||||||
"The AML SDK's TensorFlow estimator enables you to easily submit TensorFlow training jobs for both single-node and distributed runs. For more information on the TensorFlow estimator, refer [here](https://docs.microsoft.com/azure/machine-learning/service/how-to-train-tensorflow)."
|
"The AML SDK's TensorFlow estimator enables you to easily submit TensorFlow training jobs for both single-node and distributed runs. For more information on the TensorFlow estimator, refer [here](https://docs.microsoft.com/azure/machine-learning/service/how-to-train-tensorflow).\n",
|
||||||
|
"\n",
|
||||||
|
"The TensorFlow estimator also takes a `framework_version` parameter -- if no version is provided, the estimator will default to the latest version supported by AzureML. Use `TensorFlow.get_supported_versions()` to get a list of all versions supported by your current SDK version or see the [SDK documentation](https://docs.microsoft.com/en-us/python/api/azureml-train-core/azureml.train.dnn?view=azure-ml-py) for the versions supported in the most current release."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -294,6 +296,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
"from azureml.core.runconfig import MpiConfiguration\n",
|
||||||
"from azureml.train.dnn import TensorFlow\n",
|
"from azureml.train.dnn import TensorFlow\n",
|
||||||
"\n",
|
"\n",
|
||||||
"script_params={\n",
|
"script_params={\n",
|
||||||
@@ -305,9 +308,8 @@
|
|||||||
" script_params=script_params,\n",
|
" script_params=script_params,\n",
|
||||||
" entry_script='tf_horovod_word2vec.py',\n",
|
" entry_script='tf_horovod_word2vec.py',\n",
|
||||||
" node_count=2,\n",
|
" node_count=2,\n",
|
||||||
" process_count_per_node=1,\n",
|
" distributed_training=MpiConfiguration(),\n",
|
||||||
" distributed_backend='mpi',\n",
|
" framework_version='1.13')"
|
||||||
" use_gpu=True)"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -26,7 +26,7 @@
|
|||||||
"* Go through the [configuration notebook](../../../configuration.ipynb) to:\n",
|
"* Go through the [configuration notebook](../../../configuration.ipynb) to:\n",
|
||||||
" * install the AML SDK\n",
|
" * install the AML SDK\n",
|
||||||
" * create a workspace and its configuration file (`config.json`)\n",
|
" * create a workspace and its configuration file (`config.json`)\n",
|
||||||
"* Review the [tutorial](https://aka.ms/aml-notebook-hyperdrive) on single-node TensorFlow training using the SDK"
|
"* Review the [tutorial](../train-hyperparameter-tune-deploy-with-tensorflow/train-hyperparameter-tune-deploy-with-tensorflow.ipynb) on single-node TensorFlow training using the SDK"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -208,6 +208,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
"from azureml.core.runconfig import TensorflowConfiguration\n",
|
||||||
"from azureml.train.dnn import TensorFlow\n",
|
"from azureml.train.dnn import TensorFlow\n",
|
||||||
"\n",
|
"\n",
|
||||||
"script_params={\n",
|
"script_params={\n",
|
||||||
@@ -215,14 +216,15 @@
|
|||||||
" '--train_steps': 500\n",
|
" '--train_steps': 500\n",
|
||||||
"}\n",
|
"}\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"distributed_training = TensorflowConfiguration()\n",
|
||||||
|
"distributed_training.worker_count = 2\n",
|
||||||
|
"\n",
|
||||||
"estimator = TensorFlow(source_directory=project_folder,\n",
|
"estimator = TensorFlow(source_directory=project_folder,\n",
|
||||||
" compute_target=compute_target,\n",
|
" compute_target=compute_target,\n",
|
||||||
" script_params=script_params,\n",
|
" script_params=script_params,\n",
|
||||||
" entry_script='tf_mnist_replica.py',\n",
|
" entry_script='tf_mnist_replica.py',\n",
|
||||||
" node_count=2,\n",
|
" node_count=2,\n",
|
||||||
" worker_count=2,\n",
|
" distributed_training=distributed_training,\n",
|
||||||
" parameter_server_count=1, \n",
|
|
||||||
" distributed_backend='ps',\n",
|
|
||||||
" use_gpu=True)"
|
" use_gpu=True)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -291,7 +291,7 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# use a custom Docker image\n",
|
"# use a custom Docker image\n",
|
||||||
"from azureml.core.runconfig import ContainerRegistry\n",
|
"from azureml.core.container_registry import ContainerRegistry\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# this is an image available in Docker Hub\n",
|
"# this is an image available in Docker Hub\n",
|
||||||
"image_name = 'continuumio/miniconda3'\n",
|
"image_name = 'continuumio/miniconda3'\n",
|
||||||
@@ -309,7 +309,8 @@
|
|||||||
"est = Estimator(source_directory='.', compute_target='local', \n",
|
"est = Estimator(source_directory='.', compute_target='local', \n",
|
||||||
" entry_script='dummy_train.py',\n",
|
" entry_script='dummy_train.py',\n",
|
||||||
" custom_docker_image=image_name,\n",
|
" custom_docker_image=image_name,\n",
|
||||||
" image_registry_details=image_registry_details,\n",
|
" # uncomment below line to use your private ACR\n",
|
||||||
|
" #image_registry_details=image_registry_details,\n",
|
||||||
" user_managed=user_managed_dependencies\n",
|
" user_managed=user_managed_dependencies\n",
|
||||||
" )\n",
|
" )\n",
|
||||||
"\n",
|
"\n",
|
||||||
@@ -336,7 +337,7 @@
|
|||||||
"metadata": {
|
"metadata": {
|
||||||
"authors": [
|
"authors": [
|
||||||
{
|
{
|
||||||
"name": "minxia"
|
"name": "maxluk"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
@@ -356,7 +357,7 @@
|
|||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.6.8"
|
"version": "3.6.8"
|
||||||
},
|
},
|
||||||
"msauthor": "haining"
|
"msauthor": "minxia"
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
"nbformat_minor": 2
|
"nbformat_minor": 2
|
||||||
|
|||||||
@@ -396,7 +396,7 @@
|
|||||||
"est = TensorFlow(source_directory=script_folder,\n",
|
"est = TensorFlow(source_directory=script_folder,\n",
|
||||||
" script_params=script_params,\n",
|
" script_params=script_params,\n",
|
||||||
" compute_target=compute_target, \n",
|
" compute_target=compute_target, \n",
|
||||||
" conda_packages=['keras', 'matplotlib'],\n",
|
" pip_packages=['keras', 'matplotlib'],\n",
|
||||||
" entry_script='keras_mnist.py', \n",
|
" entry_script='keras_mnist.py', \n",
|
||||||
" use_gpu=True)"
|
" use_gpu=True)"
|
||||||
]
|
]
|
||||||
@@ -792,7 +792,7 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"best_run = hdr.get_best_run_by_primary_metric()\n",
|
"best_run = hdr.get_best_run_by_primary_metric()\n",
|
||||||
"print(best_run.get_details()['runDefinition']['Arguments'])"
|
"print(best_run.get_details()['runDefinition']['arguments'])"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -1144,7 +1144,7 @@
|
|||||||
"metadata": {
|
"metadata": {
|
||||||
"authors": [
|
"authors": [
|
||||||
{
|
{
|
||||||
"name": "haining"
|
"name": "maxluk"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
@@ -1164,7 +1164,7 @@
|
|||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.6.7"
|
"version": "3.6.7"
|
||||||
},
|
},
|
||||||
"msauthor": "haining"
|
"msauthor": "maxluk"
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
"nbformat_minor": 2
|
"nbformat_minor": 2
|
||||||
|
|||||||
@@ -396,7 +396,10 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"## Create TensorFlow estimator\n",
|
"## Create TensorFlow estimator\n",
|
||||||
"Next, we construct an `azureml.train.dnn.TensorFlow` estimator object, use the Batch AI cluster as compute target, and pass the mount-point of the datastore to the training code as a parameter.\n",
|
"Next, we construct an `azureml.train.dnn.TensorFlow` estimator object, use the Batch AI cluster as compute target, and pass the mount-point of the datastore to the training code as a parameter.\n",
|
||||||
"The TensorFlow estimator is providing a simple way of launching a TensorFlow training job on a compute target. It will automatically provide a docker image that has TensorFlow installed -- if additional pip or conda packages are required, their names can be passed in via the `pip_packages` and `conda_packages` arguments and they will be included in the resulting docker."
|
"\n",
|
||||||
|
"The TensorFlow estimator is providing a simple way of launching a TensorFlow training job on a compute target. It will automatically provide a docker image that has TensorFlow installed -- if additional pip or conda packages are required, their names can be passed in via the `pip_packages` and `conda_packages` arguments and they will be included in the resulting docker.\n",
|
||||||
|
"\n",
|
||||||
|
"The TensorFlow estimator also takes a `framework_version` parameter -- if no version is provided, the estimator will default to the latest version supported by AzureML. Use `TensorFlow.get_supported_versions()` to get a list of all versions supported by your current SDK version or see the [SDK documentation](https://docs.microsoft.com/en-us/python/api/azureml-train-core/azureml.train.dnn?view=azure-ml-py) for the versions supported in the most current release."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -419,7 +422,8 @@
|
|||||||
" script_params=script_params,\n",
|
" script_params=script_params,\n",
|
||||||
" compute_target=compute_target,\n",
|
" compute_target=compute_target,\n",
|
||||||
" entry_script='tf_mnist.py', \n",
|
" entry_script='tf_mnist.py', \n",
|
||||||
" use_gpu=True)"
|
" use_gpu=True, \n",
|
||||||
|
" framework_version='1.13')"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -1158,7 +1162,7 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.6.8"
|
"version": "3.6.6"
|
||||||
},
|
},
|
||||||
"msauthor": "minxia"
|
"msauthor": "minxia"
|
||||||
},
|
},
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -319,9 +319,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### Submit script to run in the system-managed environment\n",
|
"### Submit script to run in the system-managed environment\n",
|
||||||
"A new conda environment is built based on the conda dependencies object. If you are running this for the first time, this might take up to 5 mninutes. But this conda environment is reused so long as you don't change the conda dependencies.\n",
|
"A new conda environment is built based on the conda dependencies object. If you are running this for the first time, this might take up to 5 minutes. But this conda environment is reused so long as you don't change the conda dependencies."
|
||||||
"\n",
|
|
||||||
"\n"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -332,9 +330,9 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"import subprocess\n",
|
"import subprocess\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Check if Docker is installed and Linux containers are enables\n",
|
"# Check if Docker is installed and Linux containers are enabled\n",
|
||||||
"if subprocess.run(\"docker -v\", shell=True) == 0:\n",
|
"if subprocess.run(\"docker -v\", shell=True).returncode == 0:\n",
|
||||||
" out = subprocess.check_output(\"docker system info\", shell=True, encoding=\"ascii\").split(\"\\n\")\n",
|
" out = subprocess.check_output(\"docker system info\", shell=True).decode('ascii')\n",
|
||||||
" if not \"OSType: linux\" in out:\n",
|
" if not \"OSType: linux\" in out:\n",
|
||||||
" print(\"Switch Docker engine to use Linux containers.\")\n",
|
" print(\"Switch Docker engine to use Linux containers.\")\n",
|
||||||
" else:\n",
|
" else:\n",
|
||||||
@@ -435,6 +433,29 @@
|
|||||||
"))"
|
"))"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Let's compare it to the others"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"%matplotlib inline\n",
|
||||||
|
"\n",
|
||||||
|
"import matplotlib\n",
|
||||||
|
"import matplotlib.pyplot as plt\n",
|
||||||
|
"\n",
|
||||||
|
"plt.plot(metrics['alpha'], metrics['mse'], marker='o')\n",
|
||||||
|
"plt.ylabel(\"MSE\")\n",
|
||||||
|
"plt.xlabel(\"Alpha\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
@@ -455,7 +476,7 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"We know the model `ridge_0.40.pkl` is the best performing model from the eariler queries. So let's register it with the workspace."
|
"We know the model `ridge_0.40.pkl` is the best performing model from the earlier queries. So let's register it with the workspace."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -488,7 +509,7 @@
|
|||||||
"metadata": {
|
"metadata": {
|
||||||
"authors": [
|
"authors": [
|
||||||
{
|
{
|
||||||
"name": "haining"
|
"name": "roastala"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
@@ -506,7 +527,7 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.6.6"
|
"version": "3.6.8"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
|
|||||||
@@ -110,7 +110,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## Upload data files into datastore\n",
|
"## Upload data files into datastore\n",
|
||||||
"Every workspace comes with a default datastore (and you can register more) which is backed by the Azure blob storage account associated with the workspace. We can use it to transfer data from local to the cloud, and access it from the compute target."
|
"Every workspace comes with a default [datastore](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-access-data) (and you can register more) which is backed by the Azure blob storage account associated with the workspace. We can use it to transfer data from local to the cloud, and access it from the compute target."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -236,7 +236,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## Configure & Run\n",
|
"## Configure & Run\n",
|
||||||
"First let's create a `DataReferenceConfiguration` object to inform the system what data folder to download to the copmute target."
|
"First let's create a `DataReferenceConfiguration` object to inform the system what data folder to download to the compute target."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -615,7 +615,7 @@
|
|||||||
"metadata": {
|
"metadata": {
|
||||||
"authors": [
|
"authors": [
|
||||||
{
|
{
|
||||||
"name": "haining"
|
"name": "roastala"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
|
|||||||
@@ -673,7 +673,7 @@
|
|||||||
"metadata": {
|
"metadata": {
|
||||||
"authors": [
|
"authors": [
|
||||||
{
|
{
|
||||||
"name": "haining"
|
"name": "roastala"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
|
|||||||
201
how-to-use-azureml/work-with-data/dataprep/README.md
Normal file
201
how-to-use-azureml/work-with-data/dataprep/README.md
Normal file
@@ -0,0 +1,201 @@
|
|||||||
|
# Azure Machine Learning Data Prep SDK
|
||||||
|
|
||||||
|
The Azure Machine Learning Data Prep SDK helps data scientists explore, cleanse and transform data for machine learning workflows in any Python environment.
|
||||||
|
|
||||||
|
Key benefits to the SDK:
|
||||||
|
- Cross-platform functionality. Write with a single SDK and run it on Windows, macOS, or Linux.
|
||||||
|
- Intelligent transformations powered by AI, including grouping similar values to their canonical form and deriving columns by examples without custom code.
|
||||||
|
- Capability to work with large, multiple files of different schema.
|
||||||
|
- Scalability on a single machine by streaming data during processing rather than loading into memory.
|
||||||
|
- Seamless integration with other Azure Machine Learning services. You can simply pass your prepared data file into `AutoMLConfig` object for automated machine learning training.
|
||||||
|
|
||||||
|
You will find in this repo:
|
||||||
|
- [Getting Started Tutorial](tutorials/getting-started/getting-started.ipynb) for a quick introduction to the main features of Data Prep SDK.
|
||||||
|
- [Case Study Notebooks](case-studies/new-york-taxi) that present an end-to-end data preparation tutorial where users start with small dataset, profile data with statistics summary, cleanse and perform feature engineering. All transformation steps are saved in a dataflow object. Users can easily reapply the same steps on the full dataset, and run it on Spark.
|
||||||
|
- [How-To Guide Notebooks](how-to-guides) for more in-depth sample code at feature level.
|
||||||
|
|
||||||
|
## Installation
|
||||||
|
Here are the [SDK installation steps](https://docs.microsoft.com/python/api/overview/azure/dataprep/intro?view=azure-dataprep-py#install).
|
||||||
|
|
||||||
|
## Documentation
|
||||||
|
Here is more information on how to use the new Data Prep SDK:
|
||||||
|
- [SDK overview and API reference docs](http://aka.ms/data-prep-sdk) that show different classes, methods, and function parameters for the SDK.
|
||||||
|
- [Tutorial: Prep NYC taxi data](https://docs.microsoft.com/azure/machine-learning/service/tutorial-data-prep) for regression modeling and then run automated machine learning to build the model.
|
||||||
|
- [How to load data](https://docs.microsoft.com/azure/machine-learning/service/how-to-load-data) is an overview guide on how to load data using the Data Prep SDK.
|
||||||
|
- [How to transform data](https://docs.microsoft.com/azure/machine-learning/service/how-to-transform-data) is an overview guide on how to transform data.
|
||||||
|
- [How to write data](https://docs.microsoft.com/azure/machine-learning/service/how-to-write-data) is an overview guide on how to write data to different storage locations.
|
||||||
|
|
||||||
|
## Support
|
||||||
|
|
||||||
|
If you have any questions or feedback, send us an email at: [askamldataprep@microsoft.com](mailto:askamldataprep@microsoft.com).
|
||||||
|
|
||||||
|
## Release Notes
|
||||||
|
|
||||||
|
### 2019-04-08 (version 1.1.1)
|
||||||
|
|
||||||
|
New features
|
||||||
|
- You can read multiple Datastore/DataPath/DataReference sources using read_* transforms.
|
||||||
|
- You can perform the following operations on columns to create a new column: division, floor, modulo, power, length.
|
||||||
|
- Data Prep is now part of the Azure ML diagnostics suite and will log diagnostic information by default.
|
||||||
|
- To turn this off, set this environment variable to true: DISABLE_DPREP_LOGGER
|
||||||
|
|
||||||
|
Bug fixes and improvements
|
||||||
|
- Improved code documentation for commonly used classes and functions.
|
||||||
|
- Fixed a bug in auto_read_file that failed to read Excel files.
|
||||||
|
- Added option to overwrite the folder in read_pandas_dataframe.
|
||||||
|
- Improved performance of dotnetcore2 dependency installation, and added support for Fedora 27/28 and Ubuntu 1804.
|
||||||
|
- Improved the performance of reading from Azure Blobs.
|
||||||
|
- Column type detection now supports columns of type Long.
|
||||||
|
- Fixed a bug where some date values were being displayed as timestamps instead of Python datetime objects.
|
||||||
|
- Fixed a bug where some type counts were being displayed as doubles instead of integers.
|
||||||
|
|
||||||
|
### 2019-03-25 (version 1.1.0)
|
||||||
|
|
||||||
|
Breaking changes
|
||||||
|
- The concept of the Data Prep Package has been deprecated and is no longer supported. Instead of persisting multiple Dataflows in one Package, you can persist Dataflows individually.
|
||||||
|
- How-to guide: [Opening and Saving Dataflows notebook](https://aka.ms/aml-data-prep-open-save-dataflows-nb)
|
||||||
|
|
||||||
|
New features
|
||||||
|
- Data Prep can now recognize columns that match a particular Semantic Type, and split accordingly. The STypes currently supported include: email address, geographic coordinates (latitude & longitude), IPv4 and IPv6 addresses, US phone number, and US zip code.
|
||||||
|
- How-to guide: [Semantic Types notebook](https://aka.ms/aml-data-prep-semantic-types-nb)
|
||||||
|
- Data Prep now supports the following operations to generate a resultant column from two numeric columns: subtract, multiply, divide, and modulo.
|
||||||
|
- You can call `verify_has_data()` on a Dataflow to check whether the Dataflow would produce records if executed.
|
||||||
|
|
||||||
|
Bug fixes and improvements
|
||||||
|
- You can now specify the number of bins to use in a histogram for numeric column profiles.
|
||||||
|
- The `read_pandas_dataframe` transform now requires the DataFrame to have string- or byte- typed column names.
|
||||||
|
- Fixed a bug in the `fill_nulls` transform, where values were not correctly filled in if the column was missing.
|
||||||
|
|
||||||
|
### 2019-03-11 (version 1.0.17)
|
||||||
|
|
||||||
|
New features
|
||||||
|
- Now supports adding two numeric columns to generate a resultant column using the expression language.
|
||||||
|
|
||||||
|
Bug fixes and improvements
|
||||||
|
- Improved the documentation and parameter checking for random_split.
|
||||||
|
|
||||||
|
### 2019-02-27 (version 1.0.16)
|
||||||
|
|
||||||
|
Bug fix
|
||||||
|
- Fixed a Service Principal authentication issue that was caused by an API change.
|
||||||
|
|
||||||
|
### 2019-02-25 (version 1.0.15)
|
||||||
|
|
||||||
|
New features
|
||||||
|
- Data Prep now supports writing file streams from a dataflow. Also provides the ability to manipulate the file stream names to create new file names.
|
||||||
|
- How-to guide: [Working With File Streams notebook](https://aka.ms/aml-data-prep-file-stream-nb)
|
||||||
|
|
||||||
|
Bug fixes and improvements
|
||||||
|
- Improved performance of t-Digest on large data sets.
|
||||||
|
- Data Prep now supports reading data from a DataPath.
|
||||||
|
- One hot encoding now works on boolean and numeric columns.
|
||||||
|
- Other miscellaneous bug fixes.
|
||||||
|
|
||||||
|
### 2019-02-11 (version 1.0.12)
|
||||||
|
|
||||||
|
New features
|
||||||
|
- Data Prep now supports reading from an Azure SQL database using Datastore.
|
||||||
|
|
||||||
|
Changes
|
||||||
|
- Significantly improved the memory performance of certain operations on large data.
|
||||||
|
- `read_pandas_dataframe()` now requires `temp_folder` to be specified.
|
||||||
|
- The `name` property on `ColumnProfile` has been deprecated - use `column_name` instead.
|
||||||
|
|
||||||
|
### 2019-01-28 (version 1.0.8)
|
||||||
|
|
||||||
|
Bug fixes
|
||||||
|
- Significantly improved the performance of getting data profiles.
|
||||||
|
- Fixed minor bugs related to error reporting.
|
||||||
|
|
||||||
|
### 2019-01-14 (version 1.0.7)
|
||||||
|
|
||||||
|
New features
|
||||||
|
- Datastore improvements (documented in [Datastore how-to-guide](https://aka.ms/aml-data-prep-datastore-nb))
|
||||||
|
- Added ability to read from and write to Azure File Share and ADLS Datastores in scale-up.
|
||||||
|
- When using Datastores, Data Prep now supports using service principal authentication instead of interactive authentication.
|
||||||
|
- Added support for wasb and wasbs urls.
|
||||||
|
|
||||||
|
### 2019-01-09 (version 1.0.6)
|
||||||
|
|
||||||
|
Bug fixes
|
||||||
|
- Fixed bug with reading from public readable Azure Blob containers on Spark.
|
||||||
|
|
||||||
|
### 2018-12-19 (version 1.0.4)
|
||||||
|
|
||||||
|
New features
|
||||||
|
- `to_bool` function now allows mismatched values to be converted to Error values. This is the new default mismatch behavior for `to_bool` and `set_column_types`, whereas the previous default behavior was to convert mismatched values to False.
|
||||||
|
- When calling `to_pandas_dataframe`, there is a new option to interpret null/missing values in numeric columns as NaN.
|
||||||
|
- Added ability to check the return type of some expressions to ensure type consistency and fail early.
|
||||||
|
- You can now call `parse_json` to parse values in a column as JSON objects and expand them into multiple columns.
|
||||||
|
|
||||||
|
Bug fixes
|
||||||
|
- Fixed a bug that crashed `set_column_types` in Python 3.5.2.
|
||||||
|
- Fixed a bug that crashed when connecting to Datastore using an AML image.
|
||||||
|
|
||||||
|
### 2018-12-07 (version 0.5.3)
|
||||||
|
|
||||||
|
Fixed missing dependency issue for .NET Core2 on Ubuntu 16.
|
||||||
|
|
||||||
|
### 2018-12-03 (version 0.5.2)
|
||||||
|
|
||||||
|
Breaking changes
|
||||||
|
- `SummaryFunction.N` was renamed to `SummaryFunction.Count`.
|
||||||
|
|
||||||
|
Bug fixes
|
||||||
|
- Use latest AML Run Token when reading from and writing to datastores on remote runs. Previously, if the AML Run Token is updated in Python, the Data Prep runtime will not be updated with the updated AML Run Token.
|
||||||
|
- Additional clearer error messages
|
||||||
|
- to_spark_dataframe() will no longer crash when Spark uses Kryo serialization
|
||||||
|
- Value Count Inspector can now show more than 1000 unique values
|
||||||
|
- Random Split no longer fails if the original Dataflow doesn’t have a name
|
||||||
|
|
||||||
|
### 2018-11-19 (version 0.5.0)
|
||||||
|
|
||||||
|
New features
|
||||||
|
- Created a new DataPrep CLI to execute DataPrep packages and view the data profile for a dataset or dataflow
|
||||||
|
- Redesigned SetColumnType API to improve usability
|
||||||
|
- Renamed smart_read_file to auto_read_file
|
||||||
|
- Now includes skew and kurtosis in the Data Profile
|
||||||
|
- Can sample with stratified sampling
|
||||||
|
- Can read from zip files that contain CSV files
|
||||||
|
- Can split datasets row-wise with Random Split (e.g. into test-train sets)
|
||||||
|
- Can get all the column data types from a dataflow or a data profile by calling .dtypes
|
||||||
|
- Can get the row count from a dataflow or a data profile by calling .row_count
|
||||||
|
|
||||||
|
Bug fixes
|
||||||
|
- Fixed long to double conversion
|
||||||
|
- Fixed assert after any add column
|
||||||
|
- Fixed an issue with FuzzyGrouping, where it would not detect groups in some cases
|
||||||
|
- Fixed sort function to respect multi-column sort order
|
||||||
|
- Fixed and/or expressions to be similar to how Pandas handles them
|
||||||
|
- Fixed reading from dbfs path.
|
||||||
|
- Made error messages more understandable
|
||||||
|
- Now no longer fails when reading on remote compute target using AML token
|
||||||
|
- Now no longer fails on Linux DSVM
|
||||||
|
- Now no longer crashes when non-string values are in string predicates
|
||||||
|
- Now handles assertion errors when Dataflow should fail correctly
|
||||||
|
- Now supports dbutils mounted storage locations on Azure Databricks
|
||||||
|
|
||||||
|
### 2018-11-05 (version 0.4.0)
|
||||||
|
|
||||||
|
New features
|
||||||
|
- Type Count added to Data Profile
|
||||||
|
- Value Count and Histogram is now available
|
||||||
|
- More percentiles in Data Profile
|
||||||
|
- The Median is available in Summarize
|
||||||
|
- Python 3.7 is now supported
|
||||||
|
- When you save a dataflow that contains datastores to a Data Prep package, the datastore information will be persisted as part of the Data Prep package
|
||||||
|
- Writing to datastore is now supported
|
||||||
|
|
||||||
|
Bug fixes
|
||||||
|
- 64bit unsigned integer overflows are now handled properly on Linux
|
||||||
|
- Fixed incorrect text label for plain text files in smart_read
|
||||||
|
- String column type now shows up in metrics view
|
||||||
|
- Type count now is fixed to show ValueKinds mapped to single FieldType instead of individual ones
|
||||||
|
- Write_to_csv no longer fails when path is provided as a string
|
||||||
|
- When using Replace, leaving “find” blank will no longer fail
|
||||||
|
|
||||||
|
## Datasets License Information
|
||||||
|
|
||||||
|
IMPORTANT: Please read the notice and find out more about this NYC Taxi and Limousine Commission dataset here: http://www.nyc.gov/html/tlc/html/about/trip_record_data.shtml
|
||||||
|
|
||||||
|
IMPORTANT: Please read the notice and find out more about this Chicago Police Department dataset here: https://catalog.data.gov/dataset/crimes-2001-to-present-398a4
|
||||||
@@ -0,0 +1,508 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Cleaning up New York Taxi Cab data\n",
|
||||||
|
"Copyright (c) Microsoft Corporation. All rights reserved.<br>\n",
|
||||||
|
"Licensed under the MIT License."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Let's use DataPrep to clean and featurize the data which can then be used to predict taxi trip duration. We will not use the For Hire Vehicle (FHV) datasets as they are not really taxi rides and they don't provide drop-off time and geo-coordinates."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from IPython.display import display\n",
|
||||||
|
"from os import path\n",
|
||||||
|
"from tempfile import mkdtemp\n",
|
||||||
|
"\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"import azureml.dataprep as dprep"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Let's take a quick peek at yellow cab data and green cab data to see what the data looks like. DataPrep supports globing, so you will notice below that we have added a `*` in the path.\n",
|
||||||
|
"\n",
|
||||||
|
"*We are using a small sample of the taxi data for this demo. You can find a bigger sample ~6GB by changing \"green-small\" to \"green-sample\" and \"yellow-small\" to \"yellow-sample\" in the paths below.*"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"pd.set_option('display.max_columns', None)\n",
|
||||||
|
"\n",
|
||||||
|
"cache_location = mkdtemp()\n",
|
||||||
|
"dataset_root = \"https://dprepdata.blob.core.windows.net/demo\"\n",
|
||||||
|
"\n",
|
||||||
|
"green_path = \"/\".join([dataset_root, \"green-small/*\"])\n",
|
||||||
|
"yellow_path = \"/\".join([dataset_root, \"yellow-small/*\"])\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"Retrieving data from the following two sources:\")\n",
|
||||||
|
"print(green_path)\n",
|
||||||
|
"print(yellow_path)\n",
|
||||||
|
"\n",
|
||||||
|
"green_df = dprep.read_csv(path=green_path, header=dprep.PromoteHeadersMode.GROUPED)\n",
|
||||||
|
"yellow_df = dprep.auto_read_file(path=yellow_path)\n",
|
||||||
|
"\n",
|
||||||
|
"display(green_df.head(5))\n",
|
||||||
|
"display(yellow_df.head(5))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Data Cleanup"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Let's define some shortcut transforms that will apply to all Dataflows."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"all_columns = dprep.ColumnSelector(term=\".*\", use_regex=True)\n",
|
||||||
|
"drop_if_all_null = [all_columns, dprep.ColumnRelationship(dprep.ColumnRelationship.ALL)]\n",
|
||||||
|
"useful_columns = [\n",
|
||||||
|
" \"cost\", \"distance\"\"distance\", \"dropoff_datetime\", \"dropoff_latitude\", \"dropoff_longitude\",\n",
|
||||||
|
" \"passengers\", \"pickup_datetime\", \"pickup_latitude\", \"pickup_longitude\", \"store_forward\", \"vendor\"\n",
|
||||||
|
"]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Let's first work with the green taxi data and get it into a good shape that then can be combined with the yellow taxi data."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"tmp_df = (green_df\n",
|
||||||
|
" .replace_na(columns=all_columns)\n",
|
||||||
|
" .drop_nulls(*drop_if_all_null)\n",
|
||||||
|
" .rename_columns(column_pairs={\n",
|
||||||
|
" \"VendorID\": \"vendor\",\n",
|
||||||
|
" \"lpep_pickup_datetime\": \"pickup_datetime\",\n",
|
||||||
|
" \"Lpep_dropoff_datetime\": \"dropoff_datetime\",\n",
|
||||||
|
" \"lpep_dropoff_datetime\": \"dropoff_datetime\",\n",
|
||||||
|
" \"Store_and_fwd_flag\": \"store_forward\",\n",
|
||||||
|
" \"store_and_fwd_flag\": \"store_forward\",\n",
|
||||||
|
" \"Pickup_longitude\": \"pickup_longitude\",\n",
|
||||||
|
" \"Pickup_latitude\": \"pickup_latitude\",\n",
|
||||||
|
" \"Dropoff_longitude\": \"dropoff_longitude\",\n",
|
||||||
|
" \"Dropoff_latitude\": \"dropoff_latitude\",\n",
|
||||||
|
" \"Passenger_count\": \"passengers\",\n",
|
||||||
|
" \"Fare_amount\": \"cost\",\n",
|
||||||
|
" \"Trip_distance\": \"distance\"\n",
|
||||||
|
" })\n",
|
||||||
|
" .keep_columns(columns=useful_columns))\n",
|
||||||
|
"tmp_df.head(5)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"green_df = tmp_df"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Let's do the same thing to yellow taxi data."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"tmp_df = (yellow_df\n",
|
||||||
|
" .replace_na(columns=all_columns)\n",
|
||||||
|
" .drop_nulls(*drop_if_all_null)\n",
|
||||||
|
" .rename_columns(column_pairs={\n",
|
||||||
|
" \"vendor_name\": \"vendor\",\n",
|
||||||
|
" \"VendorID\": \"vendor\",\n",
|
||||||
|
" \"vendor_id\": \"vendor\",\n",
|
||||||
|
" \"Trip_Pickup_DateTime\": \"pickup_datetime\",\n",
|
||||||
|
" \"tpep_pickup_datetime\": \"pickup_datetime\",\n",
|
||||||
|
" \"Trip_Dropoff_DateTime\": \"dropoff_datetime\",\n",
|
||||||
|
" \"tpep_dropoff_datetime\": \"dropoff_datetime\",\n",
|
||||||
|
" \"store_and_forward\": \"store_forward\",\n",
|
||||||
|
" \"store_and_fwd_flag\": \"store_forward\",\n",
|
||||||
|
" \"Start_Lon\": \"pickup_longitude\",\n",
|
||||||
|
" \"Start_Lat\": \"pickup_latitude\",\n",
|
||||||
|
" \"End_Lon\": \"dropoff_longitude\",\n",
|
||||||
|
" \"End_Lat\": \"dropoff_latitude\",\n",
|
||||||
|
" \"Passenger_Count\": \"passengers\",\n",
|
||||||
|
" \"passenger_count\": \"passengers\",\n",
|
||||||
|
" \"Fare_Amt\": \"cost\",\n",
|
||||||
|
" \"fare_amount\": \"cost\",\n",
|
||||||
|
" \"Trip_Distance\": \"distance\",\n",
|
||||||
|
" \"trip_distance\": \"distance\"\n",
|
||||||
|
" })\n",
|
||||||
|
" .keep_columns(columns=useful_columns))\n",
|
||||||
|
"tmp_df.head(5)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"yellow_df = tmp_df"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Let's now append the rows from the `yellow_df` to `green_df`."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"combined_df = green_df.append_rows(dataflows=[yellow_df])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Let's take a look at the pickup and drop-off coordinates' data profile to see how the data is distributed."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"decimal_type = dprep.TypeConverter(data_type=dprep.FieldType.DECIMAL)\n",
|
||||||
|
"combined_df = combined_df.set_column_types(type_conversions={\n",
|
||||||
|
" \"pickup_longitude\": decimal_type,\n",
|
||||||
|
" \"pickup_latitude\": decimal_type,\n",
|
||||||
|
" \"dropoff_longitude\": decimal_type,\n",
|
||||||
|
" \"dropoff_latitude\": decimal_type\n",
|
||||||
|
"})\n",
|
||||||
|
"combined_df.keep_columns(columns=[\n",
|
||||||
|
" \"pickup_longitude\", \"pickup_latitude\", \n",
|
||||||
|
" \"dropoff_longitude\", \"dropoff_latitude\"\n",
|
||||||
|
"]).get_profile()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"From the data profile, we can see that there are coordinates that are missing and coordinates that are not in New York. Let's filter out coordinates not in the [city border](https://mapmakerapp.com?map=5b60a055a191245990310739f658)."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"tmp_df = (combined_df\n",
|
||||||
|
" .drop_nulls(\n",
|
||||||
|
" columns=[\"pickup_longitude\", \"pickup_latitude\", \"dropoff_longitude\", \"dropoff_latitude\"],\n",
|
||||||
|
" column_relationship=dprep.ColumnRelationship(dprep.ColumnRelationship.ANY)\n",
|
||||||
|
" ) \n",
|
||||||
|
" .filter(dprep.f_and(\n",
|
||||||
|
" dprep.col(\"pickup_longitude\") <= -73.72,\n",
|
||||||
|
" dprep.col(\"pickup_longitude\") >= -74.09,\n",
|
||||||
|
" dprep.col(\"pickup_latitude\") <= 40.88,\n",
|
||||||
|
" dprep.col(\"pickup_latitude\") >= 40.53,\n",
|
||||||
|
" dprep.col(\"dropoff_longitude\") <= -73.72,\n",
|
||||||
|
" dprep.col(\"dropoff_longitude\") >= -74.09,\n",
|
||||||
|
" dprep.col(\"dropoff_latitude\") <= 40.88,\n",
|
||||||
|
" dprep.col(\"dropoff_latitude\") >= 40.53\n",
|
||||||
|
" )))\n",
|
||||||
|
"tmp_df.keep_columns(columns=[\n",
|
||||||
|
" \"pickup_longitude\", \"pickup_latitude\", \n",
|
||||||
|
" \"dropoff_longitude\", \"dropoff_latitude\"\n",
|
||||||
|
"]).get_profile()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"combined_df = tmp_df"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Let's take a look at the data profile for the `store_forward` column."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"combined_df.keep_columns(columns='store_forward').get_profile()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"From the data profile of `store_forward` above, we can see that the data is inconsistent and there are missing values. Let's fix them."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"combined_df = combined_df.replace(columns=\"store_forward\", find=\"0\", replace_with=\"N\").fill_nulls(\"store_forward\", \"N\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Let's now split the pick up and drop off datetimes into a date column and a time column. We will use `split_column_by_example` to perform the split. If the `example` parameter of `split_column_by_example` is omitted, we will automatically try to figure out where to split based on the data."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"tmp_df = (combined_df\n",
|
||||||
|
" .split_column_by_example(source_column=\"pickup_datetime\")\n",
|
||||||
|
" .split_column_by_example(source_column=\"dropoff_datetime\"))\n",
|
||||||
|
"tmp_df.head(5)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"combined_df = tmp_df"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Let's rename the columns generated by `split_column_by_example` into meaningful names."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"tmp_df = (combined_df\n",
|
||||||
|
" .rename_columns(column_pairs={\n",
|
||||||
|
" \"pickup_datetime_1\": \"pickup_date\",\n",
|
||||||
|
" \"pickup_datetime_2\": \"pickup_time\",\n",
|
||||||
|
" \"dropoff_datetime_1\": \"dropoff_date\",\n",
|
||||||
|
" \"dropoff_datetime_2\": \"dropoff_time\"\n",
|
||||||
|
" }))\n",
|
||||||
|
"tmp_df.head(5)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"combined_df = tmp_df"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Feature Engineering"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Datetime features"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Let's split the pickup and drop-off date further into day of week, day of month, and month. For pickup and drop-off time columns, we will split it into hour, minute, and second."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"tmp_df = (combined_df\n",
|
||||||
|
" .derive_column_by_example(\n",
|
||||||
|
" source_columns=\"pickup_date\", \n",
|
||||||
|
" new_column_name=\"pickup_weekday\", \n",
|
||||||
|
" example_data=[(\"2009-01-04\", \"Sunday\"), (\"2013-08-22\", \"Thursday\")]\n",
|
||||||
|
" )\n",
|
||||||
|
" .derive_column_by_example(\n",
|
||||||
|
" source_columns=\"dropoff_date\",\n",
|
||||||
|
" new_column_name=\"dropoff_weekday\",\n",
|
||||||
|
" example_data=[(\"2013-08-22\", \"Thursday\"), (\"2013-11-03\", \"Sunday\")]\n",
|
||||||
|
" )\n",
|
||||||
|
" .split_column_by_example(source_column=\"pickup_date\")\n",
|
||||||
|
" .split_column_by_example(source_column=\"pickup_time\")\n",
|
||||||
|
" .split_column_by_example(source_column=\"dropoff_date\")\n",
|
||||||
|
" .split_column_by_example(source_column=\"dropoff_time\")\n",
|
||||||
|
" .split_column_by_example(source_column=\"pickup_time_1\")\n",
|
||||||
|
" .split_column_by_example(source_column=\"dropoff_time_1\")\n",
|
||||||
|
" .drop_columns(columns=[\n",
|
||||||
|
" \"pickup_date\", \"pickup_time\", \"dropoff_date\", \"dropoff_time\", \n",
|
||||||
|
" \"pickup_date_1\", \"dropoff_date_1\", \"pickup_time_1\", \"dropoff_time_1\"\n",
|
||||||
|
" ])\n",
|
||||||
|
" .rename_columns(column_pairs={\n",
|
||||||
|
" \"pickup_date_2\": \"pickup_month\",\n",
|
||||||
|
" \"pickup_date_3\": \"pickup_monthday\",\n",
|
||||||
|
" \"pickup_time_1_1\": \"pickup_hour\",\n",
|
||||||
|
" \"pickup_time_1_2\": \"pickup_minute\",\n",
|
||||||
|
" \"pickup_time_2\": \"pickup_second\",\n",
|
||||||
|
" \"dropoff_date_2\": \"dropoff_month\",\n",
|
||||||
|
" \"dropoff_date_3\": \"dropoff_monthday\",\n",
|
||||||
|
" \"dropoff_time_1_1\": \"dropoff_hour\",\n",
|
||||||
|
" \"dropoff_time_1_2\": \"dropoff_minute\",\n",
|
||||||
|
" \"dropoff_time_2\": \"dropoff_second\"\n",
|
||||||
|
" }))\n",
|
||||||
|
"tmp_df.head(5)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"combined_df = tmp_df"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"From the data above, we can see that the pickup and drop-off date and time components produced from the transforms above looks good. Let's drop the `pickup_datetime` and `dropoff_datetime` columns as they are no longer needed."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"tmp_df = combined_df.drop_columns(columns=[\"pickup_datetime\", \"dropoff_datetime\"])\n",
|
||||||
|
"tmp_df.head(5)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"combined_df = tmp_df"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Let's now save the transformation steps into a DataPrep package so we can use it to to run on spark."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"dflow_path = path.join(mkdtemp(), \"new_york_taxi.dprep\")\n",
|
||||||
|
"combined_df.save(file_path=dflow_path)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "sihhu"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3.6",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python36"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.6.4"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
@@ -0,0 +1,129 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Scale-Out Data Preparation\n",
|
||||||
|
"Copyright (c) Microsoft Corporation. All rights reserved.<br>\n",
|
||||||
|
"Licensed under the MIT License."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Once we are done with preparing and featurizing the data locally, we can run the same steps on the full dataset in scale-out mode. The new york taxi cab data is about 300GB in total, which is perfect for scale-out. Let's start by downloading the package we saved earlier to disk. Feel free to run the `new_york_taxi_cab.ipynb` notebook to generate the package yourself, in which case you may comment out the download code and set the `package_path` to where the package is saved."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from tempfile import mkdtemp\n",
|
||||||
|
"from os import path\n",
|
||||||
|
"from urllib.request import urlretrieve\n",
|
||||||
|
"\n",
|
||||||
|
"dflow_root = mkdtemp()\n",
|
||||||
|
"dflow_path = path.join(dflow_root, \"new_york_taxi.dprep\")\n",
|
||||||
|
"print(\"Downloading Dataflow to: {}\".format(dflow_path))\n",
|
||||||
|
"urlretrieve(\"https://dprepdata.blob.core.windows.net/demo/new_york_taxi_v2.dprep\", dflow_path)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Let's load the package we just downloaded."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import azureml.dataprep as dprep\n",
|
||||||
|
"\n",
|
||||||
|
"df = dprep.Dataflow.open(dflow_path)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Let's replace the datasources with the full dataset."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from uuid import uuid4\n",
|
||||||
|
"\n",
|
||||||
|
"other_step = df._get_steps()[7].arguments['dataflows'][0]['anonymousSteps'][0]\n",
|
||||||
|
"other_step['id'] = str(uuid4())\n",
|
||||||
|
"other_step['arguments']['path']['target'] = 1\n",
|
||||||
|
"other_step['arguments']['path']['resourceDetails'][0]['path'] = 'https://wranglewestus.blob.core.windows.net/nyctaxi/yellow_tripdata*'"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"green_dsource = dprep.BlobDataSource(\"https://wranglewestus.blob.core.windows.net/nyctaxi/green_tripdata*\")\n",
|
||||||
|
"df = df.replace_datasource(green_dsource)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Once we have replaced the datasource, we can now run the same steps on the full dataset. We will print the first 5 rows of the spark DataFrame. Since we are running on the full dataset, this might take a little while depending on your spark cluster's size."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"spark_df = df.take(5).to_pandas_dataframe()\n",
|
||||||
|
"spark_df.head(5)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "sihhu"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3.6",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python36"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.6.4"
|
||||||
|
},
|
||||||
|
"skip_execute_as_test": true
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
@@ -0,0 +1,45 @@
|
|||||||
|
-----BEGIN PRIVATE KEY-----
|
||||||
|
MIIEvwIBADANBgkqhkiG9w0BAQEFAASCBKkwggSlAgEAAoIBAQDmkkyF0BwipZow
|
||||||
|
Wd1AMkRkySx0y079JPxpsYhv4i1xXKdoa9bpFqwoXmJpeQM1JWnU4UeZzFeM86qK
|
||||||
|
AhQvL4KV4kibcP2ENvu2NKFEdotO3uxPJ+6GlcYwMYzy+tUj008KnnRZfTrR78sJ
|
||||||
|
tIl3C6lnVL0ICihksG59P1sskRq3PvOjXLAdEZalwDjZ4ZPoNDZdj6nUjB2l8zqu
|
||||||
|
pKAt5mR+bJ9Sox4yrDuNhMmFt5QsRDRe3wUqdV+C9OCWHmjlmsjrYw7p9YmjBDvC
|
||||||
|
5U7mF0Mk/XeYFzj0pkXKQVqBL6xqig+q5ob0szYfg19iDeFhS3iIsRcJGEnRVW/A
|
||||||
|
NpsBZyKrAgMBAAECggEBANlvP8C1F8NInhZYuIAwpzTQTh86Fxw8g9h8dijkh2wv
|
||||||
|
LyQXBk07d1B+aZoDZ5X32UzKwcX04N9obfvFqBkzWZdVFJmZvUmwvEEActBoZkkT
|
||||||
|
io+/HX5HweVy5PPCvbsSK6jc8uXtZcnSs4tMeJIOKkvqqnTpd1w00Y1FcQqfMC16
|
||||||
|
4p7o8wbt6OFoFAYqcxeVYVwDzCTLZD3+iJaqmntkBkoDndJy52yXQmMq5z1wbQVp
|
||||||
|
BL6+L9nTvmouy64jiHVSKOx8nnWThYfHsXoPv+rYywjeuK/v3hyaTAwogs36ooEn
|
||||||
|
SnuTBRvJcumN9Q0XIVlxKMVBcGyyAP+0yNKGz5NQgdECgYEA/I/Uq1E3epPJgEWR
|
||||||
|
Bub+LpCgwtrw/lgKncb/Q/AiE9qoXobUe4KNU8aGaNMb7uVNLckY7cOluLS6SQb3
|
||||||
|
Mzwk2Jl0G3vk8rW46tZWvSYB8+zAR2Rz7seUOT9SE5OmvwpnHrnp3nRr1vvVd2bp
|
||||||
|
Q/ypwMLrwWQN51Kr+oTS74bUbrkCgYEA6bXVIUyao7z2Q3qAr6h+6JEWDbkJA7hJ
|
||||||
|
BjHIOXvxd1tMoJJX+X9+IE/2XoJaUkGCb0vrM/hi1cyQFmS4Or/J6IWSZu8oBpDr
|
||||||
|
EBmIK3PF1nrzNvWD28wM46c6ScehyWSm/u4bJWSm9liTX3dv5Kpa6ym7yLKc3c0B
|
||||||
|
ECpSJM+5SoMCgYEAq585Tukzn/IJPUcIk/4nv5C8DW0l0lAVdr2g/JOTNJajTwik
|
||||||
|
HwHJ86G1+Elsc9wRpAlBDWCjnm4BIFrBZGl8SEuOoJaCL4PZEotwCbxoG09IIbtb
|
||||||
|
JGkuifBDX9Y3ux3gkPqYt3e5SC99EVQ3MuHgoIJUHehVolmFUAkuJWIjvNECgYEA
|
||||||
|
5pU0VspRuELzZdgzpxvDOooLDDcHodfslGQBfFXBA1Xc4IACtHMJaa/7D3vkyUtA
|
||||||
|
+bYZtQjX2sEdWDq/WZdoCjXfIBfNkczhXt0R8G0lQFvGIu9QzUchYGrZo3mHMkBQ
|
||||||
|
Uy1xMw9/e4YgwQwCJcW+Nk7Sq00uX9enuN9IdHFOCykCgYAqAGMK6CH1tlpjvHrf
|
||||||
|
k+ZhigYxTXBlsVVvK1BIGGaiwzDpn65zeQp4aLOjSZkI1LuRi3tfTiZ321jRd64J
|
||||||
|
4lGk5Jurqv5grDmxROX/U50wEYbI9ncu/thU7syUdxDiqxHPI2RMG50mRcm3a55p
|
||||||
|
ZCNSqkMlcXyA0U1z8C1ILNUsbA==
|
||||||
|
-----END PRIVATE KEY-----
|
||||||
|
-----BEGIN CERTIFICATE-----
|
||||||
|
MIICoTCCAYkCAgPoMA0GCSqGSIb3DQEBBQUAMBQxEjAQBgNVBAMMCUNMSS1Mb2dp
|
||||||
|
bjAiGA8yMDE4MDcxMzIzMjA0N1oYDzIwMTkwNzEzMjMyMDQ5WjAUMRIwEAYDVQQD
|
||||||
|
DAlDTEktTG9naW4wggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDmkkyF
|
||||||
|
0BwipZowWd1AMkRkySx0y079JPxpsYhv4i1xXKdoa9bpFqwoXmJpeQM1JWnU4UeZ
|
||||||
|
zFeM86qKAhQvL4KV4kibcP2ENvu2NKFEdotO3uxPJ+6GlcYwMYzy+tUj008KnnRZ
|
||||||
|
fTrR78sJtIl3C6lnVL0ICihksG59P1sskRq3PvOjXLAdEZalwDjZ4ZPoNDZdj6nU
|
||||||
|
jB2l8zqupKAt5mR+bJ9Sox4yrDuNhMmFt5QsRDRe3wUqdV+C9OCWHmjlmsjrYw7p
|
||||||
|
9YmjBDvC5U7mF0Mk/XeYFzj0pkXKQVqBL6xqig+q5ob0szYfg19iDeFhS3iIsRcJ
|
||||||
|
GEnRVW/ANpsBZyKrAgMBAAEwDQYJKoZIhvcNAQEFBQADggEBAI4VlaFb9NsXMLdT
|
||||||
|
Cw5/pk0Xo2Qi6483RGTy8vzrw88IE7f3juB/JWG+rayjtW5bBRx2fae4/ZIdZ4zg
|
||||||
|
N2FDKn2PQPAc9m9pcKyUKUvWOC8ixSkrUmeQew0l1AXU0hsPSlJ7/7ZK4efoyB47
|
||||||
|
hj71fsyKdyKbisZDcUFBq/S8PazdPF0YOD1W/4A2tW0cSMg+jmFWynuUTdWt3SU8
|
||||||
|
CwBGqdiSKT5faJuYwIWnRXDEQS3ObRn1OFEfFdd4d2sxjxydWKRgnINnGlBdiFAT
|
||||||
|
KzCozVr+75cO2ErH6x5C0hLQGG5BxXbaijyxyvaRNokTMVVv6OaDEnjzCGfJ72Yf
|
||||||
|
2wgitNc=
|
||||||
|
-----END CERTIFICATE-----
|
||||||
@@ -0,0 +1,54 @@
|
|||||||
|
"Retrieved from https://en.wikipedia.org/wiki/Chicago_City_Council on November 6, 2018"
|
||||||
|
|
||||||
|
|
||||||
|
Ward,Name,Took Office,Party
|
||||||
|
1,Proco Joe Moreno,2010*,Dem
|
||||||
|
2,Brian Hopkins,2015,Dem
|
||||||
|
3,Pat Dowell,2007,Dem
|
||||||
|
4,Sophia King,2016*,Dem
|
||||||
|
5,Leslie Hairston,1999,Dem
|
||||||
|
6,Roderick Sawyer,2011,Dem
|
||||||
|
7,Gregory Mitchell,2015,Dem
|
||||||
|
8,Michelle A. Harris,2006*,Dem
|
||||||
|
9,Anthony Beale,1999,Dem
|
||||||
|
10,Susie Sadlowski Garza,2015,Dem
|
||||||
|
11,Patrick Daley Thompson,2015,Dem
|
||||||
|
12,George Cardenas,2003,Dem
|
||||||
|
13,Marty Quinn,2011,Dem
|
||||||
|
14,Edward M. Burke,1969,Dem
|
||||||
|
15,Raymond Lopez,2015,Dem
|
||||||
|
16,Toni Foulkes,2007,Dem
|
||||||
|
17,David H. Moore,2015,Dem
|
||||||
|
18,Derrick Curtis,2015,Dem
|
||||||
|
19,Matthew O'Shea,2011,Dem
|
||||||
|
20,Willie Cochran,2007,Dem
|
||||||
|
21,Howard Brookins Jr.,2003,Dem
|
||||||
|
22,Ricardo Muñoz,1993*,Dem
|
||||||
|
23,Silvana Tabares,2018*,Dem
|
||||||
|
24,"Michael Scott, Jr.",2015,Dem
|
||||||
|
25,Daniel Solis,1996*,Dem
|
||||||
|
26,Roberto Maldonado,2009*,Dem
|
||||||
|
27,"Walter Burnett, Jr.",1995,Dem
|
||||||
|
28,Jason Ervin,2011*,Dem
|
||||||
|
29,Chris Taliaferro,2015,Dem
|
||||||
|
30,Ariel Reboyras,2003,Dem
|
||||||
|
31,Milly Santiago,2015,Dem
|
||||||
|
32,Scott Waguespack,2007,Dem
|
||||||
|
33,Deb Mell,2013*,Dem
|
||||||
|
34,Carrie Austin,1994*,Dem
|
||||||
|
35,Carlos Ramirez-Rosa,2015,Dem
|
||||||
|
36,Gilbert Villegas,2015,Dem
|
||||||
|
37,Emma Mitts,2000*,Dem
|
||||||
|
38,Nicholas Sposato,2011,Ind
|
||||||
|
39,Margaret Laurino,1994*,Dem
|
||||||
|
40,Patrick J. O'Connor,1983,Dem
|
||||||
|
41,Anthony Napolitano,2015,Rep
|
||||||
|
42,Brendan Reilly,2007,Dem
|
||||||
|
43,Michele Smith,2011,Dem
|
||||||
|
44,Thomas M. Tunney,2002*,Dem
|
||||||
|
45,John Arena,2011,Dem
|
||||||
|
46,James Cappleman,2011,Dem
|
||||||
|
47,Ameya Pawar,2011,Dem
|
||||||
|
48,Harry Osterman,2011,Dem
|
||||||
|
49,Joe Moore,1991,Dem
|
||||||
|
50,Debra Silverstein,2011,Dem
|
||||||
|
@@ -0,0 +1,15 @@
|
|||||||
|
File updated 11/2/2018
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
ID|Case Number|Date|Block|IUCR|Primary Type|Description|Location Description|Arrest|Domestic|Beat|District|Ward|Community Area|FBI Code|X Coordinate|Y Coordinate|Year|Updated On|Latitude|Longitude|Location
|
||||||
|
10140490|HY329907|07/05/2015 11:50:00 PM|050XX N NEWLAND AVE|0820|THEFT|$500 AND UNDER|STREET|false|false|1613|016|41|10|06|1129230|1933315|2015|07/12/2015 12:42:46 PM|41.973309466|-87.800174996|(41.973309466, -87.800174996)
|
||||||
|
10139776|HY329265|07/05/2015 11:30:00 PM|011XX W MORSE AVE|0460|BATTERY|SIMPLE|STREET|false|true|2431|024|49|1|08B|1167370|1946271|2015|07/12/2015 12:42:46 PM|42.008124017|-87.65955018|(42.008124017, -87.65955018)
|
||||||
|
10140270|HY329253|07/05/2015 11:20:00 PM|121XX S FRONT AVE|0486|BATTERY|DOMESTIC BATTERY SIMPLE|STREET|false|true|0532||9|53|08B|||2015|07/12/2015 12:42:46 PM|||
|
||||||
|
10139885|HY329308|07/05/2015 11:19:00 PM|051XX W DIVISION ST|0610|BURGLARY|FORCIBLE ENTRY|SMALL RETAIL STORE|false|false|1531|015|37|25|05|1141721|1907465|2015|07/12/2015 12:42:46 PM|41.902152027|-87.754883404|(41.902152027, -87.754883404)
|
||||||
|
10140379|HY329556|07/05/2015 11:00:00 PM|012XX W LAKE ST|0930|MOTOR VEHICLE THEFT|THEFT/RECOVERY: AUTOMOBILE|STREET|false|false|1215|012|27|28|07|1168413|1901632|2015|07/12/2015 12:42:46 PM|41.885610142|-87.657008701|(41.885610142, -87.657008701)
|
||||||
|
10140868|HY330421|07/05/2015 10:54:00 PM|118XX S PEORIA ST|1320|CRIMINAL DAMAGE|TO VEHICLE|VEHICLE NON-COMMERCIAL|false|false|0524|005|34|53|14|1172409|1826485|2015|07/12/2015 12:42:46 PM|41.6793109|-87.644545209|(41.6793109, -87.644545209)
|
||||||
|
10139762|HY329232|07/05/2015 10:42:00 PM|026XX W 37TH PL|1020|ARSON|BY FIRE|VACANT LOT/LAND|false|false|0911|009|12|58|09|1159436|1879658|2015|07/12/2015 12:42:46 PM|41.825500607|-87.690578042|(41.825500607, -87.690578042)
|
||||||
|
10139722|HY329228|07/05/2015 10:30:00 PM|016XX S CENTRAL PARK AVE|1811|NARCOTICS|POSS: CANNABIS 30GMS OR LESS|ALLEY|true|false|1021|010|24|29|18|1152687|1891389|2015|07/12/2015 12:42:46 PM|41.857827814|-87.715028789|(41.857827814, -87.715028789)
|
||||||
|
10139774|HY329209|07/05/2015 10:15:00 PM|048XX N ASHLAND AVE|1310|CRIMINAL DAMAGE|TO PROPERTY|APARTMENT|false|false|2032|020|46|3|14|1164821|1932394|2015|07/12/2015 12:42:46 PM|41.970099796|-87.669324377|(41.970099796, -87.669324377)
|
||||||
|
10139697|HY329177|07/05/2015 10:10:00 PM|058XX S ARTESIAN AVE|1320|CRIMINAL DAMAGE|TO VEHICLE|ALLEY|false|false|0824|008|16|63|14|1160997|1865851|2015|07/12/2015 12:42:46 PM|41.787580282|-87.685233078|(41.787580282, -87.685233078)
|
||||||
|
@@ -0,0 +1,11 @@
|
|||||||
|
ID,Case Number,Date,Block,IUCR,Primary Type,Description,Location Description,Arrest,Domestic,Beat,District,Ward,Community Area,FBI Code,X Coordinate,Y Coordinate,Year,Updated On,Latitude,Longitude,Location
|
||||||
|
10498554,HZ239907,4/4/2016 23:56,007XX E 111TH ST,1153,DECEPTIVE PRACTICE,FINANCIAL IDENTITY THEFT OVER $ 300,OTHER,FALSE,FALSE,531,5,9,50,11,1183356,1831503,2016,5/11/2016 15:48,41.69283384,-87.60431945,"(41.692833841, -87.60431945)"
|
||||||
|
10516598,HZ258664,4/15/2016 17:00,082XX S MARSHFIELD AVE,890,THEFT,FROM BUILDING,RESIDENCE,FALSE,FALSE,614,6,21,71,6,1166776,1850053,2016,5/12/2016 15:48,41.74410697,-87.66449429,"(41.744106973, -87.664494285)"
|
||||||
|
10519196,HZ261252,4/15/2016 10:00,104XX S SACRAMENTO AVE,1154,DECEPTIVE PRACTICE,FINANCIAL IDENTITY THEFT $300 AND UNDER,RESIDENCE,FALSE,FALSE,2211,22,19,74,11,,,2016,5/12/2016 15:50,,,
|
||||||
|
10519591,HZ261534,4/15/2016 9:00,113XX S PRAIRIE AVE,1120,DECEPTIVE PRACTICE,FORGERY,RESIDENCE,FALSE,FALSE,531,5,9,49,10,,,2016,5/13/2016 15:51,,,
|
||||||
|
10534446,HZ277630,4/15/2016 10:00,055XX N KEDZIE AVE,890,THEFT,FROM BUILDING,"SCHOOL, PUBLIC, BUILDING",FALSE,FALSE,1712,17,40,13,6,,,2016,5/25/2016 15:59,,,
|
||||||
|
10535059,HZ278872,4/15/2016 4:30,004XX S KILBOURN AVE,810,THEFT,OVER $500,RESIDENCE,FALSE,FALSE,1131,11,24,26,6,,,2016,5/25/2016 15:59,,,
|
||||||
|
10499802,HZ240778,4/15/2016 10:00,010XX N MILWAUKEE AVE,1152,DECEPTIVE PRACTICE,ILLEGAL USE CASH CARD,RESIDENCE,FALSE,FALSE,1213,12,27,24,11,,,2016,5/27/2016 15:45,,,
|
||||||
|
10522293,HZ264802,4/15/2016 16:00,019XX W DIVISION ST,1110,DECEPTIVE PRACTICE,BOGUS CHECK,RESTAURANT,FALSE,FALSE,1424,14,1,24,11,1163094,1908003,2016,5/16/2016 15:48,41.90320604,-87.67636193,"(41.903206037, -87.676361925)"
|
||||||
|
10523111,HZ265911,4/15/2016 8:00,061XX N SHERIDAN RD,1153,DECEPTIVE PRACTICE,FINANCIAL IDENTITY THEFT OVER $ 300,RESIDENCE,FALSE,FALSE,2433,24,48,77,11,,,2016,5/16/2016 15:50,,,
|
||||||
|
10525877,HZ268138,4/15/2016 15:00,023XX W EASTWOOD AVE,1153,DECEPTIVE PRACTICE,FINANCIAL IDENTITY THEFT OVER $ 300,,FALSE,FALSE,1911,19,47,4,11,,,2016,5/18/2016 15:50,,,
|
||||||
|
@@ -0,0 +1,11 @@
|
|||||||
|
ID,Case Number,Date,Block,IUCR,Primary Type,Description,Location Description,Arrest,Domestic,Beat,District,Ward,Community Area,FBI Code,X Coordinate,Y Coordinate,Year,Updated On,Latitude,Longitude,Location
|
||||||
|
10378283,HZ114126,1/10/2016 11:00,033XX W IRVING PARK RD,610,BURGLARY,FORCIBLE ENTRY,RESIDENCE-GARAGE,TRUE,FALSE,1724,17,33,16,5,1153593,1926401,2016,5/22/2016 15:51,41.95388599,-87.71077048,"(41.95388599, -87.710770479)"
|
||||||
|
10382154,HZ118288,1/10/2016 21:00,055XX S FRANCISCO AVE,1754,OFFENSE INVOLVING CHILDREN,AGG SEX ASSLT OF CHILD FAM MBR,RESIDENCE,FALSE,TRUE,824,8,14,63,2,1157983,1867874,2016,6/1/2016 15:51,41.79319349,-87.69622926,"(41.793193489, -87.696229255)"
|
||||||
|
10374287,HZ110730,1/10/2016 11:50,043XX W ARMITAGE AVE,5002,OTHER OFFENSE,OTHER VEHICLE OFFENSE,STREET,FALSE,TRUE,2522,25,30,20,26,1146917,1912931,2016,6/7/2016 15:55,41.91705356,-87.73565764,"(41.917053561, -87.735657637)"
|
||||||
|
10374662,HZ110403,1/10/2016 1:30,073XX S CLAREMONT AVE,497,BATTERY,AGGRAVATED DOMESTIC BATTERY: OTHER DANG WEAPON,STREET,FALSE,TRUE,835,8,18,66,04B,1162007,1855951,2016,2/4/2016 15:44,41.76039236,-87.68180481,"(41.760392356, -87.681804812)"
|
||||||
|
10374720,HZ110836,1/10/2016 7:30,079XX S RHODES AVE,890,THEFT,FROM BUILDING,OTHER,FALSE,FALSE,624,6,6,44,6,1181279,1852568,2016,2/4/2016 15:44,41.75068679,-87.61127681,"(41.75068679, -87.611276811)"
|
||||||
|
10375178,HZ110832,1/10/2016 14:20,057XX S KEDZIE AVE,460,BATTERY,SIMPLE,RESTAURANT,FALSE,FALSE,824,8,14,63,08B,1156029,1866379,2016,2/4/2016 15:44,41.78913051,-87.7034346,"(41.78913051, -87.703434602)"
|
||||||
|
10398695,HZ135279,1/10/2016 23:00,031XX S PARNELL AVE,620,BURGLARY,UNLAWFUL ENTRY,RESIDENCE-GARAGE,FALSE,FALSE,915,9,11,60,5,1173138,1884117,2016,2/4/2016 15:44,41.8374442,-87.64017699,"(41.837444199, -87.640176991)"
|
||||||
|
10402270,HZ138745,1/10/2016 11:00,051XX S ELIZABETH ST,620,BURGLARY,UNLAWFUL ENTRY,APARTMENT,FALSE,FALSE,934,9,16,61,5,,,2016,2/4/2016 6:53,,,
|
||||||
|
10380619,HZ116583,1/10/2016 9:41,091XX S PAXTON AVE,4387,OTHER OFFENSE,VIOLATE ORDER OF PROTECTION,RESIDENCE,TRUE,TRUE,413,4,7,48,26,1192434,1844707,2016,2/2/2016 15:56,41.72885134,-87.57065553,"(41.728851343, -87.570655525)"
|
||||||
|
10400131,HZ136171,1/10/2016 18:00,0000X W TERMINAL ST,810,THEFT,OVER $500,AIRPORT BUILDING NON-TERMINAL - SECURE AREA,FALSE,FALSE,1651,16,41,76,6,,,2016,2/2/2016 15:58,,,
|
||||||
|
204
how-to-use-azureml/work-with-data/dataprep/data/crime.dprep
Normal file
204
how-to-use-azureml/work-with-data/dataprep/data/crime.dprep
Normal file
@@ -0,0 +1,204 @@
|
|||||||
|
{
|
||||||
|
"id": "75637565-60ad-4baa-87d3-396a7930cfe7",
|
||||||
|
"blocks": [
|
||||||
|
{
|
||||||
|
"id": "ba5a8061-129e-4618-953a-ce3e89c8f2cb",
|
||||||
|
"type": "Microsoft.DPrep.GetFilesBlock",
|
||||||
|
"arguments": {
|
||||||
|
"path": {
|
||||||
|
"target": 0,
|
||||||
|
"resourceDetails": [
|
||||||
|
{
|
||||||
|
"path": "./crime-spring.csv"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"isEnabled": true,
|
||||||
|
"name": null,
|
||||||
|
"annotation": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "1b345643-6b60-4ca1-99f9-2a64ae932a23",
|
||||||
|
"type": "Microsoft.DPrep.ParseDelimitedBlock",
|
||||||
|
"arguments": {
|
||||||
|
"columnHeadersMode": 1,
|
||||||
|
"fileEncoding": 0,
|
||||||
|
"handleQuotedLineBreaks": false,
|
||||||
|
"preview": false,
|
||||||
|
"separator": ",",
|
||||||
|
"skipRowsMode": 0
|
||||||
|
},
|
||||||
|
"isEnabled": true,
|
||||||
|
"name": null,
|
||||||
|
"annotation": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "12cf73a2-1487-4915-bfa7-c86be7de08c0",
|
||||||
|
"type": "Microsoft.DPrep.SetColumnTypesBlock",
|
||||||
|
"arguments": {
|
||||||
|
"columnConversion": [
|
||||||
|
{
|
||||||
|
"column": {
|
||||||
|
"type": 2,
|
||||||
|
"details": {
|
||||||
|
"selectedColumn": "ID"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"typeProperty": 3
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"column": {
|
||||||
|
"type": 2,
|
||||||
|
"details": {
|
||||||
|
"selectedColumn": "IUCR"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"typeProperty": 3
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"column": {
|
||||||
|
"type": 2,
|
||||||
|
"details": {
|
||||||
|
"selectedColumn": "Domestic"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"typeProperty": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"column": {
|
||||||
|
"type": 2,
|
||||||
|
"details": {
|
||||||
|
"selectedColumn": "Beat"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"typeProperty": 3
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"column": {
|
||||||
|
"type": 2,
|
||||||
|
"details": {
|
||||||
|
"selectedColumn": "District"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"typeProperty": 3
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"column": {
|
||||||
|
"type": 2,
|
||||||
|
"details": {
|
||||||
|
"selectedColumn": "Ward"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"typeProperty": 3
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"column": {
|
||||||
|
"type": 2,
|
||||||
|
"details": {
|
||||||
|
"selectedColumn": "Community Area"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"typeProperty": 3
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"column": {
|
||||||
|
"type": 2,
|
||||||
|
"details": {
|
||||||
|
"selectedColumn": "Year"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"typeProperty": 3
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"column": {
|
||||||
|
"type": 2,
|
||||||
|
"details": {
|
||||||
|
"selectedColumn": "Longitude"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"typeProperty": 3
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"column": {
|
||||||
|
"type": 2,
|
||||||
|
"details": {
|
||||||
|
"selectedColumn": "Arrest"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"typeProperty": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"column": {
|
||||||
|
"type": 2,
|
||||||
|
"details": {
|
||||||
|
"selectedColumn": "X Coordinate"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"typeProperty": 3
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"column": {
|
||||||
|
"type": 2,
|
||||||
|
"details": {
|
||||||
|
"selectedColumn": "Updated On"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"typeArguments": {
|
||||||
|
"dateTimeFormats": [
|
||||||
|
"%m/%d/%Y %I:%M:%S %p"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"typeProperty": 4
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"column": {
|
||||||
|
"type": 2,
|
||||||
|
"details": {
|
||||||
|
"selectedColumn": "Date"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"typeArguments": {
|
||||||
|
"dateTimeFormats": [
|
||||||
|
"%m/%d/%Y %I:%M:%S %p"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"typeProperty": 4
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"column": {
|
||||||
|
"type": 2,
|
||||||
|
"details": {
|
||||||
|
"selectedColumn": "Y Coordinate"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"typeProperty": 3
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"column": {
|
||||||
|
"type": 2,
|
||||||
|
"details": {
|
||||||
|
"selectedColumn": "Latitude"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"typeProperty": 3
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"isEnabled": true,
|
||||||
|
"name": null,
|
||||||
|
"annotation": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "dfd62543-9285-412b-a930-0aeaaffde699",
|
||||||
|
"type": "Microsoft.DPrep.HandlePathColumnBlock",
|
||||||
|
"arguments": {
|
||||||
|
"pathColumnOperation": 0
|
||||||
|
},
|
||||||
|
"isEnabled": true,
|
||||||
|
"name": null,
|
||||||
|
"annotation": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"inspectors": []
|
||||||
|
}
|
||||||
BIN
how-to-use-azureml/work-with-data/dataprep/data/crime.parquet
Normal file
BIN
how-to-use-azureml/work-with-data/dataprep/data/crime.parquet
Normal file
Binary file not shown.
10
how-to-use-azureml/work-with-data/dataprep/data/crime.txt
Normal file
10
how-to-use-azureml/work-with-data/dataprep/data/crime.txt
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
10140490 HY329907 7/5/2015 23:50 050XX N NEWLAND AVE 820 THEFT
|
||||||
|
10139776 HY329265 7/5/2015 23:30 011XX W MORSE AVE 460 BATTERY
|
||||||
|
10140270 HY329253 7/5/2015 23:20 121XX S FRONT AVE 486 BATTERY
|
||||||
|
10139885 HY329308 7/5/2015 23:19 051XX W DIVISION ST 610 BURGLARY
|
||||||
|
10140379 HY329556 7/5/2015 23:00 012XX W LAKE ST 930 MOTOR VEHICLE THEFT
|
||||||
|
10140868 HY330421 7/5/2015 22:54 118XX S PEORIA ST 1320 CRIMINAL DAMAGE
|
||||||
|
10139762 HY329232 7/5/2015 22:42 026XX W 37TH PL 1020 ARSON
|
||||||
|
10139722 HY329228 7/5/2015 22:30 016XX S CENTRAL PARK AVE 1811 NARCOTICS
|
||||||
|
10139774 HY329209 7/5/2015 22:15 048XX N ASHLAND AVE 1310 CRIMINAL DAMAGE
|
||||||
|
10139697 HY329177 7/5/2015 22:10 058XX S ARTESIAN AVE 1320 CRIMINAL DAMAGE
|
||||||
BIN
how-to-use-azureml/work-with-data/dataprep/data/crime.xlsx
Normal file
BIN
how-to-use-azureml/work-with-data/dataprep/data/crime.xlsx
Normal file
Binary file not shown.
BIN
how-to-use-azureml/work-with-data/dataprep/data/crime.zip
Normal file
BIN
how-to-use-azureml/work-with-data/dataprep/data/crime.zip
Normal file
Binary file not shown.
@@ -0,0 +1,12 @@
|
|||||||
|
ID,Case Number,Date,Block,IUCR,Primary Type,Description,Location Description,Arrest,Domestic,Beat,District,Ward,Community Area,FBI Code,X Coordinate,Y Coordinate,Year,Updated On,Latitude,Longitude,Location
|
||||||
|
ID,Case Number,Date,Block,IUCR,Primary Type,Description,Location Description,Arrest,Domestic,Beat,District,Ward,Community Area,FBI Code,X Coordinate,Y Coordinate,Year,Updated On,Latitude,Longitude,Location
|
||||||
|
10498554,HZ239907,4/15/2016 23:56,007XX E 111TH ST,1153,DECEPTIVE PRACTICE,FINANCIAL IDENTITY THEFT OVER $ 300,OTHER,FALSE,FALSE,531,5,9,50,11,1183356,1831503,2016,5/11/2016 15:48,41.69283384,-87.60431945,"(41.692833841, -87.60431945)"
|
||||||
|
10516598,HZ258664,4/15/2016 17:00,082XX S MARSHFIELD AVE,890,THEFT,FROM BUILDING,RESIDENCE,FALSE,FALSE,614,6,21,71,6,1166776,1850053,2016,5/12/2016 15:48,41.74410697,-87.66449429,"(41.744106973, -87.664494285)"
|
||||||
|
10519196,HZ261252,4/15/2016 10:00,104XX S SACRAMENTO AVE,1154,DECEPTIVE PRACTICE,FINANCIAL IDENTITY THEFT $300 AND UNDER,RESIDENCE,FALSE,FALSE,2211,22,19,74,11,,,2016,5/12/2016 15:50,,,
|
||||||
|
10519591,HZ261534,4/15/2016 9:00,113XX S PRAIRIE AVE,1120,DECEPTIVE PRACTICE,FORGERY,RESIDENCE,FALSE,FALSE,531,5,9,49,10,,,2016,5/13/2016 15:51,,,
|
||||||
|
10534446,HZ277630,4/15/2016 10:00,055XX N KEDZIE AVE,890,THEFT,FROM BUILDING,"SCHOOL, PUBLIC, BUILDING",FALSE,FALSE,1712,17,40,13,6,,,2016,5/25/2016 15:59,,,
|
||||||
|
10535059,HZ278872,4/15/2016 4:30,004XX S KILBOURN AVE,810,THEFT,OVER $500,RESIDENCE,FALSE,FALSE,1131,11,24,26,6,,,2016,5/25/2016 15:59,,,
|
||||||
|
10499802,HZ240778,4/15/2016 10:00,010XX N MILWAUKEE AVE,1152,DECEPTIVE PRACTICE,ILLEGAL USE CASH CARD,RESIDENCE,FALSE,FALSE,1213,12,27,24,11,,,2016,5/27/2016 15:45,,,
|
||||||
|
10522293,HZ264802,4/15/2016 16:00,019XX W DIVISION ST,1110,DECEPTIVE PRACTICE,BOGUS CHECK,RESTAURANT,FALSE,FALSE,1424,14,1,24,11,1163094,1908003,2016,5/16/2016 15:48,41.90320604,-87.67636193,"(41.903206037, -87.676361925)"
|
||||||
|
10523111,HZ265911,4/15/2016 8:00,061XX N SHERIDAN RD,1153,DECEPTIVE PRACTICE,FINANCIAL IDENTITY THEFT OVER $ 300,RESIDENCE,FALSE,FALSE,2433,24,48,77,11,,,2016,5/16/2016 15:50,,,
|
||||||
|
10525877,HZ268138,4/15/2016 15:00,023XX W EASTWOOD AVE,1153,DECEPTIVE PRACTICE,FINANCIAL IDENTITY THEFT OVER $ 300,,FALSE,FALSE,1911,19,47,4,11,,,2016,5/18/2016 15:50,,,
|
||||||
|
@@ -0,0 +1,10 @@
|
|||||||
|
10140490 HY329907 7/5/2015 23:50 050XX N NEWLAND AVE 820 THEFT
|
||||||
|
10139776 HY329265 7/5/2015 23:30 011XX W MORSE AVE 460 BATTERY
|
||||||
|
10140270 HY329253 7/5/2015 23:20 121XX S FRONT AVE 486 BATTERY
|
||||||
|
10139885 HY329308 7/5/2015 23:19 051XX W DIVISION ST 610 BURGLARY
|
||||||
|
10140379 HY329556 7/5/2015 23:00 012XX W LAKE ST 930 MOTOR VEHICLE THEFT
|
||||||
|
10140868 HY330421 7/5/2015 22:54 118XX S PEORIA ST 1320 CRIMINAL DAMAGE
|
||||||
|
10139762 HY329232 7/5/2015 22:42 026XX W 37TH PL 1020 ARSON
|
||||||
|
10139722 HY329228 7/5/2015 22:30 016XX S CENTRAL PARK AVE 1811 NARCOTICS
|
||||||
|
10139774 HY329209 7/5/2015 22:15 048XX N ASHLAND AVE 1310 CRIMINAL DAMAGE
|
||||||
|
10139697 HY329177 7/5/2015 22:10 058XX S ARTESIAN AVE 1320 CRIMINAL DAMAGE
|
||||||
@@ -0,0 +1,11 @@
|
|||||||
|
ID |CaseNumber| |Completed|
|
||||||
|
10140490 |HY329907| |Y|
|
||||||
|
10139776 |HY329265| |Y|
|
||||||
|
10140270 |HY329253| |N|
|
||||||
|
10139885 |HY329308| |Y|
|
||||||
|
10140379 |HY329556| |N|
|
||||||
|
10140868 |HY330421| |N|
|
||||||
|
10139762 |HY329232| |N|
|
||||||
|
10139722 |HY329228| |Y|
|
||||||
|
10139774 |HY329209| |N|
|
||||||
|
10139697 |HY329177| |N|
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user