update samples from Release-8 as a part of 1.4.0 SDK stable release

2025-12-19 17:17:04 -05:00 · 2020-04-27 17:44:41 +00:00
parent 7970209069
commit fd2b09e2c2
30 changed files with 774 additions and 72 deletions
--- a/configuration.ipynb
+++ b/configuration.ipynb
@@ -103,7 +103,7 @@
      "source": [
        "import azureml.core\n",
        "\n",
-        "print(\"This notebook was created using version 1.3.0 of the Azure ML SDK\")\n",
+        "print(\"This notebook was created using version 1.4.0 of the Azure ML SDK\")\n",
        "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
      ]
    },
--- a/how-to-use-azureml/automated-machine-learning/automl_env.yml
+++ b/how-to-use-azureml/automated-machine-learning/automl_env.yml
@@ -28,7 +28,6 @@ dependencies:
  - azureml-pipeline
  - pytorch-transformers==1.0.0
  - spacy==2.1.8
  - onnxruntime==1.0.0
  - https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
 channels:
--- a/how-to-use-azureml/automated-machine-learning/automl_env_mac.yml
+++ b/how-to-use-azureml/automated-machine-learning/automl_env_mac.yml
@@ -29,7 +29,6 @@ dependencies:
  - azureml-pipeline
  - pytorch-transformers==1.0.0
  - spacy==2.1.8
  - onnxruntime==1.0.0
  - https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz  
 channels:
--- a/how-to-use-azureml/automated-machine-learning/classification-bank-marketing-all-features/auto-ml-classification-bank-marketing-all-features.ipynb
+++ b/how-to-use-azureml/automated-machine-learning/classification-bank-marketing-all-features/auto-ml-classification-bank-marketing-all-features.ipynb
@@ -105,7 +105,7 @@
      "metadata": {},
      "outputs": [],
      "source": [
-        "print(\"This notebook was created using version 1.3.0 of the Azure ML SDK\")\n",
+        "print(\"This notebook was created using version 1.4.0 of the Azure ML SDK\")\n",
        "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
      ]
    },
--- a/how-to-use-azureml/automated-machine-learning/classification-credit-card-fraud/auto-ml-classification-credit-card-fraud.ipynb
+++ b/how-to-use-azureml/automated-machine-learning/classification-credit-card-fraud/auto-ml-classification-credit-card-fraud.ipynb
@@ -93,7 +93,7 @@
      "metadata": {},
      "outputs": [],
      "source": [
-        "print(\"This notebook was created using version 1.3.0 of the Azure ML SDK\")\n",
+        "print(\"This notebook was created using version 1.4.0 of the Azure ML SDK\")\n",
        "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
      ]
    },
--- a/how-to-use-azureml/automated-machine-learning/classification-text-dnn/auto-ml-classification-text-dnn.ipynb
+++ b/how-to-use-azureml/automated-machine-learning/classification-text-dnn/auto-ml-classification-text-dnn.ipynb
@@ -97,7 +97,7 @@
      "metadata": {},
      "outputs": [],
      "source": [
-        "print(\"This notebook was created using version 1.3.0 of the Azure ML SDK\")\n",
+        "print(\"This notebook was created using version 1.4.0 of the Azure ML SDK\")\n",
        "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
      ]
    },
@@ -194,8 +194,8 @@
        "    '''\n",
        "    remove = ('headers', 'footers', 'quotes')\n",
        "    categories = [\n",
-        "        'alt.atheism',\n",
+        "        'rec.sport.baseball',\n",
-        "        'talk.religion.misc',\n",
+        "        'rec.sport.hockey',\n",
        "        'comp.graphics',\n",
        "        'sci.space',\n",
        "        ]\n",
@@ -345,7 +345,8 @@
      "cell_type": "markdown",
      "metadata": {},
      "source": [
-        "You can test the model locally to get a feel of the input/output. This step may require additional package installations such as pytorch."
+        "You can test the model locally to get a feel of the input/output. When the model contains BERT, this step will require pytorch and pytorch-transformers installed in your local environment. The exact versions of these packages can be found in the **automl_env.yml** file located in the local copy of your MachineLearningNotebooks folder here:\n",
        "MachineLearningNotebooks/how-to-use-azureml/automated-machine-learning/automl_env.yml"
      ]
    },
    {
@@ -481,7 +482,7 @@
      "source": [
        "script_folder = os.path.join(os.getcwd(), 'inference')\n",
        "os.makedirs(script_folder, exist_ok=True)\n",
-        "shutil.copy2('infer.py', script_folder)"
+        "shutil.copy('infer.py', script_folder)"
      ]
    },
    {
--- a/how-to-use-azureml/automated-machine-learning/classification-text-dnn/auto-ml-classification-text-dnn.yml
+++ b/how-to-use-azureml/automated-machine-learning/classification-text-dnn/auto-ml-classification-text-dnn.yml
@@ -5,7 +5,6 @@ dependencies:
  - azureml-train-automl
  - azureml-widgets
  - matplotlib
  - azurmel-train
  - https://download.pytorch.org/whl/cpu/torch-1.1.0-cp35-cp35m-win_amd64.whl
  - sentencepiece==0.1.82
  - pytorch-transformers==1.0
--- a/how-to-use-azureml/automated-machine-learning/continuous-retraining/auto-ml-continuous-retraining.ipynb
+++ b/how-to-use-azureml/automated-machine-learning/continuous-retraining/auto-ml-continuous-retraining.ipynb
@@ -88,7 +88,7 @@
      "metadata": {},
      "outputs": [],
      "source": [
-        "print(\"This notebook was created using version 1.3.0 of the Azure ML SDK\")\n",
+        "print(\"This notebook was created using version 1.4.0 of the Azure ML SDK\")\n",
        "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
      ]
    },
--- a/how-to-use-azureml/automated-machine-learning/forecasting-beer-remote/auto-ml-forecasting-beer-remote.ipynb
+++ b/how-to-use-azureml/automated-machine-learning/forecasting-beer-remote/auto-ml-forecasting-beer-remote.ipynb
@@ -114,7 +114,7 @@
      "metadata": {},
      "outputs": [],
      "source": [
-        "print(\"This notebook was created using version 1.3.0 of the Azure ML SDK\")\n",
+        "print(\"This notebook was created using version 1.4.0 of the Azure ML SDK\")\n",
        "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
      ]
    },
@@ -572,7 +572,7 @@
        "\n",
        "script_folder = os.path.join(os.getcwd(), 'inference')\n",
        "os.makedirs(script_folder, exist_ok=True)\n",
-        "shutil.copy2('infer.py', script_folder)"
+        "shutil.copy('infer.py', script_folder)"
      ]
    },
    {
--- a/how-to-use-azureml/automated-machine-learning/forecasting-bike-share/auto-ml-forecasting-bike-share.ipynb
+++ b/how-to-use-azureml/automated-machine-learning/forecasting-bike-share/auto-ml-forecasting-bike-share.ipynb
@@ -87,7 +87,7 @@
      "metadata": {},
      "outputs": [],
      "source": [
-        "print(\"This notebook was created using version 1.3.0 of the Azure ML SDK\")\n",
+        "print(\"This notebook was created using version 1.4.0 of the Azure ML SDK\")\n",
        "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
      ]
    },
@@ -453,8 +453,8 @@
        "\n",
        "script_folder = os.path.join(os.getcwd(), 'forecast')\n",
        "os.makedirs(script_folder, exist_ok=True)\n",
-        "shutil.copy2('forecasting_script.py', script_folder)\n",
+        "shutil.copy('forecasting_script.py', script_folder)\n",
-        "shutil.copy2('forecasting_helper.py', script_folder)"
+        "shutil.copy('forecasting_helper.py', script_folder)"
      ]
    },
    {
--- a/how-to-use-azureml/automated-machine-learning/forecasting-energy-demand/auto-ml-forecasting-energy-demand.ipynb
+++ b/how-to-use-azureml/automated-machine-learning/forecasting-energy-demand/auto-ml-forecasting-energy-demand.ipynb
@@ -97,7 +97,7 @@
      "metadata": {},
      "outputs": [],
      "source": [
-        "print(\"This notebook was created using version 1.3.0 of the Azure ML SDK\")\n",
+        "print(\"This notebook was created using version 1.4.0 of the Azure ML SDK\")\n",
        "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
      ]
    },
--- a/how-to-use-azureml/automated-machine-learning/forecasting-high-frequency/auto-ml-forecasting-function.ipynb
+++ b/how-to-use-azureml/automated-machine-learning/forecasting-high-frequency/auto-ml-forecasting-function.ipynb
@@ -95,7 +95,7 @@
      "metadata": {},
      "outputs": [],
      "source": [
-        "print(\"This notebook was created using version 1.3.0 of the Azure ML SDK\")\n",
+        "print(\"This notebook was created using version 1.4.0 of the Azure ML SDK\")\n",
        "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
      ]
    },
@@ -355,9 +355,24 @@
        "                             label_column_name=target_label,\n",
        "                             **time_series_settings)\n",
        "\n",
-        "remote_run = experiment.submit(automl_config, show_output=False)\n",
+        "remote_run = experiment.submit(automl_config, show_output=False)"
-        "remote_run.wait_for_completion()\n",
+      ]
-        "\n",
+    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "remote_run.wait_for_completion()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Retrieve the best model to use it further.\n",
        "_, fitted_model = remote_run.get_output()"
      ]
--- a/how-to-use-azureml/automated-machine-learning/forecasting-orange-juice-sales/auto-ml-forecasting-orange-juice-sales.ipynb
+++ b/how-to-use-azureml/automated-machine-learning/forecasting-orange-juice-sales/auto-ml-forecasting-orange-juice-sales.ipynb
@@ -65,7 +65,8 @@
        "\n",
        "from azureml.core.workspace import Workspace\n",
        "from azureml.core.experiment import Experiment\n",
-        "from azureml.train.automl import AutoMLConfig"
+        "from azureml.train.automl import AutoMLConfig\n",
        "from azureml.automl.core.featurization import FeaturizationConfig"
      ]
    },
    {
@@ -81,7 +82,7 @@
      "metadata": {},
      "outputs": [],
      "source": [
-        "print(\"This notebook was created using version 1.3.0 of the Azure ML SDK\")\n",
+        "print(\"This notebook was created using version 1.4.0 of the Azure ML SDK\")\n",
        "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
      ]
    },
@@ -318,6 +319,36 @@
        "target_column_name = 'Quantity'"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Customization\n",
        "\n",
        "The featurization customization in forecasting is an advanced feature in AutoML which allows our customers to change the default forecasting featurization behaviors and column types through `FeaturizationConfig`. The supported scenarios include,\n",
        "1. Column purposes update: Override feature type for the specified column. Currently supports DateTime, Categorical and Numeric. This customization can be used when the type of the column does not correctly reflect its purpose. Some numerical columns, for instance, should be treated as Categorical columns and need to be converted to categorical, while others should be treated as epoch timestamps and need to be converted to datetime. To tell our SDK to correctly preprocess these columns, a configuration needs to be added with the columns and their desired types.\n",
        "2. Transformer parameters update: Currently supports parameter change for Imputer only. User can customize imputation methods, the supported methods are constant for target data and mean, median, most frequent and constant for training data. This customization can be used when our customers know which imputation methods best fit the input data. For instance, some datasets use NaN to represent 0, in which case the correct behavior is to impute all the missing values with 0. To achieve this behavior, these columns need to be configured as constant imputation with `fill_value` 0.\n",
        "3. Drop columns: Columns to drop from being featurized. These are usually columns which are leaky or columns that contain no useful data.\n",
        "\n",
        "This step requires an Enterprise workspace to gain access to this feature. To learn more about creating an Enterprise workspace or upgrading to an Enterprise workspace from the Azure portal, please visit our [Workspace page.](https://docs.microsoft.com/azure/machine-learning/service/concept-workspace#upgrade)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "featurization_config = FeaturizationConfig()\n",
        "featurization_config.drop_columns = ['logQuantity']  # 'logQuantity' is a leaky feature, so we remove it.\n",
        "# Force the CPWVOL5 feature to be numeric type.\n",
        "featurization_config.add_column_purpose('CPWVOL5', 'Numeric')\n",
        "# Fill missing values in the target column, Quantity, with zeros.\n",
        "featurization_config.add_transformer_params('Imputer', ['Quantity'], {\"strategy\": \"constant\", \"fill_value\": 0})\n",
        "# Fill missing values in the INCOME column with median value.\n",
        "featurization_config.add_transformer_params('Imputer', ['INCOME'], {\"strategy\": \"median\"})"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
@@ -349,8 +380,8 @@
        "|**debug_log**|Log file path for writing debugging information|\n",
        "|**time_column_name**|Name of the datetime column in the input data|\n",
        "|**grain_column_names**|Name(s) of the columns defining individual series in the input data|\n",
-        "|**drop_column_names**|Name(s) of columns to drop prior to modeling|\n",
+        "|**max_horizon**|Maximum desired forecast horizon in units of time-series frequency|\n",
-        "|**max_horizon**|Maximum desired forecast horizon in units of time-series frequency|"
+        "|**featurization**| 'auto' / 'off' / FeaturizationConfig Indicator for whether featurization step should be done automatically or not, or whether customized featurization should be used. Setting this enables AutoML to perform featurization on the input to handle *missing data*, and to perform some common *feature extraction*.|"
      ]
    },
    {
@@ -362,7 +393,6 @@
        "time_series_settings = {\n",
        "    'time_column_name': time_column_name,\n",
        "    'grain_column_names': grain_column_names,\n",
        "    'drop_column_names': ['logQuantity'],  # 'logQuantity' is a leaky feature, so we remove it.\n",
        "    'max_horizon': n_test_periods\n",
        "}\n",
        "\n",
@@ -374,6 +404,7 @@
        "                             label_column_name=target_column_name,\n",
        "                             compute_target=compute_target,\n",
        "                             enable_early_stopping=True,\n",
        "                             featurization=featurization_config,\n",
        "                             n_cross_validations=3,\n",
        "                             verbosity=logging.INFO,\n",
        "                             **time_series_settings)"
@@ -425,6 +456,33 @@
        "model_name = best_run.properties['model_name']"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Transparency\n",
        "\n",
        "View updated featurization summary"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "custom_featurizer = fitted_model.named_steps['timeseriestransformer']"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "custom_featurizer.get_featurization_summary()"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
--- a/how-to-use-azureml/automated-machine-learning/local-run-classification-credit-card-fraud/auto-ml-classification-credit-card-fraud-local.ipynb
+++ b/how-to-use-azureml/automated-machine-learning/local-run-classification-credit-card-fraud/auto-ml-classification-credit-card-fraud-local.ipynb
@@ -95,7 +95,7 @@
      "metadata": {},
      "outputs": [],
      "source": [
-        "print(\"This notebook was created using version 1.3.0 of the Azure ML SDK\")\n",
+        "print(\"This notebook was created using version 1.4.0 of the Azure ML SDK\")\n",
        "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
      ]
    },
@@ -370,7 +370,7 @@
      "metadata": {},
      "source": [
        "#### Initialize the Mimic Explainer for feature importance\n",
-        "For explaining the AutoML models, use the MimicWrapper from azureml.explain.model package. The MimicWrapper can be initialized with fields in automl_explainer_setup_obj, your workspace and a LightGBM model which acts as a surrogate model to explain the AutoML model (fitted_model here). The MimicWrapper also takes the automl_run object where engineered explanations will be uploaded."
+        "For explaining the AutoML models, use the MimicWrapper from azureml.explain.model package. The MimicWrapper can be initialized with fields in automl_explainer_setup_obj, your workspace and a surrogate model to explain the AutoML model (fitted_model here). The MimicWrapper also takes the automl_run object where engineered explanations will be uploaded."
      ]
    },
    {
@@ -379,13 +379,14 @@
      "metadata": {},
      "outputs": [],
      "source": [
        "from azureml.explain.model.mimic.models.lightgbm_model import LGBMExplainableModel\n",
        "from azureml.explain.model.mimic_wrapper import MimicWrapper\n",
-        "explainer = MimicWrapper(ws, automl_explainer_setup_obj.automl_estimator, LGBMExplainableModel, \n",
+        "explainer = MimicWrapper(ws, automl_explainer_setup_obj.automl_estimator,\n",
        "                         explainable_model=automl_explainer_setup_obj.surrogate_model, \n",
        "                         init_dataset=automl_explainer_setup_obj.X_transform, run=automl_run,\n",
        "                         features=automl_explainer_setup_obj.engineered_feature_names, \n",
        "                         feature_maps=[automl_explainer_setup_obj.feature_map],\n",
-        "                         classes=automl_explainer_setup_obj.classes)"
+        "                         classes=automl_explainer_setup_obj.classes,\n",
        "                         explainer_kwargs=automl_explainer_setup_obj.surrogate_model_params)"
      ]
    },
    {
--- a/how-to-use-azureml/automated-machine-learning/regression-explanation-featurization/auto-ml-regression-explanation-featurization.ipynb
+++ b/how-to-use-azureml/automated-machine-learning/regression-explanation-featurization/auto-ml-regression-explanation-featurization.ipynb
@@ -98,7 +98,7 @@
      "metadata": {},
      "outputs": [],
      "source": [
-        "print(\"This notebook was created using version 1.3.0 of the Azure ML SDK\")\n",
+        "print(\"This notebook was created using version 1.4.0 of the Azure ML SDK\")\n",
        "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
      ]
    },
--- a/how-to-use-azureml/automated-machine-learning/regression-explanation-featurization/train_explainer.py
+++ b/how-to-use-azureml/automated-machine-learning/regression-explanation-featurization/train_explainer.py
@@ -11,7 +11,6 @@ from azureml.train.automl.runtime.automl_explain_utilities import AutoMLExplaine
 from azureml.explain.model.mimic.models.lightgbm_model import LGBMExplainableModel
 from azureml.explain.model.mimic_wrapper import MimicWrapper
 from automl.client.core.common.constants import MODEL_PATH
 from azureml.automl.core.shared.constants import MODEL_EXPLANATION_TAG
 from azureml.explain.model.scoring.scoring_explainer import TreeScoringExplainer, save
@@ -69,9 +68,6 @@ raw_explanations = explainer.explain(['local', 'global'], get_raw=True, tag='raw
                                     raw_feature_names=automl_explainer_setup_obj.raw_feature_names,
                                     eval_dataset=automl_explainer_setup_obj.X_test_transform)
 # Set tag that explanations completed
 automl_run.tag(MODEL_EXPLANATION_TAG, 'True')
 print("Engineered and raw explanations computed successfully")
 # Initialize the ScoringExplainer
--- a/how-to-use-azureml/automated-machine-learning/regression/auto-ml-regression.ipynb
+++ b/how-to-use-azureml/automated-machine-learning/regression/auto-ml-regression.ipynb
@@ -92,7 +92,7 @@
      "metadata": {},
      "outputs": [],
      "source": [
-        "print(\"This notebook was created using version 1.3.0 of the Azure ML SDK\")\n",
+        "print(\"This notebook was created using version 1.4.0 of the Azure ML SDK\")\n",
        "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
      ]
    },
--- a/how-to-use-azureml/explain-model/azure-integration/remote-explanation/explain-model-on-amlcompute.ipynb
+++ b/how-to-use-azureml/explain-model/azure-integration/remote-explanation/explain-model-on-amlcompute.ipynb
@@ -696,6 +696,7 @@
        "1. [Save model explanations via Azure Machine Learning Run History](../run-history/save-retrieve-explanations-run-history.ipynb)\n",
        "1. Inferencing time: deploy a classification model and explainer:\n",
        "    1. [Deploy a locally-trained model and explainer](../scoring-time/train-explain-model-locally-and-deploy.ipynb)\n",
        "    1. [Deploy a locally-trained keras model and explainer](../scoring-time/train-explain-model-keras-locally-and-deploy.ipynb)\n",
        "    1. [Deploy a remotely-trained model and explainer](../scoring-time/train-explain-model-on-amlcompute-and-deploy.ipynb)"
      ]
    },
--- a/how-to-use-azureml/explain-model/azure-integration/run-history/save-retrieve-explanations-run-history.ipynb
+++ b/how-to-use-azureml/explain-model/azure-integration/run-history/save-retrieve-explanations-run-history.ipynb
@@ -591,6 +591,7 @@
        "1. [Run explainers remotely on Azure Machine Learning Compute (AMLCompute)](../remote-explanation/explain-model-on-amlcompute.ipynb)\n",
        "1. Inferencing time: deploy a classification model and explainer:\n",
        "    1. [Deploy a locally-trained model and explainer](../scoring-time/train-explain-model-locally-and-deploy.ipynb)\n",
        "    1. [Deploy a locally-trained keras model and explainer](../scoring-time/train-explain-model-keras-locally-and-deploy.ipynb)\n",
        "    1. [Deploy a remotely-trained model and explainer](../scoring-time/train-explain-model-on-amlcompute-and-deploy.ipynb)"
      ]
    },
--- a/how-to-use-azureml/explain-model/azure-integration/scoring-time/score_local_explain_keras.py
+++ b/how-to-use-azureml/explain-model/azure-integration/scoring-time/score_local_explain_keras.py
@@ -0,0 +1,33 @@
 import json
 import pandas as pd
 from sklearn.externals import joblib
 from azureml.core.model import Model
 import tensorflow as tf
 def init():
    """Load the featurizer, Keras network, and scoring explainer into module globals.

    Resolves the registered models named 'featurize', 'keras_model', and
    'IBM_attrition_explainer' through ``Model.get_model_path`` and loads each
    one into the globals that ``run`` reads.
    """
    global preprocess, network, scoring_explainer
    # Resolve every registered model path up front, before anything is loaded
    path_to_featurizer = Model.get_model_path('featurize')
    path_to_network = Model.get_model_path('keras_model')
    path_to_explainer = Model.get_model_path('IBM_attrition_explainer')
    # joblib restores the featurizer and the explainer; Keras restores the network
    preprocess = joblib.load(path_to_featurizer)
    network = tf.keras.models.load_model(path_to_network)
    scoring_explainer = joblib.load(path_to_explainer)
 def run(raw_data):
    """Score a JSON payload and explain each prediction.

    ``raw_data`` is parsed with ``pd.read_json``. The parsed frame is passed
    through the global ``preprocess`` before ``network.predict`` is called,
    while ``scoring_explainer.explain`` receives the parsed frame itself.

    Returns a dict with 'predictions' (``predictions.tolist()``) and
    'local_importance_values' (the explainer output, returned as-is).
    """
    frame = pd.read_json(raw_data)
    # The network is fed the featurized frame
    scores = network.predict(preprocess.transform(frame))
    # The explainer is given the parsed frame, not the featurized one
    importances = scoring_explainer.explain(frame)
    # Any return type works as long as it is JSON-serializable
    response = {
        'predictions': scores.tolist(),
        'local_importance_values': importances,
    }
    return response
--- a/how-to-use-azureml/explain-model/azure-integration/scoring-time/train-explain-model-keras-locally-and-deploy.ipynb
+++ b/how-to-use-azureml/explain-model/azure-integration/scoring-time/train-explain-model-keras-locally-and-deploy.ipynb
@@ -0,0 +1,595 @@
 {
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Copyright (c) Microsoft Corporation. All rights reserved.\n",
        "\n",
        "Licensed under the MIT License."
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/explain-model/azure-integration/scoring-time/train-explain-model-locally-and-deploy.png)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "# Train and explain keras model locally and deploy model with scoring explainer\n",
        "\n",
        "\n",
        "_**This notebook illustrates how to use the Azure Machine Learning Interpretability SDK to deploy a locally-trained keras model and its corresponding deep scoring explainer to Azure Container Instances (ACI) as a web service.**_\n",
        "\n",
        "\n",
        "\n",
        "\n",
        "\n",
        "Problem: IBM employee attrition classification with keras (train and explain a model locally and use Azure Container Instances (ACI) for deploying your model and its corresponding deep scoring explainer as a web service.)\n",
        "\n",
        "---\n",
        "\n",
        "## Table of Contents\n",
        "\n",
        "1. [Introduction](#Introduction)\n",
        "1. [Setup](#Setup)\n",
        "1. [Run model explainer locally at training time](#Explain)\n",
        "    1. Apply feature transformations\n",
        "    1. Train a binary classification keras model\n",
        "    1. Explain the model on raw features\n",
        "        1. Generate global explanations\n",
        "        1. Generate local explanations\n",
        "1. [Visualize explanations](#Visualize)\n",
        "1. [Deploy keras model and scoring explainer](#Deploy)\n",
        "1. [Next steps](#Next)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Introduction\n",
        "\n",
        "\n",
        "This notebook showcases how to train and explain a keras classification model locally, and deploy the trained model and its corresponding DeepExplainer to Azure Container Instances (ACI).\n",
        "It demonstrates the API calls that you need to make to submit a run for training and explaining a keras model to AMLCompute, download the computed explanations remotely, and visualize the global and local explanations via a visualization dashboard that provides an interactive way of discovering patterns in model predictions and downloaded explanations. It also demonstrates how to use Azure Machine Learning MLOps capabilities to deploy your keras model and its corresponding DeepExplainer.\n",
        "\n",
        "We will showcase one of the tabular data explainers, DeepExplainer (SHAP), following these steps:\n",
        "1.\tDevelop a machine learning script in Python which involves the training script and the explanation script.\n",
        "2.\tRun the script locally.\n",
        "3.\tUse the interpretability toolkit's visualization dashboard to visualize predictions and their explanation. If the metrics and explanations don't indicate a desired outcome, loop back to step 1 and iterate on your scripts.\n",
        "4.\tAfter a satisfactory run is found, create a Deep Scoring Explainer and register the persisted model and its corresponding DeepExplainer in the model registry.\n",
        "5.\tDevelop a scoring script.\n",
        "6.\tCreate an image and register it in the image registry.\n",
        "7.\tDeploy the image as a web service in Azure.\n",
        "\n",
        "\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Setup\n",
        "Make sure you go through the [configuration notebook](../../../../configuration.ipynb) first if you haven't."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Check core SDK version number\n",
        "import azureml.core\n",
        "\n",
        "print(\"SDK version:\", azureml.core.VERSION)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Initialize a Workspace\n",
        "\n",
        "Initialize a workspace object from persisted configuration"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "tags": [
          "create workspace"
        ]
      },
      "outputs": [],
      "source": [
        "from azureml.core import Workspace\n",
        "\n",
        "ws = Workspace.from_config()\n",
        "print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep='\\n')"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Explain\n",
        "Create An Experiment: **Experiment** is a logical container in an Azure ML Workspace. It hosts run records which can include run metrics and output artifacts from your experiments."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "from azureml.core import Experiment\n",
        "experiment_name = 'explain_model_at_scoring_time'\n",
        "experiment = Experiment(workspace=ws, name=experiment_name)\n",
        "run = experiment.start_logging()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# get IBM attrition data\n",
        "import os\n",
        "import pandas as pd\n",
        "\n",
        "outdirname = 'dataset.6.21.19'\n",
        "try:\n",
        "    from urllib import urlretrieve\n",
        "except ImportError:\n",
        "    from urllib.request import urlretrieve\n",
        "import zipfile\n",
        "zipfilename = outdirname + '.zip'\n",
        "urlretrieve('https://publictestdatasets.blob.core.windows.net/data/' + zipfilename, zipfilename)\n",
        "with zipfile.ZipFile(zipfilename, 'r') as unzip:\n",
        "    unzip.extractall('.')\n",
        "attritionData = pd.read_csv('./WA_Fn-UseC_-HR-Employee-Attrition.csv')"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "from sklearn.model_selection import train_test_split\n",
        "from sklearn.externals import joblib\n",
        "from sklearn.preprocessing import StandardScaler, OneHotEncoder\n",
        "from sklearn.impute import SimpleImputer\n",
        "from sklearn.pipeline import Pipeline\n",
        "from sklearn_pandas import DataFrameMapper\n",
        "\n",
        "os.makedirs('./outputs', exist_ok=True)\n",
        "\n",
        "# Dropping Employee count as all values are 1 and hence attrition is independent of this feature\n",
        "attritionData = attritionData.drop(['EmployeeCount'], axis=1)\n",
        "# Dropping Employee Number since it is merely an identifier\n",
        "attritionData = attritionData.drop(['EmployeeNumber'], axis=1)\n",
        "attritionData = attritionData.drop(['Over18'], axis=1)\n",
        "# Since all values are 80\n",
        "attritionData = attritionData.drop(['StandardHours'], axis=1)\n",
        "\n",
        "# Converting target variables from string to numerical values\n",
        "target_map = {'Yes': 1, 'No': 0}\n",
        "attritionData[\"Attrition_numerical\"] = attritionData[\"Attrition\"].apply(lambda x: target_map[x])\n",
        "target = attritionData[\"Attrition_numerical\"]\n",
        "\n",
        "attritionXData = attritionData.drop(['Attrition_numerical', 'Attrition'], axis=1)\n",
        "\n",
        "# Creating dummy columns for each categorical feature\n",
        "categorical = []\n",
        "for col, value in attritionXData.iteritems():\n",
        "    if value.dtype == 'object':\n",
        "        categorical.append(col)\n",
        "\n",
        "# Store the numerical columns in a list numerical\n",
        "numerical = attritionXData.columns.difference(categorical)\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "from sklearn.compose import ColumnTransformer\n",
        "\n",
        "# We create the preprocessing pipelines for both numeric and categorical data.\n",
        "numeric_transformer = Pipeline(steps=[\n",
        "    ('imputer', SimpleImputer(strategy='median')),\n",
        "    ('scaler', StandardScaler())])\n",
        "\n",
        "categorical_transformer = Pipeline(steps=[\n",
        "    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),\n",
        "    ('onehot', OneHotEncoder(handle_unknown='ignore'))])\n",
        "\n",
        "preprocess = ColumnTransformer(\n",
        "    transformers=[\n",
        "        ('num', numeric_transformer, numerical),\n",
        "        ('cat', categorical_transformer, categorical)])"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "from sklearn.pipeline import make_pipeline\n",
        "pipeline = make_pipeline(preprocess)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "from sklearn.model_selection import train_test_split\n",
        "\n",
        "X_train, X_test, y_train, y_test = train_test_split(attritionXData, \n",
        "                                                    target, \n",
        "                                                    test_size=0.2,\n",
        "                                                    random_state=0,\n",
        "                                                    stratify=target)\n",
        "\n",
        "X_train_t = pipeline.fit_transform(X_train)\n",
        "X_test_t = pipeline.transform(X_test)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# check tensorflow version\n",
        "import tensorflow as tf\n",
        "from distutils.version import StrictVersion\n",
        "\n",
        "print(tf.__version__)\n",
        "# Define the Keras classifier that is trained on the preprocessed features (X_train_t).\n",
        "# The preprocessing pipeline is kept separate from the network.\n",
        "\n",
        "\n",
        "network = tf.keras.models.Sequential()\n",
        "network.add(tf.keras.layers.Dense(units=16, activation='relu', input_shape=(X_train_t.shape[1],)))\n",
        "network.add(tf.keras.layers.Dense(units=16, activation='relu'))\n",
        "network.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))\n",
        "\n",
        "# Compile neural network\n",
        "network.compile(loss='binary_crossentropy', # Cross-entropy\n",
        "                optimizer='rmsprop', # Root Mean Square Propagation\n",
        "                metrics=['accuracy']) # Accuracy performance metric\n",
        "\n",
        "# Train neural network\n",
        "history = network.fit(X_train_t, # Features\n",
        "                      y_train, # Target vector\n",
        "                      epochs=20, # Number of epochs\n",
        "                      verbose=1, # Print description after each epoch\n",
        "                      batch_size=100, # Number of observations per batch\n",
        "                      validation_data=(X_test_t, y_test)) # Data for evaluation"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# You can run the DeepExplainer directly, or run the TabularExplainer which will choose the most appropriate explainer\n",
        "from interpret.ext.greybox import DeepExplainer\n",
        "explainer = DeepExplainer(network,\n",
        "                          X_train,\n",
        "                          features=X_train.columns,\n",
        "                          classes=[\"STAYING\", \"LEAVING\"], \n",
        "                          transformations=preprocess,\n",
        "                          model_task=\"classification\",\n",
        "                          is_classifier=True)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Save featurization prior to keras model in the outputs folder so it automatically gets uploaded\n",
        "# We cannot save Keras with the pipeline due to known issues with pickling Keras models\n",
        "featurize_file_name = 'featurize.pkl'\n",
        "\n",
        "with open(featurize_file_name, 'wb') as file:\n",
        "    joblib.dump(value=preprocess, filename=os.path.join('./outputs/', featurize_file_name))"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Save keras model to disk\n",
        "keras_model_file_name = 'keras_model.pkl'\n",
        "network.save(os.path.join('./outputs/', keras_model_file_name))"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Explain overall model predictions (global explanation)\n",
        "# Passing in test dataset for evaluation examples - note it must be a representative sample of the original data\n",
        "# X_train can be passed as well, but with more examples the explanations will\n",
        "# take longer to compute, although they may be more accurate\n",
        "global_explanation = explainer.explain_global(X_test)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "from azureml.interpret.scoring.scoring_explainer import DeepScoringExplainer, save\n",
        "from azureml.interpret.model.serialize import KerasSerializer\n",
        "# ScoringExplainer with custom keras serializer\n",
        "scoring_explainer = DeepScoringExplainer(explainer, serializer=KerasSerializer())\n",
        "# Pickle scoring explainer locally\n",
        "save(scoring_explainer, exist_ok=True)\n",
        "\n",
        "# Register featurization\n",
        "run.upload_file(featurize_file_name, os.path.join('./outputs/', featurize_file_name))\n",
        "featurize_model = run.register_model(model_name='featurize',\n",
        "                                     model_path=featurize_file_name)\n",
        "\n",
        "# Register keras model\n",
        "run.upload_file(keras_model_file_name, os.path.join('./outputs/', keras_model_file_name))\n",
        "keras_model = run.register_model(model_name='keras_model',\n",
        "                                 model_path=keras_model_file_name)\n",
        "\n",
        "# Register scoring explainer\n",
        "run.upload_file('IBM_attrition_explainer.pkl', 'scoring_explainer.pkl')\n",
        "scoring_explainer_model = run.register_model(model_name='IBM_attrition_explainer', model_path='IBM_attrition_explainer.pkl')"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Use helper utility to wrap keras model in scikit-learn style API for visualization dashboard\n",
        "from interpret_community.common.model_wrapper import wrap_model\n",
        "from interpret_community.dataset.dataset_wrapper import DatasetWrapper\n",
        "wrapped_model, ml_domain = wrap_model(network, DatasetWrapper(X_test_t), \"classification\")\n",
        "wrapped_model.fit = network.fit\n",
        "from sklearn.pipeline import Pipeline\n",
        "dashboard_pipeline = Pipeline(steps=[('preprocess', preprocess), ('network', wrapped_model)])"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Visualize\n",
        "Visualize the explanations"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "from interpret_community.widget import ExplanationDashboard"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "ExplanationDashboard(global_explanation, dashboard_pipeline, datasetX=X_test)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Deploy \n",
        "\n",
        "Deploy Model and ScoringExplainer.\n",
        "\n",
        "Please note that you must indicate azureml-defaults with version >= 1.0.45 as a pip dependency, because it contains the functionality needed to host the model as a web service."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "from azureml.core.conda_dependencies import CondaDependencies \n",
        "\n",
        "# azureml-defaults is required to host the model as a web service.\n",
        "azureml_pip_packages = [\n",
        "    'azureml-defaults', 'azureml-contrib-interpret', 'azureml-core', 'azureml-telemetry',\n",
        "    'azureml-interpret'\n",
        "]\n",
        "# specify CondaDependencies obj\n",
        "myenv = CondaDependencies.create(conda_packages=['scikit-learn', 'pandas'],\n",
        "                                 pip_packages=['sklearn-pandas', 'pyyaml', 'tensorflow<2.0', 'keras==2.3.1'] + azureml_pip_packages)\n",
        "\n",
        "with open(\"myenv.yml\",\"w\") as f:\n",
        "    f.write(myenv.serialize_to_string())\n",
        "\n",
        "with open(\"myenv.yml\",\"r\") as f:\n",
        "    print(f.read())"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "from azureml.core.model import Model\n",
        "# retrieve scoring explainer for deployment\n",
        "scoring_explainer_model = Model(ws, 'IBM_attrition_explainer')"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "from azureml.core.webservice import Webservice\n",
        "from azureml.core.model import InferenceConfig\n",
        "from azureml.core.webservice import AciWebservice\n",
        "from azureml.core.model import Model\n",
        "from azureml.core.environment import Environment\n",
        "\n",
        "\n",
        "aciconfig = AciWebservice.deploy_configuration(cpu_cores=1,\n",
        "                                               memory_gb=1,\n",
        "                                               tags={\"data\": \"IBM_Attrition\",\n",
        "                                                     \"method\" : \"local_explanation\"},\n",
        "                                               description='Get local explanations for IBM Employee Attrition data')\n",
        "\n",
        "myenv = Environment.from_conda_specification(name=\"myenv\", file_path=\"myenv.yml\")\n",
        "inference_config = InferenceConfig(entry_script=\"score_local_explain_keras.py\", environment=myenv)\n",
        "\n",
        "# Use configs and models generated above\n",
        "service = Model.deploy(ws, 'model-scoring-deploy-local', [scoring_explainer_model, featurize_model, keras_model], inference_config, aciconfig)\n",
        "service.wait_for_deployment(show_output=True)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "print(service.get_logs())"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "import requests\n",
        "import json\n",
        "\n",
        "# Create data to test service with\n",
        "sample_data = '{\"Age\":{\"899\":49},\"BusinessTravel\":{\"899\":\"Travel_Rarely\"},\"DailyRate\":{\"899\":1098},\"Department\":{\"899\":\"Research & Development\"},\"DistanceFromHome\":{\"899\":4},\"Education\":{\"899\":2},\"EducationField\":{\"899\":\"Medical\"},\"EnvironmentSatisfaction\":{\"899\":1},\"Gender\":{\"899\":\"Male\"},\"HourlyRate\":{\"899\":85},\"JobInvolvement\":{\"899\":2},\"JobLevel\":{\"899\":5},\"JobRole\":{\"899\":\"Manager\"},\"JobSatisfaction\":{\"899\":3},\"MaritalStatus\":{\"899\":\"Married\"},\"MonthlyIncome\":{\"899\":18711},\"MonthlyRate\":{\"899\":12124},\"NumCompaniesWorked\":{\"899\":2},\"OverTime\":{\"899\":\"No\"},\"PercentSalaryHike\":{\"899\":13},\"PerformanceRating\":{\"899\":3},\"RelationshipSatisfaction\":{\"899\":3},\"StockOptionLevel\":{\"899\":1},\"TotalWorkingYears\":{\"899\":23},\"TrainingTimesLastYear\":{\"899\":2},\"WorkLifeBalance\":{\"899\":4},\"YearsAtCompany\":{\"899\":1},\"YearsInCurrentRole\":{\"899\":0},\"YearsSinceLastPromotion\":{\"899\":0},\"YearsWithCurrManager\":{\"899\":0}}'\n",
        "\n",
        "headers = {'Content-Type':'application/json'}\n",
        "\n",
        "# send request to service\n",
        "resp = requests.post(service.scoring_uri, sample_data, headers=headers)\n",
        "\n",
        "print(\"POST to url\", service.scoring_uri)\n",
        "# can convert back to Python objects from json string if desired\n",
        "print(\"prediction:\", resp.text)\n",
        "result = json.loads(resp.text)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "#plot the feature importance for the prediction\n",
        "import numpy as np\n",
        "import matplotlib.pyplot as plt; plt.rcdefaults()\n",
        "\n",
        "labels = json.loads(sample_data)\n",
        "labels = labels.keys()\n",
        "objects = labels\n",
        "y_pos = np.arange(len(objects))\n",
        "performance = result[\"local_importance_values\"][0][0]\n",
        "\n",
        "plt.bar(y_pos, performance, align='center', alpha=0.5)\n",
        "plt.xticks(y_pos, objects)\n",
        "locs, labels = plt.xticks()\n",
        "plt.setp(labels, rotation=90)\n",
        "plt.ylabel('Feature impact - leaving vs not leaving')\n",
        "plt.title('Local feature importance for prediction')\n",
        "\n",
        "plt.show()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "service.delete()"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Next\n",
        "Learn about other use cases of the explain package on a:\n",
        "1. [Training time: regression problem](https://github.com/interpretml/interpret-community/blob/master/notebooks/explain-regression-local.ipynb)       \n",
        "1. [Training time: binary classification problem](https://github.com/interpretml/interpret-community/blob/master/notebooks/explain-binary-classification-local.ipynb)\n",
        "1. [Training time: multiclass classification problem](https://github.com/interpretml/interpret-community/blob/master/notebooks/explain-multiclass-classification-local.ipynb)\n",
        "1. Explain models with engineered features:\n",
        "    1. [Simple feature transformations](https://github.com/interpretml/interpret-community/blob/master/notebooks/simple-feature-transformations-explain-local.ipynb)\n",
        "    1. [Advanced feature transformations](https://github.com/interpretml/interpret-community/blob/master/notebooks/advanced-feature-transformations-explain-local.ipynb)\n",
        "1. [Save model explanations via Azure Machine Learning Run History](../run-history/save-retrieve-explanations-run-history.ipynb)\n",
        "1. [Run explainers remotely on Azure Machine Learning Compute (AMLCompute)](../remote-explanation/explain-model-on-amlcompute.ipynb)\n",
        "1. [Inferencing time: deploy a remotely-trained model and explainer](./train-explain-model-on-amlcompute-and-deploy.ipynb)\n",
        "1. [Inferencing time: deploy a locally-trained model and explainer](./train-explain-model-locally-and-deploy.ipynb)"
      ]
    }
  ],
  "metadata": {
    "authors": [
      {
        "name": "mesameki"
      }
    ],
    "kernelspec": {
      "display_name": "Python 3.6",
      "language": "python",
      "name": "python36"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.6.8"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 2
 }
--- a/how-to-use-azureml/explain-model/azure-integration/scoring-time/train-explain-model-keras-locally-and-deploy.yml
+++ b/how-to-use-azureml/explain-model/azure-integration/scoring-time/train-explain-model-keras-locally-and-deploy.yml
@@ -0,0 +1,12 @@
 name: train-explain-model-keras-locally-and-deploy
 dependencies:
 - pip:
  - azureml-sdk
  - azureml-interpret
  - interpret-community[visualization]
  - matplotlib
  - azureml-contrib-interpret
  - sklearn-pandas
  - ipywidgets
  - tensorflow<2.0
  - keras
--- a/how-to-use-azureml/explain-model/azure-integration/scoring-time/train-explain-model-locally-and-deploy.ipynb
+++ b/how-to-use-azureml/explain-model/azure-integration/scoring-time/train-explain-model-locally-and-deploy.ipynb
@@ -453,7 +453,8 @@
        "    1. [Advanced feature transformations](https://github.com/interpretml/interpret-community/blob/master/notebooks/advanced-feature-transformations-explain-local.ipynb)\n",
        "1. [Save model explanations via Azure Machine Learning Run History](../run-history/save-retrieve-explanations-run-history.ipynb)\n",
        "1. [Run explainers remotely on Azure Machine Learning Compute (AMLCompute)](../remote-explanation/explain-model-on-amlcompute.ipynb)\n",
-        "1. [Inferencing time: deploy a remotely-trained model and explainer](./train-explain-model-on-amlcompute-and-deploy.ipynb)"
+        "1. [Inferencing time: deploy a remotely-trained model and explainer](./train-explain-model-on-amlcompute-and-deploy.ipynb)\n",
        "1. [Inferencing time: deploy a locally-trained keras model and explainer](./train-explain-model-keras-locally-and-deploy.ipynb)"
      ]
    },
    {
--- a/how-to-use-azureml/explain-model/azure-integration/scoring-time/train-explain-model-on-amlcompute-and-deploy.ipynb
+++ b/how-to-use-azureml/explain-model/azure-integration/scoring-time/train-explain-model-on-amlcompute-and-deploy.ipynb
@@ -491,7 +491,8 @@
        "    1. [Advanced feature transformations](https://github.com/interpretml/interpret-community/blob/master/notebooks/advanced-feature-transformations-explain-local.ipynb)\n",
        "1. [Save model explanations via Azure Machine Learning Run History](../run-history/save-retrieve-explanations-run-history.ipynb)\n",
        "1. [Run explainers remotely on Azure Machine Learning Compute (AMLCompute)](../remote-explanation/explain-model-on-amlcompute.ipynb)\n",
-        "1. [Inferencing time: deploy a locally-trained model and explainer](./train-explain-model-locally-and-deploy.ipynb)"
+        "1. [Inferencing time: deploy a locally-trained model and explainer](./train-explain-model-locally-and-deploy.ipynb)\n",
        "1. [Inferencing time: deploy a locally-trained keras model and explainer](./train-explain-model-keras-locally-and-deploy.ipynb)"
      ]
    },
    {
--- a/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-with-automated-machine-learning-step.ipynb
+++ b/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-with-automated-machine-learning-step.ipynb
@@ -70,11 +70,7 @@
        "from azureml.core.experiment import Experiment\n",
        "from azureml.core.workspace import Workspace\n",
        "from azureml.train.automl import AutoMLConfig\n",
        "from azureml.core.compute import AmlCompute\n",
        "from azureml.core.compute import ComputeTarget\n",
        "from azureml.core.dataset import Dataset\n",
        "from azureml.core.runconfig import RunConfiguration\n",
        "from azureml.core.conda_dependencies import CondaDependencies\n",
        "\n",
        "from azureml.pipeline.steps import AutoMLStep\n",
        "\n",
@@ -138,31 +134,24 @@
      "metadata": {},
      "outputs": [],
      "source": [
-        "# Choose a name for your cluster.\n",
+        "from azureml.core.compute import AmlCompute\n",
        "from azureml.core.compute import ComputeTarget\n",
        "\n",
        "# Choose a name for your CPU cluster\n",
        "amlcompute_cluster_name = \"cpu-cluster\"\n",
        "\n",
-        "found = False\n",
+        "# Verify that cluster does not exist already\n",
-        "# Check if this compute target already exists in the workspace.\n",
+        "try:\n",
-        "cts = ws.compute_targets\n",
+        "    compute_target = ComputeTarget(workspace=ws, name=amlcompute_cluster_name)\n",
-        "if amlcompute_cluster_name in cts and cts[amlcompute_cluster_name].type == 'AmlCompute':\n",
+        "    print('Found existing cluster, use it.')\n",
-        "    found = True\n",
+        "except ComputeTargetException:\n",
-        "    print('Found existing compute target.')\n",
+        "    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',# for GPU, use \"STANDARD_NC6\"\n",
-        "    compute_target = cts[amlcompute_cluster_name]\n",
+        "                                                           #vm_priority = 'lowpriority', # optional\n",
-        "    \n",
+        "                                                           max_nodes=4)\n",
-        "if not found:\n",
+        "    compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, compute_config)\n",
        "    print('Creating a new compute target...')\n",
        "    provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\", # for GPU, use \"STANDARD_NC6\"\n",
        "                                                                #vm_priority = 'lowpriority', # optional\n",
        "                                                                max_nodes = 4)\n",
        "\n",
-        "    # Create the cluster.\n",
+        "compute_target.wait_for_completion(show_output=True, min_node_count = 1, timeout_in_minutes = 10)\n",
-        "    compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)\n",
+        "# For a more detailed view of current AmlCompute status, use get_status()."
        "    \n",
        "    # Can poll for a minimum number of nodes and for a specific timeout.\n",
        "    # If no min_node_count is provided, it will use the scale settings for the cluster.\n",
        "    compute_target.wait_for_completion(show_output = True, min_node_count = 1, timeout_in_minutes = 10)\n",
        "    \n",
        "     # For a more detailed view of current AmlCompute status, use get_status()."
      ]
    },
    {
--- a/how-to-use-azureml/machine-learning-pipelines/nyc-taxi-data-regression-model-building/nyc-taxi-data-regression-model-building.ipynb
+++ b/how-to-use-azureml/machine-learning-pipelines/nyc-taxi-data-regression-model-building/nyc-taxi-data-regression-model-building.ipynb
@@ -686,8 +686,7 @@
        "    \"n_cross_validations\": 5\n",
        "}\n",
        "\n",
-        "train_X = output_split_train.parse_parquet_files(file_extension=None).keep_columns(['pickup_weekday','pickup_hour', 'distance','passengers', 'vendor'])\n",
+        "training_dataset = output_split_train.parse_parquet_files(file_extension=None).keep_columns(['pickup_weekday','pickup_hour', 'distance','passengers', 'vendor', 'cost'])\n",
        "train_y = output_split_train.parse_parquet_files(file_extension=None).keep_columns('cost')\n",
        "\n",
        "automl_config = AutoMLConfig(task = 'regression',\n",
        "                             debug_log = 'automated_ml_errors.log',\n",
@@ -695,8 +694,8 @@
        "                             compute_target = aml_compute,\n",
        "                             run_configuration = aml_run_config,\n",
        "                             featurization = 'auto',\n",
-        "                             X = train_X,\n",
+        "                             training_data = training_dataset,\n",
-        "                             y = train_y,\n",
+        "                             label_column_name = 'cost',\n",
        "                             **automl_settings)\n",
        "                             \n",
        "print(\"AutoML config created.\")"
--- a/how-to-use-azureml/machine-learning-pipelines/pipeline-style-transfer/pipeline-style-transfer.yml
+++ b/how-to-use-azureml/machine-learning-pipelines/pipeline-style-transfer/pipeline-style-transfer.yml
@@ -3,5 +3,6 @@ dependencies:
 - pip:
  - azureml-sdk
  - azureml-contrib-pipeline-steps
  - azureml-pipeline-steps
  - azureml-widgets
  - requests
--- a/how-to-use-azureml/track-and-monitor-experiments/logging-api/logging-api.ipynb
+++ b/how-to-use-azureml/track-and-monitor-experiments/logging-api/logging-api.ipynb
@@ -100,7 +100,7 @@
        "\n",
        "# Check core SDK version number\n",
        "\n",
-        "print(\"This notebook was created using SDK version 1.3.0, you are currently running version\", azureml.core.VERSION)"
+        "print(\"This notebook was created using SDK version 1.4.0, you are currently running version\", azureml.core.VERSION)"
      ]
    },
    {
--- a/index.md
+++ b/index.md
@@ -122,6 +122,7 @@ Machine Learning notebook samples and encourage efficient retrieval of topics an
 | [tensorflow-model-register-and-deploy](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/deployment/tensorflow/tensorflow-model-register-and-deploy.ipynb) |  |  |  |  |  |  |
 | [explain-model-on-amlcompute](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/explain-model/azure-integration/remote-explanation/explain-model-on-amlcompute.ipynb) |  |  |  |  |  |  |
 | [save-retrieve-explanations-run-history](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/explain-model/azure-integration/run-history/save-retrieve-explanations-run-history.ipynb) |  |  |  |  |  |  |
 | [train-explain-model-keras-locally-and-deploy](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/explain-model/azure-integration/scoring-time/train-explain-model-keras-locally-and-deploy.ipynb) |  |  |  |  |  |  |
 | [train-explain-model-locally-and-deploy](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/explain-model/azure-integration/scoring-time/train-explain-model-locally-and-deploy.ipynb) |  |  |  |  |  |  |
 | [train-explain-model-on-amlcompute-and-deploy](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/explain-model/azure-integration/scoring-time/train-explain-model-on-amlcompute-and-deploy.ipynb) |  |  |  |  |  |  |
 | [training_notebook](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/notebook_runner/training_notebook.ipynb) |  |  |  |  |  |  |
--- a/setup-environment/configuration.ipynb
+++ b/setup-environment/configuration.ipynb
@@ -102,7 +102,7 @@
      "source": [
        "import azureml.core\n",
        "\n",
-        "print(\"This notebook was created using version 1.3.0 of the Azure ML SDK\")\n",
+        "print(\"This notebook was created using version 1.4.0 of the Azure ML SDK\")\n",
        "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
      ]
    },