From b891440e2d7815a6bea8e6520cde478e86bcef8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Shan=C3=A9=20Winner?= <43390034+swinner95@users.noreply.github.com> Date: Tue, 27 Aug 2019 11:18:50 -0700 Subject: [PATCH] Delete auto-ml-dataprep.ipynb --- .../dataprep/auto-ml-dataprep.ipynb | 417 ------------------ 1 file changed, 417 deletions(-) delete mode 100644 how-to-use-azureml/automated-machine-learning/dataprep/auto-ml-dataprep.ipynb diff --git a/how-to-use-azureml/automated-machine-learning/dataprep/auto-ml-dataprep.ipynb b/how-to-use-azureml/automated-machine-learning/dataprep/auto-ml-dataprep.ipynb deleted file mode 100644 index b409f6e6..00000000 --- a/how-to-use-azureml/automated-machine-learning/dataprep/auto-ml-dataprep.ipynb +++ /dev/null @@ -1,417 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/automated-machine-learning/dataprep/auto-ml-dataprep.png)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Copyright (c) Microsoft Corporation. All rights reserved.\n", - "\n", - "Licensed under the MIT License." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Automated Machine Learning\n", - "_**Prepare Data using `azureml.dataprep` for Local Execution**_\n", - "\n", - "## Contents\n", - "1. [Introduction](#Introduction)\n", - "1. [Setup](#Setup)\n", - "1. [Data](#Data)\n", - "1. [Train](#Train)\n", - "1. [Results](#Results)\n", - "1. [Test](#Test)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Introduction\n", - "In this example we showcase how you can use the `azureml.dataprep` SDK to load and prepare data for AutoML. 
`azureml.dataprep` can also be used standalone; full documentation can be found [here](https://github.com/Microsoft/PendletonDocs).\n", - "\n", - "Make sure you have executed the [configuration](../../../configuration.ipynb) before running this notebook.\n", - "\n", - "In this notebook you will learn how to:\n", - "1. Define data loading and preparation steps in a `Dataflow` using `azureml.dataprep`.\n", - "2. Pass the `Dataflow` to AutoML for a local run.\n", - "3. Pass the `Dataflow` to AutoML for a remote run." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Setup\n", - "\n", - "Currently, Data Prep only supports __Ubuntu 16__ and __Red Hat Enterprise Linux 7__. We are working on supporting more Linux distributions." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As part of the setup you have already created an Azure ML `Workspace` object. For AutoML you will need to create an `Experiment` object, which is a named object in a `Workspace` used to run experiments." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import logging\n", - "\n", - "import pandas as pd\n", - "\n", - "import azureml.core\n", - "from azureml.core.experiment import Experiment\n", - "from azureml.core.workspace import Workspace\n", - "import azureml.dataprep as dprep\n", - "from azureml.train.automl import AutoMLConfig" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ws = Workspace.from_config()\n", - " \n", - "# choose a name for the experiment\n", - "experiment_name = 'automl-dataprep-local'\n", - "# project folder\n", - "project_folder = './sample_projects/automl-dataprep-local'\n", - " \n", - "experiment = Experiment(ws, experiment_name)\n", - " \n", - "output = {}\n", - "output['SDK version'] = azureml.core.VERSION\n", - "output['Subscription ID'] = ws.subscription_id\n", - "output['Workspace Name'] = ws.name\n", - "output['Resource Group'] = ws.resource_group\n", - "output['Location'] = ws.location\n", - "output['Project Directory'] = project_folder\n", - "output['Experiment Name'] = experiment.name\n", - "pd.set_option('display.max_colwidth', -1)\n", - "outputDf = pd.DataFrame(data = output, index = [''])\n", - "outputDf.T" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# You can use `auto_read_file`, which intelligently figures out the delimiters and datatypes of a file.\n", - "# The data referenced here is a 1MB simple random sample of the Chicago Crime data.\n", - "# You can also use `read_csv` and `to_*` transformations to read (with overridable delimiter)\n", - "# and convert column types manually.\n", - "example_data = 'https://dprepdata.blob.core.windows.net/demo/crime0-random.csv'\n", - "dflow = 
dprep.auto_read_file(example_data).skip(1) # Remove the header row.\n", - "dflow.get_profile()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# As `Primary Type` is our y data, we need to drop the rows that have null values in this column.\n", - "dflow = dflow.drop_nulls('Primary Type')\n", - "dflow.head(5)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Review the Data Preparation Result\n", - "\n", - "You can peek at the result of a Dataflow at any range using `skip(i)` and `head(j)`. Doing so evaluates only `j` records for all the steps in the Dataflow, which makes it fast even against large datasets.\n", - "\n", - "`Dataflow` objects are immutable and are composed of a list of data preparation steps. A `Dataflow` object can be branched at any point for further use." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "X = dflow.drop_columns(columns=['Primary Type', 'FBI Code'])\n", - "y = dflow.keep_columns(columns=['Primary Type'], validate_column_exists=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Train\n", - "\n", - "This creates a general AutoML settings object applicable to both local and remote runs." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "automl_settings = {\n", - " \"iteration_timeout_minutes\" : 10,\n", - " \"iterations\" : 2,\n", - " \"primary_metric\" : 'AUC_weighted',\n", - " \"preprocess\" : True,\n", - " \"verbosity\" : logging.INFO\n", - "}" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Pass Data with `Dataflow` Objects\n", - "\n", - "The `Dataflow` objects captured above can be passed to the `submit` method for a local run. AutoML will retrieve the results from the `Dataflow` for model training." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "automl_config = AutoMLConfig(task = 'classification',\n", - " debug_log = 'automl_errors.log',\n", - " X = X,\n", - " y = y,\n", - " **automl_settings)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "local_run = experiment.submit(automl_config, show_output = True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "local_run" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Results" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Widget for Monitoring Runs\n", - "\n", - "The widget will first report a \"loading\" status while running the first iteration. After completing the first iteration, an auto-updating graph and table will be shown. The widget will refresh once per minute, so you should see the graph update as child runs complete.\n", - "\n", - "**Note:** The widget displays a link at the bottom. Use this link to open a web interface to explore the individual run details." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from azureml.widgets import RunDetails\n", - "RunDetails(local_run).show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Retrieve All Child Runs\n", - "You can also use SDK methods to fetch all the child runs and see individual metrics that we log." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "children = list(local_run.get_children())\n", - "metricslist = {}\n", - "for run in children:\n", - " properties = run.get_properties()\n", - " metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)}\n", - " metricslist[int(properties['iteration'])] = metrics\n", - " \n", - "rundata = pd.DataFrame(metricslist).sort_index(1)\n", - "rundata" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Retrieve the Best Model\n", - "\n", - "Below we select the best pipeline from our iterations. The `get_output` method returns the best run and the fitted model. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "best_run, fitted_model = local_run.get_output()\n", - "print(best_run)\n", - "print(fitted_model)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Best Model Based on Any Other Metric\n", - "Show the run and the model that has the smallest `log_loss` value:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "lookup_metric = \"log_loss\"\n", - "best_run, fitted_model = local_run.get_output(metric = lookup_metric)\n", - "print(best_run)\n", - "print(fitted_model)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Model from a Specific Iteration\n", - "Show the run and the model from the first iteration:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "iteration = 0\n", - "best_run, fitted_model = local_run.get_output(iteration = iteration)\n", - "print(best_run)\n", - "print(fitted_model)" - ] - }, - { - "cell_type": 
"markdown", - "metadata": {}, - "source": [ - "## Test\n", - "\n", - "#### Load Test Data\n", - "For the test data, it should have the same preparation step as the train data. Otherwise it might get failed at the preprocessing step." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "dflow_test = dprep.auto_read_file(path='https://dprepdata.blob.core.windows.net/demo/crime0-test.csv').skip(1)\n", - "dflow_test = dflow_test.drop_nulls('Primary Type')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Testing Our Best Fitted Model\n", - "We will use confusion matrix to see how our model works." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from pandas_ml import ConfusionMatrix\n", - "\n", - "y_test = dflow_test.keep_columns(columns=['Primary Type']).to_pandas_dataframe()\n", - "X_test = dflow_test.drop_columns(columns=['Primary Type', 'FBI Code']).to_pandas_dataframe()\n", - "\n", - "ypred = fitted_model.predict(X_test)\n", - "\n", - "cm = ConfusionMatrix(y_test['Primary Type'], ypred)\n", - "\n", - "print(cm)\n", - "\n", - "cm.plot()" - ] - } - ], - "metadata": { - "authors": [ - { - "name": "savitam" - } - ], - "kernelspec": { - "display_name": "Python 3.6", - "language": "python", - "name": "python36" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.5" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} \ No newline at end of file