From 143590cfb4ab3b0a8f8457dafd188ebc5297a2b7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Shan=C3=A9=20Winner?=
 <43390034+swinner95@users.noreply.github.com>
Date: Wed, 21 Aug 2019 10:30:39 -0700
Subject: [PATCH] Delete new-york-taxi_scale-out.ipynb

---
 .../new-york-taxi_scale-out.ipynb             | 135 ------------------
 1 file changed, 135 deletions(-)
 delete mode 100644 work-with-data/dataprep/case-studies/new-york-taxi/new-york-taxi_scale-out.ipynb

diff --git a/work-with-data/dataprep/case-studies/new-york-taxi/new-york-taxi_scale-out.ipynb b/work-with-data/dataprep/case-studies/new-york-taxi/new-york-taxi_scale-out.ipynb
deleted file mode 100644
index fd69f736..00000000
--- a/work-with-data/dataprep/case-studies/new-york-taxi/new-york-taxi_scale-out.ipynb
+++ /dev/null
@@ -1,135 +0,0 @@
-{
-  "cells": [
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "# Scale-Out Data Preparation\n"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "Once we are done with preparing and featurizing the data locally, we can run the same steps on the full dataset in scale-out mode. The new york taxi cab data is about 300GB in total, which is perfect for scale-out. Let's start by downloading the package we saved earlier to disk. Feel free to run the `new_york_taxi_cab.ipynb` notebook to generate the package yourself, in which case you may comment out the download code and set the `package_path` to where the package is saved."
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "from tempfile import mkdtemp\n",
-        "from os import path\n",
-        "from urllib.request import urlretrieve\n",
-        "\n",
-        "dflow_root = mkdtemp()\n",
-        "dflow_path = path.join(dflow_root, \"new_york_taxi.dprep\")\n",
-        "print(\"Downloading Dataflow to: {}\".format(dflow_path))\n",
-        "urlretrieve(\"https://dprepdata.blob.core.windows.net/demo/new_york_taxi_v2.dprep\", dflow_path)"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "Let's load the package we just downloaded."
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "import azureml.dataprep as dprep\n",
-        "\n",
-        "df = dprep.Dataflow.open(dflow_path)"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "Let's replace the datasources with the full dataset."
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "from uuid import uuid4\n",
-        "\n",
-        "other_step = df._get_steps()[7].arguments['dataflows'][0]['anonymousSteps'][0]\n",
-        "other_step['id'] = str(uuid4())\n",
-        "other_step['arguments']['path']['target'] = 1\n",
-        "other_step['arguments']['path']['resourceDetails'][0]['path'] = 'https://wranglewestus.blob.core.windows.net/nyctaxi/yellow_tripdata*'"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "green_dsource = dprep.BlobDataSource(\"https://wranglewestus.blob.core.windows.net/nyctaxi/green_tripdata*\")\n",
-        "df = df.replace_datasource(green_dsource)"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "Once we have replaced the datasource, we can now run the same steps on the full dataset. We will print the first 5 rows of the spark DataFrame. Since we are running on the full dataset, this might take a little while depending on your spark cluster's size."
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "spark_df = df.to_spark_dataframe()\n",
-        "spark_df.head(5)"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/work-with-data/dataprep/case-studies/new-york-taxi/new-york-taxi_scale-out.png)"
-      ]
-    }
-  ],
-  "metadata": {
-    "authors": [
-      {
-        "name": "sihhu"
-      }
-    ],
-    "kernelspec": {
-      "display_name": "Python 3.6",
-      "language": "python",
-      "name": "python36"
-    },
-    "language_info": {
-      "codemirror_mode": {
-        "name": "ipython",
-        "version": 3
-      },
-      "file_extension": ".py",
-      "mimetype": "text/x-python",
-      "name": "python",
-      "nbconvert_exporter": "python",
-      "pygments_lexer": "ipython3",
-      "version": "3.6.4"
-    },
-    "notice": "Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT License.",
-    "skip_execute_as_test": true
-  },
-  "nbformat": 4,
-  "nbformat_minor": 2
-}
\ No newline at end of file