From 218fed3d65685fe878164bac4b9de02be2db6148 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Shan=C3=A9=20Winner?= <43390034+swinner95@users.noreply.github.com> Date: Sun, 28 Jul 2019 00:21:35 -0700 Subject: [PATCH] Delete cache.ipynb --- .../dataprep/how-to-guides/cache.ipynb | 195 ------------------ 1 file changed, 195 deletions(-) delete mode 100644 how-to-use-azureml/work-with-data/dataprep/how-to-guides/cache.ipynb diff --git a/how-to-use-azureml/work-with-data/dataprep/how-to-guides/cache.ipynb b/how-to-use-azureml/work-with-data/dataprep/how-to-guides/cache.ipynb deleted file mode 100644 index a8044902..00000000 --- a/how-to-use-azureml/work-with-data/dataprep/how-to-guides/cache.ipynb +++ /dev/null @@ -1,195 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/work-with-data/dataprep/how-to-guides/cache.png)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Cache\n", - "Copyright (c) Microsoft Corporation. All rights reserved.
\n", - "Licensed under the MIT License." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "A Dataflow can be cached as a file on your disk during a local run by calling `dflow_cached = dflow.cache(directory_path)`. Doing this will run all the steps in the Dataflow, `dflow`, and save the cached data to the specified `directory_path`. The returned Dataflow, `dflow_cached`, has a Caching Step added at the end. Any subsequent runs on on the Dataflow `dflow_cached` will reuse the cached data, and the steps before the Caching Step will not be run again.\n", - "\n", - "Caching avoids running transforms multiple times, which can make local runs more efficient. Here are common places to use Caching:\n", - "- after reading data from remote\n", - "- after expensive transforms, such as Sort\n", - "- after transforms that change the shape of data, such as Sampling, Filter and Summarize\n", - "\n", - "Caching Step will be ignored during scale-out run invoked by `to_spark_dataframe()`." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We will start by reading in a dataset and applying some transforms to the Dataflow." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import azureml.dataprep as dprep\n", - "dflow = dprep.read_csv(path='../data/crime-spring.csv')\n", - "dflow = dflow.take_sample(probability=0.2, seed=7)\n", - "dflow = dflow.sort_asc(columns='Primary Type')\n", - "dflow = dflow.keep_columns(['ID', 'Case Number', 'Date', 'Primary Type'])\n", - "dflow.head(5)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Next, we will choose a directory to store the cached data." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "from pathlib import Path\n", - "cache_dir = str(Path(os.getcwd(), 'dataflow-cache'))\n", - "cache_dir" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We will now call `dflow.cache(directory_path)` to cache the Dataflow to the chosen directory." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "dflow_cached = dflow.cache(directory_path=cache_dir)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now we will inspect the steps in `dflow_cached` to confirm that a Caching Step was appended after the previous steps." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "[s.step_type for s in dflow_cached._get_steps()]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We can also list the files stored in the cache directory." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "os.listdir(cache_dir)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Running against `dflow_cached` will reuse the cached data and skip running all of the previous steps again." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "dflow_cached.head(5)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Adding additional steps to `dflow_cached` will also reuse the cached data and skip running the steps prior to the Caching Step."
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "dflow_cached_take = dflow_cached.take(10)\n", - "dflow_cached_skip = dflow_cached.skip(10).take(10)\n", - "\n", - "df_cached_take = dflow_cached_take.to_pandas_dataframe()\n", - "df_cached_skip = dflow_cached_skip.to_pandas_dataframe()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Clean up the cached data with shutil.rmtree\n", - "import shutil\n", - "shutil.rmtree(path=cache_dir)" - ] - } - ], - "metadata": { - "authors": [ - { - "name": "sihhu" - } - ], - "kernelspec": { - "display_name": "Python 3.6", - "language": "python", - "name": "python36" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.4" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} \ No newline at end of file
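For reference, the caching pattern the deleted notebook describes (run the steps once, persist the result to a directory on disk, and let later runs reuse the persisted data) can be sketched with the standard library alone. This is an illustrative stand-in, not `azureml.dataprep`'s actual Caching Step implementation: the `cached_result` helper and the pickle-based storage are assumptions made for the sketch; only the `dataflow-cache` directory name and the `shutil.rmtree` cleanup come from the notebook.

```python
import os
import pickle
import shutil
from pathlib import Path

# Cache directory, mirroring the notebook's 'dataflow-cache'.
cache_dir = Path(os.getcwd(), 'dataflow-cache')

def cached_result(compute, cache_path):
    """Run `compute` once; later calls reuse the result pickled at `cache_path`."""
    if cache_path.exists():
        with open(cache_path, 'rb') as f:
            return pickle.load(f)   # cache hit: skip the earlier steps
    result = compute()              # cache miss: run the steps once
    cache_path.parent.mkdir(parents=True, exist_ok=True)
    with open(cache_path, 'wb') as f:
        pickle.dump(result, f)
    return result

# Stand-in for an expensive transform such as a Sort.
expensive = lambda: sorted(range(1000), reverse=True)

first = cached_result(expensive, cache_dir / 'part-0.pkl')   # computes and caches
second = cached_result(expensive, cache_dir / 'part-0.pkl')  # loaded from cache
assert first == second

# Clean up, as in the notebook's final cell.
shutil.rmtree(cache_dir)
```

As in the notebook, anything appended after the cached point (the `take`/`skip` calls above) would only rerun its own work on top of the cached data.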