diff --git a/how-to-use-azureml/machine-learning-pipelines/pipeline-style-transfer/pipeline-style-transfer-parallel-run.ipynb b/how-to-use-azureml/machine-learning-pipelines/pipeline-style-transfer/pipeline-style-transfer-parallel-run.ipynb
deleted file mode 100644
index 51bc2da0..00000000
--- a/how-to-use-azureml/machine-learning-pipelines/pipeline-style-transfer/pipeline-style-transfer-parallel-run.ipynb
+++ /dev/null
@@ -1,753 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Copyright (c) Microsoft Corporation. All rights reserved.\n",
- "\n",
- "Licensed under the MIT License."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- ""
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Neural style transfer on video\n",
- "Using modified code from `pytorch`'s neural style [example](https://pytorch.org/tutorials/advanced/neural_style_tutorial.html), we show how to setup a pipeline for doing style transfer on video. The pipeline has following steps:\n",
- "1. Split a video into images\n",
- "2. Run neural style on each image using one of the provided models (from `pytorch` pretrained models for this example).\n",
- "3. Stitch the image back into a video.\n",
- "\n",
- "> **Tip**\n",
- "If your system requires low-latency processing (to process a single document or small set of documents quickly), use [real-time scoring](https://docs.microsoft.com/en-us/azure/machine-learning/v1/how-to-consume-web-service) instead of batch prediction."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Prerequisites\n",
- "If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, make sure you go through the configuration Notebook located at https://github.com/Azure/MachineLearningNotebooks first if you haven't. This sets you up with a working config file that has information on your workspace, subscription id, etc. "
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Initialize Workspace\n",
- "\n",
- "Initialize a workspace object from persisted configuration."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Check core SDK version number\n",
- "import azureml.core\n",
- "\n",
- "print(\"SDK version:\", azureml.core.VERSION)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "from azureml.core import Workspace, Experiment\n",
- "\n",
- "ws = Workspace.from_config()\n",
- "print('Workspace name: ' + ws.name, \n",
- " 'Azure region: ' + ws.location, \n",
- " 'Subscription id: ' + ws.subscription_id, \n",
- " 'Resource group: ' + ws.resource_group, sep = '\\n')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "from azureml.core.compute import AmlCompute, ComputeTarget\n",
- "from azureml.core import Datastore, Dataset\n",
- "from azureml.pipeline.core import Pipeline\n",
- "from azureml.pipeline.steps import PythonScriptStep\n",
- "from azureml.core.runconfig import CondaDependencies, RunConfiguration\n",
- "from azureml.core.compute_target import ComputeTargetException\n",
- "from azureml.data import OutputFileDatasetConfig"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Download models"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "import os\n",
- "\n",
- "# create directory for model\n",
- "model_dir = 'models'\n",
- "if not os.path.isdir(model_dir):\n",
- " os.mkdir(model_dir)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "import urllib.request\n",
- "\n",
- "def download_model(model_name):\n",
- " # downloaded models from https://pytorch.org/tutorials/advanced/neural_style_tutorial.html are kept here\n",
- " url = \"https://pipelinedata.blob.core.windows.net/styletransfer/saved_models/\" + model_name\n",
- " local_path = os.path.join(model_dir, model_name)\n",
- " urllib.request.urlretrieve(url, local_path)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Register all Models"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "from azureml.core.model import Model\n",
- "mosaic_model = None\n",
- "candy_model = None\n",
- "\n",
- "models = Model.list(workspace=ws, tags=['scenario'])\n",
- "for m in models:\n",
- " print(\"Name:\", m.name,\"\\tVersion:\", m.version, \"\\tDescription:\", m.description, m.tags)\n",
- " if m.name == 'mosaic' and mosaic_model is None:\n",
- " mosaic_model = m\n",
- " elif m.name == 'candy' and candy_model is None:\n",
- " candy_model = m\n",
- "\n",
- "if mosaic_model is None:\n",
- " print('Mosaic model does not exist, registering it')\n",
- " download_model('mosaic.pth')\n",
- " mosaic_model = Model.register(model_path = os.path.join(model_dir, \"mosaic.pth\"),\n",
- " model_name = \"mosaic\",\n",
- " tags = {'type': \"mosaic\", 'scenario': \"Style transfer using batch inference\"},\n",
- " description = \"Style transfer - Mosaic\",\n",
- " workspace = ws)\n",
- "else:\n",
- " print('Reusing existing mosaic model')\n",
- " \n",
- "\n",
- "if candy_model is None:\n",
- " print('Candy model does not exist, registering it')\n",
- " download_model('candy.pth')\n",
- " candy_model = Model.register(model_path = os.path.join(model_dir, \"candy.pth\"),\n",
- " model_name = \"candy\",\n",
- " tags = {'type': \"candy\", 'scenario': \"Style transfer using batch inference\"},\n",
- " description = \"Style transfer - Candy\",\n",
- " workspace = ws)\n",
- "else:\n",
- " print('Reusing existing candy model')"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Create or use existing compute\n",
- "\n",
- "> Note that if you have an AzureML Data Scientist role, you will not have permission to create compute resources. Talk to your workspace or IT admin to create the compute targets described in this section, if they do not already exist."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# AmlCompute\n",
- "cpu_cluster_name = \"cpu-cluster\"\n",
- "try:\n",
- " cpu_cluster = AmlCompute(ws, cpu_cluster_name)\n",
- " print(\"found existing cluster.\")\n",
- "except ComputeTargetException:\n",
- " print(\"creating new cluster\")\n",
- " provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_D2_v2\",\n",
- " max_nodes = 1)\n",
- "\n",
- " # create the cluster\n",
- " cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, provisioning_config)\n",
- " cpu_cluster.wait_for_completion(show_output=True)\n",
- " \n",
- "# AmlCompute\n",
- "gpu_cluster_name = \"gpu-cluster\"\n",
- "try:\n",
- " gpu_cluster = AmlCompute(ws, gpu_cluster_name)\n",
- " print(\"found existing cluster.\")\n",
- "except ComputeTargetException:\n",
- " print(\"creating new cluster\")\n",
- " provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"Standard_NC6s_v3\",\n",
- " max_nodes = 3)\n",
- "\n",
- " # create the cluster\n",
- " gpu_cluster = ComputeTarget.create(ws, gpu_cluster_name, provisioning_config)\n",
- " gpu_cluster.wait_for_completion(show_output=True)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Python Scripts\n",
- "We use an edited version of `neural_style_mpi.py` (original is [here](https://github.com/pytorch/examples/blob/master/fast_neural_style/neural_style/neural_style.py)). Scripts to split and stitch the video are thin wrappers to calls to `ffmpeg`. \n",
- "\n",
- "We install `ffmpeg` through conda dependencies."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "scripts_folder = \"scripts\""
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "process_video_script_file = \"process_video.py\"\n",
- "\n",
- "# peek at contents\n",
- "with open(os.path.join(scripts_folder, process_video_script_file)) as process_video_file:\n",
- " print(process_video_file.read())"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "stitch_video_script_file = \"stitch_video.py\"\n",
- "\n",
- "# peek at contents\n",
- "with open(os.path.join(scripts_folder, stitch_video_script_file)) as stitch_video_file:\n",
- " print(stitch_video_file.read())"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "The sample video **organutan.mp4** is stored at a publicly shared datastore. We are registering the datastore below. If you want to take a look at the original video, click here. (https://pipelinedata.blob.core.windows.net/sample-videos/orangutan.mp4)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# datastore for input video\n",
- "account_name = \"pipelinedata\"\n",
- "video_ds = Datastore.register_azure_blob_container(ws, \"videos\", \"sample-videos\",\n",
- " account_name=account_name, overwrite=True)\n",
- "\n",
- "# the default blob store attached to a workspace\n",
- "default_datastore = ws.get_default_datastore()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Sample video"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "video_name=os.getenv(\"STYLE_TRANSFER_VIDEO_NAME\", \"orangutan.mp4\") \n",
- "orangutan_video = Dataset.File.from_files((video_ds,video_name))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "cd = CondaDependencies.create(python_version=\"3.8\", conda_packages=['pip==20.2.4'])\n",
- "\n",
- "cd.add_channel(\"conda-forge\")\n",
- "cd.add_conda_package(\"ffmpeg==4.0.2\")\n",
- "\n",
- "# Runconfig\n",
- "amlcompute_run_config = RunConfiguration(conda_dependencies=cd)\n",
- "amlcompute_run_config.environment.docker.base_image = \"pytorch/pytorch\"\n",
- "amlcompute_run_config.environment.spark.precache_packages = False"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "ffmpeg_audio = OutputFileDatasetConfig(name=\"ffmpeg_audio\")\n",
- "processed_images = OutputFileDatasetConfig(name=\"processed_images\")\n",
- "output_video = OutputFileDatasetConfig(name=\"output_video\")\n",
- "\n",
- "ffmpeg_images = OutputFileDatasetConfig(name=\"ffmpeg_images\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Define tweakable parameters to pipeline\n",
- "These parameters can be changed when the pipeline is published and rerun from a REST call.\n",
- "As part of ParallelRunStep following 2 pipeline parameters will be created which can be used to override values.\n",
- " node_count\n",
- " process_count_per_node"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "from azureml.pipeline.core.graph import PipelineParameter\n",
- "# create a parameter for style (one of \"candy\", \"mosaic\") to transfer the images to\n",
- "style_param = PipelineParameter(name=\"style\", default_value=\"mosaic\")\n",
- "# create a parameter for the number of nodes to use in step no. 2 (style transfer)\n",
- "nodecount_param = PipelineParameter(name=\"nodecount\", default_value=2)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "split_video_step = PythonScriptStep(\n",
- " name=\"split video\",\n",
- " script_name=\"process_video.py\",\n",
- " arguments=[\"--input_video\", orangutan_video.as_mount(),\n",
- " \"--output_audio\", ffmpeg_audio,\n",
- " \"--output_images\", ffmpeg_images],\n",
- " compute_target=cpu_cluster,\n",
- " runconfig=amlcompute_run_config,\n",
- " source_directory=scripts_folder\n",
- ")\n",
- "\n",
- "stitch_video_step = PythonScriptStep(\n",
- " name=\"stitch\",\n",
- " script_name=\"stitch_video.py\",\n",
- " arguments=[\"--images_dir\", processed_images.as_input(), \n",
- " \"--input_audio\", ffmpeg_audio.as_input(), \n",
- " \"--output_dir\", output_video],\n",
- " compute_target=cpu_cluster,\n",
- " runconfig=amlcompute_run_config,\n",
- " source_directory=scripts_folder\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Create environment, parallel step run config and parallel run step"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "from azureml.core import Environment\n",
- "from azureml.core.runconfig import DEFAULT_GPU_IMAGE\n",
- "\n",
- "parallel_cd = CondaDependencies.create(python_version=\"3.8\", conda_packages=['pip==20.2.4', 'numpy==1.19'])\n",
- "\n",
- "parallel_cd.add_channel(\"pytorch\")\n",
- "parallel_cd.add_conda_package(\"pytorch\")\n",
- "parallel_cd.add_conda_package(\"torchvision\")\n",
- "parallel_cd.add_conda_package(\"pillow<7\") # needed for torchvision==0.4.0\n",
- "\n",
- "styleenvironment = Environment(name=\"styleenvironment\")\n",
- "styleenvironment.python.conda_dependencies=parallel_cd\n",
- "styleenvironment.docker.base_image = DEFAULT_GPU_IMAGE"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "from azureml.pipeline.core import PipelineParameter\n",
- "from azureml.pipeline.steps import ParallelRunConfig\n",
- "\n",
- "parallel_run_config = ParallelRunConfig(\n",
- " environment=styleenvironment,\n",
- " entry_script='transform.py',\n",
- " output_action='summary_only',\n",
- " mini_batch_size=\"1\",\n",
- " error_threshold=1,\n",
- " source_directory=scripts_folder,\n",
- " compute_target=gpu_cluster, \n",
- " node_count=nodecount_param,\n",
- " process_count_per_node=2\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "from azureml.pipeline.steps import ParallelRunStep\n",
- "from datetime import datetime\n",
- "\n",
- "parallel_step_name = 'styletransfer-' + datetime.now().strftime('%Y%m%d%H%M')\n",
- "\n",
- "distributed_style_transfer_step = ParallelRunStep(\n",
- " name=parallel_step_name,\n",
- " inputs=[ffmpeg_images], # Input file share/blob container/file dataset\n",
- " output=processed_images, # Output file share/blob container\n",
- " arguments=[\"--style\", style_param],\n",
- " parallel_run_config=parallel_run_config,\n",
- " allow_reuse=False #[optional - default value True]\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Run the pipeline"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "pipeline = Pipeline(workspace=ws, steps=[stitch_video_step])\n",
- "\n",
- "pipeline.validate()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# submit the pipeline and provide values for the PipelineParameters used in the pipeline\n",
- "pipeline_run = Experiment(ws, 'styletransfer_parallel_mosaic').submit(pipeline)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Monitor pipeline run\n",
- "\n",
- "The pipeline run status could be checked in Azure Machine Learning portal (https://ml.azure.com). The link to the pipeline run could be retrieved by inspecting the `pipeline_run` object.\n",
- "\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# This will output information of the pipeline run, including the link to the details page of portal.\n",
- "pipeline_run"
- ]
- },
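- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "As an optional alternative to inspecting the object output above, the small sketch below prints the portal link directly; `get_portal_url()` is part of the azureml-core `Run` API that `PipelineRun` inherits from."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Optional: print a direct link to the run's details page in the portal\n",
- "print(pipeline_run.get_portal_url())"
- ]
- },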
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Optional: View detailed logs (streaming) "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Wait the run for completion and show output log to console\n",
- "pipeline_run.wait_for_completion(show_output=True)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Download output video"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Downloads the video in `output_video` folder"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "def download_video(run, target_dir=None):\n",
- " stitch_run = run.find_step_run(stitch_video_step.name)[0]\n",
- " port_data = stitch_run.get_details()['outputDatasets'][0]['dataset']\n",
- " port_data.download(target_dir)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "pipeline_run.wait_for_completion()\n",
- "download_video(pipeline_run, \"output_video_mosaic\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Publish pipeline"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "pipeline_name = \"style-transfer-batch-inference\"\n",
- "print(pipeline_name)\n",
- "\n",
- "published_pipeline = pipeline.publish(\n",
- " name=pipeline_name, \n",
- " description=pipeline_name)\n",
- "print(\"Newly published pipeline id: {}\".format(published_pipeline.id))"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Get published pipeline\n",
- "This is another way to get the published pipeline."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "from azureml.pipeline.core import PublishedPipeline\n",
- "\n",
- "# You could retrieve all pipelines that are published, or \n",
- "# just get the published pipeline object that you have the ID for.\n",
- "\n",
- "# Get all published pipeline objects in the workspace\n",
- "all_pub_pipelines = PublishedPipeline.list(ws)\n",
- "\n",
- "# We will iterate through the list of published pipelines and \n",
- "# use the last ID in the list for Schelue operations: \n",
- "print(\"Published pipelines found in the workspace:\")\n",
- "for pub_pipeline in all_pub_pipelines:\n",
- " print(\"Name:\", pub_pipeline.name,\"\\tDescription:\", pub_pipeline.description, \"\\tId:\", pub_pipeline.id, \"\\tStatus:\", pub_pipeline.status)\n",
- " if(pub_pipeline.name == pipeline_name):\n",
- " published_pipeline = pub_pipeline\n",
- "\n",
- "print(\"Published pipeline id: {}\".format(published_pipeline.id))"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Run pipeline through REST calls for other styles\n",
- "\n",
- "# Get AAD token"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "from azureml.core.authentication import InteractiveLoginAuthentication\n",
- "import requests\n",
- "\n",
- "auth = InteractiveLoginAuthentication()\n",
- "aad_token = auth.get_authentication_header()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Get endpoint URL"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "rest_endpoint = published_pipeline.endpoint\n",
- "print(\"Pipeline REST endpoing: {}\".format(rest_endpoint))"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Send request and monitor"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "experiment_name = 'styletransfer_parallel_candy'\n",
- "response = requests.post(rest_endpoint, \n",
- " headers=aad_token,\n",
- " json={\"ExperimentName\": experiment_name,\n",
- " \"ParameterAssignments\": {\"style\": \"candy\", \"NodeCount\": 3}})\n",
- "\n",
- "run_id = response.json()[\"Id\"]\n",
- "\n",
- "from azureml.pipeline.core.run import PipelineRun\n",
- "published_pipeline_run_candy = PipelineRun(ws.experiments[experiment_name], run_id)\n",
- "\n",
- "# Show detail information of run\n",
- "published_pipeline_run_candy"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Download output from re-run"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "published_pipeline_run_candy.wait_for_completion()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "download_video(published_pipeline_run_candy, target_dir=\"output_video_candy\")"
- ]
- }
- ],
- "metadata": {
- "authors": [
- {
- "name": "sanpil joringer asraniwa pansav tracych"
- }
- ],
- "category": "Other notebooks",
- "compute": [
- "AML Compute"
- ],
- "datasets": [],
- "deployment": [
- "None"
- ],
- "exclude_from_index": true,
- "framework": [
- "None"
- ],
- "friendly_name": "Style transfer using ParallelRunStep",
- "index_order": 1,
- "kernelspec": {
- "display_name": "Python 3.8 - AzureML",
- "language": "python",
- "name": "python38-azureml"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.6.9"
- },
- "tags": [
- "Batch Inferencing",
- "Pipeline"
- ],
- "task": "Style transfer"
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
\ No newline at end of file
diff --git a/how-to-use-azureml/machine-learning-pipelines/pipeline-style-transfer/scripts/process_video.py b/how-to-use-azureml/machine-learning-pipelines/pipeline-style-transfer/scripts/process_video.py
deleted file mode 100644
index 1148f533..00000000
--- a/how-to-use-azureml/machine-learning-pipelines/pipeline-style-transfer/scripts/process_video.py
+++ /dev/null
@@ -1,22 +0,0 @@
-import argparse
-import glob
-import os
-import subprocess
-
-parser = argparse.ArgumentParser(description="Process input video")
-parser.add_argument('--input_video', required=True)
-parser.add_argument('--output_audio', required=True)
-parser.add_argument('--output_images', required=True)
-
-args = parser.parse_args()
-
-os.makedirs(args.output_audio, exist_ok=True)
-os.makedirs(args.output_images, exist_ok=True)
-
-subprocess.run("ffmpeg -i {} {}/video.aac".format(args.input_video, args.output_audio),
- shell=True,
- check=True)
-
-subprocess.run("ffmpeg -i {} {}/%05d_video.jpg -hide_banner".format(args.input_video, args.output_images),
- shell=True,
- check=True)
diff --git a/how-to-use-azureml/machine-learning-pipelines/pipeline-style-transfer/scripts/stitch_video.py b/how-to-use-azureml/machine-learning-pipelines/pipeline-style-transfer/scripts/stitch_video.py
deleted file mode 100644
index ce237772..00000000
--- a/how-to-use-azureml/machine-learning-pipelines/pipeline-style-transfer/scripts/stitch_video.py
+++ /dev/null
@@ -1,22 +0,0 @@
-import argparse
-import os
-import subprocess
-
-parser = argparse.ArgumentParser(description="Process input video")
-parser.add_argument('--images_dir', required=True)
-parser.add_argument('--input_audio', required=True)
-parser.add_argument('--output_dir', required=True)
-
-args = parser.parse_args()
-
-os.makedirs(args.output_dir, exist_ok=True)
-
-subprocess.run("ffmpeg -framerate 30 -i {}/%05d_video.jpg -c:v libx264 -profile:v high -crf 20 -pix_fmt yuv420p "
- "-y {}/video_without_audio.mp4"
- .format(args.images_dir, args.output_dir),
- shell=True, check=True)
-
-subprocess.run("ffmpeg -i {}/video_without_audio.mp4 -i {}/video.aac -map 0:0 -map 1:0 -vcodec "
- "copy -acodec copy -y {}/video_with_audio.mp4"
- .format(args.output_dir, args.input_audio, args.output_dir),
- shell=True, check=True)
diff --git a/how-to-use-azureml/machine-learning-pipelines/pipeline-style-transfer/scripts/transform.py b/how-to-use-azureml/machine-learning-pipelines/pipeline-style-transfer/scripts/transform.py
deleted file mode 100644
index f8ac0ee4..00000000
--- a/how-to-use-azureml/machine-learning-pipelines/pipeline-style-transfer/scripts/transform.py
+++ /dev/null
@@ -1,172 +0,0 @@
-import argparse
-import os
-import sys
-import re
-import json
-import traceback
-from PIL import Image
-
-import torch
-from torchvision import transforms
-
-from azureml.core.model import Model
-
-style_model = None
-
-
-class TransformerNet(torch.nn.Module):
- def __init__(self):
- super(TransformerNet, self).__init__()
- # Initial convolution layers
- self.conv1 = ConvLayer(3, 32, kernel_size=9, stride=1)
- self.in1 = torch.nn.InstanceNorm2d(32, affine=True)
- self.conv2 = ConvLayer(32, 64, kernel_size=3, stride=2)
- self.in2 = torch.nn.InstanceNorm2d(64, affine=True)
- self.conv3 = ConvLayer(64, 128, kernel_size=3, stride=2)
- self.in3 = torch.nn.InstanceNorm2d(128, affine=True)
- # Residual layers
- self.res1 = ResidualBlock(128)
- self.res2 = ResidualBlock(128)
- self.res3 = ResidualBlock(128)
- self.res4 = ResidualBlock(128)
- self.res5 = ResidualBlock(128)
- # Upsampling Layers
- self.deconv1 = UpsampleConvLayer(128, 64, kernel_size=3, stride=1, upsample=2)
- self.in4 = torch.nn.InstanceNorm2d(64, affine=True)
- self.deconv2 = UpsampleConvLayer(64, 32, kernel_size=3, stride=1, upsample=2)
- self.in5 = torch.nn.InstanceNorm2d(32, affine=True)
- self.deconv3 = ConvLayer(32, 3, kernel_size=9, stride=1)
- # Non-linearities
- self.relu = torch.nn.ReLU()
-
- def forward(self, X):
- y = self.relu(self.in1(self.conv1(X)))
- y = self.relu(self.in2(self.conv2(y)))
- y = self.relu(self.in3(self.conv3(y)))
- y = self.res1(y)
- y = self.res2(y)
- y = self.res3(y)
- y = self.res4(y)
- y = self.res5(y)
- y = self.relu(self.in4(self.deconv1(y)))
- y = self.relu(self.in5(self.deconv2(y)))
- y = self.deconv3(y)
- return y
-
-
-class ConvLayer(torch.nn.Module):
- def __init__(self, in_channels, out_channels, kernel_size, stride):
- super(ConvLayer, self).__init__()
- reflection_padding = kernel_size // 2
- self.reflection_pad = torch.nn.ReflectionPad2d(reflection_padding)
- self.conv2d = torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride)
-
- def forward(self, x):
- out = self.reflection_pad(x)
- out = self.conv2d(out)
- return out
-
-
-class ResidualBlock(torch.nn.Module):
- """ResidualBlock
- introduced in: https://arxiv.org/abs/1512.03385
- recommended architecture: http://torch.ch/blog/2016/02/04/resnets.html
- """
-
- def __init__(self, channels):
- super(ResidualBlock, self).__init__()
- self.conv1 = ConvLayer(channels, channels, kernel_size=3, stride=1)
- self.in1 = torch.nn.InstanceNorm2d(channels, affine=True)
- self.conv2 = ConvLayer(channels, channels, kernel_size=3, stride=1)
- self.in2 = torch.nn.InstanceNorm2d(channels, affine=True)
- self.relu = torch.nn.ReLU()
-
- def forward(self, x):
- residual = x
- out = self.relu(self.in1(self.conv1(x)))
- out = self.in2(self.conv2(out))
- out = out + residual
- return out
-
-
-class UpsampleConvLayer(torch.nn.Module):
- """UpsampleConvLayer
- Upsamples the input and then does a convolution. This method gives better results
- compared to ConvTranspose2d.
- ref: http://distill.pub/2016/deconv-checkerboard/
- """
-
- def __init__(self, in_channels, out_channels, kernel_size, stride, upsample=None):
- super(UpsampleConvLayer, self).__init__()
- self.upsample = upsample
- if upsample:
- self.upsample_layer = torch.nn.Upsample(mode='nearest', scale_factor=upsample)
- reflection_padding = kernel_size // 2
- self.reflection_pad = torch.nn.ReflectionPad2d(reflection_padding)
- self.conv2d = torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride)
-
- def forward(self, x):
- x_in = x
- if self.upsample:
- x_in = self.upsample_layer(x_in)
- out = self.reflection_pad(x_in)
- out = self.conv2d(out)
- return out
-
-
-def load_image(filename):
- img = Image.open(filename)
- return img
-
-
-def save_image(filename, data):
- img = data.clone().clamp(0, 255).numpy()
- img = img.transpose(1, 2, 0).astype("uint8")
- img = Image.fromarray(img)
- img.save(filename)
-
-
-def init():
- global output_path, args
- global style_model, device
- output_path = os.environ['AZUREML_BI_OUTPUT_PATH']
- print(f'output path: {output_path}')
- print(f'Cuda available? {torch.cuda.is_available()}')
-
- arg_parser = argparse.ArgumentParser(description="parser for fast-neural-style")
- arg_parser.add_argument("--style", type=str, help="style name")
- args, unknown_args = arg_parser.parse_known_args()
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
- with torch.no_grad():
- style_model = TransformerNet()
- model_path = Model.get_model_path(args.style)
- state_dict = torch.load(os.path.join(model_path))
- # remove saved deprecated running_* keys in InstanceNorm from the checkpoint
- for k in list(state_dict.keys()):
- if re.search(r'in\d+\.running_(mean|var)$', k):
- del state_dict[k]
- style_model.load_state_dict(state_dict)
- style_model.to(device)
- print(f'Model loaded successfully. Path: {model_path}')
-
-
-def run(mini_batch):
-
- result = []
- for image_file_path in mini_batch:
- img = load_image(image_file_path)
-
- with torch.no_grad():
- content_transform = transforms.Compose([
- transforms.ToTensor(),
- transforms.Lambda(lambda x: x.mul(255))
- ])
- content_image = content_transform(img)
- content_image = content_image.unsqueeze(0).to(device)
-
- output = style_model(content_image).cpu()
- output_file_path = os.path.join(output_path, os.path.basename(image_file_path))
- save_image(output_file_path, output[0])
- result.append(output_file_path)
-
- return result
diff --git a/how-to-use-azureml/ml-frameworks/tensorflow/train-hyperparameter-tune-deploy-with-tensorflow/nn.png b/how-to-use-azureml/ml-frameworks/tensorflow/train-hyperparameter-tune-deploy-with-tensorflow/nn.png
deleted file mode 100644
index 8910281e..00000000
Binary files a/how-to-use-azureml/ml-frameworks/tensorflow/train-hyperparameter-tune-deploy-with-tensorflow/nn.png and /dev/null differ
diff --git a/how-to-use-azureml/ml-frameworks/tensorflow/train-hyperparameter-tune-deploy-with-tensorflow/tf_mnist.py b/how-to-use-azureml/ml-frameworks/tensorflow/train-hyperparameter-tune-deploy-with-tensorflow/tf_mnist.py
deleted file mode 100644
index 87be1ab3..00000000
--- a/how-to-use-azureml/ml-frameworks/tensorflow/train-hyperparameter-tune-deploy-with-tensorflow/tf_mnist.py
+++ /dev/null
@@ -1,190 +0,0 @@
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# Licensed under the MIT License.
-
-import numpy as np
-import argparse
-import os
-import re
-import tensorflow as tf
-import time
-import glob
-
-from azureml.core import Run
-from utils import load_data
-from tensorflow.keras import Model, layers
-
-
-# Create TF Model.
-class NeuralNet(Model):
- # Set layers.
- def __init__(self):
- super(NeuralNet, self).__init__()
- # First hidden layer.
- self.h1 = layers.Dense(n_h1, activation=tf.nn.relu)
- # Second hidden layer.
- self.h2 = layers.Dense(n_h2, activation=tf.nn.relu)
- self.out = layers.Dense(n_outputs)
-
- # Set forward pass.
- def call(self, x, is_training=False):
- x = self.h1(x)
- x = self.h2(x)
- x = self.out(x)
- if not is_training:
- # Apply softmax when not training.
- x = tf.nn.softmax(x)
- return x
-
-
-def cross_entropy_loss(y, logits):
- # Convert labels to int 64 for tf cross-entropy function.
- y = tf.cast(y, tf.int64)
- # Apply softmax to logits and compute cross-entropy.
- loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
- # Average loss across the batch.
- return tf.reduce_mean(loss)
-
-
-# Accuracy metric.
-def accuracy(y_pred, y_true):
- # Predicted class is the index of highest score in prediction vector (i.e. argmax).
- correct_prediction = tf.equal(tf.argmax(y_pred, 1), tf.cast(y_true, tf.int64))
- return tf.reduce_mean(tf.cast(correct_prediction, tf.float32), axis=-1)
-
-
-# Optimization process.
-def run_optimization(x, y):
- # Wrap computation inside a GradientTape for automatic differentiation.
- with tf.GradientTape() as g:
- # Forward pass.
- logits = neural_net(x, is_training=True)
- # Compute loss.
- loss = cross_entropy_loss(y, logits)
-
- # Variables to update, i.e. trainable variables.
- trainable_variables = neural_net.trainable_variables
-
- # Compute gradients.
- gradients = g.gradient(loss, trainable_variables)
-
- # Update W and b following gradients.
- optimizer.apply_gradients(zip(gradients, trainable_variables))
-
-
-print("TensorFlow version:", tf.__version__)
-
-parser = argparse.ArgumentParser()
-parser.add_argument('--data-folder', type=str, dest='data_folder', default='data', help='data folder mounting point')
-parser.add_argument('--batch-size', type=int, dest='batch_size', default=128, help='mini batch size for training')
-parser.add_argument('--first-layer-neurons', type=int, dest='n_hidden_1', default=128,
- help='# of neurons in the first layer')
-parser.add_argument('--second-layer-neurons', type=int, dest='n_hidden_2', default=128,
- help='# of neurons in the second layer')
-parser.add_argument('--learning-rate', type=float, dest='learning_rate', default=0.01, help='learning rate')
-parser.add_argument('--resume-from', type=str, default=None,
- help='location of the model or checkpoint files from where to resume the training')
-args = parser.parse_args()
-
-previous_model_location = args.resume_from
-# You can also use environment variable to get the model/checkpoint files location
-# previous_model_location = os.path.expandvars(os.getenv("AZUREML_DATAREFERENCE_MODEL_LOCATION", None))
-
-data_folder = args.data_folder
-print('Data folder:', data_folder)
-
-# load train and test set into numpy arrays
-# note we scale the pixel intensity values to 0-1 (by dividing it with 255.0) so the model can converge faster.
-X_train = load_data(glob.glob(os.path.join(data_folder, '**/train-images-idx3-ubyte.gz'),
- recursive=True)[0], False) / np.float32(255.0)
-X_test = load_data(glob.glob(os.path.join(data_folder, '**/t10k-images-idx3-ubyte.gz'),
- recursive=True)[0], False) / np.float32(255.0)
-y_train = load_data(glob.glob(os.path.join(data_folder, '**/train-labels-idx1-ubyte.gz'),
- recursive=True)[0], True).reshape(-1)
-y_test = load_data(glob.glob(os.path.join(data_folder, '**/t10k-labels-idx1-ubyte.gz'),
- recursive=True)[0], True).reshape(-1)
-
-print(X_train.shape, y_train.shape, X_test.shape, y_test.shape, sep='\n')
-
-training_set_size = X_train.shape[0]
-
-n_inputs = 28 * 28
-n_h1 = args.n_hidden_1
-n_h2 = args.n_hidden_2
-n_outputs = 10
-learning_rate = args.learning_rate
-n_epochs = 20
-batch_size = args.batch_size
-
-# Build neural network model.
-neural_net = NeuralNet()
-
-# Stochastic gradient descent optimizer.
-optimizer = tf.optimizers.SGD(learning_rate)
-
-# start an Azure ML run
-run = Run.get_context()
-
-if previous_model_location:
- # Restore variables from latest checkpoint.
- checkpoint = tf.train.Checkpoint(model=neural_net, optimizer=optimizer)
- checkpoint_file_path = tf.train.latest_checkpoint(previous_model_location)
- checkpoint.restore(checkpoint_file_path)
- checkpoint_filename = os.path.basename(checkpoint_file_path)
- num_found = re.search(r'\d+', checkpoint_filename)
- if num_found:
- start_epoch = int(num_found.group(0))
- print("Resuming from epoch {}".format(str(start_epoch)))
-
-start_time = time.perf_counter()
-for epoch in range(0, n_epochs):
-
- # randomly shuffle training set
- indices = np.random.permutation(training_set_size)
- X_train = X_train[indices]
- y_train = y_train[indices]
-
- # batch index
- b_start = 0
- b_end = b_start + batch_size
- for _ in range(training_set_size // batch_size):
- # get a batch
- X_batch, y_batch = X_train[b_start: b_end], y_train[b_start: b_end]
-
- # update batch index for the next batch
- b_start = b_start + batch_size
- b_end = min(b_start + batch_size, training_set_size)
-
- # train
- run_optimization(X_batch, y_batch)
-
- # evaluate training set
- pred = neural_net(X_batch, is_training=False)
- acc_train = accuracy(pred, y_batch)
-
- # evaluate validation set
- pred = neural_net(X_test, is_training=False)
- acc_val = accuracy(pred, y_test)
-
- # log accuracies
- run.log('training_acc', np.float(acc_train))
- run.log('validation_acc', np.float(acc_val))
- print(epoch, '-- Training accuracy:', acc_train, '\b Validation accuracy:', acc_val)
-
- # Save checkpoints in the "./outputs" folder so that they are automatically uploaded into run history.
- checkpoint_dir = './outputs/'
- checkpoint = tf.train.Checkpoint(model=neural_net, optimizer=optimizer)
-
- if epoch % 2 == 0:
- checkpoint.save(checkpoint_dir)
-
-run.log('final_acc', np.float(acc_val))
-os.makedirs('./outputs/model', exist_ok=True)
-
-# files saved in the "./outputs" folder are automatically uploaded into run history
-# this is workaround for https://github.com/tensorflow/tensorflow/issues/33913 and will be fixed once we move to >tf2.1
-neural_net._set_inputs(X_train)
-tf.saved_model.save(neural_net, './outputs/model/')
-
-stop_time = time.perf_counter()
-training_time = (stop_time - start_time) * 1000
-print("Total time in milliseconds for training: {}".format(str(training_time)))
diff --git a/how-to-use-azureml/ml-frameworks/tensorflow/train-hyperparameter-tune-deploy-with-tensorflow/train-hyperparameter-tune-deploy-with-tensorflow.ipynb b/how-to-use-azureml/ml-frameworks/tensorflow/train-hyperparameter-tune-deploy-with-tensorflow/train-hyperparameter-tune-deploy-with-tensorflow.ipynb
deleted file mode 100644
index e536ccbe..00000000
--- a/how-to-use-azureml/ml-frameworks/tensorflow/train-hyperparameter-tune-deploy-with-tensorflow/train-hyperparameter-tune-deploy-with-tensorflow.ipynb
+++ /dev/null
@@ -1,1234 +0,0 @@
-{
- "cells": [
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Copyright (c) Microsoft Corporation. All rights reserved.\n",
- "\n",
- "Licensed under the MIT License."
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- ""
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {
- "nbpresent": {
- "id": "bf74d2e9-2708-49b1-934b-e0ede342f475"
- }
- },
- "source": [
- "# Training, hyperparameter tune, and deploy with TensorFlow\n",
- "\n",
- "## Introduction\n",
- "This tutorial shows how to train a simple deep neural network using the MNIST dataset and TensorFlow on Azure Machine Learning. MNIST is a popular dataset consisting of 70,000 grayscale images. Each image is a handwritten digit of `28x28` pixels, representing number from 0 to 9. The goal is to create a multi-class classifier to identify the digit each image represents, and deploy it as a web service in Azure.\n",
- "\n",
- "For more information about the MNIST dataset, please visit [Yan LeCun's website](http://yann.lecun.com/exdb/mnist/).\n",
- "\n",
- "## Prerequisite:\n",
- "* Understand the [architecture and terms](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture) introduced by Azure Machine Learning\n",
- "* If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [configuration notebook](../../../../configuration.ipynb) to:\n",
- " * install the AML SDK\n",
- " * create a workspace and its configuration file (`config.json`)"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Let's get started. First let's import some Python libraries."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "nbpresent": {
- "id": "c377ea0c-0cd9-4345-9be2-e20fb29c94c3"
- }
- },
- "outputs": [],
- "source": [
- "%matplotlib inline\n",
- "import numpy as np\n",
- "import os\n",
- "import matplotlib.pyplot as plt"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "nbpresent": {
- "id": "edaa7f2f-2439-4148-b57a-8c794c0945ec"
- }
- },
- "outputs": [],
- "source": [
- "import azureml\n",
- "from azureml.core import Workspace\n",
- "\n",
- "# check core SDK version number\n",
- "print(\"Azure ML SDK Version: \", azureml.core.VERSION)"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Diagnostics\n",
- "Opt-in diagnostics for better experience, quality, and security of future releases."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": [
- "Diagnostics"
- ]
- },
- "outputs": [],
- "source": [
- "from azureml.telemetry import set_diagnostics_collection\n",
- "\n",
- "set_diagnostics_collection(send_diagnostics=True)"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Initialize workspace\n",
- "Initialize a [Workspace](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#workspace) object from the existing workspace you created in the Prerequisites step. `Workspace.from_config()` creates a workspace object from the details stored in `config.json`."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "ws = Workspace.from_config()\n",
- "print('Workspace name: ' + ws.name, \n",
- " 'Azure region: ' + ws.location, \n",
- " 'Subscription id: ' + ws.subscription_id, \n",
- " 'Resource group: ' + ws.resource_group, sep = '\\n')"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {
- "nbpresent": {
- "id": "59f52294-4a25-4c92-bab8-3b07f0f44d15"
- }
- },
- "source": [
- "## Create an Azure ML experiment\n",
- "Let's create an experiment named \"tf-mnist\" and a folder to hold the training scripts. The script runs will be recorded under the experiment in Azure."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "nbpresent": {
- "id": "bc70f780-c240-4779-96f3-bc5ef9a37d59"
- }
- },
- "outputs": [],
- "source": [
- "from azureml.core import Experiment\n",
- "\n",
- "script_folder = './tf-mnist'\n",
- "os.makedirs(script_folder, exist_ok=True)\n",
- "\n",
- "exp = Experiment(workspace=ws, name='tf-mnist')"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {
- "nbpresent": {
- "id": "defe921f-8097-44c3-8336-8af6700804a7"
- }
- },
- "source": [
- "## Download MNIST dataset\n",
- "In order to train on the MNIST dataset we will first need to download it from azuremlopendatasets blob directly and save them in a `data` folder locally. If you want you can directly download the same data from Yan LeCun's web site."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "import urllib.request\n",
- "\n",
- "data_folder = os.path.join(os.getcwd(), 'data')\n",
- "os.makedirs(data_folder, exist_ok=True)\n",
- "\n",
- "urllib.request.urlretrieve('https://azureopendatastorage.blob.core.windows.net/mnist/train-images-idx3-ubyte.gz',\n",
- " filename=os.path.join(data_folder, 'train-images-idx3-ubyte.gz'))\n",
- "urllib.request.urlretrieve('https://azureopendatastorage.blob.core.windows.net/mnist/train-labels-idx1-ubyte.gz',\n",
- " filename=os.path.join(data_folder, 'train-labels-idx1-ubyte.gz'))\n",
- "urllib.request.urlretrieve('https://azureopendatastorage.blob.core.windows.net/mnist/t10k-images-idx3-ubyte.gz',\n",
- " filename=os.path.join(data_folder, 't10k-images-idx3-ubyte.gz'))\n",
- "urllib.request.urlretrieve('https://azureopendatastorage.blob.core.windows.net/mnist/t10k-labels-idx1-ubyte.gz',\n",
- " filename=os.path.join(data_folder, 't10k-labels-idx1-ubyte.gz'))"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {
- "nbpresent": {
- "id": "c3f2f57c-7454-4d3e-b38d-b0946cf066ea"
- }
- },
- "source": [
- "## Show some sample images\n",
- "Let's load the downloaded compressed file into numpy arrays using some utility functions included in the `utils.py` library file from the current folder. Then we use `matplotlib` to plot 30 random images from the dataset along with their labels."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "nbpresent": {
- "id": "396d478b-34aa-4afa-9898-cdce8222a516"
- }
- },
- "outputs": [],
- "source": [
- "from utils import load_data\n",
- "\n",
- "# note we also shrink the intensity values (X) from 0-255 to 0-1. This helps the neural network converge faster.\n",
- "X_train = load_data(os.path.join(data_folder, 'train-images-idx3-ubyte.gz'), False) / np.float32(255.0)\n",
- "X_test = load_data(os.path.join(data_folder, 't10k-images-idx3-ubyte.gz'), False) / np.float32(255.0)\n",
- "y_train = load_data(os.path.join(data_folder, 'train-labels-idx1-ubyte.gz'), True).reshape(-1)\n",
- "y_test = load_data(os.path.join(data_folder, 't10k-labels-idx1-ubyte.gz'), True).reshape(-1)\n",
- "\n",
- "count = 0\n",
- "sample_size = 30\n",
- "plt.figure(figsize = (16, 6))\n",
- "for i in np.random.permutation(X_train.shape[0])[:sample_size]:\n",
- " count = count + 1\n",
- " plt.subplot(1, sample_size, count)\n",
- " plt.axhline('')\n",
- " plt.axvline('')\n",
- " plt.text(x = 10, y = -10, s = y_train[i], fontsize = 18)\n",
- " plt.imshow(X_train[i].reshape(28, 28), cmap = plt.cm.Greys)\n",
- "plt.show()"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Create a Dataset for Files\n",
- "A Dataset can reference single or multiple files in your datastores or public urls. The files can be of any format. Dataset provides you with the ability to download or mount the files to your compute. By creating a dataset, you create a reference to the data source location. If you applied any subsetting transformations to the dataset, they will be stored in the dataset as well. The data remains in its existing location, so no extra storage cost is incurred. [Learn More](https://aka.ms/azureml/howto/createdatasets)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "from azureml.core.dataset import Dataset\n",
- "web_paths = ['https://azureopendatastorage.blob.core.windows.net/mnist/train-images-idx3-ubyte.gz',\n",
- " 'https://azureopendatastorage.blob.core.windows.net/mnist/train-labels-idx1-ubyte.gz',\n",
- " 'https://azureopendatastorage.blob.core.windows.net/mnist/t10k-images-idx3-ubyte.gz',\n",
- " 'https://azureopendatastorage.blob.core.windows.net/mnist/t10k-labels-idx1-ubyte.gz'\n",
- " ]\n",
- "dataset = Dataset.File.from_files(path = web_paths)"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "You may want to regiester datasets using the register() method to your workspace so they can be shared with others, reused across various experiments, and referred to by name in your training script.\n",
- "You can try get the dataset first to see if it's already registered."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "from azureml.exceptions import UserErrorException\n",
- "dataset_registered = False\n",
- "try:\n",
- " temp = Dataset.get_by_name(workspace = ws, name = 'mnist-dataset')\n",
- " dataset_registered = True\n",
- "except UserErrorException:\n",
- " print(\"The dataset mnist-dataset is not registered in workspace yet.\")\n",
- "\n",
- "if not dataset_registered:\n",
- " dataset = dataset.register(workspace = ws,\n",
- " name = 'mnist-dataset',\n",
- " description='training and test dataset',\n",
- " create_new_version=True)\n",
- "# list the files referenced by dataset\n",
- "dataset.to_path()"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Create or Attach existing AmlCompute\n",
- "You will need to create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for training your model. In this tutorial, you create `AmlCompute` as your training compute resource.\n",
- "\n",
- "> Note that if you have an AzureML Data Scientist role, you will not have permission to create compute resources. Talk to your workspace or IT admin to create the compute targets described in this section, if they do not already exist."
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "If we could not find the cluster with the given name, then we will create a new cluster here. We will create an `AmlCompute` cluster of `Standard_NC6s_v3` GPU VMs. This process is broken down into 3 steps:\n",
- "1. create the configuration (this step is local and only takes a second)\n",
- "2. create the cluster (this step will take about **20 seconds**)\n",
- "3. provision the VMs to bring the cluster to the initial size (of 1 in this case). This step will take about **3-5 minutes** and is providing only sparse output in the process. Please make sure to wait until the call returns before moving to the next cell"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "from azureml.core.compute import ComputeTarget, AmlCompute\n",
- "from azureml.core.compute_target import ComputeTargetException\n",
- "\n",
- "# choose a name for your cluster\n",
- "cluster_name = \"hd-cluster\"\n",
- "\n",
- "try:\n",
- " compute_target = ComputeTarget(workspace=ws, name=cluster_name)\n",
- " print('Found existing compute target')\n",
- "except ComputeTargetException:\n",
- " print('Creating a new compute target...')\n",
- " compute_config = AmlCompute.provisioning_configuration(vm_size='Standard_NC6s_v3', \n",
- " max_nodes=4)\n",
- "\n",
- " # create the cluster\n",
- " compute_target = ComputeTarget.create(ws, cluster_name, compute_config)\n",
- "\n",
- "# can poll for a minimum number of nodes and for a specific timeout. \n",
- "# if no min node count is provided it uses the scale settings for the cluster\n",
- "compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)\n",
- "\n",
- "# use get_status() to get a detailed status for the current cluster. \n",
- "print(compute_target.get_status().serialize())"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Now that you have created the compute target, let's see what the workspace's `compute_targets` property returns. You should now see one entry named 'gpu-cluster' of type `AmlCompute`."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "compute_targets = ws.compute_targets\n",
- "for name, ct in compute_targets.items():\n",
- " print(name, ct.type, ct.provisioning_state)"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Copy the training files into the script folder\n",
- "The TensorFlow training script is already created for you. You can simply copy it into the script folder, together with the utility library used to load compressed data file into numpy array."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "import shutil\n",
- "\n",
- "# the training logic is in the tf_mnist.py file.\n",
- "shutil.copy('./tf_mnist.py', script_folder)\n",
- "\n",
- "# the utils.py just helps loading data from the downloaded MNIST dataset into numpy arrays.\n",
- "shutil.copy('./utils.py', script_folder)"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {
- "nbpresent": {
- "id": "2039d2d5-aca6-4f25-a12f-df9ae6529cae"
- }
- },
- "source": [
- "## Construct neural network in TensorFlow\n",
- "In the training script `tf_mnist.py`, it creates a very simple DNN (deep neural network), with just 2 hidden layers. The input layer has 28 * 28 = 784 neurons, each representing a pixel in an image. The first hidden layer has 300 neurons, and the second hidden layer has 100 neurons. The output layer has 10 neurons, each representing a targeted label from 0 to 9.\n",
- "\n",
- ""
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Azure ML concepts \n",
- "Please note the following three things in the code below:\n",
- "1. The script accepts arguments using the argparse package. In this case there is one argument `--data_folder` which specifies the file system folder in which the script can find the MNIST data\n",
- "```\n",
- " parser = argparse.ArgumentParser()\n",
- " parser.add_argument('--data_folder')\n",
- "```\n",
- "2. The script is accessing the Azure ML `Run` object by executing `run = Run.get_context()`. Further down the script is using the `run` to report the training accuracy and the validation accuracy as training progresses.\n",
- "```\n",
- " run.log('training_acc', np.float(acc_train))\n",
- " run.log('validation_acc', np.float(acc_val))\n",
- "```\n",
- "3. When running the script on Azure ML, you can write files out to a folder `./outputs` that is relative to the root directory. This folder is specially tracked by Azure ML in the sense that any files written to that folder during script execution on the remote target will be picked up by Run History; these files (known as artifacts) will be available as part of the run history record."
- ]
- },
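- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Below is a minimal, illustrative sketch of point 3 above (it is not part of `tf_mnist.py`, and the file name is just an example): anything written under `./outputs` during a remote run is uploaded to the run history as an artifact."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "import json\n",
- "import os\n",
- "\n",
- "# during a remote run, anything saved under ./outputs is uploaded to the run history as an artifact\n",
- "os.makedirs('./outputs', exist_ok=True)\n",
- "with open('./outputs/example_note.json', 'w') as f:\n",
- "    json.dump({'note': 'files written here become run artifacts'}, f)"
- ]
- },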
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "The next cell will print out the training code for you to inspect it."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "with open(os.path.join(script_folder, './tf_mnist.py'), 'r') as f:\n",
- " print(f.read())"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Create an environment\n",
- "\n",
- "In this tutorial, we will use one of Azure ML's curated TensorFlow environments for training. [Curated environments](https://docs.microsoft.com/azure/machine-learning/how-to-use-environments#use-a-curated-environment) are available in your workspace by default. Specifically, we will use the TensorFlow 2.0 GPU curated environment."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "from azureml.core import Environment\n",
- "\n",
- "tf_env = Environment.get(ws, name='azureml-tensorflow-2.11-cuda11')"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Configure the training job\n",
- "\n",
- "Create a ScriptRunConfig object to specify the configuration details of your training job, including your training script, environment to use, and the compute target to run on."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "from azureml.core import ScriptRunConfig\n",
- "\n",
- "args = ['--data-folder', dataset.as_named_input('mnist').as_mount(),\n",
- " '--batch-size', 64,\n",
- " '--first-layer-neurons', 256,\n",
- " '--second-layer-neurons', 128,\n",
- " '--learning-rate', 0.01]\n",
- "\n",
- "src = ScriptRunConfig(source_directory=script_folder,\n",
- " script='tf_mnist.py',\n",
- " arguments=args,\n",
- " compute_target=compute_target,\n",
- " environment=tf_env)"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Submit job to run\n",
- "Submit the ScriptRunConfig to an Azure ML experiment to kick off the execution."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "run = exp.submit(src)"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Monitor the Run \n",
- "As the Run is executed, it will go through the following stages:\n",
- "1. Preparing: A docker image is created based on the specifications of the Azure ML environment and it will be uploaded to the workspace's Azure Container Registry. This step will only happen once for each Python environment -- the container will then be cached for subsequent runs. Creating and uploading the image takes about **5 minutes**. While the job is preparing, logs are streamed to the run history and can be viewed to monitor the progress of the image creation.\n",
- "\n",
- "2. Scaling: If the compute needs to be scaled up (i.e. the Batch AI cluster requires more nodes to execute the run than currently available), the cluster will attempt to scale up in order to make the required amount of nodes available. Scaling typically takes about **5 minutes**.\n",
- "\n",
- "3. Running: All scripts in the script folder are uploaded to the compute target, data stores are mounted/copied and the `entry_script` is executed. While the job is running, stdout and the `./logs` folder are streamed to the run history and can be viewed to monitor the progress of the run.\n",
- "\n",
- "4. Post-Processing: The `./outputs` folder of the run is copied over to the run history\n",
- "\n",
- "There are multiple ways to check the progress of a running job. We can use a Jupyter notebook widget. \n",
- "\n",
- "**Note: The widget will automatically update ever 10-15 seconds, always showing you the most up-to-date information about the run**"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "from azureml.widgets import RunDetails\n",
- "\n",
- "RunDetails(run).show()"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "We can also periodically check the status of the run object, and navigate to Azure portal to monitor the run."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "run"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "run.wait_for_completion(show_output=True)"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### The Run object \n",
- "The Run object provides the interface to the run history -- both to the job and to the control plane (this notebook), and both while the job is running and after it has completed. It provides a number of interesting features for instance:\n",
- "* `run.get_details()`: Provides a rich set of properties of the run\n",
- "* `run.get_metrics()`: Provides a dictionary with all the metrics that were reported for the Run\n",
- "* `run.get_file_names()`: List all the files that were uploaded to the run history for this Run. This will include the `outputs` and `logs` folder, azureml-logs and other logs, as well as files that were explicitly uploaded to the run using `run.upload_file()`\n",
- "\n",
- "Below are some examples -- please run through them and inspect their output. "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "run.get_details()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "run.get_metrics()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "run.get_file_names()"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Plot accuracy over epochs\n",
- "Since we can retrieve the metrics from the run, we can easily make plots using `matplotlib` in the notebook. Then we can add the plotted image to the run using `run.log_image()`, so all information about the run is kept together."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "\n",
- "os.makedirs('./imgs', exist_ok=True)\n",
- "metrics = run.get_metrics()\n",
- "\n",
- "plt.figure(figsize = (13,5))\n",
- "plt.plot(metrics['validation_acc'], 'r-', lw=4, alpha=.6)\n",
- "plt.plot(metrics['training_acc'], 'b--', alpha=0.5)\n",
- "plt.legend(['Full evaluation set', 'Training set mini-batch'])\n",
- "plt.xlabel('epochs', fontsize=14)\n",
- "plt.ylabel('accuracy', fontsize=14)\n",
- "plt.title('Accuracy over Epochs', fontsize=16)\n",
- "run.log_image(name='acc_over_epochs.png', plot=plt)\n",
- "plt.show()"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Download the saved model"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "In the training script, a TensorFlow `saver` object is used to persist the model in a local folder (local to the compute target). The model was saved to the `./outputs` folder on the disk of the Batch AI cluster node where the job is run. Azure ML automatically uploaded anything written in the `./outputs` folder into run history file store. Subsequently, we can use the `Run` object to download the model files the `saver` object saved. They are under the the `outputs/model` folder in the run history file store, and are downloaded into a local folder named `model`. Note the TensorFlow model consists of four files in binary format and they are not human-readable."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "run.download_files(prefix='outputs/model', output_directory='./model', append_prefix=False)"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Predict on the test set (Optional)\n",
- "Now load the saved TensorFlow graph, and list all operations under the `network` scope. This way we can discover the input tensor `network/X:0` and the output tensor `network/output/MatMul:0`, and use them in the scoring script in the next step.\n",
- "\n",
- "Note: if your local TensorFlow version is different than the version running in the cluster where the model is trained, you might see a \"compiletime version mismatch\" warning. You can ignore it."
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- " import tensorflow as tf\n",
- " imported_model = tf.saved_model.load('./model')"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- " pred = imported_model(X_test)\n",
- " y_hat = np.argmax(pred, axis=1)\n",
- "\n",
- " # print the first 30 labels and predictions\n",
- " print('labels: \\t', y_test[:30])\n",
- " print('predictions:\\t', y_hat[:30])"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- " print(\"Accuracy on the test set:\", np.average(y_hat == y_test))"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Intelligent hyperparameter tuning\n",
- "We have trained the model with one set of hyperparameters, now let's how we can do hyperparameter tuning by launching multiple runs on the cluster. First let's define the parameter space using random sampling."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "from azureml.train.hyperdrive import RandomParameterSampling, BanditPolicy, HyperDriveConfig, PrimaryMetricGoal\n",
- "from azureml.train.hyperdrive import choice, loguniform\n",
- "\n",
- "ps = RandomParameterSampling(\n",
- " {\n",
- " '--batch-size': choice(32, 64, 128),\n",
- " '--first-layer-neurons': choice(16, 64, 128, 256, 512),\n",
- " '--second-layer-neurons': choice(16, 64, 256, 512),\n",
- " '--learning-rate': loguniform(-6, -1)\n",
- " }\n",
- ")"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Next, we will create a new ScriptRunConfig without the above parameters since they will be passed in later. Note we still need to keep the `data-folder` parameter since that's not a hyperparamter we will sweep."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "args = ['--data-folder', dataset.as_named_input('mnist').as_mount()]\n",
- "\n",
- "src = ScriptRunConfig(source_directory=script_folder,\n",
- " script='tf_mnist.py',\n",
- " arguments=args,\n",
- " compute_target=compute_target,\n",
- " environment=tf_env)"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Now we will define an early termnination policy. The `BanditPolicy` basically states to check the job every 2 iterations. If the primary metric (defined later) falls outside of the top 10% range, Azure ML terminate the job. This saves us from continuing to explore hyperparameters that don't show promise of helping reach our target metric."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "policy = BanditPolicy(evaluation_interval=2, slack_factor=0.1)"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Now we are ready to configure a run configuration object, and specify the primary metric `validation_acc` that's recorded in your training runs. If you go back to visit the training script, you will notice that this value is being logged after every epoch (a full batch set). We also want to tell the service that we are looking to maximizing this value. We also set the number of samples to 20, and maximal concurrent job to 4, which is the same as the number of nodes in our computer cluster."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "htc = HyperDriveConfig(run_config=src, \n",
- " hyperparameter_sampling=ps, \n",
- " policy=policy, \n",
- " primary_metric_name='validation_acc', \n",
- " primary_metric_goal=PrimaryMetricGoal.MAXIMIZE, \n",
- " max_total_runs=8,\n",
- " max_concurrent_runs=4)"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Finally, let's launch the hyperparameter tuning job."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "htr = exp.submit(config=htc)"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "We can use a run history widget to show the progress. Be patient as this might take a while to complete."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "RunDetails(htr).show()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "htr.wait_for_completion(show_output=True)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "assert(htr.get_status() == \"Completed\")"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Warm start a Hyperparameter Tuning experiment and resuming child runs\n",
- "Often times, finding the best hyperparameter values for your model can be an iterative process, needing multiple tuning runs that learn from previous hyperparameter tuning runs. Reusing knowledge from these previous runs will accelerate the hyperparameter tuning process, thereby reducing the cost of tuning the model and will potentially improve the primary metric of the resulting model. When warm starting a hyperparameter tuning experiment with Bayesian sampling, trials from the previous run will be used as prior knowledge to intelligently pick new samples, so as to improve the primary metric. Additionally, when using Random or Grid sampling, any early termination decisions will leverage metrics from the previous runs to determine poorly performing training runs. \n",
- "\n",
- "Azure Machine Learning allows you to warm start your hyperparameter tuning run by leveraging knowledge from up to 5 previously completed hyperparameter tuning parent runs. \n",
- "\n",
- "Additionally, there might be occasions when individual training runs of a hyperparameter tuning experiment are cancelled due to budget constraints or fail due to other reasons. It is now possible to resume such individual training runs from the last checkpoint (assuming your training script handles checkpoints). Resuming an individual training run will use the same hyperparameter configuration and mount the storage used for that run. The training script should accept the \"--resume-from\" argument, which contains the checkpoint or model files from which to resume the training run. You can also resume individual runs as part of an experiment that spends additional budget on hyperparameter tuning. Any additional budget, after resuming the specified training runs is used for exploring additional configurations.\n",
- "\n",
- "For more information on warm starting and resuming hyperparameter tuning runs, please refer to the [Hyperparameter Tuning for Azure Machine Learning documentation](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-tune-hyperparameters) \n",
- "\n",
- "## Find and register best model \n",
- "When all the jobs finish, we can find out the one that has the highest accuracy."
- ]
- },
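- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "As an optional, minimal sketch of warm starting (it assumes the `resume_from` parameter of `HyperDriveConfig` and simply reuses `src`, `ps`, `policy`, and the completed parent run `htr` from above), the cell below configures a new sweep that inherits the previous trials as prior knowledge. Adjust it to your own runs before using it."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# A minimal warm-start sketch (optional). It assumes the completed parent run `htr`\n",
- "# from above and reuses `src`, `ps`, and `policy`; adjust these to your own runs.\n",
- "warm_start_config = HyperDriveConfig(run_config=src,\n",
- " hyperparameter_sampling=ps,\n",
- " policy=policy,\n",
- " primary_metric_name='validation_acc',\n",
- " primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,\n",
- " max_total_runs=8,\n",
- " max_concurrent_runs=4,\n",
- " resume_from=[htr])  # prior knowledge from the previous sweep\n",
- "\n",
- "# uncomment to launch the warm-started sweep\n",
- "#warm_start_run = exp.submit(config=warm_start_config)"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Find and register best model \n",
- "When all the jobs finish, we can find out the one that has the highest accuracy."
- ]
- },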
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "best_run = htr.get_best_run_by_primary_metric()"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Now let's list the model files uploaded during the run."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "print(best_run.get_file_names())"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "We can then register the folder (and all files in it) as a model named `tf-dnn-mnist` under the workspace for deployment."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "model = best_run.register_model(model_name='tf-dnn-mnist', model_path='outputs/model')"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Deploy the model in ACI\n",
- "Now we are ready to deploy the model as a web service running in Azure Container Instance [ACI](https://azure.microsoft.com/en-us/services/container-instances/). Azure Machine Learning accomplishes this by constructing a Docker image with the scoring logic and model baked in.\n",
- "### Create score.py\n",
- "First, we will create a scoring script that will be invoked by the web service call. \n",
- "\n",
- "* Note that the scoring script must have two required functions, `init()` and `run(input_data)`. \n",
- " * In `init()` function, you typically load the model into a global object. This function is executed only once when the Docker container is started. \n",
- " * In `run(input_data)` function, the model is used to predict a value based on the input data. The input and output to `run` typically use JSON as serialization and de-serialization format but you are not limited to that."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "%%writefile score.py\n",
- "import json\n",
- "import numpy as np\n",
- "import os\n",
- "import tensorflow as tf\n",
- "\n",
- "from azureml.core.model import Model\n",
- "\n",
- "def init():\n",
- " global tf_model\n",
- " model_root = os.getenv('AZUREML_MODEL_DIR')\n",
- " # the name of the folder in which to look for tensorflow model files\n",
- " tf_model_folder = 'model'\n",
- " \n",
- " tf_model = tf.saved_model.load(os.path.join(model_root, tf_model_folder))\n",
- "\n",
- "def run(raw_data):\n",
- " data = np.array(json.loads(raw_data)['data'], dtype=np.float32)\n",
- " \n",
- " # make prediction\n",
- " out = tf_model(data)\n",
- " y_hat = np.argmax(out, axis=1)\n",
- "\n",
- " return y_hat.tolist()"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Create myenv.yml\n",
- "We also need to create an environment file so that Azure Machine Learning can install the necessary packages in the Docker image which are required by your scoring script. In this case, we need to specify packages `numpy`, `tensorflow`."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "from azureml.core.runconfig import CondaDependencies\n",
- "\n",
- "cd = CondaDependencies.create()\n",
- "cd.add_conda_package('numpy')\n",
- "cd.add_pip_package('tensorflow==2.2.0')\n",
- "cd.add_pip_package(\"azureml-defaults\")\n",
- "cd.add_pip_package(\"protobuf==3.20.1\")\n",
- "cd.save_to_file(base_directory='./', conda_file_path='myenv.yml')\n",
- "\n",
- "print(cd.serialize_to_string())"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Deploy to ACI\n",
- "We are almost ready to deploy. Create the inference configuration and deployment configuration and deploy to ACI. This cell will run for about 7-8 minutes."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "from azureml.core.webservice import AciWebservice\n",
- "from azureml.core.model import InferenceConfig\n",
- "from azureml.core.model import Model\n",
- "\n",
- "\n",
- "myenv = Environment.from_conda_specification(name=\"myenv\", file_path=\"myenv.yml\")\n",
- "inference_config = InferenceConfig(entry_script=\"score.py\", environment=myenv)\n",
- "\n",
- "aciconfig = AciWebservice.deploy_configuration(cpu_cores=2, \n",
- " memory_gb=2, \n",
- " tags={'name':'mnist', 'framework': 'TensorFlow DNN'},\n",
- " description='Tensorflow DNN on MNIST')\n",
- "\n",
- "service = Model.deploy(workspace=ws, \n",
- " name='tf-mnist-svc', \n",
- " models=[model], \n",
- " inference_config=inference_config, \n",
- " deployment_config=aciconfig)\n",
- "\n",
- "service.wait_for_deployment(True)\n",
- "print(service.state)"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "**Tip: If something goes wrong with the deployment, the first thing to look at is the logs from the service by running the following command:**"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "print(service.get_logs())"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "This is the scoring web service endpoint:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "print(service.scoring_uri)"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Test the deployed model\n",
- "Let's test the deployed model. Pick 30 random samples from the test set, and send it to the web service hosted in ACI. Note here we are using the `run` API in the SDK to invoke the service. You can also make raw HTTP calls using any HTTP tool such as curl.\n",
- "\n",
- "After the invocation, we print the returned predictions and plot them along with the input images. Use red font color and inversed image (white on black) to highlight the misclassified samples. Note since the model accuracy is pretty high, you might have to run the below cell a few times before you can see a misclassified sample."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "import json\n",
- "\n",
- "# find 30 random samples from test set\n",
- "n = 30\n",
- "sample_indices = np.random.permutation(X_test.shape[0])[0:n]\n",
- "\n",
- "test_samples = json.dumps({\"data\": X_test[sample_indices].tolist()})\n",
- "test_samples = bytes(test_samples, encoding='utf8')\n",
- "\n",
- "# predict using the deployed model\n",
- "result = service.run(input_data=test_samples)\n",
- "\n",
- "# compare actual value vs. the predicted values:\n",
- "i = 0\n",
- "plt.figure(figsize = (20, 1))\n",
- "\n",
- "for s in sample_indices:\n",
- " plt.subplot(1, n, i + 1)\n",
- " plt.axhline('')\n",
- " plt.axvline('')\n",
- " \n",
- " # use different color for misclassified sample\n",
- " font_color = 'red' if y_test[s] != result[i] else 'black'\n",
- " clr_map = plt.cm.gray if y_test[s] != result[i] else plt.cm.Greys\n",
- " \n",
- " plt.text(x=10, y=-10, s=result[i], fontsize=18, color=font_color)\n",
- " plt.imshow(X_test[s].reshape(28, 28), cmap=clr_map)\n",
- " \n",
- " i = i + 1\n",
- "plt.show()"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "We can also send raw HTTP request to the service."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "import requests\n",
- "\n",
- "# send a random row from the test set to score\n",
- "random_index = np.random.randint(0, len(X_test)-1)\n",
- "input_data = \"{\\\"data\\\": [\" + str(list(X_test[random_index])) + \"]}\"\n",
- "\n",
- "headers = {'Content-Type':'application/json'}\n",
- "\n",
- "resp = requests.post(service.scoring_uri, input_data, headers=headers)\n",
- "\n",
- "print(\"POST to url\", service.scoring_uri)\n",
- "#print(\"input data:\", input_data)\n",
- "print(\"label:\", y_test[random_index])\n",
- "print(\"prediction:\", resp.text)"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Let's look at the workspace after the web service was deployed. You should see \n",
- "* a registered model named 'model' and with the id 'model:1'\n",
- "* a webservice called 'tf-mnist' with some scoring URL"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "model = ws.models['tf-dnn-mnist']\n",
- "print(\"Model: {}, ID: {}\".format('tf-dnn-mnist', model.id))\n",
- " \n",
- "webservice = ws.webservices['tf-mnist-svc']\n",
- "print(\"Webservice: {}, scoring URI: {}\".format('tf-mnist-svc', webservice.scoring_uri))"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Clean up\n",
- "You can delete the ACI deployment with a simple delete API call."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "service.delete()"
- ]
- }
- ],
- "metadata": {
- "authors": [
- {
- "name": "nagaur"
- }
- ],
- "category": "training",
- "compute": [
- "AML Compute"
- ],
- "datasets": [
- "MNIST"
- ],
- "deployment": [
- "Azure Container Instance"
- ],
- "exclude_from_index": false,
- "framework": [
- "TensorFlow"
- ],
- "friendly_name": "Training and hyperparameter tuning using the TensorFlow estimator",
- "index_order": 1,
- "kernelspec": {
- "display_name": "Python 3.8 - AzureML",
- "language": "python",
- "name": "python38-azureml"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.6.9"
- },
- "tags": [
- "None"
- ],
- "task": "Train a deep neural network"
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
\ No newline at end of file
diff --git a/how-to-use-azureml/ml-frameworks/tensorflow/train-hyperparameter-tune-deploy-with-tensorflow/utils.py b/how-to-use-azureml/ml-frameworks/tensorflow/train-hyperparameter-tune-deploy-with-tensorflow/utils.py
deleted file mode 100644
index 98170ada..00000000
--- a/how-to-use-azureml/ml-frameworks/tensorflow/train-hyperparameter-tune-deploy-with-tensorflow/utils.py
+++ /dev/null
@@ -1,27 +0,0 @@
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# Licensed under the MIT License.
-
-import gzip
-import numpy as np
-import struct
-
-
-# load compressed MNIST gz files and return numpy arrays
-def load_data(filename, label=False):
- with gzip.open(filename) as gz:
- struct.unpack('I', gz.read(4))
- n_items = struct.unpack('>I', gz.read(4))
- if not label:
- n_rows = struct.unpack('>I', gz.read(4))[0]
- n_cols = struct.unpack('>I', gz.read(4))[0]
- res = np.frombuffer(gz.read(n_items[0] * n_rows * n_cols), dtype=np.uint8)
- res = res.reshape(n_items[0], n_rows * n_cols)
- else:
- res = np.frombuffer(gz.read(n_items[0]), dtype=np.uint8)
- res = res.reshape(n_items[0], 1)
- return res
-
-
-# one-hot encode a 1-D array
-def one_hot_encode(array, num_of_classes):
- return np.eye(num_of_classes)[array.reshape(-1)]
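-
-
-# Example usage (a hypothetical sketch -- the file names below are placeholders and
-# depend on where the MNIST .gz files were downloaded; they are not part of this module):
-#   X_test = load_data('test-images.gz', label=False) / np.float32(255.0)
-#   y_test = load_data('test-labels.gz', label=True).reshape(-1)
-#   y_test_onehot = one_hot_encode(y_test, 10)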
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/datadrift-tutorial.ipynb b/how-to-use-azureml/work-with-data/datadrift-tutorial/datadrift-tutorial.ipynb
deleted file mode 100644
index 50a54a87..00000000
--- a/how-to-use-azureml/work-with-data/datadrift-tutorial/datadrift-tutorial.ipynb
+++ /dev/null
@@ -1,466 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Copyright (c) Microsoft Corporation. All rights reserved.\n",
- "\n",
- "Licensed under the MIT License."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- ""
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Analyze data drift in Azure Machine Learning datasets \n",
- "\n",
- "In this tutorial, you will setup a data drift monitor on a weather dataset to:\n",
- "\n",
- "☑ Analyze historical data for drift\n",
- "\n",
- "☑ Setup a monitor to recieve email alerts if data drift is detected going forward\n",
- "\n",
- "If your workspace is Enterprise level, view and exlpore the results in the Azure Machine Learning studio. The video below shows the results from this tutorial. \n",
- "\n",
- ""
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Prerequisites\n",
- "If you are using an Azure Machine Learning Compute instance, you are all set. Otherwise, go through the [configuration notebook](../../../configuration.ipynb) if you haven't already established your connection to the AzureML Workspace."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Check core SDK version number\n",
- "import azureml.core\n",
- "\n",
- "print('SDK version:', azureml.core.VERSION)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Initialize Workspace\n",
- "\n",
- "Initialize a workspace object from persisted configuration."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "from azureml.core import Workspace\n",
- "\n",
- "ws = Workspace.from_config()\n",
- "ws"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Setup target and baseline datasets\n",
- "\n",
- "Setup the baseline and target datasets. The baseline will be used to compare each time slice of the target dataset, which is sampled by a given frequency. For further details, see [our documentation](http://aka.ms/datadrift). \n",
- "\n",
- "The next few cells will:\n",
- " * get the default datastore\n",
- " * upload the `weather-data` to the datastore\n",
- " * create the Tabular dataset from the data\n",
- " * add the timeseries trait by specifying the timestamp column `datetime`\n",
- " * register the dataset\n",
- " * create the baseline as a time slice of the target dataset\n",
- " * optionally, register the baseline dataset\n",
- " \n",
- "The folder `weather-data` contains weather data from the [NOAA Integrated Surface Data](https://azure.microsoft.com/services/open-datasets/catalog/noaa-integrated-surface-data/) filtered down to to station names containing the string 'FLORIDA' to reduce the size of data. See `get_data.py` to see how this data is curated and modify as desired. This script may take a long time to run, hence the data is provided in the `weather-data` folder for this demo."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# use default datastore\n",
- "dstore = ws.get_default_datastore()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# upload weather data\n",
- "dstore.upload('weather-data', 'datadrift-data', overwrite=True, show_progress=True)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# import Dataset class\n",
- "from azureml.core import Dataset\n",
- "\n",
- "# create target dataset \n",
- "target = Dataset.Tabular.from_parquet_files(dstore.path('datadrift-data/**/data.parquet'))\n",
- "# set the timestamp column\n",
- "target = target.with_timestamp_columns('datetime')\n",
- "# register the target dataset\n",
- "target = target.register(ws, 'target')\n",
- "# retrieve the dataset from the workspace by name\n",
- "target = Dataset.get_by_name(ws, 'target')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# import datetime \n",
- "from datetime import datetime\n",
- "\n",
- "# set baseline dataset as January 2019 weather data\n",
- "baseline = Dataset.Tabular.from_parquet_files(dstore.path('datadrift-data/2019/01/data.parquet'))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# optionally, register the baseline dataset. if skipped, an unregistered dataset will be used\n",
- "#baseline = baseline.register(ws, 'baseline')"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Create compute target\n",
- "\n",
- "> Note that if you have an AzureML Data Scientist role, you will not have permission to create compute resources. Talk to your workspace or IT admin to create the compute targets described in this section, if they do not already exist.\n",
- "\n",
- "Create an Azure Machine Learning compute cluster to run the data drift monitor and associated runs. The below cell will create a compute cluster named `'cpu-cluster'`. "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "from azureml.core.compute import AmlCompute, ComputeTarget\n",
- "\n",
- "compute_name = 'cpu-cluster'\n",
- "\n",
- "if compute_name in ws.compute_targets:\n",
- " compute_target = ws.compute_targets[compute_name]\n",
- " if compute_target and type(compute_target) is AmlCompute:\n",
- " print('found compute target. just use it. ' + compute_name)\n",
- "else:\n",
- " print('creating a new compute target...')\n",
- " provisioning_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D3_V2', min_nodes=0, max_nodes=2)\n",
- "\n",
- " # create the cluster\n",
- " compute_target = ComputeTarget.create(ws, compute_name, provisioning_config)\n",
- "\n",
- " # can poll for a minimum number of nodes and for a specific timeout.\n",
- " # if no min node count is provided it will use the scale settings for the cluster\n",
- " compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)\n",
- "\n",
- " # For a more detailed view of current AmlCompute status, use get_status()\n",
- " print(compute_target.get_status().serialize())"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Create data drift monitor\n",
- "\n",
- "See [our documentation](http://aka.ms/datadrift) for a complete description for all of the parameters. "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "tags": [
- "datadrift-remarks-sample"
- ]
- },
- "outputs": [],
- "source": [
- "from azureml.datadrift import DataDriftDetector, AlertConfiguration\n",
- "\n",
- "alert_config = AlertConfiguration(['user@contoso.com']) # replace with your email to recieve alerts from the scheduled pipeline after enabling\n",
- "\n",
- "monitor = DataDriftDetector.create_from_datasets(ws, 'weather-monitor', baseline, target, \n",
- " compute_target='cpu-cluster', # compute target for scheduled pipeline and backfills \n",
- " frequency='Week', # how often to analyze target data\n",
- " feature_list=None, # list of features to detect drift on\n",
- " drift_threshold=None, # threshold from 0 to 1 for email alerting\n",
- " latency=0, # SLA in hours for target data to arrive in the dataset\n",
- " alert_config=alert_config) # email addresses to send alert"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Update data drift monitor\n",
- "\n",
- "Many settings of the data drift monitor can be updated after creation. In this demo, we will update the `drift_threshold` and `feature_list`. See [our documentation](http://aka.ms/datadrift) for details on which settings can be changed."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# get monitor by name\n",
- "monitor = DataDriftDetector.get_by_name(ws, 'weather-monitor')\n",
- "\n",
- "# create feature list - need to exclude columns that naturally drift or increment over time, such as year, day, index\n",
- "columns = list(baseline.take(1).to_pandas_dataframe())\n",
- "exclude = ['year', 'day', 'version', '__index_level_0__']\n",
- "features = [col for col in columns if col not in exclude]\n",
- "\n",
- "# update the feature list\n",
- "monitor = monitor.update(feature_list=features)"
- ]
- },
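- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "The drift threshold used for email alerting can be updated in the same way. The cell below is an optional sketch (it assumes the `drift_threshold` parameter of the monitor's `update` method); choose a value between 0 and 1 that suits your data before enabling it."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# optionally, update the drift threshold used for alerting (a value from 0 to 1)\n",
- "#monitor = monitor.update(drift_threshold=0.3)"
- ]
- },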
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Analyze historical data and backfill\n",
- "\n",
- "You can use the `backfill` method to:\n",
- " * analyze historical data\n",
- " * backfill metrics after updating the settings (mainly the feature list)\n",
- " * backfill metrics for failed runs\n",
- " \n",
- "The below cells will run two backfills that will produce data drift results for 2019 weather data, with January used as the baseline in the monitor. The output can be seen from the `show` method after the runs have completed, or viewed from the Azure Machine Learning studio for Enterprise workspaces.\n",
- "\n",
- ""
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {
- "jupyter": {
- "source_hidden": true
- }
- },
- "source": [
- ">**Tip!** When starting with the data drift capability, start by backfilling on a small section of data to get initial results. Update the feature list as needed by removing columns that are causing drift, but can be ignored, and backfill this section of data until satisfied with the results. Then, backfill on a larger slice of data and/or set the alert configuration, threshold, and enable the schedule to recieve alerts to drift on your dataset. All of this can be done through the UI (Enterprise) or Python SDK."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Although it depends on many factors, the below backfill should typically take less than 20 minutes to run. Results will show as soon as they become available, not when the backfill is completed, so you may begin to see some metrics in a few minutes."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# backfill for one month\n",
- "backfill_start_date = datetime(2019, 9, 1)\n",
- "backfill_end_date = datetime(2019, 10, 1)\n",
- "backfill = monitor.backfill(backfill_start_date, backfill_end_date)\n",
- "backfill"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Query metrics and show results in Python\n",
- "\n",
- "The below cell will plot some key data drift metrics, and can be used to query the results. Run `help(monitor.get_output)` for specifics on the object returned."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# make sure the backfill has completed\n",
- "backfill.wait_for_completion(wait_post_processing=True)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# get results from Python SDK (wait for backfills or monitor runs to finish)\n",
- "results, metrics = monitor.get_output(start_time=datetime(year=2019, month=9, day=1))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# plot the results from Python SDK \n",
- "monitor.show(backfill_start_date, backfill_end_date)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Enable the monitor's pipeline schedule\n",
- "\n",
- "Turn on a scheduled pipeline which will anlayze the target dataset for drift every `frequency`. Use the latency parameter to adjust the start time of the pipeline. For instance, if it takes 24 hours for my data processing pipelines for data to arrive in the target dataset, set latency to 24. "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# enable the pipeline schedule and recieve email alerts\n",
- "monitor.enable_schedule()\n",
- "\n",
- "# disable the pipeline schedule \n",
- "#monitor.disable_schedule()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Delete compute target\n",
- "\n",
- "Do not delete the compute target if you intend to keep using it for the data drift monitor scheduled runs or otherwise. If the minimum nodes are set to 0, it will scale down soon after jobs are completed, and scale up the next time the cluster is needed."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# optionally delete the compute target\n",
- "#compute_target.delete()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Delete the DataDriftDetector\n",
- "\n",
- "Invoking the `delete()` method on the object deletes the the drift monitor permanently and cannot be undone. You will no longer be able to find it in the UI and the `list()` or `get()` methods. The object on which delete() was called will have its state set to deleted and name suffixed with deleted. The baseline and target datasets and model data that was collected, if any, are not deleted. The compute is not deleted. The DataDrift schedule pipeline is disabled and archived."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "monitor.delete()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Next steps\n",
- "\n",
- " * See [our documentation](https://aka.ms/datadrift) or [Python SDK reference](https://docs.microsoft.com/python/api/overview/azure/ml/intro)\n",
- " * [Send requests or feedback](mailto:driftfeedback@microsoft.com) on data drift directly to the team\n",
- " * Please open issues with data drift here on GitHub or on StackOverflow if others are likely to run into the same issue"
- ]
- }
- ],
- "metadata": {
- "authors": [
- {
- "name": "jamgan"
- }
- ],
- "category": "tutorial",
- "compute": [
- "Remote"
- ],
- "datasets": [
- "NOAA"
- ],
- "deployment": [
- "None"
- ],
- "exclude_from_index": false,
- "framework": [
- "Azure ML"
- ],
- "friendly_name": "Data drift quickdemo",
- "index_order": 1,
- "kernelspec": {
- "display_name": "Python 3.8 - AzureML",
- "language": "python",
- "name": "python38-azureml"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.7.4"
- },
- "star_tag": [
- "featured"
- ],
- "tags": [
- "Dataset",
- "Timeseries",
- "Drift"
- ],
- "task": "Filtering"
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}
\ No newline at end of file
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/get_data.py b/how-to-use-azureml/work-with-data/datadrift-tutorial/get_data.py
deleted file mode 100644
index b97c913e..00000000
--- a/how-to-use-azureml/work-with-data/datadrift-tutorial/get_data.py
+++ /dev/null
@@ -1,30 +0,0 @@
-# import packages
-import os
-import pandas as pd
-from calendar import monthrange
-from datetime import datetime, timedelta
-from azureml.core import Dataset, Datastore, Workspace
-from azureml.opendatasets import NoaaIsdWeather
-
-# get workspace and datastore
-ws = Workspace.from_config()
-dstore = ws.get_default_datastore()
-
-# adjust parameters as needed
-target_years = list(range(2010, 2020))
-start_month = 1
-
-# get data
-for year in target_years:
- for month in range(start_month, 12 + 1):
- path = 'weather-data/{}/{:02d}/'.format(year, month)
- try:
- start = datetime(year, month, 1)
- end = datetime(year, month, monthrange(year, month)[1]) + timedelta(days=1)
- isd = NoaaIsdWeather(start, end).to_pandas_dataframe()
- isd = isd[isd['stationName'].str.contains('FLORIDA', regex=True, na=False)]
- os.makedirs(path, exist_ok=True)
- isd.to_parquet(path + 'data.parquet')
- except Exception as e:
- print('Month {} in year {} likely has no data.\n'.format(month, year))
- print('Exception: {}'.format(e))
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/media/drift-results.png b/how-to-use-azureml/work-with-data/datadrift-tutorial/media/drift-results.png
deleted file mode 100644
index 0bd594d2..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/media/drift-results.png and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/media/video.gif b/how-to-use-azureml/work-with-data/datadrift-tutorial/media/video.gif
deleted file mode 100644
index 38ca0685..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/media/video.gif and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2010/01/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2010/01/data.parquet
deleted file mode 100644
index 6556ef57..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2010/01/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2010/02/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2010/02/data.parquet
deleted file mode 100644
index a25bcecf..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2010/02/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2010/03/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2010/03/data.parquet
deleted file mode 100644
index 35d2a8a3..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2010/03/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2010/04/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2010/04/data.parquet
deleted file mode 100644
index e5b37308..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2010/04/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2010/05/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2010/05/data.parquet
deleted file mode 100644
index e2241397..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2010/05/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2010/06/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2010/06/data.parquet
deleted file mode 100644
index b1d441a5..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2010/06/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2010/07/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2010/07/data.parquet
deleted file mode 100644
index 93fa02fa..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2010/07/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2010/08/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2010/08/data.parquet
deleted file mode 100644
index b0f3db22..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2010/08/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2010/09/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2010/09/data.parquet
deleted file mode 100644
index 58a4cb98..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2010/09/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2010/10/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2010/10/data.parquet
deleted file mode 100644
index c0e67b7a..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2010/10/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2010/11/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2010/11/data.parquet
deleted file mode 100644
index e2f302af..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2010/11/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2010/12/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2010/12/data.parquet
deleted file mode 100644
index 35d2b414..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2010/12/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2011/01/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2011/01/data.parquet
deleted file mode 100644
index 018386bc..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2011/01/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2011/02/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2011/02/data.parquet
deleted file mode 100644
index fd1697b6..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2011/02/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2011/03/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2011/03/data.parquet
deleted file mode 100644
index a915880e..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2011/03/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2011/04/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2011/04/data.parquet
deleted file mode 100644
index 86441702..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2011/04/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2011/05/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2011/05/data.parquet
deleted file mode 100644
index 6cbc901e..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2011/05/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2011/06/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2011/06/data.parquet
deleted file mode 100644
index a827a1d3..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2011/06/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2011/07/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2011/07/data.parquet
deleted file mode 100644
index 1c5ae793..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2011/07/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2011/08/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2011/08/data.parquet
deleted file mode 100644
index 9257f655..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2011/08/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2011/09/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2011/09/data.parquet
deleted file mode 100644
index d329dc8d..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2011/09/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2011/10/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2011/10/data.parquet
deleted file mode 100644
index de9d8c33..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2011/10/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2011/11/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2011/11/data.parquet
deleted file mode 100644
index 92b43f77..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2011/11/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2011/12/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2011/12/data.parquet
deleted file mode 100644
index 707302ec..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2011/12/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2012/01/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2012/01/data.parquet
deleted file mode 100644
index 65cf1b6f..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2012/01/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2012/02/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2012/02/data.parquet
deleted file mode 100644
index 8c3e161f..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2012/02/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2012/03/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2012/03/data.parquet
deleted file mode 100644
index a79e4c68..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2012/03/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2012/04/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2012/04/data.parquet
deleted file mode 100644
index 3b04685e..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2012/04/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2012/05/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2012/05/data.parquet
deleted file mode 100644
index afeec31a..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2012/05/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2012/06/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2012/06/data.parquet
deleted file mode 100644
index d257f1e7..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2012/06/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2012/07/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2012/07/data.parquet
deleted file mode 100644
index bb8fcfe2..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2012/07/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2012/08/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2012/08/data.parquet
deleted file mode 100644
index b2bdacec..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2012/08/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2012/09/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2012/09/data.parquet
deleted file mode 100644
index 29f33707..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2012/09/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2012/10/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2012/10/data.parquet
deleted file mode 100644
index 87500c90..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2012/10/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2012/11/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2012/11/data.parquet
deleted file mode 100644
index 61dfbfe6..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2012/11/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2012/12/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2012/12/data.parquet
deleted file mode 100644
index b0ad90a4..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2012/12/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2013/01/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2013/01/data.parquet
deleted file mode 100644
index e9e20552..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2013/01/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2013/02/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2013/02/data.parquet
deleted file mode 100644
index 51d276f6..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2013/02/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2013/03/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2013/03/data.parquet
deleted file mode 100644
index 8a139cc9..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2013/03/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2013/04/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2013/04/data.parquet
deleted file mode 100644
index 3057a368..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2013/04/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2013/05/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2013/05/data.parquet
deleted file mode 100644
index 67e8a252..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2013/05/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2013/06/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2013/06/data.parquet
deleted file mode 100644
index d3be405d..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2013/06/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2013/07/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2013/07/data.parquet
deleted file mode 100644
index 7b1e6c63..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2013/07/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2013/08/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2013/08/data.parquet
deleted file mode 100644
index 15c10def..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2013/08/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2013/09/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2013/09/data.parquet
deleted file mode 100644
index b8418e78..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2013/09/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2013/10/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2013/10/data.parquet
deleted file mode 100644
index 83ceea24..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2013/10/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2013/11/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2013/11/data.parquet
deleted file mode 100644
index 0889b265..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2013/11/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2013/12/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2013/12/data.parquet
deleted file mode 100644
index cea2ef28..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2013/12/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2014/01/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2014/01/data.parquet
deleted file mode 100644
index 722baaf3..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2014/01/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2014/02/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2014/02/data.parquet
deleted file mode 100644
index f4549928..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2014/02/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2014/03/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2014/03/data.parquet
deleted file mode 100644
index c033fe9b..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2014/03/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2014/04/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2014/04/data.parquet
deleted file mode 100644
index f0c137d5..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2014/04/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2014/05/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2014/05/data.parquet
deleted file mode 100644
index f3dc98eb..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2014/05/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2014/06/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2014/06/data.parquet
deleted file mode 100644
index c37b3c32..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2014/06/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2014/07/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2014/07/data.parquet
deleted file mode 100644
index 283e10af..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2014/07/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2014/08/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2014/08/data.parquet
deleted file mode 100644
index 029ab002..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2014/08/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2014/09/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2014/09/data.parquet
deleted file mode 100644
index 89be2b23..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2014/09/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2014/10/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2014/10/data.parquet
deleted file mode 100644
index fd8edced..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2014/10/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2014/11/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2014/11/data.parquet
deleted file mode 100644
index b3d70815..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2014/11/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2014/12/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2014/12/data.parquet
deleted file mode 100644
index ecdc8220..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2014/12/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2015/01/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2015/01/data.parquet
deleted file mode 100644
index 73640451..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2015/01/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2015/02/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2015/02/data.parquet
deleted file mode 100644
index 58a18736..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2015/02/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2015/03/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2015/03/data.parquet
deleted file mode 100644
index 2850f71e..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2015/03/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2015/04/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2015/04/data.parquet
deleted file mode 100644
index 7868881c..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2015/04/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2015/05/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2015/05/data.parquet
deleted file mode 100644
index c17200dd..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2015/05/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2015/06/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2015/06/data.parquet
deleted file mode 100644
index 42320bc2..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2015/06/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2015/07/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2015/07/data.parquet
deleted file mode 100644
index c71a3f4f..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2015/07/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2015/08/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2015/08/data.parquet
deleted file mode 100644
index eeb0e597..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2015/08/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2015/09/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2015/09/data.parquet
deleted file mode 100644
index 83cd3831..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2015/09/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2015/10/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2015/10/data.parquet
deleted file mode 100644
index 51ec9300..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2015/10/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2015/11/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2015/11/data.parquet
deleted file mode 100644
index 8504a6a5..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2015/11/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2015/12/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2015/12/data.parquet
deleted file mode 100644
index 7b2c11c5..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2015/12/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2016/01/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2016/01/data.parquet
deleted file mode 100644
index f37f3cb8..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2016/01/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2016/02/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2016/02/data.parquet
deleted file mode 100644
index 2c9181e6..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2016/02/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2016/03/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2016/03/data.parquet
deleted file mode 100644
index 039bd45e..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2016/03/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2016/04/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2016/04/data.parquet
deleted file mode 100644
index 2fbfa438..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2016/04/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2016/05/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2016/05/data.parquet
deleted file mode 100644
index 611da493..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2016/05/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2016/06/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2016/06/data.parquet
deleted file mode 100644
index 68e4d920..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2016/06/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2016/07/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2016/07/data.parquet
deleted file mode 100644
index 0fadca5e..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2016/07/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2016/08/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2016/08/data.parquet
deleted file mode 100644
index 7aa40772..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2016/08/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2016/09/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2016/09/data.parquet
deleted file mode 100644
index 95bb7def..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2016/09/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2016/10/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2016/10/data.parquet
deleted file mode 100644
index bd7d83ca..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2016/10/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2016/11/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2016/11/data.parquet
deleted file mode 100644
index b0936ed8..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2016/11/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2016/12/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2016/12/data.parquet
deleted file mode 100644
index 30574296..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2016/12/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2017/01/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2017/01/data.parquet
deleted file mode 100644
index 01e67ffe..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2017/01/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2017/02/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2017/02/data.parquet
deleted file mode 100644
index 50733a5b..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2017/02/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2017/03/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2017/03/data.parquet
deleted file mode 100644
index 78cd78ec..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2017/03/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2017/04/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2017/04/data.parquet
deleted file mode 100644
index f388521f..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2017/04/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2017/05/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2017/05/data.parquet
deleted file mode 100644
index c7bcc47c..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2017/05/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2017/06/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2017/06/data.parquet
deleted file mode 100644
index f64198f9..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2017/06/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2017/07/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2017/07/data.parquet
deleted file mode 100644
index 7a90e1fe..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2017/07/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2017/08/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2017/08/data.parquet
deleted file mode 100644
index ab213773..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2017/08/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2017/09/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2017/09/data.parquet
deleted file mode 100644
index 7d7475fc..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2017/09/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2017/10/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2017/10/data.parquet
deleted file mode 100644
index 21cfc454..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2017/10/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2017/11/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2017/11/data.parquet
deleted file mode 100644
index 816f904a..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2017/11/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2017/12/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2017/12/data.parquet
deleted file mode 100644
index 03d031cc..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2017/12/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2018/01/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2018/01/data.parquet
deleted file mode 100644
index 4ecf3783..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2018/01/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2018/02/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2018/02/data.parquet
deleted file mode 100644
index 13b4c0b3..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2018/02/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2018/03/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2018/03/data.parquet
deleted file mode 100644
index 95584de1..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2018/03/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2018/04/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2018/04/data.parquet
deleted file mode 100644
index 1b683721..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2018/04/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2018/05/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2018/05/data.parquet
deleted file mode 100644
index 56cc0ef8..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2018/05/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2018/06/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2018/06/data.parquet
deleted file mode 100644
index 8cbbb0f5..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2018/06/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2018/07/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2018/07/data.parquet
deleted file mode 100644
index 906ae3df..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2018/07/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2018/08/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2018/08/data.parquet
deleted file mode 100644
index 0e2d107d..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2018/08/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2018/09/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2018/09/data.parquet
deleted file mode 100644
index 26cafff1..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2018/09/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2018/10/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2018/10/data.parquet
deleted file mode 100644
index 3312265d..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2018/10/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2018/11/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2018/11/data.parquet
deleted file mode 100644
index e45a27e3..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2018/11/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2018/12/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2018/12/data.parquet
deleted file mode 100644
index 4292dbc2..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2018/12/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2019/01/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2019/01/data.parquet
deleted file mode 100644
index 0f2e4be5..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2019/01/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2019/02/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2019/02/data.parquet
deleted file mode 100644
index ff6b97af..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2019/02/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2019/03/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2019/03/data.parquet
deleted file mode 100644
index b93cea1f..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2019/03/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2019/04/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2019/04/data.parquet
deleted file mode 100644
index 257eedc5..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2019/04/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2019/05/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2019/05/data.parquet
deleted file mode 100644
index 9ea22a88..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2019/05/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2019/06/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2019/06/data.parquet
deleted file mode 100644
index aa4d8a92..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2019/06/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2019/07/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2019/07/data.parquet
deleted file mode 100644
index a92ccd0d..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2019/07/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2019/08/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2019/08/data.parquet
deleted file mode 100644
index 8328d039..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2019/08/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2019/09/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2019/09/data.parquet
deleted file mode 100644
index 9ee3faa0..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2019/09/data.parquet and /dev/null differ
diff --git a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2019/10/data.parquet b/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2019/10/data.parquet
deleted file mode 100644
index 7635534c..00000000
Binary files a/how-to-use-azureml/work-with-data/datadrift-tutorial/weather-data/2019/10/data.parquet and /dev/null differ