update samples from Release-163 as a part of 1.0.79 SDK release

2025-12-19 17:17:04 -05:00 · 2019-12-09 20:09:30 +00:00
parent 8aa04307fb
commit 0040644e7a
20 changed files with 1306 additions and 46 deletions
--- a/how-to-use-azureml/work-with-data/datasets-tutorial/pipeline-with-datasets/keras-mnist-fashion/prepare.py
+++ b/how-to-use-azureml/work-with-data/datasets-tutorial/pipeline-with-datasets/keras-mnist-fashion/prepare.py
@@ -0,0 +1,35 @@
+import os
+
+
+def convert(imgf, labelf, outf, n):
+    """Convert an image/label file pair into a headerless CSV file.
+
+    Each output row holds one example: the label byte followed by the
+    28 * 28 pixel bytes of the matching image, written as comma-separated
+    decimal integers. No header row is written.
+
+    Args:
+        imgf: Path of the binary image file. Its first 16 bytes are skipped
+            (presumably the idx3-ubyte header; confirm against the format).
+        labelf: Path of the binary label file. Its first 8 bytes are skipped
+            (presumably the idx1-ubyte header; confirm against the format).
+        outf: Path of the CSV file to create; opened in "w" mode.
+        n: Number of examples to convert.
+
+    Raises:
+        ValueError: If either input file ends before ``n`` examples have
+            been read.
+    """
+    pixels_per_image = 28 * 28
+
+    # Context managers close all three files even when a read or write fails.
+    with open(imgf, "rb") as image_file, \
+            open(labelf, "rb") as label_file, \
+            open(outf, "w") as csv_file:
+        # Skip the fixed-size prefixes that precede the raw data.
+        image_file.read(16)
+        label_file.read(8)
+
+        for index in range(n):
+            label = label_file.read(1)
+            pixels = image_file.read(pixels_per_image)
+            if len(label) != 1 or len(pixels) != pixels_per_image:
+                raise ValueError(
+                    "input ended after {} of {} examples".format(index, n))
+
+            # Iterating a bytes object yields ints, so no per-byte ord() call
+            # is needed; rows are streamed instead of buffered in memory.
+            row = [label[0]]
+            row.extend(pixels)
+            csv_file.write(",".join(str(value) for value in row) + "\n")
+
+
+# Input and output locations are read from environment variables.
+mounted_input_path = os.environ['fashion_ds']
+mounted_output_path = os.environ['AZUREML_DATAREFERENCE_prepared_fashion_ds']
+os.makedirs(mounted_output_path, exist_ok=True)
+
+# (image file, label file, output CSV, example count) for each conversion.
+conversion_jobs = (
+    ('train-images-idx3-ubyte', 'train-labels-idx1-ubyte',
+     'mnist_train.csv', 60000),
+    ('t10k-images-idx3-ubyte', 't10k-labels-idx1-ubyte',
+     'mnist_test.csv', 10000),
+)
+for image_name, label_name, csv_name, example_count in conversion_jobs:
+    convert(os.path.join(mounted_input_path, image_name),
+            os.path.join(mounted_input_path, label_name),
+            os.path.join(mounted_output_path, csv_name),
+            example_count)
--- a/how-to-use-azureml/work-with-data/datasets-tutorial/pipeline-with-datasets/keras-mnist-fashion/t10k-images-idx3-ubyte
+++ b/how-to-use-azureml/work-with-data/datasets-tutorial/pipeline-with-datasets/keras-mnist-fashion/t10k-images-idx3-ubyte
--- a/how-to-use-azureml/work-with-data/datasets-tutorial/pipeline-with-datasets/keras-mnist-fashion/t10k-labels-idx1-ubyte
+++ b/how-to-use-azureml/work-with-data/datasets-tutorial/pipeline-with-datasets/keras-mnist-fashion/t10k-labels-idx1-ubyte
--- a/how-to-use-azureml/work-with-data/datasets-tutorial/pipeline-with-datasets/keras-mnist-fashion/train-images-idx3-ubyte
+++ b/how-to-use-azureml/work-with-data/datasets-tutorial/pipeline-with-datasets/keras-mnist-fashion/train-images-idx3-ubyte
--- a/how-to-use-azureml/work-with-data/datasets-tutorial/pipeline-with-datasets/keras-mnist-fashion/train-labels-idx1-ubyte
+++ b/how-to-use-azureml/work-with-data/datasets-tutorial/pipeline-with-datasets/keras-mnist-fashion/train-labels-idx1-ubyte
--- a/how-to-use-azureml/work-with-data/datasets-tutorial/pipeline-with-datasets/keras-mnist-fashion/train.py
+++ b/how-to-use-azureml/work-with-data/datasets-tutorial/pipeline-with-datasets/keras-mnist-fashion/train.py
@@ -0,0 +1,120 @@
+import keras
+from keras.models import Sequential
+from keras.layers import Dense, Dropout, Flatten
+from keras.layers import Conv2D, MaxPooling2D
+from keras.layers.normalization import BatchNormalization
+from keras.utils import to_categorical
+from keras.callbacks import Callback
+
+import numpy as np
+import pandas as pd
+import os
+import matplotlib.pyplot as plt
+from sklearn.model_selection import train_test_split
+from azureml.core import Run
+
+# dataset object from the run
+run = Run.get_context()
+dataset = run.input_datasets['prepared_fashion_ds']
+
+# image geometry and training hyper-parameters; num_classes is defined up
+# front because the label encoding below depends on it
+img_rows, img_cols = 28, 28
+input_shape = (img_rows, img_cols, 1)
+batch_size = 256
+num_classes = 10
+epochs = 10
+
+# split dataset into train and test set
+(train_dataset, test_dataset) = dataset.random_split(percentage=0.8, seed=111)
+
+# load dataset into pandas dataframe
+data_train = train_dataset.to_pandas_dataframe()
+data_test = test_dataset.to_pandas_dataframe()
+
+# column 0 is used as the label, the remaining columns as pixel values.
+# num_classes is passed explicitly so the one-hot width always matches the
+# softmax layer, even if a split happens not to contain the highest label.
+X = np.array(data_train.iloc[:, 1:])
+y = to_categorical(np.array(data_train.iloc[:, 0]), num_classes=num_classes)
+
+# here we split off validation data to monitor the classifier during training
+X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=13)
+
+# test data
+X_test = np.array(data_test.iloc[:, 1:])
+y_test = to_categorical(np.array(data_test.iloc[:, 0]), num_classes=num_classes)
+
+
+# reshape flat rows into (rows, cols, 1) images and scale pixels by 1/255
+X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1).astype('float32') / 255
+X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 1).astype('float32') / 255
+X_val = X_val.reshape(X_val.shape[0], img_rows, img_cols, 1).astype('float32') / 255
+
+# construct neural network
+model = Sequential()
+model.add(Conv2D(32, kernel_size=(3, 3),
+                 activation='relu',
+                 kernel_initializer='he_normal',
+                 input_shape=input_shape))
+model.add(MaxPooling2D((2, 2)))
+model.add(Dropout(0.25))
+model.add(Conv2D(64, (3, 3), activation='relu'))
+model.add(MaxPooling2D(pool_size=(2, 2)))
+model.add(Dropout(0.25))
+model.add(Conv2D(128, (3, 3), activation='relu'))
+model.add(Dropout(0.4))
+model.add(Flatten())
+model.add(Dense(128, activation='relu'))
+model.add(Dropout(0.3))
+model.add(Dense(num_classes, activation='softmax'))
+
+model.compile(loss=keras.losses.categorical_crossentropy,
+              optimizer=keras.optimizers.Adam(),
+              metrics=['accuracy'])
+
+# run context used by the callback below to log per-epoch metrics
+run = Run.get_context()
+
+
+class LogRunMetrics(Callback):
+    """Keras callback that logs the epoch loss and accuracy to the run."""
+
+    # callback at the end of every epoch
+    def on_epoch_end(self, epoch, log=None):
+        """Log the 'loss' and accuracy entries of ``log`` for this epoch.
+
+        Args:
+            epoch: Index of the epoch that just finished (unused).
+            log: Metrics dict supplied by Keras; nothing is logged when it
+                is missing or empty.
+        """
+        if not log:
+            return
+        # logging the same metric name repeatedly creates a list in the run
+        run.log('Loss', log['loss'])
+        # older Keras releases report metrics=['accuracy'] under 'acc'
+        accuracy = log['accuracy'] if 'accuracy' in log else log['acc']
+        run.log('Accuracy', accuracy)
+
+
+# train the model, validating on the held-out split after every epoch
+history = model.fit(X_train, y_train,
+                    batch_size=batch_size,
+                    epochs=epochs,
+                    verbose=1,
+                    validation_data=(X_val, y_val),
+                    callbacks=[LogRunMetrics()])
+
+score = model.evaluate(X_test, y_test, verbose=0)
+
+# log a single value
+run.log("Final test loss", score[0])
+print('Test loss:', score[0])
+
+run.log('Final test accuracy', score[1])
+print('Test accuracy:', score[1])
+
+# older Keras releases record metrics=['accuracy'] under 'acc', so pick
+# whichever key this training history actually contains
+accuracy_key = 'accuracy' if 'accuracy' in history.history else 'acc'
+
+plt.figure(figsize=(6, 3))
+plt.title('Fashion MNIST with Keras ({} epochs)'.format(epochs), fontsize=14)
+plt.plot(history.history[accuracy_key], 'b-', label='Accuracy', lw=4, alpha=0.5)
+plt.plot(history.history['loss'], 'r--', label='Loss', lw=4, alpha=0.5)
+plt.legend(fontsize=12)
+plt.grid(True)
+
+# log an image
+run.log_image('Loss v.s. Accuracy', plot=plt)
+
+# create a ./outputs/model folder in the compute target
+# files saved in the "./outputs" folder are automatically uploaded into run history
+os.makedirs('./outputs/model', exist_ok=True)
+
+# serialize NN architecture to JSON
+model_json = model.to_json()
+# save model JSON
+with open('./outputs/model/model.json', 'w') as f:
+    f.write(model_json)
+# save model weights
+model.save_weights('./outputs/model/model.h5')
+print("model saved in ./outputs/model folder")
--- a/how-to-use-azureml/work-with-data/datasets-tutorial/pipeline-with-datasets/pipeline-for-image-classification.ipynb
+++ b/how-to-use-azureml/work-with-data/datasets-tutorial/pipeline-with-datasets/pipeline-for-image-classification.ipynb
@@ -0,0 +1,488 @@
+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Copyright (c) Microsoft Corporation. All rights reserved.\n",
+        "\n",
+        "Licensed under the MIT License [2017] Zalando SE, https://tech.zalando.com"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/work-with-data/datasets-tutorial/pipeline-with-datasets/pipeline-for-image-classification.png)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "# Build a simple ML pipeline for image classification\n",
+        "\n",
+        "## Introduction\n",
+        "This tutorial shows how to train a simple deep neural network using the [Fashion MNIST](https://github.com/zalandoresearch/fashion-mnist) dataset and Keras on Azure Machine Learning. Fashion-MNIST is a dataset of Zalando's article images\u2014consisting of a training set of 60,000 examples and a test set of 10,000 examples. Each example is a 28x28 grayscale image, associated with a label from 10 classes.\n",
+        "\n",
+        "Learn how to:\n",
+        "\n",
+        "> * Set up your development environment\n",
+        "> * Create the Fashion MNIST dataset\n",
+        "> * Create a machine learning pipeline to train a simple deep learning neural network on a remote cluster\n",
+        "> * Retrieve input datasets from the experiment and register the output model with datasets\n",
+        "\n",
+        "## Prerequisite:\n",
+        "* Understand the [architecture and terms](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture) introduced by Azure Machine Learning\n",
+        "* If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [configuration notebook](../../../configuration.ipynb) to:\n",
+        "    * install the latest version of AzureML SDK\n",
+        "    * create a workspace and its configuration file (`config.json`)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## Set up your development environment\n",
+        "\n",
+        "All the setup for your development work can be accomplished in a Python notebook.  Setup includes:\n",
+        "\n",
+        "* Importing Python packages\n",
+        "* Connecting to a workspace to enable communication between your local computer and remote resources\n",
+        "* Creating an experiment to track all your runs\n",
+        "* Creating a remote compute target to use for training\n",
+        "\n",
+        "### Import packages\n",
+        "\n",
+        "Import Python packages you need in this session. Also display the Azure Machine Learning SDK version."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "import os\n",
+        "import azureml.core\n",
+        "from azureml.core import Workspace, Dataset, Datastore, ComputeTarget, RunConfiguration, Experiment\n",
+        "from azureml.core.runconfig import CondaDependencies\n",
+        "from azureml.pipeline.steps import PythonScriptStep, EstimatorStep\n",
+        "from azureml.pipeline.core import Pipeline, PipelineData\n",
+        "from azureml.train.dnn import TensorFlow\n",
+        "\n",
+        "# check core SDK version number\n",
+        "print(\"Azure ML SDK Version: \", azureml.core.VERSION)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "### Connect to workspace\n",
+        "\n",
+        "Create a workspace object from the existing workspace. `Workspace.from_config()` reads the file **config.json** and loads the details into an object named `workspace`."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# load workspace\n",
+        "workspace = Workspace.from_config()\n",
+        "print('Workspace name: ' + workspace.name, \n",
+        "      'Azure region: ' + workspace.location, \n",
+        "      'Subscription id: ' + workspace.subscription_id, \n",
+        "      'Resource group: ' + workspace.resource_group, sep='\\n')"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "### Create experiment and a directory\n",
+        "\n",
+        "Create an experiment to track the runs in your workspace and a directory to deliver the necessary code from your computer to the remote resource."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# create an ML experiment\n",
+        "exp = Experiment(workspace=workspace, name='keras-mnist-fashion')\n",
+        "\n",
+        "# create a directory\n",
+        "script_folder = './keras-mnist-fashion'\n",
+        "os.makedirs(script_folder, exist_ok=True)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "### Create or Attach existing compute resource\n",
+        "By using Azure Machine Learning Compute, a managed service, data scientists can train machine learning models on clusters of Azure virtual machines. Examples include VMs with GPU support. In this tutorial, you create Azure Machine Learning Compute as your training environment. The code below creates the compute clusters for you if they don't already exist in your workspace.\n",
+        "\n",
+        "**Creation of compute takes approximately 5 minutes.** If the AmlCompute with that name is already in your workspace the code will skip the creation process."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "from azureml.core.compute import ComputeTarget, AmlCompute\n",
+        "from azureml.core.compute_target import ComputeTargetException\n",
+        "\n",
+        "# choose a name for your cluster\n",
+        "cluster_name = \"your-cluster-name\"\n",
+        "\n",
+        "try:\n",
+        "    compute_target = ComputeTarget(workspace=workspace, name=cluster_name)\n",
+        "    print('Found existing compute target')\n",
+        "except ComputeTargetException:\n",
+        "    print('Creating a new compute target...')\n",
+        "    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6', \n",
+        "                                                           max_nodes=4)\n",
+        "\n",
+        "    # create the cluster\n",
+        "    compute_target = ComputeTarget.create(workspace, cluster_name, compute_config)\n",
+        "\n",
+        "    # can poll for a minimum number of nodes and for a specific timeout. \n",
+        "    # if no min node count is provided it uses the scale settings for the cluster\n",
+        "    compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)\n",
+        "\n",
+        "# use get_status() to get a detailed status for the current cluster. \n",
+        "print(compute_target.get_status().serialize())"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## Create the Fashion MNIST dataset\n",
+        "\n",
+        "By creating a dataset, you create a reference to the data source location. If you applied any subsetting transformations to the dataset, they will be stored in the dataset as well. The data remains in its existing location, so no extra storage cost is incurred. \n",
+        "\n",
+        "Every workspace comes with a default [datastore](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-access-data) (and you can register more) which is backed by the Azure blob storage account associated with the workspace. We can use it to transfer data from local to the cloud, and create a dataset from it. We will now upload the [Fashion MNIST](./keras-mnist-fashion) to the default datastore (blob) within your workspace."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "datastore = workspace.get_default_datastore()\n",
+        "datastore.upload_files(files = ['keras-mnist-fashion/t10k-images-idx3-ubyte', 'keras-mnist-fashion/t10k-labels-idx1-ubyte',\n",
+        "                               'keras-mnist-fashion/train-images-idx3-ubyte','keras-mnist-fashion/train-labels-idx1-ubyte'],\n",
+        "                       target_path = 'mnist-fashion',\n",
+        "                       overwrite = True,\n",
+        "                       show_progress = True)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Then we will create an unregistered FileDataset pointing to the path in the datastore. You can also create a dataset from multiple paths. [Learn More](https://aka.ms/azureml/howto/createdatasets) "
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "fashion_ds = Dataset.File.from_files([(datastore, 'mnist-fashion')])\n",
+        "\n",
+        "# list the files referenced by fashion_ds\n",
+        "fashion_ds.to_path()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## Build 2-step ML pipeline\n",
+        "\n",
+        "The [Azure Machine Learning Pipeline](https://docs.microsoft.com/en-us/azure/machine-learning/service/concept-ml-pipelines) enables data scientists to create and manage multiple simple and complex workflows concurrently. A typical pipeline would have multiple tasks to prepare data, train, deploy and evaluate models. Individual steps in the pipeline can make use of diverse compute options (for example: CPU for data preparation and GPU for training) and languages. [Learn More](https://github.com/Azure/MachineLearningNotebooks/tree/master/how-to-use-azureml/machine-learning-pipelines)\n",
+        "\n",
+        "\n",
+        "### Step 1: data preparation\n",
+        "\n",
+        "In step one, we will load the image and labels from Fashion MNIST dataset into mnist_train.csv and mnist_test.csv\n",
+        "\n",
+        "Each image is 28 pixels in height and 28 pixels in width, for a total of 784 pixels. Each pixel has a single pixel-value associated with it, indicating the lightness or darkness of that pixel, with higher numbers meaning darker. This pixel-value is an integer between 0 and 255. Both mnist_train.csv and mnist_test.csv contain 785 columns. The first column consists of the class labels, which represent the article of clothing. The rest of the columns contain the pixel-values of the associated image."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# set up the compute environment to install required packages\n",
+        "conda = CondaDependencies.create(\n",
+        "        pip_packages=['azureml-sdk','azureml-dataprep[fuse,pandas]'],\n",
+        "        pin_sdk_version=False)\n",
+        "\n",
+        "conda.set_pip_option('--pre')\n",
+        "\n",
+        "run_config = RunConfiguration()\n",
+        "run_config.environment.python.conda_dependencies = conda"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Intermediate data (or output of a step) is represented by a `PipelineData` object. `prepared_fashion_ds` is produced as the output of step 1, and used as the input of step 2. PipelineData introduces a data dependency between steps, and creates an implicit execution order in the pipeline. You can register a `PipelineData` as a dataset and version the output data automatically. [Learn More](https://docs.microsoft.com/azure/machine-learning/service/how-to-version-track-datasets#version-a-pipeline-output-dataset) "
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# define output data\n",
+        "prepared_fashion_ds = PipelineData('prepared_fashion_ds', datastore=datastore).as_dataset()\n",
+        "\n",
+        "# register output data as dataset\n",
+        "prepared_fashion_ds = prepared_fashion_ds.register(name='prepared_fashion_ds', create_new_version=True)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "A **PythonScriptStep** is a basic, built-in step to run a Python Script on a compute target. It takes a script name and optionally other parameters like arguments for the script, compute target, inputs and outputs. If no compute target is specified, default compute target for the workspace is used. You can also use a [**RunConfiguration**](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.runconfiguration?view=azure-ml-py) to specify requirements for the PythonScriptStep, such as conda dependencies and docker image."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "prep_step = PythonScriptStep(name='prepare step',\n",
+        "                             script_name=\"prepare.py\",\n",
+        "                             # mount fashion_ds dataset to the compute_target\n",
+        "                             inputs=[fashion_ds.as_named_input('fashion_ds').as_mount()],\n",
+        "                             outputs=[prepared_fashion_ds],\n",
+        "                             source_directory=script_folder,\n",
+        "                             compute_target=compute_target,\n",
+        "                             runconfig=run_config)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "### Step 2: train CNN with Keras\n",
+        "\n",
+        "Next, we construct an `azureml.train.dnn.TensorFlow` estimator object. The TensorFlow estimator provides a simple way of launching a TensorFlow training job on a compute target. It will automatically provide a docker image that has TensorFlow installed.\n",
+        "\n",
+        "[EstimatorStep](https://docs.microsoft.com/en-us/python/api/azureml-pipeline-steps/azureml.pipeline.steps.estimator_step.estimatorstep?view=azure-ml-py) adds a step to run Tensorflow Estimator in a Pipeline. It takes a dataset as the input."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# set up training step with Tensorflow estimator\n",
+        "est = TensorFlow(entry_script='train.py',\n",
+        "                 source_directory=script_folder,                 \n",
+        "                 pip_packages = ['azureml-sdk','keras','numpy','scikit-learn', 'matplotlib'],\n",
+        "                 compute_target=compute_target)\n",
+        "\n",
+        "est_step = EstimatorStep(name='train step',\n",
+        "                         estimator=est,\n",
+        "                         estimator_entry_script_arguments=[],\n",
+        "                         # parse prepared_fashion_ds into TabularDataset and use it as the input\n",
+        "                         inputs=[prepared_fashion_ds.parse_delimited_files()],\n",
+        "                         compute_target=compute_target)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "### Build the pipeline\n",
+        "Once we have the steps (or steps collection), we can build the [pipeline](https://docs.microsoft.com/python/api/azureml-pipeline-core/azureml.pipeline.core.pipeline.pipeline?view=azure-ml-py).\n",
+        "\n",
+        "A pipeline is created with a list of steps and a workspace. Submit a pipeline using [submit](https://docs.microsoft.com/python/api/azureml-core/azureml.core.experiment(class)?view=azure-ml-py#submit-config--tags-none----kwargs-). When submit is called, a [PipelineRun](https://docs.microsoft.com/python/api/azureml-pipeline-core/azureml.pipeline.core.pipelinerun?view=azure-ml-py) is created which in turn creates [StepRun](https://docs.microsoft.com/python/api/azureml-pipeline-core/azureml.pipeline.core.steprun?view=azure-ml-py) objects for each step in the workflow."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# build pipeline & run experiment\n",
+        "pipeline = Pipeline(workspace, steps=[prep_step, est_step])\n",
+        "run = exp.submit(pipeline)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "### Monitor the PipelineRun"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "inputHidden": false,
+        "outputHidden": false
+      },
+      "outputs": [],
+      "source": [
+        "run.wait_for_completion(show_output=True)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "run.find_step_run('train step')[0].get_metrics()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## Register the input dataset and the output model"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Azure Machine Learning dataset makes it easy to trace how your data is used in ML. [Learn More](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-version-track-datasets#track-datasets-in-experiments)<br>\n",
+        "For each Machine Learning experiment, you can easily trace the datasets used as the input through `Run` object."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# get input datasets\n",
+        "prep_step = run.find_step_run('prepare step')[0]\n",
+        "inputs = prep_step.get_details()['inputDatasets']\n",
+        "input_dataset = inputs[0]['dataset']\n",
+        "\n",
+        "# list the files referenced by input_dataset\n",
+        "input_dataset.to_path()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Register the input Fashion MNIST dataset with the workspace so that you can reuse it in other experiments or share it with your colleagues who have access to your workspace."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "fashion_ds = input_dataset.register(workspace = workspace,\n",
+        "                                    name = 'fashion_ds',\n",
+        "                                    description = 'image and label files from fashion mnist',\n",
+        "                                    create_new_version = True)\n",
+        "fashion_ds"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Register the output model with dataset"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "run.find_step_run('train step')[0].register_model(model_name = 'keras-model', model_path = 'outputs/model/', \n",
+        "                                                  datasets =[('train test data',fashion_ds)])"
+      ]
+    }
+  ],
+  "metadata": {
+    "authors": [
+      {
+        "name": "sihhu"
+      }
+    ],
+    "category": "tutorial",
+    "compute": [
+      "Remote"
+    ],
+    "datasets": [
+      "Fashion MNIST"
+    ],
+    "deployment": [
+      "None"
+    ],
+    "exclude_from_index": false,
+    "framework": [
+      "Azure ML"
+    ],
+    "friendly_name": "Datasets with ML Pipeline",
+    "index_order": 1,
+    "kernelspec": {
+      "display_name": "Python 3.6",
+      "language": "python",
+      "name": "python36"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.6.9"
+    },
+    "nteract": {
+      "version": "nteract-front-end@1.0.0"
+    },
+    "star_tag": [
+      "featured"
+    ],
+    "tags": [
+      "Dataset",
+      "Pipeline",
+      "Estimator",
+      "ScriptRun"
+    ],
+    "task": "Train"
+  },
+  "nbformat": 4,
+  "nbformat_minor": 2
+}