update samples from Release-63 as a part of SDK release

2020-08-31 20:00:07 +00:00
parent 5080053a35
commit b01c52bfd6
63 changed files with 1290 additions and 3412 deletions
--- a/how-to-use-azureml/work-with-data/datasets-tutorial/pipeline-with-datasets/keras-mnist-fashion/t10k-images-idx3-ubyte
+++ b/how-to-use-azureml/work-with-data/datasets-tutorial/pipeline-with-datasets/keras-mnist-fashion/t10k-images-idx3-ubyte
--- a/how-to-use-azureml/work-with-data/datasets-tutorial/pipeline-with-datasets/keras-mnist-fashion/t10k-labels-idx1-ubyte
+++ b/how-to-use-azureml/work-with-data/datasets-tutorial/pipeline-with-datasets/keras-mnist-fashion/t10k-labels-idx1-ubyte
--- a/how-to-use-azureml/work-with-data/datasets-tutorial/pipeline-with-datasets/keras-mnist-fashion/train-images-idx3-ubyte
+++ b/how-to-use-azureml/work-with-data/datasets-tutorial/pipeline-with-datasets/keras-mnist-fashion/train-images-idx3-ubyte
--- a/how-to-use-azureml/work-with-data/datasets-tutorial/pipeline-with-datasets/keras-mnist-fashion/train-labels-idx1-ubyte
+++ b/how-to-use-azureml/work-with-data/datasets-tutorial/pipeline-with-datasets/keras-mnist-fashion/train-labels-idx1-ubyte
--- a/how-to-use-azureml/work-with-data/datasets-tutorial/pipeline-with-datasets/keras-mnist-fashion/prepare.py
+++ b/how-to-use-azureml/work-with-data/datasets-tutorial/pipeline-with-datasets/keras-mnist-fashion/prepare.py
@@ -1,4 +1,5 @@
 import os
+import sys


 def convert(imgf, labelf, outf, n):
@@ -23,8 +24,8 @@ def convert(imgf, labelf, outf, n):
    l.close()


-mounted_input_path = os.environ['fashion_ds']
-mounted_output_path = os.environ['AZUREML_DATAREFERENCE_prepared_fashion_ds']
+mounted_input_path = sys.argv[1]
+mounted_output_path = sys.argv[2]
 os.makedirs(mounted_output_path, exist_ok=True)

 convert(os.path.join(mounted_input_path, 'train-images-idx3-ubyte'),
--- a/how-to-use-azureml/work-with-data/datasets-tutorial/pipeline-with-datasets/pipeline-for-image-classification.ipynb
+++ b/how-to-use-azureml/work-with-data/datasets-tutorial/pipeline-with-datasets/pipeline-for-image-classification.ipynb
@@ -65,12 +65,9 @@
      "source": [
        "import os\n",
        "import azureml.core\n",
-        "from azureml.core import Workspace, Dataset, Datastore, ComputeTarget, RunConfiguration, Experiment\n",
-        "from azureml.core.runconfig import CondaDependencies\n",
+        "from azureml.core import Workspace, Dataset, Datastore, ComputeTarget, Experiment\n",
        "from azureml.pipeline.steps import PythonScriptStep, EstimatorStep\n",
-        "from azureml.pipeline.core import Pipeline, PipelineData\n",
-        "from azureml.train.dnn import TensorFlow\n",
-        "\n",
+        "from azureml.pipeline.core import Pipeline\n",
        "# check core SDK version number\n",
        "print(\"Azure ML SDK Version: \", azureml.core.VERSION)"
      ]
@@ -141,7 +138,7 @@
        "from azureml.core.compute_target import ComputeTargetException\n",
        "\n",
        "# choose a name for your cluster\n",
-        "cluster_name = \"gpu-cluster\"\n",
+        "cluster_name = \"amlcomp\"\n",
        "\n",
        "try:\n",
        "    compute_target = ComputeTarget(workspace=workspace, name=cluster_name)\n",
@@ -168,9 +165,9 @@
      "source": [
        "## Create the Fashion MNIST dataset\n",
        "\n",
-        "By creating a dataset, you create a reference to the data source location. If you applied any subsetting transformations to the dataset, they will be stored in the dataset as well. The data remains in its existing location, so no extra storage cost is incurred. \n",
+        "By creating a dataset, you create a reference to the data source location. If you applied any subsetting transformations to the dataset, they will be stored in the dataset as well. The data remains in its existing location, so no extra storage cost is incurred.\n",
        "\n",
-        "Every workspace comes with a default [datastore](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-access-data) (and you can register more) which is backed by the Azure blob storage account associated with the workspace. We can use it to transfer data from local to the cloud, and create a dataset from it. We will now upload the [Fashion MNIST](./keras-mnist-fashion) to the default datastore (blob) within your workspace."
+        "Every workspace comes with a default [datastore](https://docs.microsoft.com/azure/machine-learning/service/how-to-access-data) (and you can register more) which is backed by the Azure blob storage account associated with the workspace. We can use it to transfer data from local to the cloud, and create a dataset from it. We will now upload the [Fashion MNIST](./data) to the default datastore (blob) within your workspace."
      ]
    },
    {
@@ -180,8 +177,8 @@
      "outputs": [],
      "source": [
        "datastore = workspace.get_default_datastore()\n",
-        "datastore.upload_files(files = ['keras-mnist-fashion/t10k-images-idx3-ubyte', 'keras-mnist-fashion/t10k-labels-idx1-ubyte',\n",
-        "                               'keras-mnist-fashion/train-images-idx3-ubyte','keras-mnist-fashion/train-labels-idx1-ubyte'],\n",
+        "datastore.upload_files(files = ['data/t10k-images-idx3-ubyte', 'data/t10k-labels-idx1-ubyte',\n",
+        "                               'data/train-images-idx3-ubyte','data/train-labels-idx1-ubyte'],\n",
        "                       target_path = 'mnist-fashion',\n",
        "                       overwrite = True,\n",
        "                       show_progress = True)"
@@ -191,7 +188,7 @@
      "cell_type": "markdown",
      "metadata": {},
      "source": [
-        "Then we will create an unregistered FileDataset pointing to the path in the datastore. You can also create a dataset from multiple paths. [Learn More](https://aka.ms/azureml/howto/createdatasets) "
+        "Then we will create an unregistered FileDataset pointing to the path in the datastore. You can also create a dataset from multiple paths. [Learn More](https://aka.ms/azureml/howto/createdatasets)  "
      ]
    },
    {
@@ -212,7 +209,7 @@
      "source": [
        "## Build 2-step ML pipeline\n",
        "\n",
-        "The [Azure Machine Learning Pipeline](https://docs.microsoft.com/en-us/azure/machine-learning/service/concept-ml-pipelines) enables data scientists to create and manage multiple simple and complex workflows concurrently. A typical pipeline would have multiple tasks to prepare data, train, deploy and evaluate models. Individual steps in the pipeline can make use of diverse compute options (for example: CPU for data preparation and GPU for training) and languages. [Learn More](https://github.com/Azure/MachineLearningNotebooks/tree/master/how-to-use-azureml/machine-learning-pipelines)\n",
+        "The [Azure Machine Learning Pipeline](https://docs.microsoft.com/azure/machine-learning/service/concept-ml-pipelines) enables data scientists to create and manage multiple simple and complex workflows concurrently. A typical pipeline would have multiple tasks to prepare data, train, deploy and evaluate models. Individual steps in the pipeline can make use of diverse compute options (for example: CPU for data preparation and GPU for training) and languages. [Learn More](https://github.com/Azure/MachineLearningNotebooks/tree/master/how-to-use-azureml/machine-learning-pipelines)\n",
        "\n",
        "\n",
        "### Step 1: data preparation\n",
@@ -222,28 +219,11 @@
        "Each image is 28 pixels in height and 28 pixels in width, for a total of 784 pixels in total. Each pixel has a single pixel-value associated with it, indicating the lightness or darkness of that pixel, with higher numbers meaning darker. This pixel-value is an integer between 0 and 255. Both mnist_train.csv and mnist_test.csv contain 785 columns. The first column consists of the class labels, which represent the article of clothing. The rest of the columns contain the pixel-values of the associated image."
      ]
    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "# set up the compute environment to install required packages\n",
-        "conda = CondaDependencies.create(\n",
-        "        pip_packages=['azureml-sdk','azureml-dataset-runtime[fuse,pandas]'],\n",
-        "        pin_sdk_version=False)\n",
-        "\n",
-        "conda.set_pip_option('--pre')\n",
-        "\n",
-        "run_config = RunConfiguration()\n",
-        "run_config.environment.python.conda_dependencies = conda"
-      ]
-    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
-        "Intermediate data (or output of a step) is represented by a `PipelineData` object. preprared_fashion_ds is produced as the output of step 1, and used as the input of step 2. PipelineData introduces a data dependency between steps, and creates an implicit execution order in the pipeline. You can register a `PipelineData` as a dataset and version the output data automatically. [Learn More](https://docs.microsoft.com/azure/machine-learning/service/how-to-version-track-datasets#version-a-pipeline-output-dataset) "
+        "Intermediate data (or output of a step) is represented by an `OutputFileDatasetConfig` object. `prepared_fashion_ds` is produced as the output of step 1, and used as the input of step 2. `OutputFileDatasetConfig` introduces a data dependency between steps, and creates an implicit execution order in the pipeline. You can register an `OutputFileDatasetConfig` as a dataset and version the output data automatically."
      ]
    },
    {
@@ -252,18 +232,28 @@
      "metadata": {},
      "outputs": [],
      "source": [
-        "# define output data\n",
-        "prepared_fashion_ds = PipelineData('prepared_fashion_ds', datastore=datastore).as_dataset()\n",
+        "from azureml.data import OutputFileDatasetConfig\n",
        "\n",
-        "# register output data as dataset\n",
-        "prepared_fashion_ds = prepared_fashion_ds.register(name='prepared_fashion_ds', create_new_version=True)"
+        "# learn more about the output config\n",
+        "help(OutputFileDatasetConfig)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# write output to datastore under folder `outputdataset` and register it as a dataset after the experiment completes\n",
+        "# make sure the service principal in your datastore has blob data contributor role in order to write data back\n",
+        "prepared_fashion_ds = OutputFileDatasetConfig(destination=(datastore, 'outputdataset/{run-id}')).register_on_complete(name='prepared_fashion_ds')"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
-        "A **PythonScriptStep** is a basic, built-in step to run a Python Script on a compute target. It takes a script name and optionally other parameters like arguments for the script, compute target, inputs and outputs. If no compute target is specified, default compute target for the workspace is used. You can also use a [**RunConfiguration**](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.runconfiguration?view=azure-ml-py) to specify requirements for the PythonScriptStep, such as conda dependencies and docker image."
+        "A **PythonScriptStep** is a basic, built-in step to run a Python Script on a compute target. It takes a script name and optionally other parameters like arguments for the script, compute target, inputs and outputs. If no compute target is specified, default compute target for the workspace is used. You can also use a [**RunConfiguration**](https://docs.microsoft.com/python/api/azureml-core/azureml.core.runconfiguration?view=azure-ml-py) to specify requirements for the PythonScriptStep, such as conda dependencies and docker image."
      ]
    },
    {
@@ -275,12 +265,10 @@
        "prep_step = PythonScriptStep(name='prepare step',\n",
        "                             script_name=\"prepare.py\",\n",
        "                             # mount fashion_ds dataset to the compute_target\n",
-        "                             inputs=[fashion_ds.as_named_input('fashion_ds').as_mount()],\n",
-        "                             outputs=[prepared_fashion_ds],\n",
+        "                             arguments=[fashion_ds.as_named_input('fashion_ds').as_mount(), prepared_fashion_ds],\n",
        "                             source_directory=script_folder,\n",
        "                             compute_target=compute_target,\n",
-        "                             runconfig=run_config,\n",
-        "                             allow_reuse=False)"
+        "                             allow_reuse=True)"
      ]
    },
    {
@@ -289,9 +277,7 @@
      "source": [
        "### Step 2: train CNN with Keras\n",
        "\n",
-        "Next, we construct an `azureml.train.dnn.TensorFlow` estimator object. The TensorFlow estimator is providing a simple way of launching a TensorFlow training job on a compute target. It will automatically provide a docker image that has TensorFlow installed.\n",
-        "\n",
-        "[EstimatorStep](https://docs.microsoft.com/en-us/python/api/azureml-pipeline-steps/azureml.pipeline.steps.estimator_step.estimatorstep?view=azure-ml-py) adds a step to run Tensorflow Estimator in a Pipeline. It takes a dataset as the input."
+        "Next, we construct an `azureml.train.estimator.Estimator` object. [EstimatorStep](https://docs.microsoft.com/python/api/azureml-pipeline-steps/azureml.pipeline.steps.estimator_step.estimatorstep?view=azure-ml-py) adds a step to run the Estimator in a Pipeline. It takes a dataset as the input."
      ]
    },
    {
@@ -300,17 +286,17 @@
      "metadata": {},
      "outputs": [],
      "source": [
-        "# set up training step with Tensorflow estimator\n",
-        "est = TensorFlow(entry_script='train.py',\n",
-        "                 source_directory=script_folder,\n",
-        "                 pip_packages = ['azureml-sdk', 'keras<=2.3.1', 'tensorflow==2.1.0', 'numpy','scikit-learn', 'matplotlib'],\n",
-        "                 compute_target=compute_target)\n",
+        "from azureml.train.estimator import Estimator\n",
+        "# set up training step with Estimator\n",
+        "est = Estimator(entry_script='train.py',\n",
+        "                source_directory=script_folder,\n",
+        "                pip_packages=['keras','tensorflow','numpy','scikit-learn', 'matplotlib','pandas'],\n",
+        "                compute_target=compute_target)\n",
        "\n",
        "est_step = EstimatorStep(name='train step',\n",
        "                         estimator=est,\n",
-        "                         estimator_entry_script_arguments=[],\n",
-        "                         # parse prepared_fashion_ds into TabularDataset and use it as the input\n",
-        "                         inputs=[prepared_fashion_ds.parse_delimited_files()],\n",
+        "                         # parse prepared_fashion_ds into tabulardataset and use it as input\n",
+        "                         estimator_entry_script_arguments=[prepared_fashion_ds.read_delimited_files().as_input(name='prepared_fashion_ds')],\n",
        "                         compute_target=compute_target)"
      ]
    },
@@ -321,7 +307,7 @@
        "### Build the pipeline\n",
        "Once we have the steps (or steps collection), we can build the [pipeline](https://docs.microsoft.com/python/api/azureml-pipeline-core/azureml.pipeline.core.pipeline.pipeline?view=azure-ml-py).\n",
        "\n",
-        "A pipeline is created with a list of steps and a workspace. Submit a pipeline using [submit](https://docs.microsoft.com/python/api/azureml-core/azureml.core.experiment(class)?view=azure-ml-py#submit-config--tags-none----kwargs-). When submit is called, a [PipelineRun](https://docs.microsoft.com/python/api/azureml-pipeline-core/azureml.pipeline.core.pipelinerun?view=azure-ml-py) is created which in turn creates [StepRun](https://docs.microsoft.com/python/api/azureml-pipeline-core/azureml.pipeline.core.steprun?view=azure-ml-py) objects for each step in the workflow."
+        "A pipeline is created with a list of steps and a workspace. Submit a pipeline using `submit`. When submit is called, a [PipelineRun](https://docs.microsoft.com/python/api/azureml-pipeline-core/azureml.pipeline.core.pipelinerun?view=azure-ml-py) is created which in turn creates [StepRun](https://docs.microsoft.com/python/api/azureml-pipeline-core/azureml.pipeline.core.steprun?view=azure-ml-py) objects for each step in the workflow."
      ]
    },
    {
@@ -374,23 +360,7 @@
      "cell_type": "markdown",
      "metadata": {},
      "source": [
-        "Azure Machine Learning dataset makes it easy to trace how your data is used in ML. [Learn More](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-version-track-datasets#track-datasets-in-experiments)<br>\n",
-        "For each Machine Learning experiment, you can easily trace the datasets used as the input through `Run` object."
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "# get input datasets\n",
-        "prep_step = run.find_step_run('prepare step')[0]\n",
-        "inputs = prep_step.get_details()['inputDatasets']\n",
-        "input_dataset = inputs[0]['dataset']\n",
-        "\n",
-        "# list the files referenced by input_dataset\n",
-        "input_dataset.to_path()"
+        "Azure Machine Learning dataset makes it easy to trace how your data is used in ML. [Learn More](https://docs.microsoft.com/azure/machine-learning/service/how-to-version-track-datasets#track-datasets-in-experiments)<br>"
      ]
    },
    {
@@ -406,11 +376,10 @@
      "metadata": {},
      "outputs": [],
      "source": [
-        "fashion_ds = input_dataset.register(workspace = workspace,\n",
-        "                                    name = 'fashion_ds',\n",
-        "                                    description = 'image and label files from fashion mnist',\n",
-        "                                    create_new_version = True)\n",
-        "fashion_ds"
+        "fashion_ds = fashion_ds.register(workspace = workspace,\n",
+        "                                 name = 'fashion_ds',\n",
+        "                                 description = 'image and label files from fashion mnist',\n",
+        "                                 create_new_version = True)"
      ]
    },
    {
--- a/how-to-use-azureml/work-with-data/datasets-tutorial/scriptrun-with-data-input-output/how-to-use-scriptrun.ipynb
+++ b/how-to-use-azureml/work-with-data/datasets-tutorial/scriptrun-with-data-input-output/how-to-use-scriptrun.ipynb
@@ -0,0 +1,320 @@
+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Copyright (c) Microsoft Corporation. All rights reserved.\n",
+        "\n",
+        "Licensed under the MIT License."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/work-with-data/datasets-tutorial/scriptun-with-data-input-output.png)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "# How to use ScriptRun with data input and output\n",
+        "\n",
+        "This notebook shows how to use [ScriptRun](https://docs.microsoft.com/python/api/azureml-core/azureml.core.script_run.scriptrun?view=azure-ml-py) with input and output. A run submitted with ScriptRunConfig represents a single trial in an experiment. Submitting the run returns a ScriptRun object, which can be used to monitor the asynchronous execution of the run, log metrics and store output of the run, and analyze results and access artifacts generated by the run.\n",
+        "\n",
+        "\n",
+        "## Prerequisite:\n",
+        "* Understand the [architecture and terms](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture) introduced by Azure Machine Learning\n",
+        "* If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [configuration notebook](https://aka.ms/pl-config) to:\n",
+        "    * install the AML SDK\n",
+        "    * create a workspace and its configuration file (`config.json`)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## Initialize workspace\n",
+        "Initialize a [Workspace](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#workspace) object from the existing workspace you created in the Prerequisites step. `Workspace.from_config()` creates a workspace object from the details stored in `config.json`."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "from azureml.core import Workspace\n",
+        "ws = Workspace.from_config()\n",
+        "print('Workspace name: ' + ws.name, \n",
+        "      'Azure region: ' + ws.location, \n",
+        "      'Subscription id: ' + ws.subscription_id, \n",
+        "      'Resource group: ' + ws.resource_group, sep = '\\n')"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## Create or Attach existing AmlCompute\n",
+        "You will need to create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for training your model. In this tutorial, you create `AmlCompute` as your training compute resource."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "If we could not find the cluster with the given name, then we will create a new cluster here. We will create an `AmlCompute` cluster of `STANDARD_NC6` GPU VMs. This process is broken down into 3 steps:\n",
+        "1. create the configuration (this step is local and only takes a second)\n",
+        "2. create the cluster (this step will take about **20 seconds**)\n",
+        "3. provision the VMs to bring the cluster to the initial size (of 1 in this case). This step will take about **3-5 minutes** and is providing only sparse output in the process. Please make sure to wait until the call returns before moving to the next cell"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "from azureml.core.compute import ComputeTarget, AmlCompute\n",
+        "from azureml.core.compute_target import ComputeTargetException\n",
+        "\n",
+        "# choose a name for your cluster\n",
+        "cluster_name = \"amlcomp\"\n",
+        "\n",
+        "try:\n",
+        "    cpu_cluster = ComputeTarget(workspace=ws, name=cluster_name)\n",
+        "    print('Found existing compute target')\n",
+        "except ComputeTargetException:\n",
+        "    print('Creating a new compute target...')\n",
+        "    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6', max_nodes=4)\n",
+        "\n",
+        "    # create the cluster\n",
+        "    cpu_cluster = ComputeTarget.create(ws, cluster_name, compute_config)\n",
+        "\n",
+        "    # can poll for a minimum number of nodes and for a specific timeout. \n",
+        "    # if no min node count is provided it uses the scale settings for the cluster\n",
+        "    cpu_cluster.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)\n",
+        "\n",
+        "# use get_status() to get a detailed status for the current cluster. \n",
+        "print(cpu_cluster.get_status().serialize())"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Now that you have created the compute target, let's see what the workspace's `compute_targets` property returns. You should now see one entry named 'amlcomp' of type `AmlCompute`."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## Use a simple script\n",
+        "We have already created a simple \"hello world\" script. This is the script that we will submit through the [ScriptRunConfig](https://docs.microsoft.com/python/api/azureml-core/azureml.core.script_run_config.scriptrunconfig?view=azure-ml-py). It reads the iris dataset as input and writes it out to the `outputdataset` folder in the default blob datastore."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "source_directory = 'script_run'"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "%%writefile $source_directory/dummy_train.py\n",
+        "\n",
+        "# Copyright (c) Microsoft Corporation. All rights reserved.\n",
+        "# Licensed under the MIT License.\n",
+        "import sys\n",
+        "import os\n",
+        "\n",
+        "print(\"*********************************************************\")\n",
+        "print(\"Hello Azure ML!\")\n",
+        "\n",
+        "mounted_input_path = sys.argv[1]\n",
+        "mounted_output_path = sys.argv[2]\n",
+        "\n",
+        "print(\"Argument 1: %s\" % mounted_input_path)\n",
+        "print(\"Argument 2: %s\" % mounted_output_path)\n",
+        "    \n",
+        "with open(mounted_input_path, 'r') as f:\n",
+        "    content = f.read()\n",
+        "    with open(os.path.join(mounted_output_path, 'output.csv'), 'w') as fw:\n",
+        "        fw.write(content)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Every workspace comes with a default datastore (and you can register more) which is backed by the Azure blob storage account associated with the workspace. We can use it to transfer data from local to the cloud, and create a dataset from it. We will now upload the Iris data to the default datastore (blob) within your workspace."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "def_blob_store = ws.get_default_datastore()\n",
+        "def_blob_store.upload_files(files = ['iris.csv'],\n",
+        "                       target_path = 'script-run/',\n",
+        "                       overwrite = True,\n",
+        "                       show_progress = True)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Now we are ready to define the input and output of your script. They can be passed in via `arguments`, which is a list of command-line arguments to pass to the training script specified in `script`."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "from azureml.core import Dataset\n",
+        "from azureml.data import OutputFileDatasetConfig\n",
+        "\n",
+        "input_data = Dataset.File.from_files(def_blob_store.path('script-run/iris.csv')).as_named_input('input').as_mount()\n",
+        "\n",
+        "# output is configured to write the result back to def_blob_store, under the \"sample/outputdataset\" folder\n",
+        "# learn more about options to configure the output, run 'help(OutputFileDatasetConfig)'\n",
+        "output = OutputFileDatasetConfig(destination=(def_blob_store, 'sample/outputdataset'))"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "from azureml.core import Environment\n",
+        "from azureml.core.conda_dependencies import CondaDependencies\n",
+        "\n",
+        "myenv = Environment(\"myenv\")\n",
+        "\n",
+        "myenv.docker.enabled = True\n",
+        "myenv.python.conda_dependencies = CondaDependencies.create(pip_packages=['azureml-sdk>=1.12.0'])"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "from azureml.core import ScriptRunConfig\n",
+        "\n",
+        "src = ScriptRunConfig(source_directory=source_directory, \n",
+        "                      script='dummy_train.py', \n",
+        "                      # to mount the dataset on the remote compute and pass the mounted path as an argument to the training script\n",
+        "                      arguments =[input_data, output])\n",
+        "\n",
+        "src.run_config.framework = 'python'\n",
+        "src.run_config.target = cpu_cluster.name\n",
+        "\n",
+        "# Set environment\n",
+        "src.run_config.environment = myenv"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## Build and Submit the Experiment"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "from azureml.core import Experiment\n",
+        "exp = Experiment(ws, 'ScriptRun_sample')\n",
+        "run = exp.submit(config=src)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## View Run Details"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "run.wait_for_completion(show_output=True)"
+      ]
+    }
+  ],
+  "metadata": {
+    "authors": [
+      {
+        "name": "sihhu"
+      }
+    ],
+    "category": "tutorial",
+    "compute": [
+      "AML Compute"
+    ],
+    "datasets": [
+      "Custom"
+    ],
+    "deployment": [
+      "None"
+    ],
+    "exclude_from_index": false,
+    "framework": [
+      "Azure ML"
+    ],
+    "friendly_name": "How to use ScriptRun with data input and output",
+    "kernelspec": {
+      "display_name": "Python 3.6",
+      "language": "python",
+      "name": "python36"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.6.9"
+    },
+    "order_index": 7,
+    "star_tag": [
+      "None"
+    ],
+    "tags": [
+      "Dataset",
+      "ScriptRun"
+    ],
+    "task": "Demonstrates the use of Scriptrun with datasets"
+  },
+  "nbformat": 4,
+  "nbformat_minor": 2
+}
--- a/how-to-use-azureml/work-with-data/datasets-tutorial/scriptrun-with-data-input-output/how-to-use-scriptrun.yml
+++ b/how-to-use-azureml/work-with-data/datasets-tutorial/scriptrun-with-data-input-output/how-to-use-scriptrun.yml
@@ -0,0 +1,4 @@
+name: how-to-use-scriptrun
+dependencies:
+- pip:
+  - azureml-sdk
--- a/how-to-use-azureml/work-with-data/datasets-tutorial/scriptrun-with-data-input-output/script_run/dummy_train.py
+++ b/how-to-use-azureml/work-with-data/datasets-tutorial/scriptrun-with-data-input-output/script_run/dummy_train.py
@@ -0,0 +1,19 @@
+
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+import sys
+import os
+
+print("*********************************************************")
+print("Hello Azure ML!")
+
+mounted_input_path = sys.argv[1]
+mounted_output_path = sys.argv[2]
+
+print("Argument 1: %s" % mounted_input_path)
+print("Argument 2: %s" % mounted_output_path)
+
+with open(mounted_input_path, 'r') as f:
+    content = f.read()
+    with open(os.path.join(mounted_output_path, 'output.csv'), 'w') as fw:
+        fw.write(content)