diff --git a/01.getting-started/01.train-within-notebook/01.train-within-notebook.ipynb b/01.getting-started/01.train-within-notebook/01.train-within-notebook.ipynb index 85ca1c40..885a5e8a 100644 --- a/01.getting-started/01.train-within-notebook/01.train-within-notebook.ipynb +++ b/01.getting-started/01.train-within-notebook/01.train-within-notebook.ipynb @@ -448,8 +448,8 @@ "outputs": [], "source": [ "models = ws.models(name='best_model')\n", - "for name, m in models.items():\n", - " print(name, m.version)" + "for m in models:\n", + " print(m.name, m.version)" ] }, { @@ -778,7 +778,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python [default]", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -792,7 +792,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.6" + "version": "3.6.4" } }, "nbformat": 4, diff --git a/01.getting-started/03.train-on-aci/.ipynb_checkpoints/03.train-on-aci-checkpoint.ipynb b/01.getting-started/03.train-on-aci/.ipynb_checkpoints/03.train-on-aci-checkpoint.ipynb new file mode 100644 index 00000000..00667e74 --- /dev/null +++ b/01.getting-started/03.train-on-aci/.ipynb_checkpoints/03.train-on-aci-checkpoint.ipynb @@ -0,0 +1,325 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 03. Train on Azure Container Instance (EXPERIMENTAL)\n", + "\n", + "* Create Workspace\n", + "* Create Project\n", + "* Create `train.py` in the project folder.\n", + "* Configure an ACI (Azure Container Instance) run\n", + "* Execute in ACI" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prerequisites\n", + "Make sure you go through the [00. Installation and Configuration](00.configuration.ipynb) Notebook first if you haven't." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check core SDK version number\n", + "import azureml.core\n", + "\n", + "print(\"SDK version:\", azureml.core.VERSION)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Initialize Workspace\n", + "\n", + "Initialize a workspace object from persisted configuration" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "create workspace" + ] + }, + "outputs": [], + "source": [ + "from azureml.core import Workspace\n", + "\n", + "ws = Workspace.from_config()\n", + "print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create An Experiment\n", + "\n", + "**Experiment** is a logical container in an Azure ML Workspace. It hosts run records which can include run metrics and output artifacts from your experiments." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core import Experiment\n", + "experiment_name = 'train-on-aci'\n", + "experiment = Experiment(workspace = ws, name = experiment_name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a folder to store the training script." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "script_folder = './samples/train-on-aci'\n", + "os.makedirs(script_folder, exist_ok = True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Remote execution on ACI\n", + "\n", + "Use `%%writefile` magic to write training code to `train.py` file under the project folder." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile $script_folder/train.py\n", + "\n", + "import os\n", + "from sklearn.datasets import load_diabetes\n", + "from sklearn.linear_model import Ridge\n", + "from sklearn.metrics import mean_squared_error\n", + "from sklearn.model_selection import train_test_split\n", + "from azureml.core.run import Run\n", + "from sklearn.externals import joblib\n", + "\n", + "import numpy as np\n", + "\n", + "os.makedirs('./outputs', exist_ok=True)\n", + "\n", + "X, y = load_diabetes(return_X_y = True)\n", + "\n", + "run = Run.get_submitted_run()\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)\n", + "data = {\"train\": {\"X\": X_train, \"y\": y_train},\n", + " \"test\": {\"X\": X_test, \"y\": y_test}}\n", + "\n", + "# list of numbers from 0.0 to 1.0 with a 0.05 interval\n", + "alphas = np.arange(0.0, 1.0, 0.05)\n", + "\n", + "for alpha in alphas:\n", + " # Use Ridge algorithm to create a regression model\n", + " reg = Ridge(alpha = alpha)\n", + " reg.fit(data[\"train\"][\"X\"], data[\"train\"][\"y\"])\n", + "\n", + " preds = reg.predict(data[\"test\"][\"X\"])\n", + " mse = mean_squared_error(preds, data[\"test\"][\"y\"])\n", + " run.log('alpha', alpha)\n", + " run.log('mse', mse)\n", + " \n", + " model_file_name = 'ridge_{0:.2f}.pkl'.format(alpha)\n", + " with open(model_file_name, \"wb\") as file:\n", + " joblib.dump(value = reg, filename = 'outputs/' + model_file_name)\n", + "\n", + " print('alpha is {0:.2f}, and mse is {1:0.2f}'.format(alpha, mse))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Configure for using ACI\n", + "Linux-based ACI is available in `westus`, `eastus`, `westeurope`, `northeurope`, `westus2` and `southeastasia` regions. See details [here](https://docs.microsoft.com/en-us/azure/container-instances/container-instances-quotas#region-availability)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "configure run" + ] + }, + "outputs": [], + "source": [ + "from azureml.core.runconfig import RunConfiguration\n", + "from azureml.core.conda_dependencies import CondaDependencies\n", + "\n", + "# create a new runconfig object\n", + "run_config = RunConfiguration()\n", + "\n", + "# signal that you want to use ACI to execute script.\n", + "run_config.target = \"containerinstance\"\n", + "\n", + "# ACI container group is only supported in certain regions, which can be different than the region the Workspace is in.\n", + "run_config.container_instance.region = 'eastus'\n", + "\n", + "# set the ACI CPU and Memory \n", + "run_config.container_instance.cpu_cores = 1\n", + "run_config.container_instance.memory_gb = 2\n", + "\n", + "# enable Docker \n", + "run_config.environment.docker.enabled = True\n", + "\n", + "# set Docker base image to the default CPU-based image\n", + "run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n", + "#run_config.environment.docker.base_image = 'microsoft/mmlspark:plus-0.9.9'\n", + "\n", + "# use conda_dependencies.yml to create a conda environment in the Docker image for execution\n", + "run_config.environment.python.user_managed_dependencies = False\n", + "\n", + "# auto-prepare the Docker image when used for execution (if it is not already prepared)\n", + "run_config.auto_prepare_environment = True\n", + "\n", + "# specify CondaDependencies obj\n", + "run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Submit the Experiment\n", + "Finally, run the training job on the ACI" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "remote run", + "aci" + ] + }, + "outputs": [], + "source": [ + "%%time \n", + "from azureml.core.script_run_config import ScriptRunConfig\n", + "\n", + "script_run_config = ScriptRunConfig(source_directory = script_folder,\n", + " script= 'train.py',\n", + " run_config = run_config)\n", + "\n", + "run = experiment.submit(script_run_config)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "remote run", + "aci" + ] + }, + "outputs": [], + "source": [ + "%%time\n", + "# Shows output of the run on stdout.\n", + "run.wait_for_completion(show_output = True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "query history" + ] + }, + "outputs": [], + "source": [ + "# Show run details\n", + "run" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "get metrics" + ] + }, + "outputs": [], + "source": [ + "# get all metris logged in the run\n", + "run.get_metrics()\n", + "metrics = run.get_metrics()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "print('When alpha is {1:0.2f}, we have min MSE {0:0.2f}.'.format(\n", + " min(metrics['mse']), \n", + " metrics['alpha'][np.argmin(metrics['mse'])]\n", + "))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + 
"pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/01.getting-started/04.train-on-remote-vm/.ipynb_checkpoints/04.train-on-remote-vm-checkpoint.ipynb b/01.getting-started/04.train-on-remote-vm/.ipynb_checkpoints/04.train-on-remote-vm-checkpoint.ipynb new file mode 100644 index 00000000..f5fe77c5 --- /dev/null +++ b/01.getting-started/04.train-on-remote-vm/.ipynb_checkpoints/04.train-on-remote-vm-checkpoint.ipynb @@ -0,0 +1,321 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 04. Train in a remote VM (MLC managed DSVM)\n", + "* Create Workspace\n", + "* Create Project\n", + "* Create `train.py` file\n", + "* Create DSVM as Machine Learning Compute (MLC) resource\n", + "* Configure & execute a run in a conda environment in the default miniconda Docker container on DSVM" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prerequisites\n", + "Make sure you go through the [00. Installation and Configuration](00.configuration.ipynb) Notebook first if you haven't." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check core SDK version number\n", + "import azureml.core\n", + "\n", + "print(\"SDK version:\", azureml.core.VERSION)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Initialize Workspace\n", + "\n", + "Initialize a workspace object from persisted configuration." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core import Workspace\n", + "\n", + "ws = Workspace.from_config()\n", + "print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create Experiment\n", + "\n", + "**Experiment** is a logical container in an Azure ML Workspace. It hosts run records which can include run metrics and output artifacts from your experiments." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "experiment_name = 'train-on-remote-vm'\n", + "\n", + "from azureml.core import Experiment\n", + "\n", + "exp = Experiment(workspace = ws, name = experiment_name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## View `train.py`\n", + "\n", + "For convenience, we created a training script for you. It is printed below as a text, but you can also run `%pfile ./train.py` in a cell to show the file." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with open('./train.py', 'r') as training_script:\n", + " print(training_script.read())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create Linux DSVM as a compute target\n", + "\n", + "**Note**: If creation fails with a message about Marketplace purchase eligibilty, go to portal.azure.com, start creating DSVM there, and select \"Want to create programmatically\" to enable programmatic creation. Once you've enabled it, you can exit without actually creating VM.\n", + " \n", + "**Note**: By default SSH runs on port 22 and you don't need to specify it. 
But if for security reasons you switch to a different port (such as 5022), you can append the port number to the address like the example below. [Read more](../../documentation/sdk/ssh-issue.md) on this." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.compute import DsvmCompute\n", + "from azureml.core.compute_target import ComputeTargetException\n", + "\n", + "compute_target_name = 'mydsvm'\n", + "\n", + "try:\n", + " dsvm_compute = DsvmCompute(workspace = ws, name = compute_target_name)\n", + " print('found existing:', dsvm_compute.name)\n", + "except ComputeTargetException:\n", + " print('creating new.')\n", + " dsvm_config = DsvmCompute.provisioning_configuration(vm_size = \"Standard_D2_v2\")\n", + " dsvm_compute = DsvmCompute.create(ws, name = compute_target_name, provisioning_configuration = dsvm_config)\n", + " dsvm_compute.wait_for_completion(show_output = True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Attach an existing Linux DSVM as a compute target\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "'''\n", + " from azureml.core.compute import RemoteCompute \n", + " # if you want to connect using SSH key instead of username/password you can provide parameters private_key_file and private_key_passphrase \n", + " dsvm_compute = RemoteCompute.attach(ws,name=\"attach-from-sdk6\",username=,address=,ssh_port=22,password=)\n", + "'''" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Configure & Run" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Configure a Docker run with new conda environment on the VM\n", + "You can execute in a Docker container in the VM. If you choose this route, you don't need to install anything on the VM yourself. Azure ML execution service will take care of it for you." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.runconfig import RunConfiguration\n", + "from azureml.core.conda_dependencies import CondaDependencies\n", + "\n", + "\n", + "# Load the \"cpu-dsvm.runconfig\" file (created by the above attach operation) in memory\n", + "run_config = RunConfiguration(framework = \"python\")\n", + "\n", + "# Set compute target to the Linux DSVM\n", + "run_config.target = compute_target_name\n", + "\n", + "# Use Docker in the remote VM\n", + "run_config.environment.docker.enabled = True\n", + "\n", + "# Use CPU base image from DockerHub\n", + "run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n", + "print('Base Docker image is:', run_config.environment.docker.base_image)\n", + "\n", + "# Ask system to provision a new one based on the conda_dependencies.yml file\n", + "run_config.environment.python.user_managed_dependencies = False\n", + "\n", + "# Prepare the Docker and conda environment automatically when executingfor the first time.\n", + "run_config.prepare_environment = True\n", + "\n", + "# specify CondaDependencies obj\n", + "run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Submit the Experiment\n", + "Submit script to run in the Docker image in the remote VM. 
If you run this for the first time, the system will download the base image, layer in packages specified in the `conda_dependencies.yml` file on top of the base image, create a container and then execute the script in the container." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core import Run\n", + "from azureml.core import ScriptRunConfig\n", + "\n", + "src = ScriptRunConfig(source_directory = '.', script = 'train.py', run_config = run_config)\n", + "run = exp.submit(src)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### View run history details" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run.wait_for_completion(show_output = True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Find the best run" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# get all metris logged in the run\n", + "run.get_metrics()\n", + "metrics = run.get_metrics()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "print('When alpha is {1:0.2f}, we have min MSE {0:0.2f}.'.format(\n", + " min(metrics['mse']), \n", + " metrics['alpha'][np.argmin(metrics['mse'])]\n", + "))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Clean up compute resource" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dsvm_compute.delete()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/01.getting-started/04.train-on-remote-vm/04.train-on-remote-vm.ipynb b/01.getting-started/04.train-on-remote-vm/04.train-on-remote-vm.ipynb index 4576d155..71f19676 100644 --- a/01.getting-started/04.train-on-remote-vm/04.train-on-remote-vm.ipynb +++ b/01.getting-started/04.train-on-remote-vm/04.train-on-remote-vm.ipynb @@ -66,7 +66,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Create Experiment" + "## Create Experiment\n", + "\n", + "**Experiment** is a logical container in an Azure ML Workspace. It hosts run records which can include run metrics and output artifacts from your experiments." ] }, { @@ -76,18 +78,7 @@ "outputs": [], "source": [ "experiment_name = 'train-on-remote-vm'\n", - "script_folder = './samples/train-on-remote-vm'\n", "\n", - "import os\n", - "os.makedirs(script_folder, exist_ok = True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ "from azureml.core import Experiment\n", "\n", "exp = Experiment(workspace = ws, name = experiment_name)" @@ -97,9 +88,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Create `train.py`\n", + "## View `train.py`\n", "\n", - "Use `%%writefile` magic to write training code to `train.py` file under your project folder." 
+ "For convenience, we created a training script for you. It is printed below as a text, but you can also run `%pfile ./train.py` in a cell to show the file." ] }, { @@ -108,46 +99,8 @@ "metadata": {}, "outputs": [], "source": [ - "%%writefile $script_folder/train.py\n", - "\n", - "import os\n", - "from sklearn.datasets import load_diabetes\n", - "from sklearn.linear_model import Ridge\n", - "from sklearn.metrics import mean_squared_error\n", - "from sklearn.model_selection import train_test_split\n", - "from azureml.core.run import Run\n", - "from sklearn.externals import joblib\n", - "\n", - "import numpy as np\n", - "\n", - "os.makedirs('./outputs', exist_ok=True)\n", - "\n", - "X, y = load_diabetes(return_X_y = True)\n", - "\n", - "run = Run.get_submitted_run()\n", - "\n", - "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)\n", - "data = {\"train\": {\"X\": X_train, \"y\": y_train},\n", - " \"test\": {\"X\": X_test, \"y\": y_test}}\n", - "\n", - "# list of numbers from 0.0 to 1.0 with a 0.05 interval\n", - "alphas = np.arange(0.0, 1.0, 0.05)\n", - "\n", - "for alpha in alphas:\n", - " # Use Ridge algorithm to create a regression model\n", - " reg = Ridge(alpha = alpha)\n", - " reg.fit(data[\"train\"][\"X\"], data[\"train\"][\"y\"])\n", - "\n", - " preds = reg.predict(data[\"test\"][\"X\"])\n", - " mse = mean_squared_error(preds, data[\"test\"][\"y\"])\n", - " run.log('alpha', alpha)\n", - " run.log('mse', mse)\n", - " \n", - " model_file_name = 'ridge_{0:.2f}.pkl'.format(alpha)\n", - " with open(model_file_name, \"wb\") as file:\n", - " joblib.dump(value = reg, filename = 'outputs/' + model_file_name)\n", - "\n", - " print('alpha is {0:.2f}, and mse is {1:0.2f}'.format(alpha, mse))" + "with open('./train.py', 'r') as training_script:\n", + " print(training_script.read())" ] }, { @@ -158,7 +111,7 @@ "\n", "**Note**: If creation fails with a message about Marketplace purchase eligibilty, go to portal.azure.com, start creating DSVM there, and select \"Want to create programmatically\" to enable programmatic creation. Once you've enabled it, you can exit without actually creating VM.\n", " \n", - "**Note**: By default SSH runs on port 22 and you don't need to specify it. But if for security reasons you switch to a different port (such as 5022), you can append the port number to the address like the example below. [Read more](../../documentation/sdk/ssh-issue.md) on this." + "**Note**: By default SSH runs on port 22 and you don't need to specify it. But if for security reasons you switch to a different port (such as 5022), you can append the port number to the address like the example below." 
] }, { @@ -267,9 +220,8 @@ "from azureml.core import Run\n", "from azureml.core import ScriptRunConfig\n", "\n", - "src = ScriptRunConfig(source_directory = script_folder, script = 'train.py', run_config = run_config)\n", - "run = exp.submit(src)\n", - "run.wait_for_completion(show_output = True)" + "src = ScriptRunConfig(source_directory = '.', script = 'train.py', run_config = run_config)\n", + "run = exp.submit(src)" ] }, { @@ -288,6 +240,15 @@ "run" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run.wait_for_completion(show_output = True)" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -334,13 +295,6 @@ "source": [ "dsvm_compute.delete()" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/01.getting-started/04.train-on-remote-vm/train.py b/01.getting-started/04.train-on-remote-vm/train.py index e140e85f..9f039de5 100644 --- a/01.getting-started/04.train-on-remote-vm/train.py +++ b/01.getting-started/04.train-on-remote-vm/train.py @@ -1,10 +1,12 @@ +# Copyright (c) Microsoft. All rights reserved. +# Licensed under the MIT license. import os from sklearn.datasets import load_diabetes from sklearn.linear_model import Ridge from sklearn.metrics import mean_squared_error from sklearn.model_selection import train_test_split -from azureml.core import Run +from azureml.core.run import Run from sklearn.externals import joblib import numpy as np @@ -15,7 +17,8 @@ X, y = load_diabetes(return_X_y=True) run = Run.get_submitted_run() -X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0) +X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.2, random_state=0) data = {"train": {"X": X_train, "y": y_train}, "test": {"X": X_test, "y": y_test}} diff --git a/01.getting-started/05.train-in-spark/.ipynb_checkpoints/05.train-in-spark-checkpoint.ipynb b/01.getting-started/05.train-in-spark/.ipynb_checkpoints/05.train-in-spark-checkpoint.ipynb new file mode 100644 index 00000000..0eb8763f --- /dev/null +++ b/01.getting-started/05.train-in-spark/.ipynb_checkpoints/05.train-in-spark-checkpoint.ipynb @@ -0,0 +1,257 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 05. Train in Spark\n", + "* Create Workspace\n", + "* Create Experiment\n", + "* Copy relevant files to the script folder\n", + "* Configure and Run" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prerequisites\n", + "Make sure you go through the [00. Installation and Configuration](00.configuration.ipynb) Notebook first if you haven't." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check core SDK version number\n", + "import azureml.core\n", + "\n", + "print(\"SDK version:\", azureml.core.VERSION)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Initialize Workspace\n", + "\n", + "Initialize a workspace object from persisted configuration." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core import Workspace\n", + "\n", + "ws = Workspace.from_config()\n", + "print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create Experiment\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "experiment_name = 'train-on-remote-vm'\n", + "\n", + "from azureml.core import Experiment\n", + "\n", + "exp = Experiment(workspace = ws, name = experiment_name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## View `train-spark.py`\n", + "\n", + "For convenience, we created a training script for you. It is printed below as a text, but you can also run `%pfile ./train-spark.py` in a cell to show the file." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with open('train-spark.py', 'r') as training_script:\n", + " print(training_script.read())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Configure & Run" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Attach an HDI cluster\n", + "To use HDI commpute target:\n", + " 1. Create an Spark for HDI cluster in Azure. Here is some [quick instructions](https://docs.microsoft.com/en-us/azure/machine-learning/desktop-workbench/how-to-create-dsvm-hdi). Make sure you use the Ubuntu flavor, NOT CentOS.\n", + " 2. Enter the IP address, username and password below" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.compute import HDInsightCompute\n", + "\n", + "try:\n", + " # if you want to connect using SSH key instead of username/password you can provide parameters private_key_file and private_key_passphrase\n", + " hdi_compute_new = HDInsightCompute.attach(ws, \n", + " name=\"hdi-attach\", \n", + " address=\"hdi-ignite-demo-ssh.azurehdinsight.net\", \n", + " ssh_port=22, \n", + " username='', \n", + " password='')\n", + "\n", + "except UserErrorException as e:\n", + " print(\"Caught = {}\".format(e.message))\n", + " print(\"Compute config already attached.\")\n", + " \n", + " \n", + "hdi_compute_new.wait_for_completion(show_output=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Configure HDI run" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.runconfig import RunConfiguration\n", + "from azureml.core.conda_dependencies import CondaDependencies\n", + "\n", + "\n", + "# Load the \"cpu-dsvm.runconfig\" file (created by the above attach operation) in memory\n", + "run_config = RunConfiguration(framework = \"python\")\n", + "\n", + "# Set compute target to the Linux DSVM\n", + "run_config.target = hdi_compute.name\n", + "\n", + "# Use Docker in the remote VM\n", + "# run_config.environment.docker.enabled = True\n", + "\n", + "# Use CPU base image from DockerHub\n", + "# run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n", + "# print('Base Docker image is:', run_config.environment.docker.base_image)\n", + "\n", + "# Ask system to provision a new one based on the conda_dependencies.yml file\n", + "run_config.environment.python.user_managed_dependencies = False\n", + "\n", + "# 
Prepare the Docker and conda environment automatically when executingfor the first time.\n", + "# run_config.prepare_environment = True\n", + "\n", + "# specify CondaDependencies obj\n", + "# run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])\n", + "# load the runconfig object from the \"myhdi.runconfig\" file generated by the attach operaton above." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Submit the script to HDI" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "script_run_config = ScriptRunConfig(source_directory = '.',\n", + " script= 'train-spark.py',\n", + " run_config = run_config)\n", + "run = experiment.submit(script_run_config)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# get the URL of the run history web page\n", + "run" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run.wait_for_completion(show_output = True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# get all metris logged in the run\n", + "metrics = run.get_metrics()\n", + "print(metrics)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/01.getting-started/05.train-in-spark/05.train-in-spark.ipynb b/01.getting-started/05.train-in-spark/05.train-in-spark.ipynb index 6ba366a4..6acea698 100644 --- a/01.getting-started/05.train-in-spark/05.train-in-spark.ipynb +++ b/01.getting-started/05.train-in-spark/05.train-in-spark.ipynb @@ -74,11 +74,7 @@ "metadata": {}, "outputs": [], "source": [ - "experiment_name = 'train-on-remote-vm'\n", - "script_folder = './samples/train-on-remote-vm'\n", - "\n", - "import os\n", - "os.makedirs(script_folder, exist_ok = True)\n", + "experiment_name = 'train-on-spark'\n", "\n", "from azureml.core import Experiment\n", "\n", @@ -89,10 +85,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Copy files\n", + "## View `train-spark.py`\n", "\n", - "\n", - "Copy `train-spark.py` and `iris.csv` into the project folde" + "For convenience, we created a training script for you. It is printed below as a text, but you can also run `%pfile ./train-spark.py` in a cell to show the file." ] }, { @@ -101,31 +96,8 @@ "metadata": {}, "outputs": [], "source": [ - "from shutil import copyfile\n", - "\n", - "# copy iris dataset in to project folder\n", - "copyfile('iris.csv', os.path.join(script_folder, 'iris.csv'))\n", - "\n", - "# copy train-spark.py file into project folder\n", - "# train-spark.py trains a simple LogisticRegression model using Spark.ML algorithm\n", - "copyfile('train-spark.py', os.path.join(script_folder, 'train-spark.py'))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Review the train-spark.py file in the project folder." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "with open(os.path.join(project_folder, 'train-spark.py'), 'r') as fin:\n", - " print(fin.read())" + "with open('train-spark.py', 'r') as training_script:\n", + " print(training_script.read())" ] }, { @@ -224,12 +196,10 @@ "metadata": {}, "outputs": [], "source": [ - "script_run_config = ScriptRunConfig(source_directory = project.project_directory,\n", + "script_run_config = ScriptRunConfig(source_directory = '.',\n", " script= 'train-spark.py',\n", " run_config = run_config)\n", - "run = experiment.submit(script_run_config)\n", - "\n", - "run.wait_for_completion(show_output = True)" + "run = experiment.submit(script_run_config)" ] }, { @@ -239,7 +209,16 @@ "outputs": [], "source": [ "# get the URL of the run history web page\n", - "print(helpers.get_run_history_url(run))" + "run" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run.wait_for_completion(show_output = True)" ] }, { @@ -256,7 +235,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python [default]", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -270,7 +249,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.6" + "version": "3.6.5" } }, "nbformat": 4, diff --git a/01.getting-started/05.train-in-spark/train-spark.py b/01.getting-started/05.train-in-spark/train-spark.py index 6fc7718e..bba3cf8b 100644 --- a/01.getting-started/05.train-in-spark/train-spark.py +++ b/01.getting-started/05.train-in-spark/train-spark.py @@ -1,3 +1,5 @@ +# Copyright (c) Microsoft. All rights reserved. +# Licensed under the MIT license. import numpy as np import pyspark diff --git a/01.getting-started/11.production-deploy-to-aks/11.production-deploy-to-aks.ipynb b/01.getting-started/11.production-deploy-to-aks/11.production-deploy-to-aks.ipynb index 720dccbf..bc300341 100644 --- a/01.getting-started/11.production-deploy-to-aks/11.production-deploy-to-aks.ipynb +++ b/01.getting-started/11.production-deploy-to-aks/11.production-deploy-to-aks.ipynb @@ -218,7 +218,7 @@ "source": [ "'''\n", "# Use the default configuration (can also provide parameters to customize)\n", - "resource_id = '/subscriptions//resourcegroups//providers/Microsoft.ContainerService/managedClusters/'\n", + "resource_id = '/subscriptions/92c76a2f-0e1c-4216-b65e-abf7a3f34c1e/resourcegroups/raymondsdk0604/providers/Microsoft.ContainerService/managedClusters/my-aks-0605d37425356b7d01'\n", "\n", "create_name='my-existing-aks' \n", "# Create the cluster\n", diff --git a/automl/09.auto-ml-classification-with-deployment.ipynb b/automl/09.auto-ml-classification-with-deployment.ipynb index 3ea27316..08eb64d7 100644 --- a/automl/09.auto-ml-classification-with-deployment.ipynb +++ b/automl/09.auto-ml-classification-with-deployment.ipynb @@ -204,7 +204,7 @@ "description = 'AutoML Model'\n", "tags = None\n", "model = local_run.register_model(description=description, tags=tags, iteration=8)\n", - "local_run.model_id # Use this id to deploy the model as a web service in Azure" + "local_run.model_id # This will be written to the script file later in the notebook." 
] }, { @@ -230,7 +230,7 @@ "\n", "def init():\n", " global model\n", - " model_path = Model.get_model_path(model_name = 'AutoMLbcfe9c23e8') # this name is model.id of model that we want to deploy\n", + " model_path = Model.get_model_path(model_name = '<>') # this name is model.id of model that we want to deploy\n", " # deserialize the model file back into a sklearn model\n", " model = joblib.load(model_path)\n", "\n", @@ -290,13 +290,6 @@ " print('{}\\t{}'.format(p, dependencies[p]))" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Then copy the version " - ] - }, { "cell_type": "code", "execution_count": null, @@ -311,12 +304,34 @@ " - pip:\n", " - numpy==1.14.2\n", " - scikit-learn==0.19.2\n", - " - --index-url https://azuremlsdktestpypi.azureedge.net/sdk-release/Preview/E7501C02541B433786111FE8E140CAA1\n", - " - --extra-index-url https://pypi.python.org/simple\n", - " - azureml-requirements\n", - " - azureml-train-automl==0.1.50\n", - " - azureml-sdk==0.1.50\n", - " - azureml-core==0.1.50" + " - azureml-sdk[notebooks,automl]==<> " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Substitute the actual version number in the environment file.\n", + "\n", + "conda_env_file_name = 'myenv.yml'\n", + "\n", + "with open(conda_env_file_name, 'r') as cefr:\n", + " content = cefr.read()\n", + "\n", + "with open(conda_env_file_name, 'w') as cefw:\n", + " cefw.write(content.replace('<>', dependencies['azureml-sdk']))\n", + "\n", + "# Substitute the actual model id in the script file.\n", + "\n", + "script_file_name = 'score.py'\n", + "\n", + "with open(script_file_name, 'r') as cefr:\n", + " content = cefr.read()\n", + "\n", + "with open(script_file_name, 'w') as cefw:\n", + " cefw.write(content.replace('<>', local_run.model_id))" ] }, { @@ -335,8 +350,8 @@ "from azureml.core.image import Image, ContainerImage\n", "\n", "image_config = ContainerImage.image_configuration(runtime= \"python\",\n", - " execution_script=\"score.py\",\n", - " conda_file=\"myenv.yml\",\n", + " execution_script = script_file_name,\n", + " conda_file = conda_env_file_name,\n", " tags = {'area': \"digits\", 'type': \"automl_classification\"},\n", " description = \"Image for automl classification sample\")\n", "\n", diff --git a/automl/13.auto-ml-dataprep.ipynb b/automl/13.auto-ml-dataprep.ipynb index 1d8bea42..c280f40c 100644 --- a/automl/13.auto-ml-dataprep.ipynb +++ b/automl/13.auto-ml-dataprep.ipynb @@ -46,7 +46,7 @@ "metadata": {}, "outputs": [], "source": [ - "!pip install --upgrade --extra-index-url https://dataprepdownloads.azureedge.net/pypi/autoML-BD0E9CABED27C837/0.1.1809.11043 azureml-dataprep --no-cache-dir --force-reinstall\n", + "!pip install azureml-dataprep\n", "!pip install tornado==4.5.1" ] }, @@ -56,7 +56,7 @@ "source": [ "## Diagnostics\n", "\n", - "Opt-in diagnostics for better experience, quality, and security of future releases" + "Opt-in diagnostics for better experience, quality, and security of future releases." 
] }, { @@ -66,7 +66,7 @@ "outputs": [], "source": [ "from azureml.telemetry import set_diagnostics_collection\n", - "set_diagnostics_collection(send_diagnostics=True)" + "set_diagnostics_collection(send_diagnostics = True)" ] }, { @@ -112,7 +112,7 @@ "# project folder\n", "project_folder = './sample_projects/automl-dataprep-classification'\n", " \n", - "experiment=Experiment(ws, experiment_name)\n", + "experiment = Experiment(ws, experiment_name)\n", " \n", "output = {}\n", "output['SDK version'] = azureml.core.VERSION\n", @@ -144,10 +144,10 @@ "simple_example_data_root = 'https://dprepdata.blob.core.windows.net/automl-notebook-data/'\n", "X = dprep.smart_read_file(simple_example_data_root + 'X.csv').skip(1) # remove header\n", "\n", - "# You can also use `read_csv` and `to_*` transformations to read (with overridable delimiter) \n", - "# and convert column types manually. \n", + "# You can also use `read_csv` and `to_*` transformations to read (with overridable delimiter).\n", + "# and convert column types manually.\n", "# Here we read a comma delimited file and convert all columns to integers.\n", - "y = dprep.read_csv(simple_example_data_root + 'y.csv').to_long(dprep.ColumnSelector(term='.*', use_regex=True))" + "y = dprep.read_csv(simple_example_data_root + 'y.csv').to_long(dprep.ColumnSelector(term='.*', use_regex = True))" ] }, { @@ -218,7 +218,7 @@ "automl_config = AutoMLConfig(task = 'classification',\n", " debug_log = 'automl_errors.log',\n", " X = X,\n", - " y = y, \n", + " y = y,\n", " **automl_settings)" ] }, @@ -235,7 +235,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Remote Run" + "## Remote Run\n", + "*Note: This feature might not work properly in your workspace region before the October update. You may jump to the \"Exploring the results\" section below to explore other features AutoML and DataPrep has to offer.*" ] }, { @@ -278,12 +279,6 @@ "outputs": [], "source": [ "cd = CondaDependencies()\n", - "cd.set_pip_index_url(index_url=\"--index-url https://azuremlsdktestpypi.azureedge.net/sdk-release/master/588E708E0DF342C4A80BD954289657CF\")\n", - "cd.set_pip_index_url(index_url=\"--extra-index-url https://dataprepdownloads.azureedge.net/pypi/autoML-BD0E9CABED27C837/0.1.1809.11043 --extra-index-url https://pypi.python.org/simple\")\n", - "cd.remove_pip_package(pip_package=\"azureml-defaults\")\n", - "cd.add_pip_package(pip_package='azureml-core')\n", - "cd.add_pip_package(pip_package='azureml-telemetry')\n", - "cd.add_pip_package(pip_package='azureml-train-automl')\n", "cd.add_pip_package(pip_package='azureml-dataprep')\n", "cd.add_pip_package(pip_package='tornado==4.5.1')" ] @@ -322,13 +317,15 @@ "outputs": [], "source": [ "automl_config = AutoMLConfig(task = 'classification',\n", - " debug_log = 'automl_errors.log',\n", - " path=project_folder,\n", - " run_configuration = run_config,\n", - " X = X,\n", - " y = y,\n", - " **automl_settings)\n", - "remote_run = experiment.submit(automl_config, show_output=True)" + " debug_log = 'automl_errors.log',\n", + " path = project_folder,\n", + " run_configuration = run_config,\n", + " X = X,\n", + " y = y,\n", + " **automl_settings)\n", + "# Please uncomment the line below to try out remote run with dataprep. 
\n", + "# This feature might not work properly in your workspace region before the October update.\n", + "# remote_run = experiment.submit(automl_config, show_output = True)" ] }, { @@ -363,8 +360,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "#### Retrieve All Child Runs\n", - "You can also use sdk methods to fetch all the child runs and see individual metrics that we log. " + "#### Retrieve all child runs\n", + "You can also use SDK methods to fetch all the child runs and see individual metrics that we log." ] }, { @@ -377,7 +374,7 @@ "metricslist = {}\n", "for run in children:\n", " properties = run.get_properties()\n", - " metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)} \n", + " metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)}\n", " metricslist[int(properties['iteration'])] = metrics\n", " \n", "import pandas as pd\n", @@ -541,8 +538,8 @@ "source": [ "digits_complete.to_pandas_dataframe().shape\n", "labels_column = 'Column64'\n", - "dflow_X = digits_complete.drop_columns(columns=[labels_column])\n", - "dflow_y = digits_complete.keep_columns(columns=[labels_column])" + "dflow_X = digits_complete.drop_columns(columns = [labels_column])\n", + "dflow_y = digits_complete.keep_columns(columns = [labels_column])" ] } ], @@ -562,7 +559,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.6" + "version": "3.6.5" } }, "nbformat": 4, diff --git a/automl/14a.auto-ml-classification-ensemble.ipynb b/automl/14a.auto-ml-classification-ensemble.ipynb index f5dd3981..42fd4fb9 100644 --- a/automl/14a.auto-ml-classification-ensemble.ipynb +++ b/automl/14a.auto-ml-classification-ensemble.ipynb @@ -21,10 +21,15 @@ "\n", "In this notebook you would see\n", "1. Creating an Experiment in an existing Workspace\n", - "2. Instantiating AutoMLConfig\n", + "2. Instantiating AutoMLConfig which enables an extra ensembling iteration\n", "3. Training the Model using local compute\n", "4. Exploring the results\n", - "5. Testing the fitted model\n" + "5. Testing the fitted model\n", + "\n", + "\n", + "** Disclaimers / Limitations **\n", + " - currently only local compute is supported for the ensembling iteration; support for remote compute will be coming soon\n", + " - currently only Train/Validation split is supported; support for cross-validation will be coming soon" ] }, { @@ -206,18 +211,6 @@ "local_run" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "local_run = local_run.continue_experiment(X = X_digits, \n", - " y = y_digits, \n", - " show_output = True,\n", - " iterations = 5)" - ] - }, { "cell_type": "markdown", "metadata": {}, diff --git a/automl/14b.auto-ml-regression-ensemble.ipynb b/automl/14b.auto-ml-regression-ensemble.ipynb index dec79f4b..d3855a37 100644 --- a/automl/14b.auto-ml-regression-ensemble.ipynb +++ b/automl/14b.auto-ml-regression-ensemble.ipynb @@ -21,10 +21,15 @@ "\n", "In this notebook you would see\n", "1. Creating an Experiment using an existing Workspace\n", - "2. Instantiating AutoMLConfig\n", + "2. Instantiating AutoMLConfig which enables an extra ensembling iteration\n", "3. Training the Model using local compute\n", "4. Exploring the results\n", - "5. Testing the fitted model" + "5. 
Testing the fitted model\n", + "\n", + "\n", + "** Disclaimers / Limitations **\n", + "- currently only local compute is supported for the ensembling iteration; support for remote compute will be coming soon\n", + "- currently only Train/Validation split is supported; support for cross-validation will be coming soon" ] }, { diff --git a/automl/README.md b/automl/README.md index bd1ad392..729ad62d 100644 --- a/automl/README.md +++ b/automl/README.md @@ -6,12 +6,12 @@ 5. [Running using python command](#pythoncommand) 6. [Troubleshooting](#troubleshooting) -# Automated machine learning introduction -Automated machine learning (automated ML) builds high quality machine learning models for you by automating model and hyperparameter selection. Bring a labelled dataset that you want to build a model for, automated ML will give you a high quality machine learning model that you can use for predictions. +# Auto ML Introduction +AutoML builds high quality Machine Learning models for you by automating model and hyperparameter selection. Bring a labelled dataset that you want to build a model for, AutoML will give you a high quality machine learning model that you can use for predictions. -If you are new to Data Science, automated ML will help you get jumpstarted by simplifying machine learning model building. It abstracts you from needing to perform model selection, hyperparameter selection and in one step creates a high quality trained model for you to use. +If you are new to Data Science, AutoML will help you get jumpstarted by simplifying machine learning model building. It abstracts you from needing to perform model selection, hyperparameter selection and in one step creates a high quality trained model for you to use. -If you are an experienced data scientist, automated ML will help increase your productivity by intelligently performing the model and hyperparameter selection for your training and generates high quality models much quicker than manually specifying several combinations of the parameters and running training jobs. automated ML provides visibility and access to all the training jobs and the performance characteristics of the models to help you further tune the pipeline if you desire. +If you are an experienced data scientist, AutoML will help increase your productivity by intelligently performing the model and hyperparameter selection for your training and generates high quality models much quicker than manually specifying several combinations of the parameters and running training jobs. AutoML provides visibility and access to all the training jobs and the performance characteristics of the models to help you further tune the pipeline if you desire. # Running samples in a Local Conda environment @@ -25,7 +25,7 @@ It is best if you create a new conda environment locally to try this SDK, so it There's no need to install mini-conda specifically. ### 2. Downloading the sample notebooks -- Download the sample notebooks from [GitHub](https://github.com/Azure/MachineLearningNotebooks) as zip and extract the contents to a local directory. The automated ML sample notebooks are in the "automl" folder. +- Download the sample notebooks from [GitHub](https://github.com/Azure/MachineLearningNotebooks) as zip and extract the contents to a local directory. The AutoML sample notebooks are in the "automl" folder. ### 3. 
Setup a new conda environment The **automl/automl_setup** script creates a new conda environment, installs the necessary packages, configures the widget and starts a jupyter notebook. @@ -58,7 +58,7 @@ automl_setup_linux.sh ### 5. Running Samples - Please make sure you use the Python [conda env:azure_automl] kernel when trying the sample Notebooks. -- Follow the instructions in the individual notebooks to explore various features in automated ML +- Follow the instructions in the individual notebooks to explore various features in AutoML # Auto ML SDK Sample Notebooks - [00.configuration.ipynb](00.configuration.ipynb) @@ -113,8 +113,8 @@ automl_setup_linux.sh - [07.auto-ml-exploring-previous-runs.ipynb](07.auto-ml-exploring-previous-runs) - List all projects for the workspace - - List all automated ML Runs for a given project - - Get details for a automated ML Run. (Automl settings, run widget & all metrics) + - List all AutoML Runs for a given project + - Get details for a AutoML Run. (Automl settings, run widget & all metrics) - Downlaod fitted pipeline for any iteration - [08.auto-ml-remote-execution-with-text-file-on-DSVM](08.auto-ml-remote-execution-with-text-file-on-DSVM.ipynb) @@ -151,11 +151,12 @@ automl_setup_linux.sh # Documentation ## Table of Contents -1. [Automated ML Settings ](#automlsettings) +1. [Auto ML Settings ](#automlsettings) 2. [Cross validation split options](#cvsplits) 3. [Get Data Syntax](#getdata) +4. [Data pre-processing and featurization](#preprocessing) -## Automated ML Settings +## Auto ML Settings |Property|Description|Default| |-|-|-| |**primary_metric**|This is the metric that you want to optimize.

<br>Classification supports the following primary metrics<br>accuracy<br>AUC_weighted<br>balanced_accuracy<br>average_precision_score_weighted<br>precision_score_weighted<br>Regression supports the following primary metrics<br>spearman_correlation<br>normalized_root_mean_squared_error<br>r2_score<br>normalized_mean_absolute_error<br>normalized_root_mean_squared_log_error|Classification: accuracy<br>
Regression: spearman_correlation @@ -195,6 +196,20 @@ The *get_data()* function can be used to return a dictionary with these values: |columns|Array of strings|data_train||*Optional* Whitelist of columns to use for features| |cv_splits_indices|Array of integers|data_train||*Optional* List of indexes to split the data for cross validation| +## Data pre-processing and featurization +If you use "preprocess=True", the following data preprocessing steps are performed automatically for you: +### 1. Dropping high cardinality or no variance features +- Features with no useful information are dropped from training and validation sets. These include features with all values missing, same value across all rows or with extremely high cardinality (e.g., hashes, IDs or GUIDs). +### 2. Missing value imputation +- For numerical features, missing values are imputed with average of values in the column. +- For categorical features, missing values are imputed with most frequent value. +### 3. Generating additional features +- For DateTime features: Year, Month, Day, Day of week, Day of year, Quarter, Week of the year, Hour, Minute, Second. +- For Text features: Term frequency based on bi-grams and tri-grams, Count vectorizer. +### 4. Transformations and encodings +- Numeric features with very few unique values are transformed into categorical features. +- Depending on cardinality of categorical features label encoding or (hashing) one-hot encoding is performed. + # Running using python command Jupyter notebook provides a File / Download as / Python (.py) option for saving the notebook as a Python file. You can then run this file using the python command. @@ -207,7 +222,7 @@ The main code of the file must be indented so that it is under this condition. # Troubleshooting ## Iterations fail and the log contains "MemoryError" -This can be caused by insufficient memory on the DSVM. Automated ML loads all training data into memory. So, the available memory should be more than the training data size. +This can be caused by insufficient memory on the DSVM. AutoML loads all training data into memory. So, the available memory should be more than the training data size. If you are using a remote DSVM, memory is needed for each concurrent iteration. The concurrent_iterations setting specifies the maximum concurrent iterations. For example, if the training data size is 8Gb and concurrent_iterations is set to 10, the minimum memory required is at least 80Gb. To resolve this issue, allocate a DSVM with more memory or reduce the value specified for concurrent_iterations. 
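The sketch below is illustrative only (it is not part of the patch above). It ties together two points from this README: `preprocess=True` enables the automatic featurization steps listed under "Data pre-processing and featurization", and when a notebook is exported as a `.py` file the main code must be indented under the `if __name__ == '__main__':` condition described in "Running using python command". The experiment name and the scikit-learn digits data are placeholders; parameter names follow the settings table above and may differ in later SDK versions.

```python
# example_automl_run.py -- illustrative sketch, assumes azureml-sdk[automl] and scikit-learn are installed
from azureml.core import Workspace, Experiment
from azureml.train.automl import AutoMLConfig
from sklearn import datasets

if __name__ == '__main__':
    # main code sits under this guard so the file can be run with the python command
    ws = Workspace.from_config()
    experiment = Experiment(workspace=ws, name='automl-preprocessing-example')  # placeholder name

    # small in-memory dataset; AutoML loads all training data into memory
    digits = datasets.load_digits()
    X = digits.data
    y = digits.target

    automl_config = AutoMLConfig(task='classification',
                                 primary_metric='AUC_weighted',
                                 iterations=10,
                                 preprocess=True,               # turn on automatic featurization
                                 debug_log='automl_errors.log',
                                 X=X,
                                 y=y)

    local_run = experiment.submit(automl_config, show_output=True)
```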
diff --git a/onnx/release.json b/onnx/release.json deleted file mode 100644 index b9e97250..00000000 --- a/onnx/release.json +++ /dev/null @@ -1,30 +0,0 @@ -{ - "channels": { - "master": [ - "sample-01", - "sample-02" - ], - "candidate": [ - "sample-01", - "sample-02" - ], - "preview": [ - "sample-01", - "sample-02" - ] - }, - "notebooks": { - "sample-01": { - "name": "onnx-inference-mnist.ipynb", - "widgets": [ "azureml.train.widgets" ], - "dependencies": [], - "requirements": [ "matplotlib", "numpy", "onnx"] - }, - "sample-02": { - "name": "onnx-inference-emotion-recognition.ipynb", - "widgets": ["azureml.train.widgets"], - "dependencies": [], - "requirements": [ "matplotlib", "numpy", "onnx"] - } - } -} \ No newline at end of file diff --git a/project-brainwave/project-brainwave-custom-weights.ipynb b/project-brainwave/project-brainwave-custom-weights.ipynb index 8e7129d3..99509235 100644 --- a/project-brainwave/project-brainwave-custom-weights.ipynb +++ b/project-brainwave/project-brainwave-custom-weights.ipynb @@ -388,7 +388,7 @@ "source": [ "from azureml.contrib.brainwave.pipeline import ModelDefinition, TensorflowStage, BrainWaveStage\n", "\n", - "model_def_path = os.path.join(save_path, 'model_def.zip')\n", + "model_def_path = os.path.join(saved_model_dir, 'model_def.zip')\n", "\n", "model_def = ModelDefinition()\n", "model_def.pipeline.append(TensorflowStage(sess, in_images, image_tensors))\n", @@ -609,7 +609,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.2" + "version": "3.5.2" } }, "nbformat": 4, diff --git a/project-brainwave/project-brainwave-quickstart.ipynb b/project-brainwave/project-brainwave-quickstart.ipynb index ba71f37b..e0b72dce 100644 --- a/project-brainwave/project-brainwave-quickstart.ipynb +++ b/project-brainwave/project-brainwave-quickstart.ipynb @@ -125,7 +125,7 @@ "from azureml.contrib.brainwave.pipeline import ModelDefinition, TensorflowStage, BrainWaveStage\n", "\n", "save_path = os.path.expanduser('~/models/save')\n", - "model_def_path = os.path.join(save_path, 'service_def.zip')\n", + "model_def_path = os.path.join(save_path, 'model_def.zip')\n", "\n", "model_def = ModelDefinition()\n", "with tf.Session() as sess:\n", @@ -301,7 +301,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.2" + "version": "3.5.2" } }, "nbformat": 4, diff --git a/project-brainwave/project-brainwave-transfer-learning.ipynb b/project-brainwave/project-brainwave-transfer-learning.ipynb index 6005b5ec..e5551f20 100644 --- a/project-brainwave/project-brainwave-transfer-learning.ipynb +++ b/project-brainwave/project-brainwave-transfer-learning.ipynb @@ -559,7 +559,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.2" + "version": "3.5.2" } }, "nbformat": 4, diff --git a/training/03.train-hyperparameter-tune-deploy-with-tensorflow/.ipynb_checkpoints/03.train-hyperparameter-tune-deploy-with-tensorflow-checkpoint.ipynb b/training/03.train-hyperparameter-tune-deploy-with-tensorflow/.ipynb_checkpoints/03.train-hyperparameter-tune-deploy-with-tensorflow-checkpoint.ipynb new file mode 100644 index 00000000..4d62b5f4 --- /dev/null +++ b/training/03.train-hyperparameter-tune-deploy-with-tensorflow/.ipynb_checkpoints/03.train-hyperparameter-tune-deploy-with-tensorflow-checkpoint.ipynb @@ -0,0 +1,1624 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. 
All rights reserved.\n", + "\n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "nbpresent": { + "id": "bf74d2e9-2708-49b1-934b-e0ede342f475" + } + }, + "source": [ + "# 03. Training MNIST dataset with hyperparameter tuning & deploy to ACI\n", + "\n", + "## Introduction\n", + "This tutorial shows how to train a simple deep neural network using the MNIST dataset and TensorFlow on Azure Machine Learning. MNIST is a popular dataset consisting of 70,000 grayscale images. Each image is a handwritten digit of `28x28` pixels, representing number from 0 to 9. The goal is to create a multi-class classifier to identify the digit each image represents, and deploy it as a web service in Azure.\n", + "\n", + "For more information about the MNIST dataset, please visit [Yan LeCun's website](http://yann.lecun.com/exdb/mnist/).\n", + "\n", + "## Prerequisite:\n", + "* Understand the [architecture and terms](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture) introduced by Azure Machine Learning\n", + "* Go through the [00.configuration.ipynb](https://github.com/Azure/MachineLearningNotebooks/blob/master/00.configuration.ipynb) notebook to:\n", + " * install the AML SDK\n", + " * create a workspace and its configuration file (`config.json`)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's get started. First let's import some Python libraries." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "nbpresent": { + "id": "c377ea0c-0cd9-4345-9be2-e20fb29c94c3" + } + }, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "import numpy as np\n", + "import os\n", + "import matplotlib\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "nbpresent": { + "id": "edaa7f2f-2439-4148-b57a-8c794c0945ec" + } + }, + "outputs": [], + "source": [ + "import azureml\n", + "from azureml.core import Workspace, Run\n", + "\n", + "# check core SDK version number\n", + "print(\"Azure ML SDK Version: \", azureml.core.VERSION)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Initialize workspace\n", + "Initialize a [Workspace](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#workspace) object from the existing workspace you created in the Prerequisites step. `Workspace.from_config()` creates a workspace object from the details stored in `config.json`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.workspace import Workspace\n", + "\n", + "ws = Workspace.from_config()\n", + "print('Workspace name: ' + ws.name, \n", + " 'Azure region: ' + ws.location, \n", + " 'Subscription id: ' + ws.subscription_id, \n", + " 'Resource group: ' + ws.resource_group, sep = '\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "nbpresent": { + "id": "59f52294-4a25-4c92-bab8-3b07f0f44d15" + } + }, + "source": [ + "## Create an Azure ML experiment\n", + "Let's create an experiment named \"tf-mnist\" and a folder to hold the training scripts. The script runs will be recorded under the experiment in Azure." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "nbpresent": { + "id": "bc70f780-c240-4779-96f3-bc5ef9a37d59" + } + }, + "outputs": [], + "source": [ + "from azureml.core import Experiment\n", + "\n", + "script_folder = './tf-mnist'\n", + "os.makedirs(script_folder, exist_ok=True)\n", + "\n", + "exp = Experiment(workspace=ws, name='tf-mnist')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "nbpresent": { + "id": "defe921f-8097-44c3-8336-8af6700804a7" + } + }, + "source": [ + "## Download MNIST dataset\n", + "In order to train on the MNIST dataset we will first need to download it from Yan LeCun's web site directly and save them in a `data` folder locally." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import urllib\n", + "\n", + "os.makedirs('./data/mnist', exist_ok=True)\n", + "\n", + "urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz', filename = './data/mnist/train-images.gz')\n", + "urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz', filename = './data/mnist/train-labels.gz')\n", + "urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz', filename = './data/mnist/test-images.gz')\n", + "urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz', filename = './data/mnist/test-labels.gz')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "nbpresent": { + "id": "c3f2f57c-7454-4d3e-b38d-b0946cf066ea" + } + }, + "source": [ + "## Show some sample images\n", + "Let's load the downloaded compressed file into numpy arrays using some utility functions included in the `utils.py` library file from the current folder. Then we use `matplotlib` to plot 30 random images from the dataset along with their labels." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "nbpresent": { + "id": "396d478b-34aa-4afa-9898-cdce8222a516" + } + }, + "outputs": [], + "source": [ + "from utils import load_data\n", + "\n", + "# note we also shrink the intensity values (X) from 0-255 to 0-1. This helps the neural network converge faster.\n", + "X_train = load_data('./data/mnist/train-images.gz', False) / 255.0\n", + "y_train = load_data('./data/mnist/train-labels.gz', True).reshape(-1)\n", + "\n", + "X_test = load_data('./data/mnist/test-images.gz', False) / 255.0\n", + "y_test = load_data('./data/mnist/test-labels.gz', True).reshape(-1)\n", + "\n", + "count = 0\n", + "sample_size = 30\n", + "plt.figure(figsize = (16, 6))\n", + "for i in np.random.permutation(X_train.shape[0])[:sample_size]:\n", + " count = count + 1\n", + " plt.subplot(1, sample_size, count)\n", + " plt.axhline('')\n", + " plt.axvline('')\n", + " plt.text(x = 10, y = -10, s = y_train[i], fontsize = 18)\n", + " plt.imshow(X_train[i].reshape(28, 28), cmap = plt.cm.Greys)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Upload MNIST dataset to default datastore \n", + "A [datastore](https://docs.microsoft.com/azure/machine-learning/service/how-to-access-data) is a place where data can be stored that is then made accessible to a Run either by means of mounting or copying the data to the compute target. A datastore can either be backed by an Azure Blob Storage or and Azure File Share (ADLS will be supported in the future). 
For simple data handling, each workspace provides a default datastore that can be used, in case the data is not already in Blob Storage or File Share.\n", + "\n", + "In this next step, we will upload the training and test set into the workspace's default datastore, which we will later mount on a Batch AI cluster for training.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ds = ws.get_default_datastore()\n", + "ds.upload(src_dir='./data/mnist', target_path='mnist', overwrite=True, show_progress=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create Batch AI cluster as compute target\n", + "[Batch AI](https://docs.microsoft.com/en-us/azure/batch-ai/overview) is a service for provisioning and managing clusters of Azure virtual machines for running machine learning workloads. Let's create a new Batch AI cluster in the current workspace, if it doesn't already exist. We will then run the training script on this compute target." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If a Batch AI cluster with the given name does not already exist in the workspace, the next cell will create one. We will create a Batch AI cluster of `STANDARD_NC6` GPU VMs. This process is broken down into 3 steps:\n", + "1. create the configuration (this step is local and only takes a second)\n", + "2. create the Batch AI cluster (this step will take about **20 seconds**)\n", + "3. provision the VMs to bring the cluster to the initial size set by `cluster_min_nodes` (0 in this case, so with autoscaling enabled nodes are provisioned when jobs are submitted). This step can take about **3-5 minutes** and provides only sparse output in the process. Please make sure to wait until the call returns before moving to the next cell" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.compute import ComputeTarget, BatchAiCompute\n", + "from azureml.core.compute_target import ComputeTargetException\n", + "\n", + "# choose a name for your cluster\n", + "batchai_cluster_name = \"gpucluster\"\n", + "\n", + "try:\n", + " # look for the existing cluster by name\n", + " compute_target = ComputeTarget(workspace=ws, name=batchai_cluster_name)\n", + " if type(compute_target) is BatchAiCompute:\n", + " print('found compute target {}, just use it.'.format(batchai_cluster_name))\n", + " else:\n", + " print('{} exists but it is not a Batch AI cluster. Please choose a different name.'.format(batchai_cluster_name))\n", + "except ComputeTargetException:\n", + " print('creating a new compute target...')\n", + " compute_config = BatchAiCompute.provisioning_configuration(vm_size=\"STANDARD_NC6\", # GPU-based VM\n", + " #vm_priority='lowpriority', # optional\n", + " autoscale_enabled=True,\n", + " cluster_min_nodes=0, \n", + " cluster_max_nodes=4)\n", + "\n", + " # create the cluster\n", + " compute_target = ComputeTarget.create(ws, batchai_cluster_name, compute_config)\n", + " \n", + " # can poll for a minimum number of nodes and for a specific timeout. \n", + " # if no min node count is provided it uses the scale settings for the cluster\n", + " compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)\n", + " \n", + " # Use the 'status' property to get a detailed status for the current cluster.
\n", + " print(compute_target.status.serialize())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now that you have created the compute target, let's see what the workspace's `compute_targets()` function returns. You should now see one entry named 'cpucluster' of type BatchAI." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for ct in ws.compute_targets():\n", + " print(ct.name, ct.type, ct.provisioning_state)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Copy the training files into the script folder\n", + "The TensorFlow training script is already created for you. You can simply copy it into the script folder, together with the utility library used to load compressed data file into numpy array." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import shutil\n", + "# the training logic is in the tf_mnist.py file.\n", + "shutil.copy('./tf_mnist.py', script_folder)\n", + "\n", + "# the utils.py just helps loading data from the downloaded MNIST dataset into numpy arrays.\n", + "shutil.copy('./utils.py', script_folder)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "nbpresent": { + "id": "2039d2d5-aca6-4f25-a12f-df9ae6529cae" + } + }, + "source": [ + "## Construct neural network in TensorFlow\n", + "In the training script `tf_mnist.py`, it creates a very simple DNN (deep neural network), with just 2 hidden layers. The input layer has 28 * 28 = 784 neurons, each representing a pixel in an image. The first hidden layer has 300 neurons, and the second hidden layer has 100 neurons. The output layer has 10 neurons, each representing a targeted label from 0 to 9.\n", + "\n", + "![DNN](nn.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Azure ML concepts \n", + "Please note the following three things in the code below:\n", + "1. The script accepts arguments using the argparse package. In this case there is one argument `--data_folder` which specifies the file system folder in which the script can find the MNIST data\n", + "```\n", + " parser = argparse.ArgumentParser()\n", + " parser.add_argument('--data_folder')\n", + "```\n", + "2. The script is accessing the Azure ML `Run` object by executing `run = Run.get_submitted_run()`. Further down the script is using the `run` to report the training accuracy and the validation accuracy as training progresses.\n", + "```\n", + " run.log('training_acc', np.float(acc_train))\n", + " run.log('validation_acc', np.float(acc_val))\n", + "```\n", + "3. When running the script on Azure ML, you can write files out to a folder `./outputs` that is relative to the root directory. This folder is specially tracked by Azure ML in the sense that any files written to that folder during script execution on the remote target will be picked up by Run History; these files (known as artifacts) will be available as part of the run history record." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The next cell will print out the training code for you to inspect it." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with open(os.path.join(script_folder, './tf_mnist.py'), 'r') as f:\n", + " print(f.read())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create TensorFlow estimator\n", + "Next, we construct an `azureml.train.dnn.TensorFlow` estimator object, use the Batch AI cluster as compute target, and pass the mount-point of the datastore to the training code as a parameter.\n", + "The TensorFlow estimator is providing a simple way of launching a TensorFlow training job on a compute target. It will automatically provide a docker image that has TensorFlow installed -- if additional pip or conda packages are required, their names can be passed in via the `pip_packages` and `conda_packages` arguments and they will be included in the resulting docker." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.train.dnn import TensorFlow\n", + "\n", + "script_params = {\n", + " '--data-folder': ws.get_default_datastore().as_mount(),\n", + " '--batch-size': 50,\n", + " '--first-layer-neurons': 300,\n", + " '--second-layer-neurons': 100,\n", + " '--learning-rate': 0.01\n", + "}\n", + "\n", + "est = TensorFlow(source_directory=script_folder,\n", + " script_params=script_params,\n", + " compute_target=compute_target,\n", + " entry_script='tf_mnist.py', \n", + " use_gpu=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Submit job to run\n", + "Calling the `fit` function on the estimator submits the job to Azure ML for execution. Submitting the job should only take a few seconds." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run = exp.submit(config=est)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Monitor the Run\n", + "As the Run is executed, it will go through the following stages:\n", + "1. Preparing: A docker image is created matching the Python environment specified by the TensorFlow estimator and it will be uploaded to the workspace's Azure Container Registry. This step will only happen once for each Python environment -- the container will then be cached for subsequent runs. Creating and uploading the image takes about **5 minutes**. While the job is preparing, logs are streamed to the run history and can be viewed to monitor the progress of the image creation.\n", + "\n", + "2. Scaling: If the compute needs to be scaled up (i.e. the Batch AI cluster requires more nodes to execute the run than currently available), the Batch AI cluster will attempt to scale up in order to make the required amount of nodes available. Scaling typically takes about **5 minutes**.\n", + "\n", + "3. Running: All scripts in the script folder are uploaded to the compute target, data stores are mounted/copied and the `entry_script` is executed. While the job is running, stdout and the `./logs` folder are streamed to the run history and can be viewed to monitor the progress of the run.\n", + "\n", + "4. Post-Processing: The `./outputs` folder of the run is copied over to the run history\n", + "\n", + "There are multiple ways to check the progress of a running job. We can use a Jupyter notebook widget. 
\n", + "\n", + "**Note: The widget will automatically update ever 10-15 seconds, always showing you the most up-to-date information about the run**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.train.widgets import RunDetails\n", + "RunDetails(run).show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can also periodically check the status of the run object, and navigate to Azure portal to monitor the run." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### The Run object\n", + "The Run object provides the interface to the run history -- both to the job and to the control plane (this notebook), and both while the job is running and after it has completed. It provides a number of interesting features for instance:\n", + "* `run.get_details()`: Provides a rich set of properties of the run\n", + "* `run.get_metrics()`: Provides a dictionary with all the metrics that were reported for the Run\n", + "* `run.get_file_names()`: List all the files that were uploaded to the run history for this Run. This will include the `outputs` and `logs` folder, azureml-logs and other logs, as well as files that were explicitly uploaded to the run using `run.upload_file()`\n", + "\n", + "Below are some examples -- please run through them and inspect their output. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run.get_details()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run.get_metrics()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run.get_file_names()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Plot accuracy over epochs\n", + "Since we can retrieve the metrics from the run, we can easily make plots using `matplotlib` in the notebook. Then we can add the plotted image to the run using `run.log_image()`, so all information about the run is kept together." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "os.makedirs('./imgs', exist_ok = True)\n", + "metrics = run.get_metrics()\n", + "\n", + "plt.figure(figsize = (13,5))\n", + "plt.plot(metrics['validation_acc'], 'r-', lw = 4, alpha = .6)\n", + "plt.plot(metrics['training_acc'], 'b--', alpha = 0.5)\n", + "plt.legend(['Full evaluation set', 'Training set mini-batch'])\n", + "plt.xlabel('epochs', fontsize = 14)\n", + "plt.ylabel('accuracy', fontsize = 14)\n", + "plt.title('Accuracy over Epochs', fontsize = 16)\n", + "run.log_image(name = 'acc_over_epochs.png', plot = plt)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Download the saved model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In the training script, a TensorFlow `saver` object is used to persist the model in a local folder (local to the compute target). The model was saved to the `./outputs` folder on the disk of the Batch AI cluster node where the job is run. Azure ML automatically uploaded anything written in the `./outputs` folder into run history file store. 
Subsequently, we can use the `Run` object to download the model files the `saver` object saved. They are under the `outputs/model` folder in the run history file store, and are downloaded into a local folder named `model`. Note the TensorFlow model consists of four files in binary format and they are not human-readable." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# create a model folder in the current directory\n", + "os.makedirs('./model', exist_ok = True)\n", + "\n", + "for f in run.get_file_names():\n", + " if f.startswith('outputs/model'):\n", + " output_file_path = os.path.join('./model', f.split('/')[-1])\n", + " print('Downloading from {} to {} ...'.format(f, output_file_path))\n", + " run.download_file(name = f, output_file_path = output_file_path)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Predict on the test set\n", + "Now load the saved TensorFlow graph, and list all operations under the `network` scope. This way we can discover the input tensor `network/X:0` and the output tensor `network/output/MatMul:0`, and use them in the scoring script in the next step.\n", + "\n", + "Note: if your local TensorFlow version is different from the version running in the cluster where the model is trained, you might see a \"compiletime version mismatch\" warning. You can ignore it." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "tf.reset_default_graph()\n", + "\n", + "saver = tf.train.import_meta_graph(\"./model/mnist-tf.model.meta\")\n", + "graph = tf.get_default_graph()\n", + "\n", + "for op in graph.get_operations():\n", + " if op.name.startswith('network'):\n", + " print(op.name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Feed the test dataset to the persisted model to get predictions." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# input tensor. this is an array of 784 elements, each representing the intensity of a pixel in the digit image.\n", + "X = tf.get_default_graph().get_tensor_by_name(\"network/X:0\")\n", + "# output tensor. this is an array of 10 elements, each representing the probability of predicted value of the digit.\n", + "output = tf.get_default_graph().get_tensor_by_name(\"network/output/MatMul:0\")\n", + "\n", + "with tf.Session() as sess:\n", + " saver.restore(sess, './model/mnist-tf.model')\n", + " k = output.eval(feed_dict = {X : X_test})\n", + "# get the prediction, which is the index of the element that has the largest probability value.\n", + "y_hat = np.argmax(k, axis = 1)\n", + "\n", + "# print the first 30 labels and predictions\n", + "print('labels: \\t', y_test[:30])\n", + "print('predictions:\\t', y_hat[:30])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Calculate the overall accuracy by comparing the predicted value against the test set." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Accuracy on the test set:\", np.average(y_hat == y_test))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Intelligent hyperparameter tuning\n", + "We have trained the model with one set of hyperparameters, now let's see how we can do hyperparameter tuning by launching multiple runs on the cluster.
First let's define the parameter space using random sampling." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.train.hyperdrive import *\n", + "\n", + "ps = RandomParameterSampling(\n", + " {\n", + " '--batch-size': choice(25, 50, 100),\n", + " '--first-layer-neurons': choice(10, 50, 200, 300, 500),\n", + " '--second-layer-neurons': choice(10, 50, 200, 500),\n", + " '--learning-rate': loguniform(-6, -1)\n", + " }\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, we will create a new estimator without the above parameters since they will be passed in later. Note we still need to keep the `data-folder` parameter since that's not a hyperparameter we will sweep." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "est = TensorFlow(source_directory=script_folder,\n", + " script_params={'--data-folder': ws.get_default_datastore().as_mount()},\n", + " compute_target=compute_target,\n", + " entry_script='tf_mnist.py', \n", + " use_gpu=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we will define an early termination policy. The `BanditPolicy` basically states to check the job every 2 iterations. If the primary metric (defined later) falls outside of the top 10% range, Azure ML terminates the job. This saves us from continuing to explore hyperparameters that don't show promise of helping reach our target metric." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "policy = BanditPolicy(evaluation_interval=2, slack_factor=0.1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we are ready to configure a run configuration object, and specify the primary metric `validation_acc` that's recorded in your training runs. If you go back to visit the training script, you will notice that this value is being logged after every epoch (a full pass over the training data). We also want to tell the service that we are looking to maximize this value. We also set the number of samples to 20, and the maximal number of concurrent jobs to 4, which is the same as the number of nodes in our compute cluster." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "htc = HyperDriveRunConfig(estimator=est, \n", + " hyperparameter_sampling=ps, \n", + " policy=policy, \n", + " primary_metric_name='validation_acc', \n", + " primary_metric_goal=PrimaryMetricGoal.MAXIMIZE, \n", + " max_total_runs=20,\n", + " max_concurrent_runs=4)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally, let's launch the hyperparameter tuning job." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "htr = exp.submit(config=htc)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can use a run history widget to show the progress. Be patient as this might take a while to complete." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "RunDetails(htr).show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Find and register best model\n", + "When all the jobs finish, we can find out the one that has the highest accuracy."
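Before using the convenience method in the next cell, it can be instructive to scan the child runs manually; this is only a sketch that assumes the hyperparameter run has finished and that each child logged `validation_acc`:

```python
# Sketch: manually find the child run with the best validation accuracy.
# Per-epoch metrics logged with run.log() typically come back as a list.
best_acc, best_child = -1.0, None
for child in htr.get_children():
    acc = child.get_metrics().get('validation_acc')
    if acc is None:
        continue
    final_acc = acc[-1] if isinstance(acc, list) else acc
    if final_acc > best_acc:
        best_acc, best_child = final_acc, child

print(best_child.id if best_child else 'no completed child runs yet', best_acc)
```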
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "best_run = htr.get_best_run_by_primary_metric()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now let's list the model files uploaded during the run." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(best_run.get_file_names())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can then register the folder (and all files in it) as a model named `tf-dnn-mnist` under the workspace for deployment." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model = best_run.register_model(model_name='tf-dnn-mnist', model_path='outputs/model')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Deploy the model in ACI\n", + "Now we are ready to deploy the model as a web service running in Azure Container Instance [ACI](https://azure.microsoft.com/en-us/services/container-instances/). Azure Machine Learning accomplishes this by constructing a Docker image with the scoring logic and model baked in.\n", + "### Create score.py\n", + "First, we will create a scoring script that will be invoked by the web service call. \n", + "\n", + "* Note that the scoring script must have two required functions, `init()` and `run(input_data)`. \n", + " * In the `init()` function, you typically load the model into a global object. This function is executed only once when the Docker container is started. \n", + " * In the `run(input_data)` function, the model is used to predict a value based on the input data. The input and output to `run` typically use JSON as serialization and de-serialization format but you are not limited to that." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile score.py\n", + "import json\n", + "import numpy as np\n", + "import os\n", + "import tensorflow as tf\n", + "\n", + "from azureml.core.model import Model\n", + "\n", + "def init():\n", + " global X, output, sess\n", + " tf.reset_default_graph()\n", + " model_root = Model.get_model_path('tf-dnn-mnist')\n", + " saver = tf.train.import_meta_graph(os.path.join(model_root, 'mnist-tf.model.meta'))\n", + " X = tf.get_default_graph().get_tensor_by_name(\"network/X:0\")\n", + " output = tf.get_default_graph().get_tensor_by_name(\"network/output/MatMul:0\")\n", + " \n", + " sess = tf.Session()\n", + " saver.restore(sess, os.path.join(model_root, 'mnist-tf.model'))\n", + "\n", + "def run(raw_data):\n", + " data = np.array(json.loads(raw_data)['data'])\n", + " # make prediction\n", + " out = output.eval(session = sess, feed_dict = {X: data})\n", + " y_hat = np.argmax(out, axis = 1)\n", + " return json.dumps(y_hat.tolist())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create myenv.yml\n", + "We also need to create an environment file so that Azure Machine Learning can install the necessary packages in the Docker image which are required by your scoring script. In this case, we need to specify packages `numpy`, `tensorflow`."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.runconfig import CondaDependencies\n", + "cd = CondaDependencies.create()\n", + "cd.add_conda_package('numpy')\n", + "cd.add_tensorflow_conda_package()\n", + "cd.save_to_file(base_directory='./', conda_file_path='myenv.yml')\n", + "\n", + "print(cd.serialize_to_string())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Deploy to ACI\n", + "We are almost ready to deploy. Create a deployment configuration and specify the number of CPUs and gigbyte of RAM needed for your ACI container. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.webservice import AciWebservice\n", + "\n", + "aciconfig = AciWebservice.deploy_configuration(cpu_cores=1, \n", + " memory_gb=1, \n", + " tags={'name':'mnist', 'framework': 'TensorFlow DNN'},\n", + " description='Tensorflow DNN on MNIST')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Deployment Process\n", + "Now we can deploy. **This cell will run for about 7-8 minutes**. Behind the scene, it will do the following:\n", + "1. **Register model** \n", + "Take the local `model` folder (which contains our previously downloaded trained model files) and register it (and the files inside that folder) as a model named `model` under the workspace. Azure ML will register the model directory or model file(s) we specify to the `model_paths` parameter of the `Webservice.deploy` call.\n", + "2. **Build Docker image** \n", + "Build a Docker image using the scoring file (`score.py`), the environment file (`myenv.yml`), and the `model` folder containing the TensorFlow model files. \n", + "3. **Register image** \n", + "Register that image under the workspace. \n", + "4. **Ship to ACI** \n", + "And finally ship the image to the ACI infrastructure, start up a container in ACI using that image, and expose an HTTP endpoint to accept REST client calls." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.image import ContainerImage\n", + "imgconfig = ContainerImage.image_configuration(execution_script=\"score.py\", \n", + " runtime=\"python\", \n", + " conda_file=\"myenv.yml\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "from azureml.core.webservice import Webservice\n", + "\n", + "service = Webservice.deploy_from_model(workspace=ws,\n", + " name='tf-mnist-svc',\n", + " deployment_config=aciconfig,\n", + " models=[model],\n", + " image_config=imgconfig)\n", + "\n", + "service.wait_for_deployment(show_output=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Tip: If something goes wrong with the deployment, the first thing to look at is the logs from the service by running the following command:**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(service.get_logs())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is the scoring web service endpoint:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(service.scoring_uri)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Test the deployed model\n", + "Let's test the deployed model. Pick 30 random samples from the test set, and send it to the web service hosted in ACI. Note here we are using the `run` API in the SDK to invoke the service. You can also make raw HTTP calls using any HTTP tool such as curl.\n", + "\n", + "After the invocation, we print the returned predictions and plot them along with the input images. Use red font color and inversed image (white on black) to highlight the misclassified samples. Note since the model accuracy is pretty high, you might have to run the below cell a few times before you can see a misclassified sample." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "\n", + "# find 30 random samples from test set\n", + "n = 30\n", + "sample_indices = np.random.permutation(X_test.shape[0])[0:n]\n", + "\n", + "test_samples = json.dumps({\"data\": X_test[sample_indices].tolist()})\n", + "test_samples = bytes(test_samples, encoding = 'utf8')\n", + "\n", + "# predict using the deployed model\n", + "result = json.loads(service.run(input_data = test_samples))\n", + "\n", + "# compare actual value vs. the predicted values:\n", + "i = 0\n", + "plt.figure(figsize = (20, 1))\n", + "\n", + "for s in sample_indices:\n", + " plt.subplot(1, n, i + 1)\n", + " plt.axhline('')\n", + " plt.axvline('')\n", + " \n", + " # use different color for misclassified sample\n", + " font_color = 'red' if y_test[s] != result[i] else 'black'\n", + " clr_map = plt.cm.gray if y_test[s] != result[i] else plt.cm.Greys\n", + " \n", + " plt.text(x = 10, y = -10, s = y_hat[s], fontsize = 18, color = font_color)\n", + " plt.imshow(X_test[s].reshape(28, 28), cmap = clr_map)\n", + " \n", + " i = i + 1\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can also send raw HTTP request to the service." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "import json\n", + "\n", + "# send a random row from the test set to score\n", + "random_index = np.random.randint(0, len(X_test)-1)\n", + "input_data = \"{\\\"data\\\": [\" + str(list(X_test[random_index])) + \"]}\"\n", + "\n", + "headers = {'Content-Type':'application/json'}\n", + "\n", + "resp = requests.post(service.scoring_uri, input_data, headers=headers)\n", + "\n", + "print(\"POST to url\", service.scoring_uri)\n", + "#print(\"input data:\", input_data)\n", + "print(\"label:\", y_test[random_index])\n", + "print(\"prediction:\", resp.text)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's look at the workspace after the web service was deployed. You should see \n", + "* a registered model named 'tf-dnn-mnist' with the id 'tf-dnn-mnist:1'\n", + "* an image with a docker image location pointing to your workspace's Azure Container Registry (ACR) \n", + "* a webservice called 'tf-mnist-svc' with some scoring URL" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for model in ws.models():\n", + " print(\"Model:\", model.name, model.id)\n", + "\n", + "for image in ws.images():\n", + " print(\"Image:\", image.name, image.image_location)\n", + "\n", + "for webservice in ws.webservices():\n", + " print(\"Webservice:\", webservice.name, webservice.scoring_uri)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Clean up\n", + "You can delete the ACI deployment with a simple delete API call." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "service.delete()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can also delete the compute cluster. But remember, because we set the `cluster_min_nodes` value to 0 when we created the cluster, all nodes are deleted automatically once the jobs are finished. So you don't have to delete the cluster itself since it won't incur any cost. Next time you submit jobs to it, the cluster will then automatically \"grow\" up to `cluster_max_nodes`, which is set to 4."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# delete the cluster if you need to.\n", + "compute_target.delete()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + }, + "nbpresent": { + "slides": { + "05bb34ad-74b0-42b3-9654-8357d1ba9c99": { + "id": "05bb34ad-74b0-42b3-9654-8357d1ba9c99", + "prev": "851089af-9725-40c9-8f0b-9bf892b2b1fe", + "regions": { + "23fb396d-50f9-4770-adb3-0d6abcb40767": { + "attrs": { + "height": 0.8, + "width": 0.8, + "x": 0.1, + "y": 0.1 + }, + "content": { + "cell": "2039d2d5-aca6-4f25-a12f-df9ae6529cae", + "part": "whole" + }, + "id": "23fb396d-50f9-4770-adb3-0d6abcb40767" + } + } + }, + "11bebe14-d1dc-476d-a31a-5828b9c3adf0": { + "id": "11bebe14-d1dc-476d-a31a-5828b9c3adf0", + "prev": "502648cb-26fe-496b-899f-84c8fe1dcbc0", + "regions": { + "a42499db-623e-4414-bea2-ff3617fd8fc5": { + "attrs": { + "height": 0.8, + "width": 0.8, + "x": 0.1, + "y": 0.1 + }, + "content": { + "cell": "4788c040-27a2-4dc1-8ed0-378a99b3a255", + "part": "whole" + }, + "id": "a42499db-623e-4414-bea2-ff3617fd8fc5" + } + } + }, + "134f92d0-6389-4226-af51-1134ae8e8278": { + "id": "134f92d0-6389-4226-af51-1134ae8e8278", + "prev": "36b8728c-32ad-4941-be03-5cef51cdc430", + "regions": { + "b6d82a77-2d58-4b9e-a375-3103214b826c": { + "attrs": { + "height": 0.8, + "width": 0.8, + "x": 0.1, + "y": 0.1 + }, + "content": { + "cell": "7ab0e6d0-1f1c-451b-8ac5-687da44a8287", + "part": "whole" + }, + "id": "b6d82a77-2d58-4b9e-a375-3103214b826c" + } + } + }, + "282a2421-697b-4fd0-9485-755abf5a0c18": { + "id": "282a2421-697b-4fd0-9485-755abf5a0c18", + "prev": "a8b9ceb9-b38f-4489-84df-b644c6fe28f2", + "regions": { + "522fec96-abe7-4a34-bd34-633733afecc8": { + "attrs": { + "height": 0.8, + "width": 0.8, + "x": 0.1, + "y": 0.1 + }, + "content": { + "cell": "d58e7785-c2ee-4a45-8e3d-4c538bf8075a", + "part": "whole" + }, + "id": "522fec96-abe7-4a34-bd34-633733afecc8" + } + } + }, + "2dfec088-8a70-411a-9199-904ef3fa2383": { + "id": "2dfec088-8a70-411a-9199-904ef3fa2383", + "prev": "282a2421-697b-4fd0-9485-755abf5a0c18", + "regions": { + "0535fcb6-3a2b-4b46-98a7-3ebb1a38c47e": { + "attrs": { + "height": 0.8, + "width": 0.8, + "x": 0.1, + "y": 0.1 + }, + "content": { + "cell": "c377ea0c-0cd9-4345-9be2-e20fb29c94c3", + "part": "whole" + }, + "id": "0535fcb6-3a2b-4b46-98a7-3ebb1a38c47e" + } + } + }, + "36a814c9-c540-4a6d-92d9-c03553d3d2c2": { + "id": "36a814c9-c540-4a6d-92d9-c03553d3d2c2", + "prev": "b52e4d09-5186-44e5-84db-3371c087acde", + "regions": { + "8bfba503-9907-43f0-b1a6-46a0b4311793": { + "attrs": { + "height": 0.8, + "width": 0.8, + "x": 0.1, + "y": 0.1 + }, + "content": { + "cell": "d5e4a56c-dfac-4346-be83-1c15b503deac", + "part": "whole" + }, + "id": "8bfba503-9907-43f0-b1a6-46a0b4311793" + } + } + }, + "36b8728c-32ad-4941-be03-5cef51cdc430": { + "id": "36b8728c-32ad-4941-be03-5cef51cdc430", + "prev": "05bb34ad-74b0-42b3-9654-8357d1ba9c99", + "regions": { + "a36a5bdf-7f62-49b0-8634-e155a98851dc": { + "attrs": { + "height": 0.8, + "width": 0.8, + "x": 0.1, + "y": 0.1 + }, + "content": { + "cell": "e33dfc47-e7df-4623-a7a6-ab6bcf944629", + "part": "whole" + }, + "id": "a36a5bdf-7f62-49b0-8634-e155a98851dc" + } + } + 
}, + "3f136f2a-f14c-4a4b-afea-13380556a79c": { + "id": "3f136f2a-f14c-4a4b-afea-13380556a79c", + "prev": "54cb8dfd-a89c-4922-867b-3c87d8b67cd3", + "regions": { + "80ecf237-d1b0-401e-83d2-6d04b7fcebd3": { + "attrs": { + "height": 0.8, + "width": 0.8, + "x": 0.1, + "y": 0.1 + }, + "content": { + "cell": "7debeb2b-ecea-414f-9b50-49657abb3e6a", + "part": "whole" + }, + "id": "80ecf237-d1b0-401e-83d2-6d04b7fcebd3" + } + } + }, + "502648cb-26fe-496b-899f-84c8fe1dcbc0": { + "id": "502648cb-26fe-496b-899f-84c8fe1dcbc0", + "prev": "3f136f2a-f14c-4a4b-afea-13380556a79c", + "regions": { + "4c83bb4d-2a52-41ba-a77f-0c6efebd83a6": { + "attrs": { + "height": 0.8, + "width": 0.8, + "x": 0.1, + "y": 0.1 + }, + "content": { + "cell": "dbd22f6b-6d49-4005-b8fe-422ef8ef1d42", + "part": "whole" + }, + "id": "4c83bb4d-2a52-41ba-a77f-0c6efebd83a6" + } + } + }, + "54cb8dfd-a89c-4922-867b-3c87d8b67cd3": { + "id": "54cb8dfd-a89c-4922-867b-3c87d8b67cd3", + "prev": "aa224267-f885-4c0c-95af-7bacfcc186d9", + "regions": { + "0848f0a7-032d-46c7-b35c-bfb69c83f961": { + "attrs": { + "height": 0.8, + "width": 0.8, + "x": 0.1, + "y": 0.1 + }, + "content": { + "cell": "3c32c557-d0e8-4bb3-a61a-aa51a767cd4e", + "part": "whole" + }, + "id": "0848f0a7-032d-46c7-b35c-bfb69c83f961" + } + } + }, + "636b563c-faee-4c9e-a6a3-f46a905bfa82": { + "id": "636b563c-faee-4c9e-a6a3-f46a905bfa82", + "prev": "c5f59b98-a227-4344-9d6d-03abdd01c6aa", + "regions": { + "9c64f662-05dc-4b14-9cdc-d450b96f4368": { + "attrs": { + "height": 0.8, + "width": 0.8, + "x": 0.1, + "y": 0.1 + }, + "content": { + "cell": "70640ac0-7041-47a8-9a7f-e871defd74b2", + "part": "whole" + }, + "id": "9c64f662-05dc-4b14-9cdc-d450b96f4368" + } + } + }, + "793cec2f-8413-484d-aa1e-388fd2b53a45": { + "id": "793cec2f-8413-484d-aa1e-388fd2b53a45", + "prev": "c66f3dfd-2d27-482b-be78-10ba733e826b", + "regions": { + "d08f9cfa-3b8d-4fb4-91ba-82d9858ea93e": { + "attrs": { + "height": 0.8, + "width": 0.8, + "x": 0.1, + "y": 0.1 + }, + "content": { + "cell": "dd56113e-e3db-41ae-91b7-2472ed194308", + "part": "whole" + }, + "id": "d08f9cfa-3b8d-4fb4-91ba-82d9858ea93e" + } + } + }, + "83e912ff-260a-4391-8a12-331aba098506": { + "id": "83e912ff-260a-4391-8a12-331aba098506", + "prev": "fe5a0732-69f5-462a-8af6-851f84a9fdec", + "regions": { + "2fefcf5f-ea20-4604-a528-5e6c91bcb100": { + "attrs": { + "height": 0.8, + "width": 0.8, + "x": 0.1, + "y": 0.1 + }, + "content": { + "cell": "c3f2f57c-7454-4d3e-b38d-b0946cf066ea", + "part": "whole" + }, + "id": "2fefcf5f-ea20-4604-a528-5e6c91bcb100" + } + } + }, + "851089af-9725-40c9-8f0b-9bf892b2b1fe": { + "id": "851089af-9725-40c9-8f0b-9bf892b2b1fe", + "prev": "636b563c-faee-4c9e-a6a3-f46a905bfa82", + "regions": { + "31c9dda5-fdf4-45e2-bcb7-12aa0f30e1d8": { + "attrs": { + "height": 0.8, + "width": 0.8, + "x": 0.1, + "y": 0.1 + }, + "content": { + "cell": "8408b90e-6cdd-44d1-86d3-648c23f877ac", + "part": "whole" + }, + "id": "31c9dda5-fdf4-45e2-bcb7-12aa0f30e1d8" + } + } + }, + "87ab653d-e804-470f-bde9-c67caaa0f354": { + "id": "87ab653d-e804-470f-bde9-c67caaa0f354", + "prev": "a8c2d446-caee-42c8-886a-ed98f4935d78", + "regions": { + "bc3aeb56-c465-4868-a1ea-2de82584de98": { + "attrs": { + "height": 0.8, + "width": 0.8, + "x": 0.1, + "y": 0.1 + }, + "content": { + "cell": "59f52294-4a25-4c92-bab8-3b07f0f44d15", + "part": "whole" + }, + "id": "bc3aeb56-c465-4868-a1ea-2de82584de98" + } + } + }, + "8b887c97-83bc-4395-83ac-f6703cbe243d": { + "id": "8b887c97-83bc-4395-83ac-f6703cbe243d", + "prev": "36a814c9-c540-4a6d-92d9-c03553d3d2c2", + "regions": { + 
"9d0bc72a-cb13-483f-a572-2bf60d0d145f": { + "attrs": { + "height": 0.8, + "width": 0.8, + "x": 0.1, + "y": 0.1 + }, + "content": { + "cell": "75499c85-d0a1-43db-8244-25778b9b2736", + "part": "whole" + }, + "id": "9d0bc72a-cb13-483f-a572-2bf60d0d145f" + } + } + }, + "a8b9ceb9-b38f-4489-84df-b644c6fe28f2": { + "id": "a8b9ceb9-b38f-4489-84df-b644c6fe28f2", + "prev": null, + "regions": { + "f741ed94-3f24-4427-b615-3ab8753e5814": { + "attrs": { + "height": 0.8, + "width": 0.8, + "x": 0.1, + "y": 0.1 + }, + "content": { + "cell": "bf74d2e9-2708-49b1-934b-e0ede342f475", + "part": "whole" + }, + "id": "f741ed94-3f24-4427-b615-3ab8753e5814" + } + } + }, + "a8c2d446-caee-42c8-886a-ed98f4935d78": { + "id": "a8c2d446-caee-42c8-886a-ed98f4935d78", + "prev": "2dfec088-8a70-411a-9199-904ef3fa2383", + "regions": { + "f03457d8-b2a7-4e14-9a73-cab80c5b815d": { + "attrs": { + "height": 0.8, + "width": 0.8, + "x": 0.1, + "y": 0.1 + }, + "content": { + "cell": "edaa7f2f-2439-4148-b57a-8c794c0945ec", + "part": "whole" + }, + "id": "f03457d8-b2a7-4e14-9a73-cab80c5b815d" + } + } + }, + "aa224267-f885-4c0c-95af-7bacfcc186d9": { + "id": "aa224267-f885-4c0c-95af-7bacfcc186d9", + "prev": "793cec2f-8413-484d-aa1e-388fd2b53a45", + "regions": { + "0d7ac442-5e1d-49a5-91b3-1432d72449d8": { + "attrs": { + "height": 0.8, + "width": 0.8, + "x": 0.1, + "y": 0.1 + }, + "content": { + "cell": "4d6826fe-2cb8-4468-85ed-a242a1ce7155", + "part": "whole" + }, + "id": "0d7ac442-5e1d-49a5-91b3-1432d72449d8" + } + } + }, + "b52e4d09-5186-44e5-84db-3371c087acde": { + "id": "b52e4d09-5186-44e5-84db-3371c087acde", + "prev": "134f92d0-6389-4226-af51-1134ae8e8278", + "regions": { + "7af7d997-80b2-497d-bced-ef8341763439": { + "attrs": { + "height": 0.8, + "width": 0.8, + "x": 0.1, + "y": 0.1 + }, + "content": { + "cell": "376882ec-d469-4fad-9462-18e4bbea64ca", + "part": "whole" + }, + "id": "7af7d997-80b2-497d-bced-ef8341763439" + } + } + }, + "c5f59b98-a227-4344-9d6d-03abdd01c6aa": { + "id": "c5f59b98-a227-4344-9d6d-03abdd01c6aa", + "prev": "83e912ff-260a-4391-8a12-331aba098506", + "regions": { + "7268abff-0540-4c06-aefc-c386410c0953": { + "attrs": { + "height": 0.8, + "width": 0.8, + "x": 0.1, + "y": 0.1 + }, + "content": { + "cell": "396d478b-34aa-4afa-9898-cdce8222a516", + "part": "whole" + }, + "id": "7268abff-0540-4c06-aefc-c386410c0953" + } + } + }, + "c66f3dfd-2d27-482b-be78-10ba733e826b": { + "id": "c66f3dfd-2d27-482b-be78-10ba733e826b", + "prev": "8b887c97-83bc-4395-83ac-f6703cbe243d", + "regions": { + "6cbe8e0e-8645-41a1-8a38-e44acb81be4b": { + "attrs": { + "height": 0.8, + "width": 0.8, + "x": 0.1, + "y": 0.1 + }, + "content": { + "cell": "7594c7c7-b808-48f7-9500-d7830a07968a", + "part": "whole" + }, + "id": "6cbe8e0e-8645-41a1-8a38-e44acb81be4b" + } + } + }, + "d22045e5-7e3e-452e-bc7b-c6c4a893da8e": { + "id": "d22045e5-7e3e-452e-bc7b-c6c4a893da8e", + "prev": "ec41f96a-63a3-4825-9295-f4657a440ddb", + "regions": { + "24e2a3a9-bf65-4dab-927f-0bf6ffbe581d": { + "attrs": { + "height": 0.8, + "width": 0.8, + "x": 0.1, + "y": 0.1 + }, + "content": { + "cell": "defe921f-8097-44c3-8336-8af6700804a7", + "part": "whole" + }, + "id": "24e2a3a9-bf65-4dab-927f-0bf6ffbe581d" + } + } + }, + "d24c958c-e419-4e4d-aa9c-d228a8ca55e4": { + "id": "d24c958c-e419-4e4d-aa9c-d228a8ca55e4", + "prev": "11bebe14-d1dc-476d-a31a-5828b9c3adf0", + "regions": { + "25312144-9faa-4680-bb8e-6307ea71370f": { + "attrs": { + "height": 0.8, + "width": 0.8, + "x": 0.1, + "y": 0.1 + }, + "content": { + "cell": "bed09a92-9a7a-473b-9464-90e479883a3e", + "part": "whole" + }, 
+ "id": "25312144-9faa-4680-bb8e-6307ea71370f" + } + } + }, + "ec41f96a-63a3-4825-9295-f4657a440ddb": { + "id": "ec41f96a-63a3-4825-9295-f4657a440ddb", + "prev": "87ab653d-e804-470f-bde9-c67caaa0f354", + "regions": { + "22e8be98-c254-4d04-b0e4-b9b5ae46eefe": { + "attrs": { + "height": 0.8, + "width": 0.8, + "x": 0.1, + "y": 0.1 + }, + "content": { + "cell": "bc70f780-c240-4779-96f3-bc5ef9a37d59", + "part": "whole" + }, + "id": "22e8be98-c254-4d04-b0e4-b9b5ae46eefe" + } + } + }, + "fe5a0732-69f5-462a-8af6-851f84a9fdec": { + "id": "fe5a0732-69f5-462a-8af6-851f84a9fdec", + "prev": "d22045e5-7e3e-452e-bc7b-c6c4a893da8e", + "regions": { + "671b89f5-fa9c-4bc1-bdeb-6e0a4ce8939b": { + "attrs": { + "height": 0.8, + "width": 0.8, + "x": 0.1, + "y": 0.1 + }, + "content": { + "cell": "fd46e2ab-4ab6-4001-b536-1f323525d7d3", + "part": "whole" + }, + "id": "671b89f5-fa9c-4bc1-bdeb-6e0a4ce8939b" + } + } + } + }, + "themes": {} + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/training/03.train-hyperparameter-tune-deploy-with-tensorflow/03.train-hyperparameter-tune-deploy-with-tensorflow.ipynb b/training/03.train-hyperparameter-tune-deploy-with-tensorflow/03.train-hyperparameter-tune-deploy-with-tensorflow.ipynb index 76684cfb..7bb4bb85 100644 --- a/training/03.train-hyperparameter-tune-deploy-with-tensorflow/03.train-hyperparameter-tune-deploy-with-tensorflow.ipynb +++ b/training/03.train-hyperparameter-tune-deploy-with-tensorflow/03.train-hyperparameter-tune-deploy-with-tensorflow.ipynb @@ -251,7 +251,7 @@ "try:\n", " # look for the existing cluster by name\n", " compute_target = ComputeTarget(workspace=ws, name=batchai_cluster_name)\n", - " if compute_target is BatchAiCompute:\n", + " if type(compute_target) is BatchAiCompute:\n", " print('found compute target {}, just use it.'.format(batchai_cluster_name))\n", " else:\n", " print('{} exists but it is not a Batch AI cluster. Please choose a different name.'.format(batchai_cluster_name))\n", diff --git a/tutorials/01.train-models.ipynb b/tutorials/01.train-models.ipynb index 2d1d9c4e..d12b04e7 100644 --- a/tutorials/01.train-models.ipynb +++ b/tutorials/01.train-models.ipynb @@ -98,7 +98,7 @@ "source": [ "### Create experiment\n", "\n", - "Create an experiment to track the runs in your workspace. A workspace can have muliple experiments; an experiment must belongn to a workspace." + "Create an experiment to track the runs in your workspace. A workspace can have muliple experiments. " ] }, { @@ -121,9 +121,7 @@ "\n", "Azure Azure ML Managed Compute is a managed service that enables data scientists to train machine learning models on clusters of Azure virtual machines, including VMs with GPU support. In this tutorial, you create an Azure Managed Compute cluster as your training environment. This code creates a cluster for you if it does not already exist in your workspace. \n", "\n", - " **Creation of the cluster takes approximately 5 minutes.** If the cluster is already in the workspace this code uses it and skips the creation process.\n", - "\n", - "**Note**: As with other Azure services, there are limits on certain resources (for eg. BatchAI cluster size) associated with the Azure Machine Learning service. Please read [this article](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota." + " **Creation of the cluster takes approximately 5 minutes.** If the cluster is already in the workspace this code uses it and skips the creation process." 
] }, { @@ -146,7 +144,7 @@ "try:\n", " # look for the existing cluster by name\n", " compute_target = ComputeTarget(workspace=ws, name=batchai_cluster_name)\n", - " if compute_target is BatchAiCompute:\n", + " if type(compute_target) is BatchAiCompute:\n", " print('found compute target {}, just use it.'.format(batchai_cluster_name))\n", " else:\n", " print('{} exists but it is not a Batch AI cluster. Please choose a different name.'.format(batchai_cluster_name))\n", @@ -188,6 +186,13 @@ "Download the MNIST dataset and save the files into a `data` directory locally. Images and labels for both training and testing are downloaded." ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "code", "execution_count": null, @@ -330,7 +335,7 @@ "\n", "### Create a directory\n", "\n", - "Create a directory to hold all script files are other assets." + "Create a directory to deliver the necessary code from your computer to the remote resource." ] }, { @@ -434,7 +439,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Copy the utility library that loads the dataset into the script folder to be accessed by the training script." + "The file `utils.py` is referenced from the training script to load the dataset correctly. Copy this script into the script folder so that it can be accessed along with the training script on the remote resource." ] }, { @@ -457,11 +462,12 @@ "\n", "* The name of the estimator object, `est`\n", "* The directory that contains your scripts. All the files in this directory are uploaded into the cluster nodes for execution. \n", - "* The compute target. In this case you will use the Managed Compute cluster you created\n", + "* The compute target. In this case you will use the Batch AI cluster you created\n", "* The training script name, train.py\n", - "* The `data-folder` parameter used by the training script to access the data\n", - "* Any Python packages needed for training\n", - "In this tutorial, this target is the Managed Compute cluster. All files in the script folder are uploaded into the cluster nodes for execution." + "* Parameters required from the training script \n", + "* Python packages needed for training\n", + "\n", + "In this tutorial, this target is the Batch AI cluster. All files in the script folder are uploaded into the cluster nodes for execution. The data_folder is set to use the datastore (`ds.as_mount()`)." ] }, { @@ -507,7 +513,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Since the call is asynchronous, it returns a **Preparing** or **running** state as soon as the job is started.\n", + "Since the call is asynchronous, it returns a **Preparing** or **Running** state as soon as the job is started.\n", "\n", "## Monitor a remote run\n", "\n", @@ -595,7 +601,7 @@ "\n", "## Register model\n", "\n", - "The last step in the training script wrote the file `outputs/sklearn_mnist_model.pkl` in a folder named `outputs` in the VM of the cluster where the job is executed. `outputs` is a special folder in that all content in the `outputs` directory is automatically uploaded as part of the run record in the experiment under your workspace. Hence, the model file is now also available in your workspace. \n", + "The last step in the training script wrote the file `outputs/sklearn_mnist_model.pkl` in a directory named `outputs` in the VM of the cluster where the job is executed. 
`outputs` is a special directory in that all content in this directory is automatically uploaded to your workspace. This content appears in the run record in the experiment under your workspace. Hence, the model file is now also available in your workspace.\n", "\n", "You can see files associated with that run." ] diff --git a/tutorials/02.deploy-models.ipynb b/tutorials/02.deploy-models.ipynb index 2a92a8db..3a36c1fc 100644 --- a/tutorials/02.deploy-models.ipynb +++ b/tutorials/02.deploy-models.ipynb @@ -15,7 +15,7 @@ "source": [ "# Tutorial #2: Deploy an image classification model in Azure Container Instance (ACI)\n", "\n", - "This tutorial is **part two of a two-part tutorial series**. In the [previous tutorial](01.train-models.ipynb), you trained machine learning models and then registered the best one in your workspace on the cloud. \n", + "This tutorial is **part two of a two-part tutorial series**. In the [previous tutorial](01.train-models.ipynb), you trained machine learning models and then registered a model in your workspace on the cloud. \n", "\n", "Now, you're ready to deploy the model as a web service in [Azure Container Instances](https://docs.microsoft.com/azure/container-instances/) (ACI). A web service is an image, in this case a Docker image, that encapsulates the scoring logic and the model itself. \n", "\n", @@ -33,8 +33,7 @@ "## Prerequisites\n", "\n", "Complete the model training in the [Tutorial #1: Train an image classification model with Azure Machine Learning](01.train-models.ipynb) notebook. \n", - "\n", - "If you did NOT complete the tutorial, you can instead run this cell to create a model and download the data needed for this tutorial:" + "\n" ] }, { @@ -43,6 +42,8 @@ "metadata": {}, "outputs": [], "source": [ + "# If you did NOT complete the tutorial, you can instead run this cell \n", + "# This will register a model and download the data needed for this tutorial\n", "# These prerequisites are created in the training tutorial\n", "# Feel free to skip this cell if you completed the training tutorial \n", "\n", @@ -251,9 +252,9 @@ "Create the scoring script, called score.py, used by the web service call to show how to use the model.\n", "\n", "You must include two required functions into the scoring script:\n", - "* The `init()` function, which typically loads the model into a global object. This function is executed only once when the Docker container is started. \n", + "* The `init()` function, which typically loads the model into a global object. This function is run only once when the Docker container is started. \n", "\n", - "* The `run(input_data)` function uses the model to predict a value based on the input data. Inputs and outputs to the run typically use JSON for serialization and de-serialization, but other formats are supported." + "* The `run(input_data)` function uses the model to predict a value based on the input data. Inputs and outputs to the run typically use JSON for serialization and de-serialization, but other formats are supported.\n" ] }, { @@ -332,7 +333,7 @@ "source": [ "### Create configuration file\n", "\n", - "Create a deployment configuration file and specify the number of CPUs and gigabyte of RAM needed for your ACI container. While it depends on your model, the default of 1 core and 1 gigabyte of RAM is usually sufficient for many models. If you feel you need more later, you can always modify the configuration and redeploy the service." 
+ "Create a deployment configuration file and specify the number of CPUs and gigabyte of RAM needed for your ACI container. While it depends on your model, the default of 1 core and 1 gigabyte of RAM is usually sufficient for many models. If you feel you need more later, you would have to recreate the image and redeploy the service." ] }, { diff --git a/tutorials/03.auto-train-models.ipynb b/tutorials/03.auto-train-models.ipynb index 2a9c4815..76e7d3a9 100644 --- a/tutorials/03.auto-train-models.ipynb +++ b/tutorials/03.auto-train-models.ipynb @@ -13,13 +13,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Tutorial: Automatically train a classification model with Azure Automated Machine Learning\n", + "# Tutorial: Train a classification model with automated machine learning\n", "\n", - "In this tutorial, you'll learn how to automatically generate a machine learning model. This model can then be deployed following the workflow in the [Deploy a model](02.deploy-models.ipynb) tutorial.\n", + "In this tutorial, you'll learn how to generate a machine learning model using automated machine learning (automated ML). Azure Machine Learning can perform data preprocessing, algorithm selection and hyperparameter selection in an automated way for you. The final model can then be deployed following the workflow in the [Deploy a model](02.deploy-models.ipynb) tutorial.\n", "\n", "[flow diagram](./imgs/flow2.png)\n", "\n", - "Similar to the [train models tutorial](01.train-models.ipynb), this tutorial classifies handwritten images of digits (0-9) from the [MNIST](http://yann.lecun.com/exdb/mnist/) dataset.\n", + "Similar to the [train models tutorial](01.train-models.ipynb), this tutorial classifies handwritten images of digits (0-9) from the [MNIST](http://yann.lecun.com/exdb/mnist/) dataset. But this time you don't to specify an algorithm or tune hyperparameters. The automated ML technique iterates over many combinations of algorithms and hyperparameters until it finds the best model based on your criterion.\n", "\n", "You'll learn how to:\n", "\n", @@ -190,11 +190,10 @@ "|**primary_metric**|AUC Weighted | Metric that you want to optimize.|\n", "|**max_time_sec**|12,000|Time limit in seconds for each iteration|\n", "|**iterations**|20|Number of iterations. In each iteration, the model trains with the data with a specific pipeline|\n", - "|**n_cross_validations**|5|Number of cross validation splits|\n", + "|**n_cross_validations**|3|Number of cross validation splits|\n", "|**preprocess**|True| *True/False* Enables experiment to perform preprocessing on the input. Preprocessing handles *missing data*, and performs some common *feature extraction*|\n", - "|**exit_score**|0.994|*double* value indicating the target for *primary_metric*. Once the target is surpassed the run terminates|\n", - "|**blacklist_algos**|['kNN','LinearSVM']|*Array* of *strings* indicating algorithms to ignore.\n", - "|**concurrent_iterations**|5|Max number of iterations that would be executed in parallel. This number should be less than the number of cores on the DSVM. Used in remote training.|" + "|**exit_score**|0.995|*double* value indicating the target for *primary_metric*. 
Once the target is surpassed, the run terminates|\n", + "|**blacklist_algos**|['kNN','LinearSVM']|*Array* of *strings* indicating algorithms to ignore.|\n" ] }, { @@ -211,6 +210,8 @@ " max_time_sec = 12000,\n", " iterations = 20,\n", " n_cross_validations = 3,\n", + " preprocess = True,\n", + " exit_score = 0.995,\n", " blacklist_algos = ['kNN','LinearSVM'],\n", " X = X_digits,\n", " y = y_digits,\n", @@ -279,7 +280,7 @@ "metricslist = {}\n", "for run in children:\n", " properties = run.get_properties()\n", - " metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)} \n", + " metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)}\n", " metricslist[int(properties['iteration'])] = metrics\n", "\n", "import pandas as pd\n", @@ -287,6 +288,15 @@ "rundata" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Register the best model \n", + "\n", + "Use the `local_run` object to get the best model and register it into the workspace. " + ] + }, { "cell_type": "code", "execution_count": null,