Compare commits

...

23 Commits

Author SHA1 Message Date
Roope Astala
7bb906b53c Merge pull request #87 from rastala/master
Update to version 0.1.80
2018-11-20 11:02:28 -05:00
rastala
5726fe3ddb Version 0.1.80 2018-11-20 11:00:48 -05:00
rastala
d10b1fa796 Revert "Updated notebook folders"
This reverts commit 06728004b6.
2018-11-20 10:39:48 -05:00
rastala
d7127de03c Revert "Update tutorials/README.md"
This reverts commit 50787f4ccc.
2018-11-20 10:39:34 -05:00
Roope Astala
50787f4ccc Update tutorials/README.md 2018-11-19 13:35:11 -05:00
Roope Astala
06728004b6 Updated notebook folders 2018-11-19 13:28:49 -05:00
Roope Astala
f5bcc55fe3 Merge pull request #74 from yueguoguo/master
Typo in README
2018-11-09 09:51:01 -05:00
Roope Astala
f23fb58200 Merge pull request #77 from rastala/master
Fix autoscale
2018-11-09 09:47:46 -05:00
Roope Astala
dbce7b8db2 Fix autoscase 2018-11-09 09:47:01 -05:00
Roope Astala
303090adf6 Merge pull request #76 from rastala/master
Update 00.configuration.ipynb
2018-11-09 09:33:07 -05:00
Roope Astala
b091d1f5f1 Update 00.configuration.ipynb
Create computes in 00.configuration, and link to tutorial
2018-11-09 09:31:25 -05:00
Hai Ning
803d69c539 Update 03.train-hyperparameter-tune-deploy-with-tensorflow.ipynb 2018-11-07 13:54:11 -05:00
Zhang Le
37848e9686 Merge pull request #1 from yueguoguo/yueguoguo-patch-1
Typo in README
2018-11-07 13:18:31 +08:00
Zhang Le
7d9227441e Typo in README
Typo of `psutil`.
2018-11-07 13:17:53 +08:00
Roope Astala
21c454b0f2 Merge pull request #72 from rastala/master
Add logging API notebook
2018-11-06 12:46:39 -05:00
Roope Astala
c7b0960ae4 Add logging API notebook 2018-11-06 12:46:05 -05:00
Roope Astala
14e11fefd6 Delete .gitignore 2018-11-06 12:31:53 -05:00
Roope Astala
4deaeb04cf Delete 05.train-in-spark-checkpoint.ipynb 2018-11-06 12:31:32 -05:00
Roope Astala
ee78323df2 Delete 03.train-on-aci-checkpoint.ipynb 2018-11-06 12:31:18 -05:00
Roope Astala
89c2622938 Delete 02.train-on-local-checkpoint.ipynb 2018-11-06 12:31:03 -05:00
Roope Astala
96b352e3be Delete 04.train-on-remote-vm-checkpoint.ipynb 2018-11-06 12:30:43 -05:00
Roope Astala
5280201f93 Merge pull request #70 from wchill/fix_macos_sigsegv
Fix segfault under certain conditions when running AutoML pipelines on MacOS
2018-11-05 19:04:14 -05:00
Eric Ahn
3825fd2c10 Fix segfault under certain conditions on MacOS 2018-11-05 15:06:38 -08:00
65 changed files with 23146 additions and 25742 deletions

View File

@@ -101,11 +101,20 @@
"metadata": {},
"outputs": [],
"source": [
"from azureml.core import Workspace\n",
"import os\n",
"\n",
"subscription_id ='<subscription-id>'\n",
"resource_group ='<resource-group>'\n",
"workspace_name = '<workspace-name>'\n",
"subscription_id = os.environ.get(\"SUBSCRIPTION_ID\", \"<my-subscription-id>\")\n",
"resource_group = os.environ.get(\"RESOURCE_GROUP\", \"<my-resource-group>\")\n",
"workspace_name = os.environ.get(\"WORKSPACE_NAME\", \"<my-workspace-name>\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core import Workspace\n",
"\n",
"try:\n",
" ws = Workspace(subscription_id = subscription_id, resource_group = resource_group, workspace_name = workspace_name)\n",
@@ -131,7 +140,7 @@
"* Your subscription id\n",
"* The resource group name\n",
"\n",
"**Note**: As with other Azure services, there are limits on certain resources (for eg. BatchAI cluster size) associated with the Azure Machine Learning service. Please read [this article](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota."
"**Note**: As with other Azure services, there are limits on certain resources (for eg. AmlCompute quota) associated with the Azure Machine Learning service. Please read [this article](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota."
]
},
{
@@ -142,15 +151,6 @@
"Specify a region where your workspace will be located from the list of [Azure Machine Learning regions](https://linktoregions)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"workspace_region = \"eastus2\""
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -159,10 +159,11 @@
"source": [
"import os\n",
"\n",
"subscription_id = os.environ.get(\"SUBSCRIPTION_ID\", subscription_id)\n",
"resource_group = os.environ.get(\"RESOURCE_GROUP\", resource_group)\n",
"workspace_name = os.environ.get(\"WORKSPACE_NAME\", workspace_name)\n",
"workspace_region = os.environ.get(\"WORKSPACE_REGION\", workspace_region)"
"subscription_id = os.environ.get(\"SUBSCRIPTION_ID\", \"<my-subscription-id>\")\n",
"resource_group = os.environ.get(\"RESOURCE_GROUP\", \"my-aml-resource-group\")\n",
"workspace_name = os.environ.get(\"WORKSPACE_NAME\", \"my-first-workspace\")\n",
"\n",
"workspace_region = os.environ.get(\"WORKSPACE_REGION\", \"eastus2\")"
]
},
{
@@ -207,12 +208,88 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Success!\n",
"Great, you are ready to move on to the rest of the sample notebooks."
"## Create compute resources for your training experiments\n",
"\n",
"Many of the subsequent examples use Azure Machine Learning managed compute (AmlCompute) to train models at scale. To create a **CPU** cluster now, run the cell below. The autoscale settings mean that the cluster will scale down to 0 nodes when inactive and up to 4 nodes when busy."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
"from azureml.core.compute_target import ComputeTargetException\n",
"\n",
"# Choose a name for your CPU cluster\n",
"cpu_cluster_name = \"cpucluster\"\n",
"\n",
"# Verify that cluster does not exist already\n",
"try:\n",
" cpu_cluster = ComputeTarget(workspace=ws, name=cpu_cluster_name)\n",
" print('Found existing cluster, use it.')\n",
"except ComputeTargetException:\n",
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',\n",
" max_nodes=4)\n",
" cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, compute_config)\n",
"\n",
"cpu_cluster.wait_for_completion(show_output=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"To create a **GPU** cluster, run the cell below. Note that your subscription must have sufficient quota for GPU VMs or the command will fail. To increase quota, see [these instructions](https://docs.microsoft.com/en-us/azure/azure-supportability/resource-manager-core-quotas-request). "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
"from azureml.core.compute_target import ComputeTargetException\n",
"\n",
"# Choose a name for your GPU cluster\n",
"gpu_cluster_name = \"gpucluster\"\n",
"\n",
"# Check if cluster exists already\n",
"try:\n",
" gpu_cluster = ComputeTarget(workspace=ws, name=gpu_cluster_name)\n",
" print('Found existing cluster, use it.')\n",
"except ComputeTargetException:\n",
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6',\n",
" max_nodes=4)\n",
" gpu_cluster = ComputeTarget.create(ws, gpu_cluster_name, compute_config)\n",
"\n",
"gpu_cluster.wait_for_completion(show_output=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Success!\n",
"Great, you are ready to move on to the rest of the sample notebooks. A good place to start is the [01.train-model tutorial](./tutorials/01.train-model.ipynb) to learn how to train and then deploy an image classification model."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"authors": [
{
"name": "roastala"
}
],
"kernelspec": {
"display_name": "Python 3.6",
"language": "python",
@@ -228,7 +305,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
"version": "3.6.2"
}
},
"nbformat": 4,

View File

@@ -1 +0,0 @@
/samples/

View File

@@ -1,477 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
"\n",
"Licensed under the MIT License."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 02. Train locally\n",
"* Create or load workspace.\n",
"* Create scripts locally.\n",
"* Create `train.py` in a folder, along with a `my.lib` file.\n",
"* Configure & execute a local run in a user-managed Python environment.\n",
"* Configure & execute a local run in a system-managed Python environment.\n",
"* Configure & execute a local run in a Docker environment.\n",
"* Query run metrics to find the best model\n",
"* Register model for operationalization."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Prerequisites\n",
"Make sure you go through the [00. Installation and Configuration](00.configuration.ipynb) Notebook first if you haven't."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Check core SDK version number\n",
"import azureml.core\n",
"\n",
"print(\"SDK version:\", azureml.core.VERSION)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Initialize Workspace\n",
"\n",
"Initialize a workspace object from persisted configuration."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.workspace import Workspace\n",
"\n",
"ws = Workspace.from_config()\n",
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep='\\n')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create An Experiment\n",
"**Experiment** is a logical container in an Azure ML Workspace. It hosts run records which can include run metrics and output artifacts from your experiments."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core import Experiment\n",
"experiment_name = 'train-on-local'\n",
"exp = Experiment(workspace=ws, name=experiment_name)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## View `train.py`\n",
"\n",
"`train.py` is already created for you."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"with open('./train.py', 'r') as f:\n",
" print(f.read())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Note `train.py` also references a `mylib.py` file."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"with open('./mylib.py', 'r') as f:\n",
" print(f.read())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Configure & Run\n",
"### User-managed environment\n",
"Below, we use a user-managed run, which means you are responsible to ensure all the necessary packages are available in the Python environment you choose to run the script."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.runconfig import RunConfiguration\n",
"\n",
"# Editing a run configuration property on-fly.\n",
"run_config_user_managed = RunConfiguration()\n",
"\n",
"run_config_user_managed.environment.python.user_managed_dependencies = True\n",
"\n",
"# You can choose a specific Python environment by pointing to a Python path \n",
"#run_config.environment.python.interpreter_path = '/home/johndoe/miniconda3/envs/sdk2/bin/python'"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Submit script to run in the user-managed environment\n",
"Note whole script folder is submitted for execution, including the `mylib.py` file."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core import ScriptRunConfig\n",
"\n",
"src = ScriptRunConfig(source_directory='./', script='train.py', run_config=run_config_user_managed)\n",
"run = exp.submit(src)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Get run history details"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"run"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Block to wait till run finishes."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"run.wait_for_completion(show_output=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### System-managed environment\n",
"You can also ask the system to build a new conda environment and execute your scripts in it. The environment is built once and will be reused in subsequent executions as long as the conda dependencies remain unchanged. "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.runconfig import RunConfiguration\n",
"from azureml.core.conda_dependencies import CondaDependencies\n",
"\n",
"run_config_system_managed = RunConfiguration()\n",
"\n",
"run_config_system_managed.environment.python.user_managed_dependencies = False\n",
"run_config_system_managed.auto_prepare_environment = True\n",
"\n",
"# Specify conda dependencies with scikit-learn\n",
"cd = CondaDependencies.create(conda_packages=['scikit-learn'])\n",
"run_config_system_managed.environment.python.conda_dependencies = cd"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Submit script to run in the system-managed environment\n",
"A new conda environment is built based on the conda dependencies object. If you are running this for the first time, this might take up to 5 mninutes. But this conda environment is reused so long as you don't change the conda dependencies."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"src = ScriptRunConfig(source_directory=\"./\", script='train.py', run_config=run_config_system_managed)\n",
"run = exp.submit(src)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Get run history details"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"run"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Block and wait till run finishes."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"run.wait_for_completion(show_output = True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Docker-based execution\n",
"**IMPORTANT**: You must have Docker engine installed locally in order to use this execution mode. If your kernel is already running in a Docker container, such as **Azure Notebooks**, this mode will **NOT** work.\n",
"\n",
"You can also ask the system to pull down a Docker image and execute your scripts in it."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"run_config_docker = RunConfiguration()\n",
"run_config_docker.environment.python.user_managed_dependencies = False\n",
"run_config_docker.auto_prepare_environment = True\n",
"run_config_docker.environment.docker.enabled = True\n",
"run_config_docker.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n",
"\n",
"# Specify conda dependencies with scikit-learn\n",
"cd = CondaDependencies.create(conda_packages=['scikit-learn'])\n",
"run_config_docker.environment.python.conda_dependencies = cd\n",
"\n",
"src = ScriptRunConfig(source_directory=\"./\", script='train.py', run_config=run_config_docker)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Submit script to run in the system-managed environment\n",
"A new conda environment is built based on the conda dependencies object. If you are running this for the first time, this might take up to 5 mninutes. But this conda environment is reused so long as you don't change the conda dependencies.\n",
"\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import subprocess\n",
"\n",
"# Check if Docker is installed and Linux containers are enables\n",
"if subprocess.run(\"docker -v\", shell=True) == 0:\n",
" out = subprocess.check_output(\"docker system info\", shell=True, encoding=\"ascii\").split(\"\\n\")\n",
" if not \"OSType: linux\" in out:\n",
" print(\"Switch Docker engine to use Linux containers.\")\n",
" else:\n",
" run = exp.submit(src)\n",
"else:\n",
" print(\"Docker engine not installed.\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Get run history details\n",
"run"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"run.wait_for_completion(show_output=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Query run metrics"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"query history",
"get metrics"
]
},
"outputs": [],
"source": [
"# get all metris logged in the run\n",
"run.get_metrics()\n",
"metrics = run.get_metrics()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's find the model that has the lowest MSE value logged."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"\n",
"best_alpha = metrics['alpha'][np.argmin(metrics['mse'])]\n",
"\n",
"print('When alpha is {1:0.2f}, we have min MSE {0:0.2f}.'.format(\n",
" min(metrics['mse']), \n",
" best_alpha\n",
"))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"You can also list all the files that are associated with this run record"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"run.get_file_names()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We know the model `ridge_0.40.pkl` is the best performing model from the eariler queries. So let's register it with the workspace."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# supply a model name, and the full path to the serialized model file.\n",
"model = run.register_model(model_name='best_ridge_model', model_path='./outputs/ridge_0.40.pkl')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(model.name, model.version, model.url)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now you can deploy this model following the example in the 01 notebook."
]
}
],
"metadata": {
"authors": [
{
"name": "roastala"
}
],
"kernelspec": {
"display_name": "Python 3.6",
"language": "python",
"name": "python36"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
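The deleted 02.train-on-local notebook above keeps referring to a `train.py` that is not shown in this diff. A rough sketch of what it plausibly contains, assuming it mirrors the Ridge-regression script written out by the ACI notebook further down (file and metric names taken from the cells above; the `mylib.py` helper the notebook mentions is omitted here):

```python
import os
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.externals import joblib
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from azureml.core.run import Run

os.makedirs('./outputs', exist_ok=True)

# The run object lets the script log alpha/mse back to the experiment,
# mirroring the ACI training script shown later in this diff.
run = Run.get_submitted_run()

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

for alpha in np.arange(0.0, 1.0, 0.05):
    reg = Ridge(alpha=alpha)
    reg.fit(X_train, y_train)
    mse = mean_squared_error(reg.predict(X_test), y_test)
    run.log('alpha', alpha)
    run.log('mse', mse)
    # Files written under ./outputs are uploaded with the run, which is how
    # ridge_0.40.pkl becomes available to run.register_model() above.
    joblib.dump(value=reg, filename='outputs/ridge_{0:.2f}.pkl'.format(alpha))
```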

View File

@@ -1,325 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
"\n",
"Licensed under the MIT License."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 03. Train on Azure Container Instance (EXPERIMENTAL)\n",
"\n",
"* Create Workspace\n",
"* Create Project\n",
"* Create `train.py` in the project folder.\n",
"* Configure an ACI (Azure Container Instance) run\n",
"* Execute in ACI"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Prerequisites\n",
"Make sure you go through the [00. Installation and Configuration](00.configuration.ipynb) Notebook first if you haven't."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Check core SDK version number\n",
"import azureml.core\n",
"\n",
"print(\"SDK version:\", azureml.core.VERSION)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Initialize Workspace\n",
"\n",
"Initialize a workspace object from persisted configuration"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"create workspace"
]
},
"outputs": [],
"source": [
"from azureml.core import Workspace\n",
"\n",
"ws = Workspace.from_config()\n",
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create An Experiment\n",
"\n",
"**Experiment** is a logical container in an Azure ML Workspace. It hosts run records which can include run metrics and output artifacts from your experiments."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core import Experiment\n",
"experiment_name = 'train-on-aci'\n",
"experiment = Experiment(workspace = ws, name = experiment_name)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Create a folder to store the training script."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"script_folder = './samples/train-on-aci'\n",
"os.makedirs(script_folder, exist_ok = True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Remote execution on ACI\n",
"\n",
"Use `%%writefile` magic to write training code to `train.py` file under the project folder."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%writefile $script_folder/train.py\n",
"\n",
"import os\n",
"from sklearn.datasets import load_diabetes\n",
"from sklearn.linear_model import Ridge\n",
"from sklearn.metrics import mean_squared_error\n",
"from sklearn.model_selection import train_test_split\n",
"from azureml.core.run import Run\n",
"from sklearn.externals import joblib\n",
"\n",
"import numpy as np\n",
"\n",
"os.makedirs('./outputs', exist_ok=True)\n",
"\n",
"X, y = load_diabetes(return_X_y = True)\n",
"\n",
"run = Run.get_submitted_run()\n",
"\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)\n",
"data = {\"train\": {\"X\": X_train, \"y\": y_train},\n",
" \"test\": {\"X\": X_test, \"y\": y_test}}\n",
"\n",
"# list of numbers from 0.0 to 1.0 with a 0.05 interval\n",
"alphas = np.arange(0.0, 1.0, 0.05)\n",
"\n",
"for alpha in alphas:\n",
" # Use Ridge algorithm to create a regression model\n",
" reg = Ridge(alpha = alpha)\n",
" reg.fit(data[\"train\"][\"X\"], data[\"train\"][\"y\"])\n",
"\n",
" preds = reg.predict(data[\"test\"][\"X\"])\n",
" mse = mean_squared_error(preds, data[\"test\"][\"y\"])\n",
" run.log('alpha', alpha)\n",
" run.log('mse', mse)\n",
" \n",
" model_file_name = 'ridge_{0:.2f}.pkl'.format(alpha)\n",
" with open(model_file_name, \"wb\") as file:\n",
" joblib.dump(value = reg, filename = 'outputs/' + model_file_name)\n",
"\n",
" print('alpha is {0:.2f}, and mse is {1:0.2f}'.format(alpha, mse))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Configure for using ACI\n",
"Linux-based ACI is available in `westus`, `eastus`, `westeurope`, `northeurope`, `westus2` and `southeastasia` regions. See details [here](https://docs.microsoft.com/en-us/azure/container-instances/container-instances-quotas#region-availability)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"configure run"
]
},
"outputs": [],
"source": [
"from azureml.core.runconfig import RunConfiguration\n",
"from azureml.core.conda_dependencies import CondaDependencies\n",
"\n",
"# create a new runconfig object\n",
"run_config = RunConfiguration()\n",
"\n",
"# signal that you want to use ACI to execute script.\n",
"run_config.target = \"containerinstance\"\n",
"\n",
"# ACI container group is only supported in certain regions, which can be different than the region the Workspace is in.\n",
"run_config.container_instance.region = 'eastus'\n",
"\n",
"# set the ACI CPU and Memory \n",
"run_config.container_instance.cpu_cores = 1\n",
"run_config.container_instance.memory_gb = 2\n",
"\n",
"# enable Docker \n",
"run_config.environment.docker.enabled = True\n",
"\n",
"# set Docker base image to the default CPU-based image\n",
"run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n",
"#run_config.environment.docker.base_image = 'microsoft/mmlspark:plus-0.9.9'\n",
"\n",
"# use conda_dependencies.yml to create a conda environment in the Docker image for execution\n",
"run_config.environment.python.user_managed_dependencies = False\n",
"\n",
"# auto-prepare the Docker image when used for execution (if it is not already prepared)\n",
"run_config.auto_prepare_environment = True\n",
"\n",
"# specify CondaDependencies obj\n",
"run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Submit the Experiment\n",
"Finally, run the training job on the ACI"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"remote run",
"aci"
]
},
"outputs": [],
"source": [
"%%time \n",
"from azureml.core.script_run_config import ScriptRunConfig\n",
"\n",
"script_run_config = ScriptRunConfig(source_directory = script_folder,\n",
" script= 'train.py',\n",
" run_config = run_config)\n",
"\n",
"run = experiment.submit(script_run_config)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"remote run",
"aci"
]
},
"outputs": [],
"source": [
"%%time\n",
"# Shows output of the run on stdout.\n",
"run.wait_for_completion(show_output = True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"query history"
]
},
"outputs": [],
"source": [
"# Show run details\n",
"run"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"get metrics"
]
},
"outputs": [],
"source": [
"# get all metris logged in the run\n",
"run.get_metrics()\n",
"metrics = run.get_metrics()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"print('When alpha is {1:0.2f}, we have min MSE {0:0.2f}.'.format(\n",
" min(metrics['mse']), \n",
" metrics['alpha'][np.argmin(metrics['mse'])]\n",
"))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
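The ACI notebook above logs `alpha`/`mse` per iteration and writes the models under `./outputs`, but stops at printing the best metric. A hedged sketch of the natural follow-up, assuming the file-name pattern from the training script above (the local output path is a placeholder):

```python
import numpy as np

# Pick the alpha with the lowest logged MSE, as in the metrics cell above.
metrics = run.get_metrics()
best_alpha = metrics['alpha'][np.argmin(metrics['mse'])]

# Models were saved as outputs/ridge_<alpha>.pkl by the training script,
# so the winning file can be pulled down from the run record.
best_model_file = 'outputs/ridge_{0:.2f}.pkl'.format(best_alpha)
run.download_file(name=best_model_file, output_file_path='./best_ridge.pkl')
print('Downloaded', best_model_file)
```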

View File

@@ -1,321 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
"\n",
"Licensed under the MIT License."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 04. Train in a remote VM (MLC managed DSVM)\n",
"* Create Workspace\n",
"* Create Project\n",
"* Create `train.py` file\n",
"* Create DSVM as Machine Learning Compute (MLC) resource\n",
"* Configure & execute a run in a conda environment in the default miniconda Docker container on DSVM"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Prerequisites\n",
"Make sure you go through the [00. Installation and Configuration](00.configuration.ipynb) Notebook first if you haven't."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Check core SDK version number\n",
"import azureml.core\n",
"\n",
"print(\"SDK version:\", azureml.core.VERSION)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Initialize Workspace\n",
"\n",
"Initialize a workspace object from persisted configuration."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core import Workspace\n",
"\n",
"ws = Workspace.from_config()\n",
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create Experiment\n",
"\n",
"**Experiment** is a logical container in an Azure ML Workspace. It hosts run records which can include run metrics and output artifacts from your experiments."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"experiment_name = 'train-on-remote-vm'\n",
"\n",
"from azureml.core import Experiment\n",
"\n",
"exp = Experiment(workspace = ws, name = experiment_name)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## View `train.py`\n",
"\n",
"For convenience, we created a training script for you. It is printed below as a text, but you can also run `%pfile ./train.py` in a cell to show the file."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"with open('./train.py', 'r') as training_script:\n",
" print(training_script.read())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create Linux DSVM as a compute target\n",
"\n",
"**Note**: If creation fails with a message about Marketplace purchase eligibilty, go to portal.azure.com, start creating DSVM there, and select \"Want to create programmatically\" to enable programmatic creation. Once you've enabled it, you can exit without actually creating VM.\n",
" \n",
"**Note**: By default SSH runs on port 22 and you don't need to specify it. But if for security reasons you switch to a different port (such as 5022), you can append the port number to the address like the example below. [Read more](../../documentation/sdk/ssh-issue.md) on this."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.compute import DsvmCompute\n",
"from azureml.core.compute_target import ComputeTargetException\n",
"\n",
"compute_target_name = 'mydsvm'\n",
"\n",
"try:\n",
" dsvm_compute = DsvmCompute(workspace = ws, name = compute_target_name)\n",
" print('found existing:', dsvm_compute.name)\n",
"except ComputeTargetException:\n",
" print('creating new.')\n",
" dsvm_config = DsvmCompute.provisioning_configuration(vm_size = \"Standard_D2_v2\")\n",
" dsvm_compute = DsvmCompute.create(ws, name = compute_target_name, provisioning_configuration = dsvm_config)\n",
" dsvm_compute.wait_for_completion(show_output = True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Attach an existing Linux DSVM as a compute target\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"'''\n",
" from azureml.core.compute import RemoteCompute \n",
" # if you want to connect using SSH key instead of username/password you can provide parameters private_key_file and private_key_passphrase \n",
" dsvm_compute = RemoteCompute.attach(ws,name=\"attach-from-sdk6\",username=<username>,address=<ipaddress>,ssh_port=22,password=<password>)\n",
"'''"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Configure & Run"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Configure a Docker run with new conda environment on the VM\n",
"You can execute in a Docker container in the VM. If you choose this route, you don't need to install anything on the VM yourself. Azure ML execution service will take care of it for you."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.runconfig import RunConfiguration\n",
"from azureml.core.conda_dependencies import CondaDependencies\n",
"\n",
"\n",
"# Load the \"cpu-dsvm.runconfig\" file (created by the above attach operation) in memory\n",
"run_config = RunConfiguration(framework = \"python\")\n",
"\n",
"# Set compute target to the Linux DSVM\n",
"run_config.target = compute_target_name\n",
"\n",
"# Use Docker in the remote VM\n",
"run_config.environment.docker.enabled = True\n",
"\n",
"# Use CPU base image from DockerHub\n",
"run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n",
"print('Base Docker image is:', run_config.environment.docker.base_image)\n",
"\n",
"# Ask system to provision a new one based on the conda_dependencies.yml file\n",
"run_config.environment.python.user_managed_dependencies = False\n",
"\n",
"# Prepare the Docker and conda environment automatically when executingfor the first time.\n",
"run_config.prepare_environment = True\n",
"\n",
"# specify CondaDependencies obj\n",
"run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Submit the Experiment\n",
"Submit script to run in the Docker image in the remote VM. If you run this for the first time, the system will download the base image, layer in packages specified in the `conda_dependencies.yml` file on top of the base image, create a container and then execute the script in the container."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core import Run\n",
"from azureml.core import ScriptRunConfig\n",
"\n",
"src = ScriptRunConfig(source_directory = '.', script = 'train.py', run_config = run_config)\n",
"run = exp.submit(src)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### View run history details"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"run"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"run.wait_for_completion(show_output = True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Find the best run"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# get all metris logged in the run\n",
"run.get_metrics()\n",
"metrics = run.get_metrics()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"print('When alpha is {1:0.2f}, we have min MSE {0:0.2f}.'.format(\n",
" min(metrics['mse']), \n",
" metrics['alpha'][np.argmin(metrics['mse'])]\n",
"))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Clean up compute resource"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dsvm_compute.delete()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
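The deleted 04.train-on-remote-vm notebook above creates a DsvmCompute target named `mydsvm` and deletes it at the end. A small hedged sketch for checking what compute targets are currently registered in the workspace, assuming `ws` from `Workspace.from_config()` as in the notebook (the `compute_targets` property and `provisioning_state` attribute are assumptions here):

```python
from azureml.core import Workspace

ws = Workspace.from_config()

# List every compute target registered in the workspace, e.g. to confirm
# that 'mydsvm' was created, or that dsvm_compute.delete() removed it.
for name, target in ws.compute_targets.items():
    print(name, type(target).__name__, target.provisioning_state)
```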

View File

@@ -190,6 +190,16 @@
"source": [
"## Create Linux DSVM as a compute target\n",
"\n",
"**Note**: To streamline the compute that Azure Machine Learning creates, we are making updates to support creating only single to multi-node AmlCompute. The DSVMCompute class will be deprecated in a later release, but the DSVM can be created using the below single line command and then attached(like any VM) using the sample code below. Also note, that we only support Linux VMs and the commands below will spin a Linux VM only.\n",
"\n",
"```shell\n",
"# create a DSVM in your resource group\n",
"# note you need to be at least a contributor to the resource group in order to execute this command successfully.\n",
"(myenv) $ az vm create --resource-group <resource_group_name> --name <some_vm_name> --image microsoft-dsvm:linux-data-science-vm-ubuntu:linuxdsvmubuntu:latest --admin-username <username> --admin-password <password> --generate-ssh-keys --authentication-type password\n",
"```\n",
"\n",
"**Note**: You can also use [this url](https://portal.azure.com/#create/microsoft-dsvm.linux-data-science-vm-ubuntulinuxdsvmubuntu) to create the VM using the Azure Portal\n",
"\n",
"**Note**: If creation fails with a message about Marketplace purchase eligibilty, go to portal.azure.com, start creating DSVM there, and select \"Want to create programmatically\" to enable programmatic creation. Once you've enabled it, you can exit without actually creating VM.\n",
" \n",
"**Note**: By default SSH runs on port 22 and you don't need to specify it. But if for security reasons you switch to a different port (such as 5022), you can specify the port number in the provisioning configuration object."
@@ -613,7 +623,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
"version": "3.6.2"
}
},
"nbformat": 4,

View File

@@ -1,257 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
"\n",
"Licensed under the MIT License."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 05. Train in Spark\n",
"* Create Workspace\n",
"* Create Experiment\n",
"* Copy relevant files to the script folder\n",
"* Configure and Run"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Prerequisites\n",
"Make sure you go through the [00. Installation and Configuration](00.configuration.ipynb) Notebook first if you haven't."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Check core SDK version number\n",
"import azureml.core\n",
"\n",
"print(\"SDK version:\", azureml.core.VERSION)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Initialize Workspace\n",
"\n",
"Initialize a workspace object from persisted configuration."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core import Workspace\n",
"\n",
"ws = Workspace.from_config()\n",
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create Experiment\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"experiment_name = 'train-on-remote-vm'\n",
"\n",
"from azureml.core import Experiment\n",
"\n",
"exp = Experiment(workspace = ws, name = experiment_name)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## View `train-spark.py`\n",
"\n",
"For convenience, we created a training script for you. It is printed below as a text, but you can also run `%pfile ./train-spark.py` in a cell to show the file."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"with open('train-spark.py', 'r') as training_script:\n",
" print(training_script.read())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Configure & Run"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Attach an HDI cluster\n",
"To use HDI commpute target:\n",
" 1. Create an Spark for HDI cluster in Azure. Here is some [quick instructions](https://docs.microsoft.com/en-us/azure/machine-learning/desktop-workbench/how-to-create-dsvm-hdi). Make sure you use the Ubuntu flavor, NOT CentOS.\n",
" 2. Enter the IP address, username and password below"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.compute import HDInsightCompute\n",
"\n",
"try:\n",
" # if you want to connect using SSH key instead of username/password you can provide parameters private_key_file and private_key_passphrase\n",
" hdi_compute_new = HDInsightCompute.attach(ws, \n",
" name=\"hdi-attach\", \n",
" address=\"hdi-ignite-demo-ssh.azurehdinsight.net\", \n",
" ssh_port=22, \n",
" username='<username>', \n",
" password='<password>')\n",
"\n",
"except UserErrorException as e:\n",
" print(\"Caught = {}\".format(e.message))\n",
" print(\"Compute config already attached.\")\n",
" \n",
" \n",
"hdi_compute_new.wait_for_completion(show_output=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Configure HDI run"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.runconfig import RunConfiguration\n",
"from azureml.core.conda_dependencies import CondaDependencies\n",
"\n",
"\n",
"# Load the \"cpu-dsvm.runconfig\" file (created by the above attach operation) in memory\n",
"run_config = RunConfiguration(framework = \"python\")\n",
"\n",
"# Set compute target to the Linux DSVM\n",
"run_config.target = hdi_compute.name\n",
"\n",
"# Use Docker in the remote VM\n",
"# run_config.environment.docker.enabled = True\n",
"\n",
"# Use CPU base image from DockerHub\n",
"# run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n",
"# print('Base Docker image is:', run_config.environment.docker.base_image)\n",
"\n",
"# Ask system to provision a new one based on the conda_dependencies.yml file\n",
"run_config.environment.python.user_managed_dependencies = False\n",
"\n",
"# Prepare the Docker and conda environment automatically when executingfor the first time.\n",
"# run_config.prepare_environment = True\n",
"\n",
"# specify CondaDependencies obj\n",
"# run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])\n",
"# load the runconfig object from the \"myhdi.runconfig\" file generated by the attach operaton above."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Submit the script to HDI"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"script_run_config = ScriptRunConfig(source_directory = '.',\n",
" script= 'train-spark.py',\n",
" run_config = run_config)\n",
"run = experiment.submit(script_run_config)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# get the URL of the run history web page\n",
"run"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"run.wait_for_completion(show_output = True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# get all metris logged in the run\n",
"metrics = run.get_metrics()\n",
"print(metrics)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
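The deleted 05.train-in-spark notebook above submits a `train-spark.py` that is not included in this diff. A very rough sketch of what such a script might contain, assuming a Spark session plus the same run-logging calls used in the other training scripts here (the real script may differ):

```python
from pyspark.sql import SparkSession
from azureml.core.run import Run

# Spark session provided by the HDI cluster; run object for metric logging.
spark = SparkSession.builder.appName('train-spark').getOrCreate()
run = Run.get_submitted_run()

# Toy computation so there is something to log back to the experiment.
df = spark.range(0, 1000)
run.log('row_count', df.count())

spark.stop()
```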

View File

@@ -0,0 +1,328 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
"\n",
"Licensed under the MIT License."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 06. Logging APIs\n",
"This notebook showcase various ways to use the Azure Machine Learning service run logging APIs, and view the results in the Azure portal."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Prerequisites\n",
"Make sure you go through the [00. Installation and Configuration](../../00.configuration.ipynb) Notebook first if you haven't. Also make sure you have tqdm and matplotlib installed in the current kernel.\n",
"\n",
"```\n",
"(myenv) $ conda install -y tqdm matplotlib\n",
"```"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Validate Azure ML SDK installation and get version number for debugging purposes"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"install"
]
},
"outputs": [],
"source": [
"from azureml.core import Experiment, Run, Workspace\n",
"import azureml.core\n",
"import numpy as np\n",
"\n",
"# Check core SDK version number\n",
"print(\"SDK version:\", azureml.core.VERSION)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Initialize Workspace\n",
"\n",
"Initialize a workspace object from persisted configuration."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"create workspace"
]
},
"outputs": [],
"source": [
"ws = Workspace.from_config()\n",
"print('Workspace name: ' + ws.name, \n",
" 'Azure region: ' + ws.location, \n",
" 'Subscription id: ' + ws.subscription_id, \n",
" 'Resource group: ' + ws.resource_group, sep='\\n')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Set experiment\n",
"Create a new experiment (or get the one with such name)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"exp = Experiment(workspace=ws, name='logging-api-test')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Log metrics\n",
"We will start a run, and use the various logging APIs to record different types of metrics during the run."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from tqdm import tqdm\n",
"\n",
"# start logging for the run\n",
"run = exp.start_logging()\n",
"\n",
"# log a string value\n",
"run.log(name='Name', value='Logging API run')\n",
"\n",
"# log a numerical value\n",
"run.log(name='Magic Number', value=42)\n",
"\n",
"# Log a list of values. Note this will generate a single-variable line chart.\n",
"run.log_list(name='Fibonacci', value=[0, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89])\n",
"\n",
"# create a dictionary to hold a table of values\n",
"sines = {}\n",
"sines['angle'] = []\n",
"sines['sine'] = []\n",
"\n",
"for i in tqdm(range(-10, 10)):\n",
" # log a metric value repeatedly, this will generate a single-variable line chart.\n",
" run.log(name='Sigmoid', value=1 / (1 + np.exp(-i)))\n",
" angle = i / 2.0\n",
" \n",
" # log a 2 (or more) values as a metric repeatedly. This will generate a 2-variable line chart if you have 2 numerical columns.\n",
" run.log_row(name='Cosine Wave', angle=angle, cos=np.cos(angle))\n",
" \n",
" sines['angle'].append(angle)\n",
" sines['sine'].append(np.sin(angle))\n",
"\n",
"# log a dictionary as a table, this will generate a 2-variable chart if you have 2 numerical columns\n",
"run.log_table(name='Sine Wave', value=sines)\n",
"\n",
"run.complete()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Even after the run is marked completed, you can still log things."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Log an image\n",
"This is how to log a _matplotlib_ pyplot object."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"import matplotlib.pyplot as plt\n",
"angle = np.linspace(-3, 3, 50)\n",
"plt.plot(angle, np.tanh(angle), label='tanh')\n",
"plt.legend(fontsize=12)\n",
"plt.title('Hyperbolic Tangent', fontsize=16)\n",
"plt.grid(True)\n",
"\n",
"run.log_image(name='Hyperbolic Tangent', plot=plt)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Upload a file"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"You can also upload an abitrary file. First, let's create a dummy file locally."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%writefile myfile.txt\n",
"\n",
"This is a dummy file."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now let's upload this file into the run record as a run artifact, and display the properties after the upload."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"props = run.upload_file(name='myfile_in_the_cloud.txt', path_or_stream='./myfile.txt')\n",
"props.serialize()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Examine the run"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now let's take a look at the run detail page in Azure portal. Make sure you checkout the various charts and plots generated/uploaded."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"run"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"You can get all the metrics in that run back."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"run.get_metrics()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"You can also see the files uploaded for this run."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"run.get_file_names()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"You can also download all the files locally."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"os.makedirs('files', exist_ok=True)\n",
"\n",
"for f in run.get_file_names():\n",
" dest = os.path.join('files', f.split('/')[-1])\n",
" print('Downloading file {} to {}...'.format(f, dest))\n",
" run.download_file(f, dest) "
]
}
],
"metadata": {
"authors": [
{
"name": "haining"
}
],
"kernelspec": {
"display_name": "Python 3.6",
"language": "python",
"name": "python36"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
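The new logging notebook above writes metrics, images, and files into the run record. A hedged sketch of reading those back from a fresh session, assuming the `logging-api-test` experiment name used above and the `get_runs` iterator on `Experiment`:

```python
from azureml.core import Experiment, Workspace

ws = Workspace.from_config()
exp = Experiment(workspace=ws, name='logging-api-test')

# Grab a run from the experiment (get_runs is assumed to yield newest first).
latest_run = next(exp.get_runs())
print(latest_run.id, latest_run.get_status())
print(latest_run.get_metrics().get('Magic Number'))
print(latest_run.get_file_names())
```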

View File

@@ -140,7 +140,7 @@
"|-|-|\n",
"|**task**|classification or regression|\n",
"|**primary_metric**|This is the metric that you want to optimize. Classification supports the following primary metrics: <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>balanced_accuracy</i><br><i>average_precision_score_weighted</i><br><i>precision_score_weighted</i>|\n",
"|**max_time_sec**|Time limit in seconds for each iteration.|\n",
"|**iteration_timeout_minutes**|Time limit in minutes for each iteration.|\n",
"|**iterations**|Number of iterations. In each iteration AutoML trains a specific pipeline with the data.|\n",
"|**n_cross_validations**|Number of cross validation splits.|\n",
"|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n",
@@ -157,8 +157,8 @@
"automl_config = AutoMLConfig(task = 'classification',\n",
" debug_log = 'automl_errors.log',\n",
" primary_metric = 'AUC_weighted',\n",
" max_time_sec = 3600,\n",
" iterations = 50,\n",
" iteration_timeout_minutes = 60,\n",
" iterations = 25,\n",
" n_cross_validations = 3,\n",
" verbosity = logging.INFO,\n",
" X = X_train, \n",
@@ -246,7 +246,7 @@
"metadata": {},
"outputs": [],
"source": [
"from azureml.train.widgets import RunDetails\n",
"from azureml.widgets import RunDetails\n",
"RunDetails(local_run).show() "
]
},
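The hunks above rename `max_time_sec` to `iteration_timeout_minutes` and move `RunDetails` to `azureml.widgets`. For context, a hedged sketch of what these classification notebooks typically do once `local_run` completes, assuming the `get_output` method on the AutoML run in this SDK vintage:

```python
# Retrieve the best iteration and its fitted scikit-learn pipeline.
best_run, fitted_model = local_run.get_output()
print(best_run.id)
print(fitted_model)

# The fitted model can then be used directly for predictions, e.g.:
# y_pred = fitted_model.predict(X_test)
```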

View File

@@ -143,7 +143,7 @@
"|-|-|\n",
"|**task**|classification or regression|\n",
"|**primary_metric**|This is the metric that you want to optimize. Regression supports the following primary metrics: <br><i>spearman_correlation</i><br><i>normalized_root_mean_squared_error</i><br><i>r2_score</i><br><i>normalized_mean_absolute_error</i>|\n",
"|**max_time_sec**|Time limit in seconds for each iteration.|\n",
"|**iteration_timeout_minutes**|Time limit in minutes for each iteration.|\n",
"|**iterations**|Number of iterations. In each iteration AutoML trains a specific pipeline with the data.|\n",
"|**n_cross_validations**|Number of cross validation splits.|\n",
"|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n",
@@ -158,7 +158,7 @@
"outputs": [],
"source": [
"automl_config = AutoMLConfig(task = 'regression',\n",
" max_time_sec = 600,\n",
" iteration_timeout_minutes = 10,\n",
" iterations = 10,\n",
" primary_metric = 'spearman_correlation',\n",
" n_cross_validations = 5,\n",
@@ -221,7 +221,7 @@
"metadata": {},
"outputs": [],
"source": [
"from azureml.train.widgets import RunDetails\n",
"from azureml.widgets import RunDetails\n",
"RunDetails(local_run).show() "
]
},
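The same parameter rename applies to the regression notebook. A short hedged sketch of inspecting individual iterations after the run, assuming the same `get_output` and `get_children` methods on the AutoML run:

```python
# Model from a specific iteration rather than the overall best.
third_run, third_model = local_run.get_output(iteration=3)
print(third_run.id)

# Or walk all child iterations and print the primary metric of each.
for child in local_run.get_children():
    print(child.id, child.get_metrics().get('spearman_correlation'))
```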

View File

@@ -128,7 +128,7 @@
"source": [
"from azureml.core.compute import DsvmCompute\n",
"\n",
"dsvm_name = 'mydsvm'\n",
"dsvm_name = 'mydsvma'\n",
"try:\n",
" dsvm_compute = DsvmCompute(ws, dsvm_name)\n",
" print('Found an existing DSVM.')\n",
@@ -192,10 +192,10 @@
"|Property|Description|\n",
"|-|-|\n",
"|**primary_metric**|This is the metric that you want to optimize. Classification supports the following primary metrics: <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>balanced_accuracy</i><br><i>average_precision_score_weighted</i><br><i>precision_score_weighted</i>|\n",
"|**max_time_sec**|Time limit in seconds for each iteration.|\n",
"|**iteration_timeout_minutes**|Time limit in minutes for each iteration.|\n",
"|**iterations**|Number of iterations. In each iteration AutoML trains a specific pipeline with the data.|\n",
"|**n_cross_validations**|Number of cross validation splits.|\n",
"|**concurrent_iterations**|Maximum number of iterations to execute in parallel. This should be less than the number of cores on the DSVM.|"
"|**max_concurrent_iterations**|Maximum number of iterations to execute in parallel. This should be less than the number of cores on the DSVM.|"
]
},
{
@@ -205,12 +205,12 @@
"outputs": [],
"source": [
"automl_settings = {\n",
" \"max_time_sec\": 600,\n",
" \"iteration_timeout_minutes\": 10,\n",
" \"iterations\": 20,\n",
" \"n_cross_validations\": 5,\n",
" \"primary_metric\": 'AUC_weighted',\n",
" \"preprocess\": False,\n",
" \"concurrent_iterations\": 2,\n",
" \"max_concurrent_iterations\": 2,\n",
" \"verbosity\": logging.INFO\n",
"}\n",
"\n",
@@ -286,7 +286,7 @@
"metadata": {},
"outputs": [],
"source": [
"from azureml.train.widgets import RunDetails\n",
"from azureml.widgets import RunDetails\n",
"RunDetails(remote_run).show() "
]
},
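This notebook now imports `RunDetails` from `azureml.widgets`. If the Jupyter widget is unavailable in the kernel, a plain-Python sketch of monitoring the remote run instead (run methods as used elsewhere in these notebooks; `get_children` is an assumption):

```python
# Block until the remote AutoML run finishes, then summarize its children.
remote_run.wait_for_completion(show_output=True)
print(remote_run.get_status())
for child in remote_run.get_children():
    print(child.id, child.get_metrics().get('AUC_weighted'))
```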

View File

@@ -130,11 +130,11 @@
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.compute import BatchAiCompute\n",
"from azureml.core.compute import AmlCompute\n",
"from azureml.core.compute import ComputeTarget\n",
"\n",
"# Choose a name for your cluster.\n",
"batchai_cluster_name = \"mybatchai\"\n",
"batchai_cluster_name = \"cpucluster\"\n",
"\n",
"found = False\n",
"# Check if this compute target already exists in the workspace.\n",
@@ -146,11 +146,9 @@
" \n",
"if not found:\n",
" print('Creating a new compute target...')\n",
" provisioning_config = BatchAiCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\", # for GPU, use \"STANDARD_NC6\"\n",
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\", # for GPU, use \"STANDARD_NC6\"\n",
" #vm_priority = 'lowpriority', # optional\n",
" autoscale_enabled = True,\n",
" cluster_min_nodes = 1, \n",
" cluster_max_nodes = 4)\n",
" max_nodes = 6)\n",
"\n",
" # Create the cluster.\n",
" compute_target = ComputeTarget.create(ws, batchai_cluster_name, provisioning_config)\n",
@@ -215,10 +213,10 @@
"|Property|Description|\n",
"|-|-|\n",
"|**primary_metric**|This is the metric that you want to optimize. Classification supports the following primary metrics: <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>balanced_accuracy</i><br><i>average_precision_score_weighted</i><br><i>precision_score_weighted</i>|\n",
"|**max_time_sec**|Time limit in seconds for each iteration.|\n",
"|**iteration_timeout_minutes**|Time limit in minutes for each iteration.|\n",
"|**iterations**|Number of iterations. In each iteration AutoML trains a specific pipeline with the data.|\n",
"|**n_cross_validations**|Number of cross validation splits.|\n",
"|**concurrent_iterations**|Maximum number of iterations that would be executed in parallel. This should be less than the number of cores on the DSVM.|"
"|**max_concurrent_iterations**|Maximum number of iterations that would be executed in parallel. This should be less than the number of cores on the DSVM.|"
]
},
{
@@ -228,12 +226,12 @@
"outputs": [],
"source": [
"automl_settings = {\n",
" \"max_time_sec\": 120,\n",
" \"iteration_timeout_minutes\": 2,\n",
" \"iterations\": 20,\n",
" \"n_cross_validations\": 5,\n",
" \"primary_metric\": 'AUC_weighted',\n",
" \"preprocess\": False,\n",
" \"concurrent_iterations\": 5,\n",
" \"max_concurrent_iterations\": 5,\n",
" \"verbosity\": logging.INFO\n",
"}\n",
"\n",
@@ -310,7 +308,7 @@
"metadata": {},
"outputs": [],
"source": [
"from azureml.train.widgets import RunDetails\n",
"from azureml.widgets import RunDetails\n",
"RunDetails(remote_run).show() "
]
},
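The hunk above swaps BatchAiCompute for AmlCompute with `max_nodes = 6`. A hedged sketch of inspecting or removing that cluster afterwards, assuming the `cpucluster` name used in the diff and the `compute_targets`/`provisioning_state`/`delete` members on the workspace and compute target:

```python
# Look the cluster up by the name used above and check its state.
compute_target = ws.compute_targets['cpucluster']
print(compute_target.provisioning_state)

# The cluster autoscales down when idle; delete it only if you no longer
# need it for the other notebooks.
# compute_target.delete()
```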

View File

@@ -137,7 +137,7 @@
"# Add your VM information below\n",
"# If a compute with the specified compute_name already exists, it will be used and the dsvm_ip_addr, dsvm_ssh_port, \n",
"# dsvm_username and dsvm_password will be ignored.\n",
"compute_name = 'mydsvm'\n",
"compute_name = 'mydsvmb'\n",
"dsvm_ip_addr = '<<ip_addr>>'\n",
"dsvm_ssh_port = 22\n",
"dsvm_username = '<<username>>'\n",
@@ -243,10 +243,10 @@
"|Property|Description|\n",
"|-|-|\n",
"|**primary_metric**|This is the metric that you want to optimize. Classification supports the following primary metrics: <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>balanced_accuracy</i><br><i>average_precision_score_weighted</i><br><i>precision_score_weighted</i>|\n",
"|**max_time_sec**|Time limit in seconds for each iteration.|\n",
"|**iteration_timeout_minutes**|Time limit in minutes for each iteration.|\n",
"|**iterations**|Number of iterations. In each iteration AutoML trains a specific pipeline with the data.|\n",
"|**n_cross_validations**|Number of cross validation splits.|\n",
"|**concurrent_iterations**|Maximum number of iterations that would be executed in parallel. This should be less than the number of cores on the DSVM.|\n",
"|**max_concurrent_iterations**|Maximum number of iterations that would be executed in parallel. This should be less than the number of cores on the DSVM.|\n",
"|**preprocess**|Setting this to *True* enables AutoML to perform preprocessing on the input to handle *missing data*, and to perform some common *feature extraction*.|\n",
"|**max_cores_per_iteration**|Indicates how many cores on the compute target would be used to train a single pipeline.<br>Default is *1*; you can set it to *-1* to use all cores.|"
]
@@ -258,8 +258,8 @@
"outputs": [],
"source": [
"automl_settings = {\n",
" \"max_time_sec\": 3600,\n",
" \"iterations\": 10,\n",
" \"iteration_timeout_minutes\": 60,\n",
" \"iterations\": 4,\n",
" \"n_cross_validations\": 5,\n",
" \"primary_metric\": 'AUC_weighted',\n",
" \"preprocess\": True,\n",
@@ -312,10 +312,20 @@
"metadata": {},
"outputs": [],
"source": [
"from azureml.train.widgets import RunDetails\n",
"from azureml.widgets import RunDetails\n",
"RunDetails(remote_run).show() "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Wait until the run finishes.\n",
"remote_run.wait_for_completion(show_output = True)"
]
},
{
"cell_type": "markdown",
"metadata": {},

View File

@@ -155,18 +155,18 @@
"source": [
"## Configure AutoML\n",
"\n",
"Instantiate an `AutoMLConfig` object to specify the settings and data used to run the experiment. This includes setting `exit_score`, which should cause the run to complete before the `iterations` count is reached.\n",
"Instantiate an `AutoMLConfig` object to specify the settings and data used to run the experiment. This includes setting `experiment_exit_score`, which should cause the run to complete before the `iterations` count is reached.\n",
"\n",
"|Property|Description|\n",
"|-|-|\n",
"|**task**|classification or regression|\n",
"|**primary_metric**|This is the metric that you want to optimize. Classification supports the following primary metrics: <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>balanced_accuracy</i><br><i>average_precision_score_weighted</i><br><i>precision_score_weighted</i>|\n",
"|**max_time_sec**|Time limit in seconds for each iteration.|\n",
"|**iteration_timeout_minutes**|Time limit in minutes for each iteration.|\n",
"|**iterations**|Number of iterations. In each iteration AutoML trains a specific pipeline with the data.|\n",
"|**n_cross_validations**|Number of cross validation splits.|\n",
"|**preprocess**|Setting this to *True* enables AutoML to perform preprocessing on the input to handle *missing data*, and to perform some common *feature extraction*.|\n",
"|**exit_score**|*double* value indicating the target for *primary_metric*. <br>Once the target is surpassed the run terminates.|\n",
"|**blacklist_algos**|*List* of *strings* indicating machine learning algorithms for AutoML to avoid in this run.<br><br> Allowed values for **Classification**<br><i>LogisticRegression</i><br><i>SGDClassifierWrapper</i><br><i>NBWrapper</i><br><i>BernoulliNB</i><br><i>SVCWrapper</i><br><i>LinearSVMWrapper</i><br><i>KNeighborsClassifier</i><br><i>DecisionTreeClassifier</i><br><i>RandomForestClassifier</i><br><i>ExtraTreesClassifier</i><br><i>LightGBMClassifier</i><br><br>Allowed values for **Regression**<br><i>ElasticNet<i><br><i>GradientBoostingRegressor<i><br><i>DecisionTreeRegressor<i><br><i>KNeighborsRegressor<i><br><i>LassoLars<i><br><i>SGDRegressor<i><br><i>RandomForestRegressor<i><br><i>ExtraTreesRegressor<i>|\n",
"|**experiment_exit_score**|*double* value indicating the target for *primary_metric*. <br>Once the target is surpassed the run terminates.|\n",
"|**blacklist_models**|*List* of *strings* indicating machine learning algorithms for AutoML to avoid in this run.<br><br> Allowed values for **Classification**<br><i>LogisticRegression</i><br><i>SGD</i><br><i>MultinomialNaiveBayes</i><br><i>BernoulliNaiveBayes</i><br><i>SVM</i><br><i>LinearSVM</i><br><i>KNN</i><br><i>DecisionTree</i><br><i>RandomForest</i><br><i>ExtremeRandomTrees</i><br><i>LightGBM</i><br><i>GradientBoosting</i><br><i>TensorFlowDNN</i><br><i>TensorFlowLinearClassifier</i><br><br>Allowed values for **Regression**<br><i>ElasticNet</i><br><i>GradientBoosting</i><br><i>DecisionTree</i><br><i>KNN</i><br><i>LassoLars</i><br><i>SGD</i><br><i>RandomForest</i><br><i>ExtremeRandomTrees</i><br><i>LightGBM</i><br><i>TensorFlowLinearRegressor</i><br><i>TensorFlowDNN</i>|\n",
"|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n",
"|**y**|(sparse) array-like, shape = [n_samples, ], [n_samples, n_classes]<br>Multi-class targets. An indicator matrix turns on multilabel classification. This should be an array of integers.|\n",
"|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder.|"
@@ -181,12 +181,12 @@
"automl_config = AutoMLConfig(task = 'classification',\n",
" debug_log = 'automl_errors.log',\n",
" primary_metric = 'AUC_weighted',\n",
" max_time_sec = 3600,\n",
" iteration_timeout_minutes = 60,\n",
" iterations = 20,\n",
" n_cross_validations = 5,\n",
" preprocess = True,\n",
" exit_score = 0.9984,\n",
" blacklist_algos = ['KNeighborsClassifier','LinearSVMWrapper'],\n",
" experiment_exit_score = 0.9984,\n",
" blacklist_models = ['KNN','LinearSVM'],\n",
" verbosity = logging.INFO,\n",
" X = X_train, \n",
" y = y_train,\n",
@@ -236,7 +236,7 @@
"metadata": {},
"outputs": [],
"source": [
"from azureml.train.widgets import RunDetails\n",
"from azureml.widgets import RunDetails\n",
"RunDetails(local_run).show() "
]
},

View File

@@ -163,7 +163,7 @@
"|-|-|\n",
"|**task**|classification or regression|\n",
"|**primary_metric**|This is the metric that you want to optimize. Classification supports the following primary metrics: <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>balanced_accuracy</i><br><i>average_precision_score_weighted</i><br><i>precision_score_weighted</i>|\n",
"|**max_time_sec**|Time limit in seconds for each iteration.|\n",
"|**iteration_timeout_minutes**|Time limit in minutes for each iteration.|\n",
"|**iterations**|Number of iterations. In each iteration AutoML trains a specific pipeline with the data.|\n",
"|**preprocess**|Setting this to *True* enables AutoML to perform preprocessing on the input to handle *missing data*, and to perform some common *feature extraction*.<br>**Note:** If input data is sparse, you cannot use *True*.|\n",
"|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n",
@@ -182,7 +182,7 @@
"automl_config = AutoMLConfig(task = 'classification',\n",
" debug_log = 'automl_errors.log',\n",
" primary_metric = 'AUC_weighted',\n",
" max_time_sec = 3600,\n",
" iteration_timeout_minutes = 60,\n",
" iterations = 5,\n",
" preprocess = False,\n",
" verbosity = logging.INFO,\n",
@@ -236,7 +236,7 @@
"metadata": {},
"outputs": [],
"source": [
"from azureml.train.widgets import RunDetails\n",
"from azureml.widgets import RunDetails\n",
"RunDetails(local_run).show() "
]
},

View File

@@ -160,7 +160,7 @@
"run_id = automl_runs_project[0] # Replace with your own run_id from above run ids\n",
"assert (run_id in summary_df.keys()), \"Run id not found! Please set run id to a value from above run ids\"\n",
"\n",
"from azureml.train.widgets import RunDetails\n",
"from azureml.widgets import RunDetails\n",
"\n",
"experiment = Experiment(ws, experiment_name)\n",
"ml_run = AutoMLRun(experiment = experiment, run_id = run_id)\n",
@@ -241,7 +241,7 @@
"metadata": {},
"outputs": [],
"source": [
"iteration = 4 # Replace with an iteration number.\n",
"iteration = 1 # Replace with an iteration number.\n",
"best_run, fitted_model = ml_run.get_output(iteration = iteration)\n",
"fitted_model"
]
@@ -299,7 +299,7 @@
"metadata": {},
"outputs": [],
"source": [
"iteration = 4 # Replace with an iteration number.\n",
"iteration = 1 # Replace with an iteration number.\n",
"description = 'AutoML Model'\n",
"tags = None\n",
"ml_run.register_model(description = description, tags = tags, iteration = iteration)\n",

View File

@@ -46,6 +46,7 @@
"import logging\n",
"import os\n",
"import random\n",
"import time\n",
"\n",
"from matplotlib import pyplot as plt\n",
"from matplotlib.pyplot import imshow\n",
@@ -54,6 +55,7 @@
"from sklearn import datasets\n",
"\n",
"import azureml.core\n",
"from azureml.core.compute import DsvmCompute\n",
"from azureml.core.experiment import Experiment\n",
"from azureml.core.workspace import Workspace\n",
"from azureml.train.automl import AutoMLConfig\n",
@@ -122,15 +124,15 @@
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.compute import DsvmCompute\n",
"from azureml.core.compute_target import ComputeTargetException\n",
"\n",
"compute_target_name = 'mydsvm'\n",
"compute_target_name = 'mydsvmc'\n",
"\n",
"try:\n",
" while ws.compute_targets[compute_target_name].provisioning_state == 'Creating':\n",
" time.sleep(1)\n",
" \n",
" dsvm_compute = DsvmCompute(workspace=ws, name=compute_target_name)\n",
" print('found existing:', dsvm_compute.name)\n",
"except ComputeTargetException:\n",
"except:\n",
" dsvm_config = DsvmCompute.provisioning_configuration(vm_size=\"Standard_D2_v2\")\n",
" dsvm_compute = DsvmCompute.create(ws, name=compute_target_name, provisioning_configuration=dsvm_config)\n",
" dsvm_compute.wait_for_completion(show_output=True)"
@@ -234,6 +236,7 @@
"outputs": [],
"source": [
"from azureml.core.runconfig import RunConfiguration\n",
"from azureml.core.conda_dependencies import CondaDependencies\n",
"\n",
"# create a new RunConfig object\n",
"conda_run_config = RunConfiguration(framework=\"python\")\n",
@@ -241,7 +244,10 @@
"# Set compute target to the Linux DSVM\n",
"conda_run_config.target = dsvm_compute\n",
"# set the data reference of the run coonfiguration\n",
"conda_run_config.data_references = {ds.name: dr}"
"conda_run_config.data_references = {ds.name: dr}\n",
"\n",
"cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]'], conda_packages=['numpy'])\n",
"conda_run_config.environment.python.conda_dependencies = cd"
]
},
{
@@ -307,10 +313,10 @@
"|Property|Description|\n",
"|-|-|\n",
"|**primary_metric**|This is the metric that you want to optimize.<br> Classification supports the following primary metrics <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>balanced_accuracy</i><br><i>average_precision_score_weighted</i><br><i>precision_score_weighted</i>|\n",
"|**max_time_sec**|Time limit in seconds for each iteration|\n",
"|**iteration_timeout_minutes**|Time limit in minutes for each iteration|\n",
"|**iterations**|Number of iterations. In each iteration Auto ML trains a specific pipeline with the data|\n",
"|**n_cross_validations**|Number of cross validation splits|\n",
"|**concurrent_iterations**|Max number of iterations that would be executed in parallel. This should be less than the number of cores on the DSVM\n",
"|**max_concurrent_iterations**|Max number of iterations that would be executed in parallel. This should be less than the number of cores on the DSVM\n",
"|**preprocess**| *True/False* <br>Setting this to *True* enables Auto ML to perform preprocessing <br>on the input to handle *missing data*, and perform some common *feature extraction*|\n",
"|**max_cores_per_iteration**| Indicates how many cores on the compute target would be used to train a single pipeline.<br> Default is *1*, you can set it to *-1* to use all cores|"
]
@@ -322,12 +328,12 @@
"outputs": [],
"source": [
"automl_settings = {\n",
" \"max_time_sec\": 3600,\n",
" \"iterations\": 10,\n",
" \"iteration_timeout_minutes\": 60,\n",
" \"iterations\": 4,\n",
" \"n_cross_validations\": 5,\n",
" \"primary_metric\": 'AUC_weighted',\n",
" \"preprocess\": True,\n",
" \"max_cores_per_iteration\": 2,\n",
" \"max_cores_per_iteration\": 1,\n",
" \"verbosity\": logging.INFO\n",
"}\n",
"automl_config = AutoMLConfig(task = 'classification',\n",
@@ -378,10 +384,20 @@
"metadata": {},
"outputs": [],
"source": [
"from azureml.train.widgets import RunDetails\n",
"from azureml.widgets import RunDetails\n",
"RunDetails(remote_run).show() "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Wait until the run finishes.\n",
"remote_run.wait_for_completion(show_output = True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
@@ -423,7 +439,7 @@
"outputs": [],
"source": [
"# Cancel the ongoing experiment and stop scheduling new iterations\n",
"remote_run.cancel()\n",
"# remote_run.cancel()\n",
"\n",
"# Cancel iteration 1 and move onto iteration 2\n",
"# remote_run.cancel_iteration(1)"

View File

@@ -121,7 +121,7 @@
"|-|-|\n",
"|**task**|classification or regression|\n",
"|**primary_metric**|This is the metric that you want to optimize. Classification supports the following primary metrics: <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>balanced_accuracy</i><br><i>average_precision_score_weighted</i><br><i>precision_score_weighted</i>|\n",
"|**max_time_sec**|Time limit in seconds for each iteration.|\n",
"|**iteration_timeout_minutes**|Time limit in minutes for each iteration.|\n",
"|**iterations**|Number of iterations. In each iteration AutoML trains a specific pipeline with the data.|\n",
"|**n_cross_validations**|Number of cross validation splits.|\n",
"|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n",
@@ -143,7 +143,7 @@
" name = experiment_name,\n",
" debug_log = 'automl_errors.log',\n",
" primary_metric = 'AUC_weighted',\n",
" max_time_sec = 1200,\n",
" iteration_timeout_minutes = 20,\n",
" iterations = 10,\n",
" n_cross_validations = 2,\n",
" verbosity = logging.INFO,\n",
@@ -226,6 +226,7 @@
"import pickle\n",
"import json\n",
"import numpy\n",
"import azureml.train.automl\n",
"from sklearn.externals import joblib\n",
"from azureml.core.model import Model\n",
"\n",
@@ -298,15 +299,12 @@
"metadata": {},
"outputs": [],
"source": [
"%%writefile myenv.yml\n",
"name: myenv\n",
"channels:\n",
" - defaults\n",
"dependencies:\n",
" - pip:\n",
" - numpy==1.14.2\n",
" - scikit-learn==0.19.2\n",
" - azureml-sdk[notebooks,automl]==<<azureml-version>>"
"from azureml.core.conda_dependencies import CondaDependencies\n",
"\n",
"myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn'], pip_packages=['azureml-sdk[automl]'])\n",
"\n",
"conda_env_file_name = 'myenv.yml'\n",
"myenv.save_to_file('.', conda_env_file_name)"
]
},
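
The generated `myenv.yml` is then referenced from the container image configuration. A hedged sketch, assuming the scoring script is named `score.py`:

```python
from azureml.core.image import ContainerImage

# Sketch: pair the scoring script with the conda environment file created above.
# 'score.py' is an assumed file name; conda_env_file_name is 'myenv.yml' from the previous cell.
image_config = ContainerImage.image_configuration(execution_script='score.py',
                                                  runtime='python',
                                                  conda_file=conda_env_file_name)
```
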
{
@@ -316,14 +314,14 @@
"outputs": [],
"source": [
"# Substitute the actual version number in the environment file.\n",
"\n",
"conda_env_file_name = 'myenv.yml'\n",
"# This is not strictly needed in this notebook because the model should have been generated using the current SDK version.\n",
"# However, we include this in case this code is used on an experiment from a previous SDK version.\n",
"\n",
"with open(conda_env_file_name, 'r') as cefr:\n",
" content = cefr.read()\n",
"\n",
"with open(conda_env_file_name, 'w') as cefw:\n",
" cefw.write(content.replace('<<azureml-version>>', dependencies['azureml-sdk']))\n",
" cefw.write(content.replace(azureml.core.VERSION, dependencies['azureml-sdk']))\n",
"\n",
"# Substitute the actual model id in the script file.\n",
"\n",
@@ -363,7 +361,10 @@
" image_config = image_config, \n",
" workspace = ws)\n",
"\n",
"image.wait_for_creation(show_output = True)"
"image.wait_for_creation(show_output = True)\n",
"\n",
"if image.creation_state == 'Failed':\n",
" print(\"Image build log at: \" + image.image_build_log_uri)"
]
},
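
Once the image builds successfully, it can be deployed, for example to Azure Container Instances. A rough sketch, in which the service name and sizing are placeholders:

```python
from azureml.core.webservice import AciWebservice, Webservice

# Sketch: deploy the built image to ACI. Name and resource sizes are assumptions.
aci_config = AciWebservice.deploy_configuration(cpu_cores=1, memory_gb=2)
aci_service = Webservice.deploy_from_image(workspace=ws,
                                           name='automl-aci-service',
                                           image=image,
                                           deployment_config=aci_config)
aci_service.wait_for_deployment(show_output=True)
```
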
{

View File

@@ -128,7 +128,7 @@
"automl_classifier = AutoMLConfig(task = 'classification',\n",
" debug_log = 'automl_errors.log',\n",
" primary_metric = 'AUC_weighted',\n",
" max_time_sec = 3600,\n",
" iteration_timeout_minutes = 60,\n",
" iterations = 10,\n",
" n_cross_validations = 2,\n",
" verbosity = logging.INFO,\n",
@@ -139,7 +139,7 @@
"automl_sample_weight = AutoMLConfig(task = 'classification',\n",
" debug_log = 'automl_errors.log',\n",
" primary_metric = 'AUC_weighted',\n",
" max_time_sec = 3600,\n",
" iteration_timeout_minutes = 60,\n",
" iterations = 10,\n",
" n_cross_validations = 2,\n",
" verbosity = logging.INFO,\n",

View File

@@ -155,7 +155,7 @@
"outputs": [],
"source": [
"automl_settings = {\n",
" \"max_time_sec\" : 600,\n",
" \"iteration_timeout_minutes\" : 10,\n",
" \"iterations\" : 2,\n",
" \"primary_metric\" : 'AUC_weighted',\n",
" \"preprocess\" : False,\n",
@@ -226,7 +226,7 @@
"metadata": {},
"outputs": [],
"source": [
"from azureml.train.widgets import RunDetails\n",
"from azureml.widgets import RunDetails\n",
"RunDetails(local_run).show()"
]
},

View File

@@ -60,6 +60,7 @@
"source": [
"import logging\n",
"import os\n",
"import time\n",
"\n",
"import pandas as pd\n",
"\n",
@@ -156,7 +157,7 @@
"outputs": [],
"source": [
"automl_settings = {\n",
" \"max_time_sec\" : 600,\n",
" \"iteration_timeout_minutes\" : 10,\n",
" \"iterations\" : 2,\n",
" \"primary_metric\" : 'AUC_weighted',\n",
" \"preprocess\" : False,\n",
@@ -185,8 +186,12 @@
"metadata": {},
"outputs": [],
"source": [
"dsvm_name = 'mydsvm'\n",
"dsvm_name = 'mydsvmd'\n",
"\n",
"try:\n",
" while ws.compute_targets[dsvm_name].provisioning_state == 'Creating':\n",
" time.sleep(1)\n",
" \n",
" dsvm_compute = DsvmCompute(ws, dsvm_name)\n",
" print('Found existing DVSM.')\n",
"except:\n",
@@ -196,6 +201,23 @@
" dsvm_compute.wait_for_completion(show_output = True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.runconfig import RunConfiguration\n",
"from azureml.core.conda_dependencies import CondaDependencies\n",
"\n",
"conda_run_config = RunConfiguration(framework=\"python\")\n",
"\n",
"conda_run_config.target = dsvm_compute\n",
"\n",
"cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]'], conda_packages=['numpy'])\n",
"conda_run_config.environment.python.conda_dependencies = cd"
]
},
{
"cell_type": "markdown",
"metadata": {},
@@ -214,7 +236,7 @@
"automl_config = AutoMLConfig(task = 'classification',\n",
" debug_log = 'automl_errors.log',\n",
" path = project_folder,\n",
" compute_target = dsvm_compute,\n",
" run_configuration=conda_run_config,\n",
" X = X,\n",
" y = y,\n",
" **automl_settings)"
@@ -253,7 +275,7 @@
"metadata": {},
"outputs": [],
"source": [
"from azureml.train.widgets import RunDetails\n",
"from azureml.widgets import RunDetails\n",
"RunDetails(remote_run).show()"
]
},

View File

@@ -0,0 +1,374 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
"\n",
"Licensed under the MIT License."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# AutoML 14: Explain classification model and visualize the explanation\n",
"\n",
"In this example we use the sklearn's [iris dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_iris.html) to showcase how you can use the AutoML Classifier for a simple classification problem.\n",
"\n",
"Make sure you have executed the [00.configuration](00.configuration.ipynb) before running this notebook.\n",
"\n",
"In this notebook you would see\n",
"1. Creating an Experiment in an existing Workspace\n",
"2. Instantiating AutoMLConfig\n",
"3. Training the Model using local compute and explain the model\n",
"4. Visualization model's feature importance in widget\n",
"5. Explore best model's explanation\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Install AzureML Explainer SDK "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!pip install azureml_sdk[explain]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create Experiment\n",
"\n",
"As part of the setup you have already created a <b>Workspace</b>. For AutoML you would need to create an <b>Experiment</b>. An <b>Experiment</b> is a named object in a <b>Workspace</b>, which is used to run experiments."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import logging\n",
"import os\n",
"import random\n",
"\n",
"import pandas as pd\n",
"import azureml.core\n",
"from azureml.core.experiment import Experiment\n",
"from azureml.core.workspace import Workspace\n",
"from azureml.train.automl import AutoMLConfig\n",
"from azureml.train.automl.run import AutoMLRun"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ws = Workspace.from_config()\n",
"\n",
"# choose a name for experiment\n",
"experiment_name = 'automl-local-classification'\n",
"# project folder\n",
"project_folder = './sample_projects/automl-local-classification-model-explanation'\n",
"\n",
"experiment=Experiment(ws, experiment_name)\n",
"\n",
"output = {}\n",
"output['SDK version'] = azureml.core.VERSION\n",
"output['Subscription ID'] = ws.subscription_id\n",
"output['Workspace Name'] = ws.name\n",
"output['Resource Group'] = ws.resource_group\n",
"output['Location'] = ws.location\n",
"output['Project Directory'] = project_folder\n",
"output['Experiment Name'] = experiment.name\n",
"pd.set_option('display.max_colwidth', -1)\n",
"pd.DataFrame(data = output, index = ['']).T"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Diagnostics\n",
"\n",
"Opt-in diagnostics for better experience, quality, and security of future releases"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.telemetry import set_diagnostics_collection\n",
"set_diagnostics_collection(send_diagnostics=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load Iris Data Set"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn import datasets\n",
"\n",
"iris = datasets.load_iris()\n",
"y = iris.target\n",
"X = iris.data\n",
"\n",
"features = iris.feature_names\n",
"\n",
"from sklearn.model_selection import train_test_split\n",
"X_train, X_test, y_train, y_test = train_test_split(X,\n",
" y,\n",
" test_size=0.1,\n",
" random_state=100,\n",
" stratify=y)\n",
"\n",
"X_train = pd.DataFrame(X_train, columns=features)\n",
"X_test = pd.DataFrame(X_test, columns=features)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Instantiate Auto ML Config\n",
"\n",
"Instantiate a AutoMLConfig object. This defines the settings and data used to run the experiment.\n",
"\n",
"|Property|Description|\n",
"|-|-|\n",
"|**task**|classification or regression|\n",
"|**primary_metric**|This is the metric that you want to optimize.<br> Classification supports the following primary metrics <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>balanced_accuracy</i><br><i>average_precision_score_weighted</i><br><i>precision_score_weighted</i>|\n",
"|**max_time_sec**|Time limit in minutes for each iterations|\n",
"|**iterations**|Number of iterations. In each iteration Auto ML trains the data with a specific pipeline|\n",
"|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n",
"|**y**|(sparse) array-like, shape = [n_samples, ], [n_samples, n_classes]<br>Multi-class targets. An indicator matrix turns on multilabel classification. This should be an array of integers. |\n",
"|**X_valid**|(sparse) array-like, shape = [n_samples, n_features]|\n",
"|**y_valid**|(sparse) array-like, shape = [n_samples, ], [n_samples, n_classes]|\n",
"|**model_explainability**|Indicate to explain each trained pipeline or not |\n",
"|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder. |"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"automl_config = AutoMLConfig(task = 'classification',\n",
" debug_log = 'automl_errors.log',\n",
" primary_metric = 'AUC_weighted',\n",
" max_time_sec = 12000,\n",
" iterations = 10,\n",
" verbosity = logging.INFO,\n",
" X = X_train, \n",
" y = y_train,\n",
" X_valid = X_test,\n",
" y_valid = y_test,\n",
" model_explainability=True,\n",
" path=project_folder)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Training the Model\n",
"\n",
"You can call the submit method on the experiment object and pass the run configuration. For Local runs the execution is synchronous. Depending on the data and number of iterations this can run for while.\n",
"You will see the currently running iterations printing to the console."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"local_run = experiment.submit(automl_config, show_output=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Exploring the results"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Widget for monitoring runs\n",
"\n",
"The widget will sit on \"loading\" until the first iteration completed, then you will see an auto-updating graph and table show up. It refreshed once per minute, so you should see the graph update as child runs complete.\n",
"\n",
"NOTE: The widget displays a link at the bottom. This links to a web-ui to explore the individual run details."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.widgets import RunDetails\n",
"RunDetails(local_run).show() "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"child_run = next(local_run.get_children())\n",
"RunDetails(child_run).show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Retrieve the Best Model\n",
"\n",
"Below we select the best pipeline from our iterations. The *get_output* method on automl_classifier returns the best run and the fitted model for the last *fit* invocation. There are overloads on *get_output* that allow you to retrieve the best run and fitted model for *any* logged metric or a particular *iteration*."
]
},
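
For instance, the overloads mentioned above might be used roughly like this; the metric name and iteration number are placeholders:

```python
# Sketch: retrieve the best run/model for a specific logged metric, or for a specific iteration.
best_by_metric_run, best_by_metric_model = local_run.get_output(metric='accuracy')
third_run, third_model = local_run.get_output(iteration=3)
```
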
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"best_run, fitted_model = local_run.get_output()\n",
"print(best_run)\n",
"print(fitted_model)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Best Model 's explanation\n",
"\n",
"Retrieve the explanation from the best_run. And explanation information includes:\n",
"\n",
"1.\tshap_values: The explanation information generated by shap lib\n",
"2.\texpected_values: The expected value of the model applied to set of X_train data.\n",
"3.\toverall_summary: The model level feature importance values sorted in descending order\n",
"4.\toverall_imp: The feature names sorted in the same order as in overall_summary\n",
"5.\tper_class_summary: The class level feature importance values sorted in descending order. Only available for the classification case\n",
"6.\tper_class_imp: The feature names sorted in the same order as in per_class_summary. Only available for the classification case"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.train.automl.automlexplainer import retrieve_model_explanation\n",
"\n",
"shap_values, expected_values, overall_summary, overall_imp, per_class_summary, per_class_imp = \\\n",
" retrieve_model_explanation(best_run)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(overall_summary)\n",
"print(overall_imp)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(per_class_summary)\n",
"print(per_class_imp)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Beside retrieve the existed model explanation information, explain the model with different train/test data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.train.automl.automlexplainer import explain_model\n",
"\n",
"shap_values, expected_values, overall_summary, overall_imp, per_class_summary, per_class_imp = \\\n",
" explain_model(fitted_model, X_train, X_test)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(overall_summary)\n",
"print(overall_imp)"
]
}
],
"metadata": {
"authors": [
{
"name": "xif"
}
],
"kernelspec": {
"display_name": "Python 3.6",
"language": "python",
"name": "python36"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -24,10 +24,7 @@
"2. Configure AutoML using `AutoMLConfig` which enables an extra ensembling iteration.\n",
"3. Train the model using local compute.\n",
"4. Explore the results.\n",
"5. Test the best fitted model.\n",
"\n",
"<b>Disclaimers / Limitations </b>\n",
"- Currently only Train/Validation split is supported; support for cross-validation will be coming soon.\n"
"5. Test the best fitted model.\n"
]
},
{
@@ -143,7 +140,7 @@
"|-|-|\n",
"|**task**|classification or regression|\n",
"|**primary_metric**|This is the metric that you want to optimize. Classification supports the following primary metrics: <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>balanced_accuracy</i><br><i>average_precision_score_weighted</i><br><i>precision_score_weighted</i>|\n",
"|**max_time_sec**|Time limit in seconds for each iteration.|\n",
"|**iteration_timeout_minutes**|Time limit in minutes for each iteration.|\n",
"|**iterations**|Number of iterations. In each iteration AutoML trains a specific pipeline with the data.|\n",
"|**n_cross_validations**|Number of cross validation splits.|\n",
"|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n",
@@ -164,7 +161,7 @@
"automl_config = AutoMLConfig(task = 'classification',\n",
" debug_log = 'classification.log',\n",
" primary_metric = 'AUC_weighted',\n",
" max_time_sec = 3600,\n",
" iteration_timeout_minutes = 60,\n",
" iterations = 10,\n",
" verbosity = logging.INFO,\n",
" X = X_train, \n",
@@ -258,7 +255,7 @@
"metadata": {},
"outputs": [],
"source": [
"from azureml.train.widgets import RunDetails\n",
"from azureml.widgets import RunDetails\n",
"RunDetails(local_run).show() "
]
},
@@ -404,7 +401,7 @@
}
],
"kernelspec": {
"display_name": "Python 3.6",
"display_name": "Python [default]",
"language": "python",
"name": "python36"
},

View File

@@ -24,10 +24,7 @@
"2. Configure AutoML using `AutoMLConfig`which enables an extra ensembling iteration.\n",
"3. Train the model using remote compute.\n",
"4. Explore the results.\n",
"5. Test the best fitted model.\n",
"\n",
"<b>Disclaimers / Limitations </b>\n",
"- Currently only Train/Validation split is supported; support for cross-validation will be coming soon.\n"
"5. Test the best fitted model.\n"
]
},
{
@@ -143,6 +140,16 @@
"In this example, the `get_data()` function returns data using scikit-learn's `diabetes` dataset."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"if not os.path.exists(project_folder):\n",
" os.makedirs(project_folder)"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -179,7 +186,7 @@
"|-|-|\n",
"|**task**|classification or regression|\n",
"|**primary_metric**|This is the metric that you want to optimize. Regression supports the following primary metrics: <br><i>spearman_correlation</i><br><i>normalized_root_mean_squared_error</i><br><i>r2_score</i><br><i>normalized_mean_absolute_error</i>|\n",
"|**max_time_sec**|Time limit in seconds for each iteration.|\n",
"|**iteration_timeout_minutes**|Time limit in minutes for each iteration.|\n",
"|**iterations**|Number of iterations. In each iteration AutoML trains a specific pipeline with the data.|\n",
"|**enable_ensembling**|Flag to enable an ensembling iteration after all the other iterations complete.|\n",
"|**ensemble_iterations**|Number of iterations during which we choose a fitted pipeline to be part of the final ensemble.|\n",
@@ -193,7 +200,7 @@
"outputs": [],
"source": [
"automl_config = AutoMLConfig(task = 'regression',\n",
" max_time_sec = 600,\n",
" iteration_timeout_minutes = 10,\n",
" iterations = 20,\n",
" primary_metric = 'spearman_correlation',\n",
" debug_log = 'regression.log',\n",
@@ -257,7 +264,7 @@
"metadata": {},
"outputs": [],
"source": [
"from azureml.train.widgets import RunDetails\n",
"from azureml.widgets import RunDetails\n",
"RunDetails(local_run).show() "
]
},
@@ -420,7 +427,7 @@
}
],
"kernelspec": {
"display_name": "Python 3.6",
"display_name": "Python [default]",
"language": "python",
"name": "python36"
},

View File

@@ -172,7 +172,7 @@ bash automl_setup_linux.sh
- Using DataPrep for reading data
- [14.auto-ml-model-explanation.ipynb](14.auto-ml-model-explanation.ipynb)
- Dataset: seaborn's [iris dataset](https://seaborn.pydata.org/generated/seaborn.load_dataset.html)
- Dataset: sklearn's [iris dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_iris.html)
- Explaining the AutoML classification pipeline
- Visualizing feature importance in widget
@@ -200,17 +200,46 @@ bash automl_setup_linux.sh
|Property|Description|Default|
|-|-|-|
|**primary_metric**|This is the metric that you want to optimize.<br><br> Classification supports the following primary metrics <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>balanced_accuracy</i><br><i>average_precision_score_weighted</i><br><i>precision_score_weighted</i><br><br> Regression supports the following primary metrics <br><i>spearman_correlation</i><br><i>normalized_root_mean_squared_error</i><br><i>r2_score</i><br><i>normalized_mean_absolute_error</i><br><i>normalized_root_mean_squared_log_error</i>| Classification: accuracy <br><br> Regression: spearman_correlation
|**max_time_sec**|Time limit in seconds for each iteration|None|
|**iteration_timeout_minutes**|Time limit in minutes for each iteration|None|
|**iterations**|Number of iterations. In each iteration AutoML trains a specific pipeline with the data. To get the best result, use at least 100.|100|
|**n_cross_validations**|Number of cross validation splits|None|
|**validation_size**|Size of validation set as percentage of all training samples|None|
|**concurrent_iterations**|Max number of iterations that would be executed in parallel|1|
|**max_concurrent_iterations**|Max number of iterations that would be executed in parallel|1|
|**preprocess**|*True/False* <br>Setting this to *True* enables preprocessing <br>on the input to handle missing data, and perform some common feature extraction<br>*Note: If input data is Sparse you cannot use preprocess=True*|False|
|**max_cores_per_iteration**| Indicates how many cores on the compute target would be used to train a single pipeline.<br> You can set it to *-1* to use all cores|1|
|**exit_score**|*double* value indicating the target for *primary_metric*. <br> Once the target is surpassed the run terminates|None|
|**blacklist_algos**|*Array* of *strings* indicating pipelines to ignore for Auto ML.<br><br> Allowed values for **Classification**<br><i>LogisticRegression</i><br><i>SGDClassifierWrapper</i><br><i>NBWrapper</i><br><i>BernoulliNB</i><br><i>SVCWrapper</i><br><i>LinearSVMWrapper</i><br><i>KNeighborsClassifier</i><br><i>DecisionTreeClassifier</i><br><i>RandomForestClassifier</i><br><i>ExtraTreesClassifier</i><br><i>LightGBMClassifier</i><br><br>Allowed values for **Regression**<br><i>ElasticNet</i><br><i>GradientBoostingRegressor</i><br><i>DecisionTreeRegressor</i><br><i>KNeighborsRegressor</i><br><i>LassoLars</i><br><i>SGDRegressor</i><br><i>RandomForestRegressor</i><br><i>ExtraTreesRegressor</i>|None|
|**experiment_exit_score**|*double* value indicating the target for *primary_metric*. <br> Once the target is surpassed the run terminates|None|
|**blacklist_models**|*Array* of *strings* indicating models to ignore for Auto ML from the list of models.|None|
|**whitelist_models**|*Array* of *strings* indicating the only models to be used for Auto ML, from the list of models.|None|
<a name="cvsplits"></a>
## List of models for white list/blacklist
**Classification**
<br><i>LogisticRegression</i>
<br><i>SGD</i>
<br><i>MultinomialNaiveBayes</i>
<br><i>BernoulliNaiveBayes</i>
<br><i>SVM</i>
<br><i>LinearSVM</i>
<br><i>KNN</i>
<br><i>DecisionTree</i>
<br><i>RandomForest</i>
<br><i>ExtremeRandomTrees</i>
<br><i>LightGBM</i>
<br><i>GradientBoosting</i>
<br><i>TensorFlowDNN</i>
<br><i>TensorFlowLinearClassifier</i>
<br><br>**Regression**
<br><i>ElasticNet</i>
<br><i>GradientBoosting</i>
<br><i>DecisionTree</i>
<br><i>KNN</i>
<br><i>LassoLars</i>
<br><i>SGD</i>
<br><i>RandomForest</i>
<br><i>ExtremeRandomTrees</i>
<br><i>LightGBM</i>
<br><i>TensorFlowLinearRegressor</i>
<br><i>TensorFlowDNN</i>
## Cross validation split options
### K-Folds Cross Validation
Use the *n_cross_validations* setting to specify the number of cross validations. The training data set will be randomly split into *n_cross_validations* folds of equal size. During each cross validation round, one of the folds will be used for validation of the model trained on the remaining folds. This process repeats for *n_cross_validations* rounds until each fold is used once as the validation set. Finally, the average scores across all *n_cross_validations* rounds will be reported, and the corresponding model will be retrained on the whole training data set.
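
For example, a configuration combining k-fold cross validation with a restricted model list might look like the sketch below; the specific values and the variables `X`/`y` are placeholders rather than settings taken from any notebook in this repo.

```python
from azureml.train.automl import AutoMLConfig

# Sketch: 5-fold cross validation, restricted to two model families from the list above.
automl_config = AutoMLConfig(task='classification',
                             primary_metric='AUC_weighted',
                             iterations=10,
                             n_cross_validations=5,
                             whitelist_models=['LightGBM', 'LogisticRegression'],
                             X=X,
                             y=y)
```
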
@@ -268,10 +297,10 @@ The main code of the file must be indented so that it is under this condition.
# Troubleshooting
## Iterations fail and the log contains "MemoryError"
This can be caused by insufficient memory on the DSVM. AutoML loads all training data into memory. So, the available memory should be more than the training data size.
If you are using a remote DSVM, memory is needed for each concurrent iteration. The concurrent_iterations setting specifies the maximum concurrent iterations. For example, if the training data size is 8Gb and concurrent_iterations is set to 10, the minimum memory required is at least 80Gb.
To resolve this issue, allocate a DSVM with more memory or reduce the value specified for concurrent_iterations.
If you are using a remote DSVM, memory is needed for each concurrent iteration. The max_concurrent_iterations setting specifies the maximum concurrent iterations. For example, if the training data size is 8Gb and max_concurrent_iterations is set to 10, the minimum memory required is at least 80Gb.
To resolve this issue, allocate a DSVM with more memory or reduce the value specified for max_concurrent_iterations.
## Iterations show as "Not Responding" in the RunDetails widget.
This can be caused by too many concurrent iterations for a remote DSVM. Each concurrent iteration usually takes 100% of a core when it is running. Some iterations can use multiple cores. So, the concurrent_iterations setting should always be less than the number of cores of the DSVM.
To resolve this issue, try reducing the value specified for the concurrent_iterations setting.
This can be caused by too many concurrent iterations for a remote DSVM. Each concurrent iteration usually takes 100% of a core when it is running. Some iterations can use multiple cores. So, the max_concurrent_iterations setting should always be less than the number of cores of the DSVM.
To resolve this issue, try reducing the value specified for the max_concurrent_iterations setting.

View File

@@ -4,7 +4,7 @@ dependencies:
# Currently Azure ML only supports 3.5.2 and later.
- python=3.6
- nb_conda
- matplotlib
- matplotlib==2.1.0
- numpy>=1.11.0,<1.15.0
- cython
- urllib3<1.24
@@ -12,7 +12,19 @@ dependencies:
- scikit-learn>=0.18.0,<=0.19.1
- pandas>=0.22.0,<0.23.0
# Required for azuremlftk
- dill
- pyodbc
- statsmodels
- numexpr
- keras
- distributed>=1.21.5,<1.24
- pip:
# Required for azuremlftk
- https://azuremlpackages.blob.core.windows.net/forecasting/azuremlftk-0.1.18313.5a1-py3-none-any.whl
# Required packages for AzureML execution, history, and data preparation.
- azureml-sdk[automl,notebooks]
- pandas_ml

31
automl/automl_env_mac.yml Normal file
View File

@@ -0,0 +1,31 @@
name: azure_automl
dependencies:
# The python interpreter version.
# Currently Azure ML only supports 3.5.2 and later.
- python=3.6
- nb_conda
- matplotlib==2.1.0
- numpy>=1.15.3
- cython
- urllib3<1.24
- scipy>=0.19.0,<0.20.0
- scikit-learn>=0.18.0,<=0.19.1
- pandas>=0.22.0,<0.23.0
# Required for azuremlftk
- dill
- pyodbc
- statsmodels
- numexpr
- keras
- distributed>=1.21.5,<1.24
- pip:
# Required for azuremlftk
- https://azuremlpackages.blob.core.windows.net/forecasting/azuremlftk-0.1.18313.5a1-py3-none-any.whl
# Required packages for AzureML execution, history, and data preparation.
- azureml-sdk[automl,notebooks]
- pandas_ml

View File

@@ -23,10 +23,12 @@ if errorlevel 1 goto ErrorExit
call pip install psutil
call jupyter nbextension install --py azureml.train.widgets --user
call python -m ipykernel install --user --name %conda_env_name% --display-name "Python (%conda_env_name%)"
call jupyter nbextension install --py azureml.widgets --user
if errorlevel 1 goto ErrorExit
call jupyter nbextension enable --py azureml.train.widgets --user
call jupyter nbextension enable --py azureml.widgets --user
if errorlevel 1 goto ErrorExit
echo.

View File

@@ -26,8 +26,9 @@ then
else
conda env create -f $AUTOML_ENV_FILE -n $CONDA_ENV_NAME &&
source activate $CONDA_ENV_NAME &&
jupyter nbextension install --py azureml.train.widgets --user &&
jupyter nbextension enable --py azureml.train.widgets --user &&
python -m ipykernel install --user --name $CONDA_ENV_NAME --display-name "Python ($CONDA_ENV_NAME)" &&
jupyter nbextension install --py azureml.widgets --user &&
jupyter nbextension enable --py azureml.widgets --user &&
echo "" &&
echo "" &&
echo "***************************************" &&

View File

@@ -11,7 +11,7 @@ fi
if [ "$AUTOML_ENV_FILE" == "" ]
then
AUTOML_ENV_FILE="automl_env.yml"
AUTOML_ENV_FILE="automl_env_mac.yml"
fi
if [ ! -f $AUTOML_ENV_FILE ]; then
@@ -27,8 +27,10 @@ else
conda env create -f $AUTOML_ENV_FILE -n $CONDA_ENV_NAME &&
source activate $CONDA_ENV_NAME &&
conda install lightgbm -c conda-forge -y &&
jupyter nbextension install --py azureml.train.widgets --user &&
jupyter nbextension enable --py azureml.train.widgets --user &&
python -m ipykernel install --user --name $CONDA_ENV_NAME --display-name "Python ($CONDA_ENV_NAME)" &&
jupyter nbextension install --py azureml.widgets --user &&
jupyter nbextension enable --py azureml.widgets --user &&
pip install numpy==1.15.3
echo "" &&
echo "" &&
echo "***************************************" &&
@@ -46,3 +48,4 @@ then
fi

View File

@@ -76,9 +76,9 @@
"## Train model\n",
"\n",
"### Create a remote compute target\n",
"You will need to create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) to execute your training script on. In this tutorial, you create an [Azure Batch AI](https://docs.microsoft.com/azure/batch-ai/overview) cluster as your training compute resource. This code creates a cluster for you if it does not already exist in your workspace.\n",
"You will need to create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) to execute your training script on. In this tutorial, you create AmlCompute as your training compute resource. This code creates new compute for you if it does not already exist in your workspace.\n",
"\n",
"**Creation of the cluster takes approximately 5 minutes.** If the cluster is already in your workspace this code will skip the cluster creation process."
"**Creation of the compute takes approximately 5 minutes.** If the compute is already in your workspace this code will skip the creation process."
]
},
{
@@ -87,7 +87,7 @@
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.compute import ComputeTarget, BatchAiCompute\n",
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
"from azureml.core.compute_target import ComputeTargetException\n",
"\n",
"# choose a name for your cluster\n",
@@ -98,10 +98,8 @@
" print('Found existing compute target.')\n",
"except ComputeTargetException:\n",
" print('Creating a new compute target...')\n",
" compute_config = BatchAiCompute.provisioning_configuration(vm_size='STANDARD_NC6', \n",
" autoscale_enabled=True,\n",
" cluster_min_nodes=0, \n",
" cluster_max_nodes=4)\n",
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6', \n",
" max_nodes=4)\n",
"\n",
" # create the cluster\n",
" compute_target = ComputeTarget.create(ws, cluster_name, compute_config)\n",
@@ -251,7 +249,7 @@
"metadata": {},
"outputs": [],
"source": [
"from azureml.train.widgets import RunDetails\n",
"from azureml.widgets import RunDetails\n",
"RunDetails(run).show()"
]
},
@@ -587,7 +585,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.6"
"version": "3.6.2"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
@@ -599,7 +597,7 @@
"state": {}
},
"d146cbdbd4e04710b3eebc15a66957ce": {
"model_module": "azureml_train_widgets",
"model_module": "azureml_widgets",
"model_module_version": "1.0.0",
"model_name": "ShowRunDetailsModel",
"state": {

View File

@@ -40,8 +40,8 @@
"metadata": {},
"outputs": [],
"source": [
"!jupyter nbextension install --py --user azureml.train.widgets\n",
"!jupyter nbextension enable --py --user azureml.train.widgets"
"!jupyter nbextension install --py --user azureml.widgets\n",
"!jupyter nbextension enable --py --user azureml.widgets"
]
},
{

View File

@@ -52,7 +52,7 @@
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.compute import BatchAiCompute, ComputeTarget\n",
"from azureml.core.compute import AmlCompute, ComputeTarget\n",
"from azureml.core.datastore import Datastore\n",
"from azureml.data.data_reference import DataReference\n",
"from azureml.pipeline.core import Pipeline, PipelineData\n",
@@ -77,27 +77,25 @@
"import os\n",
"\n",
"# choose a name for your cluster\n",
"batchai_cluster_name = os.environ.get(\"BATCHAI_CLUSTER_NAME\", \"gpu-cluster\")\n",
"cluster_min_nodes = os.environ.get(\"BATCHAI_CLUSTER_MIN_NODES\", 0)\n",
"cluster_max_nodes = os.environ.get(\"BATCHAI_CLUSTER_MAX_NODES\", 1)\n",
"compute_name = os.environ.get(\"BATCHAI_CLUSTER_NAME\", \"gpucluster\")\n",
"compute_min_nodes = os.environ.get(\"BATCHAI_CLUSTER_MIN_NODES\", 0)\n",
"compute_max_nodes = os.environ.get(\"BATCHAI_CLUSTER_MAX_NODES\", 4)\n",
"vm_size = os.environ.get(\"BATCHAI_CLUSTER_SKU\", \"STANDARD_NC6\")\n",
"autoscale_enabled = os.environ.get(\"BATCHAI_CLUSTER_AUTOSCALE_ENABLED\", True)\n",
"\n",
"\n",
"if batchai_cluster_name in ws.compute_targets:\n",
" compute_target = ws.compute_targets[batchai_cluster_name]\n",
" if compute_target and type(compute_target) is BatchAiCompute:\n",
" print('found compute target. just use it. ' + batchai_cluster_name)\n",
"if compute_name in ws.compute_targets:\n",
" compute_target = ws.compute_targets[compute_name]\n",
" if compute_target and type(compute_target) is AmlCompute:\n",
" print('found compute target. just use it. ' + compute_name)\n",
"else:\n",
" print('creating a new compute target...')\n",
" provisioning_config = BatchAiCompute.provisioning_configuration(vm_size = vm_size, # NC6 is GPU-enabled\n",
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = vm_size, # NC6 is GPU-enabled\n",
" vm_priority = 'lowpriority', # optional\n",
" autoscale_enabled = autoscale_enabled,\n",
" cluster_min_nodes = cluster_min_nodes, \n",
" cluster_max_nodes = cluster_max_nodes)\n",
" min_nodes = compute_min_nodes, \n",
" max_nodes = compute_max_nodes)\n",
"\n",
" # create the cluster\n",
" compute_target = ComputeTarget.create(ws, batchai_cluster_name, provisioning_config)\n",
" compute_target = ComputeTarget.create(ws, compute_name, provisioning_config)\n",
" \n",
" # can poll for a minimum number of nodes and for a specific timeout. \n",
" # if no min node count is provided it will use the scale settings for the cluster\n",
@@ -415,13 +413,15 @@
"metadata": {},
"outputs": [],
"source": [
"cd = CondaDependencies.create(pip_packages=[\"tensorflow-gpu==1.4.0\", \"azureml-defaults\"])\n",
"from azureml.core.runconfig import DEFAULT_GPU_IMAGE\n",
"\n",
"cd = CondaDependencies.create(pip_packages=[\"tensorflow-gpu==1.10.0\", \"azureml-defaults\"])\n",
"\n",
"# Runconfig\n",
"batchai_run_config = RunConfiguration(conda_dependencies=cd)\n",
"batchai_run_config.environment.docker.enabled = True\n",
"batchai_run_config.environment.docker.gpu_support = True\n",
"batchai_run_config.environment.docker.base_image = \"microsoft/mmlspark:gpu-0.12\"\n",
"batchai_run_config.environment.docker.base_image = DEFAULT_GPU_IMAGE\n",
"batchai_run_config.environment.spark.precache_packages = False"
]
},
@@ -465,7 +465,7 @@
" \"--label_dir\", label_dir, \n",
" \"--output_dir\", output_dir, \n",
" \"--batch_size\", batch_size_param],\n",
" target=compute_target,\n",
" compute_target=compute_target,\n",
" inputs=[input_images, label_dir],\n",
" outputs=[output_dir],\n",
" runconfig=batchai_run_config,\n",
@@ -496,7 +496,7 @@
"metadata": {},
"outputs": [],
"source": [
"from azureml.train.widgets import RunDetails\n",
"from azureml.widgets import RunDetails\n",
"RunDetails(pipeline_run).show()"
]
},
@@ -656,7 +656,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
"version": "3.6.2"
}
},
"nbformat": 4,

View File

@@ -511,7 +511,7 @@
"\n",
"New BSD License\n",
"\n",
"Copyright (c) 2007\u00e2\u20ac\u201c2018 The scikit-learn developers.\n",
"Copyright (c) 20072018 The scikit-learn developers.\n",
"All rights reserved.\n",
"\n",
"\n",

View File

@@ -104,7 +104,7 @@
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.compute import ComputeTarget, BatchAiCompute\n",
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
"from azureml.core.compute_target import ComputeTargetException\n",
"\n",
"# choose a name for your cluster\n",
@@ -115,10 +115,8 @@
" print('Found existing compute target.')\n",
"except ComputeTargetException:\n",
" print('Creating a new compute target...')\n",
" compute_config = BatchAiCompute.provisioning_configuration(vm_size='STANDARD_NC6', \n",
" autoscale_enabled=True,\n",
" cluster_min_nodes=0, \n",
" cluster_max_nodes=4)\n",
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6', \n",
" max_nodes=6)\n",
"\n",
" # create the cluster\n",
" compute_target = ComputeTarget.create(ws, cluster_name, compute_config)\n",
@@ -333,7 +331,7 @@
"\n",
"script_params = {\n",
" '--data_dir': ds_data,\n",
" '--num_epochs': 25,\n",
" '--num_epochs': 10,\n",
" '--output_dir': './outputs'\n",
"}\n",
"\n",
@@ -387,10 +385,17 @@
"metadata": {},
"outputs": [],
"source": [
"from azureml.train.widgets import RunDetails\n",
"from azureml.widgets import RunDetails\n",
"RunDetails(run).show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Alternatively, you can block until the script has completed training before running more code."
]
},
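
A minimal blocking call, assuming the estimator run object is named `run` as in the earlier cells, would be:

```python
# Sketch: block the notebook until the remote training run finishes, streaming logs.
run.wait_for_completion(show_output=True)
```
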
{
"cell_type": "code",
"execution_count": null,
@@ -440,7 +445,7 @@
" policy=early_termination_policy,\n",
" primary_metric_name='best_val_acc',\n",
" primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,\n",
" max_total_runs=20,\n",
" max_total_runs=8,\n",
" max_concurrent_runs=4)"
]
},
@@ -475,11 +480,18 @@
"metadata": {},
"outputs": [],
"source": [
"from azureml.train.widgets import RunDetails\n",
"from azureml.widgets import RunDetails\n",
"\n",
"RunDetails(hyperdrive_run).show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Or block until the HyperDrive sweep has completed:"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -564,7 +576,7 @@
"metadata": {},
"source": [
"### Create environment file\n",
"Then, we will need to create an environment file (`myenv.yml`) that specifies all of the scoring script's package dependencies. This file is used to ensure that all of those dependencies are installed in the Docker image by AML. In this case, we need to specify `torch`, `torchvision`, `pillow`, and `azureml-sdk`."
"Then, we will need to create an environment file (`myenv.yml`) that specifies all of the scoring script's package dependencies. This file is used to ensure that all of those dependencies are installed in the Docker image by AML. In this case, we need to specify `azureml-core`, `torch` and `torchvision`."
]
},
{
@@ -573,16 +585,14 @@
"metadata": {},
"outputs": [],
"source": [
"%%writefile myenv.yml\n",
"name: myenv\n",
"channels:\n",
" - defaults\n",
"dependencies:\n",
" - pip:\n",
" - torch\n",
" - torchvision\n",
" - pillow\n",
" - azureml-core"
"from azureml.core.conda_dependencies import CondaDependencies \n",
"\n",
"myenv = CondaDependencies.create(pip_packages=['azureml-core', 'torch', 'torchvision'])\n",
"\n",
"with open(\"myenv.yml\",\"w\") as f:\n",
" f.write(myenv.serialize_to_string())\n",
" \n",
"print(myenv.serialize_to_string())"
]
},
{
@@ -634,25 +644,7 @@
"metadata": {},
"source": [
"### Deploy the registered model\n",
"Finally, let's deploy a web service from our registered model. First, retrieve the model from your workspace."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.model import Model\n",
"\n",
"model = Model(ws, name='pytorch-hymenoptera')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Then, deploy the web service using the ACI config and image config files created in the previous steps. We pass the `model` object in a list to the `models` parameter. If you would like to deploy more than one registered model, append the additional models to this list."
"Finally, let's deploy a web service from our registered model. Deploy the web service using the ACI config and image config files created in the previous steps. We pass the `model` object in a list to the `models` parameter. If you would like to deploy more than one registered model, append the additional models to this list."
]
},
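
A rough sketch of that deployment call, assuming `aciconfig` and `image_config` were created in earlier (unchanged) cells and `model` is the registered model object returned by `run.register_model`, might be:

```python
from azureml.core.webservice import Webservice

# Sketch: deploy the registered model as an ACI web service.
# aciconfig, image_config, model and the service name are assumptions.
service = Webservice.deploy_from_model(workspace=ws,
                                       name='aci-hymenoptera',
                                       models=[model],
                                       image_config=image_config,
                                       deployment_config=aciconfig)
service.wait_for_deployment(show_output=True)
print(service.state)
```
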
{
@@ -728,18 +720,10 @@
"metadata": {},
"outputs": [],
"source": [
"import os, json, base64\n",
"from io import BytesIO\n",
"import os, json\n",
"from PIL import Image\n",
"import matplotlib.pyplot as plt\n",
"\n",
"def imgToBase64(img):\n",
" \"\"\"Convert pillow image to base64-encoded image\"\"\"\n",
" imgio = BytesIO()\n",
" img.save(imgio, 'JPEG')\n",
" img_str = base64.b64encode(imgio.getvalue())\n",
" return img_str.decode('utf-8')\n",
"\n",
"test_img = os.path.join('hymenoptera_data', 'val', 'bees', '10870992_eebeeb3a12.jpg') #arbitary image from val dataset\n",
"plt.imshow(Image.open(test_img))"
]
@@ -750,18 +734,42 @@
"metadata": {},
"outputs": [],
"source": [
"base64Img = imgToBase64(Image.open(test_img))\n",
"import torch\n",
"from torchvision import transforms\n",
" \n",
"result = service.run(input_data=json.dumps({'data': base64Img}))\n",
"print(json.loads(result))"
"def preprocess(image_file):\n",
" \"\"\"Preprocess the input image.\"\"\"\n",
" data_transforms = transforms.Compose([\n",
" transforms.Resize(256),\n",
" transforms.CenterCrop(224),\n",
" transforms.ToTensor(),\n",
" transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])\n",
" ])\n",
"\n",
" image = Image.open(image_file)\n",
" image = data_transforms(image).float()\n",
" image = torch.tensor(image)\n",
" image = image.unsqueeze(0)\n",
" return image.numpy()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"input_data = preprocess(test_img)\n",
"result = service.run(input_data=json.dumps({'data': input_data.tolist()}))\n",
"print(result)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Delete web service\n",
"Once you no longer need the web service, you should delete it."
"## Clean up\n",
"Once you no longer need the web service, you can delete it with a simple API call."
]
},
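
Assuming the web service object from the deployment step, that call is simply:

```python
# Sketch: remove the ACI web service once it is no longer needed.
service.delete()
```
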
{
@@ -795,7 +803,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
"version": "3.6.2"
},
"msauthor": "minxia"
},

View File

@@ -5,35 +5,10 @@ import torch
import torch.nn as nn
from torchvision import transforms
import json
import base64
from io import BytesIO
from PIL import Image
from azureml.core.model import Model
def preprocess_image(image_file):
"""Preprocess the input image."""
data_transforms = transforms.Compose([
transforms.Resize(256),
transforms.CenterCrop(224),
transforms.ToTensor(),
transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
image = Image.open(image_file)
image = data_transforms(image).float()
image = torch.tensor(image)
image = image.unsqueeze(0)
return image
def base64ToImg(base64ImgString):
base64Img = base64ImgString.encode('utf-8')
decoded_img = base64.b64decode(base64Img)
return BytesIO(decoded_img)
def init():
global model
model_path = Model.get_model_path('pytorch-hymenoptera')
@@ -42,15 +17,14 @@ def init():
def run(input_data):
img = base64ToImg(json.loads(input_data)['data'])
img = preprocess_image(img)
input_data = torch.tensor(json.loads(input_data)['data'])
# get prediction
output = model(img)
with torch.no_grad():
output = model(input_data)
classes = ['ants', 'bees']
softmax = nn.Softmax(dim=1)
pred_probs = softmax(model(img)).detach().numpy()[0]
pred_probs = softmax(output).numpy()[0]
index = torch.argmax(output, 1)
result = {"label": classes[index], "probability": str(pred_probs[index])}

View File

@@ -59,6 +59,7 @@ def train_model(model, criterion, optimizer, scheduler, num_epochs, data_dir):
dataloaders, dataset_sizes, class_names = load_data(data_dir)
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
since = time.time()
best_model_wts = copy.deepcopy(model.state_dict())
@@ -146,12 +147,15 @@ def fine_tune_model(num_epochs, data_dir, learning_rate, momentum):
criterion = nn.CrossEntropyLoss()
# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(model_ft.parameters(), lr=learning_rate, momentum=momentum)
optimizer_ft = optim.SGD(model_ft.parameters(),
lr=learning_rate, momentum=momentum)
# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)
exp_lr_scheduler = lr_scheduler.StepLR(
optimizer_ft, step_size=7, gamma=0.1)
model = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler, num_epochs, data_dir)
model = train_model(model_ft, criterion, optimizer_ft,
exp_lr_scheduler, num_epochs, data_dir)
return model
@@ -159,15 +163,19 @@ def fine_tune_model(num_epochs, data_dir, learning_rate, momentum):
def main():
# get command-line arguments
parser = argparse.ArgumentParser()
parser.add_argument('--data_dir', type=str, help='directory of training data')
parser.add_argument('--num_epochs', type=int, default=25, help='number of epochs to train')
parser.add_argument('--data_dir', type=str,
help='directory of training data')
parser.add_argument('--num_epochs', type=int, default=25,
help='number of epochs to train')
parser.add_argument('--output_dir', type=str, help='output directory')
parser.add_argument('--learning_rate', type=float, default=0.001, help='learning rate')
parser.add_argument('--learning_rate', type=float,
default=0.001, help='learning rate')
parser.add_argument('--momentum', type=float, default=0.9, help='momentum')
args = parser.parse_args()
print("data directory is: " + args.data_dir)
model = fine_tune_model(args.num_epochs, args.data_dir, args.learning_rate, args.momentum)
model = fine_tune_model(args.num_epochs, args.data_dir,
args.learning_rate, args.momentum)
os.makedirs(args.output_dir, exist_ok=True)
torch.save(model, os.path.join(args.output_dir, 'model.pt'))

View File

@@ -103,7 +103,7 @@
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.compute import ComputeTarget, BatchAiCompute\n",
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
"from azureml.core.compute_target import ComputeTargetException\n",
"\n",
"# choose a name for your cluster\n",
@@ -114,10 +114,8 @@
" print('Found existing compute target.')\n",
"except ComputeTargetException:\n",
" print('Creating a new compute target...')\n",
" compute_config = BatchAiCompute.provisioning_configuration(vm_size='STANDARD_NC6', \n",
" autoscale_enabled=True,\n",
" cluster_min_nodes=0, \n",
" cluster_max_nodes=4)\n",
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6', \n",
" max_nodes=6)\n",
"\n",
" # create the cluster\n",
" compute_target = ComputeTarget.create(ws, cluster_name, compute_config)\n",
@@ -264,7 +262,7 @@
"metadata": {},
"outputs": [],
"source": [
"from azureml.train.widgets import RunDetails\n",
"from azureml.widgets import RunDetails\n",
"RunDetails(run).show()"
]
},

View File

@@ -225,9 +225,23 @@
"metadata": {},
"source": [
"## Upload MNIST dataset to default datastore \n",
"A [datastore](https://docs.microsoft.com/azure/machine-learning/service/how-to-access-data) is a place where data can be stored that is then made accessible to a Run either by means of mounting or copying the data to the compute target. A datastore can either be backed by an Azure Blob Storage or and Azure File Share (ADLS will be supported in the future). For simple data handling, each workspace provides a default datastore that can be used, in case the data is not already in Blob Storage or File Share.\n",
"\n",
"In this next step, we will upload the training and test set into the workspace's default datastore, which we will then later be mount on a Batch AI cluster for training.\n"
"A [datastore](https://docs.microsoft.com/azure/machine-learning/service/how-to-access-data) is a place where data can be stored that is then made accessible to a Run either by means of mounting or copying the data to the compute target. A datastore can either be backed by an Azure Blob Storage or and Azure File Share (ADLS will be supported in the future). For simple data handling, each workspace provides a default datastore that can be used, in case the data is not already in Blob Storage or File Share."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ds = ws.get_default_datastore()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"In this next step, we will upload the training and test set into the workspace's default datastore, which we will then later be mount on a Batch AI cluster for training."
]
},
{
@@ -236,7 +250,6 @@
"metadata": {},
"outputs": [],
"source": [
"ds = ws.get_default_datastore()\n",
"ds.upload(src_dir='./data/mnist', target_path='mnist', overwrite=True, show_progress=True)"
]
},
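The uploaded MNIST folder is later consumed by mounting the datastore path on the compute target. A sketch of how that mount is typically passed to the training script, assuming the script takes a `--data-folder` argument (the argument name is illustrative):

# reference the 'mnist' folder on the default datastore as a mount point for the run
script_params = {
    '--data-folder': ds.path('mnist').as_mount()
}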
@@ -252,7 +265,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"If we could not find the cluster with the given name in the previous cell, then we will create a new cluster here. We will create a Batch AI Cluster of `STANDARD_D2_V2` CPU VMs. This process is broken down into 3 steps:\n",
"If we could not find the cluster with the given name in the previous cell, then we will create a new cluster here. We will create a Batch AI Cluster of `STANDARD_NC6` GPU VMs. This process is broken down into 3 steps:\n",
"1. create the configuration (this step is local and only takes a second)\n",
"2. create the Batch AI cluster (this step will take about **20 seconds**)\n",
"3. provision the VMs to bring the cluster to the initial size (of 1 in this case). This step will take about **3-5 minutes** and is providing only sparse output in the process. Please make sure to wait until the call returns before moving to the next cell"
@@ -264,7 +277,7 @@
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.compute import ComputeTarget, BatchAiCompute\n",
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
"from azureml.core.compute_target import ComputeTargetException\n",
"\n",
"# choose a name for your cluster\n",
@@ -273,17 +286,15 @@
"try:\n",
" # look for the existing cluster by name\n",
" compute_target = ComputeTarget(workspace=ws, name=cluster_name)\n",
" if type(compute_target) is BatchAiCompute:\n",
" if type(compute_target) is AmlCompute:\n",
" print('Found existing compute target {}.'.format(cluster_name))\n",
" else:\n",
" print('{} exists but it is not a Batch AI cluster. Please choose a different name.'.format(cluster_name))\n",
"except ComputeTargetException:\n",
" print('Creating a new compute target...')\n",
" compute_config = BatchAiCompute.provisioning_configuration(vm_size=\"STANDARD_NC6\", # GPU-based VM\n",
" compute_config = AmlCompute.provisioning_configuration(vm_size=\"STANDARD_NC6\", # GPU-based VM\n",
" #vm_priority='lowpriority', # optional\n",
" autoscale_enabled=True,\n",
" cluster_min_nodes=0, \n",
" cluster_max_nodes=4)\n",
" max_nodes=6)\n",
"\n",
" # create the cluster\n",
" compute_target = ComputeTarget.create(ws, cluster_name, compute_config)\n",
@@ -432,7 +443,7 @@
"metadata": {},
"outputs": [],
"source": [
"run = exp.submit(config=est)"
"run = exp.submit(est)"
]
},
{
@@ -460,7 +471,7 @@
"metadata": {},
"outputs": [],
"source": [
"from azureml.train.widgets import RunDetails\n",
"from azureml.widgets import RunDetails\n",
"RunDetails(run).show()"
]
},
@@ -737,9 +748,10 @@
"source": [
"htc = HyperDriveRunConfig(estimator=est, \n",
" hyperparameter_sampling=ps, \n",
" policy=policy, \n",
" primary_metric_name='validation_acc', \n",
" primary_metric_goal=PrimaryMetricGoal.MAXIMIZE, \n",
" max_total_runs=20,\n",
" max_total_runs=8,\n",
" max_concurrent_runs=4)"
]
},
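For context, `ps` and `policy` are defined earlier in the notebook. A sketch of typical definitions, assuming the training script exposes `--batch-size` and `--learning-rate` arguments (hypothetical names here):

from azureml.train.hyperdrive import RandomParameterSampling, BanditPolicy, choice, uniform

# random sampling over two illustrative hyperparameters
ps = RandomParameterSampling({
    '--batch-size': choice(32, 64, 128),
    '--learning-rate': uniform(0.001, 0.1)
})

# early termination: stop runs whose metric falls more than 15% short of the best run so far
policy = BanditPolicy(evaluation_interval=1, slack_factor=0.15)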
@@ -814,7 +826,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(best_run.get_file_names()"
"print(best_run.get_file_names())"
]
},
{
@@ -1029,7 +1041,7 @@
"test_samples = bytes(test_samples, encoding='utf8')\n",
"\n",
"# predict using the deployed model\n",
"result = json.loads(service.run(input_data=test_samples))\n",
"result = service.run(input_data=test_samples)\n",
"\n",
"# compare actual value vs. the predicted values:\n",
"i = 0\n",
@@ -1126,23 +1138,6 @@
"source": [
"service.delete()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We can also delete the computer cluster. But remember if you set the `cluster_min_nodes` value to 0 when you created the cluster, once the jobs are finished, all nodes are deleted automatically. So you don't have to delete the cluster itself since it won't incur any cost. Next time you submit jobs to it, the cluster will then automatically \"grow\" up to the `cluster_min_nodes` which is set to 4."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# delete the cluster if you need to.\n",
"compute_target.delete()"
]
}
],
"metadata": {
@@ -1168,505 +1163,7 @@
"pygments_lexer": "ipython3",
"version": "3.6.6"
},
"nbpresent": {
"slides": {
"05bb34ad-74b0-42b3-9654-8357d1ba9c99": {
"id": "05bb34ad-74b0-42b3-9654-8357d1ba9c99",
"prev": "851089af-9725-40c9-8f0b-9bf892b2b1fe",
"regions": {
"23fb396d-50f9-4770-adb3-0d6abcb40767": {
"attrs": {
"height": 0.8,
"width": 0.8,
"x": 0.1,
"y": 0.1
},
"content": {
"cell": "2039d2d5-aca6-4f25-a12f-df9ae6529cae",
"part": "whole"
},
"id": "23fb396d-50f9-4770-adb3-0d6abcb40767"
}
}
},
"11bebe14-d1dc-476d-a31a-5828b9c3adf0": {
"id": "11bebe14-d1dc-476d-a31a-5828b9c3adf0",
"prev": "502648cb-26fe-496b-899f-84c8fe1dcbc0",
"regions": {
"a42499db-623e-4414-bea2-ff3617fd8fc5": {
"attrs": {
"height": 0.8,
"width": 0.8,
"x": 0.1,
"y": 0.1
},
"content": {
"cell": "4788c040-27a2-4dc1-8ed0-378a99b3a255",
"part": "whole"
},
"id": "a42499db-623e-4414-bea2-ff3617fd8fc5"
}
}
},
"134f92d0-6389-4226-af51-1134ae8e8278": {
"id": "134f92d0-6389-4226-af51-1134ae8e8278",
"prev": "36b8728c-32ad-4941-be03-5cef51cdc430",
"regions": {
"b6d82a77-2d58-4b9e-a375-3103214b826c": {
"attrs": {
"height": 0.8,
"width": 0.8,
"x": 0.1,
"y": 0.1
},
"content": {
"cell": "7ab0e6d0-1f1c-451b-8ac5-687da44a8287",
"part": "whole"
},
"id": "b6d82a77-2d58-4b9e-a375-3103214b826c"
}
}
},
"282a2421-697b-4fd0-9485-755abf5a0c18": {
"id": "282a2421-697b-4fd0-9485-755abf5a0c18",
"prev": "a8b9ceb9-b38f-4489-84df-b644c6fe28f2",
"regions": {
"522fec96-abe7-4a34-bd34-633733afecc8": {
"attrs": {
"height": 0.8,
"width": 0.8,
"x": 0.1,
"y": 0.1
},
"content": {
"cell": "d58e7785-c2ee-4a45-8e3d-4c538bf8075a",
"part": "whole"
},
"id": "522fec96-abe7-4a34-bd34-633733afecc8"
}
}
},
"2dfec088-8a70-411a-9199-904ef3fa2383": {
"id": "2dfec088-8a70-411a-9199-904ef3fa2383",
"prev": "282a2421-697b-4fd0-9485-755abf5a0c18",
"regions": {
"0535fcb6-3a2b-4b46-98a7-3ebb1a38c47e": {
"attrs": {
"height": 0.8,
"width": 0.8,
"x": 0.1,
"y": 0.1
},
"content": {
"cell": "c377ea0c-0cd9-4345-9be2-e20fb29c94c3",
"part": "whole"
},
"id": "0535fcb6-3a2b-4b46-98a7-3ebb1a38c47e"
}
}
},
"36a814c9-c540-4a6d-92d9-c03553d3d2c2": {
"id": "36a814c9-c540-4a6d-92d9-c03553d3d2c2",
"prev": "b52e4d09-5186-44e5-84db-3371c087acde",
"regions": {
"8bfba503-9907-43f0-b1a6-46a0b4311793": {
"attrs": {
"height": 0.8,
"width": 0.8,
"x": 0.1,
"y": 0.1
},
"content": {
"cell": "d5e4a56c-dfac-4346-be83-1c15b503deac",
"part": "whole"
},
"id": "8bfba503-9907-43f0-b1a6-46a0b4311793"
}
}
},
"36b8728c-32ad-4941-be03-5cef51cdc430": {
"id": "36b8728c-32ad-4941-be03-5cef51cdc430",
"prev": "05bb34ad-74b0-42b3-9654-8357d1ba9c99",
"regions": {
"a36a5bdf-7f62-49b0-8634-e155a98851dc": {
"attrs": {
"height": 0.8,
"width": 0.8,
"x": 0.1,
"y": 0.1
},
"content": {
"cell": "e33dfc47-e7df-4623-a7a6-ab6bcf944629",
"part": "whole"
},
"id": "a36a5bdf-7f62-49b0-8634-e155a98851dc"
}
}
},
"3f136f2a-f14c-4a4b-afea-13380556a79c": {
"id": "3f136f2a-f14c-4a4b-afea-13380556a79c",
"prev": "54cb8dfd-a89c-4922-867b-3c87d8b67cd3",
"regions": {
"80ecf237-d1b0-401e-83d2-6d04b7fcebd3": {
"attrs": {
"height": 0.8,
"width": 0.8,
"x": 0.1,
"y": 0.1
},
"content": {
"cell": "7debeb2b-ecea-414f-9b50-49657abb3e6a",
"part": "whole"
},
"id": "80ecf237-d1b0-401e-83d2-6d04b7fcebd3"
}
}
},
"502648cb-26fe-496b-899f-84c8fe1dcbc0": {
"id": "502648cb-26fe-496b-899f-84c8fe1dcbc0",
"prev": "3f136f2a-f14c-4a4b-afea-13380556a79c",
"regions": {
"4c83bb4d-2a52-41ba-a77f-0c6efebd83a6": {
"attrs": {
"height": 0.8,
"width": 0.8,
"x": 0.1,
"y": 0.1
},
"content": {
"cell": "dbd22f6b-6d49-4005-b8fe-422ef8ef1d42",
"part": "whole"
},
"id": "4c83bb4d-2a52-41ba-a77f-0c6efebd83a6"
}
}
},
"54cb8dfd-a89c-4922-867b-3c87d8b67cd3": {
"id": "54cb8dfd-a89c-4922-867b-3c87d8b67cd3",
"prev": "aa224267-f885-4c0c-95af-7bacfcc186d9",
"regions": {
"0848f0a7-032d-46c7-b35c-bfb69c83f961": {
"attrs": {
"height": 0.8,
"width": 0.8,
"x": 0.1,
"y": 0.1
},
"content": {
"cell": "3c32c557-d0e8-4bb3-a61a-aa51a767cd4e",
"part": "whole"
},
"id": "0848f0a7-032d-46c7-b35c-bfb69c83f961"
}
}
},
"636b563c-faee-4c9e-a6a3-f46a905bfa82": {
"id": "636b563c-faee-4c9e-a6a3-f46a905bfa82",
"prev": "c5f59b98-a227-4344-9d6d-03abdd01c6aa",
"regions": {
"9c64f662-05dc-4b14-9cdc-d450b96f4368": {
"attrs": {
"height": 0.8,
"width": 0.8,
"x": 0.1,
"y": 0.1
},
"content": {
"cell": "70640ac0-7041-47a8-9a7f-e871defd74b2",
"part": "whole"
},
"id": "9c64f662-05dc-4b14-9cdc-d450b96f4368"
}
}
},
"793cec2f-8413-484d-aa1e-388fd2b53a45": {
"id": "793cec2f-8413-484d-aa1e-388fd2b53a45",
"prev": "c66f3dfd-2d27-482b-be78-10ba733e826b",
"regions": {
"d08f9cfa-3b8d-4fb4-91ba-82d9858ea93e": {
"attrs": {
"height": 0.8,
"width": 0.8,
"x": 0.1,
"y": 0.1
},
"content": {
"cell": "dd56113e-e3db-41ae-91b7-2472ed194308",
"part": "whole"
},
"id": "d08f9cfa-3b8d-4fb4-91ba-82d9858ea93e"
}
}
},
"83e912ff-260a-4391-8a12-331aba098506": {
"id": "83e912ff-260a-4391-8a12-331aba098506",
"prev": "fe5a0732-69f5-462a-8af6-851f84a9fdec",
"regions": {
"2fefcf5f-ea20-4604-a528-5e6c91bcb100": {
"attrs": {
"height": 0.8,
"width": 0.8,
"x": 0.1,
"y": 0.1
},
"content": {
"cell": "c3f2f57c-7454-4d3e-b38d-b0946cf066ea",
"part": "whole"
},
"id": "2fefcf5f-ea20-4604-a528-5e6c91bcb100"
}
}
},
"851089af-9725-40c9-8f0b-9bf892b2b1fe": {
"id": "851089af-9725-40c9-8f0b-9bf892b2b1fe",
"prev": "636b563c-faee-4c9e-a6a3-f46a905bfa82",
"regions": {
"31c9dda5-fdf4-45e2-bcb7-12aa0f30e1d8": {
"attrs": {
"height": 0.8,
"width": 0.8,
"x": 0.1,
"y": 0.1
},
"content": {
"cell": "8408b90e-6cdd-44d1-86d3-648c23f877ac",
"part": "whole"
},
"id": "31c9dda5-fdf4-45e2-bcb7-12aa0f30e1d8"
}
}
},
"87ab653d-e804-470f-bde9-c67caaa0f354": {
"id": "87ab653d-e804-470f-bde9-c67caaa0f354",
"prev": "a8c2d446-caee-42c8-886a-ed98f4935d78",
"regions": {
"bc3aeb56-c465-4868-a1ea-2de82584de98": {
"attrs": {
"height": 0.8,
"width": 0.8,
"x": 0.1,
"y": 0.1
},
"content": {
"cell": "59f52294-4a25-4c92-bab8-3b07f0f44d15",
"part": "whole"
},
"id": "bc3aeb56-c465-4868-a1ea-2de82584de98"
}
}
},
"8b887c97-83bc-4395-83ac-f6703cbe243d": {
"id": "8b887c97-83bc-4395-83ac-f6703cbe243d",
"prev": "36a814c9-c540-4a6d-92d9-c03553d3d2c2",
"regions": {
"9d0bc72a-cb13-483f-a572-2bf60d0d145f": {
"attrs": {
"height": 0.8,
"width": 0.8,
"x": 0.1,
"y": 0.1
},
"content": {
"cell": "75499c85-d0a1-43db-8244-25778b9b2736",
"part": "whole"
},
"id": "9d0bc72a-cb13-483f-a572-2bf60d0d145f"
}
}
},
"a8b9ceb9-b38f-4489-84df-b644c6fe28f2": {
"id": "a8b9ceb9-b38f-4489-84df-b644c6fe28f2",
"prev": null,
"regions": {
"f741ed94-3f24-4427-b615-3ab8753e5814": {
"attrs": {
"height": 0.8,
"width": 0.8,
"x": 0.1,
"y": 0.1
},
"content": {
"cell": "bf74d2e9-2708-49b1-934b-e0ede342f475",
"part": "whole"
},
"id": "f741ed94-3f24-4427-b615-3ab8753e5814"
}
}
},
"a8c2d446-caee-42c8-886a-ed98f4935d78": {
"id": "a8c2d446-caee-42c8-886a-ed98f4935d78",
"prev": "2dfec088-8a70-411a-9199-904ef3fa2383",
"regions": {
"f03457d8-b2a7-4e14-9a73-cab80c5b815d": {
"attrs": {
"height": 0.8,
"width": 0.8,
"x": 0.1,
"y": 0.1
},
"content": {
"cell": "edaa7f2f-2439-4148-b57a-8c794c0945ec",
"part": "whole"
},
"id": "f03457d8-b2a7-4e14-9a73-cab80c5b815d"
}
}
},
"aa224267-f885-4c0c-95af-7bacfcc186d9": {
"id": "aa224267-f885-4c0c-95af-7bacfcc186d9",
"prev": "793cec2f-8413-484d-aa1e-388fd2b53a45",
"regions": {
"0d7ac442-5e1d-49a5-91b3-1432d72449d8": {
"attrs": {
"height": 0.8,
"width": 0.8,
"x": 0.1,
"y": 0.1
},
"content": {
"cell": "4d6826fe-2cb8-4468-85ed-a242a1ce7155",
"part": "whole"
},
"id": "0d7ac442-5e1d-49a5-91b3-1432d72449d8"
}
}
},
"b52e4d09-5186-44e5-84db-3371c087acde": {
"id": "b52e4d09-5186-44e5-84db-3371c087acde",
"prev": "134f92d0-6389-4226-af51-1134ae8e8278",
"regions": {
"7af7d997-80b2-497d-bced-ef8341763439": {
"attrs": {
"height": 0.8,
"width": 0.8,
"x": 0.1,
"y": 0.1
},
"content": {
"cell": "376882ec-d469-4fad-9462-18e4bbea64ca",
"part": "whole"
},
"id": "7af7d997-80b2-497d-bced-ef8341763439"
}
}
},
"c5f59b98-a227-4344-9d6d-03abdd01c6aa": {
"id": "c5f59b98-a227-4344-9d6d-03abdd01c6aa",
"prev": "83e912ff-260a-4391-8a12-331aba098506",
"regions": {
"7268abff-0540-4c06-aefc-c386410c0953": {
"attrs": {
"height": 0.8,
"width": 0.8,
"x": 0.1,
"y": 0.1
},
"content": {
"cell": "396d478b-34aa-4afa-9898-cdce8222a516",
"part": "whole"
},
"id": "7268abff-0540-4c06-aefc-c386410c0953"
}
}
},
"c66f3dfd-2d27-482b-be78-10ba733e826b": {
"id": "c66f3dfd-2d27-482b-be78-10ba733e826b",
"prev": "8b887c97-83bc-4395-83ac-f6703cbe243d",
"regions": {
"6cbe8e0e-8645-41a1-8a38-e44acb81be4b": {
"attrs": {
"height": 0.8,
"width": 0.8,
"x": 0.1,
"y": 0.1
},
"content": {
"cell": "7594c7c7-b808-48f7-9500-d7830a07968a",
"part": "whole"
},
"id": "6cbe8e0e-8645-41a1-8a38-e44acb81be4b"
}
}
},
"d22045e5-7e3e-452e-bc7b-c6c4a893da8e": {
"id": "d22045e5-7e3e-452e-bc7b-c6c4a893da8e",
"prev": "ec41f96a-63a3-4825-9295-f4657a440ddb",
"regions": {
"24e2a3a9-bf65-4dab-927f-0bf6ffbe581d": {
"attrs": {
"height": 0.8,
"width": 0.8,
"x": 0.1,
"y": 0.1
},
"content": {
"cell": "defe921f-8097-44c3-8336-8af6700804a7",
"part": "whole"
},
"id": "24e2a3a9-bf65-4dab-927f-0bf6ffbe581d"
}
}
},
"d24c958c-e419-4e4d-aa9c-d228a8ca55e4": {
"id": "d24c958c-e419-4e4d-aa9c-d228a8ca55e4",
"prev": "11bebe14-d1dc-476d-a31a-5828b9c3adf0",
"regions": {
"25312144-9faa-4680-bb8e-6307ea71370f": {
"attrs": {
"height": 0.8,
"width": 0.8,
"x": 0.1,
"y": 0.1
},
"content": {
"cell": "bed09a92-9a7a-473b-9464-90e479883a3e",
"part": "whole"
},
"id": "25312144-9faa-4680-bb8e-6307ea71370f"
}
}
},
"ec41f96a-63a3-4825-9295-f4657a440ddb": {
"id": "ec41f96a-63a3-4825-9295-f4657a440ddb",
"prev": "87ab653d-e804-470f-bde9-c67caaa0f354",
"regions": {
"22e8be98-c254-4d04-b0e4-b9b5ae46eefe": {
"attrs": {
"height": 0.8,
"width": 0.8,
"x": 0.1,
"y": 0.1
},
"content": {
"cell": "bc70f780-c240-4779-96f3-bc5ef9a37d59",
"part": "whole"
},
"id": "22e8be98-c254-4d04-b0e4-b9b5ae46eefe"
}
}
},
"fe5a0732-69f5-462a-8af6-851f84a9fdec": {
"id": "fe5a0732-69f5-462a-8af6-851f84a9fdec",
"prev": "d22045e5-7e3e-452e-bc7b-c6c4a893da8e",
"regions": {
"671b89f5-fa9c-4bc1-bdeb-6e0a4ce8939b": {
"attrs": {
"height": 0.8,
"width": 0.8,
"x": 0.1,
"y": 0.1
},
"content": {
"cell": "fd46e2ab-4ab6-4001-b536-1f323525d7d3",
"part": "whole"
},
"id": "671b89f5-fa9c-4bc1-bdeb-6e0a4ce8939b"
}
}
}
},
"themes": {}
}
"msauthor": "minxia"
},
"nbformat": 4,
"nbformat_minor": 2

View File

@@ -39,7 +39,7 @@ n_h1 = args.n_hidden_1
n_h2 = args.n_hidden_2
n_outputs = 10
learning_rate = args.learning_rate
n_epochs = 50
n_epochs = 20
batch_size = args.batch_size
with tf.name_scope('network'):

View File

@@ -102,7 +102,7 @@
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.compute import ComputeTarget, BatchAiCompute\n",
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
"from azureml.core.compute_target import ComputeTargetException\n",
"\n",
"# choose a name for your cluster\n",
@@ -113,10 +113,8 @@
" print('Found existing compute target')\n",
"except ComputeTargetException:\n",
" print('Creating a new compute target...')\n",
" compute_config = BatchAiCompute.provisioning_configuration(vm_size='STANDARD_NC6', \n",
" autoscale_enabled=True,\n",
" cluster_min_nodes=0, \n",
" cluster_max_nodes=4)\n",
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6', \n",
" max_nodes=6)\n",
"\n",
" # create the cluster\n",
" compute_target = ComputeTarget.create(ws, cluster_name, compute_config)\n",
@@ -169,7 +167,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Each workspace is associated with a default datastore. In this tutorial, we will upload the training data to this default datastore. The below code will upload the contents of the data directory to the path `./data` on the default datastore."
"Each workspace is associated with a default datastore. In this tutorial, we will upload the training data to this default datastore."
]
},
{
@@ -179,8 +177,22 @@
"outputs": [],
"source": [
"ds = ws.get_default_datastore()\n",
"print(ds.datastore_type, ds.account_name, ds.container_name)\n",
"\n",
"print(ds.datastore_type, ds.account_name, ds.container_name)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Upload the contents of the data directory to the path `./data` on the default datastore."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ds.upload(src_dir='data', target_path='data', overwrite=True, show_progress=True)"
]
},
@@ -223,6 +235,8 @@
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"project_folder = './tf-distr-hvd'\n",
"os.makedirs(project_folder, exist_ok=True)"
]
@@ -335,7 +349,7 @@
"metadata": {},
"outputs": [],
"source": [
"from azureml.train.widgets import RunDetails\n",
"from azureml.widgets import RunDetails\n",
"RunDetails(run).show()"
]
},

View File

@@ -102,7 +102,7 @@
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.compute import ComputeTarget, BatchAiCompute\n",
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
"from azureml.core.compute_target import ComputeTargetException\n",
"\n",
"# choose a name for your cluster\n",
@@ -113,10 +113,8 @@
" print('Found existing compute target.')\n",
"except ComputeTargetException:\n",
" print('Creating a new compute target...')\n",
" compute_config = BatchAiCompute.provisioning_configuration(vm_size='STANDARD_NC6', \n",
" autoscale_enabled=True,\n",
" cluster_min_nodes=0, \n",
" cluster_max_nodes=4)\n",
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6', \n",
" max_nodes=6)\n",
"\n",
" # create the cluster\n",
" compute_target = ComputeTarget.create(ws, cluster_name, compute_config)\n",
@@ -209,7 +207,8 @@
"from azureml.train.dnn import TensorFlow\n",
"\n",
"script_params={\n",
" '--num_gpus': 1\n",
" '--num_gpus': 1,\n",
" '--train_steps': 500\n",
"}\n",
"\n",
"estimator = TensorFlow(source_directory=project_folder,\n",
@@ -245,7 +244,7 @@
"outputs": [],
"source": [
"run = experiment.submit(estimator)\n",
"print(run.get_details())"
"print(run)"
]
},
{
@@ -262,7 +261,7 @@
"metadata": {},
"outputs": [],
"source": [
"from azureml.train.widgets import RunDetails\n",
"from azureml.widgets import RunDetails\n",
"RunDetails(run).show()"
]
},

View File

@@ -102,7 +102,7 @@
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.compute import ComputeTarget, BatchAiCompute\n",
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
"from azureml.core.compute_target import ComputeTargetException\n",
"\n",
"# choose a name for your cluster\n",
@@ -113,10 +113,8 @@
" print('Found existing compute target.')\n",
"except ComputeTargetException:\n",
" print('Creating a new compute target...')\n",
" compute_config = BatchAiCompute.provisioning_configuration(vm_size='STANDARD_NC6', \n",
" autoscale_enabled=True,\n",
" cluster_min_nodes=0, \n",
" cluster_max_nodes=4)\n",
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6', \n",
" max_nodes=6)\n",
"\n",
" # create the cluster\n",
" compute_target = ComputeTarget.create(ws, cluster_name, compute_config)\n",
@@ -283,7 +281,7 @@
"from azureml.train.estimator import *\n",
"\n",
"script_params = {\n",
" '--num_epochs': 50,\n",
" '--num_epochs': 20,\n",
" '--data_dir': ds_data.as_mount(),\n",
" '--output_dir': './outputs'\n",
"}\n",
@@ -341,7 +339,7 @@
"metadata": {},
"outputs": [],
"source": [
"from azureml.train.widgets import RunDetails\n",
"from azureml.widgets import RunDetails\n",
"RunDetails(run).show()"
]
},

View File

@@ -367,9 +367,9 @@
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.compute import BatchAiCompute\n",
"from azureml.core.compute import AmlCompute\n",
"\n",
"clust_name = ws.name + \"cpu\"\n",
"clust_name = \"cpucluster\"\n",
"\n",
"try:\n",
" # If you already have a cluster named this, we don't need to make a new one.\n",
@@ -378,12 +378,10 @@
" assert compute_target.type == 'BatchAI'\n",
"except:\n",
" # Let's make a new one here.\n",
" provisioning_config = BatchAiCompute.provisioning_configuration(cluster_max_nodes=2, \n",
" autoscale_enabled=True, \n",
" cluster_min_nodes=1,\n",
" vm_size='Standard_D11_V2')\n",
" provisioning_config = AmlCompute.provisioning_configuration(max_nodes=6, \n",
" vm_size='STANDARD_D2_V2')\n",
" \n",
" compute_target = BatchAiCompute.create(ws, clust_name, provisioning_config)\n",
" compute_target = AmlCompute.create(ws, clust_name, provisioning_config)\n",
"compute_target.wait_for_completion(show_output=True, min_node_count=1, timeout_in_minutes=20)\n",
"print(compute_target.name)\n",
"# For a more detailed view of current BatchAI cluster status, use the 'status' property \n",

View File

@@ -131,9 +131,9 @@
"source": [
"### Create remote compute target\n",
"\n",
"Azure Azure ML Managed Compute is a managed service that enables data scientists to train machine learning models on clusters of Azure virtual machines, including VMs with GPU support. In this tutorial, you create an Azure Managed Compute cluster as your training environment. This code creates a cluster for you if it does not already exist in your workspace. \n",
"Azure Machine Learning Managed Compute(AmlCompute) is a managed service that enables data scientists to train machine learning models on clusters of Azure virtual machines, including VMs with GPU support. In this tutorial, you create AmlCompute as your training environment. This code creates compute for you if it does not already exist in your workspace. \n",
"\n",
" **Creation of the cluster takes approximately 5 minutes.** If the cluster is already in the workspace this code uses it and skips the creation process."
" **Creation of the compute takes approximately 5 minutes.** If the compute is already in the workspace this code uses it and skips the creation process."
]
},
{
@@ -147,32 +147,31 @@
},
"outputs": [],
"source": [
"from azureml.core.compute import BatchAiCompute\n",
"from azureml.core.compute import AmlCompute\n",
"from azureml.core.compute import ComputeTarget\n",
"import os\n",
"\n",
"# choose a name for your cluster\n",
"batchai_cluster_name = os.environ.get(\"BATCHAI_CLUSTER_NAME\", ws.name + \"gpu\")\n",
"cluster_min_nodes = os.environ.get(\"BATCHAI_CLUSTER_MIN_NODES\", 1)\n",
"cluster_max_nodes = os.environ.get(\"BATCHAI_CLUSTER_MAX_NODES\", 3)\n",
"vm_size = os.environ.get(\"BATCHAI_CLUSTER_SKU\", \"STANDARD_NC6\")\n",
"autoscale_enabled = os.environ.get(\"BATCHAI_CLUSTER_AUTOSCALE_ENABLED\", True)\n",
"compute_name = os.environ.get(\"BATCHAI_CLUSTER_NAME\", \"cpucluster\")\n",
"compute_min_nodes = os.environ.get(\"BATCHAI_CLUSTER_MIN_NODES\", 0)\n",
"compute_max_nodes = os.environ.get(\"BATCHAI_CLUSTER_MAX_NODES\", 4)\n",
"\n",
"# This example uses CPU VM. For using GPU VM, set SKU to STANDARD_NC6\n",
"vm_size = os.environ.get(\"BATCHAI_CLUSTER_SKU\", \"STANDARD_D2_V2\")\n",
"\n",
"\n",
"if batchai_cluster_name in ws.compute_targets:\n",
" compute_target = ws.compute_targets[batchai_cluster_name]\n",
" if compute_target and type(compute_target) is BatchAiCompute:\n",
" print('found compute target. just use it. ' + batchai_cluster_name)\n",
"if compute_name in ws.compute_targets:\n",
" compute_target = ws.compute_targets[compute_name]\n",
" if compute_target and type(compute_target) is AmlCompute:\n",
" print('found compute target. just use it. ' + compute_name)\n",
"else:\n",
" print('creating a new compute target...')\n",
" provisioning_config = BatchAiCompute.provisioning_configuration(vm_size = vm_size, # NC6 is GPU-enabled\n",
" vm_priority = 'lowpriority', # optional\n",
" autoscale_enabled = autoscale_enabled,\n",
" cluster_min_nodes = cluster_min_nodes, \n",
" cluster_max_nodes = cluster_max_nodes)\n",
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = vm_size,\n",
" min_nodes = compute_min_nodes, \n",
" max_nodes = compute_max_nodes)\n",
"\n",
" # create the cluster\n",
" compute_target = ComputeTarget.create(ws, batchai_cluster_name, provisioning_config)\n",
" compute_target = ComputeTarget.create(ws, compute_name, provisioning_config)\n",
" \n",
" # can poll for a minimum number of nodes and for a specific timeout. \n",
" # if no min node count is provided it will use the scale settings for the cluster\n",
@@ -571,7 +570,7 @@
},
"outputs": [],
"source": [
"from azureml.train.widgets import RunDetails\n",
"from azureml.widgets import RunDetails\n",
"RunDetails(run).show()"
]
},
@@ -710,7 +709,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
"version": "3.6.2"
},
"msauthor": "sgilley"
},

View File

@@ -264,7 +264,7 @@
},
"outputs": [],
"source": [
"from azureml.train.widgets import RunDetails\n",
"from azureml.widgets import RunDetails\n",
"RunDetails(local_run).show()"
]
},