Mirror of https://github.com/Azure/MachineLearningNotebooks.git
Synced 2025-12-20 09:37:04 -05:00

Commit: Update notebooks
@@ -527,7 +527,6 @@
 "\n",
 "myenv = CondaDependencies()\n",
 "myenv.add_conda_package(\"scikit-learn\")\n",
-"myenv.add_pip_package(\"pynacl==1.2.1\")\n",
 "print(myenv.serialize_to_string())\n",
 "\n",
 "with open(\"myenv.yml\",\"w\") as f:\n",
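
This hunk drops a pinned pip package from a scoring environment. For orientation, a minimal sketch of how such an environment file is built with the SDK's `CondaDependencies` API (the package names come from the hunk itself; the surrounding cell is assumed):

    from azureml.core.conda_dependencies import CondaDependencies

    # Build a dependency spec; each add_* call extends the generated conda YAML.
    myenv = CondaDependencies()
    myenv.add_conda_package("scikit-learn")
    # The pip pin removed by this commit would have been added like this:
    # myenv.add_pip_package("pynacl==1.2.1")

    # Serialize to a YAML file that image builds consume.
    with open("myenv.yml", "w") as f:
        f.write(myenv.serialize_to_string())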
@@ -789,6 +788,11 @@
 }
 ],
 "metadata": {
+"authors": [
+{
+"name": "roastala"
+}
+],
 "kernelspec": {
 "display_name": "Python 3.6",
 "language": "python",
@@ -0,0 +1,473 @@
+{
+"cells": [
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"Copyright (c) Microsoft Corporation. All rights reserved.\n",
+"\n",
+"Licensed under the MIT License."
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"# 02. Train locally\n",
+"* Create or load workspace.\n",
+"* Create scripts locally.\n",
+"* Create `train.py` in a folder, along with a `mylib.py` file.\n",
+"* Configure & execute a local run in a user-managed Python environment.\n",
+"* Configure & execute a local run in a system-managed Python environment.\n",
+"* Configure & execute a local run in a Docker environment.\n",
+"* Query run metrics to find the best model.\n",
+"* Register model for operationalization."
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"## Prerequisites\n",
+"Make sure you go through the [00. Installation and Configuration](00.configuration.ipynb) Notebook first if you haven't."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"# Check core SDK version number\n",
+"import azureml.core\n",
+"\n",
+"print(\"SDK version:\", azureml.core.VERSION)"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"## Initialize Workspace\n",
+"\n",
+"Initialize a workspace object from persisted configuration."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"from azureml.core.workspace import Workspace\n",
+"\n",
+"ws = Workspace.from_config()\n",
+"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep='\\n')"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"## Create An Experiment\n",
+"**Experiment** is a logical container in an Azure ML Workspace. It hosts run records which can include run metrics and output artifacts from your experiments."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"from azureml.core import Experiment\n",
+"experiment_name = 'train-on-local'\n",
+"exp = Experiment(workspace=ws, name=experiment_name)"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"## View `train.py`\n",
+"\n",
+"`train.py` is already created for you."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"with open('./train.py', 'r') as f:\n",
+" print(f.read())"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"Note that `train.py` also references a `mylib.py` file."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"with open('./mylib.py', 'r') as f:\n",
+" print(f.read())"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"## Configure & Run\n",
+"### User-managed environment\n",
+"Below, we use a user-managed run, which means you are responsible for ensuring that all the necessary packages are available in the Python environment where you choose to run the script."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"from azureml.core.runconfig import RunConfiguration\n",
+"\n",
+"# Editing a run configuration property on the fly.\n",
+"run_config_user_managed = RunConfiguration()\n",
+"\n",
+"run_config_user_managed.environment.python.user_managed_dependencies = True\n",
+"\n",
+"# You can choose a specific Python environment by pointing to a Python path \n",
+"#run_config_user_managed.environment.python.interpreter_path = '/home/johndoe/miniconda3/envs/sdk2/bin/python'"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"#### Submit script to run in the user-managed environment\n",
+"Note that the whole script folder is submitted for execution, including the `mylib.py` file."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"from azureml.core import ScriptRunConfig\n",
+"\n",
+"src = ScriptRunConfig(source_directory='./', script='train.py', run_config=run_config_user_managed)\n",
+"run = exp.submit(src)"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"#### Get run history details"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"run"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"Block and wait until the run finishes."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"run.wait_for_completion(show_output=True)"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"### System-managed environment\n",
+"You can also ask the system to build a new conda environment and execute your scripts in it. The environment is built once and will be reused in subsequent executions as long as the conda dependencies remain unchanged."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"from azureml.core.runconfig import RunConfiguration\n",
+"from azureml.core.conda_dependencies import CondaDependencies\n",
+"\n",
+"run_config_system_managed = RunConfiguration()\n",
+"\n",
+"run_config_system_managed.environment.python.user_managed_dependencies = False\n",
+"run_config_system_managed.auto_prepare_environment = True\n",
+"\n",
+"# Specify conda dependencies with scikit-learn\n",
+"cd = CondaDependencies.create(conda_packages=['scikit-learn'])\n",
+"run_config_system_managed.environment.python.conda_dependencies = cd"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"#### Submit script to run in the system-managed environment\n",
+"A new conda environment is built based on the conda dependencies object. If you are running this for the first time, this might take up to 5 minutes. But this conda environment is reused as long as you don't change the conda dependencies."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"src = ScriptRunConfig(source_directory=\"./\", script='train.py', run_config=run_config_system_managed)\n",
+"run = exp.submit(src)"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"#### Get run history details"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"run"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"Block and wait until the run finishes."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"run.wait_for_completion(show_output = True)"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"### Docker-based execution\n",
+"**IMPORTANT**: You must have Docker engine installed locally in order to use this execution mode. If your kernel is already running in a Docker container, such as **Azure Notebooks**, this mode will **NOT** work.\n",
+"\n",
+"You can also ask the system to pull down a Docker image and execute your scripts in it."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"run_config_docker = RunConfiguration()\n",
+"run_config_docker.environment.python.user_managed_dependencies = False\n",
+"run_config_docker.auto_prepare_environment = True\n",
+"run_config_docker.environment.docker.enabled = True\n",
+"run_config_docker.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n",
+"\n",
+"# Specify conda dependencies with scikit-learn\n",
+"cd = CondaDependencies.create(conda_packages=['scikit-learn'])\n",
+"run_config_docker.environment.python.conda_dependencies = cd\n",
+"\n",
+"src = ScriptRunConfig(source_directory=\"./\", script='train.py', run_config=run_config_docker)"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"Submit script to run in the Docker environment\n",
+"A new conda environment is built based on the conda dependencies object. If you are running this for the first time, this might take up to 5 minutes. But this conda environment is reused as long as you don't change the conda dependencies.\n",
+"\n",
+"\n"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"import os\n",
+"\n",
+"# Check if Docker is installed\n",
+"if os.system(\"docker -v\") == 0:\n",
+" run = exp.submit(src)\n",
+"else:\n",
+" print(\"Docker engine not installed.\")"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"# Get run history details\n",
+"run"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"run.wait_for_completion(show_output=True)"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"## Query run metrics"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"tags": [
+"query history",
+"get metrics"
+]
+},
+"outputs": [],
+"source": [
+"# get all metrics logged in the run\n",
+"run.get_metrics()\n",
+"metrics = run.get_metrics()"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"Let's find the model that has the lowest MSE value logged."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"import numpy as np\n",
+"\n",
+"best_alpha = metrics['alpha'][np.argmin(metrics['mse'])]\n",
+"\n",
+"print('When alpha is {1:0.2f}, we have min MSE {0:0.2f}.'.format(\n",
+" min(metrics['mse']), \n",
+" best_alpha\n",
+"))"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"You can also list all the files that are associated with this run record."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"run.get_file_names()"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"We know the model `ridge_0.40.pkl` is the best-performing model from the earlier queries. So let's register it with the workspace."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"# supply a model name, and the full path to the serialized model file.\n",
+"model = run.register_model(model_name='best_ridge_model', model_path='./outputs/ridge_0.40.pkl')"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"print(model.name, model.version, model.url)"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"Now you can deploy this model following the example in the 01 notebook."
+]
+}
+],
+"metadata": {
+"authors": [
+{
+"name": "roastala"
+}
+],
+"kernelspec": {
+"display_name": "Python 3.6",
+"language": "python",
+"name": "python36"
+},
+"language_info": {
+"codemirror_mode": {
+"name": "ipython",
+"version": 3
+},
+"file_extension": ".py",
+"mimetype": "text/x-python",
+"name": "python",
+"nbconvert_exporter": "python",
+"pygments_lexer": "ipython3",
+"version": "3.6.6"
+}
+},
+"nbformat": 4,
+"nbformat_minor": 2
+}
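
The notebook above ends by picking the best model from logged metrics. A compact sketch of that pattern, assuming `run.get_metrics()` returns parallel lists for the `alpha` and `mse` values logged by `train.py`:

    import numpy as np

    metrics = run.get_metrics()                 # e.g. {'alpha': [...], 'mse': [...]}
    best_idx = int(np.argmin(metrics['mse']))   # index of the lowest MSE
    best_alpha = metrics['alpha'][best_idx]
    print('Best alpha {0:.2f} gives MSE {1:.2f}'.format(best_alpha, metrics['mse'][best_idx]))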
@@ -218,7 +218,7 @@
 "run_config_system_managed = RunConfiguration()\n",
 "\n",
 "run_config_system_managed.environment.python.user_managed_dependencies = False\n",
-"run_config_system_managed.prepare_environment = True\n",
+"run_config_system_managed.auto_prepare_environment = True\n",
 "\n",
 "# Specify conda dependencies with scikit-learn\n",
 "cd = CondaDependencies.create(conda_packages=['scikit-learn'])\n",
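
The `prepare_environment` to `auto_prepare_environment` rename above recurs throughout this commit. A sketch of the system-managed configuration as it reads after the change, assuming the `RunConfiguration` API of this SDK vintage:

    from azureml.core.runconfig import RunConfiguration
    from azureml.core.conda_dependencies import CondaDependencies

    run_config = RunConfiguration()
    run_config.environment.python.user_managed_dependencies = False
    # Renamed property: build the conda environment automatically on first use.
    run_config.auto_prepare_environment = True
    run_config.environment.python.conda_dependencies = \
        CondaDependencies.create(conda_packages=['scikit-learn'])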
@@ -297,7 +297,7 @@
 "run_config_docker = RunConfiguration()\n",
 "\n",
 "run_config_docker.environment.python.user_managed_dependencies = False\n",
-"run_config_docker.prepare_environment = True\n",
+"run_config_docker.auto_prepare_environment = True\n",
 "run_config_docker.environment.docker.enabled = True\n",
 "run_config_docker.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n",
 "\n",
@@ -442,6 +442,11 @@
 }
 ],
 "metadata": {
+"authors": [
+{
+"name": "roastala"
+}
+],
 "kernelspec": {
 "display_name": "Python 3.6",
 "language": "python",
@@ -15,7 +15,7 @@ os.makedirs('./outputs', exist_ok=True)
 
 X, y = load_diabetes(return_X_y=True)
 
-run = Run.get_submitted_run()
+run = Run.get_context()
 
 X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                     test_size=0.2,
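
The training scripts in this commit switch from `Run.get_submitted_run()` to `Run.get_context()`. A short sketch of the updated pattern; as the SDK documents it, `get_context()` also returns a usable local stand-in when the script runs outside a submitted experiment:

    from azureml.core.run import Run

    run = Run.get_context()   # works inside a submitted run; offline, returns a stand-in
    run.log('alpha', 0.5)     # recorded in run history when submitted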
@@ -0,0 +1,325 @@
+{
+"cells": [
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"Copyright (c) Microsoft Corporation. All rights reserved.\n",
+"\n",
+"Licensed under the MIT License."
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"# 03. Train on Azure Container Instance (EXPERIMENTAL)\n",
+"\n",
+"* Create Workspace\n",
+"* Create Project\n",
+"* Create `train.py` in the project folder.\n",
+"* Configure an ACI (Azure Container Instance) run\n",
+"* Execute in ACI"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"## Prerequisites\n",
+"Make sure you go through the [00. Installation and Configuration](00.configuration.ipynb) Notebook first if you haven't."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"# Check core SDK version number\n",
+"import azureml.core\n",
+"\n",
+"print(\"SDK version:\", azureml.core.VERSION)"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"## Initialize Workspace\n",
+"\n",
+"Initialize a workspace object from persisted configuration."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"tags": [
+"create workspace"
+]
+},
+"outputs": [],
+"source": [
+"from azureml.core import Workspace\n",
+"\n",
+"ws = Workspace.from_config()\n",
+"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"## Create An Experiment\n",
+"\n",
+"**Experiment** is a logical container in an Azure ML Workspace. It hosts run records which can include run metrics and output artifacts from your experiments."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"from azureml.core import Experiment\n",
+"experiment_name = 'train-on-aci'\n",
+"experiment = Experiment(workspace = ws, name = experiment_name)"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"Create a folder to store the training script."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"import os\n",
+"script_folder = './samples/train-on-aci'\n",
+"os.makedirs(script_folder, exist_ok = True)"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"## Remote execution on ACI\n",
+"\n",
+"Use `%%writefile` magic to write training code to the `train.py` file under the project folder."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"%%writefile $script_folder/train.py\n",
+"\n",
+"import os\n",
+"from sklearn.datasets import load_diabetes\n",
+"from sklearn.linear_model import Ridge\n",
+"from sklearn.metrics import mean_squared_error\n",
+"from sklearn.model_selection import train_test_split\n",
+"from azureml.core.run import Run\n",
+"from sklearn.externals import joblib\n",
+"\n",
+"import numpy as np\n",
+"\n",
+"os.makedirs('./outputs', exist_ok=True)\n",
+"\n",
+"X, y = load_diabetes(return_X_y = True)\n",
+"\n",
+"run = Run.get_submitted_run()\n",
+"\n",
+"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)\n",
+"data = {\"train\": {\"X\": X_train, \"y\": y_train},\n",
+" \"test\": {\"X\": X_test, \"y\": y_test}}\n",
+"\n",
+"# list of numbers from 0.0 to 1.0 with a 0.05 interval\n",
+"alphas = np.arange(0.0, 1.0, 0.05)\n",
+"\n",
+"for alpha in alphas:\n",
+" # Use Ridge algorithm to create a regression model\n",
+" reg = Ridge(alpha = alpha)\n",
+" reg.fit(data[\"train\"][\"X\"], data[\"train\"][\"y\"])\n",
+"\n",
+" preds = reg.predict(data[\"test\"][\"X\"])\n",
+" mse = mean_squared_error(preds, data[\"test\"][\"y\"])\n",
+" run.log('alpha', alpha)\n",
+" run.log('mse', mse)\n",
+" \n",
+" model_file_name = 'ridge_{0:.2f}.pkl'.format(alpha)\n",
+" with open(os.path.join('./outputs/', model_file_name), \"wb\") as file:\n",
+"     joblib.dump(value = reg, filename = file)\n",
+"\n",
+" print('alpha is {0:.2f}, and mse is {1:0.2f}'.format(alpha, mse))"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"## Configure for using ACI\n",
+"Linux-based ACI is available in `westus`, `eastus`, `westeurope`, `northeurope`, `westus2` and `southeastasia` regions. See details [here](https://docs.microsoft.com/en-us/azure/container-instances/container-instances-quotas#region-availability)."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"tags": [
+"configure run"
+]
+},
+"outputs": [],
+"source": [
+"from azureml.core.runconfig import RunConfiguration\n",
+"from azureml.core.conda_dependencies import CondaDependencies\n",
+"\n",
+"# create a new runconfig object\n",
+"run_config = RunConfiguration()\n",
+"\n",
+"# signal that you want to use ACI to execute the script.\n",
+"run_config.target = \"containerinstance\"\n",
+"\n",
+"# ACI container group is only supported in certain regions, which can be different from the region the Workspace is in.\n",
+"run_config.container_instance.region = 'eastus'\n",
+"\n",
+"# set the ACI CPU and Memory \n",
+"run_config.container_instance.cpu_cores = 1\n",
+"run_config.container_instance.memory_gb = 2\n",
+"\n",
+"# enable Docker \n",
+"run_config.environment.docker.enabled = True\n",
+"\n",
+"# set Docker base image to the default CPU-based image\n",
+"run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n",
+"#run_config.environment.docker.base_image = 'microsoft/mmlspark:plus-0.9.9'\n",
+"\n",
+"# use conda_dependencies.yml to create a conda environment in the Docker image for execution\n",
+"run_config.environment.python.user_managed_dependencies = False\n",
+"\n",
+"# auto-prepare the Docker image when used for execution (if it is not already prepared)\n",
+"run_config.auto_prepare_environment = True\n",
+"\n",
+"# specify CondaDependencies obj\n",
+"run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"## Submit the Experiment\n",
+"Finally, run the training job on ACI."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"tags": [
+"remote run",
+"aci"
+]
+},
+"outputs": [],
+"source": [
+"%%time \n",
+"from azureml.core.script_run_config import ScriptRunConfig\n",
+"\n",
+"script_run_config = ScriptRunConfig(source_directory = script_folder,\n",
+" script= 'train.py',\n",
+" run_config = run_config)\n",
+"\n",
+"run = experiment.submit(script_run_config)\n"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"tags": [
+"remote run",
+"aci"
+]
+},
+"outputs": [],
+"source": [
+"%%time\n",
+"# Shows output of the run on stdout.\n",
+"run.wait_for_completion(show_output = True)"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"tags": [
+"query history"
+]
+},
+"outputs": [],
+"source": [
+"# Show run details\n",
+"run"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"tags": [
+"get metrics"
+]
+},
+"outputs": [],
+"source": [
+"# get all metrics logged in the run\n",
+"run.get_metrics()\n",
+"metrics = run.get_metrics()"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"import numpy as np\n",
+"print('When alpha is {1:0.2f}, we have min MSE {0:0.2f}.'.format(\n",
+" min(metrics['mse']), \n",
+" metrics['alpha'][np.argmin(metrics['mse'])]\n",
+"))"
+]
+}
+],
+"metadata": {
+"kernelspec": {
+"display_name": "Python 3",
+"language": "python",
+"name": "python3"
+},
+"language_info": {
+"codemirror_mode": {
+"name": "ipython",
+"version": 3
+},
+"file_extension": ".py",
+"mimetype": "text/x-python",
+"name": "python",
+"nbconvert_exporter": "python",
+"pygments_lexer": "ipython3",
+"version": "3.6.5"
+}
+},
+"nbformat": 4,
+"nbformat_minor": 2
+}
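
A compact usage sketch of the ACI flow that the new notebook walks through, assuming the `run_config` defined in its configure cell (container target, region, CPU/memory):

    from azureml.core import Experiment
    from azureml.core.script_run_config import ScriptRunConfig

    src = ScriptRunConfig(source_directory=script_folder, script='train.py', run_config=run_config)
    run = Experiment(ws, 'train-on-aci').submit(src)
    run.wait_for_completion(show_output=True)   # streams image build and training logs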
@@ -261,6 +261,11 @@
 }
 ],
 "metadata": {
+"authors": [
+{
+"name": "roastala"
+}
+],
 "kernelspec": {
 "display_name": "Python 3.6",
 "language": "python",
@@ -14,7 +14,7 @@ os.makedirs('./outputs', exist_ok=True)
 
 X, y = load_diabetes(return_X_y=True)
 
-run = Run.get_submitted_run()
+run = Run.get_context()
 
 X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                     test_size=0.2,
@@ -0,0 +1,321 @@
+{
+"cells": [
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"Copyright (c) Microsoft Corporation. All rights reserved.\n",
+"\n",
+"Licensed under the MIT License."
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"# 04. Train in a remote VM (MLC managed DSVM)\n",
+"* Create Workspace\n",
+"* Create Project\n",
+"* Create `train.py` file\n",
+"* Create DSVM as Machine Learning Compute (MLC) resource\n",
+"* Configure & execute a run in a conda environment in the default miniconda Docker container on DSVM"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"## Prerequisites\n",
+"Make sure you go through the [00. Installation and Configuration](00.configuration.ipynb) Notebook first if you haven't."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"# Check core SDK version number\n",
+"import azureml.core\n",
+"\n",
+"print(\"SDK version:\", azureml.core.VERSION)"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"## Initialize Workspace\n",
+"\n",
+"Initialize a workspace object from persisted configuration."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"from azureml.core import Workspace\n",
+"\n",
+"ws = Workspace.from_config()\n",
+"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"## Create Experiment\n",
+"\n",
+"**Experiment** is a logical container in an Azure ML Workspace. It hosts run records which can include run metrics and output artifacts from your experiments."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"experiment_name = 'train-on-remote-vm'\n",
+"\n",
+"from azureml.core import Experiment\n",
+"\n",
+"exp = Experiment(workspace = ws, name = experiment_name)"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"## View `train.py`\n",
+"\n",
+"For convenience, we created a training script for you. It is printed below as text, but you can also run `%pfile ./train.py` in a cell to show the file."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"with open('./train.py', 'r') as training_script:\n",
+" print(training_script.read())"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"## Create Linux DSVM as a compute target\n",
+"\n",
+"**Note**: If creation fails with a message about Marketplace purchase eligibility, go to portal.azure.com, start creating a DSVM there, and select \"Want to create programmatically\" to enable programmatic creation. Once you've enabled it, you can exit without actually creating the VM.\n",
+" \n",
+"**Note**: By default SSH runs on port 22 and you don't need to specify it. But if for security reasons you switch to a different port (such as 5022), you can append the port number to the address, as in the example below. [Read more](../../documentation/sdk/ssh-issue.md) on this."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"from azureml.core.compute import DsvmCompute\n",
+"from azureml.core.compute_target import ComputeTargetException\n",
+"\n",
+"compute_target_name = 'mydsvm'\n",
+"\n",
+"try:\n",
+" dsvm_compute = DsvmCompute(workspace = ws, name = compute_target_name)\n",
+" print('found existing:', dsvm_compute.name)\n",
+"except ComputeTargetException:\n",
+" print('creating new.')\n",
+" dsvm_config = DsvmCompute.provisioning_configuration(vm_size = \"Standard_D2_v2\")\n",
+" dsvm_compute = DsvmCompute.create(ws, name = compute_target_name, provisioning_configuration = dsvm_config)\n",
+" dsvm_compute.wait_for_completion(show_output = True)"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"## Attach an existing Linux DSVM as a compute target\n"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"'''\n",
+" from azureml.core.compute import RemoteCompute \n",
+" # if you want to connect using SSH key instead of username/password you can provide parameters private_key_file and private_key_passphrase \n",
+" dsvm_compute = RemoteCompute.attach(ws,name=\"attach-from-sdk6\",username=<username>,address=<ipaddress>,ssh_port=22,password=<password>)\n",
+"'''"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"## Configure & Run"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"### Configure a Docker run with a new conda environment on the VM\n",
+"You can execute in a Docker container in the VM. If you choose this route, you don't need to install anything on the VM yourself. The Azure ML execution service will take care of it for you."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"from azureml.core.runconfig import RunConfiguration\n",
+"from azureml.core.conda_dependencies import CondaDependencies\n",
+"\n",
+"\n",
+"# Create a new run configuration for Python\n",
+"run_config = RunConfiguration(framework = \"python\")\n",
+"\n",
+"# Set compute target to the Linux DSVM\n",
+"run_config.target = compute_target_name\n",
+"\n",
+"# Use Docker in the remote VM\n",
+"run_config.environment.docker.enabled = True\n",
+"\n",
+"# Use CPU base image from DockerHub\n",
+"run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n",
+"print('Base Docker image is:', run_config.environment.docker.base_image)\n",
+"\n",
+"# Ask the system to provision a new conda environment based on the conda_dependencies.yml file\n",
+"run_config.environment.python.user_managed_dependencies = False\n",
+"\n",
+"# Prepare the Docker and conda environment automatically when executing for the first time.\n",
+"run_config.auto_prepare_environment = True\n",
+"\n",
+"# specify CondaDependencies obj\n",
+"run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"### Submit the Experiment\n",
+"Submit script to run in the Docker image in the remote VM. If you run this for the first time, the system will download the base image, layer in packages specified in the `conda_dependencies.yml` file on top of the base image, create a container, and then execute the script in the container."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"from azureml.core import Run\n",
+"from azureml.core import ScriptRunConfig\n",
+"\n",
+"src = ScriptRunConfig(source_directory = '.', script = 'train.py', run_config = run_config)\n",
+"run = exp.submit(src)"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"### View run history details"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"run"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"run.wait_for_completion(show_output = True)"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"### Find the best run"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"# get all metrics logged in the run\n",
+"run.get_metrics()\n",
+"metrics = run.get_metrics()"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"import numpy as np\n",
+"print('When alpha is {1:0.2f}, we have min MSE {0:0.2f}.'.format(\n",
+" min(metrics['mse']), \n",
+" metrics['alpha'][np.argmin(metrics['mse'])]\n",
+"))"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"## Clean up compute resource"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"dsvm_compute.delete()"
+]
+}
+],
+"metadata": {
+"kernelspec": {
+"display_name": "Python 3",
+"language": "python",
+"name": "python3"
+},
+"language_info": {
+"codemirror_mode": {
+"name": "ipython",
+"version": 3
+},
+"file_extension": ".py",
+"mimetype": "text/x-python",
+"name": "python",
+"nbconvert_exporter": "python",
+"pygments_lexer": "ipython3",
+"version": "3.6.5"
+}
+},
+"nbformat": 4,
+"nbformat_minor": 2
+}
@@ -158,7 +158,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"ds.upload_files(['./feeatures.npy', './labels.npy'], target_path='diabetes', overwrite=True)"
+"ds.upload_files(['./features.npy', './labels.npy'], target_path='diabetes', overwrite=True)"
 ]
 },
 {
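
This hunk fixes a file-name typo in a datastore upload. A sketch of the surrounding pattern, assuming `ds` was obtained from the workspace's default datastore earlier in that notebook (the array names are illustrative):

    import numpy as np

    # Save arrays locally, then push them to the workspace's default datastore.
    np.save('./features.npy', X)
    np.save('./labels.npy', y)
    ds = ws.get_default_datastore()
    ds.upload_files(['./features.npy', './labels.npy'], target_path='diabetes', overwrite=True)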
@@ -394,7 +394,7 @@
 ]
 },
 {
-"cell_type": "raw",
+"cell_type": "markdown",
 "metadata": {},
 "source": [
 "You can choose to SSH into the VM and install Azure ML SDK, and any other missing dependencies, in that Python environment. For demonstration purposes, we simply are going to create another script `train2.py` that doesn't have azureml dependencies, and submit it instead."
@@ -407,6 +407,7 @@
 "outputs": [],
 "source": [
 "%%writefile $script_folder/train2.py\n",
+"\n",
 "print('####################################')\n",
 "print('Hello World (without Azure ML SDK)!')\n",
 "print('####################################')"
@@ -592,6 +593,11 @@
 }
 ],
 "metadata": {
+"authors": [
+{
+"name": "haining"
+}
+],
 "kernelspec": {
 "display_name": "Python 3.6",
 "language": "python",
@@ -612,4 +618,4 @@
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
@@ -24,7 +24,7 @@ print('List all files: ', os.listdir(args.data_folder))
 X = np.load(os.path.join(args.data_folder, 'features.npy'))
 y = np.load(os.path.join(args.data_folder, 'labels.npy'))
 
-run = Run.get_submitted_run()
+run = Run.get_context()
 
 X_train, X_test, y_train, y_test = train_test_split(
     X, y, test_size=0.2, random_state=0)
@@ -0,0 +1,257 @@
+{
+"cells": [
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"Copyright (c) Microsoft Corporation. All rights reserved.\n",
+"\n",
+"Licensed under the MIT License."
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"# 05. Train in Spark\n",
+"* Create Workspace\n",
+"* Create Experiment\n",
+"* Copy relevant files to the script folder\n",
+"* Configure and Run"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"## Prerequisites\n",
+"Make sure you go through the [00. Installation and Configuration](00.configuration.ipynb) Notebook first if you haven't."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"# Check core SDK version number\n",
+"import azureml.core\n",
+"\n",
+"print(\"SDK version:\", azureml.core.VERSION)"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"## Initialize Workspace\n",
+"\n",
+"Initialize a workspace object from persisted configuration."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"from azureml.core import Workspace\n",
+"\n",
+"ws = Workspace.from_config()\n",
+"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"## Create Experiment\n"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"experiment_name = 'train-on-remote-vm'\n",
+"\n",
+"from azureml.core import Experiment\n",
+"\n",
+"exp = Experiment(workspace = ws, name = experiment_name)"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"## View `train-spark.py`\n",
+"\n",
+"For convenience, we created a training script for you. It is printed below as text, but you can also run `%pfile ./train-spark.py` in a cell to show the file."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"with open('train-spark.py', 'r') as training_script:\n",
+" print(training_script.read())"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"## Configure & Run"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"### Attach an HDI cluster\n",
+"To use an HDI compute target:\n",
+" 1. Create a Spark HDInsight cluster in Azure. Here are some [quick instructions](https://docs.microsoft.com/en-us/azure/machine-learning/desktop-workbench/how-to-create-dsvm-hdi). Make sure you use the Ubuntu flavor, NOT CentOS.\n",
+" 2. Enter the IP address, username, and password below."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"from azureml.core.compute import HDInsightCompute\n",
+"from azureml.exceptions import UserErrorException  # raised if the compute is already attached\n",
+"\n",
+"try:\n",
+" # if you want to connect using SSH key instead of username/password you can provide parameters private_key_file and private_key_passphrase\n",
+" hdi_compute_new = HDInsightCompute.attach(ws, \n",
+" name=\"hdi-attach\", \n",
+" address=\"hdi-ignite-demo-ssh.azurehdinsight.net\", \n",
+" ssh_port=22, \n",
+" username='<username>', \n",
+" password='<password>')\n",
+"\n",
+"except UserErrorException as e:\n",
+" print(\"Caught = {}\".format(e.message))\n",
+" print(\"Compute config already attached.\")\n",
+" \n",
+" \n",
+"hdi_compute_new.wait_for_completion(show_output=True)"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"### Configure HDI run"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"from azureml.core.runconfig import RunConfiguration\n",
+"from azureml.core.conda_dependencies import CondaDependencies\n",
+"\n",
+"\n",
+"# Create a new run configuration\n",
+"run_config = RunConfiguration(framework = \"python\")\n",
+"\n",
+"# Set compute target to the attached HDI cluster\n",
+"run_config.target = hdi_compute_new.name\n",
+"\n",
+"# Use Docker in the remote VM\n",
+"# run_config.environment.docker.enabled = True\n",
+"\n",
+"# Use CPU base image from DockerHub\n",
+"# run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n",
+"# print('Base Docker image is:', run_config.environment.docker.base_image)\n",
+"\n",
+"# Ask the system to provision a new conda environment based on the conda_dependencies.yml file\n",
+"run_config.environment.python.user_managed_dependencies = False\n",
+"\n",
+"# Prepare the Docker and conda environment automatically when executing for the first time.\n",
+"# run_config.auto_prepare_environment = True\n",
+"\n",
+"# specify CondaDependencies obj\n",
+"# run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])\n",
+"# load the runconfig object from the \"myhdi.runconfig\" file generated by the attach operation above."
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"### Submit the script to HDI"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"from azureml.core import ScriptRunConfig\n",
+"\n",
+"script_run_config = ScriptRunConfig(source_directory = '.',\n",
+" script= 'train-spark.py',\n",
+" run_config = run_config)\n",
+"run = exp.submit(script_run_config)"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"# get the URL of the run history web page\n",
+"run"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"run.wait_for_completion(show_output = True)"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"# get all metrics logged in the run\n",
+"metrics = run.get_metrics()\n",
+"print(metrics)"
+]
+}
+],
+"metadata": {
+"kernelspec": {
+"display_name": "Python 3",
+"language": "python",
+"name": "python3"
+},
+"language_info": {
+"codemirror_mode": {
+"name": "ipython",
+"version": 3
+},
+"file_extension": ".py",
+"mimetype": "text/x-python",
+"name": "python",
+"nbconvert_exporter": "python",
+"pygments_lexer": "ipython3",
+"version": "3.6.5"
+}
+},
+"nbformat": 4,
+"nbformat_minor": 2
+}
@@ -303,6 +303,11 @@
 }
 ],
 "metadata": {
+"authors": [
+{
+"name": "aashishb"
+}
+],
 "kernelspec": {
 "display_name": "Python 3.6",
 "language": "python",
@@ -18,7 +18,7 @@ from pyspark.sql.types import DoubleType, IntegerType, StringType
 from azureml.core.run import Run
 
 # initialize logger
-run = Run.get_submitted_run()
+run = Run.get_context()
 
 # start Spark session
 spark = pyspark.sql.SparkSession.builder.appName('Iris').getOrCreate()
@@ -206,7 +206,6 @@
 "from azureml.core.conda_dependencies import CondaDependencies \n",
 "\n",
 "myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn'])\n",
-"myenv.add_pip_package(\"pynacl==1.2.1\")\n",
 "\n",
 "with open(\"myenv.yml\",\"w\") as f:\n",
 " f.write(myenv.serialize_to_string())"
@@ -398,6 +397,11 @@
 }
 ],
 "metadata": {
+"authors": [
+{
+"name": "raymondl"
+}
+],
 "kernelspec": {
 "display_name": "Python 3.6",
 "language": "python",
@@ -136,7 +136,6 @@
 "from azureml.core.conda_dependencies import CondaDependencies \n",
 "\n",
 "myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn'])\n",
-"myenv.add_pip_package(\"pynacl==1.2.1\")\n",
 "\n",
 "with open(\"myenv.yml\",\"w\") as f:\n",
 " f.write(myenv.serialize_to_string())"
@@ -313,6 +312,11 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "raymondl"
|
||||||
|
}
|
||||||
|
],
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "Python 3.6",
|
"display_name": "Python 3.6",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
|
@@ -180,7 +180,6 @@
"\n",
"myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn'])\n",
"myenv.add_pip_package(\"azureml-monitoring\")\n",
-"myenv.add_pip_package(\"pynacl==1.2.1\")\n",
"\n",
"with open(\"myenv.yml\",\"w\") as f:\n",
" f.write(myenv.serialize_to_string())"
@@ -425,6 +424,11 @@
}
],
"metadata": {
+"authors": [
+{
+"name": "marthalc"
+}
+],
"kernelspec": {
"display_name": "Python 3.6",
"language": "python",
@@ -387,6 +387,11 @@
}
],
"metadata": {
+"authors": [
+{
+"name": "marthalc"
+}
+],
"kernelspec": {
"display_name": "Python 3.6",
"language": "python",
@@ -1,224 +1,224 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
"\n",
"Licensed under the MIT License."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# AutoML 00. Configuration\n",
"\n",
"In this example you will create an Azure Machine Learning `Workspace` object and initialize your notebook directory to easily reload this object from a configuration file. Typically you will only need to run this once per notebook directory, and all other notebooks in this directory or any sub-directories will automatically use the settings you indicate here.\n",
"\n",
"\n",
"## Prerequisites:\n",
"\n",
"Before running this notebook, run the `automl_setup` script described in README.md.\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Register Machine Learning Services Resource Provider\n",
"\n",
"Microsoft.MachineLearningServices only needs to be registered once in the subscription.\n",
"To register it:\n",
"1. Start the Azure portal.\n",
"2. Select your `All services` and then `Subscription`.\n",
"3. Select the subscription that you want to use.\n",
"4. Click on `Resource providers`\n",
"5. Click the `Register` link next to Microsoft.MachineLearningServices"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Check the Azure ML Core SDK Version to Validate Your Installation"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import azureml.core\n",
"\n",
"print(\"SDK Version:\", azureml.core.VERSION)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Initialize an Azure ML Workspace\n",
"### What is an Azure ML Workspace and Why Do I Need One?\n",
"\n",
"An Azure ML workspace is an Azure resource that organizes and coordinates the actions of many other Azure resources to assist in executing and sharing machine learning workflows. In particular, an Azure ML workspace coordinates storage, databases, and compute resources providing added functionality for machine learning experimentation, operationalization, and the monitoring of operationalized models.\n",
"\n",
"\n",
"### What do I Need?\n",
"\n",
"To create or access an Azure ML workspace, you will need to import the Azure ML library and specify the following information:\n",
"* A name for your workspace. You can choose one.\n",
"* Your subscription id. Use the `id` value from the `az account show` command output above.\n",
"* The resource group name. The resource group organizes Azure resources and provides a default region for the resources in the group. The resource group will be created if it doesn't exist. Resource groups can be created and viewed in the [Azure portal](https://portal.azure.com)\n",
"* Supported regions include `eastus2`, `eastus`, `westcentralus`, `southeastasia`, `westeurope`, `australiaeast`, `westus2`, `southcentralus`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"subscription_id = \"<subscription_id>\"\n",
"resource_group = \"myrg\"\n",
"workspace_name = \"myws\"\n",
"workspace_region = \"eastus2\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Creating a Workspace\n",
"If you already have access to an Azure ML workspace you want to use, you can skip this cell. Otherwise, this cell will create an Azure ML workspace for you in the specified subscription, provided you have the correct permissions for the given `subscription_id`.\n",
"\n",
"This will fail when:\n",
"1. The workspace already exists.\n",
"2. You do not have permission to create a workspace in the resource group.\n",
"3. You are not a subscription owner or contributor and no Azure ML workspaces have ever been created in this subscription.\n",
"\n",
"If workspace creation fails for any reason other than already existing, please work with your IT administrator to provide you with the appropriate permissions or to provision the required resources.\n",
"\n",
"**Note:** Creation of a new workspace can take several minutes."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Import the Workspace class and check the Azure ML SDK version.\n",
"from azureml.core import Workspace\n",
"\n",
"ws = Workspace.create(name = workspace_name,\n",
" subscription_id = subscription_id,\n",
" resource_group = resource_group, \n",
" location = workspace_region)\n",
"ws.get_details()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Configuring Your Local Environment\n",
"You can validate that you have access to the specified workspace and write a configuration file to the default configuration location, `./aml_config/config.json`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core import Workspace\n",
"\n",
"ws = Workspace(workspace_name = workspace_name,\n",
" subscription_id = subscription_id,\n",
" resource_group = resource_group)\n",
"\n",
"# Persist the subscription id, resource group name, and workspace name in aml_config/config.json.\n",
"ws.write_config()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"You can then load the workspace from this config file from any notebook in the current directory."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Load workspace configuration from ./aml_config/config.json file.\n",
"my_workspace = Workspace.from_config()\n",
"my_workspace.get_details()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create a Folder to Host All Sample Projects\n",
"Finally, create a folder where all the sample projects will be hosted."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"sample_projects_folder = './sample_projects'\n",
"\n",
"if not os.path.isdir(sample_projects_folder):\n",
"    os.mkdir(sample_projects_folder)\n",
" \n",
"print('Sample projects will be created in {}.'.format(sample_projects_folder))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Success!\n",
"Great, you are ready to move on to the rest of the sample notebooks."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": []
}
-],
-"metadata": {
-"authors": [
-{
-"name": "savitam"
-}
],
-"kernelspec": {
-"display_name": "Python 3.6",
-"language": "python",
-"name": "python36"
+"metadata": {
+"authors": [
+{
+"name": "savitam"
+}
+],
+"kernelspec": {
+"display_name": "Python 3.6",
+"language": "python",
+"name": "python36"
+},
+"language_info": {
+"codemirror_mode": {
+"name": "ipython",
+"version": 3
+},
+"file_extension": ".py",
+"mimetype": "text/x-python",
+"name": "python",
+"nbconvert_exporter": "python",
+"pygments_lexer": "ipython3",
+"version": "3.6.6"
+}
},
-"language_info": {
-"codemirror_mode": {
-"name": "ipython",
-"version": 3
-},
-"file_extension": ".py",
-"mimetype": "text/x-python",
-"name": "python",
-"nbconvert_exporter": "python",
-"pygments_lexer": "ipython3",
-"version": "3.6.6"
-}
-},
-"nbformat": 4,
-"nbformat_minor": 2
-}
+"nbformat": 4,
+"nbformat_minor": 2
+}
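The configuration notebook above creates the workspace in one cell and validates access in another. A combined get-or-create sketch using the same variables (the try/except fallback is an assumption on top of the notebook, not part of it):

from azureml.core import Workspace

try:
    ws = Workspace(workspace_name = workspace_name,
                   subscription_id = subscription_id,
                   resource_group = resource_group)
    print('Found existing workspace.')
except Exception:
    ws = Workspace.create(name = workspace_name,
                          subscription_id = subscription_id,
                          resource_group = resource_group,
                          location = workspace_region)

ws.write_config()  # persists ./aml_config/config.json for later Workspace.from_config() calls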
@@ -15,7 +15,7 @@
"source": [
"# AutoML 01: Classification with Local Compute\n",
"\n",
-"In this example we use the scikit-learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html) to showcase how you can use AutoML for a simple classification problem.\n",
+"In this example we use scikit-learn's [digit dataset](http://scikit-learn.org/stable/datasets/index.html#optical-recognition-of-handwritten-digits-dataset) to showcase how you can use AutoML for a simple classification problem.\n",
"\n",
"Make sure you have executed the [00.configuration](00.configuration.ipynb) before running this notebook.\n",
"\n",
@@ -108,7 +108,9 @@
"cell_type": "markdown",
"metadata": {},
"source": [
-"## Load Training Data"
+"## Load Training Data\n",
+"\n",
+"This uses scikit-learn's [load_digits](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html) method."
]
},
{
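For reference, the linked `load_digits` helper returns the bundled optical-recognition dataset directly; a quick sketch:

from sklearn.datasets import load_digits

digits = load_digits()
X, y = digits.data, digits.target  # 1797 samples, 64 pixel features, 10 classes
print(X.shape, y.shape)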
@@ -168,7 +170,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
-"## Train the Model\n",
+"## Train the Models\n",
"\n",
"Call the `submit` method on the experiment object and pass the run configuration. Execution of local runs is synchronous. Depending on the data and the number of iterations this can run for a while.\n",
"In this example, we specify `show_output = True` to print currently running iterations to the console."
@@ -280,7 +282,7 @@
"source": [
"### Retrieve the Best Model\n",
"\n",
-"Below we select the best pipeline from our iterations. The `get_output` method on `automl_classifier` returns the best run and the fitted model for the last invocation. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*."
+"Below we select the best pipeline from our iterations. The `get_output` method returns the best run and the fitted model. The Model includes the pipeline and any pre-processing. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*."
]
},
{
@@ -359,7 +361,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
-"#### Testing Our Best Pipeline\n",
+"#### Testing Our Best Fitted Model\n",
"We will try to predict 2 digits and see how our model works."
]
},
@@ -384,6 +386,11 @@
}
],
"metadata": {
+"authors": [
+{
+"name": "savitam"
+}
+],
"kernelspec": {
"display_name": "Python 3.6",
"language": "python",
@@ -15,7 +15,7 @@
"source": [
"# AutoML 02: Regression with Local Compute\n",
"\n",
-"In this example we use the scikit-learn's [diabetes dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html) to showcase how you can use AutoML for a simple regression problem.\n",
+"In this example we use scikit-learn's [diabetes dataset](http://scikit-learn.org/stable/datasets/index.html#diabetes-dataset) to showcase how you can use AutoML for a simple regression problem.\n",
"\n",
"Make sure you have executed the [00.configuration](00.configuration.ipynb) before running this notebook.\n",
"\n",
@@ -108,7 +108,8 @@
"cell_type": "markdown",
"metadata": {},
"source": [
-"### Load Training Data"
+"### Load Training Data\n",
+"This uses scikit-learn's [load_diabetes](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html) method."
]
},
{
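As with the classification notebook, the linked `load_diabetes` helper returns the data directly; a quick sketch:

from sklearn.datasets import load_diabetes

diabetes = load_diabetes()
X, y = diabetes.data, diabetes.target  # 442 samples, 10 features, continuous target
print(X.shape, y.shape)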
@@ -172,7 +173,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
-"## Train the Model\n",
+"## Train the Models\n",
"\n",
"Call the `submit` method on the experiment object and pass the run configuration. Execution of local runs is synchronous. Depending on the data and the number of iterations this can run for a while.\n",
"In this example, we specify `show_output = True` to print currently running iterations to the console."
@@ -256,7 +257,7 @@
"source": [
"### Retrieve the Best Model\n",
"\n",
-"Below we select the best pipeline from our iterations. The `get_output` method on `automl_classifier` returns the best run and the fitted model for the last invocation. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*."
+"Below we select the best pipeline from our iterations. The `get_output` method returns the best run and the fitted model. The Model includes the pipeline and any pre-processing. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*."
]
},
{
@@ -386,6 +387,11 @@
}
],
"metadata": {
+"authors": [
+{
+"name": "savitam"
+}
+],
"kernelspec": {
"display_name": "Python 3.6",
"language": "python",
@@ -15,7 +15,7 @@
"source": [
"# AutoML 03: Remote Execution using DSVM (Ubuntu)\n",
"\n",
-"In this example we use the scikit-learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html) to showcase how you can use AutoML for a simple classification problem.\n",
+"In this example we use scikit-learn's [digit dataset](http://scikit-learn.org/stable/datasets/index.html#optical-recognition-of-handwritten-digits-dataset) to showcase how you can use AutoML for a simple classification problem.\n",
"\n",
"Make sure you have executed the [00.configuration](00.configuration.ipynb) before running this notebook.\n",
"\n",
@@ -145,7 +145,7 @@
"source": [
"## Create Get Data File\n",
"For remote executions you should author a `get_data.py` file containing a `get_data()` function. This file should be in the root directory of the project. You can encapsulate code to read data either from a blob storage or local disk in this file.\n",
-"In this example, the `get_data()` function returns data from scikit-learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html)."
+"In this example, the `get_data()` function returns data using scikit-learn's [load_digits](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html) method."
]
},
{
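Since `get_data()` only needs to return a dictionary with `X` and `y` entries (the shape used throughout these notebooks), a minimal sketch of such a file:

# get_data.py, placed in the project root for remote runs
from sklearn.datasets import load_digits

def get_data():
    digits = load_digits()
    return {"X": digits.data, "y": digits.target}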
@@ -234,7 +234,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
-"## Train the Model\n",
+"## Train the Models\n",
"\n",
"Call the `submit` method on the experiment object and pass the run configuration. For remote runs the execution is asynchronous, so you will see the iterations get populated as they complete. You can interact with the widgets and models even when the experiment is running to retrieve the best model up to that point. Once you are satisfied with the model, you can cancel a particular iteration or the whole run.\n",
"\n",
@@ -354,7 +354,7 @@
"source": [
"### Retrieve the Best Model\n",
"\n",
-"Below we select the best pipeline from our iterations. The `get_output` method on `automl_classifier` returns the best run and the fitted model for the last invocation. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*."
+"Below we select the best pipeline from our iterations. The `get_output` method returns the best run and the fitted model. The Model includes the pipeline and any pre-processing. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*."
]
},
{
@@ -433,7 +433,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
-"#### Test Our Best Pipeline"
+"#### Test Our Best Fitted Model"
]
},
{
@@ -457,6 +457,11 @@
}
],
"metadata": {
+"authors": [
+{
+"name": "savitam"
+}
+],
"kernelspec": {
"display_name": "Python 3.6",
"language": "python",
@@ -15,7 +15,7 @@
"source": [
"# AutoML 03: Remote Execution using Batch AI\n",
"\n",
-"In this example we use the scikit-learn's [diabetes dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html) to showcase how you can use AutoML for a simple classification problem.\n",
+"In this example we use scikit-learn's [digit dataset](http://scikit-learn.org/stable/datasets/index.html#optical-recognition-of-handwritten-digits-dataset) to showcase how you can use AutoML for a simple classification problem.\n",
"\n",
"Make sure you have executed the [00.configuration](00.configuration.ipynb) before running this notebook.\n",
"\n",
@@ -170,7 +170,7 @@
"source": [
"## Create Get Data File\n",
"For remote executions you should author a `get_data.py` file containing a `get_data()` function. This file should be in the root directory of the project. You can encapsulate code to read data either from a blob storage or local disk in this file.\n",
-"In this example, the `get_data()` function returns data from scikit-learn's [diabetes dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html)."
+"In this example, the `get_data()` function returns data using scikit-learn's [load_digits](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html) method."
]
},
{
@@ -252,7 +252,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
-"## Train the Model\n",
+"## Train the Models\n",
"\n",
"Call the `submit` method on the experiment object and pass the run configuration. For remote runs the execution is asynchronous, so you will see the iterations get populated as they complete. You can interact with the widgets and models even when the experiment is running to retrieve the best model up to that point. Once you are satisfied with the model, you can cancel a particular iteration or the whole run.\n",
"In this example, we specify `show_output = False` to suppress console output while the run is in progress."
@@ -356,7 +356,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
-"## Cancelling runs\n",
+"## Cancelling Runs\n",
"\n",
"You can cancel ongoing remote runs using the `cancel` and `cancel_iteration` functions."
]
@@ -380,7 +380,7 @@
"source": [
"### Retrieve the Best Model\n",
"\n",
-"Below we select the best pipeline from our iterations. The `get_output` method on `automl_classifier` returns the best run and the fitted model for the last invocation. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*."
+"Below we select the best pipeline from our iterations. The `get_output` method returns the best run and the fitted model. The Model includes the pipeline and any pre-processing. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*."
]
},
{
@@ -434,25 +434,6 @@
"print(third_model)"
]
},
-{
-"cell_type": "markdown",
-"metadata": {},
-"source": [
-"### Register the Fitted Model for Deployment"
-]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": [
-"description = 'AutoML Model'\n",
-"tags = None\n",
-"remote_run.register_model(description = description, tags = tags)\n",
-"remote_run.model_id # Use this id to deploy the model as a web service in Azure."
-]
-},
{
"cell_type": "markdown",
"metadata": {},
@@ -478,7 +459,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
-"#### Testing Our Best Pipeline"
+"#### Testing Our Best Fitted Model"
]
},
{
@@ -502,6 +483,11 @@
}
],
"metadata": {
+"authors": [
+{
+"name": "savitam"
+}
+],
"kernelspec": {
"display_name": "Python 3.6",
"language": "python",
@@ -132,15 +132,32 @@
"outputs": [],
"source": [
"from azureml.core.compute import RemoteCompute\n",
+"import time\n",
"\n",
"# Add your VM information below\n",
-"dsvm_name = 'mydsvm1'\n",
+"# If a compute with the specified compute_name already exists, it will be used and the dsvm_ip_addr, dsvm_ssh_port, \n",
+"# dsvm_username and dsvm_password will be ignored.\n",
+"compute_name = 'mydsvm'\n",
"dsvm_ip_addr = '<<ip_addr>>'\n",
"dsvm_ssh_port = 22\n",
"dsvm_username = '<<username>>'\n",
"dsvm_password = '<<password>>'\n",
"\n",
-"dsvm_compute = RemoteCompute.attach(workspace=ws, name=dsvm_name, address=dsvm_ip_addr, username=dsvm_username, password=dsvm_password, ssh_port=dsvm_ssh_port)"
+"if compute_name in ws.compute_targets():\n",
+"    print('Using existing compute.')\n",
+"    dsvm_compute = ws.compute_targets()[compute_name]\n",
+"else:\n",
+"    RemoteCompute.attach(workspace=ws, name=compute_name, address=dsvm_ip_addr, username=dsvm_username, password=dsvm_password, ssh_port=dsvm_ssh_port)\n",
+"\n",
+"    while ws.compute_targets()[compute_name].provisioning_state == 'Creating':\n",
+"        time.sleep(1)\n",
+"\n",
+"    dsvm_compute = ws.compute_targets()[compute_name]\n",
+"    \n",
+"    if dsvm_compute.provisioning_state == 'Failed':\n",
+"        print('Attach failed.')\n",
+"        print(dsvm_compute.provisioning_errors)\n",
+"        dsvm_compute.delete()"
]
},
{
@@ -182,11 +199,11 @@
" le = LabelEncoder()\n",
" le.fit(df[\"Label\"].values)\n",
" y = le.transform(df[\"Label\"].values)\n",
-" df = df.drop([\"Label\"], axis=1)\n",
+" X = df.drop([\"Label\"], axis=1)\n",
"\n",
-" df_train, _, y_train, _ = train_test_split(df, y, test_size = 0.1, random_state = 42)\n",
+" X_train, _, y_train, _ = train_test_split(X, y, test_size = 0.1, random_state = 42)\n",
"\n",
-" return { \"X\" : df, \"y\" : y }"
+" return { \"X\" : X_train, \"y\" : y_train }"
]
},
{
@@ -261,7 +278,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
-"## Train the Model <a class=\"anchor\" id=\"Training-the-model-Remote-DSVM\"></a>\n",
+"## Train the Models <a class=\"anchor\" id=\"Training-the-model-Remote-DSVM\"></a>\n",
"\n",
"Call the `submit` method on the experiment object and pass the run configuration. For remote runs the execution is asynchronous, so you will see the iterations get populated as they complete. You can interact with the widgets and models even when the experiment is running to retrieve the best model up to that point. Once you are satisfied with the model, you can cancel a particular iteration or the whole run."
]
@@ -279,7 +296,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
-"## Exploring the results <a class=\"anchor\" id=\"Exploring-the-Results-Remote-DSVM\"></a>\n",
+"## Exploring the Results <a class=\"anchor\" id=\"Exploring-the-Results-Remote-DSVM\"></a>\n",
"#### Widget for Monitoring Runs\n",
"\n",
"The widget will first report a \"loading\" status while running the first iteration. After completing the first iteration, an auto-updating graph and table will be shown. The widget will refresh once per minute, so you should see the graph update as child runs complete.\n",
@@ -329,7 +346,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
-"## Cancelling runs\n",
+"## Cancelling Runs\n",
"You can cancel ongoing remote runs using the `cancel` and `cancel_iteration` functions."
]
},
@@ -352,7 +369,7 @@
"source": [
"### Retrieve the Best Model\n",
"\n",
-"Below we select the best pipeline from our iterations. The `get_output` method on `automl_classifier` returns the best run and the fitted model for the last invocation. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*."
+"Below we select the best pipeline from our iterations. The `get_output` method returns the best run and the fitted model. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*."
]
},
{
@@ -401,25 +418,6 @@
"zero_run, zero_model = remote_run.get_output(iteration = iteration)"
]
},
-{
-"cell_type": "markdown",
-"metadata": {},
-"source": [
-"### Register the Fitted Model for Deployment"
-]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": [
-"description = 'AutoML Model'\n",
-"tags = None\n",
-"remote_run.register_model(description = description, tags = tags)\n",
-"print(remote_run.model_id) # Use this id to deploy the model as a web service in Azure."
-]
-},
{
"cell_type": "markdown",
"metadata": {},
@@ -445,12 +443,12 @@
"le = LabelEncoder()\n",
"le.fit(df[\"Label\"].values)\n",
"y = le.transform(df[\"Label\"].values)\n",
-"df = df.drop([\"Label\"], axis=1)\n",
+"X = df.drop([\"Label\"], axis=1)\n",
"\n",
-"_, df_test, _, y_test = train_test_split(df, y, test_size=0.1, random_state=42)\n",
+"_, X_test, _, y_test = train_test_split(X, y, test_size=0.1, random_state=42)\n",
"\n",
"\n",
-"ypred = fitted_model.predict(df_test.values)\n",
+"ypred = fitted_model.predict(X_test.values)\n",
"\n",
"\n",
"ypred_strings = le.inverse_transform(ypred)\n",
@@ -465,6 +463,11 @@
}
],
"metadata": {
+"authors": [
+{
+"name": "savitam"
+}
+],
"kernelspec": {
"display_name": "Python 3.6",
"language": "python",
@@ -15,7 +15,7 @@
"source": [
"# AutoML 05: Blacklisting Models, Early Termination, and Handling Missing Data\n",
"\n",
-"In this example we use the scikit-learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html) to showcase how you can use AutoML for handling missing values in data. We also provide a stopping metric indicating a target for the primary metrics so that AutoML can terminate the run without necessarly going through all the iterations. Finally, if you want to avoid a certain pipeline, we allow you to specify a blacklist of algorithms that AutoML will ignore for this run.\n",
+"In this example we use scikit-learn's [digit dataset](http://scikit-learn.org/stable/datasets/index.html#optical-recognition-of-handwritten-digits-dataset) to showcase how you can use AutoML for handling missing values in data. We also provide a stopping metric indicating a target for the primary metrics so that AutoML can terminate the run without necessarily going through all the iterations. Finally, if you want to avoid a certain pipeline, we allow you to specify a blacklist of algorithms that AutoML will ignore for this run.\n",
"\n",
"Make sure you have executed the [00.configuration](00.configuration.ipynb) before running this notebook.\n",
"\n",
@@ -166,7 +166,7 @@
"|**n_cross_validations**|Number of cross validation splits.|\n",
"|**preprocess**|Setting this to *True* enables AutoML to perform preprocessing on the input to handle *missing data*, and to perform some common *feature extraction*.|\n",
"|**exit_score**|*double* value indicating the target for *primary_metric*. <br>Once the target is surpassed the run terminates.|\n",
-"|**blacklist_algos**|*Array* of *strings* indicating pipelines to ignore for AutoML.<br><br> Allowed values for **Classification**<br><i>LogisticRegression</i><br><i>SGDClassifierWrapper</i><br><i>NBWrapper</i><br><i>BernoulliNB</i><br><i>SVCWrapper</i><br><i>LinearSVMWrapper</i><br><i>KNeighborsClassifier</i><br><i>DecisionTreeClassifier</i><br><i>RandomForestClassifier</i><br><i>ExtraTreesClassifier</i><br><i>LightGBMClassifier</i><br><br>Allowed values for **Regression**<br><i>ElasticNet<i><br><i>GradientBoostingRegressor<i><br><i>DecisionTreeRegressor<i><br><i>KNeighborsRegressor<i><br><i>LassoLars<i><br><i>SGDRegressor<i><br><i>RandomForestRegressor<i><br><i>ExtraTreesRegressor<i>|\n",
+"|**blacklist_algos**|*List* of *strings* indicating machine learning algorithms for AutoML to avoid in this run.<br><br> Allowed values for **Classification**<br><i>LogisticRegression</i><br><i>SGDClassifierWrapper</i><br><i>NBWrapper</i><br><i>BernoulliNB</i><br><i>SVCWrapper</i><br><i>LinearSVMWrapper</i><br><i>KNeighborsClassifier</i><br><i>DecisionTreeClassifier</i><br><i>RandomForestClassifier</i><br><i>ExtraTreesClassifier</i><br><i>LightGBMClassifier</i><br><br>Allowed values for **Regression**<br><i>ElasticNet</i><br><i>GradientBoostingRegressor</i><br><i>DecisionTreeRegressor</i><br><i>KNeighborsRegressor</i><br><i>LassoLars</i><br><i>SGDRegressor</i><br><i>RandomForestRegressor</i><br><i>ExtraTreesRegressor</i>|\n",
"|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n",
"|**y**|(sparse) array-like, shape = [n_samples, ], [n_samples, n_classes]<br>Multi-class targets. An indicator matrix turns on multilabel classification. This should be an array of integers.|\n",
"|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder.|"
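To tie the settings table to code, a sketch of an `AutoMLConfig` that uses them, with illustrative values (`X`, `y`, and `project_folder` as defined elsewhere in the notebook):

from azureml.train.automl import AutoMLConfig

automl_config = AutoMLConfig(task = 'classification',
                             primary_metric = 'AUC_weighted',
                             n_cross_validations = 5,
                             preprocess = True,
                             exit_score = 0.995,
                             blacklist_algos = ['KNeighborsClassifier', 'LinearSVMWrapper'],
                             X = X,
                             y = y,
                             path = project_folder)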
@@ -197,7 +197,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
-"## Train the Model\n",
+"## Train the Models\n",
"\n",
"Call the `submit` method on the experiment object and pass the run configuration. Execution of local runs is synchronous. Depending on the data and the number of iterations this can run for a while.\n",
"In this example, we specify `show_output = True` to print currently running iterations to the console."
@@ -272,7 +272,7 @@
"source": [
"### Retrieve the Best Model\n",
"\n",
-"Below we select the best pipeline from our iterations. The `get_output` method on `automl_classifier` returns the best run and the fitted model for the last invocation. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*."
+"Below we select the best pipeline from our iterations. The `get_output` method returns the best run and the fitted model. The Model includes the pipeline and any pre-processing. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*."
]
},
{
@@ -324,26 +324,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
-"### Register the Fitted Model for Deployment"
+"### Testing the best Fitted Model"
-]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": [
-"description = 'AutoML Model'\n",
-"tags = None\n",
-"local_run.register_model(description = description, tags = tags)\n",
-"local_run.model_id # Use this id to deploy the model as a web service in Azure."
-]
-},
-{
-"cell_type": "markdown",
-"metadata": {},
-"source": [
-"### Testing the Fitted Model"
]
},
{
@@ -372,6 +353,11 @@
}
],
"metadata": {
+"authors": [
+{
+"name": "savitam"
+}
+],
"kernelspec": {
"display_name": "Python 3.6",
"language": "python",
@@ -137,17 +137,17 @@
" shuffle = True, random_state = 42,\n",
" remove = remove)\n",
"\n",
-"X_train, X_validation, y_train, y_validation = train_test_split(data_train.data, data_train.target, test_size = 0.33, random_state = 42)\n",
+"X_train, X_valid, y_train, y_valid = train_test_split(data_train.data, data_train.target, test_size = 0.33, random_state = 42)\n",
"\n",
"\n",
"vectorizer = HashingVectorizer(stop_words = 'english', alternate_sign = False,\n",
" n_features = 2**16)\n",
"X_train = vectorizer.transform(X_train)\n",
-"X_validation = vectorizer.transform(X_validation)\n",
+"X_valid = vectorizer.transform(X_valid)\n",
"\n",
"summary_df = pd.DataFrame(index = ['No of Samples', 'No of Features'])\n",
"summary_df['Train Set'] = [X_train.shape[0], X_train.shape[1]]\n",
-"summary_df['Validation Set'] = [X_validation.shape[0], X_validation.shape[1]]\n",
+"summary_df['Validation Set'] = [X_valid.shape[0], X_valid.shape[1]]\n",
"summary_df"
]
},
@@ -188,8 +188,8 @@
" verbosity = logging.INFO,\n",
" X = X_train, \n",
" y = y_train,\n",
-" X_valid = X_validation, \n",
-" y_valid = y_validation, \n",
+" X_valid = X_valid, \n",
+" y_valid = y_valid, \n",
" path = project_folder)"
]
},
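When explicit validation data is supplied through `X_valid` and `y_valid` like this, iterations are scored against that fixed validation set rather than cross-validation splits. The essential fragment with the renamed variables (other arguments omitted):

automl_config = AutoMLConfig(task = 'classification',
                             X = X_train,
                             y = y_train,
                             X_valid = X_valid,
                             y_valid = y_valid,
                             path = project_folder)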
@@ -197,7 +197,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
-"## Train the Model\n",
+"## Train the Models\n",
"\n",
"Call the `submit` method on the experiment object and pass the run configuration. Execution of local runs is synchronous. Depending on the data and the number of iterations this can run for a while.\n",
"In this example, we specify `show_output = True` to print currently running iterations to the console."
@@ -266,20 +266,13 @@
"rundata"
]
},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": []
-},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Retrieve the Best Model\n",
"\n",
-"Below we select the best pipeline from our iterations. The `get_output` method on `automl_classifier` returns the best run and the fitted model for the last invocation. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*."
+"Below we select the best pipeline from our iterations. The `get_output` method returns the best run and the fitted model. The Model includes the pipeline and any pre-processing. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*."
]
},
{
@@ -331,26 +324,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
-"### Register the Fitted Model for Deployment"
+"### Testing the Best Fitted Model"
-]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": [
-"description = 'AutoML Model'\n",
-"tags = None\n",
-"local_run.register_model(description = description, tags = tags)\n",
-"local_run.model_id # Use this id to deploy the model as a web service in Azure."
-]
-},
-{
-"cell_type": "markdown",
-"metadata": {},
-"source": [
-"### Testing the Fitted Model"
]
},
{
@@ -360,25 +334,12 @@
"outputs": [],
"source": [
"# Load test data.\n",
-"import sklearn\n",
"from pandas_ml import ConfusionMatrix\n",
"\n",
-"remove = ('headers', 'footers', 'quotes')\n",
-"categories = [\n",
-" 'alt.atheism',\n",
-" 'talk.religion.misc',\n",
-" 'comp.graphics',\n",
-" 'sci.space',\n",
-"]\n",
-"\n",
-"\n",
"data_test = fetch_20newsgroups(subset = 'test', categories = categories,\n",
" shuffle = True, random_state = 42,\n",
" remove = remove)\n",
"\n",
-"vectorizer = HashingVectorizer(stop_words = 'english', alternate_sign = False,\n",
-" n_features = 2**16)\n",
-"\n",
"X_test = vectorizer.transform(data_test.data)\n",
"y_test = data_test.target\n",
"\n",
@@ -395,6 +356,11 @@
}
],
"metadata": {
+"authors": [
+{
+"name": "savitam"
+}
+],
"kernelspec": {
"display_name": "Python 3.6",
"language": "python",
|||||||
@@ -247,7 +247,8 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"# Register fitted model for deployment"
|
"# Register fitted model for deployment\n",
|
||||||
|
"If neither `metric` nor `iteration` are specified in the `register_model` call, the iteration with the best primary metric is registered."
|
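A hedged sketch of the three `register_model` variants, assuming a completed AutoML run object named `local_run`:

```python
model = local_run.register_model(description='AutoML Model')  # best primary metric
model = local_run.register_model(metric='AUC_weighted')       # best for a specific metric
model = local_run.register_model(iteration=3)                 # a specific iteration
```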
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -304,6 +305,11 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "savitam"
|
||||||
|
}
|
||||||
|
],
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "Python 3.6",
|
"display_name": "Python 3.6",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
|
|||||||
@@ -15,7 +15,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"# AutoML 08: Remote Execution with DataStore\n",
|
"# AutoML 08: Remote Execution with DataStore\n",
|
||||||
"\n",
|
"\n",
|
||||||
"In this sample accesses a data file on a remote DSVM through DataStore. Advantagets of using data store\n",
|
"This sample accesses a data file on a remote DSVM through DataStore. Advantages of using data store are:\n",
|
||||||
"1. DataStore secures the access details.\n",
|
"1. DataStore secures the access details.\n",
|
||||||
"2. DataStore supports read, write to blob and file store\n",
|
"2. DataStore supports read, write to blob and file store\n",
|
||||||
"3. AutoML natively supports copying data from DataStore to DSVM\n",
|
"3. AutoML natively supports copying data from DataStore to DSVM\n",
|
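As a minimal sketch of storing data in a DataStore (assuming a workspace object `ws` and a local file `train.csv`; both names are illustrative, not taken from this notebook):

```python
ds = ws.get_default_datastore()
# Upload the local file so remote runs can read it through the datastore
ds.upload_files(['./train.csv'], target_path='data', overwrite=True)
```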
||||||
@@ -23,8 +23,8 @@
|
|||||||
"Make sure you have executed the [00.configuration](00.configuration.ipynb) before running this notebook.\n",
|
"Make sure you have executed the [00.configuration](00.configuration.ipynb) before running this notebook.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"In this notebook you would see\n",
|
"In this notebook you would see\n",
|
||||||
"1. Configuring the DSVM to allow files to be access directly by the get_data method.\n",
|
"1. Storing data in DataStore.\n",
|
||||||
"2. get_data returning data from a local file.\n",
|
"2. get_data returning data from DataStore.\n",
|
||||||
"\n"
|
"\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -285,11 +285,11 @@
|
|||||||
" le = LabelEncoder()\n",
|
" le = LabelEncoder()\n",
|
||||||
" le.fit(df[\"Label\"].values)\n",
|
" le.fit(df[\"Label\"].values)\n",
|
||||||
" y = le.transform(df[\"Label\"].values)\n",
|
" y = le.transform(df[\"Label\"].values)\n",
|
||||||
" df = df.drop([\"Label\"], axis=1)\n",
|
" X = df.drop([\"Label\"], axis=1)\n",
|
||||||
"\n",
|
"\n",
|
||||||
" df_train, _, y_train, _ = train_test_split(df, y, test_size=0.1, random_state=42)\n",
|
" X_train, _, y_train, _ = train_test_split(X, y, test_size=0.1, random_state=42)\n",
|
||||||
"\n",
|
"\n",
|
||||||
" return { \"X\" : df.values, \"y\" : y }"
|
" return { \"X\" : X_train.values, \"y\" : y_train }"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -300,7 +300,7 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"You can specify automl_settings as **kwargs** as well. Also note that you can use the get_data() symantic for local excutions too. \n",
|
"You can specify automl_settings as **kwargs** as well. Also note that you can use the get_data() symantic for local excutions too. \n",
|
||||||
"\n",
|
"\n",
|
||||||
"<i>Note: For Remote DSVM and Batch AI you cannot pass Numpy arrays directly to the fit method.</i>\n",
|
"<i>Note: For Remote DSVM and Batch AI you cannot pass Numpy arrays directly to AutoMLConfig.</i>\n",
|
||||||
"\n",
|
"\n",
|
||||||
"|Property|Description|\n",
|
"|Property|Description|\n",
|
||||||
"|-|-|\n",
|
"|-|-|\n",
|
||||||
@@ -342,7 +342,7 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## Training the Model <a class=\"anchor\" id=\"Training-the-model-Remote-DSVM\"></a>\n",
|
"## Training the Models <a class=\"anchor\" id=\"Training-the-model-Remote-DSVM\"></a>\n",
|
||||||
"\n",
|
"\n",
|
||||||
"For remote runs the execution is asynchronous, so you will see the iterations get populated as they complete. You can interact with the widgets/models even when the experiment is running to retreive the best model up to that point. Once you are satisfied with the model you can cancel a particular iteration or the whole run."
|
"For remote runs the execution is asynchronous, so you will see the iterations get populated as they complete. You can interact with the widgets/models even when the experiment is running to retreive the best model up to that point. Once you are satisfied with the model you can cancel a particular iteration or the whole run."
|
||||||
]
|
]
|
||||||
@@ -410,7 +410,7 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## Canceling runs\n",
|
"## Canceling Runs\n",
|
||||||
"You can cancel ongoing remote runs using the *cancel()* and *cancel_iteration()* functions"
|
"You can cancel ongoing remote runs using the *cancel()* and *cancel_iteration()* functions"
|
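For example (illustrative only, assuming a submitted run object named `remote_run`):

```python
remote_run.cancel_iteration(1)  # cancel a single iteration by its number
remote_run.cancel()             # cancel the entire run
```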
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -433,7 +433,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"### Retrieve the Best Model\n",
|
"### Retrieve the Best Model\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Below we select the best pipeline from our iterations. The *get_output* method on automl_classifier returns the best run and the fitted model for the last *fit* invocation. There are overloads on *get_output* that allow you to retrieve the best run and fitted model for *any* logged metric or a particular *iteration*."
|
"Below we select the best pipeline from our iterations. The *get_output* method returns the best run and the fitted model. There are overloads on *get_output* that allow you to retrieve the best run and fitted model for *any* logged metric or a particular *iteration*."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -483,26 +483,7 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### Register fitted model for deployment"
|
"### Testing the Best Fitted Model <a class=\"anchor\" id=\"Testing-the-Fitted-Model-Remote-DSVM\"></a>\n"
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"#description = 'AutoML Model'\n",
|
|
||||||
"#tags = None\n",
|
|
||||||
"#remote_run.register_model(description=description, tags=tags)\n",
|
|
||||||
"#remote_run.model_id # Use this id to deploy the model as a web service in Azure"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Testing the Fitted Model <a class=\"anchor\" id=\"Testing-the-Fitted-Model-Remote-DSVM\"></a>\n"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -523,11 +504,11 @@
|
|||||||
"le = LabelEncoder()\n",
|
"le = LabelEncoder()\n",
|
||||||
"le.fit(df[\"Label\"].values)\n",
|
"le.fit(df[\"Label\"].values)\n",
|
||||||
"y = le.transform(df[\"Label\"].values)\n",
|
"y = le.transform(df[\"Label\"].values)\n",
|
||||||
"df = df.drop([\"Label\"], axis=1)\n",
|
"X = df.drop([\"Label\"], axis=1)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"_, df_test, _, y_test = train_test_split(df, y, test_size=0.1, random_state=42)\n",
|
"_, X_test, _, y_test = train_test_split(X, y, test_size=0.1, random_state=42)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"ypred = fitted_model.predict(df_test.values)\n",
|
"ypred = fitted_model.predict(X_test.values)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"ypred_strings = le.inverse_transform(ypred)\n",
|
"ypred_strings = le.inverse_transform(ypred)\n",
|
||||||
"ytest_strings = le.inverse_transform(y_test)\n",
|
"ytest_strings = le.inverse_transform(y_test)\n",
|
||||||
@@ -541,6 +522,11 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "savitam"
|
||||||
|
}
|
||||||
|
],
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "Python 3.6",
|
"display_name": "Python 3.6",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
|
|||||||
@@ -25,8 +25,9 @@
|
|||||||
"3. Train the model using local compute.\n",
|
"3. Train the model using local compute.\n",
|
||||||
"4. Explore the results.\n",
|
"4. Explore the results.\n",
|
||||||
"5. Register the model.\n",
|
"5. Register the model.\n",
|
||||||
"6. Create a container image and create and ACI service.\n",
|
"6. Create a container image.\n",
|
||||||
"7. Test the ACI service.\n"
|
"7. Create an Azure Container Instance (ACI) service.\n",
|
||||||
|
"8. Test the ACI service.\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -155,7 +156,7 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## Train the Model\n",
|
"## Train the Models\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Call the `submit` method on the experiment object and pass the run configuration. Execution of local runs is synchronous. Depending on the data and the number of iterations this can run for a while.\n",
|
"Call the `submit` method on the experiment object and pass the run configuration. Execution of local runs is synchronous. Depending on the data and the number of iterations this can run for a while.\n",
|
||||||
"In this example, we specify `show_output = True` to print currently running iterations to the console."
|
"In this example, we specify `show_output = True` to print currently running iterations to the console."
|
||||||
@@ -192,7 +193,8 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### Register the Fitted Model for Deployment"
|
"### Register the Fitted Model for Deployment\n",
|
||||||
|
"If neither `metric` nor `iteration` are specified in the `register_model` call, the iteration with the best primary metric is registered."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -203,7 +205,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"description = 'AutoML Model'\n",
|
"description = 'AutoML Model'\n",
|
||||||
"tags = None\n",
|
"tags = None\n",
|
||||||
"model = local_run.register_model(description = description, tags = tags, iteration = 8)\n",
|
"model = local_run.register_model(description = description, tags = tags)\n",
|
||||||
"local_run.model_id # This will be written to the script file later in the notebook."
|
"local_run.model_id # This will be written to the script file later in the notebook."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -256,7 +258,7 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"To ensure the consistency of the fit results with the training results, the SDK dependency versions need to be the same as the environment that trains the model. Details about retrieving the versions can be found in notebook [12.auto-ml-retrieve-the-training-sdk-versions](12.auto-ml-retrieve-the-training-sdk-versions.ipynb)."
|
"To ensure the fit results are consistent with the training results, the SDK dependency versions need to be the same as the environment that trains the model. Details about retrieving the versions can be found in notebook [12.auto-ml-retrieve-the-training-sdk-versions](12.auto-ml-retrieve-the-training-sdk-versions.ipynb)."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -304,7 +306,6 @@
|
|||||||
" - pip:\n",
|
" - pip:\n",
|
||||||
" - numpy==1.14.2\n",
|
" - numpy==1.14.2\n",
|
||||||
" - scikit-learn==0.19.2\n",
|
" - scikit-learn==0.19.2\n",
|
||||||
" - pynacl==1.2.1\n",
|
|
||||||
" - azureml-sdk[notebooks,automl]==<<azureml-version>>"
|
" - azureml-sdk[notebooks,automl]==<<azureml-version>>"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -471,6 +472,11 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "savitam"
|
||||||
|
}
|
||||||
|
],
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "Python 3.6",
|
"display_name": "Python 3.6",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
|
|||||||
@@ -15,7 +15,9 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"# AutoML 10: Multi-output\n",
|
"# AutoML 10: Multi-output\n",
|
||||||
"\n",
|
"\n",
|
||||||
"This notebook shows how to use AutoML to train multi-output problems by leveraging the correlation between the outputs using indicator vectors."
|
"This notebook shows how to use AutoML to train multi-output problems by leveraging the correlation between the outputs using indicator vectors.\n",
|
||||||
|
"\n",
|
||||||
|
"Make sure you have executed the [00.configuration](00.configuration.ipynb) before running this notebook."
|
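A hedged illustration of the indicator-vector encoding mentioned above, with made-up data (the notebook's own implementation may differ): each sample is repeated once per output, an indicator column marks which output a row's target belongs to, and the targets are flattened into a single vector.

```python
import numpy as np

X = np.array([[1.0, 2.0], [3.0, 4.0]])      # 2 samples, 2 features
Y = np.array([[10.0, 20.0], [30.0, 40.0]])  # 2 outputs per sample

n, k = Y.shape
X_rep = np.repeat(X, k, axis=0)             # repeat each sample once per output
indicators = np.tile(np.eye(k), (n, 1))     # one-hot indicator of the output index
X_stacked = np.hstack([X_rep, indicators])  # single-output design matrix
y_stacked = Y.ravel()                       # flattened targets
```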
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -264,6 +266,11 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "savitam"
|
||||||
|
}
|
||||||
|
],
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "Python 3.6",
|
"display_name": "Python 3.6",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
|
|||||||
@@ -15,7 +15,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"# AutoML 11: Sample Weight\n",
|
"# AutoML 11: Sample Weight\n",
|
||||||
"\n",
|
"\n",
|
||||||
"In this example we use the scikit-learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html) to showcase how you can use sample weight with AutoML. Sample weight is used where some sample values are more important than others.\n",
|
"In this example we use the scikit-learn's [digit dataset](http://scikit-learn.org/stable/datasets/index.html#optical-recognition-of-handwritten-digits-dataset) to showcase how you can use sample weight with AutoML. Sample weight is used where some sample values are more important than others.\n",
|
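A minimal sketch of supplying per-sample weights, assuming `X_train` and `y_train` are defined and that `AutoMLConfig` accepts a `sample_weight` array, as this notebook's topic suggests (the weighting scheme is illustrative):

```python
import numpy as np

# Weight samples labelled 4 more heavily, mirroring the comparison below
sample_weight = np.ones(y_train.shape)
sample_weight[y_train == 4] = 2.0

automl_config = AutoMLConfig(task='classification',
                             X=X_train,
                             y=y_train,
                             sample_weight=sample_weight)
```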
||||||
"\n",
|
"\n",
|
||||||
"Make sure you have executed the [00.configuration](00.configuration.ipynb) before running this notebook.\n",
|
"Make sure you have executed the [00.configuration](00.configuration.ipynb) before running this notebook.\n",
|
||||||
"\n",
|
"\n",
|
||||||
@@ -197,7 +197,7 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"#### Compare the Pipelines\n",
|
"#### Compare the Models\n",
|
||||||
"The prediction from the sample weight model is more likely to correctly predict 4's. However, it is also more likely to predict 4 for some images that are not labelled as 4."
|
"The prediction from the sample weight model is more likely to correctly predict 4's. However, it is also more likely to predict 4 for some images that are not labelled as 4."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -223,6 +223,11 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "savitam"
|
||||||
|
}
|
||||||
|
],
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "Python 3.6",
|
"display_name": "Python 3.6",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
|
|||||||
@@ -13,7 +13,11 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"# AutoML 12: Retrieving Training SDK Versions"
|
"# AutoML 12: Retrieving Training SDK Versions\n",
|
||||||
|
"\n",
|
||||||
|
"This example shows how to find the SDK versions used for an experiment.\n",
|
||||||
|
"\n",
|
||||||
|
"Make sure you have executed the [00.configuration](00.configuration.ipynb) before running this notebook."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -63,7 +67,7 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"# 1. Retrieve the SDK versions in the current environment"
|
"# Retrieve the SDK versions in the current environment"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -86,7 +90,7 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"# 2. Train model using AutoML"
|
"# Train models using AutoML"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -142,7 +146,7 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"# 3. Retrieve the SDK versions from RunHistory"
|
"# Retrieve the SDK versions from RunHistory"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -219,6 +223,11 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "savitam"
|
||||||
|
}
|
||||||
|
],
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "Python 3.6",
|
"display_name": "Python 3.6",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
|
|||||||
@@ -174,11 +174,11 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"automl_settings = {\n",
|
"automl_settings = {\n",
|
||||||
" \"max_time_sec\": 600,\n",
|
" \"max_time_sec\" : 600,\n",
|
||||||
" \"iterations\": 2,\n",
|
" \"iterations\" : 2,\n",
|
||||||
" \"primary_metric\": 'AUC_weighted',\n",
|
" \"primary_metric\" : 'AUC_weighted',\n",
|
||||||
" \"preprocess\": False,\n",
|
" \"preprocess\" : False,\n",
|
||||||
" \"verbosity\": logging.INFO,\n",
|
" \"verbosity\" : logging.INFO,\n",
|
||||||
" \"n_cross_validations\": 3\n",
|
" \"n_cross_validations\": 3\n",
|
||||||
"}"
|
"}"
|
||||||
]
|
]
|
||||||
@@ -225,8 +225,7 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## Remote Run\n",
|
"## Remote Run"
|
||||||
"*Note: This feature might not work properly in your workspace region before the October update. You may jump to the \"Exploring the results\" section below to explore other features AutoML and DataPrep has to offer.*"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -269,8 +268,7 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"cd = CondaDependencies()\n",
|
"cd = CondaDependencies()\n",
|
||||||
"cd.add_pip_package(pip_package='azureml-dataprep')\n",
|
"cd.add_pip_package(pip_package='azureml-dataprep')"
|
||||||
"cd.add_pip_package(pip_package='tornado==4.5.1')"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -313,9 +311,7 @@
|
|||||||
" X = X,\n",
|
" X = X,\n",
|
||||||
" y = y,\n",
|
" y = y,\n",
|
||||||
" **automl_settings)\n",
|
" **automl_settings)\n",
|
||||||
"# Please uncomment the line below to try out remote run with dataprep. \n",
|
"remote_run = experiment.submit(automl_config, show_output = True)"
|
||||||
"# This feature might not work properly in your workspace region before the October update.\n",
|
|
||||||
"# remote_run = experiment.submit(automl_config, show_output = True)"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -378,7 +374,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"### Retrieve the Best Model\n",
|
"### Retrieve the Best Model\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Below we select the best pipeline from our iterations. The `get_output` method on `automl_classifier` returns the best run and the fitted model for the last invocation. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*."
|
"Below we select the best pipeline from our iterations. The `get_output` method returns the best run and the fitted model. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -459,7 +455,7 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"#### Testing Our Best Pipeline\n",
|
"#### Testing Our Best Fitted Model\n",
|
||||||
"We will try to predict 2 digits and see how our model works."
|
"We will try to predict 2 digits and see how our model works."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -534,6 +530,11 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "savitam"
|
||||||
|
}
|
||||||
|
],
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "Python 3.6",
|
"display_name": "Python 3.6",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
|
|||||||
118
automl/README.md
@@ -1,52 +1,24 @@
|
|||||||
# Table of Contents
|
# Table of Contents
|
||||||
1. [Automated ML Introduction](#introduction)
|
1. [Auto ML Introduction](#introduction)
|
||||||
1. [Running samples in Azure Notebooks](#jupyter)
|
2. [Running samples in a Local Conda environment](#localconda)
|
||||||
1. [Running samples in a Local Conda environment](#localconda)
|
3. [Auto ML SDK Sample Notebooks](#samples)
|
||||||
1. [Automated ML SDK Sample Notebooks](#samples)
|
4. [Documentation](#documentation)
|
||||||
1. [Documentation](#documentation)
|
5. [Running using python command](#pythoncommand)
|
||||||
1. [Running using python command](#pythoncommand)
|
6. [Troubleshooting](#troubleshooting)
|
||||||
1. [Troubleshooting](#troubleshooting)
|
|
||||||
|
|
||||||
<a name="introduction"></a>
|
|
||||||
# Automated ML introduction
|
|
||||||
Automated machine learning (automated ML) builds high quality machine learning models for you by automating model and hyperparameter selection. Bring a labelled dataset that you want to build a model for, automated ML will give you a high quality machine learning model that you can use for predictions.
|
|
||||||
|
|
||||||
|
# Auto ML Introduction <a name="introduction"></a>
|
||||||
|
AutoML builds high quality machine learning models for you by automating model and hyperparameter selection. Bring a labelled dataset that you want to build a model for, and AutoML will give you a high quality machine learning model that you can use for predictions.
|
||||||
|
|
||||||
If you are new to Data Science, AutoML will help you get jumpstarted by simplifying machine learning model building. It abstracts away model and hyperparameter selection and, in one step, creates a high quality trained model for you to use.
|
If you are new to Data Science, AutoML will help you get jumpstarted by simplifying machine learning model building. It abstracts away model and hyperparameter selection and, in one step, creates a high quality trained model for you to use.
|
||||||
|
|
||||||
If you are an experienced data scientist, AutoML will help increase your productivity by intelligently performing model and hyperparameter selection for your training, and it generates high quality models much more quickly than manually specifying several combinations of parameters and running training jobs. AutoML provides visibility and access to all the training jobs and the performance characteristics of the models to help you further tune the pipeline if you desire.
|
If you are an experienced data scientist, AutoML will help increase your productivity by intelligently performing model and hyperparameter selection for your training, and it generates high quality models much more quickly than manually specifying several combinations of parameters and running training jobs. AutoML provides visibility and access to all the training jobs and the performance characteristics of the models to help you further tune the pipeline if you desire.
|
||||||
|
|
||||||
<a name="jupyter"></a>
|
|
||||||
## Running samples in Azure Notebooks - Jupyter based notebooks in the Azure cloud
|
|
||||||
|
|
||||||
1. [](https://aka.ms/aml-clone-azure-notebooks)
|
# Running samples in a Local Conda environment <a name="localconda"></a>
|
||||||
[Import sample notebooks ](https://aka.ms/aml-clone-azure-notebooks) into Azure Notebooks.
|
|
||||||
1. Follow the instructions in the [../00.configuration](00.configuration.ipynb) notebook to create and connect to a workspace.
|
|
||||||
1. Open one of the sample notebooks.
|
|
||||||
|
|
||||||
**Make sure the Azure Notebook kernel is set to `Python 3.6`** when you open a notebook.
|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||
<a name="localconda"></a>
|
You can run these notebooks in Azure Notebooks without any extra installation. To run these notebooks on your own notebook server, use these installation instructions.
|
||||||
## Running samples in a Local Conda environment
|
|
||||||
|
|
||||||
To run these notebook on your own notebook server, use these installation instructions.
|
|
||||||
|
|
||||||
The instructions below will install everything you need and then start a Jupyter notebook. To start your Jupyter notebook manually, use:
|
|
||||||
|
|
||||||
```
|
|
||||||
conda activate azure_automl
|
|
||||||
jupyter notebook
|
|
||||||
```
|
|
||||||
|
|
||||||
or on Mac:
|
|
||||||
|
|
||||||
```
|
|
||||||
source activate azure_automl
|
|
||||||
jupyter notebook
|
|
||||||
```
|
|
||||||
|
|
||||||
|
It is best to create a new conda environment locally to try this SDK, so it doesn't interfere with your existing Python environment.
|
||||||
|
|
||||||
### 1. Install mini-conda from [here](https://conda.io/miniconda.html), choose Python 3.7 or higher.
|
### 1. Install mini-conda from [here](https://conda.io/miniconda.html), choose Python 3.7 or higher.
|
||||||
- **Note**: if you already have conda installed, you can keep using it, but it should be version 4.4.10 or later (as shown by `conda -V`). If you have an older version installed, you can update it with the command `conda update conda`.
|
- **Note**: if you already have conda installed, you can keep using it, but it should be version 4.4.10 or later (as shown by `conda -V`). If you have an older version installed, you can update it with the command `conda update conda`.
|
||||||
@@ -76,19 +48,19 @@ bash automl_setup_mac.sh
|
|||||||
cd to the **automl** folder where the sample notebooks were extracted and then run:
|
cd to the **automl** folder where the sample notebooks were extracted and then run:
|
||||||
|
|
||||||
```
|
```
|
||||||
bash automl_setup_linux.sh
|
automl_setup_linux.sh
|
||||||
```
|
```
|
||||||
|
|
||||||
### 4. Running configuration.ipynb
|
### 4. Running configuration.ipynb
|
||||||
- Before running any samples you next need to run the configuration notebook. Click on 00.configuration.ipynb notebook
|
- Before running any samples you next need to run the configuration notebook. Click on 00.configuration.ipynb notebook
|
||||||
|
- Please make sure you use the Python [conda env:azure_automl] kernel when running this notebook.
|
||||||
- Execute the cells in the notebook to Register Machine Learning Services Resource Provider and create a workspace. (*instructions in notebook*)
|
- Execute the cells in the notebook to Register Machine Learning Services Resource Provider and create a workspace. (*instructions in notebook*)
|
||||||
|
|
||||||
### 5. Running Samples
|
### 5. Running Samples
|
||||||
- Please make sure you use the Python [conda env:azure_automl] kernel when trying the sample Notebooks.
|
- Please make sure you use the Python [conda env:azure_automl] kernel when trying the sample Notebooks.
|
||||||
- Follow the instructions in the individual notebooks to explore various features in AutoML
|
- Follow the instructions in the individual notebooks to explore various features in AutoML
|
||||||
|
|
||||||
<a name="samples"></a>
|
# Auto ML SDK Sample Notebooks <a name="samples"></a>
|
||||||
# Automated ML SDK Sample Notebooks
|
|
||||||
- [00.configuration.ipynb](00.configuration.ipynb)
|
- [00.configuration.ipynb](00.configuration.ipynb)
|
||||||
- Register Machine Learning Services Resource Provider
|
- Register Machine Learning Services Resource Provider
|
||||||
- Create new Azure ML Workspace
|
- Create new Azure ML Workspace
|
||||||
@@ -115,7 +87,7 @@ bash automl_setup_linux.sh
|
|||||||
|
|
||||||
- [03b.auto-ml-remote-batchai.ipynb](03b.auto-ml-remote-batchai.ipynb)
|
- [03b.auto-ml-remote-batchai.ipynb](03b.auto-ml-remote-batchai.ipynb)
|
||||||
- Dataset: scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits)
|
- Dataset: scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits)
|
||||||
- Example of using automated ML for classification using a remote Batch AI compute for training
|
- Example of using Auto ML for classification using a remote Batch AI compute for training
|
||||||
- Parallel execution of iterations
|
- Parallel execution of iterations
|
||||||
- Async tracking of progress
|
- Async tracking of progress
|
||||||
- Cancelling individual iterations or entire run
|
- Cancelling individual iterations or entire run
|
||||||
@@ -171,17 +143,20 @@ bash automl_setup_linux.sh
|
|||||||
- [13.auto-ml-dataprep.ipynb](13.auto-ml-dataprep.ipynb)
|
- [13.auto-ml-dataprep.ipynb](13.auto-ml-dataprep.ipynb)
|
||||||
- Using DataPrep for reading data
|
- Using DataPrep for reading data
|
||||||
|
|
||||||
<a name="documentation"></a>
|
- [14a.auto-ml-classification-ensemble.ipynb](14a.auto-ml-classification-ensemble.ipynb)
|
||||||
# Documentation
|
- Classification with ensembling
|
||||||
|
|
||||||
|
- [14b.auto-ml-regression-ensemble.ipynb](14b.auto-ml-regression-ensemble.ipynb)
|
||||||
|
- Regression with ensembling
|
||||||
|
|
||||||
|
# Documentation <a name="documentation"></a>
|
||||||
## Table of Contents
|
## Table of Contents
|
||||||
1. [Automated ML Settings ](#automlsettings)
|
1. [Auto ML Settings ](#automlsettings)
|
||||||
1. [Cross validation split options](#cvsplits)
|
2. [Cross validation split options](#cvsplits)
|
||||||
1. [Get Data Syntax](#getdata)
|
3. [Get Data Syntax](#getdata)
|
||||||
1. [Data pre-processing and featurization](#preprocessing)
|
4. [Data pre-processing and featurization](#preprocessing)
|
||||||
|
|
||||||
<a name="automlsettings"></a>
|
|
||||||
## Automated ML Settings
|
|
||||||
|
|
||||||
|
## Auto ML Settings <a name="automlsettings"></a>
|
||||||
|Property|Description|Default|
|
|Property|Description|Default|
|
||||||
|-|-|-|
|
|-|-|-|
|
||||||
|**primary_metric**|This is the metric that you want to optimize.<br><br> Classification supports the following primary metrics <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>balanced_accuracy</i><br><i>average_precision_score_weighted</i><br><i>precision_score_weighted</i><br><br> Regression supports the following primary metrics <br><i>spearman_correlation</i><br><i>normalized_root_mean_squared_error</i><br><i>r2_score</i><br><i>normalized_mean_absolute_error</i><br><i>normalized_root_mean_squared_log_error</i>| Classification: accuracy <br><br> Regression: spearman_correlation
|
|**primary_metric**|This is the metric that you want to optimize.<br><br> Classification supports the following primary metrics <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>balanced_accuracy</i><br><i>average_precision_score_weighted</i><br><i>precision_score_weighted</i><br><br> Regression supports the following primary metrics <br><i>spearman_correlation</i><br><i>normalized_root_mean_squared_error</i><br><i>r2_score</i><br><i>normalized_mean_absolute_error</i><br><i>normalized_root_mean_squared_log_error</i>| Classification: accuracy <br><br> Regression: spearman_correlation
|
||||||
@@ -195,8 +170,7 @@ bash automl_setup_linux.sh
|
|||||||
|**exit_score**|*double* value indicating the target for *primary_metric*. <br> Once the target is surpassed the run terminates|None|
|
|**exit_score**|*double* value indicating the target for *primary_metric*. <br> Once the target is surpassed the run terminates|None|
|
||||||
|**blacklist_algos**|*Array* of *strings* indicating pipelines to ignore for Auto ML.<br><br> Allowed values for **Classification**<br><i>LogisticRegression</i><br><i>SGDClassifierWrapper</i><br><i>NBWrapper</i><br><i>BernoulliNB</i><br><i>SVCWrapper</i><br><i>LinearSVMWrapper</i><br><i>KNeighborsClassifier</i><br><i>DecisionTreeClassifier</i><br><i>RandomForestClassifier</i><br><i>ExtraTreesClassifier</i><br><i>gradient boosting</i><br><i>LightGBMClassifier</i><br><br>Allowed values for **Regression**<br><i>ElasticNet</i><br><i>GradientBoostingRegressor</i><br><i>DecisionTreeRegressor</i><br><i>KNeighborsRegressor</i><br><i>LassoLars</i><br><i>SGDRegressor</i><br><i>RandomForestRegressor</i><br><i>ExtraTreesRegressor</i>|None|
|
|**blacklist_algos**|*Array* of *strings* indicating pipelines to ignore for Auto ML.<br><br> Allowed values for **Classification**<br><i>LogisticRegression</i><br><i>SGDClassifierWrapper</i><br><i>NBWrapper</i><br><i>BernoulliNB</i><br><i>SVCWrapper</i><br><i>LinearSVMWrapper</i><br><i>KNeighborsClassifier</i><br><i>DecisionTreeClassifier</i><br><i>RandomForestClassifier</i><br><i>ExtraTreesClassifier</i><br><i>gradient boosting</i><br><i>LightGBMClassifier</i><br><br>Allowed values for **Regression**<br><i>ElasticNet</i><br><i>GradientBoostingRegressor</i><br><i>DecisionTreeRegressor</i><br><i>KNeighborsRegressor</i><br><i>LassoLars</i><br><i>SGDRegressor</i><br><i>RandomForestRegressor</i><br><i>ExtraTreesRegressor</i>|None|
|
||||||
|
|
||||||
<a name="cvsplits"></a>
|
## Cross validation split options <a name="cvsplits"></a>
|
||||||
## Cross validation split options
|
|
||||||
### K-Folds Cross Validation
|
### K-Folds Cross Validation
|
||||||
Use the *n_cross_validations* setting to specify the number of cross validations. The training data set will be randomly split into *n_cross_validations* folds of equal size. During each cross validation round, one of the folds will be used for validation of the model trained on the remaining folds. This process repeats for *n_cross_validations* rounds until each fold has been used once as the validation set. Finally, the average scores across all *n_cross_validations* rounds will be reported, and the corresponding model will be retrained on the whole training data set.
|
Use the *n_cross_validations* setting to specify the number of cross validations. The training data set will be randomly split into *n_cross_validations* folds of equal size. During each cross validation round, one of the folds will be used for validation of the model trained on the remaining folds. This process repeats for *n_cross_validations* rounds until each fold has been used once as the validation set. Finally, the average scores across all *n_cross_validations* rounds will be reported, and the corresponding model will be retrained on the whole training data set.
|
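For instance, a hedged configuration sketch (assuming `X` and `y` are already defined):

```python
automl_config = AutoMLConfig(task='classification',
                             X=X,
                             y=y,
                             n_cross_validations=5)  # 5 folds
```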
||||||
|
|
||||||
@@ -206,8 +180,7 @@ Use *validation_size* to specify the percentage of the training data set that sh
|
|||||||
### Custom train and validation set
|
### Custom train and validation set
|
||||||
You can specify separate train and validation sets either through get_data() or directly to the fit method.
|
You can specify separate train and validation sets either through get_data() or directly to the fit method.
|
||||||
|
|
||||||
<a name="getdata"></a>
|
## get_data() syntax <a name="getdata"></a>
|
||||||
## get_data() syntax
|
|
||||||
The *get_data()* function can be used to return a dictionary with these values:
|
The *get_data()* function can be used to return a dictionary with these values:
|
||||||
|
|
||||||
|Key|Type|Dependency|Mutually Exclusive with|Description|
|
|Key|Type|Dependency|Mutually Exclusive with|Description|
|
||||||
@@ -223,23 +196,21 @@ The *get_data()* function can be used to return a dictionary with these values:
|
|||||||
|columns|Array of strings|data_train||*Optional* Whitelist of columns to use for features|
|
|columns|Array of strings|data_train||*Optional* Whitelist of columns to use for features|
|
||||||
|cv_splits_indices|Array of integers|data_train||*Optional* List of indexes to split the data for cross validation|
|
|cv_splits_indices|Array of integers|data_train||*Optional* List of indexes to split the data for cross validation|
|
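A minimal `get_data()` sketch consistent with the table above (the file path and label column name are illustrative):

```python
import pandas as pd

def get_data():
    df = pd.read_csv('train.csv')
    y = df['Label'].values
    X = df.drop(['Label'], axis=1).values
    return {'X': X, 'y': y}
```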
||||||
|
|
||||||
<a name="preprocessing"></a>
|
## Data pre-processing and featurization <a name="preprocessing"></a>
|
||||||
## Data pre-processing and featurization
|
If you use "preprocess=True", the following data preprocessing steps are performed automatically for you:
|
||||||
If you use `preprocess=True`, the following data preprocessing steps are performed automatically for you:
|
### 1. Dropping high cardinality or no variance features
|
||||||
|
- Features with no useful information are dropped from training and validation sets. These include features with all values missing, same value across all rows or with extremely high cardinality (e.g., hashes, IDs or GUIDs).
|
||||||
|
### 2. Missing value imputation
|
||||||
|
- For numerical features, missing values are imputed with the average of the values in the column.
|
||||||
|
- For categorical features, missing values are imputed with the most frequent value.
|
||||||
|
### 3. Generating additional features
|
||||||
|
- For DateTime features: Year, Month, Day, Day of week, Day of year, Quarter, Week of the year, Hour, Minute, Second.
|
||||||
|
- For Text features: Term frequency based on bi-grams and tri-grams, Count vectorizer.
|
||||||
|
### 4. Transformations and encodings
|
||||||
|
- Numeric features with very few unique values are transformed into categorical features.
|
||||||
|
- Depending on cardinality of categorical features label encoding or (hashing) one-hot encoding is performed.
|
||||||
|
|
||||||
1. Dropping high cardinality or no variance features
|
# Running using python command <a name="pythoncommand"></a>
|
||||||
- Features with no useful information are dropped from training and validation sets. These include features with all values missing, same value across all rows or with extremely high cardinality (e.g., hashes, IDs or GUIDs).
|
|
||||||
2. Missing value imputation
|
|
||||||
- For numerical features, missing values are imputed with average of values in the column.
|
|
||||||
- For categorical features, missing values are imputed with most frequent value.
|
|
||||||
3. Generating additional features
|
|
||||||
- For DateTime features: Year, Month, Day, Day of week, Day of year, Quarter, Week of the year, Hour, Minute, Second.
|
|
||||||
- For Text features: Term frequency based on bi-grams and tri-grams, Count vectorizer.
|
|
||||||
4. Transformations and encodings
|
|
||||||
- Numeric features with very few unique values are transformed into categorical features.
|
|
||||||
|
|
||||||
<a name="pythoncommand"></a>
|
|
||||||
# Running using python command
|
|
||||||
Jupyter notebook provides a File / Download as / Python (.py) option for saving the notebook as a Python file.
|
Jupyter notebook provides a File / Download as / Python (.py) option for saving the notebook as a Python file.
|
||||||
You can then run this file using the python command.
|
You can then run this file using the python command.
|
||||||
However, on Windows the file needs to be modified before it can be run.
|
However, on Windows the file needs to be modified before it can be run.
|
||||||
@@ -249,8 +220,7 @@ The following condition must be added to the main code in the file:
|
|||||||
|
|
||||||
The main code of the file must be indented so that it is under this condition.
|
The main code of the file must be indented so that it is under this condition.
|
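The condition referred to above is presumably Python's standard main guard; the exact line is not shown in this hunk, so the sketch below is an assumption:

```python
if __name__ == '__main__':
    ...  # the notebook-exported main code goes here, indented under the guard
```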
||||||
|
|
||||||
<a name="troubleshooting"></a>
|
# Troubleshooting <a name="troubleshooting"></a>
|
||||||
# Troubleshooting
|
|
||||||
## Iterations fail and the log contains "MemoryError"
|
## Iterations fail and the log contains "MemoryError"
|
||||||
This can be caused by insufficient memory on the DSVM. AutoML loads all training data into memory. So, the available memory should be more than the training data size.
|
This can be caused by insufficient memory on the DSVM. AutoML loads all training data into memory. So, the available memory should be more than the training data size.
|
||||||
If you are using a remote DSVM, memory is needed for each concurrent iteration. The concurrent_iterations setting specifies the maximum number of concurrent iterations. For example, if the training data size is 8 GB and concurrent_iterations is set to 10, the minimum memory required is at least 80 GB.
|
If you are using a remote DSVM, memory is needed for each concurrent iteration. The concurrent_iterations setting specifies the maximum number of concurrent iterations. For example, if the training data size is 8 GB and concurrent_iterations is set to 10, the minimum memory required is at least 80 GB.
|
||||||
|
|||||||
431
onnx/onnx-convert-aml-deploy-tinyyolo.ipynb
Normal file
@@ -0,0 +1,431 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Copyright (c) Microsoft Corporation. All rights reserved. \n",
|
||||||
|
"\n",
|
||||||
|
"Licensed under the MIT License."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# YOLO Real-time Object Detection using ONNX on AzureML\n",
|
||||||
|
"\n",
|
||||||
|
"This example shows how to convert the TinyYOLO model from CoreML to ONNX and operationalize it as a web service using Azure Machine Learning services and the ONNX Runtime.\n",
|
||||||
|
"\n",
|
||||||
|
"## What is ONNX\n",
|
||||||
|
"ONNX is an open format for representing machine learning and deep learning models. ONNX enables open and interoperable AI by enabling data scientists and developers to use the tools of their choice without worrying about lock-in and flexibility to deploy to a variety of platforms. ONNX is developed and supported by a community of partners including Microsoft, Facebook, and Amazon. For more information, explore the [ONNX website](http://onnx.ai).\n",
|
||||||
|
"\n",
|
||||||
|
"## YOLO Details\n",
|
||||||
|
"You Only Look Once (YOLO) is a state-of-the-art, real-time object detection system. For more information about YOLO, please visit the [YOLO website](https://pjreddie.com/darknet/yolo/)."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Prerequisites\n",
|
||||||
|
"\n",
|
||||||
|
"To make the best use of your time, make sure you have done the following:\n",
|
||||||
|
"\n",
|
||||||
|
"* Understand the [architecture and terms](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture) introduced by Azure Machine Learning\n",
|
||||||
|
"* Go through the [00.configuration.ipynb](../00.configuration.ipynb) notebook to:\n",
|
||||||
|
" * install the AML SDK\n",
|
||||||
|
" * create a workspace and its configuration file (config.json)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Check core SDK version number\n",
|
||||||
|
"import azureml.core\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"SDK version:\", azureml.core.VERSION)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"#### Install necessary packages\n",
|
||||||
|
"\n",
|
||||||
|
"You'll need to run the following commands to use this tutorial:\n",
|
||||||
|
"\n",
|
||||||
|
"```sh\n",
|
||||||
|
"pip install coremltools\n",
|
||||||
|
"pip install onnxmltools\n",
|
||||||
|
"```"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Convert model to ONNX\n",
|
||||||
|
"\n",
|
||||||
|
"First we download the CoreML model. We use the CoreML model listed at https://coreml.store/tinyyolo. This may take a few minutes."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"!wget https://s3-us-west-2.amazonaws.com/coreml-models/TinyYOLO.mlmodel"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Then we use ONNXMLTools to convert the model."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import onnxmltools\n",
|
||||||
|
"import coremltools\n",
|
||||||
|
"\n",
|
||||||
|
"# Load a CoreML model\n",
|
||||||
|
"coreml_model = coremltools.utils.load_spec('TinyYOLO.mlmodel')\n",
|
||||||
|
"\n",
|
||||||
|
"# Convert from CoreML into ONNX\n",
|
||||||
|
"onnx_model = onnxmltools.convert_coreml(coreml_model, 'TinyYOLOv2')\n",
|
||||||
|
"\n",
|
||||||
|
"# Save ONNX model\n",
|
||||||
|
"onnxmltools.utils.save_model(onnx_model, 'tinyyolov2.onnx')\n",
|
||||||
|
"\n",
|
||||||
|
"import os\n",
|
||||||
|
"print(os.path.getsize('tinyyolov2.onnx'))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Deploying as a web service with Azure ML\n",
|
||||||
|
"\n",
|
||||||
|
"### Load Azure ML workspace\n",
|
||||||
|
"\n",
|
||||||
|
"We begin by instantiating a workspace object from the existing workspace created earlier in the configuration notebook."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core import Workspace\n",
|
||||||
|
"\n",
|
||||||
|
"ws = Workspace.from_config()\n",
|
||||||
|
"print(ws.name, ws.location, ws.resource_group, sep = '\\n')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Registering your model with Azure ML\n",
|
||||||
|
"\n",
|
||||||
|
"Now we upload the model and register it in the workspace."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.model import Model\n",
|
||||||
|
"\n",
|
||||||
|
"model = Model.register(model_path = \"tinyyolov2.onnx\",\n",
|
||||||
|
" model_name = \"tinyyolov2\",\n",
|
||||||
|
" tags = {\"onnx\": \"demo\"},\n",
|
||||||
|
" description = \"TinyYOLO\",\n",
|
||||||
|
" workspace = ws)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"#### Displaying your registered models\n",
|
||||||
|
"\n",
|
||||||
|
"You can optionally list out all the models that you have registered in this workspace."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"models = ws.models()\n",
|
||||||
|
"for m in models:\n",
|
||||||
|
" print(\"Name:\", m.name,\"\\tVersion:\", m.version, \"\\tDescription:\", m.description, m.tags)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Write scoring file\n",
|
||||||
|
"\n",
|
||||||
|
"We are now going to deploy our ONNX model on Azure ML using the ONNX Runtime. We begin by writing a score.py file that will be invoked by the web service call. The `init()` function is called once when the container is started so we load the model using the ONNX Runtime into a global session object."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"%%writefile score.py\n",
|
||||||
|
"import json\n",
|
||||||
|
"import time\n",
|
||||||
|
"import sys\n",
|
||||||
|
"import os\n",
|
||||||
|
"from azureml.core.model import Model\n",
|
||||||
|
"import numpy as np # we're going to use numpy to process input and output data\n",
|
||||||
|
"import onnxruntime # to inference ONNX models, we use the ONNX Runtime\n",
|
||||||
|
"\n",
|
||||||
|
"def init():\n",
|
||||||
|
" global session\n",
|
||||||
|
" model = Model.get_model_path(model_name = 'tinyyolov2')\n",
|
||||||
|
" session = onnxruntime.InferenceSession(model)\n",
|
||||||
|
"\n",
|
||||||
|
"def preprocess(input_data_json):\n",
|
||||||
|
" # convert the JSON data into the tensor input\n",
|
||||||
|
" return np.array(json.loads(input_data_json)['data']).astype('float32')\n",
|
||||||
|
"\n",
|
||||||
|
"def postprocess(result):\n",
|
||||||
|
" return np.array(result).tolist()\n",
|
||||||
|
"\n",
|
||||||
|
"def run(input_data_json):\n",
|
||||||
|
" try:\n",
|
||||||
|
" start = time.time() # start timer\n",
|
||||||
|
" input_data = preprocess(input_data_json)\n",
|
||||||
|
" input_name = session.get_inputs()[0].name # get the id of the first input of the model \n",
|
||||||
|
" result = session.run([], {input_name: input_data})\n",
|
||||||
|
" end = time.time() # stop timer\n",
|
||||||
|
" return {\"result\": postprocess(result),\n",
|
||||||
|
" \"time\": end - start}\n",
|
||||||
|
" except Exception as e:\n",
|
||||||
|
" result = str(e)\n",
|
||||||
|
" return {\"error\": result}"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Create container image\n",
|
||||||
|
"First we create a YAML file that specifies which dependencies we would like to see in our container."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.conda_dependencies import CondaDependencies \n",
|
||||||
|
"\n",
|
||||||
|
"myenv = CondaDependencies.create(pip_packages=[\"numpy\",\"onnxruntime\"])\n",
|
||||||
|
"\n",
|
||||||
|
"with open(\"myenv.yml\",\"w\") as f:\n",
|
||||||
|
" f.write(myenv.serialize_to_string())"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Then we have Azure ML create the container. This step will likely take a few minutes."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.image import ContainerImage\n",
|
||||||
|
"\n",
|
||||||
|
"image_config = ContainerImage.image_configuration(execution_script = \"score.py\",\n",
|
||||||
|
" runtime = \"python\",\n",
|
||||||
|
" conda_file = \"myenv.yml\",\n",
|
||||||
|
" description = \"TinyYOLO ONNX Demo\",\n",
|
||||||
|
" tags = {\"demo\": \"onnx\"}\n",
|
||||||
|
" )\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"image = ContainerImage.create(name = \"onnxyolo\",\n",
|
||||||
|
" models = [model],\n",
|
||||||
|
" image_config = image_config,\n",
|
||||||
|
" workspace = ws)\n",
|
||||||
|
"\n",
|
||||||
|
"image.wait_for_creation(show_output = True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"In case you need to debug your code, the next line of code accesses the log file."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"print(image.image_build_log_uri)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"We're all set! Let's get our model chugging.\n",
|
||||||
|
"\n",
|
||||||
|
"### Deploy the container image"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.webservice import AciWebservice\n",
|
||||||
|
"\n",
|
||||||
|
"aciconfig = AciWebservice.deploy_configuration(cpu_cores = 1, \n",
|
||||||
|
" memory_gb = 1, \n",
|
||||||
|
" tags = {'demo': 'onnx'}, \n",
|
||||||
|
" description = 'web service for TinyYOLO ONNX model')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"The following cell will likely take a few minutes to run as well."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.webservice import Webservice\n",
|
||||||
|
"from random import randint\n",
|
||||||
|
"\n",
|
||||||
|
"aci_service_name = 'onnx-tinyyolo'+str(randint(0,100))\n",
|
||||||
|
"print(\"Service\", aci_service_name)\n",
|
||||||
|
"\n",
|
||||||
|
"aci_service = Webservice.deploy_from_image(deployment_config = aciconfig,\n",
|
||||||
|
" image = image,\n",
|
||||||
|
" name = aci_service_name,\n",
|
||||||
|
" workspace = ws)\n",
|
||||||
|
"\n",
|
||||||
|
"aci_service.wait_for_deployment(True)\n",
|
||||||
|
"print(aci_service.state)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"In case the deployment fails, you can check the logs. Make sure to delete your aci_service before trying again."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"if aci_service.state != 'Healthy':\n",
|
||||||
|
" # run this command for debugging.\n",
|
||||||
|
" print(aci_service.get_logs())\n",
|
||||||
|
" aci_service.delete()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Success!\n",
|
||||||
|
"\n",
|
||||||
|
"If you've made it this far, you've deployed a working web service that does object detection using an ONNX model. You can get the URL for the webservice with the code below."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"print(aci_service.scoring_uri)"
|
||||||
|
]
|
||||||
|
},
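
Once the service is up, a quick smoke test (not part of the notebook itself) is to POST a JSON payload to that scoring URI. The sketch below assumes the scoring script reads `json.loads(raw)['data']` as its input tensor and that the model expects a 1x3x416x416 image, the usual Tiny YOLOv2 input shape; both are assumptions, so match them to your own score.py.

```python
# Hedged smoke test for the deployed endpoint; the 'data' key and the
# 1x3x416x416 input shape are assumptions -- match them to your score.py.
import json
import numpy as np
import requests

dummy_image = np.random.rand(1, 3, 416, 416).astype('float32')
payload = json.dumps({'data': dummy_image.tolist()})

response = requests.post(aci_service.scoring_uri, data=payload,
                         headers={'Content-Type': 'application/json'})
print(response.status_code, response.text[:200])
```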
{
"cell_type": "markdown",
"metadata": {},
"source": [
"When you are eventually done using the web service, remember to delete it."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#aci_service.delete()"
]
}
],
"metadata": {
"authors": [
{
"name": "onnx"
}
],
"kernelspec": {
"display_name": "Python 3.6",
"language": "python",
"name": "python36"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
@@ -12,7 +12,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"# Facial Expression Recognition using ONNX Runtime on AzureML\n",
+"# Facial Expression Recognition (Emotion FER+) using ONNX Runtime on Azure ML\n",
 "\n",
 "This example shows how to deploy an image classification neural network using the Facial Expression Recognition ([FER](https://www.kaggle.com/c/challenges-in-representation-learning-facial-expression-recognition-challenge/data)) dataset and Open Neural Network eXchange format ([ONNX](http://aka.ms/onnxdocarticle)) on the Azure Machine Learning platform. This tutorial will show you how to deploy a FER+ model from the [ONNX model zoo](https://github.com/onnx/models), use it to make predictions using ONNX Runtime Inference, and deploy it as a web service in Azure.\n",
 "\n",
@@ -34,32 +34,54 @@
 "## Prerequisites\n",
 "\n",
 "### 1. Install Azure ML SDK and create a new workspace\n",
-"Please follow [00.configuration.ipynb](https://github.com/Azure/MachineLearningNotebooks/blob/master/00.configuration.ipynb) notebook.\n",
+"Please follow the [Azure ML configuration notebook](https://github.com/Azure/MachineLearningNotebooks/blob/master/00.configuration.ipynb) to set up your environment.\n",
-"\n",
 "\n",
 "### 2. Install additional packages needed for this Notebook\n",
-"You need to install the popular plotting library `matplotlib`, the image manipulation library `PIL`, and the `onnx` library in the conda environment where Azure Maching Learning SDK is installed.\n",
+"You need to install the popular plotting library `matplotlib`, the image manipulation library `opencv`, and the `onnx` library in the conda environment where the Azure Machine Learning SDK is installed.\n",
 "\n",
 "```sh\n",
-"(myenv) $ pip install matplotlib onnx Pillow\n",
+"(myenv) $ pip install matplotlib onnx opencv-python\n",
 "```\n",
 "\n",
+"**Debugging tip**: Make sure to activate your virtual environment (myenv) before you re-launch this notebook using the `jupyter notebook` command. Choose the respective Python kernel for your new virtual environment using the `Kernel > Change Kernel` menu above. If you have completed the steps correctly, the upper right corner of your screen should state `Python [conda env:myenv]` instead of `Python [default]`.\n",
+"\n",
 "### 3. Download sample data and pre-trained ONNX model from ONNX Model Zoo.\n",
 "\n",
-"[Download the ONNX Emotion FER+ model and corresponding test data](https://www.cntk.ai/OnnxModels/emotion_ferplus/opset_7/emotion_ferplus.tar.gz) and place them in the same folder as this tutorial notebook. You can unzip the file through the following line of code.\n",
+"In the following lines of code, we download [the trained ONNX Emotion FER+ model and corresponding test data](https://github.com/onnx/models/tree/master/emotion_ferplus) and place them in the same folder as this tutorial notebook. For more information about the FER+ dataset, please visit Microsoft Researcher Emad Barsoum's [FER+ source data repository](https://github.com/ebarsoum/FERPlus)."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"# urllib is a built-in Python library to download files from URLs\n",
 "\n",
-"```sh\n",
+"# Objective: retrieve the latest version of the ONNX Emotion FER+ model files from the\n",
-"(myenv) $ tar xvzf emotion_ferplus.tar.gz\n",
+"# ONNX Model Zoo and save it in the same folder as this tutorial\n",
-"```\n",
 "\n",
-"More information can be found about the ONNX FER+ model on [github](https://github.com/onnx/models/tree/master/emotion_ferplus). For more information about the FER+ dataset, please visit Microsoft Researcher Emad Barsoum's [FER+ source data repository](https://github.com/ebarsoum/FERPlus)."
+"import urllib.request\n",
+"\n",
+"onnx_model_url = \"https://www.cntk.ai/OnnxModels/emotion_ferplus/opset_7/emotion_ferplus.tar.gz\"\n",
+"\n",
+"urllib.request.urlretrieve(onnx_model_url, filename=\"emotion_ferplus.tar.gz\")\n",
+"\n",
+"# the ! magic command tells our jupyter notebook kernel to run the following line of \n",
+"# code from the command line instead of the notebook kernel\n",
+"\n",
+"# We use tar xvzf to unzip the files we just retrieved from the ONNX model zoo\n",
+"\n",
+"!tar xvzf emotion_ferplus.tar.gz"
 ]
 },
 {
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"## Load Azure ML workspace\n",
+"## Deploy a VM with your ONNX model in the Cloud\n",
+"\n",
+"### Load Azure ML workspace\n",
 "\n",
 "We begin by instantiating a workspace object from the existing workspace created earlier in the configuration notebook."
 ]
@@ -137,8 +159,8 @@
 "outputs": [],
 "source": [
 "models = ws.models()\n",
-"for m in models:\n",
+"for name, m in models.items():\n",
-"    print(\"Name:\", m.name,\"\\tVersion:\", m.version, \"\\tDescription:\", m.description, m.tags)"
+"    print(\"Name:\", name,\"\\tVersion:\", m.version, \"\\tDescription:\", m.description, m.tags)"
 ]
 },
 {
@@ -147,9 +169,9 @@
 "source": [
 "### ONNX FER+ Model Methodology\n",
 "\n",
-"The image classification model we are using is pre-trained using Microsoft's deep learning cognitive toolkit, [CNTK](https://github.com/Microsoft/CNTK), from the [ONNX model zoo](http://github.com/onnx/models). The model zoo has many other models that can be deployed on cloud providers like AzureML without any additional training. To ensure that our cloud deployed model works, we use testing data from the famous FER+ data set, provided as part of the [trained Emotion Recognition model](https://github.com/onnx/models/tree/master/emotion_ferplus) in the ONNX model zoo.\n",
+"The image classification model we are using is pre-trained using Microsoft's deep learning cognitive toolkit, [CNTK](https://github.com/Microsoft/CNTK), from the [ONNX model zoo](http://github.com/onnx/models). The model zoo has many other models that can be deployed on cloud providers like AzureML without any additional training. To ensure that our cloud deployed model works, we use testing data from the well-known FER+ data set, provided as part of the [trained Emotion Recognition model](https://github.com/onnx/models/tree/master/emotion_ferplus) in the ONNX model zoo.\n",
 "\n",
-"The original Facial Emotion Recognition (FER) Dataset was released in 2013, but some of the labels are not entirely appropriate for the expression. In the FER+ Dataset, each photo was evaluated by at least 10 croud sourced reviewers, creating a better basis for ground truth. \n",
+"The original Facial Emotion Recognition (FER) Dataset was released in 2013 by Pierre-Luc Carrier and Aaron Courville as part of a [Kaggle Competition](https://www.kaggle.com/c/challenges-in-representation-learning-facial-expression-recognition-challenge/data), but some of the labels are not entirely appropriate for the expression. In the FER+ Dataset, each photo was evaluated by at least 10 crowd-sourced reviewers, creating a more accurate basis for ground truth. \n",
 "\n",
 "You can see the difference of label quality in the sample model input below. The FER labels are the first word below each image, and the FER+ labels are the second word below each image.\n",
 "\n",
@@ -202,20 +224,18 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"## Deploy our model on Azure ML"
+"### Specify our Score and Environment Files"
 ]
 },
 {
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"We are now going to deploy our ONNX Model on AML with inference in ONNX Runtime. We begin by writing a score.py file, which will help us run the model in our Azure ML virtual machine (VM), and then specify our environment by writing a yml file.\n",
+"We are now going to deploy our ONNX Model on AML with inference in ONNX Runtime. We begin by writing a score.py file, which will help us run the model in our Azure ML virtual machine (VM), and then specify our environment by writing a yml file. You will also notice that we import the onnxruntime library to do runtime inference on our ONNX models (passing in input and evaluating our model's predicted output). More information on the API and commands can be found in the [ONNX Runtime documentation](https://aka.ms/onnxruntime).\n",
-"\n",
-"You will also notice that we import the onnxruntime library to do runtime inference on our ONNX models (passing in input and evaluating out model's predicted output). More information on the API and commands can be found in the [ONNX Runtime documentation](https://aka.ms/onnxruntime).\n",
 "\n",
 "### Write Score File\n",
 "\n",
-"A score file is what tells our Azure cloud service what to do. After initializing our model using azureml.core.model, we start an ONNX Runtime GPU inference session to evaluate the data passed in on our function calls."
+"A score file is what tells our Azure cloud service what to do. After initializing our model using azureml.core.model, we start an ONNX Runtime inference session to evaluate the data passed in on our function calls."
 ]
 },
 {
@@ -248,10 +268,13 @@
 "    try:\n",
 "        # load in our data, convert to readable format\n",
 "        data = np.array(json.loads(input_data)['data']).astype('float32')\n",
+"        \n",
 "        start = time.time()\n",
 "        r = session.run([output_name], {input_name : data})\n",
 "        end = time.time()\n",
+"        \n",
 "        result = emotion_map(postprocess(r[0]))\n",
+"        \n",
 "        result_dict = {\"result\": result,\n",
 "                       \"time_in_sec\": [end - start]}\n",
 "    except Exception as e:\n",
@@ -260,9 +283,12 @@
 "    return json.dumps(result_dict)\n",
 "\n",
 "def emotion_map(classes, N=1):\n",
-"    \"\"\"Take the most probable labels (output of postprocess) and returns the top N emotional labels that fit the picture.\"\"\"\n",
+"    \"\"\"Take the most probable labels (output of postprocess) and returns the \n",
+"    top N emotional labels that fit the picture.\"\"\"\n",
+"    \n",
+"    emotion_table = {'neutral':0, 'happiness':1, 'surprise':2, 'sadness':3, \n",
+"                     'anger':4, 'disgust':5, 'fear':6, 'contempt':7}\n",
 "    \n",
-"    emotion_table = {'neutral':0, 'happiness':1, 'surprise':2, 'sadness':3, 'anger':4, 'disgust':5, 'fear':6, 'contempt':7}\n",
 "    emotion_keys = list(emotion_table.keys())\n",
 "    emotions = []\n",
 "    for i in range(N):\n",
@@ -276,8 +302,8 @@
 "    return e_x / e_x.sum(axis=0)\n",
 "\n",
 "def postprocess(scores):\n",
-"    \"\"\"This function takes the scores generated by the network and returns the class IDs in decreasing \n",
-"    order of probability.\"\"\"\n",
+"    \"\"\"This function takes the scores generated by the network and \n",
+"    returns the class IDs in decreasing order of probability.\"\"\"\n",
 "    prob = softmax(scores)\n",
 "    prob = np.squeeze(prob)\n",
 "    classes = np.argsort(prob)[::-1]\n",
@@ -329,7 +355,7 @@
 "image_config = ContainerImage.image_configuration(execution_script = \"score.py\",\n",
 "                                                  runtime = \"python\",\n",
 "                                                  conda_file = \"myenv.yml\",\n",
-"                                                  description = \"test\",\n",
+"                                                  description = \"Emotion ONNX Runtime container\",\n",
 "                                                  tags = {\"demo\": \"onnx\"})\n",
 "\n",
 "\n",
@@ -346,8 +372,6 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"### Debugging\n",
-"\n",
 "In case you need to debug your code, the next line of code accesses the log file."
 ]
 },
@@ -364,9 +388,9 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"We're all set! Let's get our model chugging.\n",
+"We're all done specifying what we want our virtual machine to do. Let's configure and deploy our container image.\n",
 "\n",
-"## Deploy the container image"
+"### Deploy the container image"
 ]
 },
 {
@@ -439,23 +463,57 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"# Testing and Evaluation"
+"## Testing and Evaluation\n",
-]
+"\n",
-},
+"### Useful Helper Functions\n",
-{
-"cell_type": "markdown",
-"metadata": {},
-"source": [
-"#### Useful Helper Functions\n",
 "\n",
 "We preprocess and postprocess our data (see score.py file) using the helper functions specified in the [ONNX FER+ Model page in the Model Zoo repository](https://github.com/onnx/models/tree/master/emotion_ferplus)."
 ]
 },
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"def emotion_map(classes, N=1):\n",
+"    \"\"\"Take the most probable labels (output of postprocess) and returns the \n",
+"    top N emotional labels that fit the picture.\"\"\"\n",
+"    \n",
+"    emotion_table = {'neutral':0, 'happiness':1, 'surprise':2, 'sadness':3, \n",
+"                     'anger':4, 'disgust':5, 'fear':6, 'contempt':7}\n",
+"    \n",
+"    emotion_keys = list(emotion_table.keys())\n",
+"    emotions = []\n",
+"    for i in range(N):\n",
+"        emotions.append(emotion_keys[classes[i]])\n",
+"    \n",
+"    return emotions\n",
+"\n",
+"def softmax(x):\n",
+"    \"\"\"Compute softmax values (probabilities from 0 to 1) for each possible label.\"\"\"\n",
+"    x = x.reshape(-1)\n",
+"    e_x = np.exp(x - np.max(x))\n",
+"    return e_x / e_x.sum(axis=0)\n",
+"\n",
+"def postprocess(scores):\n",
+"    \"\"\"This function takes the scores generated by the network and \n",
+"    returns the class IDs in decreasing order of probability.\"\"\"\n",
+"    prob = softmax(scores)\n",
+"    prob = np.squeeze(prob)\n",
+"    classes = np.argsort(prob)[::-1]\n",
+"    return classes"
+]
+},
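
As a quick illustration (not part of the commit), running these helpers on a made-up score vector shows how `postprocess` orders the `emotion_table` indices; the numbers below are arbitrary.

```python
# Illustrative only: exercise the helper functions above on fake scores.
import numpy as np

def softmax(x):
    x = x.reshape(-1)
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum(axis=0)

def postprocess(scores):
    prob = np.squeeze(softmax(scores))
    return np.argsort(prob)[::-1]

fake_scores = np.array([0.1, 2.5, 0.3, 0.0, 1.2, 0.2, 0.4, 0.1])
classes = postprocess(fake_scores)
print(classes[:3])  # [1 4 6] -> 'happiness', 'anger', 'fear' in the emotion_table
```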
 {
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"### Load Test Data"
+"### Load Test Data\n",
+"\n",
+"These are already in your directory from your ONNX model download (from the model zoo).\n",
+"\n",
+"Notice that our Model Zoo files have a .pb extension. This is because they are [protobuf files (Protocol Buffers)](https://developers.google.com/protocol-buffers/docs/pythontutorial), so we need to read in our data through our ONNX TensorProto reader into a format we can work with, like numerical arrays."
 ]
 },
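
For reference, reading one of those .pb files into a numpy array typically looks like the sketch below; the `emotion_ferplus/test_data_set_0/input_0.pb` path is an assumption, so point it at whatever test files your download actually contains.

```python
# Minimal sketch: load an ONNX TensorProto .pb file as a numpy array.
# The path below is assumed -- adjust it to your extracted download.
import onnx
from onnx import numpy_helper

tensor = onnx.TensorProto()
with open('emotion_ferplus/test_data_set_0/input_0.pb', 'rb') as f:
    tensor.ParseFromString(f.read())

array = numpy_helper.to_array(tensor)
print(array.shape)
```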
 {
@@ -475,8 +533,6 @@
 "import json\n",
 "import os\n",
 "\n",
-"from score import emotion_map, softmax, postprocess\n",
-"\n",
 "test_inputs = []\n",
 "test_outputs = []\n",
 "\n",
@@ -512,7 +568,7 @@
 },
 "source": [
 "### Show some sample images\n",
-"We use `matplotlib` to plot 3 test images from the model zoo with their labels over them."
+"We use `matplotlib` to plot 3 test images from the dataset."
 ]
 },
 {
@@ -532,7 +588,7 @@
 "    plt.axhline('')\n",
 "    plt.axvline('')\n",
 "    plt.text(x = 10, y = -10, s = test_outputs[test_image], fontsize = 18)\n",
-"    plt.imshow(test_inputs[test_image].reshape(64, 64), cmap = plt.cm.Greys)\n",
+"    plt.imshow(test_inputs[test_image].reshape(64, 64), cmap = plt.cm.gray)\n",
 "plt.show()"
 ]
 },
@@ -571,7 +627,7 @@
 "        print(r['error'])\n",
 "        break\n",
 "    \n",
-"    result = r['result'][0][0]\n",
+"    result = r['result'][0]\n",
 "    time_ms = np.round(r['time_in_sec'][0] * 1000, 2)\n",
 "    \n",
 "    ground_truth = test_outputs[i]\n",
@@ -583,7 +639,7 @@
 "\n",
 "    # use different color for misclassified sample\n",
 "    font_color = 'red' if ground_truth != result else 'black'\n",
-"    clr_map = plt.cm.gray if ground_truth != result else plt.cm.Greys\n",
+"    clr_map = plt.cm.Greys if ground_truth != result else plt.cm.gray\n",
 "\n",
 "    # ground truth labels are in blue\n",
 "    plt.text(x = 10, y = -70, s = ground_truth, fontsize = 18, color = 'blue')\n",
@@ -611,15 +667,30 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from PIL import Image\n",
+"# Preprocessing functions take your image and format it so it can be passed\n",
+"# as input into our ONNX model\n",
 "\n",
-"def preprocess(image_path):\n",
+"import cv2\n",
-"    input_shape = (1, 1, 64, 64)\n",
+"\n",
-"    img = Image.open(image_path)\n",
+"def rgb2gray(rgb):\n",
-"    img = img.resize((64, 64), Image.ANTIALIAS)\n",
+"    \"\"\"Convert the input image into grayscale\"\"\"\n",
-"    img_data = np.array(img)\n",
+"    return np.dot(rgb[...,:3], [0.299, 0.587, 0.114])\n",
-"    img_data = np.resize(img_data, input_shape)\n",
+"\n",
-"    return img_data"
+"def resize_img(img):\n",
+"    \"\"\"Resize image to the model's input dimensions\"\"\"\n",
+"    img = cv2.resize(img, dsize=(64, 64), interpolation=cv2.INTER_AREA)\n",
+"    img.resize((1, 1, 64, 64))\n",
+"    return img\n",
+"\n",
+"def preprocess(img):\n",
+"    \"\"\"Resize input images and convert them to grayscale.\"\"\"\n",
+"    if img.shape == (64, 64):\n",
+"        img.resize((1, 1, 64, 64))\n",
+"        return img\n",
+"    \n",
+"    grayscale = rgb2gray(img)\n",
+"    processed_img = resize_img(grayscale)\n",
+"    return processed_img"
 ]
 },
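
A usage note, illustrative rather than part of the commit: the rewritten `preprocess` now takes an in-memory image array rather than a file path, so a quick local check looks like the following (random data stands in for a real photo).

```python
# Illustrative check of the new preprocess() signature: array in, (1, 1, 64, 64) out.
import numpy as np

fake_rgb = (np.random.rand(128, 128, 3) * 255).astype('float32')  # stand-in photo
processed = preprocess(fake_rgb)
print(processed.shape)  # (1, 1, 64, 64)
```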
 {
@@ -636,12 +707,15 @@
 "\n",
 "# e.g. your_test_image = \"C://Users//vinitra.swamy//Pictures//emotion_test_images//img_1.png\"\n",
 "\n",
-"your_test_image = \"<path to file>\"\n",
+"import matplotlib.image as mpimg\n",
+"\n",
+"your_test_image = \"<path to file>\"\n",
 "\n",
 "if your_test_image != \"<path to file>\":\n",
-"    img = preprocess(your_test_image)\n",
+"    img = mpimg.imread(your_test_image)\n",
 "    plt.subplot(1,3,1)\n",
-"    plt.imshow(img.reshape((64,64)), cmap = plt.cm.gray)\n",
+"    plt.imshow(img, cmap = plt.cm.Greys)\n",
+"    print(\"Old Dimensions: \", img.shape)\n",
+"    img = preprocess(img)\n",
+"    print(\"New Dimensions: \", img.shape)\n",
 "else:\n",
 "    img = None"
 ]
@@ -659,7 +733,7 @@
 "\n",
 "    try:\n",
 "        r = json.loads(aci_service.run(input_data))\n",
-"        result = r['result'][0][0]\n",
+"        result = r['result'][0]\n",
 "        time_ms = np.round(r['time_in_sec'][0] * 1000, 2)\n",
 "    except Exception as e:\n",
 "        print(str(e))\n",
@@ -668,12 +742,13 @@
 "    plt.subplot(1,8,1)\n",
 "    plt.axhline('')\n",
 "    plt.axvline('')\n",
-"    plt.text(x = -10, y = -35, s = \"Model prediction: \", fontsize = 14)\n",
+"    plt.text(x = -10, y = -40, s = \"Model prediction: \", fontsize = 14)\n",
-"    plt.text(x = -10, y = -20, s = \"Inference time: \", fontsize = 14)\n",
+"    plt.text(x = -10, y = -25, s = \"Inference time: \", fontsize = 14)\n",
-"    plt.text(x = 100, y = -35, s = str(result), fontsize = 14)\n",
+"    plt.text(x = 100, y = -40, s = str(result), fontsize = 14)\n",
-"    plt.text(x = 100, y = -20, s = str(time_ms) + \" ms\", fontsize = 14)\n",
+"    plt.text(x = 100, y = -25, s = str(time_ms) + \" ms\", fontsize = 14)\n",
-"    plt.text(x = -10, y = -8, s = \"Input image: \", fontsize = 14)\n",
+"    plt.text(x = -10, y = -10, s = \"Model Input image: \", fontsize = 14)\n",
-"    plt.imshow(img.reshape(64, 64), cmap = plt.cm.gray) "
+"    plt.imshow(img.reshape((64, 64)), cmap = plt.cm.gray) \n",
+"    "
 ]
 },
 {
@@ -684,7 +759,7 @@
 "source": [
 "# remember to delete your service after you are done using it!\n",
 "\n",
-"# aci_service.delete()"
+"aci_service.delete()"
 ]
 },
 {
@@ -708,6 +783,11 @@
 }
 ],
 "metadata": {
+"authors": [
+{
+"name": "viswamy"
+}
+],
 "kernelspec": {
 "display_name": "Python 3.6",
 "language": "python",
@@ -723,7 +803,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.6.5"
+"version": "3.6.6"
 },
 "msauthor": "vinitra.swamy"
 },
@@ -12,7 +12,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"# Handwritten Digit Classification (MNIST) using ONNX Runtime on AzureML\n",
+"# Handwritten Digit Classification (MNIST) using ONNX Runtime on Azure ML\n",
 "\n",
 "This example shows how to deploy an image classification neural network using the Modified National Institute of Standards and Technology ([MNIST](http://yann.lecun.com/exdb/mnist/)) dataset and Open Neural Network eXchange format ([ONNX](http://aka.ms/onnxdocarticle)) on the Azure Machine Learning platform. MNIST is a popular dataset consisting of 70,000 grayscale images. Each image is a handwritten digit of 28x28 pixels, representing number from 0 to 9. This tutorial will show you how to deploy a MNIST model from the [ONNX model zoo](https://github.com/onnx/models), use it to make predictions using ONNX Runtime Inference, and deploy it as a web service in Azure.\n",
 "\n",
@@ -22,9 +22,9 @@
 "\n",
 "#### Tutorial Objectives:\n",
 "\n",
-"1. Describe the MNIST dataset and pretrained Convolutional Neural Net ONNX model, stored in the ONNX model zoo.\n",
+"- Describe the MNIST dataset and pretrained Convolutional Neural Net ONNX model, stored in the ONNX model zoo.\n",
-"2. Deploy and run the pretrained MNIST ONNX model on an Azure Machine Learning instance\n",
+"- Deploy and run the pretrained MNIST ONNX model on an Azure Machine Learning instance\n",
-"3. Predict labels for test set data points in the cloud using ONNX Runtime and Azure ML"
+"- Predict labels for test set data points in the cloud using ONNX Runtime and Azure ML"
 ]
 },
 {
@@ -34,31 +34,61 @@
 "## Prerequisites\n",
 "\n",
 "### 1. Install Azure ML SDK and create a new workspace\n",
-"Please follow [00.configuration.ipynb](https://github.com/Azure/MachineLearningNotebooks/blob/master/00.configuration.ipynb) notebook.\n",
+"Please follow the [Azure ML configuration notebook](https://github.com/Azure/MachineLearningNotebooks/blob/master/00.configuration.ipynb) to set up your environment.\n",
 "\n",
-"### 2. Install additional packages needed for this Notebook\n",
+"### 2. Install additional packages needed for this tutorial notebook\n",
-"You need to install the popular plotting library `matplotlib`, the image manipulation library `opencv`, and the `onnx` library in the conda environment where Azure Maching Learning SDK is installed.\n",
+"You need to install the popular plotting library `matplotlib`, the image manipulation library `opencv`, and the `onnx` library in the conda environment where the Azure Machine Learning SDK is installed. \n",
 "\n",
 "```sh\n",
 "(myenv) $ pip install matplotlib onnx opencv-python\n",
 "```\n",
 "\n",
+"**Debugging tip**: Make sure that you run the \"jupyter notebook\" command to launch this notebook after activating your virtual environment. Choose the respective Python kernel for your new virtual environment using the `Kernel > Change Kernel` menu above. If you have completed the steps correctly, the upper right corner of your screen should state `Python [conda env:myenv]` instead of `Python [default]`.\n",
+"\n",
 "### 3. Download sample data and pre-trained ONNX model from ONNX Model Zoo.\n",
 "\n",
-"[Download the ONNX MNIST model and corresponding test data](https://www.cntk.ai/OnnxModels/mnist/opset_7/mnist.tar.gz) and place them in the same folder as this tutorial notebook. You can unzip the file through the following line of code.\n",
+"In the following lines of code, we download [the trained ONNX MNIST model and corresponding test data](https://github.com/onnx/models/tree/master/mnist) and place them in the same folder as this tutorial notebook. For more information about the MNIST dataset, please visit [Yann LeCun's website](http://yann.lecun.com/exdb/mnist/)."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"# urllib is a built-in Python library to download files from URLs\n",
 "\n",
-"```sh\n",
+"# Objective: retrieve the latest version of the ONNX MNIST model files from the\n",
-"(myenv) $ tar xvzf mnist.tar.gz\n",
+"# ONNX Model Zoo and save it in the same folder as this tutorial\n",
-"```\n",
 "\n",
-"More information can be found about the ONNX MNIST model on [github](https://github.com/onnx/models/tree/master/mnist). For more information about the MNIST dataset, please visit [Yan LeCun's website](http://yann.lecun.com/exdb/mnist/)."
+"import urllib.request\n",
+"\n",
+"onnx_model_url = \"https://www.cntk.ai/OnnxModels/mnist/opset_7/mnist.tar.gz\"\n",
+"\n",
+"urllib.request.urlretrieve(onnx_model_url, filename=\"mnist.tar.gz\")"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"# the ! magic command tells our jupyter notebook kernel to run the following line of \n",
+"# code from the command line instead of the notebook kernel\n",
+"\n",
+"# We use tar xvzf to unzip the files we just retrieved from the ONNX model zoo\n",
+"\n",
+"!tar xvzf mnist.tar.gz"
 ]
 },
 {
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"## Load Azure ML workspace\n",
+"## Deploy a VM with your ONNX model in the Cloud\n",
+"\n",
+"### Load Azure ML workspace\n",
 "\n",
 "We begin by instantiating a workspace object from the existing workspace created earlier in the configuration notebook."
 ]
@@ -113,11 +143,11 @@
 "source": [
 "from azureml.core.model import Model\n",
 "\n",
-"model = Model.register(model_path = model_dir + \"//model.onnx\",\n",
+"model = Model.register(workspace = ws,\n",
+"                       model_path = model_dir + \"/\" + \"model.onnx\",\n",
 "                       model_name = \"mnist_1\",\n",
 "                       tags = {\"onnx\": \"demo\"},\n",
-"                       description = \"MNIST image classification CNN from ONNX Model Zoo\",\n",
+"                       description = \"MNIST image classification CNN from ONNX Model Zoo\",)"
-"                       workspace = ws)"
 ]
 },
 {
@@ -136,8 +166,8 @@
 "outputs": [],
 "source": [
 "models = ws.models()\n",
-"for m in models:\n",
+"for name, m in models.items():\n",
-"    print(\"Name:\", m.name,\"\\tVersion:\", m.version, \"\\tDescription:\", m.description, m.tags)"
+"    print(\"Name:\", name,\"\\tVersion:\", m.version, \"\\tDescription:\", m.description, m.tags)"
 ]
 },
 {
@@ -188,16 +218,14 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"## Deploy our model on Azure ML"
+"### Specify our Score and Environment Files"
 ]
 },
 {
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"We are now going to deploy our ONNX Model on AML with inference in ONNX Runtime. We begin by writing a score.py file, which will help us run the model in our Azure ML virtual machine (VM), and then specify our environment by writing a yml file.\n",
+"We are now going to deploy our ONNX Model on AML with inference in ONNX Runtime. We begin by writing a score.py file, which will help us run the model in our Azure ML virtual machine (VM), and then specify our environment by writing a yml file. You will also notice that we import the onnxruntime library to do runtime inference on our ONNX models (passing in input and evaluating our model's predicted output). More information on the API and commands can be found in the [ONNX Runtime documentation](https://aka.ms/onnxruntime).\n",
-"\n",
-"You will also notice that we import the onnxruntime library to do runtime inference on our ONNX models (passing in input and evaluating out model's predicted output). More information on the API and commands can be found in the [ONNX Runtime documentation](https://aka.ms/onnxruntime).\n",
 "\n",
 "### Write Score File\n",
 "\n",
@@ -248,7 +276,7 @@
 "    return json.dumps(result_dict)\n",
 "\n",
 "def choose_class(result_prob):\n",
-"    \"\"\"We use argmax to determine the right label to choose from our output, after calling softmax on the 10 numbers we receive\"\"\"\n",
+"    \"\"\"We use argmax to determine the right label to choose from our output\"\"\"\n",
 "    return int(np.argmax(result_prob, axis=0))"
 ]
 },
@@ -256,14 +284,9 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"### Write Environment File"
+"### Write Environment File\n",
-]
+"\n",
-},
+"This step creates a YAML environment file that specifies which dependencies we would like to see in our Linux Virtual Machine."
-{
-"cell_type": "markdown",
-"metadata": {},
-"source": [
-"This step creates a YAML file that specifies which dependencies we would like to see in our Linux Virtual Machine."
 ]
 },
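
For context, the environment file in these ONNX notebooks is generated with the SDK's CondaDependencies helper, along the lines of the sketch below (the same pattern appears in the ResNet50 notebook later in this commit); the exact package list should mirror whatever your score.py imports.

```python
# Sketch: generate myenv.yml for the container image.
# The package list mirrors what score.py imports; adjust to your own script.
from azureml.core.conda_dependencies import CondaDependencies

myenv = CondaDependencies.create(pip_packages=["numpy", "onnxruntime"])

with open("myenv.yml", "w") as f:
    f.write(myenv.serialize_to_string())
```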
 {
@@ -289,10 +312,19 @@
 "metadata": {},
 "source": [
 "### Create the Container Image\n",
-"\n",
 "This step will likely take a few minutes."
 ]
 },
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"from azureml.core.image import ContainerImage\n",
+"help(ContainerImage.image_configuration)"
+]
+},
 {
 "cell_type": "code",
 "execution_count": null,
@@ -304,8 +336,8 @@
 "image_config = ContainerImage.image_configuration(execution_script = \"score.py\",\n",
 "                                                  runtime = \"python\",\n",
 "                                                  conda_file = \"myenv.yml\",\n",
-"                                                  description = \"test\",\n",
+"                                                  description = \"MNIST ONNX Runtime container\",\n",
-"                                                  tags = {\"demo\": \"onnx\"}) )\n",
+"                                                  tags = {\"demo\": \"onnx\"}) \n",
 "\n",
 "\n",
 "image = ContainerImage.create(name = \"onnxtest\",\n",
@@ -321,8 +353,6 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"### Debugging\n",
-"\n",
 "In case you need to debug your code, the next line of code accesses the log file."
 ]
 },
@@ -339,9 +369,9 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"We're all set! Let's get our model chugging.\n",
+"We're all done specifying what we want our virtual machine to do. Let's configure and deploy our container image.\n",
 "\n",
-"## Deploy the container image"
+"### Deploy the container image"
 ]
 },
 {
@@ -373,7 +403,7 @@
 "source": [
 "from azureml.core.webservice import Webservice\n",
 "\n",
-"aci_service_name = 'onnx-demo-mnist'\n",
+"aci_service_name = 'onnx-demo-mnist20'\n",
 "print(\"Service\", aci_service_name)\n",
 "\n",
 "aci_service = Webservice.deploy_from_image(deployment_config = aciconfig,\n",
@@ -414,16 +444,13 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"# Testing and Evaluation"
+"## Testing and Evaluation\n",
-]
-},
-{
-"cell_type": "markdown",
-"metadata": {},
-"source": [
-"## Load Test Data\n",
 "\n",
-"These are already in your directory from your ONNX model download (from the model zoo). If you didn't place your model and test data in the same directory as this notebook, edit the \"model_dir\" filename below."
+"### Load Test Data\n",
+"\n",
+"These are already in your directory from your ONNX model download (from the model zoo).\n",
+"\n",
+"Notice that our Model Zoo files have a .pb extension. This is because they are [protobuf files (Protocol Buffers)](https://developers.google.com/protocol-buffers/docs/pythontutorial), so we need to read in our data through our ONNX TensorProto reader into a format we can work with, like numerical arrays."
 ]
 },
 {
@@ -579,7 +606,9 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"# Preprocessing functions\n",
+"# Preprocessing functions take your image and format it so it can be passed\n",
+"# as input into our ONNX model\n",
+"\n",
 "import cv2\n",
 "\n",
 "def rgb2gray(rgb):\n",
@@ -587,12 +616,17 @@
 "    return np.dot(rgb[...,:3], [0.299, 0.587, 0.114])\n",
 "\n",
 "def resize_img(img):\n",
+"    \"\"\"Resize image to MNIST model input dimensions\"\"\"\n",
 "    img = cv2.resize(img, dsize=(28, 28), interpolation=cv2.INTER_AREA)\n",
 "    img.resize((1, 1, 28, 28))\n",
 "    return img\n",
 "\n",
 "def preprocess(img):\n",
 "    \"\"\"Resize input images and convert them to grayscale.\"\"\"\n",
+"    if img.shape == (28, 28):\n",
+"        img.resize((1, 1, 28, 28))\n",
+"        return img\n",
+"    \n",
 "    grayscale = rgb2gray(img)\n",
 "    processed_img = resize_img(grayscale)\n",
 "    return processed_img"
@@ -608,11 +642,8 @@
 "# Make sure your image is square and the dimensions are equal (i.e. 100 * 100 pixels or 28 * 28 pixels)\n",
 "\n",
 "# Any PNG or JPG image file should work\n",
-"# Make sure to include the entire path with // instead of /\n",
 "\n",
-"# e.g. your_test_image = \"C://Users//vinitra.swamy//Pictures//digit.png\"\n",
+"# e.g. your_test_image = \"C:/Users/vinitra.swamy/Pictures/handwritten_digit.png\"\n",
-"\n",
-"your_test_image = \"<path to file>\"\n",
+"\n",
+"your_test_image = \"<path to file>\"\n",
 "\n",
 "import matplotlib.image as mpimg\n",
 "\n",
@@ -721,7 +752,7 @@
 "source": [
 "# remember to delete your service after you are done using it!\n",
 "\n",
-"# aci_service.delete()"
+"aci_service.delete()"
 ]
 },
 {
@@ -738,12 +769,17 @@
 "- ensured that your deep learning model is working perfectly (in the cloud) on test data, and checked it against some of your own!\n",
 "\n",
 "Next steps:\n",
-"- Check out another interesting application based on a Microsoft Research computer vision paper that lets you set up a [facial emotion recognition model](https://github.com/Azure/MachineLearningNotebooks/tree/master/onnx/onnx-inference-emotion-recognition.ipynb) in the cloud! This tutorial deploys a pre-trained ONNX Computer Vision model in an Azure ML virtual machine with GPU support.\n",
+"- Check out another interesting application based on a Microsoft Research computer vision paper that lets you set up a [facial emotion recognition model](https://github.com/Azure/MachineLearningNotebooks/tree/master/onnx/onnx-inference-emotion-recognition.ipynb) in the cloud! This tutorial deploys a pre-trained ONNX Computer Vision model in an Azure ML virtual machine.\n",
 "- Contribute to our [open source ONNX repository on github](http://github.com/onnx/onnx) and/or add to our [ONNX model zoo](http://github.com/onnx/models)"
 ]
 }
 ],
 "metadata": {
+"authors": [
+{
+"name": "viswamy"
+}
+],
 "kernelspec": {
 "display_name": "Python 3.6",
 "language": "python",
@@ -759,7 +795,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.6.5"
+"version": "3.6.6"
 },
 "msauthor": "vinitra.swamy"
 },
onnx/onnx-modelzoo-aml-deploy-resnet50.ipynb (new file, 409 lines)
@@ -0,0 +1,409 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Copyright (c) Microsoft Corporation. All rights reserved. \n",
"\n",
"Licensed under the MIT License."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# ResNet50 Image Classification using ONNX and AzureML\n",
"\n",
"This example shows how to deploy the ResNet50 ONNX model as a web service using Azure Machine Learning services and the ONNX Runtime.\n",
"\n",
"## What is ONNX\n",
"ONNX is an open format for representing machine learning and deep learning models. ONNX enables open and interoperable AI by enabling data scientists and developers to use the tools of their choice without worrying about lock-in and flexibility to deploy to a variety of platforms. ONNX is developed and supported by a community of partners including Microsoft, Facebook, and Amazon. For more information, explore the [ONNX website](http://onnx.ai).\n",
|
||||||
|
"\n",
|
||||||
|
"## ResNet50 Details\n",
|
||||||
|
"ResNet classifies the major object in an input image into a set of 1000 pre-defined classes. For more information about the ResNet50 model and how it was created can be found on the [ONNX Model Zoo github](https://github.com/onnx/models/tree/master/models/image_classification/resnet). "
|
||||||
|
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Prerequisites\n",
"\n",
"To make the best use of your time, make sure you have done the following:\n",
"\n",
"* Understand the [architecture and terms](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture) introduced by Azure Machine Learning\n",
"* Go through the [00.configuration.ipynb](../00.configuration.ipynb) notebook to:\n",
"    * install the AML SDK\n",
"    * create a workspace and its configuration file (config.json)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Check core SDK version number\n",
"import azureml.core\n",
"\n",
"print(\"SDK version:\", azureml.core.VERSION)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Download pre-trained ONNX model from ONNX Model Zoo.\n",
"\n",
"Download the [ResNet50v2 model and test data](https://s3.amazonaws.com/onnx-model-zoo/resnet/resnet50v2/resnet50v2.tar.gz) and place it in the same folder as this tutorial notebook. You can unzip the file through the following line of code.\n",
"\n",
"```sh\n",
"(myenv) $ tar xvzf resnet50v2.tar.gz\n",
"```"
]
},
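
The other two ONNX notebooks in this commit fetch their archives with urllib inside the notebook rather than by hand; the same pattern works here if you prefer it. A sketch, mirroring those download cells:

```python
# Sketch, mirroring the download cells in the MNIST and FER+ notebooks above.
import urllib.request

onnx_model_url = "https://s3.amazonaws.com/onnx-model-zoo/resnet/resnet50v2/resnet50v2.tar.gz"
urllib.request.urlretrieve(onnx_model_url, filename="resnet50v2.tar.gz")

# then extract, from a shell or a notebook cell:  tar xvzf resnet50v2.tar.gz
```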
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Deploying as a web service with Azure ML"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Load your Azure ML workspace\n",
"\n",
"We begin by instantiating a workspace object from the existing workspace created earlier in the configuration notebook."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core import Workspace\n",
"\n",
"ws = Workspace.from_config()\n",
"print(ws.name, ws.location, ws.resource_group, sep = '\\n')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Register your model with Azure ML\n",
"\n",
"Now we upload the model and register it in the workspace."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.model import Model\n",
"\n",
"model = Model.register(model_path = \"resnet50v2/resnet50v2.onnx\",\n",
"                       model_name = \"resnet50v2\",\n",
"                       tags = {\"onnx\": \"demo\"},\n",
"                       description = \"ResNet50v2 from ONNX Model Zoo\",\n",
"                       workspace = ws)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Displaying your registered models\n",
"\n",
"You can optionally list out all the models that you have registered in this workspace."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"models = ws.models()\n",
"for m in models:\n",
|
||||||
|
" print(\"Name:\", m.name,\"\\tVersion:\", m.version, \"\\tDescription:\", m.description, m.tags)"
|
||||||
|
]
|
||||||
|
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Write scoring file\n",
"\n",
"We are now going to deploy our ONNX model on Azure ML using the ONNX Runtime. We begin by writing a `score.py` file that will be invoked by the web service call. The `init()` function is called once when the container is started, so we load the model into a global session object using the ONNX Runtime."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%writefile score.py\n",
"import json\n",
"import time\n",
"import sys\n",
"import os\n",
"from azureml.core.model import Model\n",
"import numpy as np    # we're going to use numpy to process input and output data\n",
"import onnxruntime    # to inference ONNX models, we use the ONNX Runtime\n",
"\n",
"def softmax(x):\n",
"    x = x.reshape(-1)\n",
"    e_x = np.exp(x - np.max(x))\n",
"    return e_x / e_x.sum(axis=0)\n",
"\n",
"def init():\n",
"    global session\n",
"    model = Model.get_model_path(model_name = 'resnet50v2')\n",
"    session = onnxruntime.InferenceSession(model, None)\n",
"\n",
"def preprocess(input_data_json):\n",
"    # convert the JSON data into the tensor input\n",
"    img_data = np.array(json.loads(input_data_json)['data']).astype('float32')\n",
"    \n",
"    # normalize\n",
"    mean_vec = np.array([0.485, 0.456, 0.406])\n",
"    stddev_vec = np.array([0.229, 0.224, 0.225])\n",
"    norm_img_data = np.zeros(img_data.shape).astype('float32')\n",
"    for i in range(img_data.shape[0]):\n",
"        norm_img_data[i,:,:] = (img_data[i,:,:]/255 - mean_vec[i]) / stddev_vec[i]\n",
"\n",
"    return norm_img_data\n",
"\n",
"def postprocess(result):\n",
"    return softmax(np.array(result)).tolist()\n",
"\n",
"def run(input_data_json):\n",
"    try:\n",
"        start = time.time()\n",
"        # load in our data, which is expected as an NCHW 224x224 image\n",
"        input_data = preprocess(input_data_json)\n",
"        input_name = session.get_inputs()[0].name  # get the id of the first input of the model\n",
"        result = session.run([], {input_name: input_data})\n",
"        end = time.time()  # stop timer\n",
"        return {\"result\": postprocess(result),\n",
"                \"time\": end - start}\n",
"    except Exception as e:\n",
"        result = str(e)\n",
"        return {\"error\": result}"
]
},
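{
"cell_type": "markdown",
"metadata": {},
"source": [
"Before building an image, it can help to smoke-test `score.py` locally. The cell below is a minimal sketch, not part of the original walkthrough: it assumes the `%%writefile` cell above has been run and that numpy, `onnxruntime`, and the Azure ML SDK are importable locally. It only exercises the pure-Python `preprocess` and `postprocess` helpers, so no model file or deployment is needed."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Hypothetical local check of the helpers defined in score.py.\n",
"# (Importing score does not call init(), so no ONNX model file is required.)\n",
"import json\n",
"import numpy as np\n",
"import score\n",
"\n",
"# A random CHW 3x224x224 array stands in for a real decoded image.\n",
"fake_image = np.random.rand(3, 224, 224)\n",
"tensor = score.preprocess(json.dumps({\"data\": fake_image.tolist()}))\n",
"print(tensor.shape, tensor.dtype)  # expect (3, 224, 224) float32\n",
"\n",
"# postprocess applies softmax, so the result should sum to ~1.\n",
"probs = score.postprocess(np.random.rand(1, 1000))\n",
"print(len(probs), round(sum(probs), 4))"
]
},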
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Create container image"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"First we create a YAML file that specifies which dependencies we would like to see in our container."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.conda_dependencies import CondaDependencies\n",
"\n",
"myenv = CondaDependencies.create(pip_packages=[\"numpy\",\"onnxruntime\"])\n",
"\n",
"with open(\"myenv.yml\",\"w\") as f:\n",
"    f.write(myenv.serialize_to_string())"
]
},
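{
"cell_type": "markdown",
"metadata": {},
"source": [
"Optionally, you can print the generated file to confirm what will be baked into the image (a quick sanity check, not a required step):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Show the conda environment specification that the image build will use.\n",
"with open(\"myenv.yml\") as f:\n",
"    print(f.read())"
]
},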
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Then we have Azure ML create the container. This step will likely take a few minutes."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.image import ContainerImage\n",
"\n",
"image_config = ContainerImage.image_configuration(execution_script = \"score.py\",\n",
"                                                  runtime = \"python\",\n",
"                                                  conda_file = \"myenv.yml\",\n",
"                                                  description = \"ONNX ResNet50 Demo\",\n",
"                                                  tags = {\"demo\": \"onnx\"}\n",
"                                                  )\n",
"\n",
"image = ContainerImage.create(name = \"onnxresnet50v2\",\n",
"                              models = [model],\n",
"                              image_config = image_config,\n",
"                              workspace = ws)\n",
"\n",
"image.wait_for_creation(show_output = True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"If you need to debug the image, the next cell prints the URI of the image build log."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(image.image_build_log_uri)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We're all set. Let's deploy the model as a web service.\n",
"\n",
"### Deploy the container image"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.webservice import AciWebservice\n",
"\n",
"aciconfig = AciWebservice.deploy_configuration(cpu_cores = 1,\n",
"                                               memory_gb = 1,\n",
"                                               tags = {'demo': 'onnx'},\n",
"                                               description = 'web service for ResNet50 ONNX model')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The following cell will likely take a few minutes to run as well."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.webservice import Webservice\n",
"from random import randint\n",
"\n",
"aci_service_name = 'onnx-demo-resnet50'+str(randint(0,100))\n",
"print(\"Service\", aci_service_name)\n",
"\n",
"aci_service = Webservice.deploy_from_image(deployment_config = aciconfig,\n",
"                                           image = image,\n",
"                                           name = aci_service_name,\n",
"                                           workspace = ws)\n",
"\n",
"aci_service.wait_for_deployment(True)\n",
"print(aci_service.state)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"If the deployment fails, you can check the logs with the cell below. Make sure to delete the failed `aci_service` before trying again."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"if aci_service.state != 'Healthy':\n",
"    # run this command for debugging.\n",
"    print(aci_service.get_logs())\n",
"    aci_service.delete()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Success!\n",
"\n",
"If you've made it this far, you've deployed a working web service that does image classification using an ONNX model. You can get the URL of the web service with the code below."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(aci_service.scoring_uri)"
]
},
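{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a quick end-to-end check, you can call the service from the SDK. The sketch below is an illustrative assumption rather than part of the original tutorial: it sends a random tensor in the `{\"data\": ...}` JSON envelope that `score.py` expects, so the returned class probabilities are meaningless; substitute a real preprocessed image for a meaningful prediction."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import numpy as np\n",
"\n",
"# Random CHW input wrapped the way preprocess() in score.py expects it.\n",
"sample_input = json.dumps({\"data\": np.random.rand(3, 224, 224).tolist()})\n",
"response = aci_service.run(input_data=sample_input)\n",
"\n",
"# On success the service returns the softmax output and the scoring time;\n",
"# on failure it returns an {\"error\": ...} payload instead.\n",
"print(\"scoring time (s):\", response.get(\"time\"))\n",
"print(\"number of scores:\", len(response.get(\"result\", [])))"
]
},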
{
"cell_type": "markdown",
"metadata": {},
"source": [
"When you are eventually done using the web service, remember to delete it."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#aci_service.delete()"
]
}
],
"metadata": {
"authors": [
{
"name": "onnx"
}
],
"kernelspec": {
"display_name": "Python 3.6",
"language": "python",
"name": "python36"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
onnx/onnx-train-pytorch-aml-deploy-mnist.ipynb (new file, 651 lines)
File diff suppressed because one or more lines are too long
@@ -53,6 +53,11 @@
 }
 ],
 "metadata": {
+"authors": [
+{
+"name": "hichando"
+}
+],
 "kernelspec": {
 "display_name": "Python 3.6",
 "language": "python",
File diff suppressed because it is too large
@@ -0,0 +1 @@
{"cells":[{"cell_type":"markdown","source":["Azure ML & Azure Databricks notebooks by Parashar Shah.\n\nCopyright (c) Microsoft Corporation. All rights reserved.\n\nLicensed under the MIT License."],"metadata":{}},{"cell_type":"markdown","source":["Please ensure you have run this notebook before proceeding."],"metadata":{}},{"cell_type":"markdown","source":["Now we support installing AML SDK as library from GUI. When attaching a library follow this https://docs.databricks.com/user-guide/libraries.html and add the below string as your PyPi package (during private preview). You can select the option to attach the library to all clusters or just one cluster.\n\nProvide this full string to install the SDK:\n\nazureml-sdk[databricks]"],"metadata":{}},{"cell_type":"code","source":["import azureml.core\n\n# Check core SDK version number - based on build number of preview/master.\nprint(\"SDK version:\", azureml.core.VERSION)"],"metadata":{},"outputs":[],"execution_count":4},{"cell_type":"code","source":["subscription_id = \"<your-subscription-id>\"\nresource_group = \"<your-existing-resource-group>\"\nworkspace_name = \"<a-new-or-existing-workspace; it is unrelated to Databricks workspace>\"\nworkspace_region = \"<your-resource group-region>\""],"metadata":{},"outputs":[],"execution_count":5},{"cell_type":"code","source":["# import the Workspace class and check the azureml SDK version\n# exist_ok checks if workspace exists or not.\n\nfrom azureml.core import Workspace\n\nws = Workspace.create(name = workspace_name,\n subscription_id = subscription_id,\n resource_group = resource_group, \n location = workspace_region,\n exist_ok=True)\n\nws.get_details()"],"metadata":{},"outputs":[],"execution_count":6},{"cell_type":"code","source":["ws = Workspace(workspace_name = workspace_name,\n subscription_id = subscription_id,\n resource_group = resource_group)\n\n# persist the subscription id, resource group name, and workspace name in aml_config/config.json.\nws.write_config()"],"metadata":{},"outputs":[],"execution_count":7},{"cell_type":"code","source":["%sh\ncat /databricks/driver/aml_config/config.json"],"metadata":{},"outputs":[],"execution_count":8},{"cell_type":"code","source":["# import the Workspace class and check the azureml SDK version\nfrom azureml.core import Workspace\n\nws = Workspace.from_config()\nprint('Workspace name: ' + ws.name, \n 'Azure region: ' + ws.location, \n 'Subscription id: ' + ws.subscription_id, \n 'Resource group: ' + ws.resource_group, sep = '\\n')"],"metadata":{},"outputs":[],"execution_count":9},{"cell_type":"code","source":["dbutils.notebook.exit(\"success\")"],"metadata":{},"outputs":[],"execution_count":10},{"cell_type":"code","source":[""],"metadata":{},"outputs":[],"execution_count":11}],"metadata":{"name":"01.Installation_and_Configuration","notebookId":3874566296719377},"nbformat":4,"nbformat_minor":0}
project-brainwave/databricks/02.Ingest_data.ipynb (new file, 1 line)
@@ -0,0 +1 @@
{"cells":[{"cell_type":"markdown","source":["Azure ML & Azure Databricks notebooks by Parashar Shah.\n\nCopyright (c) Microsoft Corporation. All rights reserved.\n\nLicensed under the MIT License."],"metadata":{}},{"cell_type":"markdown","source":["Please ensure you have run all previous notebooks in sequence before running this."],"metadata":{}},{"cell_type":"markdown","source":["#Data Ingestion"],"metadata":{}},{"cell_type":"code","source":["import os\nimport urllib"],"metadata":{},"outputs":[],"execution_count":4},{"cell_type":"code","source":["# Download AdultCensusIncome.csv from Azure CDN. This file has 32,561 rows.\nbasedataurl = \"https://amldockerdatasets.azureedge.net\"\ndatafile = \"AdultCensusIncome.csv\"\ndatafile_dbfs = os.path.join(\"/dbfs\", datafile)\n\nif os.path.isfile(datafile_dbfs):\n print(\"found {} at {}\".format(datafile, datafile_dbfs))\nelse:\n print(\"downloading {} to {}\".format(datafile, datafile_dbfs))\n urllib.request.urlretrieve(os.path.join(basedataurl, datafile), datafile_dbfs)"],"metadata":{},"outputs":[],"execution_count":5},{"cell_type":"code","source":["# Create a Spark dataframe out of the csv file.\ndata_all = sqlContext.read.format('csv').options(header='true', inferSchema='true', ignoreLeadingWhiteSpace='true', ignoreTrailingWhiteSpace='true').load(datafile)\nprint(\"({}, {})\".format(data_all.count(), len(data_all.columns)))\ndata_all.printSchema()"],"metadata":{},"outputs":[],"execution_count":6},{"cell_type":"code","source":["#renaming columns\ncolumns_new = [col.replace(\"-\", \"_\") for col in data_all.columns]\ndata_all = data_all.toDF(*columns_new)\ndata_all.printSchema()"],"metadata":{},"outputs":[],"execution_count":7},{"cell_type":"code","source":["display(data_all.limit(5))"],"metadata":{},"outputs":[],"execution_count":8},{"cell_type":"markdown","source":["#Data Preparation"],"metadata":{}},{"cell_type":"code","source":["# Choose feature columns and the label column.\nlabel = \"income\"\nxvals_all = set(data_all.columns) - {label}\n\n#dbutils.widgets.remove(\"xvars_multiselect\")\ndbutils.widgets.removeAll()\n\ndbutils.widgets.multiselect('xvars_multiselect', 'hours_per_week', xvals_all)\nxvars_multiselect = dbutils.widgets.get(\"xvars_multiselect\")\nxvars = xvars_multiselect.split(\",\")\n\nprint(\"label = {}\".format(label))\nprint(\"features = {}\".format(xvars))\n\ndata = data_all.select([*xvars, label])\n\n# Split data into train and test.\ntrain, test = data.randomSplit([0.75, 0.25], seed=123)\n\nprint(\"train ({}, {})\".format(train.count(), len(train.columns)))\nprint(\"test ({}, {})\".format(test.count(), len(test.columns)))"],"metadata":{},"outputs":[],"execution_count":10},{"cell_type":"markdown","source":["#Data Persistence"],"metadata":{}},{"cell_type":"code","source":["# Write the train and test data sets to intermediate storage\ntrain_data_path = \"AdultCensusIncomeTrain\"\ntest_data_path = \"AdultCensusIncomeTest\"\n\ntrain_data_path_dbfs = os.path.join(\"/dbfs\", \"AdultCensusIncomeTrain\")\ntest_data_path_dbfs = os.path.join(\"/dbfs\", \"AdultCensusIncomeTest\")\n\ntrain.write.mode('overwrite').parquet(train_data_path)\ntest.write.mode('overwrite').parquet(test_data_path)\nprint(\"train and test datasets saved to {} and {}\".format(train_data_path_dbfs, test_data_path_dbfs))"],"metadata":{},"outputs":[],"execution_count":12},{"cell_type":"code","source":["dbutils.notebook.exit(\"success\")"],"metadata":{},"outputs":[],"execution_count":13},{"cell_type":"code","source":[""],"metadata":{},"outputs":[],"execution_count":14}],"metadata":{"name":"02.Ingest_data","notebookId":3874566296719393},"nbformat":4,"nbformat_minor":0}
project-brainwave/databricks/03a.Build_model.ipynb (new file, 1 line)
File diff suppressed because one or more lines are too long
project-brainwave/databricks/04.Deploy_to_ACI.ipynb (new file, 1 line)
File diff suppressed because one or more lines are too long
@@ -0,0 +1 @@
{"cells":[{"cell_type":"markdown","source":["Azure ML & Azure Databricks notebooks by Parashar Shah.\n\nCopyright (c) Microsoft Corporation. All rights reserved.\n\nLicensed under the MIT License."],"metadata":{}},{"cell_type":"markdown","source":["Please ensure you have run all previous notebooks in sequence before running this. This notebook uses image from ACI notebook for deploying to AKS."],"metadata":{}},{"cell_type":"code","source":["from azureml.core import Workspace\nimport azureml.core\n\n# Check core SDK version number\nprint(\"SDK version:\", azureml.core.VERSION)\n\n#'''\nws = Workspace.from_config()\nprint('Workspace name: ' + ws.name, \n 'Azure region: ' + ws.location, \n 'Subscription id: ' + ws.subscription_id, \n 'Resource group: ' + ws.resource_group, sep = '\\n')\n#'''"],"metadata":{},"outputs":[],"execution_count":3},{"cell_type":"code","source":["# List images by ws\n\nfrom azureml.core.image import ContainerImage\nfor i in ContainerImage.list(workspace = ws):\n print('{}(v.{} [{}]) stored at {} with build log {}'.format(i.name, i.version, i.creation_state, i.image_location, i.image_build_log_uri))"],"metadata":{},"outputs":[],"execution_count":4},{"cell_type":"code","source":["from azureml.core.image import Image\nmyimage = Image(workspace=ws, id=\"aciws:25\")"],"metadata":{},"outputs":[],"execution_count":5},{"cell_type":"code","source":["#create AKS compute\n#it may take 20-25 minutes to create a new cluster\n\nfrom azureml.core.compute import AksCompute, ComputeTarget\n\n# Use the default configuration (can also provide parameters to customize)\nprov_config = AksCompute.provisioning_configuration()\n\naks_name = 'ps-aks-clus2' \n\n# Create the cluster\naks_target = ComputeTarget.create(workspace = ws, \n name = aks_name, \n provisioning_configuration = prov_config)\n\naks_target.wait_for_completion(show_output = True)\n\nprint(aks_target.provisioning_state)\nprint(aks_target.provisioning_errors)"],"metadata":{},"outputs":[],"execution_count":6},{"cell_type":"code","source":["from azureml.core.webservice import Webservice\nhelp( Webservice.deploy_from_image)"],"metadata":{},"outputs":[],"execution_count":7},{"cell_type":"code","source":["from azureml.core.webservice import Webservice, AksWebservice\nfrom azureml.core.image import ContainerImage\n\n#Set the web service configuration (using default here)\naks_config = AksWebservice.deploy_configuration()\n\n#unique service name\nservice_name ='ps-aks-service'\n\n# Webservice creation using single command, there is a variant to use image directly as well.\naks_service = Webservice.deploy_from_image(\n workspace=ws, \n name=service_name,\n deployment_config = aks_config,\n image = myimage,\n deployment_target = aks_target\n )\n\naks_service.wait_for_deployment(show_output=True)"],"metadata":{},"outputs":[],"execution_count":8},{"cell_type":"code","source":["#for using the Web HTTP API \nprint(aks_service.scoring_uri)\nprint(aks_service.get_keys())"],"metadata":{},"outputs":[],"execution_count":9},{"cell_type":"code","source":["import json\n\n#get the some sample data\ntest_data_path = \"AdultCensusIncomeTest\"\ntest = spark.read.parquet(test_data_path).limit(5)\n\ntest_json = json.dumps(test.toJSON().collect())\n\nprint(test_json)"],"metadata":{},"outputs":[],"execution_count":10},{"cell_type":"code","source":["#using data defined above predict if income is >50K (1) or <=50K (0)\naks_service.run(input_data=test_json)"],"metadata":{},"outputs":[],"execution_count":11},{"cell_type":"code","source":["#comment to not 
delete the web service\naks_service.delete()\n#image.delete()\n#model.delete()\n#aks_target.delete()"],"metadata":{},"outputs":[],"execution_count":12},{"cell_type":"code","source":[""],"metadata":{},"outputs":[],"execution_count":13}],"metadata":{"name":"04.DeploytoACI","notebookId":3874566296719318},"nbformat":4,"nbformat_minor":0}
project-brainwave/databricks/Databricks_AMLSDK_github.dbc (new binary file, not shown)
project-brainwave/databricks/readme.md (new file, 26 lines)
@@ -0,0 +1,26 @@
+# Azure Databricks - Azure ML SDK Sample Notebooks
+
+**NOTE**: With the latest version of our AML SDK, there are some API changes due to which previous versions of the notebooks will not work.
+Kindly use these v4 notebooks (updated Sep 18) - if you had installed the AML SDK in your Databricks cluster, please update to the latest SDK version by installing azureml-sdk[databricks] as a library from the GUI.
+
+**NOTE**: Please create your Azure Databricks cluster as v4.x (high concurrency preferred) with **Python 3** (dropdown). We are extending it to more runtimes asap.
+
+**NOTE**: Some packages like psutil upgrade libs that can cause a conflict; please install such packages by freezing the lib version, e.g. "psutil **cryptography==1.5 pyopenssl==16.0.0 ipython==2.2.0**", to avoid install errors. This issue is related to Databricks and not to the AML SDK.
+
+**NOTE**: You should have at least contributor access to your Azure subscription to run some of the notebooks.
+
+The iPython Notebooks have to be run sequentially after making changes based on your subscription. The corresponding DBC archive contains all the notebooks and can be imported into your Databricks workspace. You can then run the notebooks after importing the .dbc instead of downloading them individually.
+
+This set of notebooks relates to an income prediction experiment based on this [dataset](https://archive.ics.uci.edu/ml/datasets/adult) and demonstrates how to prep data, train, and operationalize a Spark ML model with the Azure ML Python SDK from within Azure Databricks. For details on SDK concepts, please refer to the [Private preview notebooks](https://github.com/Azure/ViennaDocs/tree/master/PrivatePreview/notebooks).
+
+(Recommended) [Azure Databricks AML SDK notebooks](Databricks_AMLSDK_github.dbc) - a single DBC package to import all notebooks into your Databricks workspace.
+
+01. [Installation and Configuration](01.Installation_and_Configuration.ipynb): Install the Azure ML Python SDK, initialize an Azure ML Workspace, and save the Workspace configuration file.
+02. [Ingest data](02.Ingest_data.ipynb): Download the Adult Census Income dataset and split it into train and test sets.
+03. [Build model](03a.Build_model.ipynb): Train a binary classification model in Azure Databricks with a Spark ML Pipeline.
+04. [Build model with Run History](03b.Build_model_runHistory.ipynb): Train the model and also capture run history (tracking) with the Azure ML Python SDK.
+05. [Deploy to ACI](04.Deploy_to_ACI.ipynb): Deploy the model to Azure Container Instances (ACI) with the Azure ML Python SDK.
+06. [Deploy to AKS](04.Deploy_to_AKS_existingImage.ipynb): Deploy the model to Azure Kubernetes Service (AKS) with the Azure ML Python SDK from an existing image with model, conda, and score files.
+
+Copyright (c) Microsoft Corporation. All rights reserved.
+All notebooks in this folder are licensed under the MIT License.
@@ -594,6 +594,11 @@
 }
 ],
 "metadata": {
+"authors": [
+{
+"name": "coverste"
+}
+],
 "kernelspec": {
 "display_name": "Python 3.6",
 "language": "python",
@@ -1,314 +1,314 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
"\n",
"Licensed under the MIT License."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Azure ML Hardware Accelerated Models Quickstart"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This tutorial will show you how to deploy an image recognition service based on the ResNet 50 classifier in just a few minutes using the Azure Machine Learning Accelerated AI service. Get more help from our [documentation](https://aka.ms/aml-real-time-ai) or [forum](https://aka.ms/aml-forum).\n",
"\n",
"We will use an accelerated ResNet50 featurizer running on an FPGA. This functionality is powered by Project Brainwave, which handles translating deep neural networks (DNN) into an FPGA program.\n",
"\n",
"## Request Quota\n",
"**IMPORTANT:** You must [request quota](https://aka.ms/aml-real-time-ai-request) and be approved before you can successfully run this notebook. Notebook 00 will show you how to create a workspace which you can use to request quota."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Imports"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import tensorflow as tf"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Image preprocessing\n",
"We'd like our service to accept JPEG images as input. However, the input to ResNet50 is a tensor, so we need code that decodes JPEG images and does the preprocessing required by ResNet50. The Accelerated AI service can execute TensorFlow graphs as part of the service, and we'll use that ability to do the image preprocessing. This code defines a TensorFlow graph that preprocesses an array of JPEG images (as strings) and produces a tensor that is ready to be featurized by ResNet50."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Input images as a two-dimensional tensor containing an arbitrary number of images represented as strings\n",
"import azureml.contrib.brainwave.models.utils as utils\n",
"in_images = tf.placeholder(tf.string)\n",
"image_tensors = utils.preprocess_array(in_images)\n",
"print(image_tensors.shape)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Featurizer\n",
"We use ResNet50 as a featurizer. In this step we initialize the model. This downloads a TensorFlow checkpoint of the quantized ResNet50."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.contrib.brainwave.models import QuantizedResnet50, Resnet50\n",
"model_path = os.path.expanduser('~/models')\n",
"model = QuantizedResnet50(model_path, is_frozen = True)\n",
"feature_tensor = model.import_graph_def(image_tensors)\n",
"print(model.version)\n",
"print(feature_tensor.name)\n",
"print(feature_tensor.shape)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Classifier\n",
"The model we downloaded includes a classifier which takes the output of the ResNet50 and identifies an image. This classifier is trained on the ImageNet dataset. We are going to use this classifier for our service. The next [notebook](project-brainwave-trainsfer-learning.ipynb) shows how to train a classifier for a different data set. The input to the classifier is a tensor matching the output of our ResNet50 featurizer."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"classifier_output = model.get_default_classifier(feature_tensor)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Service Definition\n",
"Now that we've defined the image preprocessing, featurizer, and classifier that we will execute on our service, we can create a service definition. The service definition is a set of files generated from the model that allow us to deploy to the FPGA service. The service definition consists of a pipeline. The pipeline is a series of stages that are executed in order. We support TensorFlow stages, Keras stages, and BrainWave stages. The stages will be executed in order on the service, with the output of each stage input into the subsequent stage.\n",
"\n",
"To create a TensorFlow stage we specify a session containing the graph (in this case we are using the default graph) and the input and output tensors to this stage. We use this information to save the graph so that we can execute it on the service."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.contrib.brainwave.pipeline import ModelDefinition, TensorflowStage, BrainWaveStage\n",
"\n",
"save_path = os.path.expanduser('~/models/save')\n",
"model_def_path = os.path.join(save_path, 'model_def.zip')\n",
"\n",
"model_def = ModelDefinition()\n",
"with tf.Session() as sess:\n",
"    model_def.pipeline.append(TensorflowStage(sess, in_images, image_tensors))\n",
"    model_def.pipeline.append(BrainWaveStage(sess, model))\n",
"    model_def.pipeline.append(TensorflowStage(sess, feature_tensor, classifier_output))\n",
"    model_def.save(model_def_path)\n",
"    print(model_def_path)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Deploy\n",
"Time to create a service from the service definition. You need a Workspace in the **East US 2** location. In the previous notebooks, you've created this Workspace. The code below will load that Workspace from a configuration file."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core import Workspace\n",
"\n",
"ws = Workspace.from_config()\n",
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Upload the model to the workspace."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.model import Model\n",
"model_name = \"resnet-50-rtai\"\n",
"registered_model = Model.register(ws, model_def_path, model_name)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Create a service from the model that we registered. If this is a new service then we create it. If you already have a service with this name then the existing service will be updated to use this model."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.webservice import Webservice\n",
"from azureml.exceptions import WebserviceException\n",
"from azureml.contrib.brainwave import BrainwaveWebservice, BrainwaveImage\n",
"service_name = \"imagenet-infer\"\n",
"service = None\n",
"try:\n",
"    service = Webservice(ws, service_name)\n",
"except WebserviceException:\n",
"    image_config = BrainwaveImage.image_configuration()\n",
"    deployment_config = BrainwaveWebservice.deploy_configuration()\n",
"    service = Webservice.deploy_from_model(ws, service_name, [registered_model], image_config, deployment_config)\n",
"    service.wait_for_deployment(True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Client\n",
"The service supports gRPC and the TensorFlow Serving \"predict\" API. We provide a client that can call the service to get predictions at aka.ms/rtai. You can also invoke the service like any other web service."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"To understand the results we need a mapping to the human-readable ImageNet classes"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"classes_entries = requests.get(\"https://raw.githubusercontent.com/Lasagne/Recipes/master/examples/resnet50/imagenet_classes.txt\").text.splitlines()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We can now send an image to the service and get the predictions. Let's see if it can identify a snow leopard.\n",
"\n",
"Snow leopard in a zoo. Photo by Peter Bolliger.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"results = service.run('snowleopardgaze.jpg')\n",
"# map results [class_id] => [confidence]\n",
"results = enumerate(results)\n",
"# sort results by confidence\n",
"sorted_results = sorted(results, key=lambda x: x[1], reverse=True)\n",
"# print top 5 results\n",
"for top in sorted_results[:5]:\n",
"    print(classes_entries[top[0]], 'confidence:', top[1])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Cleanup\n",
"Run the cell below to delete your service."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"service.delete()\n",
" \n",
"registered_model.delete()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Congratulations! You've just created a service that does predictions using an FPGA. The next [notebook](project-brainwave-trainsfer-learning.ipynb) shows how to customize the service using transfer learning to classify different types of images."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"authors": [
{
"name": "coverste"
}
],
"kernelspec": {
"display_name": "Python 3.6",
"language": "python",
"name": "python36"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
@@ -544,6 +544,11 @@
 }
 ],
 "metadata": {
+"authors": [
+{
+"name": "coverste"
+}
+],
 "kernelspec": {
 "display_name": "Python 3.6",
 "language": "python",
@@ -43,6 +43,28 @@
 "print(\"SDK version:\", azureml.core.VERSION)"
 ]
 },
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"## Diagnostics\n",
+"Opt-in diagnostics for better experience, quality, and security of future releases."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"tags": [
+"Diagnostics"
+]
+},
+"outputs": [],
+"source": [
+"from azureml.telemetry import set_diagnostics_collection\n",
+"set_diagnostics_collection(send_diagnostics = True)"
+]
+},
 {
 "cell_type": "markdown",
 "metadata": {},
@@ -244,7 +266,7 @@
 "In `pytorch_train.py`, we will log some metrics to our AML run. To do so, we will access the AML run object within the script:\n",
 "```Python\n",
 "from azureml.core.run import Run\n",
-"run = Run.get_submitted_run()\n",
+"run = Run.get_context()\n",
 "```\n",
 "Further within `pytorch_train.py`, we log the learning rate and momentum parameters, and the best validation accuracy the model achieves:\n",
 "```Python\n",
@@ -735,6 +757,11 @@
 }
 ],
 "metadata": {
+"authors": [
+{
+"name": "minxia"
+}
+],
 "kernelspec": {
 "display_name": "Python 3.6",
 "language": "python",
@@ -17,7 +17,7 @@ import argparse

 from azureml.core.run import Run
 # get the Azure ML run object
-run = Run.get_submitted_run()
+run = Run.get_context()


 def load_data(data_dir):
@@ -162,8 +162,8 @@ def main():
     parser.add_argument('--data_dir', type=str, help='directory of training data')
     parser.add_argument('--num_epochs', type=int, default=25, help='number of epochs to train')
     parser.add_argument('--output_dir', type=str, help='output directory')
-    parser.add_argument('--learning_rate', type=float, help='learning rate')
-    parser.add_argument('--momentum', type=float, help='momentum')
+    parser.add_argument('--learning_rate', type=float, default=0.001, help='learning rate')
+    parser.add_argument('--momentum', type=float, default=0.9, help='momentum')
     args = parser.parse_args()

     print("data directory is: " + args.data_dir)
@@ -18,7 +18,6 @@
 ]
 },
 {
-"attachments": {},
 "cell_type": "markdown",
 "metadata": {},
 "source": [
@@ -42,6 +41,28 @@
 "print(\"SDK version:\", azureml.core.VERSION)"
 ]
 },
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"## Diagnostics\n",
+"Opt-in diagnostics for better experience, quality, and security of future releases."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"tags": [
+"Diagnostics"
+]
+},
+"outputs": [],
+"source": [
+"from azureml.telemetry import set_diagnostics_collection\n",
+"set_diagnostics_collection(send_diagnostics = True)"
+]
+},
 {
 "cell_type": "markdown",
 "metadata": {},
@@ -265,6 +286,11 @@
 }
 ],
 "metadata": {
+"authors": [
+{
+"name": "minxia"
+}
+],
 "kernelspec": {
 "display_name": "Python 3.6",
 "language": "python",
File diff suppressed because it is too large
@@ -17,7 +17,7 @@
 }
 },
 "source": [
-"# 03. Training MNIST dataset with hyperparameter tuning & deploy to ACI\n",
+"# 03. Training, hyperparameter tune, and deploy with TensorFlow\n",
 "\n",
 "## Introduction\n",
 "This tutorial shows how to train a simple deep neural network using the MNIST dataset and TensorFlow on Azure Machine Learning. MNIST is a popular dataset consisting of 70,000 grayscale images. Each image is a handwritten digit of `28x28` pixels, representing a number from 0 to 9. The goal is to create a multi-class classifier to identify the digit each image represents, and deploy it as a web service in Azure.\n",
@@ -72,6 +72,28 @@
 "print(\"Azure ML SDK Version: \", azureml.core.VERSION)"
 ]
 },
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"## Diagnostics\n",
+"Opt-in diagnostics for better experience, quality, and security of future releases."
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {
+"tags": [
+"Diagnostics"
+]
+},
+"outputs": [],
+"source": [
+"from azureml.telemetry import set_diagnostics_collection\n",
+"set_diagnostics_collection(send_diagnostics=True)"
+]
+},
 {
 "cell_type": "markdown",
 "metadata": {},
@@ -246,17 +268,17 @@
 "from azureml.core.compute_target import ComputeTargetException\n",
 "\n",
 "# choose a name for your cluster\n",
-"batchai_cluster_name = \"gpucluster\"\n",
+"cluster_name = \"gpucluster\"\n",
 "\n",
 "try:\n",
 "    # look for the existing cluster by name\n",
-"    compute_target = ComputeTarget(workspace=ws, name=batchai_cluster_name)\n",
+"    compute_target = ComputeTarget(workspace=ws, name=cluster_name)\n",
 "    if type(compute_target) is BatchAiCompute:\n",
-"        print('found compute target {}, just use it.'.format(batchai_cluster_name))\n",
+"        print('Found existing compute target {}.'.format(cluster_name))\n",
 "    else:\n",
-"        print('{} exists but it is not a Batch AI cluster. Please choose a different name.'.format(batchai_cluster_name))\n",
+"        print('{} exists but it is not a Batch AI cluster. Please choose a different name.'.format(cluster_name))\n",
 "except ComputeTargetException:\n",
-"    print('creating a new compute target...')\n",
+"    print('Creating a new compute target...')\n",
 "    compute_config = BatchAiCompute.provisioning_configuration(vm_size=\"STANDARD_NC6\", # GPU-based VM\n",
 "                                                               #vm_priority='lowpriority', # optional\n",
 "                                                               autoscale_enabled=True,\n",
@@ -264,7 +286,7 @@
 "                                                               cluster_max_nodes=4)\n",
 "\n",
 "    # create the cluster\n",
-"    compute_target = ComputeTarget.create(ws, batchai_cluster_name, compute_config)\n",
+"    compute_target = ComputeTarget.create(ws, cluster_name, compute_config)\n",
 "    \n",
 "    # can poll for a minimum number of nodes and for a specific timeout. \n",
 "    # if no min node count is provided it uses the scale settings for the cluster\n",
@@ -278,7 +300,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"Now that you have created the compute target, let's see what the workspace's `compute_targets()` function returns. You should now see one entry named 'cpucluster' of type BatchAI."
+"Now that you have created the compute target, let's see what the workspace's `compute_targets()` function returns. You should now see one entry named 'gpucluster' of type BatchAI."
 ]
 },
 {
@@ -287,8 +309,9 @@
   "metadata": {},
   "outputs": [],
   "source": [
-   "for ct in ws.compute_targets():\n",
-   "    print(ct.name, ct.type, ct.provisioning_state)"
+   "compute_targets = ws.compute_targets()\n",
+   "for name, ct in compute_targets.items():\n",
+   "    print(name, ct.type, ct.provisioning_state)"
   ]
  },
  {
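Because `compute_targets()` now returns a dict keyed by target name (as the updated cell above shows), individual targets can also be looked up directly. A minimal sketch, assuming `ws` is an initialized `Workspace` and the cluster name from this notebook:

```python
# Sketch: dict-style lookup of a single compute target by name.
compute_targets = ws.compute_targets()
gpu_target = compute_targets.get("gpucluster")  # None if no such target exists
if gpu_target is not None:
    print(gpu_target.type, gpu_target.provisioning_state)
```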
@@ -338,7 +361,7 @@
    "    parser = argparse.ArgumentParser()\n",
    "    parser.add_argument('--data_folder')\n",
    "```\n",
-    "2. The script is accessing the Azure ML `Run` object by executing `run = Run.get_submitted_run()`. Further down the script is using the `run` to report the training accuracy and the validation accuracy as training progresses.\n",
+    "2. The script accesses the Azure ML `Run` object by executing `run = Run.get_context()`. Further down, the script uses `run` to report the training accuracy and the validation accuracy as training progresses.\n",
    "```\n",
    "    run.log('training_acc', np.float(acc_train))\n",
    "    run.log('validation_acc', np.float(acc_val))\n",
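The rename applied throughout this commit is `Run.get_submitted_run()` → `Run.get_context()`. A minimal training-script sketch using the new call; the metric values are placeholders:

```python
# Sketch: obtain the current Azure ML run inside a submitted script
# and log metrics against it.
import numpy as np
from azureml.core.run import Run

run = Run.get_context()  # replaces the older Run.get_submitted_run()

acc_train, acc_val = 0.95, 0.91  # placeholder accuracies
run.log('training_acc', np.float(acc_train))
run.log('validation_acc', np.float(acc_val))
```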
@@ -1056,14 +1079,17 @@
   "metadata": {},
   "outputs": [],
   "source": [
-   "for model in ws.models():\n",
-   "    print(\"Model:\", model.name, model.id)\n",
-   "\n",
-   "for image in ws.images():\n",
-   "    print(\"Image:\", image.name, image.image_location)\n",
-   "\n",
-   "for webservice in ws.webservices():\n",
-   "    print(\"Webservice:\", webservice.name, webservice.scoring_uri)"
+   "models = ws.models()\n",
+   "for name, model in models.items():\n",
+   "    print(\"Model: {}, ID: {}\".format(name, model.id))\n",
+   "    \n",
+   "images = ws.images()\n",
+   "for name, image in images.items():\n",
+   "    print(\"Image: {}, location: {}\".format(name, image.image_location))\n",
+   "    \n",
+   "webservices = ws.webservices()\n",
+   "for name, webservice in webservices.items():\n",
+   "    print(\"Webservice: {}, scoring URI: {}\".format(name, webservice.scoring_uri))"
   ]
  },
  {
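`models()`, `images()`, and `webservices()` follow the same dict convention as `compute_targets()`, so a single asset can be fetched by name. A short sketch, assuming `ws` is a `Workspace`; the model name is hypothetical:

```python
# Sketch: name-keyed lookup into the workspace's registered assets.
models = ws.models()                 # {name: Model}
model = models.get("sklearn_mnist")  # hypothetical model name
if model is not None:
    print("Model: {}, ID: {}".format("sklearn_mnist", model.id))
```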
@@ -1102,6 +1128,11 @@
    }
   ],
   "metadata": {
+   "authors": [
+    {
+     "name": "minxia"
+    }
+   ],
    "kernelspec": {
     "display_name": "Python 3.6",
     "language": "python",
@@ -64,7 +64,7 @@ init = tf.global_variables_initializer()
 saver = tf.train.Saver()
 
 # start an Azure ML run
-run = Run.get_submitted_run()
+run = Run.get_context()
 
 with tf.Session() as sess:
     init.run()
@@ -18,7 +18,6 @@
    ]
   },
   {
-   "attachments": {},
    "cell_type": "markdown",
    "metadata": {},
    "source": [
@@ -42,6 +41,28 @@
    "print(\"SDK version:\", azureml.core.VERSION)"
   ]
  },
+ {
+  "cell_type": "markdown",
+  "metadata": {},
+  "source": [
+   "## Diagnostics\n",
+   "Opt-in diagnostics for better experience, quality, and security of future releases."
+  ]
+ },
+ {
+  "cell_type": "code",
+  "execution_count": null,
+  "metadata": {
+   "tags": [
+    "Diagnostics"
+   ]
+  },
+  "outputs": [],
+  "source": [
+   "from azureml.telemetry import set_diagnostics_collection\n",
+   "set_diagnostics_collection(send_diagnostics = True)"
+  ]
+ },
  {
   "cell_type": "markdown",
   "metadata": {},
@@ -336,6 +357,11 @@
    }
   ],
   "metadata": {
+   "authors": [
+    {
+     "name": "roastala"
+    }
+   ],
    "kernelspec": {
     "display_name": "Python 3.6",
     "language": "python",
@@ -222,7 +222,7 @@ with tf.Session(graph=graph, config=config) as session:
     init.run()
     bcast.run()
     print('Initialized')
-    run = Run.get_submitted_run()
+    run = Run.get_context()
     average_loss = 0
     for step in xrange(num_steps):
         # simulate various sentence length by randomization
@@ -41,6 +41,28 @@
    "print(\"SDK version:\", azureml.core.VERSION)"
   ]
  },
+ {
+  "cell_type": "markdown",
+  "metadata": {},
+  "source": [
+   "## Diagnostics\n",
+   "Opt-in diagnostics for better experience, quality, and security of future releases."
+  ]
+ },
+ {
+  "cell_type": "code",
+  "execution_count": null,
+  "metadata": {
+   "tags": [
+    "Diagnostics"
+   ]
+  },
+  "outputs": [],
+  "source": [
+   "from azureml.telemetry import set_diagnostics_collection\n",
+   "set_diagnostics_collection(send_diagnostics = True)"
+  ]
+ },
  {
   "cell_type": "markdown",
   "metadata": {},
@@ -262,6 +284,11 @@
    }
   ],
   "metadata": {
+   "authors": [
+    {
+     "name": "minxia"
+    }
+   ],
    "kernelspec": {
     "display_name": "Python 3.6",
     "language": "python",
@@ -263,7 +263,7 @@ def main(unused_argv):
     print("After %d training step(s), validation cross entropy = %g" %
         (FLAGS.train_steps, val_xent))
     if job_name == "worker" and task_index == 0:
-        run = Run.get_submitted_run()
+        run = Run.get_context()
         run.log("CrossEntropy", val_xent)
 
 
@@ -40,6 +40,28 @@
    "print(\"SDK version:\", azureml.core.VERSION)"
   ]
  },
+ {
+  "cell_type": "markdown",
+  "metadata": {},
+  "source": [
+   "## Diagnostics\n",
+   "Opt-in diagnostics for better experience, quality, and security of future releases."
+  ]
+ },
+ {
+  "cell_type": "code",
+  "execution_count": null,
+  "metadata": {
+   "tags": [
+    "Diagnostics"
+   ]
+  },
+  "outputs": [],
+  "source": [
+   "from azureml.telemetry import set_diagnostics_collection\n",
+   "set_diagnostics_collection(send_diagnostics = True)"
+  ]
+ },
  {
   "cell_type": "markdown",
   "metadata": {},
@@ -341,6 +363,11 @@
    }
   ],
   "metadata": {
+   "authors": [
+    {
+     "name": "minxia"
+    }
+   ],
    "kernelspec": {
     "display_name": "Python 3.6",
     "language": "python",
@@ -41,6 +41,28 @@
    "print(\"SDK version:\", azureml.core.VERSION)"
   ]
  },
+ {
+  "cell_type": "markdown",
+  "metadata": {},
+  "source": [
+   "## Diagnostics\n",
+   "Opt-in diagnostics for better experience, quality, and security of future releases."
+  ]
+ },
+ {
+  "cell_type": "code",
+  "execution_count": null,
+  "metadata": {
+   "tags": [
+    "Diagnostics"
+   ]
+  },
+  "outputs": [],
+  "source": [
+   "from azureml.telemetry import set_diagnostics_collection\n",
+   "set_diagnostics_collection(send_diagnostics = True)"
+  ]
+ },
  {
   "cell_type": "markdown",
   "metadata": {},
@@ -481,6 +503,11 @@
    }
   ],
   "metadata": {
+   "authors": [
+    {
+     "name": "roastala"
+    }
+   ],
    "kernelspec": {
     "display_name": "Python 3.6",
     "language": "python",
@@ -220,6 +220,11 @@
    }
   ],
   "metadata": {
+   "authors": [
+    {
+     "name": "roastala"
+    }
+   ],
    "kernelspec": {
     "display_name": "Python 3.6",
     "language": "python",
@@ -413,7 +413,7 @@
    "print(X_train.shape, y_train.shape, X_test.shape, y_test.shape, sep = '\\n')\n",
    "\n",
    "# get hold of the current run\n",
-   "run = Run.get_submitted_run()\n",
+   "run = Run.get_context()\n",
    "\n",
    "print('Train a logistic regression model with regularization rate of', args.reg)\n",
    "clf = LogisticRegression(C=1.0/args.reg, random_state=42)\n",
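A note on the `C=1.0/args.reg` line in the cell above: scikit-learn's `LogisticRegression` parameterizes regularization by its inverse strength `C`, so a larger regularization rate maps to a smaller `C`. A minimal sketch; the argument name mirrors the notebook's style, but the default value is illustrative:

```python
# Sketch: map a regularization-rate script argument to scikit-learn's C.
import argparse
from sklearn.linear_model import LogisticRegression

parser = argparse.ArgumentParser()
parser.add_argument('--reg', type=float, default=0.01)  # illustrative default
args = parser.parse_args()

# Larger reg -> smaller C -> stronger regularization.
clf = LogisticRegression(C=1.0 / args.reg, random_state=42)
```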
@@ -697,6 +697,11 @@
    }
   ],
   "metadata": {
+   "authors": [
+    {
+     "name": "roastala"
+    }
+   ],
    "kernelspec": {
     "display_name": "Python 3.6",
     "language": "python",
@@ -322,7 +322,6 @@
    "\n",
    "myenv = CondaDependencies()\n",
    "myenv.add_conda_package(\"scikit-learn\")\n",
-   "myenv.add_pip_package(\"pynacl==1.2.1\")\n",
    "\n",
    "with open(\"myenv.yml\",\"w\") as f:\n",
    "    f.write(myenv.serialize_to_string())"
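For reference, a minimal sketch of how the `CondaDependencies` cell builds the environment file; the commented pip line stands in for the pinned `pynacl` dependency this commit drops, and the package name there is a placeholder:

```python
# Sketch: declare dependencies and serialize them to myenv.yml for image builds.
from azureml.core.conda_dependencies import CondaDependencies

myenv = CondaDependencies()
myenv.add_conda_package("scikit-learn")
# myenv.add_pip_package("example-package==1.0")  # placeholder pip dependency

with open("myenv.yml", "w") as f:
    f.write(myenv.serialize_to_string())
```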
@@ -586,6 +585,11 @@
    }
   ],
   "metadata": {
+   "authors": [
+    {
+     "name": "roastala"
+    }
+   ],
    "kernelspec": {
     "display_name": "Python 3.6",
     "language": "python",
@@ -398,6 +398,11 @@
    }
   ],
   "metadata": {
+   "authors": [
+    {
+     "name": "jeffshep"
+    }
+   ],
    "kernelspec": {
     "display_name": "Python 3.6",
     "language": "python",