mirror of
https://github.com/Azure/MachineLearningNotebooks.git
synced 2025-12-20 09:37:04 -05:00
Compare commits
31 Commits
azureml-sd
...
sdk-codete
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6a145086d8 | ||
|
|
cb695c91ce | ||
|
|
de505d67bd | ||
|
|
f19cfa4630 | ||
|
|
7eed2e4b56 | ||
|
|
57b0f701f8 | ||
|
|
7db93bcb1d | ||
|
|
fcbe925640 | ||
|
|
bedfbd649e | ||
|
|
fb760f648d | ||
|
|
a9a0713d2f | ||
|
|
c9d018b52c | ||
|
|
53dbd0afcf | ||
|
|
e3a64b1f16 | ||
|
|
732eecfc7c | ||
|
|
6995c086ff | ||
|
|
80bba4c7ae | ||
|
|
3c581b533f | ||
|
|
cc688caa4e | ||
|
|
da225e116e | ||
|
|
73c5d02880 | ||
|
|
e472b54f1b | ||
|
|
716c6d8bb1 | ||
|
|
23189c6f40 | ||
|
|
361b57ed29 | ||
|
|
3f531fd211 | ||
|
|
111f5e8d73 | ||
|
|
96c59d5c2b | ||
|
|
ce3214b7c6 | ||
|
|
53199d17de | ||
|
|
54c883412c |
7
.amlignore
Normal file
7
.amlignore
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
.ipynb_checkpoints
|
||||||
|
azureml-logs
|
||||||
|
.azureml
|
||||||
|
.git
|
||||||
|
outputs
|
||||||
|
azureml-setup
|
||||||
|
docs
|
||||||
3
.vscode/settings.json
vendored
Normal file
3
.vscode/settings.json
vendored
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
{
|
||||||
|
"python.pythonPath": "C:\\Users\\sgilley\\.azureml\\envs\\jan3\\python.exe"
|
||||||
|
}
|
||||||
@@ -27,7 +27,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## Prerequisites\n",
|
"## Prerequisites\n",
|
||||||
"1. Make sure you go through the [00. Installation and Configuration](../../00.configuration.ipynb) Notebook first if you haven't. \n",
|
"1. Make sure you go through the [00. Installation and Configuration](00.configuration.ipynb) Notebook first if you haven't. \n",
|
||||||
"\n",
|
"\n",
|
||||||
"2. Install following pre-requisite libraries to your conda environment and restart notebook.\n",
|
"2. Install following pre-requisite libraries to your conda environment and restart notebook.\n",
|
||||||
"```shell\n",
|
"```shell\n",
|
||||||
@@ -525,7 +525,8 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"from azureml.core.conda_dependencies import CondaDependencies \n",
|
"from azureml.core.conda_dependencies import CondaDependencies \n",
|
||||||
"\n",
|
"\n",
|
||||||
"myenv = CondaDependencies.create(conda_packages=[\"scikit-learn\"])\n",
|
"myenv = CondaDependencies()\n",
|
||||||
|
"myenv.add_conda_package(\"scikit-learn\")\n",
|
||||||
"print(myenv.serialize_to_string())\n",
|
"print(myenv.serialize_to_string())\n",
|
||||||
"\n",
|
"\n",
|
||||||
"with open(\"myenv.yml\",\"w\") as f:\n",
|
"with open(\"myenv.yml\",\"w\") as f:\n",
|
||||||
@@ -679,7 +680,7 @@
|
|||||||
"# score the entire test set.\n",
|
"# score the entire test set.\n",
|
||||||
"test_samples = json.dumps({'data': X_test.tolist()})\n",
|
"test_samples = json.dumps({'data': X_test.tolist()})\n",
|
||||||
"\n",
|
"\n",
|
||||||
"result = service.run(input_data = test_samples)\n",
|
"result = json.loads(service.run(input_data = test_samples))['result']\n",
|
||||||
"residual = result - y_test"
|
"residual = result - y_test"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -777,6 +778,13 @@
|
|||||||
"%%time\n",
|
"%%time\n",
|
||||||
"service.delete()"
|
"service.delete()"
|
||||||
]
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
@@ -800,7 +808,7 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.6.6"
|
"version": "3.6.4"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
|
|||||||
@@ -21,9 +21,7 @@ def run(raw_data):
|
|||||||
data = json.loads(raw_data)['data']
|
data = json.loads(raw_data)['data']
|
||||||
data = np.array(data)
|
data = np.array(data)
|
||||||
result = model.predict(data)
|
result = model.predict(data)
|
||||||
|
return json.dumps({"result": result.tolist()})
|
||||||
# you can return any data type as long as it is JSON-serializable
|
|
||||||
return result.tolist()
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
result = str(e)
|
result = str(e)
|
||||||
return result
|
return json.dumps({"error": result})
|
||||||
|
|||||||
@@ -1 +0,0 @@
|
|||||||
/samples/
|
|
||||||
@@ -1,477 +0,0 @@
|
|||||||
{
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
|
||||||
"\n",
|
|
||||||
"Licensed under the MIT License."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# 02. Train locally\n",
|
|
||||||
"* Create or load workspace.\n",
|
|
||||||
"* Create scripts locally.\n",
|
|
||||||
"* Create `train.py` in a folder, along with a `my.lib` file.\n",
|
|
||||||
"* Configure & execute a local run in a user-managed Python environment.\n",
|
|
||||||
"* Configure & execute a local run in a system-managed Python environment.\n",
|
|
||||||
"* Configure & execute a local run in a Docker environment.\n",
|
|
||||||
"* Query run metrics to find the best model\n",
|
|
||||||
"* Register model for operationalization."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Prerequisites\n",
|
|
||||||
"Make sure you go through the [00. Installation and Configuration](00.configuration.ipynb) Notebook first if you haven't."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Check core SDK version number\n",
|
|
||||||
"import azureml.core\n",
|
|
||||||
"\n",
|
|
||||||
"print(\"SDK version:\", azureml.core.VERSION)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Initialize Workspace\n",
|
|
||||||
"\n",
|
|
||||||
"Initialize a workspace object from persisted configuration."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.workspace import Workspace\n",
|
|
||||||
"\n",
|
|
||||||
"ws = Workspace.from_config()\n",
|
|
||||||
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep='\\n')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Create An Experiment\n",
|
|
||||||
"**Experiment** is a logical container in an Azure ML Workspace. It hosts run records which can include run metrics and output artifacts from your experiments."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core import Experiment\n",
|
|
||||||
"experiment_name = 'train-on-local'\n",
|
|
||||||
"exp = Experiment(workspace=ws, name=experiment_name)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## View `train.py`\n",
|
|
||||||
"\n",
|
|
||||||
"`train.py` is already created for you."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"with open('./train.py', 'r') as f:\n",
|
|
||||||
" print(f.read())"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Note `train.py` also references a `mylib.py` file."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"with open('./mylib.py', 'r') as f:\n",
|
|
||||||
" print(f.read())"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Configure & Run\n",
|
|
||||||
"### User-managed environment\n",
|
|
||||||
"Below, we use a user-managed run, which means you are responsible to ensure all the necessary packages are available in the Python environment you choose to run the script."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.runconfig import RunConfiguration\n",
|
|
||||||
"\n",
|
|
||||||
"# Editing a run configuration property on-fly.\n",
|
|
||||||
"run_config_user_managed = RunConfiguration()\n",
|
|
||||||
"\n",
|
|
||||||
"run_config_user_managed.environment.python.user_managed_dependencies = True\n",
|
|
||||||
"\n",
|
|
||||||
"# You can choose a specific Python environment by pointing to a Python path \n",
|
|
||||||
"#run_config.environment.python.interpreter_path = '/home/johndoe/miniconda3/envs/sdk2/bin/python'"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"#### Submit script to run in the user-managed environment\n",
|
|
||||||
"Note whole script folder is submitted for execution, including the `mylib.py` file."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core import ScriptRunConfig\n",
|
|
||||||
"\n",
|
|
||||||
"src = ScriptRunConfig(source_directory='./', script='train.py', run_config=run_config_user_managed)\n",
|
|
||||||
"run = exp.submit(src)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"#### Get run history details"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"run"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Block to wait till run finishes."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"run.wait_for_completion(show_output=True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### System-managed environment\n",
|
|
||||||
"You can also ask the system to build a new conda environment and execute your scripts in it. The environment is built once and will be reused in subsequent executions as long as the conda dependencies remain unchanged. "
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.runconfig import RunConfiguration\n",
|
|
||||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
|
||||||
"\n",
|
|
||||||
"run_config_system_managed = RunConfiguration()\n",
|
|
||||||
"\n",
|
|
||||||
"run_config_system_managed.environment.python.user_managed_dependencies = False\n",
|
|
||||||
"run_config_system_managed.auto_prepare_environment = True\n",
|
|
||||||
"\n",
|
|
||||||
"# Specify conda dependencies with scikit-learn\n",
|
|
||||||
"cd = CondaDependencies.create(conda_packages=['scikit-learn'])\n",
|
|
||||||
"run_config_system_managed.environment.python.conda_dependencies = cd"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"#### Submit script to run in the system-managed environment\n",
|
|
||||||
"A new conda environment is built based on the conda dependencies object. If you are running this for the first time, this might take up to 5 mninutes. But this conda environment is reused so long as you don't change the conda dependencies."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"src = ScriptRunConfig(source_directory=\"./\", script='train.py', run_config=run_config_system_managed)\n",
|
|
||||||
"run = exp.submit(src)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"#### Get run history details"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"run"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Block and wait till run finishes."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"run.wait_for_completion(show_output = True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Docker-based execution\n",
|
|
||||||
"**IMPORTANT**: You must have Docker engine installed locally in order to use this execution mode. If your kernel is already running in a Docker container, such as **Azure Notebooks**, this mode will **NOT** work.\n",
|
|
||||||
"\n",
|
|
||||||
"You can also ask the system to pull down a Docker image and execute your scripts in it."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"run_config_docker = RunConfiguration()\n",
|
|
||||||
"run_config_docker.environment.python.user_managed_dependencies = False\n",
|
|
||||||
"run_config_docker.auto_prepare_environment = True\n",
|
|
||||||
"run_config_docker.environment.docker.enabled = True\n",
|
|
||||||
"run_config_docker.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n",
|
|
||||||
"\n",
|
|
||||||
"# Specify conda dependencies with scikit-learn\n",
|
|
||||||
"cd = CondaDependencies.create(conda_packages=['scikit-learn'])\n",
|
|
||||||
"run_config_docker.environment.python.conda_dependencies = cd\n",
|
|
||||||
"\n",
|
|
||||||
"src = ScriptRunConfig(source_directory=\"./\", script='train.py', run_config=run_config_docker)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Submit script to run in the system-managed environment\n",
|
|
||||||
"A new conda environment is built based on the conda dependencies object. If you are running this for the first time, this might take up to 5 mninutes. But this conda environment is reused so long as you don't change the conda dependencies.\n",
|
|
||||||
"\n",
|
|
||||||
"\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import subprocess\n",
|
|
||||||
"\n",
|
|
||||||
"# Check if Docker is installed and Linux containers are enables\n",
|
|
||||||
"if subprocess.run(\"docker -v\", shell=True) == 0:\n",
|
|
||||||
" out = subprocess.check_output(\"docker system info\", shell=True, encoding=\"ascii\").split(\"\\n\")\n",
|
|
||||||
" if not \"OSType: linux\" in out:\n",
|
|
||||||
" print(\"Switch Docker engine to use Linux containers.\")\n",
|
|
||||||
" else:\n",
|
|
||||||
" run = exp.submit(src)\n",
|
|
||||||
"else:\n",
|
|
||||||
" print(\"Docker engine not installed.\")"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"#Get run history details\n",
|
|
||||||
"run"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"run.wait_for_completion(show_output=True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Query run metrics"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"tags": [
|
|
||||||
"query history",
|
|
||||||
"get metrics"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# get all metris logged in the run\n",
|
|
||||||
"run.get_metrics()\n",
|
|
||||||
"metrics = run.get_metrics()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Let's find the model that has the lowest MSE value logged."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import numpy as np\n",
|
|
||||||
"\n",
|
|
||||||
"best_alpha = metrics['alpha'][np.argmin(metrics['mse'])]\n",
|
|
||||||
"\n",
|
|
||||||
"print('When alpha is {1:0.2f}, we have min MSE {0:0.2f}.'.format(\n",
|
|
||||||
" min(metrics['mse']), \n",
|
|
||||||
" best_alpha\n",
|
|
||||||
"))"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"You can also list all the files that are associated with this run record"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"run.get_file_names()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"We know the model `ridge_0.40.pkl` is the best performing model from the eariler queries. So let's register it with the workspace."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# supply a model name, and the full path to the serialized model file.\n",
|
|
||||||
"model = run.register_model(model_name='best_ridge_model', model_path='./outputs/ridge_0.40.pkl')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"print(model.name, model.version, model.url)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Now you can deploy this model following the example in the 01 notebook."
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"authors": [
|
|
||||||
{
|
|
||||||
"name": "roastala"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3.6",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python36"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.6.6"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 2
|
|
||||||
}
|
|
||||||
@@ -1,477 +1,470 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Licensed under the MIT License."
|
"Licensed under the MIT License."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"# 02. Train locally\n",
|
"# 02. Train locally\n",
|
||||||
"* Create or load workspace.\n",
|
"* Create or load workspace.\n",
|
||||||
"* Create scripts locally.\n",
|
"* Create scripts locally.\n",
|
||||||
"* Create `train.py` in a folder, along with a `my.lib` file.\n",
|
"* Create `train.py` in a folder, along with a `my.lib` file.\n",
|
||||||
"* Configure & execute a local run in a user-managed Python environment.\n",
|
"* Configure & execute a local run in a user-managed Python environment.\n",
|
||||||
"* Configure & execute a local run in a system-managed Python environment.\n",
|
"* Configure & execute a local run in a system-managed Python environment.\n",
|
||||||
"* Configure & execute a local run in a Docker environment.\n",
|
"* Configure & execute a local run in a Docker environment.\n",
|
||||||
"* Query run metrics to find the best model\n",
|
"* Query run metrics to find the best model\n",
|
||||||
"* Register model for operationalization."
|
"* Register model for operationalization."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## Prerequisites\n",
|
"## Prerequisites\n",
|
||||||
"Make sure you go through the [00. Installation and Configuration](00.configuration.ipynb) Notebook first if you haven't."
|
"Make sure you go through the [00. Installation and Configuration](00.configuration.ipynb) Notebook first if you haven't."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# Check core SDK version number\n",
|
"# Check core SDK version number\n",
|
||||||
"import azureml.core\n",
|
"import azureml.core\n",
|
||||||
"\n",
|
"\n",
|
||||||
"print(\"SDK version:\", azureml.core.VERSION)"
|
"print(\"SDK version:\", azureml.core.VERSION)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## Initialize Workspace\n",
|
"## Initialize Workspace\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Initialize a workspace object from persisted configuration."
|
"Initialize a workspace object from persisted configuration."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from azureml.core.workspace import Workspace\n",
|
"from azureml.core.workspace import Workspace\n",
|
||||||
"\n",
|
"\n",
|
||||||
"ws = Workspace.from_config()\n",
|
"ws = Workspace.from_config()\n",
|
||||||
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep='\\n')"
|
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep='\\n')"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## Create An Experiment\n",
|
"## Create An Experiment\n",
|
||||||
"**Experiment** is a logical container in an Azure ML Workspace. It hosts run records which can include run metrics and output artifacts from your experiments."
|
"**Experiment** is a logical container in an Azure ML Workspace. It hosts run records which can include run metrics and output artifacts from your experiments."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from azureml.core import Experiment\n",
|
"from azureml.core import Experiment\n",
|
||||||
"experiment_name = 'train-on-local'\n",
|
"experiment_name = 'train-on-local'\n",
|
||||||
"exp = Experiment(workspace=ws, name=experiment_name)"
|
"exp = Experiment(workspace=ws, name=experiment_name)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## View `train.py`\n",
|
"## View `train.py`\n",
|
||||||
"\n",
|
"\n",
|
||||||
"`train.py` is already created for you."
|
"`train.py` is already created for you."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"with open('./train.py', 'r') as f:\n",
|
"with open('./train.py', 'r') as f:\n",
|
||||||
" print(f.read())"
|
" print(f.read())"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"Note `train.py` also references a `mylib.py` file."
|
"Note `train.py` also references a `mylib.py` file."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"with open('./mylib.py', 'r') as f:\n",
|
"with open('./mylib.py', 'r') as f:\n",
|
||||||
" print(f.read())"
|
" print(f.read())"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## Configure & Run\n",
|
"## Configure & Run\n",
|
||||||
"### User-managed environment\n",
|
"### User-managed environment\n",
|
||||||
"Below, we use a user-managed run, which means you are responsible to ensure all the necessary packages are available in the Python environment you choose to run the script."
|
"Below, we use a user-managed run, which means you are responsible to ensure all the necessary packages are available in the Python environment you choose to run the script."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from azureml.core.runconfig import RunConfiguration\n",
|
"from azureml.core.runconfig import RunConfiguration\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Editing a run configuration property on-fly.\n",
|
"# Editing a run configuration property on-fly.\n",
|
||||||
"run_config_user_managed = RunConfiguration()\n",
|
"run_config_user_managed = RunConfiguration()\n",
|
||||||
"\n",
|
"\n",
|
||||||
"run_config_user_managed.environment.python.user_managed_dependencies = True\n",
|
"run_config_user_managed.environment.python.user_managed_dependencies = True\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# You can choose a specific Python environment by pointing to a Python path \n",
|
"# You can choose a specific Python environment by pointing to a Python path \n",
|
||||||
"#run_config.environment.python.interpreter_path = '/home/johndoe/miniconda3/envs/sdk2/bin/python'"
|
"#run_config.environment.python.interpreter_path = '/home/johndoe/miniconda3/envs/sdk2/bin/python'"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"#### Submit script to run in the user-managed environment\n",
|
"#### Submit script to run in the user-managed environment\n",
|
||||||
"Note whole script folder is submitted for execution, including the `mylib.py` file."
|
"Note whole script folder is submitted for execution, including the `mylib.py` file."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from azureml.core import ScriptRunConfig\n",
|
"from azureml.core import ScriptRunConfig\n",
|
||||||
"\n",
|
"\n",
|
||||||
"src = ScriptRunConfig(source_directory='./', script='train.py', run_config=run_config_user_managed)\n",
|
"src = ScriptRunConfig(source_directory='./', script='train.py', run_config=run_config_user_managed)\n",
|
||||||
"run = exp.submit(src)"
|
"run = exp.submit(src)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"#### Get run history details"
|
"#### Get run history details"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"run"
|
"run"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"Block to wait till run finishes."
|
"Block to wait till run finishes."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"run.wait_for_completion(show_output=True)"
|
"run.wait_for_completion(show_output=True)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### System-managed environment\n",
|
"### System-managed environment\n",
|
||||||
"You can also ask the system to build a new conda environment and execute your scripts in it. The environment is built once and will be reused in subsequent executions as long as the conda dependencies remain unchanged. "
|
"You can also ask the system to build a new conda environment and execute your scripts in it. The environment is built once and will be reused in subsequent executions as long as the conda dependencies remain unchanged. "
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from azureml.core.runconfig import RunConfiguration\n",
|
"from azureml.core.runconfig import RunConfiguration\n",
|
||||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||||
"\n",
|
"\n",
|
||||||
"run_config_system_managed = RunConfiguration()\n",
|
"run_config_system_managed = RunConfiguration()\n",
|
||||||
"\n",
|
"\n",
|
||||||
"run_config_system_managed.environment.python.user_managed_dependencies = False\n",
|
"run_config_system_managed.environment.python.user_managed_dependencies = False\n",
|
||||||
"run_config_system_managed.auto_prepare_environment = True\n",
|
"run_config_system_managed.auto_prepare_environment = True\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Specify conda dependencies with scikit-learn\n",
|
"# Specify conda dependencies with scikit-learn\n",
|
||||||
"cd = CondaDependencies.create(conda_packages=['scikit-learn'])\n",
|
"cd = CondaDependencies.create(conda_packages=['scikit-learn'])\n",
|
||||||
"run_config_system_managed.environment.python.conda_dependencies = cd"
|
"run_config_system_managed.environment.python.conda_dependencies = cd"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"#### Submit script to run in the system-managed environment\n",
|
"#### Submit script to run in the system-managed environment\n",
|
||||||
"A new conda environment is built based on the conda dependencies object. If you are running this for the first time, this might take up to 5 minutes. But this conda environment is reused so long as you don't change the conda dependencies."
|
"A new conda environment is built based on the conda dependencies object. If you are running this for the first time, this might take up to 5 minutes. But this conda environment is reused so long as you don't change the conda dependencies."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"src = ScriptRunConfig(source_directory=\"./\", script='train.py', run_config=run_config_system_managed)\n",
|
"src = ScriptRunConfig(source_directory=\"./\", script='train.py', run_config=run_config_system_managed)\n",
|
||||||
"run = exp.submit(src)"
|
"run = exp.submit(src)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"#### Get run history details"
|
"#### Get run history details"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"run"
|
"run"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"Block and wait till run finishes."
|
"Block and wait till run finishes."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"run.wait_for_completion(show_output = True)"
|
"run.wait_for_completion(show_output = True)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### Docker-based execution\n",
|
"### Docker-based execution\n",
|
||||||
"**IMPORTANT**: You must have Docker engine installed locally in order to use this execution mode. If your kernel is already running in a Docker container, such as **Azure Notebooks**, this mode will **NOT** work.\n",
|
"**IMPORTANT**: You must have Docker engine installed locally in order to use this execution mode. If your kernel is already running in a Docker container, such as **Azure Notebooks**, this mode will **NOT** work.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"You can also ask the system to pull down a Docker image and execute your scripts in it."
|
"You can also ask the system to pull down a Docker image and execute your scripts in it."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"run_config_docker = RunConfiguration()\n",
|
"from azureml.core.runconfig import RunConfiguration\n",
|
||||||
"run_config_docker.environment.python.user_managed_dependencies = False\n",
|
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||||
"run_config_docker.auto_prepare_environment = True\n",
|
"\n",
|
||||||
"run_config_docker.environment.docker.enabled = True\n",
|
"run_config_docker = RunConfiguration()\n",
|
||||||
"run_config_docker.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n",
|
"\n",
|
||||||
"\n",
|
"run_config_docker.environment.python.user_managed_dependencies = False\n",
|
||||||
"# Specify conda dependencies with scikit-learn\n",
|
"run_config_docker.auto_prepare_environment = True\n",
|
||||||
"cd = CondaDependencies.create(conda_packages=['scikit-learn'])\n",
|
"run_config_docker.environment.docker.enabled = True\n",
|
||||||
"run_config_docker.environment.python.conda_dependencies = cd\n",
|
"run_config_docker.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n",
|
||||||
"\n",
|
"\n",
|
||||||
"src = ScriptRunConfig(source_directory=\"./\", script='train.py', run_config=run_config_docker)"
|
"# Specify conda dependencies with scikit-learn\n",
|
||||||
]
|
"cd = CondaDependencies.create(conda_packages=['scikit-learn'])\n",
|
||||||
},
|
"run_config_docker.environment.python.conda_dependencies = cd"
|
||||||
{
|
]
|
||||||
"cell_type": "markdown",
|
},
|
||||||
"metadata": {},
|
{
|
||||||
"source": [
|
"cell_type": "markdown",
|
||||||
"Submit script to run in the system-managed environment\n",
|
"metadata": {},
|
||||||
"A new conda environment is built based on the conda dependencies object. If you are running this for the first time, this might take up to 5 minutes. But this conda environment is reused so long as you don't change the conda dependencies.\n",
|
"source": [
|
||||||
"\n",
|
"Submit script to run in the system-managed environment\n",
|
||||||
"\n"
|
"A new conda environment is built based on the conda dependencies object. If you are running this for the first time, this might take up to 5 minutes. But this conda environment is reused so long as you don't change the conda dependencies.\n",
|
||||||
]
|
"\n",
|
||||||
},
|
"\n"
|
||||||
{
|
]
|
||||||
"cell_type": "code",
|
},
|
||||||
"execution_count": null,
|
{
|
||||||
"metadata": {},
|
"cell_type": "code",
|
||||||
"outputs": [],
|
"execution_count": null,
|
||||||
"source": [
|
"metadata": {},
|
||||||
"import subprocess\n",
|
"outputs": [],
|
||||||
"\n",
|
"source": [
|
||||||
"# Check if Docker is installed and Linux containers are enabled\n",
|
"src = ScriptRunConfig(source_directory=\"./\", script='train.py', run_config=run_config_docker)\n",
|
||||||
"if subprocess.run(\"docker -v\", shell=True) == 0:\n",
|
"run = exp.submit(src)"
|
||||||
" out = subprocess.check_output(\"docker system info\", shell=True, encoding=\"ascii\").split(\"\\n\")\n",
|
]
|
||||||
" if not \"OSType: linux\" in out:\n",
|
},
|
||||||
" print(\"Switch Docker engine to use Linux containers.\")\n",
|
{
|
||||||
" else:\n",
|
"cell_type": "code",
|
||||||
" run = exp.submit(src)\n",
|
"execution_count": null,
|
||||||
"else:\n",
|
"metadata": {},
|
||||||
" print(\"Docker engine not installed.\")"
|
"outputs": [],
|
||||||
]
|
"source": [
|
||||||
},
|
"#Get run history details\n",
|
||||||
{
|
"run"
|
||||||
"cell_type": "code",
|
]
|
||||||
"execution_count": null,
|
},
|
||||||
"metadata": {},
|
{
|
||||||
"outputs": [],
|
"cell_type": "code",
|
||||||
"source": [
|
"execution_count": null,
|
||||||
"#Get run history details\n",
|
"metadata": {},
|
||||||
"run"
|
"outputs": [],
|
||||||
]
|
"source": [
|
||||||
},
|
"run.wait_for_completion(show_output=True)"
|
||||||
{
|
]
|
||||||
"cell_type": "code",
|
},
|
||||||
"execution_count": null,
|
{
|
||||||
"metadata": {},
|
"cell_type": "markdown",
|
||||||
"outputs": [],
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"run.wait_for_completion(show_output=True)"
|
"## Query run metrics"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "code",
|
||||||
"metadata": {},
|
"execution_count": null,
|
||||||
"source": [
|
"metadata": {
|
||||||
"## Query run metrics"
|
"tags": [
|
||||||
]
|
"query history",
|
||||||
},
|
"get metrics"
|
||||||
{
|
]
|
||||||
"cell_type": "code",
|
},
|
||||||
"execution_count": null,
|
"outputs": [],
|
||||||
"metadata": {
|
"source": [
|
||||||
"tags": [
|
"# get all metrics logged in the run\n",
|
||||||
"query history",
|
"run.get_metrics()\n",
|
||||||
"get metrics"
|
"metrics = run.get_metrics()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"outputs": [],
|
{
|
||||||
"source": [
|
"cell_type": "markdown",
|
||||||
"# get all metrics logged in the run\n",
|
"metadata": {},
|
||||||
"run.get_metrics()\n",
|
"source": [
|
||||||
"metrics = run.get_metrics()"
|
"Let's find the model that has the lowest MSE value logged."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "code",
|
||||||
"metadata": {},
|
"execution_count": null,
|
||||||
"source": [
|
"metadata": {},
|
||||||
"Let's find the model that has the lowest MSE value logged."
|
"outputs": [],
|
||||||
]
|
"source": [
|
||||||
},
|
"import numpy as np\n",
|
||||||
{
|
"\n",
|
||||||
"cell_type": "code",
|
"best_alpha = metrics['alpha'][np.argmin(metrics['mse'])]\n",
|
||||||
"execution_count": null,
|
"\n",
|
||||||
"metadata": {},
|
"print('When alpha is {1:0.2f}, we have min MSE {0:0.2f}.'.format(\n",
|
||||||
"outputs": [],
|
" min(metrics['mse']), \n",
|
||||||
"source": [
|
" best_alpha\n",
|
||||||
"import numpy as np\n",
|
"))"
|
||||||
"\n",
|
]
|
||||||
"best_alpha = metrics['alpha'][np.argmin(metrics['mse'])]\n",
|
},
|
||||||
"\n",
|
{
|
||||||
"print('When alpha is {1:0.2f}, we have min MSE {0:0.2f}.'.format(\n",
|
"cell_type": "markdown",
|
||||||
" min(metrics['mse']), \n",
|
"metadata": {},
|
||||||
" best_alpha\n",
|
"source": [
|
||||||
"))"
|
"You can also list all the files that are associated with this run record"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "code",
|
||||||
"metadata": {},
|
"execution_count": null,
|
||||||
"source": [
|
"metadata": {},
|
||||||
"You can also list all the files that are associated with this run record"
|
"outputs": [],
|
||||||
]
|
"source": [
|
||||||
},
|
"run.get_file_names()"
|
||||||
{
|
]
|
||||||
"cell_type": "code",
|
},
|
||||||
"execution_count": null,
|
{
|
||||||
"metadata": {},
|
"cell_type": "markdown",
|
||||||
"outputs": [],
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"run.get_file_names()"
|
"We know the model `ridge_0.40.pkl` is the best performing model from the earlier queries. So let's register it with the workspace."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "code",
|
||||||
"metadata": {},
|
"execution_count": null,
|
||||||
"source": [
|
"metadata": {},
|
||||||
"We know the model `ridge_0.40.pkl` is the best performing model from the earlier queries. So let's register it with the workspace."
|
"outputs": [],
|
||||||
]
|
"source": [
|
||||||
},
|
"# supply a model name, and the full path to the serialized model file.\n",
|
||||||
{
|
"model = run.register_model(model_name='best_ridge_model', model_path='./outputs/ridge_0.40.pkl')"
|
||||||
"cell_type": "code",
|
]
|
||||||
"execution_count": null,
|
},
|
||||||
"metadata": {},
|
{
|
||||||
"outputs": [],
|
"cell_type": "code",
|
||||||
"source": [
|
"execution_count": null,
|
||||||
"# supply a model name, and the full path to the serialized model file.\n",
|
"metadata": {},
|
||||||
"model = run.register_model(model_name='best_ridge_model', model_path='./outputs/ridge_0.40.pkl')"
|
"outputs": [],
|
||||||
]
|
"source": [
|
||||||
},
|
"print(model.name, model.version, model.url)"
|
||||||
{
|
]
|
||||||
"cell_type": "code",
|
},
|
||||||
"execution_count": null,
|
{
|
||||||
"metadata": {},
|
"cell_type": "markdown",
|
||||||
"outputs": [],
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"print(model.name, model.version, model.url)"
|
"Now you can deploy this model following the example in the 01 notebook."
|
||||||
]
|
]
|
||||||
},
|
}
|
||||||
{
|
],
|
||||||
"cell_type": "markdown",
|
"metadata": {
|
||||||
"metadata": {},
|
"authors": [
|
||||||
"source": [
|
{
|
||||||
"Now you can deploy this model following the example in the 01 notebook."
|
"name": "roastala"
|
||||||
]
|
}
|
||||||
}
|
],
|
||||||
],
|
"kernelspec": {
|
||||||
"metadata": {
|
"display_name": "Python 3.6",
|
||||||
"authors": [
|
"language": "python",
|
||||||
{
|
"name": "python36"
|
||||||
"name": "roastala"
|
},
|
||||||
}
|
"language_info": {
|
||||||
],
|
"codemirror_mode": {
|
||||||
"kernelspec": {
|
"name": "ipython",
|
||||||
"display_name": "Python 3.6",
|
"version": 3
|
||||||
"language": "python",
|
},
|
||||||
"name": "python36"
|
"file_extension": ".py",
|
||||||
},
|
"mimetype": "text/x-python",
|
||||||
"language_info": {
|
"name": "python",
|
||||||
"codemirror_mode": {
|
"nbconvert_exporter": "python",
|
||||||
"name": "ipython",
|
"pygments_lexer": "ipython3",
|
||||||
"version": 3
|
"version": "3.6.6"
|
||||||
},
|
}
|
||||||
"file_extension": ".py",
|
},
|
||||||
"mimetype": "text/x-python",
|
"nbformat": 4,
|
||||||
"name": "python",
|
"nbformat_minor": 2
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.6.6"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 2
|
|
||||||
}
|
}
|
||||||
@@ -1,325 +0,0 @@
|
|||||||
{
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
|
||||||
"\n",
|
|
||||||
"Licensed under the MIT License."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# 03. Train on Azure Container Instance (EXPERIMENTAL)\n",
|
|
||||||
"\n",
|
|
||||||
"* Create Workspace\n",
|
|
||||||
"* Create Project\n",
|
|
||||||
"* Create `train.py` in the project folder.\n",
|
|
||||||
"* Configure an ACI (Azure Container Instance) run\n",
|
|
||||||
"* Execute in ACI"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Prerequisites\n",
|
|
||||||
"Make sure you go through the [00. Installation and Configuration](00.configuration.ipynb) Notebook first if you haven't."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Check core SDK version number\n",
|
|
||||||
"import azureml.core\n",
|
|
||||||
"\n",
|
|
||||||
"print(\"SDK version:\", azureml.core.VERSION)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Initialize Workspace\n",
|
|
||||||
"\n",
|
|
||||||
"Initialize a workspace object from persisted configuration"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"tags": [
|
|
||||||
"create workspace"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core import Workspace\n",
|
|
||||||
"\n",
|
|
||||||
"ws = Workspace.from_config()\n",
|
|
||||||
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Create An Experiment\n",
|
|
||||||
"\n",
|
|
||||||
"**Experiment** is a logical container in an Azure ML Workspace. It hosts run records which can include run metrics and output artifacts from your experiments."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core import Experiment\n",
|
|
||||||
"experiment_name = 'train-on-aci'\n",
|
|
||||||
"experiment = Experiment(workspace = ws, name = experiment_name)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Create a folder to store the training script."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import os\n",
|
|
||||||
"script_folder = './samples/train-on-aci'\n",
|
|
||||||
"os.makedirs(script_folder, exist_ok = True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Remote execution on ACI\n",
|
|
||||||
"\n",
|
|
||||||
"Use `%%writefile` magic to write training code to `train.py` file under the project folder."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"%%writefile $script_folder/train.py\n",
|
|
||||||
"\n",
|
|
||||||
"import os\n",
|
|
||||||
"from sklearn.datasets import load_diabetes\n",
|
|
||||||
"from sklearn.linear_model import Ridge\n",
|
|
||||||
"from sklearn.metrics import mean_squared_error\n",
|
|
||||||
"from sklearn.model_selection import train_test_split\n",
|
|
||||||
"from azureml.core.run import Run\n",
|
|
||||||
"from sklearn.externals import joblib\n",
|
|
||||||
"\n",
|
|
||||||
"import numpy as np\n",
|
|
||||||
"\n",
|
|
||||||
"os.makedirs('./outputs', exist_ok=True)\n",
|
|
||||||
"\n",
|
|
||||||
"X, y = load_diabetes(return_X_y = True)\n",
|
|
||||||
"\n",
|
|
||||||
"run = Run.get_submitted_run()\n",
|
|
||||||
"\n",
|
|
||||||
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)\n",
|
|
||||||
"data = {\"train\": {\"X\": X_train, \"y\": y_train},\n",
|
|
||||||
" \"test\": {\"X\": X_test, \"y\": y_test}}\n",
|
|
||||||
"\n",
|
|
||||||
"# list of numbers from 0.0 to 1.0 with a 0.05 interval\n",
|
|
||||||
"alphas = np.arange(0.0, 1.0, 0.05)\n",
|
|
||||||
"\n",
|
|
||||||
"for alpha in alphas:\n",
|
|
||||||
" # Use Ridge algorithm to create a regression model\n",
|
|
||||||
" reg = Ridge(alpha = alpha)\n",
|
|
||||||
" reg.fit(data[\"train\"][\"X\"], data[\"train\"][\"y\"])\n",
|
|
||||||
"\n",
|
|
||||||
" preds = reg.predict(data[\"test\"][\"X\"])\n",
|
|
||||||
" mse = mean_squared_error(preds, data[\"test\"][\"y\"])\n",
|
|
||||||
" run.log('alpha', alpha)\n",
|
|
||||||
" run.log('mse', mse)\n",
|
|
||||||
" \n",
|
|
||||||
" model_file_name = 'ridge_{0:.2f}.pkl'.format(alpha)\n",
|
|
||||||
" with open(model_file_name, \"wb\") as file:\n",
|
|
||||||
" joblib.dump(value = reg, filename = 'outputs/' + model_file_name)\n",
|
|
||||||
"\n",
|
|
||||||
" print('alpha is {0:.2f}, and mse is {1:0.2f}'.format(alpha, mse))"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Configure for using ACI\n",
|
|
||||||
"Linux-based ACI is available in `westus`, `eastus`, `westeurope`, `northeurope`, `westus2` and `southeastasia` regions. See details [here](https://docs.microsoft.com/en-us/azure/container-instances/container-instances-quotas#region-availability)."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"tags": [
|
|
||||||
"configure run"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.runconfig import RunConfiguration\n",
|
|
||||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
|
||||||
"\n",
|
|
||||||
"# create a new runconfig object\n",
|
|
||||||
"run_config = RunConfiguration()\n",
|
|
||||||
"\n",
|
|
||||||
"# signal that you want to use ACI to execute script.\n",
|
|
||||||
"run_config.target = \"containerinstance\"\n",
|
|
||||||
"\n",
|
|
||||||
"# ACI container group is only supported in certain regions, which can be different than the region the Workspace is in.\n",
|
|
||||||
"run_config.container_instance.region = 'eastus'\n",
|
|
||||||
"\n",
|
|
||||||
"# set the ACI CPU and Memory \n",
|
|
||||||
"run_config.container_instance.cpu_cores = 1\n",
|
|
||||||
"run_config.container_instance.memory_gb = 2\n",
|
|
||||||
"\n",
|
|
||||||
"# enable Docker \n",
|
|
||||||
"run_config.environment.docker.enabled = True\n",
|
|
||||||
"\n",
|
|
||||||
"# set Docker base image to the default CPU-based image\n",
|
|
||||||
"run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n",
|
|
||||||
"#run_config.environment.docker.base_image = 'microsoft/mmlspark:plus-0.9.9'\n",
|
|
||||||
"\n",
|
|
||||||
"# use conda_dependencies.yml to create a conda environment in the Docker image for execution\n",
|
|
||||||
"run_config.environment.python.user_managed_dependencies = False\n",
|
|
||||||
"\n",
|
|
||||||
"# auto-prepare the Docker image when used for execution (if it is not already prepared)\n",
|
|
||||||
"run_config.auto_prepare_environment = True\n",
|
|
||||||
"\n",
|
|
||||||
"# specify CondaDependencies obj\n",
|
|
||||||
"run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Submit the Experiment\n",
|
|
||||||
"Finally, run the training job on the ACI"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"tags": [
|
|
||||||
"remote run",
|
|
||||||
"aci"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"%%time \n",
|
|
||||||
"from azureml.core.script_run_config import ScriptRunConfig\n",
|
|
||||||
"\n",
|
|
||||||
"script_run_config = ScriptRunConfig(source_directory = script_folder,\n",
|
|
||||||
" script= 'train.py',\n",
|
|
||||||
" run_config = run_config)\n",
|
|
||||||
"\n",
|
|
||||||
"run = experiment.submit(script_run_config)\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"tags": [
|
|
||||||
"remote run",
|
|
||||||
"aci"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"%%time\n",
|
|
||||||
"# Shows output of the run on stdout.\n",
|
|
||||||
"run.wait_for_completion(show_output = True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"tags": [
|
|
||||||
"query history"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Show run details\n",
|
|
||||||
"run"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"tags": [
|
|
||||||
"get metrics"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# get all metrics logged in the run\n",
|
|
||||||
"run.get_metrics()\n",
|
|
||||||
"metrics = run.get_metrics()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import numpy as np\n",
|
|
||||||
"print('When alpha is {1:0.2f}, we have min MSE {0:0.2f}.'.format(\n",
|
|
||||||
" min(metrics['mse']), \n",
|
|
||||||
" metrics['alpha'][np.argmin(metrics['mse'])]\n",
|
|
||||||
"))"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python3"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.6.5"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 2
|
|
||||||
}
|
|
||||||
@@ -1,289 +1,289 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Licensed under the MIT License."
|
"Licensed under the MIT License."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"# 03. Train on Azure Container Instance\n",
|
"# 03. Train on Azure Container Instance\n",
|
||||||
"\n",
|
"\n",
|
||||||
"* Create Workspace\n",
|
"* Create Workspace\n",
|
||||||
"* Create `train.py` in the project folder.\n",
|
"* Create `train.py` in the project folder.\n",
|
||||||
"* Configure an ACI (Azure Container Instance) run\n",
|
"* Configure an ACI (Azure Container Instance) run\n",
|
||||||
"* Execute in ACI"
|
"* Execute in ACI"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## Prerequisites\n",
|
"## Prerequisites\n",
|
||||||
"Make sure you go through the [00. Installation and Configuration](00.configuration.ipynb) Notebook first if you haven't."
|
"Make sure you go through the [00. Installation and Configuration](00.configuration.ipynb) Notebook first if you haven't."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# Check core SDK version number\n",
|
"# Check core SDK version number\n",
|
||||||
"import azureml.core\n",
|
"import azureml.core\n",
|
||||||
"\n",
|
"\n",
|
||||||
"print(\"SDK version:\", azureml.core.VERSION)"
|
"print(\"SDK version:\", azureml.core.VERSION)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## Initialize Workspace\n",
|
"## Initialize Workspace\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Initialize a workspace object from persisted configuration"
|
"Initialize a workspace object from persisted configuration"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"tags": [
|
"tags": [
|
||||||
"create workspace"
|
"create workspace"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from azureml.core import Workspace\n",
|
"from azureml.core import Workspace\n",
|
||||||
"\n",
|
"\n",
|
||||||
"ws = Workspace.from_config()\n",
|
"ws = Workspace.from_config()\n",
|
||||||
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')"
|
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## Create An Experiment\n",
|
"## Create An Experiment\n",
|
||||||
"\n",
|
"\n",
|
||||||
"**Experiment** is a logical container in an Azure ML Workspace. It hosts run records which can include run metrics and output artifacts from your experiments."
|
"**Experiment** is a logical container in an Azure ML Workspace. It hosts run records which can include run metrics and output artifacts from your experiments."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from azureml.core import Experiment\n",
|
"from azureml.core import Experiment\n",
|
||||||
"experiment_name = 'train-on-aci'\n",
|
"experiment_name = 'train-on-aci'\n",
|
||||||
"experiment = Experiment(workspace = ws, name = experiment_name)"
|
"experiment = Experiment(workspace = ws, name = experiment_name)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## Remote execution on ACI\n",
|
"## Remote execution on ACI\n",
|
||||||
"\n",
|
"\n",
|
||||||
"The training script `train.py` is already created for you. Let's have a look."
|
"The training script `train.py` is already created for you. Let's have a look."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"with open('./train.py', 'r') as f:\n",
|
"with open('./train.py', 'r') as f:\n",
|
||||||
" print(f.read())"
|
" print(f.read())"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## Configure for using ACI\n",
|
"## Configure for using ACI\n",
|
||||||
"Linux-based ACI is available in `West US`, `East US`, `West Europe`, `North Europe`, `West US 2`, `Southeast Asia`, `Australia East`, `East US 2`, and `Central US` regions. See details [here](https://docs.microsoft.com/en-us/azure/container-instances/container-instances-quotas#region-availability)."
|
"Linux-based ACI is available in `West US`, `East US`, `West Europe`, `North Europe`, `West US 2`, `Southeast Asia`, `Australia East`, `East US 2`, and `Central US` regions. See details [here](https://docs.microsoft.com/en-us/azure/container-instances/container-instances-quotas#region-availability)."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"tags": [
|
"tags": [
|
||||||
"configure run"
|
"configure run"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from azureml.core.runconfig import RunConfiguration\n",
|
"from azureml.core.runconfig import RunConfiguration\n",
|
||||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# create a new runconfig object\n",
|
"# create a new runconfig object\n",
|
||||||
"run_config = RunConfiguration()\n",
|
"run_config = RunConfiguration()\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# signal that you want to use ACI to execute script.\n",
|
"# signal that you want to use ACI to execute script.\n",
|
||||||
"run_config.target = \"containerinstance\"\n",
|
"run_config.target = \"containerinstance\"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# ACI container group is only supported in certain regions, which can be different than the region the Workspace is in.\n",
|
"# ACI container group is only supported in certain regions, which can be different than the region the Workspace is in.\n",
|
||||||
"run_config.container_instance.region = 'eastus2'\n",
|
"run_config.container_instance.region = 'eastus2'\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# set the ACI CPU and Memory \n",
|
"# set the ACI CPU and Memory \n",
|
||||||
"run_config.container_instance.cpu_cores = 1\n",
|
"run_config.container_instance.cpu_cores = 1\n",
|
||||||
"run_config.container_instance.memory_gb = 2\n",
|
"run_config.container_instance.memory_gb = 2\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# enable Docker \n",
|
"# enable Docker \n",
|
||||||
"run_config.environment.docker.enabled = True\n",
|
"run_config.environment.docker.enabled = True\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# set Docker base image to the default CPU-based image\n",
|
"# set Docker base image to the default CPU-based image\n",
|
||||||
"run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n",
|
"run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# use conda_dependencies.yml to create a conda environment in the Docker image for execution\n",
|
"# use conda_dependencies.yml to create a conda environment in the Docker image for execution\n",
|
||||||
"run_config.environment.python.user_managed_dependencies = False\n",
|
"run_config.environment.python.user_managed_dependencies = False\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# auto-prepare the Docker image when used for execution (if it is not already prepared)\n",
|
"# auto-prepare the Docker image when used for execution (if it is not already prepared)\n",
|
||||||
"run_config.auto_prepare_environment = True\n",
|
"run_config.auto_prepare_environment = True\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# specify CondaDependencies obj\n",
|
"# specify CondaDependencies obj\n",
|
||||||
"run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])"
|
"run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## Submit the Experiment\n",
|
"## Submit the Experiment\n",
|
||||||
"Finally, run the training job on the ACI"
|
"Finally, run the training job on the ACI"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"tags": [
|
"tags": [
|
||||||
"remote run",
|
"remote run",
|
||||||
"aci"
|
"aci"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"%%time \n",
|
"%%time \n",
|
||||||
"from azureml.core.script_run_config import ScriptRunConfig\n",
|
"from azureml.core.script_run_config import ScriptRunConfig\n",
|
||||||
"\n",
|
"\n",
|
||||||
"script_run_config = ScriptRunConfig(source_directory='./',\n",
|
"script_run_config = ScriptRunConfig(source_directory='./',\n",
|
||||||
" script='train.py',\n",
|
" script='train.py',\n",
|
||||||
" run_config=run_config)\n",
|
" run_config=run_config)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"run = experiment.submit(script_run_config)"
|
"run = experiment.submit(script_run_config)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"tags": [
|
"tags": [
|
||||||
"query history"
|
"query history"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# Show run details\n",
|
"# Show run details\n",
|
||||||
"run"
|
"run"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"tags": [
|
"tags": [
|
||||||
"remote run",
|
"remote run",
|
||||||
"aci"
|
"aci"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"%%time\n",
|
"%%time\n",
|
||||||
"# Shows output of the run on stdout.\n",
|
"# Shows output of the run on stdout.\n",
|
||||||
"run.wait_for_completion(show_output=True)"
|
"run.wait_for_completion(show_output=True)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"tags": [
|
"tags": [
|
||||||
"get metrics"
|
"get metrics"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# get all metris logged in the run\n",
|
"# get all metris logged in the run\n",
|
||||||
"run.get_metrics()\n",
|
"run.get_metrics()\n",
|
||||||
"metrics = run.get_metrics()"
|
"metrics = run.get_metrics()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"import numpy as np\n",
|
"import numpy as np\n",
|
||||||
"print('When alpha is {1:0.2f}, we have min MSE {0:0.2f}.'.format(\n",
|
"print('When alpha is {1:0.2f}, we have min MSE {0:0.2f}.'.format(\n",
|
||||||
" min(metrics['mse']), \n",
|
" min(metrics['mse']), \n",
|
||||||
" metrics['alpha'][np.argmin(metrics['mse'])]\n",
|
" metrics['alpha'][np.argmin(metrics['mse'])]\n",
|
||||||
"))"
|
"))"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# show all the files stored within the run record\n",
|
"# show all the files stored within the run record\n",
|
||||||
"run.get_file_names()"
|
"run.get_file_names()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"Now you can take a model produced here, register it and then deploy as a web service."
|
"Now you can take a model produced here, register it and then deploy as a web service."
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"authors": [
|
"authors": [
|
||||||
{
|
{
|
||||||
"name": "roastala"
|
"name": "roastala"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "Python 3.6",
|
"display_name": "Python 3.6",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
"name": "python36"
|
"name": "python36"
|
||||||
},
|
},
|
||||||
"language_info": {
|
"language_info": {
|
||||||
"codemirror_mode": {
|
"codemirror_mode": {
|
||||||
"name": "ipython",
|
"name": "ipython",
|
||||||
"version": 3
|
"version": 3
|
||||||
},
|
},
|
||||||
"file_extension": ".py",
|
"file_extension": ".py",
|
||||||
"mimetype": "text/x-python",
|
"mimetype": "text/x-python",
|
||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.6.6"
|
"version": "3.6.6"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
"nbformat_minor": 2
|
"nbformat_minor": 2
|
||||||
}
|
}
|
||||||
@@ -1,321 +0,0 @@
|
|||||||
{
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
|
||||||
"\n",
|
|
||||||
"Licensed under the MIT License."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# 04. Train in a remote VM (MLC managed DSVM)\n",
|
|
||||||
"* Create Workspace\n",
|
|
||||||
"* Create Project\n",
|
|
||||||
"* Create `train.py` file\n",
|
|
||||||
"* Create DSVM as Machine Learning Compute (MLC) resource\n",
|
|
||||||
"* Configure & execute a run in a conda environment in the default miniconda Docker container on DSVM"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Prerequisites\n",
|
|
||||||
"Make sure you go through the [00. Installation and Configuration](00.configuration.ipynb) Notebook first if you haven't."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Check core SDK version number\n",
|
|
||||||
"import azureml.core\n",
|
|
||||||
"\n",
|
|
||||||
"print(\"SDK version:\", azureml.core.VERSION)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Initialize Workspace\n",
|
|
||||||
"\n",
|
|
||||||
"Initialize a workspace object from persisted configuration."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core import Workspace\n",
|
|
||||||
"\n",
|
|
||||||
"ws = Workspace.from_config()\n",
|
|
||||||
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Create Experiment\n",
|
|
||||||
"\n",
|
|
||||||
"**Experiment** is a logical container in an Azure ML Workspace. It hosts run records which can include run metrics and output artifacts from your experiments."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"experiment_name = 'train-on-remote-vm'\n",
|
|
||||||
"\n",
|
|
||||||
"from azureml.core import Experiment\n",
|
|
||||||
"\n",
|
|
||||||
"exp = Experiment(workspace = ws, name = experiment_name)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## View `train.py`\n",
|
|
||||||
"\n",
|
|
||||||
"For convenience, we created a training script for you. It is printed below as a text, but you can also run `%pfile ./train.py` in a cell to show the file."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"with open('./train.py', 'r') as training_script:\n",
|
|
||||||
" print(training_script.read())"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Create Linux DSVM as a compute target\n",
|
|
||||||
"\n",
|
|
||||||
"**Note**: If creation fails with a message about Marketplace purchase eligibilty, go to portal.azure.com, start creating DSVM there, and select \"Want to create programmatically\" to enable programmatic creation. Once you've enabled it, you can exit without actually creating VM.\n",
|
|
||||||
" \n",
|
|
||||||
"**Note**: By default SSH runs on port 22 and you don't need to specify it. But if for security reasons you switch to a different port (such as 5022), you can append the port number to the address like the example below. [Read more](../../documentation/sdk/ssh-issue.md) on this."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.compute import DsvmCompute\n",
|
|
||||||
"from azureml.core.compute_target import ComputeTargetException\n",
|
|
||||||
"\n",
|
|
||||||
"compute_target_name = 'mydsvm'\n",
|
|
||||||
"\n",
|
|
||||||
"try:\n",
|
|
||||||
" dsvm_compute = DsvmCompute(workspace = ws, name = compute_target_name)\n",
|
|
||||||
" print('found existing:', dsvm_compute.name)\n",
|
|
||||||
"except ComputeTargetException:\n",
|
|
||||||
" print('creating new.')\n",
|
|
||||||
" dsvm_config = DsvmCompute.provisioning_configuration(vm_size = \"Standard_D2_v2\")\n",
|
|
||||||
" dsvm_compute = DsvmCompute.create(ws, name = compute_target_name, provisioning_configuration = dsvm_config)\n",
|
|
||||||
" dsvm_compute.wait_for_completion(show_output = True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Attach an existing Linux DSVM as a compute target\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"'''\n",
|
|
||||||
" from azureml.core.compute import RemoteCompute \n",
|
|
||||||
" # if you want to connect using SSH key instead of username/password you can provide parameters private_key_file and private_key_passphrase \n",
|
|
||||||
" dsvm_compute = RemoteCompute.attach(ws,name=\"attach-from-sdk6\",username=<username>,address=<ipaddress>,ssh_port=22,password=<password>)\n",
|
|
||||||
"'''"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Configure & Run"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Configure a Docker run with new conda environment on the VM\n",
|
|
||||||
"You can execute in a Docker container in the VM. If you choose this route, you don't need to install anything on the VM yourself. Azure ML execution service will take care of it for you."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.runconfig import RunConfiguration\n",
|
|
||||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"# Load the \"cpu-dsvm.runconfig\" file (created by the above attach operation) in memory\n",
|
|
||||||
"run_config = RunConfiguration(framework = \"python\")\n",
|
|
||||||
"\n",
|
|
||||||
"# Set compute target to the Linux DSVM\n",
|
|
||||||
"run_config.target = compute_target_name\n",
|
|
||||||
"\n",
|
|
||||||
"# Use Docker in the remote VM\n",
|
|
||||||
"run_config.environment.docker.enabled = True\n",
|
|
||||||
"\n",
|
|
||||||
"# Use CPU base image from DockerHub\n",
|
|
||||||
"run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n",
|
|
||||||
"print('Base Docker image is:', run_config.environment.docker.base_image)\n",
|
|
||||||
"\n",
|
|
||||||
"# Ask system to provision a new one based on the conda_dependencies.yml file\n",
|
|
||||||
"run_config.environment.python.user_managed_dependencies = False\n",
|
|
||||||
"\n",
|
|
||||||
"# Prepare the Docker and conda environment automatically when executingfor the first time.\n",
|
|
||||||
"run_config.prepare_environment = True\n",
|
|
||||||
"\n",
|
|
||||||
"# specify CondaDependencies obj\n",
|
|
||||||
"run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Submit the Experiment\n",
|
|
||||||
"Submit script to run in the Docker image in the remote VM. If you run this for the first time, the system will download the base image, layer in packages specified in the `conda_dependencies.yml` file on top of the base image, create a container and then execute the script in the container."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core import Run\n",
|
|
||||||
"from azureml.core import ScriptRunConfig\n",
|
|
||||||
"\n",
|
|
||||||
"src = ScriptRunConfig(source_directory = '.', script = 'train.py', run_config = run_config)\n",
|
|
||||||
"run = exp.submit(src)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### View run history details"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"run"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"run.wait_for_completion(show_output = True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Find the best run"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# get all metris logged in the run\n",
|
|
||||||
"run.get_metrics()\n",
|
|
||||||
"metrics = run.get_metrics()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import numpy as np\n",
|
|
||||||
"print('When alpha is {1:0.2f}, we have min MSE {0:0.2f}.'.format(\n",
|
|
||||||
" min(metrics['mse']), \n",
|
|
||||||
" metrics['alpha'][np.argmin(metrics['mse'])]\n",
|
|
||||||
"))"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Clean up compute resource"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"dsvm_compute.delete()"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python3"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.6.5"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 2
|
|
||||||
}
|
|
||||||
@@ -1,257 +0,0 @@
|
|||||||
{
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
|
||||||
"\n",
|
|
||||||
"Licensed under the MIT License."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# 05. Train in Spark\n",
|
|
||||||
"* Create Workspace\n",
|
|
||||||
"* Create Experiment\n",
|
|
||||||
"* Copy relevant files to the script folder\n",
|
|
||||||
"* Configure and Run"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Prerequisites\n",
|
|
||||||
"Make sure you go through the [00. Installation and Configuration](00.configuration.ipynb) Notebook first if you haven't."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Check core SDK version number\n",
|
|
||||||
"import azureml.core\n",
|
|
||||||
"\n",
|
|
||||||
"print(\"SDK version:\", azureml.core.VERSION)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Initialize Workspace\n",
|
|
||||||
"\n",
|
|
||||||
"Initialize a workspace object from persisted configuration."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core import Workspace\n",
|
|
||||||
"\n",
|
|
||||||
"ws = Workspace.from_config()\n",
|
|
||||||
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Create Experiment\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"experiment_name = 'train-on-remote-vm'\n",
|
|
||||||
"\n",
|
|
||||||
"from azureml.core import Experiment\n",
|
|
||||||
"\n",
|
|
||||||
"exp = Experiment(workspace = ws, name = experiment_name)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## View `train-spark.py`\n",
|
|
||||||
"\n",
|
|
||||||
"For convenience, we created a training script for you. It is printed below as a text, but you can also run `%pfile ./train-spark.py` in a cell to show the file."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"with open('train-spark.py', 'r') as training_script:\n",
|
|
||||||
" print(training_script.read())"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Configure & Run"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Attach an HDI cluster\n",
|
|
||||||
"To use HDI commpute target:\n",
|
|
||||||
" 1. Create an Spark for HDI cluster in Azure. Here is some [quick instructions](https://docs.microsoft.com/en-us/azure/machine-learning/desktop-workbench/how-to-create-dsvm-hdi). Make sure you use the Ubuntu flavor, NOT CentOS.\n",
|
|
||||||
" 2. Enter the IP address, username and password below"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.compute import HDInsightCompute\n",
|
|
||||||
"\n",
|
|
||||||
"try:\n",
|
|
||||||
" # if you want to connect using SSH key instead of username/password you can provide parameters private_key_file and private_key_passphrase\n",
|
|
||||||
" hdi_compute_new = HDInsightCompute.attach(ws, \n",
|
|
||||||
" name=\"hdi-attach\", \n",
|
|
||||||
" address=\"hdi-ignite-demo-ssh.azurehdinsight.net\", \n",
|
|
||||||
" ssh_port=22, \n",
|
|
||||||
" username='<username>', \n",
|
|
||||||
" password='<password>')\n",
|
|
||||||
"\n",
|
|
||||||
"except UserErrorException as e:\n",
|
|
||||||
" print(\"Caught = {}\".format(e.message))\n",
|
|
||||||
" print(\"Compute config already attached.\")\n",
|
|
||||||
" \n",
|
|
||||||
" \n",
|
|
||||||
"hdi_compute_new.wait_for_completion(show_output=True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Configure HDI run"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.runconfig import RunConfiguration\n",
|
|
||||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"# Load the \"cpu-dsvm.runconfig\" file (created by the above attach operation) in memory\n",
|
|
||||||
"run_config = RunConfiguration(framework = \"python\")\n",
|
|
||||||
"\n",
|
|
||||||
"# Set compute target to the Linux DSVM\n",
|
|
||||||
"run_config.target = hdi_compute.name\n",
|
|
||||||
"\n",
|
|
||||||
"# Use Docker in the remote VM\n",
|
|
||||||
"# run_config.environment.docker.enabled = True\n",
|
|
||||||
"\n",
|
|
||||||
"# Use CPU base image from DockerHub\n",
|
|
||||||
"# run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n",
|
|
||||||
"# print('Base Docker image is:', run_config.environment.docker.base_image)\n",
|
|
||||||
"\n",
|
|
||||||
"# Ask system to provision a new one based on the conda_dependencies.yml file\n",
|
|
||||||
"run_config.environment.python.user_managed_dependencies = False\n",
|
|
||||||
"\n",
|
|
||||||
"# Prepare the Docker and conda environment automatically when executingfor the first time.\n",
|
|
||||||
"# run_config.prepare_environment = True\n",
|
|
||||||
"\n",
|
|
||||||
"# specify CondaDependencies obj\n",
|
|
||||||
"# run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])\n",
|
|
||||||
"# load the runconfig object from the \"myhdi.runconfig\" file generated by the attach operaton above."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Submit the script to HDI"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"script_run_config = ScriptRunConfig(source_directory = '.',\n",
|
|
||||||
" script= 'train-spark.py',\n",
|
|
||||||
" run_config = run_config)\n",
|
|
||||||
"run = experiment.submit(script_run_config)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# get the URL of the run history web page\n",
|
|
||||||
"run"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"run.wait_for_completion(show_output = True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# get all metris logged in the run\n",
|
|
||||||
"metrics = run.get_metrics()\n",
|
|
||||||
"print(metrics)"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python3"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.6.5"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 2
|
|
||||||
}
|
|
||||||
@@ -1,331 +1,331 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Licensed under the MIT License."
|
"Licensed under the MIT License."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"# 05. Train in Spark\n",
|
"# 05. Train in Spark\n",
|
||||||
"* Create Workspace\n",
|
"* Create Workspace\n",
|
||||||
"* Create Experiment\n",
|
"* Create Experiment\n",
|
||||||
"* Copy relevant files to the script folder\n",
|
"* Copy relevant files to the script folder\n",
|
||||||
"* Configure and Run"
|
"* Configure and Run"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## Prerequisites\n",
|
"## Prerequisites\n",
|
||||||
"Make sure you go through the [00. Installation and Configuration](00.configuration.ipynb) Notebook first if you haven't."
|
"Make sure you go through the [00. Installation and Configuration](00.configuration.ipynb) Notebook first if you haven't."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# Check core SDK version number\n",
|
"# Check core SDK version number\n",
|
||||||
"import azureml.core\n",
|
"import azureml.core\n",
|
||||||
"\n",
|
"\n",
|
||||||
"print(\"SDK version:\", azureml.core.VERSION)"
|
"print(\"SDK version:\", azureml.core.VERSION)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## Initialize Workspace\n",
|
"## Initialize Workspace\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Initialize a workspace object from persisted configuration."
|
"Initialize a workspace object from persisted configuration."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from azureml.core import Workspace\n",
|
"from azureml.core import Workspace\n",
|
||||||
"\n",
|
"\n",
|
||||||
"ws = Workspace.from_config()\n",
|
"ws = Workspace.from_config()\n",
|
||||||
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep='\\n')"
|
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep='\\n')"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## Create Experiment\n"
|
"## Create Experiment\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"experiment_name = 'train-on-spark'\n",
|
"experiment_name = 'train-on-spark'\n",
|
||||||
"\n",
|
"\n",
|
||||||
"from azureml.core import Experiment\n",
|
"from azureml.core import Experiment\n",
|
||||||
"exp = Experiment(workspace=ws, name=experiment_name)"
|
"exp = Experiment(workspace=ws, name=experiment_name)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## View `train-spark.py`\n",
|
"## View `train-spark.py`\n",
|
||||||
"\n",
|
"\n",
|
||||||
"For convenience, we created a training script for you. It is printed below as a text, but you can also run `%pfile ./train-spark.py` in a cell to show the file."
|
"For convenience, we created a training script for you. It is printed below as a text, but you can also run `%pfile ./train-spark.py` in a cell to show the file."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"with open('train-spark.py', 'r') as training_script:\n",
|
"with open('train-spark.py', 'r') as training_script:\n",
|
||||||
" print(training_script.read())"
|
" print(training_script.read())"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## Configure & Run"
|
"## Configure & Run"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### Configure an ACI run\n",
|
"### Configure an ACI run\n",
|
||||||
"Before you try running on an actual Spark cluster, you can use a Docker image with Spark already baked in, and run it in ACI(Azure Container Registry)."
|
"Before you try running on an actual Spark cluster, you can use a Docker image with Spark already baked in, and run it in ACI(Azure Container Registry)."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from azureml.core.runconfig import RunConfiguration\n",
|
"from azureml.core.runconfig import RunConfiguration\n",
|
||||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# use pyspark framework\n",
|
"# use pyspark framework\n",
|
||||||
"aci_run_config = RunConfiguration(framework=\"pyspark\")\n",
|
"aci_run_config = RunConfiguration(framework=\"pyspark\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# use ACI to run the Spark job\n",
|
"# use ACI to run the Spark job\n",
|
||||||
"aci_run_config.target = 'containerinstance'\n",
|
"aci_run_config.target = 'containerinstance'\n",
|
||||||
"aci_run_config.container_instance.region = 'eastus2'\n",
|
"aci_run_config.container_instance.region = 'eastus2'\n",
|
||||||
"aci_run_config.container_instance.cpu_cores = 1\n",
|
"aci_run_config.container_instance.cpu_cores = 1\n",
|
||||||
"aci_run_config.container_instance.memory_gb = 2\n",
|
"aci_run_config.container_instance.memory_gb = 2\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# specify base Docker image to use\n",
|
"# specify base Docker image to use\n",
|
||||||
"aci_run_config.environment.docker.enabled = True\n",
|
"aci_run_config.environment.docker.enabled = True\n",
|
||||||
"aci_run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_MMLSPARK_CPU_IMAGE\n",
|
"aci_run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_MMLSPARK_CPU_IMAGE\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# specify CondaDependencies\n",
|
"# specify CondaDependencies\n",
|
||||||
"cd = CondaDependencies()\n",
|
"cd = CondaDependencies()\n",
|
||||||
"cd.add_conda_package('numpy')\n",
|
"cd.add_conda_package('numpy')\n",
|
||||||
"aci_run_config.environment.python.conda_dependencies = cd"
|
"aci_run_config.environment.python.conda_dependencies = cd"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### Submit script to ACI to run"
|
"### Submit script to ACI to run"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from azureml.core import ScriptRunConfig\n",
|
"from azureml.core import ScriptRunConfig\n",
|
||||||
"\n",
|
"\n",
|
||||||
"script_run_config = ScriptRunConfig(source_directory = '.',\n",
|
"script_run_config = ScriptRunConfig(source_directory = '.',\n",
|
||||||
" script= 'train-spark.py',\n",
|
" script= 'train-spark.py',\n",
|
||||||
" run_config = aci_run_config)\n",
|
" run_config = aci_run_config)\n",
|
||||||
"run = exp.submit(script_run_config)"
|
"run = exp.submit(script_run_config)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"run"
|
"run"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"run.wait_for_completion(show_output=True)"
|
"run.wait_for_completion(show_output=True)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"**Note** you can also create a new VM, or attach an existing VM, and use Docker-based execution to run the Spark job. Please see the `04.train-in-vm` for example on how to configure and run in Docker mode in a VM."
|
"**Note** you can also create a new VM, or attach an existing VM, and use Docker-based execution to run the Spark job. Please see the `04.train-in-vm` for example on how to configure and run in Docker mode in a VM."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### Attach an HDI cluster\n",
|
"### Attach an HDI cluster\n",
|
||||||
"Now we can use a real Spark cluster, HDInsight for Spark, to run this job. To use HDI commpute target:\n",
|
"Now we can use a real Spark cluster, HDInsight for Spark, to run this job. To use HDI commpute target:\n",
|
||||||
" 1. Create a Spark for HDI cluster in Azure. Here are some [quick instructions](https://docs.microsoft.com/en-us/azure/hdinsight/spark/apache-spark-jupyter-spark-sql). Make sure you use the Ubuntu flavor, NOT CentOS.\n",
|
" 1. Create a Spark for HDI cluster in Azure. Here are some [quick instructions](https://docs.microsoft.com/en-us/azure/hdinsight/spark/apache-spark-jupyter-spark-sql). Make sure you use the Ubuntu flavor, NOT CentOS.\n",
|
||||||
" 2. Enter the IP address, username and password below"
|
" 2. Enter the IP address, username and password below"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from azureml.core.compute import HDInsightCompute\n",
|
"from azureml.core.compute import HDInsightCompute\n",
|
||||||
"from azureml.exceptions import ComputeTargetException\n",
|
"from azureml.exceptions import ComputeTargetException\n",
|
||||||
"\n",
|
"\n",
|
||||||
"try:\n",
|
"try:\n",
|
||||||
" # if you want to connect using SSH key instead of username/password you can provide parameters private_key_file and private_key_passphrase\n",
|
" # if you want to connect using SSH key instead of username/password you can provide parameters private_key_file and private_key_passphrase\n",
|
||||||
" hdi_compute = HDInsightCompute.attach(workspace=ws, \n",
|
" hdi_compute = HDInsightCompute.attach(workspace=ws, \n",
|
||||||
" name=\"myhdi\", \n",
|
" name=\"myhdi\", \n",
|
||||||
" address=\"<myhdi-ssh>.azurehdinsight.net\", \n",
|
" address=\"<myhdi-ssh>.azurehdinsight.net\", \n",
|
||||||
" ssh_port=22, \n",
|
" ssh_port=22, \n",
|
||||||
" username='<ssh-username>', \n",
|
" username='<ssh-username>', \n",
|
||||||
" password='<ssh-pwd>')\n",
|
" password='<ssh-pwd>')\n",
|
||||||
"\n",
|
"\n",
|
||||||
"except ComputeTargetException as e:\n",
|
"except ComputeTargetException as e:\n",
|
||||||
" print(\"Caught = {}\".format(e.message))\n",
|
" print(\"Caught = {}\".format(e.message))\n",
|
||||||
" \n",
|
" \n",
|
||||||
" \n",
|
" \n",
|
||||||
"hdi_compute.wait_for_completion(show_output=True)"
|
"hdi_compute.wait_for_completion(show_output=True)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### Configure HDI run"
|
"### Configure HDI run"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from azureml.core.runconfig import RunConfiguration\n",
|
"from azureml.core.runconfig import RunConfiguration\n",
|
||||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# use pyspark framework\n",
|
"# use pyspark framework\n",
|
||||||
"hdi_run_config = RunConfiguration(framework=\"pyspark\")\n",
|
"hdi_run_config = RunConfiguration(framework=\"pyspark\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Set compute target to the HDI cluster\n",
|
"# Set compute target to the HDI cluster\n",
|
||||||
"hdi_run_config.target = hdi_compute.name\n",
|
"hdi_run_config.target = hdi_compute.name\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# specify CondaDependencies object to ask system installing numpy\n",
|
"# specify CondaDependencies object to ask system installing numpy\n",
|
||||||
"cd = CondaDependencies()\n",
|
"cd = CondaDependencies()\n",
|
||||||
"cd.add_conda_package('numpy')\n",
|
"cd.add_conda_package('numpy')\n",
|
||||||
"hdi_run_config.environment.python.conda_dependencies = cd"
|
"hdi_run_config.environment.python.conda_dependencies = cd"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### Submit the script to HDI"
|
"### Submit the script to HDI"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from azureml.core import ScriptRunConfig\n",
|
"from azureml.core import ScriptRunConfig\n",
|
||||||
"\n",
|
"\n",
|
||||||
"script_run_config = ScriptRunConfig(source_directory = '.',\n",
|
"script_run_config = ScriptRunConfig(source_directory = '.',\n",
|
||||||
" script= 'train-spark.py',\n",
|
" script= 'train-spark.py',\n",
|
||||||
" run_config = hdi_run_config)\n",
|
" run_config = hdi_run_config)\n",
|
||||||
"run = exp.submit(config=script_run_config)"
|
"run = exp.submit(config=script_run_config)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# get the URL of the run history web page\n",
|
"# get the URL of the run history web page\n",
|
||||||
"run"
|
"run"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# get all metris logged in the run\n",
|
"# get all metris logged in the run\n",
|
||||||
"metrics = run.get_metrics()\n",
|
"metrics = run.get_metrics()\n",
|
||||||
"print(metrics)"
|
"print(metrics)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": []
|
"source": []
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"authors": [
|
"authors": [
|
||||||
{
|
{
|
||||||
"name": "aashishb"
|
"name": "aashishb"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "Python 3.6",
|
"display_name": "Python 3.6",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
"name": "python36"
|
"name": "python36"
|
||||||
},
|
},
|
||||||
"language_info": {
|
"language_info": {
|
||||||
"codemirror_mode": {
|
"codemirror_mode": {
|
||||||
"name": "ipython",
|
"name": "ipython",
|
||||||
"version": 3
|
"version": 3
|
||||||
},
|
},
|
||||||
"file_extension": ".py",
|
"file_extension": ".py",
|
||||||
"mimetype": "text/x-python",
|
"mimetype": "text/x-python",
|
||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.6.6"
|
"version": "3.6.6"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
"nbformat_minor": 2
|
"nbformat_minor": 2
|
||||||
}
|
}
|
||||||
@@ -192,11 +192,9 @@
|
|||||||
" data = json.loads(raw_data)['data']\n",
|
" data = json.loads(raw_data)['data']\n",
|
||||||
" data = numpy.array(data)\n",
|
" data = numpy.array(data)\n",
|
||||||
" result = model.predict(data)\n",
|
" result = model.predict(data)\n",
|
||||||
" # you can return any datatype as long as it is JSON-serializable\n",
|
|
||||||
" return result.tolist()\n",
|
|
||||||
" except Exception as e:\n",
|
" except Exception as e:\n",
|
||||||
" error = str(e)\n",
|
" result = str(e)\n",
|
||||||
" return error"
|
" return json.dumps({\"result\": result.tolist()})"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -365,7 +363,7 @@
|
|||||||
"]})\n",
|
"]})\n",
|
||||||
"test_sample = bytes(test_sample,encoding = 'utf8')\n",
|
"test_sample = bytes(test_sample,encoding = 'utf8')\n",
|
||||||
"\n",
|
"\n",
|
||||||
"prediction = aci_service.run(input_data=test_sample)\n",
|
"prediction = aci_service.run(input_data = test_sample)\n",
|
||||||
"print(prediction)"
|
"print(prediction)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -389,6 +387,13 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"aci_service.delete()"
|
"aci_service.delete()"
|
||||||
]
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
@@ -412,7 +417,7 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.6.6"
|
"version": "3.6.5"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
|
|||||||
@@ -122,11 +122,9 @@
|
|||||||
" data = json.loads(raw_data)['data']\n",
|
" data = json.loads(raw_data)['data']\n",
|
||||||
" data = numpy.array(data)\n",
|
" data = numpy.array(data)\n",
|
||||||
" result = model.predict(data)\n",
|
" result = model.predict(data)\n",
|
||||||
" # you can return any data type as long as it is JSON-serializable\n",
|
|
||||||
" return result.tolist()\n",
|
|
||||||
" except Exception as e:\n",
|
" except Exception as e:\n",
|
||||||
" error = str(e)\n",
|
" result = str(e)\n",
|
||||||
" return error"
|
" return json.dumps({\"result\": result.tolist()})"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -334,7 +332,7 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.6.6"
|
"version": "3.6.5"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
|
|||||||
@@ -156,12 +156,11 @@
|
|||||||
" inputs_dc.collect(data) #this call is saving our input data into our blob\n",
|
" inputs_dc.collect(data) #this call is saving our input data into our blob\n",
|
||||||
" prediction_dc.collect(result)#this call is saving our prediction data into our blob\n",
|
" prediction_dc.collect(result)#this call is saving our prediction data into our blob\n",
|
||||||
" print (\"saving prediction data\" + time.strftime(\"%H:%M:%S\"))\n",
|
" print (\"saving prediction data\" + time.strftime(\"%H:%M:%S\"))\n",
|
||||||
" # you can return any data type as long as it is JSON-serializable\n",
|
" return json.dumps({\"result\": result.tolist()})\n",
|
||||||
" return result.tolist()\n",
|
|
||||||
" except Exception as e:\n",
|
" except Exception as e:\n",
|
||||||
" error = str(e)\n",
|
" result = str(e)\n",
|
||||||
" print (error + time.strftime(\"%H:%M:%S\"))\n",
|
" print (result + time.strftime(\"%H:%M:%S\"))\n",
|
||||||
" return error"
|
" return json.dumps({\"error\": result})"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -161,12 +161,13 @@
|
|||||||
" \n",
|
" \n",
|
||||||
" #Print statement for appinsights custom traces:\n",
|
" #Print statement for appinsights custom traces:\n",
|
||||||
" print (\"saving prediction data\" + time.strftime(\"%H:%M:%S\"))\n",
|
" print (\"saving prediction data\" + time.strftime(\"%H:%M:%S\"))\n",
|
||||||
" # you can return any data type as long as it is JSON-serializable\n",
|
" \n",
|
||||||
" return result.tolist()\n",
|
" return json.dumps({\"result\": result.tolist()})\n",
|
||||||
|
" \n",
|
||||||
" except Exception as e:\n",
|
" except Exception as e:\n",
|
||||||
" error = str(e)\n",
|
" result = str(e)\n",
|
||||||
" print (error + time.strftime(\"%H:%M:%S\"))\n",
|
" print (result + time.strftime(\"%H:%M:%S\"))\n",
|
||||||
" return error"
|
" return json.dumps({\"error\": result})"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -349,9 +350,9 @@
|
|||||||
" [1,28,13,45,54,6,57,8,8,10], \n",
|
" [1,28,13,45,54,6,57,8,8,10], \n",
|
||||||
" [101,9,8,37,6,45,4,3,2,41]\n",
|
" [101,9,8,37,6,45,4,3,2,41]\n",
|
||||||
"]})\n",
|
"]})\n",
|
||||||
"test_sample = bytes(test_sample,encoding='utf8')\n",
|
"test_sample = bytes(test_sample,encoding = 'utf8')\n",
|
||||||
"\n",
|
"\n",
|
||||||
"prediction = aks_service.run(input_data=test_sample)\n",
|
"prediction = aks_service.run(input_data = test_sample)\n",
|
||||||
"print(prediction)"
|
"print(prediction)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|||||||
22
README.md
22
README.md
@@ -1,5 +1,10 @@
|
|||||||
For full documentation for Azure Machine Learning service, visit **https://aka.ms/aml-docs**.
|
Get the full documentation for Azure Machine Learning service at:
|
||||||
# Sample Notebooks for Azure Machine Learning service
|
|
||||||
|
https://docs.microsoft.com/azure/machine-learning/service/
|
||||||
|
|
||||||
|
<br>
|
||||||
|
|
||||||
|
# Sample notebooks for Azure Machine Learning service
|
||||||
|
|
||||||
To run the notebooks in this repository use one of these methods:
|
To run the notebooks in this repository use one of these methods:
|
||||||
|
|
||||||
@@ -17,24 +22,13 @@ To run the notebooks in this repository use one of these methods:
|
|||||||
|
|
||||||
## **Use your own notebook server**
|
## **Use your own notebook server**
|
||||||
|
|
||||||
Video walkthrough:
|
|
||||||
|
|
||||||
[](https://youtu.be/VIsXeTuW3FU)
|
|
||||||
|
|
||||||
1. Setup a Jupyter Notebook server and [install the Azure Machine Learning SDK](https://docs.microsoft.com/en-us/azure/machine-learning/service/quickstart-create-workspace-with-python).
|
1. Setup a Jupyter Notebook server and [install the Azure Machine Learning SDK](https://docs.microsoft.com/en-us/azure/machine-learning/service/quickstart-create-workspace-with-python).
|
||||||
1. Clone [this repository](https://aka.ms/aml-notebooks).
|
1. Clone [this repository](https://aka.ms/aml-notebooks).
|
||||||
1. You may need to install other packages for specific notebook.
|
1. You may need to install other packages for specific notebooks
|
||||||
- For example, to run the Azure Machine Learning Data Prep notebooks, install the extra dataprep SDK:
|
|
||||||
```
|
|
||||||
pip install --upgrade azureml-dataprep
|
|
||||||
```
|
|
||||||
|
|
||||||
1. Start your notebook server.
|
1. Start your notebook server.
|
||||||
1. Follow the instructions in the [00.configuration](00.configuration.ipynb) notebook to create and connect to a workspace.
|
1. Follow the instructions in the [00.configuration](00.configuration.ipynb) notebook to create and connect to a workspace.
|
||||||
1. Open one of the sample notebooks.
|
1. Open one of the sample notebooks.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
> Note: **Looking for automated machine learning samples?**
|
> Note: **Looking for automated machine learning samples?**
|
||||||
> For your convenience, you can use an installation script instead of the steps below for the automated ML notebooks. Go to the [automl folder README](automl/README.md) and follow the instructions. The script installs all packages needed for notebooks in that folder.
|
> For your convenience, you can use an installation script instead of the steps below for the automated ML notebooks. Go to the [automl folder README](automl/README.md) and follow the instructions. The script installs all packages needed for notebooks in that folder.
|
||||||
|
|
||||||
|
|||||||
15
aml_config/conda_dependencies.yml
Normal file
15
aml_config/conda_dependencies.yml
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
# Conda environment specification. The dependencies defined in this file will
|
||||||
|
|
||||||
|
# be automatically provisioned for runs with userManagedDependencies=False.
|
||||||
|
|
||||||
|
|
||||||
|
# Details about the Conda environment file format:
|
||||||
|
|
||||||
|
# https://conda.io/docs/user-guide/tasks/manage-environments.html#create-env-file-manually
|
||||||
|
|
||||||
|
|
||||||
|
name: project_environment
|
||||||
|
dependencies:
|
||||||
|
# The python interpreter version.
|
||||||
|
|
||||||
|
# Currently Azure ML only supports 3.5.2 and later.
|
||||||
115
aml_config/docker.runconfig
Normal file
115
aml_config/docker.runconfig
Normal file
@@ -0,0 +1,115 @@
|
|||||||
|
# The script to run.
|
||||||
|
script: train.py
|
||||||
|
# The arguments to the script file.
|
||||||
|
arguments: []
|
||||||
|
# The name of the compute target to use for this run.
|
||||||
|
target: local
|
||||||
|
# Framework to execute inside. Allowed values are "Python" , "PySpark", "CNTK", "TensorFlow", and "PyTorch".
|
||||||
|
framework: PySpark
|
||||||
|
# Communicator for the given framework. Allowed values are "None" , "ParameterServer", "OpenMpi", and "IntelMpi".
|
||||||
|
communicator: None
|
||||||
|
# Automatically prepare the run environment as part of the run itself.
|
||||||
|
autoPrepareEnvironment: true
|
||||||
|
# Maximum allowed duration for the run.
|
||||||
|
maxRunDurationSeconds:
|
||||||
|
# Number of nodes to use for running job.
|
||||||
|
nodeCount: 1
|
||||||
|
# Environment details.
|
||||||
|
environment:
|
||||||
|
# Environment variables set for the run.
|
||||||
|
environmentVariables:
|
||||||
|
EXAMPLE_ENV_VAR: EXAMPLE_VALUE
|
||||||
|
# Python details
|
||||||
|
python:
|
||||||
|
# user_managed_dependencies=True indicates that the environmentwill be user managed. False indicates that AzureML willmanage the user environment.
|
||||||
|
userManagedDependencies: false
|
||||||
|
# The python interpreter path
|
||||||
|
interpreterPath: python
|
||||||
|
# Path to the conda dependencies file to use for this run. If a project
|
||||||
|
# contains multiple programs with different sets of dependencies, it may be
|
||||||
|
# convenient to manage those environments with separate files.
|
||||||
|
condaDependenciesFile: aml_config/conda_dependencies.yml
|
||||||
|
# Docker details
|
||||||
|
docker:
|
||||||
|
# Set True to perform this run inside a Docker container.
|
||||||
|
enabled: true
|
||||||
|
# Base image used for Docker-based runs.
|
||||||
|
baseImage: mcr.microsoft.com/azureml/base:0.2.0
|
||||||
|
# Set False if necessary to work around shared volume bugs.
|
||||||
|
sharedVolumes: true
|
||||||
|
# Run with NVidia Docker extension to support GPUs.
|
||||||
|
gpuSupport: false
|
||||||
|
# Extra arguments to the Docker run command.
|
||||||
|
arguments: []
|
||||||
|
# Image registry that contains the base image.
|
||||||
|
baseImageRegistry:
|
||||||
|
# DNS name or IP address of azure container registry(ACR)
|
||||||
|
address:
|
||||||
|
# The username for ACR
|
||||||
|
username:
|
||||||
|
# The password for ACR
|
||||||
|
password:
|
||||||
|
# Spark details
|
||||||
|
spark:
|
||||||
|
# List of spark repositories.
|
||||||
|
repositories:
|
||||||
|
- https://mmlspark.azureedge.net/maven
|
||||||
|
packages:
|
||||||
|
- group: com.microsoft.ml.spark
|
||||||
|
artifact: mmlspark_2.11
|
||||||
|
version: '0.12'
|
||||||
|
precachePackages: true
|
||||||
|
# Databricks details
|
||||||
|
databricks:
|
||||||
|
# List of maven libraries.
|
||||||
|
mavenLibraries: []
|
||||||
|
# List of PyPi libraries
|
||||||
|
pypiLibraries: []
|
||||||
|
# List of RCran libraries
|
||||||
|
rcranLibraries: []
|
||||||
|
# List of JAR libraries
|
||||||
|
jarLibraries: []
|
||||||
|
# List of Egg libraries
|
||||||
|
eggLibraries: []
|
||||||
|
# History details.
|
||||||
|
history:
|
||||||
|
# Enable history tracking -- this allows status, logs, metrics, and outputs
|
||||||
|
# to be collected for a run.
|
||||||
|
outputCollection: true
|
||||||
|
# whether to take snapshots for history.
|
||||||
|
snapshotProject: true
|
||||||
|
# Spark configuration details.
|
||||||
|
spark:
|
||||||
|
configuration:
|
||||||
|
spark.app.name: Azure ML Experiment
|
||||||
|
spark.yarn.maxAppAttempts: 1
|
||||||
|
# HDI details.
|
||||||
|
hdi:
|
||||||
|
# Yarn deploy mode. Options are cluster and client.
|
||||||
|
yarnDeployMode: cluster
|
||||||
|
# Tensorflow details.
|
||||||
|
tensorflow:
|
||||||
|
# The number of worker tasks.
|
||||||
|
workerCount: 1
|
||||||
|
# The number of parameter server tasks.
|
||||||
|
parameterServerCount: 1
|
||||||
|
# Mpi details.
|
||||||
|
mpi:
|
||||||
|
# When using MPI, number of processes per node.
|
||||||
|
processCountPerNode: 1
|
||||||
|
# data reference configuration details
|
||||||
|
dataReferences: {}
|
||||||
|
# Project share datastore reference.
|
||||||
|
sourceDirectoryDataStore:
|
||||||
|
# AmlCompute details.
|
||||||
|
amlcompute:
|
||||||
|
# VM size of the Cluster to be created.Allowed values are Azure vm sizes.The list of vm sizes is available in 'https://docs.microsoft.com/en-us/azure/cloud-services/cloud-services-sizes-specs
|
||||||
|
vmSize:
|
||||||
|
# VM priority of the Cluster to be created.Allowed values are "dedicated" , "lowpriority".
|
||||||
|
vmPriority:
|
||||||
|
# A bool that indicates if the cluster has to be retained after job completion.
|
||||||
|
retainCluster: false
|
||||||
|
# Name of the cluster to be created. If not specified, runId will be used as cluster name.
|
||||||
|
name:
|
||||||
|
# Maximum number of nodes in the AmlCompute cluster to be created. Minimum number of nodes will always be set to 0.
|
||||||
|
clusterMaxNodeCount: 1
|
||||||
115
aml_config/local.runconfig
Normal file
115
aml_config/local.runconfig
Normal file
@@ -0,0 +1,115 @@
|
|||||||
|
# The script to run.
|
||||||
|
script: train.py
|
||||||
|
# The arguments to the script file.
|
||||||
|
arguments: []
|
||||||
|
# The name of the compute target to use for this run.
|
||||||
|
target: local
|
||||||
|
# Framework to execute inside. Allowed values are "Python" , "PySpark", "CNTK", "TensorFlow", and "PyTorch".
|
||||||
|
framework: Python
|
||||||
|
# Communicator for the given framework. Allowed values are "None" , "ParameterServer", "OpenMpi", and "IntelMpi".
|
||||||
|
communicator: None
|
||||||
|
# Automatically prepare the run environment as part of the run itself.
|
||||||
|
autoPrepareEnvironment: true
|
||||||
|
# Maximum allowed duration for the run.
|
||||||
|
maxRunDurationSeconds:
|
||||||
|
# Number of nodes to use for running job.
|
||||||
|
nodeCount: 1
|
||||||
|
# Environment details.
|
||||||
|
environment:
|
||||||
|
# Environment variables set for the run.
|
||||||
|
environmentVariables:
|
||||||
|
EXAMPLE_ENV_VAR: EXAMPLE_VALUE
|
||||||
|
# Python details
|
||||||
|
python:
|
||||||
|
# user_managed_dependencies=True indicates that the environmentwill be user managed. False indicates that AzureML willmanage the user environment.
|
||||||
|
userManagedDependencies: false
|
||||||
|
# The python interpreter path
|
||||||
|
interpreterPath: python
|
||||||
|
# Path to the conda dependencies file to use for this run. If a project
|
||||||
|
# contains multiple programs with different sets of dependencies, it may be
|
||||||
|
# convenient to manage those environments with separate files.
|
||||||
|
condaDependenciesFile: aml_config/conda_dependencies.yml
|
||||||
|
# Docker details
|
||||||
|
docker:
|
||||||
|
# Set True to perform this run inside a Docker container.
|
||||||
|
enabled: false
|
||||||
|
# Base image used for Docker-based runs.
|
||||||
|
baseImage: mcr.microsoft.com/azureml/base:0.2.0
|
||||||
|
# Set False if necessary to work around shared volume bugs.
|
||||||
|
sharedVolumes: true
|
||||||
|
# Run with NVidia Docker extension to support GPUs.
|
||||||
|
gpuSupport: false
|
||||||
|
# Extra arguments to the Docker run command.
|
||||||
|
arguments: []
|
||||||
|
# Image registry that contains the base image.
|
||||||
|
baseImageRegistry:
|
||||||
|
# DNS name or IP address of azure container registry(ACR)
|
||||||
|
address:
|
||||||
|
# The username for ACR
|
||||||
|
username:
|
||||||
|
# The password for ACR
|
||||||
|
password:
|
||||||
|
# Spark details
|
||||||
|
spark:
|
||||||
|
# List of spark repositories.
|
||||||
|
repositories:
|
||||||
|
- https://mmlspark.azureedge.net/maven
|
||||||
|
packages:
|
||||||
|
- group: com.microsoft.ml.spark
|
||||||
|
artifact: mmlspark_2.11
|
||||||
|
version: '0.12'
|
||||||
|
precachePackages: true
|
||||||
|
# Databricks details
|
||||||
|
databricks:
|
||||||
|
# List of maven libraries.
|
||||||
|
mavenLibraries: []
|
||||||
|
# List of PyPi libraries
|
||||||
|
pypiLibraries: []
|
||||||
|
# List of RCran libraries
|
||||||
|
rcranLibraries: []
|
||||||
|
# List of JAR libraries
|
||||||
|
jarLibraries: []
|
||||||
|
# List of Egg libraries
|
||||||
|
eggLibraries: []
|
||||||
|
# History details.
|
||||||
|
history:
|
||||||
|
# Enable history tracking -- this allows status, logs, metrics, and outputs
|
||||||
|
# to be collected for a run.
|
||||||
|
outputCollection: true
|
||||||
|
# whether to take snapshots for history.
|
||||||
|
snapshotProject: true
|
||||||
|
# Spark configuration details.
|
||||||
|
spark:
|
||||||
|
configuration:
|
||||||
|
spark.app.name: Azure ML Experiment
|
||||||
|
spark.yarn.maxAppAttempts: 1
|
||||||
|
# HDI details.
|
||||||
|
hdi:
|
||||||
|
# Yarn deploy mode. Options are cluster and client.
|
||||||
|
yarnDeployMode: cluster
|
||||||
|
# Tensorflow details.
|
||||||
|
tensorflow:
|
||||||
|
# The number of worker tasks.
|
||||||
|
workerCount: 1
|
||||||
|
# The number of parameter server tasks.
|
||||||
|
parameterServerCount: 1
|
||||||
|
# Mpi details.
|
||||||
|
mpi:
|
||||||
|
# When using MPI, number of processes per node.
|
||||||
|
processCountPerNode: 1
|
||||||
|
# data reference configuration details
|
||||||
|
dataReferences: {}
|
||||||
|
# Project share datastore reference.
|
||||||
|
sourceDirectoryDataStore:
|
||||||
|
# AmlCompute details.
|
||||||
|
amlcompute:
|
||||||
|
# VM size of the Cluster to be created.Allowed values are Azure vm sizes.The list of vm sizes is available in 'https://docs.microsoft.com/en-us/azure/cloud-services/cloud-services-sizes-specs
|
||||||
|
vmSize:
|
||||||
|
# VM priority of the Cluster to be created.Allowed values are "dedicated" , "lowpriority".
|
||||||
|
vmPriority:
|
||||||
|
# A bool that indicates if the cluster has to be retained after job completion.
|
||||||
|
retainCluster: false
|
||||||
|
# Name of the cluster to be created. If not specified, runId will be used as cluster name.
|
||||||
|
name:
|
||||||
|
# Maximum number of nodes in the AmlCompute cluster to be created. Minimum number of nodes will always be set to 0.
|
||||||
|
clusterMaxNodeCount: 1
|
||||||
1
aml_config/project.json
Normal file
1
aml_config/project.json
Normal file
@@ -0,0 +1 @@
|
|||||||
|
{"Id": "local-compute", "Scope": "/subscriptions/65a1016d-0f67-45d2-b838-b8f373d6d52e/resourceGroups/sheri/providers/Microsoft.MachineLearningServices/workspaces/sheritestqs3/projects/local-compute"}
|
||||||
@@ -138,12 +138,14 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"found = False\n",
|
"found = False\n",
|
||||||
"# Check if this compute target already exists in the workspace.\n",
|
"# Check if this compute target already exists in the workspace.\n",
|
||||||
"cts = ws.compute_targets\n",
|
"for ct_name, ct in ws.compute_targets().items():\n",
|
||||||
"if batchai_cluster_name in cts and cts[batchai_cluster_name].type == 'BatchAI':\n",
|
" print(ct.name, ct.type)\n",
|
||||||
" found = True\n",
|
" if (ct.name == batchai_cluster_name and ct.type == 'BatchAI'):\n",
|
||||||
" print('Found existing compute target.')\n",
|
" found = True\n",
|
||||||
" compute_target = cts[batchai_cluster_name]\n",
|
" print('Found existing compute target.')\n",
|
||||||
" \n",
|
" compute_target = ct\n",
|
||||||
|
" break\n",
|
||||||
|
" \n",
|
||||||
"if not found:\n",
|
"if not found:\n",
|
||||||
" print('Creating a new compute target...')\n",
|
" print('Creating a new compute target...')\n",
|
||||||
" provisioning_config = BatchAiCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\", # for GPU, use \"STANDARD_NC6\"\n",
|
" provisioning_config = BatchAiCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\", # for GPU, use \"STANDARD_NC6\"\n",
|
||||||
|
|||||||
@@ -143,16 +143,16 @@
|
|||||||
"dsvm_username = '<<username>>'\n",
|
"dsvm_username = '<<username>>'\n",
|
||||||
"dsvm_password = '<<password>>'\n",
|
"dsvm_password = '<<password>>'\n",
|
||||||
"\n",
|
"\n",
|
||||||
"if compute_name in ws.compute_targets:\n",
|
"if compute_name in ws.compute_targets():\n",
|
||||||
" print('Using existing compute.')\n",
|
" print('Using existing compute.')\n",
|
||||||
" dsvm_compute = ws.compute_targets[compute_name]\n",
|
" dsvm_compute = ws.compute_targets()[compute_name]\n",
|
||||||
"else:\n",
|
"else:\n",
|
||||||
" RemoteCompute.attach(workspace=ws, name=compute_name, address=dsvm_ip_addr, username=dsvm_username, password=dsvm_password, ssh_port=dsvm_ssh_port)\n",
|
" RemoteCompute.attach(workspace=ws, name=compute_name, address=dsvm_ip_addr, username=dsvm_username, password=dsvm_password, ssh_port=dsvm_ssh_port)\n",
|
||||||
"\n",
|
"\n",
|
||||||
" while ws.compute_targets[compute_name].provisioning_state == 'Creating':\n",
|
" while ws.compute_targets()[compute_name].provisioning_state == 'Creating':\n",
|
||||||
" time.sleep(1)\n",
|
" time.sleep(1)\n",
|
||||||
"\n",
|
"\n",
|
||||||
" dsvm_compute = ws.compute_targets[compute_name]\n",
|
" dsvm_compute = ws.compute_targets()[compute_name]\n",
|
||||||
" \n",
|
" \n",
|
||||||
" if dsvm_compute.provisioning_state == 'Failed':\n",
|
" if dsvm_compute.provisioning_state == 'Failed':\n",
|
||||||
" print('Attached failed.')\n",
|
" print('Attached failed.')\n",
|
||||||
|
|||||||
@@ -13,7 +13,7 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"# AutoML 06: Train Test Split and Handling Sparse Data\n",
|
"# AutoML 06: Custom CV Splits and Handling Sparse Data\n",
|
||||||
"\n",
|
"\n",
|
||||||
"In this example we use the scikit-learn's [20newsgroup](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.fetch_20newsgroups.html) to showcase how you can use AutoML for handling sparse data and how to specify custom cross validations splits.\n",
|
"In this example we use the scikit-learn's [20newsgroup](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.fetch_20newsgroups.html) to showcase how you can use AutoML for handling sparse data and how to specify custom cross validations splits.\n",
|
||||||
"\n",
|
"\n",
|
||||||
@@ -27,7 +27,7 @@
|
|||||||
"6. Test the best fitted model.\n",
|
"6. Test the best fitted model.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"In addition this notebook showcases the following features\n",
|
"In addition this notebook showcases the following features\n",
|
||||||
"- Explicit train test splits \n",
|
"- **Custom CV** splits \n",
|
||||||
"- Handling **sparse data** in the input"
|
"- Handling **sparse data** in the input"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -116,11 +116,10 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"experiment_name = 'automl-local-classification' # Replace this with any project name from previous cell.\n",
|
"experiment_name = 'automl-local-classification' # Replace this with any project name from previous cell.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"proj = ws.experiments[experiment_name]\n",
|
"proj = ws.experiments()[experiment_name]\n",
|
||||||
"summary_df = pd.DataFrame(index = ['Type', 'Status', 'Primary Metric', 'Iterations', 'Compute', 'Name'])\n",
|
"summary_df = pd.DataFrame(index = ['Type', 'Status', 'Primary Metric', 'Iterations', 'Compute', 'Name'])\n",
|
||||||
"pattern = re.compile('^AutoML_[^_]*$')\n",
|
"pattern = re.compile('^AutoML_[^_]*$')\n",
|
||||||
"all_runs = list(proj.get_runs(properties={'azureml.runsource': 'automl'}))\n",
|
"all_runs = list(proj.get_runs(properties={'azureml.runsource': 'automl'}))\n",
|
||||||
"automl_runs_project = []\n",
|
|
||||||
"for run in all_runs:\n",
|
"for run in all_runs:\n",
|
||||||
" if(pattern.match(run.id)):\n",
|
" if(pattern.match(run.id)):\n",
|
||||||
" properties = run.get_properties()\n",
|
" properties = run.get_properties()\n",
|
||||||
@@ -131,8 +130,6 @@
|
|||||||
" else:\n",
|
" else:\n",
|
||||||
" iterations = properties['num_iterations']\n",
|
" iterations = properties['num_iterations']\n",
|
||||||
" summary_df[run.id] = [amlsettings['task_type'], run.get_details()['status'], properties['primary_metric'], iterations, properties['target'], amlsettings['name']]\n",
|
" summary_df[run.id] = [amlsettings['task_type'], run.get_details()['status'], properties['primary_metric'], iterations, properties['target'], amlsettings['name']]\n",
|
||||||
" if run.get_details()['status'] == 'Completed':\n",
|
|
||||||
" automl_runs_project.append(run.id)\n",
|
|
||||||
" \n",
|
" \n",
|
||||||
"from IPython.display import HTML\n",
|
"from IPython.display import HTML\n",
|
||||||
"projname_html = HTML(\"<h3>{}</h3>\".format(proj.name))\n",
|
"projname_html = HTML(\"<h3>{}</h3>\".format(proj.name))\n",
|
||||||
@@ -157,8 +154,8 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"run_id = automl_runs_project[0] # Replace with your own run_id from above run ids\n",
|
"run_id = '' # Filling your own run_id from above run ids\n",
|
||||||
"assert (run_id in summary_df.keys()), \"Run id not found! Please set run id to a value from above run ids\"\n",
|
"assert (run_id in summary_df.keys()),\"Run id not found! Please set run id to a value from above run ids\"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"from azureml.train.widgets import RunDetails\n",
|
"from azureml.train.widgets import RunDetails\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
|||||||
@@ -209,8 +209,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"## Configure & Run\n",
|
"## Configure & Run\n",
|
||||||
"\n",
|
"\n",
|
||||||
"First let's create a DataReferenceConfigruation object to inform the system what data folder to download to the compute target.\n",
|
"First let's create a DataReferenceConfigruation object to inform the system what data folder to download to the copmute target."
|
||||||
"The path_on_compute should be an absolute path to ensure that the data files are downloaded only once. The get_data method should use this same path to access the data files."
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -222,9 +221,8 @@
|
|||||||
"from azureml.core.runconfig import DataReferenceConfiguration\n",
|
"from azureml.core.runconfig import DataReferenceConfiguration\n",
|
||||||
"dr = DataReferenceConfiguration(datastore_name=ds.name, \n",
|
"dr = DataReferenceConfiguration(datastore_name=ds.name, \n",
|
||||||
" path_on_datastore='data', \n",
|
" path_on_datastore='data', \n",
|
||||||
" path_on_compute='/tmp/azureml_runs',\n",
|
|
||||||
" mode='download', # download files from datastore to compute target\n",
|
" mode='download', # download files from datastore to compute target\n",
|
||||||
" overwrite=False)"
|
" overwrite=True)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -239,7 +237,7 @@
|
|||||||
"conda_run_config = RunConfiguration(framework=\"python\")\n",
|
"conda_run_config = RunConfiguration(framework=\"python\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Set compute target to the Linux DSVM\n",
|
"# Set compute target to the Linux DSVM\n",
|
||||||
"conda_run_config.target = dsvm_compute\n",
|
"conda_run_config.target = dsvm_compute.name\n",
|
||||||
"# set the data reference of the run coonfiguration\n",
|
"# set the data reference of the run coonfiguration\n",
|
||||||
"conda_run_config.data_references = {ds.name: dr}"
|
"conda_run_config.data_references = {ds.name: dr}"
|
||||||
]
|
]
|
||||||
@@ -251,9 +249,7 @@
|
|||||||
"## Create Get Data File\n",
|
"## Create Get Data File\n",
|
||||||
"For remote executions you should author a get_data.py file containing a get_data() function. This file should be in the root directory of the project. You can encapsulate code to read data either from a blob storage or local disk in this file.\n",
|
"For remote executions you should author a get_data.py file containing a get_data() function. This file should be in the root directory of the project. You can encapsulate code to read data either from a blob storage or local disk in this file.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"The *get_data()* function returns a [dictionary](README.md#getdata).\n",
|
"The *get_data()* function returns a [dictionary](README.md#getdata)."
|
||||||
"\n",
|
|
||||||
"The read_csv uses the path_on_compute value specified in the DataReferenceConfiguration call plus the path_on_datastore folder and then the actual file name."
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -282,7 +278,9 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"def get_data():\n",
|
"def get_data():\n",
|
||||||
" # Burning man 2016 data\n",
|
" # Burning man 2016 data\n",
|
||||||
" df = pd.read_csv(\"/tmp/azureml_runs/data/data.tsv\", delimiter=\"\\t\", quotechar='\"')\n",
|
" df = pd.read_csv(join(dirname(os.path.realpath(__file__)),\n",
|
||||||
|
" os.environ[\"AZUREML_DATAREFERENCE_workspacefilestore\"],\n",
|
||||||
|
" \"data.tsv\"), delimiter=\"\\t\", quotechar='\"')\n",
|
||||||
" # get integer labels\n",
|
" # get integer labels\n",
|
||||||
" le = LabelEncoder()\n",
|
" le = LabelEncoder()\n",
|
||||||
" le.fit(df[\"Label\"].values)\n",
|
" le.fit(df[\"Label\"].values)\n",
|
||||||
@@ -40,7 +40,8 @@
|
|||||||
"from azureml.core.experiment import Experiment\n",
|
"from azureml.core.experiment import Experiment\n",
|
||||||
"from azureml.core.workspace import Workspace\n",
|
"from azureml.core.workspace import Workspace\n",
|
||||||
"from azureml.train.automl import AutoMLConfig\n",
|
"from azureml.train.automl import AutoMLConfig\n",
|
||||||
"from azureml.train.automl.run import AutoMLRun\n"
|
"from azureml.train.automl.run import AutoMLRun\n",
|
||||||
|
"from azureml.train.automl.utilities import get_sdk_dependencies"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -62,6 +63,29 @@
|
|||||||
"set_diagnostics_collection(send_diagnostics = True)"
|
"set_diagnostics_collection(send_diagnostics = True)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Retrieve the SDK versions in the current environment"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"To retrieve the SDK versions in the current environment, run `get_sdk_dependencies`."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"get_sdk_dependencies()"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
|
|||||||
@@ -13,7 +13,7 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"# AutoML 13: Prepare Data using `azureml.dataprep` for Remote Execution (DSVM)\n",
|
"# AutoML 13: Prepare Data using `azureml.dataprep`\n",
|
||||||
"In this example we showcase how you can use the `azureml.dataprep` SDK to load and prepare data for AutoML. `azureml.dataprep` can also be used standalone; full documentation can be found [here](https://github.com/Microsoft/PendletonDocs).\n",
|
"In this example we showcase how you can use the `azureml.dataprep` SDK to load and prepare data for AutoML. `azureml.dataprep` can also be used standalone; full documentation can be found [here](https://github.com/Microsoft/PendletonDocs).\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Make sure you have executed the [setup](00.configuration.ipynb) before running this notebook.\n",
|
"Make sure you have executed the [setup](00.configuration.ipynb) before running this notebook.\n",
|
||||||
@@ -24,6 +24,22 @@
|
|||||||
"3. Pass the `Dataflow` to AutoML for a remote run."
|
"3. Pass the `Dataflow` to AutoML for a remote run."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Install `azureml.dataprep` SDK"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"!pip install azureml-dataprep"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
@@ -66,6 +82,8 @@
|
|||||||
"import azureml.core\n",
|
"import azureml.core\n",
|
||||||
"from azureml.core.compute import DsvmCompute\n",
|
"from azureml.core.compute import DsvmCompute\n",
|
||||||
"from azureml.core.experiment import Experiment\n",
|
"from azureml.core.experiment import Experiment\n",
|
||||||
|
"from azureml.core.runconfig import CondaDependencies\n",
|
||||||
|
"from azureml.core.runconfig import RunConfiguration\n",
|
||||||
"from azureml.core.workspace import Workspace\n",
|
"from azureml.core.workspace import Workspace\n",
|
||||||
"import azureml.dataprep as dprep\n",
|
"import azureml.dataprep as dprep\n",
|
||||||
"from azureml.train.automl import AutoMLConfig"
|
"from azureml.train.automl import AutoMLConfig"
|
||||||
@@ -80,9 +98,9 @@
|
|||||||
"ws = Workspace.from_config()\n",
|
"ws = Workspace.from_config()\n",
|
||||||
" \n",
|
" \n",
|
||||||
"# choose a name for experiment\n",
|
"# choose a name for experiment\n",
|
||||||
"experiment_name = 'automl-dataprep-remote-dsvm'\n",
|
"experiment_name = 'automl-dataprep-classification'\n",
|
||||||
"# project folder\n",
|
"# project folder\n",
|
||||||
"project_folder = './sample_projects/automl-dataprep-remote-dsvm'\n",
|
"project_folder = './sample_projects/automl-dataprep-classification'\n",
|
||||||
" \n",
|
" \n",
|
||||||
"experiment = Experiment(ws, experiment_name)\n",
|
"experiment = Experiment(ws, experiment_name)\n",
|
||||||
" \n",
|
" \n",
|
||||||
@@ -165,6 +183,44 @@
|
|||||||
"}"
|
"}"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Local Run"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Pass Data with `Dataflow` Objects\n",
|
||||||
|
"\n",
|
||||||
|
"The `Dataflow` objects captured above can be passed to the `submit` method for a local run. AutoML will retrieve the results from the `Dataflow` for model training."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"automl_config = AutoMLConfig(task = 'classification',\n",
|
||||||
|
" debug_log = 'automl_errors.log',\n",
|
||||||
|
" X = X,\n",
|
||||||
|
" y = y,\n",
|
||||||
|
" **automl_settings)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"local_run = experiment.submit(automl_config, show_output = True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
@@ -196,6 +252,43 @@
|
|||||||
" dsvm_compute.wait_for_completion(show_output = True)"
|
" dsvm_compute.wait_for_completion(show_output = True)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Update Conda Dependency file to have AutoML and DataPrep SDK\n",
|
||||||
|
"\n",
|
||||||
|
"Currently the AutoML and DataPrep SDKs are not installed with the Azure ML SDK by default. To circumvent this limitation, we update the conda dependency file to add these dependencies."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"cd = CondaDependencies()\n",
|
||||||
|
"cd.add_pip_package(pip_package='azureml-dataprep')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Create a `RunConfiguration` with DSVM name"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"run_config = RunConfiguration(conda_dependencies=cd)\n",
|
||||||
|
"run_config.target = dsvm_compute\n",
|
||||||
|
"run_config.auto_prepare_environment = True"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
@@ -214,18 +307,10 @@
|
|||||||
"automl_config = AutoMLConfig(task = 'classification',\n",
|
"automl_config = AutoMLConfig(task = 'classification',\n",
|
||||||
" debug_log = 'automl_errors.log',\n",
|
" debug_log = 'automl_errors.log',\n",
|
||||||
" path = project_folder,\n",
|
" path = project_folder,\n",
|
||||||
" compute_target = dsvm_compute,\n",
|
" run_configuration = run_config,\n",
|
||||||
" X = X,\n",
|
" X = X,\n",
|
||||||
" y = y,\n",
|
" y = y,\n",
|
||||||
" **automl_settings)"
|
" **automl_settings)\n",
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"remote_run = experiment.submit(automl_config, show_output = True)"
|
"remote_run = experiment.submit(automl_config, show_output = True)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -254,7 +339,7 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from azureml.train.widgets import RunDetails\n",
|
"from azureml.train.widgets import RunDetails\n",
|
||||||
"RunDetails(remote_run).show()"
|
"RunDetails(local_run).show()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -271,7 +356,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"children = list(remote_run.get_children())\n",
|
"children = list(local_run.get_children())\n",
|
||||||
"metricslist = {}\n",
|
"metricslist = {}\n",
|
||||||
"for run in children:\n",
|
"for run in children:\n",
|
||||||
" properties = run.get_properties()\n",
|
" properties = run.get_properties()\n",
|
||||||
@@ -298,7 +383,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"best_run, fitted_model = remote_run.get_output()\n",
|
"best_run, fitted_model = local_run.get_output()\n",
|
||||||
"print(best_run)\n",
|
"print(best_run)\n",
|
||||||
"print(fitted_model)"
|
"print(fitted_model)"
|
||||||
]
|
]
|
||||||
@@ -318,7 +403,7 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"lookup_metric = \"log_loss\"\n",
|
"lookup_metric = \"log_loss\"\n",
|
||||||
"best_run, fitted_model = remote_run.get_output(metric = lookup_metric)\n",
|
"best_run, fitted_model = local_run.get_output(metric = lookup_metric)\n",
|
||||||
"print(best_run)\n",
|
"print(best_run)\n",
|
||||||
"print(fitted_model)"
|
"print(fitted_model)"
|
||||||
]
|
]
|
||||||
@@ -338,7 +423,7 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"iteration = 0\n",
|
"iteration = 0\n",
|
||||||
"best_run, fitted_model = remote_run.get_output(iteration = iteration)\n",
|
"best_run, fitted_model = local_run.get_output(iteration = iteration)\n",
|
||||||
"print(best_run)\n",
|
"print(best_run)\n",
|
||||||
"print(fitted_model)"
|
"print(fitted_model)"
|
||||||
]
|
]
|
||||||
@@ -1,446 +0,0 @@
|
|||||||
{
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
|
||||||
"\n",
|
|
||||||
"Licensed under the MIT License."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# AutoML 13: Prepare Data using `azureml.dataprep` for Local Execution\n",
|
|
||||||
"In this example we showcase how you can use the `azureml.dataprep` SDK to load and prepare data for AutoML. `azureml.dataprep` can also be used standalone; full documentation can be found [here](https://github.com/Microsoft/PendletonDocs).\n",
|
|
||||||
"\n",
|
|
||||||
"Make sure you have executed the [setup](00.configuration.ipynb) before running this notebook.\n",
|
|
||||||
"\n",
|
|
||||||
"In this notebook you will learn how to:\n",
|
|
||||||
"1. Define data loading and preparation steps in a `Dataflow` using `azureml.dataprep`.\n",
|
|
||||||
"2. Pass the `Dataflow` to AutoML for a local run.\n",
|
|
||||||
"3. Pass the `Dataflow` to AutoML for a remote run."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Diagnostics\n",
|
|
||||||
"\n",
|
|
||||||
"Opt-in diagnostics for better experience, quality, and security of future releases."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.telemetry import set_diagnostics_collection\n",
|
|
||||||
"set_diagnostics_collection(send_diagnostics = True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Create an Experiment\n",
|
|
||||||
"\n",
|
|
||||||
"As part of the setup you have already created an Azure ML `Workspace` object. For AutoML you will need to create an `Experiment` object, which is a named object in a `Workspace` used to run experiments."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import logging\n",
|
|
||||||
"import os\n",
|
|
||||||
"\n",
|
|
||||||
"import pandas as pd\n",
|
|
||||||
"\n",
|
|
||||||
"import azureml.core\n",
|
|
||||||
"from azureml.core.experiment import Experiment\n",
|
|
||||||
"from azureml.core.workspace import Workspace\n",
|
|
||||||
"import azureml.dataprep as dprep\n",
|
|
||||||
"from azureml.train.automl import AutoMLConfig"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"ws = Workspace.from_config()\n",
|
|
||||||
" \n",
|
|
||||||
"# choose a name for experiment\n",
|
|
||||||
"experiment_name = 'automl-dataprep-local'\n",
|
|
||||||
"# project folder\n",
|
|
||||||
"project_folder = './sample_projects/automl-dataprep-local'\n",
|
|
||||||
" \n",
|
|
||||||
"experiment = Experiment(ws, experiment_name)\n",
|
|
||||||
" \n",
|
|
||||||
"output = {}\n",
|
|
||||||
"output['SDK version'] = azureml.core.VERSION\n",
|
|
||||||
"output['Subscription ID'] = ws.subscription_id\n",
|
|
||||||
"output['Workspace Name'] = ws.name\n",
|
|
||||||
"output['Resource Group'] = ws.resource_group\n",
|
|
||||||
"output['Location'] = ws.location\n",
|
|
||||||
"output['Project Directory'] = project_folder\n",
|
|
||||||
"output['Experiment Name'] = experiment.name\n",
|
|
||||||
"pd.set_option('display.max_colwidth', -1)\n",
|
|
||||||
"pd.DataFrame(data = output, index = ['']).T"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Loading Data using DataPrep"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# You can use `smart_read_file` which intelligently figures out delimiters and datatypes of a file.\n",
|
|
||||||
"# The data referenced here was pulled from `sklearn.datasets.load_digits()`.\n",
|
|
||||||
"simple_example_data_root = 'https://dprepdata.blob.core.windows.net/automl-notebook-data/'\n",
|
|
||||||
"X = dprep.smart_read_file(simple_example_data_root + 'X.csv').skip(1) # Remove the header row.\n",
|
|
||||||
"\n",
|
|
||||||
"# You can also use `read_csv` and `to_*` transformations to read (with overridable delimiter)\n",
|
|
||||||
"# and convert column types manually.\n",
|
|
||||||
"# Here we read a comma delimited file and convert all columns to integers.\n",
|
|
||||||
"y = dprep.read_csv(simple_example_data_root + 'y.csv').to_long(dprep.ColumnSelector(term='.*', use_regex = True))"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Review the Data Preparation Result\n",
|
|
||||||
"\n",
|
|
||||||
"You can peek the result of a Dataflow at any range using `skip(i)` and `head(j)`. Doing so evaluates only `j` records for all the steps in the Dataflow, which makes it fast even against large datasets."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"X.skip(1).head(5)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Configure AutoML\n",
|
|
||||||
"\n",
|
|
||||||
"This creates a general AutoML settings object applicable for both local and remote runs."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"automl_settings = {\n",
|
|
||||||
" \"max_time_sec\" : 600,\n",
|
|
||||||
" \"iterations\" : 2,\n",
|
|
||||||
" \"primary_metric\" : 'AUC_weighted',\n",
|
|
||||||
" \"preprocess\" : False,\n",
|
|
||||||
" \"verbosity\" : logging.INFO,\n",
|
|
||||||
" \"n_cross_validations\": 3\n",
|
|
||||||
"}"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Local Run"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Pass Data with `Dataflow` Objects\n",
|
|
||||||
"\n",
|
|
||||||
"The `Dataflow` objects captured above can be passed to the `submit` method for a local run. AutoML will retrieve the results from the `Dataflow` for model training."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"automl_config = AutoMLConfig(task = 'classification',\n",
|
|
||||||
" debug_log = 'automl_errors.log',\n",
|
|
||||||
" X = X,\n",
|
|
||||||
" y = y,\n",
|
|
||||||
" **automl_settings)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"local_run = experiment.submit(automl_config, show_output = True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Explore the Results"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"#### Widget for Monitoring Runs\n",
|
|
||||||
"\n",
|
|
||||||
"The widget will first report a \"loading\" status while running the first iteration. After completing the first iteration, an auto-updating graph and table will be shown. The widget will refresh once per minute, so you should see the graph update as child runs complete.\n",
|
|
||||||
"\n",
|
|
||||||
"**Note:** The widget displays a link at the bottom. Use this link to open a web interface to explore the individual run details."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.train.widgets import RunDetails\n",
|
|
||||||
"RunDetails(local_run).show()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"#### Retrieve All Child Runs\n",
|
|
||||||
"You can also use SDK methods to fetch all the child runs and see individual metrics that we log."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"children = list(local_run.get_children())\n",
|
|
||||||
"metricslist = {}\n",
|
|
||||||
"for run in children:\n",
|
|
||||||
" properties = run.get_properties()\n",
|
|
||||||
" metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)}\n",
|
|
||||||
" metricslist[int(properties['iteration'])] = metrics\n",
|
|
||||||
" \n",
|
|
||||||
"import pandas as pd\n",
|
|
||||||
"rundata = pd.DataFrame(metricslist).sort_index(1)\n",
|
|
||||||
"rundata"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Retrieve the Best Model\n",
|
|
||||||
"\n",
|
|
||||||
"Below we select the best pipeline from our iterations. The `get_output` method returns the best run and the fitted model. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"best_run, fitted_model = local_run.get_output()\n",
|
|
||||||
"print(best_run)\n",
|
|
||||||
"print(fitted_model)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"#### Best Model Based on Any Other Metric\n",
|
|
||||||
"Show the run and the model that has the smallest `log_loss` value:"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"lookup_metric = \"log_loss\"\n",
|
|
||||||
"best_run, fitted_model = local_run.get_output(metric = lookup_metric)\n",
|
|
||||||
"print(best_run)\n",
|
|
||||||
"print(fitted_model)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"#### Model from a Specific Iteration\n",
|
|
||||||
"Show the run and the model from the first iteration:"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"iteration = 0\n",
|
|
||||||
"best_run, fitted_model = local_run.get_output(iteration = iteration)\n",
|
|
||||||
"print(best_run)\n",
|
|
||||||
"print(fitted_model)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Test the Best Fitted Model\n",
|
|
||||||
"\n",
|
|
||||||
"#### Load Test Data"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from sklearn import datasets\n",
|
|
||||||
"\n",
|
|
||||||
"digits = datasets.load_digits()\n",
|
|
||||||
"X_test = digits.data[:10, :]\n",
|
|
||||||
"y_test = digits.target[:10]\n",
|
|
||||||
"images = digits.images[:10]"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"#### Testing Our Best Fitted Model\n",
|
|
||||||
"We will try to predict 2 digits and see how our model works."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"#Randomly select digits and test\n",
|
|
||||||
"from matplotlib import pyplot as plt\n",
|
|
||||||
"from matplotlib.pyplot import imshow\n",
|
|
||||||
"import random\n",
|
|
||||||
"import numpy as np\n",
|
|
||||||
"\n",
|
|
||||||
"for index in np.random.choice(len(y_test), 2, replace = False):\n",
|
|
||||||
" print(index)\n",
|
|
||||||
" predicted = fitted_model.predict(X_test[index:index + 1])[0]\n",
|
|
||||||
" label = y_test[index]\n",
|
|
||||||
" title = \"Label value = %d Predicted value = %d \" % (label, predicted)\n",
|
|
||||||
" fig = plt.figure(1, figsize=(3,3))\n",
|
|
||||||
" ax1 = fig.add_axes((0,0,.8,.8))\n",
|
|
||||||
" ax1.set_title(title)\n",
|
|
||||||
" plt.imshow(images[index], cmap = plt.cm.gray_r, interpolation = 'nearest')\n",
|
|
||||||
" plt.show()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Appendix"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Capture the `Dataflow` Objects for Later Use in AutoML\n",
|
|
||||||
"\n",
|
|
||||||
"`Dataflow` objects are immutable and are composed of a list of data preparation steps. A `Dataflow` object can be branched at any point for further usage."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# sklearn.digits.data + target\n",
|
|
||||||
"digits_complete = dprep.smart_read_file('https://dprepdata.blob.core.windows.net/automl-notebook-data/digits-complete.csv')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"`digits_complete` (sourced from `sklearn.datasets.load_digits()`) is forked into `dflow_X` to capture all the feature columns and `dflow_y` to capture the label column."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"digits_complete.to_pandas_dataframe().shape\n",
|
|
||||||
"labels_column = 'Column64'\n",
|
|
||||||
"dflow_X = digits_complete.drop_columns(columns = [labels_column])\n",
|
|
||||||
"dflow_y = digits_complete.keep_columns(columns = [labels_column])"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"authors": [
|
|
||||||
{
|
|
||||||
"name": "savitam"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3.6",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python36"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.6.5"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 2
|
|
||||||
}
|
|
||||||
@@ -1,426 +0,0 @@
|
|||||||
{
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
|
||||||
"\n",
|
|
||||||
"Licensed under the MIT License."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# AutoML 15a: Classification with ensembling on local compute\n",
|
|
||||||
"\n",
|
|
||||||
"In this example we use the scikit-learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html) to showcase how you can use AutoML for a simple classification problem.\n",
|
|
||||||
"\n",
|
|
||||||
"Make sure you have executed the [00.configuration](00.configuration.ipynb) before running this notebook.\n",
|
|
||||||
"\n",
|
|
||||||
"In this notebook you will learn how to:\n",
|
|
||||||
"1. Create an `Experiment` in an existing `Workspace`.\n",
|
|
||||||
"2. Configure AutoML using `AutoMLConfig` which enables an extra ensembling iteration.\n",
|
|
||||||
"3. Train the model using local compute.\n",
|
|
||||||
"4. Explore the results.\n",
|
|
||||||
"5. Test the best fitted model.\n",
|
|
||||||
"\n",
|
|
||||||
"<b>Disclaimers / Limitations </b>\n",
|
|
||||||
"- Currently only Train/Validation split is supported; support for cross-validation will be coming soon.\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Create an Experiment\n",
|
|
||||||
"\n",
|
|
||||||
"As part of the setup you have already created an Azure ML `Workspace` object. For AutoML you will need to create an `Experiment` object, which is a named object in a `Workspace` used to run experiments."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import logging\n",
|
|
||||||
"import os\n",
|
|
||||||
"import random\n",
|
|
||||||
"\n",
|
|
||||||
"from matplotlib import pyplot as plt\n",
|
|
||||||
"from matplotlib.pyplot import imshow\n",
|
|
||||||
"import numpy as np\n",
|
|
||||||
"import pandas as pd\n",
|
|
||||||
"from sklearn import datasets\n",
|
|
||||||
"\n",
|
|
||||||
"import azureml.core\n",
|
|
||||||
"from azureml.core.experiment import Experiment\n",
|
|
||||||
"from azureml.core.workspace import Workspace\n",
|
|
||||||
"from azureml.train.automl import AutoMLConfig\n",
|
|
||||||
"from azureml.train.automl.run import AutoMLRun"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"ws = Workspace.from_config()\n",
|
|
||||||
"\n",
|
|
||||||
"# Choose a name for the experiment and specify the project folder.\n",
|
|
||||||
"experiment_name = 'automl-local-classification'\n",
|
|
||||||
"project_folder = './sample_projects/automl-local-classification'\n",
|
|
||||||
"\n",
|
|
||||||
"experiment = Experiment(ws, experiment_name)\n",
|
|
||||||
"\n",
|
|
||||||
"output = {}\n",
|
|
||||||
"output['SDK version'] = azureml.core.VERSION\n",
|
|
||||||
"output['Subscription ID'] = ws.subscription_id\n",
|
|
||||||
"output['Workspace Name'] = ws.name\n",
|
|
||||||
"output['Resource Group'] = ws.resource_group\n",
|
|
||||||
"output['Location'] = ws.location\n",
|
|
||||||
"output['Project Directory'] = project_folder\n",
|
|
||||||
"output['Experiment Name'] = experiment.name\n",
|
|
||||||
"pd.set_option('display.max_colwidth', -1)\n",
|
|
||||||
"pd.DataFrame(data = output, index = ['']).T"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Diagnostics\n",
|
|
||||||
"\n",
|
|
||||||
"Opt-in diagnostics for better experience, quality, and security of future releases."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.telemetry import set_diagnostics_collection\n",
|
|
||||||
"set_diagnostics_collection(send_diagnostics = True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Load Training Data"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from sklearn import datasets\n",
|
|
||||||
"\n",
|
|
||||||
"digits = datasets.load_digits()\n",
|
|
||||||
"\n",
|
|
||||||
"# Exclude the first 50 rows from training so that they can be used for test.\n",
|
|
||||||
"X_train = digits.data[150:,:]\n",
|
|
||||||
"y_train = digits.target[150:]\n",
|
|
||||||
"X_valid = digits.data[50:150]\n",
|
|
||||||
"y_valid = digits.target[50:150]"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Configure AutoML\n",
|
|
||||||
"\n",
|
|
||||||
"Instantiate an `AutoMLConfig` object to specify the settings and data used to run the experiment.\n",
|
|
||||||
"\n",
|
|
||||||
"|Property|Description|\n",
|
|
||||||
"|-|-|\n",
|
|
||||||
"|**task**|classification or regression|\n",
|
|
||||||
"|**primary_metric**|This is the metric that you want to optimize. Classification supports the following primary metrics: <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>balanced_accuracy</i><br><i>average_precision_score_weighted</i><br><i>precision_score_weighted</i>|\n",
|
|
||||||
"|**max_time_sec**|Time limit in seconds for each iteration.|\n",
|
|
||||||
"|**iterations**|Number of iterations. In each iteration AutoML trains a specific pipeline with the data.|\n",
|
|
||||||
"|**n_cross_validations**|Number of cross validation splits.|\n",
|
|
||||||
"|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n",
|
|
||||||
"|**y**|(sparse) array-like, shape = [n_samples, ], [n_samples, n_classes]<br>Multi-class targets. An indicator matrix turns on multilabel classification. This should be an array of integers.|\n",
|
|
||||||
"|**X_valid**|(sparse) array-like, shape = [n_samples, n_features]|\n",
|
|
||||||
"|**y_valid**|(sparse) array-like, shape = [n_samples, ], [n_samples, n_classes]<br>Multi-class targets. An indicator matrix turns on multilabel classification. This should be an array of integers.|\n",
|
|
||||||
"|**enable_ensembling**|Flag to enable an ensembling iteration after all the other iterations complete.|\n",
|
|
||||||
"|**ensemble_iterations**|Number of iterations during which we choose a fitted pipeline to be part of the final ensemble.|\n",
|
|
||||||
"|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder.|"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"automl_config = AutoMLConfig(task = 'classification',\n",
|
|
||||||
" debug_log = 'classification.log',\n",
|
|
||||||
" primary_metric = 'AUC_weighted',\n",
|
|
||||||
" max_time_sec = 3600,\n",
|
|
||||||
" iterations = 10,\n",
|
|
||||||
" verbosity = logging.INFO,\n",
|
|
||||||
" X = X_train, \n",
|
|
||||||
" y = y_train,\n",
|
|
||||||
" X_valid = X_valid,\n",
|
|
||||||
" y_valid = y_valid,\n",
|
|
||||||
" enable_ensembling = True,\n",
|
|
||||||
" ensemble_iterations = 5,\n",
|
|
||||||
" path = project_folder)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Train the Model\n",
|
|
||||||
"\n",
|
|
||||||
"Call the `submit` method on the experiment object and pass the run configuration. Execution of local runs is synchronous. Depending on the data and the number of iterations this can run for a while.\n",
|
|
||||||
"In this example, we specify `show_output = True` to print currently running iterations to the console."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"local_run = experiment.submit(automl_config, show_output = True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"local_run"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Optionally, you can continue an interrupted local run by calling `continue_experiment` without the `iterations` parameter, or run more iterations for a completed run by specifying the `iterations` parameter:"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"local_run = local_run.continue_experiment(X = X_train, \n",
|
|
||||||
" y = y_train,\n",
|
|
||||||
" X_valid = X_valid,\n",
|
|
||||||
" y_valid = y_valid,\n",
|
|
||||||
" show_output = True,\n",
|
|
||||||
" iterations = 5)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"local_run"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Explore the Results"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"#### Widget for Monitoring Runs\n",
|
|
||||||
"\n",
|
|
||||||
"The widget will first report a \"loading\" status while running the first iteration. After completing the first iteration, an auto-updating graph and table will be shown. The widget will refresh once per minute, so you should see the graph update as child runs complete.\n",
|
|
||||||
"\n",
|
|
||||||
"**Note:** The widget displays a link at the bottom. Use this link to open a web interface to explore the individual run details."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.train.widgets import RunDetails\n",
|
|
||||||
"RunDetails(local_run).show() "
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"\n",
|
|
||||||
"#### Retrieve All Child Runs\n",
|
|
||||||
"You can also use SDK methods to fetch all the child runs and see individual metrics that we log."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"children = list(local_run.get_children())\n",
|
|
||||||
"metricslist = {}\n",
|
|
||||||
"for run in children:\n",
|
|
||||||
" properties = run.get_properties()\n",
|
|
||||||
" metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)}\n",
|
|
||||||
" metricslist[int(properties['iteration'])] = metrics\n",
|
|
||||||
"\n",
|
|
||||||
"rundata = pd.DataFrame(metricslist).sort_index(1)\n",
|
|
||||||
"rundata"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Retrieve the Best Model\n",
|
|
||||||
"\n",
|
|
||||||
"Below we select the best pipeline from our iterations. The `get_output` method on `automl_classifier` returns the best run and the fitted model for the last invocation. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"best_run, fitted_model = local_run.get_output()\n",
|
|
||||||
"print(best_run)\n",
|
|
||||||
"print(fitted_model)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"#### Best Model Based on Any Other Metric\n",
|
|
||||||
"Show the run and the model that has the smallest `log_loss` value:"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"lookup_metric = \"log_loss\"\n",
|
|
||||||
"best_run, fitted_model = local_run.get_output(metric = lookup_metric)\n",
|
|
||||||
"print(best_run)\n",
|
|
||||||
"print(fitted_model)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"#### Model from a Specific Iteration\n",
|
|
||||||
"Show the run and the model from the third iteration:"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"iteration = 3\n",
|
|
||||||
"third_run, third_model = local_run.get_output(iteration = iteration)\n",
|
|
||||||
"print(third_run)\n",
|
|
||||||
"print(third_model)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Test the Best Fitted Model\n",
|
|
||||||
"\n",
|
|
||||||
"#### Load Test Data"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"digits = datasets.load_digits()\n",
|
|
||||||
"X_test = digits.data[:10, :]\n",
|
|
||||||
"y_test = digits.target[:10]\n",
|
|
||||||
"images = digits.images[:10]"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"#### Testing Our Best Pipeline\n",
|
|
||||||
"We will try to predict 2 digits and see how our model works."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Randomly select digits and test.\n",
|
|
||||||
"for index in np.random.choice(len(y_test), 2, replace = False):\n",
|
|
||||||
" print(index)\n",
|
|
||||||
" predicted = fitted_model.predict(X_test[index:index + 1])[0]\n",
|
|
||||||
" label = y_test[index]\n",
|
|
||||||
" title = \"Label value = %d Predicted value = %d \" % (label, predicted)\n",
|
|
||||||
" fig = plt.figure(1, figsize = (3,3))\n",
|
|
||||||
" ax1 = fig.add_axes((0,0,.8,.8))\n",
|
|
||||||
" ax1.set_title(title)\n",
|
|
||||||
" plt.imshow(images[index], cmap = plt.cm.gray_r, interpolation = 'nearest')\n",
|
|
||||||
" plt.show()"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"authors": [
|
|
||||||
{
|
|
||||||
"name": "ratanase"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3.6",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python36"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.6.6"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 2
|
|
||||||
}
|
|
||||||
@@ -1,442 +0,0 @@
|
|||||||
{
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
|
||||||
"\n",
|
|
||||||
"Licensed under the MIT License."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# AutoML 15b: Regression with ensembling on remote compute\n",
|
|
||||||
"\n",
|
|
||||||
"In this example we use the scikit-learn's [diabetes dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html) to showcase how you can use AutoML for a simple regression problem.\n",
|
|
||||||
"\n",
|
|
||||||
"Make sure you have executed the [00.configuration](00.configuration.ipynb) before running this notebook.\n",
|
|
||||||
"\n",
|
|
||||||
"In this notebook you will learn how to:\n",
|
|
||||||
"1. Create an `Experiment` in an existing `Workspace`.\n",
|
|
||||||
"2. Configure AutoML using `AutoMLConfig`which enables an extra ensembling iteration.\n",
|
|
||||||
"3. Train the model using remote compute.\n",
|
|
||||||
"4. Explore the results.\n",
|
|
||||||
"5. Test the best fitted model.\n",
|
|
||||||
"\n",
|
|
||||||
"<b>Disclaimers / Limitations </b>\n",
|
|
||||||
"- Currently only Train/Validation split is supported; support for cross-validation will be coming soon.\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Create an Experiment\n",
|
|
||||||
"\n",
|
|
||||||
"As part of the setup you have already created an Azure ML `Workspace` object. For AutoML you will need to create an `Experiment` object, which is a named object in a `Workspace` used to run experiments."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import logging\n",
|
|
||||||
"import os\n",
|
|
||||||
"import random\n",
|
|
||||||
"\n",
|
|
||||||
"from matplotlib import pyplot as plt\n",
|
|
||||||
"from matplotlib.pyplot import imshow\n",
|
|
||||||
"import numpy as np\n",
|
|
||||||
"import pandas as pd\n",
|
|
||||||
"from sklearn import datasets\n",
|
|
||||||
"\n",
|
|
||||||
"import azureml.core\n",
|
|
||||||
"from azureml.core.experiment import Experiment\n",
|
|
||||||
"from azureml.core.workspace import Workspace\n",
|
|
||||||
"from azureml.train.automl import AutoMLConfig\n",
|
|
||||||
"from azureml.train.automl.run import AutoMLRun"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"ws = Workspace.from_config()\n",
|
|
||||||
"\n",
|
|
||||||
"# Choose a name for the experiment and specify the project folder.\n",
|
|
||||||
"experiment_name = 'automl-local-regression'\n",
|
|
||||||
"project_folder = './sample_projects/automl-local-regression'\n",
|
|
||||||
"\n",
|
|
||||||
"experiment = Experiment(ws, experiment_name)\n",
|
|
||||||
"\n",
|
|
||||||
"output = {}\n",
|
|
||||||
"output['SDK version'] = azureml.core.VERSION\n",
|
|
||||||
"output['Subscription ID'] = ws.subscription_id\n",
|
|
||||||
"output['Workspace Name'] = ws.name\n",
|
|
||||||
"output['Resource Group'] = ws.resource_group\n",
|
|
||||||
"output['Location'] = ws.location\n",
|
|
||||||
"output['Project Directory'] = project_folder\n",
|
|
||||||
"output['Experiment Name'] = experiment.name\n",
|
|
||||||
"pd.set_option('display.max_colwidth', -1)\n",
|
|
||||||
"pd.DataFrame(data = output, index = ['']).T"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Diagnostics\n",
|
|
||||||
"\n",
|
|
||||||
"Opt-in diagnostics for better experience, quality, and security of future releases."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.telemetry import set_diagnostics_collection\n",
|
|
||||||
"set_diagnostics_collection(send_diagnostics = True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Create a Remote Linux DSVM\n",
|
|
||||||
"**Note:** If creation fails with a message about Marketplace purchase eligibilty, start creation of a DSVM through the [Azure portal](https://portal.azure.com), and select \"Want to create programmatically\" to enable programmatic creation. Once you've enabled this setting, you can exit the portal without actually creating the DSVM, and creation of the DSVM through the notebook should work."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.compute import DsvmCompute\n",
|
|
||||||
"\n",
|
|
||||||
"dsvm_name = 'mydsvm'\n",
|
|
||||||
"try:\n",
|
|
||||||
" dsvm_compute = DsvmCompute(ws, dsvm_name)\n",
|
|
||||||
" print('Found an existing DSVM.')\n",
|
|
||||||
"except:\n",
|
|
||||||
" print('Creating a new DSVM.')\n",
|
|
||||||
" dsvm_config = DsvmCompute.provisioning_configuration(vm_size = \"Standard_D2_v2\")\n",
|
|
||||||
" dsvm_compute = DsvmCompute.create(ws, name = dsvm_name, provisioning_configuration = dsvm_config)\n",
|
|
||||||
" dsvm_compute.wait_for_completion(show_output = True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Create Get Data File\n",
|
|
||||||
"For remote executions you should author a `get_data.py` file containing a `get_data()` function. This file should be in the root directory of the project. You can encapsulate code to read data either from a blob storage or local disk in this file.\n",
|
|
||||||
"In this example, the `get_data()` function returns data using scikit-learn's `diabetes` dataset."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"%%writefile $project_folder/get_data.py\n",
|
|
||||||
"\n",
|
|
||||||
"# Load the diabetes dataset, a well-known built-in small dataset that comes with scikit-learn.\n",
|
|
||||||
"from sklearn.datasets import load_diabetes\n",
|
|
||||||
"from sklearn.linear_model import Ridge\n",
|
|
||||||
"from sklearn.metrics import mean_squared_error\n",
|
|
||||||
"from sklearn.model_selection import train_test_split\n",
|
|
||||||
"\n",
|
|
||||||
"def get_data():\n",
|
|
||||||
" X, y = load_diabetes(return_X_y = True)\n",
|
|
||||||
"\n",
|
|
||||||
" columns = ['age', 'gender', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6']\n",
|
|
||||||
"\n",
|
|
||||||
" X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size = 0.2, random_state = 0)\n",
|
|
||||||
" X_valid, X_test, y_valid, y_test = train_test_split(X_temp, y_temp, test_size = 0.5, random_state = 0)\n",
|
|
||||||
" return { \"X\" : X_train, \"y\" : y_train, \"X_valid\": X_valid, \"y_valid\": y_valid }"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Configure AutoML\n",
|
|
||||||
"\n",
|
|
||||||
"Instantiate an `AutoMLConfig` object to specify the settings and data used to run the experiment.\n",
|
|
||||||
"\n",
|
|
||||||
"|Property|Description|\n",
|
|
||||||
"|-|-|\n",
|
|
||||||
"|**task**|classification or regression|\n",
|
|
||||||
"|**primary_metric**|This is the metric that you want to optimize. Regression supports the following primary metrics: <br><i>spearman_correlation</i><br><i>normalized_root_mean_squared_error</i><br><i>r2_score</i><br><i>normalized_mean_absolute_error</i>|\n",
|
|
||||||
"|**max_time_sec**|Time limit in seconds for each iteration.|\n",
|
|
||||||
"|**iterations**|Number of iterations. In each iteration AutoML trains a specific pipeline with the data.|\n",
|
|
||||||
"|**enable_ensembling**|Flag to enable an ensembling iteration after all the other iterations complete.|\n",
|
|
||||||
"|**ensemble_iterations**|Number of iterations during which we choose a fitted pipeline to be part of the final ensemble.|\n",
|
|
||||||
"|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder.|"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"automl_config = AutoMLConfig(task = 'regression',\n",
|
|
||||||
" max_time_sec = 600,\n",
|
|
||||||
" iterations = 20,\n",
|
|
||||||
" primary_metric = 'spearman_correlation',\n",
|
|
||||||
" debug_log = 'regression.log',\n",
|
|
||||||
" verbosity = logging.INFO,\n",
|
|
||||||
" compute_target = dsvm_compute,\n",
|
|
||||||
" data_script = project_folder + \"/get_data.py\",\n",
|
|
||||||
" enable_ensembling = True,\n",
|
|
||||||
" ensemble_iterations = 5,\n",
|
|
||||||
" path = project_folder)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Train the Model\n",
|
|
||||||
"\n",
|
|
||||||
"Call the `submit` method on the experiment object and pass the run configuration. Execution of local runs is synchronous. Depending on the data and the number of iterations this can run for a while.\n",
|
|
||||||
"In this example, we specify `show_output = True` to print currently running iterations to the console."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"local_run = experiment.submit(automl_config, show_output = True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"local_run"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Explore the Results"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"#### Widget for Monitoring Runs\n",
|
|
||||||
"\n",
|
|
||||||
"The widget will first report a \"loading\" status while running the first iteration. After completing the first iteration, an auto-updating graph and table will be shown. The widget will refresh once per minute, so you should see the graph update as child runs complete.\n",
|
|
||||||
"\n",
|
|
||||||
"**Note:** The widget displays a link at the bottom. Use this link to open a web interface to explore the individual run details."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.train.widgets import RunDetails\n",
|
|
||||||
"RunDetails(local_run).show() "
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"\n",
|
|
||||||
"#### Retrieve All Child Runs\n",
|
|
||||||
"You can also use SDK methods to fetch all the child runs and see individual metrics that we log."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"children = list(local_run.get_children())\n",
|
|
||||||
"metricslist = {}\n",
|
|
||||||
"for run in children:\n",
|
|
||||||
" properties = run.get_properties()\n",
|
|
||||||
" metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)}\n",
|
|
||||||
" metricslist[int(properties['iteration'])] = metrics\n",
|
|
||||||
"\n",
|
|
||||||
"rundata = pd.DataFrame(metricslist).sort_index(1)\n",
|
|
||||||
"rundata"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Retrieve the Best Model\n",
|
|
||||||
"\n",
|
|
||||||
"Below we select the best pipeline from our iterations. The `get_output` method on `automl_classifier` returns the best run and the fitted model for the last invocation. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"best_run, fitted_model = local_run.get_output()\n",
|
|
||||||
"print(best_run)\n",
|
|
||||||
"print(fitted_model)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"#### Best Model Based on Any Other Metric\n",
|
|
||||||
"Show the run and the model that has the smallest `root_mean_squared_error` value."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"lookup_metric = \"root_mean_squared_error\"\n",
|
|
||||||
"best_run, fitted_model = local_run.get_output(metric = lookup_metric)\n",
|
|
||||||
"print(best_run)\n",
|
|
||||||
"print(fitted_model)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Test the Best Model (Ensemble)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Predict on training and test set, and calculate residual values."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from sklearn.datasets import load_diabetes\n",
|
|
||||||
"from sklearn.linear_model import Ridge\n",
|
|
||||||
"from sklearn.metrics import mean_squared_error\n",
|
|
||||||
"from sklearn.model_selection import train_test_split\n",
|
|
||||||
"\n",
|
|
||||||
"X, y = load_diabetes(return_X_y = True)\n",
|
|
||||||
"\n",
|
|
||||||
"X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size = 0.2, random_state = 0)\n",
|
|
||||||
"X_valid, X_test, y_valid, y_test = train_test_split(X_temp, y_temp, test_size = 0.5, random_state = 0)\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"y_pred_train = fitted_model.predict(X_train)\n",
|
|
||||||
"y_residual_train = y_train - y_pred_train\n",
|
|
||||||
"\n",
|
|
||||||
"y_pred_test = fitted_model.predict(X_test)\n",
|
|
||||||
"y_residual_test = y_test - y_pred_test"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"%matplotlib inline\n",
|
|
||||||
"import matplotlib.pyplot as plt\n",
|
|
||||||
"import numpy as np\n",
|
|
||||||
"from sklearn import datasets\n",
|
|
||||||
"from sklearn.metrics import mean_squared_error, r2_score\n",
|
|
||||||
"\n",
|
|
||||||
"# Set up a multi-plot chart.\n",
|
|
||||||
"f, (a0, a1) = plt.subplots(1, 2, gridspec_kw = {'width_ratios':[1, 1], 'wspace':0, 'hspace': 0})\n",
|
|
||||||
"f.suptitle('Regression Residual Values', fontsize = 18)\n",
|
|
||||||
"f.set_figheight(6)\n",
|
|
||||||
"f.set_figwidth(16)\n",
|
|
||||||
"\n",
|
|
||||||
"# Plot residual values of training set.\n",
|
|
||||||
"a0.axis([0, 360, -200, 200])\n",
|
|
||||||
"a0.plot(y_residual_train, 'bo', alpha = 0.5)\n",
|
|
||||||
"a0.plot([-10,360],[0,0], 'r-', lw = 3)\n",
|
|
||||||
"a0.text(16,170,'RMSE = {0:.2f}'.format(np.sqrt(mean_squared_error(y_train, y_pred_train))), fontsize = 12)\n",
|
|
||||||
"a0.text(16,140,'R2 score = {0:.2f}'.format(r2_score(y_train, y_pred_train)), fontsize = 12)\n",
|
|
||||||
"a0.set_xlabel('Training samples', fontsize = 12)\n",
|
|
||||||
"a0.set_ylabel('Residual Values', fontsize = 12)\n",
|
|
||||||
"\n",
|
|
||||||
"# Plot a histogram.\n",
|
|
||||||
"a0.hist(y_residual_train, orientation = 'horizontal', color = 'b', bins = 10, histtype = 'step');\n",
|
|
||||||
"a0.hist(y_residual_train, orientation = 'horizontal', color = 'b', alpha = 0.2, bins = 10);\n",
|
|
||||||
"\n",
|
|
||||||
"# Plot residual values of test set.\n",
|
|
||||||
"a1.axis([0, 90, -200, 200])\n",
|
|
||||||
"a1.plot(y_residual_test, 'bo', alpha = 0.5)\n",
|
|
||||||
"a1.plot([-10,360],[0,0], 'r-', lw = 3)\n",
|
|
||||||
"a1.text(5,170,'RMSE = {0:.2f}'.format(np.sqrt(mean_squared_error(y_test, y_pred_test))), fontsize = 12)\n",
|
|
||||||
"a1.text(5,140,'R2 score = {0:.2f}'.format(r2_score(y_test, y_pred_test)), fontsize = 12)\n",
|
|
||||||
"a1.set_xlabel('Test samples', fontsize = 12)\n",
|
|
||||||
"a1.set_yticklabels([])\n",
|
|
||||||
"\n",
|
|
||||||
"# Plot a histogram.\n",
|
|
||||||
"a1.hist(y_residual_test, orientation = 'horizontal', color = 'b', bins = 10, histtype = 'step')\n",
|
|
||||||
"a1.hist(y_residual_test, orientation = 'horizontal', color = 'b', alpha = 0.2, bins = 10)\n",
|
|
||||||
"\n",
|
|
||||||
"plt.show()"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"authors": [
|
|
||||||
{
|
|
||||||
"name": "ratanase"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3.6",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python36"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.6.6"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 2
|
|
||||||
}
|
|
||||||
141
automl/README.md
141
automl/README.md
@@ -1,52 +1,24 @@
|
|||||||
# Table of Contents
|
# Table of Contents
|
||||||
1. [Automated ML Introduction](#introduction)
|
1. [Auto ML Introduction](#introduction)
|
||||||
1. [Running samples in Azure Notebooks](#jupyter)
|
2. [Running samples in a Local Conda environment](#localconda)
|
||||||
1. [Running samples in a Local Conda environment](#localconda)
|
3. [Auto ML SDK Sample Notebooks](#samples)
|
||||||
1. [Automated ML SDK Sample Notebooks](#samples)
|
4. [Documentation](#documentation)
|
||||||
1. [Documentation](#documentation)
|
5. [Running using python command](#pythoncommand)
|
||||||
1. [Running using python command](#pythoncommand)
|
6. [Troubleshooting](#troubleshooting)
|
||||||
1. [Troubleshooting](#troubleshooting)
|
|
||||||
|
|
||||||
<a name="introduction"></a>
|
|
||||||
# Automated ML introduction
|
|
||||||
Automated machine learning (automated ML) builds high quality machine learning models for you by automating model and hyperparameter selection. Bring a labelled dataset that you want to build a model for, automated ML will give you a high quality machine learning model that you can use for predictions.
|
|
||||||
|
|
||||||
|
# Auto ML Introduction <a name="introduction"></a>
|
||||||
|
AutoML builds high quality Machine Learning models for you by automating model and hyperparameter selection. Bring a labelled dataset that you want to build a model for, AutoML will give you a high quality machine learning model that you can use for predictions.
|
||||||
|
|
||||||
If you are new to Data Science, AutoML will help you get jumpstarted by simplifying machine learning model building. It abstracts you from needing to perform model selection, hyperparameter selection and in one step creates a high quality trained model for you to use.
|
If you are new to Data Science, AutoML will help you get jumpstarted by simplifying machine learning model building. It abstracts you from needing to perform model selection, hyperparameter selection and in one step creates a high quality trained model for you to use.
|
||||||
|
|
||||||
If you are an experienced data scientist, AutoML will help increase your productivity by intelligently performing the model and hyperparameter selection for your training and generates high quality models much quicker than manually specifying several combinations of the parameters and running training jobs. AutoML provides visibility and access to all the training jobs and the performance characteristics of the models to help you further tune the pipeline if you desire.
|
If you are an experienced data scientist, AutoML will help increase your productivity by intelligently performing the model and hyperparameter selection for your training and generates high quality models much quicker than manually specifying several combinations of the parameters and running training jobs. AutoML provides visibility and access to all the training jobs and the performance characteristics of the models to help you further tune the pipeline if you desire.
|
||||||
|
|
||||||
<a name="jupyter"></a>
|
|
||||||
## Running samples in Azure Notebooks - Jupyter based notebooks in the Azure cloud
|
|
||||||
|
|
||||||
1. [](https://aka.ms/aml-clone-azure-notebooks)
|
# Running samples in a Local Conda environment <a name="localconda"></a>
|
||||||
[Import sample notebooks ](https://aka.ms/aml-clone-azure-notebooks) into Azure Notebooks.
|
|
||||||
1. Follow the instructions in the [../00.configuration](00.configuration.ipynb) notebook to create and connect to a workspace.
|
|
||||||
1. Open one of the sample notebooks.
|
|
||||||
|
|
||||||
**Make sure the Azure Notebook kernel is set to `Python 3.6`** when you open a notebook.
|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||
<a name="localconda"></a>
|
You can run these notebooks in Azure Notebooks without any extra installation. To run these notebook on your own notebook server, use these installation instructions.
|
||||||
## Running samples in a Local Conda environment
|
|
||||||
|
|
||||||
To run these notebook on your own notebook server, use these installation instructions.
|
|
||||||
|
|
||||||
The instructions below will install everything you need and then start a Jupyter notebook. To start your Jupyter notebook manually, use:
|
|
||||||
|
|
||||||
```
|
|
||||||
conda activate azure_automl
|
|
||||||
jupyter notebook
|
|
||||||
```
|
|
||||||
|
|
||||||
or on Mac:
|
|
||||||
|
|
||||||
```
|
|
||||||
source activate azure_automl
|
|
||||||
jupyter notebook
|
|
||||||
```
|
|
||||||
|
|
||||||
|
It is best if you create a new conda environment locally to try this SDK, so it doesn't mess up with your existing Python environment.
|
||||||
|
|
||||||
### 1. Install mini-conda from [here](https://conda.io/miniconda.html), choose Python 3.7 or higher.
|
### 1. Install mini-conda from [here](https://conda.io/miniconda.html), choose Python 3.7 or higher.
|
||||||
- **Note**: if you already have conda installed, you can keep using it but it should be version 4.4.10 or later (as shown by: conda -V). If you have a previous version installed, you can update it using the command: conda update conda.
|
- **Note**: if you already have conda installed, you can keep using it but it should be version 4.4.10 or later (as shown by: conda -V). If you have a previous version installed, you can update it using the command: conda update conda.
|
||||||
@@ -57,7 +29,7 @@ There's no need to install mini-conda specifically.
|
|||||||
|
|
||||||
### 3. Setup a new conda environment
|
### 3. Setup a new conda environment
|
||||||
The **automl/automl_setup** script creates a new conda environment, installs the necessary packages, configures the widget and starts a jupyter notebook.
|
The **automl/automl_setup** script creates a new conda environment, installs the necessary packages, configures the widget and starts a jupyter notebook.
|
||||||
It takes the conda environment name as an optional parameter. The default conda environment name is azure_automl. The exact command depends on the operating system. It can take about 10 minutes to execute.
|
It takes the conda environment name as an optional parameter. The default conda environment name is azure_automl. The exact command depends on the operating system. It can take about 30 minutes to execute.
|
||||||
## Windows
|
## Windows
|
||||||
Start a conda command windows, cd to the **automl** folder where the sample notebooks were extracted and then run:
|
Start a conda command windows, cd to the **automl** folder where the sample notebooks were extracted and then run:
|
||||||
```
|
```
|
||||||
@@ -76,19 +48,19 @@ bash automl_setup_mac.sh
|
|||||||
cd to the **automl** folder where the sample notebooks were extracted and then run:
|
cd to the **automl** folder where the sample notebooks were extracted and then run:
|
||||||
|
|
||||||
```
|
```
|
||||||
bash automl_setup_linux.sh
|
automl_setup_linux.sh
|
||||||
```
|
```
|
||||||
|
|
||||||
### 4. Running configuration.ipynb
|
### 4. Running configuration.ipynb
|
||||||
- Before running any samples you next need to run the configuration notebook. Click on 00.configuration.ipynb notebook
|
- Before running any samples you next need to run the configuration notebook. Click on 00.configuration.ipynb notebook
|
||||||
|
- Please make sure you use the Python [conda env:azure_automl] kernel when running this notebook.
|
||||||
- Execute the cells in the notebook to Register Machine Learning Services Resource Provider and create a workspace. (*instructions in notebook*)
|
- Execute the cells in the notebook to Register Machine Learning Services Resource Provider and create a workspace. (*instructions in notebook*)
|
||||||
|
|
||||||
### 5. Running Samples
|
### 5. Running Samples
|
||||||
- Please make sure you use the Python [conda env:azure_automl] kernel when trying the sample Notebooks.
|
- Please make sure you use the Python [conda env:azure_automl] kernel when trying the sample Notebooks.
|
||||||
- Follow the instructions in the individual notebooks to explore various features in AutoML
|
- Follow the instructions in the individual notebooks to explore various features in AutoML
|
||||||
|
|
||||||
<a name="samples"></a>
|
# Auto ML SDK Sample Notebooks <a name="samples"></a>
|
||||||
# Automated ML SDK Sample Notebooks
|
|
||||||
- [00.configuration.ipynb](00.configuration.ipynb)
|
- [00.configuration.ipynb](00.configuration.ipynb)
|
||||||
- Register Machine Learning Services Resource Provider
|
- Register Machine Learning Services Resource Provider
|
||||||
- Create new Azure ML Workspace
|
- Create new Azure ML Workspace
|
||||||
@@ -115,7 +87,7 @@ bash automl_setup_linux.sh
|
|||||||
|
|
||||||
- [03b.auto-ml-remote-batchai.ipynb](03b.auto-ml-remote-batchai.ipynb)
|
- [03b.auto-ml-remote-batchai.ipynb](03b.auto-ml-remote-batchai.ipynb)
|
||||||
- Dataset: scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits)
|
- Dataset: scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits)
|
||||||
- Example of using automated ML for classification using a remote Batch AI compute for training
|
- Example of using Auto ML for classification using a remote Batch AI compute for training
|
||||||
- Parallel execution of iterations
|
- Parallel execution of iterations
|
||||||
- Async tracking of progress
|
- Async tracking of progress
|
||||||
- Cancelling individual iterations or entire run
|
- Cancelling individual iterations or entire run
|
||||||
@@ -134,7 +106,7 @@ bash automl_setup_linux.sh
|
|||||||
- Specify a target metrics to indicate stopping criteria
|
- Specify a target metrics to indicate stopping criteria
|
||||||
- Handling Missing Data in the input
|
- Handling Missing Data in the input
|
||||||
|
|
||||||
- [06.auto-ml-sparse-data-train-test-split.ipynb](06.auto-ml-sparse-data-train-test-split.ipynb)
|
- [06.auto-ml-sparse-data-custom-cv-split.ipynb](06.auto-ml-sparse-data-custom-cv-split.ipynb)
|
||||||
- Dataset: Scikit learn's [20newsgroup](http://scikit-learn.org/stable/datasets/twenty_newsgroups.html)
|
- Dataset: Scikit learn's [20newsgroup](http://scikit-learn.org/stable/datasets/twenty_newsgroups.html)
|
||||||
- Handle sparse datasets
|
- Handle sparse datasets
|
||||||
- Specify custom train and validation set
|
- Specify custom train and validation set
|
||||||
@@ -143,11 +115,11 @@ bash automl_setup_linux.sh
|
|||||||
- List all projects for the workspace
|
- List all projects for the workspace
|
||||||
- List all AutoML Runs for a given project
|
- List all AutoML Runs for a given project
|
||||||
- Get details for a AutoML Run. (Automl settings, run widget & all metrics)
|
- Get details for a AutoML Run. (Automl settings, run widget & all metrics)
|
||||||
- Download fitted pipeline for any iteration
|
- Downlaod fitted pipeline for any iteration
|
||||||
|
|
||||||
- [08.auto-ml-remote-execution-with-DataStore.ipynb](08.auto-ml-remote-execution-with-DataStore.ipynb)
|
- [08.auto-ml-remote-execution-with-text-file-on-DSVM](08.auto-ml-remote-execution-with-text-file-on-DSVM.ipynb)
|
||||||
- Dataset: scikit learn's [digit dataset](https://innovate.burningman.org/datasets-page/)
|
- Dataset: scikit learn's [digit dataset](https://innovate.burningman.org/datasets-page/)
|
||||||
- Download the data and store it in DataStore.
|
- Download the data and store it in the DSVM to improve performance.
|
||||||
|
|
||||||
- [09.auto-ml-classification-with-deployment.ipynb](09.auto-ml-classification-with-deployment.ipynb)
|
- [09.auto-ml-classification-with-deployment.ipynb](09.auto-ml-classification-with-deployment.ipynb)
|
||||||
- Dataset: scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits)
|
- Dataset: scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits)
|
||||||
@@ -171,32 +143,20 @@ bash automl_setup_linux.sh
|
|||||||
- [13.auto-ml-dataprep.ipynb](13.auto-ml-dataprep.ipynb)
|
- [13.auto-ml-dataprep.ipynb](13.auto-ml-dataprep.ipynb)
|
||||||
- Using DataPrep for reading data
|
- Using DataPrep for reading data
|
||||||
|
|
||||||
- [14.auto-ml-model-explanation.ipynb](14.auto-ml-model-explanation.ipynb)
|
- [14a.auto-ml-classification-ensemble.ipynb](14a.auto-ml-classification-ensemble.ipynb)
|
||||||
- Dataset: seaborn's [iris dataset](https://seaborn.pydata.org/generated/seaborn.load_dataset.html)
|
- Classification with ensembling
|
||||||
- Explaining the AutoML classification pipeline
|
|
||||||
- Visualizing feature importance in widget
|
|
||||||
|
|
||||||
- [15a.auto-ml-classification-ensemble.ipynb](15a.auto-ml-classification-ensemble.ipynb)
|
|
||||||
- Dataset: scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits)
|
|
||||||
- Enables an extra iteration for generating an Ensemble of models
|
|
||||||
- Uses local compute for training
|
|
||||||
|
|
||||||
- [15b.auto-ml-regression-ensemble.ipynb](15b.auto-ml-regression-ensemble.ipynb)
|
- [14b.auto-ml-regression-ensemble.ipynb](14b.auto-ml-regression-ensemble.ipynb)
|
||||||
- Dataset: scikit learn's [diabetes dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html)
|
- Regression with ensembling
|
||||||
- Enables an extra iteration for generating an Ensemble of models
|
|
||||||
- Uses remote Linux DSVM for training
|
|
||||||
|
|
||||||
<a name="documentation"></a>
|
# Documentation <a name="documentation"></a>
|
||||||
# Documentation
|
|
||||||
## Table of Contents
|
## Table of Contents
|
||||||
1. [Automated ML Settings ](#automlsettings)
|
1. [Auto ML Settings ](#automlsettings)
|
||||||
1. [Cross validation split options](#cvsplits)
|
2. [Cross validation split options](#cvsplits)
|
||||||
1. [Get Data Syntax](#getdata)
|
3. [Get Data Syntax](#getdata)
|
||||||
1. [Data pre-processing and featurization](#preprocessing)
|
4. [Data pre-processing and featurization](#preprocessing)
|
||||||
|
|
||||||
<a name="automlsettings"></a>
|
|
||||||
## Automated ML Settings
|
|
||||||
|
|
||||||
|
## Auto ML Settings <a name="automlsettings"></a>
|
||||||
|Property|Description|Default|
|
|Property|Description|Default|
|
||||||
|-|-|-|
|
|-|-|-|
|
||||||
|**primary_metric**|This is the metric that you want to optimize.<br><br> Classification supports the following primary metrics <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>balanced_accuracy</i><br><i>average_precision_score_weighted</i><br><i>precision_score_weighted</i><br><br> Regression supports the following primary metrics <br><i>spearman_correlation</i><br><i>normalized_root_mean_squared_error</i><br><i>r2_score</i><br><i>normalized_mean_absolute_error</i><br><i>normalized_root_mean_squared_log_error</i>| Classification: accuracy <br><br> Regression: spearman_correlation
|
|**primary_metric**|This is the metric that you want to optimize.<br><br> Classification supports the following primary metrics <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>balanced_accuracy</i><br><i>average_precision_score_weighted</i><br><i>precision_score_weighted</i><br><br> Regression supports the following primary metrics <br><i>spearman_correlation</i><br><i>normalized_root_mean_squared_error</i><br><i>r2_score</i><br><i>normalized_mean_absolute_error</i><br><i>normalized_root_mean_squared_log_error</i>| Classification: accuracy <br><br> Regression: spearman_correlation
|
||||||
@@ -208,10 +168,9 @@ bash automl_setup_linux.sh
|
|||||||
|**preprocess**|*True/False* <br>Setting this to *True* enables preprocessing <br>on the input to handle missing data, and perform some common feature extraction<br>*Note: If input data is Sparse you cannot use preprocess=True*|False|
|
|**preprocess**|*True/False* <br>Setting this to *True* enables preprocessing <br>on the input to handle missing data, and perform some common feature extraction<br>*Note: If input data is Sparse you cannot use preprocess=True*|False|
|
||||||
|**max_cores_per_iteration**| Indicates how many cores on the compute target would be used to train a single pipeline.<br> You can set it to *-1* to use all cores|1|
|
|**max_cores_per_iteration**| Indicates how many cores on the compute target would be used to train a single pipeline.<br> You can set it to *-1* to use all cores|1|
|
||||||
|**exit_score**|*double* value indicating the target for *primary_metric*. <br> Once the target is surpassed the run terminates|None|
|
|**exit_score**|*double* value indicating the target for *primary_metric*. <br> Once the target is surpassed the run terminates|None|
|
||||||
|**blacklist_algos**|*Array* of *strings* indicating pipelines to ignore for Auto ML.<br><br> Allowed values for **Classification**<br><i>LogisticRegression</i><br><i>SGDClassifierWrapper</i><br><i>NBWrapper</i><br><i>BernoulliNB</i><br><i>SVCWrapper</i><br><i>LinearSVMWrapper</i><br><i>KNeighborsClassifier</i><br><i>DecisionTreeClassifier</i><br><i>RandomForestClassifier</i><br><i>ExtraTreesClassifier</i><br><i>LightGBMClassifier</i><br><br>Allowed values for **Regression**<br><i>ElasticNet</i><br><i>GradientBoostingRegressor</i><br><i>DecisionTreeRegressor</i><br><i>KNeighborsRegressor</i><br><i>LassoLars</i><br><i>SGDRegressor</i><br><i>RandomForestRegressor</i><br><i>ExtraTreesRegressor</i>|None|
|
|**blacklist_algos**|*Array* of *strings* indicating pipelines to ignore for Auto ML.<br><br> Allowed values for **Classification**<br><i>LogisticRegression</i><br><i>SGDClassifierWrapper</i><br><i>NBWrapper</i><br><i>BernoulliNB</i><br><i>SVCWrapper</i><br><i>LinearSVMWrapper</i><br><i>KNeighborsClassifier</i><br><i>DecisionTreeClassifier</i><br><i>RandomForestClassifier</i><br><i>ExtraTreesClassifier</i><br><i>gradient boosting</i><br><i>LightGBMClassifier</i><br><br>Allowed values for **Regression**<br><i>ElasticNet</i><br><i>GradientBoostingRegressor</i><br><i>DecisionTreeRegressor</i><br><i>KNeighborsRegressor</i><br><i>LassoLars</i><br><i>SGDRegressor</i><br><i>RandomForestRegressor</i><br><i>ExtraTreesRegressor</i>|None|
|
||||||
|
|
||||||
<a name="cvsplits"></a>
|
## Cross validation split options <a name="cvsplits"></a>
|
||||||
## Cross validation split options
|
|
||||||
### K-Folds Cross Validation
|
### K-Folds Cross Validation
|
||||||
Use *n_cross_validations* setting to specify the number of cross validations. The training data set will be randomly split into *n_cross_validations* folds of equal size. During each cross validation round, one of the folds will be used for validation of the model trained on the remaining folds. This process repeats for *n_cross_validations* rounds until each fold is used once as validation set. Finally, the average scores accross all *n_cross_validations* rounds will be reported, and the corresponding model will be retrained on the whole training data set.
|
Use *n_cross_validations* setting to specify the number of cross validations. The training data set will be randomly split into *n_cross_validations* folds of equal size. During each cross validation round, one of the folds will be used for validation of the model trained on the remaining folds. This process repeats for *n_cross_validations* rounds until each fold is used once as validation set. Finally, the average scores accross all *n_cross_validations* rounds will be reported, and the corresponding model will be retrained on the whole training data set.
|
||||||
|
|
||||||
@@ -221,8 +180,7 @@ Use *validation_size* to specify the percentage of the training data set that sh
|
|||||||
### Custom train and validation set
|
### Custom train and validation set
|
||||||
You can specify seperate train and validation set either through the get_data() or directly to the fit method.
|
You can specify seperate train and validation set either through the get_data() or directly to the fit method.
|
||||||
|
|
||||||
<a name="getdata"></a>
|
## get_data() syntax <a name="getdata"></a>
|
||||||
## get_data() syntax
|
|
||||||
The *get_data()* function can be used to return a dictionary with these values:
|
The *get_data()* function can be used to return a dictionary with these values:
|
||||||
|
|
||||||
|Key|Type|Dependency|Mutually Exclusive with|Description|
|
|Key|Type|Dependency|Mutually Exclusive with|Description|
|
||||||
@@ -238,23 +196,21 @@ The *get_data()* function can be used to return a dictionary with these values:
|
|||||||
|columns|Array of strings|data_train||*Optional* Whitelist of columns to use for features|
|
|columns|Array of strings|data_train||*Optional* Whitelist of columns to use for features|
|
||||||
|cv_splits_indices|Array of integers|data_train||*Optional* List of indexes to split the data for cross validation|
|
|cv_splits_indices|Array of integers|data_train||*Optional* List of indexes to split the data for cross validation|
|
||||||
|
|
||||||
<a name="preprocessing"></a>
|
## Data pre-processing and featurization <a name="preprocessing"></a>
|
||||||
## Data pre-processing and featurization
|
If you use "preprocess=True", the following data preprocessing steps are performed automatically for you:
|
||||||
If you use `preprocess=True`, the following data preprocessing steps are performed automatically for you:
|
### 1. Dropping high cardinality or no variance features
|
||||||
|
- Features with no useful information are dropped from training and validation sets. These include features with all values missing, same value across all rows or with extremely high cardinality (e.g., hashes, IDs or GUIDs).
|
||||||
|
### 2. Missing value imputation
|
||||||
|
- For numerical features, missing values are imputed with average of values in the column.
|
||||||
|
- For categorical features, missing values are imputed with most frequent value.
|
||||||
|
### 3. Generating additional features
|
||||||
|
- For DateTime features: Year, Month, Day, Day of week, Day of year, Quarter, Week of the year, Hour, Minute, Second.
|
||||||
|
- For Text features: Term frequency based on bi-grams and tri-grams, Count vectorizer.
|
||||||
|
### 4. Transformations and encodings
|
||||||
|
- Numeric features with very few unique values are transformed into categorical features.
|
||||||
|
- Depending on cardinality of categorical features label encoding or (hashing) one-hot encoding is performed.
|
||||||
|
|
||||||
1. Dropping high cardinality or no variance features
|
# Running using python command <a name="pythoncommand"></a>
|
||||||
- Features with no useful information are dropped from training and validation sets. These include features with all values missing, same value across all rows or with extremely high cardinality (e.g., hashes, IDs or GUIDs).
|
|
||||||
2. Missing value imputation
|
|
||||||
- For numerical features, missing values are imputed with average of values in the column.
|
|
||||||
- For categorical features, missing values are imputed with most frequent value.
|
|
||||||
3. Generating additional features
|
|
||||||
- For DateTime features: Year, Month, Day, Day of week, Day of year, Quarter, Week of the year, Hour, Minute, Second.
|
|
||||||
- For Text features: Term frequency based on bi-grams and tri-grams, Count vectorizer.
|
|
||||||
4. Transformations and encodings
|
|
||||||
- Numeric features with very few unique values are transformed into categorical features.
|
|
||||||
|
|
||||||
<a name="pythoncommand"></a>
|
|
||||||
# Running using python command
|
|
||||||
Jupyter notebook provides a File / Download as / Python (.py) option for saving the notebook as a Python file.
|
Jupyter notebook provides a File / Download as / Python (.py) option for saving the notebook as a Python file.
|
||||||
You can then run this file using the python command.
|
You can then run this file using the python command.
|
||||||
However, on Windows the file needs to be modified before it can be run.
|
However, on Windows the file needs to be modified before it can be run.
|
||||||
@@ -264,8 +220,7 @@ The following condition must be added to the main code in the file:
|
|||||||
|
|
||||||
The main code of the file must be indented so that it is under this condition.
|
The main code of the file must be indented so that it is under this condition.
|
||||||
|
|
||||||
<a name="troubleshooting"></a>
|
# Troubleshooting <a name="troubleshooting"></a>
|
||||||
# Troubleshooting
|
|
||||||
## Iterations fail and the log contains "MemoryError"
|
## Iterations fail and the log contains "MemoryError"
|
||||||
This can be caused by insufficient memory on the DSVM. AutoML loads all training data into memory. So, the available memory should be more than the training data size.
|
This can be caused by insufficient memory on the DSVM. AutoML loads all training data into memory. So, the available memory should be more than the training data size.
|
||||||
If you are using a remote DSVM, memory is needed for each concurrent iteration. The concurrent_iterations setting specifies the maximum concurrent iterations. For example, if the training data size is 8Gb and concurrent_iterations is set to 10, the minimum memory required is at least 80Gb.
|
If you are using a remote DSVM, memory is needed for each concurrent iteration. The concurrent_iterations setting specifies the maximum concurrent iterations. For example, if the training data size is 8Gb and concurrent_iterations is set to 10, the minimum memory required is at least 80Gb.
|
||||||
|
|||||||
@@ -5,15 +5,15 @@ dependencies:
|
|||||||
- python=3.6
|
- python=3.6
|
||||||
- nb_conda
|
- nb_conda
|
||||||
- matplotlib
|
- matplotlib
|
||||||
- numpy>=1.11.0,<1.15.0
|
- numpy>=1.11.0,<1.16.0
|
||||||
- cython
|
|
||||||
- urllib3<1.24
|
|
||||||
- scipy>=0.19.0,<0.20.0
|
- scipy>=0.19.0,<0.20.0
|
||||||
- scikit-learn>=0.18.0,<=0.19.1
|
- scikit-learn>=0.18.0,<=0.19.1
|
||||||
- pandas>=0.22.0,<0.23.0
|
- pandas>=0.22.0,<0.23.0
|
||||||
|
|
||||||
- pip:
|
- pip:
|
||||||
# Required packages for AzureML execution, history, and data preparation.
|
# Required packages for AzureML execution, history, and data preparation.
|
||||||
- azureml-sdk[automl,notebooks]
|
- --extra-index-url https://pypi.python.org/simple
|
||||||
|
- azureml-sdk[automl]
|
||||||
|
- azureml-train-widgets
|
||||||
- pandas_ml
|
- pandas_ml
|
||||||
|
|
||||||
|
|||||||
@@ -1,21 +1,16 @@
|
|||||||
@echo off
|
@echo off
|
||||||
set conda_env_name=%1
|
set conda_env_name=%1
|
||||||
set automl_env_file=%2
|
|
||||||
set PIP_NO_WARN_SCRIPT_LOCATION=0
|
|
||||||
|
|
||||||
IF "%conda_env_name%"=="" SET conda_env_name="azure_automl"
|
IF "%conda_env_name%"=="" SET conda_env_name="azure_automl"
|
||||||
IF "%automl_env_file%"=="" SET automl_env_file="automl_env.yml"
|
|
||||||
|
|
||||||
IF NOT EXIST %automl_env_file% GOTO YmlMissing
|
|
||||||
|
|
||||||
call conda activate %conda_env_name% 2>nul:
|
call conda activate %conda_env_name% 2>nul:
|
||||||
|
|
||||||
if not errorlevel 1 (
|
if not errorlevel 1 (
|
||||||
echo Upgrading azureml-sdk[automl] in existing conda environment %conda_env_name%
|
echo Upgrading azureml-sdk[automl] in existing conda environment %conda_env_name%
|
||||||
call pip install --upgrade azureml-sdk[automl,notebooks]
|
call pip install --upgrade azureml-sdk[automl]
|
||||||
if errorlevel 1 goto ErrorExit
|
if errorlevel 1 goto ErrorExit
|
||||||
) else (
|
) else (
|
||||||
call conda env create -f %automl_env_file% -n %conda_env_name%
|
call conda env create -f automl_env.yml -n %conda_env_name%
|
||||||
)
|
)
|
||||||
|
|
||||||
call conda activate %conda_env_name% 2>nul:
|
call conda activate %conda_env_name% 2>nul:
|
||||||
@@ -23,10 +18,10 @@ if errorlevel 1 goto ErrorExit
|
|||||||
|
|
||||||
call pip install psutil
|
call pip install psutil
|
||||||
|
|
||||||
call jupyter nbextension install --py azureml.train.widgets --user
|
call jupyter nbextension install --py azureml.train.widgets
|
||||||
if errorlevel 1 goto ErrorExit
|
if errorlevel 1 goto ErrorExit
|
||||||
|
|
||||||
call jupyter nbextension enable --py azureml.train.widgets --user
|
call jupyter nbextension enable --py azureml.train.widgets
|
||||||
if errorlevel 1 goto ErrorExit
|
if errorlevel 1 goto ErrorExit
|
||||||
|
|
||||||
echo.
|
echo.
|
||||||
@@ -41,9 +36,6 @@ jupyter notebook --log-level=50
|
|||||||
|
|
||||||
goto End
|
goto End
|
||||||
|
|
||||||
:YmlMissing
|
|
||||||
echo File %automl_env_file% not found.
|
|
||||||
|
|
||||||
:ErrorExit
|
:ErrorExit
|
||||||
echo Install failed
|
echo Install failed
|
||||||
|
|
||||||
|
|||||||
@@ -1,30 +1,18 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
CONDA_ENV_NAME=$1
|
CONDA_ENV_NAME=$1
|
||||||
AUTOML_ENV_FILE=$2
|
|
||||||
PIP_NO_WARN_SCRIPT_LOCATION=0
|
|
||||||
|
|
||||||
if [ "$CONDA_ENV_NAME" == "" ]
|
if [ "$CONDA_ENV_NAME" == "" ]
|
||||||
then
|
then
|
||||||
CONDA_ENV_NAME="azure_automl"
|
CONDA_ENV_NAME="azure_automl"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ "$AUTOML_ENV_FILE" == "" ]
|
|
||||||
then
|
|
||||||
AUTOML_ENV_FILE="automl_env.yml"
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ ! -f $AUTOML_ENV_FILE ]; then
|
|
||||||
echo "File $AUTOML_ENV_FILE not found"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
if source activate $CONDA_ENV_NAME 2> /dev/null
|
if source activate $CONDA_ENV_NAME 2> /dev/null
|
||||||
then
|
then
|
||||||
echo "Upgrading azureml-sdk[automl] in existing conda environment" $CONDA_ENV_NAME
|
echo "Upgrading azureml-sdk[automl] in existing conda environment" $CONDA_ENV_NAME
|
||||||
pip install --upgrade azureml-sdk[automl,notebooks]
|
pip install --upgrade azureml-sdk[automl]
|
||||||
else
|
else
|
||||||
conda env create -f $AUTOML_ENV_FILE -n $CONDA_ENV_NAME &&
|
conda env create -f automl_env.yml -n $CONDA_ENV_NAME &&
|
||||||
source activate $CONDA_ENV_NAME &&
|
source activate $CONDA_ENV_NAME &&
|
||||||
jupyter nbextension install --py azureml.train.widgets --user &&
|
jupyter nbextension install --py azureml.train.widgets --user &&
|
||||||
jupyter nbextension enable --py azureml.train.widgets --user &&
|
jupyter nbextension enable --py azureml.train.widgets --user &&
|
||||||
|
|||||||
@@ -1,30 +1,18 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
CONDA_ENV_NAME=$1
|
CONDA_ENV_NAME=$1
|
||||||
AUTOML_ENV_FILE=$2
|
|
||||||
PIP_NO_WARN_SCRIPT_LOCATION=0
|
|
||||||
|
|
||||||
if [ "$CONDA_ENV_NAME" == "" ]
|
if [ "$CONDA_ENV_NAME" == "" ]
|
||||||
then
|
then
|
||||||
CONDA_ENV_NAME="azure_automl"
|
CONDA_ENV_NAME="azure_automl"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ "$AUTOML_ENV_FILE" == "" ]
|
|
||||||
then
|
|
||||||
AUTOML_ENV_FILE="automl_env.yml"
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ ! -f $AUTOML_ENV_FILE ]; then
|
|
||||||
echo "File $AUTOML_ENV_FILE not found"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
if source activate $CONDA_ENV_NAME 2> /dev/null
|
if source activate $CONDA_ENV_NAME 2> /dev/null
|
||||||
then
|
then
|
||||||
echo "Upgrading azureml-sdk[automl] in existing conda environment" $CONDA_ENV_NAME
|
echo "Upgrading azureml-sdk[automl] in existing conda environment" $CONDA_ENV_NAME
|
||||||
pip install --upgrade azureml-sdk[automl,notebooks]
|
pip install --upgrade azureml-sdk[automl]
|
||||||
else
|
else
|
||||||
conda env create -f $AUTOML_ENV_FILE -n $CONDA_ENV_NAME &&
|
conda env create -f automl_env.yml -n $CONDA_ENV_NAME &&
|
||||||
source activate $CONDA_ENV_NAME &&
|
source activate $CONDA_ENV_NAME &&
|
||||||
conda install lightgbm -c conda-forge -y &&
|
conda install lightgbm -c conda-forge -y &&
|
||||||
jupyter nbextension install --py azureml.train.widgets --user &&
|
jupyter nbextension install --py azureml.train.widgets --user &&
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
174
ignore/doc-qa/how-to-deploy-to-aci/how-to-deploy-to-aci.py
Normal file
174
ignore/doc-qa/how-to-deploy-to-aci/how-to-deploy-to-aci.py
Normal file
@@ -0,0 +1,174 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
import azureml.core
|
||||||
|
print('SDK version' + azureml.core.VERSION)
|
||||||
|
|
||||||
|
# PREREQ: load workspace info
|
||||||
|
# import azureml.core
|
||||||
|
|
||||||
|
# <loadWorkspace>
|
||||||
|
from azureml.core import Workspace
|
||||||
|
ws = Workspace.from_config()
|
||||||
|
# </loadWorkspace>
|
||||||
|
|
||||||
|
scorepy_content = "import json\nimport numpy as np\nimport os\nimport pickle\nfrom sklearn.externals import joblib\nfrom sklearn.linear_model import LogisticRegression\n\nfrom azureml.core.model import Model\n\ndef init():\n global model\n # retreive the path to the model file using the model name\n model_path = Model.get_model_path('sklearn_mnist')\n model = joblib.load(model_path)\n\ndef run(raw_data):\n data = np.array(json.loads(raw_data)['data'])\n # make prediction\n y_hat = model.predict(data)\n return json.dumps(y_hat.tolist())"
|
||||||
|
print(scorepy_content)
|
||||||
|
with open("score.py","w") as f:
|
||||||
|
f.write(scorepy_content)
|
||||||
|
|
||||||
|
|
||||||
|
# PREREQ: create environment file
|
||||||
|
from azureml.core.conda_dependencies import CondaDependencies
|
||||||
|
|
||||||
|
myenv = CondaDependencies()
|
||||||
|
myenv.add_conda_package("scikit-learn")
|
||||||
|
|
||||||
|
with open("myenv.yml","w") as f:
|
||||||
|
f.write(myenv.serialize_to_string())
|
||||||
|
|
||||||
|
#<configImage>
|
||||||
|
from azureml.core.image import ContainerImage
|
||||||
|
|
||||||
|
image_config = ContainerImage.image_configuration(execution_script = "score.py",
|
||||||
|
runtime = "python",
|
||||||
|
conda_file = "myenv.yml",
|
||||||
|
description = "Image with mnist model",
|
||||||
|
tags = {"data": "mnist", "type": "classification"}
|
||||||
|
)
|
||||||
|
#</configImage>
|
||||||
|
|
||||||
|
# <configAci>
|
||||||
|
from azureml.core.webservice import AciWebservice
|
||||||
|
|
||||||
|
aciconfig = AciWebservice.deploy_configuration(cpu_cores = 1,
|
||||||
|
memory_gb = 1,
|
||||||
|
tags = {"data": "mnist", "type": "classification"},
|
||||||
|
description = 'Handwriting recognition')
|
||||||
|
# </configAci>
|
||||||
|
|
||||||
|
#<registerModel>
|
||||||
|
from azureml.core.model import Model
|
||||||
|
|
||||||
|
model_name = "sklearn_mnist"
|
||||||
|
model = Model.register(model_path = "sklearn_mnist_model.pkl",
|
||||||
|
model_name = model_name,
|
||||||
|
tags = {"data": "mnist", "type": "classification"},
|
||||||
|
description = "Mnist handwriting recognition",
|
||||||
|
workspace = ws)
|
||||||
|
#</registerModel>
|
||||||
|
|
||||||
|
# <retrieveModel>
|
||||||
|
from azureml.core.model import Model
|
||||||
|
|
||||||
|
model_name = "sklearn_mnist"
|
||||||
|
model=Model(ws, model_name)
|
||||||
|
# </retrieveModel>
|
||||||
|
|
||||||
|
|
||||||
|
# ## DEPLOY FROM REGISTERED MODEL
|
||||||
|
|
||||||
|
# <option2Deploy>
|
||||||
|
from azureml.core.webservice import Webservice
|
||||||
|
|
||||||
|
service_name = 'aci-mnist-2'
|
||||||
|
service = Webservice.deploy_from_model(deployment_config = aciconfig,
|
||||||
|
image_config = image_config,
|
||||||
|
models = [model], # this is the registered model object
|
||||||
|
name = service_name,
|
||||||
|
workspace = ws)
|
||||||
|
service.wait_for_deployment(show_output = True)
|
||||||
|
print(service.state)
|
||||||
|
# </option2Deploy>
|
||||||
|
|
||||||
|
service.delete()
|
||||||
|
|
||||||
|
# ## DEPLOY FROM IMAGE
|
||||||
|
|
||||||
|
|
||||||
|
# <option3CreateImage>
|
||||||
|
from azureml.core.image import ContainerImage
|
||||||
|
|
||||||
|
image = ContainerImage.create(name = "myimage1",
|
||||||
|
models = [model], # this is the registered model object
|
||||||
|
image_config = image_config,
|
||||||
|
workspace = ws)
|
||||||
|
|
||||||
|
image.wait_for_creation(show_output = True)
|
||||||
|
# </option3CreateImage>
|
||||||
|
|
||||||
|
# <option3Deploy>
|
||||||
|
from azureml.core.webservice import Webservice
|
||||||
|
|
||||||
|
service_name = 'aci-mnist-13'
|
||||||
|
service = Webservice.deploy_from_image(deployment_config = aciconfig,
|
||||||
|
image = image,
|
||||||
|
name = service_name,
|
||||||
|
workspace = ws)
|
||||||
|
service.wait_for_deployment(show_output = True)
|
||||||
|
print(service.state)
|
||||||
|
# </option3Deploy>
|
||||||
|
|
||||||
|
service.delete()
|
||||||
|
|
||||||
|
|
||||||
|
# ## DEPLOY FROM MODEL FILE
|
||||||
|
# First change score.py!
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
scorepy_content = "import json\nimport numpy as np\nimport os\nimport pickle\nfrom sklearn.externals import joblib\nfrom sklearn.linear_model import LogisticRegression\n\nfrom azureml.core.model import Model\n\ndef init():\n global model\n # retreive the path to the model file using the model name\n model_path = Model.get_model_path('sklearn_mnist_model.pkl')\n model = joblib.load(model_path)\n\ndef run(raw_data):\n data = np.array(json.loads(raw_data)['data'])\n # make prediction\n y_hat = model.predict(data)\n return json.dumps(y_hat.tolist())"
|
||||||
|
with open("score.py","w") as f:
|
||||||
|
f.write(scorepy_content)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# <option1Deploy>
|
||||||
|
from azureml.core.webservice import Webservice
|
||||||
|
|
||||||
|
service_name = 'aci-mnist-1'
|
||||||
|
service = Webservice.deploy(deployment_config = aciconfig,
|
||||||
|
image_config = image_config,
|
||||||
|
model_paths = ['sklearn_mnist_model.pkl'],
|
||||||
|
name = service_name,
|
||||||
|
workspace = ws)
|
||||||
|
|
||||||
|
service.wait_for_deployment(show_output = True)
|
||||||
|
print(service.state)
|
||||||
|
# </option1Deploy>
|
||||||
|
|
||||||
|
# <testService>
|
||||||
|
# Load Data
|
||||||
|
import os
|
||||||
|
import urllib
|
||||||
|
|
||||||
|
os.makedirs('./data', exist_ok = True)
|
||||||
|
|
||||||
|
urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz', filename = './data/test-images.gz')
|
||||||
|
|
||||||
|
from utils import load_data
|
||||||
|
X_test = load_data('./data/test-images.gz', False) / 255.0
|
||||||
|
|
||||||
|
from sklearn import datasets
|
||||||
|
import numpy as np
|
||||||
|
import json
|
||||||
|
|
||||||
|
# find 5 random samples from test set
|
||||||
|
n = 5
|
||||||
|
sample_indices = np.random.permutation(X_test.shape[0])[0:n]
|
||||||
|
|
||||||
|
test_samples = json.dumps({"data": X_test[sample_indices].tolist()})
|
||||||
|
test_samples = bytes(test_samples, encoding = 'utf8')
|
||||||
|
|
||||||
|
# predict using the deployed model
|
||||||
|
prediction = service.run(input_data = test_samples)
|
||||||
|
print(prediction)
|
||||||
|
# </testService>
|
||||||
|
|
||||||
|
# <deleteService>
|
||||||
|
service.delete()
|
||||||
|
# </deleteService>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
BIN
ignore/doc-qa/how-to-deploy-to-aci/sklearn_mnist_model.pkl
Normal file
BIN
ignore/doc-qa/how-to-deploy-to-aci/sklearn_mnist_model.pkl
Normal file
Binary file not shown.
27
ignore/doc-qa/how-to-deploy-to-aci/utils.py
Normal file
27
ignore/doc-qa/how-to-deploy-to-aci/utils.py
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||||
|
# Licensed under the MIT License.
|
||||||
|
|
||||||
|
import gzip
|
||||||
|
import numpy as np
|
||||||
|
import struct
|
||||||
|
|
||||||
|
|
||||||
|
# load compressed MNIST gz files and return numpy arrays
|
||||||
|
def load_data(filename, label=False):
|
||||||
|
with gzip.open(filename) as gz:
|
||||||
|
struct.unpack('I', gz.read(4))
|
||||||
|
n_items = struct.unpack('>I', gz.read(4))
|
||||||
|
if not label:
|
||||||
|
n_rows = struct.unpack('>I', gz.read(4))[0]
|
||||||
|
n_cols = struct.unpack('>I', gz.read(4))[0]
|
||||||
|
res = np.frombuffer(gz.read(n_items[0] * n_rows * n_cols), dtype=np.uint8)
|
||||||
|
res = res.reshape(n_items[0], n_rows * n_cols)
|
||||||
|
else:
|
||||||
|
res = np.frombuffer(gz.read(n_items[0]), dtype=np.uint8)
|
||||||
|
res = res.reshape(n_items[0], 1)
|
||||||
|
return res
|
||||||
|
|
||||||
|
|
||||||
|
# one-hot encode a 1-D array
|
||||||
|
def one_hot_encode(array, num_of_classes):
|
||||||
|
return np.eye(num_of_classes)[array.reshape(-1)]
|
||||||
39
ignore/doc-qa/how-to-set-up-training-targets/Local.py
Normal file
39
ignore/doc-qa/how-to-set-up-training-targets/Local.py
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
# Code for Local computer and Submit training run sections
|
||||||
|
|
||||||
|
# Check core SDK version number
|
||||||
|
import azureml.core
|
||||||
|
|
||||||
|
print("SDK version:", azureml.core.VERSION)
|
||||||
|
|
||||||
|
#<run_local>
|
||||||
|
from azureml.core.runconfig import RunConfiguration
|
||||||
|
|
||||||
|
# Edit a run configuration property on the fly.
|
||||||
|
run_local = RunConfiguration()
|
||||||
|
|
||||||
|
run_local.environment.python.user_managed_dependencies = True
|
||||||
|
#</run_local>
|
||||||
|
|
||||||
|
from azureml.core import Workspace
|
||||||
|
ws = Workspace.from_config()
|
||||||
|
|
||||||
|
|
||||||
|
# Set up an experiment
|
||||||
|
# <experiment>
|
||||||
|
from azureml.core import Experiment
|
||||||
|
experiment_name = 'my_experiment'
|
||||||
|
|
||||||
|
exp = Experiment(workspace=ws, name=experiment_name)
|
||||||
|
# </experiment>
|
||||||
|
|
||||||
|
# Submit the experiment using the run configuration
|
||||||
|
#<local_submit>
|
||||||
|
from azureml.core import ScriptRunConfig
|
||||||
|
import os
|
||||||
|
|
||||||
|
script_folder = os.getcwd()
|
||||||
|
src = ScriptRunConfig(source_directory = script_folder, script = 'train.py', run_config = run_local)
|
||||||
|
run = exp.submit(src)
|
||||||
|
run.wait_for_completion(show_output = True)
|
||||||
|
#</local_submit>
|
||||||
|
|
||||||
48
ignore/doc-qa/how-to-set-up-training-targets/amlcompute.py
Normal file
48
ignore/doc-qa/how-to-set-up-training-targets/amlcompute.py
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
# Code for Azure Machine Learning Compute - Run-based creation
|
||||||
|
|
||||||
|
# Check core SDK version number
|
||||||
|
import azureml.core
|
||||||
|
|
||||||
|
print("SDK version:", azureml.core.VERSION)
|
||||||
|
|
||||||
|
|
||||||
|
from azureml.core import Workspace
|
||||||
|
ws = Workspace.from_config()
|
||||||
|
|
||||||
|
|
||||||
|
# Set up an experiment
|
||||||
|
from azureml.core import Experiment
|
||||||
|
experiment_name = 'my-experiment'
|
||||||
|
script_folder= "./"
|
||||||
|
|
||||||
|
exp = Experiment(workspace=ws, name=experiment_name)
|
||||||
|
|
||||||
|
|
||||||
|
#<run_temp_compute>
|
||||||
|
from azureml.core.compute import ComputeTarget, AmlCompute
|
||||||
|
|
||||||
|
# First, list the supported VM families for Azure Machine Learning Compute
|
||||||
|
print(AmlCompute.supported_vmsizes(workspace=ws))
|
||||||
|
|
||||||
|
from azureml.core.runconfig import RunConfiguration
|
||||||
|
# Create a new runconfig object
|
||||||
|
run_temp_compute = RunConfiguration()
|
||||||
|
|
||||||
|
# Signal that you want to use AmlCompute to execute the script
|
||||||
|
run_temp_compute.target = "amlcompute"
|
||||||
|
|
||||||
|
# AmlCompute is created in the same region as your workspace
|
||||||
|
# Set the VM size for AmlCompute from the list of supported_vmsizes
|
||||||
|
run_temp_compute.amlcompute.vm_size = 'STANDARD_D2_V2'
|
||||||
|
#</run_temp_compute>
|
||||||
|
|
||||||
|
|
||||||
|
# Submit the experiment using the run configuration
|
||||||
|
from azureml.core import ScriptRunConfig
|
||||||
|
|
||||||
|
src = ScriptRunConfig(source_directory = script_folder, script = 'train.py', run_config = run_temp_compute)
|
||||||
|
run = exp.submit(src)
|
||||||
|
run.wait_for_completion(show_output = True)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
70
ignore/doc-qa/how-to-set-up-training-targets/amlcompute2.py
Normal file
70
ignore/doc-qa/how-to-set-up-training-targets/amlcompute2.py
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
# Code for Azure Machine Learning Compute - Persistent compute
|
||||||
|
|
||||||
|
# Check core SDK version number
|
||||||
|
import azureml.core
|
||||||
|
|
||||||
|
print("SDK version:", azureml.core.VERSION)
|
||||||
|
|
||||||
|
from azureml.core import Workspace
|
||||||
|
ws = Workspace.from_config()
|
||||||
|
|
||||||
|
|
||||||
|
# Set up an experiment
|
||||||
|
from azureml.core import Experiment
|
||||||
|
experiment_name = 'my-experiment'
|
||||||
|
script_folder= "./"
|
||||||
|
|
||||||
|
exp = Experiment(workspace=ws, name=experiment_name)
|
||||||
|
|
||||||
|
#<cpu_cluster>
|
||||||
|
from azureml.core.compute import ComputeTarget, AmlCompute
|
||||||
|
from azureml.core.compute_target import ComputeTargetException
|
||||||
|
|
||||||
|
# Choose a name for your CPU cluster
|
||||||
|
cpu_cluster_name = "cpucluster"
|
||||||
|
|
||||||
|
# Verify that cluster does not exist already
|
||||||
|
try:
|
||||||
|
cpu_cluster = ComputeTarget(workspace=ws, name=cpu_cluster_name)
|
||||||
|
print('Found existing cluster, use it.')
|
||||||
|
except ComputeTargetException:
|
||||||
|
# To use a different region for the compute, add a location='<region>' parameter
|
||||||
|
compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',
|
||||||
|
max_nodes=4)
|
||||||
|
cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, compute_config)
|
||||||
|
|
||||||
|
cpu_cluster.wait_for_completion(show_output=True)
|
||||||
|
#</cpu_cluster>
|
||||||
|
|
||||||
|
#<run_amlcompute>
|
||||||
|
from azureml.core.runconfig import RunConfiguration
|
||||||
|
from azureml.core.conda_dependencies import CondaDependencies
|
||||||
|
from azureml.core.runconfig import DEFAULT_CPU_IMAGE
|
||||||
|
|
||||||
|
# Create a new runconfig object
|
||||||
|
run_amlcompute = RunConfiguration()
|
||||||
|
|
||||||
|
# Use the cpu_cluster you created above.
|
||||||
|
run_amlcompute.target = cpu_cluster
|
||||||
|
|
||||||
|
# Enable Docker
|
||||||
|
run_amlcompute.environment.docker.enabled = True
|
||||||
|
|
||||||
|
# Set Docker base image to the default CPU-based image
|
||||||
|
run_amlcompute.environment.docker.base_image = DEFAULT_CPU_IMAGE
|
||||||
|
|
||||||
|
# Use conda_dependencies.yml to create a conda environment in the Docker image for execution
|
||||||
|
run_amlcompute.environment.python.user_managed_dependencies = False
|
||||||
|
|
||||||
|
# Specify CondaDependencies obj, add necessary packages
|
||||||
|
run_amlcompute.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])
|
||||||
|
#</run_amlcompute>
|
||||||
|
|
||||||
|
# Submit the experiment using the run configuration
|
||||||
|
#<amlcompute_submit>
|
||||||
|
from azureml.core import ScriptRunConfig
|
||||||
|
|
||||||
|
src = ScriptRunConfig(source_directory = script_folder, script = 'train.py', run_config = run_amlcompute)
|
||||||
|
run = exp.submit(src)
|
||||||
|
run.wait_for_completion(show_output = True)
|
||||||
|
#</amlcompute_submit>
|
||||||
26
ignore/doc-qa/how-to-set-up-training-targets/dsvm.py
Normal file
26
ignore/doc-qa/how-to-set-up-training-targets/dsvm.py
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
# Code for Remote virtual machines
|
||||||
|
|
||||||
|
compute_target_name = "sheri-linuxvm"
|
||||||
|
|
||||||
|
#<run_dsvm>
|
||||||
|
import azureml.core
|
||||||
|
from azureml.core.runconfig import RunConfiguration
|
||||||
|
from azureml.core.conda_dependencies import CondaDependencies
|
||||||
|
|
||||||
|
run_dsvm = RunConfiguration(framework = "python")
|
||||||
|
|
||||||
|
# Set the compute target to the Linux DSVM
|
||||||
|
run_dsvm.target = compute_target_name
|
||||||
|
|
||||||
|
# Use Docker in the remote VM
|
||||||
|
run_dsvm.environment.docker.enabled = True
|
||||||
|
|
||||||
|
# Use the CPU base image
|
||||||
|
# To use GPU in DSVM, you must also use the GPU base Docker image "azureml.core.runconfig.DEFAULT_GPU_IMAGE"
|
||||||
|
run_dsvm.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE
|
||||||
|
print('Base Docker image is:', run_dsvm.environment.docker.base_image)
|
||||||
|
|
||||||
|
# Specify the CondaDependencies object
|
||||||
|
run_dsvm.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])
|
||||||
|
#</run_dsvm>
|
||||||
|
print(run_dsvm)
|
||||||
27
ignore/doc-qa/how-to-set-up-training-targets/hdi.py
Normal file
27
ignore/doc-qa/how-to-set-up-training-targets/hdi.py
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
|
||||||
|
from azureml.core import Workspace
|
||||||
|
ws = Workspace.from_config()
|
||||||
|
|
||||||
|
from azureml.core.compute import ComputeTarget
|
||||||
|
|
||||||
|
# refers to an existing compute resource attached to the workspace!
|
||||||
|
hdi_compute = ComputeTarget(workspace=ws, name='sherihdi')
|
||||||
|
|
||||||
|
|
||||||
|
#<run_hdi>
|
||||||
|
from azureml.core.runconfig import RunConfiguration
|
||||||
|
from azureml.core.conda_dependencies import CondaDependencies
|
||||||
|
|
||||||
|
|
||||||
|
# use pyspark framework
|
||||||
|
run_hdi = RunConfiguration(framework="pyspark")
|
||||||
|
|
||||||
|
# Set compute target to the HDI cluster
|
||||||
|
run_hdi.target = hdi_compute.name
|
||||||
|
|
||||||
|
# specify CondaDependencies object to ask system installing numpy
|
||||||
|
cd = CondaDependencies()
|
||||||
|
cd.add_conda_package('numpy')
|
||||||
|
run_hdi.environment.python.conda_dependencies = cd
|
||||||
|
#</run_hdi>
|
||||||
|
print(run_hdi)
|
||||||
9
ignore/doc-qa/how-to-set-up-training-targets/mylib.py
Normal file
9
ignore/doc-qa/how-to-set-up-training-targets/mylib.py
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
# Copyright (c) Microsoft. All rights reserved.
|
||||||
|
# Licensed under the MIT license.
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
|
def get_alphas():
|
||||||
|
# list of numbers from 0.0 to 1.0 with a 0.05 interval
|
||||||
|
return np.arange(0.0, 1.0, 0.05)
|
||||||
52
ignore/doc-qa/how-to-set-up-training-targets/remote.py
Normal file
52
ignore/doc-qa/how-to-set-up-training-targets/remote.py
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
# Code for Remote virtual machines
|
||||||
|
|
||||||
|
compute_target_name = "attach-dsvm"
|
||||||
|
|
||||||
|
#<run_dsvm>
|
||||||
|
import azureml.core
|
||||||
|
from azureml.core.runconfig import RunConfiguration, DEFAULT_CPU_IMAGE
|
||||||
|
from azureml.core.conda_dependencies import CondaDependencies
|
||||||
|
|
||||||
|
run_dsvm = RunConfiguration(framework = "python")
|
||||||
|
|
||||||
|
# Set the compute target to the Linux DSVM
|
||||||
|
run_dsvm.target = compute_target_name
|
||||||
|
|
||||||
|
# Use Docker in the remote VM
|
||||||
|
run_dsvm.environment.docker.enabled = True
|
||||||
|
|
||||||
|
# Use the CPU base image
|
||||||
|
# To use GPU in DSVM, you must also use the GPU base Docker image "azureml.core.runconfig.DEFAULT_GPU_IMAGE"
|
||||||
|
run_dsvm.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE
|
||||||
|
print('Base Docker image is:', run_dsvm.environment.docker.base_image)
|
||||||
|
|
||||||
|
# Prepare the Docker and conda environment automatically when they're used for the first time
|
||||||
|
run_dsvm.prepare_environment = True
|
||||||
|
|
||||||
|
# Specify the CondaDependencies object
|
||||||
|
run_dsvm.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])
|
||||||
|
#</run_dsvm>
|
||||||
|
hdi_compute.name = "blah"
|
||||||
|
from azureml.core.runconfig import RunConfiguration
|
||||||
|
from azureml.core.conda_dependencies import CondaDependencies
|
||||||
|
|
||||||
|
|
||||||
|
# use pyspark framework
|
||||||
|
hdi_run_config = RunConfiguration(framework="pyspark")
|
||||||
|
|
||||||
|
# Set compute target to the HDI cluster
|
||||||
|
hdi_run_config.target = hdi_compute.name
|
||||||
|
|
||||||
|
# specify CondaDependencies object to ask system installing numpy
|
||||||
|
cd = CondaDependencies()
|
||||||
|
cd.add_conda_package('numpy')
|
||||||
|
hdi_run_config.environment.python.conda_dependencies = cd
|
||||||
|
|
||||||
|
#<run_hdi>
|
||||||
|
from azureml.core.runconfig import RunConfiguration
|
||||||
|
# Configure the HDInsight run
|
||||||
|
# Load the runconfig object from the myhdi.runconfig file generated in the previous attach operation
|
||||||
|
run_hdi = RunConfiguration.load(project_object = project, run_name = 'myhdi')
|
||||||
|
|
||||||
|
# Ask the system to prepare the conda environment automatically when it's used for the first time
|
||||||
|
run_hdi.auto_prepare_environment = True>
|
||||||
25
ignore/doc-qa/how-to-set-up-training-targets/runconfig.py
Normal file
25
ignore/doc-qa/how-to-set-up-training-targets/runconfig.py
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
# Code for What's a run configuration
|
||||||
|
|
||||||
|
# <run_system_managed>
|
||||||
|
from azureml.core.runconfig import RunConfiguration
|
||||||
|
from azureml.core.conda_dependencies import CondaDependencies
|
||||||
|
|
||||||
|
run_system_managed = RunConfiguration()
|
||||||
|
|
||||||
|
# Specify the conda dependencies with scikit-learn
|
||||||
|
run_system_managed.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])
|
||||||
|
# </run_system_managed>
|
||||||
|
print(run_system_managed)
|
||||||
|
|
||||||
|
|
||||||
|
# <run_user_managed>
|
||||||
|
from azureml.core.runconfig import RunConfiguration
|
||||||
|
|
||||||
|
run_user_managed = RunConfiguration()
|
||||||
|
run_user_managed.environment.python.user_managed_dependencies = True
|
||||||
|
|
||||||
|
# Choose a specific Python environment by pointing to a Python path. For example:
|
||||||
|
# run_config.environment.python.interpreter_path = '/home/ninghai/miniconda3/envs/sdk2/bin/python'
|
||||||
|
# </run_user_managed>
|
||||||
|
print(run_user_managed)
|
||||||
|
|
||||||
45
ignore/doc-qa/how-to-set-up-training-targets/train.py
Normal file
45
ignore/doc-qa/how-to-set-up-training-targets/train.py
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
# Copyright (c) Microsoft. All rights reserved.
|
||||||
|
# Licensed under the MIT license.
|
||||||
|
|
||||||
|
from sklearn.datasets import load_diabetes
|
||||||
|
from sklearn.linear_model import Ridge
|
||||||
|
from sklearn.metrics import mean_squared_error
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
from azureml.core.run import Run
|
||||||
|
from sklearn.externals import joblib
|
||||||
|
import os
|
||||||
|
import numpy as np
|
||||||
|
import mylib
|
||||||
|
|
||||||
|
os.makedirs('./outputs', exist_ok=True)
|
||||||
|
|
||||||
|
X, y = load_diabetes(return_X_y=True)
|
||||||
|
|
||||||
|
run = Run.get_context()
|
||||||
|
|
||||||
|
X_train, X_test, y_train, y_test = train_test_split(X, y,
|
||||||
|
test_size=0.2,
|
||||||
|
random_state=0)
|
||||||
|
data = {"train": {"X": X_train, "y": y_train},
|
||||||
|
"test": {"X": X_test, "y": y_test}}
|
||||||
|
|
||||||
|
# list of numbers from 0.0 to 1.0 with a 0.05 interval
|
||||||
|
alphas = mylib.get_alphas()
|
||||||
|
|
||||||
|
for alpha in alphas:
|
||||||
|
# Use Ridge algorithm to create a regression model
|
||||||
|
reg = Ridge(alpha=alpha)
|
||||||
|
reg.fit(data["train"]["X"], data["train"]["y"])
|
||||||
|
|
||||||
|
preds = reg.predict(data["test"]["X"])
|
||||||
|
mse = mean_squared_error(preds, data["test"]["y"])
|
||||||
|
run.log('alpha', alpha)
|
||||||
|
run.log('mse', mse)
|
||||||
|
|
||||||
|
model_file_name = 'ridge_{0:.2f}.pkl'.format(alpha)
|
||||||
|
# save model in the outputs folder so it automatically get uploaded
|
||||||
|
with open(model_file_name, "wb") as file:
|
||||||
|
joblib.dump(value=reg, filename=os.path.join('./outputs/',
|
||||||
|
model_file_name))
|
||||||
|
|
||||||
|
print('alpha is {0:.2f}, and mse is {1:0.2f}'.format(alpha, mse))
|
||||||
@@ -0,0 +1,55 @@
|
|||||||
|
# code snippets for the quickstart-create-workspace-with-python article
|
||||||
|
# <import>
|
||||||
|
import azureml.core
|
||||||
|
print(azureml.core.VERSION)
|
||||||
|
# </import>
|
||||||
|
|
||||||
|
# this is NOT a snippet. If this code changes, go fix it in the article!
|
||||||
|
from azureml.core import Workspace
|
||||||
|
ws = Workspace.create(name='myworkspace',
|
||||||
|
subscription_id='<subscription-id>',
|
||||||
|
resource_group='myresourcegroup',
|
||||||
|
create_resource_group=True,
|
||||||
|
location='eastus2' # or other supported Azure region
|
||||||
|
)
|
||||||
|
|
||||||
|
# <getDetails>
|
||||||
|
ws.get_details()
|
||||||
|
# </getDetails>
|
||||||
|
|
||||||
|
# <writeConfig>
|
||||||
|
# Create the configuration file.
|
||||||
|
ws.write_config()
|
||||||
|
|
||||||
|
# Use this code to load the workspace from
|
||||||
|
# other scripts and notebooks in this directory.
|
||||||
|
# ws = Workspace.from_config()
|
||||||
|
# </writeConfig>
|
||||||
|
|
||||||
|
# <useWs>
|
||||||
|
from azureml.core import Experiment
|
||||||
|
|
||||||
|
# Create a new experiment in your workspace.
|
||||||
|
exp = Experiment(workspace=ws, name='myexp')
|
||||||
|
|
||||||
|
# Start a run and start the logging service.
|
||||||
|
run = exp.start_logging()
|
||||||
|
|
||||||
|
# Log a single number.
|
||||||
|
run.log('my magic number', 42)
|
||||||
|
|
||||||
|
# Log a list (Fibonacci numbers).
|
||||||
|
run.log_list('my list', [1, 1, 2, 3, 5, 8, 13, 21, 34, 55])
|
||||||
|
|
||||||
|
# Finish the run.
|
||||||
|
run.complete()
|
||||||
|
# </useWs>
|
||||||
|
|
||||||
|
# <viewLog>
|
||||||
|
print(run.get_portal_url())
|
||||||
|
# </viewLog>
|
||||||
|
|
||||||
|
|
||||||
|
# <delete>
|
||||||
|
ws.delete(delete_dependent_resources=True)
|
||||||
|
# </delete>
|
||||||
67
ignore/doc-qa/testnotebook.ipynb
Normal file
67
ignore/doc-qa/testnotebook.ipynb
Normal file
@@ -0,0 +1,67 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Testing notebook include"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"metadata": {
|
||||||
|
"name": "import"
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Azure ML SDK Version: 1.0.83\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"%matplotlib inline\n",
|
||||||
|
"import numpy as np\n",
|
||||||
|
"import matplotlib.pyplot as plt\n",
|
||||||
|
"\n",
|
||||||
|
"import azureml.core\n",
|
||||||
|
"from azureml.core import Workspace\n",
|
||||||
|
"\n",
|
||||||
|
"# check core SDK version number\n",
|
||||||
|
"print(\"Azure ML SDK Version: \", azureml.core.VERSION)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"celltoolbar": "Edit Metadata",
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3.6 - AzureML",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3-azureml"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.6.9"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
Binary file not shown.
|
Before Width: | Height: | Size: 26 KiB |
@@ -1,14 +1,14 @@
|
|||||||
# ONNX on Azure Machine Learning
|
# ONNX on Azure Machine Learning
|
||||||
|
|
||||||
These tutorials show how to create and deploy [ONNX](http://onnx.ai) models in Azure Machine Learning environments using [ONNX Runtime](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-build-deploy-onnx) for inference. Once deployed as a web service, you can ping the model with your own set of images to be analyzed!
|
These tutorials show how to create and deploy [ONNX](http://onnx.ai) models using Azure Machine Learning and the [ONNX Runtime](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-build-deploy-onnx).
|
||||||
|
Once deployed as web services, you can ping the models with your own images to be analyzed!
|
||||||
|
|
||||||
## Tutorials
|
## Tutorials
|
||||||
- [Obtain ONNX model from ONNX Model Zoo and deploy with ONNX Runtime inference - Handwritten Digit Classification (MNIST)](https://github.com/Azure/MachineLearningNotebooks/blob/master/onnx/onnx-inference-mnist-deploy.ipynb)
|
- [Obtain ONNX model from ONNX Model Zoo and deploy - ResNet50](https://github.com/Azure/MachineLearningNotebooks/blob/master/onnx/onnx-modelzoo-aml-deploy-resnet50.ipynb)
|
||||||
- [Obtain ONNX model from ONNX Model Zoo and deploy with ONNX Runtime inference - Facial Expression Recognition (Emotion FER+)](https://github.com/Azure/MachineLearningNotebooks/blob/master/onnx/onnx-inference-facial-emotion-recognition-deploy.ipynb)
|
|
||||||
- [Obtain ONNX model from ONNX Model Zoo and deploy with ONNX Runtime inference - Image Recognition (ResNet50)](https://github.com/Azure/MachineLearningNotebooks/blob/master/onnx/onnx-modelzoo-aml-deploy-resnet50.ipynb)
|
|
||||||
- [Convert ONNX model from CoreML and deploy - TinyYOLO](https://github.com/Azure/MachineLearningNotebooks/blob/master/onnx/onnx-convert-aml-deploy-tinyyolo.ipynb)
|
- [Convert ONNX model from CoreML and deploy - TinyYOLO](https://github.com/Azure/MachineLearningNotebooks/blob/master/onnx/onnx-convert-aml-deploy-tinyyolo.ipynb)
|
||||||
- [Train ONNX model in PyTorch and deploy - MNIST](https://github.com/Azure/MachineLearningNotebooks/blob/master/onnx/onnx-train-pytorch-aml-deploy-mnist.ipynb)
|
- [Train ONNX model in PyTorch and deploy - MNIST](https://github.com/Azure/MachineLearningNotebooks/blob/master/onnx/onnx-train-pytorch-aml-deploy-mnist.ipynb)
|
||||||
|
- [Handwritten Digit Classification (MNIST) using ONNX Runtime on AzureML](https://github.com/Azure/MachineLearningNotebooks/blob/master/onnx/onnx-inference-mnist.ipynb)
|
||||||
|
- [Facial Expression Recognition using ONNX Runtime on AzureML](https://github.com/Azure/MachineLearningNotebooks/blob/master/onnx/onnx-inference-emotion-recognition.ipynb)
|
||||||
|
|
||||||
## Documentation
|
## Documentation
|
||||||
- [ONNX Runtime Python API Documentation](http://aka.ms/onnxruntime-python)
|
- [ONNX Runtime Python API Documentation](http://aka.ms/onnxruntime-python)
|
||||||
@@ -21,8 +21,7 @@ These tutorials show how to create and deploy [ONNX](http://onnx.ai) models in A
|
|||||||
|
|
||||||
|
|
||||||
## License
|
## License
|
||||||
|
|
||||||
Copyright (c) Microsoft Corporation. All rights reserved.
|
Copyright (c) Microsoft Corporation. All rights reserved.
|
||||||
Licensed under the MIT License.
|
Licensed under the MIT License.
|
||||||
|
|
||||||
## Acknowledgements
|
|
||||||
These tutorials were developed by Vinitra Swamy and Prasanth Pulavarthi of the Microsoft AI Frameworks team and adapted for presentation at Microsoft Ignite 2018.
|
|
||||||
|
|||||||
@@ -59,9 +59,8 @@
|
|||||||
"You'll need to run the following commands to use this tutorial:\n",
|
"You'll need to run the following commands to use this tutorial:\n",
|
||||||
"\n",
|
"\n",
|
||||||
"```sh\n",
|
"```sh\n",
|
||||||
|
"pip install coremltools\n",
|
||||||
"pip install onnxmltools\n",
|
"pip install onnxmltools\n",
|
||||||
"pip install coremltools # use this on Linux and Mac\n",
|
|
||||||
"pip install git+https://github.com/apple/coremltools # use this on Windows\n",
|
|
||||||
"```"
|
"```"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -80,10 +79,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"import urllib.request\n",
|
"!wget https://s3-us-west-2.amazonaws.com/coreml-models/TinyYOLO.mlmodel"
|
||||||
"\n",
|
|
||||||
"onnx_model_url = \"https://s3-us-west-2.amazonaws.com/coreml-models/TinyYOLO.mlmodel\"\n",
|
|
||||||
"urllib.request.urlretrieve(onnx_model_url, filename=\"TinyYOLO.mlmodel\")\n"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -177,9 +173,9 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"models = ws.models\n",
|
"models = ws.models()\n",
|
||||||
"for name, m in models.items():\n",
|
"for m in models:\n",
|
||||||
" print(\"Name:\", name,\"\\tVersion:\", m.version, \"\\tDescription:\", m.description, m.tags)"
|
" print(\"Name:\", m.name,\"\\tVersion:\", m.version, \"\\tDescription:\", m.description, m.tags)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -248,7 +244,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"from azureml.core.conda_dependencies import CondaDependencies \n",
|
"from azureml.core.conda_dependencies import CondaDependencies \n",
|
||||||
"\n",
|
"\n",
|
||||||
"myenv = CondaDependencies.create(pip_packages=[\"numpy\",\"onnxruntime\",\"azureml-core\"])\n",
|
"myenv = CondaDependencies.create(pip_packages=[\"numpy\",\"onnxruntime\"])\n",
|
||||||
"\n",
|
"\n",
|
||||||
"with open(\"myenv.yml\",\"w\") as f:\n",
|
"with open(\"myenv.yml\",\"w\") as f:\n",
|
||||||
" f.write(myenv.serialize_to_string())"
|
" f.write(myenv.serialize_to_string())"
|
||||||
|
|||||||
@@ -12,7 +12,7 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"# Facial Expression Recognition (FER+) using ONNX Runtime on Azure ML\n",
|
"# Facial Expression Recognition (Emotion FER+) using ONNX Runtime on Azure ML\n",
|
||||||
"\n",
|
"\n",
|
||||||
"This example shows how to deploy an image classification neural network using the Facial Expression Recognition ([FER](https://www.kaggle.com/c/challenges-in-representation-learning-facial-expression-recognition-challenge/data)) dataset and Open Neural Network eXchange format ([ONNX](http://aka.ms/onnxdocarticle)) on the Azure Machine Learning platform. This tutorial will show you how to deploy a FER+ model from the [ONNX model zoo](https://github.com/onnx/models), use it to make predictions using ONNX Runtime Inference, and deploy it as a web service in Azure.\n",
|
"This example shows how to deploy an image classification neural network using the Facial Expression Recognition ([FER](https://www.kaggle.com/c/challenges-in-representation-learning-facial-expression-recognition-challenge/data)) dataset and Open Neural Network eXchange format ([ONNX](http://aka.ms/onnxdocarticle)) on the Azure Machine Learning platform. This tutorial will show you how to deploy a FER+ model from the [ONNX model zoo](https://github.com/onnx/models), use it to make predictions using ONNX Runtime Inference, and deploy it as a web service in Azure.\n",
|
||||||
"\n",
|
"\n",
|
||||||
@@ -158,7 +158,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"models = ws.models\n",
|
"models = ws.models()\n",
|
||||||
"for name, m in models.items():\n",
|
"for name, m in models.items():\n",
|
||||||
" print(\"Name:\", name,\"\\tVersion:\", m.version, \"\\tDescription:\", m.description, m.tags)"
|
" print(\"Name:\", name,\"\\tVersion:\", m.version, \"\\tDescription:\", m.description, m.tags)"
|
||||||
]
|
]
|
||||||
@@ -325,7 +325,11 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"from azureml.core.conda_dependencies import CondaDependencies \n",
|
"from azureml.core.conda_dependencies import CondaDependencies \n",
|
||||||
"\n",
|
"\n",
|
||||||
"myenv = CondaDependencies.create(pip_packages=[\"numpy\", \"onnxruntime\", \"azureml-core\"])\n",
|
"myenv = CondaDependencies()\n",
|
||||||
|
"myenv.add_pip_package(\"numpy\")\n",
|
||||||
|
"myenv.add_pip_package(\"azureml-core\")\n",
|
||||||
|
"myenv.add_pip_package(\"onnxruntime\")\n",
|
||||||
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"with open(\"myenv.yml\",\"w\") as f:\n",
|
"with open(\"myenv.yml\",\"w\") as f:\n",
|
||||||
" f.write(myenv.serialize_to_string())"
|
" f.write(myenv.serialize_to_string())"
|
||||||
@@ -355,7 +359,7 @@
|
|||||||
" tags = {\"demo\": \"onnx\"})\n",
|
" tags = {\"demo\": \"onnx\"})\n",
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"image = ContainerImage.create(name = \"onnximage\",\n",
|
"image = ContainerImage.create(name = \"onnxtest\",\n",
|
||||||
" # this is the model object\n",
|
" # this is the model object\n",
|
||||||
" models = [model],\n",
|
" models = [model],\n",
|
||||||
" image_config = image_config,\n",
|
" image_config = image_config,\n",
|
||||||
@@ -483,6 +487,7 @@
|
|||||||
" emotions = []\n",
|
" emotions = []\n",
|
||||||
" for i in range(N):\n",
|
" for i in range(N):\n",
|
||||||
" emotions.append(emotion_keys[classes[i]])\n",
|
" emotions.append(emotion_keys[classes[i]])\n",
|
||||||
|
" \n",
|
||||||
" return emotions\n",
|
" return emotions\n",
|
||||||
"\n",
|
"\n",
|
||||||
"def softmax(x):\n",
|
"def softmax(x):\n",
|
||||||
@@ -550,7 +555,7 @@
|
|||||||
" tensor.ParseFromString(f.read())\n",
|
" tensor.ParseFromString(f.read())\n",
|
||||||
" \n",
|
" \n",
|
||||||
" output_data = numpy_helper.to_array(tensor)\n",
|
" output_data = numpy_helper.to_array(tensor)\n",
|
||||||
" output_processed = emotion_map(postprocess(output_data[0]))[0]\n",
|
" output_processed = emotion_map(postprocess(output_data))[0]\n",
|
||||||
" test_outputs.append(output_processed)"
|
" test_outputs.append(output_processed)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -700,9 +705,7 @@
|
|||||||
"# Any PNG or JPG image file should work\n",
|
"# Any PNG or JPG image file should work\n",
|
||||||
"# Make sure to include the entire path with // instead of /\n",
|
"# Make sure to include the entire path with // instead of /\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# e.g. your_test_image = \"C:/Users/vinitra.swamy/Pictures/face.png\"\n",
|
"# e.g. your_test_image = \"C://Users//vinitra.swamy//Pictures//emotion_test_images//img_1.png\"\n",
|
||||||
"\n",
|
|
||||||
"your_test_image = \"<path to file>\"\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"import matplotlib.image as mpimg\n",
|
"import matplotlib.image as mpimg\n",
|
||||||
"\n",
|
"\n",
|
||||||
@@ -756,7 +759,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"# remember to delete your service after you are done using it!\n",
|
"# remember to delete your service after you are done using it!\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# aci_service.delete()"
|
"aci_service.delete()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -165,7 +165,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"models = ws.models\n",
|
"models = ws.models()\n",
|
||||||
"for name, m in models.items():\n",
|
"for name, m in models.items():\n",
|
||||||
" print(\"Name:\", name,\"\\tVersion:\", m.version, \"\\tDescription:\", m.description, m.tags)"
|
" print(\"Name:\", name,\"\\tVersion:\", m.version, \"\\tDescription:\", m.description, m.tags)"
|
||||||
]
|
]
|
||||||
@@ -297,7 +297,11 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"from azureml.core.conda_dependencies import CondaDependencies \n",
|
"from azureml.core.conda_dependencies import CondaDependencies \n",
|
||||||
"\n",
|
"\n",
|
||||||
"myenv = CondaDependencies.create(pip_packages=[\"numpy\", \"onnxruntime\", \"azureml-core\"])\n",
|
"myenv = CondaDependencies()\n",
|
||||||
|
"myenv.add_pip_package(\"numpy\")\n",
|
||||||
|
"myenv.add_pip_package(\"azureml-core\")\n",
|
||||||
|
"myenv.add_pip_package(\"onnxruntime\")\n",
|
||||||
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"with open(\"myenv.yml\",\"w\") as f:\n",
|
"with open(\"myenv.yml\",\"w\") as f:\n",
|
||||||
" f.write(myenv.serialize_to_string())"
|
" f.write(myenv.serialize_to_string())"
|
||||||
@@ -311,6 +315,16 @@
|
|||||||
"This step will likely take a few minutes."
|
"This step will likely take a few minutes."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core.image import ContainerImage\n",
|
||||||
|
"help(ContainerImage.image_configuration)"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
@@ -326,7 +340,7 @@
|
|||||||
" tags = {\"demo\": \"onnx\"}) \n",
|
" tags = {\"demo\": \"onnx\"}) \n",
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"image = ContainerImage.create(name = \"onnximage\",\n",
|
"image = ContainerImage.create(name = \"onnxtest\",\n",
|
||||||
" # this is the model object\n",
|
" # this is the model object\n",
|
||||||
" models = [model],\n",
|
" models = [model],\n",
|
||||||
" image_config = image_config,\n",
|
" image_config = image_config,\n",
|
||||||
@@ -389,7 +403,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"from azureml.core.webservice import Webservice\n",
|
"from azureml.core.webservice import Webservice\n",
|
||||||
"\n",
|
"\n",
|
||||||
"aci_service_name = 'onnx-demo-mnist'\n",
|
"aci_service_name = 'onnx-demo-mnist20'\n",
|
||||||
"print(\"Service\", aci_service_name)\n",
|
"print(\"Service\", aci_service_name)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"aci_service = Webservice.deploy_from_image(deployment_config = aciconfig,\n",
|
"aci_service = Webservice.deploy_from_image(deployment_config = aciconfig,\n",
|
||||||
@@ -629,8 +643,6 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"# Any PNG or JPG image file should work\n",
|
"# Any PNG or JPG image file should work\n",
|
||||||
"\n",
|
"\n",
|
||||||
"your_test_image = \"<path to file>\"\n",
|
|
||||||
"\n",
|
|
||||||
"# e.g. your_test_image = \"C:/Users/vinitra.swamy/Pictures/handwritten_digit.png\"\n",
|
"# e.g. your_test_image = \"C:/Users/vinitra.swamy/Pictures/handwritten_digit.png\"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"import matplotlib.image as mpimg\n",
|
"import matplotlib.image as mpimg\n",
|
||||||
@@ -740,7 +752,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"# remember to delete your service after you are done using it!\n",
|
"# remember to delete your service after you are done using it!\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# aci_service.delete()"
|
"aci_service.delete()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -56,21 +56,11 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"#### Download pre-trained ONNX model from ONNX Model Zoo.\n",
|
"#### Download pre-trained ONNX model from ONNX Model Zoo.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Download the [ResNet50v2 model and test data](https://s3.amazonaws.com/onnx-model-zoo/resnet/resnet50v2/resnet50v2.tar.gz) and extract it in the same folder as this tutorial notebook.\n"
|
"Download the [ResNet50v2 model and test data](https://s3.amazonaws.com/onnx-model-zoo/resnet/resnet50v2/resnet50v2.tar.gz) and place it in the same folder as this tutorial notebook. You can unzip the file through the following line of code.\n",
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import urllib.request\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"onnx_model_url = \"https://s3.amazonaws.com/onnx-model-zoo/resnet/resnet50v2/resnet50v2.tar.gz\"\n",
|
"```sh\n",
|
||||||
"urllib.request.urlretrieve(onnx_model_url, filename=\"resnet50v2.tar.gz\")\n",
|
"(myenv) $ tar xvzf resnet50v2.tar.gz\n",
|
||||||
"\n",
|
"```"
|
||||||
"!tar xvzf resnet50v2.tar.gz"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -140,9 +130,9 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"models = ws.models\n",
|
"models = ws.models()\n",
|
||||||
"for name, m in models.items():\n",
|
"for m in models:\n",
|
||||||
" print(\"Name:\", name,\"\\tVersion:\", m.version, \"\\tDescription:\", m.description, m.tags)"
|
" print(\"Name:\", m.name,\"\\tVersion:\", m.version, \"\\tDescription:\", m.description, m.tags)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -232,7 +222,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"from azureml.core.conda_dependencies import CondaDependencies \n",
|
"from azureml.core.conda_dependencies import CondaDependencies \n",
|
||||||
"\n",
|
"\n",
|
||||||
"myenv = CondaDependencies.create(pip_packages=[\"numpy\",\"onnxruntime\",\"azureml-core\"])\n",
|
"myenv = CondaDependencies.create(pip_packages=[\"numpy\",\"onnxruntime\"])\n",
|
||||||
"\n",
|
"\n",
|
||||||
"with open(\"myenv.yml\",\"w\") as f:\n",
|
"with open(\"myenv.yml\",\"w\") as f:\n",
|
||||||
" f.write(myenv.serialize_to_string())"
|
" f.write(myenv.serialize_to_string())"
|
||||||
|
|||||||
@@ -255,22 +255,6 @@
|
|||||||
"RunDetails(run).show()"
|
"RunDetails(run).show()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Alternatively, you can block until the script has completed training before running more code."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"run.wait_for_completion(show_output=True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
@@ -334,9 +318,9 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"models = ws.models\n",
|
"models = ws.models()\n",
|
||||||
"for name, m in models.items():\n",
|
"for m in models:\n",
|
||||||
" print(\"Name:\", name,\"\\tVersion:\", m.version, \"\\tDescription:\", m.description, m.tags)"
|
" print(\"Name:\", m.name,\"\\tVersion:\", m.version, \"\\tDescription:\", m.description, m.tags)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -408,7 +392,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"from azureml.core.conda_dependencies import CondaDependencies \n",
|
"from azureml.core.conda_dependencies import CondaDependencies \n",
|
||||||
"\n",
|
"\n",
|
||||||
"myenv = CondaDependencies.create(pip_packages=[\"numpy\",\"onnxruntime\",\"azureml-core\"])\n",
|
"myenv = CondaDependencies.create(pip_packages=[\"numpy\",\"onnxruntime\"])\n",
|
||||||
"\n",
|
"\n",
|
||||||
"with open(\"myenv.yml\",\"w\") as f:\n",
|
"with open(\"myenv.yml\",\"w\") as f:\n",
|
||||||
" f.write(myenv.serialize_to_string())"
|
" f.write(myenv.serialize_to_string())"
|
||||||
|
|||||||
@@ -74,37 +74,21 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"import os\n",
|
"# Batch AI compute\n",
|
||||||
"\n",
|
"cluster_name = \"gpu-cluster\"\n",
|
||||||
"# choose a name for your cluster\n",
|
"try:\n",
|
||||||
"batchai_cluster_name = os.environ.get(\"BATCHAI_CLUSTER_NAME\", \"gpu-cluster\")\n",
|
" cluster = BatchAiCompute(ws, cluster_name)\n",
|
||||||
"cluster_min_nodes = os.environ.get(\"BATCHAI_CLUSTER_MIN_NODES\", 0)\n",
|
" print(\"found existing cluster.\")\n",
|
||||||
"cluster_max_nodes = os.environ.get(\"BATCHAI_CLUSTER_MAX_NODES\", 1)\n",
|
"except:\n",
|
||||||
"vm_size = os.environ.get(\"BATCHAI_CLUSTER_SKU\", \"STANDARD_NC6\")\n",
|
" print(\"creating new cluster\")\n",
|
||||||
"autoscale_enabled = os.environ.get(\"BATCHAI_CLUSTER_AUTOSCALE_ENABLED\", True)\n",
|
" provisioning_config = BatchAiCompute.provisioning_configuration(vm_size = \"STANDARD_NC6\",\n",
|
||||||
"\n",
|
" autoscale_enabled = True,\n",
|
||||||
"\n",
|
" cluster_min_nodes = 0, \n",
|
||||||
"if batchai_cluster_name in ws.compute_targets:\n",
|
" cluster_max_nodes = 1)\n",
|
||||||
" compute_target = ws.compute_targets[batchai_cluster_name]\n",
|
|
||||||
" if compute_target and type(compute_target) is BatchAiCompute:\n",
|
|
||||||
" print('found compute target. just use it. ' + batchai_cluster_name)\n",
|
|
||||||
"else:\n",
|
|
||||||
" print('creating a new compute target...')\n",
|
|
||||||
" provisioning_config = BatchAiCompute.provisioning_configuration(vm_size = vm_size, # NC6 is GPU-enabled\n",
|
|
||||||
" vm_priority = 'lowpriority', # optional\n",
|
|
||||||
" autoscale_enabled = autoscale_enabled,\n",
|
|
||||||
" cluster_min_nodes = cluster_min_nodes, \n",
|
|
||||||
" cluster_max_nodes = cluster_max_nodes)\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
" # create the cluster\n",
|
" # create the cluster\n",
|
||||||
" compute_target = ComputeTarget.create(ws, batchai_cluster_name, provisioning_config)\n",
|
" cluster = ComputeTarget.create(ws, cluster_name, provisioning_config)\n",
|
||||||
" \n",
|
" cluster.wait_for_completion(show_output=True)"
|
||||||
" # can poll for a minimum number of nodes and for a specific timeout. \n",
|
|
||||||
" # if no min node count is provided it will use the scale settings for the cluster\n",
|
|
||||||
" compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)\n",
|
|
||||||
" \n",
|
|
||||||
" # For a more detailed view of current BatchAI cluster status, use the 'status' property \n",
|
|
||||||
" print(compute_target.status.serialize())"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -372,7 +356,7 @@
|
|||||||
" mode=\"download\" \n",
|
" mode=\"download\" \n",
|
||||||
" )\n",
|
" )\n",
|
||||||
"output_dir = PipelineData(name=\"scores\", \n",
|
"output_dir = PipelineData(name=\"scores\", \n",
|
||||||
" datastore=default_ds, \n",
|
" datastore_name=default_ds.name, \n",
|
||||||
" output_path_on_compute=\"batchscoring/results\")"
|
" output_path_on_compute=\"batchscoring/results\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -465,7 +449,7 @@
|
|||||||
" \"--label_dir\", label_dir, \n",
|
" \"--label_dir\", label_dir, \n",
|
||||||
" \"--output_dir\", output_dir, \n",
|
" \"--output_dir\", output_dir, \n",
|
||||||
" \"--batch_size\", batch_size_param],\n",
|
" \"--batch_size\", batch_size_param],\n",
|
||||||
" target=compute_target,\n",
|
" target=cluster,\n",
|
||||||
" inputs=[input_images, label_dir],\n",
|
" inputs=[input_images, label_dir],\n",
|
||||||
" outputs=[output_dir],\n",
|
" outputs=[output_dir],\n",
|
||||||
" runconfig=batchai_run_config,\n",
|
" runconfig=batchai_run_config,\n",
|
||||||
@@ -606,12 +590,9 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"from azureml.pipeline.core import PublishedPipeline\n",
|
"from azureml.pipeline.core import PublishedPipeline\n",
|
||||||
"\n",
|
"\n",
|
||||||
"rest_endpoint = published_pipeline.endpoint\n",
|
"rest_endpoint = PublishedPipeline.get_endpoint(published_id, ws)\n",
|
||||||
"# specify batch size when running the pipeline\n",
|
"# specify batch size when running the pipeline\n",
|
||||||
"response = requests.post(rest_endpoint, \n",
|
"response = requests.post(rest_endpoint, headers=aad_token, json={\"param_batch_size\": 50})\n",
|
||||||
" headers=aad_token, \n",
|
|
||||||
" json={\"ExperimentName\": \"batch_scoring\",\n",
|
|
||||||
" \"ParameterAssignments\": {\"param_batch_size\": 50}})\n",
|
|
||||||
"run_id = response.json()[\"Id\"]"
|
"run_id = response.json()[\"Id\"]"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -629,7 +610,7 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from azureml.pipeline.core.run import PipelineRun\n",
|
"from azureml.pipeline.core.run import PipelineRun\n",
|
||||||
"published_pipeline_run = PipelineRun(ws.experiments[\"batch_scoring\"], run_id)\n",
|
"published_pipeline_run = PipelineRun(ws.experiments()[\"batch_scoring\"], run_id)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"RunDetails(published_pipeline_run).show()"
|
"RunDetails(published_pipeline_run).show()"
|
||||||
]
|
]
|
||||||
|
|||||||
14
pr.md
14
pr.md
@@ -12,18 +12,6 @@
|
|||||||
## Community Blogs
|
## Community Blogs
|
||||||
- [Power Bat – How Spektacom is Powering the Game of Cricket with Microsoft AI](https://blogs.technet.microsoft.com/machinelearning/2018/10/11/power-bat-how-spektacom-is-powering-the-game-of-cricket-with-microsoft-ai/)
|
- [Power Bat – How Spektacom is Powering the Game of Cricket with Microsoft AI](https://blogs.technet.microsoft.com/machinelearning/2018/10/11/power-bat-how-spektacom-is-powering-the-game-of-cricket-with-microsoft-ai/)
|
||||||
|
|
||||||
## Ignite 2018 Public Preview Launch Sessions
|
|
||||||
- [AI with Azure Machine Learning services: Simplifying the data science process](https://myignite.techcommunity.microsoft.com/sessions/66248)
|
|
||||||
- [AI TechTalk: Azure Machine Learning SDK - a walkthrough](https://myignite.techcommunity.microsoft.com/sessions/66265)
|
|
||||||
- [AI for an intelligent cloud and intelligent edge: Discover, deploy, and manage with Azure ML services](https://myignite.techcommunity.microsoft.com/sessions/65389)
|
|
||||||
- [Generating high quality models efficiently using Automated ML and Hyperparameter Tuning](https://myignite.techcommunity.microsoft.com/sessions/66245)
|
|
||||||
- [AI for pros: Deep learning with PyTorch using the Azure Data Science Virtual Machine and scaling training with Azure ML](https://myignite.techcommunity.microsoft.com/sessions/66244)
|
|
||||||
|
|
||||||
## Get-started Videos on YouTube
|
|
||||||
- [Get started with Python SDK](https://youtu.be/VIsXeTuW3FU)
|
|
||||||
- [Get started from Azure Portal](https://youtu.be/lCkYUHV86Mk)
|
|
||||||
|
|
||||||
|
|
||||||
## Third Party Articles
|
## Third Party Articles
|
||||||
- [Azure’s new machine learning features embrace Python](https://www.infoworld.com/article/3306840/azure/azures-new-machine-learning-features-embrace-python.html) (InfoWorld)
|
- [Azure’s new machine learning features embrace Python](https://www.infoworld.com/article/3306840/azure/azures-new-machine-learning-features-embrace-python.html) (InfoWorld)
|
||||||
- [How to use Azure ML in Windows 10](https://www.infoworld.com/article/3308381/azure/how-to-use-azure-ml-in-windows-10.html) (InfoWorld)
|
- [How to use Azure ML in Windows 10](https://www.infoworld.com/article/3308381/azure/how-to-use-azure-ml-in-windows-10.html) (InfoWorld)
|
||||||
@@ -36,7 +24,7 @@
|
|||||||
## Community Projects
|
## Community Projects
|
||||||
- [Fashion MNIST](https://github.com/amynic/azureml-sdk-fashion)
|
- [Fashion MNIST](https://github.com/amynic/azureml-sdk-fashion)
|
||||||
- Keras on Databricks
|
- Keras on Databricks
|
||||||
- [Samples from CSS](https://github.com/Azure/AMLSamples)
|
- Samples from CSS
|
||||||
|
|
||||||
|
|
||||||
## Azure Machine Learning Studio Resources
|
## Azure Machine Learning Studio Resources
|
||||||
|
|||||||
@@ -434,13 +434,12 @@
|
|||||||
"from azureml.core.image import Image\n",
|
"from azureml.core.image import Image\n",
|
||||||
"from azureml.core.webservice import Webservice\n",
|
"from azureml.core.webservice import Webservice\n",
|
||||||
"from azureml.contrib.brainwave import BrainwaveWebservice, BrainwaveImage\n",
|
"from azureml.contrib.brainwave import BrainwaveWebservice, BrainwaveImage\n",
|
||||||
"from azureml.exceptions import WebserviceException\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"model_name = \"catsanddogs-resnet50-model\"\n",
|
"model_name = \"catsanddogs-resnet50-model\"\n",
|
||||||
"image_name = \"catsanddogs-resnet50-image\"\n",
|
"image_name = \"catsanddogs-resnet50-image\"\n",
|
||||||
"service_name = \"modelbuild-service\"\n",
|
"service_name = \"modelbuild-service\"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"registered_model = Model.register(ws, model_def_path, model_name)\n",
|
"registered_model = Model.register(ws, service_def_path, model_name)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"image_config = BrainwaveImage.image_configuration()\n",
|
"image_config = BrainwaveImage.image_configuration()\n",
|
||||||
"deployment_config = BrainwaveWebservice.deploy_configuration()\n",
|
"deployment_config = BrainwaveWebservice.deploy_configuration()\n",
|
||||||
@@ -449,10 +448,8 @@
|
|||||||
" service = Webservice(ws, service_name)\n",
|
" service = Webservice(ws, service_name)\n",
|
||||||
" service.delete()\n",
|
" service.delete()\n",
|
||||||
" service = Webservice.deploy_from_model(ws, service_name, [registered_model], image_config, deployment_config)\n",
|
" service = Webservice.deploy_from_model(ws, service_name, [registered_model], image_config, deployment_config)\n",
|
||||||
" service.wait_for_deployment(True)\n",
|
|
||||||
"except WebserviceException:\n",
|
"except WebserviceException:\n",
|
||||||
" service = Webservice.deploy_from_model(ws, service_name, [registered_model], image_config, deployment_config)\n",
|
" service = Webservice.deploy_from_model(ws, service_name, [registered_model], image_config, deployment_config)"
|
||||||
" service.wait_for_deployment(True)"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -80,7 +80,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"from azureml.contrib.brainwave.models import QuantizedResnet50\n",
|
"from azureml.contrib.brainwave.models import QuantizedResnet50, Resnet50\n",
|
||||||
"model_path = os.path.expanduser('~/models')\n",
|
"model_path = os.path.expanduser('~/models')\n",
|
||||||
"model = QuantizedResnet50(model_path, is_frozen = True)\n",
|
"model = QuantizedResnet50(model_path, is_frozen = True)\n",
|
||||||
"feature_tensor = model.import_graph_def(image_tensors)\n",
|
"feature_tensor = model.import_graph_def(image_tensors)\n",
|
||||||
@@ -198,7 +198,7 @@
|
|||||||
" image_config = BrainwaveImage.image_configuration()\n",
|
" image_config = BrainwaveImage.image_configuration()\n",
|
||||||
" deployment_config = BrainwaveWebservice.deploy_configuration()\n",
|
" deployment_config = BrainwaveWebservice.deploy_configuration()\n",
|
||||||
" service = Webservice.deploy_from_model(ws, service_name, [registered_model], image_config, deployment_config)\n",
|
" service = Webservice.deploy_from_model(ws, service_name, [registered_model], image_config, deployment_config)\n",
|
||||||
" service.wait_for_deployment(True)"
|
" service.wait_for_deployment(true)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -265,7 +265,9 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"service.delete()"
|
"service.delete()\n",
|
||||||
|
" \n",
|
||||||
|
"registered_model.delete()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -404,7 +404,7 @@
|
|||||||
" image_config = BrainwaveImage.image_configuration()\n",
|
" image_config = BrainwaveImage.image_configuration()\n",
|
||||||
" deployment_config = BrainwaveWebservice.deploy_configuration()\n",
|
" deployment_config = BrainwaveWebservice.deploy_configuration()\n",
|
||||||
" service = Webservice.deploy_from_model(ws, service_name, [registered_model], image_config, deployment_config)\n",
|
" service = Webservice.deploy_from_model(ws, service_name, [registered_model], image_config, deployment_config)\n",
|
||||||
" service.wait_for_deployment(True)"
|
" service.wait_for_deployment(true)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
BIN
project-brainwave/snowleopardgaze.jpg
Normal file
BIN
project-brainwave/snowleopardgaze.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 61 KiB |
@@ -391,15 +391,6 @@
|
|||||||
"RunDetails(run).show()"
|
"RunDetails(run).show()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"run.wait_for_completion(show_output = True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
@@ -480,15 +471,6 @@
|
|||||||
"RunDetails(hyperdrive_run).show()"
|
"RunDetails(hyperdrive_run).show()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"hyperdrive_run.wait_for_completion(show_output = True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
|
|||||||
@@ -53,5 +53,5 @@ def run(input_data):
|
|||||||
pred_probs = softmax(model(img)).detach().numpy()[0]
|
pred_probs = softmax(model(img)).detach().numpy()[0]
|
||||||
index = torch.argmax(output, 1)
|
index = torch.argmax(output, 1)
|
||||||
|
|
||||||
result = {"label": classes[index], "probability": str(pred_probs[index])}
|
result = json.dumps({"label": classes[index], "probability": str(pred_probs[index])})
|
||||||
return result
|
return result
|
||||||
|
|||||||
@@ -1 +0,0 @@
|
|||||||
/data/
|
|
||||||
File diff suppressed because it is too large
Load Diff
@@ -300,7 +300,7 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"Now that you have created the compute target, let's see what the workspace's `compute_targets` property returns. You should now see one entry named 'gpucluster' of type BatchAI."
|
"Now that you have created the compute target, let's see what the workspace's `compute_targets()` function returns. You should now see one entry named 'gpucluster' of type BatchAI."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -309,7 +309,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"compute_targets = ws.compute_targets\n",
|
"compute_targets = ws.compute_targets()\n",
|
||||||
"for name, ct in compute_targets.items():\n",
|
"for name, ct in compute_targets.items():\n",
|
||||||
" print(name, ct.type, ct.provisioning_state)"
|
" print(name, ct.type, ct.provisioning_state)"
|
||||||
]
|
]
|
||||||
@@ -480,15 +480,6 @@
|
|||||||
"run"
|
"run"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"run.wait_for_completion(show_output = True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
@@ -775,15 +766,6 @@
|
|||||||
"RunDetails(htr).show()"
|
"RunDetails(htr).show()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"htr.wait_for_completion(show_output = True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
@@ -875,9 +857,9 @@
|
|||||||
"def run(raw_data):\n",
|
"def run(raw_data):\n",
|
||||||
" data = np.array(json.loads(raw_data)['data'])\n",
|
" data = np.array(json.loads(raw_data)['data'])\n",
|
||||||
" # make prediction\n",
|
" # make prediction\n",
|
||||||
" out = output.eval(session=sess, feed_dict={X: data})\n",
|
" out = output.eval(session = sess, feed_dict = {X: data})\n",
|
||||||
" y_hat = np.argmax(out, axis=1)\n",
|
" y_hat = np.argmax(out, axis = 1)\n",
|
||||||
" return y_hat.tolist()"
|
" return json.dumps(y_hat.tolist())"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -1026,10 +1008,10 @@
|
|||||||
"sample_indices = np.random.permutation(X_test.shape[0])[0:n]\n",
|
"sample_indices = np.random.permutation(X_test.shape[0])[0:n]\n",
|
||||||
"\n",
|
"\n",
|
||||||
"test_samples = json.dumps({\"data\": X_test[sample_indices].tolist()})\n",
|
"test_samples = json.dumps({\"data\": X_test[sample_indices].tolist()})\n",
|
||||||
"test_samples = bytes(test_samples, encoding='utf8')\n",
|
"test_samples = bytes(test_samples, encoding = 'utf8')\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# predict using the deployed model\n",
|
"# predict using the deployed model\n",
|
||||||
"result = json.loads(service.run(input_data=test_samples))\n",
|
"result = json.loads(service.run(input_data = test_samples))\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# compare actual value vs. the predicted values:\n",
|
"# compare actual value vs. the predicted values:\n",
|
||||||
"i = 0\n",
|
"i = 0\n",
|
||||||
@@ -1044,8 +1026,8 @@
|
|||||||
" font_color = 'red' if y_test[s] != result[i] else 'black'\n",
|
" font_color = 'red' if y_test[s] != result[i] else 'black'\n",
|
||||||
" clr_map = plt.cm.gray if y_test[s] != result[i] else plt.cm.Greys\n",
|
" clr_map = plt.cm.gray if y_test[s] != result[i] else plt.cm.Greys\n",
|
||||||
" \n",
|
" \n",
|
||||||
" plt.text(x=10, y=-10, s=y_hat[s], fontsize=18, color=font_color)\n",
|
" plt.text(x = 10, y = -10, s = y_hat[s], fontsize = 18, color = font_color)\n",
|
||||||
" plt.imshow(X_test[s].reshape(28, 28), cmap=clr_map)\n",
|
" plt.imshow(X_test[s].reshape(28, 28), cmap = clr_map)\n",
|
||||||
" \n",
|
" \n",
|
||||||
" i = i + 1\n",
|
" i = i + 1\n",
|
||||||
"plt.show()"
|
"plt.show()"
|
||||||
@@ -1097,15 +1079,15 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"models = ws.models\n",
|
"models = ws.models()\n",
|
||||||
"for name, model in models.items():\n",
|
"for name, model in models.items():\n",
|
||||||
" print(\"Model: {}, ID: {}\".format(name, model.id))\n",
|
" print(\"Model: {}, ID: {}\".format(name, model.id))\n",
|
||||||
" \n",
|
" \n",
|
||||||
"images = ws.images\n",
|
"images = ws.images()\n",
|
||||||
"for name, image in images.items():\n",
|
"for name, image in images.items():\n",
|
||||||
" print(\"Image: {}, location: {}\".format(name, image.image_location))\n",
|
" print(\"Image: {}, location: {}\".format(name, image.image_location))\n",
|
||||||
" \n",
|
" \n",
|
||||||
"webservices = ws.webservices\n",
|
"webservices = ws.webservices()\n",
|
||||||
"for name, webservice in webservices.items():\n",
|
"for name, webservice in webservices.items():\n",
|
||||||
" print(\"Webservice: {}, scoring URI: {}\".format(name, webservice.scoring_uri))"
|
" print(\"Webservice: {}, scoring URI: {}\".format(name, webservice.scoring_uri))"
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -1,2 +0,0 @@
|
|||||||
/data/
|
|
||||||
/tf-distr-hvd/
|
|
||||||
@@ -1 +0,0 @@
|
|||||||
/tf-distr-ps/
|
|
||||||
321
training/06.distributed-cntk-with-custom-docker/cntk_mnist.py
Normal file
321
training/06.distributed-cntk-with-custom-docker/cntk_mnist.py
Normal file
@@ -0,0 +1,321 @@
|
|||||||
|
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||||
|
# Licensed under the MIT License.
|
||||||
|
# Script adapted from:
|
||||||
|
# 1. https://github.com/Microsoft/CNTK/blob/v2.0/Tutorials/CNTK_103A_MNIST_DataLoader.ipynb
|
||||||
|
# 2. https://github.com/Microsoft/CNTK/blob/v2.0/Tutorials/CNTK_103C_MNIST_MultiLayerPerceptron.ipynb
|
||||||
|
# ===================================================================================================
|
||||||
|
"""Train a CNTK multi-layer perceptron on the MNIST dataset."""
|
||||||
|
|
||||||
|
from __future__ import print_function
|
||||||
|
import gzip
|
||||||
|
import numpy as np
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
import struct
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
|
||||||
|
import cntk as C
|
||||||
|
from azureml.core.run import Run
|
||||||
|
import argparse
|
||||||
|
|
||||||
|
run = Run.get_submitted_run()
|
||||||
|
|
||||||
|
parser = argparse.ArgumentParser()
|
||||||
|
|
||||||
|
parser.add_argument('--learning_rate', type=float, default=0.001, help='learning rate')
|
||||||
|
parser.add_argument('--num_hidden_layers', type=int, default=2, help='number of hidden layers')
|
||||||
|
parser.add_argument('--minibatch_size', type=int, default=64, help='minibatchsize')
|
||||||
|
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
# Functions to load MNIST images and unpack into train and test set.
|
||||||
|
# - loadData reads image data and formats into a 28x28 long array
|
||||||
|
# - loadLabels reads the corresponding labels data, 1 for each image
|
||||||
|
# - load packs the downloaded image and labels data into a combined format to be read later by
|
||||||
|
# CNTK text reader
|
||||||
|
|
||||||
|
|
||||||
|
def loadData(src, cimg):
|
||||||
|
print('Downloading ' + src)
|
||||||
|
gzfname, h = urlretrieve(src, './delete.me')
|
||||||
|
print('Done.')
|
||||||
|
try:
|
||||||
|
with gzip.open(gzfname) as gz:
|
||||||
|
n = struct.unpack('I', gz.read(4))
|
||||||
|
# Read magic number.
|
||||||
|
if n[0] != 0x3080000:
|
||||||
|
raise Exception('Invalid file: unexpected magic number.')
|
||||||
|
# Read number of entries.
|
||||||
|
n = struct.unpack('>I', gz.read(4))[0]
|
||||||
|
if n != cimg:
|
||||||
|
raise Exception('Invalid file: expected {0} entries.'.format(cimg))
|
||||||
|
crow = struct.unpack('>I', gz.read(4))[0]
|
||||||
|
ccol = struct.unpack('>I', gz.read(4))[0]
|
||||||
|
if crow != 28 or ccol != 28:
|
||||||
|
raise Exception('Invalid file: expected 28 rows/cols per image.')
|
||||||
|
# Read data.
|
||||||
|
res = np.fromstring(gz.read(cimg * crow * ccol), dtype=np.uint8)
|
||||||
|
finally:
|
||||||
|
os.remove(gzfname)
|
||||||
|
return res.reshape((cimg, crow * ccol))
|
||||||
|
|
||||||
|
|
||||||
|
def loadLabels(src, cimg):
|
||||||
|
print('Downloading ' + src)
|
||||||
|
gzfname, h = urlretrieve(src, './delete.me')
|
||||||
|
print('Done.')
|
||||||
|
try:
|
||||||
|
with gzip.open(gzfname) as gz:
|
||||||
|
n = struct.unpack('I', gz.read(4))
|
||||||
|
# Read magic number.
|
||||||
|
if n[0] != 0x1080000:
|
||||||
|
raise Exception('Invalid file: unexpected magic number.')
|
||||||
|
# Read number of entries.
|
||||||
|
n = struct.unpack('>I', gz.read(4))
|
||||||
|
if n[0] != cimg:
|
||||||
|
raise Exception('Invalid file: expected {0} rows.'.format(cimg))
|
||||||
|
# Read labels.
|
||||||
|
res = np.fromstring(gz.read(cimg), dtype=np.uint8)
|
||||||
|
finally:
|
||||||
|
os.remove(gzfname)
|
||||||
|
return res.reshape((cimg, 1))
|
||||||
|
|
||||||
|
|
||||||
|
def try_download(dataSrc, labelsSrc, cimg):
|
||||||
|
data = loadData(dataSrc, cimg)
|
||||||
|
labels = loadLabels(labelsSrc, cimg)
|
||||||
|
return np.hstack((data, labels))
|
||||||
|
|
||||||
|
# Save the data files into a format compatible with CNTK text reader
|
||||||
|
|
||||||
|
|
||||||
|
def savetxt(filename, ndarray):
|
||||||
|
dir = os.path.dirname(filename)
|
||||||
|
|
||||||
|
if not os.path.exists(dir):
|
||||||
|
os.makedirs(dir)
|
||||||
|
|
||||||
|
if not os.path.isfile(filename):
|
||||||
|
print("Saving", filename)
|
||||||
|
with open(filename, 'w') as f:
|
||||||
|
labels = list(map(' '.join, np.eye(10, dtype=np.uint).astype(str)))
|
||||||
|
for row in ndarray:
|
||||||
|
row_str = row.astype(str)
|
||||||
|
label_str = labels[row[-1]]
|
||||||
|
feature_str = ' '.join(row_str[:-1])
|
||||||
|
f.write('|labels {} |features {}\n'.format(label_str, feature_str))
|
||||||
|
else:
|
||||||
|
print("File already exists", filename)
|
||||||
|
|
||||||
|
# Read a CTF formatted text (as mentioned above) using the CTF deserializer from a file
|
||||||
|
|
||||||
|
|
||||||
|
def create_reader(path, is_training, input_dim, num_label_classes):
|
||||||
|
return C.io.MinibatchSource(C.io.CTFDeserializer(path, C.io.StreamDefs(
|
||||||
|
labels=C.io.StreamDef(field='labels', shape=num_label_classes, is_sparse=False),
|
||||||
|
features=C.io.StreamDef(field='features', shape=input_dim, is_sparse=False)
|
||||||
|
)), randomize=is_training, max_sweeps=C.io.INFINITELY_REPEAT if is_training else 1)
|
||||||
|
|
||||||
|
# Defines a utility that prints the training progress
|
||||||
|
|
||||||
|
|
||||||
|
def print_training_progress(trainer, mb, frequency, verbose=1):
|
||||||
|
training_loss = "NA"
|
||||||
|
eval_error = "NA"
|
||||||
|
|
||||||
|
if mb % frequency == 0:
|
||||||
|
training_loss = trainer.previous_minibatch_loss_average
|
||||||
|
eval_error = trainer.previous_minibatch_evaluation_average
|
||||||
|
if verbose:
|
||||||
|
print("Minibatch: {0}, Loss: {1:.4f}, Error: {2:.2f}%".format(mb, training_loss, eval_error * 100))
|
||||||
|
|
||||||
|
return mb, training_loss, eval_error
|
||||||
|
|
||||||
|
# Create the network architecture
|
||||||
|
|
||||||
|
|
||||||
|
def create_model(features):
|
||||||
|
with C.layers.default_options(init=C.layers.glorot_uniform(), activation=C.ops.relu):
|
||||||
|
h = features
|
||||||
|
for _ in range(num_hidden_layers):
|
||||||
|
h = C.layers.Dense(hidden_layers_dim)(h)
|
||||||
|
r = C.layers.Dense(num_output_classes, activation=None)(h)
|
||||||
|
return r
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Entry point for the Azure ML training run: download MNIST, train a
    # fully-connected CNTK network, evaluate it, log metrics to run history,
    # and save the trained model under ./outputs.
    # NOTE(review): Run.get_submitted_run() is the older SDK spelling; newer
    # SDKs expose Run.get_context() -- confirm against the pinned SDK version.
    run = Run.get_submitted_run()

    try:
        from urllib.request import urlretrieve  # Python 3
    except ImportError:
        from urllib import urlretrieve  # Python 2 fallback

    # Select the right target device when this script is being used:
    if 'TEST_DEVICE' in os.environ:
        if os.environ['TEST_DEVICE'] == 'cpu':
            C.device.try_set_default_device(C.device.cpu())
        else:
            C.device.try_set_default_device(C.device.gpu(0))

    # URLs for the train image and labels data
    url_train_image = 'http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz'
    url_train_labels = 'http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz'
    num_train_samples = 60000

    print("Downloading train data")
    train = try_download(url_train_image, url_train_labels, num_train_samples)

    url_test_image = 'http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz'
    url_test_labels = 'http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz'
    num_test_samples = 10000

    print("Downloading test data")
    test = try_download(url_test_image, url_test_labels, num_test_samples)

    # Save the train and test files (prefer our default path for the data).
    # Only MPI rank 0 writes the files; the other ranks spin on a sentinel
    # file so every worker sees a complete data set before proceeding.
    rank = os.environ.get("OMPI_COMM_WORLD_RANK")
    data_dir = os.path.join("outputs", "MNIST")
    sentinel_path = os.path.join(data_dir, "complete.txt")
    if rank == '0':
        print('Writing train text file...')
        savetxt(os.path.join(data_dir, "Train-28x28_cntk_text.txt"), train)

        print('Writing test text file...')
        savetxt(os.path.join(data_dir, "Test-28x28_cntk_text.txt"), test)
        with open(sentinel_path, 'w+') as f:
            f.write("download complete")

        print('Done with downloading data.')
    else:
        # Poll cheaply until rank 0 signals completion via the sentinel file.
        while not os.path.exists(sentinel_path):
            time.sleep(0.01)

    # Ensure we always get the same amount of randomness
    np.random.seed(0)

    # Define the data dimensions (28x28 grayscale images, 10 digit classes)
    input_dim = 784
    num_output_classes = 10

    # Ensure the training and test data is generated and available for this tutorial.
    # We search several candidate locations for the cached MNIST data set.
    data_found = False
    for data_dir in [os.path.join("..", "Examples", "Image", "DataSets", "MNIST"),
                     os.path.join("data_" + str(rank), "MNIST"),
                     os.path.join("outputs", "MNIST")]:
        train_file = os.path.join(data_dir, "Train-28x28_cntk_text.txt")
        test_file = os.path.join(data_dir, "Test-28x28_cntk_text.txt")
        if os.path.isfile(train_file) and os.path.isfile(test_file):
            data_found = True
            break
    if not data_found:
        raise ValueError("Please generate the data by completing CNTK 103 Part A")
    print("Data directory is {0}".format(data_dir))

    num_hidden_layers = args.num_hidden_layers
    hidden_layers_dim = 400

    input = C.input_variable(input_dim)
    label = C.input_variable(num_output_classes)

    # Scale the input to 0-1 range by dividing each pixel by 255.
    # (A previous revision also built a second, immediately-discarded model
    # from the unscaled input; that dead call has been removed.)
    z = create_model(input / 255.0)

    loss = C.cross_entropy_with_softmax(z, label)
    label_error = C.classification_error(z, label)

    # Instantiate the trainer object to drive the model training
    learning_rate = args.learning_rate
    lr_schedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)
    learner = C.sgd(z.parameters, lr_schedule)
    trainer = C.Trainer(z, (loss, label_error), [learner])

    # Initialize the parameters for the trainer
    minibatch_size = args.minibatch_size
    num_samples_per_sweep = 60000
    num_sweeps_to_train_with = 10
    num_minibatches_to_train = (num_samples_per_sweep * num_sweeps_to_train_with) / minibatch_size

    # Create the reader to training data set
    reader_train = create_reader(train_file, True, input_dim, num_output_classes)

    # Map the data streams to the input and labels.
    input_map = {
        label: reader_train.streams.labels,
        input: reader_train.streams.features
    }

    # Run the trainer and perform model training
    training_progress_output_freq = 500

    errors = []
    losses = []
    for i in range(0, int(num_minibatches_to_train)):
        # Read a mini batch from the training data file
        data = reader_train.next_minibatch(minibatch_size, input_map=input_map)

        trainer.train_minibatch(data)
        # Distinct names so the reported scalars do not shadow the `loss`
        # criterion node defined above.
        batchsize, batch_loss, batch_error = print_training_progress(
            trainer, i, training_progress_output_freq, verbose=1)
        if (batch_error != 'NA') and (batch_loss != 'NA'):
            errors.append(float(batch_error))
            losses.append(float(batch_loss))

    # Log the loss/error curves to the Azure ML run history (rank 0 only)
    if rank == '0':
        run.log_list("Loss", losses)
        run.log_list("Error", errors)

    # Read the test data
    reader_test = create_reader(test_file, False, input_dim, num_output_classes)

    test_input_map = {
        label: reader_test.streams.labels,
        input: reader_test.streams.features,
    }

    # Test data for trained model
    test_minibatch_size = 512
    num_samples = 10000
    num_minibatches_to_test = num_samples // test_minibatch_size
    test_result = 0.0

    for i in range(num_minibatches_to_test):
        # We are loading test data in batches specified by test_minibatch_size
        # Each data point in the minibatch is a MNIST digit image of 784 dimensions
        # with one pixel per dimension that we will encode / decode with the
        # trained model.
        data = reader_test.next_minibatch(test_minibatch_size,
                                          input_map=test_input_map)

        eval_error = trainer.test_minibatch(data)
        test_result = test_result + eval_error

    # Average of evaluation errors of all test minibatches
    print("Average test error: {0:.2f}%".format((test_result * 100) / num_minibatches_to_test))

    out = C.softmax(z)

    # Read the data for evaluation
    reader_eval = create_reader(test_file, False, input_dim, num_output_classes)

    eval_minibatch_size = 25
    eval_input_map = {input: reader_eval.streams.features}

    # NOTE(review): reader_eval / eval_input_map are created but never used;
    # the sample batch below is drawn from reader_test with test_input_map so
    # that data[label] is available for the ground-truth comparison -- confirm
    # this is intentional rather than a leftover.
    data = reader_test.next_minibatch(eval_minibatch_size, input_map=test_input_map)

    img_label = data[label].asarray()
    img_data = data[input].asarray()
    predicted_label_prob = [out.eval(img_data[i]) for i in range(len(img_data))]

    # Find the index with the maximum value for both predicted as well as the ground truth
    pred = [np.argmax(predicted_label_prob[i]) for i in range(len(predicted_label_prob))]
    gtlabel = [np.argmax(img_label[i]) for i in range(len(img_label))]

    print("Label :", gtlabel[:25])
    print("Predicted:", pred)

    # save model to outputs folder so Azure ML uploads it with the run artifacts
    z.save('outputs/cntk.model')
||||||
@@ -176,13 +176,13 @@
|
|||||||
"from azureml.core.script_run_config import ScriptRunConfig\n",
|
"from azureml.core.script_run_config import ScriptRunConfig\n",
|
||||||
"import tensorflow as tf\n",
|
"import tensorflow as tf\n",
|
||||||
"\n",
|
"\n",
|
||||||
"logs_dir = os.path.join(os.curdir, \"logs\")\n",
|
"logs_dir = os.curdir + os.sep + \"logs\"\n",
|
||||||
"data_dir = os.path.abspath(os.path.join(os.curdir, \"mnist_data\"))\n",
|
"tensorflow_logs_dir = os.path.join(logs_dir, \"tensorflow\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"if not path.exists(data_dir):\n",
|
"if not path.exists(tensorflow_logs_dir):\n",
|
||||||
" makedirs(data_dir)\n",
|
" makedirs(tensorflow_logs_dir)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"os.environ[\"TEST_TMPDIR\"] = data_dir\n",
|
"os.environ[\"TEST_TMPDIR\"] = logs_dir\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Writing logs to ./logs results in their being uploaded to Artifact Service,\n",
|
"# Writing logs to ./logs results in their being uploaded to Artifact Service,\n",
|
||||||
"# and thus, made accessible to our Tensorboard instance.\n",
|
"# and thus, made accessible to our Tensorboard instance.\n",
|
||||||
@@ -191,15 +191,15 @@
|
|||||||
"# Create an experiment\n",
|
"# Create an experiment\n",
|
||||||
"exp = Experiment(ws, experiment_name)\n",
|
"exp = Experiment(ws, experiment_name)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# If you would like the run to go for longer, add --max_steps 5000 to the arguments list:\n",
|
|
||||||
"# arguments_list += [\"--max_steps\", \"5000\"]\n",
|
|
||||||
"\n",
|
|
||||||
"script = ScriptRunConfig(exp_dir,\n",
|
"script = ScriptRunConfig(exp_dir,\n",
|
||||||
" script=\"mnist_with_summaries.py\",\n",
|
" script=\"mnist_with_summaries.py\",\n",
|
||||||
" run_config=run_config,\n",
|
" run_config=run_config)\n",
|
||||||
" arguments=arguments_list)\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"run = exp.submit(script)\n",
|
"# If you would like the run to go for longer, add --max_steps 5000 to the arguments list:\n",
|
||||||
|
"# arguments_list += [\"--max_steps\", \"5000\"]\n",
|
||||||
|
"kwargs = {}\n",
|
||||||
|
"kwargs['arguments_list'] = arguments_list\n",
|
||||||
|
"run = exp.submit(script, kwargs)\n",
|
||||||
"# You can also wait for the run to complete\n",
|
"# You can also wait for the run to complete\n",
|
||||||
"# run.wait_for_completion(show_output=True)\n",
|
"# run.wait_for_completion(show_output=True)\n",
|
||||||
"runs.append(run)"
|
"runs.append(run)"
|
||||||
@@ -373,7 +373,7 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"try:\n",
|
"try:\n",
|
||||||
" # If you already have a cluster named this, we don't need to make a new one.\n",
|
" # If you already have a cluster named this, we don't need to make a new one.\n",
|
||||||
" cts = ws.compute_targets \n",
|
" cts = ws.compute_targets() \n",
|
||||||
" compute_target = cts[clust_name]\n",
|
" compute_target = cts[clust_name]\n",
|
||||||
" assert compute_target.type == 'BatchAI'\n",
|
" assert compute_target.type == 'BatchAI'\n",
|
||||||
"except:\n",
|
"except:\n",
|
||||||
|
|||||||
52
training/readme.md
Normal file
52
training/readme.md
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
# Training ML models with Azure ML SDK
|
||||||
|
These notebook tutorials cover the various scenarios for training machine learning and deep learning models with Azure Machine Learning.
|
||||||
|
|
||||||
|
## Sample notebooks
|
||||||
|
- [01.train-hyperparameter-tune-deploy-with-pytorch](./01.train-hyperparameter-tune-deploy-with-pytorch/01.train-hyperparameter-tune-deploy-with-pytorch.ipynb)
|
||||||
|
Train, hyperparameter tune, and deploy a PyTorch image classification model that distinguishes bees vs. ants using transfer learning. Azure ML concepts covered:
|
||||||
|
- Create a remote compute target (Batch AI cluster)
|
||||||
|
- Upload training data using `Datastore`
|
||||||
|
- Run a single-node `PyTorch` training job
|
||||||
|
- Hyperparameter tune model with HyperDrive
|
||||||
|
- Find and register the best model
|
||||||
|
- Deploy model to ACI
|
||||||
|
- [02.distributed-pytorch-with-horovod](./02.distributed-pytorch-with-horovod/02.distributed-pytorch-with-horovod.ipynb)
|
||||||
|
Train a PyTorch model on the MNIST dataset using distributed training with Horovod. Azure ML concepts covered:
|
||||||
|
- Create a remote compute target (Batch AI cluster)
|
||||||
|
- Run a two-node distributed `PyTorch` training job using Horovod
|
||||||
|
- [03.train-hyperparameter-tune-deploy-with-tensorflow](./03.train-hyperparameter-tune-deploy-with-tensorflow/03.train-hyperparameter-tune-deploy-with-tensorflow.ipynb)
|
||||||
|
Train, hyperparameter tune, and deploy a TensorFlow model on the MNIST dataset. Azure ML concepts covered:
|
||||||
|
- Create a remote compute target (Batch AI cluster)
|
||||||
|
- Upload training data using `Datastore`
|
||||||
|
- Run a single-node `TensorFlow` training job
|
||||||
|
- Leverage features of the `Run` object
|
||||||
|
- Download the trained model
|
||||||
|
- Hyperparameter tune model with HyperDrive
|
||||||
|
- Find and register the best model
|
||||||
|
- Deploy model to ACI
|
||||||
|
- [04.distributed-tensorflow-with-horovod](./04.distributed-tensorflow-with-horovod/04.distributed-tensorflow-with-horovod.ipynb)
|
||||||
|
Train a TensorFlow word2vec model using distributed training with Horovod. Azure ML concepts covered:
|
||||||
|
- Create a remote compute target (Batch AI cluster)
|
||||||
|
- Upload training data using `Datastore`
|
||||||
|
- Run a two-node distributed `TensorFlow` training job using Horovod
|
||||||
|
- [05.distributed-tensorflow-with-parameter-server](./05.distributed-tensorflow-with-parameter-server/05.distributed-tensorflow-with-parameter-server.ipynb)
|
||||||
|
Train a TensorFlow model on the MNIST dataset using native distributed TensorFlow (parameter server). Azure ML concepts covered:
|
||||||
|
- Create a remote compute target (Batch AI cluster)
|
||||||
|
- Run a distributed `TensorFlow` training job with two workers and one parameter server
|
||||||
|
- [06.distributed-cntk-with-custom-docker](./06.distributed-cntk-with-custom-docker/06.distributed-cntk-with-custom-docker.ipynb)
|
||||||
|
Train a CNTK model on the MNIST dataset using the Azure ML base `Estimator` with custom Docker image and distributed training. Azure ML concepts covered:
|
||||||
|
- Create a remote compute target (Batch AI cluster)
|
||||||
|
- Upload training data using `Datastore`
|
||||||
|
- Run a base `Estimator` training job using a custom Docker image from Docker Hub
|
||||||
|
- Distributed CNTK two-node training job via MPI using base `Estimator`
|
||||||
|
|
||||||
|
- [07.tensorboard](./07.tensorboard/07.tensorboard.ipynb)
|
||||||
|
Train a TensorFlow MNIST model locally, on a DSVM, and on Batch AI and view the logs live on TensorBoard. Azure ML concepts covered:
|
||||||
|
- Run the training job locally with Azure ML and run TensorBoard locally. Start (and stop) an Azure ML `TensorBoard` object to stream and view the logs
|
||||||
|
- Run the training job on a remote DSVM and stream the logs to TensorBoard
|
||||||
|
- Run the training job on a remote Batch AI cluster and stream the logs to TensorBoard
|
||||||
|
- Start a `Tensorboard` instance that displays the logs from all three above runs in one
|
||||||
|
- [08.export-run-history-to-tensorboard](./08.export-run-history-to-tensorboard/08.export-run-history-to-tensorboard.ipynb)
|
||||||
|
- Start an Azure ML `Experiment` and log metrics to `Run` history
|
||||||
|
- Export the `Run` history logs to TensorBoard logs
|
||||||
|
- View the logs in TensorBoard
|
||||||
@@ -58,13 +58,14 @@
|
|||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
"name": "import",
|
||||||
"tags": [
|
"tags": [
|
||||||
"check version"
|
"check version"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"%matplotlib notebook\n",
|
"%matplotlib inline\n",
|
||||||
"import numpy as np\n",
|
"import numpy as np\n",
|
||||||
"import matplotlib\n",
|
"import matplotlib\n",
|
||||||
"import matplotlib.pyplot as plt\n",
|
"import matplotlib.pyplot as plt\n",
|
||||||
@@ -159,8 +160,8 @@
|
|||||||
"autoscale_enabled = os.environ.get(\"BATCHAI_CLUSTER_AUTOSCALE_ENABLED\", True)\n",
|
"autoscale_enabled = os.environ.get(\"BATCHAI_CLUSTER_AUTOSCALE_ENABLED\", True)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"if batchai_cluster_name in ws.compute_targets:\n",
|
"if batchai_cluster_name in ws.compute_targets():\n",
|
||||||
" compute_target = ws.compute_targets[batchai_cluster_name]\n",
|
" compute_target = ws.compute_targets()[batchai_cluster_name]\n",
|
||||||
" if compute_target and type(compute_target) is BatchAiCompute:\n",
|
" if compute_target and type(compute_target) is BatchAiCompute:\n",
|
||||||
" print('found compute target. just use it. ' + batchai_cluster_name)\n",
|
" print('found compute target. just use it. ' + batchai_cluster_name)\n",
|
||||||
"else:\n",
|
"else:\n",
|
||||||
@@ -201,6 +202,13 @@
|
|||||||
"Download the MNIST dataset and save the files into a `data` directory locally. Images and labels for both training and testing are downloaded."
|
"Download the MNIST dataset and save the files into a `data` directory locally. Images and labels for both training and testing are downloaded."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
@@ -716,4 +724,4 @@
|
|||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
"nbformat_minor": 2
|
"nbformat_minor": 2
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -97,7 +97,7 @@
|
|||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"%matplotlib notebook\n",
|
"%matplotlib inline\n",
|
||||||
"import numpy as np\n",
|
"import numpy as np\n",
|
||||||
"import matplotlib\n",
|
"import matplotlib\n",
|
||||||
"import matplotlib.pyplot as plt\n",
|
"import matplotlib.pyplot as plt\n",
|
||||||
@@ -134,7 +134,7 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"ws = Workspace.from_config()\n",
|
"ws = Workspace.from_config()\n",
|
||||||
"model=Model(ws, 'sklearn_mnist')\n",
|
"model=Model(ws, 'sklearn_mnist')\n",
|
||||||
"model.download(target_dir='.', exists_ok=True)\n",
|
"model.download(target_dir = '.')\n",
|
||||||
"import os \n",
|
"import os \n",
|
||||||
"# verify the downloaded model file\n",
|
"# verify the downloaded model file\n",
|
||||||
"os.stat('./sklearn_mnist_model.pkl')"
|
"os.stat('./sklearn_mnist_model.pkl')"
|
||||||
@@ -210,7 +210,7 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"conf_mx = confusion_matrix(y_test, y_hat)\n",
|
"conf_mx = confusion_matrix(y_test, y_hat)\n",
|
||||||
"print(conf_mx)\n",
|
"print(conf_mx)\n",
|
||||||
"print('Overall accuracy:', np.average(y_hat == y_test))"
|
"print('Overall accuracy:', np.average(y_hat==y_test))"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -296,8 +296,7 @@
|
|||||||
" data = np.array(json.loads(raw_data)['data'])\n",
|
" data = np.array(json.loads(raw_data)['data'])\n",
|
||||||
" # make prediction\n",
|
" # make prediction\n",
|
||||||
" y_hat = model.predict(data)\n",
|
" y_hat = model.predict(data)\n",
|
||||||
" # you can return any data type as long as it is JSON-serializable\n",
|
" return json.dumps(y_hat.tolist())"
|
||||||
" return y_hat.tolist()"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -478,10 +477,10 @@
|
|||||||
"sample_indices = np.random.permutation(X_test.shape[0])[0:n]\n",
|
"sample_indices = np.random.permutation(X_test.shape[0])[0:n]\n",
|
||||||
"\n",
|
"\n",
|
||||||
"test_samples = json.dumps({\"data\": X_test[sample_indices].tolist()})\n",
|
"test_samples = json.dumps({\"data\": X_test[sample_indices].tolist()})\n",
|
||||||
"test_samples = bytes(test_samples, encoding='utf8')\n",
|
"test_samples = bytes(test_samples, encoding = 'utf8')\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# predict using the deployed model\n",
|
"# predict using the deployed model\n",
|
||||||
"result = service.run(input_data=test_samples)\n",
|
"result = json.loads(service.run(input_data=test_samples))\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# compare actual value vs. the predicted values:\n",
|
"# compare actual value vs. the predicted values:\n",
|
||||||
"i = 0\n",
|
"i = 0\n",
|
||||||
|
|||||||
@@ -393,7 +393,7 @@
|
|||||||
"> * Review training results\n",
|
"> * Review training results\n",
|
||||||
"> * Register the best model\n",
|
"> * Register the best model\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Learn more about [how to configure settings for automatic training](https://aka.ms/aml-how-to-configure-auto) or [how to use automatic training on a remote resource](https://aka.ms/aml-how-to-auto-remote)."
|
"Learn more about [how to configure settings for automatic training](https://aka.ms/aml-how-configure-auto) or [how to use automatic training on a remote resource](https://aka.ms/aml-how-to-auto-remote)."
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
|
|||||||
Reference in New Issue
Block a user