Update notebooks

This commit is contained in:
rastala
2018-09-23 21:56:44 -04:00
parent 867538cf3f
commit a6c6e63451
49 changed files with 373 additions and 2864 deletions

View File

@@ -18,7 +18,7 @@
"## Prerequisites:\n",
"\n",
"### 1. Install Azure ML SDK\n",
"Follow [SDK installation instructions](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-configure-environment).\n",
"Follow [SDK installation instructions](https://docs.microsoft.com/azure/machine-learning/service/how-to-configure-environment).\n",
"\n",
"### 2. Install some additional packages\n",
"This Notebook requires some additional libraries. In the conda environment, run below commands: \n",
@@ -185,35 +185,11 @@
},
"outputs": [],
"source": [
"# load workspace configuratio from ./aml_config/config.json file.ß\n",
"# load workspace configuratio from ./aml_config/config.json file.\n",
"my_workspace = Workspace.from_config()\n",
"my_workspace.get_details()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create a folder to host all sample projects\n",
"Lastly, create a folder where all the sample projects will be hosted."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"sample_projects_folder = './sample_projects'\n",
"\n",
"if not os.path.isdir(sample_projects_folder):\n",
" os.mkdir(sample_projects_folder)\n",
" \n",
"print('Sample projects will be created in {}.'.format(sample_projects_folder))"
]
},
{
"cell_type": "markdown",
"metadata": {},
@@ -225,9 +201,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3.6",
"language": "python",
"name": "python3"
"name": "python36"
},
"language_info": {
"codemirror_mode": {
@@ -239,7 +215,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
"version": "3.6.4"
}
},
"nbformat": 4,

View File

@@ -277,6 +277,16 @@
" os.remove(path=model_name)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# now let's take a look at the experiment in Azure portal.\n",
"experiment"
]
},
{
"cell_type": "markdown",
"metadata": {},
@@ -778,9 +788,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3.6",
"language": "python",
"name": "python3"
"name": "python36"
},
"language_info": {
"codemirror_mode": {

View File

@@ -88,7 +88,9 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Create a folder to store the training script."
"## View `train.py`\n",
"\n",
"`train.py` is already created for you."
]
},
{
@@ -97,18 +99,15 @@
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"script_folder = './samples/train-on-local'\n",
"os.makedirs(script_folder, exist_ok=True)"
"with open('./train.py', 'r') as f:\n",
" print(f.read())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create `train.py`\n",
"\n",
"Use `%%writefile` magic to write training code to `train.py` file under your script folder."
"Note `train.py` also references a `mylib.py` file."
]
},
{
@@ -117,73 +116,8 @@
"metadata": {},
"outputs": [],
"source": [
"%%writefile $script_folder/train.py\n",
"\n",
"import os\n",
"from sklearn.datasets import load_diabetes\n",
"from sklearn.linear_model import Ridge\n",
"from sklearn.metrics import mean_squared_error\n",
"from sklearn.model_selection import train_test_split\n",
"from azureml.core.run import Run\n",
"from sklearn.externals import joblib\n",
"\n",
"# example of referencing another script\n",
"import mylib\n",
"\n",
"X, y = load_diabetes(return_X_y=True)\n",
"\n",
"run = Run.get_submitted_run()\n",
"\n",
"X_train, X_test, y_train, y_test=train_test_split(X, y, test_size=0.2, random_state=0)\n",
"data = {\"train\": {\"X\": X_train, \"y\": y_train},\n",
" \"test\": {\"X\": X_test, \"y\": y_test}}\n",
"\n",
"# example of referencing another script\n",
"alphas = mylib.get_alphas()\n",
"\n",
"for alpha in alphas:\n",
" # Use Ridge algorithm to create a regression model\n",
" reg = Ridge(alpha=alpha)\n",
" reg.fit(data[\"train\"][\"X\"], data[\"train\"][\"y\"])\n",
"\n",
" preds = reg.predict(data[\"test\"][\"X\"])\n",
" mse = mean_squared_error(preds, data[\"test\"][\"y\"])\n",
" run.log('alpha', alpha)\n",
" run.log('mse', mse)\n",
"\n",
" model_file_name='ridge_{0:.2f}.pkl'.format(alpha)\n",
" # save model in the outputs folder so it automatically get uploaded\n",
" with open(model_file_name, \"wb\") as file:\n",
" joblib.dump(value=reg, filename=model_file_name)\n",
" \n",
" # upload the model file explicitly into artifacts \n",
" run.upload_file(name=model_file_name, path_or_stream=model_file_name)\n",
" \n",
" # register the model\n",
" run.register_model(model_name='diabetes-model', model_path=model_file_name)\n",
"\n",
" print('alpha is {0:.2f}, and mse is {1:0.2f}'.format(alpha, mse))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"`train.py` also references a `mylib.py` file. So let's create that too."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%writefile $script_folder/mylib.py\n",
"import numpy as np\n",
"\n",
"def get_alphas():\n",
" # list of numbers from 0.0 to 1.0 with a 0.05 interval\n",
" return np.arange(0.0, 1.0, 0.05)"
"with open('./mylib.py', 'r') as f:\n",
" print(f.read())"
]
},
{
@@ -209,7 +143,7 @@
"run_config_user_managed.environment.python.user_managed_dependencies = True\n",
"\n",
"# You can choose a specific Python environment by pointing to a Python path \n",
"#run_config.environment.python.interpreter_path = '/home/ninghai/miniconda3/envs/sdk2/bin/python'"
"#run_config.environment.python.interpreter_path = '/home/johndoe/miniconda3/envs/sdk2/bin/python'"
]
},
{
@@ -228,9 +162,8 @@
"source": [
"from azureml.core import ScriptRunConfig\n",
"\n",
"src = ScriptRunConfig(source_directory=script_folder, script='train.py', run_config=run_config_user_managed)\n",
"run = exp.submit(src)\n",
"run.wait_for_completion(show_output=True)"
"src = ScriptRunConfig(source_directory='./', script='train.py', run_config=run_config_user_managed)\n",
"run = exp.submit(src)"
]
},
{
@@ -249,6 +182,22 @@
"run"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Block to wait till run finishes."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"run.wait_for_completion(show_output=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
@@ -290,9 +239,8 @@
"metadata": {},
"outputs": [],
"source": [
"src = ScriptRunConfig(source_directory=script_folder, script='train.py', run_config=run_config_system_managed)\n",
"run = exp.submit(src)\n",
"run.wait_for_completion(show_output = True)"
"src = ScriptRunConfig(source_directory=\"./\", script='train.py', run_config=run_config_system_managed)\n",
"run = exp.submit(src)"
]
},
{
@@ -311,12 +259,30 @@
"run"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Block and wait till run finishes."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"run.wait_for_completion(show_output = True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Docker-based execution\n",
"**NOTE** You must have Docker engine installed locally in order to use this execution mode. You can also ask the system to pull down a Docker image and execute your scripts in it."
"**IMPORTANT**: You must have Docker engine installed locally in order to use this execution mode. If your kernel is already running in a Docker container, such as **Azure Notebooks**, this mode will **NOT** work.\n",
"\n",
"You can also ask the system to pull down a Docker image and execute your scripts in it."
]
},
{
@@ -356,7 +322,7 @@
"metadata": {},
"outputs": [],
"source": [
"src = ScriptRunConfig(source_directory=script_folder, script='train.py', run_config=run_config_docker)\n",
"src = ScriptRunConfig(source_directory=\"./\", script='train.py', run_config=run_config_docker)\n",
"run = exp.submit(src)"
]
},
@@ -376,7 +342,7 @@
"metadata": {},
"outputs": [],
"source": [
"run.wait_for_completion(show_output = True)"
"run.wait_for_completion(show_output=True)"
]
},
{
@@ -455,7 +421,7 @@
"outputs": [],
"source": [
"# supply a model name, and the full path to the serialized model file.\n",
"model = run.register_model(model_name='best_ridge_model', model_path='ridge_0.40.pkl')"
"model = run.register_model(model_name='best_ridge_model', model_path='./outputs/ridge_0.40.pkl')"
]
},
{
@@ -477,9 +443,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python [default]",
"display_name": "Python 3.6",
"language": "python",
"name": "python3"
"name": "python36"
},
"language_info": {
"codemirror_mode": {

View File

@@ -0,0 +1,9 @@
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license.
import numpy as np
def get_alphas():
    """Return the regularization strengths to sweep over.

    Returns:
        numpy.ndarray: 20 evenly spaced float values starting at 0.0 with a
        0.05 interval (0.0, 0.05, ..., 0.95); 1.0 itself is excluded.
    """
    # linspace with an explicit count avoids the length ambiguity that
    # np.arange can exhibit with a non-integer step, while producing the
    # same values as np.arange(0.0, 1.0, 0.05).
    return np.linspace(0.0, 1.0, num=20, endpoint=False)

View File

@@ -1,24 +1,30 @@
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license.
from sklearn.datasets import load_diabetes
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from azureml.core.run import Run
from sklearn.externals import joblib
import os
import numpy as np
import mylib
# os.makedirs('./outputs', exist_ok = True)
os.makedirs('./outputs', exist_ok=True)
X, y = load_diabetes(return_X_y=True)
run = Run.get_submitted_run()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y,
test_size=0.2,
random_state=0)
data = {"train": {"X": X_train, "y": y_train},
"test": {"X": X_test, "y": y_test}}
# list of numbers from 0.0 to 1.0 with a 0.05 interval
alphas = np.arange(0.0, 1.0, 0.05)
alphas = mylib.get_alphas()
for alpha in alphas:
# Use Ridge algorithm to create a regression model
@@ -33,13 +39,7 @@ for alpha in alphas:
model_file_name = 'ridge_{0:.2f}.pkl'.format(alpha)
# save model in the outputs folder so it automatically gets uploaded
with open(model_file_name, "wb") as file:
joblib.dump(value=reg, filename=model_file_name)
# upload the model file explicitly into artifacts
run.upload_file(name=model_file_name, path_or_stream=model_file_name)
# register the model
# commented out for now until a bug is fixed
# run.register_model(file_name = model_file_name)
joblib.dump(value=reg, filename=os.path.join('./outputs/',
model_file_name))
print('alpha is {0:.2f}, and mse is {1:0.2f}'.format(alpha, mse))

View File

@@ -1,325 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
"\n",
"Licensed under the MIT License."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 03. Train on Azure Container Instance (EXPERIMENTAL)\n",
"\n",
"* Create Workspace\n",
"* Create Project\n",
"* Create `train.py` in the project folder.\n",
"* Configure an ACI (Azure Container Instance) run\n",
"* Execute in ACI"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Prerequisites\n",
"Make sure you go through the [00. Installation and Configuration](00.configuration.ipynb) Notebook first if you haven't."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Check core SDK version number\n",
"import azureml.core\n",
"\n",
"print(\"SDK version:\", azureml.core.VERSION)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Initialize Workspace\n",
"\n",
"Initialize a workspace object from persisted configuration"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"create workspace"
]
},
"outputs": [],
"source": [
"from azureml.core import Workspace\n",
"\n",
"ws = Workspace.from_config()\n",
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create An Experiment\n",
"\n",
"**Experiment** is a logical container in an Azure ML Workspace. It hosts run records which can include run metrics and output artifacts from your experiments."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core import Experiment\n",
"experiment_name = 'train-on-aci'\n",
"experiment = Experiment(workspace = ws, name = experiment_name)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Create a folder to store the training script."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"script_folder = './samples/train-on-aci'\n",
"os.makedirs(script_folder, exist_ok = True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Remote execution on ACI\n",
"\n",
"Use `%%writefile` magic to write training code to `train.py` file under the project folder."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%writefile $script_folder/train.py\n",
"\n",
"import os\n",
"from sklearn.datasets import load_diabetes\n",
"from sklearn.linear_model import Ridge\n",
"from sklearn.metrics import mean_squared_error\n",
"from sklearn.model_selection import train_test_split\n",
"from azureml.core.run import Run\n",
"from sklearn.externals import joblib\n",
"\n",
"import numpy as np\n",
"\n",
"os.makedirs('./outputs', exist_ok=True)\n",
"\n",
"X, y = load_diabetes(return_X_y = True)\n",
"\n",
"run = Run.get_submitted_run()\n",
"\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)\n",
"data = {\"train\": {\"X\": X_train, \"y\": y_train},\n",
" \"test\": {\"X\": X_test, \"y\": y_test}}\n",
"\n",
"# list of numbers from 0.0 to 1.0 with a 0.05 interval\n",
"alphas = np.arange(0.0, 1.0, 0.05)\n",
"\n",
"for alpha in alphas:\n",
" # Use Ridge algorithm to create a regression model\n",
" reg = Ridge(alpha = alpha)\n",
" reg.fit(data[\"train\"][\"X\"], data[\"train\"][\"y\"])\n",
"\n",
" preds = reg.predict(data[\"test\"][\"X\"])\n",
" mse = mean_squared_error(preds, data[\"test\"][\"y\"])\n",
" run.log('alpha', alpha)\n",
" run.log('mse', mse)\n",
" \n",
" model_file_name = 'ridge_{0:.2f}.pkl'.format(alpha)\n",
" with open(model_file_name, \"wb\") as file:\n",
" joblib.dump(value = reg, filename = 'outputs/' + model_file_name)\n",
"\n",
" print('alpha is {0:.2f}, and mse is {1:0.2f}'.format(alpha, mse))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Configure for using ACI\n",
"Linux-based ACI is available in `westus`, `eastus`, `westeurope`, `northeurope`, `westus2` and `southeastasia` regions. See details [here](https://docs.microsoft.com/en-us/azure/container-instances/container-instances-quotas#region-availability)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"configure run"
]
},
"outputs": [],
"source": [
"from azureml.core.runconfig import RunConfiguration\n",
"from azureml.core.conda_dependencies import CondaDependencies\n",
"\n",
"# create a new runconfig object\n",
"run_config = RunConfiguration()\n",
"\n",
"# signal that you want to use ACI to execute script.\n",
"run_config.target = \"containerinstance\"\n",
"\n",
"# ACI container group is only supported in certain regions, which can be different than the region the Workspace is in.\n",
"run_config.container_instance.region = 'eastus'\n",
"\n",
"# set the ACI CPU and Memory \n",
"run_config.container_instance.cpu_cores = 1\n",
"run_config.container_instance.memory_gb = 2\n",
"\n",
"# enable Docker \n",
"run_config.environment.docker.enabled = True\n",
"\n",
"# set Docker base image to the default CPU-based image\n",
"run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n",
"#run_config.environment.docker.base_image = 'microsoft/mmlspark:plus-0.9.9'\n",
"\n",
"# use conda_dependencies.yml to create a conda environment in the Docker image for execution\n",
"run_config.environment.python.user_managed_dependencies = False\n",
"\n",
"# auto-prepare the Docker image when used for execution (if it is not already prepared)\n",
"run_config.auto_prepare_environment = True\n",
"\n",
"# specify CondaDependencies obj\n",
"run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Submit the Experiment\n",
"Finally, run the training job on the ACI"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"remote run",
"aci"
]
},
"outputs": [],
"source": [
"%%time \n",
"from azureml.core.script_run_config import ScriptRunConfig\n",
"\n",
"script_run_config = ScriptRunConfig(source_directory = script_folder,\n",
" script= 'train.py',\n",
" run_config = run_config)\n",
"\n",
"run = experiment.submit(script_run_config)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"remote run",
"aci"
]
},
"outputs": [],
"source": [
"%%time\n",
"# Shows output of the run on stdout.\n",
"run.wait_for_completion(show_output = True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"query history"
]
},
"outputs": [],
"source": [
"# Show run details\n",
"run"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"get metrics"
]
},
"outputs": [],
"source": [
"# get all metris logged in the run\n",
"run.get_metrics()\n",
"metrics = run.get_metrics()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"print('When alpha is {1:0.2f}, we have min MSE {0:0.2f}.'.format(\n",
" min(metrics['mse']), \n",
" metrics['alpha'][np.argmin(metrics['mse'])]\n",
"))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -13,10 +13,9 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# 03. Train on Azure Container Instance (EXPERIMENTAL)\n",
"# 03. Train on Azure Container Instance\n",
"\n",
"* Create Workspace\n",
"* Create Project\n",
"* Create `train.py` in the project folder.\n",
"* Configure an ACI (Azure Container Instance) run\n",
"* Execute in ACI"
@@ -87,31 +86,13 @@
"experiment = Experiment(workspace = ws, name = experiment_name)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Create a folder to store the training script."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"script_folder = './samples/train-on-aci'\n",
"os.makedirs(script_folder, exist_ok = True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Remote execution on ACI\n",
"\n",
"Use `%%writefile` magic to write training code to `train.py` file under the project folder."
"The training script `train.py` is already created for you. Let's have a look."
]
},
{
@@ -120,46 +101,8 @@
"metadata": {},
"outputs": [],
"source": [
"%%writefile $script_folder/train.py\n",
"\n",
"import os\n",
"from sklearn.datasets import load_diabetes\n",
"from sklearn.linear_model import Ridge\n",
"from sklearn.metrics import mean_squared_error\n",
"from sklearn.model_selection import train_test_split\n",
"from azureml.core.run import Run\n",
"from sklearn.externals import joblib\n",
"\n",
"import numpy as np\n",
"\n",
"os.makedirs('./outputs', exist_ok=True)\n",
"\n",
"X, y = load_diabetes(return_X_y = True)\n",
"\n",
"run = Run.get_submitted_run()\n",
"\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)\n",
"data = {\"train\": {\"X\": X_train, \"y\": y_train},\n",
" \"test\": {\"X\": X_test, \"y\": y_test}}\n",
"\n",
"# list of numbers from 0.0 to 1.0 with a 0.05 interval\n",
"alphas = np.arange(0.0, 1.0, 0.05)\n",
"\n",
"for alpha in alphas:\n",
" # Use Ridge algorithm to create a regression model\n",
" reg = Ridge(alpha = alpha)\n",
" reg.fit(data[\"train\"][\"X\"], data[\"train\"][\"y\"])\n",
"\n",
" preds = reg.predict(data[\"test\"][\"X\"])\n",
" mse = mean_squared_error(preds, data[\"test\"][\"y\"])\n",
" run.log('alpha', alpha)\n",
" run.log('mse', mse)\n",
" \n",
" model_file_name = 'ridge_{0:.2f}.pkl'.format(alpha)\n",
" with open(model_file_name, \"wb\") as file:\n",
" joblib.dump(value = reg, filename = 'outputs/' + model_file_name)\n",
"\n",
" print('alpha is {0:.2f}, and mse is {1:0.2f}'.format(alpha, mse))"
"with open('./train.py', 'r') as f:\n",
" print(f.read())"
]
},
{
@@ -167,7 +110,7 @@
"metadata": {},
"source": [
"## Configure for using ACI\n",
"Linux-based ACI is available in `westus`, `eastus`, `westeurope`, `northeurope`, `westus2` and `southeastasia` regions. See details [here](https://docs.microsoft.com/en-us/azure/container-instances/container-instances-quotas#region-availability)."
"Linux-based ACI is available in `West US`, `East US`, `West Europe`, `North Europe`, `West US 2`, `Southeast Asia`, `Australia East`, `East US 2`, and `Central US` regions. See details [here](https://docs.microsoft.com/en-us/azure/container-instances/container-instances-quotas#region-availability)."
]
},
{
@@ -190,7 +133,7 @@
"run_config.target = \"containerinstance\"\n",
"\n",
"# ACI container group is only supported in certain regions, which can be different than the region the Workspace is in.\n",
"run_config.container_instance.region = 'eastus'\n",
"run_config.container_instance.region = 'eastus2'\n",
"\n",
"# set the ACI CPU and Memory \n",
"run_config.container_instance.cpu_cores = 1\n",
@@ -201,7 +144,6 @@
"\n",
"# set Docker base image to the default CPU-based image\n",
"run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n",
"#run_config.environment.docker.base_image = 'microsoft/mmlspark:plus-0.9.9'\n",
"\n",
"# use conda_dependencies.yml to create a conda environment in the Docker image for execution\n",
"run_config.environment.python.user_managed_dependencies = False\n",
@@ -235,11 +177,25 @@
"%%time \n",
"from azureml.core.script_run_config import ScriptRunConfig\n",
"\n",
"script_run_config = ScriptRunConfig(source_directory = script_folder,\n",
" script= 'train.py',\n",
" run_config = run_config)\n",
"script_run_config = ScriptRunConfig(source_directory='./',\n",
" script='train.py',\n",
" run_config=run_config)\n",
"\n",
"run = experiment.submit(script_run_config)\n"
"run = experiment.submit(script_run_config)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"query history"
]
},
"outputs": [],
"source": [
"# Show run details\n",
"run"
]
},
{
@@ -255,21 +211,7 @@
"source": [
"%%time\n",
"# Shows output of the run on stdout.\n",
"run.wait_for_completion(show_output = True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"query history"
]
},
"outputs": [],
"source": [
"# Show run details\n",
"run"
"run.wait_for_completion(show_output=True)"
]
},
{
@@ -299,13 +241,30 @@
" metrics['alpha'][np.argmin(metrics['mse'])]\n",
"))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# show all the files stored within the run record\n",
"run.get_file_names()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now you can take a model produced here, register it and then deploy as a web service."
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [default]",
"display_name": "Python 3.6",
"language": "python",
"name": "python3"
"name": "python36"
},
"language_info": {
"codemirror_mode": {

View File

@@ -0,0 +1,44 @@
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license.
from sklearn.datasets import load_diabetes
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from azureml.core.run import Run
from sklearn.externals import joblib
import os
import numpy as np
# Training script body: fits one Ridge regressor per alpha value on the
# scikit-learn diabetes dataset, logs each alpha/MSE pair to the submitted
# run, and saves every fitted model under ./outputs.

# Make sure the output folder exists before any model is written into it.
os.makedirs('./outputs', exist_ok=True)

X, y = load_diabetes(return_X_y=True)

run = Run.get_submitted_run()

X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.2,
                                                    random_state=0)
data = {"train": {"X": X_train, "y": y_train},
        "test": {"X": X_test, "y": y_test}}

# list of numbers from 0.0 (inclusive) to 1.0 (exclusive) with a 0.05 interval
alphas = np.arange(0.0, 1.0, 0.05)

for alpha in alphas:
    # Use Ridge algorithm to create a regression model
    reg = Ridge(alpha=alpha)
    reg.fit(data["train"]["X"], data["train"]["y"])

    preds = reg.predict(data["test"]["X"])
    # Arguments follow the documented (y_true, y_pred) order; the value is
    # the same as before because plain MSE is symmetric in its arguments.
    mse = mean_squared_error(data["test"]["y"], preds)
    run.log('alpha', alpha)
    run.log('mse', mse)

    model_file_name = 'ridge_{0:.2f}.pkl'.format(alpha)
    # Save the model in the outputs folder so it automatically gets uploaded.
    # joblib.dump writes to the given path itself, so no separate open() is
    # needed; the previous open(model_file_name, "wb") only left an empty,
    # unused file with the same name in the working directory.
    joblib.dump(value=reg, filename=os.path.join('./outputs/',
                                                 model_file_name))

    print('alpha is {0:.2f}, and mse is {1:0.2f}'.format(alpha, mse))

View File

@@ -1,321 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
"\n",
"Licensed under the MIT License."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 04. Train in a remote VM (MLC managed DSVM)\n",
"* Create Workspace\n",
"* Create Project\n",
"* Create `train.py` file\n",
"* Create DSVM as Machine Learning Compute (MLC) resource\n",
"* Configure & execute a run in a conda environment in the default miniconda Docker container on DSVM"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Prerequisites\n",
"Make sure you go through the [00. Installation and Configuration](00.configuration.ipynb) Notebook first if you haven't."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Check core SDK version number\n",
"import azureml.core\n",
"\n",
"print(\"SDK version:\", azureml.core.VERSION)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Initialize Workspace\n",
"\n",
"Initialize a workspace object from persisted configuration."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core import Workspace\n",
"\n",
"ws = Workspace.from_config()\n",
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create Experiment\n",
"\n",
"**Experiment** is a logical container in an Azure ML Workspace. It hosts run records which can include run metrics and output artifacts from your experiments."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"experiment_name = 'train-on-remote-vm'\n",
"\n",
"from azureml.core import Experiment\n",
"\n",
"exp = Experiment(workspace = ws, name = experiment_name)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## View `train.py`\n",
"\n",
"For convenience, we created a training script for you. It is printed below as a text, but you can also run `%pfile ./train.py` in a cell to show the file."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"with open('./train.py', 'r') as training_script:\n",
" print(training_script.read())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create Linux DSVM as a compute target\n",
"\n",
"**Note**: If creation fails with a message about Marketplace purchase eligibilty, go to portal.azure.com, start creating DSVM there, and select \"Want to create programmatically\" to enable programmatic creation. Once you've enabled it, you can exit without actually creating VM.\n",
" \n",
"**Note**: By default SSH runs on port 22 and you don't need to specify it. But if for security reasons you switch to a different port (such as 5022), you can append the port number to the address like the example below. [Read more](../../documentation/sdk/ssh-issue.md) on this."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.compute import DsvmCompute\n",
"from azureml.core.compute_target import ComputeTargetException\n",
"\n",
"compute_target_name = 'mydsvm'\n",
"\n",
"try:\n",
" dsvm_compute = DsvmCompute(workspace = ws, name = compute_target_name)\n",
" print('found existing:', dsvm_compute.name)\n",
"except ComputeTargetException:\n",
" print('creating new.')\n",
" dsvm_config = DsvmCompute.provisioning_configuration(vm_size = \"Standard_D2_v2\")\n",
" dsvm_compute = DsvmCompute.create(ws, name = compute_target_name, provisioning_configuration = dsvm_config)\n",
" dsvm_compute.wait_for_completion(show_output = True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Attach an existing Linux DSVM as a compute target\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"'''\n",
" from azureml.core.compute import RemoteCompute \n",
" # if you want to connect using SSH key instead of username/password you can provide parameters private_key_file and private_key_passphrase \n",
" dsvm_compute = RemoteCompute.attach(ws,name=\"attach-from-sdk6\",username=<username>,address=<ipaddress>,ssh_port=22,password=<password>)\n",
"'''"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Configure & Run"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Configure a Docker run with new conda environment on the VM\n",
"You can execute in a Docker container in the VM. If you choose this route, you don't need to install anything on the VM yourself. Azure ML execution service will take care of it for you."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.runconfig import RunConfiguration\n",
"from azureml.core.conda_dependencies import CondaDependencies\n",
"\n",
"\n",
"# Load the \"cpu-dsvm.runconfig\" file (created by the above attach operation) in memory\n",
"run_config = RunConfiguration(framework = \"python\")\n",
"\n",
"# Set compute target to the Linux DSVM\n",
"run_config.target = compute_target_name\n",
"\n",
"# Use Docker in the remote VM\n",
"run_config.environment.docker.enabled = True\n",
"\n",
"# Use CPU base image from DockerHub\n",
"run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n",
"print('Base Docker image is:', run_config.environment.docker.base_image)\n",
"\n",
"# Ask system to provision a new one based on the conda_dependencies.yml file\n",
"run_config.environment.python.user_managed_dependencies = False\n",
"\n",
"# Prepare the Docker and conda environment automatically when executingfor the first time.\n",
"run_config.prepare_environment = True\n",
"\n",
"# specify CondaDependencies obj\n",
"run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Submit the Experiment\n",
"Submit script to run in the Docker image in the remote VM. If you run this for the first time, the system will download the base image, layer in packages specified in the `conda_dependencies.yml` file on top of the base image, create a container and then execute the script in the container."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core import Run\n",
"from azureml.core import ScriptRunConfig\n",
"\n",
"src = ScriptRunConfig(source_directory = '.', script = 'train.py', run_config = run_config)\n",
"run = exp.submit(src)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### View run history details"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"run"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"run.wait_for_completion(show_output = True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Find the best run"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# get all metris logged in the run\n",
"run.get_metrics()\n",
"metrics = run.get_metrics()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"print('When alpha is {1:0.2f}, we have min MSE {0:0.2f}.'.format(\n",
" min(metrics['mse']), \n",
" metrics['alpha'][np.argmin(metrics['mse'])]\n",
"))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Clean up compute resource"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dsvm_compute.delete()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -299,9 +299,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3.6",
"language": "python",
"name": "python3"
"name": "python36"
},
"language_info": {
"codemirror_mode": {

View File

@@ -1,257 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
"\n",
"Licensed under the MIT License."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 05. Train in Spark\n",
"* Create Workspace\n",
"* Create Experiment\n",
"* Copy relevant files to the script folder\n",
"* Configure and Run"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Prerequisites\n",
"Make sure you go through the [00. Installation and Configuration](00.configuration.ipynb) Notebook first if you haven't."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Check core SDK version number\n",
"import azureml.core\n",
"\n",
"print(\"SDK version:\", azureml.core.VERSION)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Initialize Workspace\n",
"\n",
"Initialize a workspace object from persisted configuration."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core import Workspace\n",
"\n",
"ws = Workspace.from_config()\n",
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create Experiment\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"experiment_name = 'train-on-remote-vm'\n",
"\n",
"from azureml.core import Experiment\n",
"\n",
"exp = Experiment(workspace = ws, name = experiment_name)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## View `train-spark.py`\n",
"\n",
"For convenience, we created a training script for you. It is printed below as text, but you can also run `%pfile ./train-spark.py` in a cell to show the file."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"with open('train-spark.py', 'r') as training_script:\n",
" print(training_script.read())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Configure & Run"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Attach an HDI cluster\n",
"To use an HDI compute target:\n",
" 1. Create a Spark for HDI cluster in Azure. Here are some [quick instructions](https://docs.microsoft.com/en-us/azure/machine-learning/desktop-workbench/how-to-create-dsvm-hdi). Make sure you use the Ubuntu flavor, NOT CentOS.\n",
" 2. Enter the IP address, username and password below"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.compute import HDInsightCompute\n",
"\n",
"try:\n",
" # if you want to connect using SSH key instead of username/password you can provide parameters private_key_file and private_key_passphrase\n",
" hdi_compute_new = HDInsightCompute.attach(ws, \n",
" name=\"hdi-attach\", \n",
" address=\"hdi-ignite-demo-ssh.azurehdinsight.net\", \n",
" ssh_port=22, \n",
" username='<username>', \n",
" password='<password>')\n",
"\n",
"except UserErrorException as e:\n",
" print(\"Caught = {}\".format(e.message))\n",
" print(\"Compute config already attached.\")\n",
" \n",
" \n",
"hdi_compute_new.wait_for_completion(show_output=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Configure HDI run"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.runconfig import RunConfiguration\n",
"from azureml.core.conda_dependencies import CondaDependencies\n",
"\n",
"\n",
"# Create a new run configuration for the Python framework\n",
"run_config = RunConfiguration(framework = \"python\")\n",
"\n",
"# Set compute target to the attached HDI cluster\n",
"run_config.target = hdi_compute.name\n",
"\n",
"# Use Docker in the remote VM\n",
"# run_config.environment.docker.enabled = True\n",
"\n",
"# Use CPU base image from DockerHub\n",
"# run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n",
"# print('Base Docker image is:', run_config.environment.docker.base_image)\n",
"\n",
"# Ask system to provision a new one based on the conda_dependencies.yml file\n",
"run_config.environment.python.user_managed_dependencies = False\n",
"\n",
"# Prepare the Docker and conda environment automatically when executing for the first time.\n",
"# run_config.prepare_environment = True\n",
"\n",
"# specify CondaDependencies obj\n",
"# run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])\n",
"# load the runconfig object from the \"myhdi.runconfig\" file generated by the attach operation above."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Submit the script to HDI"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"script_run_config = ScriptRunConfig(source_directory = '.',\n",
" script= 'train-spark.py',\n",
" run_config = run_config)\n",
"run = experiment.submit(script_run_config)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# get the URL of the run history web page\n",
"run"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"run.wait_for_completion(show_output = True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# get all metrics logged in the run\n",
"metrics = run.get_metrics()\n",
"print(metrics)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -235,9 +235,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3.6",
"language": "python",
"name": "python3"
"name": "python36"
},
"language_info": {
"codemirror_mode": {

View File

@@ -398,9 +398,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3.6",
"language": "python",
"name": "python3"
"name": "python36"
},
"language_info": {
"codemirror_mode": {

View File

@@ -313,9 +313,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3.6",
"language": "python",
"name": "python3"
"name": "python36"
},
"language_info": {
"codemirror_mode": {

View File

@@ -105,8 +105,8 @@
"inputs_dc = ModelDataCollector(\"best_model\", identifier=\"inputs\", feature_names=[\"feat1\", \"feat2\", \"feat3\", \"feat4\", \"feat5\", \"Feat6\"])\n",
"prediction_dc = ModelDataCollector(\"best_model\", identifier=\"predictions\", feature_names=[\"prediction1\", \"prediction2\"])```\n",
" \n",
"* Identifier: Identifier is later used for building the folder structure in your Blob, it can be used to divide raw data versus processed.\n",
"* CorrelationId: is an optional parameter, you do not need to set it up if your model doesnt require it. Having a correlationId in place does help you for easier mapping with other data. (Examples include: LoanNumber, CustomerId, etc.)\n",
"* Identifier: Identifier is later used for building the folder structure in your Blob, it can be used to divide \"raw\" data versus \"processed\".\n",
"* CorrelationId: is an optional parameter, you do not need to set it up if your model doesn't require it. Having a correlationId in place does help you for easier mapping with other data. (Examples include: LoanNumber, CustomerId, etc.)\n",
"* Feature Names: These need to be set up in the order of your features in order for them to have column names when the .csv is created.\n",
"\n",
"### c. In your run function add:\n",
@@ -425,9 +425,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:myenv3]",
"display_name": "Python 3.6",
"language": "python",
"name": "conda-env-myenv3-py"
"name": "python36"
},
"language_info": {
"codemirror_mode": {

View File

@@ -203,7 +203,7 @@
"metadata": {},
"outputs": [],
"source": [
"# load workspace configuratio from ./aml_config/config.json file.ß\n",
"# load workspace configuration from ./aml_config/config.json file.\n",
"my_workspace = Workspace.from_config()\n",
"my_workspace.get_details()"
]
@@ -243,9 +243,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python [default]",
"display_name": "Python 3.6",
"language": "python",
"name": "python3"
"name": "python36"
},
"language_info": {
"codemirror_mode": {

View File

@@ -377,9 +377,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python [default]",
"display_name": "Python 3.6",
"language": "python",
"name": "python3"
"name": "python36"
},
"language_info": {
"codemirror_mode": {

View File

@@ -387,9 +387,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python [default]",
"display_name": "Python 3.6",
"language": "python",
"name": "python3"
"name": "python36"
},
"language_info": {
"codemirror_mode": {

View File

@@ -449,9 +449,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python [default]",
"display_name": "Python 3.6",
"language": "python",
"name": "python3"
"name": "python36"
},
"language_info": {
"codemirror_mode": {

View File

@@ -500,9 +500,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python [default]",
"display_name": "Python 3.6",
"language": "python",
"name": "python3"
"name": "python36"
},
"language_info": {
"codemirror_mode": {

View File

@@ -473,9 +473,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python [default]",
"display_name": "Python 3.6",
"language": "python",
"name": "python3"
"name": "python36"
},
"language_info": {
"codemirror_mode": {

View File

@@ -374,9 +374,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python [default]",
"display_name": "Python 3.6",
"language": "python",
"name": "python3"
"name": "python36"
},
"language_info": {
"codemirror_mode": {

View File

@@ -396,9 +396,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python [default]",
"display_name": "Python 3.6",
"language": "python",
"name": "python3"
"name": "python36"
},
"language_info": {
"codemirror_mode": {

View File

@@ -154,7 +154,7 @@
"metadata": {},
"outputs": [],
"source": [
"run_id = 'AutoML_b7c4076b-181d-4ef4-ab9f-36bb44c1e36c'\n",
"run_id = '' # Fill in your own run_id\n",
"\n",
"from azureml.train.widgets import RunDetails\n",
"\n",
@@ -304,9 +304,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python [default]",
"display_name": "Python 3.6",
"language": "python",
"name": "python3"
"name": "python36"
},
"language_info": {
"codemirror_mode": {

View File

@@ -458,9 +458,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python [default]",
"display_name": "Python 3.6",
"language": "python",
"name": "python3"
"name": "python36"
},
"language_info": {
"codemirror_mode": {

View File

@@ -478,9 +478,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python [default]",
"display_name": "Python 3.6",
"language": "python",
"name": "python3"
"name": "python36"
},
"language_info": {
"codemirror_mode": {

View File

@@ -270,9 +270,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python [default]",
"display_name": "Python 3.6",
"language": "python",
"name": "python3"
"name": "python36"
},
"language_info": {
"codemirror_mode": {

View File

@@ -229,9 +229,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python [default]",
"display_name": "Python 3.6",
"language": "python",
"name": "python3"
"name": "python36"
},
"language_info": {
"codemirror_mode": {

View File

@@ -218,9 +218,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python [default]",
"display_name": "Python 3.6",
"language": "python",
"name": "python3"
"name": "python36"
},
"language_info": {
"codemirror_mode": {

View File

@@ -545,9 +545,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python [default]",
"display_name": "Python 3.6",
"language": "python",
"name": "python3"
"name": "python36"
},
"language_info": {
"codemirror_mode": {

View File

@@ -706,9 +706,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:finaldemo]",
"display_name": "Python 3.6",
"language": "python",
"name": "conda-env-finaldemo-py"
"name": "python36"
},
"language_info": {
"codemirror_mode": {

View File

@@ -831,9 +831,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:finaldemo]",
"display_name": "Python 3.6",
"language": "python",
"name": "conda-env-finaldemo-py"
"name": "python36"
},
"language_info": {
"codemirror_mode": {

View File

@@ -54,9 +54,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3.6",
"language": "python",
"name": "python3"
"name": "python36"
},
"language_info": {
"codemirror_mode": {

View File

@@ -9,6 +9,13 @@
"Licensed under the MIT License."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This notebook demonstrates how to run a batch scoring job. The __[Inception-V3 model](https://arxiv.org/abs/1512.00567)__ and unlabeled images from the __[ImageNet](http://image-net.org/)__ dataset will be used. It registers a pretrained Inception model in the model registry, then uses the model to do batch scoring on images in a blob container."
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -106,11 +113,13 @@
"import numpy as np\n",
"import shutil\n",
"from tensorflow.contrib.slim.python.slim.nets import inception_v3\n",
"from azureml.core.model import Model\n",
"\n",
"slim = tf.contrib.slim\n",
"\n",
"parser = argparse.ArgumentParser(description=\"Start a tensorflow model serving\")\n",
"parser.add_argument('--model_dir', dest=\"model_dir\", required=True)\n",
"parser.add_argument('--model_name', dest=\"model_name\", required=True)\n",
"parser.add_argument('--label_dir', dest=\"label_dir\", required=True)\n",
"parser.add_argument('--dataset_path', dest=\"dataset_path\", required=True)\n",
"parser.add_argument('--output_dir', dest=\"output_dir\", required=True)\n",
"parser.add_argument('--batch_size', dest=\"batch_size\", type=int, required=True)\n",
@@ -162,12 +171,14 @@
"\n",
"def main(_):\n",
" start_time = datetime.datetime.now()\n",
" label_file_name = os.path.join(args.model_dir, \"labels.txt\")\n",
" label_file_name = os.path.join(args.label_dir, \"labels.txt\")\n",
" label_dict = get_class_label_dict(label_file_name)\n",
" classes_num = len(label_dict)\n",
" test_feeder = DataIterator(data_dir=args.dataset_path)\n",
" total_size = len(test_feeder.labels)\n",
" count = 0\n",
" # get model from model registry\n",
" model_path = Model.get_model_path(args.model_name)\n",
" with tf.Session() as sess:\n",
" test_images = test_feeder.input_pipeline(batch_size=args.batch_size)\n",
" with slim.arg_scope(inception_v3.inception_v3_arg_scope()):\n",
@@ -182,7 +193,6 @@
" coord = tf.train.Coordinator()\n",
" threads = tf.train.start_queue_runners(sess=sess, coord=coord)\n",
" saver = tf.train.Saver()\n",
" model_path = os.path.join(args.model_dir, \"inception_v3.ckpt\")\n",
" saver.restore(sess, model_path)\n",
" out_filename = os.path.join(args.output_dir, \"result-labels.txt\")\n",
" with open(out_filename, \"w\") as result_file:\n",
@@ -208,13 +218,56 @@
" tf.app.run()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Prepare Model and Input data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"account_name = \"pipelinedata\"\n",
"# create directory for model\n",
"model_dir = 'models'\n",
"if not os.path.isdir(model_dir):\n",
" os.mkdir(model_dir)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Download Model\n",
"<font color=red>This manual step is required to register the model to the workspace</font>\n",
"\n",
"Download and extract model from http://download.tensorflow.org/models/inception_v3_2016_08_28.tar.gz to model_dir"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Get sample images and upload to Datastore\n",
"<font color=red>This manual step is required to run batchai_score.py</font>\n",
"\n",
"Download and extract sample images from ImageNet evaluation set and **upload** to a blob that will be registered as a Datastore in the next step\n",
"\n",
"A copy of sample images from ImageNet evaluation set can be found at __[BatchAI Samples Blob](https://batchaisamples.blob.core.windows.net/samples/imagenet_samples.zip?st=2017-09-29T18%3A29%3A00Z&se=2099-12-31T08%3A00%3A00Z&sp=rl&sv=2016-05-31&sr=c&sig=PmhL%2BYnYAyNTZr1DM2JySvrI12e%2F4wZNIwCtf7TRI%2BM%3D)__ \n",
"\n",
"There are multiple ways to create folders and upload files into Azure Blob Container - you can use __[Azure Portal](https://ms.portal.azure.com/)__, __[Storage Explorer](http://storageexplorer.com/)__, __[Azure CLI2](https://render.githubusercontent.com/azure-cli-extension)__ or Azure SDK for your preferred programming language. "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"account_name = \"batchscoringdata\"\n",
"sample_data = Datastore.register_azure_blob_container(ws, \"sampledata\", \"sampledata\", \n",
" account_name=account_name, \n",
" overwrite=True)"
@@ -279,11 +332,42 @@
" path_on_datastore=\"batchscoring/models\",\n",
" mode=\"download\" \n",
" )\n",
"label_dir = DataReference(datastore=sample_data, \n",
" data_reference_name=\"input_labels\",\n",
" path_on_datastore=\"batchscoring/labels\",\n",
" mode=\"download\" \n",
" )\n",
"output_dir = PipelineData(name=\"scores\", \n",
" datastore_name=default_ds, \n",
" output_path_on_compute=\"batchscoring/results\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Register the model with Workspace"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import shutil\n",
"from azureml.core.model import Model\n",
"\n",
"# register downloaded model \n",
"model = Model.register(model_path = \"models/inception_v3.ckpt\",\n",
" model_name = \"inception\", # this is the name the model is registered as\n",
" tags = {'pretrained': \"inception\"},\n",
" description = \"Imagenet trained tensorflow inception\",\n",
" workspace = ws)\n",
"# remove the downloaded dir after registration if you wish\n",
"shutil.rmtree(\"models\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
@@ -337,19 +421,21 @@
"metadata": {},
"outputs": [],
"source": [
"step = PythonScriptStep(\n",
"inception_model_name = \"inception_v3.ckpt\"\n",
"\n",
"batch_score_step = PythonScriptStep(\n",
" name=\"batch ai scoring\",\n",
" script_name=\"batchai_score.py\",\n",
" arguments=[\"--dataset_path\", input_images, \n",
" \"--model_dir\", model_dir, \n",
" \"--model_name\", \"inception\",\n",
" \"--label_dir\", label_dir, \n",
" \"--output_dir\", output_dir, \n",
" \"--batch_size\", batch_size_param],\n",
" target=cluster,\n",
" inputs=[input_images, model_dir],\n",
" inputs=[input_images, label_dir],\n",
" outputs=[output_dir],\n",
" runconfig=batchai_run_config,\n",
" source_directory=project_folder,\n",
" allow_reuse=False\n",
" source_directory=project_folder\n",
")"
]
},
@@ -359,7 +445,7 @@
"metadata": {},
"outputs": [],
"source": [
"pipeline = Pipeline(workspace=ws, steps=[step])\n",
"pipeline = Pipeline(workspace=ws, steps=[batch_score_step])\n",
"pipeline_run = Experiment(ws, 'batch_scoring').submit(pipeline, pipeline_params={\"param_batch_size\": 20})"
]
},
@@ -389,29 +475,21 @@
"pipeline_run.wait_for_completion(show_output=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"step_run = list(pipeline_run.get_children())[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"step_run.download_file(\"./outputs/result-labels.txt\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Display few results"
"# Download and review output"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"step_run = list(pipeline_run.get_children())[0]\n",
"step_run.download_file(\"./outputs/result-labels.txt\")"
]
},
{
@@ -447,7 +525,7 @@
"outputs": [],
"source": [
"published_pipeline = pipeline_run.publish_pipeline(\n",
" name=\"batch score\", description=\"scores images kept in container sampledata\", version=\"1.0\")\n",
" name=\"Inception v3 scoring\", description=\"Batch scoring using Inception v3 model\", version=\"1.0\")\n",
"\n",
"published_id = published_pipeline.id"
]
@@ -483,7 +561,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Hit the REST endpoint"
"## Run published pipeline using its REST endpoint"
]
},
{
@@ -495,6 +573,7 @@
"from azureml.pipeline.core import PublishedPipeline\n",
"\n",
"rest_endpoint = PublishedPipeline.get_endpoint(published_id, ws)\n",
"# specify batch size when running the pipeline\n",
"response = requests.post(rest_endpoint, headers=aad_token, json={\"param_batch_size\": 50})\n",
"run_id = response.json()[\"Id\"]"
]
@@ -517,20 +596,13 @@
"\n",
"RunDetails(published_pipeline_run).show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3.6",
"language": "python",
"name": "python3"
"name": "python36"
},
"language_info": {
"codemirror_mode": {
@@ -542,7 +614,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
"version": "3.6.5"
}
},
"nbformat": 4,

View File

@@ -554,7 +554,7 @@
"\n",
"New BSD License\n",
"\n",
"Copyright (c) 20072018 The scikit-learn developers.\n",
"Copyright (c) 2007-2018 The scikit-learn developers.\n",
"All rights reserved.\n",
"\n",
"\n",
@@ -595,9 +595,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3.6",
"language": "python",
"name": "python3"
"name": "python36"
},
"language_info": {
"codemirror_mode": {

View File

@@ -287,9 +287,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3.6",
"language": "python",
"name": "python3"
"name": "python36"
},
"language_info": {
"codemirror_mode": {

View File

@@ -545,9 +545,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3.6",
"language": "python",
"name": "python3"
"name": "python36"
},
"language_info": {
"codemirror_mode": {

View File

@@ -736,9 +736,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:amlsdk]",
"display_name": "Python 3.6",
"language": "python",
"name": "conda-env-amlsdk-py"
"name": "python36"
},
"language_info": {
"codemirror_mode": {

View File

@@ -266,9 +266,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python [default]",
"display_name": "Python 3.6",
"language": "python",
"name": "python3"
"name": "python36"
},
"language_info": {
"codemirror_mode": {

View File

@@ -1103,9 +1103,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python [default]",
"display_name": "Python 3.6",
"language": "python",
"name": "python3"
"name": "python36"
},
"language_info": {
"codemirror_mode": {

View File

@@ -337,9 +337,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python [default]",
"display_name": "Python 3.6",
"language": "python",
"name": "python3"
"name": "python36"
},
"language_info": {
"codemirror_mode": {

View File

@@ -263,9 +263,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python [default]",
"display_name": "Python 3.6",
"language": "python",
"name": "python3"
"name": "python36"
},
"language_info": {
"codemirror_mode": {

View File

@@ -342,9 +342,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python [default]",
"display_name": "Python 3.6",
"language": "python",
"name": "python3"
"name": "python36"
},
"language_info": {
"codemirror_mode": {

View File

@@ -482,9 +482,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python [default]",
"display_name": "Python 3.6",
"language": "python",
"name": "python3"
"name": "python36"
},
"language_info": {
"codemirror_mode": {

View File

@@ -221,9 +221,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3.6",
"language": "python",
"name": "python3"
"name": "python36"
},
"language_info": {
"codemirror_mode": {

View File

@@ -676,9 +676,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python [default]",
"display_name": "Python 3.6",
"language": "python",
"name": "python3"
"name": "python36"
},
"language_info": {
"codemirror_mode": {

View File

@@ -541,9 +541,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python [default]",
"display_name": "Python 3.6",
"language": "python",
"name": "python3"
"name": "python36"
},
"language_info": {
"codemirror_mode": {

View File

@@ -191,7 +191,7 @@
"|**max_time_sec**|12,000|Time limit in seconds for each iteration|\n",
"|**iterations**|20|Number of iterations. In each iteration, the model trains with the data with a specific pipeline|\n",
"|**n_cross_validations**|3|Number of cross validation splits|\n",
"|**preprocess**|True| *True/False* Enables experiment to perform preprocessing on the input. Preprocessing handles *missing data*, and performs some common *feature extraction*|\n",
"|**preprocess**|False| *True/False* Enables experiment to perform preprocessing on the input. Preprocessing handles *missing data*, and performs some common *feature extraction*|\n",
"|**exit_score**|0.995|*double* value indicating the target for *primary_metric*. Once the target is surpassed the run terminates|\n",
"|**blacklist_algos**|['kNN','LinearSVM']|*Array* of *strings* indicating algorithms to ignore.|\n"
]
@@ -210,7 +210,7 @@
" max_time_sec = 12000,\n",
" iterations = 20,\n",
" n_cross_validations = 3,\n",
" preprocess = True,\n",
" preprocess = False,\n",
" exit_score = 0.995,\n",
" blacklist_algos = ['kNN','LinearSVM'],\n",
" X = X_digits,\n",
@@ -380,9 +380,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python [default]",
"display_name": "Python 3.6",
"language": "python",
"name": "python3"
"name": "python36"
},
"language_info": {
"codemirror_mode": {