Compare commits


31 Commits

Author SHA1 Message Date
Larry Franks
6a145086d8 Note about setting location.
@sdgilley Is this update OK? We want to show that location can be set for a new public preview functionality.
2021-07-09 10:37:21 -04:00
Sheri Gilley
cb695c91ce Create testnotebook.ipynb 2020-02-03 16:19:59 -06:00
Sheri Gilley
de505d67bd Delete testnotebook.ipynb 2020-02-03 16:14:53 -06:00
Sheri Gilley
f19cfa4630 Create testnotebook.ipynb 2020-02-03 16:11:59 -06:00
Sheri Gilley
7eed2e4b56 Update 01.train-models.ipynb 2020-02-03 15:29:22 -06:00
Sheri Gilley
57b0f701f8 remove deprecated auto_prepare_environment 2019-11-20 17:28:44 -06:00
Sheri Gilley
7db93bcb1d update comments 2019-01-22 17:18:19 -06:00
Sheri Gilley
fcbe925640 Merge branch 'sdk-codetest' of https://github.com/Azure/MachineLearningNotebooks into sdk-codetest 2019-01-07 13:06:12 -06:00
Sheri Gilley
bedfbd649e fix files 2019-01-07 13:06:02 -06:00
Sheri Gilley
fb760f648d Delete temp.py 2019-01-07 12:58:32 -06:00
Sheri Gilley
a9a0713d2f Delete donotupload.py 2019-01-07 12:57:58 -06:00
Sheri Gilley
c9d018b52c remove prepare environment 2019-01-07 12:56:54 -06:00
Sheri Gilley
53dbd0afcf hdi run config code 2019-01-07 11:29:40 -06:00
Sheri Gilley
e3a64b1f16 code for remote vm 2019-01-04 12:51:11 -06:00
Sheri Gilley
732eecfc7c update names 2019-01-04 12:45:28 -06:00
Sheri Gilley
6995c086ff change snippet names 2019-01-03 22:39:06 -06:00
Sheri Gilley
80bba4c7ae code for amlcompute section 2019-01-03 18:55:31 -06:00
Sheri Gilley
3c581b533f for local computer 2019-01-03 18:07:12 -06:00
Sheri Gilley
cc688caa4e change names 2019-01-03 08:53:49 -06:00
Sheri Gilley
da225e116e new code 2019-01-03 08:02:35 -06:00
Sheri Gilley
73c5d02880 Update quickstart.py 2018-12-17 12:23:03 -06:00
Sheri Gilley
e472b54f1b Update quickstart.py 2018-12-17 12:22:40 -06:00
Sheri Gilley
716c6d8bb1 add quickstart code 2018-11-06 11:27:58 -06:00
Sheri Gilley
23189c6f40 move folder 2018-10-17 16:24:46 -05:00
Sheri Gilley
361b57ed29 change all names to camelCase 2018-10-17 11:47:09 -05:00
Sheri Gilley
3f531fd211 try camelCase 2018-10-17 11:09:46 -05:00
Sheri Gilley
111f5e8d73 playing around 2018-10-17 10:46:33 -05:00
Sheri Gilley
96c59d5c2b testing 2018-10-17 09:56:04 -05:00
Sheri Gilley
ce3214b7c6 fix name 2018-10-16 17:33:24 -05:00
Sheri Gilley
53199d17de add delete 2018-10-16 16:54:08 -05:00
Sheri Gilley
54c883412c add test service 2018-10-16 16:49:41 -05:00
77 changed files with 2766 additions and 8833 deletions

.amlignore (new file, +7 lines)

@@ -0,0 +1,7 @@
.ipynb_checkpoints
azureml-logs
.azureml
.git
outputs
azureml-setup
docs

.vscode/settings.json (new vendored file, +3 lines)

@@ -0,0 +1,3 @@
{
"python.pythonPath": "C:\\Users\\sgilley\\.azureml\\envs\\jan3\\python.exe"
}


@@ -27,7 +27,7 @@
 "metadata": {},
 "source": [
 "## Prerequisites\n",
-"1. Make sure you go through the [00. Installation and Configuration](../../00.configuration.ipynb) Notebook first if you haven't. \n",
+"1. Make sure you go through the [00. Installation and Configuration](00.configuration.ipynb) Notebook first if you haven't. \n",
 "\n",
 "2. Install following pre-requisite libraries to your conda environment and restart notebook.\n",
 "```shell\n",
@@ -525,7 +525,8 @@
 "source": [
 "from azureml.core.conda_dependencies import CondaDependencies \n",
 "\n",
-"myenv = CondaDependencies.create(conda_packages=[\"scikit-learn\"])\n",
+"myenv = CondaDependencies()\n",
+"myenv.add_conda_package(\"scikit-learn\")\n",
 "print(myenv.serialize_to_string())\n",
 "\n",
 "with open(\"myenv.yml\",\"w\") as f:\n",
@@ -679,7 +680,7 @@
 "# score the entire test set.\n",
 "test_samples = json.dumps({'data': X_test.tolist()})\n",
 "\n",
-"result = service.run(input_data = test_samples)\n",
+"result = json.loads(service.run(input_data = test_samples))['result']\n",
 "residual = result - y_test"
 ]
 },
@@ -777,6 +778,13 @@
 "%%time\n",
 "service.delete()"
 ]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": []
 }
 ],
 "metadata": {
@@ -800,7 +808,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.6.6"
+"version": "3.6.4"
 }
 },
 "nbformat": 4,

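The second hunk above swaps between two equivalent ways of building the conda dependency set used for deployment. A minimal sketch, assuming the azureml-core SDK of this era, showing that both forms serialize to the same `myenv.yml`:

```python
from azureml.core.conda_dependencies import CondaDependencies

# Factory-style construction (one side of the hunk).
myenv_a = CondaDependencies.create(conda_packages=["scikit-learn"])

# Incremental construction (the other side of the hunk).
myenv_b = CondaDependencies()
myenv_b.add_conda_package("scikit-learn")

# Either object serializes to a conda environment specification for the image build.
with open("myenv.yml", "w") as f:
    f.write(myenv_b.serialize_to_string())
```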

@@ -21,9 +21,7 @@ def run(raw_data):
         data = json.loads(raw_data)['data']
         data = np.array(data)
         result = model.predict(data)
-        # you can return any data type as long as it is JSON-serializable
-        return result.tolist()
+        return json.dumps({"result": result.tolist()})
     except Exception as e:
         result = str(e)
-        return result
+        return json.dumps({"error": result})

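The hunk above changes `run()` so the web service always returns a JSON string, which is what the updated notebook cell then parses with `json.loads(service.run(...))['result']`. A minimal scoring-script sketch following that pattern; the model name is an assumption taken from the deleted training notebook further down:

```python
import json
import numpy as np
from sklearn.externals import joblib
from azureml.core.model import Model

def init():
    global model
    # Resolve the registered model inside the service container (model name is an assumption).
    model_path = Model.get_model_path("best_ridge_model")
    model = joblib.load(model_path)

def run(raw_data):
    try:
        data = np.array(json.loads(raw_data)["data"])
        result = model.predict(data)
        # Always hand back a JSON string, as in the updated script.
        return json.dumps({"result": result.tolist()})
    except Exception as e:
        return json.dumps({"error": str(e)})
```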

@@ -1 +0,0 @@
/samples/


@@ -1,477 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
"\n",
"Licensed under the MIT License."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 02. Train locally\n",
"* Create or load workspace.\n",
"* Create scripts locally.\n",
"* Create `train.py` in a folder, along with a `my.lib` file.\n",
"* Configure & execute a local run in a user-managed Python environment.\n",
"* Configure & execute a local run in a system-managed Python environment.\n",
"* Configure & execute a local run in a Docker environment.\n",
"* Query run metrics to find the best model\n",
"* Register model for operationalization."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Prerequisites\n",
"Make sure you go through the [00. Installation and Configuration](00.configuration.ipynb) Notebook first if you haven't."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Check core SDK version number\n",
"import azureml.core\n",
"\n",
"print(\"SDK version:\", azureml.core.VERSION)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Initialize Workspace\n",
"\n",
"Initialize a workspace object from persisted configuration."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.workspace import Workspace\n",
"\n",
"ws = Workspace.from_config()\n",
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep='\\n')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create An Experiment\n",
"**Experiment** is a logical container in an Azure ML Workspace. It hosts run records which can include run metrics and output artifacts from your experiments."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core import Experiment\n",
"experiment_name = 'train-on-local'\n",
"exp = Experiment(workspace=ws, name=experiment_name)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## View `train.py`\n",
"\n",
"`train.py` is already created for you."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"with open('./train.py', 'r') as f:\n",
" print(f.read())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Note `train.py` also references a `mylib.py` file."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"with open('./mylib.py', 'r') as f:\n",
" print(f.read())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Configure & Run\n",
"### User-managed environment\n",
"Below, we use a user-managed run, which means you are responsible to ensure all the necessary packages are available in the Python environment you choose to run the script."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.runconfig import RunConfiguration\n",
"\n",
"# Editing a run configuration property on-fly.\n",
"run_config_user_managed = RunConfiguration()\n",
"\n",
"run_config_user_managed.environment.python.user_managed_dependencies = True\n",
"\n",
"# You can choose a specific Python environment by pointing to a Python path \n",
"#run_config.environment.python.interpreter_path = '/home/johndoe/miniconda3/envs/sdk2/bin/python'"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Submit script to run in the user-managed environment\n",
"Note whole script folder is submitted for execution, including the `mylib.py` file."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core import ScriptRunConfig\n",
"\n",
"src = ScriptRunConfig(source_directory='./', script='train.py', run_config=run_config_user_managed)\n",
"run = exp.submit(src)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Get run history details"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"run"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Block to wait till run finishes."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"run.wait_for_completion(show_output=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### System-managed environment\n",
"You can also ask the system to build a new conda environment and execute your scripts in it. The environment is built once and will be reused in subsequent executions as long as the conda dependencies remain unchanged. "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.runconfig import RunConfiguration\n",
"from azureml.core.conda_dependencies import CondaDependencies\n",
"\n",
"run_config_system_managed = RunConfiguration()\n",
"\n",
"run_config_system_managed.environment.python.user_managed_dependencies = False\n",
"run_config_system_managed.auto_prepare_environment = True\n",
"\n",
"# Specify conda dependencies with scikit-learn\n",
"cd = CondaDependencies.create(conda_packages=['scikit-learn'])\n",
"run_config_system_managed.environment.python.conda_dependencies = cd"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Submit script to run in the system-managed environment\n",
"A new conda environment is built based on the conda dependencies object. If you are running this for the first time, this might take up to 5 mninutes. But this conda environment is reused so long as you don't change the conda dependencies."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"src = ScriptRunConfig(source_directory=\"./\", script='train.py', run_config=run_config_system_managed)\n",
"run = exp.submit(src)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Get run history details"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"run"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Block and wait till run finishes."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"run.wait_for_completion(show_output = True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Docker-based execution\n",
"**IMPORTANT**: You must have Docker engine installed locally in order to use this execution mode. If your kernel is already running in a Docker container, such as **Azure Notebooks**, this mode will **NOT** work.\n",
"\n",
"You can also ask the system to pull down a Docker image and execute your scripts in it."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"run_config_docker = RunConfiguration()\n",
"run_config_docker.environment.python.user_managed_dependencies = False\n",
"run_config_docker.auto_prepare_environment = True\n",
"run_config_docker.environment.docker.enabled = True\n",
"run_config_docker.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n",
"\n",
"# Specify conda dependencies with scikit-learn\n",
"cd = CondaDependencies.create(conda_packages=['scikit-learn'])\n",
"run_config_docker.environment.python.conda_dependencies = cd\n",
"\n",
"src = ScriptRunConfig(source_directory=\"./\", script='train.py', run_config=run_config_docker)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Submit script to run in the system-managed environment\n",
"A new conda environment is built based on the conda dependencies object. If you are running this for the first time, this might take up to 5 mninutes. But this conda environment is reused so long as you don't change the conda dependencies.\n",
"\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import subprocess\n",
"\n",
"# Check if Docker is installed and Linux containers are enables\n",
"if subprocess.run(\"docker -v\", shell=True) == 0:\n",
" out = subprocess.check_output(\"docker system info\", shell=True, encoding=\"ascii\").split(\"\\n\")\n",
" if not \"OSType: linux\" in out:\n",
" print(\"Switch Docker engine to use Linux containers.\")\n",
" else:\n",
" run = exp.submit(src)\n",
"else:\n",
" print(\"Docker engine not installed.\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Get run history details\n",
"run"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"run.wait_for_completion(show_output=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Query run metrics"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"query history",
"get metrics"
]
},
"outputs": [],
"source": [
"# get all metris logged in the run\n",
"run.get_metrics()\n",
"metrics = run.get_metrics()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's find the model that has the lowest MSE value logged."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"\n",
"best_alpha = metrics['alpha'][np.argmin(metrics['mse'])]\n",
"\n",
"print('When alpha is {1:0.2f}, we have min MSE {0:0.2f}.'.format(\n",
" min(metrics['mse']), \n",
" best_alpha\n",
"))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"You can also list all the files that are associated with this run record"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"run.get_file_names()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We know the model `ridge_0.40.pkl` is the best performing model from the eariler queries. So let's register it with the workspace."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# supply a model name, and the full path to the serialized model file.\n",
"model = run.register_model(model_name='best_ridge_model', model_path='./outputs/ridge_0.40.pkl')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(model.name, model.version, model.url)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now you can deploy this model following the example in the 01 notebook."
]
}
],
"metadata": {
"authors": [
{
"name": "roastala"
}
],
"kernelspec": {
"display_name": "Python 3.6",
"language": "python",
"name": "python36"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

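One of the commits in this compare ("remove deprecated auto_prepare_environment") drops the `auto_prepare_environment` flag that the deleted notebook above still sets. A sketch, assuming azureml-core, of the same system-managed configuration without the deprecated flag; the environment is still built automatically on first submission:

```python
from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies

run_config_system_managed = RunConfiguration()

# Let Azure ML manage the Python environment; no auto_prepare_environment flag is needed.
run_config_system_managed.environment.python.user_managed_dependencies = False
run_config_system_managed.environment.python.conda_dependencies = \
    CondaDependencies.create(conda_packages=["scikit-learn"])
```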

@@ -291,7 +291,11 @@
 "metadata": {},
 "outputs": [],
 "source": [
+"from azureml.core.runconfig import RunConfiguration\n",
+"from azureml.core.conda_dependencies import CondaDependencies\n",
+"\n",
 "run_config_docker = RunConfiguration()\n",
+"\n",
 "run_config_docker.environment.python.user_managed_dependencies = False\n",
 "run_config_docker.auto_prepare_environment = True\n",
 "run_config_docker.environment.docker.enabled = True\n",
@@ -299,9 +303,7 @@
 "\n",
 "# Specify conda dependencies with scikit-learn\n",
 "cd = CondaDependencies.create(conda_packages=['scikit-learn'])\n",
-"run_config_docker.environment.python.conda_dependencies = cd\n",
-"\n",
-"src = ScriptRunConfig(source_directory=\"./\", script='train.py', run_config=run_config_docker)"
+"run_config_docker.environment.python.conda_dependencies = cd"
 ]
 },
 {
@@ -320,17 +322,8 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"import subprocess\n",
-"\n",
-"# Check if Docker is installed and Linux containers are enables\n",
-"if subprocess.run(\"docker -v\", shell=True) == 0:\n",
-"    out = subprocess.check_output(\"docker system info\", shell=True, encoding=\"ascii\").split(\"\\n\")\n",
-"    if not \"OSType: linux\" in out:\n",
-"        print(\"Switch Docker engine to use Linux containers.\")\n",
-"    else:\n",
-"        run = exp.submit(src)\n",
-"else:\n",
-"    print(\"Docker engine not installed.\")"
+"src = ScriptRunConfig(source_directory=\"./\", script='train.py', run_config=run_config_docker)\n",
+"run = exp.submit(src)"
 ]
 },
 {


@@ -1,325 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
"\n",
"Licensed under the MIT License."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 03. Train on Azure Container Instance (EXPERIMENTAL)\n",
"\n",
"* Create Workspace\n",
"* Create Project\n",
"* Create `train.py` in the project folder.\n",
"* Configure an ACI (Azure Container Instance) run\n",
"* Execute in ACI"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Prerequisites\n",
"Make sure you go through the [00. Installation and Configuration](00.configuration.ipynb) Notebook first if you haven't."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Check core SDK version number\n",
"import azureml.core\n",
"\n",
"print(\"SDK version:\", azureml.core.VERSION)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Initialize Workspace\n",
"\n",
"Initialize a workspace object from persisted configuration"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"create workspace"
]
},
"outputs": [],
"source": [
"from azureml.core import Workspace\n",
"\n",
"ws = Workspace.from_config()\n",
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create An Experiment\n",
"\n",
"**Experiment** is a logical container in an Azure ML Workspace. It hosts run records which can include run metrics and output artifacts from your experiments."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core import Experiment\n",
"experiment_name = 'train-on-aci'\n",
"experiment = Experiment(workspace = ws, name = experiment_name)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Create a folder to store the training script."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"script_folder = './samples/train-on-aci'\n",
"os.makedirs(script_folder, exist_ok = True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Remote execution on ACI\n",
"\n",
"Use `%%writefile` magic to write training code to `train.py` file under the project folder."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%writefile $script_folder/train.py\n",
"\n",
"import os\n",
"from sklearn.datasets import load_diabetes\n",
"from sklearn.linear_model import Ridge\n",
"from sklearn.metrics import mean_squared_error\n",
"from sklearn.model_selection import train_test_split\n",
"from azureml.core.run import Run\n",
"from sklearn.externals import joblib\n",
"\n",
"import numpy as np\n",
"\n",
"os.makedirs('./outputs', exist_ok=True)\n",
"\n",
"X, y = load_diabetes(return_X_y = True)\n",
"\n",
"run = Run.get_submitted_run()\n",
"\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)\n",
"data = {\"train\": {\"X\": X_train, \"y\": y_train},\n",
" \"test\": {\"X\": X_test, \"y\": y_test}}\n",
"\n",
"# list of numbers from 0.0 to 1.0 with a 0.05 interval\n",
"alphas = np.arange(0.0, 1.0, 0.05)\n",
"\n",
"for alpha in alphas:\n",
" # Use Ridge algorithm to create a regression model\n",
" reg = Ridge(alpha = alpha)\n",
" reg.fit(data[\"train\"][\"X\"], data[\"train\"][\"y\"])\n",
"\n",
" preds = reg.predict(data[\"test\"][\"X\"])\n",
" mse = mean_squared_error(preds, data[\"test\"][\"y\"])\n",
" run.log('alpha', alpha)\n",
" run.log('mse', mse)\n",
" \n",
" model_file_name = 'ridge_{0:.2f}.pkl'.format(alpha)\n",
" with open(model_file_name, \"wb\") as file:\n",
" joblib.dump(value = reg, filename = 'outputs/' + model_file_name)\n",
"\n",
" print('alpha is {0:.2f}, and mse is {1:0.2f}'.format(alpha, mse))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Configure for using ACI\n",
"Linux-based ACI is available in `westus`, `eastus`, `westeurope`, `northeurope`, `westus2` and `southeastasia` regions. See details [here](https://docs.microsoft.com/en-us/azure/container-instances/container-instances-quotas#region-availability)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"configure run"
]
},
"outputs": [],
"source": [
"from azureml.core.runconfig import RunConfiguration\n",
"from azureml.core.conda_dependencies import CondaDependencies\n",
"\n",
"# create a new runconfig object\n",
"run_config = RunConfiguration()\n",
"\n",
"# signal that you want to use ACI to execute script.\n",
"run_config.target = \"containerinstance\"\n",
"\n",
"# ACI container group is only supported in certain regions, which can be different than the region the Workspace is in.\n",
"run_config.container_instance.region = 'eastus'\n",
"\n",
"# set the ACI CPU and Memory \n",
"run_config.container_instance.cpu_cores = 1\n",
"run_config.container_instance.memory_gb = 2\n",
"\n",
"# enable Docker \n",
"run_config.environment.docker.enabled = True\n",
"\n",
"# set Docker base image to the default CPU-based image\n",
"run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n",
"#run_config.environment.docker.base_image = 'microsoft/mmlspark:plus-0.9.9'\n",
"\n",
"# use conda_dependencies.yml to create a conda environment in the Docker image for execution\n",
"run_config.environment.python.user_managed_dependencies = False\n",
"\n",
"# auto-prepare the Docker image when used for execution (if it is not already prepared)\n",
"run_config.auto_prepare_environment = True\n",
"\n",
"# specify CondaDependencies obj\n",
"run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Submit the Experiment\n",
"Finally, run the training job on the ACI"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"remote run",
"aci"
]
},
"outputs": [],
"source": [
"%%time \n",
"from azureml.core.script_run_config import ScriptRunConfig\n",
"\n",
"script_run_config = ScriptRunConfig(source_directory = script_folder,\n",
" script= 'train.py',\n",
" run_config = run_config)\n",
"\n",
"run = experiment.submit(script_run_config)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"remote run",
"aci"
]
},
"outputs": [],
"source": [
"%%time\n",
"# Shows output of the run on stdout.\n",
"run.wait_for_completion(show_output = True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"query history"
]
},
"outputs": [],
"source": [
"# Show run details\n",
"run"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"get metrics"
]
},
"outputs": [],
"source": [
"# get all metris logged in the run\n",
"run.get_metrics()\n",
"metrics = run.get_metrics()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"print('When alpha is {1:0.2f}, we have min MSE {0:0.2f}.'.format(\n",
" min(metrics['mse']), \n",
" metrics['alpha'][np.argmin(metrics['mse'])]\n",
"))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}


@@ -1,321 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
"\n",
"Licensed under the MIT License."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 04. Train in a remote VM (MLC managed DSVM)\n",
"* Create Workspace\n",
"* Create Project\n",
"* Create `train.py` file\n",
"* Create DSVM as Machine Learning Compute (MLC) resource\n",
"* Configure & execute a run in a conda environment in the default miniconda Docker container on DSVM"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Prerequisites\n",
"Make sure you go through the [00. Installation and Configuration](00.configuration.ipynb) Notebook first if you haven't."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Check core SDK version number\n",
"import azureml.core\n",
"\n",
"print(\"SDK version:\", azureml.core.VERSION)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Initialize Workspace\n",
"\n",
"Initialize a workspace object from persisted configuration."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core import Workspace\n",
"\n",
"ws = Workspace.from_config()\n",
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create Experiment\n",
"\n",
"**Experiment** is a logical container in an Azure ML Workspace. It hosts run records which can include run metrics and output artifacts from your experiments."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"experiment_name = 'train-on-remote-vm'\n",
"\n",
"from azureml.core import Experiment\n",
"\n",
"exp = Experiment(workspace = ws, name = experiment_name)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## View `train.py`\n",
"\n",
"For convenience, we created a training script for you. It is printed below as a text, but you can also run `%pfile ./train.py` in a cell to show the file."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"with open('./train.py', 'r') as training_script:\n",
" print(training_script.read())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create Linux DSVM as a compute target\n",
"\n",
"**Note**: If creation fails with a message about Marketplace purchase eligibilty, go to portal.azure.com, start creating DSVM there, and select \"Want to create programmatically\" to enable programmatic creation. Once you've enabled it, you can exit without actually creating VM.\n",
" \n",
"**Note**: By default SSH runs on port 22 and you don't need to specify it. But if for security reasons you switch to a different port (such as 5022), you can append the port number to the address like the example below. [Read more](../../documentation/sdk/ssh-issue.md) on this."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.compute import DsvmCompute\n",
"from azureml.core.compute_target import ComputeTargetException\n",
"\n",
"compute_target_name = 'mydsvm'\n",
"\n",
"try:\n",
" dsvm_compute = DsvmCompute(workspace = ws, name = compute_target_name)\n",
" print('found existing:', dsvm_compute.name)\n",
"except ComputeTargetException:\n",
" print('creating new.')\n",
" dsvm_config = DsvmCompute.provisioning_configuration(vm_size = \"Standard_D2_v2\")\n",
" dsvm_compute = DsvmCompute.create(ws, name = compute_target_name, provisioning_configuration = dsvm_config)\n",
" dsvm_compute.wait_for_completion(show_output = True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Attach an existing Linux DSVM as a compute target\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"'''\n",
" from azureml.core.compute import RemoteCompute \n",
" # if you want to connect using SSH key instead of username/password you can provide parameters private_key_file and private_key_passphrase \n",
" dsvm_compute = RemoteCompute.attach(ws,name=\"attach-from-sdk6\",username=<username>,address=<ipaddress>,ssh_port=22,password=<password>)\n",
"'''"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Configure & Run"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Configure a Docker run with new conda environment on the VM\n",
"You can execute in a Docker container in the VM. If you choose this route, you don't need to install anything on the VM yourself. Azure ML execution service will take care of it for you."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.runconfig import RunConfiguration\n",
"from azureml.core.conda_dependencies import CondaDependencies\n",
"\n",
"\n",
"# Load the \"cpu-dsvm.runconfig\" file (created by the above attach operation) in memory\n",
"run_config = RunConfiguration(framework = \"python\")\n",
"\n",
"# Set compute target to the Linux DSVM\n",
"run_config.target = compute_target_name\n",
"\n",
"# Use Docker in the remote VM\n",
"run_config.environment.docker.enabled = True\n",
"\n",
"# Use CPU base image from DockerHub\n",
"run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n",
"print('Base Docker image is:', run_config.environment.docker.base_image)\n",
"\n",
"# Ask system to provision a new one based on the conda_dependencies.yml file\n",
"run_config.environment.python.user_managed_dependencies = False\n",
"\n",
"# Prepare the Docker and conda environment automatically when executingfor the first time.\n",
"run_config.prepare_environment = True\n",
"\n",
"# specify CondaDependencies obj\n",
"run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Submit the Experiment\n",
"Submit script to run in the Docker image in the remote VM. If you run this for the first time, the system will download the base image, layer in packages specified in the `conda_dependencies.yml` file on top of the base image, create a container and then execute the script in the container."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core import Run\n",
"from azureml.core import ScriptRunConfig\n",
"\n",
"src = ScriptRunConfig(source_directory = '.', script = 'train.py', run_config = run_config)\n",
"run = exp.submit(src)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### View run history details"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"run"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"run.wait_for_completion(show_output = True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Find the best run"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# get all metris logged in the run\n",
"run.get_metrics()\n",
"metrics = run.get_metrics()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"print('When alpha is {1:0.2f}, we have min MSE {0:0.2f}.'.format(\n",
" min(metrics['mse']), \n",
" metrics['alpha'][np.argmin(metrics['mse'])]\n",
"))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Clean up compute resource"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dsvm_compute.delete()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

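The commented-out cell in the deleted notebook above notes that `RemoteCompute.attach` also accepts `private_key_file` and `private_key_passphrase` in place of a password. A sketch of that SSH-key variant; every name, address, and path is a placeholder:

```python
from azureml.core.compute import RemoteCompute

# Attach an existing Linux VM over SSH with a key pair instead of a password.
dsvm_compute = RemoteCompute.attach(ws,                       # existing Workspace object
                                    name="attach-from-sdk",   # placeholder compute name
                                    username="<username>",
                                    address="<ipaddress>",
                                    ssh_port=22,
                                    private_key_file="<path-to-private-key>",
                                    private_key_passphrase="<passphrase>")
# Waiting on the returned object is an assumption; older SDKs may require
# polling ws.compute_targets()[name].provisioning_state instead.
dsvm_compute.wait_for_completion(show_output=True)
```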

@@ -1,257 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
"\n",
"Licensed under the MIT License."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 05. Train in Spark\n",
"* Create Workspace\n",
"* Create Experiment\n",
"* Copy relevant files to the script folder\n",
"* Configure and Run"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Prerequisites\n",
"Make sure you go through the [00. Installation and Configuration](00.configuration.ipynb) Notebook first if you haven't."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Check core SDK version number\n",
"import azureml.core\n",
"\n",
"print(\"SDK version:\", azureml.core.VERSION)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Initialize Workspace\n",
"\n",
"Initialize a workspace object from persisted configuration."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core import Workspace\n",
"\n",
"ws = Workspace.from_config()\n",
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create Experiment\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"experiment_name = 'train-on-remote-vm'\n",
"\n",
"from azureml.core import Experiment\n",
"\n",
"exp = Experiment(workspace = ws, name = experiment_name)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## View `train-spark.py`\n",
"\n",
"For convenience, we created a training script for you. It is printed below as a text, but you can also run `%pfile ./train-spark.py` in a cell to show the file."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"with open('train-spark.py', 'r') as training_script:\n",
" print(training_script.read())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Configure & Run"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Attach an HDI cluster\n",
"To use HDI commpute target:\n",
" 1. Create an Spark for HDI cluster in Azure. Here is some [quick instructions](https://docs.microsoft.com/en-us/azure/machine-learning/desktop-workbench/how-to-create-dsvm-hdi). Make sure you use the Ubuntu flavor, NOT CentOS.\n",
" 2. Enter the IP address, username and password below"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.compute import HDInsightCompute\n",
"\n",
"try:\n",
" # if you want to connect using SSH key instead of username/password you can provide parameters private_key_file and private_key_passphrase\n",
" hdi_compute_new = HDInsightCompute.attach(ws, \n",
" name=\"hdi-attach\", \n",
" address=\"hdi-ignite-demo-ssh.azurehdinsight.net\", \n",
" ssh_port=22, \n",
" username='<username>', \n",
" password='<password>')\n",
"\n",
"except UserErrorException as e:\n",
" print(\"Caught = {}\".format(e.message))\n",
" print(\"Compute config already attached.\")\n",
" \n",
" \n",
"hdi_compute_new.wait_for_completion(show_output=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Configure HDI run"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.runconfig import RunConfiguration\n",
"from azureml.core.conda_dependencies import CondaDependencies\n",
"\n",
"\n",
"# Load the \"cpu-dsvm.runconfig\" file (created by the above attach operation) in memory\n",
"run_config = RunConfiguration(framework = \"python\")\n",
"\n",
"# Set compute target to the Linux DSVM\n",
"run_config.target = hdi_compute.name\n",
"\n",
"# Use Docker in the remote VM\n",
"# run_config.environment.docker.enabled = True\n",
"\n",
"# Use CPU base image from DockerHub\n",
"# run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n",
"# print('Base Docker image is:', run_config.environment.docker.base_image)\n",
"\n",
"# Ask system to provision a new one based on the conda_dependencies.yml file\n",
"run_config.environment.python.user_managed_dependencies = False\n",
"\n",
"# Prepare the Docker and conda environment automatically when executingfor the first time.\n",
"# run_config.prepare_environment = True\n",
"\n",
"# specify CondaDependencies obj\n",
"# run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])\n",
"# load the runconfig object from the \"myhdi.runconfig\" file generated by the attach operaton above."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Submit the script to HDI"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"script_run_config = ScriptRunConfig(source_directory = '.',\n",
" script= 'train-spark.py',\n",
" run_config = run_config)\n",
"run = experiment.submit(script_run_config)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# get the URL of the run history web page\n",
"run"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"run.wait_for_completion(show_output = True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# get all metris logged in the run\n",
"metrics = run.get_metrics()\n",
"print(metrics)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}


@@ -192,11 +192,9 @@
 " data = json.loads(raw_data)['data']\n",
 " data = numpy.array(data)\n",
 " result = model.predict(data)\n",
-" # you can return any datatype as long as it is JSON-serializable\n",
-" return result.tolist()\n",
 " except Exception as e:\n",
-" error = str(e)\n",
-" return error"
+" result = str(e)\n",
+" return json.dumps({\"result\": result.tolist()})"
 ]
 },
 {
@@ -389,6 +387,13 @@
 "source": [
 "aci_service.delete()"
 ]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": []
 }
 ],
 "metadata": {
@@ -412,7 +417,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.6.6"
+"version": "3.6.5"
 }
 },
 "nbformat": 4,


@@ -122,11 +122,9 @@
 " data = json.loads(raw_data)['data']\n",
 " data = numpy.array(data)\n",
 " result = model.predict(data)\n",
-" # you can return any data type as long as it is JSON-serializable\n",
-" return result.tolist()\n",
 " except Exception as e:\n",
-" error = str(e)\n",
-" return error"
+" result = str(e)\n",
+" return json.dumps({\"result\": result.tolist()})"
 ]
 },
 {
@@ -334,7 +332,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.6.6"
+"version": "3.6.5"
 }
 },
 "nbformat": 4,


@@ -156,12 +156,11 @@
 " inputs_dc.collect(data) #this call is saving our input data into our blob\n",
 " prediction_dc.collect(result)#this call is saving our prediction data into our blob\n",
 " print (\"saving prediction data\" + time.strftime(\"%H:%M:%S\"))\n",
-" # you can return any data type as long as it is JSON-serializable\n",
-" return result.tolist()\n",
+" return json.dumps({\"result\": result.tolist()})\n",
 " except Exception as e:\n",
-" error = str(e)\n",
-" print (error + time.strftime(\"%H:%M:%S\"))\n",
-" return error"
+" result = str(e)\n",
+" print (result + time.strftime(\"%H:%M:%S\"))\n",
+" return json.dumps({\"error\": result})"
 ]
 },
 {


@@ -161,12 +161,13 @@
 " \n",
 " #Print statement for appinsights custom traces:\n",
 " print (\"saving prediction data\" + time.strftime(\"%H:%M:%S\"))\n",
-" # you can return any data type as long as it is JSON-serializable\n",
-" return result.tolist()\n",
+" \n",
+" return json.dumps({\"result\": result.tolist()})\n",
+" \n",
 " except Exception as e:\n",
-" error = str(e)\n",
-" print (error + time.strftime(\"%H:%M:%S\"))\n",
-" return error"
+" result = str(e)\n",
+" print (result + time.strftime(\"%H:%M:%S\"))\n",
+" return json.dumps({\"error\": result})"
 ]
 },
 {


@@ -1,5 +1,10 @@
-For full documentation for Azure Machine Learning service, visit **https://aka.ms/aml-docs**.
-# Sample Notebooks for Azure Machine Learning service
+Get the full documentation for Azure Machine Learning service at:
+https://docs.microsoft.com/azure/machine-learning/service/
+<br>
+# Sample notebooks for Azure Machine Learning service
 To run the notebooks in this repository use one of these methods:
@@ -17,24 +22,13 @@ To run the notebooks in this repository use one of these methods:
 ## **Use your own notebook server**
-Video walkthrough:
-[![get started video](images/yt_cover.png)](https://youtu.be/VIsXeTuW3FU)
 1. Setup a Jupyter Notebook server and [install the Azure Machine Learning SDK](https://docs.microsoft.com/en-us/azure/machine-learning/service/quickstart-create-workspace-with-python).
 1. Clone [this repository](https://aka.ms/aml-notebooks).
-1. You may need to install other packages for specific notebook.
-    - For example, to run the Azure Machine Learning Data Prep notebooks, install the extra dataprep SDK:
-    ```
-    pip install --upgrade azureml-dataprep
-    ```
+1. You may need to install other packages for specific notebooks
 1. Start your notebook server.
 1. Follow the instructions in the [00.configuration](00.configuration.ipynb) notebook to create and connect to a workspace.
 1. Open one of the sample notebooks.
 > Note: **Looking for automated machine learning samples?**
 > For your convenience, you can use an installation script instead of the steps below for the automated ML notebooks. Go to the [automl folder README](automl/README.md) and follow the instructions. The script installs all packages needed for notebooks in that folder.


@@ -0,0 +1,15 @@
# Conda environment specification. The dependencies defined in this file will
# be automatically provisioned for runs with userManagedDependencies=False.
# Details about the Conda environment file format:
# https://conda.io/docs/user-guide/tasks/manage-environments.html#create-env-file-manually
name: project_environment
dependencies:
# The python interpreter version.
# Currently Azure ML only supports 3.5.2 and later.

aml_config/docker.runconfig (new file, +115 lines)

@@ -0,0 +1,115 @@
# The script to run.
script: train.py
# The arguments to the script file.
arguments: []
# The name of the compute target to use for this run.
target: local
# Framework to execute inside. Allowed values are "Python" , "PySpark", "CNTK", "TensorFlow", and "PyTorch".
framework: PySpark
# Communicator for the given framework. Allowed values are "None" , "ParameterServer", "OpenMpi", and "IntelMpi".
communicator: None
# Automatically prepare the run environment as part of the run itself.
autoPrepareEnvironment: true
# Maximum allowed duration for the run.
maxRunDurationSeconds:
# Number of nodes to use for running job.
nodeCount: 1
# Environment details.
environment:
# Environment variables set for the run.
environmentVariables:
EXAMPLE_ENV_VAR: EXAMPLE_VALUE
# Python details
python:
# user_managed_dependencies=True indicates that the environment will be user managed. False indicates that AzureML will manage the user environment.
userManagedDependencies: false
# The python interpreter path
interpreterPath: python
# Path to the conda dependencies file to use for this run. If a project
# contains multiple programs with different sets of dependencies, it may be
# convenient to manage those environments with separate files.
condaDependenciesFile: aml_config/conda_dependencies.yml
# Docker details
docker:
# Set True to perform this run inside a Docker container.
enabled: true
# Base image used for Docker-based runs.
baseImage: mcr.microsoft.com/azureml/base:0.2.0
# Set False if necessary to work around shared volume bugs.
sharedVolumes: true
# Run with NVidia Docker extension to support GPUs.
gpuSupport: false
# Extra arguments to the Docker run command.
arguments: []
# Image registry that contains the base image.
baseImageRegistry:
# DNS name or IP address of azure container registry(ACR)
address:
# The username for ACR
username:
# The password for ACR
password:
# Spark details
spark:
# List of spark repositories.
repositories:
- https://mmlspark.azureedge.net/maven
packages:
- group: com.microsoft.ml.spark
artifact: mmlspark_2.11
version: '0.12'
precachePackages: true
# Databricks details
databricks:
# List of maven libraries.
mavenLibraries: []
# List of PyPi libraries
pypiLibraries: []
# List of RCran libraries
rcranLibraries: []
# List of JAR libraries
jarLibraries: []
# List of Egg libraries
eggLibraries: []
# History details.
history:
# Enable history tracking -- this allows status, logs, metrics, and outputs
# to be collected for a run.
outputCollection: true
# whether to take snapshots for history.
snapshotProject: true
# Spark configuration details.
spark:
configuration:
spark.app.name: Azure ML Experiment
spark.yarn.maxAppAttempts: 1
# HDI details.
hdi:
# Yarn deploy mode. Options are cluster and client.
yarnDeployMode: cluster
# Tensorflow details.
tensorflow:
# The number of worker tasks.
workerCount: 1
# The number of parameter server tasks.
parameterServerCount: 1
# Mpi details.
mpi:
# When using MPI, number of processes per node.
processCountPerNode: 1
# data reference configuration details
dataReferences: {}
# Project share datastore reference.
sourceDirectoryDataStore:
# AmlCompute details.
amlcompute:
# VM size of the Cluster to be created.Allowed values are Azure vm sizes.The list of vm sizes is available in 'https://docs.microsoft.com/en-us/azure/cloud-services/cloud-services-sizes-specs
vmSize:
# VM priority of the Cluster to be created.Allowed values are "dedicated" , "lowpriority".
vmPriority:
# A bool that indicates if the cluster has to be retained after job completion.
retainCluster: false
# Name of the cluster to be created. If not specified, runId will be used as cluster name.
name:
# Maximum number of nodes in the AmlCompute cluster to be created. Minimum number of nodes will always be set to 0.
clusterMaxNodeCount: 1

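For reference, `aml_config/docker.runconfig` above is just the serialized form of a `RunConfiguration`. A sketch, assuming azureml-core, of building an equivalent object for the key fields shown (local target, Docker enabled, conda-managed Python); the conda package listed stands in for the `aml_config/conda_dependencies.yml` file the YAML references:

```python
from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies

run_config = RunConfiguration(framework="PySpark")   # matches the 'framework' field above
run_config.target = "local"
run_config.environment.docker.enabled = True
run_config.environment.docker.base_image = "mcr.microsoft.com/azureml/base:0.2.0"
run_config.environment.python.user_managed_dependencies = False
# Stand-in for the condaDependenciesFile entry in the YAML.
run_config.environment.python.conda_dependencies = CondaDependencies.create(
    conda_packages=["scikit-learn"])
```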
aml_config/local.runconfig (new file, +115 lines)

@@ -0,0 +1,115 @@
# The script to run.
script: train.py
# The arguments to the script file.
arguments: []
# The name of the compute target to use for this run.
target: local
# Framework to execute inside. Allowed values are "Python" , "PySpark", "CNTK", "TensorFlow", and "PyTorch".
framework: Python
# Communicator for the given framework. Allowed values are "None" , "ParameterServer", "OpenMpi", and "IntelMpi".
communicator: None
# Automatically prepare the run environment as part of the run itself.
autoPrepareEnvironment: true
# Maximum allowed duration for the run.
maxRunDurationSeconds:
# Number of nodes to use for running job.
nodeCount: 1
# Environment details.
environment:
# Environment variables set for the run.
environmentVariables:
EXAMPLE_ENV_VAR: EXAMPLE_VALUE
# Python details
python:
# user_managed_dependencies=True indicates that the environment will be user managed. False indicates that AzureML will manage the user environment.
userManagedDependencies: false
# The python interpreter path
interpreterPath: python
# Path to the conda dependencies file to use for this run. If a project
# contains multiple programs with different sets of dependencies, it may be
# convenient to manage those environments with separate files.
condaDependenciesFile: aml_config/conda_dependencies.yml
# Docker details
docker:
# Set True to perform this run inside a Docker container.
enabled: false
# Base image used for Docker-based runs.
baseImage: mcr.microsoft.com/azureml/base:0.2.0
# Set False if necessary to work around shared volume bugs.
sharedVolumes: true
# Run with NVidia Docker extension to support GPUs.
gpuSupport: false
# Extra arguments to the Docker run command.
arguments: []
# Image registry that contains the base image.
baseImageRegistry:
# DNS name or IP address of azure container registry(ACR)
address:
# The username for ACR
username:
# The password for ACR
password:
# Spark details
spark:
# List of spark repositories.
repositories:
- https://mmlspark.azureedge.net/maven
packages:
- group: com.microsoft.ml.spark
artifact: mmlspark_2.11
version: '0.12'
precachePackages: true
# Databricks details
databricks:
# List of maven libraries.
mavenLibraries: []
# List of PyPi libraries
pypiLibraries: []
# List of RCran libraries
rcranLibraries: []
# List of JAR libraries
jarLibraries: []
# List of Egg libraries
eggLibraries: []
# History details.
history:
# Enable history tracking -- this allows status, logs, metrics, and outputs
# to be collected for a run.
outputCollection: true
# whether to take snapshots for history.
snapshotProject: true
# Spark configuration details.
spark:
configuration:
spark.app.name: Azure ML Experiment
spark.yarn.maxAppAttempts: 1
# HDI details.
hdi:
# Yarn deploy mode. Options are cluster and client.
yarnDeployMode: cluster
# Tensorflow details.
tensorflow:
# The number of worker tasks.
workerCount: 1
# The number of parameter server tasks.
parameterServerCount: 1
# Mpi details.
mpi:
# When using MPI, number of processes per node.
processCountPerNode: 1
# data reference configuration details
dataReferences: {}
# Project share datastore reference.
sourceDirectoryDataStore:
# AmlCompute details.
amlcompute:
# VM size of the Cluster to be created.Allowed values are Azure vm sizes.The list of vm sizes is available in 'https://docs.microsoft.com/en-us/azure/cloud-services/cloud-services-sizes-specs
vmSize:
# VM priority of the Cluster to be created.Allowed values are "dedicated" , "lowpriority".
vmPriority:
# A bool that indicates if the cluster has to be retained after job completion.
retainCluster: false
# Name of the cluster to be created. If not specified, runId will be used as cluster name.
name:
# Maximum number of nodes in the AmlCompute cluster to be created. Minimum number of nodes will always be set to 0.
clusterMaxNodeCount: 1

aml_config/project.json (new file, +1 line)

@@ -0,0 +1 @@
{"Id": "local-compute", "Scope": "/subscriptions/65a1016d-0f67-45d2-b838-b8f373d6d52e/resourceGroups/sheri/providers/Microsoft.MachineLearningServices/workspaces/sheritestqs3/projects/local-compute"}


@@ -138,11 +138,13 @@
 "\n",
 "found = False\n",
 "# Check if this compute target already exists in the workspace.\n",
-"cts = ws.compute_targets\n",
-"if batchai_cluster_name in cts and cts[batchai_cluster_name].type == 'BatchAI':\n",
-"    found = True\n",
-"    print('Found existing compute target.')\n",
-"    compute_target = cts[batchai_cluster_name]\n",
+"for ct_name, ct in ws.compute_targets().items():\n",
+"    print(ct.name, ct.type)\n",
+"    if (ct.name == batchai_cluster_name and ct.type == 'BatchAI'):\n",
+"        found = True\n",
+"        print('Found existing compute target.')\n",
+"        compute_target = ct\n",
+"        break\n",
 " \n",
 "if not found:\n",
 "    print('Creating a new compute target...')\n",


@@ -143,16 +143,16 @@
 "dsvm_username = '<<username>>'\n",
 "dsvm_password = '<<password>>'\n",
 "\n",
-"if compute_name in ws.compute_targets:\n",
+"if compute_name in ws.compute_targets():\n",
 "    print('Using existing compute.')\n",
-"    dsvm_compute = ws.compute_targets[compute_name]\n",
+"    dsvm_compute = ws.compute_targets()[compute_name]\n",
 "else:\n",
 "    RemoteCompute.attach(workspace=ws, name=compute_name, address=dsvm_ip_addr, username=dsvm_username, password=dsvm_password, ssh_port=dsvm_ssh_port)\n",
 "\n",
-"    while ws.compute_targets[compute_name].provisioning_state == 'Creating':\n",
+"    while ws.compute_targets()[compute_name].provisioning_state == 'Creating':\n",
 "        time.sleep(1)\n",
 "\n",
-"    dsvm_compute = ws.compute_targets[compute_name]\n",
+"    dsvm_compute = ws.compute_targets()[compute_name]\n",
 "    \n",
 "    if dsvm_compute.provisioning_state == 'Failed':\n",
 "        print('Attached failed.')\n",


@@ -13,7 +13,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"# AutoML 06: Train Test Split and Handling Sparse Data\n",
+"# AutoML 06: Custom CV Splits and Handling Sparse Data\n",
 "\n",
 "In this example we use the scikit-learn's [20newsgroup](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.fetch_20newsgroups.html) to showcase how you can use AutoML for handling sparse data and how to specify custom cross validations splits.\n",
 "\n",
@@ -27,7 +27,7 @@
 "6. Test the best fitted model.\n",
 "\n",
 "In addition this notebook showcases the following features\n",
-"- Explicit train test splits \n",
+"- **Custom CV** splits \n",
 "- Handling **sparse data** in the input"
 ]
 },

View File

@@ -116,11 +116,10 @@
"source": [ "source": [
"experiment_name = 'automl-local-classification' # Replace this with any project name from previous cell.\n", "experiment_name = 'automl-local-classification' # Replace this with any project name from previous cell.\n",
"\n", "\n",
"proj = ws.experiments[experiment_name]\n", "proj = ws.experiments()[experiment_name]\n",
"summary_df = pd.DataFrame(index = ['Type', 'Status', 'Primary Metric', 'Iterations', 'Compute', 'Name'])\n", "summary_df = pd.DataFrame(index = ['Type', 'Status', 'Primary Metric', 'Iterations', 'Compute', 'Name'])\n",
"pattern = re.compile('^AutoML_[^_]*$')\n", "pattern = re.compile('^AutoML_[^_]*$')\n",
"all_runs = list(proj.get_runs(properties={'azureml.runsource': 'automl'}))\n", "all_runs = list(proj.get_runs(properties={'azureml.runsource': 'automl'}))\n",
"automl_runs_project = []\n",
"for run in all_runs:\n", "for run in all_runs:\n",
" if(pattern.match(run.id)):\n", " if(pattern.match(run.id)):\n",
" properties = run.get_properties()\n", " properties = run.get_properties()\n",
@@ -131,8 +130,6 @@
" else:\n", " else:\n",
" iterations = properties['num_iterations']\n", " iterations = properties['num_iterations']\n",
" summary_df[run.id] = [amlsettings['task_type'], run.get_details()['status'], properties['primary_metric'], iterations, properties['target'], amlsettings['name']]\n", " summary_df[run.id] = [amlsettings['task_type'], run.get_details()['status'], properties['primary_metric'], iterations, properties['target'], amlsettings['name']]\n",
" if run.get_details()['status'] == 'Completed':\n",
" automl_runs_project.append(run.id)\n",
" \n", " \n",
"from IPython.display import HTML\n", "from IPython.display import HTML\n",
"projname_html = HTML(\"<h3>{}</h3>\".format(proj.name))\n", "projname_html = HTML(\"<h3>{}</h3>\".format(proj.name))\n",
@@ -157,7 +154,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"run_id = automl_runs_project[0] # Replace with your own run_id from above run ids\n", "run_id = '' # Filling your own run_id from above run ids\n",
"assert (run_id in summary_df.keys()),\"Run id not found! Please set run id to a value from above run ids\"\n", "assert (run_id in summary_df.keys()),\"Run id not found! Please set run id to a value from above run ids\"\n",
"\n", "\n",
"from azureml.train.widgets import RunDetails\n", "from azureml.train.widgets import RunDetails\n",

View File

@@ -209,8 +209,7 @@
"source": [ "source": [
"## Configure & Run\n", "## Configure & Run\n",
"\n", "\n",
"First let's create a DataReferenceConfigruation object to inform the system what data folder to download to the compute target.\n", "First let's create a DataReferenceConfigruation object to inform the system what data folder to download to the copmute target."
"The path_on_compute should be an absolute path to ensure that the data files are downloaded only once. The get_data method should use this same path to access the data files."
] ]
}, },
{ {
@@ -222,9 +221,8 @@
"from azureml.core.runconfig import DataReferenceConfiguration\n", "from azureml.core.runconfig import DataReferenceConfiguration\n",
"dr = DataReferenceConfiguration(datastore_name=ds.name, \n", "dr = DataReferenceConfiguration(datastore_name=ds.name, \n",
" path_on_datastore='data', \n", " path_on_datastore='data', \n",
" path_on_compute='/tmp/azureml_runs',\n",
" mode='download', # download files from datastore to compute target\n", " mode='download', # download files from datastore to compute target\n",
" overwrite=False)" " overwrite=True)"
] ]
}, },
{ {
@@ -239,7 +237,7 @@
"conda_run_config = RunConfiguration(framework=\"python\")\n", "conda_run_config = RunConfiguration(framework=\"python\")\n",
"\n", "\n",
"# Set compute target to the Linux DSVM\n", "# Set compute target to the Linux DSVM\n",
"conda_run_config.target = dsvm_compute\n", "conda_run_config.target = dsvm_compute.name\n",
"# set the data reference of the run coonfiguration\n", "# set the data reference of the run coonfiguration\n",
"conda_run_config.data_references = {ds.name: dr}" "conda_run_config.data_references = {ds.name: dr}"
] ]
@@ -251,9 +249,7 @@
"## Create Get Data File\n", "## Create Get Data File\n",
"For remote executions you should author a get_data.py file containing a get_data() function. This file should be in the root directory of the project. You can encapsulate code to read data either from a blob storage or local disk in this file.\n", "For remote executions you should author a get_data.py file containing a get_data() function. This file should be in the root directory of the project. You can encapsulate code to read data either from a blob storage or local disk in this file.\n",
"\n", "\n",
"The *get_data()* function returns a [dictionary](README.md#getdata).\n", "The *get_data()* function returns a [dictionary](README.md#getdata)."
"\n",
"The read_csv uses the path_on_compute value specified in the DataReferenceConfiguration call plus the path_on_datastore folder and then the actual file name."
] ]
}, },
{ {
@@ -282,7 +278,9 @@
"\n", "\n",
"def get_data():\n", "def get_data():\n",
" # Burning man 2016 data\n", " # Burning man 2016 data\n",
" df = pd.read_csv(\"/tmp/azureml_runs/data/data.tsv\", delimiter=\"\\t\", quotechar='\"')\n", " df = pd.read_csv(join(dirname(os.path.realpath(__file__)),\n",
" os.environ[\"AZUREML_DATAREFERENCE_workspacefilestore\"],\n",
" \"data.tsv\"), delimiter=\"\\t\", quotechar='\"')\n",
" # get integer labels\n", " # get integer labels\n",
" le = LabelEncoder()\n", " le = LabelEncoder()\n",
" le.fit(df[\"Label\"].values)\n", " le.fit(df[\"Label\"].values)\n",

View File

@@ -40,7 +40,8 @@
"from azureml.core.experiment import Experiment\n", "from azureml.core.experiment import Experiment\n",
"from azureml.core.workspace import Workspace\n", "from azureml.core.workspace import Workspace\n",
"from azureml.train.automl import AutoMLConfig\n", "from azureml.train.automl import AutoMLConfig\n",
"from azureml.train.automl.run import AutoMLRun\n" "from azureml.train.automl.run import AutoMLRun\n",
"from azureml.train.automl.utilities import get_sdk_dependencies"
] ]
}, },
{ {
@@ -62,6 +63,29 @@
"set_diagnostics_collection(send_diagnostics = True)" "set_diagnostics_collection(send_diagnostics = True)"
] ]
}, },
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Retrieve the SDK versions in the current environment"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"To retrieve the SDK versions in the current environment, run `get_sdk_dependencies`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"get_sdk_dependencies()"
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},

View File

@@ -13,7 +13,7 @@
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": [ "source": [
"# AutoML 13: Prepare Data using `azureml.dataprep` for Remote Execution (DSVM)\n", "# AutoML 13: Prepare Data using `azureml.dataprep`\n",
"In this example we showcase how you can use the `azureml.dataprep` SDK to load and prepare data for AutoML. `azureml.dataprep` can also be used standalone; full documentation can be found [here](https://github.com/Microsoft/PendletonDocs).\n", "In this example we showcase how you can use the `azureml.dataprep` SDK to load and prepare data for AutoML. `azureml.dataprep` can also be used standalone; full documentation can be found [here](https://github.com/Microsoft/PendletonDocs).\n",
"\n", "\n",
"Make sure you have executed the [setup](00.configuration.ipynb) before running this notebook.\n", "Make sure you have executed the [setup](00.configuration.ipynb) before running this notebook.\n",
@@ -24,6 +24,22 @@
"3. Pass the `Dataflow` to AutoML for a remote run." "3. Pass the `Dataflow` to AutoML for a remote run."
] ]
}, },
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Install `azureml.dataprep` SDK"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!pip install azureml-dataprep"
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
@@ -66,6 +82,8 @@
"import azureml.core\n", "import azureml.core\n",
"from azureml.core.compute import DsvmCompute\n", "from azureml.core.compute import DsvmCompute\n",
"from azureml.core.experiment import Experiment\n", "from azureml.core.experiment import Experiment\n",
"from azureml.core.runconfig import CondaDependencies\n",
"from azureml.core.runconfig import RunConfiguration\n",
"from azureml.core.workspace import Workspace\n", "from azureml.core.workspace import Workspace\n",
"import azureml.dataprep as dprep\n", "import azureml.dataprep as dprep\n",
"from azureml.train.automl import AutoMLConfig" "from azureml.train.automl import AutoMLConfig"
@@ -80,9 +98,9 @@
"ws = Workspace.from_config()\n", "ws = Workspace.from_config()\n",
" \n", " \n",
"# choose a name for experiment\n", "# choose a name for experiment\n",
"experiment_name = 'automl-dataprep-remote-dsvm'\n", "experiment_name = 'automl-dataprep-classification'\n",
"# project folder\n", "# project folder\n",
"project_folder = './sample_projects/automl-dataprep-remote-dsvm'\n", "project_folder = './sample_projects/automl-dataprep-classification'\n",
" \n", " \n",
"experiment = Experiment(ws, experiment_name)\n", "experiment = Experiment(ws, experiment_name)\n",
" \n", " \n",
@@ -165,6 +183,44 @@
"}" "}"
] ]
}, },
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Local Run"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Pass Data with `Dataflow` Objects\n",
"\n",
"The `Dataflow` objects captured above can be passed to the `submit` method for a local run. AutoML will retrieve the results from the `Dataflow` for model training."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"automl_config = AutoMLConfig(task = 'classification',\n",
" debug_log = 'automl_errors.log',\n",
" X = X,\n",
" y = y,\n",
" **automl_settings)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"local_run = experiment.submit(automl_config, show_output = True)"
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
@@ -196,6 +252,43 @@
" dsvm_compute.wait_for_completion(show_output = True)" " dsvm_compute.wait_for_completion(show_output = True)"
] ]
}, },
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Update Conda Dependency file to have AutoML and DataPrep SDK\n",
"\n",
"Currently the AutoML and DataPrep SDKs are not installed with the Azure ML SDK by default. To circumvent this limitation, we update the conda dependency file to add these dependencies."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"cd = CondaDependencies()\n",
"cd.add_pip_package(pip_package='azureml-dataprep')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Create a `RunConfiguration` with DSVM name"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"run_config = RunConfiguration(conda_dependencies=cd)\n",
"run_config.target = dsvm_compute\n",
"run_config.auto_prepare_environment = True"
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
@@ -214,18 +307,10 @@
"automl_config = AutoMLConfig(task = 'classification',\n", "automl_config = AutoMLConfig(task = 'classification',\n",
" debug_log = 'automl_errors.log',\n", " debug_log = 'automl_errors.log',\n",
" path = project_folder,\n", " path = project_folder,\n",
" compute_target = dsvm_compute,\n", " run_configuration = run_config,\n",
" X = X,\n", " X = X,\n",
" y = y,\n", " y = y,\n",
" **automl_settings)" " **automl_settings)\n",
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"remote_run = experiment.submit(automl_config, show_output = True)" "remote_run = experiment.submit(automl_config, show_output = True)"
] ]
}, },
@@ -254,7 +339,7 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"from azureml.train.widgets import RunDetails\n", "from azureml.train.widgets import RunDetails\n",
"RunDetails(remote_run).show()" "RunDetails(local_run).show()"
] ]
}, },
{ {
@@ -271,7 +356,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"children = list(remote_run.get_children())\n", "children = list(local_run.get_children())\n",
"metricslist = {}\n", "metricslist = {}\n",
"for run in children:\n", "for run in children:\n",
" properties = run.get_properties()\n", " properties = run.get_properties()\n",
@@ -298,7 +383,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"best_run, fitted_model = remote_run.get_output()\n", "best_run, fitted_model = local_run.get_output()\n",
"print(best_run)\n", "print(best_run)\n",
"print(fitted_model)" "print(fitted_model)"
] ]
@@ -318,7 +403,7 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"lookup_metric = \"log_loss\"\n", "lookup_metric = \"log_loss\"\n",
"best_run, fitted_model = remote_run.get_output(metric = lookup_metric)\n", "best_run, fitted_model = local_run.get_output(metric = lookup_metric)\n",
"print(best_run)\n", "print(best_run)\n",
"print(fitted_model)" "print(fitted_model)"
] ]
@@ -338,7 +423,7 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"iteration = 0\n", "iteration = 0\n",
"best_run, fitted_model = remote_run.get_output(iteration = iteration)\n", "best_run, fitted_model = local_run.get_output(iteration = iteration)\n",
"print(best_run)\n", "print(best_run)\n",
"print(fitted_model)" "print(fitted_model)"
] ]

View File

@@ -1,446 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
"\n",
"Licensed under the MIT License."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# AutoML 13: Prepare Data using `azureml.dataprep` for Local Execution\n",
"In this example we showcase how you can use the `azureml.dataprep` SDK to load and prepare data for AutoML. `azureml.dataprep` can also be used standalone; full documentation can be found [here](https://github.com/Microsoft/PendletonDocs).\n",
"\n",
"Make sure you have executed the [setup](00.configuration.ipynb) before running this notebook.\n",
"\n",
"In this notebook you will learn how to:\n",
"1. Define data loading and preparation steps in a `Dataflow` using `azureml.dataprep`.\n",
"2. Pass the `Dataflow` to AutoML for a local run.\n",
"3. Pass the `Dataflow` to AutoML for a remote run."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Diagnostics\n",
"\n",
"Opt-in diagnostics for better experience, quality, and security of future releases."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.telemetry import set_diagnostics_collection\n",
"set_diagnostics_collection(send_diagnostics = True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create an Experiment\n",
"\n",
"As part of the setup you have already created an Azure ML `Workspace` object. For AutoML you will need to create an `Experiment` object, which is a named object in a `Workspace` used to run experiments."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import logging\n",
"import os\n",
"\n",
"import pandas as pd\n",
"\n",
"import azureml.core\n",
"from azureml.core.experiment import Experiment\n",
"from azureml.core.workspace import Workspace\n",
"import azureml.dataprep as dprep\n",
"from azureml.train.automl import AutoMLConfig"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ws = Workspace.from_config()\n",
" \n",
"# choose a name for experiment\n",
"experiment_name = 'automl-dataprep-local'\n",
"# project folder\n",
"project_folder = './sample_projects/automl-dataprep-local'\n",
" \n",
"experiment = Experiment(ws, experiment_name)\n",
" \n",
"output = {}\n",
"output['SDK version'] = azureml.core.VERSION\n",
"output['Subscription ID'] = ws.subscription_id\n",
"output['Workspace Name'] = ws.name\n",
"output['Resource Group'] = ws.resource_group\n",
"output['Location'] = ws.location\n",
"output['Project Directory'] = project_folder\n",
"output['Experiment Name'] = experiment.name\n",
"pd.set_option('display.max_colwidth', -1)\n",
"pd.DataFrame(data = output, index = ['']).T"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Loading Data using DataPrep"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# You can use `smart_read_file` which intelligently figures out delimiters and datatypes of a file.\n",
"# The data referenced here was pulled from `sklearn.datasets.load_digits()`.\n",
"simple_example_data_root = 'https://dprepdata.blob.core.windows.net/automl-notebook-data/'\n",
"X = dprep.smart_read_file(simple_example_data_root + 'X.csv').skip(1) # Remove the header row.\n",
"\n",
"# You can also use `read_csv` and `to_*` transformations to read (with overridable delimiter)\n",
"# and convert column types manually.\n",
"# Here we read a comma delimited file and convert all columns to integers.\n",
"y = dprep.read_csv(simple_example_data_root + 'y.csv').to_long(dprep.ColumnSelector(term='.*', use_regex = True))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Review the Data Preparation Result\n",
"\n",
"You can peek the result of a Dataflow at any range using `skip(i)` and `head(j)`. Doing so evaluates only `j` records for all the steps in the Dataflow, which makes it fast even against large datasets."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"X.skip(1).head(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Configure AutoML\n",
"\n",
"This creates a general AutoML settings object applicable for both local and remote runs."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"automl_settings = {\n",
" \"max_time_sec\" : 600,\n",
" \"iterations\" : 2,\n",
" \"primary_metric\" : 'AUC_weighted',\n",
" \"preprocess\" : False,\n",
" \"verbosity\" : logging.INFO,\n",
" \"n_cross_validations\": 3\n",
"}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Local Run"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Pass Data with `Dataflow` Objects\n",
"\n",
"The `Dataflow` objects captured above can be passed to the `submit` method for a local run. AutoML will retrieve the results from the `Dataflow` for model training."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"automl_config = AutoMLConfig(task = 'classification',\n",
" debug_log = 'automl_errors.log',\n",
" X = X,\n",
" y = y,\n",
" **automl_settings)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"local_run = experiment.submit(automl_config, show_output = True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Explore the Results"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Widget for Monitoring Runs\n",
"\n",
"The widget will first report a \"loading\" status while running the first iteration. After completing the first iteration, an auto-updating graph and table will be shown. The widget will refresh once per minute, so you should see the graph update as child runs complete.\n",
"\n",
"**Note:** The widget displays a link at the bottom. Use this link to open a web interface to explore the individual run details."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.train.widgets import RunDetails\n",
"RunDetails(local_run).show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Retrieve All Child Runs\n",
"You can also use SDK methods to fetch all the child runs and see individual metrics that we log."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"children = list(local_run.get_children())\n",
"metricslist = {}\n",
"for run in children:\n",
" properties = run.get_properties()\n",
" metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)}\n",
" metricslist[int(properties['iteration'])] = metrics\n",
" \n",
"import pandas as pd\n",
"rundata = pd.DataFrame(metricslist).sort_index(1)\n",
"rundata"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Retrieve the Best Model\n",
"\n",
"Below we select the best pipeline from our iterations. The `get_output` method returns the best run and the fitted model. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"best_run, fitted_model = local_run.get_output()\n",
"print(best_run)\n",
"print(fitted_model)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Best Model Based on Any Other Metric\n",
"Show the run and the model that has the smallest `log_loss` value:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"lookup_metric = \"log_loss\"\n",
"best_run, fitted_model = local_run.get_output(metric = lookup_metric)\n",
"print(best_run)\n",
"print(fitted_model)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Model from a Specific Iteration\n",
"Show the run and the model from the first iteration:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"iteration = 0\n",
"best_run, fitted_model = local_run.get_output(iteration = iteration)\n",
"print(best_run)\n",
"print(fitted_model)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Test the Best Fitted Model\n",
"\n",
"#### Load Test Data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn import datasets\n",
"\n",
"digits = datasets.load_digits()\n",
"X_test = digits.data[:10, :]\n",
"y_test = digits.target[:10]\n",
"images = digits.images[:10]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Testing Our Best Fitted Model\n",
"We will try to predict 2 digits and see how our model works."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Randomly select digits and test\n",
"from matplotlib import pyplot as plt\n",
"from matplotlib.pyplot import imshow\n",
"import random\n",
"import numpy as np\n",
"\n",
"for index in np.random.choice(len(y_test), 2, replace = False):\n",
" print(index)\n",
" predicted = fitted_model.predict(X_test[index:index + 1])[0]\n",
" label = y_test[index]\n",
" title = \"Label value = %d Predicted value = %d \" % (label, predicted)\n",
" fig = plt.figure(1, figsize=(3,3))\n",
" ax1 = fig.add_axes((0,0,.8,.8))\n",
" ax1.set_title(title)\n",
" plt.imshow(images[index], cmap = plt.cm.gray_r, interpolation = 'nearest')\n",
" plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Appendix"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Capture the `Dataflow` Objects for Later Use in AutoML\n",
"\n",
"`Dataflow` objects are immutable and are composed of a list of data preparation steps. A `Dataflow` object can be branched at any point for further usage."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# sklearn.digits.data + target\n",
"digits_complete = dprep.smart_read_file('https://dprepdata.blob.core.windows.net/automl-notebook-data/digits-complete.csv')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"`digits_complete` (sourced from `sklearn.datasets.load_digits()`) is forked into `dflow_X` to capture all the feature columns and `dflow_y` to capture the label column."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"digits_complete.to_pandas_dataframe().shape\n",
"labels_column = 'Column64'\n",
"dflow_X = digits_complete.drop_columns(columns = [labels_column])\n",
"dflow_y = digits_complete.keep_columns(columns = [labels_column])"
]
}
],
"metadata": {
"authors": [
{
"name": "savitam"
}
],
"kernelspec": {
"display_name": "Python 3.6",
"language": "python",
"name": "python36"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -1,426 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
"\n",
"Licensed under the MIT License."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# AutoML 15a: Classification with ensembling on local compute\n",
"\n",
"In this example we use the scikit-learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html) to showcase how you can use AutoML for a simple classification problem.\n",
"\n",
"Make sure you have executed the [00.configuration](00.configuration.ipynb) before running this notebook.\n",
"\n",
"In this notebook you will learn how to:\n",
"1. Create an `Experiment` in an existing `Workspace`.\n",
"2. Configure AutoML using `AutoMLConfig` which enables an extra ensembling iteration.\n",
"3. Train the model using local compute.\n",
"4. Explore the results.\n",
"5. Test the best fitted model.\n",
"\n",
"<b>Disclaimers / Limitations </b>\n",
"- Currently only Train/Validation split is supported; support for cross-validation will be coming soon.\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create an Experiment\n",
"\n",
"As part of the setup you have already created an Azure ML `Workspace` object. For AutoML you will need to create an `Experiment` object, which is a named object in a `Workspace` used to run experiments."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import logging\n",
"import os\n",
"import random\n",
"\n",
"from matplotlib import pyplot as plt\n",
"from matplotlib.pyplot import imshow\n",
"import numpy as np\n",
"import pandas as pd\n",
"from sklearn import datasets\n",
"\n",
"import azureml.core\n",
"from azureml.core.experiment import Experiment\n",
"from azureml.core.workspace import Workspace\n",
"from azureml.train.automl import AutoMLConfig\n",
"from azureml.train.automl.run import AutoMLRun"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ws = Workspace.from_config()\n",
"\n",
"# Choose a name for the experiment and specify the project folder.\n",
"experiment_name = 'automl-local-classification'\n",
"project_folder = './sample_projects/automl-local-classification'\n",
"\n",
"experiment = Experiment(ws, experiment_name)\n",
"\n",
"output = {}\n",
"output['SDK version'] = azureml.core.VERSION\n",
"output['Subscription ID'] = ws.subscription_id\n",
"output['Workspace Name'] = ws.name\n",
"output['Resource Group'] = ws.resource_group\n",
"output['Location'] = ws.location\n",
"output['Project Directory'] = project_folder\n",
"output['Experiment Name'] = experiment.name\n",
"pd.set_option('display.max_colwidth', -1)\n",
"pd.DataFrame(data = output, index = ['']).T"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Diagnostics\n",
"\n",
"Opt-in diagnostics for better experience, quality, and security of future releases."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.telemetry import set_diagnostics_collection\n",
"set_diagnostics_collection(send_diagnostics = True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load Training Data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn import datasets\n",
"\n",
"digits = datasets.load_digits()\n",
"\n",
"# Exclude the first 50 rows from training so that they can be used for test.\n",
"X_train = digits.data[150:,:]\n",
"y_train = digits.target[150:]\n",
"X_valid = digits.data[50:150]\n",
"y_valid = digits.target[50:150]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Configure AutoML\n",
"\n",
"Instantiate an `AutoMLConfig` object to specify the settings and data used to run the experiment.\n",
"\n",
"|Property|Description|\n",
"|-|-|\n",
"|**task**|classification or regression|\n",
"|**primary_metric**|This is the metric that you want to optimize. Classification supports the following primary metrics: <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>balanced_accuracy</i><br><i>average_precision_score_weighted</i><br><i>precision_score_weighted</i>|\n",
"|**max_time_sec**|Time limit in seconds for each iteration.|\n",
"|**iterations**|Number of iterations. In each iteration AutoML trains a specific pipeline with the data.|\n",
"|**n_cross_validations**|Number of cross validation splits.|\n",
"|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n",
"|**y**|(sparse) array-like, shape = [n_samples, ], [n_samples, n_classes]<br>Multi-class targets. An indicator matrix turns on multilabel classification. This should be an array of integers.|\n",
"|**X_valid**|(sparse) array-like, shape = [n_samples, n_features]|\n",
"|**y_valid**|(sparse) array-like, shape = [n_samples, ], [n_samples, n_classes]<br>Multi-class targets. An indicator matrix turns on multilabel classification. This should be an array of integers.|\n",
"|**enable_ensembling**|Flag to enable an ensembling iteration after all the other iterations complete.|\n",
"|**ensemble_iterations**|Number of iterations during which we choose a fitted pipeline to be part of the final ensemble.|\n",
"|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder.|"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"automl_config = AutoMLConfig(task = 'classification',\n",
" debug_log = 'classification.log',\n",
" primary_metric = 'AUC_weighted',\n",
" max_time_sec = 3600,\n",
" iterations = 10,\n",
" verbosity = logging.INFO,\n",
" X = X_train, \n",
" y = y_train,\n",
" X_valid = X_valid,\n",
" y_valid = y_valid,\n",
" enable_ensembling = True,\n",
" ensemble_iterations = 5,\n",
" path = project_folder)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Train the Model\n",
"\n",
"Call the `submit` method on the experiment object and pass the run configuration. Execution of local runs is synchronous. Depending on the data and the number of iterations this can run for a while.\n",
"In this example, we specify `show_output = True` to print currently running iterations to the console."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"local_run = experiment.submit(automl_config, show_output = True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"local_run"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Optionally, you can continue an interrupted local run by calling `continue_experiment` without the `iterations` parameter, or run more iterations for a completed run by specifying the `iterations` parameter:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"local_run = local_run.continue_experiment(X = X_train, \n",
" y = y_train,\n",
" X_valid = X_valid,\n",
" y_valid = y_valid,\n",
" show_output = True,\n",
" iterations = 5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"local_run"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Explore the Results"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Widget for Monitoring Runs\n",
"\n",
"The widget will first report a \"loading\" status while running the first iteration. After completing the first iteration, an auto-updating graph and table will be shown. The widget will refresh once per minute, so you should see the graph update as child runs complete.\n",
"\n",
"**Note:** The widget displays a link at the bottom. Use this link to open a web interface to explore the individual run details."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.train.widgets import RunDetails\n",
"RunDetails(local_run).show() "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n",
"#### Retrieve All Child Runs\n",
"You can also use SDK methods to fetch all the child runs and see individual metrics that we log."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"children = list(local_run.get_children())\n",
"metricslist = {}\n",
"for run in children:\n",
" properties = run.get_properties()\n",
" metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)}\n",
" metricslist[int(properties['iteration'])] = metrics\n",
"\n",
"rundata = pd.DataFrame(metricslist).sort_index(1)\n",
"rundata"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Retrieve the Best Model\n",
"\n",
"Below we select the best pipeline from our iterations. The `get_output` method on `automl_classifier` returns the best run and the fitted model for the last invocation. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"best_run, fitted_model = local_run.get_output()\n",
"print(best_run)\n",
"print(fitted_model)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Best Model Based on Any Other Metric\n",
"Show the run and the model that has the smallest `log_loss` value:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"lookup_metric = \"log_loss\"\n",
"best_run, fitted_model = local_run.get_output(metric = lookup_metric)\n",
"print(best_run)\n",
"print(fitted_model)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Model from a Specific Iteration\n",
"Show the run and the model from the third iteration:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"iteration = 3\n",
"third_run, third_model = local_run.get_output(iteration = iteration)\n",
"print(third_run)\n",
"print(third_model)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Test the Best Fitted Model\n",
"\n",
"#### Load Test Data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"digits = datasets.load_digits()\n",
"X_test = digits.data[:10, :]\n",
"y_test = digits.target[:10]\n",
"images = digits.images[:10]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Testing Our Best Pipeline\n",
"We will try to predict 2 digits and see how our model works."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Randomly select digits and test.\n",
"for index in np.random.choice(len(y_test), 2, replace = False):\n",
" print(index)\n",
" predicted = fitted_model.predict(X_test[index:index + 1])[0]\n",
" label = y_test[index]\n",
" title = \"Label value = %d Predicted value = %d \" % (label, predicted)\n",
" fig = plt.figure(1, figsize = (3,3))\n",
" ax1 = fig.add_axes((0,0,.8,.8))\n",
" ax1.set_title(title)\n",
" plt.imshow(images[index], cmap = plt.cm.gray_r, interpolation = 'nearest')\n",
" plt.show()"
]
}
],
"metadata": {
"authors": [
{
"name": "ratanase"
}
],
"kernelspec": {
"display_name": "Python 3.6",
"language": "python",
"name": "python36"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -1,442 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
"\n",
"Licensed under the MIT License."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# AutoML 15b: Regression with ensembling on remote compute\n",
"\n",
"In this example we use the scikit-learn's [diabetes dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html) to showcase how you can use AutoML for a simple regression problem.\n",
"\n",
"Make sure you have executed the [00.configuration](00.configuration.ipynb) before running this notebook.\n",
"\n",
"In this notebook you will learn how to:\n",
"1. Create an `Experiment` in an existing `Workspace`.\n",
"2. Configure AutoML using `AutoMLConfig`which enables an extra ensembling iteration.\n",
"3. Train the model using remote compute.\n",
"4. Explore the results.\n",
"5. Test the best fitted model.\n",
"\n",
"<b>Disclaimers / Limitations </b>\n",
"- Currently only Train/Validation split is supported; support for cross-validation will be coming soon.\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create an Experiment\n",
"\n",
"As part of the setup you have already created an Azure ML `Workspace` object. For AutoML you will need to create an `Experiment` object, which is a named object in a `Workspace` used to run experiments."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import logging\n",
"import os\n",
"import random\n",
"\n",
"from matplotlib import pyplot as plt\n",
"from matplotlib.pyplot import imshow\n",
"import numpy as np\n",
"import pandas as pd\n",
"from sklearn import datasets\n",
"\n",
"import azureml.core\n",
"from azureml.core.experiment import Experiment\n",
"from azureml.core.workspace import Workspace\n",
"from azureml.train.automl import AutoMLConfig\n",
"from azureml.train.automl.run import AutoMLRun"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ws = Workspace.from_config()\n",
"\n",
"# Choose a name for the experiment and specify the project folder.\n",
"experiment_name = 'automl-local-regression'\n",
"project_folder = './sample_projects/automl-local-regression'\n",
"\n",
"experiment = Experiment(ws, experiment_name)\n",
"\n",
"output = {}\n",
"output['SDK version'] = azureml.core.VERSION\n",
"output['Subscription ID'] = ws.subscription_id\n",
"output['Workspace Name'] = ws.name\n",
"output['Resource Group'] = ws.resource_group\n",
"output['Location'] = ws.location\n",
"output['Project Directory'] = project_folder\n",
"output['Experiment Name'] = experiment.name\n",
"pd.set_option('display.max_colwidth', -1)\n",
"pd.DataFrame(data = output, index = ['']).T"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Diagnostics\n",
"\n",
"Opt-in diagnostics for better experience, quality, and security of future releases."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.telemetry import set_diagnostics_collection\n",
"set_diagnostics_collection(send_diagnostics = True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create a Remote Linux DSVM\n",
"**Note:** If creation fails with a message about Marketplace purchase eligibilty, start creation of a DSVM through the [Azure portal](https://portal.azure.com), and select \"Want to create programmatically\" to enable programmatic creation. Once you've enabled this setting, you can exit the portal without actually creating the DSVM, and creation of the DSVM through the notebook should work."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.compute import DsvmCompute\n",
"\n",
"dsvm_name = 'mydsvm'\n",
"try:\n",
" dsvm_compute = DsvmCompute(ws, dsvm_name)\n",
" print('Found an existing DSVM.')\n",
"except:\n",
" print('Creating a new DSVM.')\n",
" dsvm_config = DsvmCompute.provisioning_configuration(vm_size = \"Standard_D2_v2\")\n",
" dsvm_compute = DsvmCompute.create(ws, name = dsvm_name, provisioning_configuration = dsvm_config)\n",
" dsvm_compute.wait_for_completion(show_output = True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create Get Data File\n",
"For remote executions you should author a `get_data.py` file containing a `get_data()` function. This file should be in the root directory of the project. You can encapsulate code to read data either from a blob storage or local disk in this file.\n",
"In this example, the `get_data()` function returns data using scikit-learn's `diabetes` dataset."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%writefile $project_folder/get_data.py\n",
"\n",
"# Load the diabetes dataset, a well-known built-in small dataset that comes with scikit-learn.\n",
"from sklearn.datasets import load_diabetes\n",
"from sklearn.linear_model import Ridge\n",
"from sklearn.metrics import mean_squared_error\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"def get_data():\n",
" X, y = load_diabetes(return_X_y = True)\n",
"\n",
" columns = ['age', 'gender', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6']\n",
"\n",
" X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size = 0.2, random_state = 0)\n",
" X_valid, X_test, y_valid, y_test = train_test_split(X_temp, y_temp, test_size = 0.5, random_state = 0)\n",
" return { \"X\" : X_train, \"y\" : y_train, \"X_valid\": X_valid, \"y_valid\": y_valid }"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Configure AutoML\n",
"\n",
"Instantiate an `AutoMLConfig` object to specify the settings and data used to run the experiment.\n",
"\n",
"|Property|Description|\n",
"|-|-|\n",
"|**task**|classification or regression|\n",
"|**primary_metric**|This is the metric that you want to optimize. Regression supports the following primary metrics: <br><i>spearman_correlation</i><br><i>normalized_root_mean_squared_error</i><br><i>r2_score</i><br><i>normalized_mean_absolute_error</i>|\n",
"|**max_time_sec**|Time limit in seconds for each iteration.|\n",
"|**iterations**|Number of iterations. In each iteration AutoML trains a specific pipeline with the data.|\n",
"|**enable_ensembling**|Flag to enable an ensembling iteration after all the other iterations complete.|\n",
"|**ensemble_iterations**|Number of iterations during which we choose a fitted pipeline to be part of the final ensemble.|\n",
"|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder.|"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"automl_config = AutoMLConfig(task = 'regression',\n",
" max_time_sec = 600,\n",
" iterations = 20,\n",
" primary_metric = 'spearman_correlation',\n",
" debug_log = 'regression.log',\n",
" verbosity = logging.INFO,\n",
" compute_target = dsvm_compute,\n",
" data_script = project_folder + \"/get_data.py\",\n",
" enable_ensembling = True,\n",
" ensemble_iterations = 5,\n",
" path = project_folder)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Train the Model\n",
"\n",
"Call the `submit` method on the experiment object and pass the run configuration. Execution of local runs is synchronous. Depending on the data and the number of iterations this can run for a while.\n",
"In this example, we specify `show_output = True` to print currently running iterations to the console."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"local_run = experiment.submit(automl_config, show_output = True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"local_run"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Explore the Results"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Widget for Monitoring Runs\n",
"\n",
"The widget will first report a \"loading\" status while running the first iteration. After completing the first iteration, an auto-updating graph and table will be shown. The widget will refresh once per minute, so you should see the graph update as child runs complete.\n",
"\n",
"**Note:** The widget displays a link at the bottom. Use this link to open a web interface to explore the individual run details."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.train.widgets import RunDetails\n",
"RunDetails(local_run).show() "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n",
"#### Retrieve All Child Runs\n",
"You can also use SDK methods to fetch all the child runs and see individual metrics that we log."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"children = list(local_run.get_children())\n",
"metricslist = {}\n",
"for run in children:\n",
" properties = run.get_properties()\n",
" metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)}\n",
" metricslist[int(properties['iteration'])] = metrics\n",
"\n",
"rundata = pd.DataFrame(metricslist).sort_index(1)\n",
"rundata"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Retrieve the Best Model\n",
"\n",
"Below we select the best pipeline from our iterations. The `get_output` method on `automl_classifier` returns the best run and the fitted model for the last invocation. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"best_run, fitted_model = local_run.get_output()\n",
"print(best_run)\n",
"print(fitted_model)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Best Model Based on Any Other Metric\n",
"Show the run and the model that has the smallest `root_mean_squared_error` value."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"lookup_metric = \"root_mean_squared_error\"\n",
"best_run, fitted_model = local_run.get_output(metric = lookup_metric)\n",
"print(best_run)\n",
"print(fitted_model)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Test the Best Model (Ensemble)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Predict on training and test set, and calculate residual values."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.datasets import load_diabetes\n",
"from sklearn.linear_model import Ridge\n",
"from sklearn.metrics import mean_squared_error\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"X, y = load_diabetes(return_X_y = True)\n",
"\n",
"X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size = 0.2, random_state = 0)\n",
"X_valid, X_test, y_valid, y_test = train_test_split(X_temp, y_temp, test_size = 0.5, random_state = 0)\n",
"\n",
"\n",
"y_pred_train = fitted_model.predict(X_train)\n",
"y_residual_train = y_train - y_pred_train\n",
"\n",
"y_pred_test = fitted_model.predict(X_test)\n",
"y_residual_test = y_test - y_pred_test"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"from sklearn import datasets\n",
"from sklearn.metrics import mean_squared_error, r2_score\n",
"\n",
"# Set up a multi-plot chart.\n",
"f, (a0, a1) = plt.subplots(1, 2, gridspec_kw = {'width_ratios':[1, 1], 'wspace':0, 'hspace': 0})\n",
"f.suptitle('Regression Residual Values', fontsize = 18)\n",
"f.set_figheight(6)\n",
"f.set_figwidth(16)\n",
"\n",
"# Plot residual values of training set.\n",
"a0.axis([0, 360, -200, 200])\n",
"a0.plot(y_residual_train, 'bo', alpha = 0.5)\n",
"a0.plot([-10,360],[0,0], 'r-', lw = 3)\n",
"a0.text(16,170,'RMSE = {0:.2f}'.format(np.sqrt(mean_squared_error(y_train, y_pred_train))), fontsize = 12)\n",
"a0.text(16,140,'R2 score = {0:.2f}'.format(r2_score(y_train, y_pred_train)), fontsize = 12)\n",
"a0.set_xlabel('Training samples', fontsize = 12)\n",
"a0.set_ylabel('Residual Values', fontsize = 12)\n",
"\n",
"# Plot a histogram.\n",
"a0.hist(y_residual_train, orientation = 'horizontal', color = 'b', bins = 10, histtype = 'step');\n",
"a0.hist(y_residual_train, orientation = 'horizontal', color = 'b', alpha = 0.2, bins = 10);\n",
"\n",
"# Plot residual values of test set.\n",
"a1.axis([0, 90, -200, 200])\n",
"a1.plot(y_residual_test, 'bo', alpha = 0.5)\n",
"a1.plot([-10,360],[0,0], 'r-', lw = 3)\n",
"a1.text(5,170,'RMSE = {0:.2f}'.format(np.sqrt(mean_squared_error(y_test, y_pred_test))), fontsize = 12)\n",
"a1.text(5,140,'R2 score = {0:.2f}'.format(r2_score(y_test, y_pred_test)), fontsize = 12)\n",
"a1.set_xlabel('Test samples', fontsize = 12)\n",
"a1.set_yticklabels([])\n",
"\n",
"# Plot a histogram.\n",
"a1.hist(y_residual_test, orientation = 'horizontal', color = 'b', bins = 10, histtype = 'step')\n",
"a1.hist(y_residual_test, orientation = 'horizontal', color = 'b', alpha = 0.2, bins = 10)\n",
"\n",
"plt.show()"
]
}
],
"metadata": {
"authors": [
{
"name": "ratanase"
}
],
"kernelspec": {
"display_name": "Python 3.6",
"language": "python",
"name": "python36"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -1,52 +1,24 @@
# Table of Contents # Table of Contents
1. [Automated ML Introduction](#introduction) 1. [Auto ML Introduction](#introduction)
1. [Running samples in Azure Notebooks](#jupyter) 2. [Running samples in a Local Conda environment](#localconda)
1. [Running samples in a Local Conda environment](#localconda) 3. [Auto ML SDK Sample Notebooks](#samples)
1. [Automated ML SDK Sample Notebooks](#samples) 4. [Documentation](#documentation)
1. [Documentation](#documentation) 5. [Running using python command](#pythoncommand)
1. [Running using python command](#pythoncommand) 6. [Troubleshooting](#troubleshooting)
1. [Troubleshooting](#troubleshooting)
<a name="introduction"></a>
# Automated ML introduction
Automated machine learning (automated ML) builds high quality machine learning models for you by automating model and hyperparameter selection. Bring a labelled dataset that you want to build a model for, and automated ML will give you a high quality machine learning model that you can use for predictions.
# Auto ML Introduction <a name="introduction"></a>
AutoML builds high quality Machine Learning models for you by automating model and hyperparameter selection. Bring a labelled dataset that you want to build a model for, and AutoML will give you a high quality machine learning model that you can use for predictions.
If you are new to Data Science, AutoML will help you get jumpstarted by simplifying machine learning model building. It abstracts away model selection and hyperparameter selection, and in one step creates a high quality trained model for you to use. If you are new to Data Science, AutoML will help you get jumpstarted by simplifying machine learning model building. It abstracts away model selection and hyperparameter selection, and in one step creates a high quality trained model for you to use.
If you are an experienced data scientist, AutoML will help increase your productivity by intelligently performing model and hyperparameter selection for your training, and it generates high quality models much more quickly than manually specifying several combinations of parameters and running training jobs. AutoML provides visibility and access to all the training jobs and the performance characteristics of the models to help you further tune the pipeline if you desire. If you are an experienced data scientist, AutoML will help increase your productivity by intelligently performing model and hyperparameter selection for your training, and it generates high quality models much more quickly than manually specifying several combinations of parameters and running training jobs. AutoML provides visibility and access to all the training jobs and the performance characteristics of the models to help you further tune the pipeline if you desire.
<a name="jupyter"></a>
## Running samples in Azure Notebooks - Jupyter based notebooks in the Azure cloud
1. [![Azure Notebooks](https://notebooks.azure.com/launch.png)](https://aka.ms/aml-clone-azure-notebooks) # Running samples in a Local Conda environment <a name="localconda"></a>
[Import sample notebooks ](https://aka.ms/aml-clone-azure-notebooks) into Azure Notebooks.
1. Follow the instructions in the [../00.configuration](00.configuration.ipynb) notebook to create and connect to a workspace.
1. Open one of the sample notebooks.
**Make sure the Azure Notebook kernel is set to `Python 3.6`** when you open a notebook. You can run these notebooks in Azure Notebooks without any extra installation. To run these notebooks on your own notebook server, use these installation instructions.
![set kernel to Python 3.6](../images/python36.png)
<a name="localconda"></a>
## Running samples in a Local Conda environment
To run these notebooks on your own notebook server, use these installation instructions.
The instructions below will install everything you need and then start a Jupyter notebook. To start your Jupyter notebook manually, use:
```
conda activate azure_automl
jupyter notebook
```
or on Mac:
```
source activate azure_automl
jupyter notebook
```
It is best if you create a new conda environment locally to try this SDK, so it doesn't mess up your existing Python environment.
### 1. Install mini-conda from [here](https://conda.io/miniconda.html), choose Python 3.7 or higher. ### 1. Install mini-conda from [here](https://conda.io/miniconda.html), choose Python 3.7 or higher.
- **Note**: if you already have conda installed, you can keep using it but it should be version 4.4.10 or later (as shown by: conda -V). If you have a previous version installed, you can update it using the command: conda update conda. - **Note**: if you already have conda installed, you can keep using it but it should be version 4.4.10 or later (as shown by: conda -V). If you have a previous version installed, you can update it using the command: conda update conda.
@@ -57,7 +29,7 @@ There's no need to install mini-conda specifically.
### 3. Setup a new conda environment ### 3. Setup a new conda environment
The **automl/automl_setup** script creates a new conda environment, installs the necessary packages, configures the widget and starts a jupyter notebook. The **automl/automl_setup** script creates a new conda environment, installs the necessary packages, configures the widget and starts a jupyter notebook.
It takes the conda environment name as an optional parameter. The default conda environment name is azure_automl. The exact command depends on the operating system. It can take about 10 minutes to execute. It takes the conda environment name as an optional parameter. The default conda environment name is azure_automl. The exact command depends on the operating system. It can take about 30 minutes to execute.
## Windows ## Windows
Start a conda command windows, cd to the **automl** folder where the sample notebooks were extracted and then run: Start a conda command windows, cd to the **automl** folder where the sample notebooks were extracted and then run:
``` ```
@@ -76,19 +48,19 @@ bash automl_setup_mac.sh
cd to the **automl** folder where the sample notebooks were extracted and then run: cd to the **automl** folder where the sample notebooks were extracted and then run:
``` ```
bash automl_setup_linux.sh automl_setup_linux.sh
``` ```
### 4. Running configuration.ipynb ### 4. Running configuration.ipynb
- Before running any samples, you first need to run the configuration notebook. Click on the 00.configuration.ipynb notebook. - Before running any samples, you first need to run the configuration notebook. Click on the 00.configuration.ipynb notebook.
- Please make sure you use the Python [conda env:azure_automl] kernel when running this notebook.
- Execute the cells in the notebook to Register Machine Learning Services Resource Provider and create a workspace. (*instructions in notebook*) - Execute the cells in the notebook to Register Machine Learning Services Resource Provider and create a workspace. (*instructions in notebook*)
### 5. Running Samples ### 5. Running Samples
- Please make sure you use the Python [conda env:azure_automl] kernel when trying the sample Notebooks. - Please make sure you use the Python [conda env:azure_automl] kernel when trying the sample Notebooks.
- Follow the instructions in the individual notebooks to explore various features in AutoML - Follow the instructions in the individual notebooks to explore various features in AutoML
<a name="samples"></a> # Auto ML SDK Sample Notebooks <a name="samples"></a>
# Automated ML SDK Sample Notebooks
- [00.configuration.ipynb](00.configuration.ipynb) - [00.configuration.ipynb](00.configuration.ipynb)
- Register Machine Learning Services Resource Provider - Register Machine Learning Services Resource Provider
- Create new Azure ML Workspace - Create new Azure ML Workspace
@@ -115,7 +87,7 @@ bash automl_setup_linux.sh
- [03b.auto-ml-remote-batchai.ipynb](03b.auto-ml-remote-batchai.ipynb) - [03b.auto-ml-remote-batchai.ipynb](03b.auto-ml-remote-batchai.ipynb)
- Dataset: scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits) - Dataset: scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits)
- Example of using automated ML for classification using a remote Batch AI compute for training - Example of using Auto ML for classification using a remote Batch AI compute for training
- Parallel execution of iterations - Parallel execution of iterations
- Async tracking of progress - Async tracking of progress
- Cancelling individual iterations or entire run - Cancelling individual iterations or entire run
@@ -134,7 +106,7 @@ bash automl_setup_linux.sh
- Specify a target metric to indicate stopping criteria - Specify a target metric to indicate stopping criteria
- Handling Missing Data in the input - Handling Missing Data in the input
- [06.auto-ml-sparse-data-train-test-split.ipynb](06.auto-ml-sparse-data-train-test-split.ipynb) - [06.auto-ml-sparse-data-custom-cv-split.ipynb](06.auto-ml-sparse-data-custom-cv-split.ipynb)
- Dataset: Scikit learn's [20newsgroup](http://scikit-learn.org/stable/datasets/twenty_newsgroups.html) - Dataset: Scikit learn's [20newsgroup](http://scikit-learn.org/stable/datasets/twenty_newsgroups.html)
- Handle sparse datasets - Handle sparse datasets
- Specify custom train and validation set - Specify custom train and validation set
@@ -143,11 +115,11 @@ bash automl_setup_linux.sh
- List all projects for the workspace - List all projects for the workspace
- List all AutoML Runs for a given project - List all AutoML Runs for a given project
- Get details for an AutoML Run. (AutoML settings, run widget & all metrics) - Get details for an AutoML Run. (AutoML settings, run widget & all metrics)
- Download fitted pipeline for any iteration - Download fitted pipeline for any iteration
- [08.auto-ml-remote-execution-with-DataStore.ipynb](08.auto-ml-remote-execution-with-DataStore.ipynb) - [08.auto-ml-remote-execution-with-text-file-on-DSVM](08.auto-ml-remote-execution-with-text-file-on-DSVM.ipynb)
- Dataset: scikit learn's [digit dataset](https://innovate.burningman.org/datasets-page/) - Dataset: scikit learn's [digit dataset](https://innovate.burningman.org/datasets-page/)
- Download the data and store it in DataStore. - Download the data and store it in the DSVM to improve performance.
- [09.auto-ml-classification-with-deployment.ipynb](09.auto-ml-classification-with-deployment.ipynb) - [09.auto-ml-classification-with-deployment.ipynb](09.auto-ml-classification-with-deployment.ipynb)
- Dataset: scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits) - Dataset: scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits)
@@ -171,32 +143,20 @@ bash automl_setup_linux.sh
- [13.auto-ml-dataprep.ipynb](13.auto-ml-dataprep.ipynb) - [13.auto-ml-dataprep.ipynb](13.auto-ml-dataprep.ipynb)
- Using DataPrep for reading data - Using DataPrep for reading data
- [14.auto-ml-model-explanation.ipynb](14.auto-ml-model-explanation.ipynb) - [14a.auto-ml-classification-ensemble.ipynb](14a.auto-ml-classification-ensemble.ipynb)
- Dataset: seaborn's [iris dataset](https://seaborn.pydata.org/generated/seaborn.load_dataset.html) - Classification with ensembling
- Explaining the AutoML classification pipeline
- Visualizing feature importance in widget
- [15a.auto-ml-classification-ensemble.ipynb](15a.auto-ml-classification-ensemble.ipynb) - [14b.auto-ml-regression-ensemble.ipynb](14b.auto-ml-regression-ensemble.ipynb)
- Dataset: scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits) - Regression with ensembling
- Enables an extra iteration for generating an Ensemble of models
- Uses local compute for training
- [15b.auto-ml-regression-ensemble.ipynb](15b.auto-ml-regression-ensemble.ipynb) # Documentation <a name="documentation"></a>
- Dataset: scikit learn's [diabetes dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html)
- Enables an extra iteration for generating an Ensemble of models
- Uses remote Linux DSVM for training
<a name="documentation"></a>
# Documentation
## Table of Contents ## Table of Contents
1. [Automated ML Settings ](#automlsettings) 1. [Auto ML Settings ](#automlsettings)
1. [Cross validation split options](#cvsplits) 2. [Cross validation split options](#cvsplits)
1. [Get Data Syntax](#getdata) 3. [Get Data Syntax](#getdata)
1. [Data pre-processing and featurization](#preprocessing) 4. [Data pre-processing and featurization](#preprocessing)
<a name="automlsettings"></a>
## Automated ML Settings
## Auto ML Settings <a name="automlsettings"></a>
|Property|Description|Default| |Property|Description|Default|
|-|-|-| |-|-|-|
|**primary_metric**|This is the metric that you want to optimize.<br><br> Classification supports the following primary metrics <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>balanced_accuracy</i><br><i>average_precision_score_weighted</i><br><i>precision_score_weighted</i><br><br> Regression supports the following primary metrics <br><i>spearman_correlation</i><br><i>normalized_root_mean_squared_error</i><br><i>r2_score</i><br><i>normalized_mean_absolute_error</i><br><i>normalized_root_mean_squared_log_error</i>| Classification: accuracy <br><br> Regression: spearman_correlation |**primary_metric**|This is the metric that you want to optimize.<br><br> Classification supports the following primary metrics <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>balanced_accuracy</i><br><i>average_precision_score_weighted</i><br><i>precision_score_weighted</i><br><br> Regression supports the following primary metrics <br><i>spearman_correlation</i><br><i>normalized_root_mean_squared_error</i><br><i>r2_score</i><br><i>normalized_mean_absolute_error</i><br><i>normalized_root_mean_squared_log_error</i>| Classification: accuracy <br><br> Regression: spearman_correlation
@@ -208,10 +168,9 @@ bash automl_setup_linux.sh
|**preprocess**|*True/False* <br>Setting this to *True* enables preprocessing <br>on the input to handle missing data, and perform some common feature extraction<br>*Note: If input data is Sparse you cannot use preprocess=True*|False| |**preprocess**|*True/False* <br>Setting this to *True* enables preprocessing <br>on the input to handle missing data, and perform some common feature extraction<br>*Note: If input data is Sparse you cannot use preprocess=True*|False|
|**max_cores_per_iteration**| Indicates how many cores on the compute target would be used to train a single pipeline.<br> You can set it to *-1* to use all cores|1| |**max_cores_per_iteration**| Indicates how many cores on the compute target would be used to train a single pipeline.<br> You can set it to *-1* to use all cores|1|
|**exit_score**|*double* value indicating the target for *primary_metric*. <br> Once the target is surpassed the run terminates|None| |**exit_score**|*double* value indicating the target for *primary_metric*. <br> Once the target is surpassed the run terminates|None|
|**blacklist_algos**|*Array* of *strings* indicating pipelines to ignore for Auto ML.<br><br> Allowed values for **Classification**<br><i>LogisticRegression</i><br><i>SGDClassifierWrapper</i><br><i>NBWrapper</i><br><i>BernoulliNB</i><br><i>SVCWrapper</i><br><i>LinearSVMWrapper</i><br><i>KNeighborsClassifier</i><br><i>DecisionTreeClassifier</i><br><i>RandomForestClassifier</i><br><i>ExtraTreesClassifier</i><br><i>LightGBMClassifier</i><br><br>Allowed values for **Regression**<br><i>ElasticNet</i><br><i>GradientBoostingRegressor</i><br><i>DecisionTreeRegressor</i><br><i>KNeighborsRegressor</i><br><i>LassoLars</i><br><i>SGDRegressor</i><br><i>RandomForestRegressor</i><br><i>ExtraTreesRegressor</i>|None| |**blacklist_algos**|*Array* of *strings* indicating pipelines to ignore for Auto ML.<br><br> Allowed values for **Classification**<br><i>LogisticRegression</i><br><i>SGDClassifierWrapper</i><br><i>NBWrapper</i><br><i>BernoulliNB</i><br><i>SVCWrapper</i><br><i>LinearSVMWrapper</i><br><i>KNeighborsClassifier</i><br><i>DecisionTreeClassifier</i><br><i>RandomForestClassifier</i><br><i>ExtraTreesClassifier</i><br><i>gradient boosting</i><br><i>LightGBMClassifier</i><br><br>Allowed values for **Regression**<br><i>ElasticNet</i><br><i>GradientBoostingRegressor</i><br><i>DecisionTreeRegressor</i><br><i>KNeighborsRegressor</i><br><i>LassoLars</i><br><i>SGDRegressor</i><br><i>RandomForestRegressor</i><br><i>ExtraTreesRegressor</i>|None|
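For illustration, here is a minimal sketch of passing a few of these settings. It assumes the settings in the table map directly onto keyword arguments of `AutoMLConfig` (from `azureml.train.automl`) and uses scikit-learn's digits data as a stand-in for your own training data:

```python
from sklearn.datasets import load_digits
from azureml.train.automl import AutoMLConfig

# Stand-in training data; replace with your own feature matrix and labels.
X_train, y_train = load_digits(return_X_y=True)

automl_config = AutoMLConfig(
    task='classification',
    primary_metric='AUC_weighted',    # metric to optimize (see table above)
    max_cores_per_iteration=-1,       # use all cores on the compute target
    exit_score=0.995,                 # stop once the primary metric passes this value
    blacklist_algos=['KNeighborsClassifier', 'LinearSVMWrapper'],
    preprocess=True,                  # only valid for dense (non-sparse) input
    X=X_train,
    y=y_train)
```

The resulting `automl_config` object is then submitted to an experiment, as shown in the sample notebooks listed above.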
<a name="cvsplits"></a> ## Cross validation split options <a name="cvsplits"></a>
## Cross validation split options
### K-Folds Cross Validation ### K-Folds Cross Validation
Use the *n_cross_validations* setting to specify the number of cross validations. The training data set will be randomly split into *n_cross_validations* folds of equal size. During each cross validation round, one of the folds will be used for validation of the model trained on the remaining folds. This process repeats for *n_cross_validations* rounds until each fold is used once as the validation set. Finally, the average scores across all *n_cross_validations* rounds will be reported, and the corresponding model will be retrained on the whole training data set. Use the *n_cross_validations* setting to specify the number of cross validations. The training data set will be randomly split into *n_cross_validations* folds of equal size. During each cross validation round, one of the folds will be used for validation of the model trained on the remaining folds. This process repeats for *n_cross_validations* rounds until each fold is used once as the validation set. Finally, the average scores across all *n_cross_validations* rounds will be reported, and the corresponding model will be retrained on the whole training data set.
@@ -221,8 +180,7 @@ Use *validation_size* to specify the percentage of the training data set that sh
### Custom train and validation set ### Custom train and validation set
You can specify separate train and validation sets either through get_data() or directly to the fit method. You can specify separate train and validation sets either through get_data() or directly to the fit method.
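As a rough sketch, the three validation options described above would be selected through mutually exclusive arguments (again assuming they are plain `AutoMLConfig` keyword arguments):

```python
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from azureml.train.automl import AutoMLConfig

X, y = load_digits(return_X_y=True)
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=0)

# Option 1: k-fold cross validation over the training data.
config_kfold = AutoMLConfig(task='classification', primary_metric='accuracy',
                            X=X_train, y=y_train, n_cross_validations=5)

# Option 2: Monte Carlo style - hold out a percentage of the training data each round.
config_holdout = AutoMLConfig(task='classification', primary_metric='accuracy',
                              X=X_train, y=y_train, validation_size=0.2)

# Option 3: custom train and validation sets supplied explicitly.
config_custom = AutoMLConfig(task='classification', primary_metric='accuracy',
                             X=X_train, y=y_train, X_valid=X_valid, y_valid=y_valid)
```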
<a name="getdata"></a> ## get_data() syntax <a name="getdata"></a>
## get_data() syntax
The *get_data()* function can be used to return a dictionary with these values: The *get_data()* function can be used to return a dictionary with these values:
|Key|Type|Dependency|Mutually Exclusive with|Description| |Key|Type|Dependency|Mutually Exclusive with|Description|
@@ -238,23 +196,21 @@ The *get_data()* function can be used to return a dictionary with these values:
|columns|Array of strings|data_train||*Optional* Whitelist of columns to use for features| |columns|Array of strings|data_train||*Optional* Whitelist of columns to use for features|
|cv_splits_indices|Array of integers|data_train||*Optional* List of indexes to split the data for cross validation| |cv_splits_indices|Array of integers|data_train||*Optional* List of indexes to split the data for cross validation|
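A minimal `get_data.py` sketch (assuming, as in the sample notebooks, that the file sits in the project folder and AutoML imports and calls `get_data()` on the compute target; here only the `data_train` and `label` keys from the table are used, with `label` naming the target column):

```python
# get_data.py - minimal example; returns a dictionary using keys from the table above.
import pandas as pd
from sklearn.datasets import load_digits

def get_data():
    digits = load_digits()
    df = pd.DataFrame(digits.data)
    df['digit'] = digits.target
    # data_train is the training DataFrame; label names the target column.
    # columns and cv_splits_indices (see table) are optional extras.
    return {"data_train": df, "label": "digit"}
```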
<a name="preprocessing"></a> ## Data pre-processing and featurization <a name="preprocessing"></a>
## Data pre-processing and featurization If you use "preprocess=True", the following data preprocessing steps are performed automatically for you:
If you use `preprocess=True`, the following data preprocessing steps are performed automatically for you: ### 1. Dropping high cardinality or no variance features
1. Dropping high cardinality or no variance features
- Features with no useful information are dropped from training and validation sets. These include features with all values missing, same value across all rows or with extremely high cardinality (e.g., hashes, IDs or GUIDs). - Features with no useful information are dropped from training and validation sets. These include features with all values missing, same value across all rows or with extremely high cardinality (e.g., hashes, IDs or GUIDs).
2. Missing value imputation ### 2. Missing value imputation
- For numerical features, missing values are imputed with average of values in the column. - For numerical features, missing values are imputed with average of values in the column.
- For categorical features, missing values are imputed with most frequent value. - For categorical features, missing values are imputed with most frequent value.
3. Generating additional features ### 3. Generating additional features
- For DateTime features: Year, Month, Day, Day of week, Day of year, Quarter, Week of the year, Hour, Minute, Second. - For DateTime features: Year, Month, Day, Day of week, Day of year, Quarter, Week of the year, Hour, Minute, Second.
- For Text features: Term frequency based on bi-grams and tri-grams, Count vectorizer. - For Text features: Term frequency based on bi-grams and tri-grams, Count vectorizer.
4. Transformations and encodings ### 4. Transformations and encodings
- Numeric features with very few unique values are transformed into categorical features. - Numeric features with very few unique values are transformed into categorical features.
- Depending on the cardinality of categorical features, label encoding or (hashing) one-hot encoding is performed.
<a name="pythoncommand"></a> # Running using python command <a name="pythoncommand"></a>
# Running using python command
Jupyter notebook provides a File / Download as / Python (.py) option for saving the notebook as a Python file. Jupyter notebook provides a File / Download as / Python (.py) option for saving the notebook as a Python file.
You can then run this file using the python command. You can then run this file using the python command.
However, on Windows the file needs to be modified before it can be run. However, on Windows the file needs to be modified before it can be run.
@@ -264,8 +220,7 @@ The following condition must be added to the main code in the file:
The main code of the file must be indented so that it is under this condition. The main code of the file must be indented so that it is under this condition.
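For example, the exported script ends up structured like this (the body is a placeholder for whatever code the notebook contained):

```python
# script.py, created via File / Download as / Python (.py)

def main():
    # ... the notebook's code, moved or indented under the guard below ...
    print("running the exported notebook code")

if __name__ == '__main__':
    # Required on Windows: child processes re-import this file, and the guard
    # prevents them from re-running the main code.
    main()
```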
<a name="troubleshooting"></a> # Troubleshooting <a name="troubleshooting"></a>
# Troubleshooting
## Iterations fail and the log contains "MemoryError" ## Iterations fail and the log contains "MemoryError"
This can be caused by insufficient memory on the DSVM. AutoML loads all training data into memory. So, the available memory should be more than the training data size. This can be caused by insufficient memory on the DSVM. AutoML loads all training data into memory. So, the available memory should be more than the training data size.
If you are using a remote DSVM, memory is needed for each concurrent iteration. The concurrent_iterations setting specifies the maximum concurrent iterations. For example, if the training data size is 8 GB and concurrent_iterations is set to 10, the minimum memory required is at least 80 GB. If you are using a remote DSVM, memory is needed for each concurrent iteration. The concurrent_iterations setting specifies the maximum concurrent iterations. For example, if the training data size is 8 GB and concurrent_iterations is set to 10, the minimum memory required is at least 80 GB.
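A quick back-of-the-envelope check before submitting a remote run (assuming each concurrent iteration holds its own copy of the training data in memory):

```python
# Estimate the minimum DSVM memory needed for a remote AutoML run.
training_data_gb = 8          # size of the training data
concurrent_iterations = 10    # value of the concurrent_iterations setting
min_memory_gb = training_data_gb * concurrent_iterations
print(min_memory_gb)          # 80 -> the DSVM needs at least 80 GB of RAM
```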

View File

@@ -5,15 +5,15 @@ dependencies:
- python=3.6 - python=3.6
- nb_conda - nb_conda
- matplotlib - matplotlib
- numpy>=1.11.0,<1.15.0 - numpy>=1.11.0,<1.16.0
- cython
- urllib3<1.24
- scipy>=0.19.0,<0.20.0 - scipy>=0.19.0,<0.20.0
- scikit-learn>=0.18.0,<=0.19.1 - scikit-learn>=0.18.0,<=0.19.1
- pandas>=0.22.0,<0.23.0 - pandas>=0.22.0,<0.23.0
- pip: - pip:
# Required packages for AzureML execution, history, and data preparation. # Required packages for AzureML execution, history, and data preparation.
- azureml-sdk[automl,notebooks] - --extra-index-url https://pypi.python.org/simple
- azureml-sdk[automl]
- azureml-train-widgets
- pandas_ml - pandas_ml

View File

@@ -1,21 +1,16 @@
@echo off @echo off
set conda_env_name=%1 set conda_env_name=%1
set automl_env_file=%2
set PIP_NO_WARN_SCRIPT_LOCATION=0
IF "%conda_env_name%"=="" SET conda_env_name="azure_automl" IF "%conda_env_name%"=="" SET conda_env_name="azure_automl"
IF "%automl_env_file%"=="" SET automl_env_file="automl_env.yml"
IF NOT EXIST %automl_env_file% GOTO YmlMissing
call conda activate %conda_env_name% 2>nul: call conda activate %conda_env_name% 2>nul:
if not errorlevel 1 ( if not errorlevel 1 (
echo Upgrading azureml-sdk[automl] in existing conda environment %conda_env_name% echo Upgrading azureml-sdk[automl] in existing conda environment %conda_env_name%
call pip install --upgrade azureml-sdk[automl,notebooks] call pip install --upgrade azureml-sdk[automl]
if errorlevel 1 goto ErrorExit if errorlevel 1 goto ErrorExit
) else ( ) else (
call conda env create -f %automl_env_file% -n %conda_env_name% call conda env create -f automl_env.yml -n %conda_env_name%
) )
call conda activate %conda_env_name% 2>nul: call conda activate %conda_env_name% 2>nul:
@@ -23,10 +18,10 @@ if errorlevel 1 goto ErrorExit
call pip install psutil call pip install psutil
call jupyter nbextension install --py azureml.train.widgets --user call jupyter nbextension install --py azureml.train.widgets
if errorlevel 1 goto ErrorExit if errorlevel 1 goto ErrorExit
call jupyter nbextension enable --py azureml.train.widgets --user call jupyter nbextension enable --py azureml.train.widgets
if errorlevel 1 goto ErrorExit if errorlevel 1 goto ErrorExit
echo. echo.
@@ -41,9 +36,6 @@ jupyter notebook --log-level=50
goto End goto End
:YmlMissing
echo File %automl_env_file% not found.
:ErrorExit :ErrorExit
echo Install failed echo Install failed

View File

@@ -1,30 +1,18 @@
#!/bin/bash #!/bin/bash
CONDA_ENV_NAME=$1 CONDA_ENV_NAME=$1
AUTOML_ENV_FILE=$2
PIP_NO_WARN_SCRIPT_LOCATION=0
if [ "$CONDA_ENV_NAME" == "" ] if [ "$CONDA_ENV_NAME" == "" ]
then then
CONDA_ENV_NAME="azure_automl" CONDA_ENV_NAME="azure_automl"
fi fi
if [ "$AUTOML_ENV_FILE" == "" ]
then
AUTOML_ENV_FILE="automl_env.yml"
fi
if [ ! -f $AUTOML_ENV_FILE ]; then
echo "File $AUTOML_ENV_FILE not found"
exit 1
fi
if source activate $CONDA_ENV_NAME 2> /dev/null if source activate $CONDA_ENV_NAME 2> /dev/null
then then
echo "Upgrading azureml-sdk[automl] in existing conda environment" $CONDA_ENV_NAME echo "Upgrading azureml-sdk[automl] in existing conda environment" $CONDA_ENV_NAME
pip install --upgrade azureml-sdk[automl,notebooks] pip install --upgrade azureml-sdk[automl]
else else
conda env create -f $AUTOML_ENV_FILE -n $CONDA_ENV_NAME && conda env create -f automl_env.yml -n $CONDA_ENV_NAME &&
source activate $CONDA_ENV_NAME && source activate $CONDA_ENV_NAME &&
jupyter nbextension install --py azureml.train.widgets --user && jupyter nbextension install --py azureml.train.widgets --user &&
jupyter nbextension enable --py azureml.train.widgets --user && jupyter nbextension enable --py azureml.train.widgets --user &&

View File

@@ -1,30 +1,18 @@
#!/bin/bash #!/bin/bash
CONDA_ENV_NAME=$1 CONDA_ENV_NAME=$1
AUTOML_ENV_FILE=$2
PIP_NO_WARN_SCRIPT_LOCATION=0
if [ "$CONDA_ENV_NAME" == "" ] if [ "$CONDA_ENV_NAME" == "" ]
then then
CONDA_ENV_NAME="azure_automl" CONDA_ENV_NAME="azure_automl"
fi fi
if [ "$AUTOML_ENV_FILE" == "" ]
then
AUTOML_ENV_FILE="automl_env.yml"
fi
if [ ! -f $AUTOML_ENV_FILE ]; then
echo "File $AUTOML_ENV_FILE not found"
exit 1
fi
if source activate $CONDA_ENV_NAME 2> /dev/null if source activate $CONDA_ENV_NAME 2> /dev/null
then then
echo "Upgrading azureml-sdk[automl] in existing conda environment" $CONDA_ENV_NAME echo "Upgrading azureml-sdk[automl] in existing conda environment" $CONDA_ENV_NAME
pip install --upgrade azureml-sdk[automl,notebooks] pip install --upgrade azureml-sdk[automl]
else else
conda env create -f $AUTOML_ENV_FILE -n $CONDA_ENV_NAME && conda env create -f automl_env.yml -n $CONDA_ENV_NAME &&
source activate $CONDA_ENV_NAME && source activate $CONDA_ENV_NAME &&
conda install lightgbm -c conda-forge -y && conda install lightgbm -c conda-forge -y &&
jupyter nbextension install --py azureml.train.widgets --user && jupyter nbextension install --py azureml.train.widgets --user &&

File diff suppressed because it is too large

View File

@@ -0,0 +1,174 @@
#!/usr/bin/env python
# coding: utf-8
import azureml.core
print('SDK version: ' + azureml.core.VERSION)
# PREREQ: load workspace info
# import azureml.core
# <loadWorkspace>
from azureml.core import Workspace
ws = Workspace.from_config()
# </loadWorkspace>
scorepy_content = "import json\nimport numpy as np\nimport os\nimport pickle\nfrom sklearn.externals import joblib\nfrom sklearn.linear_model import LogisticRegression\n\nfrom azureml.core.model import Model\n\ndef init():\n global model\n # retreive the path to the model file using the model name\n model_path = Model.get_model_path('sklearn_mnist')\n model = joblib.load(model_path)\n\ndef run(raw_data):\n data = np.array(json.loads(raw_data)['data'])\n # make prediction\n y_hat = model.predict(data)\n return json.dumps(y_hat.tolist())"
print(scorepy_content)
with open("score.py","w") as f:
f.write(scorepy_content)
# PREREQ: create environment file
from azureml.core.conda_dependencies import CondaDependencies
myenv = CondaDependencies()
myenv.add_conda_package("scikit-learn")
with open("myenv.yml","w") as f:
f.write(myenv.serialize_to_string())
#<configImage>
from azureml.core.image import ContainerImage
image_config = ContainerImage.image_configuration(execution_script = "score.py",
runtime = "python",
conda_file = "myenv.yml",
description = "Image with mnist model",
tags = {"data": "mnist", "type": "classification"}
)
#</configImage>
# <configAci>
from azureml.core.webservice import AciWebservice
aciconfig = AciWebservice.deploy_configuration(cpu_cores = 1,
memory_gb = 1,
tags = {"data": "mnist", "type": "classification"},
description = 'Handwriting recognition')
# </configAci>
#<registerModel>
from azureml.core.model import Model
model_name = "sklearn_mnist"
model = Model.register(model_path = "sklearn_mnist_model.pkl",
model_name = model_name,
tags = {"data": "mnist", "type": "classification"},
description = "Mnist handwriting recognition",
workspace = ws)
#</registerModel>
# <retrieveModel>
from azureml.core.model import Model
model_name = "sklearn_mnist"
model=Model(ws, model_name)
# </retrieveModel>
# ## DEPLOY FROM REGISTERED MODEL
# <option2Deploy>
from azureml.core.webservice import Webservice
service_name = 'aci-mnist-2'
service = Webservice.deploy_from_model(deployment_config = aciconfig,
image_config = image_config,
models = [model], # this is the registered model object
name = service_name,
workspace = ws)
service.wait_for_deployment(show_output = True)
print(service.state)
# </option2Deploy>
service.delete()
# ## DEPLOY FROM IMAGE
# <option3CreateImage>
from azureml.core.image import ContainerImage
image = ContainerImage.create(name = "myimage1",
models = [model], # this is the registered model object
image_config = image_config,
workspace = ws)
image.wait_for_creation(show_output = True)
# </option3CreateImage>
# <option3Deploy>
from azureml.core.webservice import Webservice
service_name = 'aci-mnist-13'
service = Webservice.deploy_from_image(deployment_config = aciconfig,
image = image,
name = service_name,
workspace = ws)
service.wait_for_deployment(show_output = True)
print(service.state)
# </option3Deploy>
service.delete()
# ## DEPLOY FROM MODEL FILE
# First change score.py!
scorepy_content = "import json\nimport numpy as np\nimport os\nimport pickle\nfrom sklearn.externals import joblib\nfrom sklearn.linear_model import LogisticRegression\n\nfrom azureml.core.model import Model\n\ndef init():\n global model\n # retreive the path to the model file using the model name\n model_path = Model.get_model_path('sklearn_mnist_model.pkl')\n model = joblib.load(model_path)\n\ndef run(raw_data):\n data = np.array(json.loads(raw_data)['data'])\n # make prediction\n y_hat = model.predict(data)\n return json.dumps(y_hat.tolist())"
with open("score.py","w") as f:
f.write(scorepy_content)
# <option1Deploy>
from azureml.core.webservice import Webservice
service_name = 'aci-mnist-1'
service = Webservice.deploy(deployment_config = aciconfig,
image_config = image_config,
model_paths = ['sklearn_mnist_model.pkl'],
name = service_name,
workspace = ws)
service.wait_for_deployment(show_output = True)
print(service.state)
# </option1Deploy>
# <testService>
# Load Data
import os
import urllib
os.makedirs('./data', exist_ok = True)
urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz', filename = './data/test-images.gz')
from utils import load_data
X_test = load_data('./data/test-images.gz', False) / 255.0
from sklearn import datasets
import numpy as np
import json
# find 5 random samples from test set
n = 5
sample_indices = np.random.permutation(X_test.shape[0])[0:n]
test_samples = json.dumps({"data": X_test[sample_indices].tolist()})
test_samples = bytes(test_samples, encoding = 'utf8')
# predict using the deployed model
prediction = service.run(input_data = test_samples)
print(prediction)
# </testService>
# <deleteService>
service.delete()
# </deleteService>

View File

@@ -0,0 +1,27 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
import gzip
import numpy as np
import struct
# load compressed MNIST gz files and return numpy arrays
def load_data(filename, label=False):
with gzip.open(filename) as gz:
struct.unpack('I', gz.read(4))
n_items = struct.unpack('>I', gz.read(4))
if not label:
n_rows = struct.unpack('>I', gz.read(4))[0]
n_cols = struct.unpack('>I', gz.read(4))[0]
res = np.frombuffer(gz.read(n_items[0] * n_rows * n_cols), dtype=np.uint8)
res = res.reshape(n_items[0], n_rows * n_cols)
else:
res = np.frombuffer(gz.read(n_items[0]), dtype=np.uint8)
res = res.reshape(n_items[0], 1)
return res
# one-hot encode a 1-D array
def one_hot_encode(array, num_of_classes):
return np.eye(num_of_classes)[array.reshape(-1)]

View File

@@ -0,0 +1,39 @@
# Code for Local computer and Submit training run sections
# Check core SDK version number
import azureml.core
print("SDK version:", azureml.core.VERSION)
#<run_local>
from azureml.core.runconfig import RunConfiguration
# Edit a run configuration property on the fly.
run_local = RunConfiguration()
run_local.environment.python.user_managed_dependencies = True
#</run_local>
from azureml.core import Workspace
ws = Workspace.from_config()
# Set up an experiment
# <experiment>
from azureml.core import Experiment
experiment_name = 'my_experiment'
exp = Experiment(workspace=ws, name=experiment_name)
# </experiment>
# Submit the experiment using the run configuration
#<local_submit>
from azureml.core import ScriptRunConfig
import os
script_folder = os.getcwd()
src = ScriptRunConfig(source_directory = script_folder, script = 'train.py', run_config = run_local)
run = exp.submit(src)
run.wait_for_completion(show_output = True)
#</local_submit>

View File

@@ -0,0 +1,48 @@
# Code for Azure Machine Learning Compute - Run-based creation
# Check core SDK version number
import azureml.core
print("SDK version:", azureml.core.VERSION)
from azureml.core import Workspace
ws = Workspace.from_config()
# Set up an experiment
from azureml.core import Experiment
experiment_name = 'my-experiment'
script_folder= "./"
exp = Experiment(workspace=ws, name=experiment_name)
#<run_temp_compute>
from azureml.core.compute import ComputeTarget, AmlCompute
# First, list the supported VM families for Azure Machine Learning Compute
print(AmlCompute.supported_vmsizes(workspace=ws))
from azureml.core.runconfig import RunConfiguration
# Create a new runconfig object
run_temp_compute = RunConfiguration()
# Signal that you want to use AmlCompute to execute the script
run_temp_compute.target = "amlcompute"
# AmlCompute is created in the same region as your workspace
# Set the VM size for AmlCompute from the list of supported_vmsizes
run_temp_compute.amlcompute.vm_size = 'STANDARD_D2_V2'
#</run_temp_compute>
# Submit the experiment using the run configuration
from azureml.core import ScriptRunConfig
src = ScriptRunConfig(source_directory = script_folder, script = 'train.py', run_config = run_temp_compute)
run = exp.submit(src)
run.wait_for_completion(show_output = True)

View File

@@ -0,0 +1,70 @@
# Code for Azure Machine Learning Compute - Persistent compute
# Check core SDK version number
import azureml.core
print("SDK version:", azureml.core.VERSION)
from azureml.core import Workspace
ws = Workspace.from_config()
# Set up an experiment
from azureml.core import Experiment
experiment_name = 'my-experiment'
script_folder= "./"
exp = Experiment(workspace=ws, name=experiment_name)
#<cpu_cluster>
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
# Choose a name for your CPU cluster
cpu_cluster_name = "cpucluster"
# Verify that cluster does not exist already
try:
cpu_cluster = ComputeTarget(workspace=ws, name=cpu_cluster_name)
print('Found existing cluster, use it.')
except ComputeTargetException:
# To use a different region for the compute, add a location='<region>' parameter
compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',
max_nodes=4)
cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, compute_config)
cpu_cluster.wait_for_completion(show_output=True)
#</cpu_cluster>
#<run_amlcompute>
from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.runconfig import DEFAULT_CPU_IMAGE
# Create a new runconfig object
run_amlcompute = RunConfiguration()
# Use the cpu_cluster you created above.
run_amlcompute.target = cpu_cluster
# Enable Docker
run_amlcompute.environment.docker.enabled = True
# Set Docker base image to the default CPU-based image
run_amlcompute.environment.docker.base_image = DEFAULT_CPU_IMAGE
# Use conda_dependencies.yml to create a conda environment in the Docker image for execution
run_amlcompute.environment.python.user_managed_dependencies = False
# Specify CondaDependencies obj, add necessary packages
run_amlcompute.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])
#</run_amlcompute>
# Submit the experiment using the run configuration
#<amlcompute_submit>
from azureml.core import ScriptRunConfig
src = ScriptRunConfig(source_directory = script_folder, script = 'train.py', run_config = run_amlcompute)
run = exp.submit(src)
run.wait_for_completion(show_output = True)
#</amlcompute_submit>

View File

@@ -0,0 +1,26 @@
# Code for Remote virtual machines
compute_target_name = "sheri-linuxvm"
#<run_dsvm>
import azureml.core
from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies
run_dsvm = RunConfiguration(framework = "python")
# Set the compute target to the Linux DSVM
run_dsvm.target = compute_target_name
# Use Docker in the remote VM
run_dsvm.environment.docker.enabled = True
# Use the CPU base image
# To use GPU in DSVM, you must also use the GPU base Docker image "azureml.core.runconfig.DEFAULT_GPU_IMAGE"
run_dsvm.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE
print('Base Docker image is:', run_dsvm.environment.docker.base_image)
# Specify the CondaDependencies object
run_dsvm.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])
#</run_dsvm>
print(run_dsvm)

View File

@@ -0,0 +1,27 @@
from azureml.core import Workspace
ws = Workspace.from_config()
from azureml.core.compute import ComputeTarget
# refers to an existing compute resource attached to the workspace!
hdi_compute = ComputeTarget(workspace=ws, name='sherihdi')
#<run_hdi>
from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies
# use pyspark framework
run_hdi = RunConfiguration(framework="pyspark")
# Set compute target to the HDI cluster
run_hdi.target = hdi_compute.name
# specify CondaDependencies object to ask system installing numpy
cd = CondaDependencies()
cd.add_conda_package('numpy')
run_hdi.environment.python.conda_dependencies = cd
#</run_hdi>
print(run_hdi)

View File

@@ -0,0 +1,9 @@
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license.
import numpy as np
def get_alphas():
# list of numbers from 0.0 to 1.0 with a 0.05 interval
return np.arange(0.0, 1.0, 0.05)

View File

@@ -0,0 +1,52 @@
# Code for Remote virtual machines
compute_target_name = "attach-dsvm"
#<run_dsvm>
import azureml.core
from azureml.core.runconfig import RunConfiguration, DEFAULT_CPU_IMAGE
from azureml.core.conda_dependencies import CondaDependencies
run_dsvm = RunConfiguration(framework = "python")
# Set the compute target to the Linux DSVM
run_dsvm.target = compute_target_name
# Use Docker in the remote VM
run_dsvm.environment.docker.enabled = True
# Use the CPU base image
# To use GPU in DSVM, you must also use the GPU base Docker image "azureml.core.runconfig.DEFAULT_GPU_IMAGE"
run_dsvm.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE
print('Base Docker image is:', run_dsvm.environment.docker.base_image)
# Prepare the Docker and conda environment automatically when they're used for the first time
run_dsvm.prepare_environment = True
# Specify the CondaDependencies object
run_dsvm.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])
#</run_dsvm>
hdi_compute.name = "blah"
from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies
# use pyspark framework
hdi_run_config = RunConfiguration(framework="pyspark")
# Set compute target to the HDI cluster
hdi_run_config.target = hdi_compute.name
# specify CondaDependencies object to ask system installing numpy
cd = CondaDependencies()
cd.add_conda_package('numpy')
hdi_run_config.environment.python.conda_dependencies = cd
#<run_hdi>
from azureml.core.runconfig import RunConfiguration
# Configure the HDInsight run
# Load the runconfig object from the myhdi.runconfig file generated in the previous attach operation
run_hdi = RunConfiguration.load(project_object = project, run_name = 'myhdi')
# Ask the system to prepare the conda environment automatically when it's used for the first time
run_hdi.auto_prepare_environment = True

View File

@@ -0,0 +1,25 @@
# Code for What's a run configuration
# <run_system_managed>
from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies
run_system_managed = RunConfiguration()
# Specify the conda dependencies with scikit-learn
run_system_managed.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])
# </run_system_managed>
print(run_system_managed)
# <run_user_managed>
from azureml.core.runconfig import RunConfiguration
run_user_managed = RunConfiguration()
run_user_managed.environment.python.user_managed_dependencies = True
# Choose a specific Python environment by pointing to a Python path. For example:
# run_config.environment.python.interpreter_path = '/home/ninghai/miniconda3/envs/sdk2/bin/python'
# </run_user_managed>
print(run_user_managed)

View File

@@ -0,0 +1,45 @@
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license.
from sklearn.datasets import load_diabetes
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from azureml.core.run import Run
from sklearn.externals import joblib
import os
import numpy as np
import mylib
os.makedirs('./outputs', exist_ok=True)
X, y = load_diabetes(return_X_y=True)
run = Run.get_context()
X_train, X_test, y_train, y_test = train_test_split(X, y,
test_size=0.2,
random_state=0)
data = {"train": {"X": X_train, "y": y_train},
"test": {"X": X_test, "y": y_test}}
# list of numbers from 0.0 to 1.0 with a 0.05 interval
alphas = mylib.get_alphas()
for alpha in alphas:
# Use Ridge algorithm to create a regression model
reg = Ridge(alpha=alpha)
reg.fit(data["train"]["X"], data["train"]["y"])
preds = reg.predict(data["test"]["X"])
mse = mean_squared_error(preds, data["test"]["y"])
run.log('alpha', alpha)
run.log('mse', mse)
model_file_name = 'ridge_{0:.2f}.pkl'.format(alpha)
# save model in the outputs folder so it automatically get uploaded
with open(model_file_name, "wb") as file:
joblib.dump(value=reg, filename=os.path.join('./outputs/',
model_file_name))
print('alpha is {0:.2f}, and mse is {1:0.2f}'.format(alpha, mse))

View File

@@ -0,0 +1,55 @@
# code snippets for the quickstart-create-workspace-with-python article
# <import>
import azureml.core
print(azureml.core.VERSION)
# </import>
# this is NOT a snippet. If this code changes, go fix it in the article!
from azureml.core import Workspace
ws = Workspace.create(name='myworkspace',
subscription_id='<subscription-id>',
resource_group='myresourcegroup',
create_resource_group=True,
location='eastus2' # or other supported Azure region
)
# <getDetails>
ws.get_details()
# </getDetails>
# <writeConfig>
# Create the configuration file.
ws.write_config()
# Use this code to load the workspace from
# other scripts and notebooks in this directory.
# ws = Workspace.from_config()
# </writeConfig>
# <useWs>
from azureml.core import Experiment
# Create a new experiment in your workspace.
exp = Experiment(workspace=ws, name='myexp')
# Start a run and start the logging service.
run = exp.start_logging()
# Log a single number.
run.log('my magic number', 42)
# Log a list (Fibonacci numbers).
run.log_list('my list', [1, 1, 2, 3, 5, 8, 13, 21, 34, 55])
# Finish the run.
run.complete()
# </useWs>
# <viewLog>
print(run.get_portal_url())
# </viewLog>
# <delete>
ws.delete(delete_dependent_resources=True)
# </delete>

View File

@@ -0,0 +1,67 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Testing notebook include"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"name": "import"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Azure ML SDK Version: 1.0.83\n"
]
}
],
"source": [
"%matplotlib inline\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"\n",
"import azureml.core\n",
"from azureml.core import Workspace\n",
"\n",
"# check core SDK version number\n",
"print(\"Azure ML SDK Version: \", azureml.core.VERSION)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"celltoolbar": "Edit Metadata",
"kernelspec": {
"display_name": "Python 3.6 - AzureML",
"language": "python",
"name": "python3-azureml"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

Binary file not shown. (Before: 26 KiB)

View File

@@ -1,14 +1,14 @@
# ONNX on Azure Machine Learning # ONNX on Azure Machine Learning
These tutorials show how to create and deploy [ONNX](http://onnx.ai) models in Azure Machine Learning environments using [ONNX Runtime](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-build-deploy-onnx) for inference. Once deployed as a web service, you can ping the model with your own set of images to be analyzed! These tutorials show how to create and deploy [ONNX](http://onnx.ai) models using Azure Machine Learning and the [ONNX Runtime](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-build-deploy-onnx).
Once deployed as web services, you can ping the models with your own images to be analyzed!
## Tutorials ## Tutorials
- [Obtain ONNX model from ONNX Model Zoo and deploy with ONNX Runtime inference - Handwritten Digit Classification (MNIST)](https://github.com/Azure/MachineLearningNotebooks/blob/master/onnx/onnx-inference-mnist-deploy.ipynb) - [Obtain ONNX model from ONNX Model Zoo and deploy - ResNet50](https://github.com/Azure/MachineLearningNotebooks/blob/master/onnx/onnx-modelzoo-aml-deploy-resnet50.ipynb)
- [Obtain ONNX model from ONNX Model Zoo and deploy with ONNX Runtime inference - Facial Expression Recognition (Emotion FER+)](https://github.com/Azure/MachineLearningNotebooks/blob/master/onnx/onnx-inference-facial-emotion-recognition-deploy.ipynb)
- [Obtain ONNX model from ONNX Model Zoo and deploy with ONNX Runtime inference - Image Recognition (ResNet50)](https://github.com/Azure/MachineLearningNotebooks/blob/master/onnx/onnx-modelzoo-aml-deploy-resnet50.ipynb)
- [Convert ONNX model from CoreML and deploy - TinyYOLO](https://github.com/Azure/MachineLearningNotebooks/blob/master/onnx/onnx-convert-aml-deploy-tinyyolo.ipynb) - [Convert ONNX model from CoreML and deploy - TinyYOLO](https://github.com/Azure/MachineLearningNotebooks/blob/master/onnx/onnx-convert-aml-deploy-tinyyolo.ipynb)
- [Train ONNX model in PyTorch and deploy - MNIST](https://github.com/Azure/MachineLearningNotebooks/blob/master/onnx/onnx-train-pytorch-aml-deploy-mnist.ipynb) - [Train ONNX model in PyTorch and deploy - MNIST](https://github.com/Azure/MachineLearningNotebooks/blob/master/onnx/onnx-train-pytorch-aml-deploy-mnist.ipynb)
- [Handwritten Digit Classification (MNIST) using ONNX Runtime on AzureML](https://github.com/Azure/MachineLearningNotebooks/blob/master/onnx/onnx-inference-mnist.ipynb)
- [Facial Expression Recognition using ONNX Runtime on AzureML](https://github.com/Azure/MachineLearningNotebooks/blob/master/onnx/onnx-inference-emotion-recognition.ipynb)
## Documentation ## Documentation
- [ONNX Runtime Python API Documentation](http://aka.ms/onnxruntime-python) - [ONNX Runtime Python API Documentation](http://aka.ms/onnxruntime-python)
@@ -21,8 +21,7 @@ These tutorials show how to create and deploy [ONNX](http://onnx.ai) models in A
## License ## License
Copyright (c) Microsoft Corporation. All rights reserved. Copyright (c) Microsoft Corporation. All rights reserved.
Licensed under the MIT License. Licensed under the MIT License.
## Acknowledgements
These tutorials were developed by Vinitra Swamy and Prasanth Pulavarthi of the Microsoft AI Frameworks team and adapted for presentation at Microsoft Ignite 2018.

View File

@@ -59,9 +59,8 @@
"You'll need to run the following commands to use this tutorial:\n", "You'll need to run the following commands to use this tutorial:\n",
"\n", "\n",
"```sh\n", "```sh\n",
"pip install coremltools\n",
"pip install onnxmltools\n", "pip install onnxmltools\n",
"pip install coremltools # use this on Linux and Mac\n",
"pip install git+https://github.com/apple/coremltools # use this on Windows\n",
"```" "```"
] ]
}, },
@@ -80,10 +79,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"import urllib.request\n", "!wget https://s3-us-west-2.amazonaws.com/coreml-models/TinyYOLO.mlmodel"
"\n",
"onnx_model_url = \"https://s3-us-west-2.amazonaws.com/coreml-models/TinyYOLO.mlmodel\"\n",
"urllib.request.urlretrieve(onnx_model_url, filename=\"TinyYOLO.mlmodel\")\n"
] ]
}, },
{ {
@@ -177,9 +173,9 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"models = ws.models\n", "models = ws.models()\n",
"for name, m in models.items():\n", "for m in models:\n",
" print(\"Name:\", name,\"\\tVersion:\", m.version, \"\\tDescription:\", m.description, m.tags)" " print(\"Name:\", m.name,\"\\tVersion:\", m.version, \"\\tDescription:\", m.description, m.tags)"
] ]
}, },
{ {
@@ -248,7 +244,7 @@
"source": [ "source": [
"from azureml.core.conda_dependencies import CondaDependencies \n", "from azureml.core.conda_dependencies import CondaDependencies \n",
"\n", "\n",
"myenv = CondaDependencies.create(pip_packages=[\"numpy\",\"onnxruntime\",\"azureml-core\"])\n", "myenv = CondaDependencies.create(pip_packages=[\"numpy\",\"onnxruntime\"])\n",
"\n", "\n",
"with open(\"myenv.yml\",\"w\") as f:\n", "with open(\"myenv.yml\",\"w\") as f:\n",
" f.write(myenv.serialize_to_string())" " f.write(myenv.serialize_to_string())"

View File

@@ -12,7 +12,7 @@
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": [ "source": [
"# Facial Expression Recognition (FER+) using ONNX Runtime on Azure ML\n", "# Facial Expression Recognition (Emotion FER+) using ONNX Runtime on Azure ML\n",
"\n", "\n",
"This example shows how to deploy an image classification neural network using the Facial Expression Recognition ([FER](https://www.kaggle.com/c/challenges-in-representation-learning-facial-expression-recognition-challenge/data)) dataset and Open Neural Network eXchange format ([ONNX](http://aka.ms/onnxdocarticle)) on the Azure Machine Learning platform. This tutorial will show you how to deploy a FER+ model from the [ONNX model zoo](https://github.com/onnx/models), use it to make predictions using ONNX Runtime Inference, and deploy it as a web service in Azure.\n", "This example shows how to deploy an image classification neural network using the Facial Expression Recognition ([FER](https://www.kaggle.com/c/challenges-in-representation-learning-facial-expression-recognition-challenge/data)) dataset and Open Neural Network eXchange format ([ONNX](http://aka.ms/onnxdocarticle)) on the Azure Machine Learning platform. This tutorial will show you how to deploy a FER+ model from the [ONNX model zoo](https://github.com/onnx/models), use it to make predictions using ONNX Runtime Inference, and deploy it as a web service in Azure.\n",
"\n", "\n",
@@ -158,7 +158,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"models = ws.models\n", "models = ws.models()\n",
"for name, m in models.items():\n", "for name, m in models.items():\n",
" print(\"Name:\", name,\"\\tVersion:\", m.version, \"\\tDescription:\", m.description, m.tags)" " print(\"Name:\", name,\"\\tVersion:\", m.version, \"\\tDescription:\", m.description, m.tags)"
] ]
@@ -325,7 +325,11 @@
"source": [ "source": [
"from azureml.core.conda_dependencies import CondaDependencies \n", "from azureml.core.conda_dependencies import CondaDependencies \n",
"\n", "\n",
"myenv = CondaDependencies.create(pip_packages=[\"numpy\", \"onnxruntime\", \"azureml-core\"])\n", "myenv = CondaDependencies()\n",
"myenv.add_pip_package(\"numpy\")\n",
"myenv.add_pip_package(\"azureml-core\")\n",
"myenv.add_pip_package(\"onnxruntime\")\n",
"\n",
"\n", "\n",
"with open(\"myenv.yml\",\"w\") as f:\n", "with open(\"myenv.yml\",\"w\") as f:\n",
" f.write(myenv.serialize_to_string())" " f.write(myenv.serialize_to_string())"
@@ -355,7 +359,7 @@
" tags = {\"demo\": \"onnx\"})\n", " tags = {\"demo\": \"onnx\"})\n",
"\n", "\n",
"\n", "\n",
"image = ContainerImage.create(name = \"onnximage\",\n", "image = ContainerImage.create(name = \"onnxtest\",\n",
" # this is the model object\n", " # this is the model object\n",
" models = [model],\n", " models = [model],\n",
" image_config = image_config,\n", " image_config = image_config,\n",
@@ -483,6 +487,7 @@
" emotions = []\n", " emotions = []\n",
" for i in range(N):\n", " for i in range(N):\n",
" emotions.append(emotion_keys[classes[i]])\n", " emotions.append(emotion_keys[classes[i]])\n",
" \n",
" return emotions\n", " return emotions\n",
"\n", "\n",
"def softmax(x):\n", "def softmax(x):\n",
@@ -550,7 +555,7 @@
" tensor.ParseFromString(f.read())\n", " tensor.ParseFromString(f.read())\n",
" \n", " \n",
" output_data = numpy_helper.to_array(tensor)\n", " output_data = numpy_helper.to_array(tensor)\n",
" output_processed = emotion_map(postprocess(output_data[0]))[0]\n", " output_processed = emotion_map(postprocess(output_data))[0]\n",
" test_outputs.append(output_processed)" " test_outputs.append(output_processed)"
] ]
}, },
@@ -700,9 +705,7 @@
"# Any PNG or JPG image file should work\n", "# Any PNG or JPG image file should work\n",
"# Make sure to include the entire path with // instead of /\n", "# Make sure to include the entire path with // instead of /\n",
"\n", "\n",
"# e.g. your_test_image = \"C:/Users/vinitra.swamy/Pictures/face.png\"\n", "# e.g. your_test_image = \"C://Users//vinitra.swamy//Pictures//emotion_test_images//img_1.png\"\n",
"\n",
"your_test_image = \"<path to file>\"\n",
"\n", "\n",
"import matplotlib.image as mpimg\n", "import matplotlib.image as mpimg\n",
"\n", "\n",
@@ -756,7 +759,7 @@
"source": [ "source": [
"# remember to delete your service after you are done using it!\n", "# remember to delete your service after you are done using it!\n",
"\n", "\n",
"# aci_service.delete()" "aci_service.delete()"
] ]
}, },
{ {

View File

@@ -165,7 +165,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"models = ws.models\n", "models = ws.models()\n",
"for name, m in models.items():\n", "for name, m in models.items():\n",
" print(\"Name:\", name,\"\\tVersion:\", m.version, \"\\tDescription:\", m.description, m.tags)" " print(\"Name:\", name,\"\\tVersion:\", m.version, \"\\tDescription:\", m.description, m.tags)"
] ]
@@ -297,7 +297,11 @@
"source": [ "source": [
"from azureml.core.conda_dependencies import CondaDependencies \n", "from azureml.core.conda_dependencies import CondaDependencies \n",
"\n", "\n",
"myenv = CondaDependencies.create(pip_packages=[\"numpy\", \"onnxruntime\", \"azureml-core\"])\n", "myenv = CondaDependencies()\n",
"myenv.add_pip_package(\"numpy\")\n",
"myenv.add_pip_package(\"azureml-core\")\n",
"myenv.add_pip_package(\"onnxruntime\")\n",
"\n",
"\n", "\n",
"with open(\"myenv.yml\",\"w\") as f:\n", "with open(\"myenv.yml\",\"w\") as f:\n",
" f.write(myenv.serialize_to_string())" " f.write(myenv.serialize_to_string())"
@@ -311,6 +315,16 @@
"This step will likely take a few minutes." "This step will likely take a few minutes."
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.image import ContainerImage\n",
"help(ContainerImage.image_configuration)"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
@@ -326,7 +340,7 @@
" tags = {\"demo\": \"onnx\"}) \n", " tags = {\"demo\": \"onnx\"}) \n",
"\n", "\n",
"\n", "\n",
"image = ContainerImage.create(name = \"onnximage\",\n", "image = ContainerImage.create(name = \"onnxtest\",\n",
" # this is the model object\n", " # this is the model object\n",
" models = [model],\n", " models = [model],\n",
" image_config = image_config,\n", " image_config = image_config,\n",
@@ -389,7 +403,7 @@
"source": [ "source": [
"from azureml.core.webservice import Webservice\n", "from azureml.core.webservice import Webservice\n",
"\n", "\n",
"aci_service_name = 'onnx-demo-mnist'\n", "aci_service_name = 'onnx-demo-mnist20'\n",
"print(\"Service\", aci_service_name)\n", "print(\"Service\", aci_service_name)\n",
"\n", "\n",
"aci_service = Webservice.deploy_from_image(deployment_config = aciconfig,\n", "aci_service = Webservice.deploy_from_image(deployment_config = aciconfig,\n",
@@ -629,8 +643,6 @@
"\n", "\n",
"# Any PNG or JPG image file should work\n", "# Any PNG or JPG image file should work\n",
"\n", "\n",
"your_test_image = \"<path to file>\"\n",
"\n",
"# e.g. your_test_image = \"C:/Users/vinitra.swamy/Pictures/handwritten_digit.png\"\n", "# e.g. your_test_image = \"C:/Users/vinitra.swamy/Pictures/handwritten_digit.png\"\n",
"\n", "\n",
"import matplotlib.image as mpimg\n", "import matplotlib.image as mpimg\n",
@@ -740,7 +752,7 @@
"source": [ "source": [
"# remember to delete your service after you are done using it!\n", "# remember to delete your service after you are done using it!\n",
"\n", "\n",
"# aci_service.delete()" "aci_service.delete()"
] ]
}, },
{ {

View File

@@ -56,21 +56,11 @@
"source": [ "source": [
"#### Download pre-trained ONNX model from ONNX Model Zoo.\n", "#### Download pre-trained ONNX model from ONNX Model Zoo.\n",
"\n", "\n",
"Download the [ResNet50v2 model and test data](https://s3.amazonaws.com/onnx-model-zoo/resnet/resnet50v2/resnet50v2.tar.gz) and extract it in the same folder as this tutorial notebook.\n" "Download the [ResNet50v2 model and test data](https://s3.amazonaws.com/onnx-model-zoo/resnet/resnet50v2/resnet50v2.tar.gz) and place it in the same folder as this tutorial notebook. You can unzip the file through the following line of code.\n",
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import urllib.request\n",
"\n", "\n",
"onnx_model_url = \"https://s3.amazonaws.com/onnx-model-zoo/resnet/resnet50v2/resnet50v2.tar.gz\"\n", "```sh\n",
"urllib.request.urlretrieve(onnx_model_url, filename=\"resnet50v2.tar.gz\")\n", "(myenv) $ tar xvzf resnet50v2.tar.gz\n",
"\n", "```"
"!tar xvzf resnet50v2.tar.gz"
] ]
}, },
{ {
@@ -140,9 +130,9 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"models = ws.models\n", "models = ws.models()\n",
"for name, m in models.items():\n", "for m in models:\n",
" print(\"Name:\", name,\"\\tVersion:\", m.version, \"\\tDescription:\", m.description, m.tags)" " print(\"Name:\", m.name,\"\\tVersion:\", m.version, \"\\tDescription:\", m.description, m.tags)"
] ]
}, },
{ {
@@ -232,7 +222,7 @@
"source": [ "source": [
"from azureml.core.conda_dependencies import CondaDependencies \n", "from azureml.core.conda_dependencies import CondaDependencies \n",
"\n", "\n",
"myenv = CondaDependencies.create(pip_packages=[\"numpy\",\"onnxruntime\",\"azureml-core\"])\n", "myenv = CondaDependencies.create(pip_packages=[\"numpy\",\"onnxruntime\"])\n",
"\n", "\n",
"with open(\"myenv.yml\",\"w\") as f:\n", "with open(\"myenv.yml\",\"w\") as f:\n",
" f.write(myenv.serialize_to_string())" " f.write(myenv.serialize_to_string())"

View File

@@ -255,22 +255,6 @@
"RunDetails(run).show()" "RunDetails(run).show()"
] ]
}, },
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Alternatively, you can block until the script has completed training before running more code."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"run.wait_for_completion(show_output=True)"
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
@@ -334,9 +318,9 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"models = ws.models\n", "models = ws.models()\n",
"for name, m in models.items():\n", "for m in models:\n",
" print(\"Name:\", name,\"\\tVersion:\", m.version, \"\\tDescription:\", m.description, m.tags)" " print(\"Name:\", m.name,\"\\tVersion:\", m.version, \"\\tDescription:\", m.description, m.tags)"
] ]
}, },
{ {
@@ -408,7 +392,7 @@
"source": [ "source": [
"from azureml.core.conda_dependencies import CondaDependencies \n", "from azureml.core.conda_dependencies import CondaDependencies \n",
"\n", "\n",
"myenv = CondaDependencies.create(pip_packages=[\"numpy\",\"onnxruntime\",\"azureml-core\"])\n", "myenv = CondaDependencies.create(pip_packages=[\"numpy\",\"onnxruntime\"])\n",
"\n", "\n",
"with open(\"myenv.yml\",\"w\") as f:\n", "with open(\"myenv.yml\",\"w\") as f:\n",
" f.write(myenv.serialize_to_string())" " f.write(myenv.serialize_to_string())"

View File

@@ -74,37 +74,21 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"import os\n", "# Batch AI compute\n",
"\n", "cluster_name = \"gpu-cluster\"\n",
"# choose a name for your cluster\n", "try:\n",
"batchai_cluster_name = os.environ.get(\"BATCHAI_CLUSTER_NAME\", \"gpu-cluster\")\n", " cluster = BatchAiCompute(ws, cluster_name)\n",
"cluster_min_nodes = os.environ.get(\"BATCHAI_CLUSTER_MIN_NODES\", 0)\n", " print(\"found existing cluster.\")\n",
"cluster_max_nodes = os.environ.get(\"BATCHAI_CLUSTER_MAX_NODES\", 1)\n", "except:\n",
"vm_size = os.environ.get(\"BATCHAI_CLUSTER_SKU\", \"STANDARD_NC6\")\n", " print(\"creating new cluster\")\n",
"autoscale_enabled = os.environ.get(\"BATCHAI_CLUSTER_AUTOSCALE_ENABLED\", True)\n", " provisioning_config = BatchAiCompute.provisioning_configuration(vm_size = \"STANDARD_NC6\",\n",
"\n", " autoscale_enabled = True,\n",
"\n", " cluster_min_nodes = 0, \n",
"if batchai_cluster_name in ws.compute_targets:\n", " cluster_max_nodes = 1)\n",
" compute_target = ws.compute_targets[batchai_cluster_name]\n",
" if compute_target and type(compute_target) is BatchAiCompute:\n",
" print('found compute target. just use it. ' + batchai_cluster_name)\n",
"else:\n",
" print('creating a new compute target...')\n",
" provisioning_config = BatchAiCompute.provisioning_configuration(vm_size = vm_size, # NC6 is GPU-enabled\n",
" vm_priority = 'lowpriority', # optional\n",
" autoscale_enabled = autoscale_enabled,\n",
" cluster_min_nodes = cluster_min_nodes, \n",
" cluster_max_nodes = cluster_max_nodes)\n",
"\n", "\n",
" # create the cluster\n", " # create the cluster\n",
" compute_target = ComputeTarget.create(ws, batchai_cluster_name, provisioning_config)\n", " cluster = ComputeTarget.create(ws, cluster_name, provisioning_config)\n",
" \n", " cluster.wait_for_completion(show_output=True)"
" # can poll for a minimum number of nodes and for a specific timeout. \n",
" # if no min node count is provided it will use the scale settings for the cluster\n",
" compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)\n",
" \n",
" # For a more detailed view of current BatchAI cluster status, use the 'status' property \n",
" print(compute_target.status.serialize())"
] ]
}, },
{ {
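The hunk above targets the `BatchAiCompute` class, which has since been superseded by `AmlCompute`. A minimal sketch of the equivalent get-or-create pattern with `AmlCompute`, assuming a workspace object `ws` (the cluster name and VM size are placeholders):

```python
from azureml.core.compute import AmlCompute, ComputeTarget
from azureml.core.compute_target import ComputeTargetException

cluster_name = "gpu-cluster"
try:
    # reuse the cluster if one with this name already exists in the workspace
    compute_target = ComputeTarget(workspace=ws, name=cluster_name)
    print("Found existing cluster, using it.")
except ComputeTargetException:
    config = AmlCompute.provisioning_configuration(vm_size="STANDARD_NC6",
                                                   min_nodes=0,
                                                   max_nodes=1)
    compute_target = ComputeTarget.create(ws, cluster_name, config)
    compute_target.wait_for_completion(show_output=True, min_node_count=None,
                                       timeout_in_minutes=20)
```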
@@ -372,7 +356,7 @@
" mode=\"download\" \n", " mode=\"download\" \n",
" )\n", " )\n",
"output_dir = PipelineData(name=\"scores\", \n", "output_dir = PipelineData(name=\"scores\", \n",
" datastore=default_ds, \n", " datastore_name=default_ds.name, \n",
" output_path_on_compute=\"batchscoring/results\")" " output_path_on_compute=\"batchscoring/results\")"
] ]
}, },
@@ -465,7 +449,7 @@
" \"--label_dir\", label_dir, \n", " \"--label_dir\", label_dir, \n",
" \"--output_dir\", output_dir, \n", " \"--output_dir\", output_dir, \n",
" \"--batch_size\", batch_size_param],\n", " \"--batch_size\", batch_size_param],\n",
" target=compute_target,\n", " target=cluster,\n",
" inputs=[input_images, label_dir],\n", " inputs=[input_images, label_dir],\n",
" outputs=[output_dir],\n", " outputs=[output_dir],\n",
" runconfig=batchai_run_config,\n", " runconfig=batchai_run_config,\n",
@@ -606,12 +590,9 @@
"source": [ "source": [
"from azureml.pipeline.core import PublishedPipeline\n", "from azureml.pipeline.core import PublishedPipeline\n",
"\n", "\n",
"rest_endpoint = published_pipeline.endpoint\n", "rest_endpoint = PublishedPipeline.get_endpoint(published_id, ws)\n",
"# specify batch size when running the pipeline\n", "# specify batch size when running the pipeline\n",
"response = requests.post(rest_endpoint, \n", "response = requests.post(rest_endpoint, headers=aad_token, json={\"param_batch_size\": 50})\n",
" headers=aad_token, \n",
" json={\"ExperimentName\": \"batch_scoring\",\n",
" \"ParameterAssignments\": {\"param_batch_size\": 50}})\n",
"run_id = response.json()[\"Id\"]" "run_id = response.json()[\"Id\"]"
] ]
}, },
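Both sides of the hunk above assume an `aad_token` header is already available. One way to obtain it from the SDK, shown here as a hedged sketch (the payload mirrors the newer side of the diff; `published_pipeline` is assumed to come from an earlier cell):

```python
import requests
from azureml.core.authentication import InteractiveLoginAuthentication

auth = InteractiveLoginAuthentication()
aad_token = auth.get_authentication_header()      # {"Authorization": "Bearer <token>"}

rest_endpoint = published_pipeline.endpoint
response = requests.post(rest_endpoint,
                         headers=aad_token,
                         json={"ExperimentName": "batch_scoring",
                               "ParameterAssignments": {"param_batch_size": 50}})
run_id = response.json()["Id"]
```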
@@ -629,7 +610,7 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"from azureml.pipeline.core.run import PipelineRun\n", "from azureml.pipeline.core.run import PipelineRun\n",
"published_pipeline_run = PipelineRun(ws.experiments[\"batch_scoring\"], run_id)\n", "published_pipeline_run = PipelineRun(ws.experiments()[\"batch_scoring\"], run_id)\n",
"\n", "\n",
"RunDetails(published_pipeline_run).show()" "RunDetails(published_pipeline_run).show()"
] ]

14
pr.md
View File

@@ -12,18 +12,6 @@
## Community Blogs ## Community Blogs
- [Power Bat - How Spektacom is Powering the Game of Cricket with Microsoft AI](https://blogs.technet.microsoft.com/machinelearning/2018/10/11/power-bat-how-spektacom-is-powering-the-game-of-cricket-with-microsoft-ai/) - [Power Bat - How Spektacom is Powering the Game of Cricket with Microsoft AI](https://blogs.technet.microsoft.com/machinelearning/2018/10/11/power-bat-how-spektacom-is-powering-the-game-of-cricket-with-microsoft-ai/)
## Ignite 2018 Public Preview Launch Sessions
- [AI with Azure Machine Learning services: Simplifying the data science process](https://myignite.techcommunity.microsoft.com/sessions/66248)
- [AI TechTalk: Azure Machine Learning SDK - a walkthrough](https://myignite.techcommunity.microsoft.com/sessions/66265)
- [AI for an intelligent cloud and intelligent edge: Discover, deploy, and manage with Azure ML services](https://myignite.techcommunity.microsoft.com/sessions/65389)
- [Generating high quality models efficiently using Automated ML and Hyperparameter Tuning](https://myignite.techcommunity.microsoft.com/sessions/66245)
- [AI for pros: Deep learning with PyTorch using the Azure Data Science Virtual Machine and scaling training with Azure ML](https://myignite.techcommunity.microsoft.com/sessions/66244)
## Get-started Videos on YouTube
- [Get started with Python SDK](https://youtu.be/VIsXeTuW3FU)
- [Get started from Azure Portal](https://youtu.be/lCkYUHV86Mk)
## Third Party Articles ## Third Party Articles
- [Azure's new machine learning features embrace Python](https://www.infoworld.com/article/3306840/azure/azures-new-machine-learning-features-embrace-python.html) (InfoWorld) - [Azure's new machine learning features embrace Python](https://www.infoworld.com/article/3306840/azure/azures-new-machine-learning-features-embrace-python.html) (InfoWorld)
- [How to use Azure ML in Windows 10](https://www.infoworld.com/article/3308381/azure/how-to-use-azure-ml-in-windows-10.html) (InfoWorld) - [How to use Azure ML in Windows 10](https://www.infoworld.com/article/3308381/azure/how-to-use-azure-ml-in-windows-10.html) (InfoWorld)
@@ -36,7 +24,7 @@
## Community Projects ## Community Projects
- [Fashion MNIST](https://github.com/amynic/azureml-sdk-fashion) - [Fashion MNIST](https://github.com/amynic/azureml-sdk-fashion)
- Keras on Databricks - Keras on Databricks
- [Samples from CSS](https://github.com/Azure/AMLSamples) - Samples from CSS
## Azure Machine Learning Studio Resources ## Azure Machine Learning Studio Resources

View File

@@ -434,13 +434,12 @@
"from azureml.core.image import Image\n", "from azureml.core.image import Image\n",
"from azureml.core.webservice import Webservice\n", "from azureml.core.webservice import Webservice\n",
"from azureml.contrib.brainwave import BrainwaveWebservice, BrainwaveImage\n", "from azureml.contrib.brainwave import BrainwaveWebservice, BrainwaveImage\n",
"from azureml.exceptions import WebserviceException\n",
"\n", "\n",
"model_name = \"catsanddogs-resnet50-model\"\n", "model_name = \"catsanddogs-resnet50-model\"\n",
"image_name = \"catsanddogs-resnet50-image\"\n", "image_name = \"catsanddogs-resnet50-image\"\n",
"service_name = \"modelbuild-service\"\n", "service_name = \"modelbuild-service\"\n",
"\n", "\n",
"registered_model = Model.register(ws, model_def_path, model_name)\n", "registered_model = Model.register(ws, service_def_path, model_name)\n",
"\n", "\n",
"image_config = BrainwaveImage.image_configuration()\n", "image_config = BrainwaveImage.image_configuration()\n",
"deployment_config = BrainwaveWebservice.deploy_configuration()\n", "deployment_config = BrainwaveWebservice.deploy_configuration()\n",
@@ -449,10 +448,8 @@
" service = Webservice(ws, service_name)\n", " service = Webservice(ws, service_name)\n",
" service.delete()\n", " service.delete()\n",
" service = Webservice.deploy_from_model(ws, service_name, [registered_model], image_config, deployment_config)\n", " service = Webservice.deploy_from_model(ws, service_name, [registered_model], image_config, deployment_config)\n",
" service.wait_for_deployment(True)\n",
"except WebserviceException:\n", "except WebserviceException:\n",
" service = Webservice.deploy_from_model(ws, service_name, [registered_model], image_config, deployment_config)\n", " service = Webservice.deploy_from_model(ws, service_name, [registered_model], image_config, deployment_config)"
" service.wait_for_deployment(True)"
] ]
}, },
{ {

View File

@@ -80,7 +80,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from azureml.contrib.brainwave.models import QuantizedResnet50\n", "from azureml.contrib.brainwave.models import QuantizedResnet50, Resnet50\n",
"model_path = os.path.expanduser('~/models')\n", "model_path = os.path.expanduser('~/models')\n",
"model = QuantizedResnet50(model_path, is_frozen = True)\n", "model = QuantizedResnet50(model_path, is_frozen = True)\n",
"feature_tensor = model.import_graph_def(image_tensors)\n", "feature_tensor = model.import_graph_def(image_tensors)\n",
@@ -198,7 +198,7 @@
" image_config = BrainwaveImage.image_configuration()\n", " image_config = BrainwaveImage.image_configuration()\n",
" deployment_config = BrainwaveWebservice.deploy_configuration()\n", " deployment_config = BrainwaveWebservice.deploy_configuration()\n",
" service = Webservice.deploy_from_model(ws, service_name, [registered_model], image_config, deployment_config)\n", " service = Webservice.deploy_from_model(ws, service_name, [registered_model], image_config, deployment_config)\n",
" service.wait_for_deployment(True)" " service.wait_for_deployment(true)"
] ]
}, },
{ {
@@ -265,7 +265,9 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"service.delete()" "service.delete()\n",
" \n",
"registered_model.delete()"
] ]
}, },
{ {

View File

@@ -404,7 +404,7 @@
" image_config = BrainwaveImage.image_configuration()\n", " image_config = BrainwaveImage.image_configuration()\n",
" deployment_config = BrainwaveWebservice.deploy_configuration()\n", " deployment_config = BrainwaveWebservice.deploy_configuration()\n",
" service = Webservice.deploy_from_model(ws, service_name, [registered_model], image_config, deployment_config)\n", " service = Webservice.deploy_from_model(ws, service_name, [registered_model], image_config, deployment_config)\n",
" service.wait_for_deployment(True)" " service.wait_for_deployment(true)"
] ]
}, },
{ {

Binary file not shown.


View File

@@ -391,15 +391,6 @@
"RunDetails(run).show()" "RunDetails(run).show()"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"run.wait_for_completion(show_output = True)"
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
@@ -480,15 +471,6 @@
"RunDetails(hyperdrive_run).show()" "RunDetails(hyperdrive_run).show()"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"hyperdrive_run.wait_for_completion(show_output = True)"
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},

View File

@@ -53,5 +53,5 @@ def run(input_data):
pred_probs = softmax(model(img)).detach().numpy()[0] pred_probs = softmax(model(img)).detach().numpy()[0]
index = torch.argmax(output, 1) index = torch.argmax(output, 1)
result = {"label": classes[index], "probability": str(pred_probs[index])} result = json.dumps({"label": classes[index], "probability": str(pred_probs[index])})
return result return result
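The hunk above changes only the return value of the scoring script's `run()` function: newer service versions accept any JSON-serializable object, so the explicit `json.dumps` is dropped. A hedged sketch of the surrounding function, with `preprocess`, `model`, and `classes` treated as placeholders set up in `init()`:

```python
import json
import numpy as np

def run(raw_data):
    # 'preprocess', 'model' and 'classes' are placeholders assumed to exist from init();
    # 'model' is assumed to return a 1-D array of class probabilities
    img = preprocess(json.loads(raw_data)["data"])
    pred_probs = model(img)
    index = int(np.argmax(pred_probs))
    # returning a plain dict is enough; the service serializes it to JSON
    return {"label": classes[index], "probability": str(pred_probs[index])}
```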

View File

@@ -300,7 +300,7 @@
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Now that you have created the compute target, let's see what the workspace's `compute_targets` property returns. You should now see one entry named 'gpucluster' of type BatchAI." "Now that you have created the compute target, let's see what the workspace's `compute_targets()` function returns. You should now see one entry named 'gpucluster' of type BatchAI."
] ]
}, },
{ {
@@ -309,7 +309,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"compute_targets = ws.compute_targets\n", "compute_targets = ws.compute_targets()\n",
"for name, ct in compute_targets.items():\n", "for name, ct in compute_targets.items():\n",
" print(name, ct.type, ct.provisioning_state)" " print(name, ct.type, ct.provisioning_state)"
] ]
@@ -480,15 +480,6 @@
"run" "run"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"run.wait_for_completion(show_output = True)"
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
@@ -775,15 +766,6 @@
"RunDetails(htr).show()" "RunDetails(htr).show()"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"htr.wait_for_completion(show_output = True)"
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
@@ -877,7 +859,7 @@
" # make prediction\n", " # make prediction\n",
" out = output.eval(session = sess, feed_dict = {X: data})\n", " out = output.eval(session = sess, feed_dict = {X: data})\n",
" y_hat = np.argmax(out, axis = 1)\n", " y_hat = np.argmax(out, axis = 1)\n",
" return y_hat.tolist()" " return json.dumps(y_hat.tolist())"
] ]
}, },
{ {
@@ -1097,15 +1079,15 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"models = ws.models\n", "models = ws.models()\n",
"for name, model in models.items():\n", "for name, model in models.items():\n",
" print(\"Model: {}, ID: {}\".format(name, model.id))\n", " print(\"Model: {}, ID: {}\".format(name, model.id))\n",
" \n", " \n",
"images = ws.images\n", "images = ws.images()\n",
"for name, image in images.items():\n", "for name, image in images.items():\n",
" print(\"Image: {}, location: {}\".format(name, image.image_location))\n", " print(\"Image: {}, location: {}\".format(name, image.image_location))\n",
" \n", " \n",
"webservices = ws.webservices\n", "webservices = ws.webservices()\n",
"for name, webservice in webservices.items():\n", "for name, webservice in webservices.items():\n",
" print(\"Webservice: {}, scoring URI: {}\".format(name, webservice.scoring_uri))" " print(\"Webservice: {}, scoring URI: {}\".format(name, webservice.scoring_uri))"
] ]

View File

@@ -1,2 +0,0 @@
/data/
/tf-distr-hvd/

View File

@@ -1 +0,0 @@
/tf-distr-ps/

View File

@@ -0,0 +1,321 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# Script adapted from:
# 1. https://github.com/Microsoft/CNTK/blob/v2.0/Tutorials/CNTK_103A_MNIST_DataLoader.ipynb
# 2. https://github.com/Microsoft/CNTK/blob/v2.0/Tutorials/CNTK_103C_MNIST_MultiLayerPerceptron.ipynb
# ===================================================================================================
"""Train a CNTK multi-layer perceptron on the MNIST dataset."""
from __future__ import print_function
import gzip
import numpy as np
import os
import shutil
import struct
import sys
import time
import cntk as C
from azureml.core.run import Run
import argparse
run = Run.get_submitted_run()
parser = argparse.ArgumentParser()
parser.add_argument('--learning_rate', type=float, default=0.001, help='learning rate')
parser.add_argument('--num_hidden_layers', type=int, default=2, help='number of hidden layers')
parser.add_argument('--minibatch_size', type=int, default=64, help='minibatchsize')
args = parser.parse_args()
# Functions to load MNIST images and unpack into train and test set.
# - loadData reads image data and formats into a 28x28 long array
# - loadLabels reads the corresponding labels data, 1 for each image
# - load packs the downloaded image and labels data into a combined format to be read later by
# CNTK text reader
def loadData(src, cimg):
print('Downloading ' + src)
gzfname, h = urlretrieve(src, './delete.me')
print('Done.')
try:
with gzip.open(gzfname) as gz:
n = struct.unpack('I', gz.read(4))
# Read magic number.
if n[0] != 0x3080000:
raise Exception('Invalid file: unexpected magic number.')
# Read number of entries.
n = struct.unpack('>I', gz.read(4))[0]
if n != cimg:
raise Exception('Invalid file: expected {0} entries.'.format(cimg))
crow = struct.unpack('>I', gz.read(4))[0]
ccol = struct.unpack('>I', gz.read(4))[0]
if crow != 28 or ccol != 28:
raise Exception('Invalid file: expected 28 rows/cols per image.')
# Read data.
res = np.fromstring(gz.read(cimg * crow * ccol), dtype=np.uint8)
finally:
os.remove(gzfname)
return res.reshape((cimg, crow * ccol))
def loadLabels(src, cimg):
print('Downloading ' + src)
gzfname, h = urlretrieve(src, './delete.me')
print('Done.')
try:
with gzip.open(gzfname) as gz:
n = struct.unpack('I', gz.read(4))
# Read magic number.
if n[0] != 0x1080000:
raise Exception('Invalid file: unexpected magic number.')
# Read number of entries.
n = struct.unpack('>I', gz.read(4))
if n[0] != cimg:
raise Exception('Invalid file: expected {0} rows.'.format(cimg))
# Read labels.
res = np.fromstring(gz.read(cimg), dtype=np.uint8)
finally:
os.remove(gzfname)
return res.reshape((cimg, 1))
def try_download(dataSrc, labelsSrc, cimg):
data = loadData(dataSrc, cimg)
labels = loadLabels(labelsSrc, cimg)
return np.hstack((data, labels))
# Save the data files into a format compatible with CNTK text reader
def savetxt(filename, ndarray):
dir = os.path.dirname(filename)
if not os.path.exists(dir):
os.makedirs(dir)
if not os.path.isfile(filename):
print("Saving", filename)
with open(filename, 'w') as f:
labels = list(map(' '.join, np.eye(10, dtype=np.uint).astype(str)))
for row in ndarray:
row_str = row.astype(str)
label_str = labels[row[-1]]
feature_str = ' '.join(row_str[:-1])
f.write('|labels {} |features {}\n'.format(label_str, feature_str))
else:
print("File already exists", filename)
# Read a CTF formatted text (as mentioned above) using the CTF deserializer from a file
def create_reader(path, is_training, input_dim, num_label_classes):
return C.io.MinibatchSource(C.io.CTFDeserializer(path, C.io.StreamDefs(
labels=C.io.StreamDef(field='labels', shape=num_label_classes, is_sparse=False),
features=C.io.StreamDef(field='features', shape=input_dim, is_sparse=False)
)), randomize=is_training, max_sweeps=C.io.INFINITELY_REPEAT if is_training else 1)
# Defines a utility that prints the training progress
def print_training_progress(trainer, mb, frequency, verbose=1):
training_loss = "NA"
eval_error = "NA"
if mb % frequency == 0:
training_loss = trainer.previous_minibatch_loss_average
eval_error = trainer.previous_minibatch_evaluation_average
if verbose:
print("Minibatch: {0}, Loss: {1:.4f}, Error: {2:.2f}%".format(mb, training_loss, eval_error * 100))
return mb, training_loss, eval_error
# Create the network architecture
def create_model(features):
with C.layers.default_options(init=C.layers.glorot_uniform(), activation=C.ops.relu):
h = features
for _ in range(num_hidden_layers):
h = C.layers.Dense(hidden_layers_dim)(h)
r = C.layers.Dense(num_output_classes, activation=None)(h)
return r
if __name__ == '__main__':
run = Run.get_submitted_run()
try:
from urllib.request import urlretrieve
except ImportError:
from urllib import urlretrieve
# Select the right target device when this script is being used:
if 'TEST_DEVICE' in os.environ:
if os.environ['TEST_DEVICE'] == 'cpu':
C.device.try_set_default_device(C.device.cpu())
else:
C.device.try_set_default_device(C.device.gpu(0))
# URLs for the train image and labels data
url_train_image = 'http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz'
url_train_labels = 'http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz'
num_train_samples = 60000
print("Downloading train data")
train = try_download(url_train_image, url_train_labels, num_train_samples)
url_test_image = 'http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz'
url_test_labels = 'http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz'
num_test_samples = 10000
print("Downloading test data")
test = try_download(url_test_image, url_test_labels, num_test_samples)
    # Save the train and test files (prefer our default path for the data)
rank = os.environ.get("OMPI_COMM_WORLD_RANK")
data_dir = os.path.join("outputs", "MNIST")
sentinel_path = os.path.join(data_dir, "complete.txt")
if rank == '0':
print('Writing train text file...')
savetxt(os.path.join(data_dir, "Train-28x28_cntk_text.txt"), train)
print('Writing test text file...')
savetxt(os.path.join(data_dir, "Test-28x28_cntk_text.txt"), test)
with open(sentinel_path, 'w+') as f:
f.write("download complete")
print('Done with downloading data.')
else:
while not os.path.exists(sentinel_path):
time.sleep(0.01)
# Ensure we always get the same amount of randomness
np.random.seed(0)
# Define the data dimensions
input_dim = 784
num_output_classes = 10
# Ensure the training and test data is generated and available for this tutorial.
# We search in two locations in the toolkit for the cached MNIST data set.
data_found = False
for data_dir in [os.path.join("..", "Examples", "Image", "DataSets", "MNIST"),
os.path.join("data_" + str(rank), "MNIST"),
os.path.join("outputs", "MNIST")]:
train_file = os.path.join(data_dir, "Train-28x28_cntk_text.txt")
test_file = os.path.join(data_dir, "Test-28x28_cntk_text.txt")
if os.path.isfile(train_file) and os.path.isfile(test_file):
data_found = True
break
if not data_found:
raise ValueError("Please generate the data by completing CNTK 103 Part A")
print("Data directory is {0}".format(data_dir))
num_hidden_layers = args.num_hidden_layers
hidden_layers_dim = 400
input = C.input_variable(input_dim)
label = C.input_variable(num_output_classes)
z = create_model(input)
# Scale the input to 0-1 range by dividing each pixel by 255.
z = create_model(input / 255.0)
loss = C.cross_entropy_with_softmax(z, label)
label_error = C.classification_error(z, label)
# Instantiate the trainer object to drive the model training
learning_rate = args.learning_rate
lr_schedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)
learner = C.sgd(z.parameters, lr_schedule)
trainer = C.Trainer(z, (loss, label_error), [learner])
# Initialize the parameters for the trainer
minibatch_size = args.minibatch_size
num_samples_per_sweep = 60000
num_sweeps_to_train_with = 10
num_minibatches_to_train = (num_samples_per_sweep * num_sweeps_to_train_with) / minibatch_size
# Create the reader to training data set
reader_train = create_reader(train_file, True, input_dim, num_output_classes)
# Map the data streams to the input and labels.
input_map = {
label: reader_train.streams.labels,
input: reader_train.streams.features
}
# Run the trainer on and perform model training
training_progress_output_freq = 500
errors = []
losses = []
for i in range(0, int(num_minibatches_to_train)):
# Read a mini batch from the training data file
data = reader_train.next_minibatch(minibatch_size, input_map=input_map)
trainer.train_minibatch(data)
batchsize, loss, error = print_training_progress(trainer, i, training_progress_output_freq, verbose=1)
if (error != 'NA') and (loss != 'NA'):
errors.append(float(error))
losses.append(float(loss))
# log the losses
if rank == '0':
run.log_list("Loss", losses)
run.log_list("Error", errors)
# Read the training data
reader_test = create_reader(test_file, False, input_dim, num_output_classes)
test_input_map = {
label: reader_test.streams.labels,
input: reader_test.streams.features,
}
# Test data for trained model
test_minibatch_size = 512
num_samples = 10000
num_minibatches_to_test = num_samples // test_minibatch_size
test_result = 0.0
for i in range(num_minibatches_to_test):
# We are loading test data in batches specified by test_minibatch_size
# Each data point in the minibatch is a MNIST digit image of 784 dimensions
# with one pixel per dimension that we will encode / decode with the
# trained model.
data = reader_test.next_minibatch(test_minibatch_size,
input_map=test_input_map)
eval_error = trainer.test_minibatch(data)
test_result = test_result + eval_error
# Average of evaluation errors of all test minibatches
print("Average test error: {0:.2f}%".format((test_result * 100) / num_minibatches_to_test))
out = C.softmax(z)
# Read the data for evaluation
reader_eval = create_reader(test_file, False, input_dim, num_output_classes)
eval_minibatch_size = 25
eval_input_map = {input: reader_eval.streams.features}
data = reader_test.next_minibatch(eval_minibatch_size, input_map=test_input_map)
img_label = data[label].asarray()
img_data = data[input].asarray()
predicted_label_prob = [out.eval(img_data[i]) for i in range(len(img_data))]
# Find the index with the maximum value for both predicted as well as the ground truth
pred = [np.argmax(predicted_label_prob[i]) for i in range(len(predicted_label_prob))]
gtlabel = [np.argmax(img_label[i]) for i in range(len(img_label))]
print("Label :", gtlabel[:25])
print("Predicted:", pred)
# save model to outputs folder
z.save('outputs/cntk.model')

View File

@@ -176,13 +176,13 @@
"from azureml.core.script_run_config import ScriptRunConfig\n", "from azureml.core.script_run_config import ScriptRunConfig\n",
"import tensorflow as tf\n", "import tensorflow as tf\n",
"\n", "\n",
"logs_dir = os.path.join(os.curdir, \"logs\")\n", "logs_dir = os.curdir + os.sep + \"logs\"\n",
"data_dir = os.path.abspath(os.path.join(os.curdir, \"mnist_data\"))\n", "tensorflow_logs_dir = os.path.join(logs_dir, \"tensorflow\")\n",
"\n", "\n",
"if not path.exists(data_dir):\n", "if not path.exists(tensorflow_logs_dir):\n",
" makedirs(data_dir)\n", " makedirs(tensorflow_logs_dir)\n",
"\n", "\n",
"os.environ[\"TEST_TMPDIR\"] = data_dir\n", "os.environ[\"TEST_TMPDIR\"] = logs_dir\n",
"\n", "\n",
"# Writing logs to ./logs results in their being uploaded to Artifact Service,\n", "# Writing logs to ./logs results in their being uploaded to Artifact Service,\n",
"# and thus, made accessible to our Tensorboard instance.\n", "# and thus, made accessible to our Tensorboard instance.\n",
@@ -191,15 +191,15 @@
"# Create an experiment\n", "# Create an experiment\n",
"exp = Experiment(ws, experiment_name)\n", "exp = Experiment(ws, experiment_name)\n",
"\n", "\n",
"# If you would like the run to go for longer, add --max_steps 5000 to the arguments list:\n",
"# arguments_list += [\"--max_steps\", \"5000\"]\n",
"\n",
"script = ScriptRunConfig(exp_dir,\n", "script = ScriptRunConfig(exp_dir,\n",
" script=\"mnist_with_summaries.py\",\n", " script=\"mnist_with_summaries.py\",\n",
" run_config=run_config,\n", " run_config=run_config)\n",
" arguments=arguments_list)\n",
"\n", "\n",
"run = exp.submit(script)\n", "# If you would like the run to go for longer, add --max_steps 5000 to the arguments list:\n",
"# arguments_list += [\"--max_steps\", \"5000\"]\n",
"kwargs = {}\n",
"kwargs['arguments_list'] = arguments_list\n",
"run = exp.submit(script, kwargs)\n",
"# You can also wait for the run to complete\n", "# You can also wait for the run to complete\n",
"# run.wait_for_completion(show_output=True)\n", "# run.wait_for_completion(show_output=True)\n",
"runs.append(run)" "runs.append(run)"
@@ -373,7 +373,7 @@
"\n", "\n",
"try:\n", "try:\n",
" # If you already have a cluster named this, we don't need to make a new one.\n", " # If you already have a cluster named this, we don't need to make a new one.\n",
" cts = ws.compute_targets \n", " cts = ws.compute_targets() \n",
" compute_target = cts[clust_name]\n", " compute_target = cts[clust_name]\n",
" assert compute_target.type == 'BatchAI'\n", " assert compute_target.type == 'BatchAI'\n",
"except:\n", "except:\n",

52
training/readme.md Normal file
View File

@@ -0,0 +1,52 @@
# Training ML models with Azure ML SDK
These notebook tutorials cover the various scenarios for training machine learning and deep learning models with Azure Machine Learning; a minimal submission sketch follows the list below.
## Sample notebooks
- [01.train-hyperparameter-tune-deploy-with-pytorch](./01.train-hyperparameter-tune-deploy-with-pytorch/01.train-hyperparameter-tune-deploy-with-pytorch.ipynb)
Train, hyperparameter tune, and deploy a PyTorch image classification model that distinguishes bees vs. ants using transfer learning. Azure ML concepts covered:
- Create a remote compute target (Batch AI cluster)
- Upload training data using `Datastore`
- Run a single-node `PyTorch` training job
- Hyperparameter tune model with HyperDrive
- Find and register the best model
- Deploy model to ACI
- [02.distributed-pytorch-with-horovod](./02.distributed-pytorch-with-horovod/02.distributed-pytorch-with-horovod.ipynb)
Train a PyTorch model on the MNIST dataset using distributed training with Horovod. Azure ML concepts covered:
- Create a remote compute target (Batch AI cluster)
- Run a two-node distributed `PyTorch` training job using Horovod
- [03.train-hyperparameter-tune-deploy-with-tensorflow](./03.train-hyperparameter-tune-deploy-with-tensorflow/03.train-hyperparameter-tune-deploy-with-tensorflow.ipynb)
Train, hyperparameter tune, and deploy a TensorFlow model on the MNIST dataset. Azure ML concepts covered:
- Create a remote compute target (Batch AI cluster)
- Upload training data using `Datastore`
- Run a single-node `TensorFlow` training job
- Leverage features of the `Run` object
- Download the trained model
- Hyperparameter tune model with HyperDrive
- Find and register the best model
- Deploy model to ACI
- [04.distributed-tensorflow-with-horovod](./04.distributed-tensorflow-with-horovod/04.distributed-tensorflow-with-horovod.ipynb)
Train a TensorFlow word2vec model using distributed training with Horovod. Azure ML concepts covered:
- Create a remote compute target (Batch AI cluster)
- Upload training data using `Datastore`
- Run a two-node distributed `TensorFlow` training job using Horovod
- [05.distributed-tensorflow-with-parameter-server](./05.distributed-tensorflow-with-parameter-server/05.distributed-tensorflow-with-parameter-server.ipynb)
Train a TensorFlow model on the MNIST dataset using native distributed TensorFlow (parameter server). Azure ML concepts covered:
- Create a remote compute target (Batch AI cluster)
- Run a distributed `TensorFlow` training job with two workers and one parameter server
- [06.distributed-cntk-with-custom-docker](./06.distributed-cntk-with-custom-docker/06.distributed-cntk-with-custom-docker.ipynb)
Train a CNTK model on the MNIST dataset using the Azure ML base `Estimator` with custom Docker image and distributed training. Azure ML concepts covered:
- Create a remote compute target (Batch AI cluster)
- Upload training data using `Datastore`
- Run a base `Estimator` training job using a custom Docker image from Docker Hub
- Distributed CNTK two-node training job via MPI using base `Estimator`
- [07.tensorboard](./07.tensorboard/07.tensorboard.ipynb)
Train a TensorFlow MNIST model locally, on a DSVM, and on Batch AI and view the logs live on TensorBoard. Azure ML concepts covered:
- Run the training job locally with Azure ML and run TensorBoard locally. Start (and stop) an Azure ML `TensorBoard` object to stream and view the logs
- Run the training job on a remote DSVM and stream the logs to TensorBoard
- Run the training job on a remote Batch AI cluster and stream the logs to TensorBoard
- Start a `Tensorboard` instance that displays the logs from all three runs above in a single view
- [08.export-run-history-to-tensorboard](./08.export-run-history-to-tensorboard/08.export-run-history-to-tensorboard.ipynb)
- Start an Azure ML `Experiment` and log metrics to `Run` history
- Export the `Run` history logs to TensorBoard logs
- View the logs in TensorBoard
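A minimal sketch of the submission pattern the notebooks above share, assuming a configured workspace, a provisioned compute target `compute_target`, and a local `./scripts/train.py` entry script (the folder, script, and package list are placeholders):

```python
from azureml.core import Experiment, Workspace
from azureml.train.estimator import Estimator

ws = Workspace.from_config()

est = Estimator(source_directory="./scripts",         # placeholder folder containing train.py
                entry_script="train.py",
                compute_target=compute_target,         # e.g. the Batch AI cluster created earlier
                pip_packages=["torch", "torchvision"])

run = Experiment(ws, "sample-training").submit(est)
run.wait_for_completion(show_output=True)
```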

View File

@@ -58,13 +58,14 @@
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": { "metadata": {
"name": "import",
"tags": [ "tags": [
"check version" "check version"
] ]
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"%matplotlib notebook\n", "%matplotlib inline\n",
"import numpy as np\n", "import numpy as np\n",
"import matplotlib\n", "import matplotlib\n",
"import matplotlib.pyplot as plt\n", "import matplotlib.pyplot as plt\n",
@@ -159,8 +160,8 @@
"autoscale_enabled = os.environ.get(\"BATCHAI_CLUSTER_AUTOSCALE_ENABLED\", True)\n", "autoscale_enabled = os.environ.get(\"BATCHAI_CLUSTER_AUTOSCALE_ENABLED\", True)\n",
"\n", "\n",
"\n", "\n",
"if batchai_cluster_name in ws.compute_targets:\n", "if batchai_cluster_name in ws.compute_targets():\n",
" compute_target = ws.compute_targets[batchai_cluster_name]\n", " compute_target = ws.compute_targets()[batchai_cluster_name]\n",
" if compute_target and type(compute_target) is BatchAiCompute:\n", " if compute_target and type(compute_target) is BatchAiCompute:\n",
" print('found compute target. just use it. ' + batchai_cluster_name)\n", " print('found compute target. just use it. ' + batchai_cluster_name)\n",
"else:\n", "else:\n",
@@ -201,6 +202,13 @@
"Download the MNIST dataset and save the files into a `data` directory locally. Images and labels for both training and testing are downloaded." "Download the MNIST dataset and save the files into a `data` directory locally. Images and labels for both training and testing are downloaded."
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,

View File

@@ -97,7 +97,7 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"%matplotlib notebook\n", "%matplotlib inline\n",
"import numpy as np\n", "import numpy as np\n",
"import matplotlib\n", "import matplotlib\n",
"import matplotlib.pyplot as plt\n", "import matplotlib.pyplot as plt\n",
@@ -134,7 +134,7 @@
"\n", "\n",
"ws = Workspace.from_config()\n", "ws = Workspace.from_config()\n",
"model=Model(ws, 'sklearn_mnist')\n", "model=Model(ws, 'sklearn_mnist')\n",
"model.download(target_dir='.', exists_ok=True)\n", "model.download(target_dir = '.')\n",
"import os \n", "import os \n",
"# verify the downloaded model file\n", "# verify the downloaded model file\n",
"os.stat('./sklearn_mnist_model.pkl')" "os.stat('./sklearn_mnist_model.pkl')"
@@ -296,8 +296,7 @@
" data = np.array(json.loads(raw_data)['data'])\n", " data = np.array(json.loads(raw_data)['data'])\n",
" # make prediction\n", " # make prediction\n",
" y_hat = model.predict(data)\n", " y_hat = model.predict(data)\n",
" # you can return any data type as long as it is JSON-serializable\n", " return json.dumps(y_hat.tolist())"
" return y_hat.tolist()"
] ]
}, },
{ {
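For context, a hedged sketch of the full scoring script around the `run()` change above, assuming the registered model is the scikit-learn pickle from this tutorial (the loader choice is an assumption; the original notebook uses scikit-learn's bundled joblib):

```python
import json
import numpy as np
import joblib                        # assumption: plain joblib; older notebooks use sklearn.externals.joblib
from azureml.core.model import Model

def init():
    global model
    # resolves the path of the registered model inside the service container
    model_path = Model.get_model_path("sklearn_mnist")
    model = joblib.load(model_path)

def run(raw_data):
    data = np.array(json.loads(raw_data)["data"])
    y_hat = model.predict(data)
    # newer SDK versions let run() return any JSON-serializable object directly
    return y_hat.tolist()
```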
@@ -481,7 +480,7 @@
"test_samples = bytes(test_samples, encoding = 'utf8')\n", "test_samples = bytes(test_samples, encoding = 'utf8')\n",
"\n", "\n",
"# predict using the deployed model\n", "# predict using the deployed model\n",
"result = service.run(input_data=test_samples)\n", "result = json.loads(service.run(input_data=test_samples))\n",
"\n", "\n",
"# compare actual value vs. the predicted values:\n", "# compare actual value vs. the predicted values:\n",
"i = 0\n", "i = 0\n",

View File

@@ -393,7 +393,7 @@
"> * Review training results\n", "> * Review training results\n",
"> * Register the best model\n", "> * Register the best model\n",
"\n", "\n",
"Learn more about [how to configure settings for automatic training](https://aka.ms/aml-how-to-configure-auto) or [how to use automatic training on a remote resource](https://aka.ms/aml-how-to-auto-remote)." "Learn more about [how to configure settings for automatic training](https://aka.ms/aml-how-configure-auto) or [how to use automatic training on a remote resource](https://aka.ms/aml-how-to-auto-remote)."
] ]
} }
], ],