Compare commits

22 commits (imatiach-m...shwinne1)

| SHA1 |
|---|
| cf0490ab92 |
| 9f0e817c70 |
| a4d713d19b |
| 91a20a0ff9 |
| a0c510bf42 |
| 116d57c012 |
| 660708db63 |
| 206df82f9b |
| 7cfb2da5b8 |
| e5adb4af3a |
| b849267220 |
| 9891080b70 |
| 2974e86aa0 |
| 0a18161193 |
| c676cc9969 |
| 50f4bc9643 |
| f3c7072735 |
| 44295d9e16 |
| 710fc0bb4b |
| c44dba427f |
| 8066a9263c |
| 054aadffed |
53  .github/ISSUE_TEMPLATE/notebook-issue.md  (vendored)
@@ -1,43 +1,30 @@
---
name: Notebook issue
about: Describe your notebook issue
title: "[Notebook] DESCRIPTIVE TITLE"
labels: notebook
about: Create a report to help us improve
title: "[Notebook issue]"
labels: ''
assignees: ''

---

### DESCRIPTION: Describe clearly + concisely
**Describe the bug**
A clear and concise description of what the bug is.

Provide the following if applicable:
+ Your Python & SDK version
+ Python Scripts or the full notebook name
+ Pipeline definition
+ Environment definition
+ Example data
+ Any log files.
+ Run and Workspace Id

.
### REPRODUCIBLE: Steps
**To Reproduce**
Steps to reproduce the behavior:
1.

**Expected behavior**
A clear and concise description of what you expected to happen.

.
### EXPECTATION: Clear description


.
### CONFIG/ENVIRONMENT:
```Provide where applicable

## Your Python & SDK version:

## Environment definition:

## Notebook name or Python scripts:

## Run and Workspace Id:

## Pipeline definition:

## Example data:

## Any log files:




```
**Additional context**
Add any other context about the problem here.
@@ -2,6 +2,7 @@

This repository contains example notebooks demonstrating the [Azure Machine Learning](https://azure.microsoft.com/en-us/services/machine-learning-service/) Python SDK, which allows you to build, train, deploy, and manage machine learning solutions using Azure. The AML SDK gives you the choice of local or cloud compute resources, while managing and maintaining the complete data science workflow from the cloud.



## Quick installation
```sh
```
215  build_nb_index.py  (new file)

@@ -0,0 +1,215 @@
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------

### USAGE
#
# 1. Add the following metadata elements to the notebook:
#
#    "friendly_name": "string", friendly name for the notebook
#    "exclude_from_index": true/false, setting true excludes the notebook from the index
#    "order_index": integer, a smaller value moves the notebook closer to the beginning
#    "category": "starter", "tutorial", "training", "deployment" or "other"
#    "tags": [ "featured" ], optional, the only supported tag; highlights the notebook with a :star: symbol
#    "task": "string", description of the notebook task
#    "datasets": [ "dataset 1", "dataset 2" ], list of datasets, can be ["None"]
#    "compute": [ "compute 1", "compute 2" ], list of computes, can be ["None"]
#    "deployment": [ "deployment 1", "deployment 2" ], list of deployment targets, can be ["None"]
#    "framework": [ "fw 1", "fw 2" ], list of ML frameworks, can be ["None"]
#
# 2. Then run
#
#    build_nb_index.py <root folder of notebooks>
#
# 3. The script should produce an index.md file with tables of notebook indices.

### Example metadata section

'''
"metadata": {
    "authors": [
        {
            "name": "cforbe"
        }
    ],
    "kernelspec": {
        "display_name": "Python 3.6",
        "language": "python",
        "name": "python36"
    },
    "language_info": {
        "codemirror_mode": {
            "name": "ipython",
            "version": 3
        },
        "file_extension": ".py",
        "mimetype": "text/x-python",
        "name": "python",
        "nbconvert_exporter": "python",
        "pygments_lexer": "ipython3",
        "version": "3.6.7"
    },
    "msauthor": "trbye",
    "friendly_name": "Prepare data for regression modeling",
    "exclude_from_index": false,
    "order_index": 1,
    "category": "tutorial",
    "tags": [
        "featured"
    ],
    "task": "Regression",
    "datasets": [
        "NYC Taxi"
    ],
    "compute": [
        "local"
    ],
    "deployment": [
        "None"
    ],
    "framework": [
        "Azure ML AutoML"
    ]
}
'''

import os
import json
import sys


# Index-building walk over the notebook folder
def post_process(notebooks_dir):
    indexer = NotebookIndex()
    n_dest = len(notebooks_dir)
    for r, d, f in os.walk(notebooks_dir):
        for file in f:
            # Handle only notebooks
            if file.endswith(".ipynb") and not file.endswith('checkpoint.ipynb'):
                try:
                    file_path = os.path.join(r, file)
                    with open(file_path, 'r') as fin:
                        content = json.load(fin)
                        print(file)
                        indexer.add_to_index(os.path.join(r[n_dest:], file), content["metadata"])
                except Exception as e:
                    print("Problem: ", str(e))
    indexer.write_index("./index.md")


### Customize these to make the index look different

index_template = '''
# Index
Azure Machine Learning is a cloud service that you use to train, deploy, automate, and manage machine learning models. This index should assist in navigating the Azure Machine Learning notebook samples and encourage efficient retrieval of topics and content.


## Getting Started

|Title| Task | Dataset | Training Compute | Deployment Target | ML Framework |
|:----|:-----|:-------:|:----------------:|:-----------------:|:------------:|
GETTING_STARTED_NBS

## Tutorials

|Title| Task | Dataset | Training Compute | Deployment Target | ML Framework |
|:----|:-----|:-------:|:----------------:|:-----------------:|:------------:|
TUTORIAL_NBS

## Training

|Title| Task | Dataset | Training Compute | Deployment Target | ML Framework |
|:----|:-----|:-------:|:----------------:|:-----------------:|:------------:|
TRAINING_NBS

## Deployment

|Title| Task | Dataset | Training Compute | Deployment Target | ML Framework |
|:----|:-----|:-------:|:----------------:|:-----------------:|:------------:|
DEPLOYMENT_NBS

## Other Notebooks
|Title| Task | Dataset | Training Compute | Deployment Target | ML Framework |
|:----|:-----|:-------:|:----------------:|:-----------------:|:------------:|
OTHER_NBS
'''

index_row = '''| NB_SYMBOL[NB_NAME](NB_PATH) | NB_TASK | NB_DATASET | NB_COMPUTE | NB_DEPLOYMENT | NB_FRAMEWORK |'''

index_file = "index.md"

nb_types = ["starter", "tutorial", "training", "deployment", "other"]
replace_strings = ["GETTING_STARTED_NBS", "TUTORIAL_NBS", "TRAINING_NBS", "DEPLOYMENT_NBS", "OTHER_NBS"]


class NotebookIndex:
    def __init__(self):
        self.index = index_template
        self.nb_rows = {}
        for elem in nb_types:
            self.nb_rows[elem] = []

    def add_to_index(self, path_to_notebook, metadata):
        repo_url = "https://github.com/Azure/MachineLearningNotebooks/blob/master/"

        if "exclude_from_index" in metadata:
            if metadata["exclude_from_index"]:
                return

        if "friendly_name" in metadata:
            this_row = index_row.replace("NB_NAME", metadata["friendly_name"])
        else:
            this_name = os.path.basename(path_to_notebook)
            this_row = index_row.replace("NB_NAME", this_name[:-6])

        path_to_notebook = path_to_notebook.replace("\\", "/")
        this_row = this_row.replace("NB_PATH", repo_url + path_to_notebook)

        if "task" in metadata:
            this_row = this_row.replace("NB_TASK", metadata["task"])
        if "datasets" in metadata:
            this_row = this_row.replace("NB_DATASET", ", ".join(metadata["datasets"]))
        if "compute" in metadata:
            this_row = this_row.replace("NB_COMPUTE", ", ".join(metadata["compute"]))
        if "deployment" in metadata:
            this_row = this_row.replace("NB_DEPLOYMENT", ", ".join(metadata["deployment"]))
        if "framework" in metadata:
            this_row = this_row.replace("NB_FRAMEWORK", ", ".join(metadata["framework"]))
        ## Fall back: blank out any placeholder the metadata did not fill
        this_row = this_row.replace("NB_TASK", "")
        this_row = this_row.replace("NB_DATASET", "")
        this_row = this_row.replace("NB_COMPUTE", "")
        this_row = this_row.replace("NB_DEPLOYMENT", "")
        this_row = this_row.replace("NB_FRAMEWORK", "")

        if "tags" in metadata:
            if "featured" in metadata["tags"]:
                this_row = this_row.replace("NB_SYMBOL", ":star:")
        ## Fall back
        this_row = this_row.replace("NB_SYMBOL", "")

        index_order = 9999999
        if "order_index" in metadata:  # key name per the USAGE section above
            index_order = metadata["order_index"]

        if "category" in metadata:
            self.nb_rows[metadata["category"]].append((index_order, this_row))
        else:
            self.nb_rows["other"].append((index_order, this_row))

    def sort_and_stringify(self, section):
        sorted_index = sorted(self.nb_rows[section], key=lambda x: x[0])
        sorted_index = [x[1] for x in sorted_index]
        ## TODO: Make this portable
        return "\n".join(sorted_index)

    def write_index(self, index_file):
        for nb_type, replace_string in zip(nb_types, replace_strings):
            nb_string = self.sort_and_stringify(nb_type)
            self.index = self.index.replace(replace_string, nb_string)
        with open(index_file, "w") as fout:
            fout.write(self.index)


try:
    dest_repo = sys.argv[1]
except IndexError:
    dest_repo = "./MachineLearningNotebooks"

post_process(dest_repo)
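For orientation, here is a minimal sketch of the per-notebook metadata lookup the script performs; the notebook path is hypothetical, but any notebook carrying the metadata fields described in USAGE works:

```python
import json

# Hypothetical path; substitute any notebook in the repository.
with open("how-to-use-azureml/example.ipynb", "r") as f:
    nb = json.load(f)

meta = nb["metadata"]
if not meta.get("exclude_from_index", False):
    # Fall back to a default name when no friendly name is set, as the indexer does.
    print(meta.get("friendly_name", "unnamed"), "->", meta.get("category", "other"))
```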
@@ -103,7 +103,7 @@
"source": [
"import azureml.core\n",
"\n",
"print(\"This notebook was created using version 1.0.57 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.0.55 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},
@@ -214,7 +214,8 @@
"* You do not have permission to create a resource group if it's non-existing.\n",
"* You are not a subscription owner or contributor and no Azure ML workspaces have ever been created in this subscription\n",
"\n",
"If workspace creation fails, please work with your IT admin to provide you with the appropriate permissions or to provision the required resources."
"If workspace creation fails, please work with your IT admin to provide you with the appropriate permissions or to provision the required resources.\n",
"To learn more about the Enterprise SKU, please visit the Pricing and SKU details page."
]
},
{
@@ -230,11 +231,14 @@
"from azureml.core import Workspace\n",
"\n",
"# Create the workspace using the specified parameters\n",
"# To create an Enterprise workspace, please specify the sku = enterprise\n",
"ws = Workspace.create(name = workspace_name,\n",
"                      subscription_id = subscription_id,\n",
"                      resource_group = resource_group, \n",
"                      location = workspace_region,\n",
"                      create_resource_group = True,\n",
"                      sku = basic,\n",
"                      exist_ok = True)\n",
"ws.get_details()\n",
"\n",
@@ -380,4 +384,4 @@
},
"nbformat": 4,
"nbformat_minor": 2
}
}
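As written, `sku = basic` in the hunk above passes an undefined Python name; the SDK expects a string here. A sketch of the presumably intended call, assuming the surrounding notebook variables:

```python
from azureml.core import Workspace

# workspace_name, subscription_id, resource_group and workspace_region are
# assumed to be defined earlier in the notebook.
ws = Workspace.create(name=workspace_name,
                      subscription_id=subscription_id,
                      resource_group=resource_group,
                      location=workspace_region,
                      create_resource_group=True,
                      sku='basic',  # or 'enterprise' for an Enterprise workspace
                      exist_ok=True)
```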
723  contrib/datadrift/azure-ml-datadrift.ipynb  (new file)

@@ -0,0 +1,723 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Track Data Drift between Training and Inference Data in Production \n",
"\n",
"With this notebook, you will learn how to enable the DataDrift service to automatically track and determine whether your inference data is drifting from the data your model was initially trained on. The DataDrift service provides metrics and visualizations to help stakeholders identify which specific features cause the concept drift to occur.\n",
"\n",
"Please email driftfeedback@microsoft.com with any issues. A member from the DataDrift team will respond shortly. \n",
"\n",
"The DataDrift Public Preview API can be found [here](https://docs.microsoft.com/en-us/python/api/azureml-contrib-datadrift/?view=azure-ml-py). "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Prerequisites and Setup"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Install the DataDrift package\n",
"\n",
"Install the azureml-contrib-datadrift, azureml-opendatasets and lightgbm packages before running this notebook.\n",
"```\n",
"pip install azureml-contrib-datadrift\n",
"pip install lightgbm\n",
"```"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Import Dependencies"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import os\n",
"import time\n",
"from datetime import datetime, timedelta\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"import requests\n",
"from azureml.contrib.datadrift import DataDriftDetector, AlertConfiguration\n",
"from azureml.opendatasets import NoaaIsdWeather\n",
"from azureml.core import Dataset, Workspace, Run\n",
"from azureml.core.compute import AksCompute, ComputeTarget\n",
"from azureml.core.conda_dependencies import CondaDependencies\n",
"from azureml.core.experiment import Experiment\n",
"from azureml.core.image import ContainerImage\n",
"from azureml.core.model import Model\n",
"from azureml.core.webservice import Webservice, AksWebservice\n",
"from azureml.widgets import RunDetails\n",
"from sklearn.externals import joblib\n",
"from sklearn.model_selection import train_test_split\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Set up Configuration and Create Azure ML Workspace\n",
"\n",
"If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [configuration notebook](../../../configuration.ipynb) first if you haven't already to establish your connection to the AzureML Workspace."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Please type in your initials/alias. The prefix is prepended to the names of resources created by this notebook. \n",
"prefix = \"dd\"\n",
"\n",
"# NOTE: Please do not change the model_name, as it's required by the score.py file\n",
"model_name = \"driftmodel\"\n",
"image_name = \"{}driftimage\".format(prefix)\n",
"service_name = \"{}driftservice\".format(prefix)\n",
"\n",
"# optionally, set email address to receive an email alert for DataDrift\n",
"email_address = \"\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ws = Workspace.from_config()\n",
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Generate Train/Testing Data\n",
"\n",
"For this demo, we will use NOAA weather data from [Azure Open Datasets](https://azure.microsoft.com/services/open-datasets/). You may replace this step with your own dataset. "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"usaf_list = ['725724', '722149', '723090', '722159', '723910', '720279',\n",
"             '725513', '725254', '726430', '720381', '723074', '726682',\n",
"             '725486', '727883', '723177', '722075', '723086', '724053',\n",
"             '725070', '722073', '726060', '725224', '725260', '724520',\n",
"             '720305', '724020', '726510', '725126', '722523', '703333',\n",
"             '722249', '722728', '725483', '722972', '724975', '742079',\n",
"             '727468', '722193', '725624', '722030', '726380', '720309',\n",
"             '722071', '720326', '725415', '724504', '725665', '725424',\n",
"             '725066']\n",
"\n",
"columns = ['usaf', 'wban', 'datetime', 'latitude', 'longitude', 'elevation', 'windAngle', 'windSpeed', 'temperature', 'stationName', 'p_k']\n",
"\n",
"\n",
"def enrich_weather_noaa_data(noaa_df):\n",
"    hours_in_day = 23\n",
"    week_in_year = 52\n",
"    \n",
"    noaa_df[\"hour\"] = noaa_df[\"datetime\"].dt.hour\n",
"    noaa_df[\"weekofyear\"] = noaa_df[\"datetime\"].dt.week\n",
"    \n",
"    noaa_df[\"sine_weekofyear\"] = noaa_df['datetime'].transform(lambda x: np.sin((2*np.pi*x.dt.week-1)/week_in_year))\n",
"    noaa_df[\"cosine_weekofyear\"] = noaa_df['datetime'].transform(lambda x: np.cos((2*np.pi*x.dt.week-1)/week_in_year))\n",
"\n",
"    noaa_df[\"sine_hourofday\"] = noaa_df['datetime'].transform(lambda x: np.sin(2*np.pi*x.dt.hour/hours_in_day))\n",
"    noaa_df[\"cosine_hourofday\"] = noaa_df['datetime'].transform(lambda x: np.cos(2*np.pi*x.dt.hour/hours_in_day))\n",
"    \n",
"    return noaa_df\n",
"\n",
"def add_window_col(input_df):\n",
"    shift_interval = pd.Timedelta('-7 days') # your X days interval\n",
"    df_shifted = input_df.copy()\n",
"    df_shifted['datetime'] = df_shifted['datetime'] - shift_interval\n",
"    df_shifted.drop(list(input_df.columns.difference(['datetime', 'usaf', 'wban', 'sine_hourofday', 'temperature'])), axis=1, inplace=True)\n",
"\n",
"    # merge, keeping only observations where -1 lag is present\n",
"    df2 = pd.merge(input_df,\n",
"                   df_shifted,\n",
"                   on=['datetime', 'usaf', 'wban', 'sine_hourofday'],\n",
"                   how='inner', # use 'left' to keep observations without lags\n",
"                   suffixes=['', '-7'])\n",
"    return df2\n",
"\n",
"def get_noaa_data(start_time, end_time, cols, station_list):\n",
"    isd = NoaaIsdWeather(start_time, end_time, cols=cols)\n",
"    # Read into Pandas data frame.\n",
"    noaa_df = isd.to_pandas_dataframe()\n",
"    noaa_df = noaa_df.rename(columns={\"stationName\": \"station_name\"})\n",
"    \n",
"    df_filtered = noaa_df[noaa_df[\"usaf\"].isin(station_list)]\n",
"    df_filtered.reset_index(drop=True)\n",
"    \n",
"    # Enrich with time features\n",
"    df_enriched = enrich_weather_noaa_data(df_filtered)\n",
"    \n",
"    return df_enriched\n",
"\n",
"def get_featurized_noaa_df(start_time, end_time, cols, station_list):\n",
"    df_1 = get_noaa_data(start_time - timedelta(days=7), start_time - timedelta(seconds=1), cols, station_list)\n",
"    df_2 = get_noaa_data(start_time, end_time, cols, station_list)\n",
"    noaa_df = pd.concat([df_1, df_2])\n",
"    \n",
"    print(\"Adding window feature\")\n",
"    df_window = add_window_col(noaa_df)\n",
"    \n",
"    cat_columns = df_window.dtypes == object\n",
"    cat_columns = cat_columns[cat_columns == True]\n",
"    \n",
"    print(\"Encoding categorical columns\")\n",
"    df_encoded = pd.get_dummies(df_window, columns=cat_columns.keys().tolist())\n",
"    \n",
"    print(\"Dropping unnecessary columns\")\n",
"    df_featurized = df_encoded.drop(['windAngle', 'windSpeed', 'datetime', 'elevation'], axis=1).dropna().drop_duplicates()\n",
"    \n",
"    return df_featurized"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Train model on Jan 1 - 14, 2009 data\n",
"df = get_featurized_noaa_df(datetime(2009, 1, 1), datetime(2009, 1, 14, 23, 59, 59), columns, usaf_list)\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"label = \"temperature\"\n",
"x_df = df.drop(label, axis=1)\n",
"y_df = df[[label]]\n",
"x_train, x_test, y_train, y_test = train_test_split(df, y_df, test_size=0.2, random_state=223)\n",
"print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)\n",
"\n",
"training_dir = 'outputs/training'\n",
"training_file = \"training.csv\"\n",
"\n",
"# Generate training dataframe to register as Training Dataset\n",
"os.makedirs(training_dir, exist_ok=True)\n",
"training_df = pd.merge(x_train.drop(label, axis=1), y_train, left_index=True, right_index=True)\n",
"training_df.to_csv(training_dir + \"/\" + training_file)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create/Register Training Dataset"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dataset_name = \"dataset\"\n",
"name_suffix = datetime.utcnow().strftime(\"%Y-%m-%d-%H-%M-%S\")\n",
"snapshot_name = \"snapshot-{}\".format(name_suffix)\n",
"\n",
"dstore = ws.get_default_datastore()\n",
"dstore.upload(training_dir, \"data/training\", show_progress=True)\n",
"dpath = dstore.path(\"data/training/training.csv\")\n",
"trainingDataset = Dataset.auto_read_files(dpath, include_path=True)\n",
"trainingDataset = trainingDataset.register(workspace=ws, name=dataset_name, description=\"dset\", exist_ok=True)\n",
"\n",
"datasets = [(Dataset.Scenario.TRAINING, trainingDataset)]\n",
"print(\"dataset registration done.\\n\")\n",
"datasets"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Train and Save Model"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import lightgbm as lgb\n",
"\n",
"train = lgb.Dataset(data=x_train, \n",
"                    label=y_train)\n",
"\n",
"test = lgb.Dataset(data=x_test, \n",
"                   label=y_test,\n",
"                   reference=train)\n",
"\n",
"params = {'learning_rate' : 0.1,\n",
"          'boosting' : 'gbdt',\n",
"          'metric' : 'rmse',\n",
"          'feature_fraction' : 1,\n",
"          'bagging_fraction' : 1,\n",
"          'max_depth': 6,\n",
"          'num_leaves' : 31,\n",
"          'objective' : 'regression',\n",
"          'bagging_freq' : 1,\n",
"          \"verbose\": -1,\n",
"          'min_data_per_leaf': 100}\n",
"\n",
"model = lgb.train(params, \n",
"                  num_boost_round=500,\n",
"                  train_set=train,\n",
"                  valid_sets=[train, test],\n",
"                  verbose_eval=50,\n",
"                  early_stopping_rounds=25)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"model_file = 'outputs/{}.pkl'.format(model_name)\n",
"\n",
"os.makedirs('outputs', exist_ok=True)\n",
"joblib.dump(model, model_file)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Register Model"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"model = Model.register(model_path=model_file,\n",
"                       model_name=model_name,\n",
"                       workspace=ws,\n",
"                       datasets=datasets)\n",
"\n",
"print(model_name, image_name, service_name, model)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Deploy Model To AKS"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Prepare Environment"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn', 'joblib', 'lightgbm', 'pandas'],\n",
"                                 pip_packages=['azureml-monitoring', 'azureml-sdk[automl]'])\n",
"\n",
"with open(\"myenv.yml\",\"w\") as f:\n",
"    f.write(myenv.serialize_to_string())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create Image"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Image creation may take up to 15 minutes.\n",
"\n",
"image_name = image_name + str(model.version)\n",
"\n",
"if not image_name in ws.images:\n",
"    # Use the score.py defined in this directory as the execution script\n",
"    # NOTE: The Model Data Collector must be enabled in the execution script for DataDrift to run correctly\n",
"    image_config = ContainerImage.image_configuration(execution_script=\"score.py\",\n",
"                                                      runtime=\"python\",\n",
"                                                      conda_file=\"myenv.yml\",\n",
"                                                      description=\"Image with weather dataset model\")\n",
"    image = ContainerImage.create(name=image_name,\n",
"                                  models=[model],\n",
"                                  image_config=image_config,\n",
"                                  workspace=ws)\n",
"\n",
"    image.wait_for_creation(show_output=True)\n",
"else:\n",
"    image = ws.images[image_name]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create Compute Target"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"aks_name = 'dd-demo-e2e'\n",
"prov_config = AksCompute.provisioning_configuration()\n",
"\n",
"if not aks_name in ws.compute_targets:\n",
"    aks_target = ComputeTarget.create(workspace=ws,\n",
"                                      name=aks_name,\n",
"                                      provisioning_configuration=prov_config)\n",
"\n",
"    aks_target.wait_for_completion(show_output=True)\n",
"    print(aks_target.provisioning_state)\n",
"    print(aks_target.provisioning_errors)\n",
"else:\n",
"    aks_target=ws.compute_targets[aks_name]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Deploy Service"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"aks_service_name = service_name\n",
"\n",
"if not aks_service_name in ws.webservices:\n",
"    aks_config = AksWebservice.deploy_configuration(collect_model_data=True, enable_app_insights=True)\n",
"    aks_service = Webservice.deploy_from_image(workspace=ws,\n",
"                                               name=aks_service_name,\n",
"                                               image=image,\n",
"                                               deployment_config=aks_config,\n",
"                                               deployment_target=aks_target)\n",
"    aks_service.wait_for_deployment(show_output=True)\n",
"    print(aks_service.state)\n",
"else:\n",
"    aks_service = ws.webservices[aks_service_name]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Run DataDrift Analysis"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Send Scoring Data to Service"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Download Scoring Data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Score Model on March 15, 2016 data\n",
"scoring_df = get_noaa_data(datetime(2016, 3, 15) - timedelta(days=7), datetime(2016, 3, 16), columns, usaf_list)\n",
"# Add the window feature column\n",
"scoring_df = add_window_col(scoring_df)\n",
"\n",
"# Drop features not used by the model\n",
"print(\"Dropping unnecessary columns\")\n",
"scoring_df = scoring_df.drop(['windAngle', 'windSpeed', 'datetime', 'elevation'], axis=1).dropna()\n",
"scoring_df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# One Hot Encode the scoring dataset to match the training dataset schema\n",
"columns_dict = model.datasets[\"training\"][0].get_profile().columns\n",
"extra_cols = ('Path', 'Column1')\n",
"for k in extra_cols:\n",
"    columns_dict.pop(k, None)\n",
"training_columns = list(columns_dict.keys())\n",
"\n",
"categorical_columns = scoring_df.dtypes == object\n",
"categorical_columns = categorical_columns[categorical_columns == True]\n",
"\n",
"test_df = pd.get_dummies(scoring_df[categorical_columns.keys().tolist()])\n",
"encoded_df = scoring_df.join(test_df)\n",
"\n",
"# Populate missing OHE columns with 0 values to match training dataset schema\n",
"difference = list(set(training_columns) - set(encoded_df.columns.tolist()))\n",
"for col in difference:\n",
"    encoded_df[col] = 0\n",
"encoded_df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Serialize dataframe to list of row dictionaries\n",
"encoded_dict = encoded_df.to_dict('records')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Submit Scoring Data to Service"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"\n",
"# retrieve the API keys. AML generates two keys.\n",
"key1, key2 = aks_service.get_keys()\n",
"\n",
"total_count = len(scoring_df)\n",
"i = 0\n",
"load = []\n",
"for row in encoded_dict:\n",
"    load.append(row)\n",
"    i = i + 1\n",
"    if i % 100 == 0:\n",
"        payload = json.dumps({\"data\": load})\n",
"        \n",
"        # construct raw HTTP request and send to the service\n",
"        payload_binary = bytes(payload,encoding = 'utf8')\n",
"        headers = {'Content-Type':'application/json', 'Authorization': 'Bearer ' + key1}\n",
"        resp = requests.post(aks_service.scoring_uri, payload_binary, headers=headers)\n",
"        \n",
"        print(\"prediction:\", resp.content, \"Progress: {}/{}\".format(i, total_count)) \n",
"\n",
"        load = []\n",
"        time.sleep(3)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We need to wait up to 10 minutes for the Model Data Collector to dump the model input and inference data to storage in the Workspace, where it's used by the DataDriftDetector job."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"time.sleep(600)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Configure DataDrift"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"services = [service_name]\n",
"start = datetime.now() - timedelta(days=2)\n",
"end = datetime(year=2020, month=1, day=22, hour=15, minute=16)\n",
"feature_list = ['usaf', 'wban', 'latitude', 'longitude', 'station_name', 'p_k', 'sine_hourofday', 'cosine_hourofday', 'temperature-7']\n",
"alert_config = AlertConfiguration([email_address]) if email_address else None\n",
"\n",
"# an exception indicating to use the get() method is raised if the DataDrift object already exists\n",
"try:\n",
"    datadrift = DataDriftDetector.create(ws, model.name, model.version, services, frequency=\"Day\", alert_config=alert_config)\n",
"except KeyError:\n",
"    datadrift = DataDriftDetector.get(ws, model.name, model.version)\n",
"    \n",
"print(\"Details of DataDrift Object:\\n{}\".format(datadrift))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Run an Adhoc DataDriftDetector Run"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"target_date = datetime.today()\n",
"run = datadrift.run(target_date, services, feature_list=feature_list, create_compute_target=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"exp = Experiment(ws, datadrift._id)\n",
"dd_run = Run(experiment=exp, run_id=run)\n",
"RunDetails(dd_run).show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Get Drift Analysis Results"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"children = list(dd_run.get_children())\n",
"for child in children:\n",
"    child.wait_for_completion()\n",
"\n",
"drift_metrics = datadrift.get_output(start_time=start, end_time=end)\n",
"drift_metrics"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Show all drift figures, one per service.\n",
"# If with_details is False (the default), only drift will be shown; if it's True, all details will be shown.\n",
"\n",
"drift_figures = datadrift.show(with_details=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Enable DataDrift Schedule"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"datadrift.enable_schedule()"
]
}
],
"metadata": {
"authors": [
{
"name": "rafarmah"
}
],
"kernelspec": {
"display_name": "Python 3.6",
"language": "python",
"name": "python36"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
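The notebook ends by enabling the daily drift schedule. For completeness, a hedged cleanup sketch once you are done experimenting — it assumes the contrib API exposes `disable_schedule()` as the counterpart of the `enable_schedule()` call shown above:

```python
# Assumption: DataDriftDetector in azureml-contrib-datadrift exposes
# disable_schedule() to stop the scheduled daily runs.
datadrift.disable_schedule()

# Tear down the scoring service created earlier to avoid ongoing AKS charges.
aks_service.delete()
```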
8  contrib/datadrift/azure-ml-datadrift.yml  (new file)

@@ -0,0 +1,8 @@
name: azure-ml-datadrift
dependencies:
  - pip:
    - azureml-sdk
    - azureml-contrib-datadrift
    - azureml-opendatasets
    - lightgbm
    - azureml-widgets
58  contrib/datadrift/score.py  (new file)

@@ -0,0 +1,58 @@
import pickle
import json
import numpy
import azureml.train.automl
from sklearn.externals import joblib
from sklearn.linear_model import Ridge
from azureml.core.model import Model
from azureml.core.run import Run
from azureml.monitoring import ModelDataCollector
import time
import pandas as pd


def init():
    global model, inputs_dc, prediction_dc, feature_names, categorical_features

    print("Model is initialized" + time.strftime("%H:%M:%S"))
    model_path = Model.get_model_path(model_name="driftmodel")
    model = joblib.load(model_path)

    feature_names = ["usaf", "wban", "latitude", "longitude", "station_name", "p_k",
                     "sine_weekofyear", "cosine_weekofyear", "sine_hourofday", "cosine_hourofday",
                     "temperature-7"]

    categorical_features = ["usaf", "wban", "p_k", "station_name"]

    inputs_dc = ModelDataCollector(model_name="driftmodel",
                                   identifier="inputs",
                                   feature_names=feature_names)

    prediction_dc = ModelDataCollector("driftmodel",
                                       identifier="predictions",
                                       feature_names=["temperature"])


def run(raw_data):
    global inputs_dc, prediction_dc

    try:
        data = json.loads(raw_data)["data"]
        data = pd.DataFrame(data)

        # Remove the categorical features as the model expects OHE values
        input_data = data.drop(categorical_features, axis=1)

        result = model.predict(input_data)

        # Collect the non-OHE dataframe
        collected_df = data[feature_names]

        inputs_dc.collect(collected_df.values)
        prediction_dc.collect(result)
        return result.tolist()
    except Exception as e:
        error = str(e)

        print(error + time.strftime("%H:%M:%S"))
        return error
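For reference, a sketch of the request shape `run()` expects — a JSON object whose `data` list holds records keyed by the feature names above. The station values here are illustrative placeholders, and the deployed model additionally expects the one-hot-encoded columns produced in the notebook:

```python
import json

# Illustrative payload only: keys mirror feature_names in score.py; real requests
# also carry the one-hot-encoded columns added during featurization.
sample = {
    "data": [{
        "usaf": "725724", "wban": "94794", "latitude": 40.22, "longitude": -111.72,
        "station_name": "PROVO", "p_k": "725724-94794",
        "sine_weekofyear": 0.12, "cosine_weekofyear": 0.99,
        "sine_hourofday": 0.50, "cosine_hourofday": 0.87,
        "temperature-7": 3.4,
    }]
}
payload = json.dumps(sample)  # run(raw_data) receives this string
```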
@@ -8,7 +8,7 @@ As a pre-requisite, run the [configuration Notebook](../configuration.ipynb) not
* [train-on-local](./training/train-on-local): Learn how to submit a run to local computer and use Azure ML managed run configuration.
* [train-on-amlcompute](./training/train-on-amlcompute): Use a 1-n node Azure ML managed compute cluster for remote runs on Azure CPU or GPU infrastructure.
* [train-on-remote-vm](./training/train-on-remote-vm): Use Data Science Virtual Machine as a target for remote runs.
* [logging-api](./track-and-monitor-experiments/logging-api): Learn about the details of logging metrics to run history.
* [logging-api](./training/logging-api): Learn about the details of logging metrics to run history.
* [register-model-create-image-deploy-service](./deployment/register-model-create-image-deploy-service): Learn about the details of model management.
* [production-deploy-to-aks](./deployment/production-deploy-to-aks): Deploy a model to production at scale on Azure Kubernetes Service.
* [enable-data-collection-for-models-in-aks](./deployment/enable-data-collection-for-models-in-aks): Learn about data collection APIs for a deployed model.
@@ -155,11 +155,11 @@ jupyter notebook
- [auto-ml-subsampling-local.ipynb](subsampling/auto-ml-subsampling-local.ipynb)
  - How to enable subsampling

- [auto-ml-dataset.ipynb](dataprep/auto-ml-dataset.ipynb)
  - Using Dataset for reading data
- [auto-ml-dataprep.ipynb](dataprep/auto-ml-dataprep.ipynb)
  - Using DataPrep for reading data

- [auto-ml-dataset-remote-execution.ipynb](dataprep-remote-execution/auto-ml-dataset-remote-execution.ipynb)
  - Using Dataset for reading data with remote execution
- [auto-ml-dataprep-remote-execution.ipynb](dataprep-remote-execution/auto-ml-dataprep-remote-execution.ipynb)
  - Using DataPrep for reading data with remote execution

- [auto-ml-classification-with-whitelisting.ipynb](classification-with-whitelisting/auto-ml-classification-with-whitelisting.ipynb)
  - Dataset: scikit-learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits)
@@ -229,7 +229,7 @@ The main code of the file must be indented so that it is under this condition.
2. Check that you have conda 64-bit installed rather than 32-bit. You can check this with the command `conda info`. The `platform` should be `win-64` for Windows or `osx-64` for Mac.
3. Check that you have conda 4.4.10 or later. You can check the version with the command `conda -V`. If you have a previous version installed, you can update it using the command: `conda update conda`.
4. On Linux, if the error is `gcc: error trying to exec 'cc1plus': execvp: No such file or directory`, install build essentials using the command `sudo apt-get install build-essential`.
5. Pass a new name as the first parameter to automl_setup so that it creates a new conda environment. You can view existing conda environments using `conda env list` and remove them with `conda env remove -n <environmentname>`.

## automl_setup_linux.sh fails
If automl_setup_linux.sh fails on Ubuntu Linux with the error: `unable to execute 'gcc': No such file or directory`

@@ -264,13 +264,13 @@ Some Windows environments see an error loading numpy with the latest Python vers
Check the tensorflow version in the automated ml conda environment. Supported versions are < 1.13. Uninstall tensorflow from the environment if the version is >= 1.13.
You may check the version of tensorflow and uninstall it as follows:
1) Start a command shell and activate the conda environment where the automated ml packages are installed.
2) Enter `pip freeze` and look for `tensorflow`; if found, the version listed should be < 1.13.
3) If the listed version is not a supported version, run `pip uninstall tensorflow` in the command shell and enter y for confirmation.

## Remote run: DsvmCompute.create fails
There are several reasons why DsvmCompute.create can fail. The reason is usually in the error message, but you have to look at the end of the error message for the detailed reason. Some common reasons are:
1) `Compute name is invalid, it should start with a letter, be between 2 and 16 characters, and only include letters (a-zA-Z), numbers (0-9) and \'-\'.` Note that underscore is not allowed in the name.
2) `The requested VM size xxxxx is not available in the current region.` You can select a different region or vm_size.

## Remote run: Unable to establish SSH connection
Automated ML uses the SSH protocol to communicate with remote DSVMs. This defaults to port 22. Possible causes for this error are:

@@ -296,4 +296,4 @@ To resolve this issue, allocate a DSVM with more memory or reduce the value spec

## Remote run: Iterations show as "Not Responding" in the RunDetails widget.
This can be caused by too many concurrent iterations for a remote DSVM. Each concurrent iteration usually takes 100% of a core when it is running. Some iterations can use multiple cores. So, the max_concurrent_iterations setting should always be less than the number of cores of the DSVM.
To resolve this issue, try reducing the value specified for the max_concurrent_iterations setting.
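A hedged sketch of capping concurrency below the DSVM core count (parameter names from the AutoML SDK of this era; the core count and the `X_train`/`y_train` data are assumptions):

```python
from azureml.train.automl import AutoMLConfig

dsvm_cores = 4  # assumption: a 4-core DSVM

# Keep concurrent iterations below the core count so iterations keep responding.
automl_config = AutoMLConfig(task='classification',
                             primary_metric='AUC_weighted',
                             iterations=20,
                             max_concurrent_iterations=dsvm_cores - 1,
                             X=X_train,  # hypothetical training data defined elsewhere
                             y=y_train)
```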
@@ -13,13 +13,10 @@ dependencies:
- scikit-learn>=0.19.0,<=0.20.3
- pandas>=0.22.0,<=0.23.4
- py-xgboost<=0.80
- pyarrow>=0.11.0

- pip:
  # Required packages for AzureML execution, history, and data preparation.
  - azureml-defaults
  - azureml-train-automl
  - azureml-sdk[automl,explain]
  - azureml-widgets
  - azureml-explain-model
  - pandas_ml

@@ -14,13 +14,10 @@ dependencies:
- scikit-learn>=0.19.0,<=0.20.3
- pandas>=0.22.0,<0.23.0
- py-xgboost<=0.80
- pyarrow>=0.11.0

- pip:
  # Required packages for AzureML execution, history, and data preparation.
  - azureml-defaults
  - azureml-train-automl
  - azureml-sdk[automl,explain]
  - azureml-widgets
  - azureml-explain-model
  - pandas_ml

@@ -1,15 +1,6 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||
"\n",
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@@ -20,38 +11,40 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Automated Machine Learning\n",
|
||||
"_**Classification with Deployment using a Bank Marketing Dataset**_\n",
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||
"\n",
|
||||
"## Contents\n",
|
||||
"1. [Introduction](#Introduction)\n",
|
||||
"1. [Setup](#Setup)\n",
|
||||
"1. [Train](#Train)\n",
|
||||
"1. [Results](#Results)\n",
|
||||
"1. [Deploy](#Deploy)\n",
|
||||
"1. [Test](#Test)\n",
|
||||
"1. [Acknowledgements](#Acknowledgements)"
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Introduction\n",
|
||||
"# Unique Descriptive Title\n",
|
||||
"_**Unique Subtitle**_\n",
|
||||
"\n",
|
||||
"In this example we use the UCI Bank Marketing dataset to showcase how you can use AutoML for a classification problem and deploy it to an Azure Container Instance (ACI). The classification goal is to predict if the client will subscribe to a term deposit with the bank.\n",
|
||||
"Introduction that describes in a customer friendly language, what they will do and accomplish.\n".
|
||||
"## Contents\n",
|
||||
"1. [Introduction](#Introduction)\n",
|
||||
"1. [Prerequisites](#Prerequisites)\n",
|
||||
"1. [Configuration and Setup](#Setup)\n",
|
||||
"1. [Working with Data](#Working with Data)\n",
|
||||
"1. [Training](#Training)\n",
|
||||
"1. [Productionizing](#Productionizing)\n",
|
||||
"1. [Model Monitoring](#Model Monitoring)\n",
|
||||
"1. [Clean up resources](#Clean up resources)\n",
|
||||
"1. [Next Steps](#Next Steps)\n",
|
||||
"1. [Acknowledgements](#Acknowledgements)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Configuration\n",
|
||||
"\n",
|
||||
"If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [configuration](../../../configuration.ipynb) notebook first if you haven't already to establish your connection to the AzureML Workspace. \n",
|
||||
"\n",
|
||||
"In this notebook you will learn how to:\n",
|
||||
"1. Create an experiment using an existing workspace.\n",
|
||||
"2. Configure AutoML using `AutoMLConfig`.\n",
|
||||
"3. Train the model using local compute.\n",
|
||||
"4. Explore the results.\n",
|
||||
"5. Register the model.\n",
|
||||
"6. Create a container image.\n",
|
||||
"7. Create an Azure Container Instance (ACI) service.\n",
|
||||
"8. Test the ACI service."
|
||||
"If you are using an Azure Machine Learning Compute Instance, you are all set. Otherwise, go through the [configuration](../../../configuration.ipynb) notebook first if you haven't already to establish your connection to the AzureML Workspace. \n",
|
||||
"Please note that a Basic edition workspace is created by default in the configuration.ipynb file.\n",
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -60,8 +53,7 @@
|
||||
"source": [
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"As part of the setup you have already created an Azure ML `Workspace` object. For AutoML you will need to create an `Experiment` object, which is a named object in a `Workspace` used to run experiments."
|
||||
]
|
||||
"As part of the setup you have already created an Azure ML `Workspace` object....\n",
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
@@ -69,17 +61,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import logging\n",
|
||||
"\n",
|
||||
"from matplotlib import pyplot as plt\n",
|
||||
"import pandas as pd\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"import azureml.core\n",
|
||||
"from azureml.core.experiment import Experiment\n",
|
||||
"from azureml.core.workspace import Workspace\n",
|
||||
"from azureml.core.dataset import Dataset\n",
|
||||
"from azureml.train.automl import AutoMLConfig"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -88,26 +70,21 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ws = Workspace.from_config()\n",
|
||||
"tenant_id = os.environ['TENANT_ID’]\n",
|
||||
"client_id = os.environ['CLIENT_ID’]\n",
|
||||
"run = Run.get_context()\n",
|
||||
"secret_name = “{0}-secret”.format(client_id)\n",
|
||||
"secret = run.get_secret(name=secret_name)\n",
|
||||
"sp_auth = ServicePrincipalAuthentication(tenant_id, client_id, secret)\n",
|
||||
"ws = Workspace.from_config(auth=sp_auth)\n",
|
||||
"\n",
|
||||
"# choose a name for experiment\n",
|
||||
"experiment_name = 'automl-classification-bmarketing'\n",
|
||||
"# choose a unique name for experiment\n",
|
||||
"experiment_name = 'unique-name'\n",
|
||||
"# project folder\n",
|
||||
"project_folder = './sample_projects/automl-classification-bankmarketing'\n",
|
||||
"project_folder = './sample_projects/test'\n",
|
||||
"\n",
|
||||
"experiment=Experiment(ws, experiment_name)\n",
|
||||
"\n",
|
||||
"output = {}\n",
|
||||
"output['SDK version'] = azureml.core.VERSION\n",
|
||||
"output['Subscription ID'] = ws.subscription_id\n",
|
||||
"output['Workspace'] = ws.name\n",
|
||||
"output['Resource Group'] = ws.resource_group\n",
|
||||
"output['Location'] = ws.location\n",
|
||||
"output['Project Directory'] = project_folder\n",
|
||||
"output['Experiment Name'] = experiment.name\n",
|
||||
"pd.set_option('display.max_colwidth', -1)\n",
|
||||
"outputDf = pd.DataFrame(data = output, index = [''])\n",
|
||||
"outputDf.T"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -115,575 +92,77 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create or Attach existing AmlCompute\n",
|
||||
"You will need to create a compute target for your AutoML run. In this tutorial, you create AmlCompute as your training compute resource.\n",
|
||||
"You will need to create a compute target for your run. In this tutorial, you create AmlCompute as your training compute resource.\n",
|
||||
"#### Creation of AmlCompute takes approximately 5 minutes. \n",
|
||||
"If the AmlCompute with that name is already in your workspace this code will skip the creation process.\n",
|
||||
"As with other Azure services, there are limits on certain resources (e.g. AmlCompute) associated with the Azure Machine Learning service. Please read this article on the default limits and how to request more quota."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.compute import AmlCompute\n",
|
||||
"from azureml.core.compute import ComputeTarget\n",
|
||||
"# Working with Data\n",
|
||||
"\n",
|
||||
"# Choose a name for your cluster.\n",
|
||||
"amlcompute_cluster_name = \"automlcl\"\n",
|
||||
"\n",
|
||||
"found = False\n",
|
||||
"# Check if this compute target already exists in the workspace.\n",
|
||||
"cts = ws.compute_targets\n",
|
||||
"if amlcompute_cluster_name in cts and cts[amlcompute_cluster_name].type == 'AmlCompute':\n",
|
||||
" found = True\n",
|
||||
" print('Found existing compute target.')\n",
|
||||
" compute_target = cts[amlcompute_cluster_name]\n",
|
||||
" \n",
|
||||
"if not found:\n",
|
||||
" print('Creating a new compute target...')\n",
|
||||
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\", # for GPU, use \"STANDARD_NC6\"\n",
|
||||
" #vm_priority = 'lowpriority', # optional\n",
|
||||
" max_nodes = 6)\n",
|
||||
"\n",
|
||||
" # Create the cluster.\n",
|
||||
" compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)\n",
|
||||
" \n",
|
||||
"print('Checking cluster status...')\n",
|
||||
"# Can poll for a minimum number of nodes and for a specific timeout.\n",
|
||||
"# If no min_node_count is provided, it will use the scale settings for the cluster.\n",
|
||||
"compute_target.wait_for_completion(show_output = True, min_node_count = None, timeout_in_minutes = 20)\n",
|
||||
" \n",
|
||||
"# For a more detailed view of current AmlCompute status, use get_status()."
|
||||
"Here you would learn how to perform Data labeling and use Open Datasets etc..\n",
|
||||
"To do this first load....\n",
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Data\n",
|
||||
"## Training\n",
|
||||
"\n",
|
||||
"Here load the data in the get_data() script to be utilized in azure compute. To do this first load all the necessary libraries and dependencies to set up paths for the data and to create the conda_Run_config."
|
||||
"Here you would learn how to train a DNN using...\n",
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"if not os.path.isdir('data'):\n",
|
||||
" os.mkdir('data')\n",
|
||||
" \n",
|
||||
"if not os.path.exists(project_folder):\n",
|
||||
" os.makedirs(project_folder)"
|
||||
"# Productionizing\n",
|
||||
"\n",
|
||||
"Here you would learn how to deploy your model to ACI to perform...\n",
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.runconfig import RunConfiguration\n",
|
||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||
"import pkg_resources\n",
|
||||
"# Model Monitoring\n",
|
||||
"\n",
|
||||
"# create a new RunConfig object\n",
|
||||
"conda_run_config = RunConfiguration(framework=\"python\")\n",
|
||||
"Here you would learn how to detect datadrift etc...\n",
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Clean up resources\n",
|
||||
"\n",
|
||||
"# Set compute target to AmlCompute\n",
|
||||
"conda_run_config.target = compute_target\n",
|
||||
"conda_run_config.environment.docker.enabled = True\n",
|
||||
"Now, let's clean up the resources we created...\n",
|
||||
]
|
||||
},
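The clean-up text above is a placeholder, so here is a minimal sketch of what releasing the resources created in this notebook might look like; it assumes the `compute_target` provisioned earlier and the `aci_service` deployed further down, both of which expose a standard `delete()` method.

```python
# Minimal clean-up sketch (assumes compute_target and aci_service exist).
# Delete the ACI web service so it stops serving (and billing).
aci_service.delete()

# Delete the AmlCompute cluster created for training.
compute_target.delete()
```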
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Next Steps\n",
|
||||
"\n",
|
||||
"cd = CondaDependencies.create(conda_packages=['numpy','py-xgboost<=0.80'])\n",
|
||||
"conda_run_config.environment.python.conda_dependencies = cd"
|
||||
"In this notebook, you’ve done x, y, z. You can learn more with these resources:\n",
|
||||
"+ [SDK reference documentation for `MyClass`]()\n",
|
||||
"+ [About this feature](https://docs.microsoft.com/azure/machine-learning/service/thisfeature)\n",
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Load Data\n",
|
||||
"\n",
|
||||
"Here we create the script to be run in azure comput for loading the data, we load the bank marketing dataset into X_train and y_train. Next X_train and y_train is returned for training the model."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"data = \"https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv\"\n",
|
||||
"dataset = Dataset.Tabular.from_delimited_files(data)\n",
|
||||
"X_train = dataset.drop_columns(columns=['y'])\n",
|
||||
"y_train = dataset.keep_columns(columns=['y'], validate=True)\n",
|
||||
"dataset.take(5).to_pandas_dataframe()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Train\n",
|
||||
"\n",
|
||||
"Instantiate a AutoMLConfig object. This defines the settings and data used to run the experiment.\n",
|
||||
"\n",
|
||||
"|Property|Description|\n",
|
||||
"|-|-|\n",
|
||||
"|**task**|classification or regression|\n",
|
||||
"|**primary_metric**|This is the metric that you want to optimize. Classification supports the following primary metrics: <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>average_precision_score_weighted</i><br><i>norm_macro_recall</i><br><i>precision_score_weighted</i>|\n",
|
||||
"|**iteration_timeout_minutes**|Time limit in minutes for each iteration.|\n",
|
||||
"|**iterations**|Number of iterations. In each iteration AutoML trains a specific pipeline with the data.|\n",
|
||||
"|**n_cross_validations**|Number of cross validation splits.|\n",
|
||||
"|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n",
|
||||
"|**y**|(sparse) array-like, shape = [n_samples, ], Multi-class targets.|\n",
|
||||
"|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder.|\n",
|
||||
"\n",
|
||||
"**_You can find more information about primary metrics_** [here](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-configure-auto-train#primary-metric)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"automl_settings = {\n",
|
||||
" \"iteration_timeout_minutes\": 5,\n",
|
||||
" \"iterations\": 10,\n",
|
||||
" \"n_cross_validations\": 2,\n",
|
||||
" \"primary_metric\": 'AUC_weighted',\n",
|
||||
" \"preprocess\": True,\n",
|
||||
" \"max_concurrent_iterations\": 5,\n",
|
||||
" \"verbosity\": logging.INFO,\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"automl_config = AutoMLConfig(task = 'classification',\n",
|
||||
" debug_log = 'automl_errors.log',\n",
|
||||
" path = project_folder,\n",
|
||||
" run_configuration=conda_run_config,\n",
|
||||
" X = X_train,\n",
|
||||
" y = y_train,\n",
|
||||
" **automl_settings\n",
|
||||
" )"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Call the `submit` method on the experiment object and pass the run configuration. Execution of local runs is synchronous. Depending on the data and the number of iterations this can run for a while.\n",
|
||||
"In this example, we specify `show_output = True` to print currently running iterations to the console."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"remote_run = experiment.submit(automl_config, show_output = True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"remote_run"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Results"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Widget for Monitoring Runs\n",
|
||||
"\n",
|
||||
"The widget will first report a \"loading\" status while running the first iteration. After completing the first iteration, an auto-updating graph and table will be shown. The widget will refresh once per minute, so you should see the graph update as child runs complete.\n",
|
||||
"\n",
|
||||
"**Note:** The widget displays a link at the bottom. Use this link to open a web interface to explore the individual run details"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.widgets import RunDetails\n",
|
||||
"RunDetails(remote_run).show() "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Deploy\n",
|
||||
"\n",
|
||||
"### Retrieve the Best Model\n",
|
||||
"\n",
|
||||
"Below we select the best pipeline from our iterations. The `get_output` method on `automl_classifier` returns the best run and the fitted model for the last invocation. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"best_run, fitted_model = remote_run.get_output()"
|
||||
]
|
||||
},
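The overloads mentioned above can be sketched as follows, reusing the `remote_run` object from this notebook (the metric name and iteration number are illustrative):

```python
# Best run and model according to a specific logged metric (sketch).
best_run, fitted_model = remote_run.get_output(metric='log_loss')

# Run and model produced by a specific iteration (sketch).
best_run, fitted_model = remote_run.get_output(iteration=0)
```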
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Register the Fitted Model for Deployment\n",
|
||||
"If neither `metric` nor `iteration` are specified in the `register_model` call, the iteration with the best primary metric is registered."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"description = 'AutoML Model trained on bank marketing data to predict if a client will subscribe to a term deposit'\n",
|
||||
"tags = None\n",
|
||||
"model = remote_run.register_model(description = description, tags = tags)\n",
|
||||
"\n",
|
||||
"print(remote_run.model_id) # This will be written to the script file later in the notebook."
|
||||
]
|
||||
},
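As a hedged sketch of the `metric` and `iteration` options described above (values are illustrative; the parameters are those accepted by AutoML run objects):

```python
# Register the model from the iteration with the best 'log_loss' (sketch).
model = remote_run.register_model(description = description, tags = tags, metric = 'log_loss')

# Or register the model produced by a specific iteration (sketch).
model = remote_run.register_model(description = description, tags = tags, iteration = 3)
```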
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create Scoring Script\n",
|
||||
"The scoring script is required to generate the image for deployment. It contains the code to do the predictions on input data."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%writefile score.py\n",
|
||||
"import pickle\n",
|
||||
"import json\n",
|
||||
"import numpy\n",
|
||||
"import azureml.train.automl\n",
|
||||
"from sklearn.externals import joblib\n",
|
||||
"from azureml.core.model import Model\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def init():\n",
|
||||
" global model\n",
|
||||
" model_path = Model.get_model_path(model_name = '<<modelid>>') # this name is model.id of model that we want to deploy\n",
|
||||
" # deserialize the model file back into a sklearn model\n",
|
||||
" model = joblib.load(model_path)\n",
|
||||
"\n",
|
||||
"def run(rawdata):\n",
|
||||
" try:\n",
|
||||
" data = json.loads(rawdata)['data']\n",
|
||||
" data = np.array(data)\n",
|
||||
" result = model.predict(data)\n",
|
||||
" except Exception as e:\n",
|
||||
" result = str(e)\n",
|
||||
" return json.dumps({\"error\": result})\n",
|
||||
" return json.dumps({\"result\":result.tolist()})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create a YAML File for the Environment"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To ensure the fit results are consistent with the training results, the SDK dependency versions need to be the same as the environment that trains the model. Details about retrieving the versions can be found in notebook [12.auto-ml-retrieve-the-training-sdk-versions](12.auto-ml-retrieve-the-training-sdk-versions.ipynb)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dependencies = remote_run.get_run_sdk_dependencies(iteration = 1)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"for p in ['azureml-train-automl', 'azureml-core']:\n",
|
||||
" print('{}\\t{}'.format(p, dependencies[p]))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn','py-xgboost<=0.80'],\n",
|
||||
" pip_packages=['azureml-train-automl'])\n",
|
||||
"\n",
|
||||
"conda_env_file_name = 'myenv.yml'\n",
|
||||
"myenv.save_to_file('.', conda_env_file_name)"
|
||||
]
|
||||
},
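Before substituting versions in the file below, it can help to inspect what was written; a small sketch using `serialize_to_string`, which `CondaDependencies` provides:

```python
# Print the rendered conda environment for a quick sanity check (sketch).
print(myenv.serialize_to_string())
```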
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Substitute the actual version number in the environment file.\n",
|
||||
"# This is not strictly needed in this notebook because the model should have been generated using the current SDK version.\n",
|
||||
"# However, we include this in case this code is used on an experiment from a previous SDK version.\n",
|
||||
"\n",
|
||||
"with open(conda_env_file_name, 'r') as cefr:\n",
|
||||
" content = cefr.read()\n",
|
||||
"\n",
|
||||
"with open(conda_env_file_name, 'w') as cefw:\n",
|
||||
" cefw.write(content.replace(azureml.core.VERSION, dependencies['azureml-train-automl']))\n",
|
||||
"\n",
|
||||
"# Substitute the actual model id in the script file.\n",
|
||||
"\n",
|
||||
"script_file_name = 'score.py'\n",
|
||||
"\n",
|
||||
"with open(script_file_name, 'r') as cefr:\n",
|
||||
" content = cefr.read()\n",
|
||||
"\n",
|
||||
"with open(script_file_name, 'w') as cefw:\n",
|
||||
" cefw.write(content.replace('<<modelid>>', remote_run.model_id))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create a Container Image\n",
|
||||
"\n",
|
||||
"Next use Azure Container Instances for deploying models as a web service for quickly deploying and validating your model\n",
|
||||
"or when testing a model that is under development."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.image import Image, ContainerImage\n",
|
||||
"\n",
|
||||
"image_config = ContainerImage.image_configuration(runtime= \"python\",\n",
|
||||
" execution_script = script_file_name,\n",
|
||||
" conda_file = conda_env_file_name,\n",
|
||||
" tags = {'area': \"bmData\", 'type': \"automl_classification\"},\n",
|
||||
" description = \"Image for automl classification sample\")\n",
|
||||
"\n",
|
||||
"image = Image.create(name = \"automlsampleimage\",\n",
|
||||
" # this is the model object \n",
|
||||
" models = [model],\n",
|
||||
" image_config = image_config, \n",
|
||||
" workspace = ws)\n",
|
||||
"\n",
|
||||
"image.wait_for_creation(show_output = True)\n",
|
||||
"\n",
|
||||
"if image.creation_state == 'Failed':\n",
|
||||
" print(\"Image build log at: \" + image.image_build_log_uri)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Deploy the Image as a Web Service on Azure Container Instance\n",
|
||||
"\n",
|
||||
"Deploy an image that contains the model and other assets needed by the service."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.webservice import AciWebservice\n",
|
||||
"\n",
|
||||
"aciconfig = AciWebservice.deploy_configuration(cpu_cores = 1, \n",
|
||||
" memory_gb = 1, \n",
|
||||
" tags = {'area': \"bmData\", 'type': \"automl_classification\"}, \n",
|
||||
" description = 'sample service for Automl Classification')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.webservice import Webservice\n",
|
||||
"\n",
|
||||
"aci_service_name = 'automl-sample-bankmarketing'\n",
|
||||
"print(aci_service_name)\n",
|
||||
"aci_service = Webservice.deploy_from_image(deployment_config = aciconfig,\n",
|
||||
" image = image,\n",
|
||||
" name = aci_service_name,\n",
|
||||
" workspace = ws)\n",
|
||||
"aci_service.wait_for_deployment(True)\n",
|
||||
"print(aci_service.state)"
|
||||
]
|
||||
},
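Once the service reports a healthy state, a minimal sketch of exercising it follows; the payload shape (a JSON object with a `data` key) matches the `run()` function in `score.py` above, and the sample row is purely illustrative:

```python
import json

# Build a tiny illustrative payload from the training data (sketch).
sample_rows = X_train.take(1).to_pandas_dataframe().values.tolist()

# Call the deployed ACI service with the JSON shape score.py expects.
response = aci_service.run(input_data = json.dumps({"data": sample_rows}))
print(response)
```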
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Delete a Web Service\n",
|
||||
"\n",
|
||||
"Deletes the specified web service."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#aci_service.delete()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Get Logs from a Deployed Web Service\n",
|
||||
"\n",
|
||||
"Gets logs from a deployed web service."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#aci_service.get_logs()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Test\n",
|
||||
"\n",
|
||||
"Now that the model is trained split our data in the same way the data was split for training (The difference here is the data is being split locally) and then run the test data through the trained model to get the predicted values."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Load the bank marketing datasets.\n",
|
||||
"from numpy import array"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"data = \"https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_validate.csv\"\n",
|
||||
"dataset = Dataset.Tabular.from_delimited_files(data)\n",
|
||||
"X_test = dataset.drop_columns(columns=['y'])\n",
|
||||
"y_test = dataset.keep_columns(columns=['y'], validate=True)\n",
|
||||
"dataset.take(5).to_pandas_dataframe()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"X_test = X_test.to_pandas_dataframe()\n",
|
||||
"y_test = y_test.to_pandas_dataframe()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"y_pred = fitted_model.predict(X_test)\n",
|
||||
"actual = array(y_test)\n",
|
||||
"actual = actual[:,0]\n",
|
||||
"print(y_pred.shape, \" \", actual.shape)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Calculate metrics for the prediction\n",
|
||||
"\n",
|
||||
"Now visualize the data on a scatter plot to show what our truth (actual) values are compared to the predicted values \n",
|
||||
"from the trained model that was returned."
|
||||
]
|
||||
},
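Since the heading promises metrics, a simple aggregate score can complement the plot; a hedged sketch using scikit-learn, which is already part of this environment:

```python
from sklearn.metrics import accuracy_score

# One summary metric alongside the scatter plot below (sketch).
print('Accuracy:', accuracy_score(actual, y_pred))
```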
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%matplotlib notebook\n",
|
||||
"test_pred = plt.scatter(actual, y_pred, color='b')\n",
|
||||
"test_test = plt.scatter(actual, actual, color='g')\n",
|
||||
"plt.legend((test_pred, test_test), ('prediction', 'truth'), loc='upper left', fontsize=8)\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Acknowledgements"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This Bank Marketing dataset is made available under the Creative Commons (CCO: Public Domain) License: https://creativecommons.org/publicdomain/zero/1.0/. Any rights in individual contents of the database are licensed under the Database Contents License: https://creativecommons.org/publicdomain/zero/1.0/ and is available at: https://www.kaggle.com/janiobachmann/bank-marketing-dataset .\n",
|
||||
"This dataset is made available under the Creative Commons (CCO: Public Domain) License: https://creativecommons.org/publicdomain/zero/1.0/. Any rights in individual contents of the database are licensed under the Database Contents License: https://creativecommons.org/publicdomain/zero/1.0/ and is available at: https://www.kaggle.com/janiobachmann/bank-marketing-dataset .\n",
|
||||
"\n",
|
||||
"_**Acknowledgements**_\n",
|
||||
"This data set is originally available within the UCI Machine Learning Database: https://archive.ics.uci.edu/ml/datasets/bank+marketing\n",
|
||||
"This dataset is originally available within the UCI Machine Learning Database: https://archive.ics.uci.edu/ml/datasets/bank+marketing\n",
|
||||
"\n",
|
||||
"[Moro et al., 2014] S. Moro, P. Cortez and P. Rita. A Data-Driven Approach to Predict the Success of Bank Telemarketing. Decision Support Systems, Elsevier, 62:22-31, June 2014"
|
||||
]
|
||||
@@ -692,9 +171,32 @@
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "v-rasav"
|
||||
"name": "YOUR ALIAS"
|
||||
}
|
||||
],
|
||||
"category": "tutorial",
|
||||
"compute": [
|
||||
"AML Compute"
|
||||
],
|
||||
"datasets": [
|
||||
"MNIST"
|
||||
],
|
||||
"deployment": [
|
||||
"AKS"
|
||||
],
|
||||
"exclude_from_index": false,
|
||||
"framework": [
|
||||
"PyTorch"
|
||||
],
|
||||
"friendly_name": "How to use ModuleStep with AML Pipelines",
|
||||
},
|
||||
"order_index": 14,
|
||||
"star_tag": [],
|
||||
"tags": [
|
||||
"Pipeline Builder"
|
||||
],
|
||||
"task": "Demonstrates the use of ModuleStep"
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
@@ -715,4 +217,8 @@
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -2,8 +2,6 @@ name: auto-ml-classification-bank-marketing
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-defaults
|
||||
- azureml-explain-model
|
||||
- azureml-train-automl
|
||||
- azureml-widgets
|
||||
- matplotlib
|
||||
|
||||
@@ -74,12 +74,14 @@
|
||||
"from matplotlib import pyplot as plt\n",
|
||||
"import pandas as pd\n",
|
||||
"import os\n",
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"import azureml.dataprep as dprep\n",
|
||||
"\n",
|
||||
"import azureml.core\n",
|
||||
"from azureml.core.experiment import Experiment\n",
|
||||
"from azureml.core.workspace import Workspace\n",
|
||||
"from azureml.core.dataset import Dataset\n",
|
||||
"from azureml.train.automl import AutoMLConfig"
|
||||
"from azureml.train.automl import AutoMLConfig\n",
|
||||
"from azureml.train.automl.run import AutoMLRun"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -150,12 +152,11 @@
|
||||
" # Create the cluster.\n",
|
||||
" compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)\n",
|
||||
" \n",
|
||||
"print('Checking cluster status...')\n",
|
||||
"# Can poll for a minimum number of nodes and for a specific timeout.\n",
|
||||
"# If no min_node_count is provided, it will use the scale settings for the cluster.\n",
|
||||
"compute_target.wait_for_completion(show_output = True, min_node_count = None, timeout_in_minutes = 20)\n",
|
||||
"\n",
|
||||
"# For a more detailed view of current AmlCompute status, use get_status()."
|
||||
" # Can poll for a minimum number of nodes and for a specific timeout.\n",
|
||||
" # If no min_node_count is provided, it will use the scale settings for the cluster.\n",
|
||||
" compute_target.wait_for_completion(show_output = True, min_node_count = None, timeout_in_minutes = 20)\n",
|
||||
" \n",
|
||||
" # For a more detailed view of current AmlCompute status, use get_status()."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -196,8 +197,11 @@
|
||||
"# Set compute target to AmlCompute\n",
|
||||
"conda_run_config.target = compute_target\n",
|
||||
"conda_run_config.environment.docker.enabled = True\n",
|
||||
"conda_run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n",
|
||||
"\n",
|
||||
"cd = CondaDependencies.create(conda_packages=['numpy','py-xgboost<=0.80'])\n",
|
||||
"dprep_dependency = 'azureml-dataprep==' + pkg_resources.get_distribution(\"azureml-dataprep\").version\n",
|
||||
"\n",
|
||||
"cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]', dprep_dependency], conda_packages=['numpy','py-xgboost<=0.80'])\n",
|
||||
"conda_run_config.environment.python.conda_dependencies = cd"
|
||||
]
|
||||
},
|
||||
@@ -207,7 +211,7 @@
|
||||
"source": [
|
||||
"### Load Data\n",
|
||||
"\n",
|
||||
"Here create the script to be run in azure compute for loading the data, load the credit card dataset into cards and store the Class column (y) in the y variable and store the remaining data in the x variable. Next split the data using random_split and return X_train and y_train for training the model."
|
||||
"Here create the script to be run in azure compute for loading the data, load the credit card dataset into cards and store the Class column (y) in the y variable and store the remaining data in the x variable. Next split the data using train_test_split and return X_train and y_train for training the model."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -217,9 +221,10 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"data = \"https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/creditcard.csv\"\n",
|
||||
"dataset = Dataset.Tabular.from_delimited_files(data)\n",
|
||||
"X = dataset.drop_columns(columns=['Class'])\n",
|
||||
"y = dataset.keep_columns(columns=['Class'], validate=True)\n",
|
||||
"dflow = dprep.read_csv(data, infer_column_types=True)\n",
|
||||
"dflow.get_profile()\n",
|
||||
"X = dflow.drop_columns(columns=['Class'])\n",
|
||||
"y = dflow.keep_columns(columns=['Class'], validate_column_exists=True)\n",
|
||||
"X_train, X_test = X.random_split(percentage=0.8, seed=223)\n",
|
||||
"y_train, y_test = y.random_split(percentage=0.8, seed=223)"
|
||||
]
|
||||
@@ -442,7 +447,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"for p in ['azureml-train-automl', 'azureml-core']:\n",
|
||||
"for p in ['azureml-train-automl', 'azureml-sdk', 'azureml-core']:\n",
|
||||
" print('{}\\t{}'.format(p, dependencies[p]))"
|
||||
]
|
||||
},
|
||||
@@ -453,7 +458,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn','py-xgboost<=0.80'],\n",
|
||||
" pip_packages=['azureml-train-automl'])\n",
|
||||
" pip_packages=['azureml-sdk[automl]'])\n",
|
||||
"\n",
|
||||
"conda_env_file_name = 'myenv.yml'\n",
|
||||
"myenv.save_to_file('.', conda_env_file_name)"
|
||||
@@ -473,7 +478,7 @@
|
||||
" content = cefr.read()\n",
|
||||
"\n",
|
||||
"with open(conda_env_file_name, 'w') as cefw:\n",
|
||||
" cefw.write(content.replace(azureml.core.VERSION, dependencies['azureml-train-automl']))\n",
|
||||
" cefw.write(content.replace(azureml.core.VERSION, dependencies['azureml-sdk']))\n",
|
||||
"\n",
|
||||
"# Substitute the actual model id in the script file.\n",
|
||||
"\n",
|
||||
|
||||
@@ -2,8 +2,6 @@ name: auto-ml-classification-credit-card-fraud
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-defaults
|
||||
- azureml-explain-model
|
||||
- azureml-train-automl
|
||||
- azureml-widgets
|
||||
- matplotlib
|
||||
|
||||
@@ -41,6 +41,8 @@
|
||||
"\n",
|
||||
"Make sure you have executed the [configuration](../../../configuration.ipynb) before running this notebook.\n",
|
||||
"\n",
|
||||
"An Enterprise workspace is required for this notebook. To learn more about creating an Enterprise workspace or upgrading to an Enterprise workspace from the Azure portal, please visit our [Workspace page](https://docs.microsoft.com/en-us/azure/machine-learning/service/concept-workspace#upgrade).\n",
|
||||
"\n",
"In this notebook you will learn how to:\n",
|
||||
"1. Create an experiment using an existing workspace.\n",
|
||||
"2. Configure AutoML using `AutoMLConfig`.\n",
|
||||
@@ -61,61 +63,13 @@
|
||||
"As part of the setup you have already created an Azure ML `Workspace` object. For AutoML you will need to create an `Experiment` object, which is a named object in a `Workspace` used to run experiments."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import json\n",
|
||||
"import logging\n",
|
||||
"\n",
|
||||
"from matplotlib import pyplot as plt\n",
|
||||
"import numpy as np\n",
|
||||
"import pandas as pd\n",
|
||||
"from sklearn import datasets\n",
|
||||
"\n",
|
||||
"import azureml.core\n",
|
||||
"from azureml.core.experiment import Experiment\n",
|
||||
"from azureml.core.workspace import Workspace\n",
|
||||
"from azureml.train.automl import AutoMLConfig\n",
|
||||
"from azureml.train.automl.run import AutoMLRun"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ws = Workspace.from_config()\n",
|
||||
"\n",
|
||||
"# choose a name for experiment\n",
|
||||
"experiment_name = 'automl-classification-deployment'\n",
|
||||
"# project folder\n",
|
||||
"project_folder = './sample_projects/automl-classification-deployment'\n",
|
||||
"\n",
|
||||
"experiment=Experiment(ws, experiment_name)\n",
|
||||
"\n",
|
||||
"output = {}\n",
|
||||
"output['SDK version'] = azureml.core.VERSION\n",
|
||||
"output['Subscription ID'] = ws.subscription_id\n",
|
||||
"output['Workspace'] = ws.name\n",
|
||||
"output['Resource Group'] = ws.resource_group\n",
|
||||
"output['Location'] = ws.location\n",
|
||||
"output['Project Directory'] = project_folder\n",
|
||||
"output['Experiment Name'] = experiment.name\n",
|
||||
"pd.set_option('display.max_colwidth', -1)\n",
|
||||
"outputDf = pd.DataFrame(data = output, index = [''])\n",
|
||||
"outputDf.T"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Train\n",
|
||||
"\n",
|
||||
"The following steps require an Enterprise workspace to gain access to these features.To learn more about creating an Enterprise workspace or upgrading to an Enterprise workspace from the Azure portal, please visit our [Workspace page](https://docs.microsoft.com/en-us/azure/machine-learning/service/concept-workspace#upgrade).\n",
|
||||
"Instantiate a AutoMLConfig object. This defines the settings and data used to run the experiment.\n",
|
||||
"\n",
|
||||
"|Property|Description|\n",
|
||||
@@ -297,7 +251,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"for p in ['azureml-train-automl', 'azureml-core']:\n",
|
||||
"for p in ['azureml-train-automl', 'azureml-sdk', 'azureml-core']:\n",
|
||||
" print('{}\\t{}'.format(p, dependencies[p]))"
|
||||
]
|
||||
},
|
||||
@@ -310,7 +264,7 @@
|
||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||
"\n",
|
||||
"myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn','py-xgboost<=0.80'],\n",
|
||||
" pip_packages=['azureml-train-automl'])\n",
|
||||
" pip_packages=['azureml-sdk[automl]'])\n",
|
||||
"\n",
|
||||
"conda_env_file_name = 'myenv.yml'\n",
|
||||
"myenv.save_to_file('.', conda_env_file_name)"
|
||||
@@ -330,7 +284,7 @@
|
||||
" content = cefr.read()\n",
|
||||
"\n",
|
||||
"with open(conda_env_file_name, 'w') as cefw:\n",
|
||||
" cefw.write(content.replace(azureml.core.VERSION, dependencies['azureml-train-automl']))\n",
|
||||
" cefw.write(content.replace(azureml.core.VERSION, dependencies['azureml-sdk']))\n",
|
||||
"\n",
|
||||
"# Substitute the actual model id in the script file.\n",
|
||||
"\n",
|
||||
@@ -484,7 +438,7 @@
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "savitam"
|
||||
"name": "shwinne"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
@@ -507,4 +461,4 @@
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
}
|
||||
|
||||
@@ -479,7 +479,27 @@
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.6"
|
||||
}
|
||||
},
|
||||
"friendly_name": "Testing index",
|
||||
"exclude_from_index": false,
|
||||
"order_index": 1,
|
||||
"category": "tutorial",
|
||||
"tags": [
|
||||
"featured"
|
||||
],
|
||||
"task": "Regression",
|
||||
"datasets": [
|
||||
"NYC Taxi"
|
||||
],
|
||||
"compute": [
|
||||
"local"
|
||||
],
|
||||
"deployment": [
|
||||
"None"
|
||||
],
|
||||
"framework": [
|
||||
"Azure ML AutoML"
|
||||
],
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
|
||||
@@ -1,509 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||
"\n",
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Automated Machine Learning\n",
|
||||
"_**Load Data using `TabularDataset` for Remote Execution (AmlCompute)**_\n",
|
||||
"\n",
|
||||
"## Contents\n",
|
||||
"1. [Introduction](#Introduction)\n",
|
||||
"1. [Setup](#Setup)\n",
|
||||
"1. [Data](#Data)\n",
|
||||
"1. [Train](#Train)\n",
|
||||
"1. [Results](#Results)\n",
|
||||
"1. [Test](#Test)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Introduction\n",
|
||||
"In this example we showcase how you can use AzureML Dataset to load data for AutoML.\n",
|
||||
"\n",
|
||||
"Make sure you have executed the [configuration](../../../configuration.ipynb) before running this notebook.\n",
|
||||
"\n",
|
||||
"In this notebook you will learn how to:\n",
|
||||
"1. Create a `TabularDataset` pointing to the training data.\n",
|
||||
"2. Pass the `TabularDataset` to AutoML for a remote run."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Setup"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"As part of the setup you have already created an Azure ML `Workspace` object. For AutoML you will need to create an `Experiment` object, which is a named object in a `Workspace` used to run experiments."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import logging\n",
|
||||
"\n",
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"import azureml.core\n",
|
||||
"from azureml.core.experiment import Experiment\n",
|
||||
"from azureml.core.workspace import Workspace\n",
|
||||
"from azureml.core.dataset import Dataset\n",
|
||||
"from azureml.train.automl import AutoMLConfig"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ws = Workspace.from_config()\n",
|
||||
"\n",
|
||||
"# choose a name for experiment\n",
|
||||
"experiment_name = 'automl-dataset-remote-bai'\n",
|
||||
"# project folder\n",
|
||||
"project_folder = './sample_projects/automl-dataprep-remote-bai'\n",
|
||||
" \n",
|
||||
"experiment = Experiment(ws, experiment_name)\n",
|
||||
" \n",
|
||||
"output = {}\n",
|
||||
"output['SDK version'] = azureml.core.VERSION\n",
|
||||
"output['Subscription ID'] = ws.subscription_id\n",
|
||||
"output['Workspace Name'] = ws.name\n",
|
||||
"output['Resource Group'] = ws.resource_group\n",
|
||||
"output['Location'] = ws.location\n",
|
||||
"output['Project Directory'] = project_folder\n",
|
||||
"output['Experiment Name'] = experiment.name\n",
|
||||
"pd.set_option('display.max_colwidth', -1)\n",
|
||||
"outputDf = pd.DataFrame(data = output, index = [''])\n",
|
||||
"outputDf.T"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# The data referenced here was a 1MB simple random sample of the Chicago Crime data into a local temporary directory.\n",
|
||||
"example_data = 'https://dprepdata.blob.core.windows.net/demo/crime0-random.csv'\n",
|
||||
"dataset = Dataset.Tabular.from_delimited_files(example_data)\n",
|
||||
"dataset.take(5).to_pandas_dataframe()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Review the data\n",
|
||||
"\n",
|
||||
"You can peek the result of a `TabularDataset` at any range using `skip(i)` and `take(j).to_pandas_dataframe()`. Doing so evaluates only `j` records, which makes it fast even against large datasets.\n",
|
||||
"\n",
|
||||
"`TabularDataset` objects are immutable and are composed of a list of subsetting transformations (optional)."
|
||||
]
|
||||
},
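As a quick sketch of the `skip(i)`/`take(j)` pattern described above:

```python
# Peek at five records starting at row 100 without materializing
# the full dataset (sketch).
dataset.skip(100).take(5).to_pandas_dataframe()
```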
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"X = dataset.drop_columns(columns=['Primary Type', 'FBI Code'])\n",
|
||||
"y = dataset.keep_columns(columns=['Primary Type'], validate=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Train\n",
|
||||
"\n",
|
||||
"This creates a general AutoML settings object applicable for both local and remote runs."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"automl_settings = {\n",
|
||||
" \"iteration_timeout_minutes\" : 10,\n",
|
||||
" \"iterations\" : 2,\n",
|
||||
" \"primary_metric\" : 'AUC_weighted',\n",
|
||||
" \"preprocess\" : True,\n",
|
||||
" \"verbosity\" : logging.INFO\n",
|
||||
"}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create or Attach an AmlCompute cluster"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.compute import AmlCompute\n",
|
||||
"from azureml.core.compute import ComputeTarget\n",
|
||||
"\n",
|
||||
"# Choose a name for your cluster.\n",
|
||||
"amlcompute_cluster_name = \"automlc2\"\n",
|
||||
"\n",
|
||||
"found = False\n",
|
||||
"\n",
|
||||
"# Check if this compute target already exists in the workspace.\n",
|
||||
"\n",
|
||||
"cts = ws.compute_targets\n",
|
||||
"if amlcompute_cluster_name in cts and cts[amlcompute_cluster_name].type == 'AmlCompute':\n",
|
||||
" found = True\n",
|
||||
" print('Found existing compute target.')\n",
|
||||
" compute_target = cts[amlcompute_cluster_name]\n",
|
||||
"\n",
|
||||
"if not found:\n",
|
||||
" print('Creating a new compute target...')\n",
|
||||
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\", # for GPU, use \"STANDARD_NC6\"\n",
|
||||
" #vm_priority = 'lowpriority', # optional\n",
|
||||
" max_nodes = 6)\n",
|
||||
"\n",
|
||||
" # Create the cluster.\\n\",\n",
|
||||
" compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)\n",
|
||||
"\n",
|
||||
"print('Checking cluster status...')\n",
|
||||
"# Can poll for a minimum number of nodes and for a specific timeout.\n",
|
||||
"# If no min_node_count is provided, it will use the scale settings for the cluster.\n",
|
||||
"compute_target.wait_for_completion(show_output = True, min_node_count = None, timeout_in_minutes = 20)\n",
|
||||
"\n",
|
||||
"# For a more detailed view of current AmlCompute status, use get_status()."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.runconfig import RunConfiguration\n",
|
||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||
"import pkg_resources\n",
|
||||
"\n",
|
||||
"# create a new RunConfig object\n",
|
||||
"conda_run_config = RunConfiguration(framework=\"python\")\n",
|
||||
"\n",
|
||||
"# Set compute target to AmlCompute\n",
|
||||
"conda_run_config.target = compute_target\n",
|
||||
"conda_run_config.environment.docker.enabled = True\n",
|
||||
"\n",
|
||||
"cd = CondaDependencies.create(conda_packages=['numpy','py-xgboost<=0.80'])\n",
|
||||
"conda_run_config.environment.python.conda_dependencies = cd"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Pass Data with `TabularDataset` Objects\n",
|
||||
"\n",
|
||||
"The `TabularDataset` objects captured above can also be passed to the `submit` method for a remote run. AutoML will serialize the `TabularDataset` object and send it to the remote compute target. The `TabularDataset` will not be evaluated locally."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"automl_config = AutoMLConfig(task = 'classification',\n",
|
||||
" debug_log = 'automl_errors.log',\n",
|
||||
" path = project_folder,\n",
|
||||
" run_configuration=conda_run_config,\n",
|
||||
" X = X,\n",
|
||||
" y = y,\n",
|
||||
" **automl_settings)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"remote_run = experiment.submit(automl_config, show_output = True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"remote_run"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Pre-process cache cleanup\n",
|
||||
"The preprocess data gets cache at user default file store. When the run is completed the cache can be cleaned by running below cell"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"remote_run.clean_preprocessor_cache()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Cancelling Runs\n",
|
||||
"You can cancel ongoing remote runs using the `cancel` and `cancel_iteration` functions."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Cancel the ongoing experiment and stop scheduling new iterations.\n",
|
||||
"# remote_run.cancel()\n",
|
||||
"\n",
|
||||
"# Cancel iteration 1 and move onto iteration 2.\n",
|
||||
"# remote_run.cancel_iteration(1)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Results"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Widget for Monitoring Runs\n",
|
||||
"\n",
|
||||
"The widget will first report a \"loading\" status while running the first iteration. After completing the first iteration, an auto-updating graph and table will be shown. The widget will refresh once per minute, so you should see the graph update as child runs complete.\n",
|
||||
"\n",
|
||||
"**Note:** The widget displays a link at the bottom. Use this link to open a web interface to explore the individual run details."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.widgets import RunDetails\n",
|
||||
"RunDetails(remote_run).show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Retrieve All Child Runs\n",
|
||||
"You can also use SDK methods to fetch all the child runs and see individual metrics that we log."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"children = list(remote_run.get_children())\n",
|
||||
"metricslist = {}\n",
|
||||
"for run in children:\n",
|
||||
" properties = run.get_properties()\n",
|
||||
" metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)}\n",
|
||||
" metricslist[int(properties['iteration'])] = metrics\n",
|
||||
" \n",
|
||||
"rundata = pd.DataFrame(metricslist).sort_index(1)\n",
|
||||
"rundata"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Retrieve the Best Model\n",
|
||||
"\n",
|
||||
"Below we select the best pipeline from our iterations. The `get_output` method returns the best run and the fitted model. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"best_run, fitted_model = remote_run.get_output()\n",
|
||||
"print(best_run)\n",
|
||||
"print(fitted_model)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Best Model Based on Any Other Metric\n",
|
||||
"Show the run and the model that has the smallest `log_loss` value:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"lookup_metric = \"log_loss\"\n",
|
||||
"best_run, fitted_model = remote_run.get_output(metric = lookup_metric)\n",
|
||||
"print(best_run)\n",
|
||||
"print(fitted_model)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Model from a Specific Iteration\n",
|
||||
"Show the run and the model from the first iteration:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"iteration = 0\n",
|
||||
"best_run, fitted_model = remote_run.get_output(iteration = iteration)\n",
|
||||
"print(best_run)\n",
|
||||
"print(fitted_model)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Test\n",
|
||||
"\n",
|
||||
"#### Load Test Data\n",
|
||||
"For the test data, it should have the same preparation step as the train data. Otherwise it might get failed at the preprocessing step."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dataset_test = Dataset.Tabular.from_delimited_files(path='https://dprepdata.blob.core.windows.net/demo/crime0-test.csv')\n",
|
||||
"\n",
|
||||
"df_test = dataset_test.to_pandas_dataframe()\n",
|
||||
"df_test = df_test[pd.notnull(df_test['Primary Type'])]\n",
|
||||
"\n",
|
||||
"y_test = df_test[['Primary Type']]\n",
|
||||
"X_test = df_test.drop(['Primary Type', 'FBI Code'], axis=1)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Testing Our Best Fitted Model\n",
|
||||
"We will use confusion matrix to see how our model works."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from pandas_ml import ConfusionMatrix\n",
|
||||
"\n",
|
||||
"ypred = fitted_model.predict(X_test)\n",
|
||||
"\n",
|
||||
"cm = ConfusionMatrix(y_test['Primary Type'], ypred)\n",
|
||||
"\n",
|
||||
"print(cm)\n",
|
||||
"\n",
|
||||
"cm.plot()"
|
||||
]
|
||||
}
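If `pandas_ml` is unavailable in your environment, a hedged alternative with scikit-learn (assumed installed) produces the same matrix, minus the built-in plot:

```python
# Equivalent confusion matrix via scikit-learn (sketch).
from sklearn.metrics import confusion_matrix
import pandas as pd

cm = confusion_matrix(y_test['Primary Type'], ypred)
print(pd.DataFrame(cm))
```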
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "savitam"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
"name": "python36"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -1,402 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||
"\n",
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Automated Machine Learning\n",
|
||||
"_**Load Data using `TabularDataset` for Local Execution**_\n",
|
||||
"\n",
|
||||
"## Contents\n",
|
||||
"1. [Introduction](#Introduction)\n",
|
||||
"1. [Setup](#Setup)\n",
|
||||
"1. [Data](#Data)\n",
|
||||
"1. [Train](#Train)\n",
|
||||
"1. [Results](#Results)\n",
|
||||
"1. [Test](#Test)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Introduction\n",
|
||||
"In this example we showcase how you can use AzureML Dataset to load data for AutoML.\n",
|
||||
"\n",
|
||||
"Make sure you have executed the [configuration](../../../configuration.ipynb) before running this notebook.\n",
|
||||
"\n",
|
||||
"In this notebook you will learn how to:\n",
|
||||
"1. Create a `TabularDataset` pointing to the training data.\n",
|
||||
"2. Pass the `TabularDataset` to AutoML for a local run."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Setup"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"As part of the setup you have already created an Azure ML `Workspace` object. For AutoML you will need to create an `Experiment` object, which is a named object in a `Workspace` used to run experiments."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import logging\n",
|
||||
"\n",
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"import azureml.core\n",
|
||||
"from azureml.core.experiment import Experiment\n",
|
||||
"from azureml.core.workspace import Workspace\n",
|
||||
"from azureml.core.dataset import Dataset\n",
|
||||
"from azureml.train.automl import AutoMLConfig"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ws = Workspace.from_config()\n",
|
||||
" \n",
|
||||
"# choose a name for experiment\n",
|
||||
"experiment_name = 'automl-dataset-local'\n",
|
||||
"# project folder\n",
|
||||
"project_folder = './sample_projects/automl-dataset-local'\n",
|
||||
" \n",
|
||||
"experiment = Experiment(ws, experiment_name)\n",
|
||||
" \n",
|
||||
"output = {}\n",
|
||||
"output['SDK version'] = azureml.core.VERSION\n",
|
||||
"output['Subscription ID'] = ws.subscription_id\n",
|
||||
"output['Workspace Name'] = ws.name\n",
|
||||
"output['Resource Group'] = ws.resource_group\n",
|
||||
"output['Location'] = ws.location\n",
|
||||
"output['Project Directory'] = project_folder\n",
|
||||
"output['Experiment Name'] = experiment.name\n",
|
||||
"pd.set_option('display.max_colwidth', -1)\n",
|
||||
"outputDf = pd.DataFrame(data = output, index = [''])\n",
|
||||
"outputDf.T"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# The data referenced here was a 1MB simple random sample of the Chicago Crime data into a local temporary directory.\n",
|
||||
"example_data = 'https://dprepdata.blob.core.windows.net/demo/crime0-random.csv'\n",
|
||||
"dataset = Dataset.Tabular.from_delimited_files(example_data)\n",
|
||||
"dataset.take(5).to_pandas_dataframe()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Review the data\n",
|
||||
"\n",
|
||||
"You can peek the result of a `TabularDataset` at any range using `skip(i)` and `take(j).to_pandas_dataframe()`. Doing so evaluates only `j` records, which makes it fast even against large datasets.\n",
|
||||
"\n",
|
||||
"`TabularDataset` objects are immutable and are composed of a list of subsetting transformations (optional)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"X = dataset.drop_columns(columns=['Primary Type', 'FBI Code'])\n",
|
||||
"y = dataset.keep_columns(columns=['Primary Type'], validate=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Train\n",
|
||||
"\n",
|
||||
"This creates a general AutoML settings object applicable for both local and remote runs."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"automl_settings = {\n",
|
||||
" \"iteration_timeout_minutes\" : 10,\n",
|
||||
" \"iterations\" : 2,\n",
|
||||
" \"primary_metric\" : 'AUC_weighted',\n",
|
||||
" \"preprocess\" : True,\n",
|
||||
" \"verbosity\" : logging.INFO\n",
|
||||
"}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Pass Data with `TabularDataset` Objects\n",
|
||||
"\n",
|
||||
"The `TabularDataset` objects captured above can be passed to the `submit` method for a local run. AutoML will retrieve the results from the `TabularDataset` for model training."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"automl_config = AutoMLConfig(task = 'classification',\n",
|
||||
" debug_log = 'automl_errors.log',\n",
|
||||
" X = X,\n",
|
||||
" y = y,\n",
|
||||
" **automl_settings)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"local_run = experiment.submit(automl_config, show_output = True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"local_run"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Results"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Widget for Monitoring Runs\n",
|
||||
"\n",
|
||||
"The widget will first report a \"loading\" status while running the first iteration. After completing the first iteration, an auto-updating graph and table will be shown. The widget will refresh once per minute, so you should see the graph update as child runs complete.\n",
|
||||
"\n",
|
||||
"**Note:** The widget displays a link at the bottom. Use this link to open a web interface to explore the individual run details."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.widgets import RunDetails\n",
|
||||
"RunDetails(local_run).show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Retrieve All Child Runs\n",
|
||||
"You can also use SDK methods to fetch all the child runs and see individual metrics that we log."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"children = list(local_run.get_children())\n",
|
||||
"metricslist = {}\n",
|
||||
"for run in children:\n",
|
||||
" properties = run.get_properties()\n",
|
||||
" metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)}\n",
|
||||
" metricslist[int(properties['iteration'])] = metrics\n",
|
||||
" \n",
|
||||
"rundata = pd.DataFrame(metricslist).sort_index(1)\n",
|
||||
"rundata"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Retrieve the Best Model\n",
|
||||
"\n",
|
||||
"Below we select the best pipeline from our iterations. The `get_output` method returns the best run and the fitted model. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"best_run, fitted_model = local_run.get_output()\n",
|
||||
"print(best_run)\n",
|
||||
"print(fitted_model)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Best Model Based on Any Other Metric\n",
|
||||
"Show the run and the model that has the smallest `log_loss` value:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"lookup_metric = \"log_loss\"\n",
|
||||
"best_run, fitted_model = local_run.get_output(metric = lookup_metric)\n",
|
||||
"print(best_run)\n",
|
||||
"print(fitted_model)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Model from a Specific Iteration\n",
|
||||
"Show the run and the model from the first iteration:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"iteration = 0\n",
|
||||
"best_run, fitted_model = local_run.get_output(iteration = iteration)\n",
|
||||
"print(best_run)\n",
|
||||
"print(fitted_model)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Test\n",
|
||||
"\n",
|
||||
"#### Load Test Data\n",
|
||||
"For the test data, it should have the same preparation step as the train data. Otherwise it might get failed at the preprocessing step."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dataset_test = Dataset.Tabular.from_delimited_files(path='https://dprepdata.blob.core.windows.net/demo/crime0-test.csv')\n",
"\n",
"df_test = dataset_test.to_pandas_dataframe()\n",
"df_test = df_test[pd.notnull(df_test['Primary Type'])]\n",
"\n",
"y_test = df_test[['Primary Type']]\n",
"X_test = df_test.drop(['Primary Type', 'FBI Code'], axis=1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Testing Our Best Fitted Model\n",
"We will use confusion matrix to see how our model works."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from pandas_ml import ConfusionMatrix\n",
"\n",
"ypred = fitted_model.predict(X_test)\n",
"\n",
"cm = ConfusionMatrix(y_test['Primary Type'], ypred)\n",
"\n",
"print(cm)\n",
"\n",
"cm.plot()"
]
}
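If `pandas_ml` is unavailable (the package is no longer maintained and fails against recent pandas releases), the same confusion-matrix view can be produced with scikit-learn. A minimal sketch, assuming the `y_test` and `ypred` variables from the cell above:

```python
# Hedged alternative to pandas_ml.ConfusionMatrix using scikit-learn.
import pandas as pd
from sklearn.metrics import confusion_matrix

labels = sorted(y_test['Primary Type'].unique())
cm = confusion_matrix(y_test['Primary Type'], ypred, labels=labels)
# Wrap in a DataFrame so rows and columns carry class labels,
# similar to pandas_ml's printed output.
pd.DataFrame(cm, index=labels, columns=labels)
```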
],
"metadata": {
"authors": [
{
"name": "savitam"
}
],
"kernelspec": {
"display_name": "Python 3.6",
"language": "python",
"name": "python36"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
@@ -1,8 +0,0 @@
name: auto-ml-dataset
dependencies:
- pip:
- azureml-sdk
- azureml-train-automl
- azureml-widgets
- matplotlib
- pandas_ml
@@ -231,7 +231,6 @@
"automl_config = AutoMLConfig(task='forecasting',\n",
" debug_log='automl_nyc_energy_errors.log',\n",
" primary_metric='normalized_root_mean_squared_error',\n",
" blacklist_models = ['ExtremeRandomTrees'],\n",
" iterations=10,\n",
" iteration_timeout_minutes=5,\n",
" X=X_train,\n",
@@ -482,7 +481,7 @@
"automl_config_lags = AutoMLConfig(task='forecasting',\n",
" debug_log='automl_nyc_energy_errors.log',\n",
" primary_metric='normalized_root_mean_squared_error',\n",
" blacklist_models=['ElasticNet','ExtremeRandomTrees','GradientBoosting'],\n",
" blacklist_models=['ElasticNet'],\n",
" iterations=10,\n",
" iteration_timeout_minutes=10,\n",
" X=X_train,\n",
@@ -244,8 +244,7 @@
"|**X**|Training matrix of features as a pandas DataFrame, shape = [n_training_samples, n_features]|\n",
"|**y**|Target values as a numpy.ndarray, shape = [n_training_samples, ]|\n",
"|**n_cross_validations**|Number of cross-validation folds to use for model/pipeline selection|\n",
"|**enable_voting_ensemble**|Allow AutoML to create a Voting ensemble of the best performing models\n",
"|**enable_stack_ensemble**|Allow AutoML to create a Stack ensemble of the best performing models\n",
"|**enable_ensembling**|Allow AutoML to create ensembles of the best performing models\n",
"|**debug_log**|Log file path for writing debugging information\n",
"|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder.|\n",
"|**time_column_name**|Name of the datetime column in the input data|\n",
@@ -274,8 +273,7 @@
" X=X_train,\n",
" y=y_train,\n",
" n_cross_validations=3,\n",
" enable_voting_ensemble=False,\n",
" enable_stack_ensemble=False,\n",
" enable_ensembling=False,\n",
" path=project_folder,\n",
" verbosity=logging.INFO,\n",
" **time_series_settings)"
@@ -665,10 +663,10 @@
"conda_env_file_name = 'fcast_env.yml'\n",
"\n",
"dependencies = ml_run.get_run_sdk_dependencies(iteration = best_iteration)\n",
"for p in ['azureml-train-automl', 'azureml-core']:\n",
"for p in ['azureml-train-automl', 'azureml-sdk', 'azureml-core']:\n",
" print('{}\\t{}'.format(p, dependencies[p]))\n",
"\n",
"myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn'], pip_packages=['azureml-train-automl'])\n",
"myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn'], pip_packages=['azureml-sdk[automl]'])\n",
"\n",
"myenv.save_to_file('.', conda_env_file_name)"
]
@@ -690,7 +688,7 @@
" content = cefr.read()\n",
"\n",
"with open(conda_env_file_name, 'w') as cefw:\n",
" cefw.write(content.replace(azureml.core.VERSION, dependencies['azureml-train-automl']))\n",
" cefw.write(content.replace(azureml.core.VERSION, dependencies['azureml-sdk']))\n",
"\n",
"# Substitute the actual model id in the script file.\n",
"\n",
@@ -70,12 +70,13 @@
"import numpy as np\n",
"import pandas as pd\n",
"import os\n",
"from sklearn.model_selection import train_test_split\n",
"import azureml.dataprep as dprep\n",
" \n",
"\n",
"import azureml.core\n",
"from azureml.core.experiment import Experiment\n",
"from azureml.core.workspace import Workspace\n",
"from azureml.core.dataset import Dataset\n",
"from azureml.train.automl import AutoMLConfig"
]
},
@@ -146,12 +147,11 @@
" # Create the cluster.\n",
" compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)\n",
" \n",
"print('Checking cluster status...')\n",
"# Can poll for a minimum number of nodes and for a specific timeout.\n",
"# If no min_node_count is provided, it will use the scale settings for the cluster.\n",
"compute_target.wait_for_completion(show_output = True, min_node_count = None, timeout_in_minutes = 20)\n",
" # Can poll for a minimum number of nodes and for a specific timeout.\n",
" # If no min_node_count is provided, it will use the scale settings for the cluster.\n",
" compute_target.wait_for_completion(show_output = True, min_node_count = None, timeout_in_minutes = 20)\n",
" \n",
"# For a more detailed view of current AmlCompute status, use get_status()."
" # For a more detailed view of current AmlCompute status, use get_status()."
]
},
{
@@ -192,8 +192,11 @@
"# Set compute target to AmlCompute\n",
"conda_run_config.target = compute_target\n",
"conda_run_config.environment.docker.enabled = True\n",
"conda_run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n",
"\n",
"cd = CondaDependencies.create(conda_packages=['numpy', 'py-xgboost<=0.80'])\n",
"dprep_dependency = 'azureml-dataprep==' + pkg_resources.get_distribution(\"azureml-dataprep\").version\n",
"\n",
"cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]', dprep_dependency], conda_packages=['numpy'])\n",
"conda_run_config.environment.python.conda_dependencies = cd"
]
},
@@ -203,7 +206,7 @@
"source": [
"### Load Data\n",
"\n",
"Here create the script to be run in azure compute for loading the data, load the concrete strength dataset into the X and y variables. Next, split the data using random_split and return X_train and y_train for training the model. Finally, return X_train and y_train for training the model."
|
||||
"Here create the script to be run in azure compute for loading the data, load the concrete strength dataset into the X and y variables. Next, split the data using train_test_split and return X_train and y_train for training the model. Finally, return X_train and y_train for training the model."
]
},
{
@@ -213,12 +216,13 @@
"outputs": [],
"source": [
"data = \"https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/compresive_strength_concrete.csv\"\n",
"dataset = Dataset.Tabular.from_delimited_files(data)\n",
"X = dataset.drop_columns(columns=['CONCRETE'])\n",
"y = dataset.keep_columns(columns=['CONCRETE'], validate=True)\n",
"dflow = dprep.read_csv(data, infer_column_types=True)\n",
"dflow.get_profile()\n",
"X = dflow.drop_columns(columns=['CONCRETE'])\n",
"y = dflow.keep_columns(columns=['CONCRETE'], validate_column_exists=True)\n",
"X_train, X_test = X.random_split(percentage=0.8, seed=223)\n",
"y_train, y_test = y.random_split(percentage=0.8, seed=223) \n",
"dataset.take(5).to_pandas_dataframe()"
"dflow.head()"
]
},
{
@@ -480,7 +484,7 @@
"metadata": {},
"outputs": [],
"source": [
"for p in ['azureml-train-automl', 'azureml-core']:\n",
"for p in ['azureml-train-automl', 'azureml-sdk', 'azureml-core']:\n",
" print('{}\\t{}'.format(p, dependencies[p]))"
]
},
@@ -490,7 +494,9 @@
"metadata": {},
"outputs": [],
"source": [
"myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn','py-xgboost==0.80'], pip_packages=['azureml-train-automl'])\n",
"from azureml.core.conda_dependencies import CondaDependencies\n",
"\n",
"myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn'], pip_packages=['azureml-sdk[automl]'])\n",
"\n",
"conda_env_file_name = 'myenv.yml'\n",
"myenv.save_to_file('.', conda_env_file_name)"
@@ -510,7 +516,7 @@
" content = cefr.read()\n",
"\n",
"with open(conda_env_file_name, 'w') as cefw:\n",
" cefw.write(content.replace(azureml.core.VERSION, dependencies['azureml-train-automl']))\n",
" cefw.write(content.replace(azureml.core.VERSION, dependencies['azureml-sdk']))\n",
"\n",
"# Substitute the actual model id in the script file.\n",
"\n",
@@ -2,8 +2,6 @@ name: auto-ml-regression-concrete-strength
dependencies:
- pip:
- azureml-sdk
- azureml-defaults
- azureml-explain-model
- azureml-train-automl
- azureml-widgets
- matplotlib
@@ -70,12 +70,13 @@
"import numpy as np\n",
"import pandas as pd\n",
"import os\n",
"from sklearn.model_selection import train_test_split\n",
"import azureml.dataprep as dprep\n",
" \n",
"\n",
"import azureml.core\n",
"from azureml.core.experiment import Experiment\n",
"from azureml.core.workspace import Workspace\n",
"from azureml.core.dataset import Dataset\n",
"from azureml.train.automl import AutoMLConfig"
]
},
@@ -146,12 +147,11 @@
" # Create the cluster.\n",
" compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)\n",
" \n",
"print('Checking cluster status...')\n",
"# Can poll for a minimum number of nodes and for a specific timeout.\n",
"# If no min_node_count is provided, it will use the scale settings for the cluster.\n",
"compute_target.wait_for_completion(show_output = True, min_node_count = None, timeout_in_minutes = 20)\n",
" # Can poll for a minimum number of nodes and for a specific timeout.\n",
" # If no min_node_count is provided, it will use the scale settings for the cluster.\n",
" compute_target.wait_for_completion(show_output = True, min_node_count = None, timeout_in_minutes = 20)\n",
" \n",
"# For a more detailed view of current AmlCompute status, use get_status()."
" # For a more detailed view of current AmlCompute status, use get_status()."
]
},
{
@@ -192,8 +192,11 @@
"# Set compute target to AmlCompute\n",
"conda_run_config.target = compute_target\n",
"conda_run_config.environment.docker.enabled = True\n",
"conda_run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n",
"\n",
"cd = CondaDependencies.create(conda_packages=['numpy', 'py-xgboost<=0.80'])\n",
"dprep_dependency = 'azureml-dataprep==' + pkg_resources.get_distribution(\"azureml-dataprep\").version\n",
"\n",
"cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]', dprep_dependency], conda_packages=['numpy'])\n",
"conda_run_config.environment.python.conda_dependencies = cd"
]
},
@@ -203,7 +206,7 @@
"source": [
"### Load Data\n",
"\n",
"Here create the script to be run in azure compute for loading the data, load the hardware dataset into the X and y variables. Next split the data using random_split and return X_train and y_train for training the model."
|
||||
"Here create the script to be run in azure compute for loading the data, load the hardware dataset into the X and y variables. Next split the data using train_test_split and return X_train and y_train for training the model."
]
},
{
@@ -213,12 +216,13 @@
"outputs": [],
"source": [
"data = \"https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/machineData.csv\"\n",
"dataset = Dataset.Tabular.from_delimited_files(data)\n",
"X = dataset.drop_columns(columns=['ERP'])\n",
"y = dataset.keep_columns(columns=['ERP'], validate=True)\n",
"dflow = dprep.read_csv(data, infer_column_types=True)\n",
"dflow.get_profile()\n",
"X = dflow.drop_columns(columns=['ERP'])\n",
"y = dflow.keep_columns(columns=['ERP'], validate_column_exists=True)\n",
"X_train, X_test = X.random_split(percentage=0.8, seed=223)\n",
"y_train, y_test = y.random_split(percentage=0.8, seed=223)\n",
"dataset.take(5).to_pandas_dataframe()"
"y_train, y_test = y.random_split(percentage=0.8, seed=223) \n",
"dflow.head()"
]
},
{
@@ -498,7 +502,7 @@
"metadata": {},
"outputs": [],
"source": [
"for p in ['azureml-train-automl', 'azureml-core']:\n",
"for p in ['azureml-train-automl', 'azureml-sdk', 'azureml-core']:\n",
" print('{}\\t{}'.format(p, dependencies[p]))"
]
},
@@ -508,7 +512,7 @@
"metadata": {},
"outputs": [],
"source": [
"myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn','py-xgboost==0.80'], pip_packages=['azureml-train-automl'])\n",
"myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn'], pip_packages=['azureml-sdk[automl]'])\n",
"\n",
"conda_env_file_name = 'myenv.yml'\n",
"myenv.save_to_file('.', conda_env_file_name)"
@@ -528,7 +532,7 @@
" content = cefr.read()\n",
"\n",
"with open(conda_env_file_name, 'w') as cefw:\n",
" cefw.write(content.replace(azureml.core.VERSION, dependencies['azureml-train-automl']))\n",
" cefw.write(content.replace(azureml.core.VERSION, dependencies['azureml-sdk']))\n",
"\n",
"# Substitute the actual model id in the script file.\n",
"\n",
@@ -2,8 +2,6 @@ name: auto-ml-regression-hardware-performance
dependencies:
- pip:
- azureml-sdk
- azureml-defaults
- azureml-explain-model
- azureml-train-automl
- azureml-widgets
- matplotlib
@@ -73,7 +73,10 @@
"source": [
"import logging\n",
"import os\n",
"import csv\n",
"\n",
"from matplotlib import pyplot as plt\n",
"import numpy as np\n",
"import pandas as pd\n",
"from sklearn import datasets\n",
"from sklearn.model_selection import train_test_split\n",
@@ -81,8 +84,8 @@
"import azureml.core\n",
"from azureml.core.experiment import Experiment\n",
"from azureml.core.workspace import Workspace\n",
"from azureml.core.dataset import Dataset\n",
"from azureml.train.automl import AutoMLConfig"
"from azureml.train.automl import AutoMLConfig\n",
"import azureml.dataprep as dprep"
]
},
{
@@ -134,7 +137,7 @@
"from azureml.core.compute import ComputeTarget\n",
"\n",
"# Choose a name for your cluster.\n",
"amlcompute_cluster_name = \"automlc2\"\n",
"amlcompute_cluster_name = \"cpu-cluster\"\n",
"\n",
"found = False\n",
"# Check if this compute target already exists in the workspace.\n",
@@ -153,12 +156,11 @@
" # Create the cluster.\\n\",\n",
" compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)\n",
"\n",
"print('Checking cluster status...')\n",
"# Can poll for a minimum number of nodes and for a specific timeout.\n",
"# If no min_node_count is provided, it will use the scale settings for the cluster.\n",
"compute_target.wait_for_completion(show_output = True, min_node_count = None, timeout_in_minutes = 20)\n",
" # Can poll for a minimum number of nodes and for a specific timeout.\n",
" # If no min_node_count is provided, it will use the scale settings for the cluster.\n",
" compute_target.wait_for_completion(show_output = True, min_node_count = None, timeout_in_minutes = 20)\n",
"\n",
"# For a more detailed view of current AmlCompute status, use get_status()."
" # For a more detailed view of current AmlCompute status, use get_status()."
]
},
{
@@ -234,8 +236,11 @@
"# Set compute target to AmlCompute\n",
"conda_run_config.target = compute_target\n",
"conda_run_config.environment.docker.enabled = True\n",
"conda_run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n",
"\n",
"cd = CondaDependencies.create(conda_packages=['numpy','py-xgboost<=0.80'])\n",
"dprep_dependency = 'azureml-dataprep==' + pkg_resources.get_distribution(\"azureml-dataprep\").version\n",
"\n",
"cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]', dprep_dependency], conda_packages=['numpy','py-xgboost<=0.80'])\n",
"conda_run_config.environment.python.conda_dependencies = cd"
]
},
@@ -243,9 +248,9 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"### Creating a TabularDataset\n",
"### Dprep reference\n",
"\n",
"Defined X and y as `TabularDataset`s, which are passed to automated machine learning in the AutoMLConfig."
|
||||
"Defined X and y as dprep references, which are passed to automated machine learning in the AutoMLConfig."
]
},
{
@@ -254,8 +259,8 @@
"metadata": {},
"outputs": [],
"source": [
"X = Dataset.Tabular.from_delimited_files(path=ds.path('irisdata/X_train.csv'))\n",
"y = Dataset.Tabular.from_delimited_files(path=ds.path('irisdata/y_train.csv'))"
"X = dprep.read_csv(path=ds.path('irisdata/X_train.csv'), infer_column_types=True)\n",
"y = dprep.read_csv(path=ds.path('irisdata/y_train.csv'), infer_column_types=True)"
]
},
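Either form of X and y is then handed to `AutoMLConfig`. A minimal sketch under assumed settings (the task, metric, and iteration count here are illustrative, not taken from this notebook; `conda_run_config` is the run configuration built earlier):

```python
from azureml.train.automl import AutoMLConfig

# Illustrative argument values only.
automl_config = AutoMLConfig(task='classification',
                             primary_metric='AUC_weighted',  # assumed metric
                             iterations=10,
                             X=X,
                             y=y,
                             run_configuration=conda_run_config)
```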
{
@@ -493,7 +498,8 @@
" res_path = 'onnx_resource.json'\n",
" run.download_file(name=constants.MODEL_RESOURCE_PATH_ONNX, output_file_path=res_path)\n",
" with open(res_path) as f:\n",
" return json.load(f)\n",
" onnx_res = json.load(f)\n",
" return onnx_res\n",
"\n",
"if onnxrt_present and python_version_compatible: \n",
" mdl_bytes = onnx_mdl.SerializeToString()\n",
@@ -2,8 +2,6 @@ name: auto-ml-remote-amlcompute-with-onnx
dependencies:
- pip:
- azureml-sdk
- azureml-defaults
- azureml-explain-model
- azureml-train-automl
- azureml-widgets
- matplotlib
@@ -74,6 +74,7 @@
"source": [
"import logging\n",
"import os\n",
"import csv\n",
"\n",
"from matplotlib import pyplot as plt\n",
"import numpy as np\n",
@@ -83,8 +84,8 @@
"import azureml.core\n",
"from azureml.core.experiment import Experiment\n",
"from azureml.core.workspace import Workspace\n",
"from azureml.core.dataset import Dataset\n",
"from azureml.train.automl import AutoMLConfig"
"from azureml.train.automl import AutoMLConfig\n",
"import azureml.dataprep as dprep"
]
},
{
@@ -136,7 +137,7 @@
"from azureml.core.compute import ComputeTarget\n",
"\n",
"# Choose a name for your cluster.\n",
"amlcompute_cluster_name = \"automlc2\"\n",
"amlcompute_cluster_name = \"cpu-cluster\"\n",
"\n",
"found = False\n",
"# Check if this compute target already exists in the workspace.\n",
@@ -155,12 +156,11 @@
" # Create the cluster.\\n\",\n",
" compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)\n",
"\n",
"print('Checking cluster status...')\n",
"# Can poll for a minimum number of nodes and for a specific timeout.\n",
"# If no min_node_count is provided, it will use the scale settings for the cluster.\n",
"compute_target.wait_for_completion(show_output = True, min_node_count = None, timeout_in_minutes = 20)\n",
" # Can poll for a minimum number of nodes and for a specific timeout.\n",
" # If no min_node_count is provided, it will use the scale settings for the cluster.\n",
" compute_target.wait_for_completion(show_output = True, min_node_count = None, timeout_in_minutes = 20)\n",
"\n",
"# For a more detailed view of current AmlCompute status, use get_status()."
" # For a more detailed view of current AmlCompute status, use get_status()."
]
},
{
@@ -210,8 +210,11 @@
"# Set compute target to AmlCompute\n",
"conda_run_config.target = compute_target\n",
"conda_run_config.environment.docker.enabled = True\n",
"conda_run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n",
"\n",
"cd = CondaDependencies.create(conda_packages=['numpy','py-xgboost<=0.80'])\n",
"dprep_dependency = 'azureml-dataprep==' + pkg_resources.get_distribution(\"azureml-dataprep\").version\n",
"\n",
"cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]', dprep_dependency], conda_packages=['numpy','py-xgboost<=0.80'])\n",
"conda_run_config.environment.python.conda_dependencies = cd"
]
},
@@ -219,9 +222,9 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"### Creating TabularDataset\n",
"### Dprep reference\n",
"\n",
"Defined X and y as `TabularDataset`s, which are passed to Automated ML in the AutoMLConfig. `from_delimited_files` by default sets the `infer_column_types` to true, which will infer the columns type automatically. If you do wish to manually set the column types, you can set the `set_column_types` argument to manually set the type of each columns."
|
||||
"Defined X and y as dprep references, which are passed to automated machine learning in the AutoMLConfig."
]
},
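A quick sketch of the `set_column_types` option mentioned above; the column name and type here are made up for illustration:

```python
from azureml.data.dataset_factory import DataType

# Hypothetical example: force one column to float instead of relying on
# automatic type inference.
X = Dataset.Tabular.from_delimited_files(
    path=ds.path('digitsdata/X_train.csv'),
    set_column_types={'pixel0': DataType.to_float()})
```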
{
@@ -230,8 +233,8 @@
"metadata": {},
"outputs": [],
"source": [
"X = Dataset.Tabular.from_delimited_files(path=ds.path('digitsdata/X_train.csv'))\n",
"y = Dataset.Tabular.from_delimited_files(path=ds.path('digitsdata/y_train.csv'))"
"X = dprep.read_csv(path=ds.path('digitsdata/X_train.csv'), infer_column_types=True)\n",
"y = dprep.read_csv(path=ds.path('digitsdata/y_train.csv'), infer_column_types=True)"
]
},
{
@@ -2,8 +2,6 @@ name: auto-ml-remote-amlcompute
dependencies:
- pip:
- azureml-sdk
- azureml-defaults
- azureml-explain-model
- azureml-train-automl
- azureml-widgets
- matplotlib
@@ -342,6 +342,7 @@
" n_cross_validations = n_cross_validations, \r\n",
" preprocess = preprocess,\r\n",
" verbosity = logging.INFO, \r\n",
" enable_ensembling = False,\r\n",
" X = X_train, \r\n",
" y = y_train, \r\n",
" path = project_folder,\r\n",
@@ -314,18 +314,25 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load Training Data Using Dataset"
"## Load Training Data Using DataPrep"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Automated ML takes a `TabularDataset` as input.\n",
"Automated ML takes a Dataflow as input.\n",
"\n",
"You are free to use the data preparation libraries/tools of your choice to do the require preparation and once you are done, you can write it to a datastore and create a TabularDataset from it.\n",
|
||||
"If you are familiar with Pandas and have done your data preparation work in Pandas already, you can use the `read_pandas_dataframe` method in dprep to convert the DataFrame to a Dataflow.\n",
"```python\n",
"df = pd.read_csv(...)\n",
"# apply some transforms\n",
"dprep.read_pandas_dataframe(df, temp_folder='/path/accessible/by/both/driver/and/worker')\n",
"```\n",
"\n",
"You will get the datastore you registered previously and pass it to Dataset for reading. The data comes from the digits dataset: `sklearn.datasets.load_digits()`. `DataPath` points to a specific location within a datastore. "
"If you just need to ingest data without doing any preparation, you can directly use AzureML Data Prep (Data Prep) to do so. The code below demonstrates this scenario. Data Prep also has data preparation capabilities, we have many [sample notebooks](https://github.com/Microsoft/AMLDataPrepDocs) demonstrating the capabilities.\n",
|
||||
"\n",
|
||||
"You will get the datastore you registered previously and pass it to Data Prep for reading. The data comes from the digits dataset: `sklearn.datasets.load_digits()`. `DataPath` points to a specific location within a datastore. "
]
},
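A minimal sketch of the pandas-to-datastore path described above (the file names are illustrative, and `datastore` is assumed to be the datastore registered earlier in the notebook):

```python
import pandas as pd
from azureml.core.dataset import Dataset

df = pd.read_csv('raw.csv')             # hypothetical input file
# ... apply pandas transforms here ...
df.to_csv('prepared.csv', index=False)
datastore.upload_files(['prepared.csv'], target_path='prepared', overwrite=True)
X_train = Dataset.Tabular.from_delimited_files(datastore.path('prepared/prepared.csv'))
```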
{
@@ -334,21 +341,21 @@
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.dataset import Dataset\n",
"import azureml.dataprep as dprep\n",
"from azureml.data.datapath import DataPath\n",
"\n",
"datastore = Datastore.get(workspace = ws, datastore_name = datastore_name)\n",
"\n",
"X_train = Dataset.Tabular.from_delimited_files(datastore.path('X.csv'))\n",
"y_train = Dataset.Tabular.from_delimited_files(datastore.path('y.csv'))"
"X_train = dprep.read_csv(datastore.path('X.csv'))\n",
"y_train = dprep.read_csv(datastore.path('y.csv')).to_long(dprep.ColumnSelector(term='.*', use_regex = True))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Review the TabularDataset\n",
"You can peek the result of a TabularDataset at any range using `skip(i)` and `take(j).to_pandas_dataframe()`. Doing so evaluates only j records for all the steps in the TabularDataset, which makes it fast even against large datasets."
"## Review the Data Preparation Result\n",
"You can peek the result of a Dataflow at any range using `skip(i)` and `head(j)`. Doing so evaluates only j records for all the steps in the Dataflow, which makes it fast even against large datasets."
]
},
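For instance, using the TabularDataset form shown above, the following evaluates just five records even when the underlying CSV is large:

```python
# Preview rows 5-9 of X_train without materializing the whole dataset.
X_train.skip(5).take(5).to_pandas_dataframe()
```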
{
@@ -357,7 +364,7 @@
"metadata": {},
"outputs": [],
"source": [
"X_train.take(5).to_pandas_dataframe()"
"X_train.get_profile()"
]
},
{
@@ -366,7 +373,7 @@
"metadata": {},
"outputs": [],
"source": [
"y_train.take(5).to_pandas_dataframe()"
"y_train.get_profile()"
]
},
{
@@ -331,18 +331,25 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load Training Data Using Dataset"
"## Load Training Data Using DataPrep"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Automated ML takes a `TabularDataset` as input.\n",
"Automated ML takes a Dataflow as input.\n",
"\n",
"You are free to use the data preparation libraries/tools of your choice to do the require preparation and once you are done, you can write it to a datastore and create a TabularDataset from it.\n",
|
||||
"If you are familiar with Pandas and have done your data preparation work in Pandas already, you can use the `read_pandas_dataframe` method in dprep to convert the DataFrame to a Dataflow.\n",
"```python\n",
"df = pd.read_csv(...)\n",
"# apply some transforms\n",
"dprep.read_pandas_dataframe(df, temp_folder='/path/accessible/by/both/driver/and/worker')\n",
"```\n",
"\n",
"You will get the datastore you registered previously and pass it to Dataset for reading. The data comes from the digits dataset: `sklearn.datasets.load_digits()`. `DataPath` points to a specific location within a datastore. "
"If you just need to ingest data without doing any preparation, you can directly use AzureML Data Prep (Data Prep) to do so. The code below demonstrates this scenario. Data Prep also has data preparation capabilities, we have many [sample notebooks](https://github.com/Microsoft/AMLDataPrepDocs) demonstrating the capabilities.\n",
|
||||
"\n",
|
||||
"You will get the datastore you registered previously and pass it to Data Prep for reading. The data comes from the digits dataset: `sklearn.datasets.load_digits()`. `DataPath` points to a specific location within a datastore. "
]
},
{
@@ -351,21 +358,21 @@
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.dataset import Dataset\n",
"import azureml.dataprep as dprep\n",
"from azureml.data.datapath import DataPath\n",
"\n",
"datastore = Datastore.get(workspace = ws, datastore_name = datastore_name)\n",
"\n",
"X_train = Dataset.Tabular.from_delimited_files(datastore.path('X.csv'))\n",
"y_train = Dataset.Tabular.from_delimited_files(datastore.path('y.csv'))"
"X_train = dprep.read_csv(datastore.path('X.csv'))\n",
"y_train = dprep.read_csv(datastore.path('y.csv')).to_long(dprep.ColumnSelector(term='.*', use_regex = True))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Review the TabularDataset\n",
"You can peek the result of a TabularDataset at any range using `skip(i)` and `take(j).to_pandas_dataframe()`. Doing so evaluates only j records for all the steps in the TabularDataset, which makes it fast even against large datasets."
"## Review the Data Preparation Result\n",
"You can peek the result of a Dataflow at any range using skip(i) and head(j). Doing so evaluates only j records for all the steps in the Dataflow, which makes it fast even against large datasets."
]
},
{
@@ -374,7 +381,7 @@
"metadata": {},
"outputs": [],
"source": [
"X_train.take(5).to_pandas_dataframe()"
"X_train.get_profile()"
]
},
{
@@ -383,7 +390,7 @@
"metadata": {},
"outputs": [],
"source": [
"y_train.take(5).to_pandas_dataframe()"
"y_train.get_profile()"
]
},
{
@@ -115,36 +115,6 @@
" workspace=ws)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Create Environment"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"You can now create and/or use an Environment object when deploying a Webservice. The Environment can have been previously registered with your Workspace, or it will be registered with it as a part of the Webservice deployment. Only Environments that were created using azureml-defaults version 1.0.48 or later will work with this new handling however.\n",
"\n",
"More information can be found in our [using environments notebook](../training/using-environments/using-environments.ipynb)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core import Environment\n",
"\n",
"env = Environment.from_conda_specification(name='deploytocloudenv', file_path='myenv.yml')\n",
"\n",
"# This is optional at this point\n",
"# env.register(workspace=ws)"
]
},
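If the environment was registered, a later session can fetch it by name instead of rebuilding it from the YAML file; a small sketch (the name matches the one used above):

```python
from azureml.core import Environment

# Retrieve a previously registered environment from the workspace.
env = Environment.get(workspace=ws, name='deploytocloudenv')
```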
{
"cell_type": "markdown",
"metadata": {},
@@ -183,7 +153,10 @@
"source": [
"from azureml.core.model import InferenceConfig\n",
"\n",
"inference_config = InferenceConfig(entry_script=\"score.py\", environment=env)"
"inference_config = InferenceConfig(runtime= \"python\", \n",
" entry_script=\"score.py\",\n",
" conda_file=\"myenv.yml\", \n",
" extra_docker_file_steps=\"helloworld.txt\")"
]
},
{
@@ -336,7 +336,7 @@
" num_replicas=1,\n",
" auth_enabled = False)\n",
"\n",
"aks_service_name ='my-aks-service-3'\n",
"aks_service_name ='my-aks-service'\n",
"\n",
"aks_service = Webservice.deploy_from_image(workspace = ws,\n",
" name = aks_service_name,\n",
@@ -404,7 +404,7 @@
" num_replicas=1,\n",
" auth_enabled = False)\n",
"\n",
"aks_service_name ='my-aks-service-1'\n",
"aks_service_name ='my-aks-service'\n",
"\n",
"aks_service = Webservice.deploy_from_image(workspace = ws,\n",
" name = aks_service_name,\n",
@@ -694,7 +694,7 @@
" num_replicas=1,\n",
" auth_enabled = False)\n",
"\n",
"aks_service_name ='my-aks-service-2'\n",
"aks_service_name ='my-aks-service'\n",
"\n",
"aks_service = Webservice.deploy_from_image(workspace = ws,\n",
" name = aks_service_name,\n",
@@ -22,7 +22,7 @@
"If you want to log custom traces, you will follow the standard deplyment process for AKS and you will:\n",
|
||||
"1. Update scoring file.\n",
|
||||
"2. Update aks configuration.\n",
|
||||
"3. Deploy the model with this new configuration. "
|
||||
"3. Build new image and deploy it. "
]
},
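As a sketch of step 1: custom traces are ordinary `print` calls in the scoring script, and once Application Insights is enabled for the service they are collected as trace messages. The `run` body below is illustrative, not this notebook's actual score.py:

```python
# score.py (sketch): stdout from init()/run() is collected as custom traces
# when enable_app_insights=True on the deployed service.
import json

def run(raw_data):
    print('request received, payload bytes:', len(raw_data))  # custom trace
    data = json.loads(raw_data)['data']
    result = model.predict(data)                 # model loaded in init()
    print('prediction complete')                 # custom trace
    return json.dumps({'result': result.tolist()})
```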
{
@@ -178,7 +178,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## 6. Create Inference Configuration"
"## 6. Create your new Image"
]
},
{
@@ -187,11 +187,22 @@
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.model import InferenceConfig\n",
"from azureml.core.image import ContainerImage\n",
"\n",
"inference_config = InferenceConfig(runtime= \"python\", \n",
" entry_script=\"score.py\",\n",
" conda_file=\"myenv.yml\")"
"image_config = ContainerImage.image_configuration(execution_script = \"score.py\",\n",
" runtime = \"python\",\n",
" conda_file = \"myenv.yml\",\n",
" description = \"Image with ridge regression model\",\n",
" tags = {'area': \"diabetes\", 'type': \"regression\"}\n",
" )\n",
"\n",
"image = ContainerImage.create(name = \"myimage1\",\n",
" # this is the model object\n",
" models = [model],\n",
" image_config = image_config,\n",
" workspace = ws)\n",
"\n",
"image.wait_for_creation(show_output = True)"
]
},
{
@@ -209,7 +220,7 @@
"source": [
"from azureml.core.webservice import AciWebservice\n",
"\n",
"aci_deployment_config = AciWebservice.deploy_configuration(cpu_cores = 1, \n",
"aciconfig = AciWebservice.deploy_configuration(cpu_cores = 1, \n",
" memory_gb = 1, \n",
" tags = {'area': \"diabetes\", 'type': \"regression\"}, \n",
" description = 'Predict diabetes using regression model',\n",
@@ -225,7 +236,11 @@
"from azureml.core.webservice import Webservice\n",
"\n",
"aci_service_name = 'my-aci-service-4'\n",
"aci_service = Model.deploy(ws, aci_service_name, [model], inference_config, aci_deployment_config)\n",
"print(aci_service_name)\n",
"aci_service = Webservice.deploy_from_image(deployment_config = aciconfig,\n",
" image = image,\n",
" name = aci_service_name,\n",
" workspace = ws)\n",
"aci_service.wait_for_deployment(True)\n",
"print(aci_service.state)"
]
@@ -346,7 +361,7 @@
"outputs": [],
"source": [
"#Set the web service configuration\n",
"aks_deployment_config = AksWebservice.deploy_configuration(enable_app_insights=True)"
"aks_config = AksWebservice.deploy_configuration(enable_app_insights=True)"
]
},
{
@@ -364,12 +379,12 @@
"source": [
"if aks_target.provisioning_state== \"Succeeded\": \n",
" aks_service_name ='aks-w-dc5'\n",
" aks_service = Model.deploy(ws,\n",
" aks_service_name, \n",
" [model], \n",
" inference_config, \n",
" aks_deployment_config, \n",
" deployment_target = aks_target) \n",
" aks_service = Webservice.deploy_from_image(workspace = ws, \n",
" name = aks_service_name,\n",
" image = image,\n",
" deployment_config = aks_config,\n",
" deployment_target = aks_target\n",
" )\n",
" aks_service.wait_for_deployment(show_output = True)\n",
" print(aks_service.state)\n",
"else:\n",
@@ -449,6 +464,7 @@
"%%time\n",
"aks_service.delete()\n",
"aci_service.delete()\n",
"image.delete()\n",
"model.delete()"
]
}
@@ -243,7 +243,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"### Setting up inference configuration\n",
"### Create container image\n",
"First we create a YAML file that specifies which dependencies we would like to see in our container."
]
},
@@ -265,7 +265,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Then we create the inference configuration."
"Then we have Azure ML create the container. This step will likely take a few minutes."
]
},
{
@@ -274,19 +274,48 @@
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.model import InferenceConfig\n",
"from azureml.core.image import ContainerImage\n",
"\n",
"inference_config = InferenceConfig(runtime= \"python\", \n",
" entry_script=\"score.py\",\n",
" conda_file=\"myenv.yml\",\n",
" extra_docker_file_steps = \"Dockerfile\")"
"image_config = ContainerImage.image_configuration(execution_script = \"score.py\",\n",
" runtime = \"python\",\n",
" conda_file = \"myenv.yml\",\n",
" docker_file = \"Dockerfile\",\n",
" description = \"TinyYOLO ONNX Demo\",\n",
" tags = {\"demo\": \"onnx\"}\n",
" )\n",
"\n",
"\n",
"image = ContainerImage.create(name = \"onnxyolo\",\n",
" models = [model],\n",
" image_config = image_config,\n",
" workspace = ws)\n",
"\n",
"image.wait_for_creation(show_output = True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Deploy the model"
"In case you need to debug your code, the next line of code accesses the log file."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(image.image_build_log_uri)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We're all set! Let's get our model chugging.\n",
"\n",
"### Deploy the container image"
]
},
{
@@ -307,7 +336,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"The following cell will take a few minutes to run as the model gets packaged up and deployed to ACI."
"The following cell will likely take a few minutes to run as well."
]
},
{
@@ -319,9 +348,14 @@
"from azureml.core.webservice import Webservice\n",
"from random import randint\n",
"\n",
"aci_service_name = 'my-aci-service-15ad'\n",
"aci_service_name = 'onnx-tinyyolo'+str(randint(0,100))\n",
"print(\"Service\", aci_service_name)\n",
"aci_service = Model.deploy(ws, aci_service_name, [model], inference_config, aciconfig)\n",
"\n",
"aci_service = Webservice.deploy_from_image(deployment_config = aciconfig,\n",
" image = image,\n",
" name = aci_service_name,\n",
" workspace = ws)\n",
"\n",
"aci_service.wait_for_deployment(True)\n",
"print(aci_service.state)"
]
@@ -54,7 +54,7 @@
"\n",
"### 3. Download sample data and pre-trained ONNX model from ONNX Model Zoo.\n",
"\n",
"In the following lines of code, we download [the trained ONNX Emotion FER+ model and corresponding test data](https://github.com/onnx/models/tree/master/vision/body_analysis/emotion_ferplus) and place them in the same folder as this tutorial notebook. For more information about the FER+ dataset, please visit Microsoft Researcher Emad Barsoum's [FER+ source data repository](https://github.com/ebarsoum/FERPlus)."
"In the following lines of code, we download [the trained ONNX Emotion FER+ model and corresponding test data](https://github.com/onnx/models/tree/master/emotion_ferplus) and place them in the same folder as this tutorial notebook. For more information about the FER+ dataset, please visit Microsoft Researcher Emad Barsoum's [FER+ source data repository](https://github.com/ebarsoum/FERPlus)."
]
},
{
@@ -176,7 +176,7 @@
"source": [
"### ONNX FER+ Model Methodology\n",
"\n",
"The image classification model we are using is pre-trained using Microsoft's deep learning cognitive toolkit, [CNTK](https://github.com/Microsoft/CNTK), from the [ONNX model zoo](http://github.com/onnx/models). The model zoo has many other models that can be deployed on cloud providers like AzureML without any additional training. To ensure that our cloud deployed model works, we use testing data from the well-known FER+ data set, provided as part of the [trained Emotion Recognition model](https://github.com/onnx/models/tree/master/vision/body_analysis/emotion_ferplus) in the ONNX model zoo.\n",
"The image classification model we are using is pre-trained using Microsoft's deep learning cognitive toolkit, [CNTK](https://github.com/Microsoft/CNTK), from the [ONNX model zoo](http://github.com/onnx/models). The model zoo has many other models that can be deployed on cloud providers like AzureML without any additional training. To ensure that our cloud deployed model works, we use testing data from the well-known FER+ data set, provided as part of the [trained Emotion Recognition model](https://github.com/onnx/models/tree/master/emotion_ferplus) in the ONNX model zoo.\n",
"\n",
"The original Facial Emotion Recognition (FER) Dataset was released in 2013 by Pierre-Luc Carrier and Aaron Courville as part of a [Kaggle Competition](https://www.kaggle.com/c/challenges-in-representation-learning-facial-expression-recognition-challenge/data), but some of the labels are not entirely appropriate for the expression. In the FER+ Dataset, each photo was evaluated by at least 10 croud sourced reviewers, creating a more accurate basis for ground truth. \n",
"\n",
@@ -341,7 +341,9 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"### Setup inference configuration"
"### Create the Container Image\n",
"\n",
"This step will likely take a few minutes."
]
},
{
@@ -350,19 +352,48 @@
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.model import InferenceConfig\n",
"from azureml.core.image import ContainerImage\n",
"\n",
"inference_config = InferenceConfig(runtime= \"python\", \n",
" entry_script=\"score.py\",\n",
" conda_file=\"myenv.yml\",\n",
" extra_docker_file_steps = \"Dockerfile\")"
"image_config = ContainerImage.image_configuration(execution_script = \"score.py\",\n",
" runtime = \"python\",\n",
" conda_file = \"myenv.yml\",\n",
" docker_file = \"Dockerfile\",\n",
" description = \"Emotion ONNX Runtime container\",\n",
" tags = {\"demo\": \"onnx\"})\n",
"\n",
"\n",
"image = ContainerImage.create(name = \"onnximage\",\n",
" # this is the model object\n",
" models = [model],\n",
" image_config = image_config,\n",
" workspace = ws)\n",
"\n",
"image.wait_for_creation(show_output = True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Deploy the model"
"In case you need to debug your code, the next line of code accesses the log file."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(image.image_build_log_uri)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We're all done specifying what we want our virtual machine to do. Let's configure and deploy our container image.\n",
"\n",
"### Deploy the container image"
]
},
{
@@ -379,13 +410,6 @@
" description = 'ONNX for emotion recognition model')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The following cell will likely take a few minutes to run as well."
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -396,11 +420,23 @@
"\n",
"aci_service_name = 'onnx-demo-emotion'\n",
"print(\"Service\", aci_service_name)\n",
"aci_service = Model.deploy(ws, aci_service_name, [model], inference_config, aciconfig)\n",
"\n",
"aci_service = Webservice.deploy_from_image(deployment_config = aciconfig,\n",
" image = image,\n",
" name = aci_service_name,\n",
" workspace = ws)\n",
"\n",
"aci_service.wait_for_deployment(True)\n",
"print(aci_service.state)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The following cell will likely take a few minutes to run as well."
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -434,7 +470,7 @@
"\n",
"### Useful Helper Functions\n",
"\n",
"We preprocess and postprocess our data (see score.py file) using the helper functions specified in the [ONNX FER+ Model page in the Model Zoo repository](https://github.com/onnx/models/tree/master/vision/body_analysis/emotion_ferplus)."
"We preprocess and postprocess our data (see score.py file) using the helper functions specified in the [ONNX FER+ Model page in the Model Zoo repository](https://github.com/onnx/models/tree/master/emotion_ferplus)."
]
},
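As a rough illustration of what such helpers do (not the Model Zoo's exact code): the FER+ model expects a 1x1x64x64 float32 tensor, so preprocessing reduces to reshaping a 64x64 grayscale face crop, and postprocessing picks the highest-scoring emotion:

```python
import numpy as np

def preprocess(gray64):
    """gray64: a 64x64 grayscale numpy array (face detection/resizing done upstream)."""
    return gray64.astype(np.float32).reshape(1, 1, 64, 64)

def postprocess(scores, emotion_table):
    """Return the emotion label with the highest raw score."""
    return emotion_table[int(np.argmax(np.asarray(scores).ravel()))]
```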
{
@@ -54,7 +54,7 @@
"\n",
"### 3. Download sample data and pre-trained ONNX model from ONNX Model Zoo.\n",
"\n",
"In the following lines of code, we download [the trained ONNX MNIST model and corresponding test data](https://github.com/onnx/models/tree/master/vision/classification/mnist) and place them in the same folder as this tutorial notebook. For more information about the MNIST dataset, please visit [Yan LeCun's website](http://yann.lecun.com/exdb/mnist/)."
|
||||
"In the following lines of code, we download [the trained ONNX MNIST model and corresponding test data](https://github.com/onnx/models/tree/master/mnist) and place them in the same folder as this tutorial notebook. For more information about the MNIST dataset, please visit [Yan LeCun's website](http://yann.lecun.com/exdb/mnist/)."
]
},
{
@@ -187,7 +187,7 @@
"source": [
"### ONNX MNIST Model Methodology\n",
"\n",
"The image classification model we are using is pre-trained using Microsoft's deep learning cognitive toolkit, [CNTK](https://github.com/Microsoft/CNTK), from the [ONNX model zoo](http://github.com/onnx/models). The model zoo has many other models that can be deployed on cloud providers like AzureML without any additional training. To ensure that our cloud deployed model works, we use testing data from the famous MNIST data set, provided as part of the [trained MNIST model](https://github.com/onnx/models/tree/master/vision/classification/mnist) in the ONNX model zoo.\n",
"The image classification model we are using is pre-trained using Microsoft's deep learning cognitive toolkit, [CNTK](https://github.com/Microsoft/CNTK), from the [ONNX model zoo](http://github.com/onnx/models). The model zoo has many other models that can be deployed on cloud providers like AzureML without any additional training. To ensure that our cloud deployed model works, we use testing data from the famous MNIST data set, provided as part of the [trained MNIST model](https://github.com/onnx/models/tree/master/mnist) in the ONNX model zoo.\n",
"\n",
"***Input: Handwritten Images from MNIST Dataset***\n",
"\n",
@@ -325,7 +325,8 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"### Create Inference Configuration"
"### Create the Container Image\n",
"This step will likely take a few minutes."
]
},
{
@@ -334,19 +335,48 @@
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.model import InferenceConfig\n",
"from azureml.core.image import ContainerImage\n",
"\n",
"inference_config = InferenceConfig(runtime= \"python\", \n",
" entry_script=\"score.py\",\n",
" extra_docker_file_steps = \"Dockerfile\",\n",
" conda_file=\"myenv.yml\")"
"image_config = ContainerImage.image_configuration(execution_script = \"score.py\",\n",
" runtime = \"python\",\n",
" conda_file = \"myenv.yml\",\n",
" docker_file = \"Dockerfile\",\n",
" description = \"MNIST ONNX Runtime container\",\n",
" tags = {\"demo\": \"onnx\"}) \n",
"\n",
"\n",
"image = ContainerImage.create(name = \"onnximage\",\n",
" # this is the model object\n",
" models = [model],\n",
" image_config = image_config,\n",
" workspace = ws)\n",
"\n",
"image.wait_for_creation(show_output = True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Deploy the model"
"In case you need to debug your code, the next line of code accesses the log file."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(image.image_build_log_uri)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We're all done specifying what we want our virtual machine to do. Let's configure and deploy our container image.\n",
"\n",
"### Deploy the container image"
]
},
{
@@ -367,7 +397,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"The following cell will likely take a few minutes to run."
"The following cell will likely take a few minutes to run as well."
]
},
{
@@ -380,7 +410,12 @@
"\n",
"aci_service_name = 'onnx-demo-mnist'\n",
"print(\"Service\", aci_service_name)\n",
"aci_service = Model.deploy(ws, aci_service_name, [model], inference_config, aciconfig)\n",
"\n",
"aci_service = Webservice.deploy_from_image(deployment_config = aciconfig,\n",
" image = image,\n",
" name = aci_service_name,\n",
" workspace = ws)\n",
"\n",
"aci_service.wait_for_deployment(True)\n",
"print(aci_service.state)"
]
@@ -28,7 +28,7 @@
"ONNX is an open format for representing machine learning and deep learning models. ONNX enables open and interoperable AI by enabling data scientists and developers to use the tools of their choice without worrying about lock-in and flexibility to deploy to a variety of platforms. ONNX is developed and supported by a community of partners including Microsoft, Facebook, and Amazon. For more information, explore the [ONNX website](http://onnx.ai).\n",
"\n",
"## ResNet50 Details\n",
"ResNet classifies the major object in an input image into a set of 1000 pre-defined classes. For more information about the ResNet50 model and how it was created can be found on the [ONNX Model Zoo github](https://github.com/onnx/models/tree/master/vision/classification/resnet). "
|
||||
"ResNet classifies the major object in an input image into a set of 1000 pre-defined classes. For more information about the ResNet50 model and how it was created can be found on the [ONNX Model Zoo github](https://github.com/onnx/models/tree/master/models/image_classification/resnet). "
]
},
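As a hedged sketch of the post-processing this implies, the model's 1x1000 score vector can be softmaxed and reduced to the top five class indices (the class-name lookup table is omitted here):

```python
import numpy as np

def top5(scores):
    """scores: the raw 1x1000 ResNet50 output; returns top-5 class indices."""
    s = np.asarray(scores).ravel()
    probs = np.exp(s - s.max())   # numerically stable softmax
    probs /= probs.sum()
    return np.argsort(probs)[-5:][::-1]
```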
|
||||
{
|
||||
@@ -221,7 +221,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create inference configuration"
|
||||
"### Create container image"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -249,7 +249,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Create the inference configuration object"
|
||||
"Then we have Azure ML create the container. This step will likely take a few minutes."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -258,19 +258,48 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.model import InferenceConfig\n",
|
||||
"from azureml.core.image import ContainerImage\n",
|
||||
"\n",
|
||||
"inference_config = InferenceConfig(runtime= \"python\", \n",
|
||||
" entry_script=\"score.py\",\n",
|
||||
" conda_file=\"myenv.yml\",\n",
|
||||
" extra_docker_file_steps = \"Dockerfile\")"
|
||||
"image_config = ContainerImage.image_configuration(execution_script = \"score.py\",\n",
|
||||
" runtime = \"python\",\n",
|
||||
" conda_file = \"myenv.yml\",\n",
|
||||
" docker_file = \"Dockerfile\",\n",
|
||||
" description = \"ONNX ResNet50 Demo\",\n",
|
||||
" tags = {\"demo\": \"onnx\"}\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"image = ContainerImage.create(name = \"onnxresnet50v2\",\n",
|
||||
" models = [model],\n",
|
||||
" image_config = image_config,\n",
|
||||
" workspace = ws)\n",
|
||||
"\n",
|
||||
"image.wait_for_creation(show_output = True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Deploy the model"
|
||||
"In case you need to debug your code, the next line of code accesses the log file."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(image.image_build_log_uri)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We're all set! Let's get our model chugging.\n",
|
||||
"\n",
|
||||
"### Deploy the container image"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -305,7 +334,12 @@
|
||||
"\n",
|
||||
"aci_service_name = 'onnx-demo-resnet50'+str(randint(0,100))\n",
|
||||
"print(\"Service\", aci_service_name)\n",
|
||||
"aci_service = Model.deploy(ws, aci_service_name, [model], inference_config, aciconfig)\n",
|
||||
"\n",
|
||||
"aci_service = Webservice.deploy_from_image(deployment_config = aciconfig,\n",
|
||||
" image = image,\n",
|
||||
" name = aci_service_name,\n",
|
||||
" workspace = ws)\n",
|
||||
"\n",
|
||||
"aci_service.wait_for_deployment(True)\n",
|
||||
"print(aci_service.state)"
|
||||
]
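Once the service reports a healthy state, a quick smoke test is worthwhile. A minimal sketch using the SDK's `run()` call; the payload shape below is a placeholder and must match whatever your `score.py` actually parses:

```python
import json

# hypothetical payload; shape it to match the input your entry script expects
sample_input = json.dumps({'data': [[0.0, 0.0, 0.0]]})

# run() posts to the scoring endpoint and returns the decoded response
result = aci_service.run(input_data=sample_input)
print(result)
```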
|
||||
|
||||
@@ -28,7 +28,7 @@
|
||||
"ONNX is an open format for representing machine learning and deep learning models. ONNX enables open and interoperable AI by enabling data scientists and developers to use the tools of their choice without worrying about lock-in and flexibility to deploy to a variety of platforms. ONNX is developed and supported by a community of partners including Microsoft, Facebook, and Amazon. For more information, explore the [ONNX website](http://onnx.ai).\n",
|
||||
"\n",
|
||||
"## MNIST Details\n",
|
||||
"The Modified National Institute of Standards and Technology (MNIST) dataset consists of 70,000 grayscale images. Each image is a handwritten digit of 28x28 pixels, representing numbers from 0 to 9. For more information about the MNIST dataset, please visit [Yan LeCun's website](http://yann.lecun.com/exdb/mnist/). For more information about the MNIST model and how it was created can be found on the [ONNX Model Zoo github](https://github.com/onnx/models/tree/master/vision/classification/mnist). "
|
||||
"The Modified National Institute of Standards and Technology (MNIST) dataset consists of 70,000 grayscale images. Each image is a handwritten digit of 28x28 pixels, representing numbers from 0 to 9. For more information about the MNIST dataset, please visit [Yan LeCun's website](http://yann.lecun.com/exdb/mnist/). For more information about the MNIST model and how it was created can be found on the [ONNX Model Zoo github](https://github.com/onnx/models/tree/master/mnist). "
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -401,7 +401,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create inference configuration\n",
|
||||
"### Create container image\n",
|
||||
"First we create a YAML file that specifies which dependencies we would like to see in our container."
|
||||
]
|
||||
},
|
||||
@@ -423,7 +423,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Then we setup the inference configuration "
|
||||
"Then we have Azure ML create the container. This step will likely take a few minutes."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -432,19 +432,48 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.model import InferenceConfig\n",
|
||||
"from azureml.core.image import ContainerImage\n",
|
||||
"\n",
|
||||
"inference_config = InferenceConfig(runtime= \"python\", \n",
|
||||
" entry_script=\"score.py\",\n",
|
||||
" conda_file=\"myenv.yml\",\n",
|
||||
" extra_docker_file_steps = \"Dockerfile\")"
|
||||
"image_config = ContainerImage.image_configuration(execution_script = \"score.py\",\n",
|
||||
" runtime = \"python\",\n",
|
||||
" conda_file = \"myenv.yml\",\n",
|
||||
" docker_file = \"Dockerfile\",\n",
|
||||
" description = \"MNIST ONNX Demo\",\n",
|
||||
" tags = {\"demo\": \"onnx\"}\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"image = ContainerImage.create(name = \"onnxmnistdemo\",\n",
|
||||
" models = [model],\n",
|
||||
" image_config = image_config,\n",
|
||||
" workspace = ws)\n",
|
||||
"\n",
|
||||
"image.wait_for_creation(show_output = True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Deploy the model"
|
||||
"In case you need to debug your code, the next line of code accesses the log file."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(image.image_build_log_uri)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We're all set! Let's get our model chugging.\n",
|
||||
"\n",
|
||||
"### Deploy the container image"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -475,12 +504,16 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.webservice import Webservice\n",
|
||||
"from azureml.core.model import Model\n",
|
||||
"from random import randint\n",
|
||||
"\n",
|
||||
"aci_service_name = 'onnx-demo-mnist'+str(randint(0,100))\n",
|
||||
"print(\"Service\", aci_service_name)\n",
|
||||
"aci_service = Model.deploy(ws, aci_service_name, [model], inference_config, aciconfig)\n",
|
||||
"\n",
|
||||
"aci_service = Webservice.deploy_from_image(deployment_config = aciconfig,\n",
|
||||
" image = image,\n",
|
||||
" name = aci_service_name,\n",
|
||||
" workspace = ws)\n",
|
||||
"\n",
|
||||
"aci_service.wait_for_deployment(True)\n",
|
||||
"print(aci_service.state)"
|
||||
]
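If the deployment ends in an unhealthy state, the container logs are the first place to look. A small sketch, assuming the `aci_service` object from the cell above:

```python
# fetch the container logs from the deployed service for debugging
if aci_service.state != 'Healthy':
    print(aci_service.get_logs())
```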
|
||||
|
||||
@@ -34,6 +34,7 @@
|
||||
"from azureml.core import Workspace\n",
|
||||
"from azureml.core.compute import AksCompute, ComputeTarget\n",
|
||||
"from azureml.core.webservice import Webservice, AksWebservice\n",
|
||||
"from azureml.core.image import Image\n",
|
||||
"from azureml.core.model import Model"
|
||||
]
|
||||
},
|
||||
@@ -96,51 +97,8 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Create the Environment\n",
|
||||
"Create an environment that the model will be deployed with"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core import Environment\n",
|
||||
"from azureml.core.conda_dependencies import CondaDependencies \n",
|
||||
"\n",
|
||||
"conda_deps = CondaDependencies.create(conda_packages=['numpy','scikit-learn'], pip_packages=['azureml-defaults'])\n",
|
||||
"myenv = Environment(name='myenv')\n",
|
||||
"myenv.python.conda_dependencies = conda_deps"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Use a custom Docker image\n",
|
||||
"\n",
|
||||
"You can also specify a custom Docker image to be used as base image if you don't want to use the default base image provided by Azure ML. Please make sure the custom Docker image has Ubuntu >= 16.04, Conda >= 4.5.\\* and Python(3.5.\\* or 3.6.\\*).\n",
|
||||
"\n",
|
||||
"Only supported with `python` runtime.\n",
|
||||
"```python\n",
|
||||
"# use an image available in public Container Registry without authentication\n",
|
||||
"myenv.docker.base_image = \"mcr.microsoft.com/azureml/o16n-sample-user-base/ubuntu-miniconda\"\n",
|
||||
"\n",
|
||||
"# or, use an image available in a private Container Registry\n",
|
||||
"myenv.docker.base_image = \"myregistry.azurecr.io/mycustomimage:1.0\"\n",
|
||||
"myenv.docker.base_image_registry.address = \"myregistry.azurecr.io\"\n",
|
||||
"myenv.docker.base_image_registry.username = \"username\"\n",
|
||||
"myenv.docker.base_image_registry.password = \"password\"\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Write the Entry Script\n",
|
||||
"Write the script that will be used to predict on your model"
|
||||
"# Create an image\n",
|
||||
"Create an image using the registered model the script that will load and run the model."
|
||||
]
|
||||
},
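For orientation, here is a minimal sketch of the `init()`/`run()` contract such an entry script follows; the registered model name and the JSON input schema below are placeholders, not the notebook's actual values:

```python
# score.py -- minimal sketch of an Azure ML entry script
import json
import numpy as np
from azureml.core.model import Model
from sklearn.externals import joblib

def init():
    # runs once when the container starts: locate and load the registered model
    global model
    model_path = Model.get_model_path('sklearn_regression_model.pkl')  # placeholder name
    model = joblib.load(model_path)

def run(raw_data):
    # runs per request: parse JSON, predict, return a JSON-serializable result
    try:
        data = np.array(json.loads(raw_data)['data'])
        return model.predict(data).tolist()
    except Exception as e:
        return str(e)
```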
|
||||
{
|
||||
@@ -179,11 +137,17 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Create the InferenceConfig\n",
|
||||
"Create the inference config that will be used when deploying the model"
|
||||
"from azureml.core.conda_dependencies import CondaDependencies \n",
|
||||
"\n",
|
||||
"myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn'])\n",
|
||||
"\n",
|
||||
"with open(\"myenv.yml\",\"w\") as f:\n",
|
||||
" f.write(myenv.serialize_to_string())"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -192,9 +156,47 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.model import InferenceConfig\n",
|
||||
"from azureml.core.image import ContainerImage\n",
|
||||
"\n",
|
||||
"inf_config = InferenceConfig(entry_script='score.py', environment=myenv)"
|
||||
"image_config = ContainerImage.image_configuration(execution_script = \"score.py\",\n",
|
||||
" runtime = \"python\",\n",
|
||||
" conda_file = \"myenv.yml\",\n",
|
||||
" description = \"Image with ridge regression model\",\n",
|
||||
" tags = {'area': \"diabetes\", 'type': \"regression\"}\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"image = ContainerImage.create(name = \"myimage1\",\n",
|
||||
" # this is the model object\n",
|
||||
" models = [model],\n",
|
||||
" image_config = image_config,\n",
|
||||
" workspace = ws)\n",
|
||||
"\n",
|
||||
"image.wait_for_creation(show_output = True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Use a custom Docker image\n",
|
||||
"\n",
|
||||
"You can also specify a custom Docker image to be used as base image if you don't want to use the default base image provided by Azure ML. Please make sure the custom Docker image has Ubuntu >= 16.04, Conda >= 4.5.\\* and Python(3.5.\\* or 3.6.\\*).\n",
|
||||
"\n",
|
||||
"Only Supported for `ContainerImage`(from azureml.core.image) with `python` runtime.\n",
|
||||
"```python\n",
|
||||
"# use an image available in public Container Registry without authentication\n",
|
||||
"image_config.base_image = \"mcr.microsoft.com/azureml/o16n-sample-user-base/ubuntu-miniconda\"\n",
|
||||
"\n",
|
||||
"# or, use an image available in a private Container Registry\n",
|
||||
"image_config.base_image = \"myregistry.azurecr.io/mycustomimage:1.0\"\n",
|
||||
"image_config.base_image_registry.address = \"myregistry.azurecr.io\"\n",
|
||||
"image_config.base_image_registry.username = \"username\"\n",
|
||||
"image_config.base_image_registry.password = \"password\"\n",
|
||||
"\n",
|
||||
"# or, use an image built during training.\n",
|
||||
"image_config.base_image = run.properties[\"AzureML.DerivedImageName\"]\n",
|
||||
"```\n",
|
||||
"You can get the address of training image from the properties of a Run object. Only new runs submitted with azureml-sdk>=1.0.22 to AMLCompute targets will have the 'AzureML.DerivedImageName' property. Instructions on how to get a Run can be found in [manage-runs](../../training/manage-runs/manage-runs.ipynb). \n"
|
||||
]
|
||||
},
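As a sketch of that last step, assuming a hypothetical experiment name and run id (both placeholders):

```python
from azureml.core import Experiment, Run

# placeholders: substitute your own experiment name and run id
exp = Experiment(ws, 'my-training-experiment')
run = Run(exp, run_id='<run-id>')

# only runs submitted with azureml-sdk>=1.0.22 to AmlCompute carry this property
image_config.base_image = run.properties.get('AzureML.DerivedImageName')
```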
|
||||
{
|
||||
@@ -235,21 +237,23 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# from azureml.core.compute import ComputeTarget, AksCompute\n",
|
||||
"'''\n",
|
||||
"from azureml.core.compute import ComputeTarget, AksCompute\n",
|
||||
"\n",
|
||||
"# # Create the compute configuration and set virtual network information\n",
|
||||
"# config = AksCompute.provisioning_configuration(location=\"eastus2\")\n",
|
||||
"# config.vnet_resourcegroup_name = \"mygroup\"\n",
|
||||
"# config.vnet_name = \"mynetwork\"\n",
|
||||
"# config.subnet_name = \"default\"\n",
|
||||
"# config.service_cidr = \"10.0.0.0/16\"\n",
|
||||
"# config.dns_service_ip = \"10.0.0.10\"\n",
|
||||
"# config.docker_bridge_cidr = \"172.17.0.1/16\"\n",
|
||||
"# Create the compute configuration and set virtual network information\n",
|
||||
"config = AksCompute.provisioning_configuration(location=\"eastus2\")\n",
|
||||
"config.vnet_resourcegroup_name = \"mygroup\"\n",
|
||||
"config.vnet_name = \"mynetwork\"\n",
|
||||
"config.subnet_name = \"default\"\n",
|
||||
"config.service_cidr = \"10.0.0.0/16\"\n",
|
||||
"config.dns_service_ip = \"10.0.0.10\"\n",
|
||||
"config.docker_bridge_cidr = \"172.17.0.1/16\"\n",
|
||||
"\n",
|
||||
"# # Create the compute target\n",
|
||||
"# aks_target = ComputeTarget.create(workspace = ws,\n",
|
||||
"# name = \"myaks\",\n",
|
||||
"# provisioning_configuration = config)"
|
||||
"# Create the compute target\n",
|
||||
"aks_target = ComputeTarget.create(workspace = ws,\n",
|
||||
" name = \"myaks\",\n",
|
||||
" provisioning_configuration = config)\n",
|
||||
"'''"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -296,15 +300,17 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# # Use the default configuration (can also provide parameters to customize)\n",
|
||||
"# resource_id = '/subscriptions/92c76a2f-0e1c-4216-b65e-abf7a3f34c1e/resourcegroups/raymondsdk0604/providers/Microsoft.ContainerService/managedClusters/my-aks-0605d37425356b7d01'\n",
|
||||
"'''\n",
|
||||
"# Use the default configuration (can also provide parameters to customize)\n",
|
||||
"resource_id = '/subscriptions/92c76a2f-0e1c-4216-b65e-abf7a3f34c1e/resourcegroups/raymondsdk0604/providers/Microsoft.ContainerService/managedClusters/my-aks-0605d37425356b7d01'\n",
|
||||
"\n",
|
||||
"# create_name='my-existing-aks' \n",
|
||||
"# # Create the cluster\n",
|
||||
"# attach_config = AksCompute.attach_configuration(resource_id=resource_id)\n",
|
||||
"# aks_target = ComputeTarget.attach(workspace=ws, name=create_name, attach_configuration=attach_config)\n",
|
||||
"# # Wait for the operation to complete\n",
|
||||
"# aks_target.wait_for_completion(True)"
|
||||
"create_name='my-existing-aks' \n",
|
||||
"# Create the cluster\n",
|
||||
"attach_config = AksCompute.attach_configuration(resource_id=resource_id)\n",
|
||||
"aks_target = ComputeTarget.attach(workspace=ws, name=create_name, attach_configuration=attach_config)\n",
|
||||
"# Wait for the operation to complete\n",
|
||||
"aks_target.wait_for_completion(True)\n",
|
||||
"'''"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -320,11 +326,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Set the web service configuration (using default here)\n",
|
||||
"aks_config = AksWebservice.deploy_configuration()\n",
|
||||
"\n",
|
||||
"# # Enable token auth and disable (key) auth on the webservice\n",
|
||||
"# aks_config = AksWebservice.deploy_configuration(token_auth_enabled=True, auth_enabled=False)\n"
|
||||
"#Set the web service configuration (using default here)\n",
|
||||
"aks_config = AksWebservice.deploy_configuration()"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -336,13 +339,11 @@
|
||||
"%%time\n",
|
||||
"aks_service_name ='aks-service-1'\n",
|
||||
"\n",
|
||||
"aks_service = Model.deploy(workspace=ws,\n",
|
||||
" name=aks_service_name,\n",
|
||||
" models=[model],\n",
|
||||
" inference_config=inf_config,\n",
|
||||
" deployment_config=aks_config,\n",
|
||||
" deployment_target=aks_target)\n",
|
||||
"\n",
|
||||
"aks_service = Webservice.deploy_from_image(workspace = ws, \n",
|
||||
" name = aks_service_name,\n",
|
||||
" image = image,\n",
|
||||
" deployment_config = aks_config,\n",
|
||||
" deployment_target = aks_target)\n",
|
||||
"aks_service.wait_for_deployment(show_output = True)\n",
|
||||
"print(aks_service.state)"
|
||||
]
|
||||
@@ -389,12 +390,11 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# # if (key) auth is enabled, retrieve the API keys. AML generates two keys.\n",
|
||||
"# key1, Key2 = aks_service.get_keys()\n",
|
||||
"# print(key1)\n",
|
||||
"\n",
|
||||
"# # if token auth is enabled, retrieve the token.\n",
|
||||
"# access_token, refresh_after = aks_service.get_token()"
|
||||
"# retreive the API keys. AML generates two keys.\n",
|
||||
"'''\n",
|
||||
"key1, Key2 = aks_service.get_keys()\n",
|
||||
"print(key1)\n",
|
||||
"'''"
|
||||
]
|
||||
},
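The token-auth variant mentioned above works analogously; a sketch, assuming token auth was enabled (and key auth disabled) when the service was deployed:

```python
# deploy-time configuration that switches the service to token auth
aks_config = AksWebservice.deploy_configuration(token_auth_enabled=True, auth_enabled=False)

# after deployment, fetch a bearer token plus the time after which to refresh it
access_token, refresh_after = aks_service.get_token()
```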
|
||||
{
|
||||
@@ -404,28 +404,27 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# construct raw HTTP request and send to the service\n",
|
||||
"# %%time\n",
|
||||
"'''\n",
|
||||
"%%time\n",
|
||||
"\n",
|
||||
"# import requests\n",
|
||||
"import requests\n",
|
||||
"\n",
|
||||
"# import json\n",
|
||||
"import json\n",
|
||||
"\n",
|
||||
"# test_sample = json.dumps({'data': [\n",
|
||||
"# [1,2,3,4,5,6,7,8,9,10], \n",
|
||||
"# [10,9,8,7,6,5,4,3,2,1]\n",
|
||||
"# ]})\n",
|
||||
"# test_sample = bytes(test_sample,encoding = 'utf8')\n",
|
||||
"test_sample = json.dumps({'data': [\n",
|
||||
" [1,2,3,4,5,6,7,8,9,10], \n",
|
||||
" [10,9,8,7,6,5,4,3,2,1]\n",
|
||||
"]})\n",
|
||||
"test_sample = bytes(test_sample,encoding = 'utf8')\n",
|
||||
"\n",
|
||||
"# # If (key) auth is enabled, don't forget to add key to the HTTP header.\n",
|
||||
"# headers = {'Content-Type':'application/json', 'Authorization': 'Bearer ' + key1}\n",
|
||||
"# Don't forget to add key to the HTTP header.\n",
|
||||
"headers = {'Content-Type':'application/json', 'Authorization': 'Bearer ' + key1}\n",
|
||||
"\n",
|
||||
"# # If token auth is enabled, don't forget to add token to the HTTP header.\n",
|
||||
"# headers = {'Content-Type':'application/json', 'Authorization': 'Bearer ' + access_token}\n",
|
||||
"\n",
|
||||
"# resp = requests.post(aks_service.scoring_uri, test_sample, headers=headers)\n",
|
||||
"resp = requests.post(aks_service.scoring_uri, test_sample, headers=headers)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# print(\"prediction:\", resp.text)"
|
||||
"print(\"prediction:\", resp.text)\n",
|
||||
"'''"
|
||||
]
|
||||
},
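Instead of hand-building the HTTP request, the SDK can call the endpoint for you and handle the auth header itself. A sketch, assuming the same `test_sample` payload as above:

```python
# Webservice.run() resolves the scoring URI and credentials internally
result = aks_service.run(input_data=test_sample)
print('prediction:', result)
```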
|
||||
{
|
||||
@@ -444,6 +443,7 @@
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"aks_service.delete()\n",
|
||||
"image.delete()\n",
|
||||
"model.delete()"
|
||||
]
|
||||
}
|
||||
@@ -470,7 +470,27 @@
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.6"
|
||||
}
|
||||
},
|
||||
"friendly_name": "Prepare data for regression modeling",
|
||||
"exclude_from_index": false,
|
||||
"order_index": 1,
|
||||
"category": "deployment",
|
||||
"tags": [
|
||||
"featured"
|
||||
],
|
||||
"task": "Regression",
|
||||
"datasets": [
|
||||
"test"
|
||||
],
|
||||
"compute": [
|
||||
"localtest"
|
||||
],
|
||||
"deployment": [
|
||||
"AKS"
|
||||
],
|
||||
"framework": [
|
||||
"test1"
|
||||
]
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
|
||||
@@ -1,748 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||
"\n",
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Train and explain models remotely via Azure Machine Learning Compute\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"_**This notebook showcases how to use the Azure Machine Learning Interpretability SDK to train and explain a regression model remotely on an Azure Machine Leanrning Compute Target (AMLCompute).**_\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"## Table of Contents\n",
|
||||
"\n",
|
||||
"1. [Introduction](#Introduction)\n",
|
||||
"1. [Setup](#Setup)\n",
|
||||
" 1. Initialize a Workspace\n",
|
||||
" 1. Create an Experiment\n",
|
||||
" 1. Introduction to AmlCompute\n",
|
||||
" 1. Submit an AmlCompute run in a few different ways\n",
|
||||
" 1. Option 1: Provision as a run based compute target \n",
|
||||
" 1. Option 2: Provision as a persistent compute target (Basic)\n",
|
||||
" 1. Option 3: Provision as a persistent compute target (Advanced)\n",
|
||||
"1. Additional operations to perform on AmlCompute\n",
|
||||
"1. [Download model explanations from Azure Machine Learning Run History](#Download)\n",
|
||||
"1. [Visualize explanations](#Visualize)\n",
|
||||
"1. [Next steps](#Next)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Introduction\n",
|
||||
"\n",
|
||||
"This notebook showcases how to train and explain a regression model remotely via Azure Machine Learning Compute (AMLCompute), and download the calculated explanations locally for visualization.\n",
|
||||
"It demonstrates the API calls that you need to make to submit a run for training and explaining a model to AMLCompute, download the compute explanations remotely, and visualizing the global and local explanations via a visualization dashboard that provides an interactive way of discovering patterns in model predictions and downloaded explanations.\n",
|
||||
"\n",
|
||||
"We will showcase one of the tabular data explainers: TabularExplainer (SHAP).\n",
|
||||
"\n",
|
||||
"Problem: Boston Housing Price Prediction with scikit-learn (train a model and run an explainer remotely via AMLCompute, and download and visualize the remotely-calculated explanations.)\n",
|
||||
"\n",
|
||||
"|  |\n",
|
||||
"|:--:|\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Setup\n",
|
||||
"If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, make sure you go through the [configuration notebook](../../../configuration.ipynb) first if you haven't.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"You will need to have extensions enabled prior to jupyter kernel starting to see the visualization dashboard.\n",
|
||||
"```\n",
|
||||
"(myenv) $ jupyter nbextension install --py --sys-prefix azureml.contrib.explain.model.visualize\n",
|
||||
"(myenv) $ jupyter nbextension enable --py --sys-prefix azureml.contrib.explain.model.visualize\n",
|
||||
"```\n",
|
||||
"Or\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"(myenv) $ jupyter nbextension install azureml.contrib.explain.model.visualize --user --py\n",
|
||||
"(myenv) $ jupyter nbextension enable azureml.contrib.explain.model.visualize --user --py\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"If you are using Jupyter Labs run the following commands instead:\n",
|
||||
"```\n",
|
||||
"(myenv) $ jupyter labextension install @jupyter-widgets/jupyterlab-manager\n",
|
||||
"(myenv) $ jupyter labextension install microsoft-mli-widget\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Check core SDK version number\n",
|
||||
"import azureml.core\n",
|
||||
"\n",
|
||||
"print(\"SDK version:\", azureml.core.VERSION)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Initialize a Workspace\n",
|
||||
"\n",
|
||||
"Initialize a workspace object from persisted configuration"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"create workspace"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core import Workspace\n",
|
||||
"\n",
|
||||
"ws = Workspace.from_config()\n",
|
||||
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep='\\n')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create An Experiment\n",
|
||||
"\n",
|
||||
"**Experiment** is a logical container in an Azure ML Workspace. It hosts run records which can include run metrics and output artifacts from your experiments."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core import Experiment\n",
|
||||
"experiment_name = 'explainer-remote-run-on-amlcompute'\n",
|
||||
"experiment = Experiment(workspace=ws, name=experiment_name)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Introduction to AmlCompute\n",
|
||||
"\n",
|
||||
"Azure Machine Learning Compute is managed compute infrastructure that allows the user to easily create single to multi-node compute of the appropriate VM Family. It is created **within your workspace region** and is a resource that can be used by other users in your workspace. It autoscales by default to the max_nodes, when a job is submitted, and executes in a containerized environment packaging the dependencies as specified by the user. \n",
|
||||
"\n",
|
||||
"Since it is managed compute, job scheduling and cluster management are handled internally by Azure Machine Learning service. \n",
|
||||
"\n",
|
||||
"For more information on Azure Machine Learning Compute, please read [this article](https://docs.microsoft.com/azure/machine-learning/service/how-to-set-up-training-targets#amlcompute)\n",
|
||||
"\n",
|
||||
"If you are an existing BatchAI customer who is migrating to Azure Machine Learning, please read [this article](https://aka.ms/batchai-retirement)\n",
|
||||
"\n",
|
||||
"**Note**: As with other Azure services, there are limits on certain resources (for eg. AmlCompute quota) associated with the Azure Machine Learning service. Please read [this article](https://docs.microsoft.com/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"The training script `train_explain.py` is already created for you. Let's have a look."
|
||||
]
|
||||
},
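A small sketch for inspecting it from the notebook, assuming the script sits next to the notebook:

```python
# print the training script that the remote run will execute
with open('train_explain.py', 'r') as f:
    print(f.read())
```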
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Submit an AmlCompute run in a few different ways\n",
|
||||
"\n",
|
||||
"First lets check which VM families are available in your region. Azure is a regional service and some specialized SKUs (especially GPUs) are only available in certain regions. Since AmlCompute is created in the region of your workspace, we will use the supported_vms () function to see if the VM family we want to use ('STANDARD_D2_V2') is supported.\n",
|
||||
"\n",
|
||||
"You can also pass a different region to check availability and then re-create your workspace in that region through the [configuration notebook](../../../configuration.ipynb)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
|
||||
"\n",
|
||||
"AmlCompute.supported_vmsizes(workspace=ws)\n",
|
||||
"# AmlCompute.supported_vmsizes(workspace=ws, location='southcentralus')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create project directory\n",
|
||||
"\n",
|
||||
"Create a directory that will contain all the necessary code from your local machine that you will need access to on the remote resource. This includes the training script, and any additional files your training script depends on"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import shutil\n",
|
||||
"\n",
|
||||
"project_folder = './explainer-remote-run-on-amlcompute'\n",
|
||||
"os.makedirs(project_folder, exist_ok=True)\n",
|
||||
"shutil.copy('train_explain.py', project_folder)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Option 1: Provision as a run based compute target\n",
|
||||
"\n",
|
||||
"You can provision AmlCompute as a compute target at run-time. In this case, the compute is auto-created for your run, scales up to max_nodes that you specify, and then **deleted automatically** after the run completes."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.runconfig import RunConfiguration\n",
|
||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||
"from azureml.core.runconfig import DEFAULT_CPU_IMAGE\n",
|
||||
"\n",
|
||||
"# create a new runconfig object\n",
|
||||
"run_config = RunConfiguration()\n",
|
||||
"\n",
|
||||
"# signal that you want to use AmlCompute to execute script.\n",
|
||||
"run_config.target = \"amlcompute\"\n",
|
||||
"\n",
|
||||
"# AmlCompute will be created in the same region as workspace\n",
|
||||
"# Set vm size for AmlCompute\n",
|
||||
"run_config.amlcompute.vm_size = 'STANDARD_D2_V2'\n",
|
||||
"\n",
|
||||
"# enable Docker \n",
|
||||
"run_config.environment.docker.enabled = True\n",
|
||||
"\n",
|
||||
"# set Docker base image to the default CPU-based image\n",
|
||||
"run_config.environment.docker.base_image = DEFAULT_CPU_IMAGE\n",
|
||||
"\n",
|
||||
"# use conda_dependencies.yml to create a conda environment in the Docker image for execution\n",
|
||||
"run_config.environment.python.user_managed_dependencies = False\n",
|
||||
"\n",
|
||||
"azureml_pip_packages = [\n",
|
||||
" 'azureml-defaults', 'azureml-contrib-explain-model', 'azureml-core', 'azureml-telemetry',\n",
|
||||
" 'azureml-explain-model', 'sklearn-pandas', 'azureml-dataprep'\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"# specify CondaDependencies obj\n",
|
||||
"run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'],\n",
|
||||
" pip_packages=azureml_pip_packages)\n",
|
||||
"\n",
|
||||
"# Now submit a run on AmlCompute\n",
|
||||
"from azureml.core.script_run_config import ScriptRunConfig\n",
|
||||
"\n",
|
||||
"script_run_config = ScriptRunConfig(source_directory=project_folder,\n",
|
||||
" script='train_explain.py',\n",
|
||||
" run_config=run_config)\n",
|
||||
"\n",
|
||||
"run = experiment.submit(script_run_config)\n",
|
||||
"\n",
|
||||
"# Show run details\n",
|
||||
"run"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Note: if you need to cancel a run, you can follow [these instructions](https://aka.ms/aml-docs-cancel-run)."
|
||||
]
|
||||
},
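For completeness, a one-line sketch of cancelling from the SDK:

```python
# stop the submitted run if it is no longer needed
run.cancel()
```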
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"# Shows output of the run on stdout.\n",
|
||||
"run.wait_for_completion(show_output=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Option 2: Provision as a persistent compute target (Basic)\n",
|
||||
"\n",
|
||||
"You can provision a persistent AmlCompute resource by simply defining two parameters thanks to smart defaults. By default it autoscales from 0 nodes and provisions dedicated VMs to run your job in a container. This is useful when you want to continously re-use the same target, debug it between jobs or simply share the resource with other users of your workspace.\n",
|
||||
"\n",
|
||||
"* `vm_size`: VM family of the nodes provisioned by AmlCompute. Simply choose from the supported_vmsizes() above\n",
|
||||
"* `max_nodes`: Maximum nodes to autoscale to while running a job on AmlCompute"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
|
||||
"from azureml.core.compute_target import ComputeTargetException\n",
|
||||
"\n",
|
||||
"# Choose a name for your CPU cluster\n",
|
||||
"cpu_cluster_name = \"cpu-cluster\"\n",
|
||||
"\n",
|
||||
"# Verify that cluster does not exist already\n",
|
||||
"try:\n",
|
||||
" cpu_cluster = ComputeTarget(workspace=ws, name=cpu_cluster_name)\n",
|
||||
" print('Found existing cluster, use it.')\n",
|
||||
"except ComputeTargetException:\n",
|
||||
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',\n",
|
||||
" max_nodes=4)\n",
|
||||
" cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, compute_config)\n",
|
||||
"\n",
|
||||
"cpu_cluster.wait_for_completion(show_output=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Configure & Run"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.runconfig import RunConfiguration\n",
|
||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||
"\n",
|
||||
"# create a new RunConfig object\n",
|
||||
"run_config = RunConfiguration(framework=\"python\")\n",
|
||||
"\n",
|
||||
"# Set compute target to AmlCompute target created in previous step\n",
|
||||
"run_config.target = cpu_cluster.name\n",
|
||||
"\n",
|
||||
"# enable Docker \n",
|
||||
"run_config.environment.docker.enabled = True\n",
|
||||
"\n",
|
||||
"azureml_pip_packages = [\n",
|
||||
" 'azureml-defaults', 'azureml-contrib-explain-model', 'azureml-core', 'azureml-telemetry',\n",
|
||||
" 'azureml-explain-model', 'azureml-dataprep'\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"# specify CondaDependencies obj\n",
|
||||
"run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'],\n",
|
||||
" pip_packages=azureml_pip_packages)\n",
|
||||
"\n",
|
||||
"from azureml.core import Run\n",
|
||||
"from azureml.core import ScriptRunConfig\n",
|
||||
"\n",
|
||||
"src = ScriptRunConfig(source_directory=project_folder, \n",
|
||||
" script='train_explain.py', \n",
|
||||
" run_config=run_config) \n",
|
||||
"run = experiment.submit(config=src)\n",
|
||||
"run"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"# Shows output of the run on stdout.\n",
|
||||
"run.wait_for_completion(show_output=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run.get_metrics()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Option 3: Provision as a persistent compute target (Advanced)\n",
|
||||
"\n",
|
||||
"You can also specify additional properties or change defaults while provisioning AmlCompute using a more advanced configuration. This is useful when you want a dedicated cluster of 4 nodes (for example you can set the min_nodes and max_nodes to 4), or want the compute to be within an existing VNet in your subscription.\n",
|
||||
"\n",
|
||||
"In addition to `vm_size` and `max_nodes`, you can specify:\n",
|
||||
"* `min_nodes`: Minimum nodes (default 0 nodes) to downscale to while running a job on AmlCompute\n",
|
||||
"* `vm_priority`: Choose between 'dedicated' (default) and 'lowpriority' VMs when provisioning AmlCompute. Low Priority VMs use Azure's excess capacity and are thus cheaper but risk your run being pre-empted\n",
|
||||
"* `idle_seconds_before_scaledown`: Idle time (default 120 seconds) to wait after run completion before auto-scaling to min_nodes\n",
|
||||
"* `vnet_resourcegroup_name`: Resource group of the **existing** VNet within which AmlCompute should be provisioned\n",
|
||||
"* `vnet_name`: Name of VNet\n",
|
||||
"* `subnet_name`: Name of SubNet within the VNet"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
|
||||
"from azureml.core.compute_target import ComputeTargetException\n",
|
||||
"\n",
|
||||
"# Choose a name for your CPU cluster\n",
|
||||
"cpu_cluster_name = \"cpu-cluster\"\n",
|
||||
"\n",
|
||||
"# Verify that cluster does not exist already\n",
|
||||
"try:\n",
|
||||
" cpu_cluster = ComputeTarget(workspace=ws, name=cpu_cluster_name)\n",
|
||||
" print('Found existing cluster, use it.')\n",
|
||||
"except ComputeTargetException:\n",
|
||||
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',\n",
|
||||
" vm_priority='lowpriority',\n",
|
||||
" min_nodes=2,\n",
|
||||
" max_nodes=4,\n",
|
||||
" idle_seconds_before_scaledown='300',\n",
|
||||
" vnet_resourcegroup_name='<my-resource-group>',\n",
|
||||
" vnet_name='<my-vnet-name>',\n",
|
||||
" subnet_name='<my-subnet-name>')\n",
|
||||
" cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, compute_config)\n",
|
||||
"\n",
|
||||
"cpu_cluster.wait_for_completion(show_output=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Configure & Run"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.runconfig import RunConfiguration\n",
|
||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||
"\n",
|
||||
"# create a new RunConfig object\n",
|
||||
"run_config = RunConfiguration(framework=\"python\")\n",
|
||||
"\n",
|
||||
"# Set compute target to AmlCompute target created in previous step\n",
|
||||
"run_config.target = cpu_cluster.name\n",
|
||||
"\n",
|
||||
"# enable Docker \n",
|
||||
"run_config.environment.docker.enabled = True\n",
|
||||
"\n",
|
||||
"azureml_pip_packages = [\n",
|
||||
" 'azureml-defaults', 'azureml-contrib-explain-model', 'azureml-core', 'azureml-telemetry',\n",
|
||||
" 'azureml-explain-model', 'azureml-dataprep'\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# specify CondaDependencies obj\n",
|
||||
"run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'],\n",
|
||||
" pip_packages=azureml_pip_packages)\n",
|
||||
"\n",
|
||||
"from azureml.core import Run\n",
|
||||
"from azureml.core import ScriptRunConfig\n",
|
||||
"\n",
|
||||
"src = ScriptRunConfig(source_directory=project_folder, \n",
|
||||
" script='train_explain.py', \n",
|
||||
" run_config=run_config) \n",
|
||||
"run = experiment.submit(config=src)\n",
|
||||
"run"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"# Shows output of the run on stdout.\n",
|
||||
"run.wait_for_completion(show_output=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run.get_metrics()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.contrib.explain.model.explanation.explanation_client import ExplanationClient\n",
|
||||
"\n",
|
||||
"client = ExplanationClient.from_run(run)\n",
|
||||
"# Get the top k (e.g., 4) most important features with their importance values\n",
|
||||
"explanation = client.download_model_explanation(top_k=4)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Additional operations to perform on AmlCompute\n",
|
||||
"\n",
|
||||
"You can perform more operations on AmlCompute such as updating the node counts or deleting the compute. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Get_status () gets the latest status of the AmlCompute target\n",
|
||||
"cpu_cluster.get_status().serialize()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Update () takes in the min_nodes, max_nodes and idle_seconds_before_scaledown and updates the AmlCompute target\n",
|
||||
"# cpu_cluster.update(min_nodes=1)\n",
|
||||
"# cpu_cluster.update(max_nodes=10)\n",
|
||||
"cpu_cluster.update(idle_seconds_before_scaledown=300)\n",
|
||||
"# cpu_cluster.update(min_nodes=2, max_nodes=4, idle_seconds_before_scaledown=600)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Delete () is used to deprovision and delete the AmlCompute target. Useful if you want to re-use the compute name \n",
|
||||
"# 'cpu-cluster' in this case but use a different VM family for instance.\n",
|
||||
"\n",
|
||||
"# cpu_cluster.delete()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Download \n",
|
||||
"1. Download model explanation data."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.contrib.explain.model.explanation.explanation_client import ExplanationClient\n",
|
||||
"\n",
|
||||
"# Get model explanation data\n",
|
||||
"client = ExplanationClient.from_run(run)\n",
|
||||
"global_explanation = client.download_model_explanation()\n",
|
||||
"local_importance_values = global_explanation.local_importance_values\n",
|
||||
"expected_values = global_explanation.expected_values\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Or you can use the saved run.id to retrive the feature importance values\n",
|
||||
"client = ExplanationClient.from_run_id(ws, experiment_name, run.id)\n",
|
||||
"global_explanation = client.download_model_explanation()\n",
|
||||
"local_importance_values = global_explanation.local_importance_values\n",
|
||||
"expected_values = global_explanation.expected_values"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Get the top k (e.g., 4) most important features with their importance values\n",
|
||||
"global_explanation_topk = client.download_model_explanation(top_k=4)\n",
|
||||
"global_importance_values = global_explanation_topk.get_ranked_global_values()\n",
|
||||
"global_importance_names = global_explanation_topk.get_ranked_global_names()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print('global importance values: {}'.format(global_importance_values))\n",
|
||||
"print('global importance names: {}'.format(global_importance_names))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"2. Download model file."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# retrieve model for visualization and deployment\n",
|
||||
"from azureml.core.model import Model\n",
|
||||
"from sklearn.externals import joblib\n",
|
||||
"original_model = Model(ws, 'original_model')\n",
|
||||
"model_path = original_model.download(exist_ok=True)\n",
|
||||
"original_model = joblib.load(model_path)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"3. Download test dataset."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# retrieve x_test for visualization\n",
|
||||
"from sklearn.externals import joblib\n",
|
||||
"x_test_path = './x_test_boston_housing.pkl'\n",
|
||||
"run.download_file('x_test_boston_housing.pkl', output_file_path=x_test_path)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"x_test = joblib.load('x_test_boston_housing.pkl')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Visualize\n",
|
||||
"Load the visualization dashboard"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.contrib.explain.model.visualize import ExplanationDashboard"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ExplanationDashboard(global_explanation, original_model, x_test)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Next\n",
|
||||
"Learn about other use cases of the explain package on a:\n",
|
||||
"1. [Training time: regression problem](../../tabular-data/explain-binary-classification-local.ipynb) \n",
|
||||
"1. [Training time: binary classification problem](../../tabular-data/explain-binary-classification-local.ipynb)\n",
|
||||
"1. [Training time: multiclass classification problem](../../tabular-data/explain-multiclass-classification-local.ipynb)\n",
|
||||
"1. Explain models with engineered features:\n",
|
||||
" 1. [Simple feature transformations](../../tabular-data/simple-feature-transformations-explain-local.ipynb)\n",
|
||||
" 1. [Advanced feature transformations](../../tabular-data/advanced-feature-transformations-explain-local.ipynb)\n",
|
||||
"1. [Save model explanations via Azure Machine Learning Run History](../run-history/save-retrieve-explanations-run-history.ipynb)\n",
|
||||
"1. Inferencing time: deploy a classification model and explainer:\n",
|
||||
" 1. [Deploy a locally-trained model and explainer](../scoring-time/train-explain-model-locally-and-deploy.ipynb)\n",
|
||||
" 1. [Deploy a remotely-trained model and explainer](../scoring-time/train-explain-model-on-amlcompute-and-deploy.ipynb)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "mesameki"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
"name": "python36"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -1,8 +0,0 @@
|
||||
name: explain-model-on-amlcompute
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-explain-model
|
||||
- azureml-contrib-explain-model
|
||||
- sklearn-pandas
|
||||
- azureml-dataprep
|
||||
@@ -1,63 +0,0 @@
|
||||
# Copyright (c) Microsoft. All rights reserved.
|
||||
# Licensed under the MIT license.
|
||||
|
||||
from sklearn import datasets
|
||||
from sklearn.linear_model import Ridge
|
||||
from azureml.explain.model.tabular_explainer import TabularExplainer
|
||||
from azureml.contrib.explain.model.explanation.explanation_client import ExplanationClient
|
||||
from sklearn.model_selection import train_test_split
|
||||
from azureml.core.run import Run
|
||||
from sklearn.externals import joblib
|
||||
import os
|
||||
import numpy as np
|
||||
|
||||
OUTPUT_DIR = './outputs/'
|
||||
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
||||
|
||||
boston_data = datasets.load_boston()
|
||||
|
||||
run = Run.get_context()
|
||||
client = ExplanationClient.from_run(run)
|
||||
|
||||
X_train, X_test, y_train, y_test = train_test_split(boston_data.data,
|
||||
boston_data.target,
|
||||
test_size=0.2,
|
||||
random_state=0)
|
||||
# write x_test out as a pickle file for later visualization
|
||||
x_test_pkl = 'x_test.pkl'
|
||||
joblib.dump(value=X_test, filename=os.path.join(OUTPUT_DIR, x_test_pkl))
|
||||
run.upload_file('x_test_boston_housing.pkl', os.path.join(OUTPUT_DIR, x_test_pkl))
|
||||
|
||||
|
||||
alpha = 0.5
|
||||
# Use Ridge algorithm to create a regression model
|
||||
reg = Ridge(alpha)
|
||||
model = reg.fit(X_train, y_train)
|
||||
|
||||
preds = reg.predict(X_test)
|
||||
run.log('alpha', alpha)
|
||||
|
||||
model_file_name = 'ridge_{0:.2f}.pkl'.format(alpha)
|
||||
# save model in the outputs folder so it automatically gets uploaded
|
||||
joblib.dump(value=reg, filename=os.path.join(OUTPUT_DIR, model_file_name))
|
||||
|
||||
# register the model
|
||||
run.upload_file('original_model.pkl', os.path.join('./outputs/', model_file_name))
|
||||
original_model = run.register_model(model_name='original_model', model_path='original_model.pkl')
|
||||
|
||||
# Explain predictions on your local machine
|
||||
tabular_explainer = TabularExplainer(model, X_train, features=boston_data.feature_names)
|
||||
|
||||
# Explain overall model predictions (global explanation)
|
||||
# Passing in test dataset for evaluation examples - note it must be a representative sample of the original data
|
||||
# x_train can be passed as well, but with more examples the explanations will
|
||||
# take longer to compute, although they may be more accurate
|
||||
global_explanation = tabular_explainer.explain_global(X_test)
|
||||
|
||||
# Uploading model explanation data for storage or visualization in webUX
|
||||
# The explanation can then be downloaded on any compute
|
||||
comment = 'Global explanation on regression model trained on boston dataset'
|
||||
client.upload_model_explanation(global_explanation, comment=comment)
|
||||
@@ -241,7 +241,7 @@
|
||||
"\n",
|
||||
"azureml_pip_packages = [\n",
|
||||
" 'azureml-defaults', 'azureml-contrib-explain-model', 'azureml-core', 'azureml-telemetry',\n",
|
||||
" 'azureml-explain-model', 'azureml-dataprep'\n",
|
||||
" 'azureml-explain-model'\n",
|
||||
"]\n",
|
||||
" \n",
|
||||
"\n",
|
||||
@@ -545,4 +545,4 @@
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
}
|
||||
@@ -4,5 +4,5 @@ dependencies:
|
||||
- azureml-sdk
|
||||
- azureml-explain-model
|
||||
- azureml-contrib-explain-model
|
||||
- sklearn-pandas
|
||||
- azureml-dataprep
|
||||
- sklearn-pandas
|
||||
|
||||
@@ -460,7 +460,7 @@
|
||||
"source": [
|
||||
"# Submit syntax\n",
|
||||
"# submit(experiment_name, \n",
|
||||
"# pipeline_parameters=None, \n",
|
||||
"# pipeline_params=None, \n",
|
||||
"# continue_on_step_failure=False, \n",
|
||||
"# regenerate_outputs=False)\n",
|
||||
"\n",
|
||||
|
||||
@@ -321,11 +321,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"hyperdriveconfig-remarks-sample"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"hd_config = HyperDriveConfig(estimator=est, \n",
|
||||
|
||||
@@ -299,7 +299,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.pipeline.core import PipelineParameter\n",
|
||||
"from azureml.pipelince.core import PipelineParameter\n",
|
||||
"\n",
|
||||
"# Use the default blob storage\n",
|
||||
"def_blob_store = Datastore(ws, \"workspaceblobstore\")\n",
|
||||
|
||||
@@ -28,14 +28,14 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Introduction\n",
|
||||
"In this example we showcase how you can use AzureML Dataset to load data for AutoML via AML Pipeline. \n",
|
||||
"In this example we showcase how you can use the `azureml.dataprep` SDK to load and prepare data for AutoML via AML Pipeline. `azureml.dataprep` can also be used standalone; full documentation can be found [here](https://github.com/Microsoft/PendletonDocs).\n",
|
||||
"\n",
|
||||
"If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, make sure you have executed the [configuration](https://aka.ms/pl-config) before running this notebook.\n",
|
||||
"\n",
|
||||
"In this notebook you will learn how to:\n",
|
||||
"1. Create an `Experiment` in an existing `Workspace`.\n",
|
||||
"2. Create or Attach existing AmlCompute to a workspace.\n",
|
||||
"3. Define data loading in a `TabularDataset`.\n",
|
||||
"3. Define data loading and preparation steps in a `Dataflow` using `azureml.dataprep`.\n",
|
||||
"4. Configure AutoML using `AutoMLConfig`.\n",
|
||||
"5. Use AutoMLStep\n",
|
||||
"6. Train the model using AmlCompute\n",
|
||||
@@ -65,6 +65,7 @@
|
||||
"import pandas as pd\n",
|
||||
"from sklearn import datasets\n",
|
||||
"import pkg_resources\n",
|
||||
"import azureml.dataprep as dprep\n",
|
||||
"\n",
|
||||
"import azureml.core\n",
|
||||
"from azureml.core.experiment import Experiment\n",
|
||||
@@ -72,7 +73,6 @@
|
||||
"from azureml.train.automl import AutoMLConfig\n",
|
||||
"from azureml.core.compute import AmlCompute\n",
|
||||
"from azureml.core.compute import ComputeTarget\n",
|
||||
"from azureml.core.dataset import Dataset\n",
|
||||
"from azureml.core.runconfig import RunConfiguration\n",
|
||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||
"\n",
|
||||
@@ -197,10 +197,13 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# You can use `auto_read_file` which intelligently figures out delimiters and datatypes of a file.\n",
|
||||
"# The data referenced here was a 1MB simple random sample of the Chicago Crime data into a local temporary directory.\n",
|
||||
"# You can also use `read_csv` and `to_*` transformations to read (with overridable delimiter)\n",
|
||||
"# and convert column types manually.\n",
|
||||
"example_data = 'https://dprepdata.blob.core.windows.net/demo/crime0-random.csv'\n",
|
||||
"dataset = Dataset.Tabular.from_delimited_files(example_data)\n",
|
||||
"dataset.to_pandas_dataframe().describe()"
|
||||
"dflow = dprep.auto_read_file(example_data).skip(1) # Remove the header row.\n",
|
||||
"dflow.get_profile()"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -209,18 +212,20 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dataset.take(5).to_pandas_dataframe()"
|
||||
"# As `Primary Type` is our y data, we need to drop the values those are null in this column.\n",
|
||||
"dflow = dflow.drop_nulls('Primary Type')\n",
|
||||
"dflow.head(5)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Review the Dataset Result\n",
|
||||
"### Review the Data Preparation Result\n",
|
||||
"\n",
|
||||
"You can peek the result of a TabularDataset at any range using `skip(i)` and `take(j).to_pandas_dataframe()`. Doing so evaluates only `j` records for all the steps in the TabularDataset, which makes it fast even against large datasets.\n",
|
||||
"You can peek the result of a Dataflow at any range using `skip(i)` and `head(j)`. Doing so evaluates only `j` records for all the steps in the Dataflow, which makes it fast even against large datasets.\n",
|
||||
"\n",
|
||||
"`TabularDataset` objects are composed of a list of transformation steps (optional)."
|
||||
"`Dataflow` objects are immutable and are composed of a list of data preparation steps. A `Dataflow` object can be branched at any point for further usage."
|
||||
]
|
||||
},
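For example, a quick sketch of peeking at a slice of the `dflow` defined above:

```python
# lazily skip the first 100 records and materialize only the next 5
dflow.skip(100).head(5)
```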
|
||||
{
|
||||
@@ -229,8 +234,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"X = dataset.drop_columns(columns=['Primary Type', 'FBI Code'])\n",
|
||||
"y = dataset.keep_columns(columns=['Primary Type'], validate=True)\n",
|
||||
"X = dflow.drop_columns(columns=['Primary Type', 'FBI Code'])\n",
|
||||
"y = dflow.keep_columns(columns=['Primary Type'], validate_column_exists=True)\n",
|
||||
"print('X and y are ready!')"
|
||||
]
|
||||
},
|
||||
@@ -436,12 +441,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dataset = Dataset.Tabular.from_delimited_files(path='https://dprepdata.blob.core.windows.net/demo/crime0-test.csv')\n",
|
||||
"df_test = dataset_test.to_pandas_dataframe()\n",
|
||||
"df_test = df_test[pd.notnull(df['Primary Type'])]\n",
|
||||
"\n",
|
||||
"y_test = df_test[['Primary Type']]\n",
|
||||
"X_test = df_test.drop(['Primary Type', 'FBI Code'], axis=1)"
|
||||
"dflow_test = dprep.auto_read_file(path='https://dprepdata.blob.core.windows.net/demo/crime0-test.csv').skip(1)\n",
|
||||
"dflow_test = dflow_test.drop_nulls('Primary Type')"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -461,6 +462,10 @@
|
||||
"source": [
|
||||
"from pandas_ml import ConfusionMatrix\n",
|
||||
"\n",
|
||||
"y_test = dflow_test.keep_columns(columns=['Primary Type']).to_pandas_dataframe()\n",
|
||||
"X_test = dflow_test.drop_columns(columns=['Primary Type', 'FBI Code']).to_pandas_dataframe()\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"ypred = best_model.predict(X_test)\n",
|
||||
"\n",
|
||||
"cm = ConfusionMatrix(y_test['Primary Type'], ypred)\n",
|
||||
|
||||
@@ -1,12 +1,5 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -194,19 +187,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.compute import AmlCompute\n",
|
||||
"from azureml.core.compute import ComputeTarget\n",
|
||||
"\n",
|
||||
"aml_compute = ws.get_default_compute_target(\"CPU\")\n",
|
||||
"\n",
|
||||
"if aml_compute is None:\n",
|
||||
" amlcompute_cluster_name = \"cpu-cluster\"\n",
|
||||
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\",\n",
|
||||
" max_nodes = 4)\n",
|
||||
"\n",
|
||||
" aml_compute = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)\n",
|
||||
" aml_compute.wait_for_completion(show_output = True, min_node_count = None, timeout_in_minutes = 20)\n",
|
||||
"\n",
|
||||
"aml_compute"
|
||||
]
|
||||
},
|
||||
@@ -754,8 +735,6 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%writefile $train_model_folder/get_data.py\n",
|
||||
"import os\n",
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"def get_data():\n",
|
||||
" print(\"In get_data\")\n",
|
||||
|
||||
@@ -387,15 +387,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"pipelineparameterssample"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pipeline = Pipeline(workspace=ws, steps=[batch_score_step])\n",
|
||||
"pipeline_run = Experiment(ws, 'batch_scoring').submit(pipeline, pipeline_parameters={\"param_batch_size\": 20})"
|
||||
"pipeline_run = Experiment(ws, 'batch_scoring').submit(pipeline, pipeline_params={\"param_batch_size\": 20})"
|
||||
]
|
||||
},
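For context, a hedged sketch of how such a parameter is typically declared so that the value passed at submission time can override it; the parameter name matches the submit call above, the default value is illustrative:

```python
from azureml.pipeline.core.graph import PipelineParameter

# Declare a pipeline parameter with a default value. The value supplied at
# submit time (e.g. pipeline_parameters={"param_batch_size": 20}) overrides
# this default without re-publishing the pipeline.
batch_size_param = PipelineParameter(name="param_batch_size", default_value=10)
```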
|
||||
{
|
||||
|
||||
@@ -384,7 +384,7 @@
|
||||
"source": [
|
||||
"pipeline = Pipeline(workspace=ws, steps=[stitch_video_step])\n",
|
||||
"# submit the pipeline and provide values for the PipelineParameters used in the pipeline\n",
|
||||
"pipeline_run = Experiment(ws, 'style_transfer').submit(pipeline, pipeline_parameters={\"style\": \"mosaic\", \"nodecount\": 3})"
|
||||
"pipeline_run = Experiment(ws, 'style_transfer').submit(pipeline, pipeline_params={\"style\": \"mosaic\", \"nodecount\": 3})"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -645,7 +645,26 @@
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.7"
|
||||
}
|
||||
},
|
||||
"friendly_name": "Pipeline test",
|
||||
"exclude_from_index": false,
|
||||
"order_index": 1,
|
||||
"category": "training",
|
||||
"tags": [
|
||||
],
|
||||
"task": "Regression",
|
||||
"datasets": [
|
||||
"NYC Taxi"
|
||||
],
|
||||
"compute": [
|
||||
"local"
|
||||
],
|
||||
"deployment": [
|
||||
"None"
|
||||
],
|
||||
"framework": [
|
||||
"Azure ML AutoML"
|
||||
]
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
|
||||
@@ -26,10 +26,9 @@
|
||||
"\n",
|
||||
" 1. Interactive Login Authentication\n",
|
||||
" 2. Azure CLI Authentication\n",
|
||||
" 3. Managed Service Identity (MSI) Authentication\n",
|
||||
" 4. Service Principal Authentication\n",
|
||||
" 3. Service Principal Authentication\n",
|
||||
" \n",
|
||||
"The interactive authentication is suitable for local experimentation on your own computer. Azure CLI authentication is suitable if you are already using Azure CLI for managing Azure resources, and want to sign in only once. The MSI and Service Principal authentication are suitable for automated workflows, for example as part of Azure Devops build."
|
||||
"The interactive authentication is suitable for local experimentation on your own computer. Azure CLI authentication is suitable if you are already using Azure CLI for managing Azure resources, and want to sign in only once. The Service Principal authentication is suitable for automated workflows, for example as part of Azure Devops build."
|
||||
]
|
||||
},
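A minimal sketch of the first two options; the tenant id and workspace names below are placeholders:

```python
from azureml.core import Workspace
from azureml.core.authentication import InteractiveLoginAuthentication, AzureCliAuthentication

# Interactive login: opens a browser/device prompt. tenant_id is optional and
# shown here only as a hypothetical placeholder.
interactive_auth = InteractiveLoginAuthentication(tenant_id="my-tenant-id")

# Azure CLI login: reuses the session from a prior `az login`.
cli_auth = AzureCliAuthentication()

ws = Workspace(subscription_id="my-subscription-id",
               resource_group="my-ml-rg",
               workspace_name="my-ml-workspace",
               auth=interactive_auth)
```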
|
||||
{
|
||||
@@ -146,43 +145,6 @@
|
||||
"print(\"Found workspace {} at location {}\".format(ws.name, ws.location))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### MSI Authentication\n",
|
||||
"\n",
|
||||
"__Note__: _MSI authentication is supported only when using SDK from Azure Virtual Machine. The code below will fail on local computer._\n",
|
||||
"\n",
|
||||
"When using Azure ML SDK on Azure Virtual Machine (VM), you can use Managed Service Identity (MSI) based authentication. This mode allows the VM connect to the Workspace without storing credentials in the Python code.\n",
|
||||
"\n",
|
||||
"As a pre-requisite, enable System-assigned Managed Identity for your VM as described in [this document](https://docs.microsoft.com/en-us/azure/active-directory/managed-identities-azure-resources/qs-configure-portal-windows-vm).\n",
|
||||
"\n",
|
||||
"Then, assign the VM access to your Workspace. For example from Azure Portal, navigate to your workspace, select __Access Control (IAM)__, __Add Role Assignment__, specify __Virtual Machine__ for __Assign Access To__ dropdown, and select your VM's identity.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"After completing these steps, you can use authenticate using MsiAuthentication instance."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.authentication import MsiAuthentication\n",
|
||||
"\n",
|
||||
"msi_auth = MsiAuthentication()\n",
|
||||
"\n",
|
||||
"ws = Workspace(subscription_id=\"my-subscription-id\",\n",
|
||||
" resource_group=\"my-ml-rg\",\n",
|
||||
" workspace_name=\"my-ml-workspace\",\n",
|
||||
" auth=msi_auth)\n",
|
||||
"\n",
|
||||
"print(\"Found workspace {} at location {}\".format(ws.name, ws.location))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -276,135 +238,6 @@
|
||||
"See [Register an application with the Microsoft identity platform](https://docs.microsoft.com/en-us/azure/active-directory/develop/quickstart-register-app) quickstart for more details about application registrations. "
|
||||
]
|
||||
},
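A hedged sketch of Service Principal authentication for such automated workflows; the environment variable names are hypothetical, and credentials are deliberately not hard-coded:

```python
import os
from azureml.core import Workspace
from azureml.core.authentication import ServicePrincipalAuthentication

# Read credentials from environment variables rather than hard-coded strings;
# the variable names here are illustrative placeholders.
sp_auth = ServicePrincipalAuthentication(
    tenant_id=os.environ["AML_TENANT_ID"],
    service_principal_id=os.environ["AML_PRINCIPAL_ID"],
    service_principal_password=os.environ["AML_PRINCIPAL_PASS"])

ws = Workspace(subscription_id="my-subscription-id",
               resource_group="my-ml-rg",
               workspace_name="my-ml-workspace",
               auth=sp_auth)
```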
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Using Secrets in Remote Runs\n",
|
||||
"\n",
|
||||
"Sometimes, you may have to pass a secret to a remote run, for example username and password to authenticate against external data source.\n",
|
||||
"\n",
|
||||
"Azure ML SDK enables this use case through Key Vault associated with your workspace. The workflow for adding a secret is following.\n",
|
||||
"\n",
|
||||
"On local computer:\n",
|
||||
"\n",
|
||||
" 1. Read in a local secret, for example from environment variable or user input. To keep them secret, do not insert secret values into code as hard-coded strings.\n",
|
||||
" 2. Obtain a reference to the keyvault\n",
|
||||
" 3. Add the secret name-value pair in the key vault.\n",
|
||||
" \n",
|
||||
"The secret is then available for remote runs as shown further below.\n",
|
||||
"\n",
|
||||
"__Note__: The _azureml.core.keyvault.Keyvault_ is different from _azure.keyvault_ library. It is intended as simplified wrapper for setting, getting and listing user secrets in Workspace Key Vault."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os, uuid\n",
|
||||
"\n",
|
||||
"local_secret = os.environ.get(\"LOCAL_SECRET\", default = str(uuid.uuid4())) # Use random UUID as a substitute for real secret.\n",
|
||||
"keyvault = ws.get_default_keyvault()\n",
|
||||
"keyvault.set_secret(name=\"secret-name\", value = local_secret)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The _set_secret_ method adds a new secret if one doesn't exist, or updates an existing one with new value.\n",
|
||||
"\n",
|
||||
"You can list secret names you've added. This method doesn't return the values of the secrets."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"keyvault.list_secrets()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can retrieve the value of the secret, and validate that it matches the original value. \n",
|
||||
"\n",
|
||||
"__Note__: This method returns the secret value. Take care not to write the the secret value to output."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retrieved_secret = keyvault.get_secret(name=\"secret-name\")\n",
|
||||
"local_secret==retrieved_secret"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"In submitted runs on local and remote compute, you can use the get_secret method of Run instance to get the secret value from Key Vault. \n",
|
||||
"\n",
|
||||
"The method gives you a simple shortcut: the Run instance is aware of its Workspace and Keyvault, so it can directly obtain the secret without you having to instantiate the Workspace and Keyvault within remote run.\n",
|
||||
"\n",
|
||||
"__Note__: This method returns the secret value. Take care not to write the secret to output.\n",
|
||||
"\n",
|
||||
"For example, let's create a simple script _get_secret.py_ that gets the secret we set earlier. In an actual appication, you would use the secret, for example to access a database or other password-protected resource."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%writefile get_secret.py\n",
|
||||
"\n",
|
||||
"from azureml.core import Run\n",
|
||||
"\n",
|
||||
"run = Run.get_context()\n",
|
||||
"secret_value = run.get_secret(name=\"secret-name\")\n",
|
||||
"print(\"Got secret value {} , but don't write it out!\".format(len(secret_value) * \"*\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Then, submit the script as a regular script run, and find the obfuscated secret value in run output. You can use the same approach to other kinds of runs, such as Estimator ones."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core import Experiment, Run\n",
|
||||
"from azureml.core.script_run_config import ScriptRunConfig\n",
|
||||
"\n",
|
||||
"exp = Experiment(workspace = ws, name=\"try-secret\")\n",
|
||||
"src = ScriptRunConfig(source_directory=\".\", script=\"get_secret.py\")\n",
|
||||
"\n",
|
||||
"run = exp.submit(src)\n",
|
||||
"run.wait_for_completion(show_output=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Furthermore, you can set and get multiple secrets using set_secrets and get_secrets methods."
|
||||
]
|
||||
},
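A minimal sketch of the batch secret APIs, assuming `keyvault` is the workspace Key Vault reference obtained above; the secret names and values are illustrative:

```python
# set_secrets takes a dict of name/value pairs; get_secrets takes a list of
# names and returns a dict mapping each name to its value.
keyvault.set_secrets({"secret-1": "value-1", "secret-2": "value-2"})
retrieved = keyvault.get_secrets(["secret-1", "secret-2"])
```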
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@@ -434,7 +267,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.9"
|
||||
"version": "3.6.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -1,19 +0,0 @@
|
||||
|
||||
## Follow these sample notebooks to learn:
|
||||
|
||||
1. [Logging API](./logging-api/logging-api.ipynb): experiment with various logging functions to create runs and automatically generate graphs.
|
||||
2. [Manage runs](./manage-runs/manage-runs.ipynb): learn different ways to start runs and child runs, monitor them, and cancel them.
|
||||
3. [Tensorboard to monitor runs](./tensorboard/tensorboard.ipynb)
|
||||
|
||||
## Use MLflow with Azure Machine Learning service (Preview)
|
||||
|
||||
[MLflow](https://mlflow.org/) is an open-source platform for tracking machine learning experiments and managing models. You can use MLflow logging APIs with Azure Machine Learning service: the metrics and artifacts are logged to your Azure ML Workspace.
|
||||
|
||||
Try out the sample notebooks:
|
||||
1. [Use MLflow with Azure Machine Learning for Local Training Run](./train-local/train-local.ipynb)
|
||||
1. [Use MLflow with Azure Machine Learning for Remote Training Run](./train-remote/train-remote.ipynb)
|
||||
1. [Deploy Model as Azure Machine Learning Web Service using MLflow](./deploy-model/deploy-model.ipynb)
|
||||
1. [Train and Deploy PyTorch Image Classifier](./train-deploy-pytorch/train-deploy-pytorch.ipynb)
|
||||
|
||||

|
||||
|
||||
Binary file not shown. (Before: 29 KiB)
Binary file not shown. (Before: 32 KiB)
@@ -1,545 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||
"\n",
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Logging\n",
|
||||
"\n",
|
||||
"_**This notebook showcases various ways to use the Azure Machine Learning service run logging APIs, and view the results in the Azure portal.**_\n",
|
||||
"\n",
|
||||
"---\n",
|
||||
"---\n",
|
||||
"\n",
|
||||
"## Table of Contents\n",
|
||||
"\n",
|
||||
"1. [Introduction](#Introduction)\n",
|
||||
"1. [Setup](#Setup)\n",
|
||||
" 1. Validate Azure ML SDK installation\n",
|
||||
" 1. Initialize workspace\n",
|
||||
" 1. Set experiment\n",
|
||||
"1. [Logging](#Logging)\n",
|
||||
" 1. Starting a run\n",
|
||||
" 1. Viewing a run in the portal\n",
|
||||
" 1. Viewing the experiment in the portal\n",
|
||||
" 1. Logging metrics\n",
|
||||
" 1. Logging string metrics\n",
|
||||
" 1. Logging numeric metrics\n",
|
||||
" 1. Logging vectors\n",
|
||||
" 1. Logging tables\n",
|
||||
" 1. Uploading files\n",
|
||||
"1. [Analyzing results](#Analyzing-results)\n",
|
||||
" 1. Tagging a run\n",
|
||||
"1. [Next steps](#Next-steps)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Introduction\n",
|
||||
"\n",
|
||||
"Logging metrics from runs in your experiments allows you to track results from one run to another, determining trends in your outputs and understand how your inputs correspond to your model and script performance. Azure Machine Learning services (AzureML) allows you to track various types of metrics including images and arbitrary files in order to understand, analyze, and audit your experimental progress. \n",
|
||||
"\n",
|
||||
"Typically you should log all parameters for your experiment and all numerical and string outputs of your experiment. This will allow you to analyze the performance of your experiments across multiple runs, correlate inputs to outputs, and filter runs based on interesting criteria.\n",
|
||||
"\n",
|
||||
"The experiment's Run History report page automatically creates a report that can be customized to show the KPI's, charts, and column sets that are interesting to you. \n",
|
||||
"\n",
|
||||
"|  |  |\n",
|
||||
"|:--:|:--:|\n",
|
||||
"| *Run Details* | *Run History* |\n",
|
||||
"\n",
|
||||
"---\n",
|
||||
"\n",
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [configuration](../../../configuration.ipynb) Notebook first if you haven't already to establish your connection to the AzureML Workspace. Also make sure you have tqdm and matplotlib installed in the current kernel.\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"(myenv) $ conda install -y tqdm matplotlib\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Validate Azure ML SDK installation and get version number for debugging purposes"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"install"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core import Experiment, Workspace, Run\n",
|
||||
"import azureml.core\n",
|
||||
"import numpy as np\n",
|
||||
"from tqdm import tqdm\n",
|
||||
"\n",
|
||||
"# Check core SDK version number\n",
|
||||
"\n",
|
||||
"print(\"This notebook was created using SDK version 1.0.57, you are currently running version\", azureml.core.VERSION)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Initialize workspace\n",
|
||||
"\n",
|
||||
"Initialize a workspace object from persisted configuration."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"create workspace"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ws = Workspace.from_config()\n",
|
||||
"print('Workspace name: ' + ws.name, \n",
|
||||
" 'Azure region: ' + ws.location, \n",
|
||||
" 'Subscription id: ' + ws.subscription_id, \n",
|
||||
" 'Resource group: ' + ws.resource_group, sep='\\n')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Set experiment\n",
|
||||
"Create a new experiment (or get the one with the specified name). An *experiment* is a container for an arbitrary set of *runs*. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"experiment = Experiment(workspace=ws, name='logging-api-test')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"\n",
|
||||
"## Logging\n",
|
||||
"In this section we will explore the various logging mechanisms.\n",
|
||||
"\n",
|
||||
"### Starting a run\n",
|
||||
"\n",
|
||||
"A *run* is a singular experimental trial. In this notebook we will create a run directly on the experiment by calling `run = exp.start_logging()`. If you were experimenting by submitting a script file as an experiment using ``experiment.submit()``, you would call `run = Run.get_context()` in your script to access the run context of your code. In either case, the logging methods on the returned run object work the same.\n",
|
||||
"\n",
|
||||
"This cell also stores the run id for use later in this notebook. The run_id is not necessary for logging."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# start logging for the run\n",
|
||||
"run = experiment.start_logging()\n",
|
||||
"\n",
|
||||
"# access the run id for use later\n",
|
||||
"run_id = run.id\n",
|
||||
"\n",
|
||||
"# change the scale factor on different runs to see how you can compare multiple runs\n",
|
||||
"scale_factor = 2\n",
|
||||
"\n",
|
||||
"# change the category on different runs to see how to organize data in reports\n",
|
||||
"category = 'Red'"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Viewing a run in the Portal\n",
|
||||
"Once a run is started you can see the run in the portal by simply typing ``run``. Clicking on the \"Link to Portal\" link will take you to the Run Details page that shows the metrics you have logged and other run properties. You can refresh this page after each logging statement to see the updated results."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Viewing an experiment in the portal\n",
|
||||
"You can also view an experiement similarly by typing `experiment`. The portal link will take you to the experiment's Run History page that shows all runs and allows you to analyze trends across multiple runs."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"experiment"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Logging metrics\n",
|
||||
"Metrics are visible in the run details page in the AzureML portal and also can be analyzed in experiment reports. The run details page looks as below and contains tabs for Details, Outputs, Logs, and Snapshot. \n",
|
||||
"* The Details page displays attributes about the run, plus logged metrics and images. Metrics that are vectors appear as charts. \n",
|
||||
"* The Outputs page contains any files, such as models, you uploaded into the \"outputs\" directory from your run into storage. If you place files in the \"outputs\" directory locally, the files are automatically uploaded on your behald when the run is completed.\n",
|
||||
"* The Logs page allows you to view any log files created by your run. Logging runs created in notebooks typically do not generate log files.\n",
|
||||
"* The Snapshot page contains a snapshot of the directory specified in the ''start_logging'' statement, plus the notebook at the time of the ''start_logging'' call. This snapshot and notebook can be downloaded from the Run Details page to continue or reproduce an experiment.\n",
|
||||
"\n",
|
||||
"### Logging string metrics\n",
|
||||
"The following cell logs a string metric. A string metric is simply a string value associated with a name. A string metric String metrics are useful for labelling runs and to organize your data. Typically you should log all string parameters as metrics for later analysis - even information such as paths can help to understand how individual experiements perform differently.\n",
|
||||
"\n",
|
||||
"String metrics can be used in the following ways:\n",
|
||||
"* Plot in hitograms\n",
|
||||
"* Group by indicators for numerical plots\n",
|
||||
"* Filtering runs\n",
|
||||
"\n",
|
||||
"String metrics appear in the **Tracked Metrics** section of the Run Details page and can be added as a column in Run History reports."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# log a string metric\n",
|
||||
"run.log(name='Category', value=category)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Logging numerical metrics\n",
|
||||
"The following cell logs some numerical metrics. Numerical metrics can include metrics such as AUC or MSE. You should log any parameter or significant output measure in order to understand trends across multiple experiments. Numerical metrics appear in the **Tracked Metrics** section of the Run Details page, and can be used in charts or KPI's in experiment Run History reports."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# log numerical values\n",
|
||||
"run.log(name=\"scale factor\", value = scale_factor)\n",
|
||||
"run.log(name='Magic Number', value=42 * scale_factor)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Logging vectors\n",
|
||||
"Vectors are good for recording information such as loss curves. You can log a vector by creating a list of numbers, calling ``log_list()`` and supplying a name and the list, or by repeatedly logging a value using the same name.\n",
|
||||
"\n",
|
||||
"Vectors are presented in Run Details as a chart, and are directly comparable in experiment reports when placed in a chart. \n",
|
||||
"\n",
|
||||
"**Note:** vectors logged into the run are expected to be relatively small. Logging very large vectors into Azure ML can result in reduced performance. If you need to store large amounts of data associated with the run, you can write the data to file that will be uploaded."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"fibonacci_values = [0, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89]\n",
|
||||
"scaled_values = (i * scale_factor for i in fibonacci_values)\n",
|
||||
"\n",
|
||||
"# Log a list of values. Note this will generate a single-variable line chart.\n",
|
||||
"run.log_list(name='Fibonacci', value=scaled_values)\n",
|
||||
"\n",
|
||||
"for i in tqdm(range(-10, 10)):\n",
|
||||
" # log a metric value repeatedly, this will generate a single-variable line chart.\n",
|
||||
" run.log(name='Sigmoid', value=1 / (1 + np.exp(-i)))\n",
|
||||
" "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Logging tables\n",
|
||||
"Tables are good for recording related sets of information such as accuracy tables, confusion matrices, etc. \n",
|
||||
"You can log a table in two ways:\n",
|
||||
"* Create a dictionary of lists where each list represents a column in the table and call ``log_table()``\n",
|
||||
"* Repeatedly call ``log_row()`` providing the same table name with a consistent set of named args as the column values\n",
|
||||
"\n",
|
||||
"Tables are presented in Run Details as a chart using the first two columns of the table \n",
|
||||
"\n",
|
||||
"**Note:** tables logged into the run are expected to be relatively small. Logging very large tables into Azure ML can result in reduced performance. If you need to store large amounts of data associated with the run, you can write the data to file that will be uploaded."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# create a dictionary to hold a table of values\n",
|
||||
"sines = {}\n",
|
||||
"sines['angle'] = []\n",
|
||||
"sines['sine'] = []\n",
|
||||
"\n",
|
||||
"for i in tqdm(range(-10, 10)):\n",
|
||||
" angle = i / 2.0 * scale_factor\n",
|
||||
" \n",
|
||||
" # log a 2 (or more) values as a metric repeatedly. This will generate a 2-variable line chart if you have 2 numerical columns.\n",
|
||||
" run.log_row(name='Cosine Wave', angle=angle, cos=np.cos(angle))\n",
|
||||
" \n",
|
||||
" sines['angle'].append(angle)\n",
|
||||
" sines['sine'].append(np.sin(angle))\n",
|
||||
"\n",
|
||||
"# log a dictionary as a table, this will generate a 2-variable chart if you have 2 numerical columns\n",
|
||||
"run.log_table(name='Sine Wave', value=sines)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Logging images\n",
|
||||
"You can directly log _matplotlib_ plots and arbitrary images to your run record. This code logs a _matplotlib_ pyplot object. Images show up in the run details page in the Azure ML Portal."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%matplotlib inline\n",
|
||||
"\n",
|
||||
"# Create a plot\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"angle = np.linspace(-3, 3, 50) * scale_factor\n",
|
||||
"plt.plot(angle,np.tanh(angle), label='tanh')\n",
|
||||
"plt.legend(fontsize=12)\n",
|
||||
"plt.title('Hyperbolic Tangent', fontsize=16)\n",
|
||||
"plt.grid(True)\n",
|
||||
"\n",
|
||||
"# Log the plot to the run. To log an arbitrary image, use the form run.log_image(name, path='./image_path.png')\n",
|
||||
"run.log_image(name='Hyperbolic Tangent', plot=plt)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Uploading files\n",
|
||||
"\n",
|
||||
"Files can also be uploaded explicitly and stored as artifacts along with the run record. These files are also visible in the *Outputs* tab of the Run Details page.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"file_name = 'outputs/myfile.txt'\n",
|
||||
"\n",
|
||||
"with open(file_name, \"w\") as f:\n",
|
||||
" f.write('This is an output file that will be uploaded.\\n')\n",
|
||||
"\n",
|
||||
"# Upload the file explicitly into artifacts \n",
|
||||
"run.upload_file(name = file_name, path_or_stream = file_name)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Completing the run\n",
|
||||
"\n",
|
||||
"Calling `run.complete()` marks the run as completed and triggers the output file collection. If for any reason you need to indicate the run failed or simply need to cancel the run you can call `run.fail()` or `run.cancel()`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run.complete()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"\n",
|
||||
"## Analyzing results"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can refresh the run in the Azure portal to see all of your results. In many cases you will want to analyze runs that were performed previously to inspect the contents or compare results. Runs can be fetched from their parent Experiment object using the ``Run()`` constructor or the ``experiment.get_runs()`` method. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"fetched_run = Run(experiment, run_id)\n",
|
||||
"fetched_run"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Call ``run.get_metrics()`` to retrieve all the metrics from a run."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"fetched_run.get_metrics()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"See the files uploaded for this run by calling ``run.get_file_names()``"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"fetched_run.get_file_names()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Once you know the file names in a run, you can download the files using the ``run.download_file()`` method"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"os.makedirs('files', exist_ok=True)\n",
|
||||
"\n",
|
||||
"for f in run.get_file_names():\n",
|
||||
" dest = os.path.join('files', f.split('/')[-1])\n",
|
||||
" print('Downloading file {} to {}...'.format(f, dest))\n",
|
||||
" fetched_run.download_file(f, dest) "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Tagging a run\n",
|
||||
"Often when you analyze the results of a run, you may need to tag that run with important personal or external information. You can add a tag to a run using the ``run.tag()`` method. AzureML supports valueless and valued tags."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"fetched_run.tag(\"My Favorite Run\")\n",
|
||||
"fetched_run.tag(\"Competition Rank\", 1)\n",
|
||||
"\n",
|
||||
"fetched_run.get_tags()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Next steps\n",
|
||||
"To experiment more with logging and to understand how metrics can be visualized, go back to the *Start a run* section, try changing the category and scale_factor values and going through the notebook several times. Play with the KPI, charting, and column selection options on the experiment's Run History reports page to see how the various metrics can be combined and visualized.\n",
|
||||
"\n",
|
||||
"After learning about all of the logging options, go to the [train on remote vm](..\\train-on-remote-vm\\train-on-remote-vm.ipynb) notebook and experiment with logging from remote compute contexts."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "roastala"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
"name": "python36"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -1,8 +0,0 @@
|
||||
name: logging-api
|
||||
dependencies:
|
||||
- numpy
|
||||
- matplotlib
|
||||
- tqdm
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-widgets
|
||||
@@ -1,7 +0,0 @@
|
||||
# Copyright (c) Microsoft. All rights reserved.
|
||||
# Licensed under the MIT license.
|
||||
|
||||
from azureml.core import Run
|
||||
|
||||
submitted_run = Run.get_context()
|
||||
submitted_run.log(name="message", value="Hello from run!")
|
||||
@@ -1,11 +0,0 @@
|
||||
# Copyright (c) Microsoft. All rights reserved.
|
||||
# Licensed under the MIT license.
|
||||
|
||||
from azureml.core import Run
|
||||
|
||||
run = Run.get_context()
|
||||
|
||||
child_runs = run.create_children(count=5)
|
||||
for c, child in enumerate(child_runs):
|
||||
child.log(name="Hello from child run ", value=c)
|
||||
child.complete()
|
||||
@@ -1,8 +0,0 @@
|
||||
# Copyright (c) Microsoft. All rights reserved.
|
||||
# Licensed under the MIT license.
|
||||
|
||||
import time
|
||||
|
||||
print("Wait for 10 seconds..")
|
||||
time.sleep(10)
|
||||
print("Done waiting")
|
||||
@@ -1,602 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||
"\n",
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Manage runs\n",
|
||||
"\n",
|
||||
"## Table of contents\n",
|
||||
"\n",
|
||||
"1. [Introduction](#Introduction)\n",
|
||||
"1. [Setup](#Setup)\n",
|
||||
"1. [Start, monitor and complete a run](#Start,-monitor-and-complete-a-run)\n",
|
||||
"1. [Add properties and tags](#Add-properties-and-tags)\n",
|
||||
"1. [Query properties and tags](#Query-properties-and-tags)\n",
|
||||
"1. [Start and query child runs](#Start-and-query-child-runs)\n",
|
||||
"1. [Cancel or fail runs](#Cancel-or-fail-runs)\n",
|
||||
"1. [Reproduce a run](#Reproduce-a-run)\n",
|
||||
"1. [Next steps](#Next-steps)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Introduction\n",
|
||||
"\n",
|
||||
"When you're building enterprise-grade machine learning models, it is important to track, organize, monitor and reproduce your training runs. For example, you might want to trace the lineage behind a model deployed to production, and re-run the training experiment to troubleshoot issues. \n",
|
||||
"\n",
|
||||
"This notebooks shows examples how to use Azure Machine Learning services to manage your training runs."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Setup\n",
|
||||
"\n",
|
||||
"If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [configuration](../../../configuration.ipynb) Notebook first if you haven't already to establish your connection to the AzureML Workspace. Also, if you're new to Azure ML, we recommend that you go through [the tutorial](https://docs.microsoft.com/en-us/azure/machine-learning/service/tutorial-train-models-with-aml) first to learn the basic concepts.\n",
|
||||
"\n",
|
||||
"Let's first import required packages, check Azure ML SDK version, connect to your workspace and create an Experiment to hold the runs."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import azureml.core\n",
|
||||
"from azureml.core import Workspace, Experiment, Run\n",
|
||||
"from azureml.core import ScriptRunConfig\n",
|
||||
"\n",
|
||||
"print(azureml.core.VERSION)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ws = Workspace.from_config()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"exp = Experiment(workspace=ws, name=\"explore-runs\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Start, monitor and complete a run\n",
|
||||
"\n",
|
||||
"A run is an unit of execution, typically to train a model, but for other purposes as well, such as loading or transforming data. Runs are tracked by Azure ML service, and can be instrumented with metrics and artifact logging.\n",
|
||||
"\n",
|
||||
"A simplest way to start a run in your interactive Python session is to call *Experiment.start_logging* method. You can then log metrics from within the run."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"notebook_run = exp.start_logging()\n",
|
||||
"\n",
|
||||
"notebook_run.log(name=\"message\", value=\"Hello from run!\")\n",
|
||||
"\n",
|
||||
"print(notebook_run.get_status())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Use *get_status method* to get the status of the run."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(notebook_run.get_status())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Also, you can simply enter the run to get a link to Azure Portal details"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"notebook_run"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Method *get_details* gives you more details on the run."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"notebook_run.get_details()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Use *complete* method to end the run."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"notebook_run.complete()\n",
|
||||
"print(notebook_run.get_status())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can also use Python's *with...as* pattern. The run will automatically complete when moving out of scope. This way you don't need to manually complete the run."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"with exp.start_logging() as notebook_run:\n",
|
||||
" notebook_run.log(name=\"message\", value=\"Hello from run!\")\n",
|
||||
" print(\"Is it still running?\",notebook_run.get_status())\n",
|
||||
" \n",
|
||||
"print(\"Has it completed?\",notebook_run.get_status())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Next, let's look at submitting a run as a separate Python process. To keep the example simple, we submit the run on local computer. Other targets could include remote VMs and Machine Learning Compute clusters in your Azure ML Workspace.\n",
|
||||
"\n",
|
||||
"We use *hello.py* script as an example. To perform logging, we need to get a reference to the Run instance from within the scope of the script. We do this using *Run.get_context* method."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!more hello.py"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's submit the run on a local computer. A standard pattern in Azure ML SDK is to create run configuration, and then use *Experiment.submit* method."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run_config = ScriptRunConfig(source_directory='.', script='hello.py')\n",
|
||||
"\n",
|
||||
"local_script_run = exp.submit(run_config)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can view the status of the run as before"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(local_script_run.get_status())\n",
|
||||
"local_script_run"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Submitted runs have additional log files you can inspect using *get_details_with_logs*."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"local_script_run.get_details_with_logs()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Use *wait_for_completion* method to block the local execution until remote run is complete."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"local_script_run.wait_for_completion(show_output=True)\n",
|
||||
"print(local_script_run.get_status())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Add properties and tags\n",
|
||||
"\n",
|
||||
"Properties and tags help you organize your runs. You can use them to describe, for example, who authored the run, what the results were, and what machine learning approach was used. And as you'll later learn, properties and tags can be used to query the history of your runs to find the important ones.\n",
|
||||
"\n",
|
||||
"For example, let's add \"author\" property to the run:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"local_script_run.add_properties({\"author\":\"azureml-user\"})\n",
|
||||
"print(local_script_run.get_properties())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Properties are immutable. Once you assign a value it cannot be changed, making them useful as a permanent record for auditing purposes."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"try:\n",
|
||||
" local_script_run.add_properties({\"author\":\"different-user\"})\n",
|
||||
"except Exception as e:\n",
|
||||
" print(e)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Tags on the other hand can be changed:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"local_script_run.tag(\"quality\", \"great run\")\n",
|
||||
"print(local_script_run.get_tags())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"local_script_run.tag(\"quality\", \"fantastic run\")\n",
|
||||
"print(local_script_run.get_tags())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can also add a simple string tag. It appears in the tag dictionary with value of None"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"local_script_run.tag(\"worth another look\")\n",
|
||||
"print(local_script_run.get_tags())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Query properties and tags\n",
|
||||
"\n",
|
||||
"You can quary runs within an experiment that match specific properties and tags. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"list(exp.get_runs(properties={\"author\":\"azureml-user\"},tags={\"quality\":\"fantastic run\"}))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"list(exp.get_runs(properties={\"author\":\"azureml-user\"},tags=\"worth another look\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Start and query child runs"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can use child runs to group together related runs, for example different hyperparameter tuning iterations.\n",
|
||||
"\n",
|
||||
"Let's use *hello_with_children* script to create a batch of 5 child runs from within a submitted run."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!more hello_with_children.py"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run_config = ScriptRunConfig(source_directory='.', script='hello_with_children.py')\n",
|
||||
"\n",
|
||||
"local_script_run = exp.submit(run_config)\n",
|
||||
"local_script_run.wait_for_completion(show_output=True)\n",
|
||||
"print(local_script_run.get_status())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can start child runs one by one. Note that this is less efficient than submitting a batch of runs, because each creation results in a network call.\n",
|
||||
"\n",
|
||||
"Child runs too complete automatically as they move out of scope."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"with exp.start_logging() as parent_run:\n",
|
||||
" for c,count in enumerate(range(5)):\n",
|
||||
" with parent_run.child_run() as child:\n",
|
||||
" child.log(name=\"Hello from child run\", value=c)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To query the child runs belonging to specific parent, use *get_children* method."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"list(parent_run.get_children())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Cancel or fail runs\n",
|
||||
"\n",
|
||||
"Sometimes, you realize that the run is not performing as intended, and you want to cancel it instead of waiting for it to complete.\n",
|
||||
"\n",
|
||||
"As an example, let's create a Python script with a delay in the middle."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!more hello_with_delay.py"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can use *cancel* method to cancel a run."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run_config = ScriptRunConfig(source_directory='.', script='hello_with_delay.py')\n",
|
||||
"\n",
|
||||
"local_script_run = exp.submit(run_config)\n",
|
||||
"print(\"Did the run start?\",local_script_run.get_status())\n",
|
||||
"local_script_run.cancel()\n",
|
||||
"print(\"Did the run cancel?\",local_script_run.get_status())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can also mark an unsuccessful run as failed."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"local_script_run = exp.submit(run_config)\n",
|
||||
"local_script_run.fail()\n",
|
||||
"print(local_script_run.get_status())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Reproduce a run\n",
|
||||
"\n",
|
||||
"When updating or troubleshooting on a model deployed to production, you sometimes need to revisit the original training run that produced the model. To help you with this, Azure ML service by default creates snapshots of your scripts a the time of run submission:\n",
|
||||
"\n",
|
||||
"You can use *restore_snapshot* to obtain a zip package of the latest snapshot of the script folder. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"local_script_run.restore_snapshot(path=\"snapshots\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can then extract the zip package, examine the code, and submit your run again."
|
||||
]
|
||||
},
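A minimal sketch of that loop, assuming `local_script_run` and `exp` from the cells above; the extraction directory and script name are illustrative:

```python
import zipfile
from azureml.core import ScriptRunConfig

# restore_snapshot returns the path of the downloaded zip package.
snapshot_zip = local_script_run.restore_snapshot(path="snapshots")

# Extract the snapshot so the original training code can be examined.
with zipfile.ZipFile(snapshot_zip, "r") as zf:
    zf.extractall("snapshots/restored")

# Resubmit the restored code as a new run.
run_config = ScriptRunConfig(source_directory="snapshots/restored", script="hello.py")
rerun = exp.submit(run_config)
```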
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Next steps\n",
|
||||
"\n",
|
||||
" * To learn more about logging APIs, see [logging API notebook](./logging-api/logging-api.ipynb)\n",
|
||||
" * To learn more about remote runs, see [train on AML compute notebook](./train-on-amlcompute/train-on-amlcompute.ipynb)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "roastala"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
"name": "python36"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -1,4 +0,0 @@
|
||||
name: manage-runs
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
@@ -1,562 +0,0 @@
|
||||
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
"\n",
"Licensed under the MIT License."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Tensorboard Integration with Run History\n",
"\n",
"1. Run a Tensorflow job locally and view its TB output live.\n",
"2. The same, for a DSVM.\n",
"3. And once more, with an AmlCompute cluster.\n",
"4. Finally, we'll collect all of these historical runs together into a single Tensorboard graph."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Prerequisites\n",
"* Understand the [architecture and terms](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture) introduced by Azure Machine Learning\n",
"* If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [configuration notebook](../../../configuration.ipynb) to:\n",
" * install the AML SDK\n",
" * create a workspace and its configuration file (`config.json`)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Check core SDK version number\n",
"import azureml.core\n",
"\n",
"print(\"SDK version:\", azureml.core.VERSION)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Diagnostics\n",
"Opt in to diagnostics for a better experience, quality, and security of future releases."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"Diagnostics"
]
},
"outputs": [],
"source": [
"from azureml.telemetry import set_diagnostics_collection\n",
"\n",
"set_diagnostics_collection(send_diagnostics=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Initialize Workspace\n",
"\n",
"Initialize a workspace object from persisted configuration."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core import Workspace\n",
"\n",
"ws = Workspace.from_config()\n",
"print('Workspace name: ' + ws.name, \n",
"      'Azure region: ' + ws.location, \n",
"      'Subscription id: ' + ws.subscription_id, \n",
"      'Resource group: ' + ws.resource_group, sep='\\n')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Set experiment name and create project\n",
"Choose a name for your run history container in the workspace, and create a folder for the project."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from os import path, makedirs\n",
"experiment_name = 'tensorboard-demo'\n",
"\n",
"# experiment folder\n",
"exp_dir = './sample_projects/' + experiment_name\n",
"\n",
"if not path.exists(exp_dir):\n",
"    makedirs(exp_dir)\n",
"\n",
"# runs we started in this session, for the finale\n",
"runs = []"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Download Tensorflow Tensorboard demo code\n",
"\n",
"Tensorflow's repository has an MNIST demo with extensive Tensorboard instrumentation. We'll use it here for our purposes.\n",
"\n",
"Note that we don't need to make any code changes at all - the code works without modification from the Tensorflow repository."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"import os\n",
"\n",
"tf_code = requests.get(\"https://raw.githubusercontent.com/tensorflow/tensorflow/r1.8/tensorflow/examples/tutorials/mnist/mnist_with_summaries.py\")\n",
"with open(os.path.join(exp_dir, \"mnist_with_summaries.py\"), \"w\") as file:\n",
"    file.write(tf_code.text)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Configure and run locally\n",
"\n",
"We'll start by running this locally. While it might not initially seem that useful to use this for a local run - why not just run TB against the files generated locally? - even in this case there is some value to using this feature. Your local run will be registered in the run history, and your Tensorboard logs will be uploaded to the artifact store associated with this run. Later, you'll be able to restore the logs from any run, regardless of where it happened.\n",
"\n",
"Note that for this run, you will need to install Tensorflow on your local machine by yourself. Further, the Tensorboard module (that is, the one included with Tensorflow) must be accessible to this notebook's kernel, as the local machine is what runs Tensorboard."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.runconfig import RunConfiguration\n",
"\n",
"# Create a run configuration.\n",
"run_config = RunConfiguration()\n",
"run_config.environment.python.user_managed_dependencies = True\n",
"\n",
"# You can choose a specific Python environment by pointing to a Python path \n",
"#run_config.environment.python.interpreter_path = '/home/ninghai/miniconda3/envs/sdk2/bin/python'"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core import Experiment\n",
"from azureml.core.script_run_config import ScriptRunConfig\n",
"\n",
"logs_dir = os.path.join(os.curdir, \"logs\")\n",
"data_dir = os.path.abspath(os.path.join(os.curdir, \"mnist_data\"))\n",
"\n",
"if not path.exists(data_dir):\n",
"    makedirs(data_dir)\n",
"\n",
"os.environ[\"TEST_TMPDIR\"] = data_dir\n",
"\n",
"# Writing logs to ./logs results in their being uploaded to Artifact Service,\n",
"# and thus, made accessible to our Tensorboard instance.\n",
"arguments_list = [\"--log_dir\", logs_dir]\n",
"\n",
"# Create an experiment\n",
"exp = Experiment(ws, experiment_name)\n",
"\n",
"# If you would like the run to go for longer, add --max_steps 5000 to the arguments list:\n",
"# arguments_list += [\"--max_steps\", \"5000\"]\n",
"\n",
"script = ScriptRunConfig(exp_dir,\n",
"                         script=\"mnist_with_summaries.py\",\n",
"                         run_config=run_config,\n",
"                         arguments=arguments_list)\n",
"\n",
"run = exp.submit(script)\n",
"# You can also wait for the run to complete\n",
"# run.wait_for_completion(show_output=True)\n",
"runs.append(run)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Start Tensorboard\n",
"\n",
"Now, while the run is in progress, we just need to start Tensorboard with the run as its target, and it will begin streaming logs."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"tensorboard-sample"
]
},
"outputs": [],
"source": [
"from azureml.tensorboard import Tensorboard\n",
"\n",
"# The Tensorboard constructor takes an array of runs, so be sure to pass it in as a single-element array here\n",
"tb = Tensorboard([run])\n",
"\n",
"# If successful, start() returns a string with the URI of the instance.\n",
"tb.start()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Stop Tensorboard\n",
"\n",
"When you're done, make sure to call the `stop()` method of the Tensorboard object, or it will stay running even after your job completes."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"tb.stop()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Now, with a DSVM\n",
"\n",
"Tensorboard uploading works with all compute targets. Here we demonstrate it from a DSVM.\n",
"Note that the Tensorboard instance itself will be run by the notebook kernel. Again, this means this notebook's kernel must have access to the Tensorboard module.\n",
"\n",
"If you are unfamiliar with DSVM configuration, check [Train in a remote VM](../../training/train-on-remote-vm/train-on-remote-vm.ipynb) for a more detailed breakdown.\n",
"\n",
"**Note**: To streamline the compute that Azure Machine Learning creates, we are making updates to support creating only single- to multi-node `AmlCompute`. The `DSVMCompute` class will be deprecated in a later release, but the DSVM can be created using the single-line command below and then attached (like any VM) using the sample code below. Also note that we only support Linux VMs for remote execution from AML, and the commands below will spin up a Linux VM only.\n",
"\n",
"```shell\n",
"# create a DSVM in your resource group\n",
"# note you need to be at least a contributor to the resource group in order to execute this command successfully.\n",
"(myenv) $ az vm create --resource-group <resource_group_name> --name <some_vm_name> --image microsoft-dsvm:linux-data-science-vm-ubuntu:linuxdsvmubuntu:latest --admin-username <username> --admin-password <password> --generate-ssh-keys --authentication-type password\n",
"```\n",
"You can also use [this url](https://portal.azure.com/#create/microsoft-dsvm.linux-data-science-vm-ubuntulinuxdsvmubuntu) to create the VM using the Azure Portal."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.compute import ComputeTarget, RemoteCompute\n",
"from azureml.core.compute_target import ComputeTargetException\n",
"\n",
"username = os.getenv('AZUREML_DSVM_USERNAME', default='<my_username>')\n",
"address = os.getenv('AZUREML_DSVM_ADDRESS', default='<ip_address_or_fqdn>')\n",
"\n",
"compute_target_name = 'cpudsvm'\n",
"# if you want to connect using an SSH key instead of username/password you can provide the parameters private_key_file and private_key_passphrase \n",
"try:\n",
"    attached_dsvm_compute = RemoteCompute(workspace=ws, name=compute_target_name)\n",
"    print('found existing:', attached_dsvm_compute.name)\n",
"except ComputeTargetException:\n",
"    config = RemoteCompute.attach_configuration(username=username,\n",
"                                                address=address,\n",
"                                                ssh_port=22,\n",
"                                                private_key_file='./.ssh/id_rsa')\n",
"    attached_dsvm_compute = ComputeTarget.attach(ws, compute_target_name, config)\n",
"\n",
"    attached_dsvm_compute.wait_for_completion(show_output=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Submit run using TensorFlow estimator\n",
"\n",
"Instead of manually configuring the DSVM environment, we can use the TensorFlow estimator and everything is set up automatically."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.train.dnn import TensorFlow\n",
"\n",
"script_params = {\"--log_dir\": \"./logs\"}\n",
"\n",
"# If you want the run to go longer, set --max_steps to a higher number.\n",
"# script_params[\"--max_steps\"] = \"5000\"\n",
"\n",
"tf_estimator = TensorFlow(source_directory=exp_dir,\n",
"                          compute_target=attached_dsvm_compute,\n",
"                          entry_script='mnist_with_summaries.py',\n",
"                          script_params=script_params)\n",
"\n",
"run = exp.submit(tf_estimator)\n",
"\n",
"runs.append(run)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Start Tensorboard with this run\n",
"\n",
"Just like before."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# The Tensorboard constructor takes an array of runs, so be sure to pass it in as a single-element array here\n",
"tb = Tensorboard([run])\n",
"\n",
"# If successful, start() returns a string with the URI of the instance.\n",
"tb.start()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Stop Tensorboard\n",
"\n",
"When you're done, make sure to call the `stop()` method of the Tensorboard object, or it will stay running even after your job completes."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"tb.stop()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Once more, with an AmlCompute cluster\n",
"\n",
"Just to prove we can, let's create an AmlCompute CPU cluster, and run our demo there, as well."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
"\n",
"# choose a name for your cluster\n",
"cluster_name = \"cpucluster\"\n",
"\n",
"cts = ws.compute_targets\n",
"found = False\n",
"if cluster_name in cts and cts[cluster_name].type == 'AmlCompute':\n",
"    found = True\n",
"    print('Found existing compute target.')\n",
"    compute_target = cts[cluster_name]\n",
"if not found:\n",
"    print('Creating a new compute target...')\n",
"    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2', \n",
"                                                           max_nodes=4)\n",
"\n",
"    # create the cluster\n",
"    compute_target = ComputeTarget.create(ws, cluster_name, compute_config)\n",
"\n",
"compute_target.wait_for_completion(show_output=True, min_node_count=None)\n",
"\n",
"# use get_status() to get a detailed status for the current cluster. \n",
"# print(compute_target.get_status().serialize())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Submit run using TensorFlow estimator\n",
"\n",
"Again, we can use the TensorFlow estimator and everything is set up automatically."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"script_params = {\"--log_dir\": \"./logs\"}\n",
"\n",
"# If you want the run to go longer, set --max_steps to a higher number.\n",
"# script_params[\"--max_steps\"] = \"5000\"\n",
"\n",
"tf_estimator = TensorFlow(source_directory=exp_dir,\n",
"                          compute_target=compute_target,\n",
"                          entry_script='mnist_with_summaries.py',\n",
"                          script_params=script_params)\n",
"\n",
"run = exp.submit(tf_estimator)\n",
"\n",
"runs.append(run)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Start Tensorboard with this run\n",
"\n",
"Once more..."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# The Tensorboard constructor takes an array of runs, so be sure to pass it in as a single-element array here\n",
"tb = Tensorboard([run])\n",
"\n",
"# If successful, start() returns a string with the URI of the instance.\n",
"tb.start()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Stop Tensorboard\n",
"\n",
"When you're done, make sure to call the `stop()` method of the Tensorboard object, or it will stay running even after your job completes."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"tb.stop()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Finale\n",
"\n",
"If you've paid close attention, you'll have noticed that we've been saving the run objects in an array as we went along. We can start a Tensorboard instance that combines all of these run objects into a single process. This way, you can compare historical runs. You can even do this with live runs; if you made some of those previous runs longer via the `--max_steps` parameter, they might still be running, and you'll see them live in this instance as well."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# The Tensorboard constructor takes an array of runs...\n",
"# and it turns out that we have been building one of those all along.\n",
"tb = Tensorboard(runs)\n",
"\n",
"# If successful, start() returns a string with the URI of the instance.\n",
"tb.start()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Stop Tensorboard\n",
"\n",
"As you might already know, make sure to call the `stop()` method of the Tensorboard object, or it will stay running (until you kill the kernel associated with this notebook, at least)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"tb.stop()"
]
}
],
"metadata": {
"authors": [
{
"name": "roastala"
}
],
"kernelspec": {
"display_name": "Python 3.6",
"language": "python",
"name": "python36"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
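The deleted Tensorboard notebook above repeats the same `tb.start()` / `tb.stop()` pairing four times, and its markdown warns that a forgotten `stop()` leaves the instance running. A minimal sketch (not part of the diff, assuming only the public `azureml.tensorboard.Tensorboard` API used in the notebook) of wrapping that pattern in a context manager so the instance is always shut down:

```python
# Sketch only: a context manager around the start/stop pattern above.
# Assumes azureml-tensorboard is installed and `runs` is a list of
# azureml.core.Run objects, as in the notebook.
from contextlib import contextmanager

from azureml.tensorboard import Tensorboard


@contextmanager
def tensorboard_session(runs):
    tb = Tensorboard(runs)
    uri = tb.start()  # start() returns the URI of the Tensorboard instance
    try:
        yield uri
    finally:
        tb.stop()  # always stop, even if the body raises


# Usage (assumes `run` is a submitted run, as in the notebook):
# with tensorboard_session([run]) as uri:
#     print("Tensorboard running at", uri)
```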
@@ -1,6 +0,0 @@
name: tensorboard
dependencies:
- pip:
  - azureml-sdk
  - azureml-tensorboard
  - tensorflow
@@ -1,322 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
"\n",
"Licensed under the MIT License."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Deploy Model as Azure Machine Learning Web Service using MLflow\n",
"\n",
"This example shows you how to use MLflow together with Azure Machine Learning services for deploying a model as a web service. You'll learn how to:\n",
"\n",
" 1. Retrieve a previously trained scikit-learn model\n",
" 2. Create a Docker image from the model\n",
" 3. Deploy the model as a web service on Azure Container Instance\n",
" 4. Make a scoring request against the web service.\n",
"\n",
"## Prerequisites and Set-up\n",
"\n",
"This notebook requires you to first complete the [Use MLflow with Azure Machine Learning for Local Training Run](../train-local/train-local.ipynb) or [Use MLflow with Azure Machine Learning for Remote Training Run](../train-remote/train-remote.ipynb) notebook, so as to have an experiment run with an uploaded model in your Azure Machine Learning Workspace.\n",
"\n",
"Also, install the following packages if you haven't already:\n",
"\n",
"```\n",
"pip install azureml-mlflow pandas\n",
"```\n",
"\n",
"Then, import the necessary packages:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import mlflow\n",
"import azureml.mlflow\n",
"import azureml.core\n",
"from azureml.core import Workspace\n",
"\n",
"# Check core SDK version number\n",
"print(\"SDK version:\", azureml.core.VERSION)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Connect to workspace and set MLflow tracking URI\n",
"\n",
"Setting the tracking URI is required for retrieving the model and creating an image using the MLflow APIs."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ws = Workspace.from_config()\n",
"\n",
"mlflow.set_tracking_uri(ws.get_mlflow_tracking_uri())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Retrieve model from previous run\n",
"\n",
"Let's retrieve the experiment from the training notebook, and list the runs within that experiment."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"experiment_name = \"experiment-with-mlflow\"\n",
"exp = ws.experiments[experiment_name]\n",
"\n",
"runs = list(exp.get_runs())\n",
"runs"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Then, let's select the most recent training run and find its ID. You also need to specify the path in run history where the model was saved."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"runid = runs[0].id\n",
"model_save_path = \"model\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Create Docker image\n",
"\n",
"To create a Docker image with Azure Machine Learning for Model Management, use the ```mlflow.azureml.build_image``` method. Specify the model path, your workspace, run ID and other parameters.\n",
"\n",
"MLflow automatically recognizes the model framework as scikit-learn, creates the scoring logic, and includes library dependencies for you.\n",
"\n",
"Note that the image creation can take several minutes."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import mlflow.azureml\n",
"\n",
"azure_image, azure_model = mlflow.azureml.build_image(model_uri=\"runs:/{}/{}\".format(runid, model_save_path),\n",
"                                                      workspace=ws,\n",
"                                                      model_name='diabetes-sklearn-model',\n",
"                                                      image_name='diabetes-sklearn-image',\n",
"                                                      synchronous=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Deploy web service\n",
"\n",
"Let's use the Azure Machine Learning SDK to deploy the image as a web service. \n",
"\n",
"First, specify the deployment configuration. Azure Container Instance is a suitable choice for a quick dev-test deployment, while Azure Kubernetes Service is suitable for scalable production deployments.\n",
"\n",
"Then, deploy the image using the Azure Machine Learning SDK's ```deploy_from_image``` method.\n",
"\n",
"Note that the deployment can take several minutes."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.webservice import AciWebservice, Webservice\n",
"\n",
"\n",
"aci_config = AciWebservice.deploy_configuration(cpu_cores=1, \n",
"                                                memory_gb=1, \n",
"                                                tags={\"method\" : \"sklearn\"}, \n",
"                                                description='Diabetes model',\n",
"                                                location='eastus2')\n",
"\n",
"\n",
"# Deploy the image to Azure Container Instances (ACI) for real-time serving\n",
"webservice = Webservice.deploy_from_image(\n",
"    image=azure_image, workspace=ws, name=\"diabetes-model-1\", deployment_config=aci_config)\n",
"\n",
"\n",
"webservice.wait_for_deployment(show_output=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Make a scoring request\n",
"\n",
"Let's take the first few rows of test data and score them using the web service."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"test_rows = [\n",
"    [0.01991321, 0.05068012, 0.10480869, 0.07007254, -0.03596778,\n",
"     -0.0266789 , -0.02499266, -0.00259226, 0.00371174, 0.04034337],\n",
"    [-0.01277963, -0.04464164, 0.06061839, 0.05285819, 0.04796534,\n",
"     0.02937467, -0.01762938, 0.03430886, 0.0702113 , 0.00720652],\n",
"    [ 0.03807591, 0.05068012, 0.00888341, 0.04252958, -0.04284755,\n",
"     -0.02104223, -0.03971921, -0.00259226, -0.01811827, 0.00720652]]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The MLflow-based web service for a scikit-learn model requires the data to be converted to a Pandas DataFrame, and then serialized as JSON."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import pandas as pd\n",
"\n",
"test_rows_as_json = pd.DataFrame(test_rows).to_json(orient=\"split\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's pass the converted and serialized data to the web service to get the predictions."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"predictions = webservice.run(test_rows_as_json)\n",
"\n",
"print(predictions)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"You can use the web service's scoring URI to make a raw HTTP request."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"webservice.scoring_uri"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"You can diagnose the web service using the ```get_logs``` method."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"webservice.get_logs()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Next Steps\n",
"\n",
"Learn about [model management and inference in Azure Machine Learning service](https://docs.microsoft.com/en-us/azure/machine-learning/service/concept-model-management-and-deployment)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"authors": [
{
"name": "rastala"
}
],
"kernelspec": {
"display_name": "Python 3.6",
"language": "python",
"name": "python36"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
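The deploy-model notebook above prints `webservice.scoring_uri` and notes that a raw HTTP request can be made against it, but leaves the call itself out. A minimal sketch (assuming `webservice` and `test_rows` as defined in the notebook; the split-orient JSON matches what the MLflow scoring server expects for scikit-learn models):

```python
# Sketch only: the raw HTTP scoring request hinted at in the notebook.
import pandas as pd
import requests

payload = pd.DataFrame(test_rows).to_json(orient="split")
response = requests.post(
    url=webservice.scoring_uri,
    data=payload,
    headers={"Content-Type": "application/json"},  # the scoring server expects JSON
)
response.raise_for_status()
print(response.json())  # one predicted target value per input row
```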
@@ -1,8 +0,0 @@
name: deploy-model
dependencies:
- scikit-learn
- matplotlib
- pip:
  - azureml-sdk
  - azureml-mlflow
  - pandas
@@ -1,150 +0,0 @@
# Copyright (c) 2017, PyTorch Team
# All rights reserved
# Licensed under BSD 3-Clause License.

# This example is based on PyTorch MNIST example:
# https://github.com/pytorch/examples/blob/master/mnist/main.py

import mlflow
import mlflow.pytorch
from mlflow.utils.environment import _mlflow_conda_env
import warnings
import cloudpickle
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 20, 5, 1)
        self.conv2 = nn.Conv2d(20, 50, 5, 1)
        self.fc1 = nn.Linear(4 * 4 * 50, 500)
        self.fc2 = nn.Linear(500, 10)

    def forward(self, x):
        # Added the view for reshaping score requests
        x = x.view(-1, 1, 28, 28)
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2, 2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2, 2)
        x = x.view(-1, 4 * 4 * 50)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)


def train(args, model, device, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
            # Use MLflow logging
            mlflow.log_metric("epoch_loss", loss.item())


def test(args, model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            # sum up batch loss
            test_loss += F.nll_loss(output, target, reduction="sum").item()
            # get the index of the max log-probability
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    print("\n")
    print("Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n".format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    # Use MLflow logging
    mlflow.log_metric("average_loss", test_loss)


class Args(object):
    pass


# Training settings
args = Args()
setattr(args, 'batch_size', 64)
setattr(args, 'test_batch_size', 1000)
setattr(args, 'epochs', 3)  # Higher number for better convergence
setattr(args, 'lr', 0.01)
setattr(args, 'momentum', 0.5)
setattr(args, 'no_cuda', True)
setattr(args, 'seed', 1)
setattr(args, 'log_interval', 10)
setattr(args, 'save_model', True)

use_cuda = not args.no_cuda and torch.cuda.is_available()

torch.manual_seed(args.seed)

device = torch.device("cuda" if use_cuda else "cpu")

kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size=args.batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST(
        '../data',
        train=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,))])),
    batch_size=args.test_batch_size, shuffle=True, **kwargs)


def driver():
    warnings.filterwarnings("ignore")
    # Dependencies for deploying the model
    pytorch_index = "https://download.pytorch.org/whl/"
    pytorch_version = "cpu/torch-1.1.0-cp36-cp36m-linux_x86_64.whl"
    deps = [
        "cloudpickle=={}".format(cloudpickle.__version__),
        pytorch_index + pytorch_version,
        "torchvision=={}".format(torchvision.__version__),
        "Pillow=={}".format("6.0.0")
    ]
    with mlflow.start_run() as run:
        model = Net().to(device)
        optimizer = optim.SGD(
            model.parameters(),
            lr=args.lr,
            momentum=args.momentum)
        for epoch in range(1, args.epochs + 1):
            train(args, model, device, train_loader, optimizer, epoch)
            test(args, model, device, test_loader)
        # Log model to run history using MLflow
        if args.save_model:
            model_env = _mlflow_conda_env(additional_pip_deps=deps)
            mlflow.pytorch.log_model(model, "model", conda_env=model_env)
    return run


if __name__ == "__main__":
    driver()
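As a companion to the script above, a short sketch of loading the model that `driver()` logs back out for local scoring. The `runs:/<run_id>/model` URI form is the same one the notebooks below pass to `mlflow.azureml.build_image`, and the dummy input shape relies on the `x.view(-1, 1, 28, 28)` reshape in `Net.forward`; this is an assumption-laden illustration, not part of the original sample.

```python
# Sketch only: reload the model logged by driver() and score one image.
# Assumes this runs in the same module as the script above (or after
# `from train import driver`) against the same MLflow tracking store.
import mlflow.pytorch
import torch

run = driver()  # trains and logs the model under artifact path "model"

loaded = mlflow.pytorch.load_model("runs:/{}/model".format(run.info.run_id))
loaded.eval()

with torch.no_grad():
    scores = loaded(torch.zeros(1, 784))   # one flattened 28x28 image
print("Predicted digit:", scores.argmax(dim=1).item())
```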
@@ -1,481 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
"\n",
"Licensed under the MIT License."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Use MLflow with Azure Machine Learning to Train and Deploy PyTorch Image Classifier\n",
"\n",
"This example shows you how to use MLflow together with Azure Machine Learning services for tracking the metrics and artifacts while training a PyTorch model to classify MNIST digit images, and then deploy the model as a web service. You'll learn how to:\n",
"\n",
" 1. Set up MLflow tracking URI so as to use Azure ML\n",
" 2. Create experiment\n",
" 3. Instrument your model with MLflow tracking\n",
" 4. Train a PyTorch model locally\n",
" 5. Train a model on GPU compute on Azure\n",
" 6. View your experiment within your Azure ML Workspace in Azure Portal\n",
" 7. Create a Docker image from the trained model\n",
" 8. Deploy the model as a web service on Azure Container Instance\n",
" 9. Call the model to make predictions\n",
" \n",
"### Pre-requisites\n",
" \n",
"Make sure you have completed the [Configuration](../../../configuration.ipynb) notebook to set up your Azure Machine Learning workspace and ensure other common prerequisites are met.\n",
"\n",
"Also, install the azureml-mlflow package using ```pip install azureml-mlflow```. Note that azureml-mlflow installs the mlflow package itself as a dependency, if you haven't done so previously.\n",
"\n",
"### Set-up\n",
"\n",
"Import packages and check the versions of the Azure ML SDK and MLflow installed on your computer. Then connect to your Workspace."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import sys, os\n",
"import mlflow\n",
"import mlflow.azureml\n",
"import mlflow.sklearn\n",
"\n",
"import azureml.core\n",
"from azureml.core import Workspace\n",
"\n",
"\n",
"print(\"SDK version:\", azureml.core.VERSION)\n",
"print(\"MLflow version:\", mlflow.version.VERSION)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ws = Workspace.from_config()\n",
"ws.get_details()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Set tracking URI\n",
"\n",
"Set the MLflow tracking URI to point to your Azure ML Workspace. The subsequent logging calls from MLflow APIs will go to Azure ML services and will be tracked under your Workspace."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"mlflow.set_tracking_uri(ws.get_mlflow_tracking_uri())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Create Experiment\n",
"\n",
"In both MLflow and Azure ML, training runs are grouped into experiments. Let's create one for our experimentation."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"experiment_name = \"pytorch-with-mlflow\"\n",
"mlflow.set_experiment(experiment_name)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Train model locally while logging metrics and artifacts\n",
"\n",
"The ```scripts/train.py``` program contains the code to load the image dataset, and train and test the model. Within this program, the train.driver function wraps the end-to-end workflow.\n",
"\n",
"Within the driver, ```mlflow.start_run``` starts MLflow tracking. Then, ```mlflow.log_metric``` functions are used to track the convergence of the neural network training iterations. Finally, ```mlflow.pytorch.log_model``` is used to save the trained model in a framework-aware manner.\n",
"\n",
"Let's add the program to the search path, import it as a module, and then invoke the driver function. Note that the training can take a few minutes."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"lib_path = os.path.abspath(\"scripts\")\n",
"sys.path.append(lib_path)\n",
"\n",
"import train"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"run = train.driver()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"You can view the metrics of the run in the Azure Portal."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(azureml.mlflow.get_portal_url(run))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Train model on GPU compute on Azure\n",
"\n",
"Next, let's run the same script on GPU-enabled compute for faster training. If you've completed the [Configuration](../../../configuration.ipynb) notebook, you should have a GPU cluster named \"gpu-cluster\" available in your workspace. Otherwise, follow the instructions in the notebook to create one. For simplicity, this example uses a single process on a single VM to train the model.\n",
"\n",
"Create a PyTorch estimator to specify the training configuration: script, compute, as well as additional packages needed. To enable MLflow tracking, include ```azureml-mlflow``` as a pip package. The low-level specifications for the training run are encapsulated in the estimator instance."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.train.dnn import PyTorch\n",
"\n",
"pt = PyTorch(source_directory=\"./scripts\", \n",
"             entry_script = \"train.py\", \n",
"             compute_target = \"gpu-cluster\", \n",
"             node_count = 1, \n",
"             process_count_per_node = 1, \n",
"             use_gpu=True,\n",
"             pip_packages = [\"azureml-mlflow\", \"Pillow==6.0.0\"])\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Get a reference to the experiment you created previously, but this time, as an Azure Machine Learning experiment object.\n",
"\n",
"Then, use the ```Experiment.submit``` method to start the remote training run. Note that the first training run often takes longer as Azure Machine Learning service builds the Docker image for executing the script. Subsequent runs will be faster as the cached image is used."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core import Experiment\n",
"\n",
"exp = Experiment(ws, experiment_name)\n",
"run = exp.submit(pt)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"You can monitor the run and its metrics in the Azure Portal."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"run"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Also, you can wait for the run to complete."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"run.wait_for_completion(show_output=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Deploy model as web service\n",
"\n",
"To deploy a web service, first create a Docker image, and then deploy that Docker image on inferencing compute.\n",
"\n",
"The ```mlflow.azureml.build_image``` function builds a Docker image from the saved PyTorch model in a framework-aware manner. It automatically creates the PyTorch-specific inferencing wrapper code and specifies package dependencies for you."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"run.get_file_names()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Then build a Docker image using *runs:/<run.id>/model* as the model_uri path.\n",
"\n",
"Note that the image building can take several minutes."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"model_path = \"model\"\n",
"\n",
"\n",
"azure_image, azure_model = mlflow.azureml.build_image(model_uri='runs:/{}/{}'.format(run.id, model_path),\n",
"                                                      workspace=ws,\n",
"                                                      model_name='pytorch_mnist',\n",
"                                                      image_name='pytorch-mnist-img',\n",
"                                                      synchronous=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Then, deploy the Docker image to Azure Container Instance: a serverless compute capable of running a single container. You can tag and add descriptions to help keep track of your web service. \n",
"\n",
"[Other inferencing compute choices](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-deploy-and-where) include Azure Kubernetes Service, which provides a scalable endpoint suitable for production use.\n",
"\n",
"Note that the service deployment can take several minutes."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.webservice import AciWebservice, Webservice\n",
"\n",
"aci_config = AciWebservice.deploy_configuration(cpu_cores=2, \n",
"                                                memory_gb=5, \n",
"                                                tags={\"data\": \"MNIST\", \"method\" : \"pytorch\"}, \n",
"                                                description=\"Predict using webservice\")\n",
"\n",
"\n",
"# Deploy the image to Azure Container Instances (ACI) for real-time serving\n",
"webservice = Webservice.deploy_from_image(\n",
"    image=azure_image, workspace=ws, name=\"pytorch-mnist-1\", deployment_config=aci_config)\n",
"\n",
"\n",
"webservice.wait_for_deployment()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Once the deployment has completed, you can check the scoring URI of the web service."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(\"Scoring URI is: {}\".format(webservice.scoring_uri))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"In case of a service creation issue, you can use ```webservice.get_logs()``` to get logs to debug."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Make predictions using web service\n",
"\n",
"To invoke the web service, create a test data set as normalized PyTorch tensors. \n",
"\n",
"Then, let's define a utility function that takes a random image and converts it into a format and shape suitable as input to the PyTorch inferencing endpoint. The conversion is done by: \n",
"\n",
" 1. Select a random (image, label) tuple\n",
" 2. Take the image and convert the tensor to a NumPy array \n",
" 3. Reshape the array into a 1 x 1 x N array\n",
" * 1 image in batch, 1 color channel, N = 784 pixels for MNIST images\n",
" * Note also ```x = x.view(-1, 1, 28, 28)``` in the net definition in the ```train.py``` program to shape incoming scoring requests.\n",
" 4. Convert the NumPy array to a list to make it into a built-in type.\n",
" 5. Create a dictionary {\"data\": <list>} that can be converted to a JSON string for web service requests."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from torchvision import datasets, transforms\n",
"import random\n",
"import numpy as np\n",
"\n",
"test_data = datasets.MNIST('../data', train=False, transform=transforms.Compose([\n",
"    transforms.ToTensor(),\n",
"    transforms.Normalize((0.1307,), (0.3081,))]))\n",
"\n",
"\n",
"def get_random_image():\n",
"    image_idx = random.randint(0, len(test_data) - 1)\n",
"    image_as_tensor = test_data[image_idx][0]\n",
"    # Flatten the 1x28x28 tensor into a nested list and wrap it for the service\n",
"    return {\"data\": image_as_tensor.numpy().reshape(1, 1, -1).tolist()[0]}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Then, invoke the web service using a random test image. Convert the dictionary containing the image to a JSON string before passing it to the web service.\n",
"\n",
"The response contains the raw scores for each label, with a greater value indicating a higher probability. Sort the labels and select the one with the greatest score to get the prediction. Let's also plot the image sent to the web service for comparison purposes."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"\n",
"import json\n",
"import matplotlib.pyplot as plt\n",
"\n",
"test_image = get_random_image()\n",
"\n",
"response = webservice.run(json.dumps(test_image))\n",
"\n",
"response = sorted(response[0].items(), key = lambda x: x[1], reverse = True)\n",
"\n",
"\n",
"print(\"Predicted label:\", response[0][0])\n",
"plt.imshow(np.array(test_image[\"data\"]).reshape(28,28), cmap = \"gray\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"You can also call the web service using a raw POST request against the scoring URI."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"\n",
"response = requests.post(url=webservice.scoring_uri, data=json.dumps(test_image), headers={\"Content-type\": \"application/json\"})\n",
"print(response.text)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"authors": [
{
"name": "roastala"
}
],
"celltoolbar": "Edit Metadata",
"kernelspec": {
"display_name": "Python 3.6",
"language": "python",
"name": "python36"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
},
"name": "mlflow-sparksummit-pytorch",
"notebookId": 2495374963457641
},
"nbformat": 4,
"nbformat_minor": 1
}
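The scoring cell above sorts the raw response to pick a label; since `Net` ends in `log_softmax`, exponentiating the scores recovers class probabilities. A small sketch (assuming `webservice`, `test_image`, and `json` as defined in the notebook, and the same `[{label: score}]` response shape shown there):

```python
# Sketch only: turn the raw log-softmax scores into probabilities.
import math

raw = webservice.run(json.dumps(test_image))  # e.g. [{"0": -9.1, ..., "7": -0.02}]
probs = {label: math.exp(score) for label, score in raw[0].items()}
best = max(probs, key=probs.get)
print("Predicted label: {} (p={:.3f})".format(best, probs[best]))
```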
@@ -1,8 +0,0 @@
name: train-and-deploy-pytorch
dependencies:
- matplotlib
- pip:
  - azureml-sdk
  - azureml-mlflow
  - https://download.pytorch.org/whl/cpu/torch-1.1.0-cp35-cp35m-win_amd64.whl
  - https://download.pytorch.org/whl/cpu/torchvision-0.3.0-cp35-cp35m-win_amd64.whl
@@ -1,248 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
"\n",
"Licensed under the MIT License."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Use MLflow with Azure Machine Learning for Local Training Run\n",
"\n",
"This example shows you how to use MLflow tracking APIs together with Azure Machine Learning services for storing your metrics and artifacts, from a local Notebook run. You'll learn how to:\n",
"\n",
" 1. Set up MLflow tracking URI so as to use Azure ML\n",
" 2. Create experiment\n",
" 3. Train a model on your local computer while logging metrics and artifacts\n",
" 4. View your experiment within your Azure ML Workspace in Azure Portal.\n",
"\n",
"## Prerequisites and Set-up\n",
"\n",
"Make sure you have completed the [Configuration](../../../configuration.ipynb) notebook to set up your Azure Machine Learning workspace and ensure other common prerequisites are met.\n",
"\n",
"Install the azureml-mlflow package before running this notebook. Note that mlflow itself gets installed as a dependency if you haven't installed it yet.\n",
"\n",
"```\n",
"pip install azureml-mlflow\n",
"```\n",
"\n",
"This example also uses the scikit-learn and matplotlib packages. Install them:\n",
"```\n",
"pip install scikit-learn matplotlib\n",
"```\n",
"\n",
"Then, import the necessary packages:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import mlflow\n",
"import mlflow.sklearn\n",
"import azureml.core\n",
"from azureml.core import Workspace\n",
"import matplotlib.pyplot as plt\n",
"\n",
"# Check core SDK version number\n",
"print(\"SDK version:\", azureml.core.VERSION)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Set tracking URI\n",
"\n",
"Set the MLflow tracking URI to point to your Azure ML Workspace. The subsequent logging calls from MLflow APIs will go to Azure ML services and will be tracked under your Workspace."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ws = Workspace.from_config()\n",
"\n",
"mlflow.set_tracking_uri(ws.get_mlflow_tracking_uri())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create Experiment\n",
"\n",
"In both MLflow and Azure ML, training runs are grouped into experiments. Let's create one for our experimentation."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"experiment_name = \"experiment-with-mlflow\"\n",
"mlflow.set_experiment(experiment_name)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create training and test data set\n",
"\n",
"This example uses the diabetes dataset to build a simple regression model. Let's load the dataset and split it into training and test sets."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"from sklearn.datasets import load_diabetes\n",
"from sklearn.linear_model import Ridge\n",
"from sklearn.metrics import mean_squared_error\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"X, y = load_diabetes(return_X_y = True)\n",
"columns = ['age', 'gender', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6']\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n",
"data = {\n",
"    \"train\":{\"X\": X_train, \"y\": y_train}, \n",
"    \"test\":{\"X\": X_test, \"y\": y_test}\n",
"}\n",
"\n",
"print (\"Data contains\", len(data['train']['X']), \"training samples and\", len(data['test']['X']), \"test samples\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Train while logging metrics and artifacts\n",
"\n",
"Next, start an MLflow run to train a scikit-learn regression model. Note that the training script has been instrumented using MLflow to:\n",
" * Log the model hyperparameter alpha value\n",
" * Log the mean squared error against the test set\n",
" * Save the scikit-learn based regression model produced by training\n",
" * Save an image that shows actuals vs predictions against the test set.\n",
" \n",
"These metrics and artifacts have been recorded to your Azure ML Workspace; in the next step you'll learn how to view them."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Create a run object in the experiment\n",
"model_save_path = \"model\"\n",
"\n",
"with mlflow.start_run() as run:\n",
"    # Log the algorithm parameter alpha to the run\n",
"    mlflow.log_metric('alpha', 0.03)\n",
"    # Create, fit, and test the scikit-learn Ridge regression model\n",
"    regression_model = Ridge(alpha=0.03)\n",
"    regression_model.fit(data['train']['X'], data['train']['y'])\n",
"    preds = regression_model.predict(data['test']['X'])\n",
"\n",
"    # Log mean squared error\n",
"    print('Mean Squared Error is', mean_squared_error(data['test']['y'], preds))\n",
"    mlflow.log_metric('mse', mean_squared_error(data['test']['y'], preds))\n",
"\n",
"    # Save the model to the outputs directory for capture\n",
"    mlflow.sklearn.log_model(regression_model, model_save_path)\n",
"\n",
"    # Plot actuals vs predictions and save the plot within the run\n",
"    fig = plt.figure(1)\n",
"    idx = np.argsort(data['test']['y'])\n",
"    plt.plot(data['test']['y'][idx], preds[idx])\n",
"    fig.savefig(\"actuals_vs_predictions.png\")\n",
"    mlflow.log_artifact(\"actuals_vs_predictions.png\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"You can open the report page for your experiment and the runs within it from the Azure Portal.\n",
"\n",
"Select one of the runs to view the metrics, and the plot you saved. The saved scikit-learn model appears under the **outputs** tab."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ws.experiments[experiment_name]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Next steps\n",
"\n",
"Try out these notebooks to learn more about MLflow-Azure Machine Learning integration:\n",
"\n",
" * [Train a model using remote compute on Azure Cloud](../train-on-remote/train-on-remote.ipynb)\n",
" * [Deploy the model as a web service](../deploy-model/deploy-model.ipynb)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"authors": [
{
"name": "rastala"
}
],
"kernelspec": {
"display_name": "Python 3.6",
"language": "python",
"name": "python36"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
@@ -1,7 +0,0 @@
|
||||
name: train-local
|
||||
dependencies:
|
||||
- scikit-learn
|
||||
- matplotlib
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-mlflow
|
||||
@@ -1,318 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||
"\n",
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Use MLflow with Azure Machine Learning for Remote Training Run\n",
|
||||
"\n",
|
||||
"This example shows you how to use MLflow tracking APIs together with Azure Machine Learning services for storing your metrics and artifacts, from local Notebook run. You'll learn how to:\n",
|
||||
"\n",
|
||||
" 1. Set up MLflow tracking URI so as to use Azure ML\n",
|
||||
" 2. Create experiment\n",
|
||||
" 3. Train a model on Machine Learning Compute while logging metrics and artifacts\n",
|
||||
" 4. View your experiment within your Azure ML Workspace in Azure Portal."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Prerequisites\n",
|
||||
"\n",
|
||||
"Make sure you have completed the [Configuration](../../../configuration.ipnyb) notebook to set up your Azure Machine Learning workspace and ensure other common prerequisites are met."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Set-up\n",
|
||||
"\n",
|
||||
"Check Azure ML SDK version installed on your computer, and then connect to your Workspace."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Check core SDK version number\n",
|
||||
"import azureml.core\n",
|
||||
"from azureml.core import Workspace, Experiment\n",
|
||||
"\n",
|
||||
"print(\"SDK version:\", azureml.core.VERSION)\n",
|
||||
"\n",
|
||||
"ws = Workspace.from_config()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's also create a Machine Learning Compute cluster for submitting the remote run. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
|
||||
"from azureml.core.compute_target import ComputeTargetException\n",
|
||||
"\n",
|
||||
"# Choose a name for your CPU cluster\n",
|
||||
"cpu_cluster_name = \"cpu-cluster\"\n",
|
||||
"\n",
|
||||
"# Verify that cluster does not exist already\n",
|
||||
"try:\n",
|
||||
" cpu_cluster = ComputeTarget(workspace=ws, name=cpu_cluster_name)\n",
|
||||
" print(\"Found existing cpu-cluster\")\n",
|
||||
"except ComputeTargetException:\n",
|
||||
" print(\"Creating new cpu-cluster\")\n",
|
||||
" \n",
|
||||
" # Specify the configuration for the new cluster\n",
|
||||
" compute_config = AmlCompute.provisioning_configuration(vm_size=\"STANDARD_D2_V2\",\n",
|
||||
" min_nodes=0,\n",
|
||||
" max_nodes=1)\n",
|
||||
"\n",
|
||||
" # Create the cluster with the specified name and configuration\n",
|
||||
" cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, compute_config)\n",
|
||||
" \n",
|
||||
" # Wait for the cluster to complete, show the output log\n",
|
||||
" cpu_cluster.wait_for_completion(show_output=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create Azure ML Experiment"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The following steps show how to submit a training Python script to a cluster as an Azure ML run, while logging happens through MLflow APIs to your Azure ML Workspace. Let's first create an experiment to hold the training runs."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core import Experiment\n",
|
||||
"\n",
|
||||
"experiment_name = \"experiment-with-mlflow\"\n",
|
||||
"exp = Experiment(workspace=ws, name=experiment_name)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Instrument remote training script using MLflow"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's use [*train_diabetes.py*](train_diabetes.py) to train a regression model against diabetes dataset as the example. Note that the training script uses mlflow.start_run() to start logging, and then logs metrics, saves the trained scikit-learn model, and saves a plot as an artifact.\n",
|
||||
"\n",
|
||||
"Run following command to view the script file. Notice the mlflow logging statements, and also notice that the script doesn't have explicit dependencies on azureml library."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"training_script = 'train_diabetes.py'\n",
|
||||
"with open(training_script, 'r') as f:\n",
|
||||
" print(f.read())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Submit Run to Cluster \n",
|
||||
"\n",
|
||||
"Let's submit the run to cluster. When running on the remote cluster as submitted run, Azure ML sets the MLflow tracking URI to point to your Azure ML Workspace, so that the metrics and artifacts are automatically logged there.\n",
|
||||
"\n",
|
||||
"Note that you have to specify the packages your script depends on, including *azureml-mlflow* that implicitly enables the MLflow logging to Azure ML. \n",
|
||||
"\n",
|
||||
"First, create a environment with Docker enable and required package dependencies specified."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"mlflow"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core import Environment\n",
|
||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||
"\n",
|
||||
"env = Environment(name=\"mlflow-env\")\n",
|
||||
"\n",
|
||||
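"# Run the training inside a Docker container so the environment is isolated and reproducible\n",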
"env.docker.enabled = True\n",
|
||||
"\n",
|
||||
"# Specify conda dependencies with scikit-learn and temporary pointers to mlflow extensions\n",
|
||||
"cd = CondaDependencies.create(\n",
|
||||
" conda_packages=[\"scikit-learn\", \"matplotlib\"],\n",
|
||||
" pip_packages=[\"azureml-mlflow\", \"numpy\"]\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"env.python.conda_dependencies = cd"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Next, specify a script run configuration that includes the training script, environment and CPU cluster created earlier."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core import ScriptRunConfig\n",
|
||||
"\n",
|
||||
"src = ScriptRunConfig(source_directory=\".\", script=training_script)\n",
|
||||
"src.run_config.environment = env\n",
|
||||
"src.run_config.target = cpu_cluster.name"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Finally, submit the run. Note that the first instance of the run typically takes longer as the Docker-based environment is created, several minutes. Subsequent runs reuse the image and are faster."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run = exp.submit(src)\n",
|
||||
"run.wait_for_completion(show_output=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can navigate to your Azure ML Workspace at Azure Portal to view the run metrics and artifacts. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can also get the metrics and bring them to your local notebook, and view the details of the run."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run.get_metrics()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ws.get_details()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Next steps\n",
|
||||
"\n",
|
||||
" * [Deploy the model as a web service](../deploy-model/deploy-model.ipynb)\n",
|
||||
" * [Learn more about Azure Machine Learning compute options](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-set-up-training-targets)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "rastala"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
"name": "python36"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -1,4 +0,0 @@
|
||||
name: train-remote
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
@@ -1,46 +0,0 @@
|
||||
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license.

import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import mlflow
import mlflow.sklearn

import matplotlib
# Use a non-interactive backend so the figure can be rendered and saved on headless compute
matplotlib.use('Agg')
import matplotlib.pyplot as plt

with mlflow.start_run():
    X, y = load_diabetes(return_X_y=True)
    columns = ['age', 'gender', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
    data = {
        "train": {"X": X_train, "y": y_train},
        "test": {"X": X_test, "y": y_test}}

    mlflow.log_metric("Training samples", len(data['train']['X']))
    mlflow.log_metric("Test samples", len(data['test']['X']))

    # Log the algorithm parameter alpha to the run
    mlflow.log_param('alpha', 0.03)
    # Create, fit, and test the scikit-learn Ridge regression model
    regression_model = Ridge(alpha=0.03)
    regression_model.fit(data['train']['X'], data['train']['y'])
    preds = regression_model.predict(data['test']['X'])

    # Log mean squared error
    print('Mean Squared Error is', mean_squared_error(data['test']['y'], preds))
    mlflow.log_metric('mse', mean_squared_error(data['test']['y'], preds))

    # Save the model to the outputs directory for capture
    mlflow.sklearn.log_model(regression_model, "model")

    # Plot actuals vs predictions and save the plot within the run
    fig = plt.figure(1)
    idx = np.argsort(data['test']['y'])
    plt.plot(data['test']['y'][idx], preds[idx])
    fig.savefig("actuals_vs_predictions.png")
    mlflow.log_artifact("actuals_vs_predictions.png")
|
||||
@@ -286,11 +286,7 @@
 {
 "cell_type": "code",
 "execution_count": null,
-"metadata": {
-"tags": [
-"estimator-remarks-sample"
-]
-},
+"metadata": {},
 "outputs": [],
 "source": [
 "from azureml.train.estimator import Estimator\n",

@@ -252,11 +252,7 @@
 {
 "cell_type": "code",
 "execution_count": null,
-"metadata": {
-"tags": [
-"dnn-chainer-remarks-sample"
-]
-},
+"metadata": {},
 "outputs": [],
 "source": [
 "from azureml.train.dnn import Chainer\n",

@@ -250,11 +250,7 @@
 {
 "cell_type": "code",
 "execution_count": null,
-"metadata": {
-"tags": [
-"dnn-pytorch-remarks-sample"
-]
-},
+"metadata": {},
 "outputs": [],
 "source": [
 "from azureml.train.dnn import PyTorch\n",

@@ -412,11 +412,7 @@
 {
 "cell_type": "code",
 "execution_count": null,
-"metadata": {
-"tags": [
-"dnn-tensorflow-remarks-sample"
-]
-},
+"metadata": {},
 "outputs": [],
 "source": [
 "from azureml.train.dnn import TensorFlow\n",

@@ -7,6 +7,8 @@ Follow these sample notebooks to learn:
 3. [Train on remote VM](train-on-remote-vm): train a model using a remote Azure VM as compute target.
 4. [Train on ML Compute](train-on-amlcompute): train a model using an ML Compute cluster as compute target.
 5. [Train in an HDI Spark cluster](train-in-spark): train a Spark ML model using an HDInsight Spark cluster as compute target.
+6. [Train and hyperparameter tune on Iris Dataset with Scikit-learn](train-hyperparameter-tune-deploy-with-sklearn): train a model using the Scikit-learn estimator and tune hyperparameters with Hyperdrive.
 6. [Logging API](logging-api): experiment with various logging functions to create runs and automatically generate graphs.
 7. [Manage runs](manage-runs): learn different ways how to start runs and child runs, monitor them, and cancel them.
+8. [Train and hyperparameter tune on Iris Dataset with Scikit-learn](train-hyperparameter-tune-deploy-with-sklearn): train a model using the Scikit-learn estimator and tune hyperparameters with Hyperdrive.
 

@@ -298,11 +298,7 @@
 {
 "cell_type": "code",
 "execution_count": null,
-"metadata": {
-"tags": [
-"sklearn-remarks-sample"
-]
-},
+"metadata": {},
 "outputs": [],
 "source": [
 "from azureml.train.sklearn import SKLearn\n",

@@ -332,11 +332,7 @@
 "\n",
 "* [Train on ML Compute](../../train-on-amlcompute)\n",
 "\n",
-"* [Train on remote VM](../../train-on-remote-vm)\n",
-"\n",
-"Learn more about registering and deploying a model:\n",
-"\n",
-"* [Model Register and Deploy](../../deploy-to-cloud/model-register-and-deploy.ipynb)"
+"* [Train on remote VM](../../train-on-remote-vm)"
 ]
 },
 {

@@ -1,45 +0,0 @@
|
||||
-----BEGIN PRIVATE KEY-----
|
||||
MIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQDEg09d0uWdyo9c
|
||||
uKbaJss7BT/NNuBw0Nh2pyHzLCJHyShcRi8UcmAeTlaMXdyr5NqTjqc1VT+CZA/o
|
||||
IZQxbFfkt87pyRmbIw34B1rCy3/FuT4o6n+rcWaRppBo8bBt1+9P7GID3KS0HWuk
|
||||
fWoAJaODsuC+mlbuB2s6CwPKbF8X30YGTL12SN73o4xewU8BDRUrSQEG1Gh5+5sV
|
||||
3abQFx/4DYNVqWQy4e15N5QkV8qCa06wCGAgq6NkgnVZVRZbxS2VQo2V+xEFkJEG
|
||||
yhtfTS+pRLsvTZQoIoYC+E7gAYmB9KhLPtX50DJ/xmI93/qL4Yt6pcjioecq4//n
|
||||
NORKAFHBAgMBAAECggEAYab67p3ZmsLI4QOlbmyuu0KNhPXLLGSr3LKLDWMWGeQd
|
||||
WVVLGfcISqcVHSWbfhP4hjDyaG9XYv1EZk8hbDnxp2eru8NCJTSTQXiuInSrpt65
|
||||
w+1byh9NH/3Mb0oDKWKPuoC16ENh2VtxXUkxPqd1jQF761uY7Snkn/BPTuzxiFN8
|
||||
Swrhum4b0CZf1XS4rTuk0b8tgSilGbk1DVMYANmQGb5TjMKjAJHzTIF5LYosXppQ
|
||||
q8xr24XRMpz4m9KuGZTPePZ3ycGadnQV205uE0fuCsru1V5xsNYKh9LYJYWPyD49
|
||||
L4eFHgLc2uVL9XFJZ7wujW0z5ZxyhWwObHoWvYrRjQKBgQD4frvJ7W8wJxNDa+3k
|
||||
rKVnN+vjCSLDqc3HVZPvVkEhZRXAx6PSTYJVAMi2ULdhoxrT6jDwsN7KA1qpqb9n
|
||||
NOttuAqFrJLPRRTjc5YjvBL1Yb4/wFUMR4OgrhhtwIEXlftXN830zlW1Wvo6S8o1
|
||||
vkGG9KuoVhfroyu4XAJpokd6CwKBgQDKcqvKIzhrF7Oed3STIpLJieeE0n+Dkz+I
|
||||
AEXm1E7ulT57BYTYO2jLLSUYnetew+QL85cXFSsuEUgH3H2fhckBdq8jcjJGi+YB
|
||||
7OA1WLUyvDvM6E6CxguzdNNNbtmhXNyLCOrxjiV35wzj47y/UPcWrURQZgzaxovH
|
||||
+c8mPeeO4wKBgQDB/GVqwFDxXT+7fVDsGB7TUiNyTBp4dmFvAA6JY2Nax4fQw8jO
|
||||
jrV02DTXpnFR5js2PXdRHjH9r9qh4iLKVdSIBYkpS0wcREiHOx907Ag8yL31FJcQ
|
||||
C+/kiqQFYaclG29naef8+OqNteTrh2jmxYxv5ybuNa9cwzeJJ0K25fk4ewKBgQCl
|
||||
2tooqUAgZHOQILdNj2aIXEVjSHyVE75ZsjeSS187EOP2L2hNKibJRXv9terNYVjj
|
||||
/bVLgNk2TYwgfKAiX510aIJFXNoZd6WA8EojCkCwhwvK7IrdkliltdEiv+zlyMkZ
|
||||
0r2AFf9WQuEJllrctf0oA91SrLhdR4ne1CbEYrThFwKBgEoK2tStBVypdnAZe7mI
|
||||
ahk4Lv3QYqwD+qd8H6VRwbX1EtggWCQh0jAohcCzn2HHq+zjUlT3RF7ey46z0gel
|
||||
+58sKj7uAHuHJ+pg8xI0CWS8Vy6E2hT5bCanb0rKXguuwx+90Kn/xj/yAK7CeIId
|
||||
PrJHSlG9/au3N6cbVM65RHPG
|
||||
-----END PRIVATE KEY-----
|
||||
-----BEGIN CERTIFICATE-----
|
||||
MIICoTCCAYkCAgPoMA0GCSqGSIb3DQEBBQUAMBQxEjAQBgNVBAMMCUNMSS1Mb2dp
|
||||
bjAiGA8yMDE5MDcxNjAxMDEwNloYDzIwMjAwNzE2MDEwMTA4WjAUMRIwEAYDVQQD
|
||||
DAlDTEktTG9naW4wggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDEg09d
|
||||
0uWdyo9cuKbaJss7BT/NNuBw0Nh2pyHzLCJHyShcRi8UcmAeTlaMXdyr5NqTjqc1
|
||||
VT+CZA/oIZQxbFfkt87pyRmbIw34B1rCy3/FuT4o6n+rcWaRppBo8bBt1+9P7GID
|
||||
3KS0HWukfWoAJaODsuC+mlbuB2s6CwPKbF8X30YGTL12SN73o4xewU8BDRUrSQEG
|
||||
1Gh5+5sV3abQFx/4DYNVqWQy4e15N5QkV8qCa06wCGAgq6NkgnVZVRZbxS2VQo2V
|
||||
+xEFkJEGyhtfTS+pRLsvTZQoIoYC+E7gAYmB9KhLPtX50DJ/xmI93/qL4Yt6pcji
|
||||
oecq4//nNORKAFHBAgMBAAEwDQYJKoZIhvcNAQEFBQADggEBAIDer4wNPbb+FEGs
|
||||
P+qwYWkDoDHjk3zG2bw8LEjp28PfzlXg5ng2W/rcNHnWTxkDSp7xCaJLhNuCRXx6
|
||||
vF8sNsQscW9219ZWv5OSETYivLDX1It24ZepAetWmM4NAamU9ZkJHIVidpyZPtZ+
|
||||
I9PvrTh44KW8VaPhhR5Gv0cUgq4rjhyHCyk8ZpEB4fO83/1fu5MnQUsPvqzrlgEa
|
||||
p3/GwG7AGSye0QyWdjrt2rcO0QWrCelZdkFut8kV0FHOzrrEgvoLDBlgzN9/qY+a
|
||||
Yb0+kqR1WBr58HZRG4i4abRpI49xMNp+egASN/8tPSsaR2BIsVmXBSg9Bd+k/f1V
|
||||
IUg8NDw=
|
||||
-----END CERTIFICATE-----
|
||||
@@ -1,20 +0,0 @@
|
||||
# Azure Machine Learning datasets (preview)
|
||||
|
||||
Azure Machine Learning datasets (preview) let data scientists and machine learning engineers apply data for ML with confidence. By creating a dataset, you create a reference to the data source location, along with a copy of its metadata. The data remains in its existing location, so no extra storage cost is incurred.
|
||||
|
||||
With Azure Machine Learning datasets, you can:
|
||||
|
||||
* **Keep a single copy of data in your storage** referenced by datasets.
|
||||
|
||||
* **Easily access data during model training** without worrying about connection strings or data paths (see the sketch after this list).
|
||||
|
||||
* **Share data & collaborate** with other users.
|
||||
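
The bullets above map to a handful of SDK calls. Below is a minimal sketch, assuming a workspace `config.json` in the working directory and a CSV already uploaded to the default datastore at `train-dataset/Titanic.csv` (the path used by the tutorial notebook); the names here are illustrative only.

```Python
from azureml.core import Workspace, Dataset

workspace = Workspace.from_config()
datastore = workspace.get_default_datastore()

# Reference the CSV in place; the data stays in blob storage and no copy is made
titanic = Dataset.Tabular.from_delimited_files(
    path=[(datastore, 'train-dataset/Titanic.csv')])

# Register once, so collaborators can retrieve the same dataset by name
titanic = titanic.register(workspace=workspace,
                           name='titanic dataset',
                           description='training dataset')
same_ds = Dataset.get_by_name(workspace, name='titanic dataset')
```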
|
||||
## Learn how to use Azure Machine Learning datasets:
|
||||
* [Create and register datasets](https://aka.ms/azureml/howto/createdatasets)
|
||||
* Use TabularDatasets in [automated machine learning training](https://aka.ms/automl-dataset)
|
||||
* Use TabularDatasets in [training](https://aka.ms/tabulardataset-samplenotebook)
|
||||
* For existing Dataset users: [Dataset API change notice](dataset-api-change-notice.md)
|
||||
|
||||
|
||||

|
||||
@@ -1,57 +0,0 @@
|
||||
# Dataset API change notice
|
||||
|
||||
## Why are Dataset API changes essential?
|
||||
|
||||
The existing Dataset class only supports data in tabular format. In order to support binary data and address a wider range of machine learning scenarios including deep learning, we will introduce Dataset types. Datasets are categorized into various types based on how users consume them in training. List of Dataset types:
|
||||
- **TabularDataset**: Represents data in a tabular format by parsing the provided file or list of files. TabularDataset can be created from csv, tsv, parquet files, SQL query results etc. For the complete list, please visit our [documentation](https://aka.ms/tabulardataset-api-reference). It provides you with the ability to materialize the data into a pandas DataFrame.
|
||||
- (upcoming) **FileDataset**: References single or multiple files in your datastores or public urls. The files can be of any format. FileDataset provides you with the ability to download or mount the files to your compute.
|
||||
- (upcoming) **LabeledDataset**: Represents labeled data produced by the Azure Machine Learning Labeling service. LabeledDataset provides you with the ability to materialize the data into formats like [COCO](http://cocodataset.org/#home) or [TFRecord](https://www.tensorflow.org/tutorials/load_data/tf_records) on your compute.
|
||||
- (upcoming) **TimeSeriesDataset**: An extension of TabularDataset that allows for specification of a time column and filtering the Dataset by time.
|
||||
|
||||
In order to transition from the current Dataset design to typed Datasets, we will deprecate a series of methods on the Dataset class and launch the FileDataset and TabularDataset classes.
|
||||
|
||||
## Which methods on Dataset class will be deprecated in upcoming releases?
|
||||
Methods to be deprecated|Replacement in the new version
|
||||
----|--------
|
||||
[Dataset.get()](https://docs.microsoft.com/python/api/azureml-core/azureml.core.dataset.dataset?view=azure-ml-py#get-workspace--name-none--id-none-)|`Dataset.get_by_name()`
|
||||
[Dataset.from_pandas_dataframe()](https://docs.microsoft.com/python/api/azureml-core/azureml.core.dataset.dataset?view=azure-ml-py#from-pandas-dataframe-dataframe--path-none--in-memory-false-)|Creating a Dataset from in-memory DataFrame or local files will cause errors in training on remote compute. Therefore, the new Dataset design will only support creating Datasets from paths in datastores or public web urls. If you are using pandas, you can write the DataFrame into a parquet file, upload it to the cloud, and create a TabularDataset referencing the parquet file using `Dataset.Tabular.from_parquet_files()`
|
||||
[Dataset.from_delimited_files()](https://docs.microsoft.com/python/api/azureml-core/azureml.core.dataset.dataset?view=azure-ml-py#from-delimited-files-path--separator------header--promoteheadersbehavior-all-files-have-same-headers--3---encoding--fileencoding-utf8--0---quoting-false--infer-column-types-true--skip-rows-0--skip-mode--skiplinesbehavior-no-rows--0---comment-none--include-path-false--archive-options-none--partition-format-none-)|`Dataset.Tabular.from_delimited_files()`
|
||||
[Dataset.auto_read_files()](https://docs.microsoft.com/python/api/azureml-core/azureml.core.dataset.dataset?view=azure-ml-py#auto-read-files-path--include-path-false--partition-format-none-)|`auto_read_files` does not always produce results that match users' expectations. To avoid confusion, this method is not introduced with TabularDataset for now. Please use `Dataset.Tabular.from_parquet_files()` or `Dataset.Tabular.from_delimited_files()` depending on your file format.
|
||||
[Dataset.from_parquet_files()](https://docs.microsoft.com/python/api/azureml-core/azureml.core.dataset.dataset?view=azure-ml-py#from-parquet-files-path--include-path-false--partition-format-none-)|`Dataset.Tabular.from_parquet_files()`
|
||||
[Dataset.from_sql_query()](https://docs.microsoft.com/python/api/azureml-core/azureml.core.dataset.dataset?view=azure-ml-py#from-sql-query-data-source--query-)|`Dataset.Tabular.from_sql_query()`
|
||||
[Dataset.from_excel_files()](https://docs.microsoft.com/python/api/azureml-core/azureml.core.dataset.dataset?view=azure-ml-py#from-excel-files-path--sheet-name-none--use-column-headers-false--skip-rows-0--include-path-false--infer-column-types-true--partition-format-none-)|We will support creating a TabularDataset from Excel files in a future release.
|
||||
[Dataset.from_json_files()](https://docs.microsoft.com/python/api/azureml-core/azureml.core.dataset.dataset?view=azure-ml-py#from-json-files-path--encoding--fileencoding-utf8--0---flatten-nested-arrays-false--include-path-false--partition-format-none-)| We will support creating a TabularDataset from json files in a future release.
|
||||
[Dataset.to_pandas_dataframe()](https://docs.microsoft.com/python/api/azureml-core/azureml.core.dataset.dataset?view=azure-ml-py#to-pandas-dataframe--)|`TabularDataset.to_pandas_dataframe()`
|
||||
[Dataset.to_spark_dataframe()](https://docs.microsoft.com/python/api/azureml-core/azureml.core.dataset.dataset?view=azure-ml-py#to-spark-dataframe--)|`TabularDataset.to_spark_dataframe()`
|
||||
[Dataset.head(3)](https://docs.microsoft.com/python/api/azureml-core/azureml.core.dataset.dataset?view=azure-ml-py#head-count-)|`TabularDataset.take(3).to_pandas_dataframe()`
|
||||
[Dataset.sample()](https://docs.microsoft.com/python/api/azureml-core/azureml.core.dataset.dataset?view=azure-ml-py#sample-sample-strategy--arguments-)|`TabularDataset.take_sample()`
|
||||
[Dataset.from_binary_files()](https://docs.microsoft.com/python/api/azureml-core/azureml.core.dataset.dataset?view=azure-ml-py#from-binary-files-path-)|`Dataset.File.from_files()`
|
||||
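
As a concrete instance of the mapping above, here is a minimal sketch of moving a delimited-file workflow to the typed API; `data/sample.csv` is a hypothetical datastore path.

```Python
from azureml.core import Workspace, Dataset

workspace = Workspace.from_config()
datastore = workspace.get_default_datastore()

# Old (deprecated): Dataset.from_delimited_files(...) then to_pandas_dataframe()
# New (typed):
tabular_ds = Dataset.Tabular.from_delimited_files(
    path=[(datastore, 'data/sample.csv')])  # hypothetical path
df = tabular_ds.to_pandas_dataframe()
print(df.head())
```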
|
||||
|
||||
## Why should I use the new Dataset API if I'm only dealing with tabular data?
|
||||
The current Dataset will be kept around for backward compatibility, but we strongly encourage you to move to TabularDataset for the new capabilities listed below:
|
||||
|
||||
- You are able to use TabularDatasets as automated ML input. [Learn How](https://aka.ms/automl-dataset)
|
||||
- You are able to version the new typed Datasets. [Learn How](https://aka.ms/azureml/howto/createdatasets)
|
||||
- You will be able to use the new typed Datasets as ScriptRun, Estimator, HyperDrive input.
|
||||
- You will be able to use the new typed Datasets in Azure Machine Learning Pipelines.
|
||||
- You will be able to track the lineage of new typed Datasets for model reproducibility.
|
||||
|
||||
|
||||
## How to migrate registered Datasets to new typed Datasets?
|
||||
If you have registered Datasets created using the old API, you can easily migrate these old Datasets to the new typed Datasets using the following code.
|
||||
```Python
|
||||
from azureml.core.workspace import Workspace
|
||||
from azureml.core.dataset import Dataset
|
||||
|
||||
# get existing workspace
|
||||
workspace = Workspace.from_config()
|
||||
# This method will convert old Dataset without type to a TabularDataset object automatically.
|
||||
new_ds = Dataset.get_by_name(workspace, 'old_ds_name')
|
||||
|
||||
# register the new typed Dataset with the workspace
|
||||
new_ds.register(workspace, 'new_ds_name')
|
||||
```
|
||||
|
||||
## How to provide feedback?
|
||||
If you have any feedback about our product, or if there is any missing capability that is essential for you to use the new Dataset API, please email us at [AskAzureMLData@microsoft.com](mailto:AskAzureMLData@microsoft.com).
|
||||
@@ -1,312 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Tutorial: Learn how to use TabularDatasets in Azure Machine Learning"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"In this tutorial, you will learn how to use Azure Machine Learning Datasets to train a classification model with the Azure Machine Learning SDK for Python. You will:\n",
|
||||
"\n",
|
||||
"☑ Setup a Python environment and import packages\n",
|
||||
"\n",
|
||||
"☑ Load the Titanic data from your Azure Blob Storage. (The [original data](https://www.kaggle.com/c/titanic/data) can be found on Kaggle)\n",
|
||||
"\n",
|
||||
"☑ Create and register a TabularDataset in your workspace\n",
|
||||
"\n",
|
||||
"☑ Train a classification model using the TabularDataset"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Pre-requisites:\n",
|
||||
"To create and work with datasets, you need:\n",
|
||||
"* An Azure subscription. If you don\u00e2\u20ac\u2122t have an Azure subscription, create a free account before you begin. Try the [free or paid version of Azure Machine Learning service](https://aka.ms/AMLFree) today.\n",
|
||||
"* An [Azure Machine Learning service workspace](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-workspace)\n",
|
||||
"* The [Azure Machine Learning SDK for Python installed](https://docs.microsoft.com/python/api/overview/azure/ml/install?view=azure-ml-py), which includes the azureml-datasets package.\n",
|
||||
"\n",
|
||||
"Data and train.py script to store in your Azure Blob Storage Account.\n",
|
||||
" * [Titanic data](./train-dataset/Titanic.csv)\n",
|
||||
" * [train.py](./train-dataset/train.py)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Initialize a Workspace\n",
|
||||
"\n",
|
||||
"Initialize a workspace object from persisted configuration"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import azureml.core\n",
|
||||
"from azureml.core import Workspace, Datastore, Dataset\n",
|
||||
"\n",
|
||||
"# Get existing workspace from config.json file in the same folder as the tutorial notebook\n",
|
||||
"# You can download the config file from your workspace\n",
|
||||
"workspace = Workspace.from_config()\n",
|
||||
"print(workspace)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create a TabularDataset\n",
|
||||
"\n",
|
||||
"Datasets are categorized into various types based on how users consume them in training. In this tutorial, you will create and use a TabularDataset in training. A TabularDataset represents data in a tabular format by parsing the provided file or list of files. TabularDataset can be created from csv, tsv, parquet files, SQL query results etc. For the complete list, please visit our [documentation](https://aka.ms/tabulardataset-api-reference). It provides you with the ability to materialize the data into a pandas DataFrame."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"By creating a dataset, you create a reference to the data source location, along with a copy of its metadata. The data remains in its existing location, so no extra storage cost is incurred.\n",
|
||||
"\n",
|
||||
"We will now upload the [Titanic data](./train-dataset/Titanic.csv) to the default datastore(blob) within your workspace.."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"datastore = workspace.get_default_datastore()\n",
|
||||
"datastore.upload_files(files = ['./train-dataset/Titanic.csv'],\n",
|
||||
" target_path = 'train-dataset/',\n",
|
||||
" overwrite = True,\n",
|
||||
" show_progress = True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Then we will create an unregistered TabularDataset pointing to the path in the datastore. We also support create a Dataset from multiple paths. [learn more](https://aka.ms/azureml/howto/createdatasets) "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dataset = Dataset.Tabular.from_delimited_files(path = [(datastore, 'train-dataset/Titanic.csv')])\n",
|
||||
"\n",
|
||||
"#preview the first 3 rows of the dataset\n",
|
||||
"dataset.take(3).to_pandas_dataframe()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Use the `register()` method to register datasets to your workspace so they can be shared with others, reused across various experiments, and refered to by name in your training script."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dataset = dataset.register(workspace = workspace,\n",
|
||||
" name = 'titanic dataset',\n",
|
||||
" description='training dataset')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create or Attach existing AmlCompute\n",
|
||||
"You will need to create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for your training. In this tutorial, you create `AmlCompute` as your training compute resource.\n",
|
||||
"\n",
|
||||
"**Creation of AmlCompute takes approximately 5 minutes.** If the AmlCompute with that name is already in your workspace this code will skip the creation process.\n",
|
||||
"\n",
|
||||
"As with other Azure services, there are limits on certain resources (e.g. AmlCompute) associated with the Azure Machine Learning service. Please read [this article](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.compute import AmlCompute\n",
|
||||
"from azureml.core.compute import ComputeTarget\n",
|
||||
"\n",
|
||||
"# Choose a name for your cluster.\n",
|
||||
"amlcompute_cluster_name = \"your cluster name\"\n",
|
||||
"\n",
|
||||
"found = False\n",
|
||||
"# Check if this compute target already exists in the workspace.\n",
|
||||
"cts = workspace.compute_targets\n",
|
||||
"if amlcompute_cluster_name in cts and cts[amlcompute_cluster_name].type == 'AmlCompute':\n",
|
||||
" found = True\n",
|
||||
" print('Found existing compute target.')\n",
|
||||
" compute_target = cts[amlcompute_cluster_name]\n",
|
||||
"\n",
|
||||
"if not found:\n",
|
||||
" print('Creating a new compute target...')\n",
|
||||
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\", # for GPU, use \"STANDARD_NC6\"\n",
|
||||
" #vm_priority = 'lowpriority', # optional\n",
|
||||
" max_nodes = 6)\n",
|
||||
"\n",
|
||||
" # Create the cluster.\\n\",\n",
|
||||
" compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)\n",
|
||||
"\n",
|
||||
"print('Checking cluster status...')\n",
|
||||
"# Can poll for a minimum number of nodes and for a specific timeout.\n",
|
||||
"# If no min_node_count is provided, it will use the scale settings for the cluster.\n",
|
||||
"compute_target.wait_for_completion(show_output = True, min_node_count = None, timeout_in_minutes = 20)\n",
|
||||
"\n",
|
||||
"# For a more detailed view of current AmlCompute status, use get_status()."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create an Experiment\n",
|
||||
"**Experiment** is a logical container in an Azure ML Workspace. It hosts run records which can include run metrics and output artifacts from your experiments."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core import Experiment\n",
|
||||
"\n",
|
||||
"experiment_name = 'training-datasets'\n",
|
||||
"experiment = Experiment(workspace = workspace, name = experiment_name)\n",
|
||||
"project_folder = './train-dataset/'"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Configure & Run"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.runconfig import RunConfiguration\n",
|
||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||
"import pkg_resources\n",
|
||||
"\n",
|
||||
"# create a new RunConfig object\n",
|
||||
"conda_run_config = RunConfiguration(framework=\"python\")\n",
|
||||
"\n",
|
||||
"# Set compute target to AmlCompute\n",
|
||||
"conda_run_config.target = compute_target\n",
|
||||
"conda_run_config.environment.docker.enabled = True\n",
|
||||
"conda_run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n",
|
||||
"\n",
|
||||
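"# Pin azureml-dataprep on the remote image to the locally installed version\n",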
"dprep_dependency = 'azureml-dataprep==' + pkg_resources.get_distribution(\"azureml-dataprep\").version\n",
|
||||
"\n",
|
||||
"cd = CondaDependencies.create(pip_packages=['azureml-sdk', 'scikit-learn', 'pandas', dprep_dependency])\n",
|
||||
"conda_run_config.environment.python.conda_dependencies = cd"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# create a new RunConfig object\n",
|
||||
"run_config = RunConfiguration()\n",
|
||||
"\n",
|
||||
"run_config.environment.python.user_managed_dependencies = True\n",
|
||||
"\n",
|
||||
"from azureml.core import Run\n",
|
||||
"from azureml.core import ScriptRunConfig\n",
|
||||
"\n",
|
||||
"src = ScriptRunConfig(source_directory=project_folder, \n",
|
||||
" script='train.py', \n",
|
||||
" run_config=conda_run_config) \n",
|
||||
"run = experiment.submit(config=src)\n",
|
||||
"run.wait_for_completion(show_output=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## View run history details"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You have now finished using a dataset from start to finish of your experiment!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "cforbe"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
"name": "python36"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.8"
|
||||
},
|
||||
"notice": "Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT License."
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -1,892 +0,0 @@
|
||||
PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
|
||||
1,0,3,"Braund, Mr. Owen Harris",0,22,1,0,A/5 21171,7.25,,S
|
||||
2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",1,38,1,0,PC 17599,71.2833,C85,C
|
||||
3,1,3,"Heikkinen, Miss. Laina",1,26,0,0,STON/O2. 3101282,7.925,,S
|
||||
4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",1,35,1,0,113803,53.1,C123,S
|
||||
5,0,3,"Allen, Mr. William Henry",0,35,0,0,373450,8.05,,S
|
||||
6,0,3,"Moran, Mr. James",0,,0,0,330877,8.4583,,Q
|
||||
7,0,1,"McCarthy, Mr. Timothy J",0,54,0,0,17463,51.8625,E46,S
|
||||
8,0,3,"Palsson, Master. Gosta Leonard",0,2,3,1,349909,21.075,,S
|
||||
9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",1,27,0,2,347742,11.1333,,S
|
||||
10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",1,14,1,0,237736,30.0708,,C
|
||||
11,1,3,"Sandstrom, Miss. Marguerite Rut",1,4,1,1,PP 9549,16.7,G6,S
|
||||
12,1,1,"Bonnell, Miss. Elizabeth",1,58,0,0,113783,26.55,C103,S
|
||||
13,0,3,"Saundercock, Mr. William Henry",0,20,0,0,A/5. 2151,8.05,,S
|
||||
14,0,3,"Andersson, Mr. Anders Johan",0,39,1,5,347082,31.275,,S
|
||||
15,0,3,"Vestrom, Miss. Hulda Amanda Adolfina",1,14,0,0,350406,7.8542,,S
|
||||
16,1,2,"Hewlett, Mrs. (Mary D Kingcome) ",1,55,0,0,248706,16,,S
|
||||
17,0,3,"Rice, Master. Eugene",0,2,4,1,382652,29.125,,Q
|
||||
18,1,2,"Williams, Mr. Charles Eugene",0,,0,0,244373,13,,S
|
||||
19,0,3,"Vander Planke, Mrs. Julius (Emelia Maria Vandemoortele)",1,31,1,0,345763,18,,S
|
||||
20,1,3,"Masselmani, Mrs. Fatima",1,,0,0,2649,7.225,,C
|
||||
21,0,2,"Fynney, Mr. Joseph J",0,35,0,0,239865,26,,S
|
||||
22,1,2,"Beesley, Mr. Lawrence",0,34,0,0,248698,13,D56,S
|
||||
23,1,3,"McGowan, Miss. Anna ""Annie""",1,15,0,0,330923,8.0292,,Q
|
||||
24,1,1,"Sloper, Mr. William Thompson",0,28,0,0,113788,35.5,A6,S
|
||||
25,0,3,"Palsson, Miss. Torborg Danira",1,8,3,1,349909,21.075,,S
|
||||
26,1,3,"Asplund, Mrs. Carl Oscar (Selma Augusta Emilia Johansson)",1,38,1,5,347077,31.3875,,S
|
||||
27,0,3,"Emir, Mr. Farred Chehab",0,,0,0,2631,7.225,,C
|
||||
28,0,1,"Fortune, Mr. Charles Alexander",0,19,3,2,19950,263,C23 C25 C27,S
|
||||
29,1,3,"O'Dwyer, Miss. Ellen ""Nellie""",1,,0,0,330959,7.8792,,Q
|
||||
30,0,3,"Todoroff, Mr. Lalio",0,,0,0,349216,7.8958,,S
|
||||
31,0,1,"Uruchurtu, Don. Manuel E",0,40,0,0,PC 17601,27.7208,,C
|
||||
32,1,1,"Spencer, Mrs. William Augustus (Marie Eugenie)",1,,1,0,PC 17569,146.5208,B78,C
|
||||
33,1,3,"Glynn, Miss. Mary Agatha",1,,0,0,335677,7.75,,Q
|
||||
34,0,2,"Wheadon, Mr. Edward H",0,66,0,0,C.A. 24579,10.5,,S
|
||||
35,0,1,"Meyer, Mr. Edgar Joseph",0,28,1,0,PC 17604,82.1708,,C
|
||||
36,0,1,"Holverson, Mr. Alexander Oskar",0,42,1,0,113789,52,,S
|
||||
37,1,3,"Mamee, Mr. Hanna",0,,0,0,2677,7.2292,,C
|
||||
38,0,3,"Cann, Mr. Ernest Charles",0,21,0,0,A./5. 2152,8.05,,S
|
||||
39,0,3,"Vander Planke, Miss. Augusta Maria",1,18,2,0,345764,18,,S
|
||||
40,1,3,"Nicola-Yarred, Miss. Jamila",1,14,1,0,2651,11.2417,,C
|
||||
41,0,3,"Ahlin, Mrs. Johan (Johanna Persdotter Larsson)",1,40,1,0,7546,9.475,,S
|
||||
42,0,2,"Turpin, Mrs. William John Robert (Dorothy Ann Wonnacott)",1,27,1,0,11668,21,,S
|
||||
43,0,3,"Kraeff, Mr. Theodor",0,,0,0,349253,7.8958,,C
|
||||
44,1,2,"Laroche, Miss. Simonne Marie Anne Andree",1,3,1,2,SC/Paris 2123,41.5792,,C
|
||||
45,1,3,"Devaney, Miss. Margaret Delia",1,19,0,0,330958,7.8792,,Q
|
||||
46,0,3,"Rogers, Mr. William John",0,,0,0,S.C./A.4. 23567,8.05,,S
|
||||
47,0,3,"Lennon, Mr. Denis",0,,1,0,370371,15.5,,Q
|
||||
48,1,3,"O'Driscoll, Miss. Bridget",1,,0,0,14311,7.75,,Q
|
||||
49,0,3,"Samaan, Mr. Youssef",0,,2,0,2662,21.6792,,C
|
||||
50,0,3,"Arnold-Franchi, Mrs. Josef (Josefine Franchi)",1,18,1,0,349237,17.8,,S
|
||||
51,0,3,"Panula, Master. Juha Niilo",0,7,4,1,3101295,39.6875,,S
|
||||
52,0,3,"Nosworthy, Mr. Richard Cater",0,21,0,0,A/4. 39886,7.8,,S
|
||||
53,1,1,"Harper, Mrs. Henry Sleeper (Myna Haxtun)",1,49,1,0,PC 17572,76.7292,D33,C
|
||||
54,1,2,"Faunthorpe, Mrs. Lizzie (Elizabeth Anne Wilkinson)",1,29,1,0,2926,26,,S
|
||||
55,0,1,"Ostby, Mr. Engelhart Cornelius",0,65,0,1,113509,61.9792,B30,C
|
||||
56,1,1,"Woolner, Mr. Hugh",0,,0,0,19947,35.5,C52,S
|
||||
57,1,2,"Rugg, Miss. Emily",1,21,0,0,C.A. 31026,10.5,,S
|
||||
58,0,3,"Novel, Mr. Mansouer",0,28.5,0,0,2697,7.2292,,C
|
||||
59,1,2,"West, Miss. Constance Mirium",1,5,1,2,C.A. 34651,27.75,,S
|
||||
60,0,3,"Goodwin, Master. William Frederick",0,11,5,2,CA 2144,46.9,,S
|
||||
61,0,3,"Sirayanian, Mr. Orsen",0,22,0,0,2669,7.2292,,C
|
||||
62,1,1,"Icard, Miss. Amelie",1,38,0,0,113572,80,B28,
|
||||
63,0,1,"Harris, Mr. Henry Birkhardt",0,45,1,0,36973,83.475,C83,S
|
||||
64,0,3,"Skoog, Master. Harald",0,4,3,2,347088,27.9,,S
|
||||
65,0,1,"Stewart, Mr. Albert A",0,,0,0,PC 17605,27.7208,,C
|
||||
66,1,3,"Moubarek, Master. Gerios",0,,1,1,2661,15.2458,,C
|
||||
67,1,2,"Nye, Mrs. (Elizabeth Ramell)",1,29,0,0,C.A. 29395,10.5,F33,S
|
||||
68,0,3,"Crease, Mr. Ernest James",0,19,0,0,S.P. 3464,8.1583,,S
|
||||
69,1,3,"Andersson, Miss. Erna Alexandra",1,17,4,2,3101281,7.925,,S
|
||||
70,0,3,"Kink, Mr. Vincenz",0,26,2,0,315151,8.6625,,S
|
||||
71,0,2,"Jenkin, Mr. Stephen Curnow",0,32,0,0,C.A. 33111,10.5,,S
|
||||
72,0,3,"Goodwin, Miss. Lillian Amy",1,16,5,2,CA 2144,46.9,,S
|
||||
73,0,2,"Hood, Mr. Ambrose Jr",0,21,0,0,S.O.C. 14879,73.5,,S
|
||||
74,0,3,"Chronopoulos, Mr. Apostolos",0,26,1,0,2680,14.4542,,C
|
||||
75,1,3,"Bing, Mr. Lee",0,32,0,0,1601,56.4958,,S
|
||||
76,0,3,"Moen, Mr. Sigurd Hansen",0,25,0,0,348123,7.65,F G73,S
|
||||
77,0,3,"Staneff, Mr. Ivan",0,,0,0,349208,7.8958,,S
|
||||
78,0,3,"Moutal, Mr. Rahamin Haim",0,,0,0,374746,8.05,,S
|
||||
79,1,2,"Caldwell, Master. Alden Gates",0,0.83,0,2,248738,29,,S
|
||||
80,1,3,"Dowdell, Miss. Elizabeth",1,30,0,0,364516,12.475,,S
|
||||
81,0,3,"Waelens, Mr. Achille",0,22,0,0,345767,9,,S
|
||||
82,1,3,"Sheerlinck, Mr. Jan Baptist",0,29,0,0,345779,9.5,,S
|
||||
83,1,3,"McDermott, Miss. Brigdet Delia",1,,0,0,330932,7.7875,,Q
|
||||
84,0,1,"Carrau, Mr. Francisco M",0,28,0,0,113059,47.1,,S
|
||||
85,1,2,"Ilett, Miss. Bertha",1,17,0,0,SO/C 14885,10.5,,S
|
||||
86,1,3,"Backstrom, Mrs. Karl Alfred (Maria Mathilda Gustafsson)",1,33,3,0,3101278,15.85,,S
|
||||
87,0,3,"Ford, Mr. William Neal",0,16,1,3,W./C. 6608,34.375,,S
|
||||
88,0,3,"Slocovski, Mr. Selman Francis",0,,0,0,SOTON/OQ 392086,8.05,,S
|
||||
89,1,1,"Fortune, Miss. Mabel Helen",1,23,3,2,19950,263,C23 C25 C27,S
|
||||
90,0,3,"Celotti, Mr. Francesco",0,24,0,0,343275,8.05,,S
|
||||
91,0,3,"Christmann, Mr. Emil",0,29,0,0,343276,8.05,,S
|
||||
92,0,3,"Andreasson, Mr. Paul Edvin",0,20,0,0,347466,7.8542,,S
|
||||
93,0,1,"Chaffee, Mr. Herbert Fuller",0,46,1,0,W.E.P. 5734,61.175,E31,S
|
||||
94,0,3,"Dean, Mr. Bertram Frank",0,26,1,2,C.A. 2315,20.575,,S
|
||||
95,0,3,"Coxon, Mr. Daniel",0,59,0,0,364500,7.25,,S
|
||||
96,0,3,"Shorney, Mr. Charles Joseph",0,,0,0,374910,8.05,,S
|
||||
97,0,1,"Goldschmidt, Mr. George B",0,71,0,0,PC 17754,34.6542,A5,C
|
||||
98,1,1,"Greenfield, Mr. William Bertram",0,23,0,1,PC 17759,63.3583,D10 D12,C
|
||||
99,1,2,"Doling, Mrs. John T (Ada Julia Bone)",1,34,0,1,231919,23,,S
|
||||
100,0,2,"Kantor, Mr. Sinai",0,34,1,0,244367,26,,S
|
||||
101,0,3,"Petranec, Miss. Matilda",1,28,0,0,349245,7.8958,,S
|
||||
102,0,3,"Petroff, Mr. Pastcho (""Pentcho"")",0,,0,0,349215,7.8958,,S
|
||||
103,0,1,"White, Mr. Richard Frasar",0,21,0,1,35281,77.2875,D26,S
|
||||
104,0,3,"Johansson, Mr. Gustaf Joel",0,33,0,0,7540,8.6542,,S
|
||||
105,0,3,"Gustafsson, Mr. Anders Vilhelm",0,37,2,0,3101276,7.925,,S
|
||||
106,0,3,"Mionoff, Mr. Stoytcho",0,28,0,0,349207,7.8958,,S
|
||||
107,1,3,"Salkjelsvik, Miss. Anna Kristine",1,21,0,0,343120,7.65,,S
|
||||
108,1,3,"Moss, Mr. Albert Johan",0,,0,0,312991,7.775,,S
|
||||
109,0,3,"Rekic, Mr. Tido",0,38,0,0,349249,7.8958,,S
|
||||
110,1,3,"Moran, Miss. Bertha",1,,1,0,371110,24.15,,Q
|
||||
111,0,1,"Porter, Mr. Walter Chamberlain",0,47,0,0,110465,52,C110,S
|
||||
112,0,3,"Zabour, Miss. Hileni",1,14.5,1,0,2665,14.4542,,C
|
||||
113,0,3,"Barton, Mr. David John",0,22,0,0,324669,8.05,,S
|
||||
114,0,3,"Jussila, Miss. Katriina",1,20,1,0,4136,9.825,,S
|
||||
115,0,3,"Attalah, Miss. Malake",1,17,0,0,2627,14.4583,,C
|
||||
116,0,3,"Pekoniemi, Mr. Edvard",0,21,0,0,STON/O 2. 3101294,7.925,,S
|
||||
117,0,3,"Connors, Mr. Patrick",0,70.5,0,0,370369,7.75,,Q
|
||||
118,0,2,"Turpin, Mr. William John Robert",0,29,1,0,11668,21,,S
|
||||
119,0,1,"Baxter, Mr. Quigg Edmond",0,24,0,1,PC 17558,247.5208,B58 B60,C
|
||||
120,0,3,"Andersson, Miss. Ellis Anna Maria",1,2,4,2,347082,31.275,,S
|
||||
121,0,2,"Hickman, Mr. Stanley George",0,21,2,0,S.O.C. 14879,73.5,,S
|
||||
122,0,3,"Moore, Mr. Leonard Charles",0,,0,0,A4. 54510,8.05,,S
|
||||
123,0,2,"Nasser, Mr. Nicholas",0,32.5,1,0,237736,30.0708,,C
|
||||
124,1,2,"Webber, Miss. Susan",1,32.5,0,0,27267,13,E101,S
|
||||
125,0,1,"White, Mr. Percival Wayland",0,54,0,1,35281,77.2875,D26,S
|
||||
126,1,3,"Nicola-Yarred, Master. Elias",0,12,1,0,2651,11.2417,,C
|
||||
127,0,3,"McMahon, Mr. Martin",0,,0,0,370372,7.75,,Q
|
||||
128,1,3,"Madsen, Mr. Fridtjof Arne",0,24,0,0,C 17369,7.1417,,S
|
||||
129,1,3,"Peter, Miss. Anna",1,,1,1,2668,22.3583,F E69,C
|
||||
130,0,3,"Ekstrom, Mr. Johan",0,45,0,0,347061,6.975,,S
|
||||
131,0,3,"Drazenoic, Mr. Jozef",0,33,0,0,349241,7.8958,,C
|
||||
132,0,3,"Coelho, Mr. Domingos Fernandeo",0,20,0,0,SOTON/O.Q. 3101307,7.05,,S
|
||||
133,0,3,"Robins, Mrs. Alexander A (Grace Charity Laury)",1,47,1,0,A/5. 3337,14.5,,S
|
||||
134,1,2,"Weisz, Mrs. Leopold (Mathilde Francoise Pede)",1,29,1,0,228414,26,,S
|
||||
135,0,2,"Sobey, Mr. Samuel James Hayden",0,25,0,0,C.A. 29178,13,,S
|
||||
136,0,2,"Richard, Mr. Emile",0,23,0,0,SC/PARIS 2133,15.0458,,C
|
||||
137,1,1,"Newsom, Miss. Helen Monypeny",1,19,0,2,11752,26.2833,D47,S
|
||||
138,0,1,"Futrelle, Mr. Jacques Heath",0,37,1,0,113803,53.1,C123,S
|
||||
139,0,3,"Osen, Mr. Olaf Elon",0,16,0,0,7534,9.2167,,S
|
||||
140,0,1,"Giglio, Mr. Victor",0,24,0,0,PC 17593,79.2,B86,C
|
||||
141,0,3,"Boulos, Mrs. Joseph (Sultana)",1,,0,2,2678,15.2458,,C
|
||||
142,1,3,"Nysten, Miss. Anna Sofia",1,22,0,0,347081,7.75,,S
|
||||
143,1,3,"Hakkarainen, Mrs. Pekka Pietari (Elin Matilda Dolck)",1,24,1,0,STON/O2. 3101279,15.85,,S
|
||||
144,0,3,"Burke, Mr. Jeremiah",0,19,0,0,365222,6.75,,Q
|
||||
145,0,2,"Andrew, Mr. Edgardo Samuel",0,18,0,0,231945,11.5,,S
|
||||
146,0,2,"Nicholls, Mr. Joseph Charles",0,19,1,1,C.A. 33112,36.75,,S
|
||||
147,1,3,"Andersson, Mr. August Edvard (""Wennerstrom"")",0,27,0,0,350043,7.7958,,S
|
||||
148,0,3,"Ford, Miss. Robina Maggie ""Ruby""",1,9,2,2,W./C. 6608,34.375,,S
|
||||
149,0,2,"Navratil, Mr. Michel (""Louis M Hoffman"")",0,36.5,0,2,230080,26,F2,S
|
||||
150,0,2,"Byles, Rev. Thomas Roussel Davids",0,42,0,0,244310,13,,S
|
||||
151,0,2,"Bateman, Rev. Robert James",0,51,0,0,S.O.P. 1166,12.525,,S
|
||||
152,1,1,"Pears, Mrs. Thomas (Edith Wearne)",1,22,1,0,113776,66.6,C2,S
|
||||
153,0,3,"Meo, Mr. Alfonzo",0,55.5,0,0,A.5. 11206,8.05,,S
|
||||
154,0,3,"van Billiard, Mr. Austin Blyler",0,40.5,0,2,A/5. 851,14.5,,S
|
||||
155,0,3,"Olsen, Mr. Ole Martin",0,,0,0,Fa 265302,7.3125,,S
|
||||
156,0,1,"Williams, Mr. Charles Duane",0,51,0,1,PC 17597,61.3792,,C
|
||||
157,1,3,"Gilnagh, Miss. Katherine ""Katie""",1,16,0,0,35851,7.7333,,Q
|
||||
158,0,3,"Corn, Mr. Harry",0,30,0,0,SOTON/OQ 392090,8.05,,S
|
||||
159,0,3,"Smiljanic, Mr. Mile",0,,0,0,315037,8.6625,,S
|
||||
160,0,3,"Sage, Master. Thomas Henry",0,,8,2,CA. 2343,69.55,,S
|
||||
161,0,3,"Cribb, Mr. John Hatfield",0,44,0,1,371362,16.1,,S
|
||||
162,1,2,"Watt, Mrs. James (Elizabeth ""Bessie"" Inglis Milne)",1,40,0,0,C.A. 33595,15.75,,S
|
||||
163,0,3,"Bengtsson, Mr. John Viktor",0,26,0,0,347068,7.775,,S
|
||||
164,0,3,"Calic, Mr. Jovo",0,17,0,0,315093,8.6625,,S
|
||||
165,0,3,"Panula, Master. Eino Viljami",0,1,4,1,3101295,39.6875,,S
|
||||
166,1,3,"Goldsmith, Master. Frank John William ""Frankie""",0,9,0,2,363291,20.525,,S
|
||||
167,1,1,"Chibnall, Mrs. (Edith Martha Bowerman)",1,,0,1,113505,55,E33,S
|
||||
168,0,3,"Skoog, Mrs. William (Anna Bernhardina Karlsson)",1,45,1,4,347088,27.9,,S
|
||||
169,0,1,"Baumann, Mr. John D",0,,0,0,PC 17318,25.925,,S
|
||||
170,0,3,"Ling, Mr. Lee",0,28,0,0,1601,56.4958,,S
|
||||
171,0,1,"Van der hoef, Mr. Wyckoff",0,61,0,0,111240,33.5,B19,S
|
||||
172,0,3,"Rice, Master. Arthur",0,4,4,1,382652,29.125,,Q
|
||||
173,1,3,"Johnson, Miss. Eleanor Ileen",1,1,1,1,347742,11.1333,,S
|
||||
174,0,3,"Sivola, Mr. Antti Wilhelm",0,21,0,0,STON/O 2. 3101280,7.925,,S
|
||||
175,0,1,"Smith, Mr. James Clinch",0,56,0,0,17764,30.6958,A7,C
|
||||
176,0,3,"Klasen, Mr. Klas Albin",0,18,1,1,350404,7.8542,,S
|
||||
177,0,3,"Lefebre, Master. Henry Forbes",0,,3,1,4133,25.4667,,S
|
||||
178,0,1,"Isham, Miss. Ann Elizabeth",1,50,0,0,PC 17595,28.7125,C49,C
|
||||
179,0,2,"Hale, Mr. Reginald",0,30,0,0,250653,13,,S
|
||||
180,0,3,"Leonard, Mr. Lionel",0,36,0,0,LINE,0,,S
|
||||
181,0,3,"Sage, Miss. Constance Gladys",1,,8,2,CA. 2343,69.55,,S
|
||||
182,0,2,"Pernot, Mr. Rene",0,,0,0,SC/PARIS 2131,15.05,,C
|
||||
183,0,3,"Asplund, Master. Clarence Gustaf Hugo",0,9,4,2,347077,31.3875,,S
|
||||
184,1,2,"Becker, Master. Richard F",0,1,2,1,230136,39,F4,S
|
||||
185,1,3,"Kink-Heilmann, Miss. Luise Gretchen",1,4,0,2,315153,22.025,,S
|
||||
186,0,1,"Rood, Mr. Hugh Roscoe",0,,0,0,113767,50,A32,S
|
||||
187,1,3,"O'Brien, Mrs. Thomas (Johanna ""Hannah"" Godfrey)",1,,1,0,370365,15.5,,Q
|
||||
188,1,1,"Romaine, Mr. Charles Hallace (""Mr C Rolmane"")",0,45,0,0,111428,26.55,,S
189,0,3,"Bourke, Mr. John",0,40,1,1,364849,15.5,,Q
190,0,3,"Turcin, Mr. Stjepan",0,36,0,0,349247,7.8958,,S
191,1,2,"Pinsky, Mrs. (Rosa)",1,32,0,0,234604,13,,S
192,0,2,"Carbines, Mr. William",0,19,0,0,28424,13,,S
193,1,3,"Andersen-Jensen, Miss. Carla Christine Nielsine",1,19,1,0,350046,7.8542,,S
194,1,2,"Navratil, Master. Michel M",0,3,1,1,230080,26,F2,S
195,1,1,"Brown, Mrs. James Joseph (Margaret Tobin)",1,44,0,0,PC 17610,27.7208,B4,C
196,1,1,"Lurette, Miss. Elise",1,58,0,0,PC 17569,146.5208,B80,C
197,0,3,"Mernagh, Mr. Robert",0,,0,0,368703,7.75,,Q
198,0,3,"Olsen, Mr. Karl Siegwart Andreas",0,42,0,1,4579,8.4042,,S
199,1,3,"Madigan, Miss. Margaret ""Maggie""",1,,0,0,370370,7.75,,Q
200,0,2,"Yrois, Miss. Henriette (""Mrs Harbeck"")",1,24,0,0,248747,13,,S
201,0,3,"Vande Walle, Mr. Nestor Cyriel",0,28,0,0,345770,9.5,,S
202,0,3,"Sage, Mr. Frederick",0,,8,2,CA. 2343,69.55,,S
203,0,3,"Johanson, Mr. Jakob Alfred",0,34,0,0,3101264,6.4958,,S
204,0,3,"Youseff, Mr. Gerious",0,45.5,0,0,2628,7.225,,C
205,1,3,"Cohen, Mr. Gurshon ""Gus""",0,18,0,0,A/5 3540,8.05,,S
206,0,3,"Strom, Miss. Telma Matilda",1,2,0,1,347054,10.4625,G6,S
207,0,3,"Backstrom, Mr. Karl Alfred",0,32,1,0,3101278,15.85,,S
208,1,3,"Albimona, Mr. Nassef Cassem",0,26,0,0,2699,18.7875,,C
209,1,3,"Carr, Miss. Helen ""Ellen""",1,16,0,0,367231,7.75,,Q
210,1,1,"Blank, Mr. Henry",0,40,0,0,112277,31,A31,C
211,0,3,"Ali, Mr. Ahmed",0,24,0,0,SOTON/O.Q. 3101311,7.05,,S
212,1,2,"Cameron, Miss. Clear Annie",1,35,0,0,F.C.C. 13528,21,,S
213,0,3,"Perkin, Mr. John Henry",0,22,0,0,A/5 21174,7.25,,S
214,0,2,"Givard, Mr. Hans Kristensen",0,30,0,0,250646,13,,S
215,0,3,"Kiernan, Mr. Philip",0,,1,0,367229,7.75,,Q
216,1,1,"Newell, Miss. Madeleine",1,31,1,0,35273,113.275,D36,C
217,1,3,"Honkanen, Miss. Eliina",1,27,0,0,STON/O2. 3101283,7.925,,S
218,0,2,"Jacobsohn, Mr. Sidney Samuel",0,42,1,0,243847,27,,S
219,1,1,"Bazzani, Miss. Albina",1,32,0,0,11813,76.2917,D15,C
220,0,2,"Harris, Mr. Walter",0,30,0,0,W/C 14208,10.5,,S
221,1,3,"Sunderland, Mr. Victor Francis",0,16,0,0,SOTON/OQ 392089,8.05,,S
222,0,2,"Bracken, Mr. James H",0,27,0,0,220367,13,,S
223,0,3,"Green, Mr. George Henry",0,51,0,0,21440,8.05,,S
224,0,3,"Nenkoff, Mr. Christo",0,,0,0,349234,7.8958,,S
225,1,1,"Hoyt, Mr. Frederick Maxfield",0,38,1,0,19943,90,C93,S
226,0,3,"Berglund, Mr. Karl Ivar Sven",0,22,0,0,PP 4348,9.35,,S
227,1,2,"Mellors, Mr. William John",0,19,0,0,SW/PP 751,10.5,,S
228,0,3,"Lovell, Mr. John Hall (""Henry"")",0,20.5,0,0,A/5 21173,7.25,,S
229,0,2,"Fahlstrom, Mr. Arne Jonas",0,18,0,0,236171,13,,S
230,0,3,"Lefebre, Miss. Mathilde",1,,3,1,4133,25.4667,,S
231,1,1,"Harris, Mrs. Henry Birkhardt (Irene Wallach)",1,35,1,0,36973,83.475,C83,S
232,0,3,"Larsson, Mr. Bengt Edvin",0,29,0,0,347067,7.775,,S
233,0,2,"Sjostedt, Mr. Ernst Adolf",0,59,0,0,237442,13.5,,S
234,1,3,"Asplund, Miss. Lillian Gertrud",1,5,4,2,347077,31.3875,,S
235,0,2,"Leyson, Mr. Robert William Norman",0,24,0,0,C.A. 29566,10.5,,S
236,0,3,"Harknett, Miss. Alice Phoebe",1,,0,0,W./C. 6609,7.55,,S
237,0,2,"Hold, Mr. Stephen",0,44,1,0,26707,26,,S
238,1,2,"Collyer, Miss. Marjorie ""Lottie""",1,8,0,2,C.A. 31921,26.25,,S
239,0,2,"Pengelly, Mr. Frederick William",0,19,0,0,28665,10.5,,S
240,0,2,"Hunt, Mr. George Henry",0,33,0,0,SCO/W 1585,12.275,,S
241,0,3,"Zabour, Miss. Thamine",1,,1,0,2665,14.4542,,C
242,1,3,"Murphy, Miss. Katherine ""Kate""",1,,1,0,367230,15.5,,Q
243,0,2,"Coleridge, Mr. Reginald Charles",0,29,0,0,W./C. 14263,10.5,,S
244,0,3,"Maenpaa, Mr. Matti Alexanteri",0,22,0,0,STON/O 2. 3101275,7.125,,S
245,0,3,"Attalah, Mr. Sleiman",0,30,0,0,2694,7.225,,C
246,0,1,"Minahan, Dr. William Edward",0,44,2,0,19928,90,C78,Q
247,0,3,"Lindahl, Miss. Agda Thorilda Viktoria",1,25,0,0,347071,7.775,,S
248,1,2,"Hamalainen, Mrs. William (Anna)",1,24,0,2,250649,14.5,,S
249,1,1,"Beckwith, Mr. Richard Leonard",0,37,1,1,11751,52.5542,D35,S
250,0,2,"Carter, Rev. Ernest Courtenay",0,54,1,0,244252,26,,S
251,0,3,"Reed, Mr. James George",0,,0,0,362316,7.25,,S
252,0,3,"Strom, Mrs. Wilhelm (Elna Matilda Persson)",1,29,1,1,347054,10.4625,G6,S
253,0,1,"Stead, Mr. William Thomas",0,62,0,0,113514,26.55,C87,S
254,0,3,"Lobb, Mr. William Arthur",0,30,1,0,A/5. 3336,16.1,,S
255,0,3,"Rosblom, Mrs. Viktor (Helena Wilhelmina)",1,41,0,2,370129,20.2125,,S
256,1,3,"Touma, Mrs. Darwis (Hanne Youssef Razi)",1,29,0,2,2650,15.2458,,C
257,1,1,"Thorne, Mrs. Gertrude Maybelle",1,,0,0,PC 17585,79.2,,C
258,1,1,"Cherry, Miss. Gladys",1,30,0,0,110152,86.5,B77,S
259,1,1,"Ward, Miss. Anna",1,35,0,0,PC 17755,512.3292,,C
260,1,2,"Parrish, Mrs. (Lutie Davis)",1,50,0,1,230433,26,,S
261,0,3,"Smith, Mr. Thomas",0,,0,0,384461,7.75,,Q
262,1,3,"Asplund, Master. Edvin Rojj Felix",0,3,4,2,347077,31.3875,,S
263,0,1,"Taussig, Mr. Emil",0,52,1,1,110413,79.65,E67,S
264,0,1,"Harrison, Mr. William",0,40,0,0,112059,0,B94,S
265,0,3,"Henry, Miss. Delia",1,,0,0,382649,7.75,,Q
266,0,2,"Reeves, Mr. David",0,36,0,0,C.A. 17248,10.5,,S
267,0,3,"Panula, Mr. Ernesti Arvid",0,16,4,1,3101295,39.6875,,S
268,1,3,"Persson, Mr. Ernst Ulrik",0,25,1,0,347083,7.775,,S
269,1,1,"Graham, Mrs. William Thompson (Edith Junkins)",1,58,0,1,PC 17582,153.4625,C125,S
270,1,1,"Bissette, Miss. Amelia",1,35,0,0,PC 17760,135.6333,C99,S
271,0,1,"Cairns, Mr. Alexander",0,,0,0,113798,31,,S
272,1,3,"Tornquist, Mr. William Henry",0,25,0,0,LINE,0,,S
273,1,2,"Mellinger, Mrs. (Elizabeth Anne Maidment)",1,41,0,1,250644,19.5,,S
274,0,1,"Natsch, Mr. Charles H",0,37,0,1,PC 17596,29.7,C118,C
275,1,3,"Healy, Miss. Hanora ""Nora""",1,,0,0,370375,7.75,,Q
276,1,1,"Andrews, Miss. Kornelia Theodosia",1,63,1,0,13502,77.9583,D7,S
277,0,3,"Lindblom, Miss. Augusta Charlotta",1,45,0,0,347073,7.75,,S
278,0,2,"Parkes, Mr. Francis ""Frank""",0,,0,0,239853,0,,S
279,0,3,"Rice, Master. Eric",0,7,4,1,382652,29.125,,Q
280,1,3,"Abbott, Mrs. Stanton (Rosa Hunt)",1,35,1,1,C.A. 2673,20.25,,S
281,0,3,"Duane, Mr. Frank",0,65,0,0,336439,7.75,,Q
282,0,3,"Olsson, Mr. Nils Johan Goransson",0,28,0,0,347464,7.8542,,S
283,0,3,"de Pelsmaeker, Mr. Alfons",0,16,0,0,345778,9.5,,S
284,1,3,"Dorking, Mr. Edward Arthur",0,19,0,0,A/5. 10482,8.05,,S
285,0,1,"Smith, Mr. Richard William",0,,0,0,113056,26,A19,S
286,0,3,"Stankovic, Mr. Ivan",0,33,0,0,349239,8.6625,,C
287,1,3,"de Mulder, Mr. Theodore",0,30,0,0,345774,9.5,,S
288,0,3,"Naidenoff, Mr. Penko",0,22,0,0,349206,7.8958,,S
289,1,2,"Hosono, Mr. Masabumi",0,42,0,0,237798,13,,S
290,1,3,"Connolly, Miss. Kate",1,22,0,0,370373,7.75,,Q
291,1,1,"Barber, Miss. Ellen ""Nellie""",1,26,0,0,19877,78.85,,S
292,1,1,"Bishop, Mrs. Dickinson H (Helen Walton)",1,19,1,0,11967,91.0792,B49,C
293,0,2,"Levy, Mr. Rene Jacques",0,36,0,0,SC/Paris 2163,12.875,D,C
294,0,3,"Haas, Miss. Aloisia",1,24,0,0,349236,8.85,,S
295,0,3,"Mineff, Mr. Ivan",0,24,0,0,349233,7.8958,,S
296,0,1,"Lewy, Mr. Ervin G",0,,0,0,PC 17612,27.7208,,C
297,0,3,"Hanna, Mr. Mansour",0,23.5,0,0,2693,7.2292,,C
298,0,1,"Allison, Miss. Helen Loraine",1,2,1,2,113781,151.55,C22 C26,S
299,1,1,"Saalfeld, Mr. Adolphe",0,,0,0,19988,30.5,C106,S
300,1,1,"Baxter, Mrs. James (Helene DeLaudeniere Chaput)",1,50,0,1,PC 17558,247.5208,B58 B60,C
301,1,3,"Kelly, Miss. Anna Katherine ""Annie Kate""",1,,0,0,9234,7.75,,Q
302,1,3,"McCoy, Mr. Bernard",0,,2,0,367226,23.25,,Q
303,0,3,"Johnson, Mr. William Cahoone Jr",0,19,0,0,LINE,0,,S
304,1,2,"Keane, Miss. Nora A",1,,0,0,226593,12.35,E101,Q
305,0,3,"Williams, Mr. Howard Hugh ""Harry""",0,,0,0,A/5 2466,8.05,,S
306,1,1,"Allison, Master. Hudson Trevor",0,0.92,1,2,113781,151.55,C22 C26,S
307,1,1,"Fleming, Miss. Margaret",1,,0,0,17421,110.8833,,C
308,1,1,"Penasco y Castellana, Mrs. Victor de Satode (Maria Josefa Perez de Soto y Vallejo)",1,17,1,0,PC 17758,108.9,C65,C
309,0,2,"Abelson, Mr. Samuel",0,30,1,0,P/PP 3381,24,,C
310,1,1,"Francatelli, Miss. Laura Mabel",1,30,0,0,PC 17485,56.9292,E36,C
311,1,1,"Hays, Miss. Margaret Bechstein",1,24,0,0,11767,83.1583,C54,C
312,1,1,"Ryerson, Miss. Emily Borie",1,18,2,2,PC 17608,262.375,B57 B59 B63 B66,C
313,0,2,"Lahtinen, Mrs. William (Anna Sylfven)",1,26,1,1,250651,26,,S
314,0,3,"Hendekovic, Mr. Ignjac",0,28,0,0,349243,7.8958,,S
315,0,2,"Hart, Mr. Benjamin",0,43,1,1,F.C.C. 13529,26.25,,S
316,1,3,"Nilsson, Miss. Helmina Josefina",1,26,0,0,347470,7.8542,,S
317,1,2,"Kantor, Mrs. Sinai (Miriam Sternin)",1,24,1,0,244367,26,,S
318,0,2,"Moraweck, Dr. Ernest",0,54,0,0,29011,14,,S
319,1,1,"Wick, Miss. Mary Natalie",1,31,0,2,36928,164.8667,C7,S
320,1,1,"Spedden, Mrs. Frederic Oakley (Margaretta Corning Stone)",1,40,1,1,16966,134.5,E34,C
321,0,3,"Dennis, Mr. Samuel",0,22,0,0,A/5 21172,7.25,,S
322,0,3,"Danoff, Mr. Yoto",0,27,0,0,349219,7.8958,,S
323,1,2,"Slayter, Miss. Hilda Mary",1,30,0,0,234818,12.35,,Q
324,1,2,"Caldwell, Mrs. Albert Francis (Sylvia Mae Harbaugh)",1,22,1,1,248738,29,,S
325,0,3,"Sage, Mr. George John Jr",0,,8,2,CA. 2343,69.55,,S
326,1,1,"Young, Miss. Marie Grice",1,36,0,0,PC 17760,135.6333,C32,C
327,0,3,"Nysveen, Mr. Johan Hansen",0,61,0,0,345364,6.2375,,S
328,1,2,"Ball, Mrs. (Ada E Hall)",1,36,0,0,28551,13,D,S
329,1,3,"Goldsmith, Mrs. Frank John (Emily Alice Brown)",1,31,1,1,363291,20.525,,S
330,1,1,"Hippach, Miss. Jean Gertrude",1,16,0,1,111361,57.9792,B18,C
331,1,3,"McCoy, Miss. Agnes",1,,2,0,367226,23.25,,Q
332,0,1,"Partner, Mr. Austen",0,45.5,0,0,113043,28.5,C124,S
333,0,1,"Graham, Mr. George Edward",0,38,0,1,PC 17582,153.4625,C91,S
334,0,3,"Vander Planke, Mr. Leo Edmondus",0,16,2,0,345764,18,,S
335,1,1,"Frauenthal, Mrs. Henry William (Clara Heinsheimer)",1,,1,0,PC 17611,133.65,,S
336,0,3,"Denkoff, Mr. Mitto",0,,0,0,349225,7.8958,,S
337,0,1,"Pears, Mr. Thomas Clinton",0,29,1,0,113776,66.6,C2,S
338,1,1,"Burns, Miss. Elizabeth Margaret",1,41,0,0,16966,134.5,E40,C
339,1,3,"Dahl, Mr. Karl Edwart",0,45,0,0,7598,8.05,,S
340,0,1,"Blackwell, Mr. Stephen Weart",0,45,0,0,113784,35.5,T,S
341,1,2,"Navratil, Master. Edmond Roger",0,2,1,1,230080,26,F2,S
342,1,1,"Fortune, Miss. Alice Elizabeth",1,24,3,2,19950,263,C23 C25 C27,S
343,0,2,"Collander, Mr. Erik Gustaf",0,28,0,0,248740,13,,S
344,0,2,"Sedgwick, Mr. Charles Frederick Waddington",0,25,0,0,244361,13,,S
345,0,2,"Fox, Mr. Stanley Hubert",0,36,0,0,229236,13,,S
346,1,2,"Brown, Miss. Amelia ""Mildred""",1,24,0,0,248733,13,F33,S
347,1,2,"Smith, Miss. Marion Elsie",1,40,0,0,31418,13,,S
348,1,3,"Davison, Mrs. Thomas Henry (Mary E Finck)",1,,1,0,386525,16.1,,S
349,1,3,"Coutts, Master. William Loch ""William""",0,3,1,1,C.A. 37671,15.9,,S
350,0,3,"Dimic, Mr. Jovan",0,42,0,0,315088,8.6625,,S
351,0,3,"Odahl, Mr. Nils Martin",0,23,0,0,7267,9.225,,S
352,0,1,"Williams-Lambert, Mr. Fletcher Fellows",0,,0,0,113510,35,C128,S
353,0,3,"Elias, Mr. Tannous",0,15,1,1,2695,7.2292,,C
354,0,3,"Arnold-Franchi, Mr. Josef",0,25,1,0,349237,17.8,,S
355,0,3,"Yousif, Mr. Wazli",0,,0,0,2647,7.225,,C
356,0,3,"Vanden Steen, Mr. Leo Peter",0,28,0,0,345783,9.5,,S
357,1,1,"Bowerman, Miss. Elsie Edith",1,22,0,1,113505,55,E33,S
358,0,2,"Funk, Miss. Annie Clemmer",1,38,0,0,237671,13,,S
359,1,3,"McGovern, Miss. Mary",1,,0,0,330931,7.8792,,Q
360,1,3,"Mockler, Miss. Helen Mary ""Ellie""",1,,0,0,330980,7.8792,,Q
361,0,3,"Skoog, Mr. Wilhelm",0,40,1,4,347088,27.9,,S
362,0,2,"del Carlo, Mr. Sebastiano",0,29,1,0,SC/PARIS 2167,27.7208,,C
363,0,3,"Barbara, Mrs. (Catherine David)",1,45,0,1,2691,14.4542,,C
364,0,3,"Asim, Mr. Adola",0,35,0,0,SOTON/O.Q. 3101310,7.05,,S
365,0,3,"O'Brien, Mr. Thomas",0,,1,0,370365,15.5,,Q
366,0,3,"Adahl, Mr. Mauritz Nils Martin",0,30,0,0,C 7076,7.25,,S
367,1,1,"Warren, Mrs. Frank Manley (Anna Sophia Atkinson)",1,60,1,0,110813,75.25,D37,C
368,1,3,"Moussa, Mrs. (Mantoura Boulos)",1,,0,0,2626,7.2292,,C
369,1,3,"Jermyn, Miss. Annie",1,,0,0,14313,7.75,,Q
370,1,1,"Aubart, Mme. Leontine Pauline",1,24,0,0,PC 17477,69.3,B35,C
371,1,1,"Harder, Mr. George Achilles",0,25,1,0,11765,55.4417,E50,C
372,0,3,"Wiklund, Mr. Jakob Alfred",0,18,1,0,3101267,6.4958,,S
373,0,3,"Beavan, Mr. William Thomas",0,19,0,0,323951,8.05,,S
374,0,1,"Ringhini, Mr. Sante",0,22,0,0,PC 17760,135.6333,,C
375,0,3,"Palsson, Miss. Stina Viola",1,3,3,1,349909,21.075,,S
376,1,1,"Meyer, Mrs. Edgar Joseph (Leila Saks)",1,,1,0,PC 17604,82.1708,,C
377,1,3,"Landergren, Miss. Aurora Adelia",1,22,0,0,C 7077,7.25,,S
378,0,1,"Widener, Mr. Harry Elkins",0,27,0,2,113503,211.5,C82,C
379,0,3,"Betros, Mr. Tannous",0,20,0,0,2648,4.0125,,C
380,0,3,"Gustafsson, Mr. Karl Gideon",0,19,0,0,347069,7.775,,S
381,1,1,"Bidois, Miss. Rosalie",1,42,0,0,PC 17757,227.525,,C
382,1,3,"Nakid, Miss. Maria (""Mary"")",1,1,0,2,2653,15.7417,,C
383,0,3,"Tikkanen, Mr. Juho",0,32,0,0,STON/O 2. 3101293,7.925,,S
384,1,1,"Holverson, Mrs. Alexander Oskar (Mary Aline Towner)",1,35,1,0,113789,52,,S
385,0,3,"Plotcharsky, Mr. Vasil",0,,0,0,349227,7.8958,,S
386,0,2,"Davies, Mr. Charles Henry",0,18,0,0,S.O.C. 14879,73.5,,S
387,0,3,"Goodwin, Master. Sidney Leonard",0,1,5,2,CA 2144,46.9,,S
388,1,2,"Buss, Miss. Kate",1,36,0,0,27849,13,,S
389,0,3,"Sadlier, Mr. Matthew",0,,0,0,367655,7.7292,,Q
390,1,2,"Lehmann, Miss. Bertha",1,17,0,0,SC 1748,12,,C
391,1,1,"Carter, Mr. William Ernest",0,36,1,2,113760,120,B96 B98,S
392,1,3,"Jansson, Mr. Carl Olof",0,21,0,0,350034,7.7958,,S
393,0,3,"Gustafsson, Mr. Johan Birger",0,28,2,0,3101277,7.925,,S
394,1,1,"Newell, Miss. Marjorie",1,23,1,0,35273,113.275,D36,C
395,1,3,"Sandstrom, Mrs. Hjalmar (Agnes Charlotta Bengtsson)",1,24,0,2,PP 9549,16.7,G6,S
396,0,3,"Johansson, Mr. Erik",0,22,0,0,350052,7.7958,,S
397,0,3,"Olsson, Miss. Elina",1,31,0,0,350407,7.8542,,S
398,0,2,"McKane, Mr. Peter David",0,46,0,0,28403,26,,S
399,0,2,"Pain, Dr. Alfred",0,23,0,0,244278,10.5,,S
400,1,2,"Trout, Mrs. William H (Jessie L)",1,28,0,0,240929,12.65,,S
401,1,3,"Niskanen, Mr. Juha",0,39,0,0,STON/O 2. 3101289,7.925,,S
402,0,3,"Adams, Mr. John",0,26,0,0,341826,8.05,,S
403,0,3,"Jussila, Miss. Mari Aina",1,21,1,0,4137,9.825,,S
404,0,3,"Hakkarainen, Mr. Pekka Pietari",0,28,1,0,STON/O2. 3101279,15.85,,S
405,0,3,"Oreskovic, Miss. Marija",1,20,0,0,315096,8.6625,,S
406,0,2,"Gale, Mr. Shadrach",0,34,1,0,28664,21,,S
407,0,3,"Widegren, Mr. Carl/Charles Peter",0,51,0,0,347064,7.75,,S
408,1,2,"Richards, Master. William Rowe",0,3,1,1,29106,18.75,,S
409,0,3,"Birkeland, Mr. Hans Martin Monsen",0,21,0,0,312992,7.775,,S
410,0,3,"Lefebre, Miss. Ida",1,,3,1,4133,25.4667,,S
411,0,3,"Sdycoff, Mr. Todor",0,,0,0,349222,7.8958,,S
412,0,3,"Hart, Mr. Henry",0,,0,0,394140,6.8583,,Q
413,1,1,"Minahan, Miss. Daisy E",1,33,1,0,19928,90,C78,Q
414,0,2,"Cunningham, Mr. Alfred Fleming",0,,0,0,239853,0,,S
415,1,3,"Sundman, Mr. Johan Julian",0,44,0,0,STON/O 2. 3101269,7.925,,S
416,0,3,"Meek, Mrs. Thomas (Annie Louise Rowley)",1,,0,0,343095,8.05,,S
417,1,2,"Drew, Mrs. James Vivian (Lulu Thorne Christian)",1,34,1,1,28220,32.5,,S
418,1,2,"Silven, Miss. Lyyli Karoliina",1,18,0,2,250652,13,,S
419,0,2,"Matthews, Mr. William John",0,30,0,0,28228,13,,S
420,0,3,"Van Impe, Miss. Catharina",1,10,0,2,345773,24.15,,S
421,0,3,"Gheorgheff, Mr. Stanio",0,,0,0,349254,7.8958,,C
422,0,3,"Charters, Mr. David",0,21,0,0,A/5. 13032,7.7333,,Q
423,0,3,"Zimmerman, Mr. Leo",0,29,0,0,315082,7.875,,S
424,0,3,"Danbom, Mrs. Ernst Gilbert (Anna Sigrid Maria Brogren)",1,28,1,1,347080,14.4,,S
425,0,3,"Rosblom, Mr. Viktor Richard",0,18,1,1,370129,20.2125,,S
426,0,3,"Wiseman, Mr. Phillippe",0,,0,0,A/4. 34244,7.25,,S
427,1,2,"Clarke, Mrs. Charles V (Ada Maria Winfield)",1,28,1,0,2003,26,,S
428,1,2,"Phillips, Miss. Kate Florence (""Mrs Kate Louise Phillips Marshall"")",1,19,0,0,250655,26,,S
429,0,3,"Flynn, Mr. James",0,,0,0,364851,7.75,,Q
430,1,3,"Pickard, Mr. Berk (Berk Trembisky)",0,32,0,0,SOTON/O.Q. 392078,8.05,E10,S
431,1,1,"Bjornstrom-Steffansson, Mr. Mauritz Hakan",0,28,0,0,110564,26.55,C52,S
432,1,3,"Thorneycroft, Mrs. Percival (Florence Kate White)",1,,1,0,376564,16.1,,S
433,1,2,"Louch, Mrs. Charles Alexander (Alice Adelaide Slow)",1,42,1,0,SC/AH 3085,26,,S
434,0,3,"Kallio, Mr. Nikolai Erland",0,17,0,0,STON/O 2. 3101274,7.125,,S
435,0,1,"Silvey, Mr. William Baird",0,50,1,0,13507,55.9,E44,S
436,1,1,"Carter, Miss. Lucile Polk",1,14,1,2,113760,120,B96 B98,S
437,0,3,"Ford, Miss. Doolina Margaret ""Daisy""",1,21,2,2,W./C. 6608,34.375,,S
438,1,2,"Richards, Mrs. Sidney (Emily Hocking)",1,24,2,3,29106,18.75,,S
439,0,1,"Fortune, Mr. Mark",0,64,1,4,19950,263,C23 C25 C27,S
440,0,2,"Kvillner, Mr. Johan Henrik Johannesson",0,31,0,0,C.A. 18723,10.5,,S
441,1,2,"Hart, Mrs. Benjamin (Esther Ada Bloomfield)",1,45,1,1,F.C.C. 13529,26.25,,S
442,0,3,"Hampe, Mr. Leon",0,20,0,0,345769,9.5,,S
443,0,3,"Petterson, Mr. Johan Emil",0,25,1,0,347076,7.775,,S
444,1,2,"Reynaldo, Ms. Encarnacion",1,28,0,0,230434,13,,S
445,1,3,"Johannesen-Bratthammer, Mr. Bernt",0,,0,0,65306,8.1125,,S
446,1,1,"Dodge, Master. Washington",0,4,0,2,33638,81.8583,A34,S
447,1,2,"Mellinger, Miss. Madeleine Violet",1,13,0,1,250644,19.5,,S
448,1,1,"Seward, Mr. Frederic Kimber",0,34,0,0,113794,26.55,,S
449,1,3,"Baclini, Miss. Marie Catherine",1,5,2,1,2666,19.2583,,C
450,1,1,"Peuchen, Major. Arthur Godfrey",0,52,0,0,113786,30.5,C104,S
451,0,2,"West, Mr. Edwy Arthur",0,36,1,2,C.A. 34651,27.75,,S
452,0,3,"Hagland, Mr. Ingvald Olai Olsen",0,,1,0,65303,19.9667,,S
453,0,1,"Foreman, Mr. Benjamin Laventall",0,30,0,0,113051,27.75,C111,C
454,1,1,"Goldenberg, Mr. Samuel L",0,49,1,0,17453,89.1042,C92,C
455,0,3,"Peduzzi, Mr. Joseph",0,,0,0,A/5 2817,8.05,,S
456,1,3,"Jalsevac, Mr. Ivan",0,29,0,0,349240,7.8958,,C
457,0,1,"Millet, Mr. Francis Davis",0,65,0,0,13509,26.55,E38,S
458,1,1,"Kenyon, Mrs. Frederick R (Marion)",1,,1,0,17464,51.8625,D21,S
459,1,2,"Toomey, Miss. Ellen",1,50,0,0,F.C.C. 13531,10.5,,S
460,0,3,"O'Connor, Mr. Maurice",0,,0,0,371060,7.75,,Q
461,1,1,"Anderson, Mr. Harry",0,48,0,0,19952,26.55,E12,S
462,0,3,"Morley, Mr. William",0,34,0,0,364506,8.05,,S
463,0,1,"Gee, Mr. Arthur H",0,47,0,0,111320,38.5,E63,S
464,0,2,"Milling, Mr. Jacob Christian",0,48,0,0,234360,13,,S
465,0,3,"Maisner, Mr. Simon",0,,0,0,A/S 2816,8.05,,S
466,0,3,"Goncalves, Mr. Manuel Estanslas",0,38,0,0,SOTON/O.Q. 3101306,7.05,,S
467,0,2,"Campbell, Mr. William",0,,0,0,239853,0,,S
468,0,1,"Smart, Mr. John Montgomery",0,56,0,0,113792,26.55,,S
469,0,3,"Scanlan, Mr. James",0,,0,0,36209,7.725,,Q
470,1,3,"Baclini, Miss. Helene Barbara",1,0.75,2,1,2666,19.2583,,C
471,0,3,"Keefe, Mr. Arthur",0,,0,0,323592,7.25,,S
472,0,3,"Cacic, Mr. Luka",0,38,0,0,315089,8.6625,,S
473,1,2,"West, Mrs. Edwy Arthur (Ada Mary Worth)",1,33,1,2,C.A. 34651,27.75,,S
474,1,2,"Jerwan, Mrs. Amin S (Marie Marthe Thuillard)",1,23,0,0,SC/AH Basle 541,13.7917,D,C
475,0,3,"Strandberg, Miss. Ida Sofia",1,22,0,0,7553,9.8375,,S
476,0,1,"Clifford, Mr. George Quincy",0,,0,0,110465,52,A14,S
477,0,2,"Renouf, Mr. Peter Henry",0,34,1,0,31027,21,,S
478,0,3,"Braund, Mr. Lewis Richard",0,29,1,0,3460,7.0458,,S
479,0,3,"Karlsson, Mr. Nils August",0,22,0,0,350060,7.5208,,S
480,1,3,"Hirvonen, Miss. Hildur E",1,2,0,1,3101298,12.2875,,S
481,0,3,"Goodwin, Master. Harold Victor",0,9,5,2,CA 2144,46.9,,S
482,0,2,"Frost, Mr. Anthony Wood ""Archie""",0,,0,0,239854,0,,S
483,0,3,"Rouse, Mr. Richard Henry",0,50,0,0,A/5 3594,8.05,,S
484,1,3,"Turkula, Mrs. (Hedwig)",1,63,0,0,4134,9.5875,,S
485,1,1,"Bishop, Mr. Dickinson H",0,25,1,0,11967,91.0792,B49,C
486,0,3,"Lefebre, Miss. Jeannie",1,,3,1,4133,25.4667,,S
487,1,1,"Hoyt, Mrs. Frederick Maxfield (Jane Anne Forby)",1,35,1,0,19943,90,C93,S
488,0,1,"Kent, Mr. Edward Austin",0,58,0,0,11771,29.7,B37,C
489,0,3,"Somerton, Mr. Francis William",0,30,0,0,A.5. 18509,8.05,,S
490,1,3,"Coutts, Master. Eden Leslie ""Neville""",0,9,1,1,C.A. 37671,15.9,,S
491,0,3,"Hagland, Mr. Konrad Mathias Reiersen",0,,1,0,65304,19.9667,,S
492,0,3,"Windelov, Mr. Einar",0,21,0,0,SOTON/OQ 3101317,7.25,,S
493,0,1,"Molson, Mr. Harry Markland",0,55,0,0,113787,30.5,C30,S
494,0,1,"Artagaveytia, Mr. Ramon",0,71,0,0,PC 17609,49.5042,,C
495,0,3,"Stanley, Mr. Edward Roland",0,21,0,0,A/4 45380,8.05,,S
496,0,3,"Yousseff, Mr. Gerious",0,,0,0,2627,14.4583,,C
497,1,1,"Eustis, Miss. Elizabeth Mussey",1,54,1,0,36947,78.2667,D20,C
498,0,3,"Shellard, Mr. Frederick William",0,,0,0,C.A. 6212,15.1,,S
499,0,1,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",1,25,1,2,113781,151.55,C22 C26,S
500,0,3,"Svensson, Mr. Olof",0,24,0,0,350035,7.7958,,S
501,0,3,"Calic, Mr. Petar",0,17,0,0,315086,8.6625,,S
502,0,3,"Canavan, Miss. Mary",1,21,0,0,364846,7.75,,Q
503,0,3,"O'Sullivan, Miss. Bridget Mary",1,,0,0,330909,7.6292,,Q
504,0,3,"Laitinen, Miss. Kristina Sofia",1,37,0,0,4135,9.5875,,S
505,1,1,"Maioni, Miss. Roberta",1,16,0,0,110152,86.5,B79,S
506,0,1,"Penasco y Castellana, Mr. Victor de Satode",0,18,1,0,PC 17758,108.9,C65,C
507,1,2,"Quick, Mrs. Frederick Charles (Jane Richards)",1,33,0,2,26360,26,,S
508,1,1,"Bradley, Mr. George (""George Arthur Brayton"")",0,,0,0,111427,26.55,,S
509,0,3,"Olsen, Mr. Henry Margido",0,28,0,0,C 4001,22.525,,S
510,1,3,"Lang, Mr. Fang",0,26,0,0,1601,56.4958,,S
511,1,3,"Daly, Mr. Eugene Patrick",0,29,0,0,382651,7.75,,Q
512,0,3,"Webber, Mr. James",0,,0,0,SOTON/OQ 3101316,8.05,,S
513,1,1,"McGough, Mr. James Robert",0,36,0,0,PC 17473,26.2875,E25,S
514,1,1,"Rothschild, Mrs. Martin (Elizabeth L. Barrett)",1,54,1,0,PC 17603,59.4,,C
515,0,3,"Coleff, Mr. Satio",0,24,0,0,349209,7.4958,,S
516,0,1,"Walker, Mr. William Anderson",0,47,0,0,36967,34.0208,D46,S
517,1,2,"Lemore, Mrs. (Amelia Milley)",1,34,0,0,C.A. 34260,10.5,F33,S
518,0,3,"Ryan, Mr. Patrick",0,,0,0,371110,24.15,,Q
519,1,2,"Angle, Mrs. William A (Florence ""Mary"" Agnes Hughes)",1,36,1,0,226875,26,,S
520,0,3,"Pavlovic, Mr. Stefo",0,32,0,0,349242,7.8958,,S
521,1,1,"Perreault, Miss. Anne",1,30,0,0,12749,93.5,B73,S
522,0,3,"Vovk, Mr. Janko",0,22,0,0,349252,7.8958,,S
523,0,3,"Lahoud, Mr. Sarkis",0,,0,0,2624,7.225,,C
524,1,1,"Hippach, Mrs. Louis Albert (Ida Sophia Fischer)",1,44,0,1,111361,57.9792,B18,C
525,0,3,"Kassem, Mr. Fared",0,,0,0,2700,7.2292,,C
526,0,3,"Farrell, Mr. James",0,40.5,0,0,367232,7.75,,Q
527,1,2,"Ridsdale, Miss. Lucy",1,50,0,0,W./C. 14258,10.5,,S
528,0,1,"Farthing, Mr. John",0,,0,0,PC 17483,221.7792,C95,S
529,0,3,"Salonen, Mr. Johan Werner",0,39,0,0,3101296,7.925,,S
530,0,2,"Hocking, Mr. Richard George",0,23,2,1,29104,11.5,,S
531,1,2,"Quick, Miss. Phyllis May",1,2,1,1,26360,26,,S
532,0,3,"Toufik, Mr. Nakli",0,,0,0,2641,7.2292,,C
533,0,3,"Elias, Mr. Joseph Jr",0,17,1,1,2690,7.2292,,C
534,1,3,"Peter, Mrs. Catherine (Catherine Rizk)",1,,0,2,2668,22.3583,,C
535,0,3,"Cacic, Miss. Marija",1,30,0,0,315084,8.6625,,S
536,1,2,"Hart, Miss. Eva Miriam",1,7,0,2,F.C.C. 13529,26.25,,S
537,0,1,"Butt, Major. Archibald Willingham",0,45,0,0,113050,26.55,B38,S
538,1,1,"LeRoy, Miss. Bertha",1,30,0,0,PC 17761,106.425,,C
539,0,3,"Risien, Mr. Samuel Beard",0,,0,0,364498,14.5,,S
540,1,1,"Frolicher, Miss. Hedwig Margaritha",1,22,0,2,13568,49.5,B39,C
541,1,1,"Crosby, Miss. Harriet R",1,36,0,2,WE/P 5735,71,B22,S
542,0,3,"Andersson, Miss. Ingeborg Constanzia",1,9,4,2,347082,31.275,,S
543,0,3,"Andersson, Miss. Sigrid Elisabeth",1,11,4,2,347082,31.275,,S
544,1,2,"Beane, Mr. Edward",0,32,1,0,2908,26,,S
545,0,1,"Douglas, Mr. Walter Donald",0,50,1,0,PC 17761,106.425,C86,C
546,0,1,"Nicholson, Mr. Arthur Ernest",0,64,0,0,693,26,,S
547,1,2,"Beane, Mrs. Edward (Ethel Clarke)",1,19,1,0,2908,26,,S
548,1,2,"Padro y Manent, Mr. Julian",0,,0,0,SC/PARIS 2146,13.8625,,C
549,0,3,"Goldsmith, Mr. Frank John",0,33,1,1,363291,20.525,,S
550,1,2,"Davies, Master. John Morgan Jr",0,8,1,1,C.A. 33112,36.75,,S
551,1,1,"Thayer, Mr. John Borland Jr",0,17,0,2,17421,110.8833,C70,C
552,0,2,"Sharp, Mr. Percival James R",0,27,0,0,244358,26,,S
553,0,3,"O'Brien, Mr. Timothy",0,,0,0,330979,7.8292,,Q
554,1,3,"Leeni, Mr. Fahim (""Philip Zenni"")",0,22,0,0,2620,7.225,,C
555,1,3,"Ohman, Miss. Velin",1,22,0,0,347085,7.775,,S
556,0,1,"Wright, Mr. George",0,62,0,0,113807,26.55,,S
557,1,1,"Duff Gordon, Lady. (Lucille Christiana Sutherland) (""Mrs Morgan"")",1,48,1,0,11755,39.6,A16,C
558,0,1,"Robbins, Mr. Victor",0,,0,0,PC 17757,227.525,,C
559,1,1,"Taussig, Mrs. Emil (Tillie Mandelbaum)",1,39,1,1,110413,79.65,E67,S
560,1,3,"de Messemaeker, Mrs. Guillaume Joseph (Emma)",1,36,1,0,345572,17.4,,S
561,0,3,"Morrow, Mr. Thomas Rowan",0,,0,0,372622,7.75,,Q
562,0,3,"Sivic, Mr. Husein",0,40,0,0,349251,7.8958,,S
563,0,2,"Norman, Mr. Robert Douglas",0,28,0,0,218629,13.5,,S
564,0,3,"Simmons, Mr. John",0,,0,0,SOTON/OQ 392082,8.05,,S
565,0,3,"Meanwell, Miss. (Marion Ogden)",1,,0,0,SOTON/O.Q. 392087,8.05,,S
566,0,3,"Davies, Mr. Alfred J",0,24,2,0,A/4 48871,24.15,,S
567,0,3,"Stoytcheff, Mr. Ilia",0,19,0,0,349205,7.8958,,S
568,0,3,"Palsson, Mrs. Nils (Alma Cornelia Berglund)",1,29,0,4,349909,21.075,,S
569,0,3,"Doharr, Mr. Tannous",0,,0,0,2686,7.2292,,C
570,1,3,"Jonsson, Mr. Carl",0,32,0,0,350417,7.8542,,S
571,1,2,"Harris, Mr. George",0,62,0,0,S.W./PP 752,10.5,,S
572,1,1,"Appleton, Mrs. Edward Dale (Charlotte Lamson)",1,53,2,0,11769,51.4792,C101,S
573,1,1,"Flynn, Mr. John Irwin (""Irving"")",0,36,0,0,PC 17474,26.3875,E25,S
574,1,3,"Kelly, Miss. Mary",1,,0,0,14312,7.75,,Q
575,0,3,"Rush, Mr. Alfred George John",0,16,0,0,A/4. 20589,8.05,,S
576,0,3,"Patchett, Mr. George",0,19,0,0,358585,14.5,,S
577,1,2,"Garside, Miss. Ethel",1,34,0,0,243880,13,,S
578,1,1,"Silvey, Mrs. William Baird (Alice Munger)",1,39,1,0,13507,55.9,E44,S
579,0,3,"Caram, Mrs. Joseph (Maria Elias)",1,,1,0,2689,14.4583,,C
580,1,3,"Jussila, Mr. Eiriik",0,32,0,0,STON/O 2. 3101286,7.925,,S
581,1,2,"Christy, Miss. Julie Rachel",1,25,1,1,237789,30,,S
582,1,1,"Thayer, Mrs. John Borland (Marian Longstreth Morris)",1,39,1,1,17421,110.8833,C68,C
583,0,2,"Downton, Mr. William James",0,54,0,0,28403,26,,S
584,0,1,"Ross, Mr. John Hugo",0,36,0,0,13049,40.125,A10,C
585,0,3,"Paulner, Mr. Uscher",0,,0,0,3411,8.7125,,C
586,1,1,"Taussig, Miss. Ruth",1,18,0,2,110413,79.65,E68,S
587,0,2,"Jarvis, Mr. John Denzil",0,47,0,0,237565,15,,S
588,1,1,"Frolicher-Stehli, Mr. Maxmillian",0,60,1,1,13567,79.2,B41,C
589,0,3,"Gilinski, Mr. Eliezer",0,22,0,0,14973,8.05,,S
590,0,3,"Murdlin, Mr. Joseph",0,,0,0,A./5. 3235,8.05,,S
591,0,3,"Rintamaki, Mr. Matti",0,35,0,0,STON/O 2. 3101273,7.125,,S
592,1,1,"Stephenson, Mrs. Walter Bertram (Martha Eustis)",1,52,1,0,36947,78.2667,D20,C
593,0,3,"Elsbury, Mr. William James",0,47,0,0,A/5 3902,7.25,,S
594,0,3,"Bourke, Miss. Mary",1,,0,2,364848,7.75,,Q
595,0,2,"Chapman, Mr. John Henry",0,37,1,0,SC/AH 29037,26,,S
596,0,3,"Van Impe, Mr. Jean Baptiste",0,36,1,1,345773,24.15,,S
597,1,2,"Leitch, Miss. Jessie Wills",1,,0,0,248727,33,,S
598,0,3,"Johnson, Mr. Alfred",0,49,0,0,LINE,0,,S
599,0,3,"Boulos, Mr. Hanna",0,,0,0,2664,7.225,,C
600,1,1,"Duff Gordon, Sir. Cosmo Edmund (""Mr Morgan"")",0,49,1,0,PC 17485,56.9292,A20,C
601,1,2,"Jacobsohn, Mrs. Sidney Samuel (Amy Frances Christy)",1,24,2,1,243847,27,,S
602,0,3,"Slabenoff, Mr. Petco",0,,0,0,349214,7.8958,,S
603,0,1,"Harrington, Mr. Charles H",0,,0,0,113796,42.4,,S
604,0,3,"Torber, Mr. Ernst William",0,44,0,0,364511,8.05,,S
605,1,1,"Homer, Mr. Harry (""Mr E Haven"")",0,35,0,0,111426,26.55,,C
606,0,3,"Lindell, Mr. Edvard Bengtsson",0,36,1,0,349910,15.55,,S
607,0,3,"Karaic, Mr. Milan",0,30,0,0,349246,7.8958,,S
608,1,1,"Daniel, Mr. Robert Williams",0,27,0,0,113804,30.5,,S
609,1,2,"Laroche, Mrs. Joseph (Juliette Marie Louise Lafargue)",1,22,1,2,SC/Paris 2123,41.5792,,C
610,1,1,"Shutes, Miss. Elizabeth W",1,40,0,0,PC 17582,153.4625,C125,S
611,0,3,"Andersson, Mrs. Anders Johan (Alfrida Konstantia Brogren)",1,39,1,5,347082,31.275,,S
612,0,3,"Jardin, Mr. Jose Neto",0,,0,0,SOTON/O.Q. 3101305,7.05,,S
613,1,3,"Murphy, Miss. Margaret Jane",1,,1,0,367230,15.5,,Q
614,0,3,"Horgan, Mr. John",0,,0,0,370377,7.75,,Q
615,0,3,"Brocklebank, Mr. William Alfred",0,35,0,0,364512,8.05,,S
616,1,2,"Herman, Miss. Alice",1,24,1,2,220845,65,,S
617,0,3,"Danbom, Mr. Ernst Gilbert",0,34,1,1,347080,14.4,,S
618,0,3,"Lobb, Mrs. William Arthur (Cordelia K Stanlick)",1,26,1,0,A/5. 3336,16.1,,S
619,1,2,"Becker, Miss. Marion Louise",1,4,2,1,230136,39,F4,S
620,0,2,"Gavey, Mr. Lawrence",0,26,0,0,31028,10.5,,S
621,0,3,"Yasbeck, Mr. Antoni",0,27,1,0,2659,14.4542,,C
622,1,1,"Kimball, Mr. Edwin Nelson Jr",0,42,1,0,11753,52.5542,D19,S
623,1,3,"Nakid, Mr. Sahid",0,20,1,1,2653,15.7417,,C
624,0,3,"Hansen, Mr. Henry Damsgaard",0,21,0,0,350029,7.8542,,S
625,0,3,"Bowen, Mr. David John ""Dai""",0,21,0,0,54636,16.1,,S
626,0,1,"Sutton, Mr. Frederick",0,61,0,0,36963,32.3208,D50,S
627,0,2,"Kirkland, Rev. Charles Leonard",0,57,0,0,219533,12.35,,Q
628,1,1,"Longley, Miss. Gretchen Fiske",1,21,0,0,13502,77.9583,D9,S
629,0,3,"Bostandyeff, Mr. Guentcho",0,26,0,0,349224,7.8958,,S
630,0,3,"O'Connell, Mr. Patrick D",0,,0,0,334912,7.7333,,Q
631,1,1,"Barkworth, Mr. Algernon Henry Wilson",0,80,0,0,27042,30,A23,S
632,0,3,"Lundahl, Mr. Johan Svensson",0,51,0,0,347743,7.0542,,S
633,1,1,"Stahelin-Maeglin, Dr. Max",0,32,0,0,13214,30.5,B50,C
634,0,1,"Parr, Mr. William Henry Marsh",0,,0,0,112052,0,,S
635,0,3,"Skoog, Miss. Mabel",1,9,3,2,347088,27.9,,S
636,1,2,"Davis, Miss. Mary",1,28,0,0,237668,13,,S
637,0,3,"Leinonen, Mr. Antti Gustaf",0,32,0,0,STON/O 2. 3101292,7.925,,S
638,0,2,"Collyer, Mr. Harvey",0,31,1,1,C.A. 31921,26.25,,S
639,0,3,"Panula, Mrs. Juha (Maria Emilia Ojala)",1,41,0,5,3101295,39.6875,,S
640,0,3,"Thorneycroft, Mr. Percival",0,,1,0,376564,16.1,,S
641,0,3,"Jensen, Mr. Hans Peder",0,20,0,0,350050,7.8542,,S
642,1,1,"Sagesser, Mlle. Emma",1,24,0,0,PC 17477,69.3,B35,C
643,0,3,"Skoog, Miss. Margit Elizabeth",1,2,3,2,347088,27.9,,S
644,1,3,"Foo, Mr. Choong",0,,0,0,1601,56.4958,,S
645,1,3,"Baclini, Miss. Eugenie",1,0.75,2,1,2666,19.2583,,C
646,1,1,"Harper, Mr. Henry Sleeper",0,48,1,0,PC 17572,76.7292,D33,C
647,0,3,"Cor, Mr. Liudevit",0,19,0,0,349231,7.8958,,S
648,1,1,"Simonius-Blumer, Col. Oberst Alfons",0,56,0,0,13213,35.5,A26,C
649,0,3,"Willey, Mr. Edward",0,,0,0,S.O./P.P. 751,7.55,,S
650,1,3,"Stanley, Miss. Amy Zillah Elsie",1,23,0,0,CA. 2314,7.55,,S
651,0,3,"Mitkoff, Mr. Mito",0,,0,0,349221,7.8958,,S
652,1,2,"Doling, Miss. Elsie",1,18,0,1,231919,23,,S
653,0,3,"Kalvik, Mr. Johannes Halvorsen",0,21,0,0,8475,8.4333,,S
654,1,3,"O'Leary, Miss. Hanora ""Norah""",1,,0,0,330919,7.8292,,Q
655,0,3,"Hegarty, Miss. Hanora ""Nora""",1,18,0,0,365226,6.75,,Q
656,0,2,"Hickman, Mr. Leonard Mark",0,24,2,0,S.O.C. 14879,73.5,,S
657,0,3,"Radeff, Mr. Alexander",0,,0,0,349223,7.8958,,S
658,0,3,"Bourke, Mrs. John (Catherine)",1,32,1,1,364849,15.5,,Q
659,0,2,"Eitemiller, Mr. George Floyd",0,23,0,0,29751,13,,S
660,0,1,"Newell, Mr. Arthur Webster",0,58,0,2,35273,113.275,D48,C
661,1,1,"Frauenthal, Dr. Henry William",0,50,2,0,PC 17611,133.65,,S
662,0,3,"Badt, Mr. Mohamed",0,40,0,0,2623,7.225,,C
663,0,1,"Colley, Mr. Edward Pomeroy",0,47,0,0,5727,25.5875,E58,S
664,0,3,"Coleff, Mr. Peju",0,36,0,0,349210,7.4958,,S
665,1,3,"Lindqvist, Mr. Eino William",0,20,1,0,STON/O 2. 3101285,7.925,,S
666,0,2,"Hickman, Mr. Lewis",0,32,2,0,S.O.C. 14879,73.5,,S
667,0,2,"Butler, Mr. Reginald Fenton",0,25,0,0,234686,13,,S
668,0,3,"Rommetvedt, Mr. Knud Paust",0,,0,0,312993,7.775,,S
669,0,3,"Cook, Mr. Jacob",0,43,0,0,A/5 3536,8.05,,S
670,1,1,"Taylor, Mrs. Elmer Zebley (Juliet Cummins Wright)",1,,1,0,19996,52,C126,S
671,1,2,"Brown, Mrs. Thomas William Solomon (Elizabeth Catherine Ford)",1,40,1,1,29750,39,,S
672,0,1,"Davidson, Mr. Thornton",0,31,1,0,F.C. 12750,52,B71,S
673,0,2,"Mitchell, Mr. Henry Michael",0,70,0,0,C.A. 24580,10.5,,S
674,1,2,"Wilhelms, Mr. Charles",0,31,0,0,244270,13,,S
675,0,2,"Watson, Mr. Ennis Hastings",0,,0,0,239856,0,,S
676,0,3,"Edvardsson, Mr. Gustaf Hjalmar",0,18,0,0,349912,7.775,,S
677,0,3,"Sawyer, Mr. Frederick Charles",0,24.5,0,0,342826,8.05,,S
678,1,3,"Turja, Miss. Anna Sofia",1,18,0,0,4138,9.8417,,S
679,0,3,"Goodwin, Mrs. Frederick (Augusta Tyler)",1,43,1,6,CA 2144,46.9,,S
680,1,1,"Cardeza, Mr. Thomas Drake Martinez",0,36,0,1,PC 17755,512.3292,B51 B53 B55,C
681,0,3,"Peters, Miss. Katie",1,,0,0,330935,8.1375,,Q
682,1,1,"Hassab, Mr. Hammad",0,27,0,0,PC 17572,76.7292,D49,C
683,0,3,"Olsvigen, Mr. Thor Anderson",0,20,0,0,6563,9.225,,S
684,0,3,"Goodwin, Mr. Charles Edward",0,14,5,2,CA 2144,46.9,,S
685,0,2,"Brown, Mr. Thomas William Solomon",0,60,1,1,29750,39,,S
686,0,2,"Laroche, Mr. Joseph Philippe Lemercier",0,25,1,2,SC/Paris 2123,41.5792,,C
687,0,3,"Panula, Mr. Jaako Arnold",0,14,4,1,3101295,39.6875,,S
688,0,3,"Dakic, Mr. Branko",0,19,0,0,349228,10.1708,,S
689,0,3,"Fischer, Mr. Eberhard Thelander",0,18,0,0,350036,7.7958,,S
690,1,1,"Madill, Miss. Georgette Alexandra",1,15,0,1,24160,211.3375,B5,S
691,1,1,"Dick, Mr. Albert Adrian",0,31,1,0,17474,57,B20,S
692,1,3,"Karun, Miss. Manca",1,4,0,1,349256,13.4167,,C
693,1,3,"Lam, Mr. Ali",0,,0,0,1601,56.4958,,S
694,0,3,"Saad, Mr. Khalil",0,25,0,0,2672,7.225,,C
695,0,1,"Weir, Col. John",0,60,0,0,113800,26.55,,S
696,0,2,"Chapman, Mr. Charles Henry",0,52,0,0,248731,13.5,,S
697,0,3,"Kelly, Mr. James",0,44,0,0,363592,8.05,,S
698,1,3,"Mullens, Miss. Katherine ""Katie""",1,,0,0,35852,7.7333,,Q
699,0,1,"Thayer, Mr. John Borland",0,49,1,1,17421,110.8833,C68,C
700,0,3,"Humblen, Mr. Adolf Mathias Nicolai Olsen",0,42,0,0,348121,7.65,F G63,S
701,1,1,"Astor, Mrs. John Jacob (Madeleine Talmadge Force)",1,18,1,0,PC 17757,227.525,C62 C64,C
702,1,1,"Silverthorne, Mr. Spencer Victor",0,35,0,0,PC 17475,26.2875,E24,S
703,0,3,"Barbara, Miss. Saiide",1,18,0,1,2691,14.4542,,C
704,0,3,"Gallagher, Mr. Martin",0,25,0,0,36864,7.7417,,Q
705,0,3,"Hansen, Mr. Henrik Juul",0,26,1,0,350025,7.8542,,S
706,0,2,"Morley, Mr. Henry Samuel (""Mr Henry Marshall"")",0,39,0,0,250655,26,,S
707,1,2,"Kelly, Mrs. Florence ""Fannie""",1,45,0,0,223596,13.5,,S
708,1,1,"Calderhead, Mr. Edward Pennington",0,42,0,0,PC 17476,26.2875,E24,S
709,1,1,"Cleaver, Miss. Alice",1,22,0,0,113781,151.55,,S
710,1,3,"Moubarek, Master. Halim Gonios (""William George"")",0,,1,1,2661,15.2458,,C
711,1,1,"Mayne, Mlle. Berthe Antonine (""Mrs de Villiers"")",1,24,0,0,PC 17482,49.5042,C90,C
712,0,1,"Klaber, Mr. Herman",0,,0,0,113028,26.55,C124,S
713,1,1,"Taylor, Mr. Elmer Zebley",0,48,1,0,19996,52,C126,S
714,0,3,"Larsson, Mr. August Viktor",0,29,0,0,7545,9.4833,,S
715,0,2,"Greenberg, Mr. Samuel",0,52,0,0,250647,13,,S
716,0,3,"Soholt, Mr. Peter Andreas Lauritz Andersen",0,19,0,0,348124,7.65,F G73,S
717,1,1,"Endres, Miss. Caroline Louise",1,38,0,0,PC 17757,227.525,C45,C
718,1,2,"Troutt, Miss. Edwina Celia ""Winnie""",1,27,0,0,34218,10.5,E101,S
719,0,3,"McEvoy, Mr. Michael",0,,0,0,36568,15.5,,Q
720,0,3,"Johnson, Mr. Malkolm Joackim",0,33,0,0,347062,7.775,,S
721,1,2,"Harper, Miss. Annie Jessie ""Nina""",1,6,0,1,248727,33,,S
722,0,3,"Jensen, Mr. Svend Lauritz",0,17,1,0,350048,7.0542,,S
723,0,2,"Gillespie, Mr. William Henry",0,34,0,0,12233,13,,S
724,0,2,"Hodges, Mr. Henry Price",0,50,0,0,250643,13,,S
725,1,1,"Chambers, Mr. Norman Campbell",0,27,1,0,113806,53.1,E8,S
726,0,3,"Oreskovic, Mr. Luka",0,20,0,0,315094,8.6625,,S
727,1,2,"Renouf, Mrs. Peter Henry (Lillian Jefferys)",1,30,3,0,31027,21,,S
728,1,3,"Mannion, Miss. Margareth",1,,0,0,36866,7.7375,,Q
729,0,2,"Bryhl, Mr. Kurt Arnold Gottfrid",0,25,1,0,236853,26,,S
730,0,3,"Ilmakangas, Miss. Pieta Sofia",1,25,1,0,STON/O2. 3101271,7.925,,S
731,1,1,"Allen, Miss. Elisabeth Walton",1,29,0,0,24160,211.3375,B5,S
732,0,3,"Hassan, Mr. Houssein G N",0,11,0,0,2699,18.7875,,C
733,0,2,"Knight, Mr. Robert J",0,,0,0,239855,0,,S
734,0,2,"Berriman, Mr. William John",0,23,0,0,28425,13,,S
735,0,2,"Troupiansky, Mr. Moses Aaron",0,23,0,0,233639,13,,S
736,0,3,"Williams, Mr. Leslie",0,28.5,0,0,54636,16.1,,S
737,0,3,"Ford, Mrs. Edward (Margaret Ann Watson)",1,48,1,3,W./C. 6608,34.375,,S
738,1,1,"Lesurer, Mr. Gustave J",0,35,0,0,PC 17755,512.3292,B101,C
739,0,3,"Ivanoff, Mr. Kanio",0,,0,0,349201,7.8958,,S
740,0,3,"Nankoff, Mr. Minko",0,,0,0,349218,7.8958,,S
741,1,1,"Hawksford, Mr. Walter James",0,,0,0,16988,30,D45,S
742,0,1,"Cavendish, Mr. Tyrell William",0,36,1,0,19877,78.85,C46,S
743,1,1,"Ryerson, Miss. Susan Parker ""Suzette""",1,21,2,2,PC 17608,262.375,B57 B59 B63 B66,C
744,0,3,"McNamee, Mr. Neal",0,24,1,0,376566,16.1,,S
745,1,3,"Stranden, Mr. Juho",0,31,0,0,STON/O 2. 3101288,7.925,,S
746,0,1,"Crosby, Capt. Edward Gifford",0,70,1,1,WE/P 5735,71,B22,S
747,0,3,"Abbott, Mr. Rossmore Edward",0,16,1,1,C.A. 2673,20.25,,S
748,1,2,"Sinkkonen, Miss. Anna",1,30,0,0,250648,13,,S
749,0,1,"Marvin, Mr. Daniel Warner",0,19,1,0,113773,53.1,D30,S
750,0,3,"Connaghton, Mr. Michael",0,31,0,0,335097,7.75,,Q
751,1,2,"Wells, Miss. Joan",1,4,1,1,29103,23,,S
752,1,3,"Moor, Master. Meier",0,6,0,1,392096,12.475,E121,S
753,0,3,"Vande Velde, Mr. Johannes Joseph",0,33,0,0,345780,9.5,,S
754,0,3,"Jonkoff, Mr. Lalio",0,23,0,0,349204,7.8958,,S
755,1,2,"Herman, Mrs. Samuel (Jane Laver)",1,48,1,2,220845,65,,S
756,1,2,"Hamalainen, Master. Viljo",0,0.67,1,1,250649,14.5,,S
757,0,3,"Carlsson, Mr. August Sigfrid",0,28,0,0,350042,7.7958,,S
758,0,2,"Bailey, Mr. Percy Andrew",0,18,0,0,29108,11.5,,S
759,0,3,"Theobald, Mr. Thomas Leonard",0,34,0,0,363294,8.05,,S
760,1,1,"Rothes, the Countess. of (Lucy Noel Martha Dyer-Edwards)",1,33,0,0,110152,86.5,B77,S
761,0,3,"Garfirth, Mr. John",0,,0,0,358585,14.5,,S
762,0,3,"Nirva, Mr. Iisakki Antino Aijo",0,41,0,0,SOTON/O2 3101272,7.125,,S
763,1,3,"Barah, Mr. Hanna Assi",0,20,0,0,2663,7.2292,,C
764,1,1,"Carter, Mrs. William Ernest (Lucile Polk)",1,36,1,2,113760,120,B96 B98,S
765,0,3,"Eklund, Mr. Hans Linus",0,16,0,0,347074,7.775,,S
766,1,1,"Hogeboom, Mrs. John C (Anna Andrews)",1,51,1,0,13502,77.9583,D11,S
767,0,1,"Brewe, Dr. Arthur Jackson",0,,0,0,112379,39.6,,C
768,0,3,"Mangan, Miss. Mary",1,30.5,0,0,364850,7.75,,Q
769,0,3,"Moran, Mr. Daniel J",0,,1,0,371110,24.15,,Q
770,0,3,"Gronnestad, Mr. Daniel Danielsen",0,32,0,0,8471,8.3625,,S
771,0,3,"Lievens, Mr. Rene Aime",0,24,0,0,345781,9.5,,S
772,0,3,"Jensen, Mr. Niels Peder",0,48,0,0,350047,7.8542,,S
773,0,2,"Mack, Mrs. (Mary)",1,57,0,0,S.O./P.P. 3,10.5,E77,S
774,0,3,"Elias, Mr. Dibo",0,,0,0,2674,7.225,,C
775,1,2,"Hocking, Mrs. Elizabeth (Eliza Needs)",1,54,1,3,29105,23,,S
776,0,3,"Myhrman, Mr. Pehr Fabian Oliver Malkolm",0,18,0,0,347078,7.75,,S
777,0,3,"Tobin, Mr. Roger",0,,0,0,383121,7.75,F38,Q
778,1,3,"Emanuel, Miss. Virginia Ethel",1,5,0,0,364516,12.475,,S
779,0,3,"Kilgannon, Mr. Thomas J",0,,0,0,36865,7.7375,,Q
780,1,1,"Robert, Mrs. Edward Scott (Elisabeth Walton McMillan)",1,43,0,1,24160,211.3375,B3,S
781,1,3,"Ayoub, Miss. Banoura",1,13,0,0,2687,7.2292,,C
782,1,1,"Dick, Mrs. Albert Adrian (Vera Gillespie)",1,17,1,0,17474,57,B20,S
783,0,1,"Long, Mr. Milton Clyde",0,29,0,0,113501,30,D6,S
784,0,3,"Johnston, Mr. Andrew G",0,,1,2,W./C. 6607,23.45,,S
785,0,3,"Ali, Mr. William",0,25,0,0,SOTON/O.Q. 3101312,7.05,,S
786,0,3,"Harmer, Mr. Abraham (David Lishin)",0,25,0,0,374887,7.25,,S
787,1,3,"Sjoblom, Miss. Anna Sofia",1,18,0,0,3101265,7.4958,,S
788,0,3,"Rice, Master. George Hugh",0,8,4,1,382652,29.125,,Q
789,1,3,"Dean, Master. Bertram Vere",0,1,1,2,C.A. 2315,20.575,,S
790,0,1,"Guggenheim, Mr. Benjamin",0,46,0,0,PC 17593,79.2,B82 B84,C
791,0,3,"Keane, Mr. Andrew ""Andy""",0,,0,0,12460,7.75,,Q
792,0,2,"Gaskell, Mr. Alfred",0,16,0,0,239865,26,,S
793,0,3,"Sage, Miss. Stella Anna",1,,8,2,CA. 2343,69.55,,S
794,0,1,"Hoyt, Mr. William Fisher",0,,0,0,PC 17600,30.6958,,C
795,0,3,"Dantcheff, Mr. Ristiu",0,25,0,0,349203,7.8958,,S
796,0,2,"Otter, Mr. Richard",0,39,0,0,28213,13,,S
797,1,1,"Leader, Dr. Alice (Farnham)",1,49,0,0,17465,25.9292,D17,S
798,1,3,"Osman, Mrs. Mara",1,31,0,0,349244,8.6833,,S
799,0,3,"Ibrahim Shawah, Mr. Yousseff",0,30,0,0,2685,7.2292,,C
800,0,3,"Van Impe, Mrs. Jean Baptiste (Rosalie Paula Govaert)",1,30,1,1,345773,24.15,,S
801,0,2,"Ponesell, Mr. Martin",0,34,0,0,250647,13,,S
802,1,2,"Collyer, Mrs. Harvey (Charlotte Annie Tate)",1,31,1,1,C.A. 31921,26.25,,S
803,1,1,"Carter, Master. William Thornton II",0,11,1,2,113760,120,B96 B98,S
804,1,3,"Thomas, Master. Assad Alexander",0,0.42,0,1,2625,8.5167,,C
805,1,3,"Hedman, Mr. Oskar Arvid",0,27,0,0,347089,6.975,,S
806,0,3,"Johansson, Mr. Karl Johan",0,31,0,0,347063,7.775,,S
807,0,1,"Andrews, Mr. Thomas Jr",0,39,0,0,112050,0,A36,S
808,0,3,"Pettersson, Miss. Ellen Natalia",1,18,0,0,347087,7.775,,S
809,0,2,"Meyer, Mr. August",0,39,0,0,248723,13,,S
810,1,1,"Chambers, Mrs. Norman Campbell (Bertha Griggs)",1,33,1,0,113806,53.1,E8,S
811,0,3,"Alexander, Mr. William",0,26,0,0,3474,7.8875,,S
812,0,3,"Lester, Mr. James",0,39,0,0,A/4 48871,24.15,,S
813,0,2,"Slemen, Mr. Richard James",0,35,0,0,28206,10.5,,S
814,0,3,"Andersson, Miss. Ebba Iris Alfrida",1,6,4,2,347082,31.275,,S
815,0,3,"Tomlin, Mr. Ernest Portage",0,30.5,0,0,364499,8.05,,S
816,0,1,"Fry, Mr. Richard",0,,0,0,112058,0,B102,S
817,0,3,"Heininen, Miss. Wendla Maria",1,23,0,0,STON/O2. 3101290,7.925,,S
818,0,2,"Mallet, Mr. Albert",0,31,1,1,S.C./PARIS 2079,37.0042,,C
819,0,3,"Holm, Mr. John Fredrik Alexander",0,43,0,0,C 7075,6.45,,S
820,0,3,"Skoog, Master. Karl Thorsten",0,10,3,2,347088,27.9,,S
821,1,1,"Hays, Mrs. Charles Melville (Clara Jennings Gregg)",1,52,1,1,12749,93.5,B69,S
822,1,3,"Lulic, Mr. Nikola",0,27,0,0,315098,8.6625,,S
823,0,1,"Reuchlin, Jonkheer. John George",0,38,0,0,19972,0,,S
824,1,3,"Moor, Mrs. (Beila)",1,27,0,1,392096,12.475,E121,S
825,0,3,"Panula, Master. Urho Abraham",0,2,4,1,3101295,39.6875,,S
826,0,3,"Flynn, Mr. John",0,,0,0,368323,6.95,,Q
827,0,3,"Lam, Mr. Len",0,,0,0,1601,56.4958,,S
828,1,2,"Mallet, Master. Andre",0,1,0,2,S.C./PARIS 2079,37.0042,,C
829,1,3,"McCormack, Mr. Thomas Joseph",0,,0,0,367228,7.75,,Q
830,1,1,"Stone, Mrs. George Nelson (Martha Evelyn)",1,62,0,0,113572,80,B28,
831,1,3,"Yasbeck, Mrs. Antoni (Selini Alexander)",1,15,1,0,2659,14.4542,,C
832,1,2,"Richards, Master. George Sibley",0,0.83,1,1,29106,18.75,,S
833,0,3,"Saad, Mr. Amin",0,,0,0,2671,7.2292,,C
834,0,3,"Augustsson, Mr. Albert",0,23,0,0,347468,7.8542,,S
835,0,3,"Allum, Mr. Owen George",0,18,0,0,2223,8.3,,S
836,1,1,"Compton, Miss. Sara Rebecca",1,39,1,1,PC 17756,83.1583,E49,C
837,0,3,"Pasic, Mr. Jakob",0,21,0,0,315097,8.6625,,S
838,0,3,"Sirota, Mr. Maurice",0,,0,0,392092,8.05,,S
839,1,3,"Chip, Mr. Chang",0,32,0,0,1601,56.4958,,S
840,1,1,"Marechal, Mr. Pierre",0,,0,0,11774,29.7,C47,C
841,0,3,"Alhomaki, Mr. Ilmari Rudolf",0,20,0,0,SOTON/O2 3101287,7.925,,S
842,0,2,"Mudd, Mr. Thomas Charles",0,16,0,0,S.O./P.P. 3,10.5,,S
843,1,1,"Serepeca, Miss. Augusta",1,30,0,0,113798,31,,C
844,0,3,"Lemberopolous, Mr. Peter L",0,34.5,0,0,2683,6.4375,,C
845,0,3,"Culumovic, Mr. Jeso",0,17,0,0,315090,8.6625,,S
846,0,3,"Abbing, Mr. Anthony",0,42,0,0,C.A. 5547,7.55,,S
847,0,3,"Sage, Mr. Douglas Bullen",0,,8,2,CA. 2343,69.55,,S
848,0,3,"Markoff, Mr. Marin",0,35,0,0,349213,7.8958,,C
849,0,2,"Harper, Rev. John",0,28,0,1,248727,33,,S
850,1,1,"Goldenberg, Mrs. Samuel L (Edwiga Grabowska)",1,,1,0,17453,89.1042,C92,C
851,0,3,"Andersson, Master. Sigvard Harald Elias",0,4,4,2,347082,31.275,,S
852,0,3,"Svensson, Mr. Johan",0,74,0,0,347060,7.775,,S
853,0,3,"Boulos, Miss. Nourelain",1,9,1,1,2678,15.2458,,C
854,1,1,"Lines, Miss. Mary Conover",1,16,0,1,PC 17592,39.4,D28,S
855,0,2,"Carter, Mrs. Ernest Courtenay (Lilian Hughes)",1,44,1,0,244252,26,,S
856,1,3,"Aks, Mrs. Sam (Leah Rosen)",1,18,0,1,392091,9.35,,S
857,1,1,"Wick, Mrs. George Dennick (Mary Hitchcock)",1,45,1,1,36928,164.8667,,S
858,1,1,"Daly, Mr. Peter Denis ",0,51,0,0,113055,26.55,E17,S
859,1,3,"Baclini, Mrs. Solomon (Latifa Qurban)",1,24,0,3,2666,19.2583,,C
860,0,3,"Razi, Mr. Raihed",0,,0,0,2629,7.2292,,C
861,0,3,"Hansen, Mr. Claus Peter",0,41,2,0,350026,14.1083,,S
862,0,2,"Giles, Mr. Frederick Edward",0,21,1,0,28134,11.5,,S
863,1,1,"Swift, Mrs. Frederick Joel (Margaret Welles Barron)",1,48,0,0,17466,25.9292,D17,S
864,0,3,"Sage, Miss. Dorothy Edith ""Dolly""",1,,8,2,CA. 2343,69.55,,S
865,0,2,"Gill, Mr. John William",0,24,0,0,233866,13,,S
866,1,2,"Bystrom, Mrs. (Karolina)",1,42,0,0,236852,13,,S
867,1,2,"Duran y More, Miss. Asuncion",1,27,1,0,SC/PARIS 2149,13.8583,,C
868,0,1,"Roebling, Mr. Washington Augustus II",0,31,0,0,PC 17590,50.4958,A24,S
869,0,3,"van Melkebeke, Mr. Philemon",0,,0,0,345777,9.5,,S
870,1,3,"Johnson, Master. Harold Theodor",0,4,1,1,347742,11.1333,,S
871,0,3,"Balkic, Mr. Cerin",0,26,0,0,349248,7.8958,,S
872,1,1,"Beckwith, Mrs. Richard Leonard (Sallie Monypeny)",1,47,1,1,11751,52.5542,D35,S
873,0,1,"Carlsson, Mr. Frans Olof",0,33,0,0,695,5,B51 B53 B55,S
874,0,3,"Vander Cruyssen, Mr. Victor",0,47,0,0,345765,9,,S
875,1,2,"Abelson, Mrs. Samuel (Hannah Wizosky)",1,28,1,0,P/PP 3381,24,,C
876,1,3,"Najib, Miss. Adele Kiamie ""Jane""",1,15,0,0,2667,7.225,,C
877,0,3,"Gustafsson, Mr. Alfred Ossian",0,20,0,0,7534,9.8458,,S
878,0,3,"Petroff, Mr. Nedelio",0,19,0,0,349212,7.8958,,S
879,0,3,"Laleff, Mr. Kristo",0,,0,0,349217,7.8958,,S
880,1,1,"Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)",1,56,0,1,11767,83.1583,C50,C
881,1,2,"Shelley, Mrs. William (Imanita Parrish Hall)",1,25,0,1,230433,26,,S
882,0,3,"Markun, Mr. Johann",0,33,0,0,349257,7.8958,,S
883,0,3,"Dahlberg, Miss. Gerda Ulrika",1,22,0,0,7552,10.5167,,S
884,0,2,"Banfield, Mr. Frederick James",0,28,0,0,C.A./SOTON 34068,10.5,,S
885,0,3,"Sutehall, Mr. Henry Jr",0,25,0,0,SOTON/OQ 392076,7.05,,S
886,0,3,"Rice, Mrs. William (Margaret Norton)",1,39,0,5,382652,29.125,,Q
887,0,2,"Montvila, Rev. Juozas",0,27,0,0,211536,13,,S
888,1,1,"Graham, Miss. Margaret Edith",1,19,0,0,112053,30,B42,S
889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",1,,1,2,W./C. 6607,23.45,,S
890,1,1,"Behr, Mr. Karl Howell",0,26,0,0,111369,30,C148,C
891,0,3,"Dooley, Mr. Patrick",0,32,0,0,370376,7.75,,Q
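The rows above come from a preprocessed copy of the Titanic passenger data: in the rows shown, the fifth field appears to be a 0/1 encoding of sex rather than the usual male/female strings, and Age and Cabin are frequently empty. As a minimal sketch of how a notebook might load this file, assuming it is saved as titanic.csv and that its header row (outside this excerpt) names the standard Titanic columns; the column list and the median-age fill below are illustrative assumptions, not part of this diff:

```python
import pandas as pd

# Assumed column order, matching the rows shown above; the actual header
# row is earlier in the file and outside this excerpt.
COLUMNS = [
    "PassengerId", "Survived", "Pclass", "Name", "Sex",
    "Age", "SibSp", "Parch", "Ticket", "Fare", "Cabin", "Embarked",
]

# Skip the file's own header row and apply the assumed names; empty
# fields (e.g. missing Age or Cabin) parse as NaN.
df = pd.read_csv("titanic.csv", header=0, names=COLUMNS)

# A common baseline before modeling: fill missing ages with the median.
df["Age"] = df["Age"].fillna(df["Age"].median())
print(df[["Survived", "Pclass", "Sex", "Age", "Fare"]].describe())
```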
130
index.md
Normal file
@@ -0,0 +1,130 @@
# Index

Azure Machine Learning is a cloud service that you use to train, deploy, automate, and manage machine learning models. This index should assist in navigating the Azure Machine Learning notebook samples and encourage efficient retrieval of topics and content.

## Getting Started

|Title| Task | Dataset | Training Compute | Deployment Target | ML Framework |
|:----|:-----|:-------:|:----------------:|:-----------------:|:------------:|

## Tutorials

|Title| Task | Dataset | Training Compute | Deployment Target | ML Framework |
|:----|:-----|:-------:|:----------------:|:-----------------:|:------------:|

## Training

|Title| Task | Dataset | Training Compute | Deployment Target | ML Framework |
|:----|:-----|:-------:|:----------------:|:-----------------:|:------------:|
| [Pipeline test](https://github.com/Azure/MachineLearningNotebooks/blob/master//machine-learning-pipelines/pipeline-style-transfer/pipeline-style-transfer.ipynb) | Regression | NYC Taxi | local | None | Azure ML AutoML |

## Deployment

|Title| Task | Dataset | Training Compute | Deployment Target | ML Framework |
|:----|:-----|:-------:|:----------------:|:-----------------:|:------------:|
| :star:[Prepare data for regression modeling](https://github.com/Azure/MachineLearningNotebooks/blob/master//deployment/production-deploy-to-aks/production-deploy-to-aks.ipynb) | Regression | test | localtest | AKS | test1 |

## Other Notebooks

|Title| Task | Dataset | Training Compute | Deployment Target | ML Framework |
|:----|:-----|:-------:|:----------------:|:-----------------:|:------------:|
| [auto-ml-classification-bank-marketing](https://github.com/Azure/MachineLearningNotebooks/blob/master//automated-machine-learning/classification-bank-marketing/auto-ml-classification-bank-marketing.ipynb) | | | | | |
| [auto-ml-classification-credit-card-fraud](https://github.com/Azure/MachineLearningNotebooks/blob/master//automated-machine-learning/classification-credit-card-fraud/auto-ml-classification-credit-card-fraud.ipynb) | | | | | |
| [auto-ml-classification-with-deployment](https://github.com/Azure/MachineLearningNotebooks/blob/master//automated-machine-learning/classification-with-deployment/auto-ml-classification-with-deployment.ipynb) | | | | | |
| [auto-ml-classification-with-onnx](https://github.com/Azure/MachineLearningNotebooks/blob/master//automated-machine-learning/classification-with-onnx/auto-ml-classification-with-onnx.ipynb) | | | | | |
| [auto-ml-classification-with-whitelisting](https://github.com/Azure/MachineLearningNotebooks/blob/master//automated-machine-learning/classification-with-whitelisting/auto-ml-classification-with-whitelisting.ipynb) | | | | | |
| [auto-ml-dataprep](https://github.com/Azure/MachineLearningNotebooks/blob/master//automated-machine-learning/dataprep/auto-ml-dataprep.ipynb) | | | | | |
| [auto-ml-dataprep-remote-execution](https://github.com/Azure/MachineLearningNotebooks/blob/master//automated-machine-learning/dataprep-remote-execution/auto-ml-dataprep-remote-execution.ipynb) | | | | | |
| [auto-ml-exploring-previous-runs](https://github.com/Azure/MachineLearningNotebooks/blob/master//automated-machine-learning/exploring-previous-runs/auto-ml-exploring-previous-runs.ipynb) | | | | | |
| [auto-ml-forecasting-bike-share](https://github.com/Azure/MachineLearningNotebooks/blob/master//automated-machine-learning/forecasting-bike-share/auto-ml-forecasting-bike-share.ipynb) | | | | | |
| [auto-ml-forecasting-energy-demand](https://github.com/Azure/MachineLearningNotebooks/blob/master//automated-machine-learning/forecasting-energy-demand/auto-ml-forecasting-energy-demand.ipynb) | | | | | |
| [auto-ml-forecasting-orange-juice-sales](https://github.com/Azure/MachineLearningNotebooks/blob/master//automated-machine-learning/forecasting-orange-juice-sales/auto-ml-forecasting-orange-juice-sales.ipynb) | | | | | |
| [auto-ml-missing-data-blacklist-early-termination](https://github.com/Azure/MachineLearningNotebooks/blob/master//automated-machine-learning/missing-data-blacklist-early-termination/auto-ml-missing-data-blacklist-early-termination.ipynb) | | | | | |
| [auto-ml-model-explanation](https://github.com/Azure/MachineLearningNotebooks/blob/master//automated-machine-learning/model-explanation/auto-ml-model-explanation.ipynb) | | | | | |
| [auto-ml-regression](https://github.com/Azure/MachineLearningNotebooks/blob/master//automated-machine-learning/regression/auto-ml-regression.ipynb) | | | | | |
| [auto-ml-regression-concrete-strength](https://github.com/Azure/MachineLearningNotebooks/blob/master//automated-machine-learning/regression-concrete-strength/auto-ml-regression-concrete-strength.ipynb) | | | | | |
| [auto-ml-regression-hardware-performance](https://github.com/Azure/MachineLearningNotebooks/blob/master//automated-machine-learning/regression-hardware-performance/auto-ml-regression-hardware-performance.ipynb) | | | | | |
| [auto-ml-remote-amlcompute](https://github.com/Azure/MachineLearningNotebooks/blob/master//automated-machine-learning/remote-amlcompute/auto-ml-remote-amlcompute.ipynb) | | | | | |
| [auto-ml-remote-amlcompute-with-onnx](https://github.com/Azure/MachineLearningNotebooks/blob/master//automated-machine-learning/remote-amlcompute-with-onnx/auto-ml-remote-amlcompute-with-onnx.ipynb) | | | | | |
| [auto-ml-sample-weight](https://github.com/Azure/MachineLearningNotebooks/blob/master//automated-machine-learning/sample-weight/auto-ml-sample-weight.ipynb) | | | | | |
| [auto-ml-sparse-data-train-test-split](https://github.com/Azure/MachineLearningNotebooks/blob/master//automated-machine-learning/sparse-data-train-test-split/auto-ml-sparse-data-train-test-split.ipynb) | | | | | |
| [auto-ml-sql-energy-demand](https://github.com/Azure/MachineLearningNotebooks/blob/master//automated-machine-learning/sql-server/energy-demand/auto-ml-sql-energy-demand.ipynb) | | | | | |
| [auto-ml-sql-setup](https://github.com/Azure/MachineLearningNotebooks/blob/master//automated-machine-learning/sql-server/setup/auto-ml-sql-setup.ipynb) | | | | | |
| [auto-ml-subsampling-local](https://github.com/Azure/MachineLearningNotebooks/blob/master//automated-machine-learning/subsampling/auto-ml-subsampling-local.ipynb) | | | | | |
| [build-model-run-history-03](https://github.com/Azure/MachineLearningNotebooks/blob/master//azure-databricks/amlsdk/build-model-run-history-03.ipynb) | | | | | |
| [deploy-to-aci-04](https://github.com/Azure/MachineLearningNotebooks/blob/master//azure-databricks/amlsdk/deploy-to-aci-04.ipynb) | | | | | |
| [deploy-to-aks-existingimage-05](https://github.com/Azure/MachineLearningNotebooks/blob/master//azure-databricks/amlsdk/deploy-to-aks-existingimage-05.ipynb) | | | | | |
| [ingest-data-02](https://github.com/Azure/MachineLearningNotebooks/blob/master//azure-databricks/amlsdk/ingest-data-02.ipynb) | | | | | |
| [installation-and-configuration-01](https://github.com/Azure/MachineLearningNotebooks/blob/master//azure-databricks/amlsdk/installation-and-configuration-01.ipynb) | | | | | |
| [automl-databricks-local-01](https://github.com/Azure/MachineLearningNotebooks/blob/master//azure-databricks/automl/automl-databricks-local-01.ipynb) | | | | | |
| [automl-databricks-local-with-deployment](https://github.com/Azure/MachineLearningNotebooks/blob/master//azure-databricks/automl/automl-databricks-local-with-deployment.ipynb) | | | | | |
| [aml-pipelines-use-databricks-as-compute-target](https://github.com/Azure/MachineLearningNotebooks/blob/master//azure-databricks/databricks-as-remote-compute-target/aml-pipelines-use-databricks-as-compute-target.ipynb) | | | | | |
| [automl_hdi_local_classification](https://github.com/Azure/MachineLearningNotebooks/blob/master//azure-hdi/automl_hdi_local_classification.ipynb) | | | | | |
| [model-register-and-deploy](https://github.com/Azure/MachineLearningNotebooks/blob/master//deploy-to-cloud/model-register-and-deploy.ipynb) | | | | | |
| [register-model-deploy-local-advanced](https://github.com/Azure/MachineLearningNotebooks/blob/master//deploy-to-local/register-model-deploy-local-advanced.ipynb) | | | | | |
| [register-model-deploy-local](https://github.com/Azure/MachineLearningNotebooks/blob/master//deploy-to-local/register-model-deploy-local.ipynb) | | | | | |
| [accelerated-models-object-detection](https://github.com/Azure/MachineLearningNotebooks/blob/master//deployment/accelerated-models/accelerated-models-object-detection.ipynb) | | | | | |
| [accelerated-models-quickstart](https://github.com/Azure/MachineLearningNotebooks/blob/master//deployment/accelerated-models/accelerated-models-quickstart.ipynb) | | | | | |
| [accelerated-models-training](https://github.com/Azure/MachineLearningNotebooks/blob/master//deployment/accelerated-models/accelerated-models-training.ipynb) | | | | | |
| [enable-app-insights-in-production-service](https://github.com/Azure/MachineLearningNotebooks/blob/master//deployment/enable-app-insights-in-production-service/enable-app-insights-in-production-service.ipynb) | | | | | |
| [enable-data-collection-for-models-in-aks](https://github.com/Azure/MachineLearningNotebooks/blob/master//deployment/enable-data-collection-for-models-in-aks/enable-data-collection-for-models-in-aks.ipynb) | | | | | |
| [onnx-convert-aml-deploy-tinyyolo](https://github.com/Azure/MachineLearningNotebooks/blob/master//deployment/onnx/onnx-convert-aml-deploy-tinyyolo.ipynb) | | | | | |
| [onnx-inference-facial-expression-recognition-deploy](https://github.com/Azure/MachineLearningNotebooks/blob/master//deployment/onnx/onnx-inference-facial-expression-recognition-deploy.ipynb) | | | | | |
| [onnx-inference-mnist-deploy](https://github.com/Azure/MachineLearningNotebooks/blob/master//deployment/onnx/onnx-inference-mnist-deploy.ipynb) | | | | | |
| [onnx-modelzoo-aml-deploy-resnet50](https://github.com/Azure/MachineLearningNotebooks/blob/master//deployment/onnx/onnx-modelzoo-aml-deploy-resnet50.ipynb) | | | | | |
| [onnx-train-pytorch-aml-deploy-mnist](https://github.com/Azure/MachineLearningNotebooks/blob/master//deployment/onnx/onnx-train-pytorch-aml-deploy-mnist.ipynb) | | | | | |
| [production-deploy-to-aks-gpu](https://github.com/Azure/MachineLearningNotebooks/blob/master//deployment/production-deploy-to-aks-gpu/production-deploy-to-aks-gpu.ipynb) | | | | | |
| [register-model-create-image-deploy-service](https://github.com/Azure/MachineLearningNotebooks/blob/master//deployment/register-model-create-image-deploy-service/register-model-create-image-deploy-service.ipynb) | | | | | |
| [save-retrieve-explanations-run-history](https://github.com/Azure/MachineLearningNotebooks/blob/master//explain-model/azure-integration/run-history/save-retrieve-explanations-run-history.ipynb) | | | | | |
| [train-explain-model-locally-and-deploy](https://github.com/Azure/MachineLearningNotebooks/blob/master//explain-model/azure-integration/scoring-time/train-explain-model-locally-and-deploy.ipynb) | | | | | |
| [train-explain-model-on-amlcompute-and-deploy](https://github.com/Azure/MachineLearningNotebooks/blob/master//explain-model/azure-integration/scoring-time/train-explain-model-on-amlcompute-and-deploy.ipynb) | | | | | |
| [advanced-feature-transformations-explain-local](https://github.com/Azure/MachineLearningNotebooks/blob/master//explain-model/tabular-data/advanced-feature-transformations-explain-local.ipynb) | | | | | |
| [explain-binary-classification-local](https://github.com/Azure/MachineLearningNotebooks/blob/master//explain-model/tabular-data/explain-binary-classification-local.ipynb) | | | | | |
| [explain-multiclass-classification-local](https://github.com/Azure/MachineLearningNotebooks/blob/master//explain-model/tabular-data/explain-multiclass-classification-local.ipynb) | | | | | |
| [explain-regression-local](https://github.com/Azure/MachineLearningNotebooks/blob/master//explain-model/tabular-data/explain-regression-local.ipynb) | | | | | |
| [simple-feature-transformations-explain-local](https://github.com/Azure/MachineLearningNotebooks/blob/master//explain-model/tabular-data/simple-feature-transformations-explain-local.ipynb) | | | | | |
| [aml-pipelines-data-transfer](https://github.com/Azure/MachineLearningNotebooks/blob/master//machine-learning-pipelines/intro-to-pipelines/aml-pipelines-data-transfer.ipynb) | | | | | |
| [aml-pipelines-getting-started](https://github.com/Azure/MachineLearningNotebooks/blob/master//machine-learning-pipelines/intro-to-pipelines/aml-pipelines-getting-started.ipynb) | | | | | |
| [aml-pipelines-how-to-use-azurebatch-to-run-a-windows-executable](https://github.com/Azure/MachineLearningNotebooks/blob/master//machine-learning-pipelines/intro-to-pipelines/aml-pipelines-how-to-use-azurebatch-to-run-a-windows-executable.ipynb) | | | | | |
| [aml-pipelines-how-to-use-estimatorstep](https://github.com/Azure/MachineLearningNotebooks/blob/master//machine-learning-pipelines/intro-to-pipelines/aml-pipelines-how-to-use-estimatorstep.ipynb) | | | | | |
| [aml-pipelines-parameter-tuning-with-hyperdrive](https://github.com/Azure/MachineLearningNotebooks/blob/master//machine-learning-pipelines/intro-to-pipelines/aml-pipelines-parameter-tuning-with-hyperdrive.ipynb) | | | | | |
| [aml-pipelines-publish-and-run-using-rest-endpoint](https://github.com/Azure/MachineLearningNotebooks/blob/master//machine-learning-pipelines/intro-to-pipelines/aml-pipelines-publish-and-run-using-rest-endpoint.ipynb) | | | | | |
| [aml-pipelines-setup-schedule-for-a-published-pipeline](https://github.com/Azure/MachineLearningNotebooks/blob/master//machine-learning-pipelines/intro-to-pipelines/aml-pipelines-setup-schedule-for-a-published-pipeline.ipynb) | | | | | |
|
||||
| [aml-pipelines-setup-versioned-pipeline-endpoints](https://github.com/Azure/MachineLearningNotebooks/blob/master//machine-learning-pipelines/intro-to-pipelines/aml-pipelines-setup-versioned-pipeline-endpoints.ipynb) | | | | | |
|
||||
| [aml-pipelines-use-adla-as-compute-target](https://github.com/Azure/MachineLearningNotebooks/blob/master//machine-learning-pipelines/intro-to-pipelines/aml-pipelines-use-adla-as-compute-target.ipynb) | | | | | |
|
||||
| [aml-pipelines-use-databricks-as-compute-target](https://github.com/Azure/MachineLearningNotebooks/blob/master//machine-learning-pipelines/intro-to-pipelines/aml-pipelines-use-databricks-as-compute-target.ipynb) | | | | | |
|
||||
| [aml-pipelines-with-automated-machine-learning-step](https://github.com/Azure/MachineLearningNotebooks/blob/master//machine-learning-pipelines/intro-to-pipelines/aml-pipelines-with-automated-machine-learning-step.ipynb) | | | | | |
|
||||
| [aml-pipelines-with-data-dependency-steps](https://github.com/Azure/MachineLearningNotebooks/blob/master//machine-learning-pipelines/intro-to-pipelines/aml-pipelines-with-data-dependency-steps.ipynb) | | | | | |
|
||||
| [nyc-taxi-data-regression-model-building](https://github.com/Azure/MachineLearningNotebooks/blob/master//machine-learning-pipelines/nyc-taxi-data-regression-model-building/nyc-taxi-data-regression-model-building.ipynb) | | | | | |
|
||||
| [pipeline-batch-scoring](https://github.com/Azure/MachineLearningNotebooks/blob/master//machine-learning-pipelines/pipeline-batch-scoring/pipeline-batch-scoring.ipynb) | | | | | |
|
||||
| [authentication-in-azureml](https://github.com/Azure/MachineLearningNotebooks/blob/master//manage-azureml-service/authentication-in-azureml/authentication-in-azureml.ipynb) | | | | | |
|
||||
| [azure-ml-datadrift](https://github.com/Azure/MachineLearningNotebooks/blob/master//monitor-models/data-drift/azure-ml-datadrift.ipynb) | | | | | |
|
||||
| [logging-api](https://github.com/Azure/MachineLearningNotebooks/blob/master//training/logging-api/logging-api.ipynb) | | | | | |
|
||||
| [manage-runs](https://github.com/Azure/MachineLearningNotebooks/blob/master//training/manage-runs/manage-runs.ipynb) | | | | | |
|
||||
| [train-hyperparameter-tune-deploy-with-sklearn](https://github.com/Azure/MachineLearningNotebooks/blob/master//training/train-hyperparameter-tune-deploy-with-sklearn/train-hyperparameter-tune-deploy-with-sklearn.ipynb) | | | | | |
|
||||
| [train-in-spark](https://github.com/Azure/MachineLearningNotebooks/blob/master//training/train-in-spark/train-in-spark.ipynb) | | | | | |
|
||||
| [train-on-amlcompute](https://github.com/Azure/MachineLearningNotebooks/blob/master//training/train-on-amlcompute/train-on-amlcompute.ipynb) | | | | | |
|
||||
| [train-on-local](https://github.com/Azure/MachineLearningNotebooks/blob/master//training/train-on-local/train-on-local.ipynb) | | | | | |
|
||||
| [train-on-remote-vm](https://github.com/Azure/MachineLearningNotebooks/blob/master//training/train-on-remote-vm/train-on-remote-vm.ipynb) | | | | | |
|
||||
| [train-within-notebook](https://github.com/Azure/MachineLearningNotebooks/blob/master//training/train-within-notebook/train-within-notebook.ipynb) | | | | | |
|
||||
| [using-environments](https://github.com/Azure/MachineLearningNotebooks/blob/master//training/using-environments/using-environments.ipynb) | | | | | |
|
||||
| [distributed-chainer](https://github.com/Azure/MachineLearningNotebooks/blob/master//training-with-deep-learning/distributed-chainer/distributed-chainer.ipynb) | | | | | |
|
||||
| [distributed-cntk-with-custom-docker](https://github.com/Azure/MachineLearningNotebooks/blob/master//training-with-deep-learning/distributed-cntk-with-custom-docker/distributed-cntk-with-custom-docker.ipynb) | | | | | |
|
||||
| [distributed-pytorch-with-horovod](https://github.com/Azure/MachineLearningNotebooks/blob/master//training-with-deep-learning/distributed-pytorch-with-horovod/distributed-pytorch-with-horovod.ipynb) | | | | | |
|
||||
| [distributed-tensorflow-with-horovod](https://github.com/Azure/MachineLearningNotebooks/blob/master//training-with-deep-learning/distributed-tensorflow-with-horovod/distributed-tensorflow-with-horovod.ipynb) | | | | | |
|
||||
| [distributed-tensorflow-with-parameter-server](https://github.com/Azure/MachineLearningNotebooks/blob/master//training-with-deep-learning/distributed-tensorflow-with-parameter-server/distributed-tensorflow-with-parameter-server.ipynb) | | | | | |
|
||||
| [export-run-history-to-tensorboard](https://github.com/Azure/MachineLearningNotebooks/blob/master//training-with-deep-learning/export-run-history-to-tensorboard/export-run-history-to-tensorboard.ipynb) | | | | | |
|
||||
| [how-to-use-estimator](https://github.com/Azure/MachineLearningNotebooks/blob/master//training-with-deep-learning/how-to-use-estimator/how-to-use-estimator.ipynb) | | | | | |
|
||||
| [notebook_example](https://github.com/Azure/MachineLearningNotebooks/blob/master//training-with-deep-learning/how-to-use-estimator/notebook_example.ipynb) | | | | | |
|
||||
| [tensorboard](https://github.com/Azure/MachineLearningNotebooks/blob/master//training-with-deep-learning/tensorboard/tensorboard.ipynb) | | | | | |
|
||||
| [train-hyperparameter-tune-deploy-with-chainer](https://github.com/Azure/MachineLearningNotebooks/blob/master//training-with-deep-learning/train-hyperparameter-tune-deploy-with-chainer/train-hyperparameter-tune-deploy-with-chainer.ipynb) | | | | | |
|
||||
| [train-hyperparameter-tune-deploy-with-keras](https://github.com/Azure/MachineLearningNotebooks/blob/master//training-with-deep-learning/train-hyperparameter-tune-deploy-with-keras/train-hyperparameter-tune-deploy-with-keras.ipynb) | | | | | |
|
||||
| [train-hyperparameter-tune-deploy-with-pytorch](https://github.com/Azure/MachineLearningNotebooks/blob/master//training-with-deep-learning/train-hyperparameter-tune-deploy-with-pytorch/train-hyperparameter-tune-deploy-with-pytorch.ipynb) | | | | | |
|
||||
| [train-hyperparameter-tune-deploy-with-tensorflow](https://github.com/Azure/MachineLearningNotebooks/blob/master//training-with-deep-learning/train-hyperparameter-tune-deploy-with-tensorflow/train-hyperparameter-tune-deploy-with-tensorflow.ipynb) | | | | | |
|
||||
| [train-tensorflow-resume-training](https://github.com/Azure/MachineLearningNotebooks/blob/master//training-with-deep-learning/train-tensorflow-resume-training/train-tensorflow-resume-training.ipynb) | | | | | |
|
||||
| [deploy-model](https://github.com/Azure/MachineLearningNotebooks/blob/master//using-mlflow/deploy-model/deploy-model.ipynb) | | | | | |
|
||||
| [train-and-deploy-pytorch](https://github.com/Azure/MachineLearningNotebooks/blob/master//using-mlflow/train-deploy-pytorch/train-and-deploy-pytorch.ipynb) | | | | | |
|
||||
| [train-local](https://github.com/Azure/MachineLearningNotebooks/blob/master//using-mlflow/train-local/train-local.ipynb) | | | | | |
|
||||
| [train-remote](https://github.com/Azure/MachineLearningNotebooks/blob/master//using-mlflow/train-remote/train-remote.ipynb) | | | | | |
|
||||
190 index2.md Normal file
@@ -0,0 +1,190 @@
# Index

Azure Machine Learning is a cloud service that you use to train, deploy, automate, and manage machine learning models. This index should help you navigate the Azure Machine Learning notebook samples and find topics and content efficiently.



## Getting Started

|Title| Task | Dataset | Training Compute | Deployment Target | ML Framework | Tags |
|:----|:-----|:-------:|:----------------:|:-----------------:|:------------:|:------------:|

## Tutorials

|Title| Task | Dataset | Training Compute | Deployment Target | ML Framework | Tags |
|:----|:-----|:-------:|:----------------:|:-----------------:|:------------:|:------------:|

## Training

|Title| Task | Dataset | Training Compute | Deployment Target | ML Framework | Tags |
|:----|:-----|:-------:|:----------------:|:-----------------:|:------------:|:------------:|

## Deployment

|Title| Task | Dataset | Training Compute | Deployment Target | ML Framework | Tags |
|:----|:-----|:-------:|:----------------:|:-----------------:|:------------:|:------------:|

## Other Notebooks

|Title| Task | Dataset | Training Compute | Deployment Target | ML Framework | Tags |
|:----|:-----|:-------:|:----------------:|:-----------------:|:------------:|:------------:|
| [Logging APIs](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/track-and-monitor-experiments/logging-api/logging-api.ipynb) | Logging APIs and analyzing results | None | None | None | None | None |
| [configuration](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/configuration.ipynb) | | | | | | |
| [azure-ml-with-nvidia-rapids](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/contrib/RAPIDS/azure-ml-with-nvidia-rapids.ipynb) | | | | | | |
| [auto-ml-classification](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/classification/auto-ml-classification.ipynb) | | | | | | |
| [auto-ml-classification-bank-marketing](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/classification-bank-marketing/auto-ml-classification-bank-marketing.ipynb) | | | | | | |
| [auto-ml-classification-credit-card-fraud](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/classification-credit-card-fraud/auto-ml-classification-credit-card-fraud.ipynb) | | | | | | |
| [auto-ml-classification-with-deployment](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/classification-with-deployment/auto-ml-classification-with-deployment.ipynb) | | | | | | |
| [auto-ml-classification-with-onnx](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/classification-with-onnx/auto-ml-classification-with-onnx.ipynb) | | | | | | |
| [auto-ml-classification-with-whitelisting](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/classification-with-whitelisting/auto-ml-classification-with-whitelisting.ipynb) | | | | | | |
| [auto-ml-dataset](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/dataset/auto-ml-dataset.ipynb) | | | | | | |
| [auto-ml-dataset-remote-execution](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/dataset-remote-execution/auto-ml-dataset-remote-execution.ipynb) | | | | | | |
| [auto-ml-exploring-previous-runs](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/exploring-previous-runs/auto-ml-exploring-previous-runs.ipynb) | | | | | | |
| [auto-ml-forecasting-bike-share](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/forecasting-bike-share/auto-ml-forecasting-bike-share.ipynb) | | | | | | |
| [auto-ml-forecasting-energy-demand](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/forecasting-energy-demand/auto-ml-forecasting-energy-demand.ipynb) | | | | | | |
| [auto-ml-forecasting-orange-juice-sales](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/forecasting-orange-juice-sales/auto-ml-forecasting-orange-juice-sales.ipynb) | | | | | | |
| [auto-ml-missing-data-blacklist-early-termination](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/missing-data-blacklist-early-termination/auto-ml-missing-data-blacklist-early-termination.ipynb) | | | | | | |
| [auto-ml-model-explanation](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/model-explanation/auto-ml-model-explanation.ipynb) | | | | | | |
| [auto-ml-regression](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/regression/auto-ml-regression.ipynb) | | | | | | |
| [auto-ml-regression-concrete-strength](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/regression-concrete-strength/auto-ml-regression-concrete-strength.ipynb) | | | | | | |
| [auto-ml-regression-hardware-performance](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/regression-hardware-performance/auto-ml-regression-hardware-performance.ipynb) | | | | | | |
| [auto-ml-remote-amlcompute](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/remote-amlcompute/auto-ml-remote-amlcompute.ipynb) | | | | | | |
| [auto-ml-remote-amlcompute-with-onnx](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/remote-amlcompute-with-onnx/auto-ml-remote-amlcompute-with-onnx.ipynb) | | | | | | |
| [auto-ml-sample-weight](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/sample-weight/auto-ml-sample-weight.ipynb) | | | | | | |
| [auto-ml-sparse-data-train-test-split](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/sparse-data-train-test-split/auto-ml-sparse-data-train-test-split.ipynb) | | | | | | |
| [auto-ml-sql-energy-demand](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/sql-server/energy-demand/auto-ml-sql-energy-demand.ipynb) | | | | | | |
| [auto-ml-sql-setup](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/sql-server/setup/auto-ml-sql-setup.ipynb) | | | | | | |
| [auto-ml-subsampling-local](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/subsampling/auto-ml-subsampling-local.ipynb) | | | | | | |
| [build-model-run-history-03](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/azure-databricks/amlsdk/build-model-run-history-03.ipynb) | | | | | | |
| [deploy-to-aci-04](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/azure-databricks/amlsdk/deploy-to-aci-04.ipynb) | | | | | | |
| [deploy-to-aks-existingimage-05](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/azure-databricks/amlsdk/deploy-to-aks-existingimage-05.ipynb) | | | | | | |
| [ingest-data-02](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/azure-databricks/amlsdk/ingest-data-02.ipynb) | | | | | | |
| [installation-and-configuration-01](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/azure-databricks/amlsdk/installation-and-configuration-01.ipynb) | | | | | | |
| [automl-databricks-local-01](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/azure-databricks/automl/automl-databricks-local-01.ipynb) | | | | | | |
| [automl-databricks-local-with-deployment](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/azure-databricks/automl/automl-databricks-local-with-deployment.ipynb) | | | | | | |
| [aml-pipelines-use-databricks-as-compute-target](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/azure-databricks/databricks-as-remote-compute-target/aml-pipelines-use-databricks-as-compute-target.ipynb) | | | | | | |
| [automl_hdi_local_classification](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/azure-hdi/automl_hdi_local_classification.ipynb) | | | | | | |
| [model-register-and-deploy](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/deploy-to-cloud/model-register-and-deploy.ipynb) | | | | | | |
| [register-model-deploy-local-advanced](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/deploy-to-local/register-model-deploy-local-advanced.ipynb) | | | | | | |
| [register-model-deploy-local](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/deploy-to-local/register-model-deploy-local.ipynb) | | | | | | |
| [accelerated-models-object-detection](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/deployment/accelerated-models/accelerated-models-object-detection.ipynb) | | | | | | |
| [accelerated-models-quickstart](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/deployment/accelerated-models/accelerated-models-quickstart.ipynb) | | | | | | |
| [accelerated-models-training](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/deployment/accelerated-models/accelerated-models-training.ipynb) | | | | | | |
| [model-register-and-deploy](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/deployment/deploy-to-cloud/model-register-and-deploy.ipynb) | | | | | | |
| [register-model-deploy-local-advanced](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/deployment/deploy-to-local/register-model-deploy-local-advanced.ipynb) | | | | | | |
| [register-model-deploy-local](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/deployment/deploy-to-local/register-model-deploy-local.ipynb) | | | | | | |
| [enable-app-insights-in-production-service](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/deployment/enable-app-insights-in-production-service/enable-app-insights-in-production-service.ipynb) | | | | | | |
| [enable-data-collection-for-models-in-aks](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/deployment/enable-data-collection-for-models-in-aks/enable-data-collection-for-models-in-aks.ipynb) | | | | | | |
| [onnx-convert-aml-deploy-tinyyolo](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/deployment/onnx/onnx-convert-aml-deploy-tinyyolo.ipynb) | | | | | | |
| [onnx-inference-facial-expression-recognition-deploy](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/deployment/onnx/onnx-inference-facial-expression-recognition-deploy.ipynb) | | | | | | |
| [onnx-inference-mnist-deploy](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/deployment/onnx/onnx-inference-mnist-deploy.ipynb) | | | | | | |
| [onnx-modelzoo-aml-deploy-resnet50](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/deployment/onnx/onnx-modelzoo-aml-deploy-resnet50.ipynb) | | | | | | |
| [onnx-train-pytorch-aml-deploy-mnist](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/deployment/onnx/onnx-train-pytorch-aml-deploy-mnist.ipynb) | | | | | | |
| [production-deploy-to-aks](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/deployment/production-deploy-to-aks/production-deploy-to-aks.ipynb) | | | | | | |
| [production-deploy-to-aks-gpu](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/deployment/production-deploy-to-aks-gpu/production-deploy-to-aks-gpu.ipynb) | | | | | | |
| [register-model-create-image-deploy-service](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/deployment/register-model-create-image-deploy-service/register-model-create-image-deploy-service.ipynb) | | | | | | |
| [explain-model-on-amlcompute](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/explain-model/azure-integration/remote-explanation/explain-model-on-amlcompute.ipynb) | | | | | | |
| [save-retrieve-explanations-run-history](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/explain-model/azure-integration/run-history/save-retrieve-explanations-run-history.ipynb) | | | | | | |
| [train-explain-model-locally-and-deploy](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/explain-model/azure-integration/scoring-time/train-explain-model-locally-and-deploy.ipynb) | | | | | | |
| [train-explain-model-on-amlcompute-and-deploy](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/explain-model/azure-integration/scoring-time/train-explain-model-on-amlcompute-and-deploy.ipynb) | | | | | | |
| [advanced-feature-transformations-explain-local](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/explain-model/tabular-data/advanced-feature-transformations-explain-local.ipynb) | | | | | | |
| [explain-binary-classification-local](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/explain-model/tabular-data/explain-binary-classification-local.ipynb) | | | | | | |
| [explain-multiclass-classification-local](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/explain-model/tabular-data/explain-multiclass-classification-local.ipynb) | | | | | | |
| [explain-regression-local](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/explain-model/tabular-data/explain-regression-local.ipynb) | | | | | | |
| [simple-feature-transformations-explain-local](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/explain-model/tabular-data/simple-feature-transformations-explain-local.ipynb) | | | | | | |
| [aml-pipelines-data-transfer](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-data-transfer.ipynb) | | | | | | |
| [aml-pipelines-getting-started](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-getting-started.ipynb) | | | | | | |
| [aml-pipelines-how-to-use-azurebatch-to-run-a-windows-executable](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-how-to-use-azurebatch-to-run-a-windows-executable.ipynb) | | | | | | |
| [aml-pipelines-how-to-use-estimatorstep](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-how-to-use-estimatorstep.ipynb) | | | | | | |
| [aml-pipelines-how-to-use-pipeline-drafts](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-how-to-use-pipeline-drafts.ipynb) | | | | | | |
| [aml-pipelines-parameter-tuning-with-hyperdrive](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-parameter-tuning-with-hyperdrive.ipynb) | | | | | | |
| [aml-pipelines-publish-and-run-using-rest-endpoint](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-publish-and-run-using-rest-endpoint.ipynb) | | | | | | |
| [aml-pipelines-setup-schedule-for-a-published-pipeline](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-setup-schedule-for-a-published-pipeline.ipynb) | | | | | | |
| [aml-pipelines-setup-versioned-pipeline-endpoints](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-setup-versioned-pipeline-endpoints.ipynb) | | | | | | |
| [aml-pipelines-use-adla-as-compute-target](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-use-adla-as-compute-target.ipynb) | | | | | | |
| [aml-pipelines-use-databricks-as-compute-target](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-use-databricks-as-compute-target.ipynb) | | | | | | |
| [aml-pipelines-with-automated-machine-learning-step](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-with-automated-machine-learning-step.ipynb) | | | | | | |
| [aml-pipelines-with-data-dependency-steps](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-with-data-dependency-steps.ipynb) | | | | | | |
| [nyc-taxi-data-regression-model-building](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/machine-learning-pipelines/nyc-taxi-data-regression-model-building/nyc-taxi-data-regression-model-building.ipynb) | | | | | | |
| [pipeline-batch-scoring](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/machine-learning-pipelines/pipeline-batch-scoring/pipeline-batch-scoring.ipynb) | | | | | | |
| [pipeline-style-transfer](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/machine-learning-pipelines/pipeline-style-transfer/pipeline-style-transfer.ipynb) | | | | | | |
| [authentication-in-azureml](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/manage-azureml-service/authentication-in-azureml/authentication-in-azureml.ipynb) | | | | | | |
| [azure-ml-datadrift](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/monitor-models/data-drift/azure-ml-datadrift.ipynb) | | | | | | |
| [manage-runs](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/track-and-monitor-experiments/manage-runs/manage-runs.ipynb) | | | | | | |
| [tensorboard](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/track-and-monitor-experiments/tensorboard/tensorboard.ipynb) | | | | | | |
| [deploy-model](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/track-and-monitor-experiments/using-mlflow/deploy-model/deploy-model.ipynb) | | | | | | |
| [train-and-deploy-pytorch](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/track-and-monitor-experiments/using-mlflow/train-deploy-pytorch/train-and-deploy-pytorch.ipynb) | | | | | | |
| [train-local](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/track-and-monitor-experiments/using-mlflow/train-local/train-local.ipynb) | | | | | | |
| [train-remote](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/track-and-monitor-experiments/using-mlflow/train-remote/train-remote.ipynb) | | | | | | |
| [logging-api](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/training/logging-api/logging-api.ipynb) | | | | | | |
| [manage-runs](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/training/manage-runs/manage-runs.ipynb) | | | | | | |
| [train-hyperparameter-tune-deploy-with-sklearn](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/training/train-hyperparameter-tune-deploy-with-sklearn/train-hyperparameter-tune-deploy-with-sklearn.ipynb) | | | | | | |
| [train-in-spark](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/training/train-in-spark/train-in-spark.ipynb) | | | | | | |
| [train-on-amlcompute](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/training/train-on-amlcompute/train-on-amlcompute.ipynb) | | | | | | |
| [train-on-local](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/training/train-on-local/train-on-local.ipynb) | | | | | | |
| [train-on-remote-vm](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/training/train-on-remote-vm/train-on-remote-vm.ipynb) | | | | | | |
| [train-within-notebook](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/training/train-within-notebook/train-within-notebook.ipynb) | | | | | | |
| [using-environments](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/training/using-environments/using-environments.ipynb) | | | | | | |
| [distributed-chainer](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/training-with-deep-learning/distributed-chainer/distributed-chainer.ipynb) | | | | | | |
| [distributed-cntk-with-custom-docker](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/training-with-deep-learning/distributed-cntk-with-custom-docker/distributed-cntk-with-custom-docker.ipynb) | | | | | | |
| [distributed-pytorch-with-horovod](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/training-with-deep-learning/distributed-pytorch-with-horovod/distributed-pytorch-with-horovod.ipynb) | | | | | | |
| [distributed-tensorflow-with-horovod](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/training-with-deep-learning/distributed-tensorflow-with-horovod/distributed-tensorflow-with-horovod.ipynb) | | | | | | |
| [distributed-tensorflow-with-parameter-server](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/training-with-deep-learning/distributed-tensorflow-with-parameter-server/distributed-tensorflow-with-parameter-server.ipynb) | | | | | | |
| [export-run-history-to-tensorboard](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/training-with-deep-learning/export-run-history-to-tensorboard/export-run-history-to-tensorboard.ipynb) | | | | | | |
| [how-to-use-estimator](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/training-with-deep-learning/how-to-use-estimator/how-to-use-estimator.ipynb) | | | | | | |
| [notebook_example](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/training-with-deep-learning/how-to-use-estimator/notebook_example.ipynb) | | | | | | |
| [tensorboard](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/training-with-deep-learning/tensorboard/tensorboard.ipynb) | | | | | | |
| [train-hyperparameter-tune-deploy-with-chainer](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/training-with-deep-learning/train-hyperparameter-tune-deploy-with-chainer/train-hyperparameter-tune-deploy-with-chainer.ipynb) | | | | | | |
| [train-hyperparameter-tune-deploy-with-keras](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/training-with-deep-learning/train-hyperparameter-tune-deploy-with-keras/train-hyperparameter-tune-deploy-with-keras.ipynb) | | | | | | |
| [train-hyperparameter-tune-deploy-with-pytorch](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/training-with-deep-learning/train-hyperparameter-tune-deploy-with-pytorch/train-hyperparameter-tune-deploy-with-pytorch.ipynb) | | | | | | |
| [train-hyperparameter-tune-deploy-with-tensorflow](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/training-with-deep-learning/train-hyperparameter-tune-deploy-with-tensorflow/train-hyperparameter-tune-deploy-with-tensorflow.ipynb) | | | | | | |
| [train-tensorflow-resume-training](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/training-with-deep-learning/train-tensorflow-resume-training/train-tensorflow-resume-training.ipynb) | | | | | | |
| [new-york-taxi](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/case-studies/new-york-taxi/new-york-taxi.ipynb) | | | | | | |
| [new-york-taxi_scale-out](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/case-studies/new-york-taxi/new-york-taxi_scale-out.ipynb) | | | | | | |
| [add-column-using-expression](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/add-column-using-expression.ipynb) | | | | | | |
| [append-columns-and-rows](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/append-columns-and-rows.ipynb) | | | | | | |
| [assertions](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/assertions.ipynb) | | | | | | |
| [auto-read-file](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/auto-read-file.ipynb) | | | | | | |
| [cache](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/cache.ipynb) | | | | | | |
| [column-manipulations](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/column-manipulations.ipynb) | | | | | | |
| [column-type-transforms](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/column-type-transforms.ipynb) | | | | | | |
| [custom-python-transforms](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/custom-python-transforms.ipynb) | | | | | | |
| [data-ingestion](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/data-ingestion.ipynb) | | | | | | |
| [data-profile](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/data-profile.ipynb) | | | | | | |
| [datastore](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/datastore.ipynb) | | | | | | |
| [derive-column-by-example](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/derive-column-by-example.ipynb) | | | | | | |
| [external-references](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/external-references.ipynb) | | | | | | |
| [filtering](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/filtering.ipynb) | | | | | | |
| [fuzzy-group](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/fuzzy-group.ipynb) | | | | | | |
| [impute-missing-values](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/impute-missing-values.ipynb) | | | | | | |
| [join](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/join.ipynb) | | | | | | |
| [label-encoder](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/label-encoder.ipynb) | | | | | | |
| [min-max-scaler](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/min-max-scaler.ipynb) | | | | | | |
| [one-hot-encoder](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/one-hot-encoder.ipynb) | | | | | | |
| [open-save-dataflows](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/open-save-dataflows.ipynb) | | | | | | |
| [quantile-transformation](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/quantile-transformation.ipynb) | | | | | | |
| [random-split](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/random-split.ipynb) | | | | | | |
| [replace-datasource-replace-reference](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/replace-datasource-replace-reference.ipynb) | | | | | | |
| [replace-fill-error](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/replace-fill-error.ipynb) | | | | | | |
| [secrets](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/secrets.ipynb) | | | | | | |
| [semantic-types](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/semantic-types.ipynb) | | | | | | |
| [split-column-by-example](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/split-column-by-example.ipynb) | | | | | | |
| [subsetting-sampling](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/subsetting-sampling.ipynb) | | | | | | |
| [summarize](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/summarize.ipynb) | | | | | | |
| [working-with-file-streams](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/working-with-file-streams.ipynb) | | | | | | |
| [writing-data](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/how-to-guides/writing-data.ipynb) | | | | | | |
| [getting-started](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/dataprep/tutorials/getting-started/getting-started.ipynb) | | | | | | |
| [datasets-diff](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/datasets/datasets-diff/datasets-diff.ipynb) | | | | | | |
| [file-dataset-img-classification](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/datasets/datasets-tutorial/file-dataset-img-classification.ipynb) | | | | | | |
| [tabular-dataset-tutorial](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/work-with-data/datasets/datasets-tutorial/tabular-dataset-tutorial.ipynb) | | | | | | |
| [configuration](https://github.com/Azure/MachineLearningNotebooks/blob/master/setup-environment/configuration.ipynb) | | | | | | |
| [img-classification-part1-training](https://github.com/Azure/MachineLearningNotebooks/blob/master/tutorials/img-classification-part1-training.ipynb) | | | | | | |
| [img-classification-part2-deploy](https://github.com/Azure/MachineLearningNotebooks/blob/master/tutorials/img-classification-part2-deploy.ipynb) | | | | | | |
| [regression-automated-ml](https://github.com/Azure/MachineLearningNotebooks/blob/master/tutorials/regression-automated-ml.ipynb) | | | | | | |
| [tutorial-1st-experiment-sdk-train](https://github.com/Azure/MachineLearningNotebooks/blob/master/tutorials/tutorial-1st-experiment-sdk-train.ipynb) | | | | | | |
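The empty Task/Dataset/Compute/Deployment/Framework/Tags cells above are meant to be filled from each notebook's own metadata (the `friendly_name`, `task`, `datasets`, `compute`, `deployment`, `framework`, and `tags` fields that appear in the notebook diff below). A minimal sketch of how one index row could be generated from those fields — the helper name and file path here are illustrative, not repository code:

```python
# Sketch: build one markdown index row from a notebook's metadata.
# Field names match the metadata shown in the notebook diff below;
# index_row() and the example path are assumptions for illustration.
import json

def index_row(path):
    with open(path, encoding="utf-8") as f:
        meta = json.load(f).get("metadata", {})
    if meta.get("exclude_from_index"):
        return None  # notebook opted out of the index
    cells = [
        f"[{meta.get('friendly_name', path)}]({path})",
        meta.get("task", ""),
        "<br>".join(meta.get("datasets", [])),
        "<br>".join(meta.get("compute", [])),
        "<br>".join(meta.get("deployment", [])),
        "<br>".join(meta.get("framework", [])),
        " ".join(meta.get("tags", [])),
    ]
    return "| " + " | ".join(cells) + " |"

print(index_row("tutorials/regression-automated-ml.ipynb"))
```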
@@ -102,7 +102,7 @@
"source": [
"import azureml.core\n",
"\n",
"print(\"This notebook was created using version 1.0.57 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.0.55 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},
@@ -125,10 +125,10 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"### Create or attach existing compute target\n",
"### Create or Attach existing compute resource\n",
"By using Azure Machine Learning Compute, a managed service, data scientists can train machine learning models on clusters of Azure virtual machines. Examples include VMs with GPU support. In this tutorial, you create Azure Machine Learning Compute as your training environment. The code below creates the compute clusters for you if they don't already exist in your workspace.\n",
"\n",
"**Creation of compute target takes approximately 5 minutes.** If the AmlCompute with that name is already in your workspace the code will skip the creation process."
"**Creation of compute takes approximately 5 minutes.** If the AmlCompute with that name is already in your workspace the code will skip the creation process."
]
},
{
@@ -258,9 +258,9 @@
"\n",
"### Upload data to the cloud\n",
"\n",
"You downloaded and used the training data on the computer your notebook is running on. In the next section, you will train a model on the remote Azure Machine Learning Compute. The remote compute resource will also need access to your data. To provide access, upload your data to a centralized datastore associated with your workspace. This datastore provides fast access to your data when using remote compute targets in the cloud, as it is in the Azure data center.\n",
"Now make the data accessible remotely by uploading that data from your local machine into Azure so it can be accessed for remote training. The datastore is a convenient construct associated with your workspace for you to upload/download data, and interact with it from your remote compute targets. It is backed by Azure blob storage account.\n",
"\n",
"Upload the MNIST files into a directory named `mnist` at the root of the datastore: See [access data from your datastores](https://docs.microsoft.com/azure/machine-learning/service/how-to-access-data) for more information."
"The MNIST files are uploaded into a directory named `mnist` at the root of the datastore. See [access data from your datastores](https://docs.microsoft.com/bs-latn-ba/azure/machine-learning/service/how-to-access-data) for more information."
]
},
{
@@ -675,7 +675,7 @@
"name": "python36"
},
"language_info": {
"codemirror_mode": {
"codemdirror_mode": {
"name": "ipython",
"version": 3
},
@@ -686,8 +686,27 @@
"pygments_lexer": "ipython3",
"version": "3.6.6"
},
"msauthor": "roastala"
},
"friendly_name": "Testing index",
"exclude_from_index": false,
"order_index": 1,
"category": "tutorial",
"tags": [
"featured"
],
"task": "Regression",
"datasets": [
"NYC Taxi"
],
"compute": [
"local"
],
"deployment": [
"None"
],
"framework": [
"Azure ML AutoML"
],
},
"nbformat": 4,
"nbformat_minor": 2
}
}
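The `category` and `order_index` fields added in the metadata above determine which index section a notebook lands in and how entries are ordered within it. A hedged sketch of that ordering, assuming a fixed set of category names (`starter`, `tutorial`, `training`, `deployment`, `other` — an assumption) and using an illustrative glob pattern:

```python
# Sketch: order notebooks for the index by (category, order_index).
# CATEGORY_ORDER and the glob pattern are assumptions, not repository code.
import glob
import json

CATEGORY_ORDER = {"starter": 0, "tutorial": 1, "training": 2, "deployment": 3, "other": 4}

def sort_key(path):
    with open(path, encoding="utf-8") as f:
        meta = json.load(f).get("metadata", {})
    # Unknown categories sort last; missing order_index sinks to the bottom.
    return (CATEGORY_ORDER.get(meta.get("category", "other"), 4),
            meta.get("order_index", 10**6))

notebooks = sorted(glob.glob("**/*.ipynb", recursive=True), key=sort_key)
```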
File diff suppressed because it is too large
@@ -1,654 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Copyright (c) Microsoft Corporation. All rights reserved."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Tutorial: Use automated machine learning to predict taxi fares"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"In this tutorial, you use automated machine learning in Azure Machine Learning service to create a regression model to predict NYC taxi fare prices. This process accepts training data and configuration settings, and automatically iterates through combinations of different feature normalization/standardization methods, models, and hyperparameter settings to arrive at the best model.\n",
"\n",
"In this tutorial you learn the following tasks:\n",
"\n",
"* Download, transform, and clean data using Azure Open Datasets\n",
"* Train an automated machine learning regression model\n",
"* Calculate model accuracy\n",
"\n",
"If you don’t have an Azure subscription, create a free account before you begin. Try the [free or paid version](https://aka.ms/AMLFree) of Azure Machine Learning service today."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Prerequisites"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"* Complete the [setup tutorial](https://docs.microsoft.com/azure/machine-learning/service/tutorial-1st-experiment-sdk-setup) if you don't already have an Azure Machine Learning service workspace or notebook virtual machine.\n",
"* After you complete the setup tutorial, open the **tutorials/regression-automated-ml.ipynb** notebook using the same notebook server.\n",
"\n",
"This tutorial is also available on [GitHub](https://github.com/Azure/MachineLearningNotebooks/tree/master/tutorials) if you wish to run it in your own [local environment](https://docs.microsoft.com/azure/machine-learning/service/how-to-configure-environment#local). Run `pip install azureml-sdk[automl] azureml-opendatasets azureml-widgets` to get the required packages."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Download and prepare data"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Import the necessary packages. The Open Datasets package contains a class representing each data source (`NycTlcGreen` for example) to easily filter date parameters before downloading."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.opendatasets import NycTlcGreen\n",
"import pandas as pd\n",
"from datetime import datetime\n",
"from dateutil.relativedelta import relativedelta"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Begin by creating a dataframe to hold the taxi data. When working in a non-Spark environment, Open Datasets only allows downloading one month of data at a time with certain classes to avoid `MemoryError` with large datasets. To download taxi data, iteratively fetch one month at a time, and before appending it to `green_taxi_df` randomly sample 2,000 records from each month to avoid bloating the dataframe. Then preview the data."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"green_taxi_df = pd.DataFrame([])\n",
"start = datetime.strptime(\"1/1/2015\",\"%m/%d/%Y\")\n",
"end = datetime.strptime(\"1/31/2015\",\"%m/%d/%Y\")\n",
"\n",
"for sample_month in range(12):\n",
"    temp_df_green = NycTlcGreen(start + relativedelta(months=sample_month), end + relativedelta(months=sample_month)) \\\n",
"        .to_pandas_dataframe()\n",
"    green_taxi_df = green_taxi_df.append(temp_df_green.sample(2000))\n",
"    \n",
"green_taxi_df.head(10)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now that the initial data is loaded, define a function to create various time-based features from the pickup datetime field. This will create new fields for the month number, day of month, day of week, and hour of day, and will allow the model to factor in time-based seasonality. \n",
"\n",
"Use the `apply()` function on the dataframe to iteratively apply the `build_time_features()` function to each row in the taxi data."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def build_time_features(vector):\n",
"    pickup_datetime = vector[0]\n",
"    month_num = pickup_datetime.month\n",
"    day_of_month = pickup_datetime.day\n",
"    day_of_week = pickup_datetime.weekday()\n",
"    hour_of_day = pickup_datetime.hour\n",
"    \n",
"    return pd.Series((month_num, day_of_month, day_of_week, hour_of_day))\n",
"\n",
"green_taxi_df[[\"month_num\", \"day_of_month\",\"day_of_week\", \"hour_of_day\"]] = green_taxi_df[[\"lpepPickupDatetime\"]].apply(build_time_features, axis=1)\n",
"green_taxi_df.head(10)"
]
},
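For reference, the same four features can be derived without row-wise `apply()` by using pandas' vectorized `.dt` accessor — a sketch, not part of the original notebook, assuming `lpepPickupDatetime` is already a datetime column as in the loaded data:

```python
# Equivalent, vectorized alternative to the apply() above.
pickup = green_taxi_df["lpepPickupDatetime"]
green_taxi_df["month_num"] = pickup.dt.month
green_taxi_df["day_of_month"] = pickup.dt.day
green_taxi_df["day_of_week"] = pickup.dt.dayofweek  # Monday == 0, like weekday()
green_taxi_df["hour_of_day"] = pickup.dt.hour
```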
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Remove some of the columns that you won't need for training or additional feature building."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"columns_to_remove = [\"lpepPickupDatetime\", \"lpepDropoffDatetime\", \"puLocationId\", \"doLocationId\", \"extra\", \"mtaTax\",\n",
"                     \"improvementSurcharge\", \"tollsAmount\", \"ehailFee\", \"tripType\", \"rateCodeID\", \n",
"                     \"storeAndFwdFlag\", \"paymentType\", \"fareAmount\", \"tipAmount\"\n",
"                    ]\n",
"for col in columns_to_remove:\n",
"    green_taxi_df.pop(col)\n",
"    \n",
"green_taxi_df.head(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Cleanse data "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Run the `describe()` function on the new dataframe to see summary statistics for each field."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"green_taxi_df.describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"From the summary statistics, you see that there are several fields that have outliers or values that will reduce model accuracy. First filter the lat/long fields to be within the bounds of the Manhattan area. This will filter out longer taxi trips or trips that are outliers in respect to their relationship with other features. \n",
|
||||
"\n",
|
||||
"Additionally filter the `tripDistance` field to be greater than zero but less than 31 miles (the haversine distance between the two lat/long pairs). This eliminates long outlier trips that have inconsistent trip cost.\n",
|
||||
"\n",
|
||||
"Lastly, the `totalAmount` field has negative values for the taxi fares, which don't make sense in the context of our model, and the `passengerCount` field has bad data with the minimum values being zero.\n",
|
||||
"\n",
|
||||
"Filter out these anomalies using query functions, and then remove the last few columns unnecessary for training."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"final_df = green_taxi_df.query(\"pickupLatitude>=40.53 and pickupLatitude<=40.88\")\n",
|
||||
"final_df = final_df.query(\"pickupLongitude>=-74.09 and pickupLongitude<=-73.72\")\n",
|
||||
"final_df = final_df.query(\"tripDistance>=0.25 and tripDistance<31\")\n",
|
||||
"final_df = final_df.query(\"passengerCount>0 and totalAmount>0\")\n",
|
||||
"\n",
|
||||
"columns_to_remove_for_training = [\"pickupLongitude\", \"pickupLatitude\", \"dropoffLongitude\", \"dropoffLatitude\"]\n",
|
||||
"for col in columns_to_remove_for_training:\n",
|
||||
" final_df.pop(col)"
|
||||
]
|
||||
},
|
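For reference, the 31-mile bound mentioned above is roughly the haversine (great-circle) distance between opposite corners of the lat/long bounding box used in this filter. A small sketch to verify the figure:

```python
from math import radians, sin, cos, asin, sqrt

def haversine_miles(lat1, lon1, lat2, lon2):
    # Great-circle distance between two (lat, lon) points, in miles.
    lat1, lon1, lat2, lon2 = map(radians, (lat1, lon1, lat2, lon2))
    a = sin((lat2 - lat1) / 2) ** 2 \
        + cos(lat1) * cos(lat2) * sin((lon2 - lon1) / 2) ** 2
    return 2 * 3956 * asin(sqrt(a))  # 3956 = Earth radius in miles

# Opposite corners of the bounding box used in the query above:
print(haversine_miles(40.53, -74.09, 40.88, -73.72))  # ~31 miles
```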
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Call `describe()` again on the data to ensure cleansing worked as expected. You now have a prepared and cleansed set of taxi, holiday, and weather data to use for machine learning model training."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"final_df.describe()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Configure workspace\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Create a workspace object from the existing workspace. A [Workspace](https://docs.microsoft.com/python/api/azureml-core/azureml.core.workspace.workspace?view=azure-ml-py) is a class that accepts your Azure subscription and resource information. It also creates a cloud resource to monitor and track your model runs. `Workspace.from_config()` reads the file **config.json** and loads the authentication details into an object named `ws`. `ws` is used throughout the rest of the code in this tutorial."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.workspace import Workspace\n",
|
||||
"ws = Workspace.from_config()"
|
||||
]
|
||||
},
|
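If you don't have a **config.json** in the working directory, the same workspace object can be retrieved explicitly. A sketch, where the name, subscription ID, and resource group are placeholders you must substitute:

```python
from azureml.core.workspace import Workspace

# Placeholder values -- substitute your own workspace details.
ws = Workspace.get(name="my-workspace",
                   subscription_id="<subscription-id>",
                   resource_group="<resource-group>")
```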
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Split the data into train and test sets"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Split the data into training and test sets by using the `train_test_split` function in the `scikit-learn` library. This function segregates the data into the x (**features**) data set for model training and the y (**values to predict**) data set for testing. The `test_size` parameter determines the percentage of data to allocate to testing. The `random_state` parameter sets a seed to the random generator, so that your train-test splits are deterministic."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"\n",
|
||||
"y_df = final_df.pop(\"totalAmount\")\n",
|
||||
"x_df = final_df\n",
|
||||
"\n",
|
||||
"x_train, x_test, y_train, y_test = train_test_split(x_df, y_df, test_size=0.2, random_state=223)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The purpose of this step is to have data points to test the finished model that haven't been used to train the model, in order to measure true accuracy. \n",
|
||||
"\n",
|
||||
"In other words, a well-trained model should be able to accurately make predictions from data it hasn't already seen. You now have data prepared for auto-training a machine learning model."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Automatically train a model"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To automatically train a model, take the following steps:\n",
|
||||
"1. Define settings for the experiment run. Attach your training data to the configuration, and modify settings that control the training process.\n",
|
||||
"1. Submit the experiment for model tuning. After submitting the experiment, the process iterates through different machine learning algorithms and hyperparameter settings, adhering to your defined constraints. It chooses the best-fit model by optimizing an accuracy metric."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Define training settings"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Define the experiment parameter and model settings for training. View the full list of [settings](https://docs.microsoft.com/azure/machine-learning/service/how-to-configure-auto-train). Submitting the experiment with these default settings will take approximately 5-10 min, but if you want a shorter run time, reduce the `iterations` parameter.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"|Property| Value in this tutorial |Description|\n",
|
||||
"|----|----|---|\n",
|
||||
"|**iteration_timeout_minutes**|2|Time limit in minutes for each iteration. Reduce this value to decrease total runtime.|\n",
|
||||
"|**iterations**|20|Number of iterations. In each iteration, a new machine learning model is trained with your data. This is the primary value that affects total run time.|\n",
|
||||
"|**primary_metric**| spearman_correlation | Metric that you want to optimize. The best-fit model will be chosen based on this metric.|\n",
|
||||
"|**preprocess**| True | By using **True**, the experiment can preprocess the input data (handling missing data, converting text to numeric, etc.)|\n",
|
||||
"|**verbosity**| logging.INFO | Controls the level of logging.|\n",
|
||||
"|**n_cross_validations**|5|Number of cross-validation splits to perform when validation data is not specified.|"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import logging\n",
|
||||
"\n",
|
||||
"automl_settings = {\n",
|
||||
" \"iteration_timeout_minutes\": 2,\n",
|
||||
" \"iterations\": 20,\n",
|
||||
" \"primary_metric\": 'spearman_correlation',\n",
|
||||
" \"preprocess\": True,\n",
|
||||
" \"verbosity\": logging.INFO,\n",
|
||||
" \"n_cross_validations\": 5\n",
|
||||
"}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Use your defined training settings as a `**kwargs` parameter to an `AutoMLConfig` object. Additionally, specify your training data and the type of model, which is `regression` in this case."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.train.automl import AutoMLConfig\n",
|
||||
"\n",
|
||||
"automl_config = AutoMLConfig(task='regression',\n",
|
||||
" debug_log='automated_ml_errors.log',\n",
|
||||
" X=x_train.values,\n",
|
||||
" y=y_train.values.flatten(),\n",
|
||||
" **automl_settings)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Automated machine learning pre-processing steps (feature normalization, handling missing data, converting text to numeric, etc.) become part of the underlying model. When using the model for predictions, the same pre-processing steps applied during training are applied to your input data automatically."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Train the automatic regression model"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Create an experiment object in your workspace. An experiment acts as a container for your individual runs. Pass the defined `automl_config` object to the experiment, and set the output to `True` to view progress during the run. \n",
|
||||
"\n",
|
||||
"After starting the experiment, the output shown updates live as the experiment runs. For each iteration, you see the model type, the run duration, and the training accuracy. The field `BEST` tracks the best running training score based on your metric type."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.experiment import Experiment\n",
|
||||
"experiment = Experiment(ws, \"taxi-experiment\")\n",
|
||||
"local_run = experiment.submit(automl_config, show_output=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Explore the results"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Explore the results of automatic training with a [Jupyter widget](https://docs.microsoft.com/python/api/azureml-widgets/azureml.widgets?view=azure-ml-py). The widget allows you to see a graph and table of all individual run iterations, along with training accuracy metrics and metadata. Additionally, you can filter on different accuracy metrics than your primary metric with the dropdown selector."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.widgets import RunDetails\n",
|
||||
"RunDetails(local_run).show()"
|
||||
]
|
||||
},
|
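If you prefer not to use the widget, the same information can be pulled programmatically with the `azureml-core` run APIs. A minimal sketch that prints the primary metric for each child iteration:

```python
# Iterate over the child runs (one per AutoML iteration) and
# print the primary metric logged by each.
for child_run in local_run.get_children():
    metrics = child_run.get_metrics()
    print(child_run.id, metrics.get("spearman_correlation"))
```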
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Retrieve the best model"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Select the best model from your iterations. The `get_output` function returns the best run and the fitted model for the last fit invocation. By using the overloads on `get_output`, you can retrieve the best run and fitted model for any logged metric or a particular iteration."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"best_run, fitted_model = local_run.get_output()\n",
|
||||
"print(best_run)\n",
|
||||
"print(fitted_model)"
|
||||
]
|
||||
},
|
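A sketch of the overloads mentioned above (`metric` and `iteration` are parameters of `get_output`); the metric name shown is an assumption based on the standard AutoML regression metrics:

```python
# Best run/model as judged by a specific logged metric...
run_rmse, model_rmse = local_run.get_output(metric="root_mean_squared_error")

# ...or the run/model from one particular iteration (e.g., iteration 3).
third_run, third_model = local_run.get_output(iteration=3)
```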
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Test the best model accuracy"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Use the best model to run predictions on the test data set to predict taxi fares. The function `predict` uses the best model and predicts the values of y, **trip cost**, from the `x_test` data set. Print the first 10 predicted cost values from `y_predict`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"y_predict = fitted_model.predict(x_test.values)\n",
|
||||
"print(y_predict[:10])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Calculate the `root mean squared error` of the results. Convert the `y_test` dataframe to a list to compare to the predicted values. The function `mean_squared_error` takes two arrays of values and calculates the average squared error between them. Taking the square root of the result gives an error in the same units as the y variable, **cost**. It indicates roughly how far the taxi fare predictions are from the actual fares."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.metrics import mean_squared_error\n",
|
||||
"from math import sqrt\n",
|
||||
"\n",
|
||||
"y_actual = y_test.values.flatten().tolist()\n",
|
||||
"rmse = sqrt(mean_squared_error(y_actual, y_predict))\n",
|
||||
"rmse"
|
||||
]
|
||||
},
|
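On scikit-learn versions that support the `squared` flag (0.22 and later 1.x releases), the square-root step can be folded into the metric call itself; a one-line equivalent:

```python
# Equivalent on scikit-learn >= 0.22: compute RMSE directly.
rmse = mean_squared_error(y_actual, y_predict, squared=False)
```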
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Run the following code to calculate mean absolute percent error (MAPE) by using the full `y_actual` and `y_predict` data sets. This metric calculates an absolute difference between each predicted and actual value and sums all the differences. Then it expresses that sum as a percent of the total of the actual values."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"sum_actuals = sum_errors = 0\n",
|
||||
"\n",
|
||||
"for actual_val, predict_val in zip(y_actual, y_predict):\n",
|
||||
" abs_error = actual_val - predict_val\n",
|
||||
" if abs_error < 0:\n",
|
||||
" abs_error = abs_error * -1\n",
|
||||
"\n",
|
||||
" sum_errors = sum_errors + abs_error\n",
|
||||
" sum_actuals = sum_actuals + actual_val\n",
|
||||
"\n",
|
||||
"mean_abs_percent_error = sum_errors / sum_actuals\n",
|
||||
"print(\"Model MAPE:\")\n",
|
||||
"print(mean_abs_percent_error)\n",
|
||||
"print()\n",
|
||||
"print(\"Model Accuracy:\")\n",
|
||||
"print(1 - mean_abs_percent_error)"
|
||||
]
|
||||
},
|
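The same quantity can be computed in a couple of vectorized lines with NumPy. Note that this is the notebook's definition (total absolute error divided by total actuals) rather than the textbook per-row MAPE:

```python
import numpy as np

# Vectorized equivalent of the loop above.
y_actual_arr = np.asarray(y_actual)
mape = np.abs(y_actual_arr - y_predict).sum() / y_actual_arr.sum()
print("Model MAPE:", mape)
print("Model Accuracy:", 1 - mape)
```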
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"From the two prediction accuracy metrics, you see that the model is fairly good at predicting taxi fares from the data set's features, typically within +- $4.00, and approximately 15% error. \n",
|
||||
"\n",
|
||||
"The traditional machine learning model development process is highly resource-intensive, and requires significant domain knowledge and time investment to run and compare the results of dozens of models. Using automated machine learning is a great way to rapidly test many different models for your scenario."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Clean up resources"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Do not complete this section if you plan on running other Azure Machine Learning service tutorials."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Stop the notebook VM"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you used a cloud notebook server, stop the VM when you are not using it to reduce cost."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"1. In your workspace, select **Notebook VMs**.\n",
|
||||
"1. From the list, select the VM.\n",
|
||||
"1. Select **Stop**.\n",
|
||||
"1. When you're ready to use the server again, select **Start**."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Delete everything"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you don't plan to use the resources you created, delete them, so you don't incur any charges."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"1. In the Azure portal, select **Resource groups** on the far left.\n",
|
||||
"1. From the list, select the resource group you created.\n",
|
||||
"1. Select **Delete resource group**.\n",
|
||||
"1. Enter the resource group name. Then select **Delete**.\n",
|
||||
"\n",
|
||||
"You can also keep the resource group but delete a single workspace. Display the workspace properties and select **Delete**."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Next steps"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"In this automated machine learning tutorial, you did the following tasks:\n",
|
||||
"\n",
|
||||
"> * Configured a workspace and prepared data for an experiment.\n",
|
||||
"> * Trained by using an automated regression model locally with custom parameters.\n",
|
||||
"> * Explored and reviewed training results.\n",
|
||||
"\n",
|
||||
"[Deploy your model](https://docs.microsoft.com/azure/machine-learning/service/tutorial-deploy-models-with-aml) with Azure Machine Learning service."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "jeffshep"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
"name": "python36"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.7"
|
||||
},
|
||||
"msauthor": "trbye"
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -1,7 +0,0 @@
|
||||
name: regression-automated-ml
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-train-automl
|
||||
- azureml-widgets
|
||||
- azureml-opendatasets
|
||||
657
tutorials/regression-part1-data-prep.ipynb
Normal file
@@ -0,0 +1,657 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||
"\n",
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Tutorial: Prepare data for regression modeling"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"In this tutorial, you learn how to prepare data for regression modeling by using the Azure Machine Learning Data Prep SDK. You run various transformations to filter and combine two different NYC taxi data sets.\n",
|
||||
"\n",
|
||||
"This tutorial is **part one of a two-part tutorial series**. After you complete the tutorial series, you can predict the cost of a taxi trip by training a model on data features. These features include the pickup day and time, the number of passengers, and the pickup location.\n",
|
||||
"\n",
|
||||
"In this tutorial, you:\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"> * Setup a Python environment and import packages\n",
|
||||
"> * Load two datasets with different field names\n",
|
||||
"> * Cleanse data to remove anomalies\n",
|
||||
"> * Transform data using intelligent transforms to create new features\n",
|
||||
"> * Save your dataflow object to use in a regression model"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Prerequisites\n",
|
||||
"\n",
|
||||
"To run the notebook you will need:\n",
|
||||
"\n",
|
||||
"* A Python 3.6 notebook server with the following installed:\n",
|
||||
" * The Azure Machine Learning Data Prep SDK for Python\n",
|
||||
"* The tutorial notebook\n",
|
||||
"\n",
|
||||
"Navigate back to the [tutorial page](https://docs.microsoft.com/azure/machine-learning/service/tutorial-data-prep) for specific environment setup instructions.\n",
|
||||
"\n",
|
||||
"## <a name=\"start\"></a>Set up your development environment\n",
|
||||
"\n",
|
||||
"All the setup for your development work can be accomplished in a Python notebook. Setup includes the following actions:\n",
|
||||
"\n",
|
||||
"* Install the SDK\n",
|
||||
"* Import Python packages\n",
|
||||
"\n",
|
||||
"### Install and import packages\n",
|
||||
"\n",
|
||||
"Use the following to install necessary packages if you don't already have them.\n",
|
||||
"\n",
|
||||
"```shell\n",
|
||||
"pip install \"azureml-dataprep[pandas]>=1.1.2,<1.2.0\"\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"Import the SDK."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import azureml.dataprep as dprep"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load data\n",
|
||||
"Download two different NYC Taxi data sets into dataflow objects. These datasets contain slightly different fields. The method `auto_read_file()` automatically recognizes the input file type."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from IPython.display import display\n",
|
||||
"dataset_root = \"https://dprepdata.blob.core.windows.net/demo\"\n",
|
||||
"\n",
|
||||
"green_path = \"/\".join([dataset_root, \"green-small/*\"])\n",
|
||||
"yellow_path = \"/\".join([dataset_root, \"yellow-small/*\"])\n",
|
||||
"\n",
|
||||
"green_df_raw = dprep.read_csv(path=green_path, header=dprep.PromoteHeadersMode.GROUPED)\n",
|
||||
"# auto_read_file automatically identifies and parses the file type, which is useful when you don't know the file type.\n",
|
||||
"yellow_df_raw = dprep.auto_read_file(path=yellow_path)\n",
|
||||
"\n",
|
||||
"display(green_df_raw.head(5))\n",
|
||||
"display(yellow_df_raw.head(5))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"A `Dataflow` object is similar to a dataframe, and represents a series of lazily-evaluated, immutable operations on data. Operations can be added by invoking the different transformation and filtering methods available. The result of adding an operation to a `Dataflow` is always a new `Dataflow` object."
|
||||
]
|
||||
},
|
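To make the lazy-evaluation point concrete, here is a small sketch: adding a step returns a new `Dataflow` immediately, and no data is read until a materializing call such as `head()` or `get_profile()` runs. (The column name is an assumption based on the rename map used later in this notebook.)

```python
# No data is read here -- this only records a step and returns
# a new, independent Dataflow; green_df_raw is unchanged.
green_pickups_only = green_df_raw.keep_columns(columns=["lpep_pickup_datetime"])

# Execution happens only when the result is materialized:
green_pickups_only.head(5)
```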
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Cleanse data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now you populate some variables with shortcut transforms to apply to all dataflows. The `drop_if_all_null` variable is used to delete records where all fields are null. The `useful_columns` variable holds an array of column descriptions that are kept in each dataflow."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"all_columns = dprep.ColumnSelector(term=\".*\", use_regex=True)\n",
|
||||
"drop_if_all_null = [all_columns, dprep.ColumnRelationship(dprep.ColumnRelationship.ALL)]\n",
|
||||
"useful_columns = [\n",
|
||||
" \"cost\", \"distance\", \"dropoff_datetime\", \"dropoff_latitude\", \"dropoff_longitude\",\n",
|
||||
" \"passengers\", \"pickup_datetime\", \"pickup_latitude\", \"pickup_longitude\", \"store_forward\", \"vendor\"\n",
|
||||
"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You first work with the green taxi data to get it into a valid shape that can be combined with the yellow taxi data. Call the `replace_na()`, `drop_nulls()`, and `keep_columns()` functions by using the shortcut transform variables you created. Additionally, rename all the columns in the dataframe to match the names in the `useful_columns` variable."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"green_df = (green_df_raw\n",
|
||||
" .replace_na(columns=all_columns)\n",
|
||||
" .drop_nulls(*drop_if_all_null)\n",
|
||||
" .rename_columns(column_pairs={\n",
|
||||
" \"VendorID\": \"vendor\",\n",
|
||||
" \"lpep_pickup_datetime\": \"pickup_datetime\",\n",
|
||||
" \"Lpep_dropoff_datetime\": \"dropoff_datetime\",\n",
|
||||
" \"lpep_dropoff_datetime\": \"dropoff_datetime\",\n",
|
||||
" \"Store_and_fwd_flag\": \"store_forward\",\n",
|
||||
" \"store_and_fwd_flag\": \"store_forward\",\n",
|
||||
" \"Pickup_longitude\": \"pickup_longitude\",\n",
|
||||
" \"Pickup_latitude\": \"pickup_latitude\",\n",
|
||||
" \"Dropoff_longitude\": \"dropoff_longitude\",\n",
|
||||
" \"Dropoff_latitude\": \"dropoff_latitude\",\n",
|
||||
" \"Passenger_count\": \"passengers\",\n",
|
||||
" \"Fare_amount\": \"cost\",\n",
|
||||
" \"Trip_distance\": \"distance\"\n",
|
||||
" })\n",
|
||||
" .keep_columns(columns=useful_columns))\n",
|
||||
"green_df.head(5)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Run the same transformation steps on the yellow taxi data. These functions ensure that null data is removed from the data set, which will help increase machine learning model accuracy."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"yellow_df = (yellow_df_raw\n",
|
||||
" .replace_na(columns=all_columns)\n",
|
||||
" .drop_nulls(*drop_if_all_null)\n",
|
||||
" .rename_columns(column_pairs={\n",
|
||||
" \"vendor_name\": \"vendor\",\n",
|
||||
" \"VendorID\": \"vendor\",\n",
|
||||
" \"vendor_id\": \"vendor\",\n",
|
||||
" \"Trip_Pickup_DateTime\": \"pickup_datetime\",\n",
|
||||
" \"tpep_pickup_datetime\": \"pickup_datetime\",\n",
|
||||
" \"Trip_Dropoff_DateTime\": \"dropoff_datetime\",\n",
|
||||
" \"tpep_dropoff_datetime\": \"dropoff_datetime\",\n",
|
||||
" \"store_and_forward\": \"store_forward\",\n",
|
||||
" \"store_and_fwd_flag\": \"store_forward\",\n",
|
||||
" \"Start_Lon\": \"pickup_longitude\",\n",
|
||||
" \"Start_Lat\": \"pickup_latitude\",\n",
|
||||
" \"End_Lon\": \"dropoff_longitude\",\n",
|
||||
" \"End_Lat\": \"dropoff_latitude\",\n",
|
||||
" \"Passenger_Count\": \"passengers\",\n",
|
||||
" \"passenger_count\": \"passengers\",\n",
|
||||
" \"Fare_Amt\": \"cost\",\n",
|
||||
" \"fare_amount\": \"cost\",\n",
|
||||
" \"Trip_Distance\": \"distance\",\n",
|
||||
" \"trip_distance\": \"distance\"\n",
|
||||
" })\n",
|
||||
" .keep_columns(columns=useful_columns))\n",
|
||||
"yellow_df.head(5)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Call the `append_rows()` function on the green taxi data to append the yellow taxi data. A new combined dataframe is created."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"combined_df = green_df.append_rows([yellow_df])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Convert types and filter "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Examine the pickup and drop-off coordinates summary statistics to see how the data is distributed. First, define a `TypeConverter` object to change the latitude and longitude fields to decimal type. Next, call the `keep_columns()` function to restrict output to only the latitude and longitude fields, and then call the `get_profile()` function. These function calls create a condensed view of the dataflow to just show the lat/long fields, which makes it easier to evaluate missing or out-of-scope coordinates."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"decimal_type = dprep.TypeConverter(data_type=dprep.FieldType.DECIMAL)\n",
|
||||
"combined_df = combined_df.set_column_types(type_conversions={\n",
|
||||
" \"pickup_longitude\": decimal_type,\n",
|
||||
" \"pickup_latitude\": decimal_type,\n",
|
||||
" \"dropoff_longitude\": decimal_type,\n",
|
||||
" \"dropoff_latitude\": decimal_type\n",
|
||||
"})\n",
|
||||
"combined_df.keep_columns(columns=[\n",
|
||||
" \"pickup_longitude\", \"pickup_latitude\",\n",
|
||||
" \"dropoff_longitude\", \"dropoff_latitude\"\n",
|
||||
"]).get_profile()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"From the summary statistics output, you see there are missing coordinates and coordinates that aren't in New York City (this is determined from subjective analysis). Filter out coordinates for locations that are outside the city border. Chain the column filter commands within the `filter()` function and define the minimum and maximum bounds for each field. Then call the `get_profile()` function again to verify the transformation."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"latlong_filtered_df = (combined_df\n",
|
||||
" .drop_nulls(\n",
|
||||
" columns=[\"pickup_longitude\", \"pickup_latitude\", \"dropoff_longitude\", \"dropoff_latitude\"],\n",
|
||||
" column_relationship=dprep.ColumnRelationship(dprep.ColumnRelationship.ANY)\n",
|
||||
" )\n",
|
||||
" .filter(dprep.f_and(\n",
|
||||
" dprep.col(\"pickup_longitude\") <= -73.72,\n",
|
||||
" dprep.col(\"pickup_longitude\") >= -74.09,\n",
|
||||
" dprep.col(\"pickup_latitude\") <= 40.88,\n",
|
||||
" dprep.col(\"pickup_latitude\") >= 40.53,\n",
|
||||
" dprep.col(\"dropoff_longitude\") <= -73.72,\n",
|
||||
" dprep.col(\"dropoff_longitude\") >= -74.09,\n",
|
||||
" dprep.col(\"dropoff_latitude\") <= 40.88,\n",
|
||||
" dprep.col(\"dropoff_latitude\") >= 40.53\n",
|
||||
" )))\n",
|
||||
"latlong_filtered_df.keep_columns(columns=[\n",
|
||||
" \"pickup_longitude\", \"pickup_latitude\",\n",
|
||||
" \"dropoff_longitude\", \"dropoff_latitude\"\n",
|
||||
"]).get_profile()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Split and rename columns"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Look at the data profile for the `store_forward` column. This field is a boolean flag that is `Y` when the taxi did not have a connection to the server after the trip, and thus had to store the trip data in memory, and later forward it to the server when connected."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"latlong_filtered_df.keep_columns(columns='store_forward').get_profile()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Notice that the data profile output in the `store_forward` column shows that the data is inconsistent and there are missing or null values. Use the `replace()` and `fill_nulls()` functions to replace these values with the string \"N\":"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"replaced_stfor_vals_df = latlong_filtered_df.replace(columns=\"store_forward\", find=\"0\", replace_with=\"N\").fill_nulls(\"store_forward\", \"N\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Execute the `replace` function on the `distance` field. The function reformats distance values that are incorrectly labeled as `.00`, and fills any nulls with zeros. Convert the `distance` field to numerical format. These incorrect data points are likely anomolies in the data collection system on the taxi cabs."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"replaced_distance_vals_df = replaced_stfor_vals_df.replace(columns=\"distance\", find=\".00\", replace_with=0).fill_nulls(\"distance\", 0)\n",
|
||||
"replaced_distance_vals_df = replaced_distance_vals_df.to_number([\"distance\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Split the pickup and dropoff datetime values into the respective date and time columns. Use the `split_column_by_example()` function to make the split. In this case, the optional `example` parameter of the `split_column_by_example()` function is omitted. Therefore, the function automatically determines where to split based on the data."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"time_split_df = (replaced_distance_vals_df\n",
|
||||
" .split_column_by_example(source_column=\"pickup_datetime\")\n",
|
||||
" .split_column_by_example(source_column=\"dropoff_datetime\"))\n",
|
||||
"time_split_df.head(5)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Rename the columns generated by `split_column_by_example()` into meaningful names."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"renamed_col_df = (time_split_df\n",
|
||||
" .rename_columns(column_pairs={\n",
|
||||
" \"pickup_datetime_1\": \"pickup_date\",\n",
|
||||
" \"pickup_datetime_2\": \"pickup_time\",\n",
|
||||
" \"dropoff_datetime_1\": \"dropoff_date\",\n",
|
||||
" \"dropoff_datetime_2\": \"dropoff_time\"\n",
|
||||
" }))\n",
|
||||
"renamed_col_df.head(5)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Call the `get_profile()` function to see the full summary statistics after all cleansing steps."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"renamed_col_df.get_profile()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Transform data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Split the pickup and dropoff date further into the day of the week, day of the month, and month values. To get the day of the week value, use the `derive_column_by_example()` function. The function takes an array parameter of example objects that define the input data, and the preferred output. The function automatically determines your preferred transformation. For the pickup and dropoff time columns, split the time into the hour, minute, and second by using the `split_column_by_example()` function with no example parameter.\n",
|
||||
"\n",
|
||||
"After you generate the new features, use the `drop_columns()` function to delete the original fields as the newly generated features are preferred. Rename the rest of the fields to use meaningful descriptions.\n",
|
||||
"\n",
|
||||
"Transforming the data in this way to create new time-based features will improve machine learning model accuracy. For example, generating a new feature for the weekday will help establish a relationship between the day of the week and the taxi fare price, which is often more expensive on certain days of the week due to high demand."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"transformed_features_df = (renamed_col_df\n",
|
||||
" .derive_column_by_example(\n",
|
||||
" source_columns=\"pickup_date\",\n",
|
||||
" new_column_name=\"pickup_weekday\",\n",
|
||||
" example_data=[(\"2009-01-04\", \"Sunday\"), (\"2013-08-22\", \"Thursday\")]\n",
|
||||
" )\n",
|
||||
" .derive_column_by_example(\n",
|
||||
" source_columns=\"dropoff_date\",\n",
|
||||
" new_column_name=\"dropoff_weekday\",\n",
|
||||
" example_data=[(\"2013-08-22\", \"Thursday\"), (\"2013-11-03\", \"Sunday\")]\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" .split_column_by_example(source_column=\"pickup_time\")\n",
|
||||
" .split_column_by_example(source_column=\"dropoff_time\")\n",
|
||||
" # The following two calls to split_column_by_example reference the column names generated from the previous two calls.\n",
|
||||
" .split_column_by_example(source_column=\"pickup_time_1\")\n",
|
||||
" .split_column_by_example(source_column=\"dropoff_time_1\")\n",
|
||||
" .drop_columns(columns=[\n",
|
||||
" \"pickup_date\", \"pickup_time\", \"dropoff_date\", \"dropoff_time\",\n",
|
||||
" \"pickup_date_1\", \"dropoff_date_1\", \"pickup_time_1\", \"dropoff_time_1\"\n",
|
||||
" ])\n",
|
||||
"\n",
|
||||
" .rename_columns(column_pairs={\n",
|
||||
" \"pickup_date_2\": \"pickup_month\",\n",
|
||||
" \"pickup_date_3\": \"pickup_monthday\",\n",
|
||||
" \"pickup_time_1_1\": \"pickup_hour\",\n",
|
||||
" \"pickup_time_1_2\": \"pickup_minute\",\n",
|
||||
" \"pickup_time_2\": \"pickup_second\",\n",
|
||||
" \"dropoff_date_2\": \"dropoff_month\",\n",
|
||||
" \"dropoff_date_3\": \"dropoff_monthday\",\n",
|
||||
" \"dropoff_time_1_1\": \"dropoff_hour\",\n",
|
||||
" \"dropoff_time_1_2\": \"dropoff_minute\",\n",
|
||||
" \"dropoff_time_2\": \"dropoff_second\"\n",
|
||||
" }))\n",
|
||||
"\n",
|
||||
"transformed_features_df.head(5)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Notice that the data shows that the pickup and dropoff date and time components produced from the derived transformations are correct. Drop the `pickup_datetime` and `dropoff_datetime` columns because they're no longer needed (granular time features like hour, minute and second are more useful for model training)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"processed_df = transformed_features_df.drop_columns(columns=[\"pickup_datetime\", \"dropoff_datetime\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Use the type inference functionality to automatically check the data type of each field, and display the inference results."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"type_infer = processed_df.builders.set_column_types()\n",
|
||||
"type_infer.learn()\n",
|
||||
"type_infer"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The inference results look correct based on the data. Now apply the type conversions to the dataflow."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"type_converted_df = type_infer.to_dataflow()\n",
|
||||
"type_converted_df.get_profile()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Before you package the dataflow, run two final filters on the data set. To eliminate incorrectly captured data points, filter the dataflow on records where both the `cost` and `distance` variable values are greater than zero. This step will significantly improve machine learning model accuracy, because data points with a zero cost or distance represent major outliers that throw off prediction accuracy."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"final_df = type_converted_df.filter(dprep.col(\"distance\") > 0)\n",
|
||||
"final_df = final_df.filter(dprep.col(\"cost\") > 0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You now have a fully transformed and prepared dataflow object to use in a machine learning model. The SDK includes object serialization functionality, which is used as shown in the following code."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"file_path = os.path.join(os.getcwd(), \"dflows.dprep\")\n",
|
||||
"\n",
|
||||
"final_df.save(file_path)"
|
||||
]
|
||||
},
|
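Part two of the series reopens this file. A sketch of the reverse operation, assuming the `Dataflow.open` API of the same SDK version used above:

```python
# Reload the serialized dataflow from disk (sketch).
retrieved_df = dprep.Dataflow.open(file_path)
retrieved_df.head(5)
```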
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Clean up resources"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To continue with part two of the tutorial, you need the **dflows.dprep** file in the current directory.\n",
|
||||
"\n",
|
||||
"If you don't plan to continue to part two, delete the **dflows.dprep** file in your current directory. Delete this file whether you're running the execution locally or in [Azure Notebooks](https://notebooks.azure.com/)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Next steps"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"In this Azure Machine Learning Data Prep SDK tutorial, you:\n",
|
||||
"\n",
|
||||
"> * Set up your development environment\n",
|
||||
"> * Loaded and cleansed data sets\n",
|
||||
"> * Used smart transforms to predict your logic based on an example\n",
|
||||
"> * Merged and packaged datasets for machine learning training\n",
|
||||
"\n",
|
||||
"You are ready to use this training data in the next part of the tutorial series:\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"> [Tutorial #2: Train regression model](regression-part2-automated-ml.ipynb)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "cforbe"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
"name": "python36"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.7"
|
||||
},
|
||||
"friendly_name": "Testing index 2",
|
||||
"exclude_from_index": true,
|
||||
"order_index": 1,
|
||||
"category": "training",
|
||||
"tags": [
|
||||
"featured"
|
||||
],
|
||||
"task": "Regression",
|
||||
"datasets": [
|
||||
"NYC Taxi"
|
||||
],
|
||||
"compute": [
|
||||
"ACI"
|
||||
],
|
||||
"deployment": [
|
||||
"None"
|
||||
],
|
||||
"framework": [
|
||||
"Azure ML AutoML2"
|
||||
]
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
5
tutorials/regression-part1-data-prep.yml
Normal file
@@ -0,0 +1,5 @@
|
||||
name: regression-part1-data-prep
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-dataprep[pandas]>=1.1.2,<1.2.0
|
||||
Some files were not shown because too many files have changed in this diff.