mirror of
https://github.com/Azure/MachineLearningNotebooks.git
synced 2025-12-19 17:17:04 -05:00
349 lines
11 KiB
Plaintext
349 lines
11 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
|
"\n",
|
|
"Licensed under the MIT License."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
""
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Automated Machine Learning\n",
|
|
"_**Exploring Previous Runs**_\n",
|
|
"\n",
|
|
"## Contents\n",
|
|
"1. [Introduction](#Introduction)\n",
|
|
"1. [Setup](#Setup)\n",
|
|
"1. [Explore](#Explore)\n",
|
|
"1. [Download](#Download)\n",
|
|
"1. [Register](#Register)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Introduction\n",
|
|
"This notebook shows how to navigate previously executed runs, and how to download and register a fitted model from any previous run.\n",
|
|
"\n",
|
|
"Make sure you have executed the [configuration](../../../configuration.ipynb) before running this notebook.\n",
|
|
"\n",
|
|
"In this notebook you will learn how to:\n",
|
|
"1. List all experiments in a workspace.\n",
|
|
"2. List all AutoML runs in an experiment.\n",
|
|
"3. Get details for an AutoML run, including settings, run widget, and all metrics.\n",
|
|
"4. Download a fitted pipeline for any iteration."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Setup"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import json\n",
|
|
"\n",
|
|
"import pandas as pd\n",
|
|
"from IPython.display import HTML, display\n",
|
|
"\n",
|
|
"from azureml.core.experiment import Experiment\n",
|
|
"from azureml.core.workspace import Workspace\n",
|
|
"from azureml.train.automl.run import AutoMLRun\n",
|
|
"from azureml.widgets import RunDetails"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"ws = Workspace.from_config()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Explore"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### List Experiments"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"experiment_list = Experiment.list(workspace=ws)\n",
|
|
"\n",
|
|
"summary_df = pd.DataFrame(index = ['No of Runs'])\n",
|
|
"for experiment in experiment_list:\n",
|
|
" automl_runs = list(experiment.get_runs(type='automl'))\n",
|
|
" summary_df[experiment.name] = [len(automl_runs)]\n",
|
|
" \n",
|
|
"pd.set_option('display.max_colwidth', -1)\n",
|
|
"summary_df.T"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### List runs for an experiment\n",
|
|
"Set `experiment_name` to any experiment name from the result of the Experiment.list cell to load the AutoML runs."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"experiment_name = 'automl-local-classification' # Replace this with any project name from previous cell.\n",
|
|
"\n",
|
|
"proj = ws.experiments[experiment_name]\n",
|
|
"summary_df = pd.DataFrame(index = ['Type', 'Status', 'Primary Metric', 'Iterations', 'Compute', 'Name'])\n",
|
|
"automl_runs = list(proj.get_runs(type='automl'))\n",
|
|
"automl_runs_project = []\n",
|
|
"for run in automl_runs:\n",
|
|
" properties = run.get_properties()\n",
|
|
" tags = run.get_tags()\n",
|
|
" amlsettings = json.loads(properties['AMLSettingsJsonString'])\n",
|
|
" if 'iterations' in tags:\n",
|
|
" iterations = tags['iterations']\n",
|
|
" else:\n",
|
|
" iterations = properties['num_iterations']\n",
|
|
" summary_df[run.id] = [amlsettings['task_type'], run.get_details()['status'], properties['primary_metric'], iterations, properties['target'], amlsettings['name']]\n",
|
|
" if run.get_details()['status'] == 'Completed':\n",
|
|
" automl_runs_project.append(run.id)\n",
|
|
" \n",
|
|
"from IPython.display import HTML\n",
|
|
"projname_html = HTML(\"<h3>{}</h3>\".format(proj.name))\n",
|
|
"\n",
|
|
"from IPython.display import display\n",
|
|
"display(projname_html)\n",
|
|
"display(summary_df.T)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Get details for a run\n",
|
|
"\n",
|
|
"Copy the experiment name and run id from the previous cell's output to look up more details on a particular run."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"run_id = automl_runs_project[0] # Replace with your own run_id from above run ids\n",
|
|
"assert (run_id in summary_df.keys()), \"Run id not found! Please set run id to a value from above run ids\"\n",
|
|
"\n",
|
|
"from azureml.widgets import RunDetails\n",
|
|
"\n",
|
|
"experiment = Experiment(ws, experiment_name)\n",
|
|
"ml_run = AutoMLRun(experiment = experiment, run_id = run_id)\n",
|
|
"\n",
|
|
"summary_df = pd.DataFrame(index = ['Type', 'Status', 'Primary Metric', 'Iterations', 'Compute', 'Name', 'Start Time', 'End Time'])\n",
|
|
"properties = ml_run.get_properties()\n",
|
|
"tags = ml_run.get_tags()\n",
|
|
"status = ml_run.get_details()\n",
|
|
"amlsettings = json.loads(properties['AMLSettingsJsonString'])\n",
|
|
"if 'iterations' in tags:\n",
|
|
" iterations = tags['iterations']\n",
|
|
"else:\n",
|
|
" iterations = properties['num_iterations']\n",
|
|
"start_time = None\n",
|
|
"if 'startTimeUtc' in status:\n",
|
|
" start_time = status['startTimeUtc']\n",
|
|
"end_time = None\n",
|
|
"if 'endTimeUtc' in status:\n",
|
|
" end_time = status['endTimeUtc']\n",
|
|
"summary_df[ml_run.id] = [amlsettings['task_type'], status['status'], properties['primary_metric'], iterations, properties['target'], amlsettings['name'], start_time, end_time]\n",
|
|
"display(HTML('<h3>Runtime Details</h3>'))\n",
|
|
"display(summary_df)\n",
|
|
"\n",
|
|
"#settings_df = pd.DataFrame(data = amlsettings, index = [''])\n",
|
|
"display(HTML('<h3>AutoML Settings</h3>'))\n",
|
|
"display(amlsettings)\n",
|
|
"\n",
|
|
"display(HTML('<h3>Iterations</h3>'))\n",
|
|
"RunDetails(ml_run).show() \n",
|
|
"\n",
|
|
"all_metrics = ml_run.get_metrics(recursive=True)\n",
|
|
"metricslist = {}\n",
|
|
"for run_id, metrics in all_metrics.items():\n",
|
|
" iteration = int(run_id.split('_')[-1])\n",
|
|
" float_metrics = {k: v for k, v in metrics.items() if isinstance(v, float)}\n",
|
|
" metricslist[iteration] = float_metrics\n",
|
|
"\n",
|
|
"rundata = pd.DataFrame(metricslist).sort_index(1)\n",
|
|
"display(HTML('<h3>Metrics</h3>'))\n",
|
|
"display(rundata)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Download"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Download the Best Model for Any Given Metric"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"metric = 'AUC_weighted' # Replace with a metric name.\n",
|
|
"best_run, fitted_model = ml_run.get_output(metric = metric)\n",
|
|
"fitted_model"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Download the Model for Any Given Iteration"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"iteration = 1 # Replace with an iteration number.\n",
|
|
"best_run, fitted_model = ml_run.get_output(iteration = iteration)\n",
|
|
"fitted_model"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Register"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Register fitted model for deployment\n",
|
|
"If neither `metric` nor `iteration` is specified in the `register_model` call, the iteration with the best primary metric is registered."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"description = 'AutoML Model'\n",
|
|
"tags = None\n",
|
|
"ml_run.register_model(description = description, tags = tags)\n",
|
|
"print(ml_run.model_id) # Use this id to deploy the model as a web service in Azure."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Register the Best Model for Any Given Metric"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"metric = 'AUC_weighted' # Replace with a metric name.\n",
|
|
"description = 'AutoML Model'\n",
|
|
"tags = None\n",
|
|
"ml_run.register_model(description = description, tags = tags, metric = metric)\n",
|
|
"print(ml_run.model_id) # Use this id to deploy the model as a web service in Azure."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Register the Model for Any Given Iteration"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"iteration = 1 # Replace with an iteration number.\n",
|
|
"description = 'AutoML Model'\n",
|
|
"tags = None\n",
|
|
"ml_run.register_model(description = description, tags = tags, iteration = iteration)\n",
|
|
"print(ml_run.model_id) # Use this id to deploy the model as a web service in Azure."
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"authors": [
|
|
{
|
|
"name": "savitam"
|
|
}
|
|
],
|
|
"kernelspec": {
|
|
"display_name": "Python 3.6",
|
|
"language": "python",
|
|
"name": "python36"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.6.6"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
} |