mirror of
https://github.com/Azure/MachineLearningNotebooks.git
synced 2025-12-23 02:52:39 -05:00
update notebooks for new version
This commit is contained in:
@@ -230,6 +230,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"'''\n",
|
||||
"from azureml.core.compute import RemoteCompute \n",
|
||||
"# if you want to connect using SSH key instead of username/password you can provide parameters private_key_file and private_key_passphrase \n",
|
||||
"attached_dsvm_compute = RemoteCompute.attach(workspace=ws,\n",
|
||||
@@ -238,7 +239,8 @@
|
||||
" address='<ip_adress_or_fqdn>',\n",
|
||||
" ssh_port=22,\n",
|
||||
" password='<password>')\n",
|
||||
"attached_dsvm_compute.wait_for_completion(show_output=True)"
|
||||
"attached_dsvm_compute.wait_for_completion(show_output=True)\n",
|
||||
"'''\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -246,7 +248,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Configure & Run\n",
|
||||
"First let's create a `DataReferenceConfigruation` object to inform the system what data folder to download to the copmute target."
|
||||
"First let's create a `DataReferenceConfiguration` object to inform the system what data folder to download to the compute target."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -292,7 +294,7 @@
|
||||
"# Set compute target to the Linux DSVM\n",
|
||||
"conda_run_config.target = dsvm_compute.name\n",
|
||||
"\n",
|
||||
"# set the data reference of the run coonfiguration\n",
|
||||
"# set the data reference of the run configuration\n",
|
||||
"conda_run_config.data_references = {ds.name: dr}\n",
|
||||
"\n",
|
||||
"# specify CondaDependencies obj\n",
|
||||
|
||||
@@ -110,7 +110,8 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Configure an ACI run"
|
||||
"### Configure an ACI run\n",
|
||||
"Before you try running on an actual Spark cluster, you can use a Docker image with Spark already baked in, and run it in ACI (Azure Container Instances)."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -180,13 +181,20 @@
|
||||
"run.wait_for_completion(show_output=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Note** you can also create a new VM, or attach an existing VM, and use Docker-based execution to run the Spark job. Please see `04.train-in-vm` for an example of how to configure and run in Docker mode in a VM."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Attach an HDI cluster\n",
|
||||
"Now we can use a real Spark cluster, HDInsight for Spark, to run this job. To use an HDI compute target:\n",
|
||||
" 1. Create an Spark for HDI cluster in Azure. Here is some [quick instructions](https://docs.microsoft.com/en-us/azure/machine-learning/desktop-workbench/how-to-create-dsvm-hdi). Make sure you use the Ubuntu flavor, NOT CentOS.\n",
|
||||
" 1. Create a Spark for HDI cluster in Azure. Here are some [quick instructions](https://docs.microsoft.com/en-us/azure/hdinsight/spark/apache-spark-jupyter-spark-sql). Make sure you use the Ubuntu flavor, NOT CentOS.\n",
|
||||
" 2. Enter the IP address, username and password below"
|
||||
]
|
||||
},
|
||||
@@ -203,7 +211,7 @@
|
||||
" # if you want to connect using SSH key instead of username/password you can provide parameters private_key_file and private_key_passphrase\n",
|
||||
" hdi_compute = HDInsightCompute.attach(workspace=ws, \n",
|
||||
" name=\"myhdi\", \n",
|
||||
" address=\"myhdi-ssh.azurehdinsight.net\", \n",
|
||||
" address=\"<myhdi-ssh>.azurehdinsight.net\", \n",
|
||||
" ssh_port=22, \n",
|
||||
" username='<ssh-username>', \n",
|
||||
" password='<ssh-pwd>')\n",
|
||||
@@ -232,16 +240,13 @@
|
||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Load the \"cpu-dsvm.runconfig\" file (created by the above attach operation) in memory\n",
|
||||
"# use pyspark framework\n",
|
||||
"hdi_run_config = RunConfiguration(framework=\"pyspark\")\n",
|
||||
"\n",
|
||||
"# Set compute target to the Linux DSVM\n",
|
||||
"# Set compute target to the HDI cluster\n",
|
||||
"hdi_run_config.target = hdi_compute.name\n",
|
||||
"\n",
|
||||
"# Ask system to provision a new one based on the conda_dependencies.yml file\n",
|
||||
"hdi_run_config.environment.python.user_managed_dependencies = False\n",
|
||||
"\n",
|
||||
"# specify CondaDependencies obj\n",
|
||||
"# specify a CondaDependencies object to ask the system to install numpy\n",
|
||||
"cd = CondaDependencies()\n",
|
||||
"cd.add_conda_package('numpy')\n",
|
||||
"hdi_run_config.environment.python.conda_dependencies = cd"
|
||||
@@ -265,7 +270,7 @@
|
||||
"script_run_config = ScriptRunConfig(source_directory = '.',\n",
|
||||
" script= 'train-spark.py',\n",
|
||||
" run_config = hdi_run_config)\n",
|
||||
"run = exp.submit(script_run_config)"
|
||||
"run = exp.submit(config=script_run_config)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -299,9 +304,9 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
"name": "python36"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
|
||||
@@ -136,6 +136,7 @@
|
||||
"from azureml.core.conda_dependencies import CondaDependencies \n",
|
||||
"\n",
|
||||
"myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn'])\n",
|
||||
"myenv.add_pip_package(\"pynacl==1.2.1\")\n",
|
||||
"\n",
|
||||
"with open(\"myenv.yml\",\"w\") as f:\n",
|
||||
" f.write(myenv.serialize_to_string())"
|
||||
|
||||
@@ -102,7 +102,7 @@
|
||||
"### b. In your init function add:\n",
|
||||
"```python \n",
|
||||
"global inputs_dc, prediction_dc\n",
|
||||
"inputs_dc = ModelDataCollector(\"best_model\", identifier=\"inputs\", feature_names=[\"feat1\", \"feat2\", \"feat3\". \"feat4\", \"feat5\", \"Feat6\"])\n",
|
||||
"inputs_dc = ModelDataCollector(\"best_model\", identifier=\"inputs\", feature_names=[\"feat1\", \"feat2\", \"feat3\", \"feat4\", \"feat5\", \"Feat6\"])\n",
|
||||
"prediction_dc = ModelDataCollector(\"best_model\", identifier=\"predictions\", feature_names=[\"prediction1\", \"prediction2\"])```\n",
|
||||
" \n",
|
||||
"* Identifier: Identifier is later used for building the folder structure in your Blob, it can be used to divide \"raw\" data versus \"processed\".\n",
|
||||
@@ -180,6 +180,7 @@
|
||||
"\n",
|
||||
"myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn'])\n",
|
||||
"myenv.add_pip_package(\"azureml-monitoring\")\n",
|
||||
"myenv.add_pip_package(\"pynacl==1.2.1\")\n",
|
||||
"\n",
|
||||
"with open(\"myenv.yml\",\"w\") as f:\n",
|
||||
" f.write(myenv.serialize_to_string())"
|
||||
@@ -286,7 +287,7 @@
|
||||
" create_name= 'myaks4'\n",
|
||||
" aks_target = AksCompute.attach(workspace = ws, \n",
|
||||
" name = create_name, \n",
|
||||
" #esource_id=resource_id)\n",
|
||||
" resource_id=resource_id)\n",
|
||||
" ## Wait for the operation to complete\n",
|
||||
" aks_target.wait_for_provisioning(True)```"
|
||||
]
|
||||
|
||||
@@ -13,25 +13,25 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# AutoML 00. configuration\n",
|
||||
"# AutoML 00. Configuration\n",
|
||||
"\n",
|
||||
"In this example you will create an Azure Machine Learning Workspace and initialize your notebook directory to easily use this workspace. Typically you will only need to run this once per notebook directory, and all other notebooks in this directory or any sub-directories will automatically use the settings you indicate here.\n",
|
||||
"In this example you will create an Azure Machine Learning `Workspace` object and initialize your notebook directory to easily reload this object from a configuration file. Typically you will only need to run this once per notebook directory, and all other notebooks in this directory or any sub-directories will automatically use the settings you indicate here.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"## Prerequisites:\n",
|
||||
"\n",
|
||||
"Before running this notebook, run the automl_setup script described in README.md.\n"
|
||||
"Before running this notebook, run the `automl_setup` script described in README.md.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Connect to your Azure Subscription\n",
|
||||
"### Connect to Your Azure Subscription\n",
|
||||
"\n",
|
||||
"In order to use an AML Workspace, first you need access to an Azure Subscription. You can [create your own](https://azure.microsoft.com/en-us/free/) or get your existing subscription information from the [Azure portal](https://portal.azure.com).\n",
|
||||
"In order to use an Azure ML workspace, you need access to an Azure Subscription. You can [create a new Azure Subscription](https://azure.microsoft.com/en-us/free) or get existing subscription information from the [Azure portal](https://portal.azure.com).\n",
|
||||
"\n",
|
||||
"First login to azure and follow prompts to authenticate. Then check that your subscription is correct"
|
||||
"First log in to Azure and follow the prompts to authenticate. Then check that your subscription is correct."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -56,7 +56,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you have multiple subscriptions and need to change the active one, you can use a command\n",
|
||||
"If you have multiple subscriptions and need to change the active one, you can use this command:\n",
|
||||
"```shell\n",
|
||||
"az account set -s <subscription-id>\n",
|
||||
"```"
|
||||
@@ -77,10 +77,10 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# register the new RP\n",
|
||||
"# Register the new resource provider.\n",
|
||||
"!az provider register -n Microsoft.MachineLearningServices\n",
|
||||
"\n",
|
||||
"# check the registration status\n",
|
||||
"# Check resource provider registration status.\n",
|
||||
"!az provider show -n Microsoft.MachineLearningServices"
|
||||
]
|
||||
},
|
||||
@@ -88,7 +88,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Check core SDK version number for validate your installation and for debugging purposes"
|
||||
"### Check the Azure ML Core SDK Version to Validate Your Installation"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -107,17 +107,17 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Initialize an Azure ML Workspace\n",
|
||||
"### What is an Azure ML Workspace and why do I need one?\n",
|
||||
"### What is an Azure ML Workspace and Why Do I Need One?\n",
|
||||
"\n",
|
||||
"An AML Workspace is an Azure resource that organaizes and coordinates the actions of many other Azure resources to assist in executing and sharing machine learning workflows. In particular, an AML Workspace coordinates storage, databases, and compute resources providing added functionality for machine learning experimentation, operationalization, and the monitoring of operationalized models.\n",
|
||||
"An Azure ML workspace is an Azure resource that organizes and coordinates the actions of many other Azure resources to assist in executing and sharing machine learning workflows. In particular, an Azure ML workspace coordinates storage, databases, and compute resources providing added functionality for machine learning experimentation, operationalization, and the monitoring of operationalized models.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"### What do I need\n",
|
||||
"### What Do I Need?\n",
|
||||
"\n",
|
||||
"To create or access an Azure ML Workspace, you will need to import the AML library and specify following information:\n",
|
||||
"To create or access an Azure ML workspace, you will need to import the Azure ML library and specify the following information:\n",
|
||||
"* A name for your workspace. You can choose one.\n",
|
||||
"* Your subscription id. Use *id* value from *az account show* output above. \n",
|
||||
"* The resource group name. Resource group organizes Azure resources and provides default region for the resources in the group. You can either specify a new one, in which case it gets created for your Workspace, or use an existing one or create a new one from [Azure portal](https://portal.azure.com)\n",
|
||||
"* Your subscription id. Use the `id` value from the `az account show` command output above.\n",
|
||||
"* The resource group name. The resource group organizes Azure resources and provides a default region for the resources in the group. You can either specify a new one, in which case it gets created for your workspace, or use an existing one or create a new one from the [Azure portal](https://portal.azure.com).\n",
|
||||
"* Supported regions include `eastus2`, `eastus`, `westcentralus`, `southeastasia`, `westeurope`, `australiaeast`, `westus2`, `southcentralus`."
|
||||
]
|
||||
},
|
||||
@@ -137,17 +137,17 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Creating a workspace\n",
|
||||
"If you already have access to an AML Workspace you want to use, you can skip this cell. Otherwise, this cell will create an AML workspace for you in a subscription provided you have the correct permissions for the given `subscription_id`.\n",
|
||||
"## Creating a Workspace\n",
|
||||
"If you already have access to an Azure ML workspace you want to use, you can skip this cell. Otherwise, this cell will create an Azure ML workspace for you in the specified subscription, provided you have the correct permissions for the given `subscription_id`.\n",
|
||||
"\n",
|
||||
"This will fail when:\n",
|
||||
"1. The workspace already exists\n",
|
||||
"2. You do not have permission to create a workspace in the resource group\n",
|
||||
"3. You are not a subscription owner or contributor and no Azure ML workspaces have ever been created in this subscription\n",
|
||||
"1. The workspace already exists.\n",
|
||||
"2. You do not have permission to create a workspace in the resource group.\n",
|
||||
"3. You are not a subscription owner or contributor and no Azure ML workspaces have ever been created in this subscription.\n",
|
||||
"\n",
|
||||
"If workspace creation fails for any reason other than already existing, please work with your IT admin to provide you with the appropriate permissions or to provision the required resources.\n",
|
||||
"If workspace creation fails for any reason other than already existing, please work with your IT administrator to provide you with the appropriate permissions or to provision the required resources.\n",
|
||||
"\n",
|
||||
"**Note** The workspace creation can take several minutes."
|
||||
"**Note:** Creation of a new workspace can take several minutes."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -156,7 +156,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# import the Workspace class and check the azureml SDK version\n",
|
||||
"# Import the Workspace class and check the Azure ML SDK version.\n",
|
||||
"from azureml.core import Workspace\n",
|
||||
"\n",
|
||||
"ws = Workspace.create(name = workspace_name,\n",
|
||||
@@ -170,7 +170,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Configuring your local environment\n",
|
||||
"## Configuring Your Local Environment\n",
|
||||
"You can validate that you have access to the specified workspace and write a configuration file to the default configuration location, `./aml_config/config.json`."
|
||||
]
|
||||
},
|
||||
@@ -186,7 +186,7 @@
|
||||
" subscription_id = subscription_id,\n",
|
||||
" resource_group = resource_group)\n",
|
||||
"\n",
|
||||
"# persist the subscription id, resource group name, and workspace name in aml_config/config.json.\n",
|
||||
"# Persist the subscription id, resource group name, and workspace name in aml_config/config.json.\n",
|
||||
"ws.write_config()"
|
||||
]
|
||||
},
|
||||
@@ -203,7 +203,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# load workspace configuratio from ./aml_config/config.json file.\n",
|
||||
"# Load workspace configuration from ./aml_config/config.json file.\n",
|
||||
"my_workspace = Workspace.from_config()\n",
|
||||
"my_workspace.get_details()"
|
||||
]
|
||||
@@ -212,8 +212,8 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create a folder to host all sample projects\n",
|
||||
"Lastly, create a folder where all the sample projects will be hosted."
|
||||
"## Create a Folder to Host All Sample Projects\n",
|
||||
"Finally, create a folder where all the sample projects will be hosted."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -13,27 +13,27 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# AutoML 01: Classification with local compute\n",
|
||||
"# AutoML 01: Classification with Local Compute\n",
|
||||
"\n",
|
||||
"In this example we use the scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html) to showcase how you can use AutoML for a simple classification problem.\n",
|
||||
"In this example we use scikit-learn's [digits dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html) to showcase how you can use AutoML for a simple classification problem.\n",
|
||||
"\n",
|
||||
"Make sure you have executed the [00.configuration](00.configuration.ipynb) before running this notebook.\n",
|
||||
"\n",
|
||||
"In this notebook you would see\n",
|
||||
"1. Creating an Experiment in an existing Workspace\n",
|
||||
"2. Instantiating AutoMLConfig\n",
|
||||
"3. Training the Model using local compute\n",
|
||||
"4. Exploring the results\n",
|
||||
"5. Testing the fitted model\n"
|
||||
"In this notebook you will learn how to:\n",
|
||||
"1. Create an `Experiment` in an existing `Workspace`.\n",
|
||||
"2. Configure AutoML using `AutoMLConfig`.\n",
|
||||
"3. Train the model using local compute.\n",
|
||||
"4. Explore the results.\n",
|
||||
"5. Test the best fitted model.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create Experiment\n",
|
||||
"## Create an Experiment\n",
|
||||
"\n",
|
||||
"As part of the setup you have already created a <b>Workspace</b>. For AutoML you would need to create an <b>Experiment</b>. An <b>Experiment</b> is a named object in a <b>Workspace</b>, which is used to run experiments."
|
||||
"As part of the setup you have already created an Azure ML `Workspace` object. For AutoML you will need to create an `Experiment` object, which is a named object in a `Workspace` used to run experiments."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -67,12 +67,11 @@
|
||||
"source": [
|
||||
"ws = Workspace.from_config()\n",
|
||||
"\n",
|
||||
"# choose a name for experiment\n",
|
||||
"# Choose a name for the experiment and specify the project folder.\n",
|
||||
"experiment_name = 'automl-local-classification'\n",
|
||||
"# project folder\n",
|
||||
"project_folder = './sample_projects/automl-local-classification'\n",
|
||||
"\n",
|
||||
"experiment=Experiment(ws, experiment_name)\n",
|
||||
"experiment = Experiment(ws, experiment_name)\n",
|
||||
"\n",
|
||||
"output = {}\n",
|
||||
"output['SDK version'] = azureml.core.VERSION\n",
|
||||
@@ -92,7 +91,7 @@
|
||||
"source": [
|
||||
"## Diagnostics\n",
|
||||
"\n",
|
||||
"Opt-in diagnostics for better experience, quality, and security of future releases"
|
||||
"Opt-in diagnostics for better experience, quality, and security of future releases."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -102,14 +101,14 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.telemetry import set_diagnostics_collection\n",
|
||||
"set_diagnostics_collection(send_diagnostics=True)"
|
||||
"set_diagnostics_collection(send_diagnostics = True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load Digits Dataset"
|
||||
"## Load Training Data"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -123,28 +122,28 @@
|
||||
"digits = datasets.load_digits()\n",
|
||||
"\n",
|
||||
"# Exclude the first 100 rows from training so that they can be used for test.\n",
|
||||
"X_digits = digits.data[100:,:]\n",
|
||||
"y_digits = digits.target[100:]"
|
||||
"X_train = digits.data[100:,:]\n",
|
||||
"y_train = digits.target[100:]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiate Auto ML Config\n",
|
||||
"## Configure AutoML\n",
|
||||
"\n",
|
||||
"Instantiate a AutoMLConfig object. This defines the settings and data used to run the experiment.\n",
|
||||
"Instantiate an `AutoMLConfig` object to specify the settings and data used to run the experiment.\n",
|
||||
"\n",
|
||||
"|Property|Description|\n",
|
||||
"|-|-|\n",
|
||||
"|**task**|classification or regression|\n",
|
||||
"|**primary_metric**|This is the metric that you want to optimize.<br> Classification supports the following primary metrics <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>balanced_accuracy</i><br><i>average_precision_score_weighted</i><br><i>precision_score_weighted</i>|\n",
|
||||
"|**max_time_sec**|Time limit in seconds for each iteration|\n",
|
||||
"|**iterations**|Number of iterations. In each iteration Auto ML trains a specific pipeline with the data |\n",
|
||||
"|**n_cross_validations**|Number of cross validation splits|\n",
|
||||
"|**primary_metric**|This is the metric that you want to optimize. Classification supports the following primary metrics: <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>balanced_accuracy</i><br><i>average_precision_score_weighted</i><br><i>precision_score_weighted</i>|\n",
|
||||
"|**max_time_sec**|Time limit in seconds for each iteration.|\n",
|
||||
"|**iterations**|Number of iterations. In each iteration AutoML trains a specific pipeline with the data.|\n",
|
||||
"|**n_cross_validations**|Number of cross validation splits.|\n",
|
||||
"|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n",
|
||||
"|**y**|(sparse) array-like, shape = [n_samples, ], [n_samples, n_classes]<br>Multi-class targets. An indicator matrix turns on multilabel classification. This should be an array of integers. |\n",
|
||||
"|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder. |"
|
||||
"|**y**|(sparse) array-like, shape = [n_samples, ], [n_samples, n_classes]<br>Multi-class targets. An indicator matrix turns on multilabel classification. This should be an array of integers.|\n",
|
||||
"|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder.|"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -160,19 +159,19 @@
|
||||
" iterations = 50,\n",
|
||||
" n_cross_validations = 3,\n",
|
||||
" verbosity = logging.INFO,\n",
|
||||
" X = X_digits, \n",
|
||||
" y = y_digits,\n",
|
||||
" path=project_folder)"
|
||||
" X = X_train, \n",
|
||||
" y = y_train,\n",
|
||||
" path = project_folder)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Training the Model\n",
|
||||
"## Train the Model\n",
|
||||
"\n",
|
||||
"You can call the submit method on the experiment object and pass the run configuration. For Local runs the execution is synchronous. Depending on the data and number of iterations this can run for while.\n",
|
||||
"You will see the currently running iterations printing to the console."
|
||||
"Call the `submit` method on the experiment object and pass the run configuration. Execution of local runs is synchronous. Depending on the data and the number of iterations this can run for a while.\n",
|
||||
"In this example, we specify `show_output = True` to print currently running iterations to the console."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -181,14 +180,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"local_run = experiment.submit(automl_config, show_output=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Optionally, you can continue an interrupted local run by calling continue_experiment without the <b>iterations</b> parameter, or run more iterations to a completed run by specifying the <b>iterations</b> parameter:"
|
||||
"local_run = experiment.submit(automl_config, show_output = True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -200,34 +192,50 @@
|
||||
"local_run"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Optionally, you can continue an interrupted local run by calling `continue_experiment` without the `iterations` parameter, or run more iterations for a completed run by specifying the `iterations` parameter:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"local_run = local_run.continue_experiment(X = X_digits, \n",
|
||||
" y = y_digits, \n",
|
||||
"local_run = local_run.continue_experiment(X = X_train, \n",
|
||||
" y = y_train, \n",
|
||||
" show_output = True,\n",
|
||||
" iterations = 5)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## Exploring the results"
|
||||
"local_run"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Widget for monitoring runs\n",
|
||||
"## Explore the Results"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Widget for Monitoring Runs\n",
|
||||
"\n",
|
||||
"The widget will sit on \"loading\" until the first iteration completed, then you will see an auto-updating graph and table show up. It refreshed once per minute, so you should see the graph update as child runs complete.\n",
|
||||
"The widget will first report a \"loading\" status while running the first iteration. After completing the first iteration, an auto-updating graph and table will be shown. The widget will refresh once per minute, so you should see the graph update as child runs complete.\n",
|
||||
"\n",
|
||||
"NOTE: The widget displays a link at the bottom. This links to a web-ui to explore the individual run details."
|
||||
"**Note:** The widget displays a link at the bottom. Use this link to open a web interface to explore the individual run details."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -246,7 +254,7 @@
|
||||
"source": [
|
||||
"\n",
|
||||
"#### Retrieve All Child Runs\n",
|
||||
"You can also use sdk methods to fetch all the child runs and see individual metrics that we log. "
|
||||
"You can also use SDK methods to fetch all the child runs and see individual metrics that we log."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -259,7 +267,7 @@
|
||||
"metricslist = {}\n",
|
||||
"for run in children:\n",
|
||||
" properties = run.get_properties()\n",
|
||||
" metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)} \n",
|
||||
" metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)}\n",
|
||||
" metricslist[int(properties['iteration'])] = metrics\n",
|
||||
"\n",
|
||||
"rundata = pd.DataFrame(metricslist).sort_index(1)\n",
|
||||
@@ -272,7 +280,7 @@
|
||||
"source": [
|
||||
"### Retrieve the Best Model\n",
|
||||
"\n",
|
||||
"Below we select the best pipeline from our iterations. The *get_output* method on automl_classifier returns the best run and the fitted model for the last *fit* invocation. There are overloads on *get_output* that allow you to retrieve the best run and fitted model for *any* logged metric or a particular *iteration*."
|
||||
"Below we select the best pipeline from our iterations. The `get_output` method on `automl_classifier` returns the best run and the fitted model for the last invocation. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -290,8 +298,8 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Best Model based on any other metric\n",
|
||||
"Give me the run and the model that has the smallest `log_loss`:"
|
||||
"#### Best Model Based on Any Other Metric\n",
|
||||
"Show the run and the model that has the smallest `log_loss` value:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -310,8 +318,8 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Model from a specific iteration\n",
|
||||
"Give me the run and the model from the 3rd iteration:"
|
||||
"#### Model from a Specific Iteration\n",
|
||||
"Show the run and the model from the third iteration:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -330,7 +338,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Testing the Fitted Model \n",
|
||||
"### Test the Best Fitted Model\n",
|
||||
"\n",
|
||||
"#### Load Test Data"
|
||||
]
|
||||
@@ -342,8 +350,8 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"digits = datasets.load_digits()\n",
|
||||
"X_digits = digits.data[:10, :]\n",
|
||||
"y_digits = digits.target[:10]\n",
|
||||
"X_test = digits.data[:10, :]\n",
|
||||
"y_test = digits.target[:10]\n",
|
||||
"images = digits.images[:10]"
|
||||
]
|
||||
},
|
||||
@@ -351,7 +359,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Testing our best pipeline\n",
|
||||
"#### Testing Our Best Pipeline\n",
|
||||
"We will try to predict 2 digits and see how our model works."
|
||||
]
|
||||
},
|
||||
@@ -361,16 +369,16 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#Randomly select digits and test\n",
|
||||
"for index in np.random.choice(len(y_digits), 2):\n",
|
||||
"# Randomly select digits and test.\n",
|
||||
"for index in np.random.choice(len(y_test), 2, replace = False):\n",
|
||||
" print(index)\n",
|
||||
" predicted = fitted_model.predict(X_digits[index:index + 1])[0]\n",
|
||||
" label = y_digits[index]\n",
|
||||
" title = \"Label value = %d Predicted value = %d \" % ( label,predicted)\n",
|
||||
" fig = plt.figure(1, figsize=(3,3))\n",
|
||||
" predicted = fitted_model.predict(X_test[index:index + 1])[0]\n",
|
||||
" label = y_test[index]\n",
|
||||
" title = \"Label value = %d Predicted value = %d \" % (label, predicted)\n",
|
||||
" fig = plt.figure(1, figsize = (3,3))\n",
|
||||
" ax1 = fig.add_axes((0,0,.8,.8))\n",
|
||||
" ax1.set_title(title)\n",
|
||||
" plt.imshow(images[index], cmap=plt.cm.gray_r, interpolation='nearest')\n",
|
||||
" plt.imshow(images[index], cmap = plt.cm.gray_r, interpolation = 'nearest')\n",
|
||||
" plt.show()"
|
||||
]
|
||||
}
|
||||
|
||||
@@ -13,27 +13,27 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# AutoML 02: Regression with local compute\n",
|
||||
"# AutoML 02: Regression with Local Compute\n",
|
||||
"\n",
|
||||
"In this example we use the scikit learn's [diabetes dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html) to showcase how you can use AutoML for a simple regression problem.\n",
|
||||
"In this example we use the scikit-learn's [diabetes dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html) to showcase how you can use AutoML for a simple regression problem.\n",
|
||||
"\n",
|
||||
"Make sure you have executed the [00.configuration](00.configuration.ipynb) before running this notebook.\n",
|
||||
"\n",
|
||||
"In this notebook you would see\n",
|
||||
"1. Creating an Experiment using an existing Workspace\n",
|
||||
"2. Instantiating AutoMLConfig\n",
|
||||
"3. Training the Model using local compute\n",
|
||||
"4. Exploring the results\n",
|
||||
"5. Testing the fitted model"
|
||||
"In this notebook you will learn how to:\n",
|
||||
"1. Create an `Experiment` in an existing `Workspace`.\n",
|
||||
"2. Configure AutoML using `AutoMLConfig`.\n",
|
||||
"3. Train the model using local compute.\n",
|
||||
"4. Explore the results.\n",
|
||||
"5. Test the best fitted model.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create Experiment\n",
|
||||
"## Create an Experiment\n",
|
||||
"\n",
|
||||
"As part of the setup you have already created a <b>Workspace</b>. For AutoML you would need to create an <b>Experiment</b>. An <b>Experiment</b> is a named object in a <b>Workspace</b>, which is used to run experiments."
|
||||
"As part of the setup you have already created an Azure ML `Workspace` object. For AutoML you will need to create an `Experiment` object, which is a named object in a `Workspace` used to run experiments."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -67,9 +67,8 @@
|
||||
"source": [
|
||||
"ws = Workspace.from_config()\n",
|
||||
"\n",
|
||||
"# choose a name for the experiment\n",
|
||||
"# Choose a name for the experiment and specify the project folder.\n",
|
||||
"experiment_name = 'automl-local-regression'\n",
|
||||
"# project folder\n",
|
||||
"project_folder = './sample_projects/automl-local-regression'\n",
|
||||
"\n",
|
||||
"experiment = Experiment(ws, experiment_name)\n",
|
||||
@@ -92,7 +91,7 @@
|
||||
"source": [
|
||||
"## Diagnostics\n",
|
||||
"\n",
|
||||
"Opt-in diagnostics for better experience, quality, and security of future releases"
|
||||
"Opt-in diagnostics for better experience, quality, and security of future releases."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -102,14 +101,14 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.telemetry import set_diagnostics_collection\n",
|
||||
"set_diagnostics_collection(send_diagnostics=True)"
|
||||
"set_diagnostics_collection(send_diagnostics = True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Read Data"
|
||||
"### Load Training Data"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -118,7 +117,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# load diabetes dataset, a well-known built-in small dataset that comes with scikit-learn\n",
|
||||
"# Load the diabetes dataset, a well-known built-in small dataset that comes with scikit-learn.\n",
|
||||
"from sklearn.datasets import load_diabetes\n",
|
||||
"from sklearn.linear_model import Ridge\n",
|
||||
"from sklearn.metrics import mean_squared_error\n",
|
||||
@@ -128,26 +127,26 @@
|
||||
"\n",
|
||||
"columns = ['age', 'gender', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6']\n",
|
||||
"\n",
|
||||
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)"
|
||||
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiate Auto ML Config\n",
|
||||
"## Configure AutoML\n",
|
||||
"\n",
|
||||
"Instantiate a AutoMLConfig object. This defines the settings and data used to run the experiment.\n",
|
||||
"Instantiate an `AutoMLConfig` object to specify the settings and data used to run the experiment.\n",
|
||||
"\n",
|
||||
"|Property|Description|\n",
|
||||
"|-|-|\n",
|
||||
"|**task**|classification or regression|\n",
|
||||
"|**primary_metric**|This is the metric that you want to optimize.<br> Regression supports the following primary metrics <br><i>spearman_correlation</i><br><i>normalized_root_mean_squared_error</i><br><i>r2_score</i><br><i>normalized_mean_absolute_error</i><br><i>normalized_root_mean_squared_log_error</i>|\n",
|
||||
"|**max_time_sec**|Time limit in seconds for each iteration|\n",
|
||||
"|**iterations**|Number of iterations. In each iteration Auto ML trains a specific pipeline with the data|\n",
|
||||
"|**n_cross_validations**|Number of cross validation splits|\n",
|
||||
"|**primary_metric**|This is the metric that you want to optimize. Regression supports the following primary metrics: <br><i>spearman_correlation</i><br><i>normalized_root_mean_squared_error</i><br><i>r2_score</i><br><i>normalized_mean_absolute_error</i>|\n",
|
||||
"|**max_time_sec**|Time limit in seconds for each iteration.|\n",
|
||||
"|**iterations**|Number of iterations. In each iteration AutoML trains a specific pipeline with the data.|\n",
|
||||
"|**n_cross_validations**|Number of cross validation splits.|\n",
|
||||
"|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n",
|
||||
"|**y**|(sparse) array-like, shape = [n_samples, ], [n_samples, n_classes]<br>Multi-class targets. An indicator matrix turns on multilabel classification. This should be an array of integers. |\n",
|
||||
"|**y**|(sparse) array-like, shape = [n_samples, ], [n_samples, n_classes]<br>Multi-class targets. An indicator matrix turns on multilabel classification. This should be an array of integers.|\n",
|
||||
"|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder.|"
|
||||
]
|
||||
},
|
||||
@@ -157,26 +156,26 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"automl_config = AutoMLConfig(task='regression',\n",
|
||||
"automl_config = AutoMLConfig(task = 'regression',\n",
|
||||
" max_time_sec = 600,\n",
|
||||
" iterations = 10,\n",
|
||||
" primary_metric = 'spearman_correlation', \n",
|
||||
" primary_metric = 'spearman_correlation',\n",
|
||||
" n_cross_validations = 5,\n",
|
||||
" debug_log = 'automl.log',\n",
|
||||
" verbosity = logging.INFO,\n",
|
||||
" X = X_train, \n",
|
||||
" y = y_train,\n",
|
||||
" path=project_folder)"
|
||||
" path = project_folder)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Training the Model\n",
|
||||
"## Train the Model\n",
|
||||
"\n",
|
||||
"You can call the submit method on the experiment object and pass the run configuration. For Local runs the execution is synchronous. Depending on the data and number of iterations this can run for while.\n",
|
||||
"You will see the currently running iterations printing to the console."
|
||||
"Call the `submit` method on the experiment object and pass the run configuration. Execution of local runs is synchronous. Depending on the data and the number of iterations this can run for a while.\n",
|
||||
"In this example, we specify `show_output = True` to print currently running iterations to the console."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -185,7 +184,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"local_run = experiment.submit(automl_config, show_output=True)"
|
||||
"local_run = experiment.submit(automl_config, show_output = True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -201,18 +200,18 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Exploring the results"
|
||||
"## Explore the Results"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Widget for monitoring runs\n",
|
||||
"#### Widget for Monitoring Runs\n",
|
||||
"\n",
|
||||
"The widget will sit on \"loading\" until the first iteration completed, then you will see an auto-updating graph and table show up. It refreshed once per minute, so you should see the graph update as child runs complete.\n",
|
||||
"The widget will first report a \"loading\" status while running the first iteration. After completing the first iteration, an auto-updating graph and table will be shown. The widget will refresh once per minute, so you should see the graph update as child runs complete.\n",
|
||||
"\n",
|
||||
"NOTE: The widget displays a link at the bottom. This links to a web-ui to explore the individual run details."
|
||||
"**Note:** The widget displays a link at the bottom. Use this link to open a web interface to explore the individual run details."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -231,7 +230,7 @@
|
||||
"source": [
|
||||
"\n",
|
||||
"#### Retrieve All Child Runs\n",
|
||||
"You can also use sdk methods to fetch all the child runs and see individual metrics that we log. "
|
||||
"You can also use SDK methods to fetch all the child runs and see individual metrics that we log."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -244,9 +243,9 @@
|
||||
"metricslist = {}\n",
|
||||
"for run in children:\n",
|
||||
" properties = run.get_properties()\n",
|
||||
" metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)} \n",
|
||||
" metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)}\n",
|
||||
" metricslist[int(properties['iteration'])] = metrics\n",
|
||||
" \n",
|
||||
"\n",
|
||||
"rundata = pd.DataFrame(metricslist).sort_index(1)\n",
|
||||
"rundata"
|
||||
]
|
||||
@@ -257,7 +256,7 @@
|
||||
"source": [
|
||||
"### Retrieve the Best Model\n",
|
||||
"\n",
|
||||
"Below we select the best pipeline from our iterations. The *get_output* method on automl_classifier returns the best run and the fitted model for the last *fit* invocation. There are overloads on *get_output* that allow you to retrieve the best run and fitted model for *any* logged metric or a particular *iteration*."
|
||||
"Below we select the best pipeline from our iterations. The `get_output` method on `local_run` returns the best run and the fitted model for the last invocation. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -275,8 +274,8 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Best Model based on any other metric\n",
|
||||
"Show the run and model that has the smallest `root_mean_squared_error` (which turned out to be the same as the one with largest `spearman_correlation` value):"
|
||||
"#### Best Model Based on Any Other Metric\n",
|
||||
"Show the run and the model that has the smallest `root_mean_squared_error` value (which turned out to be the same as the one with the largest `spearman_correlation` value):"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -286,7 +285,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"lookup_metric = \"root_mean_squared_error\"\n",
|
||||
"best_run, fitted_model = local_run.get_output(metric=lookup_metric)\n",
|
||||
"best_run, fitted_model = local_run.get_output(metric = lookup_metric)\n",
|
||||
"print(best_run)\n",
|
||||
"print(fitted_model)"
|
||||
]
|
||||
@@ -295,9 +294,8 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Model from a specific iteration\n",
|
||||
"\n",
|
||||
"Simply show the run and model from the 3rd iteration:"
|
||||
"#### Model from a Specific Iteration\n",
|
||||
"Show the run and the model from the third iteration:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -316,7 +314,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Testing the Fitted Model"
|
||||
"### Test the Best Fitted Model"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -351,13 +349,13 @@
|
||||
"from sklearn import datasets\n",
|
||||
"from sklearn.metrics import mean_squared_error, r2_score\n",
|
||||
"\n",
|
||||
"# set up a multi-plot chart\n",
|
||||
"# Set up a multi-plot chart.\n",
|
||||
"f, (a0, a1) = plt.subplots(1, 2, gridspec_kw = {'width_ratios':[1, 1], 'wspace':0, 'hspace': 0})\n",
|
||||
"f.suptitle('Regression Residual Values', fontsize = 18)\n",
|
||||
"f.set_figheight(6)\n",
|
||||
"f.set_figwidth(16)\n",
|
||||
"\n",
|
||||
"# plot residual values of training set\n",
|
||||
"# Plot residual values of training set.\n",
|
||||
"a0.axis([0, 360, -200, 200])\n",
|
||||
"a0.plot(y_residual_train, 'bo', alpha = 0.5)\n",
|
||||
"a0.plot([-10,360],[0,0], 'r-', lw = 3)\n",
|
||||
@@ -365,11 +363,12 @@
|
||||
"a0.text(16,140,'R2 score = {0:.2f}'.format(r2_score(y_train, y_pred_train)), fontsize = 12)\n",
|
||||
"a0.set_xlabel('Training samples', fontsize = 12)\n",
|
||||
"a0.set_ylabel('Residual Values', fontsize = 12)\n",
|
||||
"# plot histogram\n",
|
||||
"\n",
|
||||
"# Plot a histogram.\n",
|
||||
"a0.hist(y_residual_train, orientation = 'horizontal', color = 'b', bins = 10, histtype = 'step');\n",
|
||||
"a0.hist(y_residual_train, orientation = 'horizontal', color = 'b', alpha = 0.2, bins = 10);\n",
|
||||
"\n",
|
||||
"# plot residual values of test set\n",
|
||||
"# Plot residual values of test set.\n",
|
||||
"a1.axis([0, 90, -200, 200])\n",
|
||||
"a1.plot(y_residual_test, 'bo', alpha = 0.5)\n",
|
||||
"a1.plot([-10,360],[0,0], 'r-', lw = 3)\n",
|
||||
@@ -377,9 +376,10 @@
|
||||
"a1.text(5,140,'R2 score = {0:.2f}'.format(r2_score(y_test, y_pred_test)), fontsize = 12)\n",
|
||||
"a1.set_xlabel('Test samples', fontsize = 12)\n",
|
||||
"a1.set_yticklabels([])\n",
|
||||
"# plot histogram\n",
|
||||
"a1.hist(y_residual_test, orientation = 'horizontal', color = 'b', bins = 10, histtype = 'step');\n",
|
||||
"a1.hist(y_residual_test, orientation = 'horizontal', color = 'b', alpha = 0.2, bins = 10);\n",
|
||||
"\n",
|
||||
"# Plot a histogram.\n",
|
||||
"a1.hist(y_residual_test, orientation = 'horizontal', color = 'b', bins = 10, histtype = 'step')\n",
|
||||
"a1.hist(y_residual_test, orientation = 'horizontal', color = 'b', alpha = 0.2, bins = 10)\n",
|
||||
"\n",
|
||||
"plt.show()"
|
||||
]
|
||||
|
||||
@@ -15,33 +15,33 @@
|
||||
"source": [
|
||||
"# AutoML 03: Remote Execution using DSVM (Ubuntu)\n",
|
||||
"\n",
|
||||
"In this example we use the scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html) to showcase how you can use AutoML for a simple classification problem.\n",
|
||||
"In this example we use the scikit-learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html) to showcase how you can use AutoML for a simple classification problem.\n",
|
||||
"\n",
|
||||
"Make sure you have executed the [00.configuration](00.configuration.ipynb) before running this notebook.\n",
|
||||
"\n",
|
||||
"In this notebook you would see\n",
|
||||
"1. Creating an Experiment using an existing Workspace\n",
|
||||
"2. Attaching an existing DSVM to a workspace\n",
|
||||
"3. Instantiating AutoMLConfig \n",
|
||||
"4. Training the Model using the DSVM\n",
|
||||
"5. Exploring the results\n",
|
||||
"6. Testing the fitted model\n",
|
||||
"In this notebook you will learn how to:\n",
|
||||
"1. Create an `Experiment` in an existing `Workspace`.\n",
|
||||
"2. Attach an existing DSVM to a workspace.\n",
|
||||
"3. Configure AutoML using `AutoMLConfig`.\n",
|
||||
"4. Train the model using the DSVM.\n",
|
||||
"5. Explore the results.\n",
|
||||
"6. Test the best fitted model.\n",
|
||||
"\n",
|
||||
"In addition this notebook showcases the following features\n",
|
||||
"- **Parallel** Executions for iterations\n",
|
||||
"- Asyncronous tracking of progress\n",
|
||||
"- **Cancelling** individual iterations or the entire run\n",
|
||||
"In addition, this notebook showcases the following features:\n",
|
||||
"- **Parallel** executions for iterations\n",
|
||||
"- **Asynchronous** tracking of progress\n",
|
||||
"- **Cancellation** of individual iterations or the entire run\n",
|
||||
"- Retrieving models for any iteration or logged metric\n",
|
||||
"- specify automl settings as **kwargs**\n"
|
||||
"- Specifying AutoML settings as `**kwargs`\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create Experiment\n",
|
||||
"## Create an Experiment\n",
|
||||
"\n",
|
||||
"As part of the setup you have already created a workspace. For AutoML you would need to create a <b>Experiment</b>. An <b>Experiment</b> is a named object in a <b>Workspace</b>, which is used to run experiments."
|
||||
"As part of the setup you have already created an Azure ML `Workspace` object. For AutoML you will need to create an `Experiment` object, which is a named object in a `Workspace` used to run experiments."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -75,12 +75,11 @@
|
||||
"source": [
|
||||
"ws = Workspace.from_config()\n",
|
||||
"\n",
|
||||
"# choose a name for the run history container in the workspace\n",
|
||||
"# Choose a name for the run history container in the workspace.\n",
|
||||
"experiment_name = 'automl-remote-dsvm4'\n",
|
||||
"# project folder\n",
|
||||
"project_folder = './sample_projects/automl-remote-dsvm4'\n",
|
||||
"\n",
|
||||
"experiment=Experiment(ws, experiment_name)\n",
|
||||
"experiment = Experiment(ws, experiment_name)\n",
|
||||
"\n",
|
||||
"output = {}\n",
|
||||
"output['SDK version'] = azureml.core.VERSION\n",
|
||||
@@ -100,7 +99,7 @@
|
||||
"source": [
|
||||
"## Diagnostics\n",
|
||||
"\n",
|
||||
"Opt-in diagnostics for better experience, quality, and security of future releases"
|
||||
"Opt-in diagnostics for better experience, quality, and security of future releases."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -110,7 +109,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.telemetry import set_diagnostics_collection\n",
|
||||
"set_diagnostics_collection(send_diagnostics=True)"
|
||||
"set_diagnostics_collection(send_diagnostics = True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -118,9 +117,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create a Remote Linux DSVM\n",
|
||||
"Note: If creation fails with a message about Marketplace purchase eligibilty, go to portal.azure.com, start creating DSVM there, and select \"Want to create programmatically\" to enable programmatic creation. Once you've enabled it, you can exit without actually creating VM.\n",
|
||||
"\n",
|
||||
"**Note**: By default SSH runs on port 22 and you don't need to specify it. But if for security reasons you can switch to a different port (such as 5022), you can append the port number to the address. [Read more](https://render.githubusercontent.com/documentation/sdk/ssh-issue.md) on this."
|
||||
"**Note:** If creation fails with a message about Marketplace purchase eligibility, start creation of a DSVM through the [Azure portal](https://portal.azure.com), and select \"Want to create programmatically\" to enable programmatic creation. Once you've enabled this setting, you can exit the portal without actually creating the DSVM, and creation of the DSVM through the notebook should work.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -134,9 +131,9 @@
|
||||
"dsvm_name = 'mydsvm'\n",
|
||||
"try:\n",
|
||||
" dsvm_compute = DsvmCompute(ws, dsvm_name)\n",
|
||||
" print('found existing dsvm.')\n",
|
||||
" print('Found an existing DSVM.')\n",
|
||||
"except:\n",
|
||||
" print('creating new dsvm.')\n",
|
||||
" print('Creating a new DSVM.')\n",
|
||||
" dsvm_config = DsvmCompute.provisioning_configuration(vm_size = \"Standard_D2_v2\")\n",
|
||||
" dsvm_compute = DsvmCompute.create(ws, name = dsvm_name, provisioning_configuration = dsvm_config)\n",
|
||||
" dsvm_compute.wait_for_completion(show_output = True)"
|
||||
@@ -147,7 +144,8 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create Get Data File\n",
|
||||
"For remote executions you should author a get_data.py file containing a get_data() function. This file should be in the root directory of the project. You can encapsulate code to read data either from a blob storage or local disk in this file."
|
||||
"For remote executions you should author a `get_data.py` file containing a `get_data()` function. This file should be in the root directory of the project. You can encapsulate code to read data either from a blob storage or local disk in this file.\n",
|
||||
"In this example, the `get_data()` function returns data from scikit-learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html)."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -175,29 +173,29 @@
|
||||
"def get_data():\n",
|
||||
" \n",
|
||||
" digits = datasets.load_digits()\n",
|
||||
" X_digits = digits.data[100:,:]\n",
|
||||
" y_digits = digits.target[100:]\n",
|
||||
" X_train = digits.data[100:,:]\n",
|
||||
" y_train = digits.target[100:]\n",
|
||||
"\n",
|
||||
" return { \"X\" : X_digits, \"y\" : y_digits }"
|
||||
" return { \"X\" : X_train, \"y\" : y_train }"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiate AutoML <a class=\"anchor\" id=\"Instatiate-AutoML-Remote-DSVM\"></a>\n",
|
||||
"## Configure AutoML <a class=\"anchor\" id=\"Instantiate-AutoML-Remote-DSVM\"></a>\n",
|
||||
"\n",
|
||||
"You can specify automl_settings as **kwargs** as well. Also note that you can use the get_data() symantic for local excutions too. \n",
|
||||
"You can specify `automl_settings` as `**kwargs` as well. Also note that you can use a `get_data()` function for local executions too.\n",
|
||||
"\n",
|
||||
"<i>Note: For Remote DSVM and Batch AI you cannot pass Numpy arrays directly to the fit method.</i>\n",
|
||||
"**Note:** When using Remote DSVM, you can't pass NumPy arrays directly to the fit method.\n",
|
||||
"\n",
|
||||
"|Property|Description|\n",
|
||||
"|-|-|\n",
|
||||
"|**primary_metric**|This is the metric that you want to optimize.<br> Classification supports the following primary metrics <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>balanced_accuracy</i><br><i>average_precision_score_weighted</i><br><i>precision_score_weighted</i>|\n",
|
||||
"|**max_time_sec**|Time limit in seconds for each iteration|\n",
|
||||
"|**iterations**|Number of iterations. In each iteration Auto ML trains a specific pipeline with the data|\n",
|
||||
"|**n_cross_validations**|Number of cross validation splits|\n",
|
||||
"|**concurrent_iterations**|Max number of iterations that would be executed in parallel. This should be less than the number of cores on the DSVM."
|
||||
"|**primary_metric**|This is the metric that you want to optimize. Classification supports the following primary metrics: <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>balanced_accuracy</i><br><i>average_precision_score_weighted</i><br><i>precision_score_weighted</i>|\n",
|
||||
"|**max_time_sec**|Time limit in seconds for each iteration.|\n",
|
||||
"|**iterations**|Number of iterations. In each iteration AutoML trains a specific pipeline with the data.|\n",
|
||||
"|**n_cross_validations**|Number of cross validation splits.|\n",
|
||||
"|**concurrent_iterations**|Maximum number of iterations to execute in parallel. This should be less than the number of cores on the DSVM.|"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -218,7 +216,7 @@
|
||||
"\n",
|
||||
"automl_config = AutoMLConfig(task = 'classification',\n",
|
||||
" debug_log = 'automl_errors.log',\n",
|
||||
" path=project_folder, \n",
|
||||
" path = project_folder, \n",
|
||||
" compute_target = dsvm_compute,\n",
|
||||
" data_script = project_folder + \"/get_data.py\",\n",
|
||||
" **automl_settings\n",
|
||||
@@ -229,7 +227,18 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"<b>Note</b> that the first run on a new DSVM may take a several minutes to preparing the environment."
|
||||
"**Note:** The first run on a new DSVM may take several minutes to prepare the environment."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Train the Model\n",
|
||||
"\n",
|
||||
"Call the `submit` method on the experiment object and pass the run configuration. For remote runs the execution is asynchronous, so you will see the iterations get populated as they complete. You can interact with the widgets and models even when the experiment is running to retrieve the best model up to that point. Once you are satisfied with the model, you can cancel a particular iteration or the whole run.\n",
|
||||
"\n",
|
||||
"In this example, we specify `show_output = False` to suppress console output while the run is in progress."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -238,37 +247,37 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"remote_run = experiment.submit(automl_config, show_output=False)"
|
||||
"remote_run = experiment.submit(automl_config, show_output = False)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Exploring the Results\n",
|
||||
"## Explore the Results\n",
|
||||
"\n",
|
||||
"#### Loading executed runs\n",
|
||||
"In case you need to load a previously executed run given a run id please enable the below cell"
|
||||
"#### Loading Executed Runs\n",
|
||||
"In case you need to load a previously executed run, enable the cell below and replace the `run_id` value."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"remote_run = AutoMLRun(experiment=experiment, run_id='AutoML_480d3ed6-fc94-44aa-8f4e-0b945db9d3ef')"
|
||||
"remote_run = AutoMLRun(experiment=experiment, run_id = 'AutoML_480d3ed6-fc94-44aa-8f4e-0b945db9d3ef')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Widget for monitoring runs\n",
|
||||
"#### Widget for Monitoring Runs\n",
|
||||
"\n",
|
||||
"The widget will sit on \"loading\" until the first iteration completed, then you will see an auto-updating graph and table show up. It refreshed once per minute, so you should see the graph update as child runs complete.\n",
|
||||
"The widget will first report a \"loading\" status while running the first iteration. After completing the first iteration, an auto-updating graph and table will be shown. The widget will refresh once per minute, so you should see the graph update as child runs complete.\n",
|
||||
"\n",
|
||||
"You can click on a pipeline to see run properties and output logs. Logs are also available on the DSVM under /tmp/azureml_run/{iterationid}/azureml-logs\n",
|
||||
"You can click on a pipeline to see run properties and output logs. Logs are also available on the DSVM under `/tmp/azureml_run/{iterationid}/azureml-logs`\n",
|
||||
"\n",
|
||||
"NOTE: The widget displays a link at the bottom. This links to a web-ui to explore the individual run details."
|
||||
"**Note:** The widget displays a link at the bottom. Use this link to open a web interface to explore the individual run details."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -287,7 +296,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# wait till the run finishes\n",
|
||||
"# Wait until the run finishes.\n",
|
||||
"remote_run.wait_for_completion(show_output = True)"
|
||||
]
|
||||
},
|
||||
@@ -297,7 +306,7 @@
|
||||
"source": [
|
||||
"\n",
|
||||
"#### Retrieve All Child Runs\n",
|
||||
"You can also use sdk methods to fetch all the child runs and see individual metrics that we log. "
|
||||
"You can also use SDK methods to fetch all the child runs and see individual metrics that we log."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -321,9 +330,9 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Canceling runs\n",
|
||||
"## Cancelling Runs\n",
|
||||
"\n",
|
||||
"You can cancel ongoing remote runs using the *cancel()* and *cancel_iteration()* functions"
|
||||
"You can cancel ongoing remote runs using the `cancel` and `cancel_iteration` functions."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -332,10 +341,10 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Cancel the ongoing experiment and stop scheduling new iterations\n",
|
||||
"# Cancel the ongoing experiment and stop scheduling new iterations.\n",
|
||||
"# remote_run.cancel()\n",
|
||||
"\n",
|
||||
"# Cancel iteration 1 and move onto iteration 2\n",
|
||||
"# Cancel iteration 1 and move onto iteration 2.\n",
|
||||
"# remote_run.cancel_iteration(1)"
|
||||
]
|
||||
},
|
||||
@@ -345,7 +354,7 @@
|
||||
"source": [
|
||||
"### Retrieve the Best Model\n",
|
||||
"\n",
|
||||
"Below we select the best pipeline from our iterations. The *get_output* method on automl_classifier returns the best run and the fitted model for the last *fit* invocation. There are overloads on *get_output* that allow you to retrieve the best run and fitted model for *any* logged metric or a particular *iteration*."
|
||||
"Below we select the best pipeline from our iterations. The `get_output` method on `remote_run` returns the best run and the fitted model for the last invocation. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -363,8 +372,8 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Best Model based on any other metric\n",
|
||||
"Show the run/model which has the smallest `log_loss` value."
|
||||
"#### Best Model Based on Any Other Metric\n",
|
||||
"Show the run and the model which has the smallest `log_loss` value:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -383,8 +392,8 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Model from a specific iteration\n",
|
||||
"Show the run and model from the 3rd iteration."
|
||||
"#### Model from a Specific Iteration\n",
|
||||
"Show the run and the model from the third iteration:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -394,7 +403,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"iteration = 3\n",
|
||||
"third_run, third_model = remote_run.get_output(iteration=iteration)\n",
|
||||
"third_run, third_model = remote_run.get_output(iteration = iteration)\n",
|
||||
"print(third_run)\n",
|
||||
"print(third_model)"
|
||||
]
|
||||
@@ -403,7 +412,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Testing the Fitted Model <a class=\"anchor\" id=\"Testing-the-Fitted-Model-Remote-DSVM\"></a>\n",
|
||||
"### Test the Best Fitted Model <a class=\"anchor\" id=\"Testing-the-Fitted-Model-Remote-DSVM\"></a>\n",
|
||||
"\n",
|
||||
"#### Load Test Data"
|
||||
]
|
||||
@@ -415,8 +424,8 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"digits = datasets.load_digits()\n",
|
||||
"X_digits = digits.data[:10, :]\n",
|
||||
"y_digits = digits.target[:10]\n",
|
||||
"X_test = digits.data[:10, :]\n",
|
||||
"y_test = digits.target[:10]\n",
|
||||
"images = digits.images[:10]"
|
||||
]
|
||||
},
|
||||
@@ -424,7 +433,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Testing our best pipeline"
|
||||
"#### Test Our Best Pipeline"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -433,16 +442,16 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#Randomly select digits and test\n",
|
||||
"for index in np.random.choice(len(y_digits), 2):\n",
|
||||
"# Randomly select digits and test.\n",
|
||||
"for index in np.random.choice(len(y_test), 2, replace = False):\n",
|
||||
" print(index)\n",
|
||||
" predicted = fitted_model.predict(X_digits[index:index + 1])[0]\n",
|
||||
" label = y_digits[index]\n",
|
||||
" title = \"Label value = %d Predicted value = %d \" % ( label,predicted)\n",
|
||||
" predicted = fitted_model.predict(X_test[index:index + 1])[0]\n",
|
||||
" label = y_test[index]\n",
|
||||
" title = \"Label value = %d Predicted value = %d \" % (label, predicted)\n",
|
||||
" fig = plt.figure(1, figsize=(3,3))\n",
|
||||
" ax1 = fig.add_axes((0,0,.8,.8))\n",
|
||||
" ax1.set_title(title)\n",
|
||||
" plt.imshow(images[index], cmap=plt.cm.gray_r, interpolation='nearest')\n",
|
||||
" plt.imshow(images[index], cmap = plt.cm.gray_r, interpolation = 'nearest')\n",
|
||||
" plt.show()"
|
||||
]
|
||||
}
|
||||
|
||||
@@ -15,33 +15,33 @@
|
||||
"source": [
|
||||
"# AutoML 03: Remote Execution using Batch AI\n",
|
||||
"\n",
|
||||
"In this example we use the scikit learn's [diabetes dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html) to showcase how you can use AutoML for a simple classification problem.\n",
|
||||
"In this example we use the scikit-learn's [diabetes dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html) to showcase how you can use AutoML for a simple classification problem.\n",
|
||||
"\n",
|
||||
"Make sure you have executed the [setup](setup.ipynb) before running this notebook.\n",
|
||||
"Make sure you have executed the [00.configuration](00.configuration.ipynb) before running this notebook.\n",
|
||||
"\n",
|
||||
"In this notebook you would see\n",
|
||||
"1. Creating an Experiment using an existing Workspace\n",
|
||||
"2. Attaching an existing Batch AI compute to a workspace\n",
|
||||
"3. Instantiating AutoMLConfig \n",
|
||||
"4. Training the Model using the Batch AI\n",
|
||||
"5. Exploring the results\n",
|
||||
"6. Testing the fitted model\n",
|
||||
"1. Create an `Experiment` in an existing `Workspace`.\n",
|
||||
"2. Attach an existing Batch AI compute to a workspace.\n",
|
||||
"3. Configure AutoML using `AutoMLConfig`.\n",
|
||||
"4. Train the model using Batch AI.\n",
|
||||
"5. Explore the results.\n",
|
||||
"6. Test the best fitted model.\n",
|
||||
"\n",
|
||||
"In addition this notebook showcases the following features\n",
|
||||
"- **Parallel** Executions for iterations\n",
|
||||
"- Asyncronous tracking of progress\n",
|
||||
"- **Cancelling** individual iterations or the entire run\n",
|
||||
"- **Parallel** executions for iterations\n",
|
||||
"- **Asynchronous** tracking of progress\n",
|
||||
"- **Cancellation** of individual iterations or the entire run\n",
|
||||
"- Retrieving models for any iteration or logged metric\n",
|
||||
"- specify automl settings as **kwargs**\n"
|
||||
"- Specifying AutoML settings as `**kwargs`\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create Experiment\n",
|
||||
"## Create an Experiment\n",
|
||||
"\n",
|
||||
"As part of the setup you have already created a workspace. For AutoML you would need to create a <b>Experiment</b>. An <b>Experiment</b> is a named object in a <b>Workspace</b>, which is used to run experiments."
|
||||
"As part of the setup you have already created an Azure ML `Workspace` object. For AutoML you will need to create an `Experiment` object, which is a named object in a `Workspace` used to run experiments."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -75,12 +75,11 @@
|
||||
"source": [
|
||||
"ws = Workspace.from_config()\n",
|
||||
"\n",
|
||||
"# choose a name for the run history container in the workspace\n",
|
||||
"# Choose a name for the run history container in the workspace.\n",
|
||||
"experiment_name = 'automl-remote-batchai'\n",
|
||||
"# project folder\n",
|
||||
"project_folder = './sample_projects/automl-remote-batchai'\n",
|
||||
"\n",
|
||||
"experiment=Experiment(ws, experiment_name)\n",
|
||||
"experiment = Experiment(ws, experiment_name)\n",
|
||||
"\n",
|
||||
"output = {}\n",
|
||||
"output['SDK version'] = azureml.core.VERSION\n",
|
||||
@@ -100,7 +99,7 @@
|
||||
"source": [
|
||||
"## Diagnostics\n",
|
||||
"\n",
|
||||
"Opt-in diagnostics for better experience, quality, and security of future releases"
|
||||
"Opt-in diagnostics for better experience, quality, and security of future releases."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -110,7 +109,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.telemetry import set_diagnostics_collection\n",
|
||||
"set_diagnostics_collection(send_diagnostics=True)"
|
||||
"set_diagnostics_collection(send_diagnostics = True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -120,9 +119,9 @@
|
||||
"## Create Batch AI Cluster\n",
|
||||
"The cluster is created as Machine Learning Compute and will appear under your workspace.\n",
|
||||
"\n",
|
||||
"<b>Note</b>: The cluster creation can take over 10 minutes, please be patient.\n",
|
||||
"**Note:** The creation of the Batch AI cluster can take over 10 minutes, please be patient.\n",
|
||||
"\n",
|
||||
"As with other Azure services, there are limits on certain resources (for eg. BatchAI cluster size) associated with the Azure Machine Learning service. Please read [this article](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota."
|
||||
"As with other Azure services, there are limits on certain resources (e.g. Batch AI cluster size) associated with the Azure Machine Learning service. Please read [this article](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -134,35 +133,35 @@
|
||||
"from azureml.core.compute import BatchAiCompute\n",
|
||||
"from azureml.core.compute import ComputeTarget\n",
|
||||
"\n",
|
||||
"# choose a name for your cluster\n",
|
||||
"batchai_cluster_name = ws.name + \"cpu\"\n",
|
||||
"# Choose a name for your cluster.\n",
|
||||
"batchai_cluster_name = \"mybatchai\"\n",
|
||||
"\n",
|
||||
"found = False\n",
|
||||
"# see if this compute target already exists in the workspace\n",
|
||||
"for ct in ws.compute_targets():\n",
|
||||
"# Check if this compute target already exists in the workspace.\n",
|
||||
"for ct_name, ct in ws.compute_targets().items():\n",
|
||||
" print(ct.name, ct.type)\n",
|
||||
" if (ct.name == batchai_cluster_name and ct.type == 'BatchAI'):\n",
|
||||
" found = True\n",
|
||||
" print('found compute target. just use it.')\n",
|
||||
" print('Found existing compute target.')\n",
|
||||
" compute_target = ct\n",
|
||||
" break\n",
|
||||
" \n",
|
||||
"if not found:\n",
|
||||
" print('creating a new compute target...')\n",
|
||||
" print('Creating a new compute target...')\n",
|
||||
" provisioning_config = BatchAiCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\", # for GPU, use \"STANDARD_NC6\"\n",
|
||||
" #vm_priority = 'lowpriority', # optional\n",
|
||||
" autoscale_enabled = True,\n",
|
||||
" cluster_min_nodes = 1, \n",
|
||||
" cluster_max_nodes = 4)\n",
|
||||
"\n",
|
||||
" # create the cluster\n",
|
||||
" compute_target = ComputeTarget.create(ws,batchai_cluster_name, provisioning_config)\n",
|
||||
" # Create the cluster.\n",
|
||||
" compute_target = ComputeTarget.create(ws, batchai_cluster_name, provisioning_config)\n",
|
||||
" \n",
|
||||
" # can poll for a minimum number of nodes and for a specific timeout. \n",
|
||||
" # if no min node count is provided it will use the scale settings for the cluster\n",
|
||||
" compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)\n",
|
||||
" # Can poll for a minimum number of nodes and for a specific timeout.\n",
|
||||
" # If no min_node_count is provided, it will use the scale settings for the cluster.\n",
|
||||
" compute_target.wait_for_completion(show_output = True, min_node_count = None, timeout_in_minutes = 20)\n",
|
||||
" \n",
|
||||
" # For a more detailed view of current BatchAI cluster status, use the 'status' property "
|
||||
" # For a more detailed view of current Batch AI cluster status, use the 'status' property."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -170,7 +169,8 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create Get Data File\n",
|
||||
"For remote executions you should author a get_data.py file containing a get_data() function. This file should be in the root directory of the project. You can encapsulate code to read data either from a blob storage or local disk in this file."
|
||||
"For remote executions you should author a `get_data.py` file containing a `get_data()` function. This file should be in the root directory of the project. You can encapsulate code to read data either from a blob storage or local disk in this file.\n",
|
||||
"In this example, the `get_data()` function returns data from scikit-learn's [diabetes dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html)."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -198,10 +198,10 @@
|
||||
"def get_data():\n",
|
||||
" \n",
|
||||
" digits = datasets.load_digits()\n",
|
||||
" X_digits = digits.data\n",
|
||||
" y_digits = digits.target\n",
|
||||
" X_train = digits.data\n",
|
||||
" y_train = digits.target\n",
|
||||
"\n",
|
||||
" return { \"X\" : X_digits, \"y\" : y_digits }"
|
||||
" return { \"X\" : X_train, \"y\" : y_train }"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -210,17 +210,17 @@
|
||||
"source": [
|
||||
"## Instantiate AutoML <a class=\"anchor\" id=\"Instatiate-AutoML-Remote-DSVM\"></a>\n",
|
||||
"\n",
|
||||
"You can specify automl_settings as **kwargs** as well. Also note that you can use the get_data() symantic for local excutions too. \n",
|
||||
"You can specify `automl_settings` as `**kwargs` as well. Also note that you can use a `get_data()` function for local excutions too.\n",
|
||||
"\n",
|
||||
"<i>Note: For Remote DSVM and Batch AI you cannot pass Numpy arrays directly to the fit method.</i>\n",
|
||||
"**Note:** When using Batch AI, you can't pass Numpy arrays directly to the fit method.\n",
|
||||
"\n",
|
||||
"|Property|Description|\n",
|
||||
"|-|-|\n",
|
||||
"|**primary_metric**|This is the metric that you want to optimize.<br> Classification supports the following primary metrics <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>balanced_accuracy</i><br><i>average_precision_score_weighted</i><br><i>precision_score_weighted</i>|\n",
|
||||
"|**max_time_sec**|Time limit in seconds for each iteration|\n",
|
||||
"|**iterations**|Number of iterations. In each iteration Auto ML trains a specific pipeline with the data|\n",
|
||||
"|**n_cross_validations**|Number of cross validation splits|\n",
|
||||
"|**concurrent_iterations**|Max number of iterations that would be executed in parallel. This should be less than the number of cores on the DSVM."
|
||||
"|**primary_metric**|This is the metric that you want to optimize. Classification supports the following primary metrics: <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>balanced_accuracy</i><br><i>average_precision_score_weighted</i><br><i>precision_score_weighted</i>|\n",
|
||||
"|**max_time_sec**|Time limit in seconds for each iteration.|\n",
|
||||
"|**iterations**|Number of iterations. In each iteration AutoML trains a specific pipeline with the data.|\n",
|
||||
"|**n_cross_validations**|Number of cross validation splits.|\n",
|
||||
"|**concurrent_iterations**|Maximum number of iterations that would be executed in parallel. This should be less than the number of cores on the DSVM.|"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -241,50 +241,60 @@
|
||||
"\n",
|
||||
"automl_config = AutoMLConfig(task = 'classification',\n",
|
||||
" debug_log = 'automl_errors.log',\n",
|
||||
" path=project_folder,\n",
|
||||
" path = project_folder,\n",
|
||||
" compute_target = compute_target,\n",
|
||||
" data_script = project_folder + \"/get_data.py\",\n",
|
||||
" **automl_settings\n",
|
||||
" )\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Train the Model\n",
|
||||
"\n",
|
||||
"Call the `submit` method on the experiment object and pass the run configuration. For remote runs the execution is asynchronous, so you will see the iterations get populated as they complete. You can interact with the widgets and models even when the experiment is running to retrieve the best model up to that point. Once you are satisfied with the model, you can cancel a particular iteration or the whole run.\n",
|
||||
"In this example, we specify `show_output = False` to suppress console output while the run is in progress."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"remote_run = experiment.submit(automl_config, show_output=False)"
|
||||
"remote_run = experiment.submit(automl_config, show_output = False)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Exploring the Results\n",
|
||||
"## Explore the Results\n",
|
||||
"\n",
|
||||
"#### Loading executed runs\n",
|
||||
"In case you need to load a previously executed run given a run id please enable the below cell"
|
||||
"In case you need to load a previously executed run, enable the cell below and replace the `run_id` value."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "raw",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"remote_run = AutoMLRun(experiment=experiment, run_id='AutoML_5db13491-c92a-4f1d-b622-8ab8d973a058')"
|
||||
"remote_run = AutoMLRun(experiment = experiment, run_id = 'AutoML_5db13491-c92a-4f1d-b622-8ab8d973a058')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Widget for monitoring runs\n",
|
||||
"#### Widget for Monitoring Runs\n",
|
||||
"\n",
|
||||
"The widget will sit on \"loading\" until the first iteration completed, then you will see an auto-updating graph and table show up. It refreshed once per minute, so you should see the graph update as child runs complete.\n",
|
||||
"The widget will first report a \"loading\" status while running the first iteration. After completing the first iteration, an auto-updating graph and table will be shown. The widget will refresh once per minute, so you should see the graph update as child runs complete.\n",
|
||||
"\n",
|
||||
"You can click on a pipeline to see run properties and output logs. Logs are also available on the DSVM under /tmp/azureml_run/{iterationid}/azureml-logs\n",
|
||||
"You can click on a pipeline to see run properties and output logs. Logs are also available on the DSVM under `/tmp/azureml_run/{iterationid}/azureml-logs`\n",
|
||||
"\n",
|
||||
"NOTE: The widget displays a link at the bottom. This links to a web-ui to explore the individual run details."
|
||||
"**Note:** The widget displays a link at the bottom. Use this link to open a web interface to explore the individual run details."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -312,7 +322,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# wait till the run finishes\n",
|
||||
"# Wait until the run finishes.\n",
|
||||
"remote_run.wait_for_completion(show_output = True)"
|
||||
]
|
||||
},
|
||||
@@ -322,7 +332,7 @@
|
||||
"source": [
|
||||
"\n",
|
||||
"#### Retrieve All Child Runs\n",
|
||||
"You can also use sdk methods to fetch all the child runs and see individual metrics that we log. "
|
||||
"You can also use SDK methods to fetch all the child runs and see individual metrics that we log."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -335,7 +345,7 @@
|
||||
"metricslist = {}\n",
|
||||
"for run in children:\n",
|
||||
" properties = run.get_properties()\n",
|
||||
" metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)} \n",
|
||||
" metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)}\n",
|
||||
" metricslist[int(properties['iteration'])] = metrics\n",
|
||||
"\n",
|
||||
"rundata = pd.DataFrame(metricslist).sort_index(1)\n",
|
||||
@@ -346,9 +356,9 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Canceling runs\n",
|
||||
"## Cancelling runs\n",
|
||||
"\n",
|
||||
"You can cancel ongoing remote runs using the *cancel()* and *cancel_iteration()* functions"
|
||||
"You can cancel ongoing remote runs using the `cancel` and `cancel_iteration` functions."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -357,10 +367,10 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Cancel the ongoing experiment and stop scheduling new iterations\n",
|
||||
"# Cancel the ongoing experiment and stop scheduling new iterations.\n",
|
||||
"# remote_run.cancel()\n",
|
||||
"\n",
|
||||
"# Cancel iteration 1 and move onto iteration 2\n",
|
||||
"# Cancel iteration 1 and move onto iteration 2.\n",
|
||||
"# remote_run.cancel_iteration(1)"
|
||||
]
|
||||
},
|
||||
@@ -370,7 +380,7 @@
|
||||
"source": [
|
||||
"### Retrieve the Best Model\n",
|
||||
"\n",
|
||||
"Below we select the best pipeline from our iterations. The *get_output* method on automl_classifier returns the best run and the fitted model for the last *fit* invocation. There are overloads on *get_output* that allow you to retrieve the best run and fitted model for *any* logged metric or a particular *iteration*."
|
||||
"Below we select the best pipeline from our iterations. The `get_output` method on `automl_classifier` returns the best run and the fitted model for the last invocation. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -388,8 +398,8 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Best Model based on any other metric\n",
|
||||
"Show the run/model which has the smallest `log_loss` value."
|
||||
"#### Best Model Based on Any Other Metric\n",
|
||||
"Show the run and the model which has the smallest `log_loss` value:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -408,8 +418,8 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Model from a specific iteration\n",
|
||||
"Show the run and model from the 3rd iteration."
|
||||
"#### Model from a Specific Iteration\n",
|
||||
"Show the run and the model from the third iteration:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -428,7 +438,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Register fitted model for deployment"
|
||||
"### Register the Fitted Model for Deployment"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -439,8 +449,8 @@
|
||||
"source": [
|
||||
"description = 'AutoML Model'\n",
|
||||
"tags = None\n",
|
||||
"remote_run.register_model(description=description, tags=tags)\n",
|
||||
"remote_run.model_id # Use this id to deploy the model as a web service in Azure"
|
||||
"remote_run.register_model(description = description, tags = tags)\n",
|
||||
"remote_run.model_id # Use this id to deploy the model as a web service in Azure."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -459,8 +469,8 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"digits = datasets.load_digits()\n",
|
||||
"X_digits = digits.data[:10, :]\n",
|
||||
"y_digits = digits.target[:10]\n",
|
||||
"X_test = digits.data[:10, :]\n",
|
||||
"y_test = digits.target[:10]\n",
|
||||
"images = digits.images[:10]"
|
||||
]
|
||||
},
|
||||
@@ -468,7 +478,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Testing our best pipeline"
|
||||
"#### Testing Our Best Pipeline"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -477,25 +487,18 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#Randomly select digits and test\n",
|
||||
"for index in np.random.choice(len(y_digits), 2):\n",
|
||||
"# Randomly select digits and test.\n",
|
||||
"for index in np.random.choice(len(y_test), 2, replace = False):\n",
|
||||
" print(index)\n",
|
||||
" predicted = fitted_model.predict(X_digits[index:index + 1])[0]\n",
|
||||
" label = y_digits[index]\n",
|
||||
" title = \"Label value = %d Predicted value = %d \" % ( label,predicted)\n",
|
||||
" predicted = fitted_model.predict(X_test[index:index + 1])[0]\n",
|
||||
" label = y_test[index]\n",
|
||||
" title = \"Label value = %d Predicted value = %d \" % (label, predicted)\n",
|
||||
" fig = plt.figure(1, figsize=(3,3))\n",
|
||||
" ax1 = fig.add_axes((0,0,.8,.8))\n",
|
||||
" ax1.set_title(title)\n",
|
||||
" plt.imshow(images[index], cmap=plt.cm.gray_r, interpolation='nearest')\n",
|
||||
" plt.imshow(images[index], cmap = plt.cm.gray_r, interpolation = 'nearest')\n",
|
||||
" plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
@@ -13,36 +13,36 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Auto ML : Remote Execution with Text data from Blobstorage\n",
|
||||
"# Auto ML 04: Remote Execution with Text Data from Azure Blob Storage\n",
|
||||
"\n",
|
||||
"In this example we use the [Burning Man 2016 dataset](https://innovate.burningman.org/datasets-page/) to showcase how you can use AutoML to handle text data from a Azure blobstorage.\n",
|
||||
"In this example we use the [Burning Man 2016 dataset](https://innovate.burningman.org/datasets-page/) to showcase how you can use AutoML to handle text data from Azure Blob Storage.\n",
|
||||
"\n",
|
||||
"Make sure you have executed the [00.configuration](00.configuration.ipynb) before running this notebook.\n",
|
||||
"\n",
|
||||
"In this notebook you would see\n",
|
||||
"1. Creating an Experiment using an existing Workspace\n",
|
||||
"2. Attaching an existing DSVM to a workspace\n",
|
||||
"3. Instantiating AutoMLConfig \n",
|
||||
"4. Training the Model using the DSVM\n",
|
||||
"5. Exploring the results\n",
|
||||
"6. Testing the fitted model\n",
|
||||
"In this notebook you will learn how to:\n",
|
||||
"1. Create an `Experiment` in an existing `Workspace`.\n",
|
||||
"2. Attach an existing DSVM to a workspace.\n",
|
||||
"3. Configure AutoML using `AutoMLConfig`.\n",
|
||||
"4. Train the model using the DSVM.\n",
|
||||
"5. Explore the results.\n",
|
||||
"6. Test the best fitted model.\n",
|
||||
"\n",
|
||||
"In addition this notebook showcases the following features\n",
|
||||
"- **Parallel** Executions for iterations\n",
|
||||
"- Asyncronous tracking of progress\n",
|
||||
"- **Cancelling** individual iterations or the entire run\n",
|
||||
"- **Parallel** executions for iterations\n",
|
||||
"- **Asynchronous** tracking of progress\n",
|
||||
"- **Cancellation** of individual iterations or the entire run\n",
|
||||
"- Retrieving models for any iteration or logged metric\n",
|
||||
"- specify automl settings as **kwargs**\n",
|
||||
"- handling **text** data with **preprocess** flag\n"
|
||||
"- Specifying AutoML settings as `**kwargs`\n",
|
||||
"- Handling **text** data using the `preprocess` flag\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create Experiment\n",
|
||||
"## Create an Experiment\n",
|
||||
"\n",
|
||||
"As part of the setup you have already created a <b>Workspace</b>. For AutoML you would need to create an <b>Experiment</b>. An <b>Experiment</b> is a named object in a <b>Workspace</b>, which is used to run experiments."
|
||||
"As part of the setup you have already created an Azure ML `Workspace` object. For AutoML you will need to create an `Experiment` object, which is a named object in a `Workspace` used to run experiments."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -76,9 +76,8 @@
|
||||
"source": [
|
||||
"ws = Workspace.from_config()\n",
|
||||
"\n",
|
||||
"# choose a name for the run history container in the workspace\n",
|
||||
"# Choose a name for the run history container in the workspace.\n",
|
||||
"experiment_name = 'automl-remote-dsvm-blobstore'\n",
|
||||
"# project folder\n",
|
||||
"project_folder = './sample_projects/automl-remote-dsvm-blobstore'\n",
|
||||
"\n",
|
||||
"experiment = Experiment(ws, experiment_name)\n",
|
||||
@@ -101,7 +100,7 @@
|
||||
"source": [
|
||||
"## Diagnostics\n",
|
||||
"\n",
|
||||
"Opt-in diagnostics for better experience, quality, and security of future releases"
|
||||
"Opt-in diagnostics for better experience, quality, and security of future releases."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -111,7 +110,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.telemetry import set_diagnostics_collection\n",
|
||||
"set_diagnostics_collection(send_diagnostics=True)"
|
||||
"set_diagnostics_collection(send_diagnostics = True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -119,11 +118,11 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Attach a Remote Linux DSVM\n",
|
||||
"To use remote docker commpute target:\n",
|
||||
"1. Create a Linux DSVM in Azure. Here is some [quick instructions](https://docs.microsoft.com/en-us/azure/machine-learning/desktop-workbench/how-to-create-dsvm-hdi). Make sure you use the Ubuntu flavor, NOT CentOS. Make sure that disk space is available under /tmp because AutoML creates files under /tmp/azureml_runs. The DSVM should have more cores than the number of parallel runs that you plan to enable. It should also have at least 4Gb per core.\n",
|
||||
"2. Enter the IP address, username and password below\n",
|
||||
"To use a remote Docker compute target:\n",
|
||||
"1. Create a Linux DSVM in Azure, following these [quick instructions](https://docs.microsoft.com/en-us/azure/machine-learning/desktop-workbench/how-to-create-dsvm-hdi). Make sure you use the Ubuntu flavor (not CentOS). Make sure that disk space is available under `/tmp` because AutoML creates files under `/tmp/azureml_run`s. The DSVM should have more cores than the number of parallel runs that you plan to enable. It should also have at least 4GB per core.\n",
|
||||
"2. Enter the IP address, user name and password below.\n",
|
||||
"\n",
|
||||
"**Note**: By default SSH runs on port 22 and you don't need to specify it. But if for security reasons you can switch to a different port (such as 5022), you can append the port number to the address. [Read more](https://render.githubusercontent.com/documentation/sdk/ssh-issue.md) on this."
|
||||
"**Note:** By default, SSH runs on port 22 and you don't need to change the port number below. If you've configured SSH to use a different port, change `dsvm_ssh_port` accordinglyaddress. [Read more](https://render.githubusercontent.com/documentation/sdk/ssh-issue.md) on changing SSH ports for security reasons."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -137,10 +136,11 @@
|
||||
"# Add your VM information below\n",
|
||||
"dsvm_name = 'mydsvm1'\n",
|
||||
"dsvm_ip_addr = '<<ip_addr>>'\n",
|
||||
"dsvm_ssh_port = 22\n",
|
||||
"dsvm_username = '<<username>>'\n",
|
||||
"dsvm_password = '<<password>>'\n",
|
||||
"\n",
|
||||
"dsvm_compute = RemoteCompute.attach(workspace=ws, name=dsvm_name, address=dsvm_ip_addr, username=dsvm_username, password=dsvm_password, ssh_port=22)"
|
||||
"dsvm_compute = RemoteCompute.attach(workspace=ws, name=dsvm_name, address=dsvm_ip_addr, username=dsvm_username, password=dsvm_password, ssh_port=dsvm_ssh_port)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -148,9 +148,8 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create Get Data File\n",
|
||||
"For remote executions you should author a get_data.py file containing a get_data() function. This file should be in the root directory of the project. You can encapsulate code to read data either from a blob storage or local disk in this file.\n",
|
||||
"\n",
|
||||
"The *get_data()* function returns a [dictionary](README.md#getdata)."
|
||||
"For remote executions you should author a `get_data.py` file containing a `get_data()` function. This file should be in the root directory of the project. You can encapsulate code to read data either from a blob storage or local disk in this file.\n",
|
||||
"In this example, the `get_data()` function returns a [dictionary](README.md#getdata)."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -176,16 +175,16 @@
|
||||
"from sklearn.preprocessing import LabelEncoder\n",
|
||||
"\n",
|
||||
"def get_data():\n",
|
||||
" # Burning man 2016 data\n",
|
||||
" # Load Burning Man 2016 data.\n",
|
||||
" df = pd.read_csv(\"https://automldemods.blob.core.windows.net/datasets/PlayaEvents2016,_1.6MB,_3.4k-rows.cleaned.2.tsv\",\n",
|
||||
" delimiter=\"\\t\", quotechar='\"')\n",
|
||||
" # get integer labels\n",
|
||||
" # Get integer labels.\n",
|
||||
" le = LabelEncoder()\n",
|
||||
" le.fit(df[\"Label\"].values)\n",
|
||||
" y = le.transform(df[\"Label\"].values)\n",
|
||||
" df = df.drop([\"Label\"], axis=1)\n",
|
||||
"\n",
|
||||
" df_train, _, y_train, _ = train_test_split(df, y, test_size=0.1, random_state=42)\n",
|
||||
" df_train, _, y_train, _ = train_test_split(df, y, test_size = 0.1, random_state = 42)\n",
|
||||
"\n",
|
||||
" return { \"X\" : df, \"y\" : y }"
|
||||
]
|
||||
@@ -196,7 +195,7 @@
|
||||
"source": [
|
||||
"### View data\n",
|
||||
"\n",
|
||||
"You can execute the *get_data()* function locally to view the *train* data"
|
||||
"You can execute the `get_data()` function locally to view the training data."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -218,21 +217,21 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiate AutoML <a class=\"anchor\" id=\"Instatiate-AutoML-Remote-DSVM\"></a>\n",
|
||||
"## Configure AutoML <a class=\"anchor\" id=\"Instatiate-AutoML-Remote-DSVM\"></a>\n",
|
||||
"\n",
|
||||
"You can specify automl_settings as **kwargs** as well. Also note that you can use the get_data() symantic for local excutions too. \n",
|
||||
"You can specify `automl_settings` as `**kwargs` as well. Also note that you can use a `get_data()` function for local excutions too.\n",
|
||||
"\n",
|
||||
"<i>Note: For Remote DSVM and Batch AI you cannot pass Numpy arrays directly to the fit method.</i>\n",
|
||||
"**Note:** When using Remote DSVM, you can't pass Numpy arrays directly to the fit method.\n",
|
||||
"\n",
|
||||
"|Property|Description|\n",
|
||||
"|-|-|\n",
|
||||
"|**primary_metric**|This is the metric that you want to optimize.<br> Classification supports the following primary metrics <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>balanced_accuracy</i><br><i>average_precision_score_weighted</i><br><i>precision_score_weighted</i>|\n",
|
||||
"|**max_time_sec**|Time limit in seconds for each iteration|\n",
|
||||
"|**iterations**|Number of iterations. In each iteration Auto ML trains a specific pipeline with the data|\n",
|
||||
"|**n_cross_validations**|Number of cross validation splits|\n",
|
||||
"|**concurrent_iterations**|Max number of iterations that would be executed in parallel. This should be less than the number of cores on the DSVM\n",
|
||||
"|**preprocess**| *True/False* <br>Setting this to *True* enables AutoML to perform preprocessing <br>on the input to handle *missing data*, and perform some common *feature extraction*|\n",
|
||||
"|**max_cores_per_iteration**| Indicates how many cores on the compute target would be used to train a single pipeline.<br> Default is *1*, you can set it to *-1* to use all cores|"
|
||||
"|**primary_metric**|This is the metric that you want to optimize. Classification supports the following primary metrics: <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>balanced_accuracy</i><br><i>average_precision_score_weighted</i><br><i>precision_score_weighted</i>|\n",
|
||||
"|**max_time_sec**|Time limit in seconds for each iteration.|\n",
|
||||
"|**iterations**|Number of iterations. In each iteration AutoML trains a specific pipeline with the data.|\n",
|
||||
"|**n_cross_validations**|Number of cross validation splits.|\n",
|
||||
"|**concurrent_iterations**|Maximum number of iterations that would be executed in parallel. This should be less than the number of cores on the DSVM.|\n",
|
||||
"|**preprocess**|Setting this to *True* enables AutoML to perform preprocessing on the input to handle *missing data*, and to perform some common *feature extraction*.|\n",
|
||||
"|**max_cores_per_iteration**|Indicates how many cores on the compute target would be used to train a single pipeline.<br>Default is *1*; you can set it to *-1* to use all cores.|"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -251,7 +250,7 @@
|
||||
"}\n",
|
||||
"\n",
|
||||
"automl_config = AutoMLConfig(task = 'classification',\n",
|
||||
" path=project_folder,\n",
|
||||
" path = project_folder,\n",
|
||||
" compute_target = dsvm_compute,\n",
|
||||
" data_script = project_folder + \"/get_data.py\",\n",
|
||||
" **automl_settings\n",
|
||||
@@ -262,9 +261,9 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Training the Model <a class=\"anchor\" id=\"Training-the-model-Remote-DSVM\"></a>\n",
|
||||
"## Train the Model <a class=\"anchor\" id=\"Training-the-model-Remote-DSVM\"></a>\n",
|
||||
"\n",
|
||||
"For remote runs the execution is asynchronous, so you will see the iterations get populated as they complete. You can interact with the widgets/models even when the experiment is running to retreive the best model up to that point. Once you are satisfied with the model you can cancel a particular iteration or the whole run."
|
||||
"Call the `submit` method on the experiment object and pass the run configuration. For remote runs the execution is asynchronous, so you will see the iterations get populated as they complete. You can interact with the widgets and models even when the experiment is running to retrieve the best model up to that point. Once you are satisfied with the model, you can cancel a particular iteration or the whole run."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -280,14 +279,14 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Exploring the Results <a class=\"anchor\" id=\"Exploring-the-Results-Remote-DSVM\"></a>\n",
|
||||
"#### Widget for monitoring runs\n",
|
||||
"## Exploring the results <a class=\"anchor\" id=\"Exploring-the-Results-Remote-DSVM\"></a>\n",
|
||||
"#### Widget for Monitoring Runs\n",
|
||||
"\n",
|
||||
"The widget will sit on \"loading\" until the first iteration completed, then you will see an auto-updating graph and table show up. It refreshed once per minute, so you should see the graph update as child runs complete.\n",
|
||||
"The widget will first report a \"loading\" status while running the first iteration. After completing the first iteration, an auto-updating graph and table will be shown. The widget will refresh once per minute, so you should see the graph update as child runs complete.\n",
|
||||
"\n",
|
||||
"You can click on a pipeline to see run properties and output logs. Logs are also available on the DSVM under /tmp/azureml_run/{iterationid}/azureml-logs\n",
|
||||
"You can click on a pipeline to see run properties and output logs. Logs are also available on the DSVM under `/tmp/azureml_run/{iterationid}/azureml-logs`\n",
|
||||
"\n",
|
||||
"NOTE: The widget displays a link at the bottom. This links to a web-ui to explore the individual run details."
|
||||
"**Note:** The widget displays a link at the bottom. Use this link to open a web interface to explore the individual run details."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -306,7 +305,7 @@
|
||||
"source": [
|
||||
"\n",
|
||||
"#### Retrieve All Child Runs\n",
|
||||
"You can also use sdk methods to fetch all the child runs and see individual metrics that we log. "
|
||||
"You can also use SDK methods to fetch all the child runs and see individual metrics that we log. "
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -319,7 +318,7 @@
|
||||
"metricslist = {}\n",
|
||||
"for run in children:\n",
|
||||
" properties = run.get_properties()\n",
|
||||
" metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)} \n",
|
||||
" metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)}\n",
|
||||
" metricslist[int(properties['iteration'])] = metrics\n",
|
||||
"\n",
|
||||
"rundata = pd.DataFrame(metricslist).sort_index(1)\n",
|
||||
@@ -330,8 +329,8 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Canceling runs\n",
|
||||
"You can cancel ongoing remote runs using the *cancel()* and *cancel_iteration()* functions"
|
||||
"## Cancelling runs\n",
|
||||
"You can cancel ongoing remote runs using the `cancel` and `cancel_iteration` functions."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -340,10 +339,10 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Cancel the ongoing experiment and stop scheduling new iterations\n",
|
||||
"# Cancel the ongoing experiment and stop scheduling new iterations.\n",
|
||||
"remote_run.cancel()\n",
|
||||
"\n",
|
||||
"# Cancel iteration 1 and move onto iteration 2\n",
|
||||
"# Cancel iteration 1 and move onto iteration 2.\n",
|
||||
"# remote_run.cancel_iteration(1)"
|
||||
]
|
||||
},
|
||||
@@ -353,7 +352,7 @@
|
||||
"source": [
|
||||
"### Retrieve the Best Model\n",
|
||||
"\n",
|
||||
"Below we select the best pipeline from our iterations. The *get_output* method on automl_classifier returns the best run and the fitted model for the last *fit* invocation. There are overloads on *get_output* that allow you to retrieve the best run and fitted model for *any* logged metric or a particular *iteration*."
|
||||
"Below we select the best pipeline from our iterations. The `get_output` method on `automl_classifier` returns the best run and the fitted model for the last invocation. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -371,7 +370,8 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Best Model based on any other metric"
|
||||
"#### Best Model Based on Any Other Metric\n",
|
||||
"Show the run and the model which has the best `accuracy` value:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -381,14 +381,14 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# lookup_metric = \"accuracy\"\n",
|
||||
"# best_run, fitted_model = remote_run.get_output(metric=lookup_metric)"
|
||||
"# best_run, fitted_model = remote_run.get_output(metric = lookup_metric)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Model from a specific iteration"
|
||||
"#### Model from a Specific Iteration"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -398,14 +398,14 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"iteration = 0\n",
|
||||
"zero_run, zero_model = remote_run.get_output(iteration=iteration)"
|
||||
"zero_run, zero_model = remote_run.get_output(iteration = iteration)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Register fitted model for deployment"
|
||||
"### Register the Fitted Model for Deployment"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -416,8 +416,8 @@
|
||||
"source": [
|
||||
"description = 'AutoML Model'\n",
|
||||
"tags = None\n",
|
||||
"remote_run.register_model(description=description, tags=tags)\n",
|
||||
"remote_run.model_id # Use this id to deploy the model as a web service in Azure"
|
||||
"remote_run.register_model(description = description, tags = tags)\n",
|
||||
"print(remote_run.model_id) # Use this id to deploy the model as a web service in Azure."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -462,13 +462,6 @@
|
||||
"\n",
|
||||
"cm.plot()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
@@ -13,33 +13,32 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# AutoML 05 : Blacklisting models, Early termination and handling missing data\n",
|
||||
"# AutoML 05: Blacklisting Models, Early Termination, and Handling Missing Data\n",
|
||||
"\n",
|
||||
"In this example we use the scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html) to showcase how you can use AutoML for handling missing values in data. We also provide a stopping metric indicating a target for the primary metric so that AutoML can terminate the run without necessarly going through all the iterations. Finally, if you want to avoid a certain pipeline, we allow you to specify a black list of algos that AutoML will ignore for this run.\n",
|
||||
"In this example we use scikit-learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html) to showcase how you can use AutoML for handling missing values in data. We also provide a stopping metric indicating a target for the primary metric so that AutoML can terminate the run without necessarily going through all the iterations. Finally, if you want to avoid a certain pipeline, we allow you to specify a blacklist of algorithms that AutoML will ignore for this run.\n",
|
||||
"\n",
|
||||
"Make sure you have executed the [00.configuration](00.configuration.ipynb) before running this notebook.\n",
|
||||
"\n",
|
||||
"In this notebook you would see\n",
|
||||
"1. Creating an Experiment using an existing Workspace\n",
|
||||
"2. Instantiating AutoMLConfig\n",
|
||||
"4. Training the Model\n",
|
||||
"5. Exploring the results\n",
|
||||
"6. Testing the fitted model\n",
|
||||
"In this notebook you will learn how to:\n",
|
||||
"1. Create an `Experiment` in an existing `Workspace`.\n",
|
||||
"2. Configure AutoML using `AutoMLConfig`.\n",
|
||||
"3. Train the model.\n",
|
||||
"4. Explore the results.\n",
|
||||
"5. Test the best fitted model.\n",
|
||||
"\n",
|
||||
"In addition this notebook showcases the following features\n",
|
||||
"- **Blacklist** certain pipelines\n",
|
||||
"- Specify a **target metrics** to indicate stopping criteria\n",
|
||||
"- Handling **Missing Data** in the input\n"
|
||||
"- **Blacklisting** certain pipelines\n",
|
||||
"- Specifying **target metrics** to indicate stopping criteria\n",
|
||||
"- Handling **missing data** in the input\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create an Experiment\n",
|
||||
"\n",
|
||||
"## Create Experiment\n",
|
||||
"\n",
|
||||
"As part of the setup you have already created a <b>Workspace</b>. For AutoML you would need to create an <b>Experiment</b>. An <b>Experiment</b> is a named object in a <b>Workspace</b>, which is used to run experiments."
|
||||
"As part of the setup you have already created an Azure ML `Workspace` object. For AutoML you will need to create an `Experiment` object, which is a named object in a `Workspace` used to run experiments."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -73,12 +72,11 @@
|
||||
"source": [
|
||||
"ws = Workspace.from_config()\n",
|
||||
"\n",
|
||||
"# choose a name for the experiment\n",
|
||||
"# Choose a name for the experiment.\n",
|
||||
"experiment_name = 'automl-local-missing-data'\n",
|
||||
"# project folder\n",
|
||||
"project_folder = './sample_projects/automl-local-missing-data'\n",
|
||||
"\n",
|
||||
"experiment=Experiment(ws, experiment_name)\n",
|
||||
"experiment = Experiment(ws, experiment_name)\n",
|
||||
"\n",
|
||||
"output = {}\n",
|
||||
"output['SDK version'] = azureml.core.VERSION\n",
|
||||
@@ -98,7 +96,7 @@
|
||||
"source": [
|
||||
"## Diagnostics\n",
|
||||
"\n",
|
||||
"Opt-in diagnostics for better experience, quality, and security of future releases"
|
||||
"Opt-in diagnostics for better experience, quality, and security of future releases."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -108,14 +106,14 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.telemetry import set_diagnostics_collection\n",
|
||||
"set_diagnostics_collection(send_diagnostics=True)"
|
||||
"set_diagnostics_collection(send_diagnostics = True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Creating Missing Data"
|
||||
"### Creating missing data"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -127,17 +125,17 @@
|
||||
"from scipy import sparse\n",
|
||||
"\n",
|
||||
"digits = datasets.load_digits()\n",
|
||||
"X_digits = digits.data[10:,:]\n",
|
||||
"y_digits = digits.target[10:]\n",
|
||||
"X_train = digits.data[10:,:]\n",
|
||||
"y_train = digits.target[10:]\n",
|
||||
"\n",
|
||||
"# Add missing values in 75% of the lines\n",
|
||||
"# Add missing values in 75% of the lines.\n",
|
||||
"missing_rate = 0.75\n",
|
||||
"n_missing_samples = int(np.floor(X_digits.shape[0] * missing_rate))\n",
|
||||
"missing_samples = np.hstack((np.zeros(X_digits.shape[0] - n_missing_samples, dtype=np.bool), np.ones(n_missing_samples, dtype=np.bool)))\n",
|
||||
"n_missing_samples = int(np.floor(X_train.shape[0] * missing_rate))\n",
|
||||
"missing_samples = np.hstack((np.zeros(X_train.shape[0] - n_missing_samples, dtype=np.bool), np.ones(n_missing_samples, dtype=np.bool)))\n",
|
||||
"rng = np.random.RandomState(0)\n",
|
||||
"rng.shuffle(missing_samples)\n",
|
||||
"missing_features = rng.randint(0, X_digits.shape[1], n_missing_samples)\n",
|
||||
"X_digits[np.where(missing_samples)[0], missing_features] = np.nan"
|
||||
"missing_features = rng.randint(0, X_train.shape[1], n_missing_samples)\n",
|
||||
"X_train[np.where(missing_samples)[0], missing_features] = np.nan"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -146,8 +144,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df = pd.DataFrame(data=X_digits)\n",
|
||||
"df['Label'] = pd.Series(y_digits, index=df.index)\n",
|
||||
"df = pd.DataFrame(data = X_train)\n",
|
||||
"df['Label'] = pd.Series(y_train, index=df.index)\n",
|
||||
"df.head()"
|
||||
]
|
||||
},
|
||||
@@ -155,24 +153,23 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiate Auto ML Config\n",
|
||||
"## Configure AutoML\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"This defines the settings and data used to run the experiment.\n",
|
||||
"Instantiate an `AutoMLConfig` object to specify the settings and data used to run the experiment.\n",
|
||||
"\n",
|
||||
"|Property|Description|\n",
|
||||
"|-|-|\n",
|
||||
"|**task**|classification or regression|\n",
|
||||
"|**primary_metric**|This is the metric that you want to optimize.<br> Classification supports the following primary metrics <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>balanced_accuracy</i><br><i>average_precision_score_weighted</i><br><i>precision_score_weighted</i>|\n",
|
||||
"|**max_time_sec**|Time limit in seconds for each iteration|\n",
|
||||
"|**iterations**|Number of iterations. In each iteration Auto ML trains the data with a specific pipeline|\n",
|
||||
"|**n_cross_validations**|Number of cross validation splits|\n",
|
||||
"|**preprocess**| *True/False* <br>Setting this to *True* enables Auto ML to perform preprocessing <br>on the input to handle *missing data*, and perform some common *feature extraction*|\n",
|
||||
"|**exit_score**|*double* value indicating the target for *primary_metric*. <br> Once the target is surpassed the run terminates|\n",
|
||||
"|**blacklist_algos**|*Array* of *strings* indicating pipelines to ignore for Auto ML.<br><br> Allowed values for **Classification**<br><i>LogisticRegression</i><br><i>SGDClassifierWrapper</i><br><i>NBWrapper</i><br><i>BernoulliNB</i><br><i>SVCWrapper</i><br><i>LinearSVMWrapper</i><br><i>KNeighborsClassifier</i><br><i>DecisionTreeClassifier</i><br><i>RandomForestClassifier</i><br><i>ExtraTreesClassifier</i><br><i>LightGBMClassifier</i><br><br>Allowed values for **Regression**<br><i>ElasticNet<i><br><i>GradientBoostingRegressor<i><br><i>DecisionTreeRegressor<i><br><i>KNeighborsRegressor<i><br><i>LassoLars<i><br><i>SGDRegressor<i><br><i>RandomForestRegressor<i><br><i>ExtraTreesRegressor<i>|\n",
|
||||
"|**primary_metric**|This is the metric that you want to optimize. Classification supports the following primary metrics: <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>balanced_accuracy</i><br><i>average_precision_score_weighted</i><br><i>precision_score_weighted</i>|\n",
|
||||
"|**max_time_sec**|Time limit in seconds for each iteration.|\n",
|
||||
"|**iterations**|Number of iterations. In each iteration AutoML trains a specific pipeline with the data.|\n",
|
||||
"|**n_cross_validations**|Number of cross validation splits.|\n",
|
||||
"|**preprocess**|Setting this to *True* enables AutoML to perform preprocessing on the input to handle *missing data*, and to perform some common *feature extraction*.|\n",
|
||||
"|**exit_score**|*double* value indicating the target for *primary_metric*. <br>Once the target is surpassed the run terminates.|\n",
|
||||
"|**blacklist_algos**|*Array* of *strings* indicating pipelines to ignore for AutoML.<br><br> Allowed values for **Classification**<br><i>LogisticRegression</i><br><i>SGDClassifierWrapper</i><br><i>NBWrapper</i><br><i>BernoulliNB</i><br><i>SVCWrapper</i><br><i>LinearSVMWrapper</i><br><i>KNeighborsClassifier</i><br><i>DecisionTreeClassifier</i><br><i>RandomForestClassifier</i><br><i>ExtraTreesClassifier</i><br><i>LightGBMClassifier</i><br><br>Allowed values for **Regression**<br><i>ElasticNet</i><br><i>GradientBoostingRegressor</i><br><i>DecisionTreeRegressor</i><br><i>KNeighborsRegressor</i><br><i>LassoLars</i><br><i>SGDRegressor</i><br><i>RandomForestRegressor</i><br><i>ExtraTreesRegressor</i>|\n",
|
||||
"|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n",
|
||||
"|**y**|(sparse) array-like, shape = [n_samples, ], [n_samples, n_classes]<br>Multi-class targets. An indicator matrix turns on multilabel classification. This should be an array of integers. |\n",
|
||||
"|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder. |"
|
||||
"|**y**|(sparse) array-like, shape = [n_samples, ], [n_samples, n_classes]<br>Multi-class targets. An indicator matrix turns on multilabel classification. This should be an array of integers.|\n",
|
||||
"|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder.|"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -191,19 +188,19 @@
|
||||
" exit_score = 0.994,\n",
|
||||
" blacklist_algos = ['KNeighborsClassifier','LinearSVMWrapper'],\n",
|
||||
" verbosity = logging.INFO,\n",
|
||||
" X = X_digits, \n",
|
||||
" y = y_digits,\n",
|
||||
" path=project_folder)"
|
||||
" X = X_train, \n",
|
||||
" y = y_train,\n",
|
||||
" path = project_folder)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Training the Model\n",
|
||||
"## Train the Model\n",
|
||||
"\n",
|
||||
"You can call the submit method on the experiment object and pass the run configuration. For Local runs the execution is synchronous. Depending on the data and number of iterations this can run for while.\n",
|
||||
"You will see the currently running iterations printing to the console."
|
||||
"Call the `submit` method on the experiment object and pass the run configuration. Execution of local runs is synchronous. Depending on the data and the number of iterations this can run for a while.\n",
|
||||
"In this example, we specify `show_output = True` to print currently running iterations to the console."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -212,25 +209,25 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"local_run = experiment.submit(automl_config, show_output=True)"
|
||||
"local_run = experiment.submit(automl_config, show_output = True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Exploring the results"
|
||||
"## Explore the Results"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Widget for monitoring runs\n",
|
||||
"#### Widget for Monitoring Runs\n",
|
||||
"\n",
|
||||
"The widget will sit on \"loading\" until the first iteration completed, then you will see an auto-updating graph and table show up. It refreshed once per minute, so you should see the graph update as child runs complete.\n",
|
||||
"The widget will first report a \"loading\" status while running the first iteration. After completing the first iteration, an auto-updating graph and table will be shown. The widget will refresh once per minute, so you should see the graph update as child runs complete.\n",
|
||||
"\n",
|
||||
"NOTE: The widget will display a link at the bottom. This will not currently work, but will eventually link to a web-ui to explore the individual run details."
|
||||
"**Note:** The widget displays a link at the bottom. Use this link to open a web interface to explore the individual run details."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -249,7 +246,7 @@
|
||||
"source": [
|
||||
"\n",
|
||||
"#### Retrieve All Child Runs\n",
|
||||
"You can also use sdk methods to fetch all the child runs and see individual metrics that we log. "
|
||||
"You can also use SDK methods to fetch all the child runs and see individual metrics that we log."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -262,7 +259,7 @@
|
||||
"metricslist = {}\n",
|
||||
"for run in children:\n",
|
||||
" properties = run.get_properties()\n",
|
||||
" metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)} \n",
|
||||
" metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)}\n",
|
||||
" metricslist[int(properties['iteration'])] = metrics\n",
|
||||
"\n",
|
||||
"rundata = pd.DataFrame(metricslist).sort_index(1)\n",
|
||||
@@ -275,7 +272,7 @@
|
||||
"source": [
|
||||
"### Retrieve the Best Model\n",
|
||||
"\n",
|
||||
"Below we select the best pipeline from our iterations. Each pipeline is a tuple of three elements. The first element is the score for the pipeline the second element is the string description of the pipeline and the last element are the pipeline objects used for each fold in the cross-validation."
|
||||
"Below we select the best pipeline from our iterations. The `get_output` method on `automl_classifier` returns the best run and the fitted model for the last invocation. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -291,7 +288,8 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Best Model based on any other metric"
|
||||
"#### Best Model Based on Any Other Metric\n",
|
||||
"Show the run and the model which has the best `accuracy` value:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -301,14 +299,15 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# lookup_metric = \"accuracy\"\n",
|
||||
"# best_run, fitted_model = local_run.get_output(metric=lookup_metric)"
|
||||
"# best_run, fitted_model = local_run.get_output(metric = lookup_metric)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Model from a specific iteration"
|
||||
"#### Model from a Specific Iteration\n",
|
||||
"Show the run and the model from the third iteration:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -318,14 +317,14 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# iteration = 3\n",
|
||||
"# best_run, fitted_model = local_run.get_output(iteration=iteration)"
|
||||
"# best_run, fitted_model = local_run.get_output(iteration = iteration)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Register fitted model for deployment"
|
||||
"### Register the Fitted Model for Deployment"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -336,15 +335,15 @@
|
||||
"source": [
|
||||
"description = 'AutoML Model'\n",
|
||||
"tags = None\n",
|
||||
"local_run.register_model(description=description, tags=tags)\n",
|
||||
"local_run.model_id # Use this id to deploy the model as a web service in Azure"
|
||||
"local_run.register_model(description = description, tags = tags)\n",
|
||||
"local_run.model_id # Use this id to deploy the model as a web service in Azure."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Testing the Fitted Model "
|
||||
"### Testing the Fitted Model"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -354,20 +353,20 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"digits = datasets.load_digits()\n",
|
||||
"X_digits = digits.data[:10, :]\n",
|
||||
"y_digits = digits.target[:10]\n",
|
||||
"X_test = digits.data[:10, :]\n",
|
||||
"y_test = digits.target[:10]\n",
|
||||
"images = digits.images[:10]\n",
|
||||
"\n",
|
||||
"#Randomly select digits and test\n",
|
||||
"for index in np.random.choice(len(y_digits), 2):\n",
|
||||
"# Randomly select digits and test.\n",
|
||||
"for index in np.random.choice(len(y_test), 2, replace = False):\n",
|
||||
" print(index)\n",
|
||||
" predicted = fitted_model.predict(X_digits[index:index + 1])[0]\n",
|
||||
" label = y_digits[index]\n",
|
||||
" title = \"Label value = %d Predicted value = %d \" % ( label,predicted)\n",
|
||||
" predicted = fitted_model.predict(X_test[index:index + 1])[0]\n",
|
||||
" label = y_test[index]\n",
|
||||
" title = \"Label value = %d Predicted value = %d \" % (label, predicted)\n",
|
||||
" fig = plt.figure(1, figsize=(3,3))\n",
|
||||
" ax1 = fig.add_axes((0,0,.8,.8))\n",
|
||||
" ax1.set_title(title)\n",
|
||||
" plt.imshow(images[index], cmap=plt.cm.gray_r, interpolation='nearest')\n",
|
||||
" plt.imshow(images[index], cmap = plt.cm.gray_r, interpolation = 'nearest')\n",
|
||||
" plt.show()\n"
|
||||
]
|
||||
}
|
||||
|
||||
@@ -13,31 +13,31 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# AutoML 06: Custom CV splits, handling sparse data\n",
|
||||
"# AutoML 06: Custom CV Splits and Handling Sparse Data\n",
|
||||
"\n",
|
||||
"In this example we use the scikit learn's [20newsgroup](In this example we use the scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html) to showcase how you can use AutoML for handling sparse data and specify custom cross validation splits.\n",
|
||||
"In this example we use scikit-learn's [20newsgroup](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.fetch_20newsgroups.html) dataset to showcase how you can use AutoML for handling sparse data and how to specify custom cross-validation splits.\n",
|
||||
"\n",
|
||||
"Make sure you have executed the [00.configuration](00.configuration.ipynb) before running this notebook.\n",
|
||||
"\n",
|
||||
"In this notebook you would see\n",
|
||||
"1. Creating an Experiment using an existing Workspace\n",
|
||||
"2. Instantiating AutoMLConfig\n",
|
||||
"4. Training the Model\n",
|
||||
"5. Exploring the results\n",
|
||||
"6. Testing the fitted model\n",
|
||||
"In this notebook you will learn how to:\n",
|
||||
"1. Create an `Experiment` in an existing `Workspace`.\n",
|
||||
"2. Configure AutoML using `AutoMLConfig`.\n",
|
||||
"3. Train the model.\n",
|
||||
"4. Explore the results.\n",
|
||||
"5. Test the best fitted model.\n",
|
||||
"\n",
|
||||
"In addition this notebook showcases the following features\n",
|
||||
"- **Custom CV** splits \n",
|
||||
"- Handling **Sparse Data** in the input"
|
||||
"- Handling **sparse data** in the input"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create Experiment\n",
|
||||
"## Create an Experiment\n",
|
||||
"\n",
|
||||
"As part of the setup you have already created a <b>Workspace</b>. For AutoML you would need to create an <b>Experiment</b>. An <b>Experiment</b> is a named object in a <b>Workspace</b>, which is used to run experiments."
|
||||
"As part of the setup you have already created an Azure ML `Workspace` object. For AutoML you will need to create an `Experiment` object, which is a named object in a `Workspace` used to run experiments."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -96,7 +96,7 @@
|
||||
"source": [
|
||||
"## Diagnostics\n",
|
||||
"\n",
|
||||
"Opt-in diagnostics for better experience, quality, and security of future releases"
|
||||
"Opt-in diagnostics for better experience, quality, and security of future releases."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -106,7 +106,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.telemetry import set_diagnostics_collection\n",
|
||||
"set_diagnostics_collection(send_diagnostics=True)"
|
||||
"set_diagnostics_collection(send_diagnostics = True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -133,15 +133,15 @@
|
||||
" 'comp.graphics',\n",
|
||||
" 'sci.space',\n",
|
||||
"]\n",
|
||||
"data_train = fetch_20newsgroups(subset='train', categories=categories,\n",
|
||||
" shuffle=True, random_state=42,\n",
|
||||
" remove=remove)\n",
|
||||
"data_train = fetch_20newsgroups(subset = 'train', categories = categories,\n",
|
||||
" shuffle = True, random_state = 42,\n",
|
||||
" remove = remove)\n",
|
||||
"\n",
|
||||
"X_train, X_validation, y_train, y_validation = train_test_split(data_train.data, data_train.target, test_size=0.33, random_state=42)\n",
|
||||
"X_train, X_validation, y_train, y_validation = train_test_split(data_train.data, data_train.target, test_size = 0.33, random_state = 42)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"vectorizer = HashingVectorizer(stop_words='english', alternate_sign=False,\n",
|
||||
" n_features=2**16)\n",
|
||||
"vectorizer = HashingVectorizer(stop_words = 'english', alternate_sign = False,\n",
|
||||
" n_features = 2**16)\n",
|
||||
"X_train = vectorizer.transform(X_train)\n",
|
||||
"X_validation = vectorizer.transform(X_validation)\n",
|
||||
"\n",
|
||||
@@ -155,21 +155,21 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiate Auto ML Config\n",
|
||||
"## Configure AutoML\n",
|
||||
"\n",
|
||||
"This defines the settings and data used to run the experiment.\n",
|
||||
"Instantiate an `AutoMLConfig` object to specify the settings and data used to run the experiment.\n",
|
||||
"\n",
|
||||
"|Property|Description|\n",
|
||||
"|-|-|\n",
|
||||
"|**task**|classification or regression|\n",
|
||||
"|**primary_metric**|This is the metric that you want to optimize.<br> Classification supports the following primary metrics <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>balanced_accuracy</i><br><i>average_precision_score_weighted</i><br><i>precision_score_weighted</i>|\n",
|
||||
"|**max_time_sec**|Time limit in seconds for each iteration|\n",
|
||||
"|**iterations**|Number of iterations. In each iteration Auto ML trains a specific pipeline with the data|\n",
|
||||
"|**preprocess**| *True/False* <br>Setting this to *True* enables Auto ML to perform preprocessing <br>on the input to handle *missing data*, and perform some common *feature extraction*<br>*Note: If input data is Sparse you cannot use preprocess=True*|\n",
|
||||
"|**primary_metric**|This is the metric that you want to optimize. Classification supports the following primary metrics: <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>balanced_accuracy</i><br><i>average_precision_score_weighted</i><br><i>precision_score_weighted</i>|\n",
|
||||
"|**max_time_sec**|Time limit in seconds for each iteration.|\n",
|
||||
"|**iterations**|Number of iterations. In each iteration AutoML trains a specific pipeline with the data.|\n",
|
||||
"|**preprocess**|Setting this to *True* enables AutoML to perform preprocessing on the input to handle *missing data*, and to perform some common *feature extraction*.<br>**Note:** If input data is sparse, you cannot use *True*.|\n",
|
||||
"|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n",
|
||||
"|**y**|(sparse) array-like, shape = [n_samples, ], [n_samples, n_classes]<br>Multi-class targets. An indicator matrix turns on multilabel classification. This should be an array of integers. |\n",
|
||||
"|**X_valid**|(sparse) array-like, shape = [n_samples, n_features] for the custom Validation set|\n",
|
||||
"|**y_valid**|(sparse) array-like, shape = [n_samples, ], [n_samples, n_classes]<br>Multi-class targets. An indicator matrix turns on multilabel classification. for the custom Validation set|\n",
|
||||
"|**y**|(sparse) array-like, shape = [n_samples, ], [n_samples, n_classes]<br>Multi-class targets. An indicator matrix turns on multilabel classification. This should be an array of integers.|\n",
|
||||
"|**X_valid**|(sparse) array-like, shape = [n_samples, n_features] for the custom validation set.|\n",
|
||||
"|**y_valid**|(sparse) array-like, shape = [n_samples, ], [n_samples, n_classes]<br>Multi-class targets. An indicator matrix turns on multilabel classification for the custom validation set.|\n",
|
||||
"|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder.|"
|
||||
]
|
||||
},
|
||||
@@ -180,27 +180,27 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"automl_config = AutoMLConfig(task = 'classification',\n",
|
||||
" debug_log='automl_errors.log',\n",
|
||||
" primary_metric='AUC_weighted',\n",
|
||||
" max_time_sec=3600,\n",
|
||||
" iterations=5,\n",
|
||||
" preprocess=False,\n",
|
||||
" verbosity=logging.INFO,\n",
|
||||
" debug_log = 'automl_errors.log',\n",
|
||||
" primary_metric = 'AUC_weighted',\n",
|
||||
" max_time_sec = 3600,\n",
|
||||
" iterations = 5,\n",
|
||||
" preprocess = False,\n",
|
||||
" verbosity = logging.INFO,\n",
|
||||
" X = X_train, \n",
|
||||
" y = y_train,\n",
|
||||
" X_valid = X_validation, \n",
|
||||
" y_valid = y_validation, \n",
|
||||
" path=project_folder)"
|
||||
" path = project_folder)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Training the Model\n",
|
||||
"## Train the Model\n",
|
||||
"\n",
|
||||
"You can call the submit method on the experiment object and pass the run configuration. For Local runs the execution is synchronous. Depending on the data and number of iterations this can run for while.\n",
|
||||
"You will see the currently running iterations printing to the console."
|
||||
"Call the `submit` method on the experiment object and pass the run configuration. Execution of local runs is synchronous. Depending on the data and the number of iterations this can run for a while.\n",
|
||||
"In this example, we specify `show_output = True` to print currently running iterations to the console."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -216,18 +216,18 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Exploring the results"
|
||||
"## Explore the Results"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Widget for monitoring runs\n",
|
||||
"#### Widget for Monitoring Runs\n",
|
||||
"\n",
|
||||
"The widget will sit on \"loading\" until the first iteration completed, then you will see an auto-updating graph and table show up. It refreshed once per minute, so you should see the graph update as child runs complete.\n",
|
||||
"The widget will first report a \"loading\" status while running the first iteration. After completing the first iteration, an auto-updating graph and table will be shown. The widget will refresh once per minute, so you should see the graph update as child runs complete.\n",
|
||||
"\n",
|
||||
"NOTE: The widget displays a link at the bottom. This links to a web-ui to explore the individual run details."
|
||||
"**Note:** The widget displays a link at the bottom. Use this link to open a web interface to explore the individual run details."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -246,7 +246,7 @@
|
||||
"source": [
|
||||
"\n",
|
||||
"#### Retrieve All Child Runs\n",
|
||||
"You can also use sdk methods to fetch all the child runs and see individual metrics that we log. "
|
||||
"You can also use SDK methods to fetch all the child runs and see individual metrics that we log."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -259,7 +259,7 @@
|
||||
"metricslist = {}\n",
|
||||
"for run in children:\n",
|
||||
" properties = run.get_properties()\n",
|
||||
" metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)} \n",
|
||||
" metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)}\n",
|
||||
" metricslist[int(properties['iteration'])] = metrics\n",
|
||||
" \n",
|
||||
"rundata = pd.DataFrame(metricslist).sort_index(1)\n",
|
||||
@@ -279,7 +279,7 @@
|
||||
"source": [
|
||||
"### Retrieve the Best Model\n",
|
||||
"\n",
|
||||
"Below we select the best pipeline from our iterations. The *get_output* method on automl_classifier returns the best run and the fitted model for the last *fit* invocation. There are overloads on *get_output* that allow you to retrieve the best run and fitted model for *any* logged metric or a particular *iteration*."
|
||||
"Below we select the best pipeline from our iterations. The `get_output` method on `automl_classifier` returns the best run and the fitted model for the last invocation. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -295,7 +295,8 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Best Model based on any other metric"
|
||||
"#### Best Model Based on Any Other Metric\n",
|
||||
"Show the run and the model that has the best `accuracy` value:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -305,14 +306,15 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# lookup_metric = \"accuracy\"\n",
|
||||
"# best_run, fitted_model = local_run.get_output(metric=lookup_metric)"
|
||||
"# best_run, fitted_model = local_run.get_output(metric = lookup_metric)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Model from a specific iteration"
|
||||
"#### Model from a Specific Iteration\n",
|
||||
"Show the run and the model from the third iteration:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -322,14 +324,14 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# iteration = 3\n",
|
||||
"# best_run, fitted_model = local_run.get_output(iteration=iteration)"
|
||||
"# best_run, fitted_model = local_run.get_output(iteration = iteration)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Register fitted model for deployment"
|
||||
"### Register the Fitted Model for Deployment"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -340,15 +342,15 @@
|
||||
"source": [
|
||||
"description = 'AutoML Model'\n",
|
||||
"tags = None\n",
|
||||
"local_run.register_model(description=description, tags=tags)\n",
|
||||
"local_run.model_id # Use this id to deploy the model as a web service in Azure"
|
||||
"local_run.register_model(description = description, tags = tags)\n",
|
||||
"local_run.model_id # Use this id to deploy the model as a web service in Azure."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Testing the Fitted Model "
|
||||
"### Testing the Fitted Model"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -357,9 +359,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"digits = datasets.load_digits()### Testing the Fitted Model\n",
|
||||
"\n",
|
||||
"#### Load Test Data\n",
|
||||
"# Load test data.\n",
|
||||
"import sklearn\n",
|
||||
"from pandas_ml import ConfusionMatrix\n",
|
||||
"\n",
|
||||
@@ -372,23 +372,23 @@
|
||||
"]\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"data_test = fetch_20newsgroups(subset='test', categories=categories,\n",
|
||||
" shuffle=True, random_state=42,\n",
|
||||
" remove=remove)\n",
|
||||
"data_test = fetch_20newsgroups(subset = 'test', categories = categories,\n",
|
||||
" shuffle = True, random_state = 42,\n",
|
||||
" remove = remove)\n",
|
||||
"\n",
|
||||
"vectorizer = HashingVectorizer(stop_words='english', alternate_sign=False,\n",
|
||||
" n_features=2**16)\n",
|
||||
"vectorizer = HashingVectorizer(stop_words = 'english', alternate_sign = False,\n",
|
||||
" n_features = 2**16)\n",
|
||||
"\n",
|
||||
"X_test = vectorizer.transform(data_test.data)\n",
|
||||
"y_test = data_test.target\n",
|
||||
"\n",
|
||||
"#### Testing our best pipeline\n",
|
||||
"# Test our best pipeline.\n",
|
||||
"\n",
|
||||
"ypred = fitted_model.predict(X_test)\n",
|
||||
"ypred_strings = [categories[i] for i in ypred]\n",
|
||||
"ytest_strings = [categories[i] for i in y_test]\n",
|
||||
"y_pred = fitted_model.predict(X_test)\n",
|
||||
"y_pred_strings = [data_test.target_names[i] for i in y_pred]\n",
|
||||
"y_test_strings = [data_test.target_names[i] for i in y_test]\n",
|
||||
"\n",
|
||||
"cm = ConfusionMatrix(ytest_strings, ypred_strings)\n",
|
||||
"cm = ConfusionMatrix(y_test_strings, y_pred_strings)\n",
|
||||
"print(cm)\n",
|
||||
"cm.plot()"
|
||||
]
|
||||
|
||||
@@ -13,17 +13,17 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# AutoML 07: Exploring previous runs\n",
|
||||
"# AutoML 07: Exploring Previous Runs\n",
|
||||
"\n",
|
||||
"In this notebook we present some examples of navigating previously executed runs. We also show how you can download a fitted model for any previous run.\n",
|
||||
"\n",
|
||||
"Make sure you have executed the [00.configuration](00.configuration.ipynb) before running this notebook.\n",
|
||||
"\n",
|
||||
"In this notebook you would see\n",
|
||||
"1. List all Experiments for the workspace\n",
|
||||
"2. List AutoML runs for an Experiment\n",
|
||||
"3. Get details for a AutoML Run. (Automl settings, run widget & all metrics)\n",
|
||||
"4. Download fitted pipeline for any iteration\n"
|
||||
"In this notebook you will learn how to:\n",
|
||||
"1. List all experiments in a workspace.\n",
|
||||
"2. List all AutoML runs in an experiment.\n",
|
||||
"3. Get details for an AutoML run, including settings, run widget, and all metrics.\n",
|
||||
"4. Download a fitted pipeline for any iteration.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -87,7 +87,7 @@
|
||||
"source": [
|
||||
"## Diagnostics\n",
|
||||
"\n",
|
||||
"Opt-in diagnostics for better experience, quality, and security of future releases"
|
||||
"Opt-in diagnostics for better experience, quality, and security of future releases."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -97,15 +97,15 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.telemetry import set_diagnostics_collection\n",
|
||||
"set_diagnostics_collection(send_diagnostics=True)"
|
||||
"set_diagnostics_collection(send_diagnostics = True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# List AutoML runs for an Experiment\n",
|
||||
"You can set <i>Experiment</i> name with any experiment name from the result of the Experiment.list cell to load the AutoML runs."
|
||||
"# List AutoML runs for an experiment\n",
|
||||
"Set `experiment_name` to any experiment name from the result of the Experiment.list cell to load the AutoML runs."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -114,7 +114,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"experiment_name = 'automl-local-classification' # Replace this with any project name from previous cell\n",
|
||||
"experiment_name = 'automl-local-classification' # Replace this with any project name from previous cell.\n",
|
||||
"\n",
|
||||
"proj = ws.experiments()[experiment_name]\n",
|
||||
"summary_df = pd.DataFrame(index = ['Type', 'Status', 'Primary Metric', 'Iterations', 'Compute', 'Name'])\n",
|
||||
@@ -143,7 +143,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Get Details for a Auto ML Run\n",
|
||||
"# Get details for an AutoML run\n",
|
||||
"\n",
|
||||
"Copy the project name and run id from the previous cell output to find more details on a particular run."
|
||||
]
|
||||
@@ -154,12 +154,13 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run_id = '' # Filling your own run_id\n",
|
||||
"run_id = '' # Filling your own run_id from above run ids\n",
|
||||
"assert (run_id in summary_df.keys()),\"Run id not found! Please set run id to a value from above run ids\"\n",
|
||||
"\n",
|
||||
"from azureml.train.widgets import RunDetails\n",
|
||||
"\n",
|
||||
"experiment = Experiment(ws, experiment_name)\n",
|
||||
"ml_run = AutoMLRun(experiment=experiment, run_id=run_id)\n",
|
||||
"ml_run = AutoMLRun(experiment = experiment, run_id = run_id)\n",
|
||||
"\n",
|
||||
"summary_df = pd.DataFrame(index = ['Type', 'Status', 'Primary Metric', 'Iterations', 'Compute', 'Name', 'Start Time', 'End Time'])\n",
|
||||
"properties = ml_run.get_properties()\n",
|
||||
@@ -180,7 +181,7 @@
|
||||
"display(HTML('<h3>Runtime Details</h3>'))\n",
|
||||
"display(summary_df)\n",
|
||||
"\n",
|
||||
"#settings_df = pd.DataFrame(data=amlsettings, index=[''])\n",
|
||||
"#settings_df = pd.DataFrame(data = amlsettings, index = [''])\n",
|
||||
"display(HTML('<h3>AutoML Settings</h3>'))\n",
|
||||
"display(amlsettings)\n",
|
||||
"\n",
|
||||
@@ -191,7 +192,7 @@
|
||||
"metricslist = {}\n",
|
||||
"for run in children:\n",
|
||||
" properties = run.get_properties()\n",
|
||||
" metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)} \n",
|
||||
" metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)}\n",
|
||||
" metricslist[int(properties['iteration'])] = metrics\n",
|
||||
"\n",
|
||||
"rundata = pd.DataFrame(metricslist).sort_index(1)\n",
|
||||
@@ -210,7 +211,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Download best model for any given metric"
|
||||
"## Download the Best Model for Any Given Metric"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -219,8 +220,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"metric = 'AUC_weighted' # Replace with a metric name\n",
|
||||
"best_run, fitted_model = ml_run.get_output(metric=metric)\n",
|
||||
"metric = 'AUC_weighted' # Replace with a metric name.\n",
|
||||
"best_run, fitted_model = ml_run.get_output(metric = metric)\n",
|
||||
"fitted_model"
|
||||
]
|
||||
},
|
||||
@@ -228,7 +229,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Download model for any given iteration"
|
||||
"## Download the Model for Any Given Iteration"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -237,8 +238,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"iteration = 4 # Replace with an interation number\n",
|
||||
"best_run, fitted_model = ml_run.get_output(iteration=iteration)\n",
|
||||
"iteration = 4 # Replace with an iteration number.\n",
|
||||
"best_run, fitted_model = ml_run.get_output(iteration = iteration)\n",
|
||||
"fitted_model"
|
||||
]
|
||||
},
|
||||
@@ -257,15 +258,15 @@
|
||||
"source": [
|
||||
"description = 'AutoML Model'\n",
|
||||
"tags = None\n",
|
||||
"ml_run.register_model(description=description, tags=tags)\n",
|
||||
"ml_run.model_id # Use this id to deploy the model as a web service in Azure"
|
||||
"ml_run.register_model(description = description, tags = tags)\n",
|
||||
"ml_run.model_id # Use this id to deploy the model as a web service in Azure."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Register best model for any given metric"
|
||||
"## Register the Best Model for Any Given Metric"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -274,18 +275,18 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"metric = 'AUC_weighted' # Replace with a metric name\n",
|
||||
"metric = 'AUC_weighted' # Replace with a metric name.\n",
|
||||
"description = 'AutoML Model'\n",
|
||||
"tags = None\n",
|
||||
"ml_run.register_model(description=description, tags=tags, metric=metric)\n",
|
||||
"ml_run.model_id # Use this id to deploy the model as a web service in Azure"
|
||||
"ml_run.register_model(description = description, tags = tags, metric = metric)\n",
|
||||
"print(ml_run.model_id) # Use this id to deploy the model as a web service in Azure."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Register model for any given iteration"
|
||||
"## Register the Model for Any Given Iteration"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -294,11 +295,11 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"iteration = 4 # Replace with an interation number\n",
|
||||
"iteration = 4 # Replace with an iteration number.\n",
|
||||
"description = 'AutoML Model'\n",
|
||||
"tags = None\n",
|
||||
"ml_run.register_model(description=description, tags=tags, iteration=iteration)\n",
|
||||
"ml_run.model_id # Use this id to deploy the model as a web service in Azure"
|
||||
"ml_run.register_model(description = description, tags = tags, iteration = iteration)\n",
|
||||
"print(ml_run.model_id) # Use this id to deploy the model as a web service in Azure."
|
||||
]
|
||||
}
|
||||
],
|
||||
|
||||
@@ -13,15 +13,15 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# AutoML 08: Remote Execution with Text file\n",
|
||||
"# AutoML 08: Remote Execution with Text File\n",
|
||||
"\n",
|
||||
"In this sample accesses a data file on a remote DSVM. This is more efficient than reading the file from Blob storage in the get_data method.\n",
|
||||
"This sample accesses a data file on a remote DSVM. This is more efficient than reading the file from Azure Blob storage in the `get_data` method.\n",
|
||||
"\n",
|
||||
"Make sure you have executed the [00.configuration](00.configuration.ipynb) before running this notebook.\n",
|
||||
"\n",
|
||||
"In this notebook you would see\n",
|
||||
"1. Configuring the DSVM to allow files to be access directly by the get_data method.\n",
|
||||
"2. get_data returning data from a local file.\n",
|
||||
"In this notebook you will learn how to:\n",
|
||||
"1. Configure the DSVM to allow files to be accessed directly by the `get_data` function.\n",
|
||||
"2. Use `get_data` to return data from a local file.\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
@@ -29,9 +29,9 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create Experiment\n",
|
||||
"## Create an Experiment\n",
|
||||
"\n",
|
||||
"As part of the setup you have already created a <b>Workspace</b>. For AutoML you would need to create an <b>Experiment</b>. An <b>Experiment</b> is a named object in a <b>Workspace</b>, which is used to run experiments."
|
||||
"As part of the setup you have already created an Azure ML `Workspace` object. For AutoML you will need to create an `Experiment` object, which is a named object in a `Workspace` used to run experiments."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -90,7 +90,7 @@
|
||||
"source": [
|
||||
"## Diagnostics\n",
|
||||
"\n",
|
||||
"Opt-in diagnostics for better experience, quality, and security of future releases"
|
||||
"Opt-in diagnostics for better experience, quality, and security of future releases."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -100,7 +100,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.telemetry import set_diagnostics_collection\n",
|
||||
"set_diagnostics_collection(send_diagnostics=True)"
|
||||
"set_diagnostics_collection(send_diagnostics = True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -108,9 +108,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create a Remote Linux DSVM\n",
|
||||
"Note: If creation fails with a message about Marketplace purchase eligibilty, go to portal.azure.com, start creating DSVM there, and select \"Want to create programmatically\" to enable programmatic creation. Once you've enabled it, you can exit without actually creating VM.\n",
|
||||
"\n",
|
||||
"**Note**: By default SSH runs on port 22 and you don't need to specify it. But if for security reasons you can switch to a different port (such as 5022), you can append the port number to the address. [Read more](https://render.githubusercontent.com/documentation/sdk/ssh-issue.md) on this."
|
||||
"**Note:** If creation fails with a message about Marketplace purchase eligibility, start creation of a DSVM through the [Azure portal](https://portal.azure.com), and select \"Want to create programmatically\" to enable programmatic creation. Once you've enabled this setting, you can exit the portal without actually creating the DSVM, and creation of the DSVM through the notebook should work.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -124,9 +122,9 @@
|
||||
"dsvm_name = 'mydsvm'\n",
|
||||
"try:\n",
|
||||
" dsvm_compute = DsvmCompute(ws, dsvm_name)\n",
|
||||
" print('found existing dsvm.')\n",
|
||||
" print('Found existing DSVM.')\n",
|
||||
"except:\n",
|
||||
" print('creating new dsvm.')\n",
|
||||
" print('Creating a new DSVM.')\n",
|
||||
" dsvm_config = DsvmCompute.provisioning_configuration(vm_size = \"Standard_D2_v2\")\n",
|
||||
" dsvm_compute = DsvmCompute.create(ws, name = dsvm_name, provisioning_configuration = dsvm_config)\n",
|
||||
" dsvm_compute.wait_for_completion(show_output = True)"
|
||||
@@ -136,9 +134,8 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Copy data file to the DSVM\n",
|
||||
"Download the data file.\n",
|
||||
"Copy the data file to the DSVM under the folder: /tmp/data"
|
||||
"## Copy the Data File to the DSVM\n",
|
||||
"Download the data file and copy it to the `/tmp/data` folder on the DSVM."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -151,17 +148,16 @@
|
||||
" delimiter=\"\\t\", quotechar='\"')\n",
|
||||
"df.to_csv(\"data.tsv\", sep=\"\\t\", quotechar='\"', index=False)\n",
|
||||
"\n",
|
||||
"# Now copy the file data.tsv to the folder /tmp/data on the DSVM"
|
||||
"# Now copy the file data.tsv to the folder /tmp/data on the DSVM."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create Get Data File\n",
|
||||
"For remote executions you should author a get_data.py file containing a get_data() function. This file should be in the root directory of the project. You can encapsulate code to read data either from a blob storage or local disk in this file.\n",
|
||||
"\n",
|
||||
"The *get_data()* function returns a [dictionary](README.md#getdata)."
|
||||
"## Create the `get_data.py` File\n",
|
||||
"For remote executions you should author a `get_data.py` file containing a `get_data()` function. This file should be in the root directory of the project. You can encapsulate code to read data from either Blob storage or local disk in this file.\n",
|
||||
"In this example, the `get_data()` function returns a [dictionary](README.md#getdata)."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -197,7 +193,7 @@
|
||||
" y = le.transform(df[\"Label\"].values)\n",
|
||||
" df = df.drop([\"Label\"], axis=1)\n",
|
||||
"\n",
|
||||
" df_train, _, y_train, _ = train_test_split(df, y, test_size=0.1, random_state=42)\n",
|
||||
" df_train, _, y_train, _ = train_test_split(df, y, test_size = 0.1, random_state = 42)\n",
|
||||
"\n",
|
||||
" return { \"X\" : df.values, \"y\" : y }"
|
||||
]
|
||||
@@ -206,21 +202,21 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiate AutoML <a class=\"anchor\" id=\"Instatiate-AutoML-Remote-DSVM\"></a>\n",
|
||||
"## Configure AutoML <a class=\"anchor\" id=\"Instatiate-AutoML-Remote-DSVM\"></a>\n",
|
||||
"\n",
|
||||
"You can specify automl_settings as **kwargs** as well. Also note that you can use the get_data() symantic for local excutions too. \n",
|
||||
"You can specify `automl_settings` as `**kwargs` as well. Also note that you can use a `get_data()` function for local executions too.\n",
|
||||
"\n",
|
||||
"<i>Note: For Remote DSVM and Batch AI you cannot pass Numpy arrays directly to the fit method.</i>\n",
|
||||
"**Note:** When using Remote DSVM, you can't pass NumPy arrays directly to the fit method.\n",
|
||||
"\n",
|
||||
"|Property|Description|\n",
|
||||
"|-|-|\n",
|
||||
"|**primary_metric**|This is the metric that you want to optimize.<br> Classification supports the following primary metrics <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>balanced_accuracy</i><br><i>average_precision_score_weighted</i><br><i>precision_score_weighted</i>|\n",
|
||||
"|**max_time_sec**|Time limit in seconds for each iteration|\n",
|
||||
"|**iterations**|Number of iterations. In each iteration Auto ML trains a specific pipeline with the data|\n",
|
||||
"|**n_cross_validations**|Number of cross validation splits|\n",
|
||||
"|**concurrent_iterations**|Max number of iterations that would be executed in parallel. This should be less than the number of cores on the DSVM\n",
|
||||
"|**preprocess**| *True/False* <br>Setting this to *True* enables Auto ML to perform preprocessing <br>on the input to handle *missing data*, and perform some common *feature extraction*|\n",
|
||||
"|**max_cores_per_iteration**| Indicates how many cores on the compute target would be used to train a single pipeline.<br> Default is *1*, you can set it to *-1* to use all cores|"
|
||||
"|**primary_metric**|This is the metric that you want to optimize. Classification supports the following primary metrics: <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>balanced_accuracy</i><br><i>average_precision_score_weighted</i><br><i>precision_score_weighted</i>|\n",
|
||||
"|**max_time_sec**|Time limit in seconds for each iteration.|\n",
|
||||
"|**iterations**|Number of iterations. In each iteration AutoML trains a specific pipeline with the data.|\n",
|
||||
"|**n_cross_validations**|Number of cross validation splits.|\n",
|
||||
"|**concurrent_iterations**|Maximum number of iterations that would be executed in parallel. This should be less than the number of cores on the DSVM.|\n",
|
||||
"|**preprocess**|Setting this to *True* enables AutoML to perform preprocessing on the input to handle *missing data*, and perform some common *feature extraction*.|\n",
|
||||
"|**max_cores_per_iteration**|Indicates how many cores on the compute target would be used to train a single pipeline.<br>Default is *1*; you can set it to *-1* to use all cores.|"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -240,20 +236,21 @@
|
||||
"}\n",
|
||||
"automl_config = AutoMLConfig(task = 'classification',\n",
|
||||
" debug_log = 'automl_errors.log',\n",
|
||||
" path=project_folder,\n",
|
||||
" path = project_folder,\n",
|
||||
" compute_target = dsvm_compute,\n",
|
||||
" data_script = project_folder + \"/get_data.py\",\n",
|
||||
" **automl_settings\n",
|
||||
" )"
|
||||
" **automl_settings)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Training the Model <a class=\"anchor\" id=\"Training-the-model-Remote-DSVM\"></a>\n",
|
||||
"## Train the Model <a class=\"anchor\" id=\"Training-the-model-Remote-DSVM\"></a>\n",
|
||||
"\n",
|
||||
"For remote runs the execution is asynchronous, so you will see the iterations get populated as they complete. You can interact with the widgets/models even when the experiment is running to retreive the best model up to that point. Once you are satisfied with the model you can cancel a particular iteration or the whole run."
|
||||
"Call the `submit` method on the experiment object and pass the run configuration. For remote runs the execution is asynchronous, so you will see the iterations get populated as they complete. You can interact with the widgets and models even when the experiment is running to retrieve the best model up to that point. Once you are satisfied with the model, you can cancel a particular iteration or the whole run.\n",
|
||||
"\n",
|
||||
"In this example, we specify `show_output = False` to suppress console output while the run is in progress."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -262,21 +259,27 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"remote_run = experiment.submit(automl_config, show_output=False)"
|
||||
"remote_run = experiment.submit(automl_config, show_output = False)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Exploring the Results <a class=\"anchor\" id=\"Exploring-the-Results-Remote-DSVM\"></a>\n",
|
||||
"#### Widget for monitoring runs\n",
|
||||
"## Exploring the results"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Widget for Monitoring Runs\n",
|
||||
"\n",
|
||||
"The widget will sit on \"loading\" until the first iteration completed, then you will see an auto-updating graph and table show up. It refreshed once per minute, so you should see the graph update as child runs complete.\n",
|
||||
"The widget will first report a \"loading\" status while running the first iteration. After completing the first iteration, an auto-updating graph and table will be shown. The widget will refresh once per minute, so you should see the graph update as child runs complete.\n",
|
||||
"\n",
|
||||
"You can click on a pipeline to see run properties and output logs. Logs are also available on the DSVM under /tmp/azureml_run/{iterationid}/azureml-logs\n",
|
||||
"You can click on a pipeline to see run properties and output logs. Logs are also available on the DSVM under `/tmp/azureml_run/{iterationid}/azureml-logs`.\n",
|
||||
"\n",
|
||||
"NOTE: The widget displays a link at the bottom. This links to a web-ui to explore the individual run details."
|
||||
"**Note:** The widget displays a link at the bottom. Use this link to open a web interface to explore the individual run details."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -295,7 +298,7 @@
|
||||
"source": [
|
||||
"\n",
|
||||
"#### Retrieve All Child Runs\n",
|
||||
"You can also use sdk methods to fetch all the child runs and see individual metrics that we log. "
|
||||
"You can also use SDK methods to fetch all the child runs and see individual metrics that we log."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -308,7 +311,7 @@
|
||||
"metricslist = {}\n",
|
||||
"for run in children:\n",
|
||||
" properties = run.get_properties()\n",
|
||||
" metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)} \n",
|
||||
" metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)}\n",
|
||||
" metricslist[int(properties['iteration'])] = metrics\n",
|
||||
"\n",
|
||||
"rundata = pd.DataFrame(metricslist).sort_index(1)\n",
|
||||
@@ -319,8 +322,8 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Canceling runs\n",
|
||||
"You can cancel ongoing remote runs using the *cancel()* and *cancel_iteration()* functions"
|
||||
"## Cancelling Runs\n",
|
||||
"You can cancel ongoing remote runs using the `cancel` and `cancel_iteration` functions."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -329,10 +332,10 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Cancel the ongoing experiment and stop scheduling new iterations\n",
|
||||
"# Cancel the ongoing experiment and stop scheduling new iterations.\n",
|
||||
"# remote_run.cancel()\n",
|
||||
"\n",
|
||||
"# Cancel iteration 1 and move onto iteration 2\n",
|
||||
"# Cancel iteration 1 and move onto iteration 2.\n",
|
||||
"# remote_run.cancel_iteration(1)"
|
||||
]
|
||||
},
|
||||
@@ -342,7 +345,7 @@
|
||||
"source": [
|
||||
"### Retrieve the Best Model\n",
|
||||
"\n",
|
||||
"Below we select the best pipeline from our iterations. The *get_output* method on automl_classifier returns the best run and the fitted model for the last *fit* invocation. There are overloads on *get_output* that allow you to retrieve the best run and fitted model for *any* logged metric or a particular *iteration*."
|
||||
"Below we select the best pipeline from our iterations. The `get_output` method on `automl_classifier` returns the best run and the fitted model for the last invocation. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -358,7 +361,8 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Best Model based on any other metric"
|
||||
"#### Best Model Based on Any Other Metric\n",
|
||||
"Show the run and the model that has the best `accuracy` value:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -368,14 +372,15 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# lookup_metric = \"accuracy\"\n",
|
||||
"# best_run, fitted_model = remote_run.get_output(metric=lookup_metric)"
|
||||
"# best_run, fitted_model = remote_run.get_output(metric = lookup_metric)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Model from a specific iteration"
|
||||
"#### Model from a Specific Iteration\n",
|
||||
"Show the run and the model from the first iteration:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -385,14 +390,14 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# iteration = 1\n",
|
||||
"# best_run, fitted_model = remote_run.get_output(iteration=iteration)"
|
||||
"# best_run, fitted_model = remote_run.get_output(iteration = iteration)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Register fitted model for deployment"
|
||||
"### Register the Fitted Model for Deployment"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -403,15 +408,15 @@
|
||||
"source": [
|
||||
"description = 'AutoML Model'\n",
|
||||
"tags = None\n",
|
||||
"remote_run.register_model(description=description, tags=tags)\n",
|
||||
"remote_run.model_id # Use this id to deploy the model as a web service in Azure"
|
||||
"remote_run.register_model(description = description, tags = tags)\n",
|
||||
"remote_run.model_id # Use this id to deploy the model as a web service in Azure."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Testing the Fitted Model <a class=\"anchor\" id=\"Testing-the-Fitted-Model-Remote-DSVM\"></a>\n"
|
||||
"### Test the Best Fitted Model <a class=\"anchor\" id=\"Testing-the-Fitted-Model-Remote-DSVM\"></a>\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -432,28 +437,21 @@
|
||||
"le = LabelEncoder()\n",
|
||||
"le.fit(df[\"Label\"].values)\n",
|
||||
"y = le.transform(df[\"Label\"].values)\n",
|
||||
"df = df.drop([\"Label\"], axis=1)\n",
|
||||
"df = df.drop([\"Label\"], axis = 1)\n",
|
||||
"\n",
|
||||
"_, df_test, _, y_test = train_test_split(df, y, test_size=0.1, random_state=42)\n",
|
||||
"_, df_test, _, y_test = train_test_split(df, y, test_size = 0.1, random_state = 42)\n",
|
||||
"\n",
|
||||
"ypred = fitted_model.predict(df_test.values)\n",
|
||||
"y_pred = fitted_model.predict(df_test.values)\n",
|
||||
"\n",
|
||||
"ypred_strings = le.inverse_transform(ypred)\n",
|
||||
"ytest_strings = le.inverse_transform(y_test)\n",
|
||||
"y_pred_strings = le.inverse_transform(y_pred)\n",
|
||||
"y_test_strings = le.inverse_transform(y_test)\n",
|
||||
"\n",
|
||||
"cm = ConfusionMatrix(ytest_strings, ypred_strings)\n",
|
||||
"cm = ConfusionMatrix(y_test_strings, y_pred_strings)\n",
|
||||
"\n",
|
||||
"print(cm)\n",
|
||||
"\n",
|
||||
"cm.plot()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
@@ -13,29 +13,29 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# AutoML 09: Classification with deployment\n",
|
||||
"# AutoML 09: Classification with Deployment\n",
|
||||
"\n",
|
||||
"In this example we use the scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html) to showcase how you can use AutoML for a simple classification problem.\n",
|
||||
"In this example we use scikit-learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html) to showcase how you can use AutoML for a simple classification problem and deploy it to an Azure Container Instance (ACI).\n",
|
||||
"\n",
|
||||
"Make sure you have executed the [00.configuration](00.configuration.ipynb) before running this notebook.\n",
|
||||
"\n",
|
||||
"In this notebook you would see\n",
|
||||
"1. Creating an Experiment using an existing Workspace\n",
|
||||
"2. Instantiating AutoMLConfig\n",
|
||||
"3. Training the Model using local compute\n",
|
||||
"4. Exploring the results\n",
|
||||
"5. Registering the model\n",
|
||||
"6. Creating Image and creating aci service\n",
|
||||
"7. Testing the aci service\n"
|
||||
"In this notebook you will learn how to:\n",
|
||||
"1. Create an experiment using an existing workspace.\n",
|
||||
"2. Configure AutoML using `AutoMLConfig`.\n",
|
||||
"3. Train the model using local compute.\n",
|
||||
"4. Explore the results.\n",
|
||||
"5. Register the model.\n",
|
||||
"6. Create a container image and create an ACI service.\n",
|
||||
"7. Test the ACI service.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create Experiment\n",
|
||||
"## Create an Experiment\n",
|
||||
"\n",
|
||||
"As part of the setup you have already created a <b>Workspace</b>. For AutoML you would need to create an <b>Experiment</b>. An <b>Experiment</b> is a named object in a <b>Workspace</b>, which is used to run experiments."
|
||||
"As part of the setup you have already created an Azure ML `Workspace` object. For AutoML you will need to create an `Experiment` object, which is a named object in a `Workspace` used to run experiments."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -95,7 +95,7 @@
|
||||
"source": [
|
||||
"## Diagnostics\n",
|
||||
"\n",
|
||||
"Opt-in diagnostics for better experience, quality, and security of future releases"
|
||||
"Opt-in diagnostics for better experience, quality, and security of future releases."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -105,27 +105,27 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.telemetry import set_diagnostics_collection\n",
|
||||
"set_diagnostics_collection(send_diagnostics=True)"
|
||||
"set_diagnostics_collection(send_diagnostics = True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiate Auto ML Config\n",
|
||||
"## Configure AutoML\n",
|
||||
"\n",
|
||||
"Instantiate an AutoMLConfig object. This defines the settings and data used to run the experiment.\n",
|
||||
"\n",
|
||||
"|Property|Description|\n",
|
||||
"|-|-|\n",
|
||||
"|**task**|classification or regression|\n",
|
||||
"|**primary_metric**|This is the metric that you want to optimize.<br> Classification supports the following primary metrics <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>balanced_accuracy</i><br><i>average_precision_score_weighted</i><br><i>precision_score_weighted</i>|\n",
|
||||
"|**max_time_sec**|Time limit in seconds for each iteration|\n",
|
||||
"|**iterations**|Number of iterations. In each iteration Auto ML trains a specific pipeline with the data|\n",
|
||||
"|**n_cross_validations**|Number of cross validation splits|\n",
|
||||
"|**primary_metric**|This is the metric that you want to optimize. Classification supports the following primary metrics: <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>balanced_accuracy</i><br><i>average_precision_score_weighted</i><br><i>precision_score_weighted</i>|\n",
|
||||
"|**max_time_sec**|Time limit in seconds for each iteration.|\n",
|
||||
"|**iterations**|Number of iterations. In each iteration AutoML trains a specific pipeline with the data.|\n",
|
||||
"|**n_cross_validations**|Number of cross validation splits.|\n",
|
||||
"|**X**|(sparse) array-like, shape = [n_samples, n_features]|\n",
|
||||
"|**y**|(sparse) array-like, shape = [n_samples, ], [n_samples, n_classes]<br>Multi-class targets. An indicator matrix turns on multilabel classification. This should be an array of integers. |\n",
|
||||
"|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder. |"
|
||||
"|**y**|(sparse) array-like, shape = [n_samples, ], [n_samples, n_classes]<br>Multi-class targets. An indicator matrix turns on multilabel classification. This should be an array of integers.|\n",
|
||||
"|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder.|"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -135,30 +135,30 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"digits = datasets.load_digits()\n",
|
||||
"X_digits = digits.data[10:,:]\n",
|
||||
"y_digits = digits.target[10:]\n",
|
||||
"X_train = digits.data[10:,:]\n",
|
||||
"y_train = digits.target[10:]\n",
|
||||
"\n",
|
||||
"automl_config = AutoMLConfig(task = 'classification',\n",
|
||||
" name=experiment_name,\n",
|
||||
" debug_log='automl_errors.log',\n",
|
||||
" primary_metric='AUC_weighted',\n",
|
||||
" max_time_sec=1200,\n",
|
||||
" iterations=10,\n",
|
||||
" n_cross_validations=2,\n",
|
||||
" verbosity=logging.INFO,\n",
|
||||
" X = X_digits, \n",
|
||||
" y = y_digits,\n",
|
||||
" path=project_folder)"
|
||||
" name = experiment_name,\n",
|
||||
" debug_log = 'automl_errors.log',\n",
|
||||
" primary_metric = 'AUC_weighted',\n",
|
||||
" max_time_sec = 1200,\n",
|
||||
" iterations = 10,\n",
|
||||
" n_cross_validations = 2,\n",
|
||||
" verbosity = logging.INFO,\n",
|
||||
" X = X_train, \n",
|
||||
" y = y_train,\n",
|
||||
" path = project_folder)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Training the Model\n",
|
||||
"## Train the Model\n",
|
||||
"\n",
|
||||
"You can call the submit method on the experiment object and pass the run configuration. For Local runs the execution is synchronous. Depending on the data and number of iterations this can run for while.\n",
|
||||
"You will see the currently running iterations printing to the console."
|
||||
"Call the `submit` method on the experiment object and pass the run configuration. Execution of local runs is synchronous. Depending on the data and the number of iterations this can run for a while.\n",
|
||||
"In this example, we specify `show_output = True` to print currently running iterations to the console."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -167,7 +167,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"local_run = experiment.submit(automl_config, show_output=True)"
|
||||
"local_run = experiment.submit(automl_config, show_output = True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -176,7 +176,7 @@
|
||||
"source": [
|
||||
"### Retrieve the Best Model\n",
|
||||
"\n",
|
||||
"Below we select the best pipeline from our iterations. The *get_output* method on automl_classifier returns the best run and the fitted model for the last *fit* invocation. There are overloads on *get_output* that allow you to retrieve the best run and fitted model for *any* logged metric or a particular *iteration*."
|
||||
"Below we select the best pipeline from our iterations. The `get_output` method on `automl_classifier` returns the best run and the fitted model for the last invocation. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -192,7 +192,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Register fitted model for deployment"
|
||||
"### Register the Fitted Model for Deployment"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -203,7 +203,7 @@
|
||||
"source": [
|
||||
"description = 'AutoML Model'\n",
|
||||
"tags = None\n",
|
||||
"model = local_run.register_model(description=description, tags=tags, iteration=8)\n",
|
||||
"model = local_run.register_model(description = description, tags = tags, iteration = 8)\n",
|
||||
"local_run.model_id # This will be written to the script file later in the notebook."
|
||||
]
|
||||
},
|
||||
@@ -211,7 +211,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create Scoring script ###"
|
||||
"### Create Scoring Script"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -249,14 +249,14 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create yml file for env"
|
||||
"### Create a YAML File for the Environment"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To ensure the consistence the fit results with the training results, the sdk dependence versions need to be the same as the environment that trains the model. Details about retrieving the versions can be found in notebook 12.auto-ml-retrieve-the-training-sdk-versions.ipynb."
|
||||
"To ensure the consistency of the fit results with the training results, the SDK dependency versions need to be the same as the environment that trains the model. Details about retrieving the versions can be found in notebook [12.auto-ml-retrieve-the-training-sdk-versions](12.auto-ml-retrieve-the-training-sdk-versions.ipynb)."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -268,7 +268,7 @@
|
||||
"experiment_name = 'automl-local-classification'\n",
|
||||
"\n",
|
||||
"experiment = Experiment(ws, experiment_name)\n",
|
||||
"ml_run = AutoMLRun(experiment=experiment, run_id=local_run.id)"
|
||||
"ml_run = AutoMLRun(experiment = experiment, run_id = local_run.id)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -277,7 +277,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dependencies = ml_run.get_run_sdk_dependencies(iteration=7)"
|
||||
"dependencies = ml_run.get_run_sdk_dependencies(iteration = 7)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -304,7 +304,8 @@
|
||||
" - pip:\n",
|
||||
" - numpy==1.14.2\n",
|
||||
" - scikit-learn==0.19.2\n",
|
||||
" - azureml-sdk[notebooks,automl]==<<azureml-version>> "
|
||||
" - pynacl==1.2.1\n",
|
||||
" - azureml-sdk[notebooks,automl]==<<azureml-version>>"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -338,7 +339,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create Image ###"
|
||||
"### Create a Container Image"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -368,7 +369,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Deploy Image as web service on Azure Container Instance ###"
|
||||
"### Deploy the Image as a Web Service on Azure Container Instance"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -407,7 +408,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### To delete a service ##"
|
||||
"### Delete a Web Service"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -423,7 +424,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### To get logs from deployed service ###"
|
||||
"### Get Logs from a Deployed Web Service"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -439,7 +440,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Test Web Service ###"
|
||||
"### Test a Web Service"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -450,30 +451,23 @@
|
||||
"source": [
|
||||
"#Randomly select digits and test\n",
|
||||
"digits = datasets.load_digits()\n",
|
||||
"X_digits = digits.data[:10, :]\n",
|
||||
"y_digits = digits.target[:10]\n",
|
||||
"X_test = digits.data[:10, :]\n",
|
||||
"y_test = digits.target[:10]\n",
|
||||
"images = digits.images[:10]\n",
|
||||
"\n",
|
||||
"for index in np.random.choice(len(y_digits), 3):\n",
|
||||
"for index in np.random.choice(len(y_test), 3, replace = False):\n",
|
||||
" print(index)\n",
|
||||
" test_sample = json.dumps({'data':X_digits[index:index + 1].tolist()})\n",
|
||||
" test_sample = json.dumps({'data':X_test[index:index + 1].tolist()})\n",
|
||||
" predicted = aci_service.run(input_data = test_sample)\n",
|
||||
" label = y_digits[index]\n",
|
||||
" label = y_test[index]\n",
|
||||
" predictedDict = json.loads(predicted)\n",
|
||||
" title = \"Label value = %d Predicted value = %s \" % ( label,predictedDict['result'][0])\n",
|
||||
" fig = plt.figure(1, figsize=(3,3))\n",
|
||||
" fig = plt.figure(1, figsize = (3,3))\n",
|
||||
" ax1 = fig.add_axes((0,0,.8,.8))\n",
|
||||
" ax1.set_title(title)\n",
|
||||
" plt.imshow(images[index], cmap=plt.cm.gray_r, interpolation='nearest')\n",
|
||||
" plt.imshow(images[index], cmap = plt.cm.gray_r, interpolation = 'nearest')\n",
|
||||
" plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
@@ -13,14 +13,9 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# AutoML 10: Multi output Example for AutoML"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This notebook shows an example to use AutoML to train the multi output problems by leveraging the correlation between the outputs using indicator vectors."
|
||||
"# AutoML 10: Multi-output\n",
|
||||
"\n",
|
||||
"This notebook shows how to use AutoML to train multi-output problems by leveraging the correlation between the outputs using indicator vectors."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -52,7 +47,7 @@
|
||||
"source": [
|
||||
"## Diagnostics\n",
|
||||
"\n",
|
||||
"Opt-in diagnostics for better experience, quality, and security of future releases"
|
||||
"Opt-in diagnostics for better experience, quality, and security of future releases."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -62,25 +57,25 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.telemetry import set_diagnostics_collection\n",
|
||||
"set_diagnostics_collection(send_diagnostics=True)"
|
||||
"set_diagnostics_collection(send_diagnostics = True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Transformer functions\n",
|
||||
"The transformation of the input are happening for input X and Y as following, e.g. Y = {y_1, y_2}, then X becomes\n",
|
||||
"## Transformer Functions\n",
|
||||
"Inputs `X` and `y` are transformed as follows. For example, if `y = {y_1, y_2}`, then `X` becomes\n",
|
||||
" \n",
|
||||
"X 1 0\n",
|
||||
"`X 1 0`\n",
|
||||
" \n",
|
||||
"X 0 1\n",
|
||||
"`X 0 1`\n",
|
||||
"\n",
|
||||
"and Y becomes,\n",
|
||||
"and `y` becomes,\n",
|
||||
"\n",
|
||||
"y_1\n",
|
||||
"`y_1`\n",
|
||||
"\n",
|
||||
"y_2"
|
||||
"`y_2`"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -93,34 +88,34 @@
|
||||
"from scipy import linalg\n",
|
||||
"\n",
|
||||
"#Transformer functions\n",
|
||||
"def multi_output_transform_x_y(X, Y):\n",
|
||||
" X_new = multi_output_transformer_x(X, Y.shape[1])\n",
|
||||
" y_new = multi_output_transform_y(Y)\n",
|
||||
"def multi_output_transform_x_y(X, y):\n",
|
||||
" X_new = multi_output_transformer_x(X, y.shape[1])\n",
|
||||
" y_new = multi_output_transform_y(y)\n",
|
||||
" return X_new, y_new\n",
|
||||
"\n",
|
||||
"def multi_output_transformer_x(X, number_of_columns_Y):\n",
|
||||
" indicator_vecs = linalg.block_diag(*([np.ones((X.shape[0], 1))] * number_of_columns_Y))\n",
|
||||
"def multi_output_transformer_x(X, number_of_columns_y):\n",
|
||||
" indicator_vecs = linalg.block_diag(*([np.ones((X.shape[0], 1))] * number_of_columns_y))\n",
|
||||
" if sparse.issparse(X):\n",
|
||||
" X_new = sparse.vstack(np.tile(X, number_of_columns_Y))\n",
|
||||
" X_new = sparse.vstack(np.tile(X, number_of_columns_y))\n",
|
||||
" indicator_vecs = sparse.coo_matrix(indicator_vecs)\n",
|
||||
" X_new = sparse.hstack((X_new, indicator_vecs))\n",
|
||||
" else:\n",
|
||||
" X_new = np.tile(X, (number_of_columns_Y, 1))\n",
|
||||
" X_new = np.tile(X, (number_of_columns_y, 1))\n",
|
||||
" X_new = np.hstack((X_new, indicator_vecs))\n",
|
||||
" return X_new\n",
|
||||
"\n",
|
||||
"def multi_output_transform_y(Y):\n",
|
||||
" return Y.reshape(-1, order=\"F\")\n",
|
||||
" \n",
|
||||
"def multi_output_transform_y(y):\n",
|
||||
" return y.reshape(-1, order=\"F\")\n",
|
||||
"\n",
|
||||
"def multi_output_inverse_transform_y(y, number_of_columns_y):\n",
|
||||
" return y.reshape((-1, number_of_columns_y), order=\"F\")"
|
||||
" return y.reshape((-1, number_of_columns_y), order = \"F\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## AutoML experiment set up"
|
||||
"## AutoML Experiment Setup"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -131,12 +126,11 @@
|
||||
"source": [
|
||||
"ws = Workspace.from_config()\n",
|
||||
"\n",
|
||||
"# choose a name for experiment\n",
|
||||
"# Choose a name for the experiment and specify the project folder.\n",
|
||||
"experiment_name = 'automl-local-multi-output'\n",
|
||||
"# project folder\n",
|
||||
"project_folder = './sample_projects/automl-local-multi-output'\n",
|
||||
"\n",
|
||||
"experiment=Experiment(ws, experiment_name)\n",
|
||||
"experiment = Experiment(ws, experiment_name)\n",
|
||||
"\n",
|
||||
"output = {}\n",
|
||||
"output['SDK version'] = azureml.core.VERSION\n",
|
||||
@@ -147,14 +141,14 @@
|
||||
"output['Project Directory'] = project_folder\n",
|
||||
"output['Experiment Name'] = experiment.name\n",
|
||||
"pd.set_option('display.max_colwidth', -1)\n",
|
||||
"pd.DataFrame(data=output, index=['']).T"
|
||||
"pd.DataFrame(data = output, index = ['']).T"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create a random dataset for the test purpose "
|
||||
"## Create a Random Dataset for Test Purposes"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -164,16 +158,16 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"rng = np.random.RandomState(1)\n",
|
||||
"X_train = np.sort(200 * rng.rand(600, 1) - 100, axis=0)\n",
|
||||
"Y_train = np.array([np.pi * np.sin(X_train).ravel(), np.pi * np.cos(X_train).ravel()]).T\n",
|
||||
"Y_train += (0.5 - rng.rand(*Y_train.shape))"
|
||||
"X_train = np.sort(200 * rng.rand(600, 1) - 100, axis = 0)\n",
|
||||
"y_train = np.array([np.pi * np.sin(X_train).ravel(), np.pi * np.cos(X_train).ravel()]).T\n",
|
||||
"y_train += (0.5 - rng.rand(*y_train.shape))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Perform X and Y transformation using transformer function"
|
||||
"Perform X and y transformation using the transformer function."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -182,7 +176,14 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"X_train_transformed, y_train_transformed = multi_output_transform_x_y(X_train, Y_train)"
|
||||
"X_train_transformed, y_train_transformed = multi_output_transform_x_y(X_train, y_train)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Configure AutoML using the transformed results."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -192,21 +193,21 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"automl_config = AutoMLConfig(task = 'regression',\n",
|
||||
" debug_log='automl_errors_multi.log',\n",
|
||||
" primary_metric='r2_score',\n",
|
||||
" iterations=10,\n",
|
||||
" n_cross_validations=2,\n",
|
||||
" verbosity=logging.INFO,\n",
|
||||
" X=X_train_transformed,\n",
|
||||
" y=y_train_transformed,\n",
|
||||
" path=project_folder)"
|
||||
" debug_log = 'automl_errors_multi.log',\n",
|
||||
" primary_metric = 'r2_score',\n",
|
||||
" iterations = 10,\n",
|
||||
" n_cross_validations = 2,\n",
|
||||
" verbosity = logging.INFO,\n",
|
||||
" X = X_train_transformed,\n",
|
||||
" y = y_train_transformed,\n",
|
||||
" path = project_folder)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Fit the transformed data "
|
||||
"## Fit the Transformed Data"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -215,7 +216,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"local_run = experiment.submit(automl_config, show_output=True)"
|
||||
"local_run = experiment.submit(automl_config, show_output = True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -224,7 +225,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Get the best fit model\n",
|
||||
"# Get the best fit model.\n",
|
||||
"best_run, fitted_model = local_run.get_output()"
|
||||
]
|
||||
},
|
||||
@@ -234,8 +235,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Generate random data set for predicting\n",
|
||||
"X_predict = np.sort(200 * rng.rand(200, 1) - 100, axis=0)"
|
||||
"# Generate random data set for predicting.\n",
|
||||
"X_test = np.sort(200 * rng.rand(200, 1) - 100, axis = 0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -244,11 +245,12 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Transform predict data\n",
|
||||
"X_predict_transformed = multi_output_transformer_x(X_predict, Y_train.shape[1])\n",
|
||||
"# Predict and inverse transform the prediction\n",
|
||||
"y_predict = fitted_model.predict(X_predict_transformed)\n",
|
||||
"Y_predict = multi_output_inverse_transform_y(y_predict, Y_train.shape[1])"
|
||||
"# Transform predict data.\n",
|
||||
"X_test_transformed = multi_output_transformer_x(X_test, y_train.shape[1])\n",
|
||||
"\n",
|
||||
"# Predict and inverse transform the prediction.\n",
|
||||
"y_predict = fitted_model.predict(X_test_transformed)\n",
|
||||
"y_predict = multi_output_inverse_transform_y(y_predict, y_train.shape[1])"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -257,15 +259,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(Y_predict)"
|
||||
"print(y_predict)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
@@ -13,26 +13,22 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# AutoML 11: Sample weight\n",
|
||||
"# AutoML 11: Sample Weight\n",
|
||||
"\n",
|
||||
"In this example we use the scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html) to showcase how you can use sample weight with the AutoML Classifier.\n",
|
||||
"Sample weight is used where some sample values are more important than others.\n",
|
||||
"In this example we use scikit-learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html) to showcase how you can use sample weight with AutoML. Sample weight is used where some sample values are more important than others.\n",
|
||||
"\n",
|
||||
"Make sure you have executed the [00.configuration](00.configuration.ipynb) before running this notebook.\n",
|
||||
"\n",
|
||||
"In this notebook you would see\n",
|
||||
"1. How to specifying sample_weight\n",
|
||||
"2. The difference that it makes to test results\n",
|
||||
"\n"
|
||||
"In this notebook you will learn how to configure AutoML to use `sample_weight` and you will see the difference sample weight makes to the test results.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create Experiment\n",
|
||||
"## Create an Experiment\n",
|
||||
"\n",
|
||||
"As part of the setup you have already created a <b>Workspace</b>. For AutoML you would need to create an <b>Experiment</b>. An <b>Experiment</b> is a named object in a <b>Workspace</b>, which is used to run experiments."
|
||||
"As part of the setup you have already created an Azure ML `Workspace` object. For AutoML you will need to create an `Experiment` object, which is a named object in a `Workspace` used to run experiments."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -66,14 +62,13 @@
|
||||
"source": [
|
||||
"ws = Workspace.from_config()\n",
|
||||
"\n",
|
||||
"# choose a name for experiment\n",
|
||||
"# Choose names for the regular and the sample weight experiments.\n",
|
||||
"experiment_name = 'non_sample_weight_experiment'\n",
|
||||
"sample_weight_experiment_name = 'sample_weight_experiment'\n",
|
||||
"\n",
|
||||
"# project folder\n",
|
||||
"project_folder = './sample_projects/automl-local-classification'\n",
|
||||
"\n",
|
||||
"experiment=Experiment(ws, experiment_name)\n",
|
||||
"experiment = Experiment(ws, experiment_name)\n",
|
||||
"sample_weight_experiment=Experiment(ws, sample_weight_experiment_name)\n",
|
||||
"\n",
|
||||
"output = {}\n",
|
||||
@@ -94,7 +89,7 @@
|
||||
"source": [
|
||||
"## Diagnostics\n",
|
||||
"\n",
|
||||
"Opt-in diagnostics for better experience, quality, and security of future releases"
|
||||
"Opt-in diagnostics for better experience, quality, and security of future releases."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -104,16 +99,16 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.telemetry import set_diagnostics_collection\n",
|
||||
"set_diagnostics_collection(send_diagnostics=True)"
|
||||
"set_diagnostics_collection(send_diagnostics = True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiate Auto ML Config\n",
|
||||
"## Configure AutoML\n",
|
||||
"\n",
|
||||
"Instantiate two AutoMLConfig Objects. One will be used with sample_weight and one without."
|
||||
"Instantiate two `AutoMLConfig` objects. One will be used with `sample_weight` and one without."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -123,12 +118,12 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"digits = datasets.load_digits()\n",
|
||||
"X_digits = digits.data[100:,:]\n",
|
||||
"y_digits = digits.target[100:]\n",
|
||||
"X_train = digits.data[100:,:]\n",
|
||||
"y_train = digits.target[100:]\n",
|
||||
"\n",
|
||||
"# The example makes the sample weight 0.9 for the digit 4 and 0.01 for all other digits.\n",
|
||||
"# This makes the model more likely to classify as 4 if the image is not clear.\n",
|
||||
"sample_weight = np.array([(0.9 if x == 4 else 0.01) for x in y_digits])\n",
|
||||
"sample_weight = np.array([(0.9 if x == 4 else 0.01) for x in y_train])\n",
|
||||
"\n",
|
||||
"automl_classifier = AutoMLConfig(task = 'classification',\n",
|
||||
" debug_log = 'automl_errors.log',\n",
|
||||
@@ -137,9 +132,9 @@
|
||||
" iterations = 10,\n",
|
||||
" n_cross_validations = 2,\n",
|
||||
" verbosity = logging.INFO,\n",
|
||||
" X = X_digits, \n",
|
||||
" y = y_digits,\n",
|
||||
" path=project_folder)\n",
|
||||
" X = X_train, \n",
|
||||
" y = y_train,\n",
|
||||
" path = project_folder)\n",
|
||||
"\n",
|
||||
"automl_sample_weight = AutoMLConfig(task = 'classification',\n",
|
||||
" debug_log = 'automl_errors.log',\n",
|
||||
@@ -148,20 +143,20 @@
|
||||
" iterations = 10,\n",
|
||||
" n_cross_validations = 2,\n",
|
||||
" verbosity = logging.INFO,\n",
|
||||
" X = X_digits, \n",
|
||||
" y = y_digits,\n",
|
||||
" X = X_train, \n",
|
||||
" y = y_train,\n",
|
||||
" sample_weight = sample_weight,\n",
|
||||
" path=project_folder)"
|
||||
" path = project_folder)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Training the Models\n",
|
||||
"## Train the Models\n",
|
||||
"\n",
|
||||
"Call the submit method on the experiment and pass the configuration. For Local runs the execution is synchronous. Depending on the data and number of iterations this can run for while.\n",
|
||||
"You will see the currently running iterations printing to the console."
|
||||
"Call the `submit` method on the experiment objects and pass the run configuration. Execution of local runs is synchronous. Depending on the data and the number of iterations this can run for a while.\n",
|
||||
"In this example, we specify `show_output = True` to print currently running iterations to the console."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -170,8 +165,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"local_run = experiment.submit(automl_classifier, show_output=True)\n",
|
||||
"sample_weight_run = sample_weight_experiment.submit(automl_sample_weight, show_output=True)\n",
|
||||
"local_run = experiment.submit(automl_classifier, show_output = True)\n",
|
||||
"sample_weight_run = sample_weight_experiment.submit(automl_sample_weight, show_output = True)\n",
|
||||
"\n",
|
||||
"best_run, fitted_model = local_run.get_output()\n",
|
||||
"best_run_sample_weight, fitted_model_sample_weight = sample_weight_run.get_output()"
|
||||
@@ -181,7 +176,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Testing the Fitted Models\n",
|
||||
"### Test the Best Fitted Model\n",
|
||||
"\n",
|
||||
"#### Load Test Data"
|
||||
]
|
||||
@@ -193,8 +188,8 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"digits = datasets.load_digits()\n",
|
||||
"X_digits = digits.data[:100, :]\n",
|
||||
"y_digits = digits.target[:100]\n",
|
||||
"X_test = digits.data[:100, :]\n",
|
||||
"y_test = digits.target[:100]\n",
|
||||
"images = digits.images[:100]"
|
||||
]
|
||||
},
|
||||
@@ -202,7 +197,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Compare the pipelines\n",
|
||||
"#### Compare the Pipelines\n",
|
||||
"The prediction from the sample weight model is more likely to correctly predict 4's. However, it is also more likely to predict 4 for some images that are not labelled as 4."
|
||||
]
|
||||
},
|
||||
@@ -212,17 +207,17 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#Randomly select digits and test\n",
|
||||
"for index in range(0,len(y_digits)):\n",
|
||||
" predicted = fitted_model.predict(X_digits[index:index + 1])[0]\n",
|
||||
" predicted_sample_weight = fitted_model_sample_weight.predict(X_digits[index:index + 1])[0]\n",
|
||||
" label = y_digits[index]\n",
|
||||
"# Randomly select digits and test.\n",
|
||||
"for index in range(0,len(y_test)):\n",
|
||||
" predicted = fitted_model.predict(X_test[index:index + 1])[0]\n",
|
||||
" predicted_sample_weight = fitted_model_sample_weight.predict(X_test[index:index + 1])[0]\n",
|
||||
" label = y_test[index]\n",
|
||||
" if predicted == 4 or predicted_sample_weight == 4 or label == 4:\n",
|
||||
" title = \"Label value = %d Predicted value = %d Prediced with sample weight = %d\" % ( label,predicted,predicted_sample_weight)\n",
|
||||
" title = \"Label value = %d Predicted value = %d Prediced with sample weight = %d\" % (label, predicted, predicted_sample_weight)\n",
|
||||
" fig = plt.figure(1, figsize=(3,3))\n",
|
||||
" ax1 = fig.add_axes((0,0,.8,.8))\n",
|
||||
" ax1.set_title(title)\n",
|
||||
" plt.imshow(images[index], cmap=plt.cm.gray_r, interpolation='nearest')\n",
|
||||
" plt.imshow(images[index], cmap = plt.cm.gray_r, interpolation = 'nearest')\n",
|
||||
" plt.show()"
|
||||
]
|
||||
}
|
||||
|
||||
@@ -46,7 +46,7 @@
|
||||
"source": [
|
||||
"## Diagnostics\n",
|
||||
"\n",
|
||||
"Opt-in diagnostics for better experience, quality, and security of future releases"
|
||||
"Opt-in diagnostics for better experience, quality, and security of future releases."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -56,21 +56,21 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.telemetry import set_diagnostics_collection\n",
|
||||
"set_diagnostics_collection(send_diagnostics=True)"
|
||||
"set_diagnostics_collection(send_diagnostics = True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# 1. Retrieve the SDK versions in the current env"
|
||||
"# 1. Retrieve the SDK versions in the current environment"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To retrieve the SDK versions in the current env, simple running get_sdk_dependencies()"
|
||||
"To retrieve the SDK versions in the current environment, run `get_sdk_dependencies`."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -86,7 +86,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# 2. Training Model Using AutoML"
|
||||
"# 2. Train model using AutoML"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -97,12 +97,11 @@
|
||||
"source": [
|
||||
"ws = Workspace.from_config()\n",
|
||||
"\n",
|
||||
"# choose a name for experiment\n",
|
||||
"# Choose a name for the experiment and specify the project folder.\n",
|
||||
"experiment_name = 'automl-local-classification'\n",
|
||||
"# project folder\n",
|
||||
"project_folder = './sample_projects/automl-local-classification'\n",
|
||||
"\n",
|
||||
"experiment=Experiment(ws, experiment_name)\n",
|
||||
"experiment = Experiment(ws, experiment_name)\n",
|
||||
"\n",
|
||||
"output = {}\n",
|
||||
"output['SDK version'] = azureml.core.VERSION\n",
|
||||
@@ -123,20 +122,20 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"digits = datasets.load_digits()\n",
|
||||
"X_digits = digits.data[10:,:]\n",
|
||||
"y_digits = digits.target[10:]\n",
|
||||
"X_train = digits.data[10:,:]\n",
|
||||
"y_train = digits.target[10:]\n",
|
||||
"\n",
|
||||
"automl_config = AutoMLConfig(task = 'classification',\n",
|
||||
" debug_log='automl_errors.log',\n",
|
||||
" primary_metric='AUC_weighted',\n",
|
||||
" iterations=3,\n",
|
||||
" n_cross_validations=2,\n",
|
||||
" verbosity=logging.INFO,\n",
|
||||
" X = X_digits, \n",
|
||||
" y = y_digits,\n",
|
||||
" path=project_folder)\n",
|
||||
" debug_log = 'automl_errors.log',\n",
|
||||
" primary_metric = 'AUC_weighted',\n",
|
||||
" iterations = 3,\n",
|
||||
" n_cross_validations = 2,\n",
|
||||
" verbosity = logging.INFO,\n",
|
||||
" X = X_train, \n",
|
||||
" y = y_train,\n",
|
||||
" path = project_folder)\n",
|
||||
"\n",
|
||||
"local_run = experiment.submit(automl_config, show_output=True)"
|
||||
"local_run = experiment.submit(automl_config, show_output = True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -150,7 +149,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To get the SDK versions from RunHistory, first the RunId need to be recorded. This can either be done by copy it from the output message or retieve if after each run."
|
||||
"To get the SDK versions from RunHistory, first the run id needs to be recorded. This can either be done by copying it from the output message or by retrieving it after each run."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -159,6 +158,10 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Use a run id copied from an output message.\n",
|
||||
"#run_id = 'AutoML_c0585b1f-a0e6-490b-84c7-3a099468b28e'\n",
|
||||
"\n",
|
||||
"# Retrieve the run id from a run.\n",
|
||||
"run_id = local_run.id\n",
|
||||
"print(run_id)"
|
||||
]
|
||||
@@ -167,7 +170,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Initialize a new AutoMLRunClass."
|
||||
"Initialize a new `AutoMLRun` object."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -177,10 +180,9 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"experiment_name = 'automl-local-classification'\n",
|
||||
"#run_id = 'AutoML_c0585b1f-a0e6-490b-84c7-3a099468b28e'\n",
|
||||
"\n",
|
||||
"experiment = Experiment(ws, experiment_name)\n",
|
||||
"ml_run = AutoMLRun(experiment=experiment, run_id=run_id)"
|
||||
"ml_run = AutoMLRun(experiment = experiment, run_id = run_id)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -212,7 +214,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ml_run.get_run_sdk_dependencies(iteration=2)"
|
||||
"ml_run.get_run_sdk_dependencies(iteration = 2)"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
||||
@@ -14,14 +14,14 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# AutoML 13: Prepare Data using `azureml.dataprep`\n",
|
||||
"In this example we showcase how you can use `azureml.dataprep` SDK to load and prepare data for AutoML. `azureml.dataprep` can also be used standalone - full documentation can be found [here](https://github.com/Microsoft/PendletonDocs).\n",
|
||||
"In this example we showcase how you can use the `azureml.dataprep` SDK to load and prepare data for AutoML. `azureml.dataprep` can also be used standalone; full documentation can be found [here](https://github.com/Microsoft/PendletonDocs).\n",
|
||||
"\n",
|
||||
"Make sure you have executed the [setup](00.configuration.ipynb) before running this notebook.\n",
|
||||
"\n",
|
||||
"In this notebook you would see\n",
|
||||
"1. Defining data loading and preparation steps in a `Dataflow` using `azureml.dataprep`\n",
|
||||
"2. Passing the `Dataflow` to AutoML for local run\n",
|
||||
"3. Passing the `Dataflow` to AutoML for remote run"
|
||||
"In this notebook you will learn how to:\n",
|
||||
"1. Define data loading and preparation steps in a `Dataflow` using `azureml.dataprep`.\n",
|
||||
"2. Pass the `Dataflow` to AutoML for a local run.\n",
|
||||
"3. Pass the `Dataflow` to AutoML for a remote run."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -31,23 +31,13 @@
|
||||
"## Install `azureml.dataprep` SDK"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Please restart your kernel after the below installs.\n",
|
||||
"\n",
|
||||
"Tornado must be downgraded to a pre-5 version due to a known Tornado x Jupyter event loop bug."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install azureml-dataprep\n",
|
||||
"!pip install tornado==4.5.1"
|
||||
"!pip install azureml-dataprep"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -73,9 +63,9 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create Experiment\n",
|
||||
"## Create an Experiment\n",
|
||||
"\n",
|
||||
"As part of the setup you have already created a <b>Workspace</b>. For AutoML you would need to create an <b>Experiment</b>. An <b>Experiment</b> is a named object in a <b>Workspace</b>, which is used to run experiments."
|
||||
"As part of the setup you have already created an Azure ML `Workspace` object. For AutoML you will need to create an `Experiment` object, which is a named object in a `Workspace` used to run experiments."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -139,12 +129,12 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# You can use `smart_read_file` which intelligently figures out delimiters and datatypes of a file\n",
|
||||
"# data pulled from sklearn.datasets.load_digits()\n",
|
||||
"# You can use `smart_read_file` which intelligently figures out delimiters and datatypes of a file.\n",
|
||||
"# The data referenced here was pulled from `sklearn.datasets.load_digits()`.\n",
|
||||
"simple_example_data_root = 'https://dprepdata.blob.core.windows.net/automl-notebook-data/'\n",
|
||||
"X = dprep.smart_read_file(simple_example_data_root + 'X.csv').skip(1) # remove header\n",
|
||||
"X = dprep.smart_read_file(simple_example_data_root + 'X.csv').skip(1) # Remove the header row.\n",
|
||||
"\n",
|
||||
"# You can also use `read_csv` and `to_*` transformations to read (with overridable delimiter).\n",
|
||||
"# You can also use `read_csv` and `to_*` transformations to read (with overridable delimiter)\n",
|
||||
"# and convert column types manually.\n",
|
||||
"# Here we read a comma delimited file and convert all columns to integers.\n",
|
||||
"y = dprep.read_csv(simple_example_data_root + 'y.csv').to_long(dprep.ColumnSelector(term='.*', use_regex = True))"
|
||||
@@ -156,7 +146,7 @@
|
||||
"source": [
|
||||
"## Review the Data Preparation Result\n",
|
||||
"\n",
|
||||
"You can peek the result of a Dataflow at any range using `skip(i)` and `head(j)`. Doing so evaluates only `j` records for all the steps in the Dataflow, which makes it fast even against large dataset."
|
||||
"You can peek at the result of a Dataflow at any range using `skip(i)` and `head(j)`. Doing so evaluates only `j` records for all the steps in the Dataflow, which makes it fast even against large datasets."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -172,9 +162,9 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Instantiate AutoML Settings\n",
|
||||
"## Configure AutoML\n",
|
||||
"\n",
|
||||
"This creates a general Auto ML Settings applicable for both Local and Remote runs."
|
||||
"This creates a general AutoML settings object applicable for both local and remote runs."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -189,7 +179,7 @@
|
||||
" \"primary_metric\": 'AUC_weighted',\n",
|
||||
" \"preprocess\": False,\n",
|
||||
" \"verbosity\": logging.INFO,\n",
|
||||
" \"n_cross_validations\" : 3\n",
|
||||
" \"n_cross_validations\": 3\n",
|
||||
"}"
|
||||
]
|
||||
},
|
||||
@@ -204,9 +194,9 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Pass data with Dataflows\n",
|
||||
"### Pass Data with `Dataflow` Objects\n",
|
||||
"\n",
|
||||
"The `Dataflow` objects captured above can be passed to `submit` method for local run. AutoML will retrieve the results from the `Dataflow` for model training."
|
||||
"The `Dataflow` objects captured above can be passed to the `submit` method for a local run. AutoML will retrieve the results from the `Dataflow` for model training."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -228,7 +218,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"local_run = experiment.submit(automl_config, show_output=True)"
|
||||
"local_run = experiment.submit(automl_config, show_output = True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -255,9 +245,9 @@
|
||||
"dsvm_name = 'mydsvm'\n",
|
||||
"try:\n",
|
||||
" dsvm_compute = DsvmCompute(ws, dsvm_name)\n",
|
||||
" print('found existing dsvm.')\n",
|
||||
" print('Found existing DVSM.')\n",
|
||||
"except:\n",
|
||||
" print('creating new dsvm.')\n",
|
||||
" print('Creating a new DSVM.')\n",
|
||||
" dsvm_config = DsvmCompute.provisioning_configuration(vm_size = \"Standard_D2_v2\")\n",
|
||||
" dsvm_compute = DsvmCompute.create(ws, name = dsvm_name, provisioning_configuration = dsvm_config)\n",
|
||||
" dsvm_compute.wait_for_completion(show_output = True)"
|
||||
@@ -269,7 +259,7 @@
|
||||
"source": [
|
||||
"### Update Conda Dependency file to have AutoML and DataPrep SDK\n",
|
||||
"\n",
|
||||
"Currently AutoML and DataPrep SDK is not installed with Azure ML SDK by default. Due to this we update the conda dependency file to add such dependencies."
|
||||
"Currently the AutoML and DataPrep SDKs are not installed with the Azure ML SDK by default. To circumvent this limitation, we update the conda dependency file to add these dependencies."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -287,7 +277,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create a RunConfiguration with DSVM name"
|
||||
"### Create a `RunConfiguration` with DSVM name"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -305,9 +295,9 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Pass data with Dataflows\n",
|
||||
"### Pass Data with `Dataflow` Objects\n",
|
||||
"\n",
|
||||
"The `Dataflow` objects captured above can also be passed to `submit` method for remote run. AutoML will serialize the `Dataflow` and send to remote compute target. The `Dataflow` will not be evaluated locally."
|
||||
"The `Dataflow` objects captured above can also be passed to the `submit` method for a remote run. AutoML will serialize the `Dataflow` object and send it to the remote compute target. The `Dataflow` will not be evaluated locally."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -332,18 +322,18 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Exploring the results"
|
||||
"## Explore the Results"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Widget for monitoring runs\n",
|
||||
"#### Widget for Monitoring Runs\n",
|
||||
"\n",
|
||||
"The widget will sit on \"loading\" until the first iteration completed, then you will see an auto-updating graph and table show up. It refreshed once per minute, so you should see the graph update as child runs complete.\n",
|
||||
"The widget will first report a \"loading\" status while running the first iteration. After completing the first iteration, an auto-updating graph and table will be shown. The widget will refresh once per minute, so you should see the graph update as child runs complete.\n",
|
||||
"\n",
|
||||
"NOTE: The widget displays a link at the bottom. This links to a web-ui to explore the individual run details."
|
||||
"**Note:** The widget displays a link at the bottom. Use this link to open a web interface to explore the individual run details."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -353,14 +343,14 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.train.widgets import RunDetails\n",
|
||||
"RunDetails(local_run).show() "
|
||||
"RunDetails(local_run).show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Retrieve all child runs\n",
|
||||
"#### Retrieve All Child Runs\n",
|
||||
"You can also use SDK methods to fetch all the child runs and see individual metrics that we log."
|
||||
]
|
||||
},
|
||||
@@ -388,7 +378,7 @@
|
||||
"source": [
|
||||
"### Retrieve the Best Model\n",
|
||||
"\n",
|
||||
"Below we select the best pipeline from our iterations. The *get_output* method on automl_classifier returns the best run and the fitted model for the last *fit* invocation. There are overloads on *get_output* that allow you to retrieve the best run and fitted model for *any* logged metric or a particular *iteration*."
|
||||
"Below we select the best pipeline from our iterations. The `get_output` method on `automl_classifier` returns the best run and the fitted model for the last invocation. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -406,8 +396,8 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Best Model based on any other metric\n",
|
||||
"Give me the run and the model that has the smallest `log_loss`:"
|
||||
"#### Best Model Based on Any Other Metric\n",
|
||||
"Show the run and the model that has the smallest `log_loss` value:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -426,8 +416,8 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Best Model based on any iteration\n",
|
||||
"Give me the run and the model from the 1st iteration:"
|
||||
"#### Model from a Specific Iteration\n",
|
||||
"Show the run and the model from the first iteration:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -446,7 +436,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Testing the Fitted Model \n",
|
||||
"### Test the Best Fitted Model\n",
|
||||
"\n",
|
||||
"#### Load Test Data"
|
||||
]
|
||||
@@ -460,8 +450,8 @@
|
||||
"from sklearn import datasets\n",
|
||||
"\n",
|
||||
"digits = datasets.load_digits()\n",
|
||||
"X_digits = digits.data[:10, :]\n",
|
||||
"y_digits = digits.target[:10]\n",
|
||||
"X_test = digits.data[:10, :]\n",
|
||||
"y_test = digits.target[:10]\n",
|
||||
"images = digits.images[:10]"
|
||||
]
|
||||
},
|
||||
@@ -469,7 +459,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Testing our best pipeline\n",
|
||||
"#### Testing Our Best Pipeline\n",
|
||||
"We will try to predict 2 digits and see how our model works."
|
||||
]
|
||||
},
|
||||
@@ -485,15 +475,15 @@
|
||||
"import random\n",
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"for index in np.random.choice(len(y_digits), 2):\n",
|
||||
"for index in np.random.choice(len(y_test), 2, replace = False):\n",
|
||||
" print(index)\n",
|
||||
" predicted = fitted_model.predict(X_digits[index:index + 1])[0]\n",
|
||||
" label = y_digits[index]\n",
|
||||
" title = \"Label value = %d Predicted value = %d \" % ( label,predicted)\n",
|
||||
" predicted = fitted_model.predict(X_test[index:index + 1])[0]\n",
|
||||
" label = y_test[index]\n",
|
||||
" title = \"Label value = %d Predicted value = %d \" % (label, predicted)\n",
|
||||
" fig = plt.figure(1, figsize=(3,3))\n",
|
||||
" ax1 = fig.add_axes((0,0,.8,.8))\n",
|
||||
" ax1.set_title(title)\n",
|
||||
" plt.imshow(images[index], cmap=plt.cm.gray_r, interpolation='nearest')\n",
|
||||
" plt.imshow(images[index], cmap = plt.cm.gray_r, interpolation = 'nearest')\n",
|
||||
" plt.show()"
|
||||
]
|
||||
},
|
||||
@@ -508,9 +498,9 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Capture the Dataflows to use for AutoML later\n",
|
||||
"### Capture the `Dataflow` Objects for Later Use in AutoML\n",
|
||||
"\n",
|
||||
"`Dataflow` objects are immutable. Each of them is composed of a list of data preparation steps. A `Dataflow` can be branched at any point for further usage."
|
||||
"`Dataflow` objects are immutable and are composed of a list of data preparation steps. A `Dataflow` object can be branched at any point for further usage."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -527,7 +517,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"`digits_complete` (sourced from `sklearn.datasets.load_digits()`)is forked into `dflow_X` to capture all the feature columns and `dflow_y` to capture the label column."
|
||||
"`digits_complete` (sourced from `sklearn.datasets.load_digits()`) is forked into `dflow_X` to capture all the feature columns and `dflow_y` to capture the label column."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
118
automl/README.md
118
automl/README.md
@@ -1,52 +1,24 @@
|
||||
# Table of Contents
|
||||
1. [Automated ML Introduction](#introduction)
|
||||
1. [Running samples in Azure Notebooks](#jupyter)
|
||||
1. [Running samples in a Local Conda environment](#localconda)
|
||||
1. [Automated ML SDK Sample Notebooks](#samples)
|
||||
1. [Documentation](#documentation)
|
||||
1. [Running using python command](#pythoncommand)
|
||||
1. [Troubleshooting](#troubleshooting)
|
||||
|
||||
<a name="introduction"></a>
|
||||
# Automated ML introduction
|
||||
Automated machine learning (automated ML) builds high quality machine learning models for you by automating model and hyperparameter selection. Bring a labelled dataset that you want to build a model for, and automated ML will give you a high quality machine learning model that you can use for predictions.
|
||||
1. [Auto ML Introduction](#introduction)
|
||||
2. [Running samples in a Local Conda environment](#localconda)
|
||||
3. [Auto ML SDK Sample Notebooks](#samples)
|
||||
4. [Documentation](#documentation)
|
||||
5. [Running using python command](#pythoncommand)
|
||||
6. [Troubleshooting](#troubleshooting)
|
||||
|
||||
# Auto ML Introduction <a name="introduction"></a>
|
||||
AutoML builds high quality Machine Learning models for you by automating model and hyperparameter selection. Bring a labelled dataset that you want to build a model for, and AutoML will give you a high quality machine learning model that you can use for predictions.
|
||||
|
||||
If you are new to Data Science, AutoML will help you get started quickly by simplifying machine learning model building. It removes the need to perform model selection and hyperparameter selection yourself and, in one step, creates a high quality trained model for you to use.
|
||||
|
||||
If you are an experienced data scientist, AutoML will help increase your productivity by intelligently performing the model and hyperparameter selection for your training and generating high quality models much more quickly than manually specifying several combinations of the parameters and running training jobs. AutoML provides visibility and access to all the training jobs and the performance characteristics of the models to help you further tune the pipeline if you desire.
|
||||
|
||||
<a name="jupyter"></a>
|
||||
## Running samples in Azure Notebooks - Jupyter based notebooks in the Azure cloud
|
||||
|
||||
1. [](https://aka.ms/aml-clone-azure-notebooks)
|
||||
[Import sample notebooks ](https://aka.ms/aml-clone-azure-notebooks) into Azure Notebooks.
|
||||
1. Follow the instructions in the [../00.configuration](00.configuration.ipynb) notebook to create and connect to a workspace.
|
||||
1. Open one of the sample notebooks.
|
||||
# Running samples in a Local Conda environment <a name="localconda"></a>
|
||||
|
||||
**Make sure the Azure Notebook kernel is set to `Python 3.6`** when you open a notebook.
|
||||
|
||||

|
||||
|
||||
<a name="localconda"></a>
|
||||
## Running samples in a Local Conda environment
|
||||
|
||||
To run these notebooks on your own notebook server, use these installation instructions.
|
||||
|
||||
The instructions below will install everything you need and then start a Jupyter notebook. To start your Jupyter notebook manually, use:
|
||||
|
||||
```
|
||||
conda activate azure_automl
|
||||
jupyter notebook
|
||||
```
|
||||
|
||||
or on Mac:
|
||||
|
||||
```
|
||||
source activate azure_automl
|
||||
jupyter notebook
|
||||
```
|
||||
You can run these notebooks in Azure Notebooks without any extra installation. To run these notebooks on your own notebook server, use these installation instructions.
|
||||
|
||||
It is best if you create a new conda environment locally to try this SDK, so it doesn't interfere with your existing Python environment.
|
||||
|
||||
### 1. Install mini-conda from [here](https://conda.io/miniconda.html), choose Python 3.7 or higher.
|
||||
- **Note**: if you already have conda installed, you can keep using it but it should be version 4.4.10 or later (as shown by: conda -V). If you have a previous version installed, you can update it using the command: conda update conda.
|
||||
@@ -76,19 +48,19 @@ bash automl_setup_mac.sh
|
||||
cd to the **automl** folder where the sample notebooks were extracted and then run:
|
||||
|
||||
```
|
||||
bash automl_setup_linux.sh
|
||||
automl_setup_linux.sh
|
||||
```
|
||||
|
||||
### 4. Running configuration.ipynb
|
||||
- Before running any samples, you need to run the configuration notebook. Click on the 00.configuration.ipynb notebook.
|
||||
- Please make sure you use the Python [conda env:azure_automl] kernel when running this notebook.
|
||||
- Execute the cells in the notebook to Register Machine Learning Services Resource Provider and create a workspace. (*instructions in notebook*)
|
||||
|
||||
### 5. Running Samples
|
||||
- Please make sure you use the Python [conda env:azure_automl] kernel when trying the sample Notebooks.
|
||||
- Follow the instructions in the individual notebooks to explore various features in AutoML
|
||||
|
||||
<a name="samples"></a>
|
||||
# Automated ML SDK Sample Notebooks
|
||||
# Auto ML SDK Sample Notebooks <a name="samples"></a>
|
||||
- [00.configuration.ipynb](00.configuration.ipynb)
|
||||
- Register Machine Learning Services Resource Provider
|
||||
- Create new Azure ML Workspace
|
||||
@@ -115,7 +87,7 @@ bash automl_setup_linux.sh
|
||||
|
||||
- [03b.auto-ml-remote-batchai.ipynb](03b.auto-ml-remote-batchai.ipynb)
|
||||
- Dataset: scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits)
|
||||
- Example of using automated ML for classification using a remote Batch AI compute for training
|
||||
- Example of using Auto ML for classification using a remote Batch AI compute for training
|
||||
- Parallel execution of iterations
|
||||
- Async tracking of progress
|
||||
- Cancelling individual iterations or entire run
|
||||
@@ -171,17 +143,20 @@ bash automl_setup_linux.sh
|
||||
- [13.auto-ml-dataprep.ipynb](13.auto-ml-dataprep.ipynb)
|
||||
- Using DataPrep for reading data
|
||||
|
||||
<a name="documentation"></a>
|
||||
# Documentation
|
||||
- [14a.auto-ml-classification-ensemble.ipynb](14a.auto-ml-classification-ensemble.ipynb)
|
||||
- Classification with ensembling
|
||||
|
||||
- [14b.auto-ml-regression-ensemble.ipynb](14b.auto-ml-regression-ensemble.ipynb)
|
||||
- Regression with ensembling
|
||||
|
||||
# Documentation <a name="documentation"></a>
|
||||
## Table of Contents
|
||||
1. [Automated ML Settings ](#automlsettings)
|
||||
1. [Cross validation split options](#cvsplits)
|
||||
1. [Get Data Syntax](#getdata)
|
||||
1. [Data pre-processing and featurization](#preprocessing)
|
||||
|
||||
<a name="automlsettings"></a>
|
||||
## Automated ML Settings
|
||||
1. [Auto ML Settings ](#automlsettings)
|
||||
2. [Cross validation split options](#cvsplits)
|
||||
3. [Get Data Syntax](#getdata)
|
||||
4. [Data pre-processing and featurization](#preprocessing)
|
||||
|
||||
## Auto ML Settings <a name="automlsettings"></a>
|
||||
|Property|Description|Default|
|
||||
|-|-|-|
|
||||
|**primary_metric**|This is the metric that you want to optimize.<br><br> Classification supports the following primary metrics <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>balanced_accuracy</i><br><i>average_precision_score_weighted</i><br><i>precision_score_weighted</i><br><br> Regression supports the following primary metrics <br><i>spearman_correlation</i><br><i>normalized_root_mean_squared_error</i><br><i>r2_score</i><br><i>normalized_mean_absolute_error</i><br><i>normalized_root_mean_squared_log_error</i>| Classification: accuracy <br><br> Regression: spearman_correlation
|
||||
@@ -195,8 +170,7 @@ bash automl_setup_linux.sh
|
||||
|**exit_score**|*double* value indicating the target for *primary_metric*. <br> Once the target is surpassed the run terminates|None|
|
||||
|**blacklist_algos**|*Array* of *strings* indicating pipelines to ignore for Auto ML.<br><br> Allowed values for **Classification**<br><i>LogisticRegression</i><br><i>SGDClassifierWrapper</i><br><i>NBWrapper</i><br><i>BernoulliNB</i><br><i>SVCWrapper</i><br><i>LinearSVMWrapper</i><br><i>KNeighborsClassifier</i><br><i>DecisionTreeClassifier</i><br><i>RandomForestClassifier</i><br><i>ExtraTreesClassifier</i><br><i>gradient boosting</i><br><i>LightGBMClassifier</i><br><br>Allowed values for **Regression**<br><i>ElasticNet</i><br><i>GradientBoostingRegressor</i><br><i>DecisionTreeRegressor</i><br><i>KNeighborsRegressor</i><br><i>LassoLars</i><br><i>SGDRegressor</i><br><i>RandomForestRegressor</i><br><i>ExtraTreesRegressor</i>|None|
|
||||
|
||||
<a name="cvsplits"></a>
|
||||
## Cross validation split options
|
||||
## Cross validation split options <a name="cvsplits"></a>
|
||||
### K-Folds Cross Validation
|
||||
Use the *n_cross_validations* setting to specify the number of cross validations. The training data set will be randomly split into *n_cross_validations* folds of equal size. During each cross validation round, one of the folds will be used for validation of the model trained on the remaining folds. This process repeats for *n_cross_validations* rounds until each fold is used once as the validation set. Finally, the average scores across all *n_cross_validations* rounds will be reported, and the corresponding model will be retrained on the whole training data set.
|
||||
|
||||
@@ -206,8 +180,7 @@ Use *validation_size* to specify the percentage of the training data set that sh
|
||||
### Custom train and validation set
|
||||
You can specify separate train and validation sets either through get_data() or directly to the fit method.
|
||||
|
||||
<a name="getdata"></a>
|
||||
## get_data() syntax
|
||||
## get_data() syntax <a name="getdata"></a>
|
||||
The *get_data()* function can be used to return a dictionary with these values:
|
||||
|
||||
|Key|Type|Dependency|Mutually Exclusive with|Description|
|
||||
@@ -223,23 +196,21 @@ The *get_data()* function can be used to return a dictionary with these values:
|
||||
|columns|Array of strings|data_train||*Optional* Whitelist of columns to use for features|
|
||||
|cv_splits_indices|Array of integers|data_train||*Optional* List of indexes to split the data for cross validation|
|
||||
|
||||
<a name="preprocessing"></a>
|
||||
## Data pre-processing and featurization
|
||||
If you use `preprocess=True`, the following data preprocessing steps are performed automatically for you:
|
||||
## Data pre-processing and featurization <a name="preprocessing"></a>
|
||||
If you use "preprocess=True", the following data preprocessing steps are performed automatically for you:
|
||||
### 1. Dropping high cardinality or no variance features
|
||||
- Features with no useful information are dropped from training and validation sets. These include features with all values missing, same value across all rows or with extremely high cardinality (e.g., hashes, IDs or GUIDs).
|
||||
### 2. Missing value imputation
|
||||
- For numerical features, missing values are imputed with average of values in the column.
|
||||
- For categorical features, missing values are imputed with most frequent value.
|
||||
### 3. Generating additional features
|
||||
- For DateTime features: Year, Month, Day, Day of week, Day of year, Quarter, Week of the year, Hour, Minute, Second.
|
||||
- For Text features: Term frequency based on bi-grams and tri-grams, Count vectorizer.
|
||||
### 4. Transformations and encodings
|
||||
- Numeric features with very few unique values are transformed into categorical features.
|
||||
- Depending on cardinality of categorical features label encoding or (hashing) one-hot encoding is performed.
|
||||
|
||||
1. Dropping high cardinality or no variance features
|
||||
- Features with no useful information are dropped from training and validation sets. These include features with all values missing, same value across all rows or with extremely high cardinality (e.g., hashes, IDs or GUIDs).
|
||||
2. Missing value imputation
|
||||
- For numerical features, missing values are imputed with average of values in the column.
|
||||
- For categorical features, missing values are imputed with most frequent value.
|
||||
3. Generating additional features
|
||||
- For DateTime features: Year, Month, Day, Day of week, Day of year, Quarter, Week of the year, Hour, Minute, Second.
|
||||
- For Text features: Term frequency based on bi-grams and tri-grams, Count vectorizer.
|
||||
4. Transformations and encodings
|
||||
- Numeric features with very few unique values are transformed into categorical features.
|
||||
|
||||
<a name="pythoncommand"></a>
|
||||
# Running using python command
|
||||
# Running using python command <a name="pythoncommand"></a>
|
||||
Jupyter notebook provides a File / Download as / Python (.py) option for saving the notebook as a Python file.
|
||||
You can then run this file using the python command.
|
||||
However, on Windows the file needs to be modified before it can be run.
|
||||
@@ -249,8 +220,7 @@ The following condition must be added to the main code in the file:
|
||||
|
||||
The main code of the file must be indented so that it is under this condition.
|
||||
|
||||
<a name="troubleshooting"></a>
|
||||
# Troubleshooting
|
||||
# Troubleshooting <a name="troubleshooting"></a>
|
||||
## Iterations fail and the log contains "MemoryError"
|
||||
This can be caused by insufficient memory on the DSVM. AutoML loads all training data into memory. So, the available memory should be more than the training data size.
|
||||
If you are using a remote DSVM, memory is needed for each concurrent iteration. The concurrent_iterations setting specifies the maximum concurrent iterations. For example, if the training data size is 8Gb and concurrent_iterations is set to 10, the minimum memory required is at least 80Gb.
|
||||
|
||||
@@ -8,7 +8,7 @@ dependencies:
|
||||
- numpy>=1.11.0,<1.16.0
|
||||
- scipy>=0.19.0,<0.20.0
|
||||
- scikit-learn>=0.18.0,<=0.19.1
|
||||
- pandas>=0.19.0,<0.23.0
|
||||
- pandas>=0.22.0,<0.23.0
|
||||
|
||||
- pip:
|
||||
# Required packages for AzureML execution, history, and data preparation.
|
||||
|
||||
@@ -6,7 +6,8 @@ IF "%conda_env_name%"=="" SET conda_env_name="azure_automl"
|
||||
call conda activate %conda_env_name% 2>nul:
|
||||
|
||||
if not errorlevel 1 (
|
||||
call conda env update --file automl_env.yml -n %conda_env_name%
|
||||
echo Upgrading azureml-sdk[automl] in existing conda environment %conda_env_name%
|
||||
call pip install --upgrade azureml-sdk[automl]
|
||||
if errorlevel 1 goto ErrorExit
|
||||
) else (
|
||||
call conda env create -f automl_env.yml -n %conda_env_name%
|
||||
|
||||
@@ -9,7 +9,8 @@ fi
|
||||
|
||||
if source activate $CONDA_ENV_NAME 2> /dev/null
|
||||
then
|
||||
conda env update -file automl_env.yml -n $CONDA_ENV_NAME
|
||||
echo "Upgrading azureml-sdk[automl] in existing conda environment" $CONDA_ENV_NAME
|
||||
pip install --upgrade azureml-sdk[automl]
|
||||
else
|
||||
conda env create -f automl_env.yml -n $CONDA_ENV_NAME &&
|
||||
source activate $CONDA_ENV_NAME &&
|
||||
|
||||
@@ -9,7 +9,8 @@ fi
|
||||
|
||||
if source activate $CONDA_ENV_NAME 2> /dev/null
|
||||
then
|
||||
conda env update -file automl_env.yml -n $CONDA_ENV_NAME
|
||||
echo "Upgrading azureml-sdk[automl] in existing conda environment" $CONDA_ENV_NAME
|
||||
pip install --upgrade azureml-sdk[automl]
|
||||
else
|
||||
conda env create -f automl_env.yml -n $CONDA_ENV_NAME &&
|
||||
source activate $CONDA_ENV_NAME &&
|
||||
|
||||
@@ -1,23 +0,0 @@
|
||||
# ONNX Runtime on Azure Machine Learning (AML)
|
||||
|
||||
These tutorials show how to deploy pretrained [ONNX](http://onnx.ai) models on Azure virtual machines using [ONNX Runtime](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-build-deploy-onnx) for inference. By the end of the tutorial, you will deploy a state-of-the-art deep learning model on a virtual machine in Azure Machine Learning, using ONNX Runtime for Inference. You can ping the model with your own images to be analyzed!
|
||||
|
||||
## Tutorials
|
||||
- [Handwritten Digit Classification (MNIST) using ONNX Runtime on AzureML](https://github.com/Azure/MachineLearningNotebooks/blob/master/onnx/onnx-inference-mnist.ipynb)
|
||||
- [Facial Expression Recognition using ONNX Runtime on AzureML](https://github.com/Azure/MachineLearningNotebooks/blob/master/onnx/onnx-inference-emotion-recognition.ipynb)
|
||||
|
||||
## Documentation
|
||||
- [ONNX Runtime Python API Documentation](http://aka.ms/onnxruntime-python)
|
||||
- [Azure Machine Learning API Documentation](http://aka.ms/aml-docs)
|
||||
|
||||
## Related Articles
|
||||
- [Building and Deploying ONNX Runtime Models](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-build-deploy-onnx)
|
||||
- [Azure AI – Making AI Real for Business](https://aka.ms/aml-blog-overview)
|
||||
- [What’s new in Azure Machine Learning](https://aka.ms/aml-blog-whats-new)
|
||||
|
||||
|
||||
## License
|
||||
|
||||
Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
Licensed under the MIT License.
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Facial Expression Recognition (Emotion FER+) using ONNX Runtime on Azure ML\n",
|
||||
"# Facial Expression Recognition using ONNX Runtime on AzureML\n",
|
||||
"\n",
|
||||
"This example shows how to deploy an image classification neural network using the Facial Expression Recognition ([FER](https://www.kaggle.com/c/challenges-in-representation-learning-facial-expression-recognition-challenge/data)) dataset and Open Neural Network eXchange format ([ONNX](http://aka.ms/onnxdocarticle)) on the Azure Machine Learning platform. This tutorial will show you how to deploy a FER+ model from the [ONNX model zoo](https://github.com/onnx/models), use it to make predictions using ONNX Runtime Inference, and deploy it as a web service in Azure.\n",
|
||||
"\n",
|
||||
@@ -34,54 +34,32 @@
|
||||
"## Prerequisites\n",
|
||||
"\n",
|
||||
"### 1. Install Azure ML SDK and create a new workspace\n",
|
||||
"Please follow [Azure ML configuration notebook](https://github.com/Azure/MachineLearningNotebooks/blob/master/00.configuration.ipynb) to set up your environment.\n",
|
||||
"Please follow [00.configuration.ipynb](https://github.com/Azure/MachineLearningNotebooks/blob/master/00.configuration.ipynb) notebook.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"### 2. Install additional packages needed for this Notebook\n",
|
||||
"You need to install the popular plotting library `matplotlib`, the image manipulation library `opencv`, and the `onnx` library in the conda environment where Azure Maching Learning SDK is installed.\n",
|
||||
"You need to install the popular plotting library `matplotlib`, the image manipulation library `PIL`, and the `onnx` library in the conda environment where Azure Machine Learning SDK is installed.\n",
|
||||
"\n",
|
||||
"```sh\n",
|
||||
"(myenv) $ pip install matplotlib onnx opencv-python\n",
|
||||
"(myenv) $ pip install matplotlib onnx Pillow\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"**Debugging tip**: Make sure to activate your virtual environment (myenv) before you re-launch this notebook using the `jupyter notebook` command. Choose the respective Python kernel for your new virtual environment using the `Kernel > Change Kernel` menu above. If you have completed the steps correctly, the upper right corner of your screen should state `Python [conda env:myenv]` instead of `Python [default]`.\n",
|
||||
"\n",
|
||||
"### 3. Download sample data and pre-trained ONNX model from ONNX Model Zoo.\n",
|
||||
"\n",
|
||||
"In the following lines of code, we download [the trained ONNX Emotion FER+ model and corresponding test data](https://github.com/onnx/models/tree/master/emotion_ferplus) and place them in the same folder as this tutorial notebook. For more information about the FER+ dataset, please visit Microsoft Researcher Emad Barsoum's [FER+ source data repository](https://github.com/ebarsoum/FERPlus)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# urllib is a built-in Python library to download files from URLs\n",
|
||||
"[Download the ONNX Emotion FER+ model and corresponding test data](https://www.cntk.ai/OnnxModels/emotion_ferplus/opset_7/emotion_ferplus.tar.gz) and place them in the same folder as this tutorial notebook. You can unzip the file through the following line of code.\n",
|
||||
"\n",
|
||||
"# Objective: retrieve the latest version of the ONNX Emotion FER+ model files from the\n",
|
||||
"# ONNX Model Zoo and save it in the same folder as this tutorial\n",
|
||||
"```sh\n",
|
||||
"(myenv) $ tar xvzf emotion_ferplus.tar.gz\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"import urllib.request\n",
|
||||
"\n",
|
||||
"onnx_model_url = \"https://www.cntk.ai/OnnxModels/emotion_ferplus/opset_7/emotion_ferplus.tar.gz\"\n",
|
||||
"\n",
|
||||
"urllib.request.urlretrieve(onnx_model_url, filename=\"emotion_ferplus.tar.gz\")\n",
|
||||
"\n",
|
||||
"# the ! magic command tells our jupyter notebook kernel to run the following line of \n",
|
||||
"# code from the command line instead of the notebook kernel\n",
|
||||
"\n",
|
||||
"# We use tar and xvcf to unzip the files we just retrieved from the ONNX model zoo\n",
|
||||
"\n",
|
||||
"!tar xvzf emotion_ferplus.tar.gz"
|
||||
"More information can be found about the ONNX FER+ model on [github](https://github.com/onnx/models/tree/master/emotion_ferplus). For more information about the FER+ dataset, please visit Microsoft Researcher Emad Barsoum's [FER+ source data repository](https://github.com/ebarsoum/FERPlus)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Deploy a VM with your ONNX model in the Cloud\n",
|
||||
"\n",
|
||||
"### Load Azure ML workspace\n",
|
||||
"## Load Azure ML workspace\n",
|
||||
"\n",
|
||||
"We begin by instantiating a workspace object from the existing workspace created earlier in the configuration notebook."
|
||||
]
|
||||
@@ -169,9 +147,9 @@
|
||||
"source": [
|
||||
"### ONNX FER+ Model Methodology\n",
|
||||
"\n",
|
||||
"The image classification model we are using is pre-trained using Microsoft's deep learning cognitive toolkit, [CNTK](https://github.com/Microsoft/CNTK), from the [ONNX model zoo](http://github.com/onnx/models). The model zoo has many other models that can be deployed on cloud providers like AzureML without any additional training. To ensure that our cloud deployed model works, we use testing data from the well-known FER+ data set, provided as part of the [trained Emotion Recognition model](https://github.com/onnx/models/tree/master/emotion_ferplus) in the ONNX model zoo.\n",
|
||||
"The image classification model we are using is pre-trained using Microsoft's deep learning cognitive toolkit, [CNTK](https://github.com/Microsoft/CNTK), from the [ONNX model zoo](http://github.com/onnx/models). The model zoo has many other models that can be deployed on cloud providers like AzureML without any additional training. To ensure that our cloud deployed model works, we use testing data from the famous FER+ data set, provided as part of the [trained Emotion Recognition model](https://github.com/onnx/models/tree/master/emotion_ferplus) in the ONNX model zoo.\n",
|
||||
"\n",
|
||||
"The original Facial Emotion Recognition (FER) Dataset was released in 2013 by Pierre-Luc Carrier and Aaron Courville as part of a [Kaggle Competition](https://www.kaggle.com/c/challenges-in-representation-learning-facial-expression-recognition-challenge/data), but some of the labels are not entirely appropriate for the expression. In the FER+ Dataset, each photo was evaluated by at least 10 croud sourced reviewers, creating a more accurate basis for ground truth. \n",
|
||||
"The original Facial Emotion Recognition (FER) Dataset was released in 2013, but some of the labels are not entirely appropriate for the expression. In the FER+ Dataset, each photo was evaluated by at least 10 crowd-sourced reviewers, creating a better basis for ground truth. \n",
|
||||
"\n",
|
||||
"You can see the difference of label quality in the sample model input below. The FER labels are the first word below each image, and the FER+ labels are the second word below each image.\n",
|
||||
"\n",
|
||||
@@ -224,18 +202,20 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Specify our Score and Environment Files"
|
||||
"## Deploy our model on Azure ML"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We are now going to deploy our ONNX Model on AML with inference in ONNX Runtime. We begin by writing a score.py file, which will help us run the model in our Azure ML virtual machine (VM), and then specify our environment by writing a yml file. You will also notice that we import the onnxruntime library to do runtime inference on our ONNX models (passing in input and evaluating out model's predicted output). More information on the API and commands can be found in the [ONNX Runtime documentation](https://aka.ms/onnxruntime).\n",
|
||||
"We are now going to deploy our ONNX Model on AML with inference in ONNX Runtime. We begin by writing a score.py file, which will help us run the model in our Azure ML virtual machine (VM), and then specify our environment by writing a yml file.\n",
|
||||
"\n",
|
||||
"You will also notice that we import the onnxruntime library to do runtime inference on our ONNX models (passing in input and evaluating our model's predicted output). More information on the API and commands can be found in the [ONNX Runtime documentation](https://aka.ms/onnxruntime).\n",
|
||||
"\n",
|
||||
"### Write Score File\n",
|
||||
"\n",
|
||||
"A score file is what tells our Azure cloud service what to do. After initializing our model using azureml.core.model, we start an ONNX Runtime inference session to evaluate the data passed in on our function calls."
|
||||
"A score file is what tells our Azure cloud service what to do. After initializing our model using azureml.core.model, we start an ONNX Runtime GPU inference session to evaluate the data passed in on our function calls."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -268,13 +248,10 @@
|
||||
" try:\n",
|
||||
" # load in our data, convert to readable format\n",
|
||||
" data = np.array(json.loads(input_data)['data']).astype('float32')\n",
|
||||
" \n",
|
||||
" start = time.time()\n",
|
||||
" r = session.run([output_name], {input_name : data})\n",
|
||||
" end = time.time()\n",
|
||||
" \n",
|
||||
" result = emotion_map(postprocess(r[0]))\n",
|
||||
" \n",
|
||||
" result_dict = {\"result\": result,\n",
|
||||
" \"time_in_sec\": [end - start]}\n",
|
||||
" except Exception as e:\n",
|
||||
@@ -283,12 +260,9 @@
|
||||
" return json.dumps(result_dict)\n",
|
||||
"\n",
|
||||
"def emotion_map(classes, N=1):\n",
|
||||
" \"\"\"Take the most probable labels (output of postprocess) and returns the \n",
|
||||
" top N emotional labels that fit the picture.\"\"\"\n",
|
||||
" \n",
|
||||
" emotion_table = {'neutral':0, 'happiness':1, 'surprise':2, 'sadness':3, \n",
|
||||
" 'anger':4, 'disgust':5, 'fear':6, 'contempt':7}\n",
|
||||
" \"\"\"Take the most probable labels (output of postprocess) and returns the top N emotional labels that fit the picture.\"\"\"\n",
|
||||
" \n",
|
||||
" emotion_table = {'neutral':0, 'happiness':1, 'surprise':2, 'sadness':3, 'anger':4, 'disgust':5, 'fear':6, 'contempt':7}\n",
|
||||
" emotion_keys = list(emotion_table.keys())\n",
|
||||
" emotions = []\n",
|
||||
" for i in range(N):\n",
|
||||
@@ -302,8 +276,8 @@
|
||||
" return e_x / e_x.sum(axis=0)\n",
|
||||
"\n",
|
||||
"def postprocess(scores):\n",
|
||||
" \"\"\"This function takes the scores generated by the network and \n",
|
||||
" returns the class IDs in decreasing order of probability.\"\"\"\n",
|
||||
" \"\"\"This function takes the scores generated by the network and returns the class IDs in decreasing \n",
|
||||
" order of probability.\"\"\"\n",
|
||||
" prob = softmax(scores)\n",
|
||||
" prob = np.squeeze(prob)\n",
|
||||
" classes = np.argsort(prob)[::-1]\n",
|
||||
@@ -355,7 +329,7 @@
|
||||
"image_config = ContainerImage.image_configuration(execution_script = \"score.py\",\n",
|
||||
" runtime = \"python\",\n",
|
||||
" conda_file = \"myenv.yml\",\n",
|
||||
" description = \"Emotion ONNX Runtime container\",\n",
|
||||
" description = \"test\",\n",
|
||||
" tags = {\"demo\": \"onnx\"})\n",
|
||||
"\n",
|
||||
"\n",
|
||||
@@ -372,6 +346,8 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Debugging\n",
|
||||
"\n",
|
||||
"In case you need to debug your code, the next line of code accesses the log file."
|
||||
]
|
||||
},
|
||||
@@ -388,9 +364,9 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We're all done specifying what we want our virtual machine to do. Let's configure and deploy our container image.\n",
|
||||
"We're all set! Let's get our model chugging.\n",
|
||||
"\n",
|
||||
"### Deploy the container image"
|
||||
"## Deploy the container image"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -463,57 +439,23 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Testing and Evaluation\n",
|
||||
"\n",
|
||||
"### Useful Helper Functions\n",
|
||||
"\n",
|
||||
"We preprocess and postprocess our data (see score.py file) using the helper functions specified in the [ONNX FER+ Model page in the Model Zoo repository](https://github.com/onnx/models/tree/master/emotion_ferplus)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def emotion_map(classes, N=1):\n",
|
||||
" \"\"\"Take the most probable labels (output of postprocess) and returns the \n",
|
||||
" top N emotional labels that fit the picture.\"\"\"\n",
|
||||
" \n",
|
||||
" emotion_table = {'neutral':0, 'happiness':1, 'surprise':2, 'sadness':3, \n",
|
||||
" 'anger':4, 'disgust':5, 'fear':6, 'contempt':7}\n",
|
||||
" \n",
|
||||
" emotion_keys = list(emotion_table.keys())\n",
|
||||
" emotions = []\n",
|
||||
" for i in range(N):\n",
|
||||
" emotions.append(emotion_keys[classes[i]])\n",
|
||||
" \n",
|
||||
" return emotions\n",
|
||||
"\n",
|
||||
"def softmax(x):\n",
|
||||
" \"\"\"Compute softmax values (probabilities from 0 to 1) for each possible label.\"\"\"\n",
|
||||
" x = x.reshape(-1)\n",
|
||||
" e_x = np.exp(x - np.max(x))\n",
|
||||
" return e_x / e_x.sum(axis=0)\n",
|
||||
"\n",
|
||||
"def postprocess(scores):\n",
|
||||
" \"\"\"This function takes the scores generated by the network and \n",
|
||||
" returns the class IDs in decreasing order of probability.\"\"\"\n",
|
||||
" prob = softmax(scores)\n",
|
||||
" prob = np.squeeze(prob)\n",
|
||||
" classes = np.argsort(prob)[::-1]\n",
|
||||
" return classes"
|
||||
"# Testing and Evaluation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Load Test Data\n",
|
||||
"#### Useful Helper Functions\n",
|
||||
"\n",
|
||||
"These are already in your directory from your ONNX model download (from the model zoo).\n",
|
||||
"\n",
|
||||
"Notice that our Model Zoo files have a .pb extension. This is because they are [protobuf files (Protocol Buffers)](https://developers.google.com/protocol-buffers/docs/pythontutorial), so we need to read in our data through our ONNX TensorProto reader into a format we can work with, like numerical arrays."
|
||||
"We preprocess and postprocess our data (see score.py file) using the helper functions specified in the [ONNX FER+ Model page in the Model Zoo repository](https://github.com/onnx/models/tree/master/emotion_ferplus)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Load Test Data"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -533,6 +475,8 @@
|
||||
"import json\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"from score import emotion_map, softmax, postprocess\n",
|
||||
"\n",
|
||||
"test_inputs = []\n",
|
||||
"test_outputs = []\n",
|
||||
"\n",
|
||||
@@ -568,7 +512,7 @@
|
||||
},
|
||||
"source": [
|
||||
"### Show some sample images\n",
|
||||
"We use `matplotlib` to plot 3 test images from the dataset."
|
||||
"We use `matplotlib` to plot 3 test images from the model zoo with their labels over them."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -588,7 +532,7 @@
|
||||
" plt.axhline('')\n",
|
||||
" plt.axvline('')\n",
|
||||
" plt.text(x = 10, y = -10, s = test_outputs[test_image], fontsize = 18)\n",
|
||||
" plt.imshow(test_inputs[test_image].reshape(64, 64), cmap = plt.cm.gray)\n",
|
||||
" plt.imshow(test_inputs[test_image].reshape(64, 64), cmap = plt.cm.Greys)\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
@@ -627,7 +571,7 @@
|
||||
" print(r['error'])\n",
|
||||
" break\n",
|
||||
" \n",
|
||||
" result = r['result'][0]\n",
|
||||
" result = r['result'][0][0]\n",
|
||||
" time_ms = np.round(r['time_in_sec'][0] * 1000, 2)\n",
|
||||
" \n",
|
||||
" ground_truth = test_outputs[i]\n",
|
||||
@@ -639,7 +583,7 @@
|
||||
"\n",
|
||||
" # use different color for misclassified sample\n",
|
||||
" font_color = 'red' if ground_truth != result else 'black'\n",
|
||||
" clr_map = plt.cm.Greys if ground_truth != result else plt.cm.gray\n",
|
||||
" clr_map = plt.cm.gray if ground_truth != result else plt.cm.Greys\n",
|
||||
"\n",
|
||||
" # ground truth labels are in blue\n",
|
||||
" plt.text(x = 10, y = -70, s = ground_truth, fontsize = 18, color = 'blue')\n",
|
||||
@@ -667,30 +611,15 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Preprocessing functions take your image and format it so it can be passed\n",
|
||||
"# as input into our ONNX model\n",
|
||||
"from PIL import Image\n",
|
||||
"\n",
|
||||
"import cv2\n",
|
||||
"\n",
|
||||
"def rgb2gray(rgb):\n",
|
||||
" \"\"\"Convert the input image into grayscale\"\"\"\n",
|
||||
" return np.dot(rgb[...,:3], [0.299, 0.587, 0.114])\n",
|
||||
"\n",
|
||||
"def resize_img(img):\n",
|
||||
" \"\"\"Resize image to MNIST model input dimensions\"\"\"\n",
|
||||
" img = cv2.resize(img, dsize=(64, 64), interpolation=cv2.INTER_AREA)\n",
|
||||
" img.resize((1, 1, 64, 64))\n",
|
||||
" return img\n",
|
||||
"\n",
|
||||
"def preprocess(img):\n",
|
||||
" \"\"\"Resize input images and convert them to grayscale.\"\"\"\n",
|
||||
" if img.shape == (64, 64):\n",
|
||||
" img.resize((1, 1, 64, 64))\n",
|
||||
" return img\n",
|
||||
" \n",
|
||||
" grayscale = rgb2gray(img)\n",
|
||||
" processed_img = resize_img(grayscale)\n",
|
||||
" return processed_img"
|
||||
"def preprocess(image_path):\n",
|
||||
" input_shape = (1, 1, 64, 64)\n",
|
||||
" img = Image.open(image_path)\n",
|
||||
" img = img.resize((64, 64), Image.ANTIALIAS)\n",
|
||||
" img_data = np.array(img)\n",
|
||||
" img_data = np.resize(img_data, input_shape)\n",
|
||||
" return img_data"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -707,15 +636,12 @@
|
||||
"\n",
|
||||
"# e.g. your_test_image = \"C://Users//vinitra.swamy//Pictures//emotion_test_images//img_1.png\"\n",
|
||||
"\n",
|
||||
"import matplotlib.image as mpimg\n",
|
||||
"your_test_image = \"<path to file>\"\n",
|
||||
"\n",
|
||||
"if your_test_image != \"<path to file>\":\n",
|
||||
" img = mpimg.imread(your_test_image)\n",
|
||||
" img = preprocess(your_test_image)\n",
|
||||
" plt.subplot(1,3,1)\n",
|
||||
" plt.imshow(img, cmap = plt.cm.Greys)\n",
|
||||
" print(\"Old Dimensions: \", img.shape)\n",
|
||||
" img = preprocess(img)\n",
|
||||
" print(\"New Dimensions: \", img.shape)\n",
|
||||
" plt.imshow(img.reshape((64,64)), cmap = plt.cm.gray)\n",
|
||||
"else:\n",
|
||||
" img = None"
|
||||
]
|
||||
@@ -733,7 +659,7 @@
|
||||
"\n",
|
||||
" try:\n",
|
||||
" r = json.loads(aci_service.run(input_data))\n",
|
||||
" result = r['result'][0]\n",
|
||||
" result = r['result'][0][0]\n",
|
||||
" time_ms = np.round(r['time_in_sec'][0] * 1000, 2)\n",
|
||||
" except Exception as e:\n",
|
||||
" print(str(e))\n",
|
||||
@@ -742,13 +668,12 @@
|
||||
" plt.subplot(1,8,1)\n",
|
||||
" plt.axhline('')\n",
|
||||
" plt.axvline('')\n",
|
||||
" plt.text(x = -10, y = -40, s = \"Model prediction: \", fontsize = 14)\n",
|
||||
" plt.text(x = -10, y = -25, s = \"Inference time: \", fontsize = 14)\n",
|
||||
" plt.text(x = 100, y = -40, s = str(result), fontsize = 14)\n",
|
||||
" plt.text(x = 100, y = -25, s = str(time_ms) + \" ms\", fontsize = 14)\n",
|
||||
" plt.text(x = -10, y = -10, s = \"Model Input image: \", fontsize = 14)\n",
|
||||
" plt.imshow(img.reshape((64, 64)), cmap = plt.cm.gray) \n",
|
||||
" "
|
||||
" plt.text(x = -10, y = -35, s = \"Model prediction: \", fontsize = 14)\n",
|
||||
" plt.text(x = -10, y = -20, s = \"Inference time: \", fontsize = 14)\n",
|
||||
" plt.text(x = 100, y = -35, s = str(result), fontsize = 14)\n",
|
||||
" plt.text(x = 100, y = -20, s = str(time_ms) + \" ms\", fontsize = 14)\n",
|
||||
" plt.text(x = -10, y = -8, s = \"Input image: \", fontsize = 14)\n",
|
||||
" plt.imshow(img.reshape(64, 64), cmap = plt.cm.gray) "
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -759,7 +684,7 @@
|
||||
"source": [
|
||||
"# remember to delete your service after you are done using it!\n",
|
||||
"\n",
|
||||
"aci_service.delete()"
|
||||
"# aci_service.delete()"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -784,9 +709,9 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python [conda env:myenv]",
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
"name": "conda-env-myenv-py"
|
||||
"name": "python36"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
@@ -798,7 +723,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.6"
|
||||
"version": "3.6.5"
|
||||
},
|
||||
"msauthor": "vinitra.swamy"
|
||||
},
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Handwritten Digit Classification (MNIST) using ONNX Runtime on Azure ML\n",
|
||||
"# Handwritten Digit Classification (MNIST) using ONNX Runtime on AzureML\n",
|
||||
"\n",
|
||||
"This example shows how to deploy an image classification neural network using the Modified National Institute of Standards and Technology ([MNIST](http://yann.lecun.com/exdb/mnist/)) dataset and Open Neural Network eXchange format ([ONNX](http://aka.ms/onnxdocarticle)) on the Azure Machine Learning platform. MNIST is a popular dataset consisting of 70,000 grayscale images. Each image is a handwritten digit of 28x28 pixels, representing number from 0 to 9. This tutorial will show you how to deploy a MNIST model from the [ONNX model zoo](https://github.com/onnx/models), use it to make predictions using ONNX Runtime Inference, and deploy it as a web service in Azure.\n",
|
||||
"\n",
|
||||
@@ -22,9 +22,9 @@
|
||||
"\n",
|
||||
"#### Tutorial Objectives:\n",
|
||||
"\n",
|
||||
"- Describe the MNIST dataset and pretrained Convolutional Neural Net ONNX model, stored in the ONNX model zoo.\n",
|
||||
"- Deploy and run the pretrained MNIST ONNX model on an Azure Machine Learning instance\n",
|
||||
"- Predict labels for test set data points in the cloud using ONNX Runtime and Azure ML"
|
||||
"1. Describe the MNIST dataset and pretrained Convolutional Neural Net ONNX model, stored in the ONNX model zoo.\n",
|
||||
"2. Deploy and run the pretrained MNIST ONNX model on an Azure Machine Learning instance\n",
|
||||
"3. Predict labels for test set data points in the cloud using ONNX Runtime and Azure ML"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -34,61 +34,31 @@
|
||||
"## Prerequisites\n",
|
||||
"\n",
|
||||
"### 1. Install Azure ML SDK and create a new workspace\n",
|
||||
"Please follow [Azure ML configuration notebook](https://github.com/Azure/MachineLearningNotebooks/blob/master/00.configuration.ipynb) to set up your environment.\n",
|
||||
"Please follow [00.configuration.ipynb](https://github.com/Azure/MachineLearningNotebooks/blob/master/00.configuration.ipynb) notebook.\n",
|
||||
"\n",
|
||||
"### 2. Install additional packages needed for this tutorial notebook\n",
|
||||
"You need to install the popular plotting library `matplotlib`, the image manipulation library `opencv`, and the `onnx` library in the conda environment where Azure Machine Learning SDK is installed. \n",
|
||||
"### 2. Install additional packages needed for this Notebook\n",
|
||||
"You need to install the popular plotting library `matplotlib`, the image manipulation library `opencv`, and the `onnx` library in the conda environment where Azure Machine Learning SDK is installed.\n",
|
||||
"\n",
|
||||
"```sh\n",
|
||||
"(myenv) $ pip install matplotlib onnx opencv-python\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"**Debugging tip**: Make sure that you run the \"jupyter notebook\" command to launch this notebook after activating your virtual environment. Choose the respective Python kernel for your new virtual environment using the `Kernel > Change Kernel` menu above. If you have completed the steps correctly, the upper right corner of your screen should state `Python [conda env:myenv]` instead of `Python [default]`.\n",
|
||||
"\n",
|
||||
"### 3. Download sample data and pre-trained ONNX model from ONNX Model Zoo.\n",
|
||||
"\n",
|
||||
"In the following lines of code, we download [the trained ONNX MNIST model and corresponding test data](https://github.com/onnx/models/tree/master/mnist) and place them in the same folder as this tutorial notebook. For more information about the MNIST dataset, please visit [Yann LeCun's website](http://yann.lecun.com/exdb/mnist/)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# urllib is a built-in Python library to download files from URLs\n",
|
||||
"[Download the ONNX MNIST model and corresponding test data](https://www.cntk.ai/OnnxModels/mnist/opset_7/mnist.tar.gz) and place them in the same folder as this tutorial notebook. You can unzip the file through the following line of code.\n",
|
||||
"\n",
|
||||
"# Objective: retrieve the latest version of the ONNX MNIST model files from the\n",
|
||||
"# ONNX Model Zoo and save it in the same folder as this tutorial\n",
|
||||
"```sh\n",
|
||||
"(myenv) $ tar xvzf mnist.tar.gz\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"import urllib.request\n",
|
||||
"\n",
|
||||
"onnx_model_url = \"https://www.cntk.ai/OnnxModels/mnist/opset_7/mnist.tar.gz\"\n",
|
||||
"\n",
|
||||
"urllib.request.urlretrieve(onnx_model_url, filename=\"mnist.tar.gz\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# the ! magic command tells our jupyter notebook kernel to run the following line of \n",
|
||||
"# code from the command line instead of the notebook kernel\n",
|
||||
"\n",
|
||||
"# We use tar with the xvzf flags to extract the files we just retrieved from the ONNX model zoo\n",
|
||||
"\n",
|
||||
"!tar xvzf mnist.tar.gz"
|
||||
"More information about the ONNX MNIST model can be found on [GitHub](https://github.com/onnx/models/tree/master/mnist). For more information about the MNIST dataset, please visit [Yann LeCun's website](http://yann.lecun.com/exdb/mnist/)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Deploy a VM with your ONNX model in the Cloud\n",
|
||||
"\n",
|
||||
"### Load Azure ML workspace\n",
|
||||
"## Load Azure ML workspace\n",
|
||||
"\n",
|
||||
"We begin by instantiating a workspace object from the existing workspace created earlier in the configuration notebook."
|
||||
]
|
||||
@@ -143,11 +113,11 @@
|
||||
"source": [
|
||||
"from azureml.core.model import Model\n",
|
||||
"\n",
|
||||
"model = Model.register(workspace = ws,\n",
|
||||
" model_path = model_dir + \"/\" + \"model.onnx\",\n",
|
||||
"model = Model.register(model_path = model_dir + \"//model.onnx\",\n",
|
||||
" model_name = \"mnist_1\",\n",
|
||||
" tags = {\"onnx\": \"demo\"},\n",
|
||||
" description = \"MNIST image classification CNN from ONNX Model Zoo\",)"
|
||||
" description = \"MNIST image classification CNN from ONNX Model Zoo\",\n",
|
||||
" workspace = ws)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -218,14 +188,16 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Specify our Score and Environment Files"
|
||||
"## Deploy our model on Azure ML"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We are now going to deploy our ONNX Model on AML with inference in ONNX Runtime. We begin by writing a score.py file, which will help us run the model in our Azure ML virtual machine (VM), and then specify our environment by writing a yml file. You will also notice that we import the onnxruntime library to do runtime inference on our ONNX models (passing in input and evaluating our model's predicted output). More information on the API and commands can be found in the [ONNX Runtime documentation](https://aka.ms/onnxruntime).\n",
|
||||
"We are now going to deploy our ONNX Model on AML with inference in ONNX Runtime. We begin by writing a score.py file, which will help us run the model in our Azure ML virtual machine (VM), and then specify our environment by writing a yml file.\n",
|
||||
"\n",
|
||||
"You will also notice that we import the onnxruntime library to do runtime inference on our ONNX models (passing in input and evaluating our model's predicted output). More information on the API and commands can be found in the [ONNX Runtime documentation](https://aka.ms/onnxruntime).\n",
|
||||
"\n",
|
||||
"### Write Score File\n",
|
||||
"\n",
|
||||
@@ -276,7 +248,7 @@
|
||||
" return json.dumps(result_dict)\n",
|
||||
"\n",
|
||||
"def choose_class(result_prob):\n",
|
||||
" \"\"\"We use argmax to determine the right label to choose from our output\"\"\"\n",
|
||||
" \"\"\"We use argmax to determine the right label to choose from our output, after calling softmax on the 10 numbers we receive\"\"\"\n",
|
||||
" return int(np.argmax(result_prob, axis=0))"
|
||||
]
|
||||
},
|
||||
@@ -284,9 +256,14 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Write Environment File\n",
|
||||
"\n",
|
||||
"This step creates a YAML environment file that specifies which dependencies we would like to see in our Linux Virtual Machine."
|
||||
"### Write Environment File"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This step creates a YAML file that specifies which dependencies we would like to see in our Linux Virtual Machine."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -312,19 +289,10 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create the Container Image\n",
|
||||
"\n",
|
||||
"This step will likely take a few minutes."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.image import ContainerImage\n",
|
||||
"help(ContainerImage.image_configuration)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@@ -336,8 +304,8 @@
|
||||
"image_config = ContainerImage.image_configuration(execution_script = \"score.py\",\n",
|
||||
" runtime = \"python\",\n",
|
||||
" conda_file = \"myenv.yml\",\n",
|
||||
" description = \"MNIST ONNX Runtime container\",\n",
|
||||
" tags = {\"demo\": \"onnx\"}) \n",
|
||||
" description = \"test\",\n",
|
||||
" tags = {\"demo\": \"onnx\"}) )\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"image = ContainerImage.create(name = \"onnxtest\",\n",
|
||||
@@ -353,6 +321,8 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Debugging\n",
|
||||
"\n",
|
||||
"In case you need to debug your code, the next line of code accesses the log file."
|
||||
]
|
||||
},
|
||||
@@ -369,9 +339,9 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We're all done specifying what we want our virtual machine to do. Let's configure and deploy our container image.\n",
|
||||
"We're all set! Let's get our model chugging.\n",
|
||||
"\n",
|
||||
"### Deploy the container image"
|
||||
"## Deploy the container image"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -403,7 +373,7 @@
|
||||
"source": [
|
||||
"from azureml.core.webservice import Webservice\n",
|
||||
"\n",
|
||||
"aci_service_name = 'onnx-demo-mnist20'\n",
|
||||
"aci_service_name = 'onnx-demo-mnist'\n",
|
||||
"print(\"Service\", aci_service_name)\n",
|
||||
"\n",
|
||||
"aci_service = Webservice.deploy_from_image(deployment_config = aciconfig,\n",
|
||||
@@ -444,13 +414,16 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Testing and Evaluation\n",
|
||||
"# Testing and Evaluation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Load Test Data\n",
|
||||
"\n",
|
||||
"### Load Test Data\n",
|
||||
"\n",
|
||||
"These are already in your directory from your ONNX model download (from the model zoo).\n",
|
||||
"\n",
|
||||
"Notice that our Model Zoo files have a .pb extension. This is because they are [protobuf files (Protocol Buffers)](https://developers.google.com/protocol-buffers/docs/pythontutorial), so we need to read in our data through our ONNX TensorProto reader into a format we can work with, like numerical arrays."
|
||||
"These are already in your directory from your ONNX model download (from the model zoo). If you didn't place your model and test data in the same directory as this notebook, edit the \"model_dir\" filename below."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -606,9 +579,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Preprocessing functions take your image and format it so it can be passed\n",
|
||||
"# as input into our ONNX model\n",
|
||||
"\n",
|
||||
"# Preprocessing functions\n",
|
||||
"import cv2\n",
|
||||
"\n",
|
||||
"def rgb2gray(rgb):\n",
|
||||
@@ -616,17 +587,12 @@
|
||||
" return np.dot(rgb[...,:3], [0.299, 0.587, 0.114])\n",
|
||||
"\n",
|
||||
"def resize_img(img):\n",
|
||||
" \"\"\"Resize image to MNIST model input dimensions\"\"\"\n",
|
||||
" img = cv2.resize(img, dsize=(28, 28), interpolation=cv2.INTER_AREA)\n",
|
||||
" img.resize((1, 1, 28, 28))\n",
|
||||
" return img\n",
|
||||
"\n",
|
||||
"def preprocess(img):\n",
|
||||
" \"\"\"Resize input images and convert them to grayscale.\"\"\"\n",
|
||||
" if img.shape == (28, 28):\n",
|
||||
" img.resize((1, 1, 28, 28))\n",
|
||||
" return img\n",
|
||||
" \n",
|
||||
" grayscale = rgb2gray(img)\n",
|
||||
" processed_img = resize_img(grayscale)\n",
|
||||
" return processed_img"
|
||||
@@ -642,8 +608,11 @@
|
||||
"# Make sure your image is square and the dimensions are equal (i.e. 100 * 100 pixels or 28 * 28 pixels)\n",
|
||||
"\n",
|
||||
"# Any PNG or JPG image file should work\n",
|
||||
"# Make sure to include the entire path with // instead of /\n",
|
||||
"\n",
|
||||
"# e.g. your_test_image = \"C:/Users/vinitra.swamy/Pictures/handwritten_digit.png\"\n",
|
||||
"# e.g. your_test_image = \"C://Users//vinitra.swamy//Pictures//digit.png\"\n",
|
||||
"\n",
|
||||
"your_test_image = \"<path to file>\"\n",
|
||||
"\n",
|
||||
"import matplotlib.image as mpimg\n",
|
||||
"\n",
|
||||
@@ -752,7 +721,7 @@
|
||||
"source": [
|
||||
"# remember to delete your service after you are done using it!\n",
|
||||
"\n",
|
||||
"aci_service.delete()"
|
||||
"# aci_service.delete()"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -769,16 +738,16 @@
|
||||
"- ensured that your deep learning model is working perfectly (in the cloud) on test data, and checked it against some of your own!\n",
|
||||
"\n",
|
||||
"Next steps:\n",
|
||||
"- Check out another interesting application based on a Microsoft Research computer vision paper that lets you set up a [facial emotion recognition model](https://github.com/Azure/MachineLearningNotebooks/tree/master/onnx/onnx-inference-emotion-recognition.ipynb) in the cloud! This tutorial deploys a pre-trained ONNX Computer Vision model in an Azure ML virtual machine.\n",
|
||||
"- Check out another interesting application based on a Microsoft Research computer vision paper that lets you set up a [facial emotion recognition model](https://github.com/Azure/MachineLearningNotebooks/tree/master/onnx/onnx-inference-emotion-recognition.ipynb) in the cloud! This tutorial deploys a pre-trained ONNX Computer Vision model in an Azure ML virtual machine with GPU support.\n",
|
||||
"- Contribute to our [open source ONNX repository on github](http://github.com/onnx/onnx) and/or add to our [ONNX model zoo](http://github.com/onnx/models)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python [conda env:myenv]",
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
"name": "conda-env-myenv-py"
|
||||
"name": "python36"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
@@ -790,7 +759,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.6"
|
||||
"version": "3.6.5"
|
||||
},
|
||||
"msauthor": "vinitra.swamy"
|
||||
},
|
||||
|
||||
@@ -103,7 +103,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"classifier_input, classifier_output = Resnet50.get_default_classifier(feature_tensor, model_path)"
|
||||
"classifier_output = model.get_default_classifier(feature_tensor)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -511,7 +511,7 @@
|
||||
"\n",
|
||||
"New BSD License\n",
|
||||
"\n",
|
||||
"Copyright (c) 2007–2018 The scikit-learn developers.\n",
|
||||
"Copyright (c) 2007–2018 The scikit-learn developers.\n",
|
||||
"All rights reserved.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
|
||||
@@ -57,7 +57,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"check version"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%matplotlib inline\n",
|
||||
@@ -84,7 +88,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"load workspace"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# load workspace configuration from the config.json file in the current folder.\n",
|
||||
@@ -104,7 +112,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"create experiment"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"experiment_name = 'sklearn-mnist'\n",
|
||||
@@ -135,35 +147,38 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.compute import ComputeTarget, BatchAiCompute\n",
|
||||
"from azureml.core.compute_target import ComputeTargetException\n",
|
||||
"from azureml.core.compute import BatchAiCompute\n",
|
||||
"from azureml.core.compute import ComputeTarget\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"# choose a name for your cluster\n",
|
||||
"batchai_cluster_name = \"traincluster\"\n",
|
||||
"batchai_cluster_name = os.environ.get(\"BATCHAI_CLUSTER_NAME\", ws.name + \"gpu\")\n",
|
||||
"cluster_min_nodes = os.environ.get(\"BATCHAI_CLUSTER_MIN_NODES\", 1)\n",
|
||||
"cluster_max_nodes = os.environ.get(\"BATCHAI_CLUSTER_MAX_NODES\", 3)\n",
|
||||
"vm_size = os.environ.get(\"BATCHAI_CLUSTER_SKU\", \"STANDARD_NC6\")\n",
|
||||
"autoscale_enabled = os.environ.get(\"BATCHAI_CLUSTER_AUTOSCALE_ENABLED\", True)\n",
|
||||
"\n",
|
||||
"try:\n",
|
||||
" # look for the existing cluster by name\n",
|
||||
" compute_target = ComputeTarget(workspace=ws, name=batchai_cluster_name)\n",
|
||||
" if type(compute_target) is BatchAiCompute:\n",
|
||||
" print('found compute target {}, just use it.'.format(batchai_cluster_name))\n",
|
||||
" else:\n",
|
||||
" print('{} exists but it is not a Batch AI cluster. Please choose a different name.'.format(batchai_cluster_name))\n",
|
||||
"except ComputeTargetException:\n",
|
||||
"\n",
|
||||
"if batchai_cluster_name in ws.compute_targets():\n",
|
||||
" compute_target = ws.compute_targets()[batchai_cluster_name]\n",
|
||||
" if compute_target and type(compute_target) is BatchAiCompute:\n",
|
||||
" print('found compute target. just use it. ' + batchai_cluster_name)\n",
|
||||
"else:\n",
|
||||
" print('creating a new compute target...')\n",
|
||||
" compute_config = BatchAiCompute.provisioning_configuration(vm_size=\"STANDARD_D2_V2\", # small CPU-based VM\n",
|
||||
" #vm_priority='lowpriority', # optional\n",
|
||||
" autoscale_enabled=True,\n",
|
||||
" cluster_min_nodes=0, \n",
|
||||
" cluster_max_nodes=4)\n",
|
||||
" provisioning_config = BatchAiCompute.provisioning_configuration(vm_size = vm_size, # NC6 is GPU-enabled\n",
|
||||
" vm_priority = 'lowpriority', # optional\n",
|
||||
" autoscale_enabled = autoscale_enabled,\n",
|
||||
" cluster_min_nodes = cluster_min_nodes, \n",
|
||||
" cluster_max_nodes = cluster_max_nodes)\n",
|
||||
"\n",
|
||||
" # create the cluster\n",
|
||||
" compute_target = ComputeTarget.create(ws, batchai_cluster_name, compute_config)\n",
|
||||
" compute_target = ComputeTarget.create(ws, batchai_cluster_name, provisioning_config)\n",
|
||||
" \n",
|
||||
" # can poll for a minimum number of nodes and for a specific timeout. \n",
|
||||
" # if no min node count is provided it uses the scale settings for the cluster\n",
|
||||
" # if no min node count is provided it will use the scale settings for the cluster\n",
|
||||
" compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)\n",
|
||||
" \n",
|
||||
" # Use the 'status' property to get a detailed status for the current cluster. \n",
|
||||
" # For a more detailed view of current BatchAI cluster status, use the 'status' property \n",
|
||||
" print(compute_target.status.serialize())"
|
||||
]
|
||||
},
|
||||
@@ -265,7 +280,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"use datastore"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ds = ws.get_default_datastore()\n",
|
||||
@@ -473,7 +492,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"configure estimator"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.train.estimator import Estimator\n",
|
||||
@@ -502,7 +525,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"remote run",
|
||||
"batchai",
|
||||
"scikit-learn"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run = exp.submit(config=est)\n",
|
||||
@@ -565,7 +594,13 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"remote run",
|
||||
"batchai",
|
||||
"scikit-learn"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run.wait_for_completion(show_output=False) # specify True for a verbose log"
|
||||
@@ -609,7 +644,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"query history"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(run.get_file_names())"
|
||||
@@ -625,7 +664,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"register model from history"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# register model \n",
|
||||
@@ -633,27 +676,6 @@
|
||||
"print(model.name, model.id, model.version, sep = '\\t')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Clean up resources\n",
|
||||
"\n",
|
||||
"If you're not going to use what you've created here, delete the resources you just created with this quickstart so you don't incur any charges. In the Azure portal, select and delete your resource group. You can also keep the resource group, but delete a single workspace by displaying the workspace properties and selecting the Delete button.\n",
|
||||
"\n",
|
||||
"You can also just delete the Azure Managed Compute cluster. But even if you don't delete it, since `autoscale_enabled` is set to `True`, and `cluster_min_nodes` is set to `0`, when the jobs are done, all cluster nodes will be shut down and you will not incur any additional compute charges. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# optionally, delete the Azure Managed Compute cluster\n",
|
||||
"compute_target.delete()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
||||
@@ -132,13 +132,9 @@
|
||||
"\n",
|
||||
"digits = datasets.load_digits()\n",
|
||||
"\n",
|
||||
"# only take the first 100 rows if you want the training steps to run faster\n",
|
||||
"X_digits = digits.data[:100,:]\n",
|
||||
"y_digits = digits.target[:100]\n",
|
||||
"\n",
|
||||
"# use full dataset\n",
|
||||
"#X_digits = digits.data\n",
|
||||
"#y_digits = digits.target"
|
||||
"# Exclude the first 100 rows from training so that they can be used for test.\n",
|
||||
"X_digits = digits.data[100:,:]\n",
|
||||
"y_digits = digits.target[100:]"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -199,7 +195,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"configure automl"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.train.automl import AutoMLConfig\n",
|
||||
@@ -211,7 +211,7 @@
|
||||
" iterations = 20,\n",
|
||||
" n_cross_validations = 3,\n",
|
||||
" preprocess = False,\n",
|
||||
" exit_score = 0.995,\n",
|
||||
" exit_score = 0.9985,\n",
|
||||
" blacklist_algos = ['kNN','LinearSVM'],\n",
|
||||
" X = X_digits,\n",
|
||||
" y = y_digits,\n",
|
||||
@@ -230,7 +230,12 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"local submitted run",
|
||||
"automl"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.experiment import Experiment\n",
|
||||
@@ -254,7 +259,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"use notebook widget"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.train.widgets import RunDetails\n",
|
||||
@@ -273,7 +282,12 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"get metrics",
|
||||
"query history"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"children = list(local_run.get_children())\n",
|
||||
@@ -300,7 +314,12 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"query history",
|
||||
"register model from history"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# find the run with the highest accuracy value.\n",
|
||||
@@ -374,7 +393,7 @@
|
||||
"> * Review training results\n",
|
||||
"> * Register the best model\n",
|
||||
"\n",
|
||||
"Learn more about [how to configure settings for automatic training](https://aka.ms/aml-how-configure-auto) or [how to use automatic training on a remote resource](https://aka.ms/aml-how-to-auto-remote)."
|
||||
"Learn more about [how to configure settings for automatic training](https://aka.ms/aml-how-configure-auto) or [how to use automatic training on a remote resource](https://aka.ms/aml-how-to-auto-remote)."
|
||||
]
|
||||
}
|
||||
],
|
||||
|
||||
Reference in New Issue
Block a user