Mirror of https://github.com/Azure/MachineLearningNotebooks.git (synced 2025-12-19 17:17:04 -05:00)

update samples from Release-160 as a part of 1.0.76 SDK release
@@ -1,7 +0,0 @@
.ipynb_checkpoints
azureml-logs
.azureml
.git
outputs
azureml-setup
docs

.vscode/settings.json (vendored)
@@ -1,3 +0,0 @@
{
    "python.pythonPath": "C:\\Users\\sgilley\\.azureml\\envs\\jan3\\python.exe"
}
@@ -1,15 +0,0 @@
# Conda environment specification. The dependencies defined in this file will
# be automatically provisioned for runs with userManagedDependencies=False.

# Details about the Conda environment file format:
# https://conda.io/docs/user-guide/tasks/manage-environments.html#create-env-file-manually

name: project_environment
dependencies:
  # The python interpreter version.
  # Currently Azure ML only supports 3.5.2 and later.
@@ -1,115 +0,0 @@
# The script to run.
script: train.py
# The arguments to the script file.
arguments: []
# The name of the compute target to use for this run.
target: local
# Framework to execute inside. Allowed values are "Python", "PySpark", "CNTK", "TensorFlow", and "PyTorch".
framework: PySpark
# Communicator for the given framework. Allowed values are "None", "ParameterServer", "OpenMpi", and "IntelMpi".
communicator: None
# Automatically prepare the run environment as part of the run itself.
autoPrepareEnvironment: true
# Maximum allowed duration for the run.
maxRunDurationSeconds:
# Number of nodes to use for running the job.
nodeCount: 1
# Environment details.
environment:
  # Environment variables set for the run.
  environmentVariables:
    EXAMPLE_ENV_VAR: EXAMPLE_VALUE
  # Python details.
  python:
    # user_managed_dependencies=True indicates that the environment will be user managed. False indicates that AzureML will manage the user environment.
    userManagedDependencies: false
    # The Python interpreter path.
    interpreterPath: python
    # Path to the conda dependencies file to use for this run. If a project
    # contains multiple programs with different sets of dependencies, it may be
    # convenient to manage those environments with separate files.
    condaDependenciesFile: aml_config/conda_dependencies.yml
  # Docker details.
  docker:
    # Set True to perform this run inside a Docker container.
    enabled: true
    # Base image used for Docker-based runs.
    baseImage: mcr.microsoft.com/azureml/base:0.2.0
    # Set False if necessary to work around shared volume bugs.
    sharedVolumes: true
    # Run with the NVidia Docker extension to support GPUs.
    gpuSupport: false
    # Extra arguments to the Docker run command.
    arguments: []
    # Image registry that contains the base image.
    baseImageRegistry:
      # DNS name or IP address of the Azure Container Registry (ACR).
      address:
      # The username for ACR.
      username:
      # The password for ACR.
      password:
  # Spark details.
  spark:
    # List of Spark repositories.
    repositories:
    - https://mmlspark.azureedge.net/maven
    packages:
    - group: com.microsoft.ml.spark
      artifact: mmlspark_2.11
      version: '0.12'
    precachePackages: true
  # Databricks details.
  databricks:
    # List of Maven libraries.
    mavenLibraries: []
    # List of PyPi libraries.
    pypiLibraries: []
    # List of RCran libraries.
    rcranLibraries: []
    # List of JAR libraries.
    jarLibraries: []
    # List of Egg libraries.
    eggLibraries: []
# History details.
history:
  # Enable history tracking -- this allows status, logs, metrics, and outputs
  # to be collected for a run.
  outputCollection: true
  # Whether to take snapshots for history.
  snapshotProject: true
# Spark configuration details.
spark:
  configuration:
    spark.app.name: Azure ML Experiment
    spark.yarn.maxAppAttempts: 1
# HDI details.
hdi:
  # Yarn deploy mode. Options are cluster and client.
  yarnDeployMode: cluster
# Tensorflow details.
tensorflow:
  # The number of worker tasks.
  workerCount: 1
  # The number of parameter server tasks.
  parameterServerCount: 1
# Mpi details.
mpi:
  # When using MPI, number of processes per node.
  processCountPerNode: 1
# Data reference configuration details.
dataReferences: {}
# Project share datastore reference.
sourceDirectoryDataStore:
# AmlCompute details.
amlcompute:
  # VM size of the cluster to be created. Allowed values are Azure VM sizes. The list of VM sizes is available at https://docs.microsoft.com/en-us/azure/cloud-services/cloud-services-sizes-specs
  vmSize:
  # VM priority of the cluster to be created. Allowed values are "dedicated" and "lowpriority".
  vmPriority:
  # A bool that indicates if the cluster has to be retained after job completion.
  retainCluster: false
  # Name of the cluster to be created. If not specified, runId will be used as the cluster name.
  name:
  # Maximum number of nodes in the AmlCompute cluster to be created. Minimum number of nodes will always be set to 0.
  clusterMaxNodeCount: 1
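For orientation (this sketch is not part of the commit; the experiment name and source directory are placeholder assumptions), a run configuration equivalent to the deleted YAML above would typically be built and submitted with the Azure ML SDK roughly like this:

```python
# Minimal sketch: build a RunConfiguration matching the YAML fields above and submit train.py.
from azureml.core import Workspace, Experiment, ScriptRunConfig
from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies

ws = Workspace.from_config()                        # reads the workspace config file

run_config = RunConfiguration()
run_config.target = "local"                         # "target" in the YAML above
run_config.environment.docker.enabled = True        # "docker: enabled"
run_config.environment.python.user_managed_dependencies = False
run_config.environment.python.conda_dependencies = CondaDependencies(
    conda_dependencies_file_path="aml_config/conda_dependencies.yml")

src = ScriptRunConfig(source_directory=".", script="train.py", run_config=run_config)
run = Experiment(ws, "sample-experiment").submit(src)   # "sample-experiment" is a placeholder name
run.wait_for_completion(show_output=True)
```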
@@ -1,115 +0,0 @@
# The script to run.
script: train.py
# The arguments to the script file.
arguments: []
# The name of the compute target to use for this run.
target: local
# Framework to execute inside. Allowed values are "Python", "PySpark", "CNTK", "TensorFlow", and "PyTorch".
framework: Python
# Communicator for the given framework. Allowed values are "None", "ParameterServer", "OpenMpi", and "IntelMpi".
communicator: None
# Automatically prepare the run environment as part of the run itself.
autoPrepareEnvironment: true
# Maximum allowed duration for the run.
maxRunDurationSeconds:
# Number of nodes to use for running the job.
nodeCount: 1
# Environment details.
environment:
  # Environment variables set for the run.
  environmentVariables:
    EXAMPLE_ENV_VAR: EXAMPLE_VALUE
  # Python details.
  python:
    # user_managed_dependencies=True indicates that the environment will be user managed. False indicates that AzureML will manage the user environment.
    userManagedDependencies: false
    # The Python interpreter path.
    interpreterPath: python
    # Path to the conda dependencies file to use for this run. If a project
    # contains multiple programs with different sets of dependencies, it may be
    # convenient to manage those environments with separate files.
    condaDependenciesFile: aml_config/conda_dependencies.yml
  # Docker details.
  docker:
    # Set True to perform this run inside a Docker container.
    enabled: false
    # Base image used for Docker-based runs.
    baseImage: mcr.microsoft.com/azureml/base:0.2.0
    # Set False if necessary to work around shared volume bugs.
    sharedVolumes: true
    # Run with the NVidia Docker extension to support GPUs.
    gpuSupport: false
    # Extra arguments to the Docker run command.
    arguments: []
    # Image registry that contains the base image.
    baseImageRegistry:
      # DNS name or IP address of the Azure Container Registry (ACR).
      address:
      # The username for ACR.
      username:
      # The password for ACR.
      password:
  # Spark details.
  spark:
    # List of Spark repositories.
    repositories:
    - https://mmlspark.azureedge.net/maven
    packages:
    - group: com.microsoft.ml.spark
      artifact: mmlspark_2.11
      version: '0.12'
    precachePackages: true
  # Databricks details.
  databricks:
    # List of Maven libraries.
    mavenLibraries: []
    # List of PyPi libraries.
    pypiLibraries: []
    # List of RCran libraries.
    rcranLibraries: []
    # List of JAR libraries.
    jarLibraries: []
    # List of Egg libraries.
    eggLibraries: []
# History details.
history:
  # Enable history tracking -- this allows status, logs, metrics, and outputs
  # to be collected for a run.
  outputCollection: true
  # Whether to take snapshots for history.
  snapshotProject: true
# Spark configuration details.
spark:
  configuration:
    spark.app.name: Azure ML Experiment
    spark.yarn.maxAppAttempts: 1
# HDI details.
hdi:
  # Yarn deploy mode. Options are cluster and client.
  yarnDeployMode: cluster
# Tensorflow details.
tensorflow:
  # The number of worker tasks.
  workerCount: 1
  # The number of parameter server tasks.
  parameterServerCount: 1
# Mpi details.
mpi:
  # When using MPI, number of processes per node.
  processCountPerNode: 1
# Data reference configuration details.
dataReferences: {}
# Project share datastore reference.
sourceDirectoryDataStore:
# AmlCompute details.
amlcompute:
  # VM size of the cluster to be created. Allowed values are Azure VM sizes. The list of VM sizes is available at https://docs.microsoft.com/en-us/azure/cloud-services/cloud-services-sizes-specs
  vmSize:
  # VM priority of the cluster to be created. Allowed values are "dedicated" and "lowpriority".
  vmPriority:
  # A bool that indicates if the cluster has to be retained after job completion.
  retainCluster: false
  # Name of the cluster to be created. If not specified, runId will be used as the cluster name.
  name:
  # Maximum number of nodes in the AmlCompute cluster to be created. Minimum number of nodes will always be set to 0.
  clusterMaxNodeCount: 1
@@ -1 +0,0 @@
{"Id": "local-compute", "Scope": "/subscriptions/65a1016d-0f67-45d2-b838-b8f373d6d52e/resourceGroups/sheri/providers/Microsoft.MachineLearningServices/workspaces/sheritestqs3/projects/local-compute"}
@@ -103,7 +103,7 @@
"source": [
"import azureml.core\n",
"\n",
"print(\"This notebook was created using version 1.0.74.1 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.0.76 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},
@@ -8,6 +8,13 @@
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -29,8 +36,6 @@
|
||||
"- Register the pretrained MNIST model into the model registry. \n",
|
||||
"- Use the registered model to do batch inference on the images in the data blob container.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"## Prerequisites\n",
|
||||
"If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, make sure you go through the configuration Notebook located at https://github.com/Azure/MachineLearningNotebooks first. This sets you up with a working config file that has information on your workspace, subscription id, etc. "
|
||||
]
|
||||
@@ -485,7 +490,7 @@
"source": [
"## Cleanup Compute resources\n",
"\n",
"For re-occuring jobs, it may be wise to keep compute the compute resources and allow compute nodes to scale down to 0. However, since this is just a single-run job, we are free to release the allocated compute resources."
"For re-occurring jobs, it may be wise to keep the compute resources and allow compute nodes to scale down to 0. However, since this is just a single-run job, we are free to release the allocated compute resources."
]
},
{
@@ -514,6 +519,27 @@
|
||||
"name": "tracych"
|
||||
}
|
||||
],
|
||||
"friendly_name": "MNIST data inferencing using ParallelRunStep",
|
||||
"exclude_from_index": false,
|
||||
"index_order": 1,
|
||||
"category": "Other notebooks",
|
||||
"compute": [
|
||||
"AML Compute"
|
||||
],
|
||||
"datasets": [
|
||||
"MNIST"
|
||||
],
|
||||
"deployment": [
|
||||
"None"
|
||||
],
|
||||
"framework": [
|
||||
"None"
|
||||
],
|
||||
"tags": [
|
||||
"Batch Inferencing",
|
||||
"Pipeline"
|
||||
],
|
||||
"task": "Digit identification",
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
|
||||
@@ -8,6 +8,13 @@
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -28,9 +35,7 @@
|
||||
"- Use the registered model to do batch inference on the CSV files in the data blob container.\n",
|
||||
"\n",
|
||||
"## Prerequisites\n",
|
||||
"If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, make sure you go through the configuration Notebook located at https://github.com/Azure/MachineLearningNotebooks first. This sets you up with a working config file that has information on your workspace, subscription id, etc. \n",
|
||||
"\n",
|
||||
""
|
||||
"If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, make sure you go through the configuration Notebook located at https://github.com/Azure/MachineLearningNotebooks first. This sets you up with a working config file that has information on your workspace, subscription id, etc. \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -460,7 +465,7 @@
"metadata": {},
"source": [
"## Cleanup compute resources\n",
"For re-occuring jobs, it may be wise to keep compute the compute resources and allow compute nodes to scale down to 0. However, since this is just a single run job, we are free to release the allocated compute resources."
"For re-occurring jobs, it may be wise to keep the compute resources and allow compute nodes to scale down to 0. However, since this is just a single-run job, we are free to release the allocated compute resources."
]
},
{
@@ -489,6 +494,27 @@
|
||||
"name": "tracych"
|
||||
}
|
||||
],
|
||||
"friendly_name": "IRIS data inferencing using ParallelRunStep",
|
||||
"exclude_from_index": false,
|
||||
"index_order": 1,
|
||||
"category": "Other notebooks",
|
||||
"compute": [
|
||||
"AML Compute"
|
||||
],
|
||||
"datasets": [
|
||||
"IRIS"
|
||||
],
|
||||
"deployment": [
|
||||
"None"
|
||||
],
|
||||
"framework": [
|
||||
"None"
|
||||
],
|
||||
"tags": [
|
||||
"Batch Inferencing",
|
||||
"Pipeline"
|
||||
],
|
||||
"task": "Recognize flower type",
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
@@ -505,8 +531,7 @@
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.2"
},
"notice": "Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT License."
}
},
"nbformat": 4,
"nbformat_minor": 2
@@ -27,10 +27,10 @@ dependencies:
|
||||
- azureml-explain-model
|
||||
- azureml-pipeline
|
||||
- azureml-contrib-interpret
|
||||
- pandas_ml
|
||||
- pytorch-transformers==1.0.0
|
||||
- spacy==2.1.8
|
||||
- joblib
|
||||
- onnxruntime==0.4.0
|
||||
- https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
|
||||
|
||||
channels:
|
||||
|
||||
@@ -28,10 +28,10 @@ dependencies:
|
||||
- azureml-explain-model
|
||||
- azureml-pipeline
|
||||
- azureml-contrib-interpret
|
||||
- pandas_ml
|
||||
- pytorch-transformers==1.0.0
|
||||
- spacy==2.1.8
|
||||
- joblib
|
||||
- onnxruntime==0.4.0
|
||||
- https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
|
||||
|
||||
channels:
|
||||
|
||||
@@ -293,7 +293,6 @@
|
||||
"|**n_cross_validations**|Number of cross validation splits.|\n",
|
||||
"|**training_data**|Input dataset, containing both features and label column.|\n",
|
||||
"|**label_column_name**|The name of the label column.|\n",
|
||||
"|**model_explainability**|Indicate to explain each trained pipeline or not.|\n",
|
||||
"\n",
|
||||
"**_You can find more information about primary metrics_** [here](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-configure-auto-train#primary-metric)"
|
||||
]
|
||||
@@ -325,7 +324,6 @@
|
||||
" training_data = train_data,\n",
|
||||
" label_column_name = label,\n",
|
||||
" validation_data = validation_dataset,\n",
|
||||
" model_explainability=True,\n",
|
||||
" **automl_settings\n",
|
||||
" )"
|
||||
]
|
||||
@@ -458,72 +456,6 @@
|
||||
"RunDetails(remote_run).show() "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Retrieve the Best Model's explanation\n",
|
||||
"Retrieve the explanation from the best_run which includes explanations for engineered features and raw features. Make sure that the run for generating explanations for the best model is completed."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Wait for the best model explanation run to complete\n",
|
||||
"from azureml.train.automl.run import AutoMLRun\n",
|
||||
"model_explainability_run_id = remote_run.get_properties().get('ModelExplainRunId')\n",
|
||||
"print(model_explainability_run_id)\n",
|
||||
"if model_explainability_run_id is not None:\n",
|
||||
" model_explainability_run = AutoMLRun(experiment=experiment, run_id=model_explainability_run_id)\n",
|
||||
" model_explainability_run.wait_for_completion()\n",
|
||||
"\n",
|
||||
"# Get the best run object\n",
|
||||
"best_run, fitted_model = remote_run.get_output()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Download engineered feature importance from artifact store\n",
|
||||
"You can use ExplanationClient to download the engineered feature explanations from the artifact store of the best_run."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"client = ExplanationClient.from_run(best_run)\n",
|
||||
"engineered_explanations = client.download_model_explanation(raw=False)\n",
|
||||
"exp_data = engineered_explanations.get_feature_importance_dict()\n",
|
||||
"exp_data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Download raw feature importance from artifact store\n",
|
||||
"You can use ExplanationClient to download the raw feature explanations from the artifact store of the best_run."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"client = ExplanationClient.from_run(best_run)\n",
|
||||
"engineered_explanations = client.download_model_explanation(raw=True)\n",
|
||||
"exp_data = engineered_explanations.get_feature_importance_dict()\n",
|
||||
"exp_data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -557,7 +489,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.automl.core.onnx_convert import OnnxConverter\n",
|
||||
"from azureml.automl.runtime.onnx_convert import OnnxConverter\n",
|
||||
"onnx_fl_path = \"./best_model.onnx\"\n",
|
||||
"OnnxConverter.save_onnx_model(onnx_mdl, onnx_fl_path)"
|
||||
]
|
||||
@@ -566,17 +498,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Predict with the ONNX model, using onnxruntime package\n",
|
||||
"#### Note: The code will install the onnxruntime==0.4.0 if not installed. Newer versions of the onnxruntime have compatibility issues."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"test_df = test_dataset.to_pandas_dataframe()"
|
||||
"### Predict with the ONNX model, using onnxruntime package"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -595,21 +517,8 @@
|
||||
"else:\n",
|
||||
" python_version_compatible = False\n",
|
||||
"\n",
|
||||
"onnxrt_present = False\n",
|
||||
"try:\n",
|
||||
" import onnxruntime\n",
|
||||
" from azureml.automl.core.onnx_convert import OnnxInferenceHelper \n",
|
||||
" from onnxruntime import __version__ as ORT_VER\n",
|
||||
" if ORT_VER == '0.4.0':\n",
|
||||
" onnxrt_present = True\n",
|
||||
"except ImportError:\n",
|
||||
" onnxrt_present = False\n",
|
||||
" \n",
|
||||
"# Install the onnxruntime if the version 0.4.0 is not installed.\n",
|
||||
"if not onnxrt_present:\n",
|
||||
" print(\"Installing the onnxruntime version 0.4.0.\")\n",
|
||||
" !{sys.executable} -m pip install --user --force-reinstall onnxruntime==0.4.0\n",
|
||||
" onnxrt_present = True\n",
|
||||
"import onnxruntime\n",
|
||||
"from azureml.automl.runtime.onnx_convert import OnnxInferenceHelper\n",
|
||||
"\n",
|
||||
"def get_onnx_res(run):\n",
|
||||
" res_path = 'onnx_resource.json'\n",
|
||||
@@ -618,7 +527,8 @@
|
||||
" onnx_res = json.load(f)\n",
|
||||
" return onnx_res\n",
|
||||
"\n",
|
||||
"if onnxrt_present and python_version_compatible: \n",
|
||||
"if python_version_compatible:\n",
|
||||
" test_df = test_dataset.to_pandas_dataframe()\n",
|
||||
" mdl_bytes = onnx_mdl.SerializeToString()\n",
|
||||
" onnx_res = get_onnx_res(best_run)\n",
|
||||
"\n",
|
||||
@@ -628,10 +538,7 @@
"    print(pred_onnx)\n",
"    print(pred_prob_onnx)\n",
"else:\n",
"    if not python_version_compatible:\n",
"        print('Please use Python version 3.6 or 3.7 to run the inference helper.')    \n",
"    if not onnxrt_present:\n",
"        print('Please install the onnxruntime package to do the prediction with ONNX model.')"
"    print('Please use Python version 3.6 or 3.7 to run the inference helper.')"
]
},
{
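As a general illustration only (not taken from this notebook, which scores through the AzureML OnnxInferenceHelper), running a saved ONNX model with the plain onnxruntime package looks roughly like this; the path and input shape are placeholder assumptions:

```python
# Hypothetical sketch: score an exported ONNX model directly with onnxruntime.
import numpy as np
import onnxruntime

sess = onnxruntime.InferenceSession("./best_model.onnx")  # file saved earlier in the notebook
input_name = sess.get_inputs()[0].name                    # name of the first model input
sample = np.zeros((1, 4), dtype=np.float32)               # placeholder batch; match your model's input shape
outputs = sess.run(None, {input_name: sample})            # None -> return every model output
print(outputs[0])
```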
@@ -451,7 +451,7 @@
|
||||
"AML Compute"
|
||||
],
|
||||
"datasets": [
|
||||
"creditcard"
|
||||
"Creditcard"
|
||||
],
|
||||
"deployment": [
|
||||
"None"
|
||||
|
||||
@@ -522,6 +522,9 @@
|
||||
"datasets": [
|
||||
"None"
|
||||
],
|
||||
"compute": [
|
||||
"AML Compute"
|
||||
],
|
||||
"deployment": [
|
||||
"None"
|
||||
],
|
||||
|
||||
@@ -323,7 +323,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.train.automl import AutoMLStep, AutoMLConfig\n",
|
||||
"from azureml.train.automl import AutoMLConfig\n",
|
||||
"from azureml.train.automl.runtime import AutoMLStep\n",
|
||||
"\n",
|
||||
"automl_settings = {\n",
|
||||
" \"iteration_timeout_minutes\": 20,\n",
|
||||
|
||||
@@ -301,7 +301,7 @@
"source": [
"### Setting forecaster maximum horizon \n",
"\n",
"The forecast horizon is the number of periods into the future that the model should predict. Here, we set the horizon to 4 periods (i.e. 4 months). Notice that this is much shorter than the number of days in the test set; we will need to use a rolling test to evaluate the performance on the whole test set. For more discussion of forecast horizons and guiding principles for setting them, please see the [energy demand notebook](https://github.com/Azure/MachineLearningNotebooks/tree/master/how-to-use-azureml/automated-machine-learning/forecasting-energy-demand). "
"The forecast horizon is the number of periods into the future that the model should predict. Here, we set the horizon to 12 periods (i.e. 12 months). Notice that this is much shorter than the number of months in the test set; we will need to use a rolling test to evaluate the performance on the whole test set. For more discussion of forecast horizons and guiding principles for setting them, please see the [energy demand notebook](https://github.com/Azure/MachineLearningNotebooks/tree/master/how-to-use-azureml/automated-machine-learning/forecasting-energy-demand). "
]
},
{
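For context, the horizon discussed above is usually wired into the AutoML configuration along these lines (a sketch based on the forecasting settings of this SDK generation; the column names are placeholder assumptions and the datasets come from the notebook itself):

```python
# Sketch: forecasting settings carrying the 12-period horizon into AutoMLConfig.
from azureml.train.automl import AutoMLConfig

time_series_settings = {
    "time_column_name": "date",              # placeholder: column holding the timestamp
    "grain_column_names": ["series_id"],     # placeholder: omit for a single time-series
    "max_horizon": 12,                       # forecast 12 periods ahead, as described above
}

automl_config = AutoMLConfig(task="forecasting",
                             training_data=train_dataset,          # defined earlier in the notebook
                             label_column_name=target_column_name,  # defined earlier in the notebook
                             n_cross_validations=3,
                             **time_series_settings)
```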
@@ -363,7 +363,7 @@
|
||||
" label_column_name=target_column_name,\n",
|
||||
" validation_data=valid_dataset, \n",
|
||||
" verbosity=logging.INFO,\n",
|
||||
" compute_target = compute_target,\n",
|
||||
" compute_target=compute_target,\n",
|
||||
" max_concurrent_iterations=4,\n",
|
||||
" max_cores_per_iteration=-1,\n",
|
||||
" **automl_settings)"
|
||||
|
||||
@@ -161,7 +161,7 @@
"source": [
"## Data\n",
"\n",
"The [Machine Learning service workspace](https://docs.microsoft.com/en-us/azure/machine-learning/service/concept-workspace), is paired with the storage account, which contains the default data store. We will use it to upload the bike share data and create [tabular dataset](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.data.tabulardataset?view=azure-ml-py) for training. A tabular dataset defines a series of lazily-evaluated, immutable operations to load data from the data source into tabular representation."
"The [Machine Learning service workspace](https://docs.microsoft.com/en-us/azure/machine-learning/service/concept-workspace) is paired with the storage account, which contains the default data store. We will use it to upload the bike share data and create [tabular dataset](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.data.tabulardataset?view=azure-ml-py) for training. A tabular dataset defines a series of lazily-evaluated, immutable operations to load data from the data source into tabular representation."
]
},
{
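The workflow that paragraph describes, condensed into a sketch (the local folder and datastore path are placeholder assumptions):

```python
# Sketch: upload local files to the default datastore and define a lazily-evaluated TabularDataset.
from azureml.core import Workspace, Dataset

ws = Workspace.from_config()
datastore = ws.get_default_datastore()
datastore.upload(src_dir="./data", target_path="bikeshare", overwrite=True, show_progress=True)

dataset = Dataset.Tabular.from_delimited_files(path=[(datastore, "bikeshare/*.csv")])
train_df = dataset.to_pandas_dataframe()   # data is only materialized when requested
```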
@@ -309,7 +309,7 @@
|
||||
" training_data=train,\n",
|
||||
" label_column_name=target_column_name,\n",
|
||||
" compute_target=compute_target,\n",
|
||||
" enable_early_stopping = True,\n",
|
||||
" enable_early_stopping=True,\n",
|
||||
" n_cross_validations=3, \n",
|
||||
" max_concurrent_iterations=4,\n",
|
||||
" max_cores_per_iteration=-1,\n",
|
||||
@@ -586,7 +586,7 @@
|
||||
],
|
||||
"category": "tutorial",
|
||||
"compute": [
|
||||
"remote"
|
||||
"Remote"
|
||||
],
|
||||
"datasets": [
|
||||
"BikeShare"
|
||||
@@ -625,7 +625,7 @@
|
||||
"tags": [
|
||||
"Forecasting"
|
||||
],
|
||||
"task": "forecasting",
|
||||
"task": "Forecasting",
|
||||
"version": 3
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import argparse
|
||||
import azureml.train.automl
|
||||
from azureml.automl.core._vendor.automl.client.core.runtime import forecasting_models
|
||||
from azureml.automl.runtime._vendor.automl.client.core.runtime import forecasting_models
|
||||
from azureml.core import Run
|
||||
from sklearn.externals import joblib
|
||||
import forecasting_helper
|
||||
|
||||
@@ -337,7 +337,7 @@
|
||||
" training_data=train,\n",
|
||||
" label_column_name=target_column_name,\n",
|
||||
" compute_target=compute_target,\n",
|
||||
" enable_early_stopping = True,\n",
|
||||
" enable_early_stopping=True,\n",
|
||||
" n_cross_validations=3, \n",
|
||||
" verbosity=logging.INFO,\n",
|
||||
" **automl_settings)"
|
||||
@@ -730,14 +730,7 @@
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.8"
|
||||
},
|
||||
"star_tag": [
|
||||
"featured"
|
||||
],
|
||||
"tags": [
|
||||
""
|
||||
],
|
||||
"task": "Forecasting"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
|
||||
@@ -152,7 +152,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# upload data to your default datastore\n",
|
||||
"# upload training and test data to your default datastore\n",
|
||||
"ds = ws.get_default_datastore()\n",
|
||||
"ds.upload(src_dir='./data', target_path='groupdata', overwrite=True, show_progress=True)"
|
||||
]
|
||||
@@ -178,7 +178,7 @@
|
||||
"\n",
|
||||
"#### Create or Attach existing AmlCompute\n",
|
||||
"\n",
|
||||
"You will need to create a compute target for your AutoML run. In this tutorial, you create AmlCompute as your training compute resource.\n",
|
||||
"You will need to create a compute target for your automated ML run. In this tutorial, you create AmlCompute as your training compute resource.\n",
|
||||
"#### Creation of AmlCompute takes approximately 5 minutes. \n",
|
||||
"If the AmlCompute with that name is already in your workspace this code will skip the creation process.\n",
|
||||
"As with other Azure services, there are limits on certain resources (e.g. AmlCompute) associated with the Azure Machine Learning service. Please read this article on the default limits and how to request more quota."
|
||||
|
||||
@@ -11,7 +11,7 @@ from azureml.core.dataset import Dataset
|
||||
from azureml.pipeline.core import PipelineData, PipelineParameter, TrainingOutput, StepSequence
|
||||
from azureml.pipeline.steps import PythonScriptStep
|
||||
from azureml.train.automl import AutoMLConfig
|
||||
from azureml.train.automl import AutoMLStep
|
||||
from azureml.train.automl.runtime import AutoMLStep
|
||||
|
||||
|
||||
def _get_groups(data: Dataset, group_column_names: List[str]) -> pd.DataFrame:
|
||||
@@ -33,7 +33,7 @@ def _get_configs(automlconfig: AutoMLConfig,
|
||||
group_name = "#####".join(str(x) for x in group.values)
|
||||
group_name = valid_chars.sub('', group_name)
|
||||
for key in group.index:
|
||||
single = data._dataflow.filter(data._dataflow[key] == group[key])
|
||||
single = single._dataflow.filter(data._dataflow[key] == group[key])
|
||||
group_conf = copy.deepcopy(automlconfig)
|
||||
group_conf.user_settings['training_data'] = single
|
||||
group_conf.user_settings['label_column_name'] = target_column
|
||||
@@ -106,6 +106,13 @@ def build_pipeline_steps(automlconfig: AutoMLConfig,
|
||||
|
||||
final_steps = steps
|
||||
if deploy:
|
||||
# modify the conda dependencies to ensure we pick up correct
|
||||
# versions of azureml-defaults and azureml-train-automl
|
||||
cd = CondaDependencies.create(pip_packages=['azureml-defaults', 'azureml-train-automl'])
|
||||
automl_deps = CondaDependencies(conda_dependencies_file_path='deploy/myenv.yml')
|
||||
cd._merge_dependencies(automl_deps)
|
||||
cd.save('deploy/myenv.yml')
|
||||
|
||||
# add deployment step
|
||||
pp_group_column_names = PipelineParameter(
|
||||
"group_column_names",
|
||||
|
||||
@@ -1,10 +1,11 @@
|
||||
import argparse
|
||||
from azureml.core import Run, Model
|
||||
from azureml.core import Workspace
|
||||
from azureml.core.webservice import AciWebservice
|
||||
from azureml.core.model import InferenceConfig
|
||||
import json
|
||||
|
||||
from azureml.core import Run, Model, Workspace
|
||||
from azureml.core.conda_dependencies import CondaDependencies
|
||||
from azureml.core.model import InferenceConfig
|
||||
from azureml.core.webservice import AciWebservice
|
||||
|
||||
|
||||
script_file_name = 'score.py'
|
||||
conda_env_file_name = 'myenv.yml'
|
||||
|
||||
@@ -1,15 +1,11 @@
|
||||
name: project_environment
|
||||
name: automl_grouping_env
|
||||
dependencies:
|
||||
# The python interpreter version.
|
||||
|
||||
# Currently Azure ML only supports 3.5.2 and later.
|
||||
|
||||
- python=3.6.2
|
||||
|
||||
- pip:
|
||||
- azureml-defaults
|
||||
- azureml-train-automl
|
||||
- numpy
|
||||
- scikit-learn
|
||||
- numpy>=1.16.0,<=1.16.2
|
||||
- scikit-learn>=0.19.0,<=0.20.3
|
||||
- conda-forge::fbprophet==0.5
|
||||
|
||||
|
||||
@@ -448,7 +448,7 @@
"#### Distribution forecasts\n",
"\n",
"Often the figure of interest is not just the point prediction, but the prediction at some quantile of the distribution. \n",
"This arises when the forecast is used to control some kind of inventory, for example of grocery items of virtual machines for a cloud service. In such case, the control point is usually something like \"we want the item to be in stock and not run out 99% of the time\". This is called a \"service level\". Here is how you get quantile forecasts."
"This arises when the forecast is used to control some kind of inventory, for example of grocery items or virtual machines for a cloud service. In such case, the control point is usually something like \"we want the item to be in stock and not run out 99% of the time\". This is called a \"service level\". Here is how you get quantile forecasts."
]
},
{
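For context, quantile forecasts in these notebooks are typically requested along the following lines; this is a sketch under the assumption that the fitted AutoML forecaster exposes a forecast_quantiles method, and the quantile list is illustrative:

```python
# Sketch: request quantile (service-level) forecasts from the fitted forecasting model.
fitted_model.quantiles = [0.05, 0.5, 0.95]            # quantiles of interest
quantile_forecast = fitted_model.forecast_quantiles(X_test)
print(quantile_forecast.head())
```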
@@ -472,7 +472,7 @@
"source": [
"#### Destination-date forecast: \"just do something\"\n",
"\n",
"In some scenarios, the X_test is not known. The forecast is likely to be weak, becaus it is missing contemporaneous predictors, which we will need to impute. If you still wish to predict forward under the assumption that the last known values will be carried forward, you can forecast out to \"destination date\". The destination date still needs to fit within the maximum horizon from training."
"In some scenarios, the X_test is not known. The forecast is likely to be weak, because it is missing contemporaneous predictors, which we will need to impute. If you still wish to predict forward under the assumption that the last known values will be carried forward, you can forecast out to \"destination date\". The destination date still needs to fit within the maximum horizon from training."
]
},
{
@@ -551,7 +551,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"How should we read that eror message? The forecast origin is at the last time themodel saw an actual values of `y` (the target). That was at the end of the training data! Because the model received all `NaN` (and not an actual target value), it is attempting to forecast from the end of training data. But the requested forecast periods are past the maximum horizon. We need to provide a define `y` value to establish the forecast origin.\n",
"How should we read that error message? The forecast origin is at the last time the model saw an actual value of `y` (the target). That was at the end of the training data! Because the model received all `NaN` (and not an actual target value), it is attempting to forecast from the end of training data. But the requested forecast periods are past the maximum horizon. We need to provide a defined `y` value to establish the forecast origin.\n",
"\n",
"We will use this helper function to take the required amount of context from the data preceding the testing data. Its definition is intentionally simplified to keep the idea clear."
]
@@ -711,7 +711,7 @@
|
||||
],
|
||||
"category": "tutorial",
|
||||
"compute": [
|
||||
"remote"
|
||||
"Remote"
|
||||
],
|
||||
"datasets": [
|
||||
"None"
|
||||
@@ -746,7 +746,7 @@
|
||||
"Forecasting",
|
||||
"Confidence Intervals"
|
||||
],
|
||||
"task": "forecasting"
|
||||
"task": "Forecasting"
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
|
||||
@@ -325,9 +325,9 @@
"\n",
"For forecasting tasks, there are some additional parameters that can be set: the name of the column holding the date/time, the grain column names, and the maximum forecast horizon. A time column is required for forecasting, while the grain is optional. If a grain is not given, AutoML assumes that the whole dataset is a single time-series. We also pass a list of columns to drop prior to modeling. The _logQuantity_ column is completely correlated with the target quantity, so it must be removed to prevent a target leak.\n",
"\n",
"The forecast horizon is given in units of the time-series frequency; for instance, the OJ series frequency is weekly, so a horizon of 20 means that a trained model will estimate sales up-to 20 weeks beyond the latest date in the training data for each series. In this example, we set the maximum horizon to the number of samples per series in the test set (n_test_periods). Generally, the value of this parameter will be dictated by business needs. For example, a demand planning organizaion that needs to estimate the next month of sales would set the horizon accordingly. Please see the [energy_demand notebook](https://github.com/Azure/MachineLearningNotebooks/tree/master/how-to-use-azureml/automated-machine-learning/forecasting-energy-demand) for more discussion of forecast horizon.\n",
"The forecast horizon is given in units of the time-series frequency; for instance, the OJ series frequency is weekly, so a horizon of 20 means that a trained model will estimate sales up to 20 weeks beyond the latest date in the training data for each series. In this example, we set the maximum horizon to the number of samples per series in the test set (n_test_periods). Generally, the value of this parameter will be dictated by business needs. For example, a demand planning organization that needs to estimate the next month of sales would set the horizon accordingly. Please see the [energy_demand notebook](https://github.com/Azure/MachineLearningNotebooks/tree/master/how-to-use-azureml/automated-machine-learning/forecasting-energy-demand) for more discussion of forecast horizon.\n",
"\n",
"Finally, a note about the cross-validation (CV) procedure for time-series data. AutoML uses out-of-sample error estimates to select a best pipeline/model, so it is important that the CV fold splitting is done correctly. Time-series can violate the basic statistical assumptions of the canonical K-Fold CV strategy, so AutoML implements a [rolling origin validation](https://robjhyndman.com/hyndsight/tscv/) procedure to create CV folds for time-series data. To use this procedure, you just need to specify the desired number of CV folds in the AutoMLConfig object. It is also possible to bypass CV and use your own validation set by setting the *X_valid* and *y_valid* parameters of AutoMLConfig.\n",
"Finally, a note about the cross-validation (CV) procedure for time-series data. AutoML uses out-of-sample error estimates to select a best pipeline/model, so it is important that the CV fold splitting is done correctly. Time-series can violate the basic statistical assumptions of the canonical K-Fold CV strategy, so AutoML implements a [rolling origin validation](https://robjhyndman.com/hyndsight/tscv/) procedure to create CV folds for time-series data. To use this procedure, you just need to specify the desired number of CV folds in the AutoMLConfig object. It is also possible to bypass CV and use your own validation set by setting the *validation_data* parameter of AutoMLConfig.\n",
"\n",
"Here is a summary of AutoMLConfig parameters used for training the OJ model:\n",
"\n",
@@ -370,7 +370,7 @@
|
||||
" training_data=train_dataset,\n",
|
||||
" label_column_name=target_column_name,\n",
|
||||
" compute_target=compute_target,\n",
|
||||
" enable_early_stopping = True,\n",
|
||||
" enable_early_stopping=True,\n",
|
||||
" n_cross_validations=3,\n",
|
||||
" verbosity=logging.INFO,\n",
|
||||
" **time_series_settings)"
|
||||
@@ -693,7 +693,7 @@
|
||||
"category": "tutorial",
|
||||
"celltoolbar": "Raw Cell Format",
|
||||
"compute": [
|
||||
"remote"
|
||||
"Remote"
|
||||
],
|
||||
"datasets": [
|
||||
"Orange Juice Sales"
|
||||
@@ -705,6 +705,9 @@
|
||||
"framework": [
|
||||
"Azure ML AutoML"
|
||||
],
|
||||
"tags": [
|
||||
"None"
|
||||
],
|
||||
"friendly_name": "Forecasting orange juice sales with deployment",
|
||||
"index_order": 1,
|
||||
"kernelspec": {
|
||||
|
||||
@@ -634,7 +634,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.train.automl.automl_explain_utilities import AutoMLExplainerSetupClass, automl_setup_model_explanations\n",
|
||||
"from azureml.train.automl.runtime.automl_explain_utilities import AutoMLExplainerSetupClass, automl_setup_model_explanations\n",
|
||||
"explainer_setup_class = automl_setup_model_explanations(fitted_model, 'regression', X_test=X_test)"
|
||||
]
|
||||
},
|
||||
@@ -653,11 +653,11 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.explain.model._internal.explanation_client import ExplanationClient\n",
|
||||
"from azureml.contrib.interpret.visualize import ExplanationDashboard\n",
|
||||
"from interpret_community.widget import ExplanationDashboard\n",
|
||||
"client = ExplanationClient.from_run(automl_run)\n",
|
||||
"engineered_explanations = client.download_model_explanation(raw=False)\n",
|
||||
"print(engineered_explanations.get_feature_importance_dict())\n",
|
||||
"ExplanationDashboard(engineered_explanations, explainer_setup_class.automl_estimator, explainer_setup_class.X_test_transform)"
|
||||
"ExplanationDashboard(engineered_explanations, explainer_setup_class.automl_estimator, datasetX=explainer_setup_class.X_test_transform)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -676,7 +676,7 @@
|
||||
"source": [
|
||||
"raw_explanations = client.download_model_explanation(raw=True)\n",
|
||||
"print(raw_explanations.get_feature_importance_dict())\n",
|
||||
"ExplanationDashboard(raw_explanations, explainer_setup_class.automl_pipeline, explainer_setup_class.X_test_raw)"
|
||||
"ExplanationDashboard(raw_explanations, explainer_setup_class.automl_pipeline, datasetX=explainer_setup_class.X_test_raw)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -5,7 +5,8 @@ import os
|
||||
import pickle
|
||||
import azureml.train.automl
|
||||
import azureml.explain.model
|
||||
from azureml.train.automl.automl_explain_utilities import AutoMLExplainerSetupClass, automl_setup_model_explanations
|
||||
from azureml.train.automl.runtime.automl_explain_utilities import AutoMLExplainerSetupClass, \
|
||||
automl_setup_model_explanations
|
||||
from sklearn.externals import joblib
|
||||
from azureml.core.model import Model
|
||||
|
||||
|
||||
@@ -6,7 +6,8 @@ from azureml.core.run import Run
|
||||
from azureml.core.experiment import Experiment
|
||||
from sklearn.externals import joblib
|
||||
from azureml.core.dataset import Dataset
|
||||
from azureml.train.automl.automl_explain_utilities import AutoMLExplainerSetupClass, automl_setup_model_explanations
|
||||
from azureml.train.automl.runtime.automl_explain_utilities import AutoMLExplainerSetupClass, \
|
||||
automl_setup_model_explanations
|
||||
from azureml.explain.model.mimic.models.lightgbm_model import LGBMExplainableModel
|
||||
from azureml.explain.model.mimic_wrapper import MimicWrapper
|
||||
from automl.client.core.common.constants import MODEL_PATH
|
||||
|
||||
@@ -140,6 +140,9 @@
|
||||
"framework": [
|
||||
"Azure ML AutoML"
|
||||
],
|
||||
"tags": [
|
||||
""
|
||||
],
|
||||
"friendly_name": "Forecasting with automated ML SQL integration",
|
||||
"index_order": 1,
|
||||
"kernelspec": {
|
||||
@@ -151,9 +154,6 @@
|
||||
"name": "sql",
|
||||
"version": ""
|
||||
},
|
||||
"tags": [
|
||||
""
|
||||
],
|
||||
"task": "Forecasting"
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -560,6 +560,9 @@
|
||||
"framework": [
|
||||
"Azure ML AutoML"
|
||||
],
|
||||
"tags": [
|
||||
""
|
||||
],
|
||||
"friendly_name": "Setup automated ML SQL integration",
|
||||
"index_order": 1,
|
||||
"kernelspec": {
|
||||
@@ -571,9 +574,6 @@
|
||||
"name": "sql",
|
||||
"version": ""
|
||||
},
|
||||
"tags": [
|
||||
""
|
||||
],
|
||||
"task": "None"
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -175,6 +175,7 @@
|
||||
"source": [
|
||||
"#deploy to ACI\n",
|
||||
"from azureml.core.webservice import AciWebservice, Webservice\n",
|
||||
"from azureml.exceptions import WebserviceException\n",
|
||||
"from azureml.core.model import InferenceConfig\n",
|
||||
"\n",
|
||||
"myaci_config = AciWebservice.deploy_configuration(cpu_cores = 2, \n",
|
||||
@@ -182,11 +183,19 @@
|
||||
" tags = {'name':'Databricks Azure ML ACI'}, \n",
|
||||
" description = 'This is for ADB and AML example.')\n",
|
||||
"\n",
|
||||
"service_name = 'aciws'\n",
|
||||
"\n",
|
||||
"# Remove any existing service under the same name.\n",
|
||||
"try:\n",
|
||||
" Webservice(ws, service_name).delete()\n",
|
||||
"except WebserviceException:\n",
|
||||
" pass\n",
|
||||
"\n",
|
||||
"inference_config = InferenceConfig(runtime= 'spark-py', \n",
|
||||
" entry_script='score_sparkml.py',\n",
|
||||
" conda_file='mydeployenv.yml')\n",
|
||||
"\n",
|
||||
"myservice = Model.deploy(ws, 'aciws', [mymodel], inference_config, myaci_config)\n",
|
||||
"myservice = Model.deploy(ws, service_name, [mymodel], inference_config, myaci_config)\n",
|
||||
"myservice.wait_for_deployment(show_output=True)"
|
||||
]
|
||||
},
|
||||
@@ -199,18 +208,6 @@
|
||||
"help(Webservice)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# List images by ws\n",
|
||||
"\n",
|
||||
"for i in ContainerImage.list(workspace = ws):\n",
|
||||
" print('{}(v.{} [{}]) stored at {} with build log {}'.format(i.name, i.version, i.creation_state, i.image_location, i.image_build_log_uri))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
|
||||
@@ -163,14 +163,19 @@
|
||||
"#it may take 20-25 minutes to create a new cluster\n",
|
||||
"\n",
|
||||
"from azureml.core.compute import AksCompute, ComputeTarget\n",
|
||||
"\n",
|
||||
"# Use the default configuration (can also provide parameters to customize)\n",
|
||||
"prov_config = AksCompute.provisioning_configuration()\n",
|
||||
"from azureml.core.compute_target import ComputeTargetException\n",
|
||||
"\n",
|
||||
"aks_name = 'ps-aks-demo2' \n",
|
||||
"\n",
|
||||
"# Create the cluster\n",
|
||||
"aks_target = ComputeTarget.create(workspace = ws, \n",
|
||||
"try:\n",
|
||||
" aks_target = ComputeTarget(workspace=ws, name=aks_name)\n",
|
||||
" print('Found existing cluster, use it.')\n",
|
||||
"except ComputeTargetException:\n",
|
||||
" # Use the default configuration (can also provide parameters to customize)\n",
|
||||
" prov_config = AksCompute.provisioning_configuration()\n",
|
||||
" \n",
|
||||
" # Create the cluster\n",
|
||||
" aks_target = ComputeTarget.create(workspace = ws, \n",
|
||||
" name = aks_name, \n",
|
||||
" provisioning_configuration = prov_config)\n",
|
||||
"\n",
|
||||
@@ -188,15 +193,24 @@
|
||||
"source": [
|
||||
"#deploy to AKS\n",
|
||||
"from azureml.core.webservice import AksWebservice, Webservice\n",
|
||||
"from azureml.exceptions import WebserviceException\n",
|
||||
"from azureml.core.model import InferenceConfig\n",
|
||||
"\n",
|
||||
"aks_config = AksWebservice.deploy_configuration(enable_app_insights=True)\n",
|
||||
"\n",
|
||||
"service_name = 'ps-aks-service'\n",
|
||||
"\n",
|
||||
"# Remove any existing service under the same name.\n",
|
||||
"try:\n",
|
||||
" Webservice(ws, service_name).delete()\n",
|
||||
"except WebserviceException:\n",
|
||||
" pass\n",
|
||||
"\n",
|
||||
"inference_config = InferenceConfig(runtime = 'spark-py', \n",
|
||||
" entry_script ='score_sparkml.py',\n",
|
||||
" conda_file ='mydeployenv.yml')\n",
|
||||
"\n",
|
||||
"aks_service = Model.deploy(ws, 'ps-aks-service', [mymodel], inference_config, aks_config, aks_target)\n",
|
||||
"aks_service = Model.deploy(ws, service_name, [mymodel], inference_config, aks_config, aks_target)\n",
|
||||
"aks_service.wait_for_deployment(show_output=True)"
|
||||
]
|
||||
},
|
||||
@@ -288,7 +302,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.6"
|
||||
"version": "3.6.8"
|
||||
},
|
||||
"name": "deploy-to-aks-existingimage-05",
|
||||
"notebookId": 1030695628045968
|
||||
|
||||
@@ -661,6 +661,7 @@
|
||||
"# this will take 10-15 minutes to finish\n",
|
||||
"\n",
|
||||
"from azureml.core.webservice import AciWebservice, Webservice\n",
|
||||
"from azureml.exceptions import WebserviceException\n",
|
||||
"from azureml.core.model import InferenceConfig\n",
|
||||
"from azureml.core.model import Model\n",
|
||||
"import uuid\n",
|
||||
@@ -677,6 +678,13 @@
|
||||
"\n",
|
||||
"guid = str(uuid.uuid4()).split(\"-\")[0]\n",
|
||||
"service_name = \"myservice-{}\".format(guid)\n",
|
||||
"\n",
|
||||
"# Remove any existing service under the same name.\n",
|
||||
"try:\n",
|
||||
" Webservice(ws, service_name).delete()\n",
|
||||
"except WebserviceException:\n",
|
||||
" pass\n",
|
||||
"\n",
|
||||
"print(\"Creating service with name: {}\".format(service_name))\n",
|
||||
"\n",
|
||||
"myservice = Model.deploy(ws, service_name, [model], inference_config, myaci_config)\n",
|
||||
@@ -795,7 +803,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.5"
|
||||
"version": "3.6.8"
|
||||
},
|
||||
"name": "auto-ml-classification-local-adb",
|
||||
"notebookId": 2733885892129020
|
||||
|
||||
@@ -116,7 +116,8 @@
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"register model from file"
|
||||
"register model from file",
|
||||
"sample-model-register"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
|
||||
@@ -96,7 +96,8 @@
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"register model from file"
|
||||
"register model from file",
|
||||
"sample-model-register"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
|
||||
@@ -345,9 +345,11 @@
|
||||
],
|
||||
"category": "tutorial",
|
||||
"compute": [
|
||||
"local"
|
||||
"Local"
|
||||
],
|
||||
"datasets": [
|
||||
"None"
|
||||
],
|
||||
"datasets": [],
|
||||
"deployment": [
|
||||
"Local"
|
||||
],
|
||||
|
||||
@@ -431,7 +431,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"aks_service.update(enable_app_insights=False)"
|
||||
"aks_service.update(enable_app_insights=False)\n",
|
||||
"aks_service.wait_for_deployment(show_output = True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -755,7 +755,7 @@
|
||||
],
|
||||
"category": "deployment",
|
||||
"compute": [
|
||||
"local"
|
||||
"Local"
|
||||
],
|
||||
"datasets": [
|
||||
"Emotion FER"
|
||||
|
||||
@@ -763,7 +763,7 @@
|
||||
],
|
||||
"category": "deployment",
|
||||
"compute": [
|
||||
"local"
|
||||
"Local"
|
||||
],
|
||||
"datasets": [
|
||||
"MNIST"
|
||||
|
||||
@@ -373,7 +373,7 @@
|
||||
],
|
||||
"category": "deployment",
|
||||
"compute": [
|
||||
"local"
|
||||
"Local"
|
||||
],
|
||||
"datasets": [
|
||||
"ImageNet"
|
||||
|
||||
@@ -1,11 +1,14 @@
## Using explain model APIs
## Using AzureML Interpret APIs

<a name="samples"></a>
# Explain Model SDK Sample Notebooks
# AzureML Interpret SDK Sample Notebooks

Follow these sample notebooks to learn:
You can run the interpret-community SDK to explain models locally without Azure.
For notebooks on the local experience, please see:
https://github.com/interpretml/interpret-community/tree/master/notebooks

1. [Explain tabular data locally](tabular-data): Basic examples of explaining model trained on tabular data.
2. [Explain on remote AMLCompute](azure-integration/remote-explanation): Explain a model on a remote AMLCompute target.
3. [Explain tabular data with Run History](azure-integration/run-history): Explain a model with Run History.
4. [Operationalize model explanation](azure-integration/scoring-time): Operationalize model explanation as a web service.
Follow these sample notebooks to learn about the model interpretability integration with Azure:

1. [Explain on remote AMLCompute](azure-integration/remote-explanation): Explain a model on a remote AMLCompute target.
2. [Explain tabular data with Run History](azure-integration/run-history): Explain a model with Run History.
3. [Operationalize model explanation](azure-integration/scoring-time): Operationalize model explanation as a web service.
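To give a flavor of the local, Azure-free experience that the README now points to, here is a rough sketch (the dataset and model choice are illustrative assumptions) of explaining a scikit-learn model with the interpret-community packages, using the datasetX keyword adopted elsewhere in this commit:

```python
# Sketch: local model explanation with interpret-community, no Azure services involved.
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from interpret.ext.blackbox import TabularExplainer          # SHAP-based tabular explainer
from interpret_community.widget import ExplanationDashboard  # visualization widget

data = load_breast_cancer()
model = RandomForestClassifier(n_estimators=100).fit(data.data, data.target)

explainer = TabularExplainer(model, data.data, features=list(data.feature_names))
global_explanation = explainer.explain_global(data.data)     # global feature importances
print(global_explanation.get_feature_importance_dict())

# Interactive dashboard; note the datasetX keyword used by the updated notebooks.
ExplanationDashboard(global_explanation, model, datasetX=data.data)
```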
@@ -669,7 +669,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.contrib.interpret.visualize import ExplanationDashboard"
|
||||
"from interpret_community.widget import ExplanationDashboard"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -678,7 +678,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ExplanationDashboard(global_explanation, original_model, x_test)"
|
||||
"ExplanationDashboard(global_explanation, original_model, datasetX=x_test)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -61,4 +61,4 @@ global_explanation = tabular_explainer.explain_global(X_test)
|
||||
# Uploading model explanation data for storage or visualization in webUX
|
||||
# The explanation can then be downloaded on any compute
|
||||
comment = 'Global explanation on regression model trained on boston dataset'
|
||||
client.upload_model_explanation(global_explanation, comment=comment)
|
||||
client.upload_model_explanation(global_explanation, comment=comment, model_id=original_model.id)
|
||||
|
||||
@@ -564,7 +564,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.contrib.interpret.visualize import ExplanationDashboard"
|
||||
"from interpret_community.widget import ExplanationDashboard"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -573,7 +573,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ExplanationDashboard(downloaded_global_explanation, model, x_test)"
|
||||
"ExplanationDashboard(downloaded_global_explanation, model, datasetX=x_test)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -290,7 +290,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.contrib.interpret.visualize import ExplanationDashboard"
|
||||
"from interpret_community.widget import ExplanationDashboard"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -299,7 +299,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ExplanationDashboard(global_explanation, clf, x_test)"
|
||||
"ExplanationDashboard(global_explanation, clf, datasetX=x_test)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -355,7 +355,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.contrib.interpret.visualize import ExplanationDashboard"
|
||||
"from interpret_community.widget import ExplanationDashboard"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -364,7 +364,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ExplanationDashboard(global_explanation, original_svm_model, x_test)"
|
||||
"ExplanationDashboard(global_explanation, original_svm_model, datasetX=x_test)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -116,7 +116,7 @@ global_explanation = tabular_explainer.explain_global(x_test)
|
||||
|
||||
# uploading model explanation data for storage or visualization
|
||||
comment = 'Global explanation on classification model trained on IBM employee attrition dataset'
|
||||
client.upload_model_explanation(global_explanation, comment=comment)
|
||||
client.upload_model_explanation(global_explanation, comment=comment, model_id=original_model.id)
|
||||
|
||||
# also create a lightweight explainer for scoring time
|
||||
scoring_explainer = LinearScoringExplainer(tabular_explainer)
|
||||
|
||||
@@ -1,509 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||
"\n",
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Explain binary classification model predictions with raw feature transformations\n",
|
||||
"_**This notebook showcases how to use the Azure Machine Learning Interpretability SDK to explain and visualize a binary classification model that uses advanced many to one or many to many feature transformations.**_\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"## Table of Contents\n",
|
||||
"\n",
|
||||
"1. [Introduction](#Introduction)\n",
|
||||
"1. [Setup](#Setup)\n",
|
||||
"1. [Run model explainer locally at training time](#Explain)\n",
|
||||
" 1. Apply feature transformations\n",
|
||||
" 1. Train a binary classification model\n",
|
||||
" 1. Explain the model on raw features\n",
|
||||
" 1. Generate global explanations\n",
|
||||
" 1. Generate local explanations\n",
|
||||
"1. [Visualize results](#Visualize)\n",
|
||||
"1. [Next steps](#Next)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Introduction\n",
|
||||
"\n",
|
||||
"This notebook illustrates creating explanations for a binary classification model, Titanic passenger data classification, that uses many to one and many to many feature transformations from raw data to engineered features. For the many to one transformation, we sum 2 features `age` and `fare`. For many to many transformations two features are computed: one that is product of `age` and `fare` and another that is square of this product. Our tabular data explainer is then used to get the explanation object with the flag `allow_all_transformations` passed. The object is then used to get raw feature importances.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"We will showcase raw feature transformations with three tabular data explainers: TabularExplainer (SHAP), MimicExplainer (global surrogate), and PFIExplainer.\n",
|
||||
"\n",
|
||||
"|  |\n",
|
||||
"|:--:|\n",
|
||||
"| *Interpretability Toolkit Architecture* |\n",
|
||||
"\n",
|
||||
"Problem: Titanic passenger data classification with scikit-learn (run model explainer locally)\n",
|
||||
"\n",
|
||||
"1. Transform raw features to engineered features\n",
|
||||
"2. Train a Logistic Regression model using Scikit-learn\n",
|
||||
"3. Run 'explain_model' globally and locally with full dataset in local mode, which doesn't contact any Azure services.\n",
|
||||
"4. Visualize the global and local explanations with the visualization dashboard.\n",
|
||||
"---\n",
|
||||
"\n",
|
||||
"Setup: If you are using Jupyter notebooks, the extensions should be installed automatically with the package.\n",
|
||||
"If you are using Jupyter Labs run the following command:\n",
|
||||
"```\n",
|
||||
"(myenv) $ jupyter labextension install @jupyter-widgets/jupyterlab-manager\n",
|
||||
"```\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Explain\n",
|
||||
"\n",
|
||||
"### Run model explainer locally at training time"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.pipeline import Pipeline\n",
|
||||
"from sklearn.impute import SimpleImputer\n",
|
||||
"from sklearn.preprocessing import StandardScaler, OneHotEncoder\n",
|
||||
"from sklearn.linear_model import LogisticRegression\n",
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"# Explainers:\n",
|
||||
"# 1. SHAP Tabular Explainer\n",
|
||||
"from interpret.ext.blackbox import TabularExplainer\n",
|
||||
"\n",
|
||||
"# OR\n",
|
||||
"\n",
|
||||
"# 2. Mimic Explainer\n",
|
||||
"from interpret.ext.blackbox import MimicExplainer\n",
|
||||
"# You can use one of the following four interpretable models as a global surrogate to the black box model\n",
|
||||
"from interpret.ext.glassbox import LGBMExplainableModel\n",
|
||||
"from interpret.ext.glassbox import LinearExplainableModel\n",
|
||||
"from interpret.ext.glassbox import SGDExplainableModel\n",
|
||||
"from interpret.ext.glassbox import DecisionTreeExplainableModel\n",
|
||||
"\n",
|
||||
"# OR\n",
|
||||
"\n",
|
||||
"# 3. PFI Explainer\n",
|
||||
"from interpret.ext.blackbox import PFIExplainer "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Load the Titanic passenger data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"titanic_url = ('https://raw.githubusercontent.com/amueller/'\n",
|
||||
" 'scipy-2017-sklearn/091d371/notebooks/datasets/titanic3.csv')\n",
|
||||
"data = pd.read_csv(titanic_url)\n",
|
||||
"# fill missing values\n",
|
||||
"data = data.fillna(method=\"ffill\")\n",
|
||||
"data = data.fillna(method=\"bfill\")"
|
||||
]
|
||||
},
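Note that recent pandas releases deprecate the `method=` argument to `fillna`; on such versions an equivalent fill would be (hedged sketch, same effect as the cell above):
```python
# Forward-fill then backward-fill, equivalent to the fillna(method=...) calls above.
data = data.ffill().bfill()
```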
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Similar to example [here](https://scikit-learn.org/stable/auto_examples/compose/plot_column_transformer_mixed_types.html#sphx-glr-auto-examples-compose-plot-column-transformer-mixed-types-py), use a subset of columns"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"\n",
|
||||
"numeric_features = ['age', 'fare']\n",
|
||||
"categorical_features = ['embarked', 'sex', 'pclass']\n",
|
||||
"\n",
|
||||
"y = data['survived'].values\n",
|
||||
"X = data[categorical_features + numeric_features]\n",
|
||||
"\n",
|
||||
"# Split data into train and test\n",
|
||||
"x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Transform raw features"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We can explain raw features by either using a `sklearn.compose.ColumnTransformer` or a list of fitted transformer tuples. The cell below uses `sklearn.compose.ColumnTransformer`. In case you want to run the example with the list of fitted transformer tuples, comment the cell below and uncomment the cell that follows after. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# We add many to one and many to many transformations for illustration purposes.\n",
|
||||
"# The support for raw feature explanations with many to one and many to many transformations are only supported \n",
|
||||
"# When allow_all_transformations is set to True on explainer creation\n",
|
||||
"from sklearn.preprocessing import FunctionTransformer\n",
|
||||
"many_to_one_transformer = FunctionTransformer(lambda x: x.sum(axis=1).reshape(-1, 1))\n",
|
||||
"many_to_many_transformer = FunctionTransformer(lambda x: np.hstack(\n",
|
||||
" (np.prod(x, axis=1).reshape(-1, 1), (np.prod(x, axis=1)**2).reshape(-1, 1))\n",
|
||||
"))"
|
||||
]
|
||||
},
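To make the transformations concrete, here is a small self-contained check of what the two transformers above produce on a toy array (illustration only; the numbers are made up):
```python
import numpy as np
from sklearn.preprocessing import FunctionTransformer

many_to_one = FunctionTransformer(lambda x: x.sum(axis=1).reshape(-1, 1))
many_to_many = FunctionTransformer(lambda x: np.hstack(
    (np.prod(x, axis=1).reshape(-1, 1), (np.prod(x, axis=1) ** 2).reshape(-1, 1))
))

toy = np.array([[2.0, 3.0], [4.0, 5.0]])       # two rows of [age, fare]
print(many_to_one.fit_transform(toy))          # [[5.], [9.]]              -> age + fare
print(many_to_many.fit_transform(toy))         # [[6., 36.], [20., 400.]]  -> product, product squared
```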
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.compose import ColumnTransformer\n",
|
||||
"\n",
|
||||
"transformations = ColumnTransformer([\n",
|
||||
" (\"age_fare_1\", Pipeline(steps=[\n",
|
||||
" ('imputer', SimpleImputer(strategy='median')),\n",
|
||||
" ('scaler', StandardScaler())\n",
|
||||
" ]), [\"age\", \"fare\"]),\n",
|
||||
" (\"age_fare_2\", many_to_one_transformer, [\"age\", \"fare\"]),\n",
|
||||
" (\"age_fare_3\", many_to_many_transformer, [\"age\", \"fare\"]),\n",
|
||||
" (\"embarked\", Pipeline(steps=[\n",
|
||||
" (\"imputer\", SimpleImputer(strategy='constant', fill_value='missing')), \n",
|
||||
" (\"encoder\", OneHotEncoder(sparse=False))]), [\"embarked\"]),\n",
|
||||
" (\"sex_pclass\", OneHotEncoder(sparse=False), [\"sex\", \"pclass\"]) \n",
|
||||
"])\n"
|
||||
]
|
||||
},
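A quick way to sanity-check the engineered feature space this ColumnTransformer produces (a sketch assuming `x_train` from the split above); the explainer later maps importances from these engineered columns back to the five raw features:
```python
# Fit the preprocessing alone and inspect the engineered feature count.
engineered = transformations.fit_transform(x_train)
print(engineered.shape)   # (n_rows, n_engineered_features)
```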
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"'''\n",
|
||||
"# Uncomment below if sklearn-pandas is not installed\n",
|
||||
"#!pip install sklearn-pandas\n",
|
||||
"from sklearn_pandas import DataFrameMapper\n",
|
||||
"\n",
|
||||
"# Impute, standardize the numeric features and one-hot encode the categorical features. \n",
|
||||
"\n",
|
||||
"transformations = [\n",
|
||||
" ([\"age\", \"fare\"], Pipeline(steps=[\n",
|
||||
" ('imputer', SimpleImputer(strategy='median')),\n",
|
||||
" ('scaler', StandardScaler())\n",
|
||||
" ])),\n",
|
||||
" ([\"age\", \"fare\"], many_to_one_transformer),\n",
|
||||
" ([\"age\", \"fare\"], many_to_many_transformer),\n",
|
||||
" ([\"embarked\"], Pipeline(steps=[\n",
|
||||
" (\"imputer\", SimpleImputer(strategy='constant', fill_value='missing')), \n",
|
||||
" (\"encoder\", OneHotEncoder(sparse=False))])),\n",
|
||||
" ([\"sex\", \"pclass\"], OneHotEncoder(sparse=False)) \n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Append classifier to preprocessing pipeline.\n",
|
||||
"# Now we have a full prediction pipeline.\n",
|
||||
"clf = Pipeline(steps=[('preprocessor', DataFrameMapper(transformations)),\n",
|
||||
" ('classifier', LogisticRegression(solver='lbfgs'))])\n",
|
||||
"'''"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Train a Logistic Regression model, which you want to explain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Append classifier to preprocessing pipeline.\n",
|
||||
"# Now we have a full prediction pipeline.\n",
|
||||
"clf = Pipeline(steps=[('preprocessor', transformations),\n",
|
||||
" ('classifier', LogisticRegression(solver='lbfgs'))])\n",
|
||||
"model = clf.fit(x_train, y_train)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Explain predictions on your local machine"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# 1. Using SHAP TabularExplainer\n",
|
||||
"# When the last parameter allow_all_transformations is passed, we handle many to one and many to many transformations to \n",
|
||||
"# generate approximations to raw feature importances. When this flag is passed, for transformations not recognized as one to \n",
|
||||
"# many, we distribute feature importances evenly to raw features generating them.\n",
|
||||
"# clf.steps[-1][1] returns the trained classification model\n",
|
||||
"explainer = TabularExplainer(clf.steps[-1][1], \n",
|
||||
" initialization_examples=x_train, \n",
|
||||
" features=x_train.columns, \n",
|
||||
" transformations=transformations, \n",
|
||||
" allow_all_transformations=True)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# 2. Using MimicExplainer\n",
|
||||
"# augment_data is optional and if true, oversamples the initialization examples to improve surrogate model accuracy to fit original model. Useful for high-dimensional data where the number of rows is less than the number of columns. \n",
|
||||
"# max_num_of_augmentations is optional and defines max number of times we can increase the input data size.\n",
|
||||
"# LGBMExplainableModel can be replaced with LinearExplainableModel, SGDExplainableModel, or DecisionTreeExplainableModel\n",
|
||||
"# explainer = MimicExplainer(clf.steps[-1][1], \n",
|
||||
"# x_train, \n",
|
||||
"# LGBMExplainableModel, \n",
|
||||
"# augment_data=True, \n",
|
||||
"# max_num_of_augmentations=10, \n",
|
||||
"# features=x_train.columns, \n",
|
||||
"# transformations=transformations, \n",
|
||||
"# allow_all_transformations=True)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# 3. Using PFIExplainer\n",
|
||||
"\n",
|
||||
"# Use the parameter \"metric\" to pass a metric name or function to evaluate the permutation. \n",
|
||||
"# Note that if a metric function is provided a higher value must be better.\n",
|
||||
"# Otherwise, take the negative of the function or set the parameter \"is_error_metric\" to True.\n",
|
||||
"# Default metrics: \n",
|
||||
"# F1 Score for binary classification, F1 Score with micro average for multiclass classification and\n",
|
||||
"# Mean absolute error for regression\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# explainer = PFIExplainer(clf.steps[-1][1], \n",
|
||||
"# features=x_train.columns, \n",
|
||||
"# transformations=transformations)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Generate global explanations\n",
|
||||
"Explain overall model predictions (global explanation)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Passing in test dataset for evaluation examples - note it must be a representative sample of the original data\n",
|
||||
"# x_train can be passed as well, but with more examples explanations will take longer although they may be more accurate\n",
|
||||
"\n",
|
||||
"global_explanation = explainer.explain_global(x_test)\n",
|
||||
"\n",
|
||||
"# Note: if you used the PFIExplainer in the previous step, use the next line of code instead\n",
|
||||
"# global_explanation = explainer.explain_global(x_test, true_labels=y_test)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Sorted SHAP values\n",
|
||||
"print('ranked global importance values: {}'.format(global_explanation.get_ranked_global_values()))\n",
|
||||
"# Corresponding feature names\n",
|
||||
"print('ranked global importance names: {}'.format(global_explanation.get_ranked_global_names()))\n",
|
||||
"# Feature ranks (based on original order of features)\n",
|
||||
"print('global importance rank: {}'.format(global_explanation.global_importance_rank))\n",
|
||||
"# Per class feature names\n",
|
||||
"print('ranked per class feature names: {}'.format(global_explanation.get_ranked_per_class_names()))\n",
|
||||
"# Per class feature importance values\n",
|
||||
"print('ranked per class feature values: {}'.format(global_explanation.get_ranked_per_class_values()))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Print out a dictionary that holds the sorted feature importance names and values\n",
|
||||
"print('global importance rank: {}'.format(global_explanation.get_feature_importance_dict()))"
|
||||
]
|
||||
},
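If a tabular view is more convenient, the dictionary printed above can be loaded into pandas (a minimal sketch; assumes pandas is available in the environment):
```python
import pandas as pd

# get_feature_importance_dict() maps raw feature name -> global importance value.
importances = pd.Series(global_explanation.get_feature_importance_dict())
print(importances.sort_values(ascending=False).head(10))
```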
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Explain overall model predictions as a collection of local (instance-level) explanations"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# feature shap values for all features and all data points in the training data\n",
|
||||
"print('local importance values: {}'.format(global_explanation.local_importance_values))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Generate local explanations\n",
|
||||
"Explain local data points (individual instances)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Note: PFIExplainer does not support local explanations\n",
|
||||
"# You can pass a specific data point or a group of data points to the explain_local function\n",
|
||||
"\n",
|
||||
"# E.g., Explain the first data point in the test set\n",
|
||||
"instance_num = 1\n",
|
||||
"local_explanation = explainer.explain_local(x_test[:instance_num])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Get the prediction for the first member of the test set and explain why model made that prediction\n",
|
||||
"prediction_value = clf.predict(x_test)[instance_num]\n",
|
||||
"\n",
|
||||
"sorted_local_importance_values = local_explanation.get_ranked_local_values()[prediction_value]\n",
|
||||
"sorted_local_importance_names = local_explanation.get_ranked_local_names()[prediction_value]\n",
|
||||
"\n",
|
||||
"print('local importance values: {}'.format(sorted_local_importance_values))\n",
|
||||
"print('local importance names: {}'.format(sorted_local_importance_names))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Visualize\n",
|
||||
"Load the visualization dashboard"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.contrib.interpret.visualize import ExplanationDashboard"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ExplanationDashboard(global_explanation, model, x_test)"
|
||||
]
|
||||
},
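As in the hunks earlier in this diff, later releases source the dashboard from the open-source `interpret-community` package; the equivalent call there would be (hedged, assuming that package is installed):
```python
from interpret_community.widget import ExplanationDashboard

ExplanationDashboard(global_explanation, model, datasetX=x_test)
```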
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Next\n",
|
||||
"Learn about other use cases of the explain package on a:\n",
|
||||
" \n",
|
||||
"1. [Training time: regression problem](./explain-regression-local.ipynb)\n",
|
||||
"1. [Training time: binary classification problem](./explain-binary-classification-local.ipynb)\n",
|
||||
"1. [Training time: multiclass classification problem](./explain-multiclass-classification-local.ipynb)\n",
|
||||
"1. [Explain models with simple feature transformations](./simple-feature-transformations-explain-local.ipynb)\n",
|
||||
"1. [Save model explanations via Azure Machine Learning Run History](../azure-integration/run-history/save-retrieve-explanations-run-history.ipynb)\n",
|
||||
"1. [Run explainers remotely on Azure Machine Learning Compute (AMLCompute)](../azure-integration/remote-explanation/explain-model-on-amlcompute.ipynb)\n",
|
||||
"1. Inferencing time: deploy a classification model and explainer:\n",
|
||||
" 1. [Deploy a locally-trained model and explainer](../azure-integration/scoring-time/train-explain-model-locally-and-deploy.ipynb)\n",
|
||||
" 1. [Deploy a remotely-trained model and explainer](../azure-integration/scoring-time/train-explain-model-on-amlcompute-and-deploy.ipynb)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "mesameki"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
"name": "python36"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -1,9 +0,0 @@
|
||||
name: advanced-feature-transformations-explain-local
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- interpret
|
||||
- azureml-interpret
|
||||
- azureml-contrib-interpret
|
||||
- sklearn-pandas
|
||||
- ipywidgets
|
||||
@@ -1,390 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||
"\n",
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Explain binary classification model predictions\n",
|
||||
"_**This notebook showcases how to use the Azure Machine Learning Interpretability SDK to explain and visualize a binary classification model predictions.**_\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"## Table of Contents\n",
|
||||
"\n",
|
||||
"1. [Introduction](#Introduction)\n",
|
||||
"1. [Setup](#Setup)\n",
|
||||
"1. [Run model explainer locally at training time](#Explain)\n",
|
||||
" 1. Train a binary classification model\n",
|
||||
" 1. Explain the model\n",
|
||||
" 1. Generate global explanations\n",
|
||||
" 1. Generate local explanations\n",
|
||||
"1. [Visualize results](#Visualize)\n",
|
||||
"1. [Next steps](#Next)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Introduction\n",
|
||||
"\n",
|
||||
"This notebook illustrates how to explain a binary classification model predictions locally at training time without contacting any Azure services.\n",
|
||||
"It demonstrates the API calls that you need to make to get the global and local explanations and a visualization dashboard that provides an interactive way of discovering patterns in data and explanations.\n",
|
||||
"\n",
|
||||
"We will showcase three tabular data explainers: TabularExplainer (SHAP), MimicExplainer (global surrogate), and PFIExplainer.\n",
|
||||
"\n",
|
||||
"|  |\n",
|
||||
"|:--:|\n",
|
||||
"| *Interpretability Toolkit Architecture* |\n",
|
||||
"\n",
|
||||
"Problem: Breast cancer diagnosis classification with scikit-learn (run model explainer locally)\n",
|
||||
"\n",
|
||||
"1. Train a SVM classification model using Scikit-learn\n",
|
||||
"2. Run 'explain_model' globally and locally with full dataset in local mode, which doesn't contact any Azure services.\n",
|
||||
"3. Visualize the global and local explanations with the visualization dashboard.\n",
|
||||
"---\n",
|
||||
"\n",
|
||||
"Setup: If you are using Jupyter notebooks, the extensions should be installed automatically with the package.\n",
|
||||
"If you are using Jupyter Labs run the following command:\n",
|
||||
"```\n",
|
||||
"(myenv) $ jupyter labextension install @jupyter-widgets/jupyterlab-manager\n",
|
||||
"```\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Explain\n",
|
||||
"\n",
|
||||
"### Run model explainer locally at training time"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.datasets import load_breast_cancer\n",
|
||||
"from sklearn import svm\n",
|
||||
"\n",
|
||||
"# Explainers:\n",
|
||||
"# 1. SHAP Tabular Explainer\n",
|
||||
"from interpret.ext.blackbox import TabularExplainer\n",
|
||||
"\n",
|
||||
"# OR\n",
|
||||
"\n",
|
||||
"# 2. Mimic Explainer\n",
|
||||
"from interpret.ext.blackbox import MimicExplainer\n",
|
||||
"# You can use one of the following four interpretable models as a global surrogate to the black box model\n",
|
||||
"from interpret.ext.glassbox import LGBMExplainableModel\n",
|
||||
"from interpret.ext.glassbox import LinearExplainableModel\n",
|
||||
"from interpret.ext.glassbox import SGDExplainableModel\n",
|
||||
"from interpret.ext.glassbox import DecisionTreeExplainableModel\n",
|
||||
"\n",
|
||||
"# OR\n",
|
||||
"\n",
|
||||
"# 3. PFI Explainer\n",
|
||||
"from interpret.ext.blackbox import PFIExplainer "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Load the breast cancer diagnosis data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"breast_cancer_data = load_breast_cancer()\n",
|
||||
"classes = breast_cancer_data.target_names.tolist()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Split data into train and test\n",
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"x_train, x_test, y_train, y_test = train_test_split(breast_cancer_data.data, breast_cancer_data.target, test_size=0.2, random_state=0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Train a SVM classification model, which you want to explain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"clf = svm.SVC(gamma=0.001, C=100., probability=True)\n",
|
||||
"model = clf.fit(x_train, y_train)"
|
||||
]
|
||||
},
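Before explaining the model it is worth confirming it actually fits the data; a one-line sanity check using the standard scikit-learn API:
```python
# Mean accuracy on the held-out split; explanations of a poorly fitted model are rarely informative.
print('test accuracy: {:.3f}'.format(model.score(x_test, y_test)))
```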
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Explain predictions on your local machine"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# 1. Using SHAP TabularExplainer\n",
|
||||
"explainer = TabularExplainer(model, \n",
|
||||
" x_train, \n",
|
||||
" features=breast_cancer_data.feature_names, \n",
|
||||
" classes=classes)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# 2. Using MimicExplainer\n",
|
||||
"# augment_data is optional and if true, oversamples the initialization examples to improve surrogate model accuracy to fit original model. Useful for high-dimensional data where the number of rows is less than the number of columns. \n",
|
||||
"# max_num_of_augmentations is optional and defines max number of times we can increase the input data size.\n",
|
||||
"# LGBMExplainableModel can be replaced with LinearExplainableModel, SGDExplainableModel, or DecisionTreeExplainableModel\n",
|
||||
"# explainer = MimicExplainer(model, \n",
|
||||
"# x_train, \n",
|
||||
"# LGBMExplainableModel, \n",
|
||||
"# augment_data=True, \n",
|
||||
"# max_num_of_augmentations=10, \n",
|
||||
"# features=breast_cancer_data.feature_names, \n",
|
||||
"# classes=classes)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# 3. Using PFIExplainer\n",
|
||||
"\n",
|
||||
"# Use the parameter \"metric\" to pass a metric name or function to evaluate the permutation. \n",
|
||||
"# Note that if a metric function is provided a higher value must be better.\n",
|
||||
"# Otherwise, take the negative of the function or set the parameter \"is_error_metric\" to True.\n",
|
||||
"# Default metrics: \n",
|
||||
"# F1 Score for binary classification, F1 Score with micro average for multiclass classification and\n",
|
||||
"# Mean absolute error for regression\n",
|
||||
"\n",
|
||||
"# explainer = PFIExplainer(model, \n",
|
||||
"# features=breast_cancer_data.feature_names, \n",
|
||||
"# classes=classes)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Generate global explanations\n",
|
||||
"Explain overall model predictions (global explanation)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Passing in test dataset for evaluation examples - note it must be a representative sample of the original data\n",
|
||||
"# x_train can be passed as well, but with more examples explanations will take longer although they may be more accurate\n",
|
||||
"global_explanation = explainer.explain_global(x_test)\n",
|
||||
"\n",
|
||||
"# Note: if you used the PFIExplainer in the previous step, use the next line of code instead\n",
|
||||
"# global_explanation = explainer.explain_global(x_test, true_labels=y_test)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Sorted SHAP values\n",
|
||||
"print('ranked global importance values: {}'.format(global_explanation.get_ranked_global_values()))\n",
|
||||
"# Corresponding feature names\n",
|
||||
"print('ranked global importance names: {}'.format(global_explanation.get_ranked_global_names()))\n",
|
||||
"# Feature ranks (based on original order of features)\n",
|
||||
"print('global importance rank: {}'.format(global_explanation.global_importance_rank))\n",
|
||||
"\n",
|
||||
"# Note: PFIExplainer does not support per class explanations\n",
|
||||
"# Per class feature names\n",
|
||||
"print('ranked per class feature names: {}'.format(global_explanation.get_ranked_per_class_names()))\n",
|
||||
"# Per class feature importance values\n",
|
||||
"print('ranked per class feature values: {}'.format(global_explanation.get_ranked_per_class_values()))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Print out a dictionary that holds the sorted feature importance names and values\n",
|
||||
"print('global importance rank: {}'.format(global_explanation.get_feature_importance_dict()))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Explain overall model predictions as a collection of local (instance-level) explanations"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# feature shap values for all features and all data points in the training data\n",
|
||||
"print('local importance values: {}'.format(global_explanation.local_importance_values))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Generate local explanations\n",
|
||||
"Explain local data points (individual instances)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Note: PFIExplainer does not support local explanations\n",
|
||||
"# You can pass a specific data point or a group of data points to the explain_local function\n",
|
||||
"\n",
|
||||
"# E.g., Explain the first data point in the test set\n",
|
||||
"instance_num = 0\n",
|
||||
"local_explanation = explainer.explain_local(x_test[instance_num,:])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Get the prediction for the first member of the test set and explain why model made that prediction\n",
|
||||
"prediction_value = clf.predict(x_test)[instance_num]\n",
|
||||
"\n",
|
||||
"sorted_local_importance_values = local_explanation.get_ranked_local_values()[prediction_value]\n",
|
||||
"sorted_local_importance_names = local_explanation.get_ranked_local_names()[prediction_value]\n",
|
||||
"\n",
|
||||
"print('local importance values: {}'.format(sorted_local_importance_values))\n",
|
||||
"print('local importance names: {}'.format(sorted_local_importance_names))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Visualize\n",
|
||||
"Load the visualization dashboard"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.contrib.interpret.visualize import ExplanationDashboard"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ExplanationDashboard(global_explanation, model, x_test)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Next\n",
|
||||
"Learn about other use cases of the explain package on a:\n",
|
||||
" \n",
|
||||
"1. [Training time: regression problem](./explain-regression-local.ipynb)\n",
|
||||
"1. [Training time: multiclass classification problem](./explain-multiclass-classification-local.ipynb)\n",
|
||||
"1. Explain models with engineered features:\n",
|
||||
" 1. [Simple feature transformations](./simple-feature-transformations-explain-local.ipynb)\n",
|
||||
" 1. [Advanced feature transformations](./advanced-feature-transformations-explain-local.ipynb)\n",
|
||||
"1. [Save model explanations via Azure Machine Learning Run History](../azure-integration/run-history/save-retrieve-explanations-run-history.ipynb)\n",
|
||||
"1. [Run explainers remotely on Azure Machine Learning Compute (AMLCompute)](../azure-integration/remote-explanation/explain-model-on-amlcompute.ipynb)\n",
|
||||
"1. Inferencing time: deploy a classification model and explainer:\n",
|
||||
" 1. [Deploy a locally-trained model and explainer](../azure-integration/scoring-time/train-explain-model-locally-and-deploy.ipynb)\n",
|
||||
" 1. [Deploy a remotely-trained model and explainer](../azure-integration/scoring-time/train-explain-model-on-amlcompute-and-deploy.ipynb)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "mesameki"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
"name": "python36"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -1,8 +0,0 @@
|
||||
name: explain-binary-classification-local
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- interpret
|
||||
- azureml-interpret
|
||||
- azureml-contrib-interpret
|
||||
- ipywidgets
|
||||
@@ -1,388 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||
"\n",
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Explain multiclass classification model's predictions\n",
|
||||
"_**This notebook showcases how to use the Azure Machine Learning Interpretability SDK to explain and visualize a multiclass classification model predictions.**_\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"## Table of Contents\n",
|
||||
"\n",
|
||||
"1. [Introduction](#Introduction)\n",
|
||||
"1. [Setup](#Setup)\n",
|
||||
"1. [Run model explainer locally at training time](#Explain)\n",
|
||||
" 1. Train a multiclass classification model\n",
|
||||
" 1. Explain the model\n",
|
||||
" 1. Generate global explanations\n",
|
||||
" 1. Generate local explanations\n",
|
||||
"1. [Visualize results](#Visualize)\n",
|
||||
"1. [Next steps](#Next)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Introduction\n",
|
||||
"\n",
|
||||
"This notebook illustrates how to explain a multiclass classification model predictions locally at training time without contacting any Azure services.\n",
|
||||
"It demonstrates the API calls that you need to make to get the global and local explanations and a visualization dashboard that provides an interactive way of discovering patterns in data and explanations.\n",
|
||||
"\n",
|
||||
"We will showcase three tabular data explainers: TabularExplainer (SHAP), MimicExplainer (global surrogate), and PFIExplainer.\n",
|
||||
"\n",
|
||||
"|  |\n",
|
||||
"|:--:|\n",
|
||||
"| *Interpretability Toolkit Architecture* |\n",
|
||||
"\n",
|
||||
"Problem: Iris flower classification with scikit-learn (run model explainer locally)\n",
|
||||
"\n",
|
||||
"1. Train a SVM classification model using Scikit-learn\n",
|
||||
"2. Run 'explain_model' globally and locally with full dataset in local mode, which doesn't contact any Azure services.\n",
|
||||
"3. Visualize the global and local explanations with the visualization dashboard.\n",
|
||||
"---\n",
|
||||
"\n",
|
||||
"Setup: If you are using Jupyter notebooks, the extensions should be installed automatically with the package.\n",
|
||||
"If you are using Jupyter Labs run the following command:\n",
|
||||
"```\n",
|
||||
"(myenv) $ jupyter labextension install @jupyter-widgets/jupyterlab-manager\n",
|
||||
"```\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Explain\n",
|
||||
"\n",
|
||||
"### Run model explainer locally at training time"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.datasets import load_iris\n",
|
||||
"from sklearn import svm\n",
|
||||
"\n",
|
||||
"# Explainers:\n",
|
||||
"# 1. SHAP Tabular Explainer\n",
|
||||
"from interpret.ext.blackbox import TabularExplainer\n",
|
||||
"\n",
|
||||
"# OR\n",
|
||||
"\n",
|
||||
"# 2. Mimic Explainer\n",
|
||||
"from interpret.ext.blackbox import MimicExplainer\n",
|
||||
"# You can use one of the following four interpretable models as a global surrogate to the black box model\n",
|
||||
"from interpret.ext.glassbox import LGBMExplainableModel\n",
|
||||
"from interpret.ext.glassbox import LinearExplainableModel\n",
|
||||
"from interpret.ext.glassbox import SGDExplainableModel\n",
|
||||
"from interpret.ext.glassbox import DecisionTreeExplainableModel\n",
|
||||
"\n",
|
||||
"# OR\n",
|
||||
"\n",
|
||||
"# 3. PFI Explainer\n",
|
||||
"from interpret.ext.blackbox import PFIExplainer "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Load the Iris flower dataset"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"iris = load_iris()\n",
|
||||
"X = iris['data']\n",
|
||||
"y = iris['target']\n",
|
||||
"classes = iris['target_names']\n",
|
||||
"feature_names = iris['feature_names']"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Split data into train and test\n",
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Train a SVM classification model, which you want to explain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"clf = svm.SVC(gamma=0.001, C=100., probability=True)\n",
|
||||
"model = clf.fit(x_train, y_train)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Explain predictions on your local machine"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# 1. Using SHAP TabularExplainer\n",
|
||||
"explainer = TabularExplainer(model, \n",
|
||||
" x_train, \n",
|
||||
" features=feature_names, \n",
|
||||
" classes=classes)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# 2. Using MimicExplainer\n",
|
||||
"# augment_data is optional and if true, oversamples the initialization examples to improve surrogate model accuracy to fit original model. Useful for high-dimensional data where the number of rows is less than the number of columns. \n",
|
||||
"# max_num_of_augmentations is optional and defines max number of times we can increase the input data size.\n",
|
||||
"# LGBMExplainableModel can be replaced with LinearExplainableModel, SGDExplainableModel, or DecisionTreeExplainableModel\n",
|
||||
"# explainer = MimicExplainer(model, \n",
|
||||
"# x_train, \n",
|
||||
"# LGBMExplainableModel, \n",
|
||||
"# augment_data=True, \n",
|
||||
"# max_num_of_augmentations=10, \n",
|
||||
"# features=feature_names, \n",
|
||||
"# classes=classes)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# 3. Using PFIExplainer\n",
|
||||
"\n",
|
||||
"# Use the parameter \"metric\" to pass a metric name or function to evaluate the permutation. \n",
|
||||
"# Note that if a metric function is provided a higher value must be better.\n",
|
||||
"# Otherwise, take the negative of the function or set the parameter \"is_error_metric\" to True.\n",
|
||||
"# Default metrics: \n",
|
||||
"# F1 Score for binary classification, F1 Score with micro average for multiclass classification and\n",
|
||||
"# Mean absolute error for regression\n",
|
||||
"\n",
|
||||
"# explainer = PFIExplainer(model, \n",
|
||||
"# features=feature_names, \n",
|
||||
"# classes=classes)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Generate global explanations\n",
|
||||
"Explain overall model predictions (global explanation)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Passing in test dataset for evaluation examples - note it must be a representative sample of the original data\n",
|
||||
"# x_train can be passed as well, but with more examples explanations will take longer although they may be more accurate\n",
|
||||
"global_explanation = explainer.explain_global(x_test)\n",
|
||||
"\n",
|
||||
"# Note: if you used the PFIExplainer in the previous step, use the next line of code instead\n",
|
||||
"# global_explanation = explainer.explain_global(x_test, true_labels=y_test)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Sorted SHAP values\n",
|
||||
"print('ranked global importance values: {}'.format(global_explanation.get_ranked_global_values()))\n",
|
||||
"# Corresponding feature names\n",
|
||||
"print('ranked global importance names: {}'.format(global_explanation.get_ranked_global_names()))\n",
|
||||
"# Feature ranks (based on original order of features)\n",
|
||||
"print('global importance rank: {}'.format(global_explanation.global_importance_rank))\n",
|
||||
"\n",
|
||||
"# Note: PFIExplainer does not support per class explanations\n",
|
||||
"# Per class feature names\n",
|
||||
"print('ranked per class feature names: {}'.format(global_explanation.get_ranked_per_class_names()))\n",
|
||||
"# Per class feature importance values\n",
|
||||
"print('ranked per class feature values: {}'.format(global_explanation.get_ranked_per_class_values()))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Print out a dictionary that holds the sorted feature importance names and values\n",
|
||||
"print('global importance rank: {}'.format(global_explanation.get_feature_importance_dict()))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Explain overall model predictions as a collection of local (instance-level) explanations"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# feature shap values for all features and all data points in the training data\n",
|
||||
"print('local importance values: {}'.format(global_explanation.local_importance_values))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Generate local explanations\n",
|
||||
"Explain local data points (individual instances)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Note: PFIExplainer does not support local explanations\n",
|
||||
"# You can pass a specific data point or a group of data points to the explain_local function\n",
|
||||
"\n",
|
||||
"# E.g., Explain the first data point in the test set\n",
|
||||
"instance_num = 0\n",
|
||||
"local_explanation = explainer.explain_local(x_test[instance_num,:])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Get the prediction for the first member of the test set and explain why model made that prediction\n",
|
||||
"prediction_value = clf.predict(x_test)[instance_num]\n",
|
||||
"\n",
|
||||
"sorted_local_importance_values = local_explanation.get_ranked_local_values()[prediction_value]\n",
|
||||
"sorted_local_importance_names = local_explanation.get_ranked_local_names()[prediction_value]\n",
|
||||
"\n",
|
||||
"print('local importance values: {}'.format(sorted_local_importance_values))\n",
|
||||
"print('local importance names: {}'.format(sorted_local_importance_names))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Visualize\n",
|
||||
"Load the visualization dashboard"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.contrib.interpret.visualize import ExplanationDashboard"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ExplanationDashboard(global_explanation, model, x_test)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Next\n",
|
||||
"Learn about other use cases of the explain package on a:\n",
|
||||
"\n",
|
||||
"1. [Training time: regression problem](./explain-regression-local.ipynb) \n",
|
||||
"1. [Training time: binary classification problem](./explain-binary-classification-local.ipynb)\n",
|
||||
"1. Explain models with engineered features:\n",
|
||||
" 1. [Simple feature transformations](./simple-feature-transformations-explain-local.ipynb)\n",
|
||||
" 1. [Advanced feature transformations](./advanced-feature-transformations-explain-local.ipynb)\n",
|
||||
"1. [Save model explanations via Azure Machine Learning Run History](../azure-integration/run-history/save-retrieve-explanations-run-history.ipynb)\n",
|
||||
"1. [Run explainers remotely on Azure Machine Learning Compute (AMLCompute)](../azure-integration/remote-explanation/explain-model-on-amlcompute.ipynb)\n",
|
||||
"1. Inferencing time: deploy a classification model and explainer:\n",
|
||||
" 1. [Deploy a locally-trained model and explainer](../azure-integration/scoring-time/train-explain-model-locally-and-deploy.ipynb)\n",
|
||||
" 1. [Deploy a remotely-trained model and explainer](../azure-integration/scoring-time/train-explain-model-on-amlcompute-and-deploy.ipynb)\n",
|
||||
"\u00e2\u20ac\u2039\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "mesameki"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
"name": "python36"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -1,8 +0,0 @@
|
||||
name: explain-multiclass-classification-local
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- interpret
|
||||
- azureml-interpret
|
||||
- azureml-contrib-interpret
|
||||
- ipywidgets
|
||||
@@ -1,383 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||
"\n",
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Explain regression model predictions\n",
|
||||
"_**This notebook showcases how to use the Azure Machine Learning Interpretability SDK to explain and visualize a regression model predictions.**_\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"## Table of Contents\n",
|
||||
"\n",
|
||||
"1. [Introduction](#Introduction)\n",
|
||||
"1. [Setup](#Setup)\n",
|
||||
"1. [Run model explainer locally at training time](#Explain)\n",
|
||||
" 1. Train a regressor model\n",
|
||||
" 1. Explain the model\n",
|
||||
" 1. Generate global explanations\n",
|
||||
" 1. Generate local explanations\n",
|
||||
"1. [Visualize results](#Visualize)\n",
|
||||
"1. [Next steps](#Next)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Introduction\n",
|
||||
"\n",
|
||||
"This notebook illustrates how to explain regression model predictions locally at training time without contacting any Azure services.\n",
|
||||
"It demonstrates the API calls that you need to make to get the global and local explanations and a visualization dashboard that provides an interactive way of discovering patterns in data and explanations.\n",
|
||||
"\n",
|
||||
"We will showcase three tabular data explainers: TabularExplainer (SHAP), MimicExplainer (global surrogate), and PFIExplainer.\n",
|
||||
"\n",
|
||||
"|  |\n",
|
||||
"|:--:|\n",
|
||||
"| *Interpretability Toolkit Architecture* |\n",
|
||||
"\n",
|
||||
"Problem: Boston Housing Price Prediction with scikit-learn (run model explainer locally)\n",
|
||||
"\n",
|
||||
"1. Train a GradientBoosting regression model using Scikit-learn\n",
|
||||
"2. Run 'explain_model' globally and locally with full dataset in local mode, which doesn't contact any Azure services.\n",
|
||||
"3. Visualize the global and local explanations with the visualization dashboard.\n",
|
||||
"---\n",
|
||||
"\n",
|
||||
"Setup: If you are using Jupyter notebooks, the extensions should be installed automatically with the package.\n",
|
||||
"If you are using Jupyter Labs run the following command:\n",
|
||||
"```\n",
|
||||
"(myenv) $ jupyter labextension install @jupyter-widgets/jupyterlab-manager\n",
|
||||
"```\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Explain\n",
|
||||
"\n",
|
||||
"### Run model explainer locally at training time"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn import datasets\n",
|
||||
"from sklearn.ensemble import GradientBoostingRegressor\n",
|
||||
"\n",
|
||||
"# Explainers:\n",
|
||||
"# 1. SHAP Tabular Explainer\n",
|
||||
"from interpret.ext.blackbox import TabularExplainer\n",
|
||||
"\n",
|
||||
"# OR\n",
|
||||
"\n",
|
||||
"# 2. Mimic Explainer\n",
|
||||
"from interpret.ext.blackbox import MimicExplainer\n",
|
||||
"# You can use one of the following four interpretable models as a global surrogate to the black box model\n",
|
||||
"from interpret.ext.glassbox import LGBMExplainableModel\n",
|
||||
"from interpret.ext.glassbox import LinearExplainableModel\n",
|
||||
"from interpret.ext.glassbox import SGDExplainableModel\n",
|
||||
"from interpret.ext.glassbox import DecisionTreeExplainableModel\n",
|
||||
"\n",
|
||||
"# OR\n",
|
||||
"\n",
|
||||
"# 3. PFI Explainer\n",
|
||||
"from interpret.ext.blackbox import PFIExplainer "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Load the Boston house price data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"boston_data = datasets.load_boston()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Split data into train and test\n",
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"x_train, x_test, y_train, y_test = train_test_split(boston_data.data, boston_data.target, test_size=0.2, random_state=0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Train a GradientBoosting regression model, which you want to explain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"reg = GradientBoostingRegressor(n_estimators=100, max_depth=4,\n",
|
||||
" learning_rate=0.1, loss='huber',\n",
|
||||
" random_state=1)\n",
|
||||
"model = reg.fit(x_train, y_train)"
|
||||
]
|
||||
},
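As with the classifiers above, a quick fit check before explaining, using the estimator's built-in R^2 score:
```python
# R^2 on the held-out split gives a rough sense of how trustworthy the explanations will be.
print('test R^2: {:.3f}'.format(model.score(x_test, y_test)))
```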
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Explain predictions on your local machine"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# 1. Using SHAP TabularExplainer\n",
|
||||
"explainer = TabularExplainer(model, \n",
|
||||
" x_train, \n",
|
||||
" features = boston_data.feature_names)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# 2. Using MimicExplainer\n",
|
||||
"# augment_data is optional and if true, oversamples the initialization examples to improve surrogate model accuracy to fit original model. Useful for high-dimensional data where the number of rows is less than the number of columns. \n",
|
||||
"# max_num_of_augmentations is optional and defines max number of times we can increase the input data size.\n",
|
||||
"# LGBMExplainableModel can be replaced with LinearExplainableModel, SGDExplainableModel, or DecisionTreeExplainableModel\n",
|
||||
"# explainer = MimicExplainer(model, \n",
|
||||
"# x_train, \n",
|
||||
"# LGBMExplainableModel, \n",
|
||||
"# augment_data=True, \n",
|
||||
"# max_num_of_augmentations=10, \n",
|
||||
"# features=boston_data.feature_names)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# 3. Using PFIExplainer\n",
|
||||
"\n",
|
||||
"# Use the parameter \"metric\" to pass a metric name or function to evaluate the permutation. \n",
|
||||
"# Note that if a metric function is provided a higher value must be better.\n",
|
||||
"# Otherwise, take the negative of the function or set the parameter \"is_error_metric\" to True.\n",
|
||||
"# Default metrics: \n",
|
||||
"# F1 Score for binary classification, F1 Score with micro average for multiclass classification and\n",
|
||||
"# Mean absolute error for regression\n",
|
||||
"\n",
|
||||
"# explainer = PFIExplainer(model, \n",
|
||||
"# features=boston_data.feature_names)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Generate global explanations\n",
|
||||
"Explain overall model predictions (global explanation)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Passing in test dataset for evaluation examples - note it must be a representative sample of the original data\n",
|
||||
"# x_train can be passed as well, but with more examples explanations will take longer although they may be more accurate\n",
|
||||
"global_explanation = explainer.explain_global(x_test)\n",
|
||||
"\n",
|
||||
"# Note: if you used the PFIExplainer in the previous step, use the next line of code instead\n",
|
||||
"# global_explanation = explainer.explain_global(x_test, true_labels=y_test)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Sorted SHAP values \n",
|
||||
"print('ranked global importance values: {}'.format(global_explanation.get_ranked_global_values()))\n",
|
||||
"# Corresponding feature names\n",
|
||||
"print('ranked global importance names: {}'.format(global_explanation.get_ranked_global_names()))\n",
|
||||
"# Feature ranks (based on original order of features)\n",
|
||||
"print('global importance rank: {}'.format(global_explanation.global_importance_rank))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Print out a dictionary that holds the sorted feature importance names and values\n",
|
||||
"print('global importance rank: {}'.format(global_explanation.get_feature_importance_dict()))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Explain overall model predictions as a collection of local (instance-level) explanations"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Note: PFIExplainer does not support local explanations\n",
|
||||
"# feature shap values for all features and all data points in the training data\n",
|
||||
"print('local importance values: {}'.format(global_explanation.local_importance_values))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Generate local explanations\n",
|
||||
"Explain local data points (individual instances)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Note: PFIExplainer does not support local explanations\n",
|
||||
"# You can pass a specific data point or a group of data points to the explain_local function\n",
|
||||
"\n",
|
||||
"# E.g., Explain the first data point in the test set\n",
|
||||
"local_explanation = explainer.explain_local(x_test[0,:])\n",
|
||||
"\n",
|
||||
"# E.g., Explain the first five data points in the test set\n",
|
||||
"# local_explanation_group = explainer.explain_local(x_test[0:4,:])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Sorted local feature importance information; reflects the original feature order\n",
|
||||
"sorted_local_importance_names = local_explanation.get_ranked_local_names()\n",
|
||||
"sorted_local_importance_values = local_explanation.get_ranked_local_values()\n",
|
||||
"\n",
|
||||
"print('sorted local importance names: {}'.format(sorted_local_importance_names))\n",
|
||||
"print('sorted local importance values: {}'.format(sorted_local_importance_values))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Visualize\n",
|
||||
"Load the visualization dashboard"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.contrib.interpret.visualize import ExplanationDashboard"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ExplanationDashboard(global_explanation, model, x_test)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Next\n",
|
||||
"Learn about other use cases of the explain package on a:\n",
|
||||
" \n",
|
||||
"1. [Training time: binary classification problem](./explain-binary-classification-local.ipynb)\n",
|
||||
"1. [Training time: multiclass classification problem](./explain-multiclass-classification-local.ipynb)\n",
|
||||
"1. Explain models with engineered features:\n",
|
||||
" 1. [Simple feature transformations](./simple-feature-transformations-explain-local.ipynb)\n",
|
||||
" 1. [Advanced feature transformations](./advanced-feature-transformations-explain-local.ipynb)\n",
|
||||
"1. [Save model explanations via Azure Machine Learning Run History](../azure-integration/run-history/save-retrieve-explanations-run-history.ipynb)\n",
|
||||
"1. [Run explainers remotely on Azure Machine Learning Compute (AMLCompute)](../azure-integration/remote-explanation/explain-model-on-amlcompute.ipynb)\n",
|
||||
"1. Inferencing time: deploy a classification model and explainer:\n",
|
||||
" 1. [Deploy a locally-trained model and explainer](../azure-integration/scoring-time/train-explain-model-locally-and-deploy.ipynb)\n",
|
||||
" 1. [Deploy a remotely-trained model and explainer](../azure-integration/scoring-time/train-explain-model-on-amlcompute-and-deploy.ipynb)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "mesameki"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
"name": "python36"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -1,8 +0,0 @@
|
||||
name: explain-regression-local
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- interpret
|
||||
- azureml-interpret
|
||||
- azureml-contrib-interpret
|
||||
- ipywidgets
|
||||
Binary file not shown.
|
@@ -1,517 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||
"\n",
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Explain binary classification model predictions with raw feature transformations\n",
|
||||
"_**This notebook showcases how to use the Azure Machine Learning Interpretability SDK to explain and visualize a binary classification model that uses one to one and one to many feature transformations.**_\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"## Table of Contents\n",
|
||||
"\n",
|
||||
"1. [Introduction](#Introduction)\n",
|
||||
"1. [Setup](#Setup)\n",
|
||||
"1. [Run model explainer locally at training time](#Explain)\n",
|
||||
" 1. Apply feature transformations\n",
|
||||
" 1. Train a binary classification model\n",
|
||||
" 1. Explain the model on raw features\n",
|
||||
" 1. Generate global explanations\n",
|
||||
" 1. Generate local explanations\n",
|
||||
"1. [Visualize results](#Visualize)\n",
|
||||
"1. [Next steps](#Next%20steps)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Introduction\n",
|
||||
"\n",
|
||||
"This notebook illustrates creating explanations for a binary classification model, IBM employee attrition classification, that uses one to one and one to many feature transformations from raw data to engineered features. The one to many feature transformations include one hot encoding on categorical features. The one to one feature transformations apply standard scaling on numeric features. Our tabular data explainer is then used to get raw feature importances.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"We will showcase raw feature transformations with three tabular data explainers: TabularExplainer (SHAP), MimicExplainer (global surrogate), and PFIExplainer.\n",
|
||||
"\n",
|
||||
"|  |\n",
|
||||
"|:--:|\n",
|
||||
"| *Interpretability Toolkit Architecture* |\n",
|
||||
"\n",
|
||||
"Problem: IBM employee attrition classification with scikit-learn (run model explainer locally)\n",
|
||||
"\n",
|
||||
"1. Transform raw features to engineered features\n",
|
||||
"2. Train a SVC classification model using Scikit-learn\n",
|
||||
"3. Run 'explain_model' globally and locally with full dataset in local mode, which doesn't contact any Azure services.\n",
|
||||
"4. Visualize the global and local explanations with the visualization dashboard.\n",
|
||||
"---\n",
|
||||
"\n",
|
||||
"Setup: If you are using Jupyter notebooks, the extensions should be installed automatically with the package.\n",
|
||||
"If you are using Jupyter Labs run the following command:\n",
|
||||
"```\n",
|
||||
"(myenv) $ jupyter labextension install @jupyter-widgets/jupyterlab-manager\n",
|
||||
"```\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Explain\n",
|
||||
"\n",
|
||||
"### Run model explainer locally at training time"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.pipeline import Pipeline\n",
|
||||
"from sklearn.impute import SimpleImputer\n",
|
||||
"from sklearn.preprocessing import StandardScaler, OneHotEncoder\n",
|
||||
"from sklearn.svm import SVC\n",
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"# Explainers:\n",
|
||||
"# 1. SHAP Tabular Explainer\n",
|
||||
"from interpret.ext.blackbox import TabularExplainer\n",
|
||||
"\n",
|
||||
"# OR\n",
|
||||
"\n",
|
||||
"# 2. Mimic Explainer\n",
|
||||
"from interpret.ext.blackbox import MimicExplainer\n",
|
||||
"# You can use one of the following four interpretable models as a global surrogate to the black box model\n",
|
||||
"from interpret.ext.glassbox import LGBMExplainableModel\n",
|
||||
"from interpret.ext.glassbox import LinearExplainableModel\n",
|
||||
"from interpret.ext.glassbox import SGDExplainableModel\n",
|
||||
"from interpret.ext.glassbox import DecisionTreeExplainableModel\n",
|
||||
"\n",
|
||||
"# OR\n",
|
||||
"\n",
|
||||
"# 3. PFI Explainer\n",
|
||||
"from interpret.ext.blackbox import PFIExplainer "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Load the IBM employee attrition data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# get the IBM employee attrition dataset\n",
|
||||
"outdirname = 'dataset.6.21.19'\n",
|
||||
"try:\n",
|
||||
" from urllib import urlretrieve\n",
|
||||
"except ImportError:\n",
|
||||
" from urllib.request import urlretrieve\n",
|
||||
"import zipfile\n",
|
||||
"zipfilename = outdirname + '.zip'\n",
|
||||
"urlretrieve('https://publictestdatasets.blob.core.windows.net/data/' + zipfilename, zipfilename)\n",
|
||||
"with zipfile.ZipFile(zipfilename, 'r') as unzip:\n",
|
||||
" unzip.extractall('.')\n",
|
||||
"attritionData = pd.read_csv('./WA_Fn-UseC_-HR-Employee-Attrition.csv')\n",
|
||||
"\n",
|
||||
"# Dropping Employee count as all values are 1 and hence attrition is independent of this feature\n",
|
||||
"attritionData = attritionData.drop(['EmployeeCount'], axis=1)\n",
|
||||
"# Dropping Employee Number since it is merely an identifier\n",
|
||||
"attritionData = attritionData.drop(['EmployeeNumber'], axis=1)\n",
|
||||
"\n",
|
||||
"attritionData = attritionData.drop(['Over18'], axis=1)\n",
|
||||
"\n",
|
||||
"# Since all values are 80\n",
|
||||
"attritionData = attritionData.drop(['StandardHours'], axis=1)\n",
|
||||
"\n",
|
||||
"# Converting target variables from string to numerical values\n",
|
||||
"target_map = {'Yes': 1, 'No': 0}\n",
|
||||
"attritionData[\"Attrition_numerical\"] = attritionData[\"Attrition\"].apply(lambda x: target_map[x])\n",
|
||||
"target = attritionData[\"Attrition_numerical\"]\n",
|
||||
"\n",
|
||||
"attritionXData = attritionData.drop(['Attrition_numerical', 'Attrition'], axis=1)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Split data into train and test\n",
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"x_train, x_test, y_train, y_test = train_test_split(attritionXData, \n",
|
||||
" target, \n",
|
||||
" test_size = 0.2,\n",
|
||||
" random_state=0,\n",
|
||||
" stratify=target)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Creating dummy columns for each categorical feature\n",
|
||||
"categorical = []\n",
|
||||
"for col, value in attritionXData.iteritems():\n",
|
||||
" if value.dtype == 'object':\n",
|
||||
" categorical.append(col)\n",
|
||||
" \n",
|
||||
"# Store the numerical columns in a list numerical\n",
|
||||
"numerical = attritionXData.columns.difference(categorical) "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Transform raw features"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We can explain raw features by either using a `sklearn.compose.ColumnTransformer` or a list of fitted transformer tuples. The cell below uses `sklearn.compose.ColumnTransformer`. In case you want to run the example with the list of fitted transformer tuples, comment the cell below and uncomment the cell that follows after. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.compose import ColumnTransformer\n",
|
||||
"\n",
|
||||
"# We create the preprocessing pipelines for both numeric and categorical data.\n",
|
||||
"numeric_transformer = Pipeline(steps=[\n",
|
||||
" ('imputer', SimpleImputer(strategy='median')),\n",
|
||||
" ('scaler', StandardScaler())])\n",
|
||||
"\n",
|
||||
"categorical_transformer = Pipeline(steps=[\n",
|
||||
" ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),\n",
|
||||
" ('onehot', OneHotEncoder(handle_unknown='ignore'))])\n",
|
||||
"\n",
|
||||
"transformations = ColumnTransformer(\n",
|
||||
" transformers=[\n",
|
||||
" ('num', numeric_transformer, numerical),\n",
|
||||
" ('cat', categorical_transformer, categorical)])\n",
|
||||
"\n",
|
||||
"# Append classifier to preprocessing pipeline.\n",
|
||||
"# Now we have a full prediction pipeline.\n",
|
||||
"clf = Pipeline(steps=[('preprocessor', transformations),\n",
|
||||
" ('classifier', SVC(C = 1.0, probability=True))])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"'''\n",
|
||||
"# Uncomment below if sklearn-pandas is not installed\n",
|
||||
"#!pip install sklearn-pandas\n",
|
||||
"from sklearn_pandas import DataFrameMapper\n",
|
||||
"\n",
|
||||
"# Impute, standardize the numeric features and one-hot encode the categorical features. \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"numeric_transformations = [([f], Pipeline(steps=[('imputer', SimpleImputer(strategy='median')), ('scaler', StandardScaler())])) for f in numerical]\n",
|
||||
"\n",
|
||||
"categorical_transformations = [([f], OneHotEncoder(handle_unknown='ignore', sparse=False)) for f in categorical]\n",
|
||||
"\n",
|
||||
"transformations = numeric_transformations + categorical_transformations\n",
|
||||
"\n",
|
||||
"# Append classifier to preprocessing pipeline.\n",
|
||||
"# Now we have a full prediction pipeline.\n",
|
||||
"clf = Pipeline(steps=[('preprocessor', transformations),\n",
|
||||
" ('classifier', SVC(C = 1.0, probability=True))]) \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"'''"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Train a SVM classification model, which you want to explain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model = clf.fit(x_train, y_train)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Explain predictions on your local machine"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# 1. Using SHAP TabularExplainer\n",
|
||||
"# clf.steps[-1][1] returns the trained classification model\n",
|
||||
"explainer = TabularExplainer(clf.steps[-1][1], \n",
|
||||
" initialization_examples=x_train, \n",
|
||||
" features=attritionXData.columns, \n",
|
||||
" classes=[\"Not leaving\", \"leaving\"], \n",
|
||||
" transformations=transformations)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# 2. Using MimicExplainer\n",
|
||||
"# augment_data is optional and if true, oversamples the initialization examples to improve surrogate model accuracy to fit original model. Useful for high-dimensional data where the number of rows is less than the number of columns. \n",
|
||||
"# max_num_of_augmentations is optional and defines max number of times we can increase the input data size.\n",
|
||||
"# LGBMExplainableModel can be replaced with LinearExplainableModel, SGDExplainableModel, or DecisionTreeExplainableModel\n",
|
||||
"# explainer = MimicExplainer(clf.steps[-1][1], \n",
|
||||
"# x_train, \n",
|
||||
"# LGBMExplainableModel, \n",
|
||||
"# augment_data=True, \n",
|
||||
"# max_num_of_augmentations=10, \n",
|
||||
"# features=attritionXData.columns, \n",
|
||||
"# classes=[\"Not leaving\", \"leaving\"], \n",
|
||||
"# transformations=transformations)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# 3. Using PFIExplainer\n",
|
||||
"\n",
|
||||
"# Use the parameter \"metric\" to pass a metric name or function to evaluate the permutation. \n",
|
||||
"# Note that if a metric function is provided a higher value must be better.\n",
|
||||
"# Otherwise, take the negative of the function or set the parameter \"is_error_metric\" to True.\n",
|
||||
"# Default metrics: \n",
|
||||
"# F1 Score for binary classification, F1 Score with micro average for multiclass classification and\n",
|
||||
"# Mean absolute error for regression\n",
|
||||
"\n",
|
||||
"# explainer = PFIExplainer(clf.steps[-1][1], \n",
|
||||
"# features=x_train.columns, \n",
|
||||
"# transformations=transformations,\n",
|
||||
"# classes=[\"Not leaving\", \"leaving\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Generate global explanations\n",
|
||||
"Explain overall model predictions (global explanation)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Passing in test dataset for evaluation examples - note it must be a representative sample of the original data\n",
|
||||
"# x_train can be passed as well, but with more examples explanations will take longer although they may be more accurate\n",
|
||||
"global_explanation = explainer.explain_global(x_test)\n",
|
||||
"\n",
|
||||
"# Note: if you used the PFIExplainer in the previous step, use the next line of code instead\n",
|
||||
"# global_explanation = explainer.explain_global(x_test, true_labels=y_test)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Sorted SHAP values\n",
|
||||
"print('ranked global importance values: {}'.format(global_explanation.get_ranked_global_values()))\n",
|
||||
"# Corresponding feature names\n",
|
||||
"print('ranked global importance names: {}'.format(global_explanation.get_ranked_global_names()))\n",
|
||||
"# Feature ranks (based on original order of features)\n",
|
||||
"print('global importance rank: {}'.format(global_explanation.global_importance_rank))\n",
|
||||
"\n",
|
||||
"# Note: PFIExplainer does not support per class explanations\n",
|
||||
"# Per class feature names\n",
|
||||
"print('ranked per class feature names: {}'.format(global_explanation.get_ranked_per_class_names()))\n",
|
||||
"# Per class feature importance values\n",
|
||||
"print('ranked per class feature values: {}'.format(global_explanation.get_ranked_per_class_values()))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Print out a dictionary that holds the sorted feature importance names and values\n",
|
||||
"print('global importance rank: {}'.format(global_explanation.get_feature_importance_dict()))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Explain overall model predictions as a collection of local (instance-level) explanations"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# feature shap values for all features and all data points in the training data\n",
|
||||
"print('local importance values: {}'.format(global_explanation.local_importance_values))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Generate local explanations\n",
|
||||
"Explain local data points (individual instances)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Note: PFIExplainer does not support local explanations\n",
|
||||
"# You can pass a specific data point or a group of data points to the explain_local function\n",
|
||||
"\n",
|
||||
"# E.g., Explain the first data point in the test set\n",
|
||||
"instance_num = 1\n",
|
||||
"local_explanation = explainer.explain_local(x_test[:instance_num])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Get the prediction for the first member of the test set and explain why model made that prediction\n",
|
||||
"prediction_value = clf.predict(x_test)[instance_num]\n",
|
||||
"\n",
|
||||
"sorted_local_importance_values = local_explanation.get_ranked_local_values()[prediction_value]\n",
|
||||
"sorted_local_importance_names = local_explanation.get_ranked_local_names()[prediction_value]\n",
|
||||
"\n",
|
||||
"print('local importance values: {}'.format(sorted_local_importance_values))\n",
|
||||
"print('local importance names: {}'.format(sorted_local_importance_names))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Visualize\n",
|
||||
"Load the visualization dashboard"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.contrib.interpret.visualize import ExplanationDashboard"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ExplanationDashboard(global_explanation, model, x_test)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Next\n",
|
||||
"Learn about other use cases of the explain package on a:\n",
|
||||
" \n",
|
||||
"1. [Training time: regression problem](./explain-regression-local.ipynb)\n",
|
||||
"1. [Training time: binary classification problem](./explain-binary-classification-local.ipynb)\n",
|
||||
"1. [Training time: multiclass classification problem](./explain-multiclass-classification-local.ipynb)\n",
|
||||
"1. [Explain models with advanced feature transformations](./advanced-feature-transformations-explain-local.ipynb)\n",
|
||||
"1. [Save model explanations via Azure Machine Learning Run History](../azure-integration/run-history/save-retrieve-explanations-run-history.ipynb)\n",
|
||||
"1. [Run explainers remotely on Azure Machine Learning Compute (AMLCompute)](../azure-integration/remote-explanation/explain-model-on-amlcompute.ipynb)\n",
|
||||
"1. Inferencing time: deploy a classification model and explainer:\n",
|
||||
" 1. [Deploy a locally-trained model and explainer](../azure-integration/scoring-time/train-explain-model-locally-and-deploy.ipynb)\n",
|
||||
" 1. [Deploy a remotely-trained model and explainer](../azure-integration/scoring-time/train-explain-model-on-amlcompute-and-deploy.ipynb)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "mesameki"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
"name": "python36"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.8"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -1,9 +0,0 @@
|
||||
name: simple-feature-transformations-explain-local
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- interpret
|
||||
- azureml-interpret
|
||||
- azureml-contrib-interpret
|
||||
- sklearn-pandas
|
||||
- ipywidgets
|
||||
@@ -34,7 +34,8 @@
|
||||
"| Azure Data Lake Storage Gen 1 | Yes | Yes |\n",
|
||||
"| Azure Data Lake Storage Gen 2 | Yes | Yes |\n",
|
||||
"| Azure SQL Database | Yes | Yes |\n",
|
||||
"| Azure Database for PostgreSQL | Yes | No |"
|
||||
"| Azure Database for PostgreSQL | Yes | Yes |",
|
||||
"| Azure Database for MySQL | Yes | Yes |"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -342,8 +343,8 @@
|
||||
"source": [
|
||||
"\n",
|
||||
"mysql_datastore_name=\"MySqlDatastore\"\n",
|
||||
"server_name=os.getenv(\"MYSQL_SERVERNAME_62\", \"<my-server-name>\") # Name of PostgreSQL server \n",
|
||||
"database_name=os.getenv(\"MYSQL_DATBASENAME_62\", \"<my-database-name>\") # Name of PostgreSQL database\n",
|
||||
"server_name=os.getenv(\"MYSQL_SERVERNAME_62\", \"<my-server-name>\") # Name of MySQL server \n",
|
||||
"database_name=os.getenv(\"MYSQL_DATBASENAME_62\", \"<my-database-name>\") # Name of MySQL database\n",
|
||||
"user_id=os.getenv(\"MYSQL_USERID_62\", \"<my-user-id>\") # user id\n",
|
||||
"user_password=os.getenv(\"MYSQL_USERPW_62\", \"<my-user-password>\") # user password\n",
|
||||
"\n",
|
||||
|
||||
@@ -23,9 +23,9 @@
|
||||
"# How to create Module, ModuleVersion, and use them in a pipeline with ModuleStep.\n",
|
||||
"In this notebook, we introduce the concept of versioned modules and how to use them in an Azure Machine Learning Pipeline.\n",
|
||||
"\n",
|
||||
"The core idea behind introducing Module, ModuleVersion and ModuleStep is to allow the separation between a reusable executable components and their actual usage. These reusable software components (such as scripts or executables) can be used in different scenarios and by different users. This follows the same idea of separating software frameworks/libraries and their actual usage in applications. Module and ModuleVersion take the role of the reusable executable components where ModuleStep is there to link them to an actual usage.\n",
|
||||
"The core idea behind introducing Module, ModuleVersion and ModuleStep is to allow the separation between reusable executable components and their actual usage. These reusable software components (such as scripts or executables) can be used in different scenarios and by different users. This follows the same idea of separating software frameworks/libraries and their actual usage in applications. Module and ModuleVersion take the role of the reusable executable components where ModuleStep is there to link them to an actual usage.\n",
|
||||
"\n",
|
||||
"A module is an elaborated container of its versions, where each version is the actual computational unit. It is up to users to define the semantics of this hierarchical structure of container and versions. For example, they could be different versions for different use cases, development progress, etc.\n",
|
||||
"A module is an elaborated container of its versions, where each version is the actual computational unit. It is up to users to define the semantics of this hierarchical structure of container and versions. For example, there could be different versions for different use cases, development progress, etc.\n",
|
||||
"\n",
|
||||
"Each ModuleVersion may have inputs, outputs and rely on parameters and its environment configuration to operate.\n",
|
||||
"\n",
|
||||
|
||||
@@ -522,6 +522,24 @@
|
||||
"run_id = pipeline_endpoint_by_name.submit(\"NewName\")\n",
|
||||
"print(run_id)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Use Experiment.Submit() to Submit Pipeline\n",
|
||||
"Run specific pipeline using Experiment submit api"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core import Experiment\n",
|
||||
"pipeline_run = Experiment(ws, name=\"submit_from_endpoint\").submit(pipeline_endpoint_by_name, tags={'endpoint_tag': \"1\"}, pipeline_version=\"0\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
@@ -76,7 +76,7 @@
|
||||
"from azureml.core.runconfig import RunConfiguration\n",
|
||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||
"\n",
|
||||
"from azureml.train.automl import AutoMLStep\n",
|
||||
"from azureml.train.automl.runtime import AutoMLStep\n",
|
||||
"\n",
|
||||
"# Check core SDK version number\n",
|
||||
"print(\"SDK version:\", azureml.core.VERSION)"
|
||||
|
||||
@@ -822,7 +822,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.train.automl import AutoMLStep\n",
|
||||
"from azureml.train.automl.runtime import AutoMLStep\n",
|
||||
"\n",
|
||||
"trainWithAutomlStep = AutoMLStep(\n",
|
||||
" name='AutoML_Regression',\n",
|
||||
|
||||
@@ -322,7 +322,6 @@
|
||||
"# Runconfig\n",
|
||||
"amlcompute_run_config = RunConfiguration(conda_dependencies=cd)\n",
|
||||
"amlcompute_run_config.environment.docker.enabled = True\n",
|
||||
"amlcompute_run_config.environment.docker.gpu_support = True\n",
|
||||
"amlcompute_run_config.environment.docker.base_image = DEFAULT_GPU_IMAGE\n",
|
||||
"amlcompute_run_config.environment.spark.precache_packages = False"
|
||||
]
|
||||
|
||||
@@ -104,7 +104,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"sample-interactiveloginauth-tenantid"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.authentication import InteractiveLoginAuthentication\n",
|
||||
@@ -131,7 +135,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"sample-azurecliauth"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.authentication import AzureCliAuthentication\n",
|
||||
@@ -168,7 +176,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"sample-msiauth"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.authentication import MsiAuthentication\n",
|
||||
@@ -245,7 +257,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"sample-serviceprincipalauth-tenantid"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
@@ -300,7 +316,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"sample-keyvault"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os, uuid\n",
|
||||
|
||||
@@ -480,7 +480,7 @@
|
||||
],
|
||||
"category": "tutorial",
|
||||
"compute": [
|
||||
"remote"
|
||||
"Remote"
|
||||
],
|
||||
"datasets": [
|
||||
"NOAA"
|
||||
|
||||
@@ -100,7 +100,7 @@
|
||||
"\n",
|
||||
"# Check core SDK version number\n",
|
||||
"\n",
|
||||
"print(\"This notebook was created using SDK version 1.0.74.1, you are currently running version\", azureml.core.VERSION)"
|
||||
"print(\"This notebook was created using SDK version 1.0.76, you are currently running version\", azureml.core.VERSION)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -542,7 +542,9 @@
|
||||
"compute": [
|
||||
"None"
|
||||
],
|
||||
"datasets": [],
|
||||
"datasets": [
|
||||
"None"
|
||||
],
|
||||
"deployment": [
|
||||
"None"
|
||||
],
|
||||
|
||||
@@ -63,7 +63,6 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core import Workspace\n",
|
||||
"\n",
|
||||
"ws = Workspace.from_config()\n",
|
||||
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep='\\n')"
|
||||
]
|
||||
@@ -258,6 +257,16 @@
|
||||
"metrics = run.get_metrics()\n",
|
||||
"print(metrics)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# register the generated model\n",
|
||||
"model = run.register_model(model_name='iris.model', model_path='outputs/iris.model')\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -297,7 +306,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.7"
|
||||
"version": "3.6.2"
|
||||
},
|
||||
"tags": [
|
||||
"None"
|
||||
|
||||
@@ -76,6 +76,8 @@ train, test = data.randomSplit([0.70, 0.30])
|
||||
lr = pyspark.ml.classification.LogisticRegression(regParam=reg)
|
||||
model = lr.fit(train)
|
||||
|
||||
model.save(os.path.join("outputs", "iris.model"))
|
||||
|
||||
# predict on the test set
|
||||
prediction = model.transform(test)
|
||||
print("Prediction")
|
||||
|
||||
@@ -685,7 +685,7 @@
|
||||
"framework": [
|
||||
"None"
|
||||
],
|
||||
"friendly_name": "",
|
||||
"friendly_name": "Train and deploy a model using Python SDK",
|
||||
"index_order": 1,
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
|
||||
@@ -27,6 +27,7 @@
|
||||
"\n",
|
||||
"1. [Introduction](#Introduction)\n",
|
||||
"1. [Setup](#Setup)\n",
|
||||
"1. [Use curated environment](#Use-curated-environment)\n",
|
||||
"1. [Create environment](#Create-environment)\n",
|
||||
" 1. Add Python packages\n",
|
||||
" 1. Specify environment variables\n",
|
||||
@@ -36,6 +37,8 @@
|
||||
"1. [Other ways to create environments](#Other-ways-to-create-environments)\n",
|
||||
" 1. From existing Conda environment\n",
|
||||
" 1. From Conda or pip files\n",
|
||||
"1. [Estimators and environments](#Estimators-and-environments) \n",
|
||||
"1. [Using environments for inferencing](#Using-environments-for-inferencing)\n",
|
||||
"1. [Docker settings](#Docker-settings)\n",
|
||||
"1. [Spark and Azure Databricks settings](#Spark-and-Azure-Databricks-settings)\n",
|
||||
"1. [Next steps](#Next-steps)\n",
|
||||
@@ -84,7 +87,57 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create environment\n",
|
||||
"## Use curated environments\n",
|
||||
"\n",
|
||||
"Curated environments are provided by Azure Machine Learning and are available in your workspace by default. They contain collections of Python packages and settings to help you get started different machine learning frameworks. \n",
|
||||
"\n",
|
||||
" * The __AzureML-Minimal__ environment contains a minimal set of packages to enable run tracking and asset uploading. You can use it as a starting point for your own environment.\n",
|
||||
" * The __AzureML-Tutorial__ environment contains common data science packages, such as Scikit-Learn, Pandas and Matplotlib, and larger set of azureml-sdk packages.\n",
|
||||
" \n",
|
||||
"Curated environments are backed by cached Docker images, reducing the run preparation cost.\n",
|
||||
" \n",
|
||||
"You can get a curated environment using"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core import Environment\n",
|
||||
"\n",
|
||||
"curated_env = Environment.get(workspace=ws, name=\"AzureML-Minimal\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To list curated environments, use following code.\n",
|
||||
"\n",
|
||||
"**Note**: The name prefixes _AzureML_ and _Microsoft_ are reserved for curated environments. Do not use them for your own environments"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"envs = Environment.list(workspace=ws)\n",
|
||||
"\n",
|
||||
"for env in envs:\n",
|
||||
" if env.startswith(\"AzureML\"):\n",
|
||||
" print(\"Name\",env)\n",
|
||||
" print(\"packages\", envs[env].python.conda_dependencies.serialize_to_string())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create your own environment\n",
|
||||
"\n",
|
||||
"You can create an environment by instantiating ```Environment``` object and then setting its attributes: set of Python packages, environment variables and others.\n",
|
||||
"\n",
|
||||
@@ -99,7 +152,6 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core import Environment\n",
|
||||
"from azureml.core.environment import CondaDependencies\n",
|
||||
"\n",
|
||||
"myenv = Environment(name=\"myenv\")\n",
|
||||
@@ -185,6 +237,22 @@
|
||||
"run.wait_for_completion(show_output=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To audit the environment used by for a run, you can use ```get_environement```."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run.get_environment()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -256,6 +324,48 @@
|
||||
"```\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Estimators and environments\n",
|
||||
"\n",
|
||||
"[Estimators](https://docs.microsoft.com/azure/machine-learning/service/how-to-train-ml-models) are backed by environments that define the base images, Python packages and other settings for the training environment. \n",
|
||||
"\n",
|
||||
"For example, to see the environment behind PyTorch Estimator, you can create a dummy instance of the Estimator, and look at the ```run_config.environment``` property."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.train.dnn import PyTorch\n",
|
||||
"\n",
|
||||
"pt = PyTorch(source_directory=\".\", compute_target=\"local\")\n",
|
||||
"pt.run_config.environment"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Using environments for inferencing\n",
|
||||
"\n",
|
||||
"You can re-use the training environment when you deploy your model as a web service, by specifying inferencing stack version, and adding then environment to ```InferenceConfig```.\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"from azureml.core.model import InferenceConfig\n",
|
||||
"\n",
|
||||
"myenv.inferencing_stack_version = \"latest\"\n",
|
||||
"\n",
|
||||
"inference_config = InferenceConfig(entry_script=\"score.py\", environment=myenv)\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"See [Register Model and deploy as Webservice Notebook](../../deployment/deploy-to-cloud/model-register-and-deploy.ipynb) for an end-to-end example of web service deployment."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -299,7 +409,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can also specify whether to use GPU or shared volumes, and shm size."
|
||||
"You can also specify shared volumes, and shm size."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -308,7 +418,6 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"myenv.docker.gpu_support\n",
|
||||
"myenv.docker.shared_volumes\n",
|
||||
"myenv.docker.shm_size"
|
||||
]
|
||||
@@ -336,7 +445,7 @@
|
||||
"\n",
|
||||
"Learn more about registering and deploying a model:\n",
|
||||
"\n",
|
||||
"* [Model Register and Deploy](../../deploy-to-cloud/model-register-and-deploy.ipynb)"
|
||||
"* [Register Model and deploy as Webservice](../../deployment/deploy-to-cloud/model-register-and-deploy.ipynb)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -414,7 +414,7 @@
|
||||
],
|
||||
"category": "tutorial",
|
||||
"compute": [
|
||||
"remote"
|
||||
"Remote"
|
||||
],
|
||||
"datasets": [
|
||||
"NOAA"
|
||||
|
||||
@@ -522,7 +522,7 @@
|
||||
],
|
||||
"category": "tutorial",
|
||||
"compute": [
|
||||
"local"
|
||||
"Local"
|
||||
],
|
||||
"datasets": [
|
||||
"NOAA"
|
||||
|
||||
@@ -606,11 +606,11 @@
|
||||
],
|
||||
"category": "tutorial",
|
||||
"compute": [
|
||||
"remote"
|
||||
"Remote"
|
||||
],
|
||||
"datasets": [
|
||||
"Iris",
|
||||
"Daibetes"
|
||||
"Diabetes"
|
||||
],
|
||||
"deployment": [
|
||||
"None"
|
||||
|
||||
@@ -1,174 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
import azureml.core
|
||||
print('SDK version: ' + azureml.core.VERSION)
|
||||
|
||||
# PREREQ: load workspace info
|
||||
# import azureml.core
|
||||
|
||||
# <loadWorkspace>
|
||||
from azureml.core import Workspace
|
||||
ws = Workspace.from_config()
|
||||
# </loadWorkspace>
|
||||
|
||||
scorepy_content = "import json\nimport numpy as np\nimport os\nimport pickle\nfrom sklearn.externals import joblib\nfrom sklearn.linear_model import LogisticRegression\n\nfrom azureml.core.model import Model\n\ndef init():\n global model\n # retreive the path to the model file using the model name\n model_path = Model.get_model_path('sklearn_mnist')\n model = joblib.load(model_path)\n\ndef run(raw_data):\n data = np.array(json.loads(raw_data)['data'])\n # make prediction\n y_hat = model.predict(data)\n return json.dumps(y_hat.tolist())"
|
||||
print(scorepy_content)
|
||||
with open("score.py","w") as f:
|
||||
f.write(scorepy_content)
|
||||
|
||||
|
||||
# PREREQ: create environment file
|
||||
from azureml.core.conda_dependencies import CondaDependencies
|
||||
|
||||
myenv = CondaDependencies()
|
||||
myenv.add_conda_package("scikit-learn")
|
||||
|
||||
with open("myenv.yml","w") as f:
|
||||
f.write(myenv.serialize_to_string())
|
||||
|
||||
#<configImage>
|
||||
from azureml.core.image import ContainerImage
|
||||
|
||||
image_config = ContainerImage.image_configuration(execution_script = "score.py",
|
||||
runtime = "python",
|
||||
conda_file = "myenv.yml",
|
||||
description = "Image with mnist model",
|
||||
tags = {"data": "mnist", "type": "classification"}
|
||||
)
|
||||
#</configImage>
|
||||
|
||||
# <configAci>
|
||||
from azureml.core.webservice import AciWebservice
|
||||
|
||||
aciconfig = AciWebservice.deploy_configuration(cpu_cores = 1,
|
||||
memory_gb = 1,
|
||||
tags = {"data": "mnist", "type": "classification"},
|
||||
description = 'Handwriting recognition')
|
||||
# </configAci>
|
||||
|
||||
#<registerModel>
|
||||
from azureml.core.model import Model
|
||||
|
||||
model_name = "sklearn_mnist"
|
||||
model = Model.register(model_path = "sklearn_mnist_model.pkl",
|
||||
model_name = model_name,
|
||||
tags = {"data": "mnist", "type": "classification"},
|
||||
description = "Mnist handwriting recognition",
|
||||
workspace = ws)
|
||||
#</registerModel>
|
||||
|
||||
# <retrieveModel>
|
||||
from azureml.core.model import Model
|
||||
|
||||
model_name = "sklearn_mnist"
|
||||
model=Model(ws, model_name)
|
||||
# </retrieveModel>
|
||||
|
||||
|
||||
# ## DEPLOY FROM REGISTERED MODEL
|
||||
|
||||
# <option2Deploy>
|
||||
from azureml.core.webservice import Webservice
|
||||
|
||||
service_name = 'aci-mnist-2'
|
||||
service = Webservice.deploy_from_model(deployment_config = aciconfig,
|
||||
image_config = image_config,
|
||||
models = [model], # this is the registered model object
|
||||
name = service_name,
|
||||
workspace = ws)
|
||||
service.wait_for_deployment(show_output = True)
|
||||
print(service.state)
|
||||
# </option2Deploy>
|
||||
|
||||
service.delete()
|
||||
|
||||
# ## DEPLOY FROM IMAGE
|
||||
|
||||
|
||||
# <option3CreateImage>
|
||||
from azureml.core.image import ContainerImage
|
||||
|
||||
image = ContainerImage.create(name = "myimage1",
|
||||
models = [model], # this is the registered model object
|
||||
image_config = image_config,
|
||||
workspace = ws)
|
||||
|
||||
image.wait_for_creation(show_output = True)
|
||||
# </option3CreateImage>
|
||||
|
||||
# <option3Deploy>
|
||||
from azureml.core.webservice import Webservice
|
||||
|
||||
service_name = 'aci-mnist-13'
|
||||
service = Webservice.deploy_from_image(deployment_config = aciconfig,
|
||||
image = image,
|
||||
name = service_name,
|
||||
workspace = ws)
|
||||
service.wait_for_deployment(show_output = True)
|
||||
print(service.state)
|
||||
# </option3Deploy>
|
||||
|
||||
service.delete()
|
||||
|
||||
|
||||
# ## DEPLOY FROM MODEL FILE
|
||||
# First change score.py!
|
||||
|
||||
|
||||
|
||||
scorepy_content = "import json\nimport numpy as np\nimport os\nimport pickle\nfrom sklearn.externals import joblib\nfrom sklearn.linear_model import LogisticRegression\n\nfrom azureml.core.model import Model\n\ndef init():\n global model\n # retreive the path to the model file using the model name\n model_path = Model.get_model_path('sklearn_mnist_model.pkl')\n model = joblib.load(model_path)\n\ndef run(raw_data):\n data = np.array(json.loads(raw_data)['data'])\n # make prediction\n y_hat = model.predict(data)\n return json.dumps(y_hat.tolist())"
|
||||
with open("score.py","w") as f:
|
||||
f.write(scorepy_content)
|
||||
|
||||
|
||||
|
||||
# <option1Deploy>
|
||||
from azureml.core.webservice import Webservice
|
||||
|
||||
service_name = 'aci-mnist-1'
|
||||
service = Webservice.deploy(deployment_config = aciconfig,
|
||||
image_config = image_config,
|
||||
model_paths = ['sklearn_mnist_model.pkl'],
|
||||
name = service_name,
|
||||
workspace = ws)
|
||||
|
||||
service.wait_for_deployment(show_output = True)
|
||||
print(service.state)
|
||||
# </option1Deploy>
|
||||
|
||||
# <testService>
|
||||
# Load Data
|
||||
import os
|
||||
import urllib
|
||||
|
||||
os.makedirs('./data', exist_ok = True)
|
||||
|
||||
urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz', filename = './data/test-images.gz')
|
||||
|
||||
from utils import load_data
|
||||
X_test = load_data('./data/test-images.gz', False) / 255.0
|
||||
|
||||
from sklearn import datasets
|
||||
import numpy as np
|
||||
import json
|
||||
|
||||
# find 5 random samples from test set
|
||||
n = 5
|
||||
sample_indices = np.random.permutation(X_test.shape[0])[0:n]
|
||||
|
||||
test_samples = json.dumps({"data": X_test[sample_indices].tolist()})
|
||||
test_samples = bytes(test_samples, encoding = 'utf8')
|
||||
|
||||
# predict using the deployed model
|
||||
prediction = service.run(input_data = test_samples)
|
||||
print(prediction)
|
||||
# </testService>
|
||||
|
||||
# <deleteService>
|
||||
service.delete()
|
||||
# </deleteService>
|
||||
|
||||
|
||||
|
||||
|
||||
Binary file not shown.
@@ -1,27 +0,0 @@
|
||||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
import gzip
|
||||
import numpy as np
|
||||
import struct
|
||||
|
||||
|
||||
# load compressed MNIST gz files and return numpy arrays
|
||||
def load_data(filename, label=False):
|
||||
with gzip.open(filename) as gz:
|
||||
struct.unpack('I', gz.read(4))
|
||||
n_items = struct.unpack('>I', gz.read(4))
|
||||
if not label:
|
||||
n_rows = struct.unpack('>I', gz.read(4))[0]
|
||||
n_cols = struct.unpack('>I', gz.read(4))[0]
|
||||
res = np.frombuffer(gz.read(n_items[0] * n_rows * n_cols), dtype=np.uint8)
|
||||
res = res.reshape(n_items[0], n_rows * n_cols)
|
||||
else:
|
||||
res = np.frombuffer(gz.read(n_items[0]), dtype=np.uint8)
|
||||
res = res.reshape(n_items[0], 1)
|
||||
return res
|
||||
|
||||
|
||||
# one-hot encode a 1-D array
|
||||
def one_hot_encode(array, num_of_classes):
|
||||
return np.eye(num_of_classes)[array.reshape(-1)]
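
# Example usage (a hedged sketch; file paths are placeholders and mirror how the
# training and deployment scripts in this repo call these helpers):
# X = load_data('./data/test-images.gz', False) / 255.0
# y = load_data('./data/test-labels.gz', True).reshape(-1)
# y_onehot = one_hot_encode(y, 10)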
|
||||
@@ -1,39 +0,0 @@
|
||||
# Code for Local computer and Submit training run sections
|
||||
|
||||
# Check core SDK version number
|
||||
import azureml.core
|
||||
|
||||
print("SDK version:", azureml.core.VERSION)
|
||||
|
||||
#<run_local>
|
||||
from azureml.core.runconfig import RunConfiguration
|
||||
|
||||
# Edit a run configuration property on the fly.
|
||||
run_local = RunConfiguration()
|
||||
|
||||
run_local.environment.python.user_managed_dependencies = True
|
||||
#</run_local>
|
||||
|
||||
from azureml.core import Workspace
|
||||
ws = Workspace.from_config()
|
||||
|
||||
|
||||
# Set up an experiment
|
||||
# <experiment>
|
||||
from azureml.core import Experiment
|
||||
experiment_name = 'my_experiment'
|
||||
|
||||
exp = Experiment(workspace=ws, name=experiment_name)
|
||||
# </experiment>
|
||||
|
||||
# Submit the experiment using the run configuration
|
||||
#<local_submit>
|
||||
from azureml.core import ScriptRunConfig
|
||||
import os
|
||||
|
||||
script_folder = os.getcwd()
|
||||
src = ScriptRunConfig(source_directory = script_folder, script = 'train.py', run_config = run_local)
|
||||
run = exp.submit(src)
|
||||
run.wait_for_completion(show_output = True)
|
||||
#</local_submit>
|
||||
|
||||
@@ -1,48 +0,0 @@
|
||||
# Code for Azure Machine Learning Compute - Run-based creation
|
||||
|
||||
# Check core SDK version number
|
||||
import azureml.core
|
||||
|
||||
print("SDK version:", azureml.core.VERSION)
|
||||
|
||||
|
||||
from azureml.core import Workspace
|
||||
ws = Workspace.from_config()
|
||||
|
||||
|
||||
# Set up an experiment
|
||||
from azureml.core import Experiment
|
||||
experiment_name = 'my-experiment'
|
||||
script_folder= "./"
|
||||
|
||||
exp = Experiment(workspace=ws, name=experiment_name)
|
||||
|
||||
|
||||
#<run_temp_compute>
|
||||
from azureml.core.compute import ComputeTarget, AmlCompute
|
||||
|
||||
# First, list the supported VM families for Azure Machine Learning Compute
|
||||
print(AmlCompute.supported_vmsizes(workspace=ws))
|
||||
|
||||
from azureml.core.runconfig import RunConfiguration
|
||||
# Create a new runconfig object
|
||||
run_temp_compute = RunConfiguration()
|
||||
|
||||
# Signal that you want to use AmlCompute to execute the script
|
||||
run_temp_compute.target = "amlcompute"
|
||||
|
||||
# AmlCompute is created in the same region as your workspace
|
||||
# Set the VM size for AmlCompute from the list of supported_vmsizes
|
||||
run_temp_compute.amlcompute.vm_size = 'STANDARD_D2_V2'
|
||||
#</run_temp_compute>
|
||||
|
||||
|
||||
# Submit the experiment using the run configuration
|
||||
from azureml.core import ScriptRunConfig
|
||||
|
||||
src = ScriptRunConfig(source_directory = script_folder, script = 'train.py', run_config = run_temp_compute)
|
||||
run = exp.submit(src)
|
||||
run.wait_for_completion(show_output = True)
|
||||
|
||||
|
||||
|
||||
@@ -1,69 +0,0 @@
|
||||
# Code for Azure Machine Learning Compute - Persistent compute
|
||||
|
||||
# Check core SDK version number
|
||||
import azureml.core
|
||||
|
||||
print("SDK version:", azureml.core.VERSION)
|
||||
|
||||
from azureml.core import Workspace
|
||||
ws = Workspace.from_config()
|
||||
|
||||
|
||||
# Set up an experiment
|
||||
from azureml.core import Experiment
|
||||
experiment_name = 'my-experiment'
|
||||
script_folder= "./"
|
||||
|
||||
exp = Experiment(workspace=ws, name=experiment_name)
|
||||
|
||||
#<cpu_cluster>
|
||||
from azureml.core.compute import ComputeTarget, AmlCompute
|
||||
from azureml.core.compute_target import ComputeTargetException
|
||||
|
||||
# Choose a name for your CPU cluster
|
||||
cpu_cluster_name = "cpucluster"
|
||||
|
||||
# Verify that cluster does not exist already
|
||||
try:
|
||||
cpu_cluster = ComputeTarget(workspace=ws, name=cpu_cluster_name)
|
||||
print('Found existing cluster, use it.')
|
||||
except ComputeTargetException:
|
||||
compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',
|
||||
max_nodes=4)
|
||||
cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, compute_config)
|
||||
|
||||
cpu_cluster.wait_for_completion(show_output=True)
|
||||
#</cpu_cluster>
|
||||
|
||||
#<run_amlcompute>
|
||||
from azureml.core.runconfig import RunConfiguration
|
||||
from azureml.core.conda_dependencies import CondaDependencies
|
||||
from azureml.core.runconfig import DEFAULT_CPU_IMAGE
|
||||
|
||||
# Create a new runconfig object
|
||||
run_amlcompute = RunConfiguration()
|
||||
|
||||
# Use the cpu_cluster you created above.
|
||||
run_amlcompute.target = cpu_cluster
|
||||
|
||||
# Enable Docker
|
||||
run_amlcompute.environment.docker.enabled = True
|
||||
|
||||
# Set Docker base image to the default CPU-based image
|
||||
run_amlcompute.environment.docker.base_image = DEFAULT_CPU_IMAGE
|
||||
|
||||
# Use conda_dependencies.yml to create a conda environment in the Docker image for execution
|
||||
run_amlcompute.environment.python.user_managed_dependencies = False
|
||||
|
||||
# Specify CondaDependencies obj, add necessary packages
|
||||
run_amlcompute.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])
|
||||
#</run_amlcompute>
|
||||
|
||||
# Submit the experiment using the run configuration
|
||||
#<amlcompute_submit>
|
||||
from azureml.core import ScriptRunConfig
|
||||
|
||||
src = ScriptRunConfig(source_directory = script_folder, script = 'train.py', run_config = run_amlcompute)
|
||||
run = exp.submit(src)
|
||||
run.wait_for_completion(show_output = True)
|
||||
#</amlcompute_submit>
|
||||
@@ -1,26 +0,0 @@
|
||||
# Code for Remote virtual machines
|
||||
|
||||
compute_target_name = "sheri-linuxvm"
|
||||
|
||||
#<run_dsvm>
|
||||
import azureml.core
|
||||
from azureml.core.runconfig import RunConfiguration
|
||||
from azureml.core.conda_dependencies import CondaDependencies
|
||||
|
||||
run_dsvm = RunConfiguration(framework = "python")
|
||||
|
||||
# Set the compute target to the Linux DSVM
|
||||
run_dsvm.target = compute_target_name
|
||||
|
||||
# Use Docker in the remote VM
|
||||
run_dsvm.environment.docker.enabled = True
|
||||
|
||||
# Use the CPU base image
|
||||
# To use GPU in DSVM, you must also use the GPU base Docker image "azureml.core.runconfig.DEFAULT_GPU_IMAGE"
|
||||
run_dsvm.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE
|
||||
print('Base Docker image is:', run_dsvm.environment.docker.base_image)
|
||||
|
||||
# Specify the CondaDependencies object
|
||||
run_dsvm.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])
|
||||
#</run_dsvm>
|
||||
print(run_dsvm)
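
# A hedged usage sketch: submit a script with this run configuration
# (assumes a workspace and a train.py set up as in the neighbouring snippets).
# from azureml.core import Workspace, Experiment, ScriptRunConfig
# ws = Workspace.from_config()
# src = ScriptRunConfig(source_directory=".", script="train.py", run_config=run_dsvm)
# run = Experiment(workspace=ws, name="my-experiment").submit(src)
# run.wait_for_completion(show_output=True)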
|
||||
@@ -1,27 +0,0 @@
|
||||
|
||||
from azureml.core import Workspace
|
||||
ws = Workspace.from_config()
|
||||
|
||||
from azureml.core.compute import ComputeTarget
|
||||
|
||||
# refers to an existing compute resource attached to the workspace!
|
||||
hdi_compute = ComputeTarget(workspace=ws, name='sherihdi')
|
||||
|
||||
|
||||
#<run_hdi>
|
||||
from azureml.core.runconfig import RunConfiguration
|
||||
from azureml.core.conda_dependencies import CondaDependencies
|
||||
|
||||
|
||||
# use pyspark framework
|
||||
run_hdi = RunConfiguration(framework="pyspark")
|
||||
|
||||
# Set compute target to the HDI cluster
|
||||
run_hdi.target = hdi_compute.name
|
||||
|
||||
# specify the CondaDependencies object to ask the system to install numpy
|
||||
cd = CondaDependencies()
|
||||
cd.add_conda_package('numpy')
|
||||
run_hdi.environment.python.conda_dependencies = cd
|
||||
#</run_hdi>
|
||||
print(run_hdi)
|
||||
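# Not in the original file: a hedged sketch of how this PySpark run configuration
# would typically be submitted, assuming an experiment name and a train-spark.py
# script in a local `script_folder` (both placeholders):
from azureml.core import Experiment, ScriptRunConfig

src = ScriptRunConfig(source_directory=script_folder,
                      script='train-spark.py',
                      run_config=run_hdi)
run = Experiment(workspace=ws, name='train-on-hdi').submit(src)
run.wait_for_completion(show_output=True)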
@@ -1,9 +0,0 @@

# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license.

import numpy as np


def get_alphas():
    # List of numbers from 0.0 to 1.0 with a 0.05 interval
    return np.arange(0.0, 1.0, 0.05)
@@ -1,52 +0,0 @@

# Code for Remote virtual machines

compute_target_name = "attach-dsvm"

#<run_dsvm>
import azureml.core
from azureml.core.runconfig import RunConfiguration, DEFAULT_CPU_IMAGE
from azureml.core.conda_dependencies import CondaDependencies

run_dsvm = RunConfiguration(framework="python")

# Set the compute target to the Linux DSVM
run_dsvm.target = compute_target_name

# Use Docker in the remote VM
run_dsvm.environment.docker.enabled = True

# Use the CPU base image
# To use GPU in the DSVM, you must also use the GPU base Docker image "azureml.core.runconfig.DEFAULT_GPU_IMAGE"
run_dsvm.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE
print('Base Docker image is:', run_dsvm.environment.docker.base_image)

# Prepare the Docker and conda environment automatically when they're used for the first time
run_dsvm.prepare_environment = True

# Specify the CondaDependencies object
run_dsvm.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])
#</run_dsvm>

hdi_compute.name = "blah"
from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies


# Use the PySpark framework
hdi_run_config = RunConfiguration(framework="pyspark")

# Set the compute target to the HDI cluster
hdi_run_config.target = hdi_compute.name

# Specify a CondaDependencies object so the system installs numpy
cd = CondaDependencies()
cd.add_conda_package('numpy')
hdi_run_config.environment.python.conda_dependencies = cd

#<run_hdi>
from azureml.core.runconfig import RunConfiguration
# Configure the HDInsight run
# Load the runconfig object from the myhdi.runconfig file generated in the previous attach operation
run_hdi = RunConfiguration.load(project_object=project, run_name='myhdi')

# Ask the system to prepare the conda environment automatically when it's used for the first time
run_hdi.auto_prepare_environment = True
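# Note (not in the original file): the project_object/run_name form of
# RunConfiguration.load used above comes from an older SDK. In recent SDK versions
# the equivalent is loading by folder path and configuration name; a hedged sketch,
# assuming the attach step saved a configuration named 'myhdi' in the project folder:
from azureml.core.runconfig import RunConfiguration

run_hdi = RunConfiguration.load(path='.', name='myhdi')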
@@ -1,25 +0,0 @@

# Code for What's a run configuration

# <run_system_managed>
from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies

run_system_managed = RunConfiguration()

# Specify the conda dependencies with scikit-learn
run_system_managed.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])
# </run_system_managed>
print(run_system_managed)


# <run_user_managed>
from azureml.core.runconfig import RunConfiguration

run_user_managed = RunConfiguration()
run_user_managed.environment.python.user_managed_dependencies = True

# Choose a specific Python environment by pointing to a Python path. For example:
# run_config.environment.python.interpreter_path = '/home/ninghai/miniconda3/envs/sdk2/bin/python'
# </run_user_managed>
print(run_user_managed)
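# Not part of the original snippet: either run configuration above takes effect
# once it is attached to a ScriptRunConfig and submitted. A minimal sketch,
# assuming a Workspace `ws` and a train.py next to this file (both placeholders):
from azureml.core import Experiment, ScriptRunConfig

src = ScriptRunConfig(source_directory='.', script='train.py',
                      run_config=run_system_managed)
run = Experiment(workspace=ws, name='runconfig-demo').submit(src)
run.wait_for_completion(show_output=True)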
@@ -1,45 +0,0 @@

# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license.

from sklearn.datasets import load_diabetes
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from azureml.core.run import Run
from sklearn.externals import joblib
import os
import numpy as np
import mylib

os.makedirs('./outputs', exist_ok=True)

X, y = load_diabetes(return_X_y=True)

run = Run.get_context()

X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.2,
                                                    random_state=0)
data = {"train": {"X": X_train, "y": y_train},
        "test": {"X": X_test, "y": y_test}}

# List of numbers from 0.0 to 1.0 with a 0.05 interval
alphas = mylib.get_alphas()

for alpha in alphas:
    # Use the Ridge algorithm to create a regression model
    reg = Ridge(alpha=alpha)
    reg.fit(data["train"]["X"], data["train"]["y"])

    preds = reg.predict(data["test"]["X"])
    mse = mean_squared_error(preds, data["test"]["y"])
    run.log('alpha', alpha)
    run.log('mse', mse)

    model_file_name = 'ridge_{0:.2f}.pkl'.format(alpha)
    # Save the model in the outputs folder so it automatically gets uploaded
    with open(model_file_name, "wb") as file:
        joblib.dump(value=reg, filename=os.path.join('./outputs/',
                                                     model_file_name))

    print('alpha is {0:.2f}, and mse is {1:0.2f}'.format(alpha, mse))
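# Not part of train.py: after a submitted run of this script completes, one of the
# models saved under outputs/ can be registered from the run. A hedged sketch,
# assuming the `run` handle of a completed submission and an alpha of 0.40:
model = run.register_model(model_name='diabetes-ridge',
                           model_path='outputs/ridge_0.40.pkl')
print(model.name, model.version)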
@@ -1,55 +0,0 @@

# code snippets for the quickstart-create-workspace-with-python article
# <import>
import azureml.core
print(azureml.core.VERSION)
# </import>

# this is NOT a snippet. If this code changes, go fix it in the article!
from azureml.core import Workspace
ws = Workspace.create(name='myworkspace',
                      subscription_id='<subscription-id>',
                      resource_group='myresourcegroup',
                      create_resource_group=True,
                      location='eastus2'  # or other supported Azure region
                      )

# <getDetails>
ws.get_details()
# </getDetails>

# <writeConfig>
# Create the configuration file.
ws.write_config()

# Use this code to load the workspace from
# other scripts and notebooks in this directory.
# ws = Workspace.from_config()
# </writeConfig>

# <useWs>
from azureml.core import Experiment

# Create a new experiment in your workspace.
exp = Experiment(workspace=ws, name='myexp')

# Start a run and start the logging service.
run = exp.start_logging()

# Log a single number.
run.log('my magic number', 42)

# Log a list (Fibonacci numbers).
run.log_list('my list', [1, 1, 2, 3, 5, 8, 13, 21, 34, 55])

# Finish the run.
run.complete()
# </useWs>

# <viewLog>
print(run.get_portal_url())
# </viewLog>


# <delete>
ws.delete(delete_dependent_resources=True)
# </delete>
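# Not in the quickstart snippet: the values logged above can be read back from the
# completed run (before the workspace is deleted). A minimal sketch:
print(run.get_metrics())   # e.g. {'my magic number': 42, 'my list': [1, 1, 2, ...]}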
140
index.md
@@ -10,6 +10,7 @@ Machine Learning notebook samples and encourage efficient retrieval of topics an
|
||||
|Title| Task | Dataset | Training Compute | Deployment Target | ML Framework | Tags |
|
||||
|:----|:-----|:-------:|:----------------:|:-----------------:|:------------:|:------------:|
|
||||
| [Using Azure ML environments](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/training/using-environments/using-environments.ipynb) | Creating and registering environments | None | Local | None | None | None |
|
||||
|
||||
| [Estimators in AML with hyperparameter tuning](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/training-with-deep-learning/how-to-use-estimator/how-to-use-estimator.ipynb) | Use the Estimator pattern in Azure Machine Learning SDK | None | AML Compute | None | None | None |
|
||||
|
||||
|
||||
@@ -17,35 +18,64 @@ Machine Learning notebook samples and encourage efficient retrieval of topics an
|
||||
|
||||
|Title| Task | Dataset | Training Compute | Deployment Target | ML Framework | Tags |
|
||||
|:----|:-----|:-------:|:----------------:|:-----------------:|:------------:|:------------:|
|
||||
| [Forecasting BikeShare Demand](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/forecasting-bike-share/auto-ml-forecasting-bike-share.ipynb) | forecasting | BikeShare | remote | None | Azure ML AutoML | Forecasting |
|
||||
| [Forecasting orange juice sales with deployment](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/forecasting-orange-juice-sales/auto-ml-forecasting-orange-juice-sales.ipynb) | Forecasting | Orange Juice Sales | remote | Azure Container Instance | Azure ML AutoML | |
|
||||
| [Forecasting BikeShare Demand](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/forecasting-bike-share/auto-ml-forecasting-bike-share.ipynb) | Forecasting | BikeShare | Remote | None | Azure ML AutoML | Forecasting |
|
||||
|
||||
| [Forecasting orange juice sales with deployment](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/forecasting-orange-juice-sales/auto-ml-forecasting-orange-juice-sales.ipynb) | Forecasting | Orange Juice Sales | Remote | Azure Container Instance | Azure ML AutoML | None |
|
||||
|
||||
| [Forecasting with automated ML SQL integration](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/sql-server/energy-demand/auto-ml-sql-energy-demand.ipynb) | Forecasting | NYC Energy | Local | None | Azure ML AutoML | |
|
||||
|
||||
| [Setup automated ML SQL integration](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/sql-server/setup/auto-ml-sql-setup.ipynb) | None | None | None | None | Azure ML AutoML | |
|
||||
| [Register a model and deploy locally](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/deployment/deploy-to-local/register-model-deploy-local.ipynb) | Deployment | | local | Local | None | None |
|
||||
| :star:[Data drift on aks](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/monitor-models/data-drift/drift-on-aks.ipynb) | Filtering | NOAA | remote | AKS | Azure ML | Dataset, Timeseries, Drift |
|
||||
| [](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/training/train-within-notebook/train-within-notebook.ipynb) | Training and deploying a model from a notebook | Diabetes | Local | Azure Container Instance | None | None |
|
||||
| :star:[Data drift quickdemo](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/datadrift-tutorial/datadrift-tutorial.ipynb) | Filtering | NOAA | remote | None | Azure ML | Dataset, Timeseries, Drift |
|
||||
| :star:[Filtering data using Tabular Timeseiries Dataset related API](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/datasets-tutorial/tabular-timeseries-dataset-filtering.ipynb) | Filtering | NOAA | local | None | Azure ML | Dataset, Tabular Timeseries |
|
||||
| :star:[Train with Datasets (Tabular and File)](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/datasets-tutorial/train-with-datasets.ipynb) | Filtering | Iris, Daibetes | remote | None | Azure ML | Dataset |
|
||||
| [Forecasting away from training data](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/forecasting-high-frequency/automl-forecasting-function.ipynb) | forecasting | None | remote | None | Azure ML AutoML | Forecasting, Confidence Intervals |
|
||||
|
||||
| [Register a model and deploy locally](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/deployment/deploy-to-local/register-model-deploy-local.ipynb) | Deployment | None | Local | Local | None | None |
|
||||
|
||||
| :star:[Data drift on aks](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/monitor-models/data-drift/drift-on-aks.ipynb) | Filtering | NOAA | Remote | AKS | Azure ML | Dataset, Timeseries, Drift |
|
||||
|
||||
| [Train and deploy a model using Python SDK](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/training/train-within-notebook/train-within-notebook.ipynb) | Training and deploying a model from a notebook | Diabetes | Local | Azure Container Instance | None | None |
|
||||
|
||||
| :star:[Data drift quickdemo](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/datadrift-tutorial/datadrift-tutorial.ipynb) | Filtering | NOAA | Remote | None | Azure ML | Dataset, Timeseries, Drift |
|
||||
|
||||
| :star:[Filtering data using Tabular Timeseries Dataset related API](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/datasets-tutorial/tabular-timeseries-dataset-filtering.ipynb) | Filtering | NOAA | Local | None | Azure ML | Dataset, Tabular Timeseries |
|
||||
|
||||
| :star:[Train with Datasets (Tabular and File)](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/datasets-tutorial/train-with-datasets.ipynb) | Filtering | Iris, Diabetes | Remote | None | Azure ML | Dataset |
|
||||
|
||||
| [Forecasting away from training data](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/forecasting-high-frequency/automl-forecasting-function.ipynb) | Forecasting | None | Remote | None | Azure ML AutoML | Forecasting, Confidence Intervals |
|
||||
|
||||
| [Automated ML run with basic edition features.](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/classification-bank-marketing-all-features/auto-ml-classification-bank-marketing-all-features.ipynb) | Classification | Bankmarketing | AML | ACI | None | featurization, explainability, remote_run, AutomatedML |
|
||||
| [Classification of credit card fraudulent transactions using Automated ML](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/classification-credit-card-fraud/auto-ml-classification-credit-card-fraud.ipynb) | Classification | creditcard | AML Compute | None | None | remote_run, AutomatedML |
|
||||
|
||||
| [Classification of credit card fraudulent transactions using Automated ML](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/classification-credit-card-fraud/auto-ml-classification-credit-card-fraud.ipynb) | Classification | Creditcard | AML Compute | None | None | remote_run, AutomatedML |
|
||||
|
||||
| [Automated ML run with featurization and model explainability.](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/regression-hardware-performance-explanation-and-featurization/auto-ml-regression-hardware-performance-explanation-and-featurization.ipynb) | Regression | MachineData | AML | ACI | None | featurization, explainability, remote_run, AutomatedML |
|
||||
|
||||
| [Use MLflow with Azure Machine Learning for training and deployment](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/track-and-monitor-experiments/using-mlflow/train-deploy-pytorch/train-and-deploy-pytorch.ipynb) | Use MLflow with Azure Machine Learning to train and deploy a PyTorch image classifier model | MNIST | AML Compute | Azure Container Instance | PyTorch | None |
|
||||
|
||||
| :star:[Azure Machine Learning Pipeline with DataTransferStep](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-data-transfer.ipynb) | Demonstrates the use of DataTransferStep | Custom | ADF | None | Azure ML | None |
|
||||
|
||||
| [Getting Started with Azure Machine Learning Pipelines](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-getting-started.ipynb) | Getting Started notebook for AML Pipelines | Custom | AML Compute | None | Azure ML | None |
|
||||
|
||||
| [Azure Machine Learning Pipeline with AzureBatchStep](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-how-to-use-azurebatch-to-run-a-windows-executable.ipynb) | Demonstrates the use of AzureBatchStep | Custom | Azure Batch | None | Azure ML | None |
|
||||
|
||||
| [Azure Machine Learning Pipeline with EstimatorStep](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-how-to-use-estimatorstep.ipynb) | Demonstrates the use of EstimatorStep | Custom | AML Compute | None | Azure ML | None |
|
||||
|
||||
| :star:[How to use ModuleStep with AML Pipelines](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-how-to-use-modulestep.ipynb) | Demonstrates the use of ModuleStep | Custom | AML Compute | None | Azure ML | None |
|
||||
|
||||
| :star:[How to use Pipeline Drafts to create a Published Pipeline](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-how-to-use-pipeline-drafts.ipynb) | Demonstrates the use of Pipeline Drafts | Custom | AML Compute | None | Azure ML | None |
|
||||
|
||||
| :star:[Azure Machine Learning Pipeline with HyperDriveStep](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-parameter-tuning-with-hyperdrive.ipynb) | Demonstrates the use of HyperDriveStep | Custom | AML Compute | None | Azure ML | None |
|
||||
|
||||
| :star:[How to Publish a Pipeline and Invoke the REST endpoint](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-publish-and-run-using-rest-endpoint.ipynb) | Demonstrates the use of Published Pipelines | Custom | AML Compute | None | Azure ML | None |
|
||||
|
||||
| :star:[How to Setup a Schedule for a Published Pipeline](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-setup-schedule-for-a-published-pipeline.ipynb) | Demonstrates the use of Schedules for Published Pipelines | Custom | AML Compute | None | Azure ML | None |
|
||||
|
||||
| [How to setup a versioned Pipeline Endpoint](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-setup-versioned-pipeline-endpoints.ipynb) | Demonstrates the use of PipelineEndpoint to run a specific version of the Published Pipeline | Custom | AML Compute | None | Azure ML | None |
|
||||
|
||||
| :star:[How to use DataPath as a PipelineParameter](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-showcasing-datapath-and-pipelineparameter.ipynb) | Demonstrates the use of DataPath as a PipelineParameter | Custom | AML Compute | None | Azure ML | None |
|
||||
|
||||
| [How to use AdlaStep with AML Pipelines](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-use-adla-as-compute-target.ipynb) | Demonstrates the use of AdlaStep | Custom | Azure Data Lake Analytics | None | Azure ML | None |
|
||||
|
||||
| :star:[How to use DatabricksStep with AML Pipelines](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-use-databricks-as-compute-target.ipynb) | Demonstrates the use of DatabricksStep | Custom | Azure Databricks | None | Azure ML, Azure Databricks | None |
|
||||
|
||||
| :star:[How to use AutoMLStep with AML Pipelines](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-with-automated-machine-learning-step.ipynb) | Demonstrates the use of AutoMLStep | Custom | AML Compute | None | Automated Machine Learning | None |
|
||||
|
||||
| :star:[Azure Machine Learning Pipelines with Data Dependency](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-with-data-dependency-steps.ipynb) | Demonstrates how to construct a Pipeline with data dependency between steps | Custom | AML Compute | None | Azure ML | None |
|
||||
|
||||
|
||||
@@ -54,25 +84,45 @@ Machine Learning notebook samples and encourage efficient retrieval of topics an
|
||||
|Title| Task | Dataset | Training Compute | Deployment Target | ML Framework | Tags |
|
||||
|:----|:-----|:-------:|:----------------:|:-----------------:|:------------:|:------------:|
|
||||
| [Train a model with hyperparameter tuning](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/ml-frameworks/chainer/deployment/train-hyperparameter-tune-deploy-with-chainer/train-hyperparameter-tune-deploy-with-chainer.ipynb) | Train a Convolutional Neural Network (CNN) | MNIST | AML Compute | Azure Container Instance | Chainer | None |
|
||||
|
||||
| [Distributed Training with Chainer](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/ml-frameworks/chainer/training/distributed-chainer/distributed-chainer.ipynb) | Use the Chainer estimator to perform distributed training | MNIST | AML Compute | None | Chainer | None |
|
||||
|
||||
| [Training with hyperparameter tuning using PyTorch](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/ml-frameworks/pytorch/deployment/train-hyperparameter-tune-deploy-with-pytorch/train-hyperparameter-tune-deploy-with-pytorch.ipynb) | Train an image classification model using transfer learning with the PyTorch estimator | ImageNet | AML Compute | Azure Container Instance | PyTorch | None |
|
||||
|
||||
| [Distributed PyTorch](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/ml-frameworks/pytorch/training/distributed-pytorch-with-horovod/distributed-pytorch-with-horovod.ipynb) | Train a model using the distributed training via Horovod | MNIST | AML Compute | None | PyTorch | None |
|
||||
|
||||
| [Distributed training with PyTorch](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/ml-frameworks/pytorch/training/distributed-pytorch-with-nccl-gloo/distributed-pytorch-with-nccl-gloo.ipynb) | Train a model using distributed training via Nccl/Gloo | MNIST | AML Compute | None | PyTorch | None |
|
||||
|
||||
| [Training and hyperparameter tuning with Scikit-learn](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/ml-frameworks/scikit-learn/training/train-hyperparameter-tune-deploy-with-sklearn/train-hyperparameter-tune-deploy-with-sklearn.ipynb) | Train a support vector machine (SVM) to perform classification | Iris | AML Compute | None | Scikit-learn | None |
|
||||
|
||||
| [Training and hyperparameter tuning using the TensorFlow estimator](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/ml-frameworks/tensorflow/deployment/train-hyperparameter-tune-deploy-with-tensorflow/train-hyperparameter-tune-deploy-with-tensorflow.ipynb) | Train a deep neural network | MNIST | AML Compute | Azure Container Instance | TensorFlow | None |
|
||||
|
||||
| [Distributed training using TensorFlow with Horovod](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/ml-frameworks/tensorflow/training/distributed-tensorflow-with-horovod/distributed-tensorflow-with-horovod.ipynb) | Use the TensorFlow estimator to train a word2vec model | None | AML Compute | None | TensorFlow | None |
|
||||
|
||||
| [Distributed TensorFlow with parameter server](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/ml-frameworks/tensorflow/training/distributed-tensorflow-with-parameter-server/distributed-tensorflow-with-parameter-server.ipynb) | Use the TensorFlow estimator to train a model using distributed training | MNIST | AML Compute | None | TensorFlow | None |
|
||||
|
||||
| [Hyperparameter tuning and warm start using the TensorFlow estimator](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/ml-frameworks/tensorflow/training/hyperparameter-tune-and-warm-start-with-tensorflow/hyperparameter-tune-and-warm-start-with-tensorflow.ipynb) | Train a deep neural network | MNIST | AML Compute | Azure Container Instance | TensorFlow | None |
|
||||
|
||||
| [Resuming a model](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/ml-frameworks/tensorflow/training/train-tensorflow-resume-training/train-tensorflow-resume-training.ipynb) | Resume a model in TensorFlow from a previously submitted run | MNIST | AML Compute | None | TensorFlow | None |
|
||||
|
||||
| [Training in Spark](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/training/train-in-spark/train-in-spark.ipynb) | Submitting a run on a Spark cluster | None | HDI cluster | None | PySpark | None |
|
||||
|
||||
| [Train on Azure Machine Learning Compute](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/training/train-on-amlcompute/train-on-amlcompute.ipynb) | Submit a run on Azure Machine Learning Compute. | Diabetes | AML Compute | None | None | None |
|
||||
|
||||
| [Train on local compute](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/training/train-on-local/train-on-local.ipynb) | Train a model locally | Diabetes | Local | None | None | None |
|
||||
|
||||
| [Train in a remote Linux virtual machine](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/training/train-on-remote-vm/train-on-remote-vm.ipynb) | Configure and execute a run | Diabetes | Data Science Virtual Machine | None | None | None |
|
||||
|
||||
| [Using Tensorboard](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/training-with-deep-learning/export-run-history-to-tensorboard/export-run-history-to-tensorboard.ipynb) | Export the run history as Tensorboard logs | None | None | None | TensorFlow | None |
|
||||
|
||||
| [Train a DNN using hyperparameter tuning and deploying with Keras](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/training-with-deep-learning/train-hyperparameter-tune-deploy-with-keras/train-hyperparameter-tune-deploy-with-keras.ipynb) | Create a multi-class classifier | MNIST | AML Compute | Azure Container Instance | TensorFlow | None |
|
||||
|
||||
| [Managing your training runs](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/track-and-monitor-experiments/manage-runs/manage-runs.ipynb) | Monitor and complete runs | None | Local | None | None | None |
|
||||
|
||||
| [Tensorboard integration with run history](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/track-and-monitor-experiments/tensorboard/tensorboard.ipynb) | Run a TensorFlow job and view its Tensorboard output live | None | Local, DSVM, AML Compute | None | TensorFlow | None |
|
||||
|
||||
| [Use MLflow with AML for a local training run](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/track-and-monitor-experiments/using-mlflow/train-local/train-local.ipynb) | Use MLflow tracking APIs together with Azure Machine Learning for storing your metrics and artifacts | Diabetes | Local | None | None | None |
|
||||
|
||||
| [Use MLflow with AML for a remote training run](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/track-and-monitor-experiments/using-mlflow/train-remote/train-remote.ipynb) | Use MLflow tracking APIs together with AML for storing your metrics and artifacts | Diabetes | AML Compute | None | None | None |
|
||||
|
||||
|
||||
@@ -82,12 +132,18 @@ Machine Learning notebook samples and encourage efficient retrieval of topics an
|
||||
|
||||
|Title| Task | Dataset | Training Compute | Deployment Target | ML Framework | Tags |
|
||||
|:----|:-----|:-------:|:----------------:|:-----------------:|:------------:|:------------:|
|
||||
| [Deploy MNIST digit recognition with ONNX Runtime](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/deployment/onnx/onnx-inference-mnist-deploy.ipynb) | Image Classification | MNIST | local | Azure Container Instance | ONNX | ONNX Model Zoo |
|
||||
| [Deploy Facial Expression Recognition (FER+) with ONNX Runtime](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/deployment/onnx/onnx-inference-facial-expression-recognition-deploy.ipynb) | Facial Expression Recognition | Emotion FER | local | Azure Container Instance | ONNX | ONNX Model Zoo |
|
||||
| [Deploy MNIST digit recognition with ONNX Runtime](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/deployment/onnx/onnx-inference-mnist-deploy.ipynb) | Image Classification | MNIST | Local | Azure Container Instance | ONNX | ONNX Model Zoo |
|
||||
|
||||
| [Deploy Facial Expression Recognition (FER+) with ONNX Runtime](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/deployment/onnx/onnx-inference-facial-expression-recognition-deploy.ipynb) | Facial Expression Recognition | Emotion FER | Local | Azure Container Instance | ONNX | ONNX Model Zoo |
|
||||
|
||||
| :star:[Register model and deploy as webservice](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/deployment/deploy-to-cloud/model-register-and-deploy.ipynb) | Deploy a model with Azure Machine Learning | Diabetes | None | Azure Container Instance | Scikit-learn | None |
|
||||
|
||||
| [Train MNIST in PyTorch, convert, and deploy with ONNX Runtime](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/deployment/onnx/onnx-train-pytorch-aml-deploy-mnist.ipynb) | Image Classification | MNIST | AML Compute | Azure Container Instance | ONNX | ONNX Converter |
|
||||
| [Deploy ResNet50 with ONNX Runtime](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/deployment/onnx/onnx-modelzoo-aml-deploy-resnet50.ipynb) | Image Classification | ImageNet | local | Azure Container Instance | ONNX | ONNX Model Zoo |
|
||||
|
||||
| [Deploy ResNet50 with ONNX Runtime](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/deployment/onnx/onnx-modelzoo-aml-deploy-resnet50.ipynb) | Image Classification | ImageNet | Local | Azure Container Instance | ONNX | ONNX Model Zoo |
|
||||
|
||||
| [Deploy a model as a web service using MLflow](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/track-and-monitor-experiments/using-mlflow/deploy-model/deploy-model.ipynb) | Use MLflow with AML | Diabetes | None | Azure Container Instance | Scikit-learn | None |
|
||||
|
||||
| :star:[Convert and deploy TinyYolo with ONNX Runtime](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/deployment/onnx/onnx-convert-aml-deploy-tinyyolo.ipynb) | Object Detection | PASCAL VOC | local | Azure Container Instance | ONNX | ONNX Converter |
|
||||
|
||||
|
||||
@@ -95,55 +151,91 @@ Machine Learning notebook samples and encourage efficient retrieval of topics an
|
||||
## Other Notebooks
|
||||
|Title| Task | Dataset | Training Compute | Deployment Target | ML Framework | Tags |
|
||||
|:----|:-----|:-------:|:----------------:|:-----------------:|:------------:|:------------:|
|
||||
| [DNN Text Featurization](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/classification-text-dnn/auto-ml-classification-text-dnn.ipynb) | Text featurization using DNNs for classification | None | | None | None | None |
|
||||
| [DNN Text Featurization](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/classification-text-dnn/auto-ml-classification-text-dnn.ipynb) | Text featurization using DNNs for classification | None | AML Compute | None | None | None |
|
||||
|
||||
| [Automated ML Grouping with Pipeline.](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/forecasting-grouping/auto-ml-forecasting-grouping.ipynb) | Use AzureML Pipeline to trigger multiple Automated ML runs. | Orange Juice Sales | AML Compute | Azure Container Instance | Scikit-learn, PyTorch | AutomatedML |
|
||||
| [configuration](https://github.com/Azure/MachineLearningNotebooks/blob/master/configuration.ipynb) | | | | | |
|
||||
| [file-dataset-image-inference-mnist](https://github.com/Azure/MachineLearningNotebooks/blob/master//contrib/batch_inferencing/file-dataset-image-inference-mnist.ipynb) | | | | | | |
|
||||
| [tabular-dataset-inference-iris](https://github.com/Azure/MachineLearningNotebooks/blob/master//contrib/batch_inferencing/tabular-dataset-inference-iris.ipynb) | | | | | | |
|
||||
|
||||
| [configuration](https://github.com/Azure/MachineLearningNotebooks/blob/master/configuration.ipynb) | | | | | | |
|
||||
|
||||
| [lightgbm-example](https://github.com/Azure/MachineLearningNotebooks/blob/master//contrib/gbdt/lightgbm/lightgbm-example.ipynb) | | | | | | |
|
||||
|
||||
| [azure-ml-with-nvidia-rapids](https://github.com/Azure/MachineLearningNotebooks/blob/master//contrib/RAPIDS/azure-ml-with-nvidia-rapids.ipynb) | | | | | | |
|
||||
|
||||
| [auto-ml-continuous-retraining](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/continuous-retraining/auto-ml-continuous-retraining.ipynb) | | | | | | |
|
||||
|
||||
| [auto-ml-forecasting-beer-remote](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/forecasting-beer-remote/auto-ml-forecasting-beer-remote.ipynb) | | | | | | |
|
||||
| :star:[auto-ml-forecasting-energy-demand](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/forecasting-energy-demand/auto-ml-forecasting-energy-demand.ipynb) | Forecasting | | | | | |
|
||||
|
||||
| [auto-ml-forecasting-energy-demand](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/forecasting-energy-demand/auto-ml-forecasting-energy-demand.ipynb) | | | | | | |
|
||||
|
||||
| [auto-ml-regression](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/regression/auto-ml-regression.ipynb) | | | | | | |
|
||||
|
||||
| [build-model-run-history-03](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/azure-databricks/amlsdk/build-model-run-history-03.ipynb) | | | | | | |
|
||||
|
||||
| [deploy-to-aci-04](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/azure-databricks/amlsdk/deploy-to-aci-04.ipynb) | | | | | | |
|
||||
|
||||
| [deploy-to-aks-05](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/azure-databricks/amlsdk/deploy-to-aks-05.ipynb) | | | | | | |
|
||||
|
||||
| [ingest-data-02](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/azure-databricks/amlsdk/ingest-data-02.ipynb) | | | | | | |
|
||||
|
||||
| [installation-and-configuration-01](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/azure-databricks/amlsdk/installation-and-configuration-01.ipynb) | | | | | | |
|
||||
|
||||
| [automl-databricks-local-01](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/azure-databricks/automl/automl-databricks-local-01.ipynb) | | | | | | |
|
||||
|
||||
| [automl-databricks-local-with-deployment](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/azure-databricks/automl/automl-databricks-local-with-deployment.ipynb) | | | | | | |
|
||||
|
||||
| [aml-pipelines-use-databricks-as-compute-target](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/azure-databricks/databricks-as-remote-compute-target/aml-pipelines-use-databricks-as-compute-target.ipynb) | | | | | | |
|
||||
|
||||
| [accelerated-models-object-detection](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/deployment/accelerated-models/accelerated-models-object-detection.ipynb) | | | | | | |
|
||||
|
||||
| [accelerated-models-quickstart](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/deployment/accelerated-models/accelerated-models-quickstart.ipynb) | | | | | | |
|
||||
|
||||
| [accelerated-models-training](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/deployment/accelerated-models/accelerated-models-training.ipynb) | | | | | | |
|
||||
|
||||
| [multi-model-register-and-deploy](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/deployment/deploy-multi-model/multi-model-register-and-deploy.ipynb) | | | | | | |
|
||||
|
||||
| [register-model-deploy-local-advanced](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/deployment/deploy-to-local/register-model-deploy-local-advanced.ipynb) | | | | | | |
|
||||
|
||||
| [enable-app-insights-in-production-service](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/deployment/enable-app-insights-in-production-service/enable-app-insights-in-production-service.ipynb) | | | | | | |
|
||||
|
||||
| [onnx-model-register-and-deploy](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/deployment/onnx/onnx-model-register-and-deploy.ipynb) | | | | | | |
|
||||
|
||||
| [production-deploy-to-aks](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/deployment/production-deploy-to-aks/production-deploy-to-aks.ipynb) | | | | | | |
|
||||
|
||||
| [register-model-create-image-deploy-service](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/deployment/register-model-create-image-deploy-service/register-model-create-image-deploy-service.ipynb) | | | | | | |
|
||||
|
||||
| [tensorflow-model-register-and-deploy](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/deployment/tensorflow/tensorflow-model-register-and-deploy.ipynb) | | | | | | |
|
||||
|
||||
| [explain-model-on-amlcompute](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/explain-model/azure-integration/remote-explanation/explain-model-on-amlcompute.ipynb) | | | | | | |
|
||||
|
||||
| [save-retrieve-explanations-run-history](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/explain-model/azure-integration/run-history/save-retrieve-explanations-run-history.ipynb) | | | | | | |
|
||||
|
||||
| [train-explain-model-locally-and-deploy](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/explain-model/azure-integration/scoring-time/train-explain-model-locally-and-deploy.ipynb) | | | | | | |
|
||||
|
||||
| [train-explain-model-on-amlcompute-and-deploy](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/explain-model/azure-integration/scoring-time/train-explain-model-on-amlcompute-and-deploy.ipynb) | | | | | | |
|
||||
| [advanced-feature-transformations-explain-local](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/explain-model/tabular-data/advanced-feature-transformations-explain-local.ipynb) | | | | | | |
|
||||
| [explain-binary-classification-local](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/explain-model/tabular-data/explain-binary-classification-local.ipynb) | | | | | | |
|
||||
| [explain-multiclass-classification-local](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/explain-model/tabular-data/explain-multiclass-classification-local.ipynb) | | | | | | |
|
||||
| [explain-regression-local](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/explain-model/tabular-data/explain-regression-local.ipynb) | | | | | | |
|
||||
| [simple-feature-transformations-explain-local](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/explain-model/tabular-data/simple-feature-transformations-explain-local.ipynb) | | | | | | |
|
||||
|
||||
| [nyc-taxi-data-regression-model-building](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/machine-learning-pipelines/nyc-taxi-data-regression-model-building/nyc-taxi-data-regression-model-building.ipynb) | | | | | | |
|
||||
|
||||
| [pipeline-batch-scoring](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/machine-learning-pipelines/pipeline-batch-scoring/pipeline-batch-scoring.ipynb) | | | | | | |
|
||||
|
||||
| [pipeline-style-transfer](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/machine-learning-pipelines/pipeline-style-transfer/pipeline-style-transfer.ipynb) | | | | | | |
|
||||
|
||||
| [authentication-in-azureml](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/manage-azureml-service/authentication-in-azureml/authentication-in-azureml.ipynb) | | | | | | |
|
||||
| [Logging APIs](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/track-and-monitor-experiments/logging-api/logging-api.ipynb) | Logging APIs and analyzing results | | None | None | None | None |
|
||||
|
||||
| [Logging APIs](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/track-and-monitor-experiments/logging-api/logging-api.ipynb) | Logging APIs and analyzing results | None | None | None | None | None |
|
||||
|
||||
| [distributed-cntk-with-custom-docker](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/training-with-deep-learning/distributed-cntk-with-custom-docker/distributed-cntk-with-custom-docker.ipynb) | | | | | | |
|
||||
|
||||
| [notebook_example](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/training-with-deep-learning/how-to-use-estimator/notebook_example.ipynb) | | | | | | |
|
||||
|
||||
| [configuration](https://github.com/Azure/MachineLearningNotebooks/blob/master//setup-environment/configuration.ipynb) | | | | | | |
|
||||
|
||||
| [img-classification-part1-training](https://github.com/Azure/MachineLearningNotebooks/blob/master//tutorials/img-classification-part1-training.ipynb) | | | | | | |
|
||||
|
||||
| [img-classification-part2-deploy](https://github.com/Azure/MachineLearningNotebooks/blob/master//tutorials/img-classification-part2-deploy.ipynb) | | | | | | |
|
||||
|
||||
| [regression-automated-ml](https://github.com/Azure/MachineLearningNotebooks/blob/master//tutorials/regression-automated-ml.ipynb) | | | | | | |
|
||||
|
||||
| [tutorial-1st-experiment-sdk-train](https://github.com/Azure/MachineLearningNotebooks/blob/master//tutorials/tutorial-1st-experiment-sdk-train.ipynb) | | | | | | |
|
||||
|
||||
| [tutorial-pipeline-batch-scoring-classification](https://github.com/Azure/MachineLearningNotebooks/blob/master//tutorials/tutorial-pipeline-batch-scoring-classification.ipynb) | | | | | | |
|
||||
|
||||
|
||||
@@ -102,7 +102,7 @@
    "source": [
     "import azureml.core\n",
     "\n",
     "print(\"This notebook was created using version 1.0.74.1 of the Azure ML SDK\")\n",
     "print(\"This notebook was created using version 1.0.76 of the Azure ML SDK\")\n",
     "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
    ]
   },