update samples from Release-58 as a part of SDK release

This commit is contained in:
amlrelsa-ms
2020-07-20 20:44:42 +00:00
parent f80512a6db
commit d096535e48
37 changed files with 265 additions and 282 deletions

View File

@@ -204,108 +204,9 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"### Option 1: Provision as a run based compute target\n",
"### Option 1: Provision a compute target (Basic)\n",
"\n",
"You can provision AmlCompute as a compute target at run-time. In this case, the compute is auto-created for your run, scales up to max_nodes that you specify, and then **deleted automatically** after the run completes."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.runconfig import RunConfiguration\n",
"from azureml.core.conda_dependencies import CondaDependencies\n",
"from azureml.core.runconfig import DEFAULT_CPU_IMAGE\n",
"\n",
"# create a new runconfig object\n",
"run_config = RunConfiguration()\n",
"\n",
"# signal that you want to use AmlCompute to execute script.\n",
"run_config.target = \"amlcompute\"\n",
"\n",
"# AmlCompute will be created in the same region as workspace\n",
"# Set vm size for AmlCompute\n",
"run_config.amlcompute.vm_size = 'STANDARD_D2_V2'\n",
"\n",
"# enable Docker \n",
"run_config.environment.docker.enabled = True\n",
"\n",
"# set Docker base image to the default CPU-based image\n",
"run_config.environment.docker.base_image = DEFAULT_CPU_IMAGE\n",
"\n",
"# use conda_dependencies.yml to create a conda environment in the Docker image for execution\n",
"run_config.environment.python.user_managed_dependencies = False\n",
"\n",
"azureml_pip_packages = [\n",
" 'azureml-defaults', 'azureml-contrib-interpret', 'azureml-core', 'azureml-telemetry',\n",
" 'azureml-interpret', 'sklearn-pandas', 'azureml-dataprep'\n",
"]\n",
"\n",
"# Note: this is to pin the scikit-learn and pandas versions to be same as notebook.\n",
"# In production scenario user would choose their dependencies\n",
"import pkg_resources\n",
"available_packages = pkg_resources.working_set\n",
"sklearn_ver = None\n",
"pandas_ver = None\n",
"for dist in available_packages:\n",
" if dist.key == 'scikit-learn':\n",
" sklearn_ver = dist.version\n",
" elif dist.key == 'pandas':\n",
" pandas_ver = dist.version\n",
"sklearn_dep = 'scikit-learn'\n",
"pandas_dep = 'pandas'\n",
"if sklearn_ver:\n",
" sklearn_dep = 'scikit-learn=={}'.format(sklearn_ver)\n",
"if pandas_ver:\n",
" pandas_dep = 'pandas=={}'.format(pandas_ver)\n",
"# specify CondaDependencies obj\n",
"# The CondaDependencies specifies the conda and pip packages that are installed in the environment\n",
"# the submitted job is run in. Note the remote environment(s) needs to be similar to the local\n",
"# environment, otherwise if a model is trained or deployed in a different environment this can\n",
"# cause errors. Please take extra care when specifying your dependencies in a production environment.\n",
"run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=[sklearn_dep, pandas_dep],\n",
" pip_packages=azureml_pip_packages)\n",
"\n",
"# Now submit a run on AmlCompute\n",
"from azureml.core.script_run_config import ScriptRunConfig\n",
"\n",
"script_run_config = ScriptRunConfig(source_directory=project_folder,\n",
" script='train_explain.py',\n",
" run_config=run_config)\n",
"\n",
"run = experiment.submit(script_run_config)\n",
"\n",
"# Show run details\n",
"run"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Note: if you need to cancel a run, you can follow [these instructions](https://aka.ms/aml-docs-cancel-run)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"# Shows output of the run on stdout.\n",
"run.wait_for_completion(show_output=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Option 2: Provision as a persistent compute target (Basic)\n",
"\n",
"You can provision a persistent AmlCompute resource by simply defining two parameters thanks to smart defaults. By default it autoscales from 0 nodes and provisions dedicated VMs to run your job in a container. This is useful when you want to continously re-use the same target, debug it between jobs or simply share the resource with other users of your workspace.\n",
"You can provision an AmlCompute resource by simply defining two parameters thanks to smart defaults. By default it autoscales from 0 nodes and provisions dedicated VMs to run your job in a container. This is useful when you want to continously re-use the same target, debug it between jobs or simply share the resource with other users of your workspace.\n",
"\n",
"* `vm_size`: VM family of the nodes provisioned by AmlCompute. Simply choose from the supported_vmsizes() above\n",
"* `max_nodes`: Maximum nodes to autoscale to while running a job on AmlCompute"
@@ -351,13 +252,13 @@
"from azureml.core.runconfig import RunConfiguration\n",
"from azureml.core.conda_dependencies import CondaDependencies\n",
"\n",
"# create a new RunConfig object\n",
"# Create a new RunConfig object\n",
"run_config = RunConfiguration(framework=\"python\")\n",
"\n",
"# Set compute target to AmlCompute target created in previous step\n",
"run_config.target = cpu_cluster.name\n",
"\n",
"# enable Docker \n",
"# Enable Docker \n",
"run_config.environment.docker.enabled = True\n",
"\n",
"azureml_pip_packages = [\n",
@@ -382,7 +283,7 @@
" sklearn_dep = 'scikit-learn=={}'.format(sklearn_ver)\n",
"if pandas_ver:\n",
" pandas_dep = 'pandas=={}'.format(pandas_ver)\n",
"# specify CondaDependencies obj\n",
"# Specify CondaDependencies obj\n",
"# The CondaDependencies specifies the conda and pip packages that are installed in the environment\n",
"# the submitted job is run in. Note the remote environment(s) needs to be similar to the local\n",
"# environment, otherwise if a model is trained or deployed in a different environment this can\n",
@@ -400,6 +301,13 @@
"run"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Note: if you need to cancel a run, you can follow [these instructions](https://aka.ms/aml-docs-cancel-run)."
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -424,7 +332,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"### Option 3: Provision as a persistent compute target (Advanced)\n",
"### Option 2: Provision a compute target (Advanced)\n",
"\n",
"You can also specify additional properties or change defaults while provisioning AmlCompute using a more advanced configuration. This is useful when you want a dedicated cluster of 4 nodes (for example you can set the min_nodes and max_nodes to 4), or want the compute to be within an existing VNet in your subscription.\n",
"\n",
@@ -483,13 +391,13 @@
"from azureml.core.runconfig import RunConfiguration\n",
"from azureml.core.conda_dependencies import CondaDependencies\n",
"\n",
"# create a new RunConfig object\n",
"# Create a new RunConfig object\n",
"run_config = RunConfiguration(framework=\"python\")\n",
"\n",
"# Set compute target to AmlCompute target created in previous step\n",
"run_config.target = cpu_cluster.name\n",
"\n",
"# enable Docker \n",
"# Enable Docker \n",
"run_config.environment.docker.enabled = True\n",
"\n",
"azureml_pip_packages = [\n",
@@ -516,7 +424,7 @@
" sklearn_dep = 'scikit-learn=={}'.format(sklearn_ver)\n",
"if pandas_ver:\n",
" pandas_dep = 'pandas=={}'.format(pandas_ver)\n",
"# specify CondaDependencies obj\n",
"# Specify CondaDependencies obj\n",
"# The CondaDependencies specifies the conda and pip packages that are installed in the environment\n",
"# the submitted job is run in. Note the remote environment(s) needs to be similar to the local\n",
"# environment, otherwise if a model is trained or deployed in a different environment this can\n",
@@ -554,19 +462,6 @@
"run.get_metrics()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.contrib.interpret.explanation.explanation_client import ExplanationClient\n",
"\n",
"client = ExplanationClient.from_run(run)\n",
"# Get the top k (e.g., 4) most important features with their importance values\n",
"explanation = client.download_model_explanation(top_k=4)"
]
},
{
"cell_type": "markdown",
"metadata": {},
@@ -682,7 +577,7 @@
"metadata": {},
"outputs": [],
"source": [
"# retrieve model for visualization and deployment\n",
"# Retrieve model for visualization and deployment\n",
"from azureml.core.model import Model\n",
"import joblib\n",
"original_model = Model(ws, 'model_explain_model_on_amlcomp')\n",
@@ -703,7 +598,7 @@
"metadata": {},
"outputs": [],
"source": [
"# retrieve x_test for visualization\n",
"# Retrieve x_test for visualization\n",
"import joblib\n",
"x_test_path = './x_test_boston_housing.pkl'\n",
"run.download_file('x_test_boston_housing.pkl', output_file_path=x_test_path)"

View File

@@ -122,7 +122,7 @@
"metadata": {},
"outputs": [],
"source": [
"# get the IBM employee attrition dataset\n",
"# Get the IBM employee attrition dataset\n",
"outdirname = 'dataset.6.21.19'\n",
"try:\n",
" from urllib import urlretrieve\n",
@@ -163,7 +163,7 @@
"from sklearn.model_selection import train_test_split\n",
"x_train, x_test, y_train, y_test = train_test_split(attritionXData, \n",
" target, \n",
" test_size = 0.2,\n",
" test_size=0.2,\n",
" random_state=0,\n",
" stratify=target)"
]
@@ -223,7 +223,7 @@
"# Append classifier to preprocessing pipeline.\n",
"# Now we have a full prediction pipeline.\n",
"clf = Pipeline(steps=[('preprocessor', transformations),\n",
" ('classifier', SVC(kernel='linear', C = 1.0, probability=True))])"
" ('classifier', SVC(C=1.0, probability=True))])"
]
},
{
@@ -249,7 +249,7 @@
"# Append classifier to preprocessing pipeline.\n",
"# Now we have a full prediction pipeline.\n",
"clf = Pipeline(steps=[('preprocessor', transformations),\n",
" ('classifier', SVC(kernel='linear', C = 1.0, probability=True))]) \n",
" ('classifier', SVC(C=1.0, probability=True))]) \n",
"\n",
"\n",
"\n",
@@ -393,7 +393,7 @@
"metadata": {},
"outputs": [],
"source": [
"# feature shap values for all features and all data points in the training data\n",
"# Feature shap values for all features and all data points in the training data\n",
"print('local importance values: {}'.format(global_explanation.local_importance_values))"
]
},
@@ -450,8 +450,7 @@
"outputs": [],
"source": [
"import azureml.core\n",
"from azureml.core import Workspace, Experiment, Run\n",
"from interpret.ext.blackbox import TabularExplainer\n",
"from azureml.core import Workspace, Experiment\n",
"from azureml.contrib.interpret.explanation.explanation_client import ExplanationClient\n",
"# Check core SDK version number\n",
"print(\"SDK version:\", azureml.core.VERSION)"
@@ -576,6 +575,23 @@
"ExplanationDashboard(downloaded_global_explanation, model, datasetX=x_test)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## End\n",
"Complete the run"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"run.complete()"
]
},
{
"cell_type": "markdown",
"metadata": {},

View File

@@ -141,7 +141,7 @@
"metadata": {},
"outputs": [],
"source": [
"# get IBM attrition data\n",
"# Get IBM attrition data\n",
"import os\n",
"import pandas as pd\n",
"\n",
@@ -218,17 +218,17 @@
"from sklearn.model_selection import train_test_split\n",
"x_train, x_test, y_train, y_test = train_test_split(attritionXData,\n",
" target,\n",
" test_size = 0.2,\n",
" test_size=0.2,\n",
" random_state=0,\n",
" stratify=target)\n",
"\n",
"# preprocess the data and fit the classification model\n",
"# Preprocess the data and fit the classification model\n",
"clf.fit(x_train, y_train)\n",
"model = clf.steps[-1][1]\n",
"\n",
"model_file_name = 'log_reg.pkl'\n",
"\n",
"# save model in the outputs folder so it automatically get uploaded\n",
"# Save model in the outputs folder so it automatically get uploaded\n",
"with open(model_file_name, 'wb') as file:\n",
" joblib.dump(value=clf, filename=os.path.join('./outputs/',\n",
" model_file_name))"
@@ -345,7 +345,7 @@
" sklearn_dep = 'scikit-learn=={}'.format(sklearn_ver)\n",
"if pandas_ver:\n",
" pandas_dep = 'pandas=={}'.format(pandas_ver)\n",
"# specify CondaDependencies obj\n",
"# Specify CondaDependencies obj\n",
"# The CondaDependencies specifies the conda and pip packages that are installed in the environment\n",
"# the submitted job is run in. Note the remote environment(s) needs to be similar to the local\n",
"# environment, otherwise if a model is trained or deployed in a different environment this can\n",
@@ -368,7 +368,7 @@
"outputs": [],
"source": [
"from azureml.core.model import Model\n",
"# retrieve scoring explainer for deployment\n",
"# Retrieve scoring explainer for deployment\n",
"scoring_explainer_model = Model(ws, 'IBM_attrition_explainer')"
]
},
@@ -416,11 +416,11 @@
"\n",
"headers = {'Content-Type':'application/json'}\n",
"\n",
"# send request to service\n",
"# Send request to service\n",
"print(\"POST to url\", service.scoring_uri)\n",
"resp = requests.post(service.scoring_uri, sample_data, headers=headers)\n",
"\n",
"# can covert back to Python objects from json string if desired\n",
"# Can covert back to Python objects from json string if desired\n",
"print(\"prediction:\", resp.text)\n",
"result = json.loads(resp.text)"
]
@@ -431,7 +431,7 @@
"metadata": {},
"outputs": [],
"source": [
"#plot the feature importance for the prediction\n",
"# Plot the feature importance for the prediction\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt; plt.rcdefaults()\n",
"\n",

View File

@@ -156,7 +156,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Submit an AmlCompute run in a few different ways\n",
"## Submit an AmlCompute run\n",
"\n",
"First lets check which VM families are available in your region. Azure is a regional service and some specialized SKUs (especially GPUs) are only available in certain regions. Since AmlCompute is created in the region of your workspace, we will use the supported_vms () function to see if the VM family we want to use ('STANDARD_D2_V2') is supported.\n",
"\n",
@@ -202,9 +202,43 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"### Provision as a run based compute target\n",
"### Provision a compute target\n",
"\n",
"You can provision AmlCompute as a compute target at run-time. In this case, the compute is auto-created for your run, scales up to max_nodes that you specify, and then **deleted automatically** after the run completes."
"You can provision an AmlCompute resource by simply defining two parameters thanks to smart defaults. By default it autoscales from 0 nodes and provisions dedicated VMs to run your job in a container. This is useful when you want to continously re-use the same target, debug it between jobs or simply share the resource with other users of your workspace.\n",
"\n",
"* `vm_size`: VM family of the nodes provisioned by AmlCompute. Simply choose from the supported_vmsizes() above\n",
"* `max_nodes`: Maximum nodes to autoscale to while running a job on AmlCompute"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
"from azureml.core.compute_target import ComputeTargetException\n",
"\n",
"# Choose a name for your CPU cluster\n",
"cpu_cluster_name = \"cpu-cluster\"\n",
"\n",
"# Verify that cluster does not exist already\n",
"try:\n",
" cpu_cluster = ComputeTarget(workspace=ws, name=cpu_cluster_name)\n",
" print('Found existing cluster, use it.')\n",
"except ComputeTargetException:\n",
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',\n",
" max_nodes=4)\n",
" cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, compute_config)\n",
"\n",
"cpu_cluster.wait_for_completion(show_output=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Configure & Run"
]
},
{
@@ -217,28 +251,21 @@
"from azureml.core.conda_dependencies import CondaDependencies\n",
"from azureml.core.runconfig import DEFAULT_CPU_IMAGE\n",
"\n",
"# create a new runconfig object\n",
"# Create a new runconfig object\n",
"run_config = RunConfiguration()\n",
"\n",
"# signal that you want to use AmlCompute to execute script.\n",
"run_config.target = \"amlcompute\"\n",
"# Set compute target to AmlCompute target created in previous step\n",
"run_config.target = cpu_cluster.name\n",
"\n",
"# AmlCompute will be created in the same region as workspace\n",
"# Set vm size for AmlCompute\n",
"run_config.amlcompute.vm_size = 'STANDARD_D2_V2'\n",
"\n",
"# enable Docker \n",
"# Enable Docker \n",
"run_config.environment.docker.enabled = True\n",
"\n",
"# set Docker base image to the default CPU-based image\n",
"# Set Docker base image to the default CPU-based image\n",
"run_config.environment.docker.base_image = DEFAULT_CPU_IMAGE\n",
"\n",
"# use conda_dependencies.yml to create a conda environment in the Docker image for execution\n",
"# Use conda_dependencies.yml to create a conda environment in the Docker image for execution\n",
"run_config.environment.python.user_managed_dependencies = False\n",
"\n",
"# auto-prepare the Docker image when used for execution (if it is not already prepared)\n",
"run_config.auto_prepare_environment = True\n",
"\n",
"azureml_pip_packages = [\n",
" 'azureml-defaults', 'azureml-contrib-interpret', 'azureml-core', 'azureml-telemetry',\n",
" 'azureml-interpret', 'azureml-dataprep'\n",
@@ -263,7 +290,7 @@
" sklearn_dep = 'scikit-learn=={}'.format(sklearn_ver)\n",
"if pandas_ver:\n",
" pandas_dep = 'pandas=={}'.format(pandas_ver)\n",
"# specify CondaDependencies obj\n",
"# Specify CondaDependencies obj\n",
"# The CondaDependencies specifies the conda and pip packages that are installed in the environment\n",
"# the submitted job is run in. Note the remote environment(s) needs to be similar to the local\n",
"# environment, otherwise if a model is trained or deployed in a different environment this can\n",
@@ -327,7 +354,7 @@
"metadata": {},
"outputs": [],
"source": [
"# retrieve model for visualization and deployment\n",
"# Retrieve model for visualization and deployment\n",
"from azureml.core.model import Model\n",
"import joblib\n",
"original_model = Model(ws, 'amlcompute_deploy_model')\n",
@@ -341,7 +368,7 @@
"metadata": {},
"outputs": [],
"source": [
"# retrieve global explanation for visualization\n",
"# Retrieve global explanation for visualization\n",
"from azureml.contrib.interpret.explanation.explanation_client import ExplanationClient\n",
"\n",
"# get model explanation data\n",
@@ -355,7 +382,7 @@
"metadata": {},
"outputs": [],
"source": [
"# retrieve x_test for visualization\n",
"# Retrieve x_test for visualization\n",
"import joblib\n",
"x_test_path = './x_test.pkl'\n",
"run.download_file('x_test_ibm.pkl', output_file_path=x_test_path)\n",
@@ -435,7 +462,7 @@
" sklearn_dep = 'scikit-learn=={}'.format(sklearn_ver)\n",
"if pandas_ver:\n",
" pandas_dep = 'pandas=={}'.format(pandas_ver)\n",
"# specify CondaDependencies obj\n",
"# Specify CondaDependencies obj\n",
"# The CondaDependencies specifies the conda and pip packages that are installed in the environment\n",
"# the submitted job is run in. Note the remote environment(s) needs to be similar to the local\n",
"# environment, otherwise if a model is trained or deployed in a different environment this can\n",
@@ -457,7 +484,7 @@
"metadata": {},
"outputs": [],
"source": [
"# retrieve scoring explainer for deployment\n",
"# Retrieve scoring explainer for deployment\n",
"scoring_explainer_model = Model(ws, 'IBM_attrition_explainer')"
]
},
@@ -496,17 +523,17 @@
"source": [
"import requests\n",
"\n",
"# create data to test service with\n",
"# Create data to test service with\n",
"examples = x_test[:4]\n",
"input_data = examples.to_json()\n",
"\n",
"headers = {'Content-Type':'application/json'}\n",
"\n",
"# send request to service\n",
"# Send request to service\n",
"print(\"POST to url\", service.scoring_uri)\n",
"resp = requests.post(service.scoring_uri, input_data, headers=headers)\n",
"\n",
"# can covert back to Python objects from json string if desired\n",
"# Can covert back to Python objects from json string if desired\n",
"print(\"prediction:\", resp.text)"
]
},