mirror of
https://github.com/Azure/MachineLearningNotebooks.git
synced 2025-12-20 09:37:04 -05:00
Compare commits
6 Commits
azureml-sd
...
azureml-sd
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
15a3ca649d | ||
|
|
3c4770cfe5 | ||
|
|
8d7de05908 | ||
|
|
863faae57f | ||
|
|
8d3f5adcdb | ||
|
|
cd3394e129 |
@@ -103,7 +103,7 @@
|
||||
"source": [
|
||||
"import azureml.core\n",
|
||||
"\n",
|
||||
"print(\"This notebook was created using version 1.42.0 of the Azure ML SDK\")\n",
|
||||
"print(\"This notebook was created using version 1.43.0 of the Azure ML SDK\")\n",
|
||||
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -6,6 +6,7 @@ dependencies:
|
||||
- fairlearn>=0.6.2
|
||||
- joblib
|
||||
- liac-arff
|
||||
- raiwidgets~=0.18.1
|
||||
- raiwidgets~=0.19.0
|
||||
- itsdangerous==2.0.1
|
||||
- markupsafe<2.1.0
|
||||
- protobuf==3.20.0
|
||||
|
||||
@@ -6,6 +6,7 @@ dependencies:
|
||||
- fairlearn>=0.6.2
|
||||
- joblib
|
||||
- liac-arff
|
||||
- raiwidgets~=0.18.1
|
||||
- raiwidgets~=0.19.0
|
||||
- itsdangerous==2.0.1
|
||||
- markupsafe<2.1.0
|
||||
- protobuf==3.20.0
|
||||
|
||||
@@ -13,19 +13,19 @@ dependencies:
|
||||
- pytorch::pytorch=1.4.0
|
||||
- conda-forge::fbprophet==0.7.1
|
||||
- cudatoolkit=10.1.243
|
||||
- scipy==1.5.2
|
||||
- scipy==1.5.3
|
||||
- notebook
|
||||
- pywin32==227
|
||||
- PySocks==1.7.1
|
||||
- jsonschema==4.5.1
|
||||
- jsonschema==4.6.0
|
||||
- conda-forge::pyqt==5.12.3
|
||||
|
||||
- pip:
|
||||
# Required packages for AzureML execution, history, and data preparation.
|
||||
- azureml-widgets~=1.42.0
|
||||
- azureml-widgets~=1.43.0
|
||||
- pytorch-transformers==1.0.0
|
||||
- spacy==2.2.4
|
||||
- pystan==2.19.1.1
|
||||
- https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
|
||||
- -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/1.42.0/validated_win32_requirements.txt [--no-deps]
|
||||
- -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/1.43.0/validated_win32_requirements.txt [--no-deps]
|
||||
- arch==4.14
|
||||
|
||||
@@ -14,7 +14,7 @@ dependencies:
|
||||
- numpy==1.19.5
|
||||
- cython==0.29.14
|
||||
- urllib3==1.26.7
|
||||
- scipy>=1.4.1,<=1.5.2
|
||||
- scipy>=1.4.1,<=1.5.3
|
||||
- scikit-learn==0.22.1
|
||||
- py-xgboost<=1.3.3
|
||||
- holidays==0.10.3
|
||||
@@ -24,10 +24,10 @@ dependencies:
|
||||
|
||||
- pip:
|
||||
# Required packages for AzureML execution, history, and data preparation.
|
||||
- azureml-widgets~=1.42.0
|
||||
- azureml-widgets~=1.43.0
|
||||
- pytorch-transformers==1.0.0
|
||||
- spacy==2.2.4
|
||||
- pystan==2.19.1.1
|
||||
- https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
|
||||
- -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/1.42.0/validated_linux_requirements.txt [--no-deps]
|
||||
- -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/1.43.0/validated_linux_requirements.txt [--no-deps]
|
||||
- arch==4.14
|
||||
|
||||
@@ -15,7 +15,7 @@ dependencies:
|
||||
- numpy==1.19.5
|
||||
- cython==0.29.14
|
||||
- urllib3==1.26.7
|
||||
- scipy>=1.4.1,<=1.5.2
|
||||
- scipy>=1.4.1,<=1.5.3
|
||||
- scikit-learn==0.22.1
|
||||
- py-xgboost<=1.3.3
|
||||
- holidays==0.10.3
|
||||
@@ -25,10 +25,10 @@ dependencies:
|
||||
|
||||
- pip:
|
||||
# Required packages for AzureML execution, history, and data preparation.
|
||||
- azureml-widgets~=1.42.0
|
||||
- azureml-widgets~=1.43.0
|
||||
- pytorch-transformers==1.0.0
|
||||
- spacy==2.2.4
|
||||
- pystan==2.19.1.1
|
||||
- https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
|
||||
- -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/1.42.0/validated_darwin_requirements.txt [--no-deps]
|
||||
- -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/1.43.0/validated_darwin_requirements.txt [--no-deps]
|
||||
- arch==4.14
|
||||
|
||||
@@ -92,7 +92,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(\"This notebook was created using version 1.42.0 of the Azure ML SDK\")\n",
|
||||
"print(\"This notebook was created using version 1.43.0 of the Azure ML SDK\")\n",
|
||||
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -91,7 +91,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(\"This notebook was created using version 1.42.0 of the Azure ML SDK\")\n",
|
||||
"print(\"This notebook was created using version 1.43.0 of the Azure ML SDK\")\n",
|
||||
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -149,12 +149,7 @@ def get_backtest_pipeline(
|
||||
inputs=[forecasts.as_mount()],
|
||||
outputs=[data_results],
|
||||
source_directory=PROJECT_FOLDER,
|
||||
arguments=[
|
||||
"--forecasts",
|
||||
forecasts,
|
||||
"--output-dir",
|
||||
data_results,
|
||||
],
|
||||
arguments=["--forecasts", forecasts, "--output-dir", data_results],
|
||||
runconfig=run_config,
|
||||
compute_target=compute_target,
|
||||
allow_reuse=False,
|
||||
|
||||
@@ -23,11 +23,7 @@ except ImportError:
|
||||
|
||||
|
||||
def infer_forecasting_dataset_tcn(
|
||||
X_test,
|
||||
y_test,
|
||||
model,
|
||||
output_path,
|
||||
output_dataset_name="results",
|
||||
X_test, y_test, model, output_path, output_dataset_name="results"
|
||||
):
|
||||
|
||||
y_pred, df_all = model.forecast(X_test, y_test)
|
||||
@@ -71,10 +67,7 @@ def get_model(model_path, model_file_name):
|
||||
def get_args():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument(
|
||||
"--model_name",
|
||||
type=str,
|
||||
dest="model_name",
|
||||
help="Model to be loaded",
|
||||
"--model_name", type=str, dest="model_name", help="Model to be loaded"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
@@ -108,12 +101,7 @@ def get_args():
|
||||
return args
|
||||
|
||||
|
||||
def get_data(
|
||||
run,
|
||||
fitted_model,
|
||||
target_column_name,
|
||||
test_dataset_name,
|
||||
):
|
||||
def get_data(run, fitted_model, target_column_name, test_dataset_name):
|
||||
|
||||
# get input dataset by name
|
||||
test_dataset = Dataset.get_by_name(run.experiment.workspace, test_dataset_name)
|
||||
@@ -159,10 +147,7 @@ if __name__ == "__main__":
|
||||
fitted_model = get_model(model_path, model_file_name)
|
||||
|
||||
X_test_df, y_test = get_data(
|
||||
run,
|
||||
fitted_model,
|
||||
target_column_name,
|
||||
test_dataset_name,
|
||||
run, fitted_model, target_column_name, test_dataset_name
|
||||
)
|
||||
|
||||
infer_forecasting_dataset_tcn(
|
||||
|
||||
@@ -69,17 +69,19 @@
|
||||
"# ONNX Model Zoo and save it in the same folder as this tutorial\n",
|
||||
"\n",
|
||||
"import urllib.request\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"onnx_model_url = \"https://github.com/onnx/models/blob/main/vision/body_analysis/emotion_ferplus/model/emotion-ferplus-7.tar.gz?raw=true\"\n",
|
||||
"\n",
|
||||
"urllib.request.urlretrieve(onnx_model_url, filename=\"emotion-ferplus-7.tar.gz\")\n",
|
||||
"os.mkdir(\"emotion_ferplus\")\n",
|
||||
"\n",
|
||||
"# the ! magic command tells our jupyter notebook kernel to run the following line of \n",
|
||||
"# code from the command line instead of the notebook kernel\n",
|
||||
"\n",
|
||||
"# We use tar and xvzf to unzip the files we just retrieved from the ONNX model zoo\n",
|
||||
"\n",
|
||||
"!tar xvzf emotion-ferplus-7.tar.gz"
|
||||
"!tar xvzf emotion-ferplus-7.tar.gz -C emotion_ferplus"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -130,7 +132,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model_dir = \"emotion_ferplus\" # replace this with the location of your model files\n",
|
||||
"model_dir = \"emotion_ferplus/model\" # replace this with the location of your model files\n",
|
||||
"\n",
|
||||
"# leave as is if it's in the same folder as this notebook"
|
||||
]
|
||||
@@ -496,13 +498,12 @@
|
||||
"\n",
|
||||
"# to use parsers to read in our model/data\n",
|
||||
"import json\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"test_inputs = []\n",
|
||||
"test_outputs = []\n",
|
||||
"\n",
|
||||
"# read in 3 testing images from .pb files\n",
|
||||
"test_data_size = 3\n",
|
||||
"# read in 1 testing image from .pb files\n",
|
||||
"test_data_size = 1\n",
|
||||
"\n",
|
||||
"for num in np.arange(test_data_size):\n",
|
||||
" input_test_data = os.path.join(model_dir, 'test_data_set_{0}'.format(num), 'input_0.pb')\n",
|
||||
@@ -533,7 +534,7 @@
|
||||
},
|
||||
"source": [
|
||||
"### Show some sample images\n",
|
||||
"We use `matplotlib` to plot 3 test images from the dataset."
|
||||
"We use `matplotlib` to plot 1 test image from the dataset."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -547,7 +548,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"plt.figure(figsize = (20, 20))\n",
|
||||
"for test_image in np.arange(3):\n",
|
||||
"for test_image in np.arange(test_data_size):\n",
|
||||
" test_inputs[test_image].reshape(1, 64, 64)\n",
|
||||
" plt.subplot(1, 8, test_image+1)\n",
|
||||
" plt.axhline('')\n",
|
||||
|
||||
@@ -69,10 +69,12 @@
|
||||
"# ONNX Model Zoo and save it in the same folder as this tutorial\n",
|
||||
"\n",
|
||||
"import urllib.request\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"onnx_model_url = \"https://github.com/onnx/models/blob/main/vision/classification/mnist/model/mnist-7.tar.gz?raw=true\"\n",
|
||||
"\n",
|
||||
"urllib.request.urlretrieve(onnx_model_url, filename=\"mnist-7.tar.gz\")"
|
||||
"urllib.request.urlretrieve(onnx_model_url, filename=\"mnist-7.tar.gz\")\n",
|
||||
"os.mkdir(\"mnist\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -86,7 +88,7 @@
|
||||
"\n",
|
||||
"# We use tar and xvzf to unzip the files we just retrieved from the ONNX model zoo\n",
|
||||
"\n",
|
||||
"!tar xvzf mnist-7.tar.gz"
|
||||
"!tar xvzf mnist-7.tar.gz -C mnist"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -137,7 +139,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model_dir = \"mnist\" # replace this with the location of your model files\n",
|
||||
"model_dir = \"mnist/model\" # replace this with the location of your model files\n",
|
||||
"\n",
|
||||
"# leave as is if it's in the same folder as this notebook"
|
||||
]
|
||||
@@ -447,13 +449,12 @@
|
||||
"\n",
|
||||
"# to use parsers to read in our model/data\n",
|
||||
"import json\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"test_inputs = []\n",
|
||||
"test_outputs = []\n",
|
||||
"\n",
|
||||
"# read in 3 testing images from .pb files\n",
|
||||
"test_data_size = 3\n",
|
||||
"# read in 1 testing image from .pb files\n",
|
||||
"test_data_size = 1\n",
|
||||
"\n",
|
||||
"for i in np.arange(test_data_size):\n",
|
||||
" input_test_data = os.path.join(model_dir, 'test_data_set_{0}'.format(i), 'input_0.pb')\n",
|
||||
@@ -486,7 +487,7 @@
|
||||
},
|
||||
"source": [
|
||||
"### Show some sample images\n",
|
||||
"We use `matplotlib` to plot 3 test images from the dataset."
|
||||
"We use `matplotlib` to plot 1 test image from the dataset."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -500,7 +501,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"plt.figure(figsize = (16, 6))\n",
|
||||
"for test_image in np.arange(3):\n",
|
||||
"for test_image in np.arange(test_data_size):\n",
|
||||
" plt.subplot(1, 15, test_image+1)\n",
|
||||
" plt.axhline('')\n",
|
||||
" plt.axvline('')\n",
|
||||
|
||||
@@ -2,6 +2,8 @@
|
||||
# Licensed under the MIT license.
|
||||
|
||||
from azureml.core.run import Run
|
||||
from azureml.interpret import ExplanationClient
|
||||
from interpret_community.adapter import ExplanationAdapter
|
||||
import joblib
|
||||
import os
|
||||
import shap
|
||||
@@ -11,9 +13,11 @@ OUTPUT_DIR = './outputs/'
|
||||
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
||||
|
||||
run = Run.get_context()
|
||||
client = ExplanationClient.from_run(run)
|
||||
|
||||
# get a dataset on income prediction
|
||||
X, y = shap.datasets.adult()
|
||||
features = X.columns.values
|
||||
|
||||
# train an XGBoost model (but any other tree model type should work)
|
||||
model = xgboost.XGBClassifier()
|
||||
@@ -26,6 +30,12 @@ shap_values = explainer(X_shap)
|
||||
print("computed shap values:")
|
||||
print(shap_values)
|
||||
|
||||
# Use the explanation adapter to convert the importances into an interpret-community
|
||||
# style explanation which can be uploaded to AzureML or visualized in the
|
||||
# ExplanationDashboard widget
|
||||
adapter = ExplanationAdapter(features, classification=True)
|
||||
global_explanation = adapter.create_global(shap_values.values, X_shap, expected_values=shap_values.base_values)
|
||||
|
||||
# write X_shap out as a pickle file for later visualization
|
||||
x_shap_pkl = 'x_shap.pkl'
|
||||
with open(x_shap_pkl, 'wb') as file:
|
||||
@@ -42,3 +52,8 @@ with open(model_file_name, 'wb') as file:
|
||||
run.upload_file('xgboost_model.pkl', os.path.join('./outputs/', model_file_name))
|
||||
original_model = run.register_model(model_name='xgboost_with_gpu_tree_explainer',
|
||||
model_path='xgboost_model.pkl')
|
||||
|
||||
# Uploading model explanation data for storage or visualization in webUX
|
||||
# The explanation can then be downloaded on any compute
|
||||
comment = 'Global explanation on classification model trained on adult census income dataset'
|
||||
client.upload_model_explanation(global_explanation, comment=comment, model_id=original_model.id)
|
||||
|
||||
@@ -106,7 +106,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(\"This notebook was created using version 1.42.0 of the Azure ML SDK\")\n",
|
||||
"print(\"This notebook was created using version 1.43.0 of the Azure ML SDK\")\n",
|
||||
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
||||
]
|
||||
},
|
||||
@@ -225,36 +225,73 @@
|
||||
"\n",
|
||||
"from azureml.core import Environment\n",
|
||||
"\n",
|
||||
"environment_name = \"shap-gpu-tree\"\n",
|
||||
"\n",
|
||||
"environment_name = \"shapgpu\"\n",
|
||||
"env = Environment(environment_name)\n",
|
||||
"\n",
|
||||
"env.docker.enabled = True\n",
|
||||
"env.docker.base_image = None\n",
|
||||
"env.docker.base_dockerfile = \"\"\"\n",
|
||||
"FROM rapidsai/rapidsai:cuda10.0-devel-ubuntu18.04\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Note: this is to pin the pandas and xgboost versions to be same as notebook.\n",
|
||||
"# In production scenario user would choose their dependencies\n",
|
||||
"import pkg_resources\n",
|
||||
"available_packages = pkg_resources.working_set\n",
|
||||
"xgboost_ver = None\n",
|
||||
"pandas_ver = None\n",
|
||||
"for dist in list(available_packages):\n",
|
||||
" if dist.key == 'xgboost':\n",
|
||||
" xgboost_ver = dist.version\n",
|
||||
" elif dist.key == 'pandas':\n",
|
||||
" pandas_ver = dist.version\n",
|
||||
"xgboost_dep = 'xgboost'\n",
|
||||
"pandas_dep = 'pandas'\n",
|
||||
"if pandas_ver:\n",
|
||||
" pandas_dep = 'pandas=={}'.format(pandas_ver)\n",
|
||||
"if xgboost_dep:\n",
|
||||
" xgboost_dep = 'xgboost=={}'.format(xgboost_ver)\n",
|
||||
"\n",
|
||||
"# Note: we build shap at commit 690245 for Tesla K80 GPUs\n",
|
||||
"env.docker.base_dockerfile = f\"\"\"\n",
|
||||
"FROM nvidia/cuda:10.2-devel-ubuntu18.04\n",
|
||||
"ENV PATH=\"/root/miniconda3/bin:${{PATH}}\"\n",
|
||||
"ARG PATH=\"/root/miniconda3/bin:${{PATH}}\"\n",
|
||||
"RUN apt-get update && \\\n",
|
||||
"apt-get install -y fuse && \\\n",
|
||||
"apt-get install -y build-essential && \\\n",
|
||||
"apt-get install -y python3-dev && \\\n",
|
||||
"source activate rapids && \\\n",
|
||||
"apt-get install -y wget && \\\n",
|
||||
"apt-get install -y git && \\\n",
|
||||
"rm -rf /var/lib/apt/lists/* && \\\n",
|
||||
"wget \\\n",
|
||||
"https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \\\n",
|
||||
"mkdir /root/.conda && \\\n",
|
||||
"bash Miniconda3-latest-Linux-x86_64.sh -b && \\\n",
|
||||
"rm -f Miniconda3-latest-Linux-x86_64.sh && \\\n",
|
||||
"conda init bash && \\\n",
|
||||
". ~/.bashrc && \\\n",
|
||||
"conda create -n shapgpu python=3.7 && \\\n",
|
||||
"conda activate shapgpu && \\\n",
|
||||
"apt-get install -y g++ && \\\n",
|
||||
"printenv && \\\n",
|
||||
"echo \"which nvcc: \" && \\\n",
|
||||
"which nvcc && \\\n",
|
||||
"pip install numpy==1.20.3 && \\\n",
|
||||
"pip install azureml-defaults && \\\n",
|
||||
"pip install azureml-telemetry && \\\n",
|
||||
"pip install azureml-interpret && \\\n",
|
||||
"pip install {pandas_dep} && \\\n",
|
||||
"cd /usr/local/src && \\\n",
|
||||
"git clone https://github.com/slundberg/shap && \\\n",
|
||||
"git clone https://github.com/slundberg/shap.git --single-branch && \\\n",
|
||||
"cd shap && \\\n",
|
||||
"git reset --hard 690245c6ab043edf40cfce3d8438a62e29ab599f && \\\n",
|
||||
"mkdir build && \\\n",
|
||||
"python setup.py install --user && \\\n",
|
||||
"pip uninstall -y xgboost && \\\n",
|
||||
"rm /conda/envs/rapids/lib/libxgboost.so && \\\n",
|
||||
"pip install xgboost==1.4.2\n",
|
||||
"pip install {xgboost_dep} \\\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"env.python.user_managed_dependencies = True\n",
|
||||
"env.python.interpreter_path = '/root/miniconda3/envs/shapgpu/bin/python'\n",
|
||||
"\n",
|
||||
"from azureml.core import Run\n",
|
||||
"from azureml.core import ScriptRunConfig\n",
|
||||
@@ -266,6 +303,176 @@
|
||||
"run = experiment.submit(config=src)\n",
|
||||
"run"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Note: if you need to cancel a run, you can follow [these instructions](https://aka.ms/aml-docs-cancel-run)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"# Shows output of the run on stdout.\n",
|
||||
"run.wait_for_completion(show_output=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run.get_metrics()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Download \n",
|
||||
"1. Download model explanation data."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.interpret import ExplanationClient\n",
|
||||
"\n",
|
||||
"# Get model explanation data\n",
|
||||
"client = ExplanationClient.from_run(run)\n",
|
||||
"global_explanation = client.download_model_explanation()\n",
|
||||
"local_importance_values = global_explanation.local_importance_values\n",
|
||||
"expected_values = global_explanation.expected_values"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Get the top k (e.g., 4) most important features with their importance values\n",
|
||||
"global_explanation_topk = client.download_model_explanation(top_k=4)\n",
|
||||
"global_importance_values = global_explanation_topk.get_ranked_global_values()\n",
|
||||
"global_importance_names = global_explanation_topk.get_ranked_global_names()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print('global importance values: {}'.format(global_importance_values))\n",
|
||||
"print('global importance names: {}'.format(global_importance_names))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"2. Download model file."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Retrieve model for visualization and deployment\n",
|
||||
"from azureml.core.model import Model\n",
|
||||
"import joblib\n",
|
||||
"original_model = Model(ws, 'xgboost_with_gpu_tree_explainer')\n",
|
||||
"model_path = original_model.download(exist_ok=True)\n",
|
||||
"original_model = joblib.load(model_path)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"3. Download test dataset."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Retrieve x_test for visualization\n",
|
||||
"x_test_path = './x_shap_adult_census.pkl'\n",
|
||||
"run.download_file('x_shap_adult_census.pkl', output_file_path=x_test_path)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"x_test = joblib.load('x_shap_adult_census.pkl')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Visualize\n",
|
||||
"Load the visualization dashboard"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from raiwidgets import ExplanationDashboard"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from interpret_community.common.model_wrapper import wrap_model\n",
|
||||
"from interpret_community.dataset.dataset_wrapper import DatasetWrapper\n",
|
||||
"# note we need to wrap the XGBoost model to output predictions and probabilities in the scikit-learn format\n",
|
||||
"class WrappedXGBoostModel(object):\n",
|
||||
" \"\"\"A class for wrapping an XGBoost model to output integer predicted classes.\"\"\"\n",
|
||||
"\n",
|
||||
" def __init__(self, model):\n",
|
||||
" self.model = model\n",
|
||||
"\n",
|
||||
" def predict(self, dataset):\n",
|
||||
" return self.model.predict(dataset).astype(int)\n",
|
||||
"\n",
|
||||
" def predict_proba(self, dataset):\n",
|
||||
" return self.model.predict_proba(dataset)\n",
|
||||
"\n",
|
||||
"wrapped_model = WrappedXGBoostModel(wrap_model(original_model, DatasetWrapper(x_test), model_task='classification'))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ExplanationDashboard(global_explanation, wrapped_model, dataset=x_test)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
@@ -1,5 +1,18 @@
|
||||
name: train-explain-model-gpu-tree-explainer
|
||||
dependencies:
|
||||
- py-xgboost==1.3.3
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-interpret
|
||||
- flask
|
||||
- flask-cors
|
||||
- gevent>=1.3.6
|
||||
- jinja2
|
||||
- ipython
|
||||
- matplotlib
|
||||
- ipywidgets
|
||||
- raiwidgets~=0.19.0
|
||||
- itsdangerous==2.0.1
|
||||
- markupsafe<2.1.0
|
||||
- scipy>=1.5.3
|
||||
- protobuf==3.20.0
|
||||
|
||||
@@ -11,6 +11,8 @@ dependencies:
|
||||
- matplotlib
|
||||
- azureml-dataset-runtime
|
||||
- ipywidgets
|
||||
- raiwidgets~=0.18.1
|
||||
- raiwidgets~=0.19.0
|
||||
- itsdangerous==2.0.1
|
||||
- markupsafe<2.1.0
|
||||
- scipy>=1.5.3
|
||||
- protobuf==3.20.0
|
||||
|
||||
@@ -10,7 +10,9 @@ dependencies:
|
||||
- ipython
|
||||
- matplotlib
|
||||
- ipywidgets
|
||||
- raiwidgets~=0.18.1
|
||||
- raiwidgets~=0.19.0
|
||||
- packaging>=20.9
|
||||
- itsdangerous==2.0.1
|
||||
- markupsafe<2.1.0
|
||||
- scipy>=1.5.3
|
||||
- protobuf==3.20.0
|
||||
|
||||
@@ -18,7 +18,9 @@ def init():
|
||||
original_model_path = Model.get_model_path('local_deploy_model')
|
||||
scoring_explainer_path = Model.get_model_path('IBM_attrition_explainer')
|
||||
|
||||
# Load the original model into the environment
|
||||
original_model = joblib.load(original_model_path)
|
||||
# Load the scoring explainer into the environment
|
||||
scoring_explainer = joblib.load(scoring_explainer_path)
|
||||
|
||||
|
||||
@@ -29,5 +31,15 @@ def run(raw_data):
|
||||
predictions = original_model.predict(data)
|
||||
# Retrieve model explanations
|
||||
local_importance_values = scoring_explainer.explain(data)
|
||||
# Retrieve the feature names, which we may want to return to the user.
|
||||
# Note: you can also get the raw_features and engineered_features
|
||||
# by calling scoring_explainer.raw_features and
|
||||
# scoring_explainer.engineered_features but you may need to pass
|
||||
# the raw or engineered feature names in the ScoringExplainer
|
||||
# constructor, depending on if you are using feature maps or
|
||||
# transformations on the original explainer.
|
||||
features = scoring_explainer.features
|
||||
# You can return any data type as long as it is JSON-serializable
|
||||
return {'predictions': predictions.tolist(), 'local_importance_values': local_importance_values}
|
||||
return {'predictions': predictions.tolist(),
|
||||
'local_importance_values': local_importance_values,
|
||||
'features': features}
|
||||
|
||||
@@ -10,7 +10,9 @@ dependencies:
|
||||
- ipython
|
||||
- matplotlib
|
||||
- ipywidgets
|
||||
- raiwidgets~=0.18.1
|
||||
- raiwidgets~=0.19.0
|
||||
- packaging>=20.9
|
||||
- itsdangerous==2.0.1
|
||||
- markupsafe<2.1.0
|
||||
- scipy>=1.5.3
|
||||
- protobuf==3.20.0
|
||||
|
||||
@@ -12,6 +12,8 @@ dependencies:
|
||||
- azureml-dataset-runtime
|
||||
- azureml-core
|
||||
- ipywidgets
|
||||
- raiwidgets~=0.18.1
|
||||
- raiwidgets~=0.19.0
|
||||
- itsdangerous==2.0.1
|
||||
- markupsafe<2.1.0
|
||||
- scipy>=1.5.3
|
||||
- protobuf==3.20.0
|
||||
|
||||
@@ -3,3 +3,4 @@ dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-widgets
|
||||
- protobuf==3.20.0
|
||||
|
||||
@@ -437,7 +437,8 @@
|
||||
" - azureml-defaults\n",
|
||||
" - tensorflow-gpu==2.0.0\n",
|
||||
" - keras<=2.3.1\n",
|
||||
" - matplotlib"
|
||||
" - matplotlib\n",
|
||||
" - protobuf==3.20.1"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -989,6 +990,7 @@
|
||||
"cd.add_conda_package('h5py<=2.10.0')\n",
|
||||
"cd.add_conda_package('keras<=2.3.1')\n",
|
||||
"cd.add_pip_package(\"azureml-defaults\")\n",
|
||||
"cd.add_pip_package(\"protobuf==3.20.1\")\n",
|
||||
"cd.save_to_file(base_directory='./', conda_file_path='myenv.yml')\n",
|
||||
"\n",
|
||||
"print(cd.serialize_to_string())"
|
||||
|
||||
@@ -943,6 +943,7 @@
|
||||
"cd.add_conda_package('numpy')\n",
|
||||
"cd.add_pip_package('tensorflow==2.2.0')\n",
|
||||
"cd.add_pip_package(\"azureml-defaults\")\n",
|
||||
"cd.add_pip_package(\"protobuf==3.20.1\")\n",
|
||||
"cd.save_to_file(base_directory='./', conda_file_path='myenv.yml')\n",
|
||||
"\n",
|
||||
"print(cd.serialize_to_string())"
|
||||
|
||||
@@ -8,8 +8,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
rm -rf /var/lib/apt/lists/* && \
|
||||
rm -rf /usr/share/man/*
|
||||
|
||||
RUN conda install -y conda=4.12.0 python=3.7 && conda clean -ay
|
||||
RUN pip install ray-on-aml==0.1.6 & \
|
||||
RUN conda install -y conda=4.13.0 python=3.7 && conda clean -ay
|
||||
RUN pip install ray-on-aml==0.2.1 & \
|
||||
pip install --no-cache-dir \
|
||||
azureml-defaults \
|
||||
azureml-dataset-runtime[fuse,pandas] \
|
||||
|
||||
@@ -8,8 +8,9 @@ dependencies:
|
||||
- matplotlib
|
||||
- azureml-dataset-runtime
|
||||
- ipywidgets
|
||||
- raiwidgets~=0.18.1
|
||||
- raiwidgets~=0.19.0
|
||||
- liac-arff
|
||||
- packaging>=20.9
|
||||
- itsdangerous==2.0.1
|
||||
- markupsafe<2.1.0
|
||||
- protobuf==3.20.0
|
||||
|
||||
@@ -43,6 +43,7 @@
|
||||
" 1. Logging numeric metrics\n",
|
||||
" 1. Logging vectors\n",
|
||||
" 1. Logging tables\n",
|
||||
" 1. Logging when additional Metric Names are required\n",
|
||||
" 1. Uploading files\n",
|
||||
"1. [Analyzing results](#Analyzing-results)\n",
|
||||
" 1. Tagging a run\n",
|
||||
@@ -100,7 +101,7 @@
|
||||
"\n",
|
||||
"# Check core SDK version number\n",
|
||||
"\n",
|
||||
"print(\"This notebook was created using SDK version 1.42.0, you are currently running version\", azureml.core.VERSION)"
|
||||
"print(\"This notebook was created using SDK version 1.43.0, you are currently running version\", azureml.core.VERSION)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -367,7 +368,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Logging for when more Metric Names are required\n",
|
||||
"### Logging when additional Metric Names are required\n",
|
||||
"\n",
|
||||
"Limits on logging are internally enforced to ensure a smooth experience; however, these can sometimes be limiting, particularly in terms of the limit on metric names.\n",
|
||||
"\n",
|
||||
|
||||
@@ -102,7 +102,7 @@
|
||||
"source": [
|
||||
"import azureml.core\n",
|
||||
"\n",
|
||||
"print(\"This notebook was created using version 1.42.0 of the Azure ML SDK\")\n",
|
||||
"print(\"This notebook was created using version 1.43.0 of the Azure ML SDK\")\n",
|
||||
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
||||
]
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user