mirror of https://github.com/Azure/MachineLearningNotebooks.git
synced 2025-12-25 01:00:11 -05:00

Compare commits: 1 commit (release_up ... users/GitH)

| Author | SHA1 | Date |
|---|---|---|
|  | e0c9376aab |  |

SECURITY.md (new file, 41 lines)

@@ -0,0 +1,41 @@
<!-- BEGIN MICROSOFT SECURITY.MD V0.0.7 BLOCK -->

## Security

Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/).

If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/opensource/security/definition), please report it to us as described below.

## Reporting Security Issues

**Please do not report security vulnerabilities through public GitHub issues.**

Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/opensource/security/create-report).

If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/opensource/security/pgpkey).

You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://aka.ms/opensource/security/msrc).

Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue:

* Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.)
* Full paths of source file(s) related to the manifestation of the issue
* The location of the affected source code (tag/branch/commit or direct URL)
* Any special configuration required to reproduce the issue
* Step-by-step instructions to reproduce the issue
* Proof-of-concept or exploit code (if possible)
* Impact of the issue, including how an attacker might exploit the issue

This information will help us triage your report more quickly.

If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/opensource/security/bounty) page for more details about our active programs.

## Preferred Languages

We prefer all communications to be in English.

## Policy

Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/opensource/security/cvd).

<!-- END MICROSOFT SECURITY.MD BLOCK -->

@@ -103,7 +103,7 @@
"source": [
"import azureml.core\n",
"\n",
"print(\"This notebook was created using version 1.44.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.42.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

@@ -6,7 +6,6 @@ dependencies:
- fairlearn>=0.6.2
- joblib
- liac-arff
- raiwidgets~=0.19.0
- raiwidgets~=0.18.1
- itsdangerous==2.0.1
- markupsafe<2.1.0
- protobuf==3.20.0

@@ -6,7 +6,6 @@ dependencies:
- fairlearn>=0.6.2
- joblib
- liac-arff
- raiwidgets~=0.19.0
- raiwidgets~=0.18.1
- itsdangerous==2.0.1
- markupsafe<2.1.0
- protobuf==3.20.0

@@ -13,21 +13,19 @@ dependencies:
- pytorch::pytorch=1.4.0
- conda-forge::fbprophet==0.7.1
- cudatoolkit=10.1.243
- scipy==1.5.3
- scipy==1.5.2
- notebook
- pywin32==227
- PySocks==1.7.1
- jsonschema==4.6.0
- Pygments==2.11.2
- conda-forge::pyqt==5.12.3
- jsonschema==4.7.2

- pip:
  # Required packages for AzureML execution, history, and data preparation.
  - azureml-widgets~=1.44.0
  - azureml-widgets~=1.42.0
  - pytorch-transformers==1.0.0
  - spacy==2.2.4
  - pystan==2.19.1.1
  - https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
  - -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/1.44.0/validated_win32_requirements.txt [--no-deps]
  - -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/1.42.0/validated_win32_requirements.txt [--no-deps]
  - arch==4.14
  - wasabi==0.9.1

@@ -11,10 +11,10 @@ dependencies:
- boto3==1.20.19
- botocore<=1.23.19
- matplotlib==3.2.1
- numpy>=1.21.6,<=1.22.3
- numpy==1.19.5
- cython==0.29.14
- urllib3==1.26.7
- scipy>=1.4.1,<=1.5.3
- scipy>=1.4.1,<=1.5.2
- scikit-learn==0.22.1
- py-xgboost<=1.3.3
- holidays==0.10.3
@@ -24,10 +24,10 @@ dependencies:

- pip:
  # Required packages for AzureML execution, history, and data preparation.
  - azureml-widgets~=1.44.0
  - azureml-widgets~=1.42.0
  - pytorch-transformers==1.0.0
  - spacy==2.2.4
  - pystan==2.19.1.1
  - https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
  - -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/1.44.0/validated_linux_requirements.txt [--no-deps]
  - -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/1.42.0/validated_linux_requirements.txt [--no-deps]
  - arch==4.14

@@ -12,10 +12,10 @@ dependencies:
- boto3==1.20.19
- botocore<=1.23.19
- matplotlib==3.2.1
- numpy>=1.21.6,<=1.22.3
- numpy==1.19.5
- cython==0.29.14
- urllib3==1.26.7
- scipy>=1.4.1,<=1.5.3
- scipy>=1.4.1,<=1.5.2
- scikit-learn==0.22.1
- py-xgboost<=1.3.3
- holidays==0.10.3
@@ -25,10 +25,10 @@ dependencies:

- pip:
  # Required packages for AzureML execution, history, and data preparation.
  - azureml-widgets~=1.44.0
  - azureml-widgets~=1.42.0
  - pytorch-transformers==1.0.0
  - spacy==2.2.4
  - pystan==2.19.1.1
  - https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
  - -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/1.44.0/validated_darwin_requirements.txt [--no-deps]
  - -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/1.42.0/validated_darwin_requirements.txt [--no-deps]
  - arch==4.14

@@ -228,8 +228,8 @@
"n_missing_samples = int(np.floor(data.shape[0] * missing_rate))\n",
"missing_samples = np.hstack(\n",
"    (\n",
"        np.zeros(data.shape[0] - n_missing_samples, dtype=bool),\n",
"        np.ones(n_missing_samples, dtype=bool),\n",
"        np.zeros(data.shape[0] - n_missing_samples, dtype=np.bool),\n",
"        np.ones(n_missing_samples, dtype=np.bool),\n",
"    )\n",
")\n",
"rng = np.random.RandomState(0)\n",

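The hunk above swaps `np.bool` for the builtin `bool`. For context (not part of the diff): NumPy deprecated its builtin-type aliases such as `np.bool` and `np.float` in 1.20 and removed them in 1.24, so notebooks pinned to newer NumPy must use the builtins or the explicit scalar types. A minimal sketch of the replacement, assuming nothing beyond NumPy itself:

```python
import numpy as np

# Builtin dtypes work on every NumPy version; the removed aliases
# (np.bool, np.float) raise AttributeError on NumPy >= 1.24.
mask = np.zeros(5, dtype=bool)      # was: dtype=np.bool
values = np.ones(5, dtype=float)    # was: dtype=np.float

# The sized scalar types remain available when the exact width matters.
mask_alt = np.zeros(5, dtype=np.bool_)
values_alt = np.ones(5, dtype=np.float64)
```
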
@@ -1074,7 +1074,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.12"
"version": "3.6.9"
},
"nteract": {
"version": "nteract-front-end@1.0.0"

@@ -207,11 +207,11 @@
"\n",
"def remove_blanks_20news(data, feature_column_name, target_column_name):\n",
"\n",
"    for index, row in data.iterrows():\n",
"        data.at[index, feature_column_name] = (\n",
"            row[feature_column_name].replace(\"\\n\", \" \").strip()\n",
"        )\n",
"\n",
"    data[feature_column_name] = (\n",
"        data[feature_column_name]\n",
"        .replace(r\"\\n\", \" \", regex=True)\n",
"        .apply(lambda x: x.strip())\n",
"    )\n",
"    data = data[data[feature_column_name] != \"\"]\n",
"\n",
"    return data"

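The refactor above drops the row-by-row `iterrows()` loop in favor of pandas' vectorized string handling, which is both shorter and faster on large frames. A standalone sketch of the same pattern, with an illustrative `text` column that is not from the diff:

```python
import pandas as pd

df = pd.DataFrame({"text": ["first\nline ", " second\ntext", ""]})

# Vectorized: replace newlines and strip whitespace over the whole column,
# then drop rows left empty - no per-row loop required.
df["text"] = df["text"].replace(r"\n", " ", regex=True).str.strip()
df = df[df["text"] != ""]
print(df)
```
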
@@ -1,5 +1,6 @@
import pandas as pd
from azureml.core import Environment, ScriptRunConfig
from azureml.core import Environment
from azureml.train.estimator import Estimator
from azureml.core.run import Run


@@ -15,19 +16,16 @@ def run_inference(

    inference_env = train_run.get_environment()

    est = ScriptRunConfig(
    est = Estimator(
        source_directory=script_folder,
        script="infer.py",
        arguments=[
            "--target_column_name",
            target_column_name,
            "--model_name",
            model_name,
            "--input-data",
            test_dataset.as_named_input("data"),
        ],
        entry_script="infer.py",
        script_params={
            "--target_column_name": target_column_name,
            "--model_name": model_name,
        },
        inputs=[test_dataset.as_named_input("test_data")],
        compute_target=compute_target,
        environment=inference_env,
        environment_definition=inference_env,
    )

    run = test_experiment.submit(

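The hunk above migrates the helper from the deprecated `Estimator` to `ScriptRunConfig`: `script_params` and `inputs` collapse into one flat `arguments` list (the named input rides along as an argument), `entry_script` becomes `script`, and `environment_definition` becomes `environment`. A hedged sketch of the full new-style submission, assuming the surrounding names (`script_folder`, `compute_target`, `inference_env`, `test_dataset`, `test_experiment`, `target_column_name`, `model_name`) exist as in the diff:

```python
from azureml.core import ScriptRunConfig

config = ScriptRunConfig(
    source_directory=script_folder,
    script="infer.py",
    arguments=[
        "--target_column_name", target_column_name,
        "--model_name", model_name,
        "--input-data", test_dataset.as_named_input("data"),
    ],
    compute_target=compute_target,
    environment=inference_env,
)

# Submission is unchanged: pass the config where the Estimator used to go.
run = test_experiment.submit(config)
run.wait_for_completion(show_output=True)
```
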
@@ -6,7 +6,7 @@ import numpy as np
from sklearn.externals import joblib

from azureml.automl.runtime.shared.score import scoring, constants
from azureml.core import Run, Dataset
from azureml.core import Run
from azureml.core.model import Model


@@ -21,8 +21,6 @@ parser.add_argument(
    "--model_name", type=str, dest="model_name", help="Name of registered model"
)

parser.add_argument("--input-data", type=str, dest="input_data", help="Dataset")

args = parser.parse_args()
target_column_name = args.target_column_name
model_name = args.model_name

@@ -36,8 +34,8 @@ model_path = Model.get_model_path(model_name)
model = joblib.load(model_path)

run = Run.get_context()

test_dataset = Dataset.get_by_id(run.experiment.workspace, id=args.input_data)
# get input dataset by name
test_dataset = run.input_datasets["test_data"]

X_test_df = test_dataset.drop_columns(
    columns=[target_column_name]

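On the scoring side, the dataset now arrives as an id string through the `--input-data` argument (that is how a named input passed in `arguments` is materialized on the command line), so the script resolves it with `Dataset.get_by_id` instead of looking up `run.input_datasets` by name. A hedged sketch of both retrieval styles inside the run context, assuming `args` was parsed as in the hunk above:

```python
from azureml.core import Dataset, Run

run = Run.get_context()

# New style: resolve the dataset by the id handed over on the command line.
test_dataset = Dataset.get_by_id(run.experiment.workspace, id=args.input_data)

# Old style (removed by this diff): look it up by the name given to
# as_named_input() at submission time.
# test_dataset = run.input_datasets["test_data"]
```
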
@@ -120,13 +120,9 @@ except Exception:
end_time = datetime(2021, 5, 1, 0, 0)
end_time_last_slice = end_time - relativedelta(weeks=2)

try:
    train_df = get_noaa_data(end_time_last_slice, end_time)
except Exception as ex:
    print("get_noaa_data failed:", ex)
    train_df = None
train_df = get_noaa_data(end_time_last_slice, end_time)

if train_df is not None and train_df.size > 0:
if train_df.size > 0:
    print(
        "Received {0} rows of new data after {1}.".format(
            train_df.shape[0], end_time_last_slice

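The change above makes the NOAA refresh defensive: the fetch is wrapped in try/except and the size check is guarded by `train_df is not None`, so a transient upstream failure logs and skips instead of crashing the job. A runnable sketch of the pattern with a stand-in fetch function (`get_data` is illustrative, not the repo's `get_noaa_data`):

```python
from datetime import datetime
from dateutil.relativedelta import relativedelta

def get_data(start, end):
    # Stand-in fetch that fails like a flaky upstream service would.
    raise ConnectionError("upstream unavailable")

end_time = datetime(2021, 5, 1, 0, 0)
start_time = end_time - relativedelta(weeks=2)

try:
    train_df = get_data(start_time, end_time)
except Exception as ex:
    print("fetch failed:", ex)
    train_df = None

if train_df is not None and train_df.size > 0:
    print("Received {0} rows of new data.".format(train_df.shape[0]))
```
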
@@ -1,346 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
"\n",
"Licensed under the MIT License."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Automated Machine Learning - Codegen for AutoFeaturization \n",
"_**Autofeaturization of credit card fraudulent transactions dataset on remote compute and codegen functionality**_\n",
"\n",
"## Contents\n",
"1. [Introduction](#Introduction)\n",
"1. [Setup](#Setup)\n",
"1. [Data](#Data)\n",
"1. [Autofeaturization](#Autofeaturization)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<a id='Introduction'></a>\n",
"## Introduction"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Autofeaturization** lets you run an AutoML experiment to only featurize the datasets. These datasets along with the transformer are stored in AML Storage and linked to the run, from which they can later be retrieved and used to train models. \n",
"\n",
"**To run Autofeaturization, set the number of iterations to zero and featurization as auto.**\n",
"\n",
"Please refer to [Autofeaturization and custom model training](../autofeaturization-custom-model-training/custom-model-training-from-autofeaturization-run.ipynb) for more details on the same.\n",
"\n",
"[Codegen](https://github.com/Azure/automl-codegen-preview) is a feature which, when enabled, provides the user with the script of the underlying functionality and a notebook to tweak inputs or code and rerun the same.\n",
"\n",
"In this example we use the credit card fraudulent transactions dataset to showcase how you can use AutoML for autofeaturization and further how you can enable the `Codegen` feature.\n",
"\n",
"This notebook uses remote compute to complete the featurization.\n",
"\n",
"If you are using an Azure Machine Learning Compute Instance, you are all set. Otherwise, go through the [configuration](../../configuration.ipynb) notebook first if you haven't already, to establish your connection to the AzureML Workspace. \n",
"\n",
"Here you will learn how to create an autofeaturization experiment using an existing workspace with the codegen feature enabled."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<a id='Setup'></a>\n",
"## Setup\n",
"\n",
"As part of the setup you have already created an Azure ML `Workspace` object. For Automated ML you will need to create an `Experiment` object, which is a named object in a `Workspace` used to run experiments."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import logging\n",
"import pandas as pd\n",
"import azureml.core\n",
"from azureml.core.experiment import Experiment\n",
"from azureml.core.workspace import Workspace\n",
"from azureml.core.dataset import Dataset\n",
"from azureml.train.automl import AutoMLConfig"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This sample notebook may use features that are not available in previous versions of the Azure ML SDK."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.44.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ws = Workspace.from_config()\n",
"\n",
"# choose a name for experiment\n",
"experiment_name = 'automl-autofeaturization-ccard-codegen-remote'\n",
"\n",
"experiment=Experiment(ws, experiment_name)\n",
"\n",
"output = {}\n",
"output['Subscription ID'] = ws.subscription_id\n",
"output['Workspace'] = ws.name\n",
"output['Resource Group'] = ws.resource_group\n",
"output['Location'] = ws.location\n",
"output['Experiment Name'] = experiment.name\n",
"outputDf = pd.DataFrame(data = output, index = [''])\n",
"outputDf.T"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Create or Attach existing AmlCompute\n",
"A compute target is required to execute the Automated ML run. In this tutorial, you create AmlCompute as your training compute resource.\n",
"\n",
"> Note that if you have an AzureML Data Scientist role, you will not have permission to create compute resources. Talk to your workspace or IT admin to create the compute targets described in this section, if they do not already exist.\n",
"\n",
"#### Creation of AmlCompute takes approximately 5 minutes. \n",
"If the AmlCompute with that name is already in your workspace this code will skip the creation process.\n",
"As with other Azure services, there are limits on certain resources (e.g. AmlCompute) associated with the Azure Machine Learning service. Please read [this article](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
"from azureml.core.compute_target import ComputeTargetException\n",
"\n",
"# Choose a name for your CPU cluster\n",
"cpu_cluster_name = \"cpu-cluster\"\n",
"\n",
"# Verify that cluster does not exist already\n",
"try:\n",
"    compute_target = ComputeTarget(workspace=ws, name=cpu_cluster_name)\n",
"    print('Found existing cluster, use it.')\n",
"except ComputeTargetException:\n",
"    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_DS12_V2',\n",
"                                                           max_nodes=6)\n",
"    compute_target = ComputeTarget.create(ws, cpu_cluster_name, compute_config)\n",
"\n",
"compute_target.wait_for_completion(show_output=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<a id='Data'></a>\n",
"## Data"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Load Data\n",
"\n",
"Load the credit card fraudulent transactions dataset from a CSV file, containing both training features and labels. The features are inputs to the model, while the training labels represent the expected output of the model. \n",
"\n",
"Here the autofeaturization run will featurize the training data passed in."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Training Dataset"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"training_data = \"https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/creditcard_train.csv\"\n",
"training_dataset = Dataset.Tabular.from_delimited_files(training_data) # Tabular dataset\n",
"\n",
"label_column_name = 'Class' # output label"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<a id='Autofeaturization'></a>\n",
"## AutoFeaturization\n",
"\n",
"Instantiate an AutoMLConfig object. This defines the settings and data used to run the autofeaturization experiment.\n",
"\n",
"|Property|Description|\n",
"|-|-|\n",
"|**task**|classification or regression or forecasting|\n",
"|**training_data**|Input training dataset, containing both features and label column.|\n",
"|**iterations**|For an autofeaturization run, iterations will be 0.|\n",
"|**featurization**|For an autofeaturization run, featurization can be 'auto' or 'custom'.|\n",
"|**label_column_name**|The name of the label column.|\n",
"|**enable_code_generation**|Set to True to enable codegen for the run.|\n",
"\n",
"**_You can find more information about primary metrics_** [here](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-configure-auto-train#primary-metric)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"automl_config = AutoMLConfig(task = 'classification',\n",
"                             debug_log = 'automl_errors.log',\n",
"                             iterations = 0, # autofeaturization run can be triggered by setting iterations to 0\n",
"                             compute_target = compute_target,\n",
"                             training_data = training_dataset,\n",
"                             label_column_name = label_column_name,\n",
"                             featurization = 'auto',\n",
"                             verbosity = logging.INFO,\n",
"                             enable_code_generation = True # enable codegen\n",
"                            )"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Call the `submit` method on the experiment object and pass the run configuration. Depending on the data this can run for a while. Validation errors and current status will be shown when setting `show_output=True` and the execution will be synchronous."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"remote_run = experiment.submit(automl_config, show_output = False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Results"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Widget for Monitoring Runs\n",
"\n",
"The widget will first report a \"loading\" status while running the first iteration. After completing the first iteration, an auto-updating graph and table will be shown. The widget will refresh once per minute, so you should see the graph update as child runs complete.\n",
"\n",
"**Note:** The widget displays a link at the bottom. Use this link to open a web interface to explore the individual run details"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.widgets import RunDetails\n",
"RunDetails(remote_run).show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"remote_run.wait_for_completion(show_output=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Codegen Script and Notebook"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The codegen script and notebook can be found under the `Outputs + logs` section on the details page of the remote run. Look for `autofeaturization_notebook.ipynb` under `/outputs/generated_code`. To modify the featurization code, open `script.py` and make changes. The codegen notebook can be run with the same environment configuration as the above AutoML run."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Experiment Complete!"
]
}
],
"metadata": {
"authors": [
{
"name": "bhavanatumma"
}
],
"interpreter": {
"hash": "adb464b67752e4577e3dc163235ced27038d19b7d88def00d75d1975bde5d9ab"
},
"kernelspec": {
"display_name": "Python 3.6",
"language": "python",
"name": "python36"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

@@ -1,4 +0,0 @@
name: codegen-for-autofeaturization
dependencies:
- pip:
  - azureml-sdk

@@ -1,735 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
"\n",
"Licensed under the MIT License."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Automated Machine Learning - AutoFeaturization (Part 1)\n",
"_**Autofeaturization of credit card fraudulent transactions dataset on remote compute**_\n",
"\n",
"## Contents\n",
"1. [Introduction](#Introduction)\n",
"1. [Setup](#Setup)\n",
"1. [Data](#Data)\n",
"1. [Autofeaturization](#Autofeaturization)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<a id='Introduction'></a>\n",
"## Introduction"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Autofeaturization is a new feature that lets you run an AutoML experiment to only featurize the datasets. These datasets along with the transformer will be stored in the experiment, from which they can later be retrieved and used to train models, either via AutoML or custom training. \n",
"\n",
"**To run Autofeaturization, pass in zero iterations and featurization as auto. This will featurize the datasets and terminate the experiment. Training will not occur.**\n",
"\n",
"*Limitations - Sparse data is not supported at the moment. Any dataset with extensive categorical data might be featurized into sparse data, which is not allowed as input to AutoML. Efforts to support sparse data are underway.* \n",
"\n",
"In this example we use the credit card fraudulent transactions dataset to showcase how you can use AutoML for autofeaturization. The goal is to clean and featurize the training dataset.\n",
"\n",
"This notebook uses remote compute to complete the featurization.\n",
"\n",
"If you are using an Azure Machine Learning Compute Instance, you are all set. Otherwise, go through the [configuration](../../configuration.ipynb) notebook first if you haven't already, to establish your connection to the AzureML Workspace. \n",
"\n",
"In the below steps, you will learn how to:\n",
"1. Create an autofeaturization experiment using an existing workspace.\n",
"2. View the featurized datasets and transformer"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<a id='Setup'></a>\n",
"## Setup\n",
"\n",
"As part of the setup you have already created an Azure ML `Workspace` object. For Automated ML you will need to create an `Experiment` object, which is a named object in a `Workspace` used to run experiments."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import logging\n",
"import pandas as pd\n",
"import azureml.core\n",
"from azureml.core.experiment import Experiment\n",
"from azureml.core.workspace import Workspace\n",
"from azureml.core.dataset import Dataset\n",
"from azureml.train.automl import AutoMLConfig"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This sample notebook may use features that are not available in previous versions of the Azure ML SDK."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.44.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ws = Workspace.from_config()\n",
"\n",
"# choose a name for experiment\n",
"experiment_name = 'automl-autofeaturization-ccard-remote'\n",
"\n",
"experiment=Experiment(ws, experiment_name)\n",
"\n",
"output = {}\n",
"output['Subscription ID'] = ws.subscription_id\n",
"output['Workspace'] = ws.name\n",
"output['Resource Group'] = ws.resource_group\n",
"output['Location'] = ws.location\n",
"output['Experiment Name'] = experiment.name\n",
"outputDf = pd.DataFrame(data = output, index = [''])\n",
"outputDf.T"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Create or Attach existing AmlCompute\n",
"A compute target is required to execute the Automated ML run. In this tutorial, you create AmlCompute as your training compute resource.\n",
"\n",
"> Note that if you have an AzureML Data Scientist role, you will not have permission to create compute resources. Talk to your workspace or IT admin to create the compute targets described in this section, if they do not already exist.\n",
"\n",
"#### Creation of AmlCompute takes approximately 5 minutes. \n",
"If the AmlCompute with that name is already in your workspace this code will skip the creation process.\n",
"As with other Azure services, there are limits on certain resources (e.g. AmlCompute) associated with the Azure Machine Learning service. Please read [this article](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
"from azureml.core.compute_target import ComputeTargetException\n",
"\n",
"# Choose a name for your CPU cluster\n",
"cpu_cluster_name = \"cpu-cluster\"\n",
"\n",
"# Verify that cluster does not exist already\n",
"try:\n",
"    compute_target = ComputeTarget(workspace=ws, name=cpu_cluster_name)\n",
"    print('Found existing cluster, use it.')\n",
"except ComputeTargetException:\n",
"    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_DS12_V2',\n",
"                                                           max_nodes=6)\n",
"    compute_target = ComputeTarget.create(ws, cpu_cluster_name, compute_config)\n",
"\n",
"compute_target.wait_for_completion(show_output=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<a id='Data'></a>\n",
"## Data"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Load Data\n",
"\n",
"Load the credit card fraudulent transactions dataset from a CSV file, containing both training features and labels. The features are inputs to the model, while the training labels represent the expected output of the model. \n",
"\n",
"Here the autofeaturization run will featurize the training data passed in."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Training Dataset"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"training_data = \"https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/creditcard_train.csv\"\n",
"training_dataset = Dataset.Tabular.from_delimited_files(training_data) # Tabular dataset\n",
"\n",
"label_column_name = 'Class' # output label"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<a id='Autofeaturization'></a>\n",
"## AutoFeaturization\n",
"\n",
"Instantiate an AutoMLConfig object. This defines the settings and data used to run the autofeaturization experiment.\n",
"\n",
"|Property|Description|\n",
"|-|-|\n",
"|**task**|classification or regression|\n",
"|**training_data**|Input training dataset, containing both features and label column.|\n",
"|**iterations**|For an autofeaturization run, iterations will be 0.|\n",
"|**featurization**|For an autofeaturization run, featurization will be 'auto'.|\n",
"|**label_column_name**|The name of the label column.|\n",
"\n",
"**_You can find more information about primary metrics_** [here](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-configure-auto-train#primary-metric)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"automl_config = AutoMLConfig(task = 'classification',\n",
"                             debug_log = 'automl_errors.log',\n",
"                             iterations = 0, # autofeaturization run can be triggered by setting iterations to 0\n",
"                             compute_target = compute_target,\n",
"                             training_data = training_dataset,\n",
"                             label_column_name = label_column_name,\n",
"                             featurization = 'auto',\n",
"                             verbosity = logging.INFO\n",
"                            )"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Call the `submit` method on the experiment object and pass the run configuration. Depending on the data this can run for a while. Validation errors and current status will be shown when setting `show_output=True` and the execution will be synchronous."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"remote_run = experiment.submit(automl_config, show_output = False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Transformer and Featurized Datasets\n",
"The given datasets have been featurized and stored under `Outputs + logs` on the details page of the remote run. The structure is shown below. The featurized dataset is stored under `/outputs/featurization/data` and the transformer is saved under `/outputs/featurization/pipeline` \n",
"\n",
"Below you will learn how to refer to the data saved in your run and retrieve the same."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Results"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Widget for Monitoring Runs\n",
"\n",
"The widget will first report a \"loading\" status while running the first iteration. After completing the first iteration, an auto-updating graph and table will be shown. The widget will refresh once per minute, so you should see the graph update as child runs complete.\n",
"\n",
"**Note:** The widget displays a link at the bottom. Use this link to open a web interface to explore the individual run details"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.widgets import RunDetails\n",
"RunDetails(remote_run).show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"remote_run.wait_for_completion(show_output=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Automated Machine Learning - AutoFeaturization (Part 2)\n",
"_**Training using a custom model with the featurized data from Autofeaturization run of credit card fraudulent transactions dataset**_\n",
"\n",
"## Contents\n",
"1. [Introduction](#Introduction)\n",
"1. [Data Setup](#DataSetup)\n",
"1. [Autofeaturization Data](#AutofeaturizationData)\n",
"1. [Train](#Train)\n",
"1. [Results](#Results)\n",
"1. [Test](#Test)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<a id='Introduction'></a>\n",
"## Introduction\n",
"\n",
"Here we use the featurized dataset saved in the above run to showcase how you can perform custom training by using the transformer from an autofeaturization run to transform validation / test datasets. \n",
"\n",
"The goal is to use autofeaturized run data and the transformer to transform data and run a custom training experiment independently.\n",
"\n",
"In the below steps, you will learn how to:\n",
"1. Read transformer from a completed autofeaturization run and transform data\n",
"2. Pull featurized data from a completed autofeaturization run\n",
"3. Run a custom training experiment with the above data\n",
"4. Check results"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<a id='DataSetup'></a>\n",
"## Data Setup"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We will load the featurized training data and also load the transformer from the above autofeaturized run. This transformer can then be used to transform the test data to check the accuracy of the custom model after training."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Load Test Data"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Load the test dataset from CSV and split it into X and y columns to featurize with the transformer going forward."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"test_data = \"https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/creditcard_test.csv\"\n",
"\n",
"test_dataset = pd.read_csv(test_data)\n",
"label_column_name = 'Class'\n",
"\n",
"X_test_data = test_dataset[test_dataset.columns.difference([label_column_name])]\n",
"y_test_data = test_dataset[label_column_name].values\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Load data_transformer from the above remote run artifact"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### (Method 1)\n",
"\n",
"Method 1 allows you to read the transformer from the remote storage."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import mlflow\n",
"mlflow.set_tracking_uri(ws.get_mlflow_tracking_uri())\n",
"\n",
"# Set uri to fetch data transformer from remote parent run.\n",
"artifact_path = \"/outputs/featurization/pipeline/\"\n",
"uri = \"runs:/\" + remote_run.id + artifact_path\n",
"\n",
"print(uri)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### (Method 2)\n",
"\n",
"Method 2 downloads the transformer to the local directory, where it can then be used to transform the data. Uncomment to use."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"''' import pathlib\n",
"\n",
"# Download the transformer to the local directory\n",
"transformers_file_path = \"/outputs/featurization/pipeline/\"\n",
"local_path = \"./transformer\"\n",
"remote_run.download_files(prefix=transformers_file_path, output_directory=local_path, batch_size=500)\n",
"\n",
"path = pathlib.Path(\"transformer\") \n",
"path = str(path.absolute()) + transformers_file_path\n",
"str_uri = \"file:///\" + path\n",
"\n",
"print(str_uri) '''"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Transform Data"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Note:** Not all datasets produce a y_transformer. The dataset used in the current notebook requires a transformer as the y column data is categorical."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.automl.core.shared.constants import Transformers\n",
"\n",
"transformers = mlflow.sklearn.load_model(uri) # Using method 1\n",
"data_transformers = transformers.get_transformers()\n",
"x_transformer = data_transformers[Transformers.X_TRANSFORMER]\n",
"y_transformer = data_transformers[Transformers.Y_TRANSFORMER]\n",
"\n",
"X_test = x_transformer.transform(X_test_data)\n",
"y_test = y_transformer.transform(y_test_data)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Run the following cell to see the featurization summary of X and y transformers. "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"X_data_summary = x_transformer.get_featurization_summary(is_user_friendly=False)\n",
"\n",
"summary_df = pd.DataFrame.from_records(X_data_summary)\n",
"summary_df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Load Datastore\n",
"\n",
"The datastore below holds the featurized datasets, so we load and access the data from it. Check the path and file names against the saved structure in your experiment's `Outputs + logs`, as seen in <i>Autofeaturization Part 1</i>"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.datastore import Datastore\n",
"\n",
"ds = Datastore.get(ws, \"workspaceartifactstore\")\n",
"experiment_loc = \"ExperimentRun/dcid.\" + remote_run.id\n",
"\n",
"remote_data_path = \"/outputs/featurization/data/\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<a id='AutofeaturizationData'></a>\n",
"## Autofeaturization Data\n",
"\n",
"We will load the training data from the previously completed Autofeaturization experiment. The resulting featurized dataframe can be passed into the custom model for training. Here we are saving the file to local from the experiment storage and reading the data."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"train_data_file_path = \"full_training_dataset.df.parquet\"\n",
"local_data_path = \"./data/\" + train_data_file_path\n",
"\n",
"remote_run.download_file(remote_data_path + train_data_file_path, local_data_path)\n",
"\n",
"full_training_data = pd.read_parquet(local_data_path)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Another way to load the data is to go to the above autofeaturization experiment and check for the featurized dataset ids under `Output datasets`. Uncomment and replace them accordingly below to use."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# train_data = Dataset.get_by_id(ws, 'cb4418ee-bac4-45ac-b055-600653bdf83a') # replace the featurized full_training_dataset id\n",
"# full_training_data = train_data.to_pandas_dataframe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Training Data"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We are dropping the y column and weights column from the featurized training dataset."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"Y_COLUMN = \"automl_y\"\n",
"SW_COLUMN = \"automl_weights\"\n",
"\n",
"X_train = full_training_data[full_training_data.columns.difference([Y_COLUMN, SW_COLUMN])]\n",
"y_train = full_training_data[Y_COLUMN].values\n",
"sample_weight = full_training_data[SW_COLUMN].values"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<a id='Train'></a>\n",
"## Train"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Here we are passing our training data to the lightgbm classifier; any custom model can be used with your data."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import lightgbm as lgb\n",
"\n",
"model = lgb.LGBMClassifier(learning_rate=0.08,max_depth=-5,random_state=42)\n",
"model.fit(X_train, y_train, sample_weight=sample_weight, eval_set=[(X_test, y_test),(X_train, y_train)],\n",
"          verbose=20,eval_metric='logloss')\n",
"\n",
"print('Training accuracy {:.4f}'.format(model.score(X_train, y_train)))\n",
"print('Testing accuracy {:.4f}'.format(model.score(X_test, y_test)))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<a id='Results'></a>\n",
"## Analyze results\n",
"\n",
"### Retrieve the Model"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"model"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<a id='Test'></a>\n",
"## Test the fitted model\n",
"\n",
"Now that the model is trained, split the data in the same way the data was split for training (the difference here is that the data is split locally) and then run the test data through the trained model to get the predicted values."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"y_pred = model.predict(X_test)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Calculate metrics for the prediction\n",
"\n",
"Now visualize, with a confusion matrix, how the truth (actual) values compare to the predicted values \n",
"from the trained model that was returned."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.metrics import confusion_matrix\n",
"from matplotlib import pyplot as plt\n",
"import numpy as np\n",
"import itertools\n",
"\n",
"cf =confusion_matrix(y_test,y_pred)\n",
"plt.imshow(cf,cmap=plt.cm.Blues,interpolation='nearest')\n",
"plt.colorbar()\n",
"plt.title('Confusion Matrix')\n",
"plt.xlabel('Predicted')\n",
"plt.ylabel('Actual')\n",
"class_labels = ['False','True']\n",
"tick_marks = np.arange(len(class_labels))\n",
"plt.xticks(tick_marks,class_labels)\n",
"plt.yticks([-0.5,0,1,1.5],['','False','True',''])\n",
"# plotting text value inside cells\n",
"thresh = cf.max() / 2.\n",
"for i,j in itertools.product(range(cf.shape[0]),range(cf.shape[1])):\n",
"    plt.text(j,i,format(cf[i,j],'d'),horizontalalignment='center',color='white' if cf[i,j] >thresh else 'black')\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Experiment Complete!"
]
}
],
"metadata": {
"authors": [
{
"name": "bhavanatumma"
}
],
"interpreter": {
"hash": "adb464b67752e4577e3dc163235ced27038d19b7d88def00d75d1975bde5d9ab"
},
"kernelspec": {
"display_name": "Python 3.6",
"language": "python",
"name": "python36"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

@@ -1,4 +0,0 @@
name: custom-model-training-from-autofeaturization-run
dependencies:
- pip:
  - azureml-sdk

@@ -7,13 +7,12 @@ dependencies:
- cython==0.29.14
- urllib3==1.26.7
- PyJWT < 2.0.0
- numpy==1.21.6
- numpy==1.18.5
- pywin32==227
- cryptography<37.0.0

- pip:
  # Required packages for AzureML execution, history, and data preparation.
  - azure-core==1.24.1
  - azure-core==1.21.1
  - azure-identity==1.7.0
  - azureml-defaults
  - azureml-sdk

@@ -10,12 +10,11 @@ dependencies:
- python>=3.6.0,<3.9
- urllib3==1.26.7
- PyJWT < 2.0.0
- numpy>=1.21.6,<=1.22.3
- cryptography<37.0.0
- numpy==1.19.5

- pip:
  # Required packages for AzureML execution, history, and data preparation.
  - azure-core==1.24.1
  - azure-core==1.21.1
  - azure-identity==1.7.0
  - azureml-defaults
  - azureml-sdk

@@ -92,7 +92,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.44.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.42.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

@@ -91,7 +91,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.44.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.42.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

@@ -524,7 +524,7 @@
"metadata": {},
"outputs": [],
"source": [
"model_list = Model.list(ws, tags=[[\"experiment\", \"automl-backtesting\"]])\n",
"model_list = Model.list(ws, tags={\"experiment\": \"automl-backtesting\"})\n",
"model_data = {\"name\": [], \"last_training_date\": []}\n",
"for model in model_list:\n",
"    if (\n",

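The tag filter above moves from a dict to a list of `[key, value]` pairs, the form `Model.list` documents: its `tags` parameter takes a list whose items are either a bare key or a `[key, value]` pair. A hedged sketch, assuming a `ws` workspace object as elsewhere in these notebooks:

```python
from azureml.core.model import Model

# Filter registered models on an exact tag key/value pair...
by_pair = Model.list(ws, tags=[["experiment", "automl-backtesting"]])
# ...or on the presence of a tag key alone.
by_key = Model.list(ws, tags=["experiment"])

for m in by_pair:
    print(m.name, m.version)
```
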
@@ -72,8 +72,6 @@ def get_backtest_pipeline(
    run_config.docker.use_docker = True
    run_config.environment = env

    utilities.set_environment_variables_for_run(run_config)

    split_data = PipelineData(name="split_data_output", datastore=None).as_dataset()
    split_step = PythonScriptStep(
        name="split_data_for_backtest",
@@ -116,7 +114,6 @@ def get_backtest_pipeline(
        run_invocation_timeout=3600,
        node_count=node_count,
    )
    utilities.set_environment_variables_for_run(back_test_config)
    forecasts = PipelineData(name="forecasts", datastore=None)
    if model_name:
        parallel_step_name = "{}-backtest".format(model_name.replace("_", "-"))
@@ -152,7 +149,12 @@ def get_backtest_pipeline(
        inputs=[forecasts.as_mount()],
        outputs=[data_results],
        source_directory=PROJECT_FOLDER,
        arguments=["--forecasts", forecasts, "--output-dir", data_results],
        arguments=[
            "--forecasts",
            forecasts,
            "--output-dir",
            data_results,
        ],
        runconfig=run_config,
        compute_target=compute_target,
        allow_reuse=False,

@@ -647,11 +647,13 @@
"        & (fulldata[time_column_name] <= forecast_origin + horizon)\n",
"    ]\n",
"\n",
"    y_past = X_past.pop(target_column_name).values.astype(float)\n",
"    y_future = X_future.pop(target_column_name).values.astype(float)\n",
"    y_past = X_past.pop(target_column_name).values.astype(np.float)\n",
"    y_future = X_future.pop(target_column_name).values.astype(np.float)\n",
"\n",
"    # Now take y_future and turn it into question marks\n",
"    y_query = y_future.copy().astype(float)  # because sometimes life hands you an int\n",
"    y_query = y_future.copy().astype(\n",
"        np.float\n",
"    )  # because sometimes life hands you an int\n",
"    y_query.fill(np.NaN)\n",
"\n",
"    print(\"X_past is \" + str(X_past.shape) + \" - shaped\")\n",

@@ -95,7 +95,7 @@ def do_rolling_forecast_with_lookback(
        # Extract test data from an expanding window up-to the horizon
        expand_wind = X[time_column_name] < horizon_time
        X_test_expand = X[expand_wind]
        y_query_expand = np.zeros(len(X_test_expand)).astype(float)
        y_query_expand = np.zeros(len(X_test_expand)).astype(np.float)
        y_query_expand.fill(np.NaN)

        if origin_time != X[time_column_name].min():
@@ -176,7 +176,7 @@ def do_rolling_forecast(fitted_model, X_test, y_test, max_horizon, freq="D"):
        # Extract test data from an expanding window up-to the horizon
        expand_wind = X_test[time_column_name] < horizon_time
        X_test_expand = X_test[expand_wind]
        y_query_expand = np.zeros(len(X_test_expand)).astype(float)
        y_query_expand = np.zeros(len(X_test_expand)).astype(np.float)
        y_query_expand.fill(np.NaN)

        if origin_time != X_test[time_column_name].min():

@@ -242,34 +242,6 @@
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### 2.4 Configure data with ``OutputFileDatasetConfig`` objects\n",
|
||||
"This step shows how to configure output data from a pipeline step. One of the use cases for this step is when you want to do some preprocessing before feeding the data to training step. Intermediate data (or output of a step) is represented by an ``OutputFileDatasetConfig`` object. ``output_data`` is produced as the output of a step. Optionally, this data can be registered as a dataset by calling the ``register_on_complete`` method. If you create an ``OutputFileDatasetConfig`` in one step and use it as an input to another step, that data dependency between steps creates an implicit execution order in the pipeline.\n",
"\n",
"``OutputFileDatasetConfig`` objects return a directory, and by default write output to the default datastore of the workspace.\n",
"\n",
"Since instance creation for class ``OutputTabularDatasetConfig`` is not allowed, we first create an instance of this class. Then we use the ``read_parquet_files`` method to read the parquet file into ``OutputTabularDatasetConfig``."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.data.output_dataset_config import OutputFileDatasetConfig\n",
"\n",
"output_data = OutputFileDatasetConfig(\n",
" name=\"processed_data\", destination=(dstore, \"outputdataset/{run-id}/{output-name}\")\n",
").as_upload()\n",
"# output_data_dataset = output_data.register_on_complete(\n",
"# name='processed_data', description = 'files from prev step')\n",
"output_data = output_data.read_parquet_files()"
]
},
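For reference, a minimal sketch of how the conversion and the optional registration chain together (variable names here are illustrative, not from the notebook):

from azureml.data.output_dataset_config import OutputFileDatasetConfig

processed = OutputFileDatasetConfig(name="processed_data").as_upload()
tabular = processed.read_parquet_files()  # convert to an OutputTabularDatasetConfig
dataset = tabular.register_on_complete(name="processed_data")  # optional registration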
{
"cell_type": "markdown",
"metadata": {},
@@ -331,48 +303,6 @@
" print(compute_target.status.serialize())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Configure the training run's environment\n",
"The next step is making sure that the remote training run has all the dependencies needed by the training steps. Dependencies and the runtime context are set by creating and configuring a RunConfiguration object.\n",
"\n",
"The code below shows two options for handling dependencies. As presented, with ``USE_CURATED_ENV = True``, the configuration is based on a [curated environment](https://docs.microsoft.com/en-us/azure/machine-learning/resource-curated-environments). Curated environments have prebuilt Docker images in the [Microsoft Container Registry](https://hub.docker.com/publishers/microsoftowner). For more information, see [Azure Machine Learning curated environments](https://docs.microsoft.com/en-us/azure/machine-learning/resource-curated-environments).\n",
"\n",
"The path taken if you change ``USE_CURATED_ENV`` to False shows the pattern for explicitly setting your dependencies. In that scenario, a new custom Docker image will be created and registered in an Azure Container Registry within your resource group (see [Introduction to private Docker container registries in Azure](https://docs.microsoft.com/en-us/azure/container-registry/container-registry-intro)). Building and registering this image can take quite a few minutes."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.runconfig import RunConfiguration\n",
"from azureml.core.conda_dependencies import CondaDependencies\n",
"from azureml.core import Environment\n",
"\n",
"aml_run_config = RunConfiguration()\n",
"aml_run_config.target = compute_target\n",
"\n",
"USE_CURATED_ENV = True\n",
"if USE_CURATED_ENV:\n",
" curated_environment = Environment.get(\n",
" workspace=ws, name=\"AzureML-sklearn-0.24-ubuntu18.04-py37-cpu\"\n",
" )\n",
" aml_run_config.environment = curated_environment\n",
"else:\n",
" aml_run_config.environment.python.user_managed_dependencies = False\n",
"\n",
" # Add some packages relied on by data prep step\n",
" aml_run_config.environment.python.conda_dependencies = CondaDependencies.create(\n",
" conda_packages=[\"pandas\", \"scikit-learn\"],\n",
" pip_packages=[\"azureml-sdk\", \"azureml-dataset-runtime[fuse,pandas]\"],\n",
" pin_sdk_version=False,\n",
" )"
]
},
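If you take the custom-dependencies branch, the resulting environment can also be registered so that later runs reuse the cached image instead of rebuilding it. A minimal sketch, assuming the environment name is yours to choose:

custom_env = aml_run_config.environment
custom_env.name = "data-prep-env"  # hypothetical name for the registered environment
custom_env.register(workspace=ws)  # the image is built once, then reused by later runs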
{
"cell_type": "markdown",
"metadata": {},
@@ -436,46 +366,6 @@
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Construct your pipeline steps\n",
"Once you have the compute resource and environment created, you're ready to define your pipeline's steps. There are many built-in steps available via the Azure Machine Learning SDK, as you can see on the [reference documentation for the azureml.pipeline.steps package](https://docs.microsoft.com/en-us/python/api/azureml-pipeline-steps/azureml.pipeline.steps?view=azure-ml-py). The most flexible class is [PythonScriptStep](https://docs.microsoft.com/en-us/python/api/azureml-pipeline-steps/azureml.pipeline.steps.python_script_step.pythonscriptstep?view=azure-ml-py), which runs a Python script.\n",
"\n",
"Your data preparation code is in a subdirectory (in this example, \"data_preprocessing_tabular.py\" in the directory \"./scripts\"). As part of the pipeline creation process, this directory is zipped and uploaded to the compute_target and the step runs the script specified as the value for ``script_name``.\n",
"\n",
"The ``arguments`` values specify the inputs and outputs of the step. In the example below, the baseline data is the ``input_ds_small`` dataset. The script data_preprocessing_tabular.py does whatever data-transformation tasks are appropriate to the task at hand and outputs the data to ``output_data``, of type ``OutputFileDatasetConfig``. For more information, see [Moving data into and between ML pipeline steps (Python)](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-move-data-in-out-of-pipelines). The step will run on the machine defined by ``compute_target``, using the configuration ``aml_run_config``.\n",
"\n",
"Reuse of previous results (``allow_reuse``) is key when using pipelines in a collaborative environment since eliminating unnecessary reruns offers agility. Reuse is the default behavior when the ``script_name``, ``inputs``, and the parameters of a step remain the same. When reuse is allowed, results from the previous run are immediately sent to the next step. If ``allow_reuse`` is set to False, a new run will always be generated for this step during pipeline execution.\n",
"\n",
"> Note that we only support partitioned FileDataset and TabularDataset without partition when using such output as input."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.pipeline.steps import PythonScriptStep\n",
"\n",
"dataprep_source_dir = \"./scripts\"\n",
"entry_point = \"data_preprocessing_tabular.py\"\n",
"ds_input = input_ds_small.as_named_input(\"train_10_models\")\n",
"\n",
"data_prep_step = PythonScriptStep(\n",
" script_name=entry_point,\n",
" source_directory=dataprep_source_dir,\n",
" arguments=[\"--input\", ds_input, \"--output\", output_data],\n",
" compute_target=compute_target,\n",
" runconfig=aml_run_config,\n",
" allow_reuse=False,\n",
")\n",
"\n",
"input_ds_small = output_data"
]
},
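With the step defined, assembling and submitting the pipeline is a short step further. A minimal sketch, assuming the ws workspace object and an experiment name of your choosing:

from azureml.core import Experiment
from azureml.pipeline.core import Pipeline

pipeline = Pipeline(workspace=ws, steps=[data_prep_step])
pipeline_run = Experiment(ws, "tabular-data-prep").submit(pipeline)  # experiment name is illustrative
pipeline_run.wait_for_completion(show_output=True)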
{
"cell_type": "markdown",
"metadata": {},

@@ -237,11 +237,11 @@
"\n",
"datastore = ws.get_default_datastore()\n",
"train_dataset = TabularDatasetFactory.register_pandas_dataframe(\n",
" train, target=(datastore, \"dataset/\"), name=\"dominicks_OJ_train_pipeline\"\n",
" train, target=(datastore, \"dataset/\"), name=\"dominicks_OJ_train\"\n",
")\n",
"\n",
"test_dataset = TabularDatasetFactory.register_pandas_dataframe(\n",
" test, target=(datastore, \"dataset/\"), name=\"dominicks_OJ_test_pipeline\"\n",
" test, target=(datastore, \"dataset/\"), name=\"dominicks_OJ_test\"\n",
")"
]
},

@@ -23,7 +23,11 @@ except ImportError:


def infer_forecasting_dataset_tcn(
X_test, y_test, model, output_path, output_dataset_name="results"
X_test,
y_test,
model,
output_path,
output_dataset_name="results",
):

y_pred, df_all = model.forecast(X_test, y_test)

@@ -67,7 +71,10 @@ def get_model(model_path, model_file_name):
def get_args():
parser = argparse.ArgumentParser()
parser.add_argument(
"--model_name", type=str, dest="model_name", help="Model to be loaded"
"--model_name",
type=str,
dest="model_name",
help="Model to be loaded",
)

parser.add_argument(
@@ -101,13 +108,18 @@ def get_args():
return args


def get_data(run, fitted_model, target_column_name, test_dataset_name):
def get_data(
run,
fitted_model,
target_column_name,
test_dataset_name,
):

# get input dataset by name
test_dataset = Dataset.get_by_name(run.experiment.workspace, test_dataset_name)
test_df = test_dataset.to_pandas_dataframe()
if target_column_name in test_df:
y_test = test_df.pop(target_column_name).values
y_test = test_df.pop(target_column_name)
else:
y_test = np.full(test_df.shape[0], np.nan)

@@ -147,7 +159,10 @@ if __name__ == "__main__":
fitted_model = get_model(model_path, model_file_name)

X_test_df, y_test = get_data(
run, fitted_model, target_column_name, test_dataset_name
run,
fitted_model,
target_column_name,
test_dataset_name,
)

infer_forecasting_dataset_tcn(

@@ -513,7 +513,13 @@
"conda_run_config.environment.docker.enabled = True\n",
"\n",
"# specify CondaDependencies obj\n",
"conda_run_config.environment = automl_run.get_environment()"
"conda_run_config.environment.python.conda_dependencies = (\n",
" automl_run.get_environment().python.conda_dependencies\n",
")\n",
"\n",
"conda_run_config.environment.python.conda_dependencies.add_pip_package(\n",
" \"dotnetcore2==2.1.23\"\n",
")"
]
},
{
@@ -642,6 +648,28 @@
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Create the conda dependencies for setting up the service\n",
"We need to create the conda dependencies comprising of the *azureml* packages using the training environment from the *automl_run*."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"conda_dep = automl_run.get_environment().python.conda_dependencies\n",
"\n",
"with open(\"myenv.yml\", \"w\") as f:\n",
" f.write(conda_dep.serialize_to_string())\n",
"with open(\"myenv.yml\", \"r\") as f:\n",
" print(f.read())"
]
},
{
"cell_type": "markdown",
"metadata": {},
@@ -664,7 +692,7 @@
"metadata": {},
"source": [
"### Deploy the service\n",
"In the cell below, we deploy the service using the automl training environment and the scoring file from the previous steps. "
"In the cell below, we deploy the service using the conda file and the scoring file from the previous steps. "
]
},
{
@@ -686,7 +714,7 @@
" description=\"Get local explanations for Machine test data\",\n",
")\n",
"\n",
"myenv = automl_run.get_environment()\n",
"myenv = Environment.from_conda_specification(name=\"myenv\", file_path=\"myenv.yml\")\n",
"inference_config = InferenceConfig(entry_script=\"score_explain.py\", environment=myenv)\n",
"\n",
"# Use configs and models generated above\n",

@@ -69,19 +69,17 @@
"# ONNX Model Zoo and save it in the same folder as this tutorial\n",
"\n",
"import urllib.request\n",
"import os\n",
"\n",
"onnx_model_url = \"https://github.com/onnx/models/blob/main/vision/body_analysis/emotion_ferplus/model/emotion-ferplus-7.tar.gz?raw=true\"\n",
"\n",
"urllib.request.urlretrieve(onnx_model_url, filename=\"emotion-ferplus-7.tar.gz\")\n",
"os.mkdir(\"emotion_ferplus\")\n",
"\n",
"# the ! magic command tells our jupyter notebook kernel to run the following line of \n",
"# code from the command line instead of the notebook kernel\n",
"\n",
"# We use tar and xvcf to unzip the files we just retrieved from the ONNX model zoo\n",
"\n",
"!tar xvzf emotion-ferplus-7.tar.gz -C emotion_ferplus"
"!tar xvzf emotion-ferplus-7.tar.gz"
]
},
{
@@ -132,7 +130,7 @@
"metadata": {},
"outputs": [],
"source": [
"model_dir = \"emotion_ferplus/model\" # replace this with the location of your model files\n",
"model_dir = \"emotion_ferplus\" # replace this with the location of your model files\n",
"\n",
"# leave as is if it's in the same folder as this notebook"
]
@@ -498,12 +496,13 @@
"\n",
"# to use parsers to read in our model/data\n",
"import json\n",
"import os\n",
"\n",
"test_inputs = []\n",
"test_outputs = []\n",
"\n",
"# read in 1 testing images from .pb files\n",
"test_data_size = 1\n",
"# read in 3 testing images from .pb files\n",
"test_data_size = 3\n",
"\n",
"for num in np.arange(test_data_size):\n",
" input_test_data = os.path.join(model_dir, 'test_data_set_{0}'.format(num), 'input_0.pb')\n",
@@ -534,7 +533,7 @@
},
"source": [
"### Show some sample images\n",
"We use `matplotlib` to plot 1 test images from the dataset."
"We use `matplotlib` to plot 3 test images from the dataset."
]
},
{
@@ -548,7 +547,7 @@
"outputs": [],
"source": [
"plt.figure(figsize = (20, 20))\n",
"for test_image in np.arange(test_data_size):\n",
"for test_image in np.arange(3):\n",
" test_inputs[test_image].reshape(1, 64, 64)\n",
" plt.subplot(1, 8, test_image+1)\n",
" plt.axhline('')\n",

@@ -69,12 +69,10 @@
"# ONNX Model Zoo and save it in the same folder as this tutorial\n",
"\n",
"import urllib.request\n",
"import os\n",
"\n",
"onnx_model_url = \"https://github.com/onnx/models/blob/main/vision/classification/mnist/model/mnist-7.tar.gz?raw=true\"\n",
"\n",
"urllib.request.urlretrieve(onnx_model_url, filename=\"mnist-7.tar.gz\")\n",
"os.mkdir(\"mnist\")"
"urllib.request.urlretrieve(onnx_model_url, filename=\"mnist-7.tar.gz\")"
]
},
{
@@ -88,7 +86,7 @@
"\n",
"# We use tar and xvcf to unzip the files we just retrieved from the ONNX model zoo\n",
"\n",
"!tar xvzf mnist-7.tar.gz -C mnist"
"!tar xvzf mnist-7.tar.gz"
]
},
{
@@ -139,7 +137,7 @@
"metadata": {},
"outputs": [],
"source": [
"model_dir = \"mnist/model\" # replace this with the location of your model files\n",
"model_dir = \"mnist\" # replace this with the location of your model files\n",
"\n",
"# leave as is if it's in the same folder as this notebook"
]
@@ -449,12 +447,13 @@
"\n",
"# to use parsers to read in our model/data\n",
"import json\n",
"import os\n",
"\n",
"test_inputs = []\n",
"test_outputs = []\n",
"\n",
"# read in 1 testing images from .pb files\n",
"test_data_size = 1\n",
"# read in 3 testing images from .pb files\n",
"test_data_size = 3\n",
"\n",
"for i in np.arange(test_data_size):\n",
" input_test_data = os.path.join(model_dir, 'test_data_set_{0}'.format(i), 'input_0.pb')\n",
@@ -487,7 +486,7 @@
},
"source": [
"### Show some sample images\n",
"We use `matplotlib` to plot 1 test images from the dataset."
"We use `matplotlib` to plot 3 test images from the dataset."
]
},
{
@@ -501,7 +500,7 @@
"outputs": [],
"source": [
"plt.figure(figsize = (16, 6))\n",
"for test_image in np.arange(test_data_size):\n",
"for test_image in np.arange(3):\n",
" plt.subplot(1, 15, test_image+1)\n",
" plt.axhline('')\n",
" plt.axvline('')\n",

@@ -2,8 +2,6 @@
# Licensed under the MIT license.

from azureml.core.run import Run
from azureml.interpret import ExplanationClient
from interpret_community.adapter import ExplanationAdapter
import joblib
import os
import shap
@@ -13,11 +11,9 @@ OUTPUT_DIR = './outputs/'
os.makedirs(OUTPUT_DIR, exist_ok=True)

run = Run.get_context()
client = ExplanationClient.from_run(run)

# get a dataset on income prediction
X, y = shap.datasets.adult()
features = X.columns.values

# train an XGBoost model (but any other tree model type should work)
model = xgboost.XGBClassifier()
@@ -30,12 +26,6 @@ shap_values = explainer(X_shap)
print("computed shap values:")
print(shap_values)

# Use the explanation adapter to convert the importances into an interpret-community
# style explanation which can be uploaded to AzureML or visualized in the
# ExplanationDashboard widget
adapter = ExplanationAdapter(features, classification=True)
global_explanation = adapter.create_global(shap_values.values, X_shap, expected_values=shap_values.base_values)

# write X_shap out as a pickle file for later visualization
x_shap_pkl = 'x_shap.pkl'
with open(x_shap_pkl, 'wb') as file:
@@ -52,8 +42,3 @@ with open(model_file_name, 'wb') as file:
run.upload_file('xgboost_model.pkl', os.path.join('./outputs/', model_file_name))
original_model = run.register_model(model_name='xgboost_with_gpu_tree_explainer',
model_path='xgboost_model.pkl')

# Uploading model explanation data for storage or visualization in webUX
# The explanation can then be downloaded on any compute
comment = 'Global explanation on classification model trained on adult census income dataset'
client.upload_model_explanation(global_explanation, comment=comment, model_id=original_model.id)

@@ -106,7 +106,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.44.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.42.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},
@@ -225,67 +225,36 @@
"\n",
"from azureml.core import Environment\n",
"\n",
"environment_name = \"shapgpu\"\n",
"environment_name = \"shap-gpu-tree\"\n",
"\n",
"env = Environment(environment_name)\n",
"\n",
"env.docker.enabled = True\n",
"env.docker.base_image = None\n",
"\n",
"\n",
"# Note: this is to pin the pandas and xgboost versions to be same as notebook.\n",
|
||||
"# In production scenario user would choose their dependencies\n",
|
||||
"import pkg_resources\n",
|
||||
"available_packages = pkg_resources.working_set\n",
|
||||
"pandas_ver = None\n",
|
||||
"for dist in list(available_packages):\n",
|
||||
" if dist.key == 'pandas':\n",
|
||||
" pandas_ver = dist.version\n",
|
||||
"pandas_dep = 'pandas'\n",
|
||||
"if pandas_ver:\n",
|
||||
" pandas_dep = 'pandas=={}'.format(pandas_ver)\n",
|
||||
"\n",
|
||||
"# Note: we build shap at commit 690245 for Tesla K80 GPUs\n",
|
||||
"env.docker.base_dockerfile = f\"\"\"\n",
|
||||
"FROM nvidia/cuda:10.2-devel-ubuntu18.04\n",
|
||||
"ENV PATH=\"/root/miniconda3/bin:${{PATH}}\"\n",
|
||||
"ARG PATH=\"/root/miniconda3/bin:${{PATH}}\"\n",
|
||||
"env.docker.base_dockerfile = \"\"\"\n",
|
||||
"FROM rapidsai/rapidsai:cuda10.0-devel-ubuntu18.04\n",
|
||||
"RUN apt-get update && \\\n",
|
||||
"apt-get install -y fuse && \\\n",
|
||||
"apt-get install -y build-essential && \\\n",
|
||||
"apt-get install -y python3-dev && \\\n",
|
||||
"apt-get install -y wget && \\\n",
|
||||
"apt-get install -y git && \\\n",
|
||||
"rm -rf /var/lib/apt/lists/* && \\\n",
|
||||
"wget \\\n",
|
||||
"https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \\\n",
|
||||
"mkdir /root/.conda && \\\n",
|
||||
"bash Miniconda3-latest-Linux-x86_64.sh -b && \\\n",
|
||||
"rm -f Miniconda3-latest-Linux-x86_64.sh && \\\n",
|
||||
"conda init bash && \\\n",
|
||||
". ~/.bashrc && \\\n",
|
||||
"conda create -n shapgpu python=3.8 && \\\n",
|
||||
"conda activate shapgpu && \\\n",
|
||||
"source activate rapids && \\\n",
|
||||
"apt-get install -y g++ && \\\n",
|
||||
"printenv && \\\n",
|
||||
"echo \"which nvcc: \" && \\\n",
|
||||
"which nvcc && \\\n",
|
||||
"pip install numpy==1.20.3 && \\\n",
|
||||
"pip install azureml-defaults && \\\n",
|
||||
"pip install azureml-telemetry && \\\n",
|
||||
"pip install azureml-interpret && \\\n",
|
||||
"pip install {pandas_dep} && \\\n",
|
||||
"cd /usr/local/src && \\\n",
|
||||
"git clone https://github.com/slundberg/shap.git --single-branch && \\\n",
|
||||
"git clone https://github.com/slundberg/shap && \\\n",
|
||||
"cd shap && \\\n",
|
||||
"git reset --hard 690245c6ab043edf40cfce3d8438a62e29ab599f && \\\n",
|
||||
"mkdir build && \\\n",
|
||||
"python setup.py install --user && \\\n",
|
||||
"pip uninstall -y xgboost && \\\n",
|
||||
"conda install py-xgboost==1.3.3 \\\n",
|
||||
"rm /conda/envs/rapids/lib/libxgboost.so && \\\n",
|
||||
"pip install xgboost==1.4.2\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"env.python.user_managed_dependencies = True\n",
|
||||
"env.python.interpreter_path = '/root/miniconda3/envs/shapgpu/bin/python'\n",
|
||||
"\n",
|
||||
"from azureml.core import Run\n",
|
||||
"from azureml.core import ScriptRunConfig\n",
|
||||
@@ -297,176 +266,6 @@
|
||||
"run = experiment.submit(config=src)\n",
|
||||
"run"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Note: if you need to cancel a run, you can follow [these instructions](https://aka.ms/aml-docs-cancel-run)."
|
||||
]
|
||||
},
|
||||
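For completeness, cancelling from the SDK is a single call on the run object returned by experiment.submit above (a minimal sketch mirroring the linked instructions):

run.cancel()  # requests cancellation; the run moves to a terminal "Canceled" state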
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"# Shows output of the run on stdout.\n",
"run.wait_for_completion(show_output=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"run.get_metrics()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Download \n",
"1. Download model explanation data."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.interpret import ExplanationClient\n",
"\n",
"# Get model explanation data\n",
"client = ExplanationClient.from_run(run)\n",
"global_explanation = client.download_model_explanation()\n",
"local_importance_values = global_explanation.local_importance_values\n",
"expected_values = global_explanation.expected_values"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Get the top k (e.g., 4) most important features with their importance values\n",
"global_explanation_topk = client.download_model_explanation(top_k=4)\n",
"global_importance_values = global_explanation_topk.get_ranked_global_values()\n",
"global_importance_names = global_explanation_topk.get_ranked_global_names()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print('global importance values: {}'.format(global_importance_values))\n",
"print('global importance names: {}'.format(global_importance_names))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"2. Download model file."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Retrieve model for visualization and deployment\n",
"from azureml.core.model import Model\n",
"import joblib\n",
"original_model = Model(ws, 'xgboost_with_gpu_tree_explainer')\n",
"model_path = original_model.download(exist_ok=True)\n",
"original_model = joblib.load(model_path)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"3. Download test dataset."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Retrieve x_test for visualization\n",
"x_test_path = './x_shap_adult_census.pkl'\n",
"run.download_file('x_shap_adult_census.pkl', output_file_path=x_test_path)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"x_test = joblib.load('x_shap_adult_census.pkl')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Visualize\n",
"Load the visualization dashboard"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from raiwidgets import ExplanationDashboard"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from interpret_community.common.model_wrapper import wrap_model\n",
"from interpret_community.dataset.dataset_wrapper import DatasetWrapper\n",
"# note we need to wrap the XGBoost model to output predictions and probabilities in the scikit-learn format\n",
"class WrappedXGBoostModel(object):\n",
" \"\"\"A class for wrapping an XGBoost model to output integer predicted classes.\"\"\"\n",
"\n",
" def __init__(self, model):\n",
" self.model = model\n",
"\n",
" def predict(self, dataset):\n",
" return self.model.predict(dataset).astype(int)\n",
"\n",
" def predict_proba(self, dataset):\n",
" return self.model.predict_proba(dataset)\n",
"\n",
"wrapped_model = WrappedXGBoostModel(wrap_model(original_model, DatasetWrapper(x_test), model_task='classification'))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ExplanationDashboard(global_explanation, wrapped_model, dataset=x_test)"
]
}
],
"metadata": {
@@ -1,18 +1,5 @@
name: train-explain-model-gpu-tree-explainer
dependencies:
- py-xgboost==1.3.3
- pip:
- azureml-sdk
- azureml-interpret
- flask
- flask-cors
- gevent>=1.3.6
- jinja2
- ipython
- matplotlib
- ipywidgets
- raiwidgets~=0.19.0
- itsdangerous==2.0.1
- markupsafe<2.1.0
- scipy>=1.5.3
- protobuf==3.20.0

@@ -11,8 +11,6 @@ dependencies:
- matplotlib
- azureml-dataset-runtime
- ipywidgets
- raiwidgets~=0.19.0
- raiwidgets~=0.18.1
- itsdangerous==2.0.1
- markupsafe<2.1.0
- scipy>=1.5.3
- protobuf==3.20.0

@@ -10,9 +10,7 @@ dependencies:
- ipython
- matplotlib
- ipywidgets
- raiwidgets~=0.19.0
- raiwidgets~=0.18.1
- packaging>=20.9
- itsdangerous==2.0.1
- markupsafe<2.1.0
- scipy>=1.5.3
- protobuf==3.20.0

@@ -18,9 +18,7 @@ def init():
original_model_path = Model.get_model_path('local_deploy_model')
scoring_explainer_path = Model.get_model_path('IBM_attrition_explainer')

# Load the original model into the environment
original_model = joblib.load(original_model_path)
# Load the scoring explainer into the environment
scoring_explainer = joblib.load(scoring_explainer_path)


@@ -31,15 +29,5 @@ def run(raw_data):
predictions = original_model.predict(data)
# Retrieve model explanations
local_importance_values = scoring_explainer.explain(data)
# Retrieve the feature names, which we may want to return to the user.
# Note: you can also get the raw_features and engineered_features
# by calling scoring_explainer.raw_features and
# scoring_explainer.engineered_features but you may need to pass
# the raw or engineered feature names in the ScoringExplainer
# constructor, depending on if you are using feature maps or
# transformations on the original explainer.
features = scoring_explainer.features
# You can return any data type as long as it is JSON-serializable
return {'predictions': predictions.tolist(),
'local_importance_values': local_importance_values,
'features': features}
return {'predictions': predictions.tolist(), 'local_importance_values': local_importance_values}

@@ -340,29 +340,17 @@
"available_packages = pkg_resources.working_set\n",
"sklearn_ver = None\n",
"pandas_ver = None\n",
"numpy_ver = None\n",
"numba_ver = None\n",
"for dist in available_packages:\n",
" if dist.key == 'scikit-learn':\n",
" sklearn_ver = dist.version\n",
" elif dist.key == 'numpy':\n",
" numpy_ver = dist.version\n",
" elif dist.key == 'numba':\n",
" numba_ver = dist.version\n",
" elif dist.key == 'pandas':\n",
" pandas_ver = dist.version\n",
"sklearn_dep = 'scikit-learn'\n",
"pandas_dep = 'pandas'\n",
"numpy_dep = 'numpy'\n",
"numba_dep = 'numba'\n",
"if sklearn_ver:\n",
" sklearn_dep = 'scikit-learn=={}'.format(sklearn_ver)\n",
"if pandas_ver:\n",
" pandas_dep = 'pandas=={}'.format(pandas_ver)\n",
"if numpy_ver:\n",
" numpy_dep = 'numpy=={}'.format(numpy_ver)\n",
"if numba_ver:\n",
" numba_dep = 'numba=={}'.format(numba_ver)\n",
"# Specify CondaDependencies obj\n",
"# The CondaDependencies specifies the conda and pip packages that are installed in the environment\n",
"# the submitted job is run in. Note the remote environment(s) needs to be similar to the local\n",
@@ -371,7 +359,7 @@
"myenv = CondaDependencies.create(\n",
" python_version=python_version,\n",
" conda_packages=['pip==20.2.4'],\n",
" pip_packages=['pyyaml', sklearn_dep, pandas_dep, numpy_dep, numba_dep] + azureml_pip_packages)\n",
" pip_packages=['pyyaml', sklearn_dep, pandas_dep] + azureml_pip_packages)\n",
"\n",
"with open(\"myenv.yml\",\"w\") as f:\n",
" f.write(myenv.serialize_to_string())\n",

@@ -10,9 +10,7 @@ dependencies:
- ipython
- matplotlib
- ipywidgets
- raiwidgets~=0.19.0
- raiwidgets~=0.18.1
- packaging>=20.9
- itsdangerous==2.0.1
- markupsafe<2.1.0
- scipy>=1.5.3
- protobuf==3.20.0

@@ -12,8 +12,6 @@ dependencies:
- azureml-dataset-runtime
- azureml-core
- ipywidgets
- raiwidgets~=0.19.0
- raiwidgets~=0.18.1
- itsdangerous==2.0.1
- markupsafe<2.1.0
- scipy>=1.5.3
- protobuf==3.20.0

@@ -3,4 +3,3 @@ dependencies:
- pip:
- azureml-sdk
- azureml-widgets
- protobuf==3.20.0

@@ -1,4 +1,3 @@
# DisableDockerDetector "Disabled to unblock PRs until the owner can fix the file. Not used in any prod deployments - only as a documentation for the customers"
FROM rocker/tidyverse:4.0.0-ubuntu18.04

# Install python

@@ -361,7 +361,7 @@
"\n",
"batch_conda_deps = CondaDependencies.create(python_version=\"3.7\",\n",
" conda_packages=['pip==20.2.4'],\n",
" pip_packages=[\"tensorflow==1.15.2\", \"pillow\", \"protobuf==3.20.1\",\n",
" pip_packages=[\"tensorflow==1.15.2\", \"pillow\", \n",
" \"azureml-core\", \"azureml-dataset-runtime[fuse]\"])\n",
"batch_env = Environment(name=\"batch_environment\")\n",
"batch_env.python.conda_dependencies = batch_conda_deps\n",

@@ -437,8 +437,7 @@
" - azureml-defaults\n",
" - tensorflow-gpu==2.0.0\n",
" - keras<=2.3.1\n",
" - matplotlib\n",
" - protobuf==3.20.1"
" - matplotlib"
]
},
{
@@ -990,7 +989,6 @@
"cd.add_conda_package('h5py<=2.10.0')\n",
"cd.add_conda_package('keras<=2.3.1')\n",
"cd.add_pip_package(\"azureml-defaults\")\n",
"cd.add_pip_package(\"protobuf==3.20.1\")\n",
"cd.save_to_file(base_directory='./', conda_file_path='myenv.yml')\n",
"\n",
"print(cd.serialize_to_string())"

@@ -943,7 +943,6 @@
"cd.add_conda_package('numpy')\n",
"cd.add_pip_package('tensorflow==2.2.0')\n",
"cd.add_pip_package(\"azureml-defaults\")\n",
"cd.add_pip_package(\"protobuf==3.20.1\")\n",
"cd.save_to_file(base_directory='./', conda_file_path='myenv.yml')\n",
"\n",
"print(cd.serialize_to_string())"

@@ -11,8 +11,6 @@ RUN pip install azureml-core
RUN pip install ray==0.8.7
RUN pip install ray[rllib,tune,serve]==0.8.7
RUN pip install tensorflow==1.14.0
RUN pip install 'msrest<0.7.0'
RUN pip install protobuf==3.20.0

RUN apt-get update
RUN apt-get install -y jq

@@ -37,7 +37,8 @@ RUN pip install gym[atari]==0.19.0
RUN pip install gym[accept-rom-license]==0.19.0

# Install pip dependencies
RUN pip install 'matplotlib>=3.3,<3.4' \
RUN HOROVOD_WITH_TENSORFLOW=1 \
pip install 'matplotlib>=3.3,<3.4' \
'psutil>=5.8,<5.9' \
'tqdm>=4.59,<4.60' \
'pandas>=1.1,<1.2' \
@@ -69,9 +70,6 @@ RUN pip install --no-cache-dir \
# This is required for ray 0.8.7
RUN pip install -U aiohttp==3.7.4

RUN pip install 'msrest<0.7.0'
RUN pip install protobuf==3.20.0

# This is needed for mpi to locate libpython
ENV LD_LIBRARY_PATH $AZUREML_CONDA_ENVIRONMENT_PATH/lib:$LD_LIBRARY_PATH


@@ -93,7 +93,7 @@
"source": [
"%matplotlib inline\n",
"\n",
"# Azure Machine Learning core imports\n",
"# Azure Machine Learning Core imports\n",
"import azureml.core\n",
"\n",
"# Check core SDK version number\n",

@@ -12,7 +12,6 @@ RUN pip install azureml-dataset-runtime
RUN pip install ray==0.8.7
RUN pip install ray[rllib,tune,serve]==0.8.7
RUN pip install tensorflow==1.14.0
RUN pip install 'msrest<0.7.0'

RUN apt-get update
RUN apt-get install -y jq

@@ -8,8 +8,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
rm -rf /var/lib/apt/lists/* && \
rm -rf /usr/share/man/*

RUN conda install -y conda=4.13.0 python=3.7 && conda clean -ay
RUN pip install ray-on-aml==0.2.1 & \
RUN conda install -y conda=4.12.0 python=3.7 && conda clean -ay
RUN pip install ray-on-aml==0.1.6 & \
pip install --no-cache-dir \
azureml-defaults \
azureml-dataset-runtime[fuse,pandas] \
@@ -30,9 +30,5 @@ RUN pip install ray-on-aml==0.2.1 & \
conda install -y -c conda-forge x264='1!152.20180717' ffmpeg=4.0.2 && \
conda install -c anaconda opencv

RUN pip install protobuf==3.20.0

RUN pip install --upgrade ray==0.8.3 \
ray[rllib,dashboard,tune]==0.8.3

RUN pip install 'msrest<0.7.0'
ray[rllib,dashboard,tune]==0.8.3
@@ -1,4 +1,3 @@
# DisableDockerDetector "Disabled to unblock PRs until the owner can fix the file. Not used in any prod deployments - only as a documentation for the customers"
FROM akdmsft/particle-cpu

RUN conda install -c anaconda python=3.7
@@ -29,11 +28,7 @@ RUN cd multiagent-particle-envs && \

RUN pip3 install ray-on-aml==0.1.6

RUN pip install protobuf==3.20.0

RUN pip3 install --upgrade \
ray==0.8.7 \
ray[rllib]==0.8.7 \
ray[tune]==0.8.7

RUN pip install 'msrest<0.7.0'
ray[tune]==0.8.7
@@ -8,9 +8,8 @@ dependencies:
- matplotlib
- azureml-dataset-runtime
- ipywidgets
- raiwidgets~=0.19.0
- raiwidgets~=0.18.1
- liac-arff
- packaging>=20.9
- itsdangerous==2.0.1
- markupsafe<2.1.0
- protobuf==3.20.0

@@ -43,7 +43,6 @@
" 1. Logging numeric metrics\n",
" 1. Logging vectors\n",
" 1. Logging tables\n",
" 1. Logging when additional Metric Names are required\n",
" 1. Uploading files\n",
"1. [Analyzing results](#Analyzing-results)\n",
" 1. Tagging a run\n",
@@ -101,7 +100,7 @@
"\n",
"# Check core SDK version number\n",
"\n",
"print(\"This notebook was created using SDK version 1.44.0, you are currently running version\", azureml.core.VERSION)"
"print(\"This notebook was created using SDK version 1.42.0, you are currently running version\", azureml.core.VERSION)"
]
},
{
@@ -368,7 +367,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"### Logging when additional Metric Names are required\n",
"### Logging for when more Metric Names are required\n",
"\n",
"Limits on logging are internally enforced to ensure a smooth experience, however these can sometimes be limiting, particularly in terms of the limit on metric names.\n",
"\n",
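A common way to stay under the metric-name limit is to group related values as columns of one named metric with Run.log_row rather than logging each value under its own metric name. A minimal sketch, assuming a live run object; the metric and column names are illustrative:

run.log_row("residuals", split="train", mean=0.02, stddev=0.41)
run.log_row("residuals", split="test", mean=0.05, stddev=0.47)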
index.md
@@ -106,8 +106,6 @@ Machine Learning notebook samples and encourage efficient retrieval of topics an
| [upload-fairness-dashboard](https://github.com/Azure/MachineLearningNotebooks/blob/master//contrib/fairness/upload-fairness-dashboard.ipynb) | | | | | | |
| [azure-ml-with-nvidia-rapids](https://github.com/Azure/MachineLearningNotebooks/blob/master//contrib/RAPIDS/azure-ml-with-nvidia-rapids.ipynb) | | | | | | |
| [auto-ml-continuous-retraining](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/continuous-retraining/auto-ml-continuous-retraining.ipynb) | | | | | | |
| [codegen-for-autofeaturization](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/experimental/autofeaturization-codegen/codegen-for-autofeaturization.ipynb) | | | | | | |
| [custom-model-training-from-autofeaturization-run](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/experimental/autofeaturization-custom-model-training/custom-model-training-from-autofeaturization-run.ipynb) | | | | | | |
| [auto-ml-regression-model-proxy](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/experimental/regression-model-proxy/auto-ml-regression-model-proxy.ipynb) | | | | | | |
| [auto-ml-forecasting-backtest-many-models](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/forecasting-backtest-many-models/auto-ml-forecasting-backtest-many-models.ipynb) | | | | | | |
| [auto-ml-forecasting-energy-demand](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/forecasting-energy-demand/auto-ml-forecasting-energy-demand.ipynb) | | | | | | |

@@ -102,7 +102,7 @@
"source": [
"import azureml.core\n",
"\n",
"print(\"This notebook was created using version 1.44.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.42.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

@@ -211,7 +211,7 @@
"metadata": {},
"source": [
"## View Experiment\n",
"In the left-hand menu in Azure Machine Learning Studio, select __Jobs__ and then select your experiment (azure-ml-in10-mins-tutorial). An experiment is a grouping of many runs from a specified script or piece of code. Information for the run is stored under that experiment. If the name doesn't exist when you submit an experiment, if you select your run you will see various tabs containing metrics, logs, explanations, etc.\n",
|
||||
"In the left-hand menu in Azure Machine Learning Studio, select __Experiments__ and then select your experiment (azure-ml-in10-mins-tutorial). An experiment is a grouping of many runs from a specified script or piece of code. Information for the run is stored under that experiment. If the name doesn't exist when you submit an experiment, if you select your run you will see various tabs containing metrics, logs, explanations, etc.\n",
"\n",
"## Version control your models with the model registry\n",
"\n",

@@ -222,7 +222,7 @@
"source": [
"### Submit the job\n",
"\n",
"Run the experiment by submitting the ScriptRunConfig object. After this there are many options for monitoring your run. Once submitted, you can either navigate to the experiment \"get-started-with-jobsubmission-tutorial\" in the left menu item __Jobs__ to monitor the run, or you can monitor the run inline as the `run.wait_for_completion(show_output=True)` will stream the logs of the run. You will see that the environment is built for you to ensure reproducibility - this adds a couple of minutes to the run time. On subsequent runs, the environment is re-used making the runtime shorter."
"Run the experiment by submitting the ScriptRunConfig object. After this there are many options for monitoring your run. Once submitted, you can either navigate to the experiment \"get-started-with-jobsubmission-tutorial\" in the left menu item __Experiments__ to monitor the run, or you can monitor the run inline as the `run.wait_for_completion(show_output=True)` will stream the logs of the run. You will see that the environment is built for you to ensure reproducibility - this adds a couple of minutes to the run time. On subsequent runs, the environment is re-used making the runtime shorter."
]
},
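A minimal sketch of that submit-and-monitor flow, assuming the experiment and src objects set up earlier in the tutorial:

run = experiment.submit(config=src)
run.wait_for_completion(show_output=True)  # streams build and execution logs inline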
{

@@ -5,19 +5,17 @@ import os
import argparse
import datetime
import time
import tensorflow.compat.v1 as tf
import tensorflow as tf
from math import ceil
import numpy as np
import sys
import shutil
import subprocess
import tf_slim
from tensorflow.contrib.slim.python.slim.nets import inception_v3

from azureml.core import Run
from azureml.core.model import Model
from azureml.core.dataset import Dataset

slim = tf_slim
slim = tf.contrib.slim

image_size = 299
num_channel = 3
@@ -34,18 +32,16 @@ def get_class_label_dict(labels_dir):

def init():
global g_tf_sess, probabilities, label_dict, input_images
subprocess.run(["git", "clone", "https://github.com/tensorflow/models/"])
sys.path.append("./models/research/slim")

parser = argparse.ArgumentParser(description="Start a tensorflow model serving")
parser.add_argument('--model_name', dest="model_name", required=True)
parser.add_argument('--labels_dir', dest="labels_dir", required=True)
args, _ = parser.parse_known_args()
from nets import inception_v3, inception_utils

label_dict = get_class_label_dict(args.labels_dir)
classes_num = len(label_dict)
tf.disable_v2_behavior()
with slim.arg_scope(inception_utils.inception_arg_scope()):

with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
input_images = tf.placeholder(tf.float32, [1, image_size, image_size, num_channel])
logits, _ = inception_v3.inception_v3(input_images,
num_classes=classes_num,

@@ -247,7 +247,7 @@
" config = AmlCompute.provisioning_configuration(vm_size=\"STANDARD_NC6\",\n",
" vm_priority=\"lowpriority\", \n",
" min_nodes=0, \n",
" max_nodes=2)\n",
" max_nodes=1)\n",
"\n",
" compute_target = ComputeTarget.create(workspace=ws, name=compute_name, provisioning_configuration=config)\n",
" compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)"
@@ -305,10 +305,9 @@
"from azureml.core.conda_dependencies import CondaDependencies\n",
"from azureml.core.runconfig import DEFAULT_GPU_IMAGE\n",
"\n",
"cd = CondaDependencies.create(python_version=\"3.8\",\n",
"cd = CondaDependencies.create(python_version=\"3.7\",\n",
" conda_packages=['pip==20.2.4'],\n",
" pip_packages=[\"tensorflow-gpu==2.3.0\",\n",
" \"tf_slim==1.1.0\", \"protobuf==3.20.1\",\n",
" pip_packages=[\"tensorflow-gpu==1.15.2\",\n",
" \"azureml-core\", \"azureml-dataset-runtime[fuse]\"])\n",
"\n",
"env = Environment(name=\"parallelenv\")\n",