Compare commits

...

2 Commits

Author       SHA1        Message                                                           Date
amlrelsa-ms  70902df6da  update samples from Release-95 as a part of SDK release          2021-04-19 18:42:58 +00:00
nikAI77      6f893ff0b4  update samples from Release-94 as a part of SDK release (#1418)  2021-04-06 12:36:12 -04:00
                         Co-authored-by: amlrelsa-ms <amlrelsa@microsoft.com>
33 changed files with 296 additions and 100 deletions

View File

@@ -103,7 +103,7 @@
"source": [
"import azureml.core\n",
"\n",
"print(\"This notebook was created using version 1.25.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.27.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -36,9 +36,9 @@
"\n",
"<a id=\"Introduction\"></a>\n",
"## Introduction\n",
"This notebook shows how to use [Fairlearn (an open source fairness assessment and unfairness mitigation package)](http://fairlearn.github.io) and Azure Machine Learning Studio for a binary classification problem. This example uses the well-known adult census dataset. For the purposes of this notebook, we shall treat this as a loan decision problem. We will pretend that the label indicates whether or not each individual repaid a loan in the past. We will use the data to train a predictor to predict whether previously unseen individuals will repay a loan or not. The assumption is that the model predictions are used to decide whether an individual should be offered a loan. Its purpose is purely illustrative of a workflow including a fairness dashboard - in particular, we do **not** include a full discussion of the detailed issues which arise when considering fairness in machine learning. For such discussions, please [refer to the Fairlearn website](http://fairlearn.github.io/).\n",
"This notebook shows how to use [Fairlearn (an open source fairness assessment and unfairness mitigation package)](http://fairlearn.org) and Azure Machine Learning Studio for a binary classification problem. This example uses the well-known adult census dataset. For the purposes of this notebook, we shall treat this as a loan decision problem. We will pretend that the label indicates whether or not each individual repaid a loan in the past. We will use the data to train a predictor to predict whether previously unseen individuals will repay a loan or not. The assumption is that the model predictions are used to decide whether an individual should be offered a loan. Its purpose is purely illustrative of a workflow including a fairness dashboard - in particular, we do **not** include a full discussion of the detailed issues which arise when considering fairness in machine learning. For such discussions, please [refer to the Fairlearn website](http://fairlearn.org/).\n",
"\n",
"We will apply the [grid search algorithm](https://fairlearn.github.io/master/api_reference/fairlearn.reductions.html#fairlearn.reductions.GridSearch) from the Fairlearn package using a specific notion of fairness called Demographic Parity. This produces a set of models, and we will view these in a dashboard both locally and in the Azure Machine Learning Studio.\n",
"We will apply the [grid search algorithm](https://fairlearn.org/v0.4.6/api_reference/fairlearn.reductions.html#fairlearn.reductions.GridSearch) from the Fairlearn package using a specific notion of fairness called Demographic Parity. This produces a set of models, and we will view these in a dashboard both locally and in the Azure Machine Learning Studio.\n",
"\n",
"### Setup\n",
"\n",
@@ -48,7 +48,7 @@
"* `azureml-contrib-fairness`\n",
"* `fairlearn==0.4.6` (v0.5.0 will work with minor modifications)\n",
"* `joblib`\n",
"* `shap`\n",
"* `liac-arff`\n",
"\n",
"Fairlearn relies on features introduced in v0.22.1 of `scikit-learn`. If you have an older version already installed, please uncomment and run the following cell:"
]
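As a hedged illustration of the grid-search mitigation this notebook's introduction describes (assuming the fairlearn==0.4.6 API pinned above, with hypothetical X_train, y_train, and A_train objects prepared by the notebook):

from fairlearn.reductions import GridSearch, DemographicParity
from sklearn.linear_model import LogisticRegression

# Sweep a grid of accuracy/demographic-parity trade-offs; each grid
# point yields one candidate model for the fairness dashboard.
sweep = GridSearch(LogisticRegression(solver='liblinear'),
                   constraints=DemographicParity(),
                   grid_size=71)
sweep.fit(X_train, y_train, sensitive_features=A_train['sex'])
models = sweep.predictors_  # fitted candidates (attribute name per fairlearn 0.4.6)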
@@ -88,7 +88,6 @@
"from fairlearn.widget import FairlearnDashboard\n",
"\n",
"from sklearn.compose import ColumnTransformer\n",
"from sklearn.datasets import fetch_openml\n",
"from sklearn.impute import SimpleImputer\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.model_selection import train_test_split\n",
@@ -112,9 +111,9 @@
"metadata": {},
"outputs": [],
"source": [
"from fairness_nb_utils import fetch_openml_with_retries\n",
"from fairness_nb_utils import fetch_census_dataset\n",
"\n",
"data = fetch_openml_with_retries(data_id=1590)\n",
"data = fetch_census_dataset()\n",
" \n",
"# Extract the items we want\n",
"X_raw = data.data\n",
@@ -137,7 +136,7 @@
"outputs": [],
"source": [
"A = X_raw[['sex','race']]\n",
"X_raw = X_raw.drop(labels=['sex', 'race'],axis = 1)"
"X_raw = X_raw.drop(labels=['sex', 'race'], axis = 1)"
]
},
{
@@ -584,7 +583,7 @@
"<a id=\"Conclusion\"></a>\n",
"## Conclusion\n",
"\n",
"In this notebook we have demonstrated how to use the `GridSearch` algorithm from Fairlearn to generate a collection of models, and then present them in the fairness dashboard in Azure Machine Learning Studio. Please remember that this notebook has not attempted to discuss the many considerations which should be part of any approach to unfairness mitigation. The [Fairlearn website](http://fairlearn.github.io/) provides that discussion"
"In this notebook we have demonstrated how to use the `GridSearch` algorithm from Fairlearn to generate a collection of models, and then present them in the fairness dashboard in Azure Machine Learning Studio. Please remember that this notebook has not attempted to discuss the many considerations which should be part of any approach to unfairness mitigation. The [Fairlearn website](http://fairlearn.org/) provides that discussion"
]
},
{

View File

@@ -5,3 +5,4 @@ dependencies:
- azureml-contrib-fairness
- fairlearn==0.4.6
- joblib
- liac-arff

View File

@@ -4,7 +4,13 @@
"""Utilities for azureml-contrib-fairness notebooks."""
import arff
from collections import OrderedDict
from contextlib import closing
import gzip
import pandas as pd
from sklearn.datasets import fetch_openml
from sklearn.utils import Bunch
import time
@@ -26,3 +32,62 @@ def fetch_openml_with_retries(data_id, max_retries=4, retry_delay=60):
        raise RuntimeError("Unable to download dataset from OpenML")
    return data


_categorical_columns = [
    'workclass',
    'education',
    'marital-status',
    'occupation',
    'relationship',
    'race',
    'sex',
    'native-country'
]


def fetch_census_dataset():
    """Fetch the Adult Census Dataset.

    This uses a particular URL for the Adult Census dataset. The code
    is a simplified version of fetch_openml() in sklearn.

    The data are copied from:
    https://openml.org/data/v1/download/1595261.gz
    (as of 2021-03-31)
    """
    try:
        from urllib import urlretrieve
    except ImportError:
        from urllib.request import urlretrieve

    filename = "1595261.gz"
    data_url = "https://rainotebookscdn.blob.core.windows.net/datasets/"
    urlretrieve(data_url + filename, filename)

    http_stream = gzip.GzipFile(filename=filename, mode='rb')

    with closing(http_stream):
        def _stream_generator(response):
            for line in response:
                yield line.decode('utf-8')

        stream = _stream_generator(http_stream)
        data = arff.load(stream)

    attributes = OrderedDict(data['attributes'])
    arff_columns = list(attributes)
    raw_df = pd.DataFrame(data=data['data'], columns=arff_columns)

    target_column_name = 'class'
    target = raw_df.pop(target_column_name)
    for col_name in _categorical_columns:
        dtype = pd.api.types.CategoricalDtype(attributes[col_name])
        raw_df[col_name] = raw_df[col_name].astype(dtype, copy=False)

    result = Bunch()
    result.data = raw_df
    result.target = target
    return result
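A quick, hypothetical usage check of the helper above; the returned Bunch mirrors what fetch_openml() produced, so the notebooks' existing data.data / data.target pattern keeps working:

data = fetch_census_dataset()
X_raw = data.data       # pandas DataFrame of features
y = data.target         # labels popped from the 'class' column
print(X_raw.shape, list(y.unique()))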

View File

@@ -50,7 +50,7 @@
"* `azureml-contrib-fairness`\n",
"* `fairlearn==0.4.6` (should also work with v0.5.0)\n",
"* `joblib`\n",
"* `shap`\n",
"* `liac-arff`\n",
"\n",
"Fairlearn relies on features introduced in v0.22.1 of `scikit-learn`. If you have an older version already installed, please uncomment and run the following cell:"
]
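The upgrade cell referenced here is not included in this diff; presumably it amounts to something like:

# !pip install --upgrade "scikit-learn>=0.22.1"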
@@ -88,7 +88,6 @@
"source": [
"from sklearn import svm\n",
"from sklearn.compose import ColumnTransformer\n",
"from sklearn.datasets import fetch_openml\n",
"from sklearn.impute import SimpleImputer\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.model_selection import train_test_split\n",
@@ -110,9 +109,9 @@
"metadata": {},
"outputs": [],
"source": [
"from fairness_nb_utils import fetch_openml_with_retries\n",
"from fairness_nb_utils import fetch_census_dataset\n",
"\n",
"data = fetch_openml_with_retries(data_id=1590)\n",
"data = fetch_census_dataset()\n",
" \n",
"# Extract the items we want\n",
"X_raw = data.data\n",

View File

@@ -5,3 +5,4 @@ dependencies:
- azureml-contrib-fairness
- fairlearn==0.4.6
- joblib
- liac-arff

View File

@@ -21,8 +21,8 @@ dependencies:
- pip:
# Required packages for AzureML execution, history, and data preparation.
- azureml-widgets~=1.25.0
- azureml-widgets~=1.27.0
- pytorch-transformers==1.0.0
- spacy==2.1.8
- https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
- -r https://automlcesdkdataresources.blob.core.windows.net/validated-requirements/1.25.0/validated_win32_requirements.txt [--no-deps]
- -r https://automlcesdkdataresources.blob.core.windows.net/validated-requirements/1.27.0/validated_win32_requirements.txt [--no-deps]

View File

@@ -21,8 +21,8 @@ dependencies:
- pip:
# Required packages for AzureML execution, history, and data preparation.
- azureml-widgets~=1.25.0
- azureml-widgets~=1.27.0
- pytorch-transformers==1.0.0
- spacy==2.1.8
- https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
- -r https://automlcesdkdataresources.blob.core.windows.net/validated-requirements/1.25.0/validated_linux_requirements.txt [--no-deps]
- -r https://automlcesdkdataresources.blob.core.windows.net/validated-requirements/1.27.0/validated_linux_requirements.txt [--no-deps]

View File

@@ -22,8 +22,8 @@ dependencies:
- pip:
# Required packages for AzureML execution, history, and data preparation.
- azureml-widgets~=1.25.0
- azureml-widgets~=1.27.0
- pytorch-transformers==1.0.0
- spacy==2.1.8
- https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
- -r https://automlcesdkdataresources.blob.core.windows.net/validated-requirements/1.25.0/validated_darwin_requirements.txt [--no-deps]
- -r https://automlcesdkdataresources.blob.core.windows.net/validated-requirements/1.27.0/validated_darwin_requirements.txt [--no-deps]

View File

@@ -105,7 +105,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.25.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.27.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -93,7 +93,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.25.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.27.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -96,7 +96,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.25.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.27.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},
@@ -281,7 +281,7 @@
"outputs": [],
"source": [
"automl_settings = {\n",
" \"experiment_timeout_minutes\": 20,\n",
" \"experiment_timeout_minutes\": 30,\n",
" \"primary_metric\": 'accuracy',\n",
" \"max_concurrent_iterations\": num_nodes, \n",
" \"max_cores_per_iteration\": -1,\n",

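For context, a hedged sketch of how a settings dictionary like the one above is typically fed to AutoMLConfig (the training data and label name here are hypothetical):

from azureml.train.automl import AutoMLConfig

automl_config = AutoMLConfig(task='classification',
                             training_data=train_data,     # hypothetical Dataset
                             label_column_name='target',   # hypothetical label column
                             compute_target=compute_target,
                             **automl_settings)
remote_run = experiment.submit(automl_config, show_output=False)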
View File

@@ -81,7 +81,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.25.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.27.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -91,7 +91,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.25.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.27.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -113,7 +113,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.25.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.27.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -87,7 +87,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.25.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.27.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -97,7 +97,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.25.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.27.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -94,7 +94,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.25.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.27.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -82,7 +82,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.25.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.27.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -96,7 +96,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.25.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.27.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -96,7 +96,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.25.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.27.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},
@@ -439,7 +439,7 @@
"\n",
"### Retrieve any AutoML Model for explanations\n",
"\n",
"Below we select the some AutoML pipeline from our iterations. The `get_output` method returns the a AutoML run and the fitted model for the last invocation. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*."
"Below we select an AutoML pipeline from our iterations. The `get_output` method returns the a AutoML run and the fitted model for the last invocation. Overloads on `get_output` allow you to retrieve the best run and fitted model for any logged `metric` or for a particular `iteration`."
]
},
{
@@ -448,7 +448,8 @@
"metadata": {},
"outputs": [],
"source": [
"automl_run, fitted_model = remote_run.get_output(metric='r2_score')"
"#automl_run, fitted_model = remote_run.get_output(metric='r2_score')\n",
"automl_run, fitted_model = remote_run.get_output(iteration=2)"
]
},
{

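The overloads mentioned above, sketched side by side (variable names are illustrative):

best_run, best_model = remote_run.get_output()               # best by primary metric
run_2, model_2 = remote_run.get_output(iteration=2)          # a specific iteration
run_r2, model_r2 = remote_run.get_output(metric='r2_score')  # best for a named metric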
View File

@@ -92,7 +92,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.25.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.27.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -255,9 +255,6 @@
"# Set compute target to AmlCompute target created in previous step\n",
"run_config.target = cpu_cluster.name\n",
"\n",
"# Enable Docker \n",
"run_config.environment.docker.enabled = True\n",
"\n",
"azureml_pip_packages = [\n",
" 'azureml-defaults', 'azureml-telemetry', 'azureml-interpret'\n",
"]\n",

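The deleted docker.enabled flag is deprecated in this SDK line; where Docker must still be requested explicitly, the pattern this changeset adopts elsewhere (see the Chainer notebook below) is a DockerConfiguration, sketched here with illustrative script and target names:

from azureml.core import ScriptRunConfig
from azureml.core.runconfig import DockerConfiguration

docker_config = DockerConfiguration(use_docker=True)
src = ScriptRunConfig(source_directory='.',
                      script='train_explain.py',          # hypothetical script
                      compute_target=cpu_cluster.name,
                      environment=run_config.environment,
                      docker_runtime_config=docker_config)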
View File

@@ -388,6 +388,7 @@
"from azureml.core.webservice import AciWebservice\n",
"from azureml.core.model import Model\n",
"from azureml.core.environment import Environment\n",
"from azureml.exceptions import WebserviceException\n",
"\n",
"\n",
"aciconfig = AciWebservice.deploy_configuration(cpu_cores=1, \n",
@@ -401,7 +402,12 @@
"\n",
"# Use configs and models generated above\n",
"service = Model.deploy(ws, 'model-scoring-deploy-local', [scoring_explainer_model, original_model], inference_config, aciconfig)\n",
"service.wait_for_deployment(show_output=True)"
"try:\n",
" service.wait_for_deployment(show_output=True)\n",
"except WebserviceException as e:\n",
" print(e.message)\n",
" print(service.get_logs())\n",
" raise"
]
},
{

View File

@@ -257,9 +257,6 @@
"# Set compute target to AmlCompute target created in previous step\n",
"run_config.target = cpu_cluster.name\n",
"\n",
"# Enable Docker \n",
"run_config.environment.docker.enabled = True\n",
"\n",
"# Set Docker base image to the default CPU-based image\n",
"run_config.environment.docker.base_image = DEFAULT_CPU_IMAGE\n",
"\n",
@@ -489,6 +486,7 @@
"from azureml.core.webservice import AciWebservice\n",
"from azureml.core.model import Model\n",
"from azureml.core.environment import Environment\n",
"from azureml.exceptions import WebserviceException\n",
"\n",
"\n",
"aciconfig = AciWebservice.deploy_configuration(cpu_cores=1, \n",
@@ -502,7 +500,12 @@
"\n",
"# Use configs and models generated above\n",
"service = Model.deploy(ws, 'model-scoring-service', [scoring_explainer_model, original_model], inference_config, aciconfig)\n",
"service.wait_for_deployment(show_output=True)"
"try:\n",
" service.wait_for_deployment(show_output=True)\n",
"except WebserviceException as e:\n",
" print(e.message)\n",
" print(service.get_logs())\n",
" raise"
]
},
{

View File

@@ -42,15 +42,13 @@
"outputs": [],
"source": [
"import azureml.core\n",
"from azureml.core import Workspace, Experiment, Datastore, Dataset\n",
"from azureml.core import Workspace, Environment, Experiment, Datastore, Dataset, ScriptRunConfig\n",
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
"from azureml.core.conda_dependencies import CondaDependencies\n",
"from azureml.core.runconfig import RunConfiguration\n",
"from azureml.exceptions import ComputeTargetException\n",
"from azureml.pipeline.steps import HyperDriveStep, HyperDriveStepRun, PythonScriptStep\n",
"from azureml.pipeline.core import Pipeline, PipelineData, TrainingOutput\n",
"from azureml.train.dnn import TensorFlow\n",
"# from azureml.train.hyperdrive import *\n",
"from azureml.train.hyperdrive import RandomParameterSampling, BanditPolicy, HyperDriveConfig, PrimaryMetricGoal\n",
"from azureml.train.hyperdrive import choice, loguniform\n",
"\n",
@@ -282,13 +280,8 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create TensorFlow estimator\n",
"Next, we construct an [TensorFlow](https://docs.microsoft.com/python/api/azureml-train-core/azureml.train.dnn.tensorflow?view=azure-ml-py) estimator object.\n",
"The TensorFlow estimator is providing a simple way of launching a TensorFlow training job on a compute target. It will automatically provide a docker image that has TensorFlow installed -- if additional pip or conda packages are required, their names can be passed in via the `pip_packages` and `conda_packages` arguments and they will be included in the resulting docker.\n",
"\n",
"The TensorFlow estimator also takes a `framework_version` parameter -- if no version is provided, the estimator will default to the latest version supported by AzureML. Use `TensorFlow.get_supported_versions()` to get a list of all versions supported by your current SDK version or see the [SDK documentation](https://docs.microsoft.com/en-us/python/api/azureml-train-core/azureml.train.dnn?view=azure-ml-py) for the versions supported in the most current release.\n",
"\n",
"The TensorFlow estimator also takes a `framework_version` parameter -- if no version is provided, the estimator will default to the latest version supported by AzureML. Use `TensorFlow.get_supported_versions()` to get a list of all versions supported by your current SDK version or see the [SDK documentation](https://docs.microsoft.com/en-us/python/api/azureml-train-core/azureml.train.dnn?view=azure-ml-py) for the versions supported in the most current release."
"## Retrieve an Environment\n",
"In this tutorial, we will use one of Azure ML's curated TensorFlow environments for training. Curated environments are available in your workspace by default. Specifically, we will use the TensorFlow 2.0 GPU curated environment."
]
},
{
@@ -297,12 +290,45 @@
"metadata": {},
"outputs": [],
"source": [
"est = TensorFlow(source_directory=script_folder, \n",
" compute_target=compute_target,\n",
" entry_script='tf_mnist.py', \n",
" use_gpu=True,\n",
" framework_version='2.0',\n",
" pip_packages=['azureml-dataset-runtime[pandas,fuse]'])"
"tf_env = Environment.get(ws, name='AzureML-TensorFlow-2.0-GPU')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Setup an input for the ScriptRunConfig step\n",
"You can mount dataset to remote compute."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data_folder = dataset.as_mount()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Configure the training job\n",
"Create a ScriptRunConfig object to specify the configuration details of your training job, including your training script, environment to use, and the compute target to run on"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"src = ScriptRunConfig(source_directory=script_folder,\n",
" script='tf_mnist.py',\n",
" arguments=['--data-folder', data_folder],\n",
" compute_target=compute_target,\n",
" environment=tf_env)"
]
},
{
@@ -366,7 +392,7 @@
},
"outputs": [],
"source": [
"hd_config = HyperDriveConfig(estimator=est, \n",
"hd_config = HyperDriveConfig(run_config=src, \n",
" hyperparameter_sampling=ps,\n",
" policy=early_termination_policy,\n",
" primary_metric_name='validation_acc', \n",
@@ -375,25 +401,6 @@
" max_concurrent_runs=4)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Add HyperDrive as a step of pipeline\n",
"\n",
"### Setup an input for the hypderdrive step\n",
"You can mount dataset to remote compute."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data_folder = dataset.as_mount()"
]
},
{
"cell_type": "markdown",
"metadata": {},
@@ -402,7 +409,6 @@
"HyperDriveStep can be used to run HyperDrive job as a step in pipeline.\n",
"- **name:** Name of the step\n",
"- **hyperdrive_config:** A HyperDriveConfig that defines the configuration for this HyperDrive run\n",
"- **estimator_entry_script_arguments:** List of command-line arguments for estimator entry script\n",
"- **inputs:** List of input port bindings\n",
"- **outputs:** List of output port bindings\n",
"- **metrics_output:** Optional value specifying the location to store HyperDrive run metrics as a JSON file\n",
@@ -437,7 +443,6 @@
"hd_step = HyperDriveStep(\n",
" name=hd_step_name,\n",
" hyperdrive_config=hd_config,\n",
" estimator_entry_script_arguments=['--data-folder', data_folder],\n",
" inputs=[data_folder],\n",
" outputs=[metrics_data, saved_model])"
]
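To round out the migrated example, a hypothetical submission of the step configured above (the experiment name is illustrative):

pipeline = Pipeline(workspace=ws, steps=[hd_step])
pipeline_run = Experiment(ws, 'hyperdrive-pipeline').submit(pipeline)
pipeline_run.wait_for_completion()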

View File

@@ -45,16 +45,6 @@
"print(\"SDK version:\", azureml.core.VERSION)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!jupyter nbextension install --py --user azureml.widgets\n",
"!jupyter nbextension enable --py --user azureml.widgets"
]
},
{
"cell_type": "markdown",
"metadata": {},
@@ -278,12 +268,14 @@
"outputs": [],
"source": [
"from azureml.core import Environment\n",
"from azureml.core.runconfig import DockerConfiguration\n",
"\n",
"chainer_env = Environment.from_conda_specification(name = 'chainer-5.1.0-gpu', file_path = './conda_dependencies.yml')\n",
"\n",
"# Specify a GPU base image\n",
"chainer_env.docker.enabled = True\n",
"chainer_env.docker.base_image = 'mcr.microsoft.com/azureml/intelmpi2018.3-cuda9.0-cudnn7-ubuntu16.04'"
"chainer_env.docker.base_image = 'mcr.microsoft.com/azureml/intelmpi2018.3-cuda9.0-cudnn7-ubuntu16.04'\n",
"\n",
"docker_config = DockerConfiguration(use_docker=True)"
]
},
{
@@ -307,7 +299,8 @@
" script='chainer_mnist.py',\n",
" arguments=['--epochs', 10, '--batchsize', 128, '--output_dir', './outputs'],\n",
" compute_target=compute_target,\n",
" environment=chainer_env)"
" environment=chainer_env,\n",
" docker_runtime_config=docker_config)"
]
},
{

View File

@@ -72,7 +72,6 @@
"from fairlearn.reductions import GridSearch\n",
"from fairlearn.reductions import DemographicParity, ErrorRate\n",
"\n",
"from sklearn import svm, neighbors, tree\n",
"from sklearn.compose import ColumnTransformer, make_column_selector\n",
"from sklearn.preprocessing import LabelEncoder,StandardScaler\n",
"from sklearn.linear_model import LogisticRegression\n",
@@ -81,10 +80,8 @@
"from sklearn.preprocessing import StandardScaler, OneHotEncoder\n",
"from sklearn.svm import SVC\n",
"from sklearn.metrics import accuracy_score\n",
"from sklearn.datasets import fetch_openml\n",
"\n",
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"# SHAP Tabular Explainer\n",
"from interpret.ext.blackbox import KernelExplainer\n",
@@ -105,7 +102,9 @@
"metadata": {},
"outputs": [],
"source": [
"dataset = fetch_openml(data_id=1590, as_frame=True)\n",
"from utilities import fetch_census_dataset\n",
"\n",
"dataset = fetch_census_dataset()\n",
"X_raw, y = dataset['data'], dataset['target']\n",
"X_raw[\"race\"].value_counts().to_dict()"
]

View File

@@ -9,3 +9,4 @@ dependencies:
- azureml-dataset-runtime
- ipywidgets
- raiwidgets
- liac-arff

View File

@@ -0,0 +1,75 @@
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------
"""Utilities for azureml-contrib-fairness notebooks."""
import arff
from collections import OrderedDict
from contextlib import closing
import gzip

import pandas as pd

from sklearn.utils import Bunch


def _is_gzip_encoded(_fsrc):
    return _fsrc.info().get('Content-Encoding', '') == 'gzip'


_categorical_columns = [
    'workclass',
    'education',
    'marital-status',
    'occupation',
    'relationship',
    'race',
    'sex',
    'native-country'
]


def fetch_census_dataset():
    """Fetch the Adult Census Dataset.

    This uses a particular URL for the Adult Census dataset. The code
    is a simplified version of fetch_openml() in sklearn.

    The data are copied from:
    https://openml.org/data/v1/download/1595261.gz
    (as of 2021-03-31)
    """
    try:
        from urllib import urlretrieve
    except ImportError:
        from urllib.request import urlretrieve

    filename = "1595261.gz"
    data_url = "https://rainotebookscdn.blob.core.windows.net/datasets/"
    urlretrieve(data_url + filename, filename)

    http_stream = gzip.GzipFile(filename=filename, mode='rb')

    with closing(http_stream):
        def _stream_generator(response):
            for line in response:
                yield line.decode('utf-8')

        stream = _stream_generator(http_stream)
        data = arff.load(stream)

    attributes = OrderedDict(data['attributes'])
    arff_columns = list(attributes)
    raw_df = pd.DataFrame(data=data['data'], columns=arff_columns)

    target_column_name = 'class'
    target = raw_df.pop(target_column_name)
    for col_name in _categorical_columns:
        dtype = pd.api.types.CategoricalDtype(attributes[col_name])
        raw_df[col_name] = raw_df[col_name].astype(dtype, copy=False)

    result = Bunch()
    result.data = raw_df
    result.target = target
    return result

View File

@@ -100,7 +100,7 @@
"\n",
"# Check core SDK version number\n",
"\n",
"print(\"This notebook was created using SDK version 1.25.0, you are currently running version\", azureml.core.VERSION)"
"print(\"This notebook was created using SDK version 1.27.0, you are currently running version\", azureml.core.VERSION)"
]
},
{

View File

@@ -102,7 +102,7 @@
"source": [
"import azureml.core\n",
"\n",
"print(\"This notebook was created using version 1.25.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.27.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -0,0 +1,51 @@
import os
import torch
import json
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


def init():
    global net
    global classes
    model_filename = 'cifar_net.pth'
    model_path = os.path.join(os.environ['AZUREML_MODEL_DIR'], model_filename)
    net = Net()
    net.load_state_dict(torch.load(model_path))
    classes = ('plane', 'car', 'bird', 'cat',
               'deer', 'dog', 'frog', 'horse', 'ship', 'truck')


def run(data):
    data = json.loads(data)
    images = torch.FloatTensor(data['data'])
    outputs = net(images)
    _, predicted = torch.max(outputs, 1)
    result = [classes[predicted[j]] for j in range(4)]
    result_json = json.dumps({"predictions": result})
    # You can return any JSON-serializable object.
    return result_json
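A hypothetical local smoke test of the scoring script above, assuming cifar_net.pth sits in the current directory, which AZUREML_MODEL_DIR is pointed at:

os.environ['AZUREML_MODEL_DIR'] = '.'
init()
batch = torch.randn(4, 3, 32, 32)   # four fake CIFAR-10 sized RGB images
print(run(json.dumps({'data': batch.tolist()})))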