Compare commits

..

1 Commits

Author SHA1 Message Date
amlrelsa-ms
35287ab0d8 update samples from Release-91 as a part of SDK release 2021-03-09 05:36:08 +00:00
8 changed files with 119 additions and 49 deletions

View File

@@ -121,12 +121,17 @@
"metadata": {},
"outputs": [],
"source": [
"os.makedirs('./data/mnist', exist_ok=True)\n",
"data_folder = os.path.join(os.getcwd(), 'data/mnist')\n",
"os.makedirs(data_folder, exist_ok=True)\n",
"\n",
"urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz', filename = './data/mnist/train-images.gz')\n",
"urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz', filename = './data/mnist/train-labels.gz')\n",
"urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz', filename = './data/mnist/test-images.gz')\n",
"urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz', filename = './data/mnist/test-labels.gz')"
"urllib.request.urlretrieve('https://azureopendatastorage.blob.core.windows.net/mnist/train-images-idx3-ubyte.gz',\n",
" filename=os.path.join(data_folder, 'train-images-idx3-ubyte.gz'))\n",
"urllib.request.urlretrieve('https://azureopendatastorage.blob.core.windows.net/mnist/train-labels-idx1-ubyte.gz',\n",
" filename=os.path.join(data_folder, 'train-labels-idx1-ubyte.gz'))\n",
"urllib.request.urlretrieve('https://azureopendatastorage.blob.core.windows.net/mnist/t10k-images-idx3-ubyte.gz',\n",
" filename=os.path.join(data_folder, 't10k-images-idx3-ubyte.gz'))\n",
"urllib.request.urlretrieve('https://azureopendatastorage.blob.core.windows.net/mnist/t10k-labels-idx1-ubyte.gz',\n",
" filename=os.path.join(data_folder, 't10k-labels-idx1-ubyte.gz'))"
]
},
{
@@ -146,11 +151,11 @@
"from utils import load_data\n",
"\n",
"# note we also shrink the intensity values (X) from 0-255 to 0-1. This helps the neural network converge faster.\n",
"X_train = load_data('./data/mnist/train-images.gz', False) / 255.0\n",
"y_train = load_data('./data/mnist/train-labels.gz', True).reshape(-1)\n",
"X_train = load_data(os.path.join(data_folder, 'train-images-idx3-ubyte.gz'), False) / np.float32(255.0)\n",
"X_test = load_data(os.path.join(data_folder, 't10k-images-idx3-ubyte.gz'), False) / np.float32(255.0)\n",
"y_train = load_data(os.path.join(data_folder, 'train-labels-idx1-ubyte.gz'), True).reshape(-1)\n",
"y_test = load_data(os.path.join(data_folder, 't10k-labels-idx1-ubyte.gz'), True).reshape(-1)\n",
"\n",
"X_test = load_data('./data/mnist/test-images.gz', False) / 255.0\n",
"y_test = load_data('./data/mnist/test-labels.gz', True).reshape(-1)\n",
"\n",
"count = 0\n",
"sample_size = 30\n",

View File

@@ -4,6 +4,8 @@ import os
import numpy as np
from utils import download_mnist
import chainer
from chainer import backend
from chainer import backends
@@ -17,6 +19,7 @@ from chainer.training import extensions
from chainer.dataset import concat_examples
from chainer.backends.cuda import to_cpu
from azureml.core.run import Run
run = Run.get_context()
@@ -49,7 +52,7 @@ def main():
args = parser.parse_args()
# Download the MNIST data if you haven't downloaded it yet
train, test = datasets.mnist.get_mnist(withlabel=True, ndim=1)
train, test = download_mnist()
gpu_id = args.gpu_id
batchsize = args.batchsize

View File

@@ -2,6 +2,8 @@ import numpy as np
import os
import json
from utils import download_mnist
from chainer import serializers, using_config, Variable, datasets
import chainer.functions as F
import chainer.links as L
@@ -41,7 +43,7 @@ def init():
def run(input_data):
i = np.array(json.loads(input_data)['data'])
_, test = datasets.get_mnist()
_, test = download_mnist()
x = Variable(np.asarray([test[i][0]]))
y = model(x)

View File

@@ -217,7 +217,8 @@
"import shutil\n",
"\n",
"shutil.copy('chainer_mnist.py', project_folder)\n",
"shutil.copy('chainer_score.py', project_folder)"
"shutil.copy('chainer_score.py', project_folder)\n",
"shutil.copy('utils.py', project_folder)"
]
},
{
@@ -263,6 +264,7 @@
"- python=3.6.2\n",
"- pip:\n",
" - azureml-defaults\n",
" - azureml-opendatasets\n",
" - chainer==5.1.0\n",
" - cupy-cuda90==5.1.0\n",
" - mpi4py==3.0.0\n",
@@ -557,6 +559,7 @@
"cd.add_conda_package('numpy')\n",
"cd.add_pip_package('chainer==5.1.0')\n",
"cd.add_pip_package(\"azureml-defaults\")\n",
"cd.add_pip_package(\"azureml-opendatasets\")\n",
"cd.save_to_file(base_directory='./', conda_file_path='myenv.yml')\n",
"\n",
"print(cd.serialize_to_string())"
@@ -584,7 +587,8 @@
"\n",
"\n",
"myenv = Environment.from_conda_specification(name=\"myenv\", file_path=\"myenv.yml\")\n",
"inference_config = InferenceConfig(entry_script=\"chainer_score.py\", environment=myenv)\n",
"inference_config = InferenceConfig(entry_script=\"chainer_score.py\", environment=myenv,\n",
" source_directory=project_folder)\n",
"\n",
"aciconfig = AciWebservice.deploy_configuration(cpu_cores=1,\n",
" auth_enabled=True, # this flag generates API keys to secure access\n",
@@ -592,11 +596,11 @@
" tags={'name': 'mnist', 'framework': 'Chainer'},\n",
" description='Chainer DNN with MNIST')\n",
"\n",
"service = Model.deploy(workspace=ws, \n",
" name='chainer-mnist-1', \n",
" models=[model], \n",
" inference_config=inference_config, \n",
" deployment_config=aciconfig)\n",
"service = Model.deploy(workspace=ws,\n",
" name='chainer-mnist-1',\n",
" models=[model],\n",
" inference_config=inference_config,\n",
" deployment_config=aciconfig)\n",
"service.wait_for_deployment(True)\n",
"print(service.state)\n",
"print(service.scoring_uri)"
@@ -685,13 +689,16 @@
" res = res.reshape(n_items[0], 1)\n",
" return res\n",
"\n",
"os.makedirs('./data/mnist', exist_ok=True)\n",
"urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz', filename = './data/mnist/test-images.gz')\n",
"urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz', filename = './data/mnist/test-labels.gz')\n",
"data_folder = os.path.join(os.getcwd(), 'data/mnist')\n",
"os.makedirs(data_folder, exist_ok=True)\n",
"\n",
"X_test = load_data('./data/mnist/test-images.gz', False)\n",
"y_test = load_data('./data/mnist/test-labels.gz', True).reshape(-1)\n",
"urllib.request.urlretrieve('https://azureopendatastorage.blob.core.windows.net/mnist/t10k-images-idx3-ubyte.gz',\n",
" filename=os.path.join(data_folder, 't10k-images-idx3-ubyte.gz'))\n",
"urllib.request.urlretrieve('https://azureopendatastorage.blob.core.windows.net/mnist/t10k-labels-idx1-ubyte.gz',\n",
" filename=os.path.join(data_folder, 't10k-labels-idx1-ubyte.gz'))\n",
"\n",
"X_test = load_data(os.path.join(data_folder, 't10k-images-idx3-ubyte.gz'), False) / np.float32(255.0)\n",
"y_test = load_data(os.path.join(data_folder, 't10k-labels-idx1-ubyte.gz'), True).reshape(-1)\n",
"\n",
"# send a random row from the test set to score\n",
"random_index = np.random.randint(0, len(X_test)-1)\n",

View File

@@ -10,3 +10,4 @@ dependencies:
- gzip
- struct
- requests
- azureml-opendatasets

View File

@@ -0,0 +1,50 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
import glob
import gzip
import numpy as np
import os
import struct
from azureml.core import Dataset
from azureml.opendatasets import MNIST
from chainer.datasets import tuple_dataset
# load compressed MNIST gz files and return numpy arrays
def load_data(filename, label=False):
    """Read a gzip-compressed MNIST IDX file into a ``uint8`` NumPy array.

    Args:
        filename: Path of the ``.gz`` file to read.
        label: ``True`` to parse a label file (header: magic number, item
            count); ``False`` to parse an image file (header: magic number,
            item count, row count, column count).

    Returns:
        A ``uint8`` array of shape ``(n_items, 1)`` for labels, or
        ``(n_items, n_rows * n_cols)`` for images (one flattened image per
        row). The array is a view over the bytes that were read, so it is
        not writeable.

    Raises:
        ValueError: If the magic number does not match the requested kind
            of file, or the payload is shorter than the header announces.
        struct.error: If the file ends before the header is complete.
    """
    # IDX magic numbers: 0x00000801 = unsigned-byte data with 1 dimension
    # (labels), 0x00000803 = unsigned-byte data with 3 dimensions (images).
    expected_magic = 2049 if label else 2051

    with gzip.open(filename) as gz:
        # Every IDX header field is a big-endian unsigned 32-bit integer.
        magic, n_items = struct.unpack('>II', gz.read(8))
        if magic != expected_magic:
            raise ValueError(
                "unexpected IDX magic number %d in %r (expected %d for label=%r)"
                % (magic, filename, expected_magic, label))

        if label:
            item_size = 1
        else:
            n_rows, n_cols = struct.unpack('>II', gz.read(8))
            item_size = n_rows * n_cols

        payload = gz.read(n_items * item_size)

    # Fail with a clear message instead of an opaque reshape error when the
    # file was truncated (e.g. by an interrupted download).
    if len(payload) != n_items * item_size:
        raise ValueError(
            "truncated IDX file %r: expected %d data bytes, got %d"
            % (filename, n_items * item_size, len(payload)))

    return np.frombuffer(payload, dtype=np.uint8).reshape(n_items, item_size)
def download_mnist():
    """Download MNIST from Azure Open Datasets and return Chainer datasets.

    The files are downloaded (overwriting existing copies) into
    ``<current working directory>/data/mnist``. Images are scaled by
    ``1 / 255.0`` and cast to ``float32``; labels are flattened and cast to
    ``int32``.

    Returns:
        A ``(train, test)`` pair of ``TupleDataset`` objects, each pairing
        the image array with its label array.
    """
    target_dir = os.path.join(os.getcwd(), 'data/mnist')
    os.makedirs(target_dir, exist_ok=True)

    MNIST.get_file_dataset().download(target_dir, overwrite=True)

    def locate(basename):
        # The download may nest files in sub-folders, so search recursively
        # and take the first match.
        pattern = os.path.join(target_dir, "**/" + basename)
        return glob.glob(pattern, recursive=True)[0]

    def read_images(basename):
        scaled = load_data(locate(basename), False) / 255.0
        return scaled.astype(np.float32)

    def read_labels(basename):
        flat = load_data(locate(basename), True).reshape(-1)
        return flat.astype(np.int32)

    train_images = read_images("train-images-idx3-ubyte.gz")
    test_images = read_images("t10k-images-idx3-ubyte.gz")
    train_labels = read_labels("train-labels-idx1-ubyte.gz")
    test_labels = read_labels("t10k-labels-idx1-ubyte.gz")

    train = tuple_dataset.TupleDataset(train_images, train_labels)
    test = tuple_dataset.TupleDataset(test_images, test_labels)
    return train, test

View File

@@ -33,7 +33,9 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Install required packages"
"## Install required packages\n",
"\n",
"This notebook works with Fairlearn v0.4.6, and not later versions. If needed, please uncomment and run the following cell:"
]
},
{
@@ -42,9 +44,7 @@
"metadata": {},
"outputs": [],
"source": [
"%pip install --upgrade fairlearn\n",
"%pip install --upgrade interpret-community\n",
"%pip install --upgrade raiwidgets"
"# %pip install --upgrade fairlearn==0.4.6"
]
},
{
@@ -71,8 +71,6 @@
"source": [
"from fairlearn.reductions import GridSearch\n",
"from fairlearn.reductions import DemographicParity, ErrorRate\n",
"from fairlearn.datasets import fetch_adult\n",
"from fairlearn.metrics import MetricFrame, selection_rate\n",
"\n",
"from sklearn import svm, neighbors, tree\n",
"from sklearn.compose import ColumnTransformer, make_column_selector\n",
@@ -83,6 +81,7 @@
"from sklearn.preprocessing import StandardScaler, OneHotEncoder\n",
"from sklearn.svm import SVC\n",
"from sklearn.metrics import accuracy_score\n",
"from sklearn.datasets import fetch_openml\n",
"\n",
"import pandas as pd\n",
"import numpy as np\n",
@@ -106,7 +105,7 @@
"metadata": {},
"outputs": [],
"source": [
"dataset = fetch_adult(as_frame=True)\n",
"dataset = fetch_openml(data_id=1590, as_frame=True)\n",
"X_raw, y = dataset['data'], dataset['target']\n",
"X_raw[\"race\"].value_counts().to_dict()"
]
@@ -342,13 +341,13 @@
"metadata": {},
"outputs": [],
"source": [
"from raiwidgets import FairnessDashboard\n",
"from fairlearn.widget import FairlearnDashboard\n",
"\n",
"y_pred = model.predict(X_test)\n",
"\n",
"FairnessDashboard(sensitive_features=sensitive_features_test,\n",
" y_true=y_test,\n",
" y_pred=y_pred)"
"FairlearnDashboard(sensitive_features=sensitive_features_test,\n",
" y_true=y_test,\n",
" y_pred=y_pred)"
]
},
{
@@ -404,7 +403,7 @@
"sweep.fit(X_train_prep, y_train,\n",
" sensitive_features=sensitive_features_train.sex)\n",
"\n",
"predictors = sweep.predictors_"
"predictors = sweep._predictors"
]
},
{
@@ -420,13 +419,18 @@
"metadata": {},
"outputs": [],
"source": [
"from fairlearn.metrics import demographic_parity_difference\n",
"\n",
"accuracies, disparities = [], []\n",
"\n",
"for predictor in predictors:\n",
" accuracy_metric_frame = MetricFrame(accuracy_score, y_train, predictor.predict(X_train_prep), sensitive_features=sensitive_features_train.sex)\n",
" selection_rate_metric_frame = MetricFrame(selection_rate, y_train, predictor.predict(X_train_prep), sensitive_features=sensitive_features_train.sex)\n",
" accuracies.append(accuracy_metric_frame.overall)\n",
" disparities.append(selection_rate_metric_frame.difference())\n",
" y_pred = predictor.predict(X_train_prep)\n",
" # accuracy_metric_frame = MetricFrame(accuracy_score, y_train, predictor.predict(X_train_prep), sensitive_features=sensitive_features_train.sex)\n",
" # selection_rate_metric_frame = MetricFrame(selection_rate, y_train, predictor.predict(X_train_prep), sensitive_features=sensitive_features_train.sex)\n",
" accuracies.append(accuracy_score(y_train, y_pred))\n",
" disparities.append(demographic_parity_difference(y_train,\n",
" y_pred,\n",
" sensitive_features=sensitive_features_train.sex))\n",
" \n",
"all_results = pd.DataFrame({\"predictor\": predictors, \"accuracy\": accuracies, \"disparity\": disparities})\n",
"\n",
@@ -456,8 +460,6 @@
"metadata": {},
"outputs": [],
"source": [
"from raiwidgets import FairnessDashboard\n",
"\n",
"dashboard_all = {}\n",
"for name, predictor in all_models_dict.items():\n",
" value = predictor.predict(X_test_prep)\n",
@@ -467,7 +469,7 @@
"for name, predictor in dominant_models_dict.items():\n",
" dominant_all[name] = predictor.predict(X_test_prep)\n",
"\n",
"FairnessDashboard(sensitive_features=sensitive_features_test, \n",
"FairlearnDashboard(sensitive_features=sensitive_features_test, \n",
" y_true=y_test,\n",
" y_pred=dominant_all)"
]
@@ -551,7 +553,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Next, we register each of the models in the `dashboard_predicted` dictionary into the workspace. For this, we have to save each model to a file, and then register that file:"
"Next, we register each of the models in the `dominant_all` dictionary into the workspace. For this, we have to save each model to a file, and then register that file:"
]
},
{
@@ -576,7 +578,7 @@
" return registered_model.id\n",
"\n",
"model_name_id_mapping = dict()\n",
"for name, model in dashboard_all.items():\n",
"for name, model in dominant_all.items():\n",
" m_id = register_model(name, model)\n",
" model_name_id_mapping[name] = m_id"
]
@@ -594,9 +596,9 @@
"metadata": {},
"outputs": [],
"source": [
"dashboard_all_ids = dict()\n",
"for name, y_pred in dashboard_all.items():\n",
" dashboard_all_ids[model_name_id_mapping[name]] = y_pred"
"dominant_all_ids = dict()\n",
"for name, y_pred in dominant_all.items():\n",
" dominant_all_ids[model_name_id_mapping[name]] = y_pred"
]
},
{
@@ -619,7 +621,7 @@
"from fairlearn.metrics._group_metric_set import _create_group_metric_set\n",
"\n",
"dash_dict_all = _create_group_metric_set(y_true=y_test,\n",
" predictions=dashboard_all_ids,\n",
" predictions=dominant_all_ids,\n",
" sensitive_features=sf,\n",
" prediction_type='binary_classification')"
]

View File

@@ -5,7 +5,7 @@ dependencies:
- azureml-interpret
- azureml-contrib-fairness
- interpret-community[visualization]
- fairlearn
- fairlearn==0.4.6
- matplotlib
- azureml-dataset-runtime
- ipywidgets