Compare commits

..

1 Commits

Author SHA1 Message Date
amlrelsa-ms
efc61d2219 update samples from Release-72 as a part of 1.24.0 SDK stable release 2021-03-09 04:17:07 +00:00
16 changed files with 53 additions and 184 deletions

View File

@@ -94,17 +94,6 @@ def main():
os.makedirs(output_dir, exist_ok=True)
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
# Use Azure Open Datasets for MNIST dataset
datasets.MNIST.resources = [
("https://azureopendatastorage.azurefd.net/mnist/train-images-idx3-ubyte.gz",
"f68b3c2dcbeaaa9fbdd348bbdeb94873"),
("https://azureopendatastorage.azurefd.net/mnist/train-labels-idx1-ubyte.gz",
"d53e105ee54ea40749a09fcbcd1e9432"),
("https://azureopendatastorage.azurefd.net/mnist/t10k-images-idx3-ubyte.gz",
"9fb629c4189551a2d022fa330f9573f3"),
("https://azureopendatastorage.azurefd.net/mnist/t10k-labels-idx1-ubyte.gz",
"ec29112dd5afa0611ce80d1b7f02629c")
]
train_loader = torch.utils.data.DataLoader(
datasets.MNIST('data', train=True, download=True,
transform=transforms.Compose([transforms.ToTensor(),

View File

@@ -3,11 +3,7 @@ dependencies:
- pip:
- azureml-sdk
- azureml-interpret
- flask
- flask-cors
- gevent>=1.3.6
- jinja2
- ipython
- interpret-community[visualization]
- matplotlib
- azureml-dataset-runtime
- ipywidgets

View File

@@ -3,10 +3,6 @@ dependencies:
- pip:
- azureml-sdk
- azureml-interpret
- flask
- flask-cors
- gevent>=1.3.6
- jinja2
- ipython
- interpret-community[visualization]
- matplotlib
- ipywidgets

View File

@@ -3,10 +3,6 @@ dependencies:
- pip:
- azureml-sdk
- azureml-interpret
- flask
- flask-cors
- gevent>=1.3.6
- jinja2
- ipython
- interpret-community[visualization]
- matplotlib
- ipywidgets

View File

@@ -3,11 +3,7 @@ dependencies:
- pip:
- azureml-sdk
- azureml-interpret
- flask
- flask-cors
- gevent>=1.3.6
- jinja2
- ipython
- interpret-community[visualization]
- matplotlib
- azureml-dataset-runtime
- azureml-core

View File

@@ -121,17 +121,12 @@
"metadata": {},
"outputs": [],
"source": [
"data_folder = os.path.join(os.getcwd(), 'data/mnist')\n",
"os.makedirs(data_folder, exist_ok=True)\n",
"os.makedirs('./data/mnist', exist_ok=True)\n",
"\n",
"urllib.request.urlretrieve('https://azureopendatastorage.blob.core.windows.net/mnist/train-images-idx3-ubyte.gz',\n",
" filename=os.path.join(data_folder, 'train-images-idx3-ubyte.gz'))\n",
"urllib.request.urlretrieve('https://azureopendatastorage.blob.core.windows.net/mnist/train-labels-idx1-ubyte.gz',\n",
" filename=os.path.join(data_folder, 'train-labels-idx1-ubyte.gz'))\n",
"urllib.request.urlretrieve('https://azureopendatastorage.blob.core.windows.net/mnist/t10k-images-idx3-ubyte.gz',\n",
" filename=os.path.join(data_folder, 't10k-images-idx3-ubyte.gz'))\n",
"urllib.request.urlretrieve('https://azureopendatastorage.blob.core.windows.net/mnist/t10k-labels-idx1-ubyte.gz',\n",
" filename=os.path.join(data_folder, 't10k-labels-idx1-ubyte.gz'))"
"urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz', filename = './data/mnist/train-images.gz')\n",
"urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz', filename = './data/mnist/train-labels.gz')\n",
"urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz', filename = './data/mnist/test-images.gz')\n",
"urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz', filename = './data/mnist/test-labels.gz')"
]
},
{
@@ -151,11 +146,11 @@
"from utils import load_data\n",
"\n",
"# note we also shrink the intensity values (X) from 0-255 to 0-1. This helps the neural network converge faster.\n",
"X_train = load_data(os.path.join(data_folder, 'train-images-idx3-ubyte.gz'), False) / np.float32(255.0)\n",
"X_test = load_data(os.path.join(data_folder, 't10k-images-idx3-ubyte.gz'), False) / np.float32(255.0)\n",
"y_train = load_data(os.path.join(data_folder, 'train-labels-idx1-ubyte.gz'), True).reshape(-1)\n",
"y_test = load_data(os.path.join(data_folder, 't10k-labels-idx1-ubyte.gz'), True).reshape(-1)\n",
"X_train = load_data('./data/mnist/train-images.gz', False) / 255.0\n",
"y_train = load_data('./data/mnist/train-labels.gz', True).reshape(-1)\n",
"\n",
"X_test = load_data('./data/mnist/test-images.gz', False) / 255.0\n",
"y_test = load_data('./data/mnist/test-labels.gz', True).reshape(-1)\n",
"\n",
"count = 0\n",
"sample_size = 30\n",

View File

@@ -4,8 +4,6 @@ import os
import numpy as np
from utils import download_mnist
import chainer
from chainer import backend
from chainer import backends
@@ -19,7 +17,6 @@ from chainer.training import extensions
from chainer.dataset import concat_examples
from chainer.backends.cuda import to_cpu
from azureml.core.run import Run
run = Run.get_context()
@@ -52,7 +49,7 @@ def main():
args = parser.parse_args()
# Download the MNIST data if you haven't downloaded it yet
train, test = download_mnist()
train, test = datasets.mnist.get_mnist(withlabel=True, ndim=1)
gpu_id = args.gpu_id
batchsize = args.batchsize

View File

@@ -2,8 +2,6 @@ import numpy as np
import os
import json
from utils import download_mnist
from chainer import serializers, using_config, Variable, datasets
import chainer.functions as F
import chainer.links as L
@@ -43,7 +41,7 @@ def init():
def run(input_data):
i = np.array(json.loads(input_data)['data'])
_, test = download_mnist()
_, test = datasets.get_mnist()
x = Variable(np.asarray([test[i][0]]))
y = model(x)

View File

@@ -217,8 +217,7 @@
"import shutil\n",
"\n",
"shutil.copy('chainer_mnist.py', project_folder)\n",
"shutil.copy('chainer_score.py', project_folder)\n",
"shutil.copy('utils.py', project_folder)"
"shutil.copy('chainer_score.py', project_folder)"
]
},
{
@@ -264,7 +263,6 @@
"- python=3.6.2\n",
"- pip:\n",
" - azureml-defaults\n",
" - azureml-opendatasets\n",
" - chainer==5.1.0\n",
" - cupy-cuda90==5.1.0\n",
" - mpi4py==3.0.0\n",
@@ -559,7 +557,6 @@
"cd.add_conda_package('numpy')\n",
"cd.add_pip_package('chainer==5.1.0')\n",
"cd.add_pip_package(\"azureml-defaults\")\n",
"cd.add_pip_package(\"azureml-opendatasets\")\n",
"cd.save_to_file(base_directory='./', conda_file_path='myenv.yml')\n",
"\n",
"print(cd.serialize_to_string())"
@@ -587,8 +584,7 @@
"\n",
"\n",
"myenv = Environment.from_conda_specification(name=\"myenv\", file_path=\"myenv.yml\")\n",
"inference_config = InferenceConfig(entry_script=\"chainer_score.py\", environment=myenv,\n",
" source_directory=project_folder)\n",
"inference_config = InferenceConfig(entry_script=\"chainer_score.py\", environment=myenv)\n",
"\n",
"aciconfig = AciWebservice.deploy_configuration(cpu_cores=1,\n",
" auth_enabled=True, # this flag generates API keys to secure access\n",
@@ -596,11 +592,11 @@
" tags={'name': 'mnist', 'framework': 'Chainer'},\n",
" description='Chainer DNN with MNIST')\n",
"\n",
"service = Model.deploy(workspace=ws,\n",
" name='chainer-mnist-1',\n",
" models=[model],\n",
" inference_config=inference_config,\n",
" deployment_config=aciconfig)\n",
"service = Model.deploy(workspace=ws, \n",
" name='chainer-mnist-1', \n",
" models=[model], \n",
" inference_config=inference_config, \n",
" deployment_config=aciconfig)\n",
"service.wait_for_deployment(True)\n",
"print(service.state)\n",
"print(service.scoring_uri)"
@@ -689,16 +685,13 @@
" res = res.reshape(n_items[0], 1)\n",
" return res\n",
"\n",
"data_folder = os.path.join(os.getcwd(), 'data/mnist')\n",
"os.makedirs(data_folder, exist_ok=True)\n",
"os.makedirs('./data/mnist', exist_ok=True)\n",
"urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz', filename = './data/mnist/test-images.gz')\n",
"urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz', filename = './data/mnist/test-labels.gz')\n",
"\n",
"urllib.request.urlretrieve('https://azureopendatastorage.blob.core.windows.net/mnist/t10k-images-idx3-ubyte.gz',\n",
" filename=os.path.join(data_folder, 't10k-images-idx3-ubyte.gz'))\n",
"urllib.request.urlretrieve('https://azureopendatastorage.blob.core.windows.net/mnist/t10k-labels-idx1-ubyte.gz',\n",
" filename=os.path.join(data_folder, 't10k-labels-idx1-ubyte.gz'))\n",
"X_test = load_data('./data/mnist/test-images.gz', False)\n",
"y_test = load_data('./data/mnist/test-labels.gz', True).reshape(-1)\n",
"\n",
"X_test = load_data(os.path.join(data_folder, 't10k-images-idx3-ubyte.gz'), False) / np.float32(255.0)\n",
"y_test = load_data(os.path.join(data_folder, 't10k-labels-idx1-ubyte.gz'), True).reshape(-1)\n",
"\n",
"# send a random row from the test set to score\n",
"random_index = np.random.randint(0, len(X_test)-1)\n",

View File

@@ -10,4 +10,3 @@ dependencies:
- gzip
- struct
- requests
- azureml-opendatasets

View File

@@ -1,50 +0,0 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
import glob
import gzip
import numpy as np
import os
import struct
from azureml.core import Dataset
from azureml.opendatasets import MNIST
from chainer.datasets import tuple_dataset
# load compressed MNIST gz files and return numpy arrays
def load_data(filename, label=False):
with gzip.open(filename) as gz:
struct.unpack('I', gz.read(4))
n_items = struct.unpack('>I', gz.read(4))
if not label:
n_rows = struct.unpack('>I', gz.read(4))[0]
n_cols = struct.unpack('>I', gz.read(4))[0]
res = np.frombuffer(gz.read(n_items[0] * n_rows * n_cols), dtype=np.uint8)
res = res.reshape(n_items[0], n_rows * n_cols)
else:
res = np.frombuffer(gz.read(n_items[0]), dtype=np.uint8)
res = res.reshape(n_items[0], 1)
return res
def download_mnist():
data_folder = os.path.join(os.getcwd(), 'data/mnist')
os.makedirs(data_folder, exist_ok=True)
mnist_file_dataset = MNIST.get_file_dataset()
mnist_file_dataset.download(data_folder, overwrite=True)
X_train = load_data(glob.glob(os.path.join(data_folder, "**/train-images-idx3-ubyte.gz"),
recursive=True)[0], False) / 255.0
X_test = load_data(glob.glob(os.path.join(data_folder, "**/t10k-images-idx3-ubyte.gz"),
recursive=True)[0], False) / 255.0
y_train = load_data(glob.glob(os.path.join(data_folder, "**/train-labels-idx1-ubyte.gz"),
recursive=True)[0], True).reshape(-1)
y_test = load_data(glob.glob(os.path.join(data_folder, "**/t10k-labels-idx1-ubyte.gz"),
recursive=True)[0], True).reshape(-1)
train = tuple_dataset.TupleDataset(X_train.astype(np.float32), y_train.astype(np.int32))
test = tuple_dataset.TupleDataset(X_test.astype(np.float32), y_test.astype(np.int32))
return train, test

View File

@@ -51,17 +51,6 @@ if args.cuda:
kwargs = {}
# Use Azure Open Datasets for MNIST dataset
datasets.MNIST.resources = [
("https://azureopendatastorage.azurefd.net/mnist/train-images-idx3-ubyte.gz",
"f68b3c2dcbeaaa9fbdd348bbdeb94873"),
("https://azureopendatastorage.azurefd.net/mnist/train-labels-idx1-ubyte.gz",
"d53e105ee54ea40749a09fcbcd1e9432"),
("https://azureopendatastorage.azurefd.net/mnist/t10k-images-idx3-ubyte.gz",
"9fb629c4189551a2d022fa330f9573f3"),
("https://azureopendatastorage.azurefd.net/mnist/t10k-labels-idx1-ubyte.gz",
"ec29112dd5afa0611ce80d1b7f02629c")
]
train_dataset = \
datasets.MNIST('data-%d' % hvd.rank(), train=True, download=True,
transform=transforms.Compose([

View File

@@ -102,17 +102,6 @@ torch.manual_seed(args.seed)
device = torch.device("cuda" if use_cuda else "cpu")
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
# Use Azure Open Datasets for MNIST dataset
datasets.MNIST.resources = [
("https://azureopendatastorage.azurefd.net/mnist/train-images-idx3-ubyte.gz",
"f68b3c2dcbeaaa9fbdd348bbdeb94873"),
("https://azureopendatastorage.azurefd.net/mnist/train-labels-idx1-ubyte.gz",
"d53e105ee54ea40749a09fcbcd1e9432"),
("https://azureopendatastorage.azurefd.net/mnist/t10k-images-idx3-ubyte.gz",
"9fb629c4189551a2d022fa330f9573f3"),
("https://azureopendatastorage.azurefd.net/mnist/t10k-labels-idx1-ubyte.gz",
"ec29112dd5afa0611ce80d1b7f02629c")
]
train_loader = torch.utils.data.DataLoader(
datasets.MNIST('../data', train=True, download=True,
transform=transforms.Compose([

View File

@@ -332,18 +332,6 @@
"import random\n",
"import numpy as np\n",
"\n",
"# Use Azure Open Datasets for MNIST dataset\n",
"datasets.MNIST.resources = [\n",
" (\"https://azureopendatastorage.azurefd.net/mnist/train-images-idx3-ubyte.gz\",\n",
" \"f68b3c2dcbeaaa9fbdd348bbdeb94873\"),\n",
" (\"https://azureopendatastorage.azurefd.net/mnist/train-labels-idx1-ubyte.gz\",\n",
" \"d53e105ee54ea40749a09fcbcd1e9432\"),\n",
" (\"https://azureopendatastorage.azurefd.net/mnist/t10k-images-idx3-ubyte.gz\",\n",
" \"9fb629c4189551a2d022fa330f9573f3\"),\n",
" (\"https://azureopendatastorage.azurefd.net/mnist/t10k-labels-idx1-ubyte.gz\",\n",
" \"ec29112dd5afa0611ce80d1b7f02629c\")\n",
"]\n",
"\n",
"test_data = datasets.MNIST('../data', train=False, transform=transforms.Compose([\n",
" transforms.ToTensor(),\n",
" transforms.Normalize((0.1307,), (0.3081,))]))\n",

View File

@@ -33,9 +33,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Install required packages\n",
"\n",
"This notebook works with Fairlearn v0.4.6, and not later versions. If needed, please uncomment and run the following cell:"
"## Install required packages"
]
},
{
@@ -44,7 +42,9 @@
"metadata": {},
"outputs": [],
"source": [
"# %pip install --upgrade fairlearn==0.4.6"
"%pip install --upgrade fairlearn\n",
"%pip install --upgrade interpret-community\n",
"%pip install --upgrade raiwidgets"
]
},
{
@@ -71,6 +71,8 @@
"source": [
"from fairlearn.reductions import GridSearch\n",
"from fairlearn.reductions import DemographicParity, ErrorRate\n",
"from fairlearn.datasets import fetch_adult\n",
"from fairlearn.metrics import MetricFrame, selection_rate\n",
"\n",
"from sklearn import svm, neighbors, tree\n",
"from sklearn.compose import ColumnTransformer, make_column_selector\n",
@@ -81,7 +83,6 @@
"from sklearn.preprocessing import StandardScaler, OneHotEncoder\n",
"from sklearn.svm import SVC\n",
"from sklearn.metrics import accuracy_score\n",
"from sklearn.datasets import fetch_openml\n",
"\n",
"import pandas as pd\n",
"import numpy as np\n",
@@ -105,7 +106,7 @@
"metadata": {},
"outputs": [],
"source": [
"dataset = fetch_openml(data_id=1590, as_frame=True)\n",
"dataset = fetch_adult(as_frame=True)\n",
"X_raw, y = dataset['data'], dataset['target']\n",
"X_raw[\"race\"].value_counts().to_dict()"
]
@@ -341,13 +342,13 @@
"metadata": {},
"outputs": [],
"source": [
"from fairlearn.widget import FairlearnDashboard\n",
"from raiwidgets import FairnessDashboard\n",
"\n",
"y_pred = model.predict(X_test)\n",
"\n",
"FairlearnDashboard(sensitive_features=sensitive_features_test,\n",
" y_true=y_test,\n",
" y_pred=y_pred)"
"FairnessDashboard(sensitive_features=sensitive_features_test,\n",
" y_true=y_test,\n",
" y_pred=y_pred)"
]
},
{
@@ -403,7 +404,7 @@
"sweep.fit(X_train_prep, y_train,\n",
" sensitive_features=sensitive_features_train.sex)\n",
"\n",
"predictors = sweep._predictors"
"predictors = sweep.predictors_"
]
},
{
@@ -419,18 +420,13 @@
"metadata": {},
"outputs": [],
"source": [
"from fairlearn.metrics import demographic_parity_difference\n",
"\n",
"accuracies, disparities = [], []\n",
"\n",
"for predictor in predictors:\n",
" y_pred = predictor.predict(X_train_prep)\n",
" # accuracy_metric_frame = MetricFrame(accuracy_score, y_train, predictor.predict(X_train_prep), sensitive_features=sensitive_features_train.sex)\n",
" # selection_rate_metric_frame = MetricFrame(selection_rate, y_train, predictor.predict(X_train_prep), sensitive_features=sensitive_features_train.sex)\n",
" accuracies.append(accuracy_score(y_train, y_pred))\n",
" disparities.append(demographic_parity_difference(y_train,\n",
" y_pred,\n",
" sensitive_features=sensitive_features_train.sex))\n",
" accuracy_metric_frame = MetricFrame(accuracy_score, y_train, predictor.predict(X_train_prep), sensitive_features=sensitive_features_train.sex)\n",
" selection_rate_metric_frame = MetricFrame(selection_rate, y_train, predictor.predict(X_train_prep), sensitive_features=sensitive_features_train.sex)\n",
" accuracies.append(accuracy_metric_frame.overall)\n",
" disparities.append(selection_rate_metric_frame.difference())\n",
" \n",
"all_results = pd.DataFrame({\"predictor\": predictors, \"accuracy\": accuracies, \"disparity\": disparities})\n",
"\n",
@@ -460,6 +456,8 @@
"metadata": {},
"outputs": [],
"source": [
"from raiwidgets import FairnessDashboard\n",
"\n",
"dashboard_all = {}\n",
"for name, predictor in all_models_dict.items():\n",
" value = predictor.predict(X_test_prep)\n",
@@ -469,7 +467,7 @@
"for name, predictor in dominant_models_dict.items():\n",
" dominant_all[name] = predictor.predict(X_test_prep)\n",
"\n",
"FairlearnDashboard(sensitive_features=sensitive_features_test, \n",
"FairnessDashboard(sensitive_features=sensitive_features_test, \n",
" y_true=y_test,\n",
" y_pred=dominant_all)"
]
@@ -553,7 +551,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Next, we register each of the models in the `dominant_all` dictionary into the workspace. For this, we have to save each model to a file, and then register that file:"
"Next, we register each of the models in the `dashboard_predicted` dictionary into the workspace. For this, we have to save each model to a file, and then register that file:"
]
},
{
@@ -578,7 +576,7 @@
" return registered_model.id\n",
"\n",
"model_name_id_mapping = dict()\n",
"for name, model in dominant_all.items():\n",
"for name, model in dashboard_all.items():\n",
" m_id = register_model(name, model)\n",
" model_name_id_mapping[name] = m_id"
]
@@ -596,9 +594,9 @@
"metadata": {},
"outputs": [],
"source": [
"dominant_all_ids = dict()\n",
"for name, y_pred in dominant_all.items():\n",
" dominant_all_ids[model_name_id_mapping[name]] = y_pred"
"dashboard_all_ids = dict()\n",
"for name, y_pred in dashboard_all.items():\n",
" dashboard_all_ids[model_name_id_mapping[name]] = y_pred"
]
},
{
@@ -621,7 +619,7 @@
"from fairlearn.metrics._group_metric_set import _create_group_metric_set\n",
"\n",
"dash_dict_all = _create_group_metric_set(y_true=y_test,\n",
" predictions=dominant_all_ids,\n",
" predictions=dashboard_all_ids,\n",
" sensitive_features=sf,\n",
" prediction_type='binary_classification')"
]

View File

@@ -5,7 +5,7 @@ dependencies:
- azureml-interpret
- azureml-contrib-fairness
- interpret-community[visualization]
- fairlearn==0.4.6
- fairlearn
- matplotlib
- azureml-dataset-runtime
- ipywidgets