Mirror of https://github.com/Azure/MachineLearningNotebooks.git (synced 2025-12-20 09:37:04 -05:00)

Compare commits: azureml-sd ... azureml-sd (10 commits)
| Author | SHA1 | Date |
|---|---|---|
| | 40fbadd85c | |
| | 0c1fc25542 | |
| | e8e1357229 | |
| | ad44f8fa2b | |
| | ee63e759f0 | |
| | b81d97ebbf | |
| | 249fb6bbb5 | |
| | cda1f3e4cf | |
| | 1d05efaac2 | |
| | 3adebd1127 | |
@@ -103,7 +103,7 @@
 "source": [
 "import azureml.core\n",
 "\n",
-"print(\"This notebook was created using version 1.19.0 of the Azure ML SDK\")\n",
+"print(\"This notebook was created using version 1.21.0 of the Azure ML SDK\")\n",
 "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
 ]
 },
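This hunk bumps the notebook's target SDK version from 1.19.0 to 1.21.0 (the same bump recurs in several notebooks further down). A minimal sketch of a local version guard a user could run before working through the updated notebooks; the `packaging` dependency and the upgrade command are assumptions, not part of this change:

```python
import azureml.core
from packaging.version import Version  # assumes the 'packaging' package is installed

# Warn when the installed SDK is older than the version the notebooks now target.
if Version(azureml.core.VERSION) < Version("1.21.0"):
    print("Consider upgrading, e.g.: pip install --upgrade azureml-sdk")
```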
@@ -46,7 +46,7 @@
 "Please see the [configuration notebook](../../configuration.ipynb) for information about creating one, if required.\n",
 "This notebook also requires the following packages:\n",
 "* `azureml-contrib-fairness`\n",
-"* `fairlearn==0.4.6`\n",
+"* `fairlearn==0.4.6` (v0.5.0 will work with minor modifications)\n",
 "* `joblib`\n",
 "* `shap`\n",
 "\n",
@@ -62,13 +62,20 @@
 "# !pip install --upgrade scikit-learn>=0.22.1"
 ]
 },
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"Finally, please ensure that when you downloaded this notebook, you also downloaded the `fairness_nb_utils.py` file from the same location, and placed it in the same directory as this notebook."
+]
+},
 {
 "cell_type": "markdown",
 "metadata": {},
 "source": [
 "<a id=\"LoadingData\"></a>\n",
 "## Loading the Data\n",
-"We use the well-known `adult` census dataset, which we load using `shap` (for convenience). We start with a fairly unremarkable set of imports:"
+"We use the well-known `adult` census dataset, which we will fetch from the OpenML website. We start with a fairly unremarkable set of imports:"
 ]
 },
 {
@@ -79,9 +86,16 @@
 "source": [
 "from fairlearn.reductions import GridSearch, DemographicParity, ErrorRate\n",
 "from fairlearn.widget import FairlearnDashboard\n",
-"from sklearn import svm\n",
-"from sklearn.preprocessing import LabelEncoder, StandardScaler\n",
+"\n",
+"from sklearn.compose import ColumnTransformer\n",
+"from sklearn.datasets import fetch_openml\n",
+"from sklearn.impute import SimpleImputer\n",
 "from sklearn.linear_model import LogisticRegression\n",
+"from sklearn.model_selection import train_test_split\n",
+"from sklearn.preprocessing import StandardScaler, OneHotEncoder\n",
+"from sklearn.compose import make_column_selector as selector\n",
+"from sklearn.pipeline import Pipeline\n",
+"\n",
 "import pandas as pd"
 ]
 },
@@ -89,7 +103,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"We can now load and inspect the data from the `shap` package:"
+"We can now load and inspect the data:"
 ]
 },
 {
@@ -98,13 +112,13 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from utilities import fetch_openml_with_retries\n",
+"from fairness_nb_utils import fetch_openml_with_retries\n",
 "\n",
 "data = fetch_openml_with_retries(data_id=1590)\n",
 " \n",
 "# Extract the items we want\n",
 "X_raw = data.data\n",
-"Y = (data.target == '>50K') * 1\n",
+"y = (data.target == '>50K') * 1\n",
 "\n",
 "X_raw[\"race\"].value_counts().to_dict()"
 ]
@@ -113,7 +127,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"We are going to treat the sex of each individual as a protected attribute (where 0 indicates female and 1 indicates male), and in this particular case we are going separate this attribute out and drop it from the main data (this is not always the best option - see the [Fairlearn website](http://fairlearn.github.io/) for further discussion). We also separate out the Race column, but we will not perform any mitigation based on it. Finally, we perform some standard data preprocessing steps to convert the data into a format suitable for the ML algorithms"
+"We are going to treat the sex and race of each individual as protected attributes, and in this particular case we are going to remove these attributes from the main data (this is not always the best option - see the [Fairlearn website](http://fairlearn.github.io/) for further discussion). Protected attributes are often denoted by 'A' in the literature, and we follow that convention here:"
 ]
 },
 {
@@ -123,23 +137,14 @@
 "outputs": [],
 "source": [
 "A = X_raw[['sex','race']]\n",
-"X = X_raw.drop(labels=['sex', 'race'],axis = 1)\n",
-"X_dummies = pd.get_dummies(X)\n",
-"\n",
-"sc = StandardScaler()\n",
-"X_scaled = sc.fit_transform(X_dummies)\n",
-"X_scaled = pd.DataFrame(X_scaled, columns=X_dummies.columns)\n",
-"\n",
-"\n",
-"le = LabelEncoder()\n",
-"Y = le.fit_transform(Y)"
+"X_raw = X_raw.drop(labels=['sex', 'race'],axis = 1)"
 ]
 },
 {
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"With our data prepared, we can make the conventional split in to 'test' and 'train' subsets:"
+"We now preprocess our data. To avoid the problem of data leakage, we split our data into training and test sets before performing any other transformations. Subsequent transformations (such as scalings) will be fit to the training data set, and then applied to the test dataset."
 ]
 },
 {
@@ -148,21 +153,76 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from sklearn.model_selection import train_test_split\n",
-"X_train, X_test, Y_train, Y_test, A_train, A_test = train_test_split(X_scaled, \n",
-" Y, \n",
-" A,\n",
-" test_size = 0.2,\n",
-" random_state=0,\n",
-" stratify=Y)\n",
+"(X_train, X_test, y_train, y_test, A_train, A_test) = train_test_split(\n",
+" X_raw, y, A, test_size=0.3, random_state=12345, stratify=y\n",
+")\n",
+"\n",
+"# Ensure indices are aligned between X, y and A,\n",
+"# after all the slicing and splitting of DataFrames\n",
+"# and Series\n",
 "\n",
-"# Work around indexing issue\n",
 "X_train = X_train.reset_index(drop=True)\n",
-"A_train = A_train.reset_index(drop=True)\n",
 "X_test = X_test.reset_index(drop=True)\n",
+"y_train = y_train.reset_index(drop=True)\n",
+"y_test = y_test.reset_index(drop=True)\n",
+"A_train = A_train.reset_index(drop=True)\n",
 "A_test = A_test.reset_index(drop=True)"
 ]
 },
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"We have two types of column in the dataset - categorical columns which will need to be one-hot encoded, and numeric ones which will need to be rescaled. We also need to take care of missing values. We use a simple approach here, but please bear in mind that this is another way that bias could be introduced (especially if one subgroup tends to have more missing values).\n",
+"\n",
+"For this preprocessing, we make use of `Pipeline` objects from `sklearn`:"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"numeric_transformer = Pipeline(\n",
+" steps=[\n",
+" (\"impute\", SimpleImputer()),\n",
+" (\"scaler\", StandardScaler()),\n",
+" ]\n",
+")\n",
+"\n",
+"categorical_transformer = Pipeline(\n",
+" [\n",
+" (\"impute\", SimpleImputer(strategy=\"most_frequent\")),\n",
+" (\"ohe\", OneHotEncoder(handle_unknown=\"ignore\", sparse=False)),\n",
+" ]\n",
+")\n",
+"\n",
+"preprocessor = ColumnTransformer(\n",
+" transformers=[\n",
+" (\"num\", numeric_transformer, selector(dtype_exclude=\"category\")),\n",
+" (\"cat\", categorical_transformer, selector(dtype_include=\"category\")),\n",
+" ]\n",
+")"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"Now, the preprocessing pipeline is defined, we can run it on our training data, and apply the generated transform to our test data:"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"X_train = preprocessor.fit_transform(X_train)\n",
+"X_test = preprocessor.transform(X_test)"
+]
+},
 {
 "cell_type": "markdown",
 "metadata": {},
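The cells added above split the data before any transformation (to avoid leakage) and then build a `ColumnTransformer` combining imputation, scaling and one-hot encoding. A self-contained restatement of that flow, for running outside the notebook; it assumes a scikit-learn release in the 0.22–1.x range, where `OneHotEncoder(sparse=...)` is still the accepted spelling (newer releases rename it `sparse_output`):

```python
from sklearn.compose import ColumnTransformer, make_column_selector as selector
from sklearn.datasets import fetch_openml
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Fetch the adult census data directly from OpenML (data_id 1590, as in the notebook).
data = fetch_openml(data_id=1590, as_frame=True)
X_raw, y = data.data, (data.target == ">50K") * 1
A = X_raw[["sex", "race"]]                      # protected attributes, kept separately
X_raw = X_raw.drop(columns=["sex", "race"])

# Split first, so scalers/encoders are fit on training data only.
X_train, X_test, y_train, y_test, A_train, A_test = train_test_split(
    X_raw, y, A, test_size=0.3, random_state=12345, stratify=y
)

numeric = Pipeline([("impute", SimpleImputer()), ("scale", StandardScaler())])
categorical = Pipeline([
    ("impute", SimpleImputer(strategy="most_frequent")),
    ("ohe", OneHotEncoder(handle_unknown="ignore", sparse=False)),
])
preprocessor = ColumnTransformer([
    ("num", numeric, selector(dtype_exclude="category")),
    ("cat", categorical, selector(dtype_include="category")),
])

X_train = preprocessor.fit_transform(X_train)   # fit on training data only
X_test = preprocessor.transform(X_test)         # reuse the fitted transform on test data
```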
@@ -181,7 +241,7 @@
 "source": [
 "unmitigated_predictor = LogisticRegression(solver='liblinear', fit_intercept=True)\n",
 "\n",
-"unmitigated_predictor.fit(X_train, Y_train)"
+"unmitigated_predictor.fit(X_train, y_train)"
 ]
 },
 {
@@ -198,7 +258,7 @@
 "outputs": [],
 "source": [
 "FairlearnDashboard(sensitive_features=A_test, sensitive_feature_names=['Sex', 'Race'],\n",
-" y_true=Y_test,\n",
+" y_true=y_test,\n",
 " y_pred={\"unmitigated\": unmitigated_predictor.predict(X_test)})"
 ]
 },
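The dashboard call above inspects per-group performance interactively. As a quick, non-interactive sanity check of the same idea (not part of this change), accuracy can be disaggregated by a sensitive feature with plain pandas, assuming the variables defined in the cells above:

```python
import pandas as pd
from sklearn.metrics import accuracy_score

y_pred = unmitigated_predictor.predict(X_test)
by_group = pd.DataFrame({"y_true": y_test, "y_pred": y_pred, "sex": A_test["sex"]})
# Accuracy computed separately for each value of the 'sex' sensitive feature.
print(by_group.groupby("sex").apply(lambda g: accuracy_score(g["y_true"], g["y_pred"])))
```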
@@ -249,9 +309,10 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"sweep.fit(X_train, Y_train,\n",
+"sweep.fit(X_train, y_train,\n",
 " sensitive_features=A_train.sex)\n",
 "\n",
+"# For Fairlearn v0.5.0, need sweep.predictors_\n",
 "predictors = sweep._predictors"
 ]
 },
@@ -273,9 +334,9 @@
 " classifier = lambda X: m.predict(X)\n",
 " \n",
 " error = ErrorRate()\n",
-" error.load_data(X_train, pd.Series(Y_train), sensitive_features=A_train.sex)\n",
+" error.load_data(X_train, pd.Series(y_train), sensitive_features=A_train.sex)\n",
 " disparity = DemographicParity()\n",
-" disparity.load_data(X_train, pd.Series(Y_train), sensitive_features=A_train.sex)\n",
+" disparity.load_data(X_train, pd.Series(y_train), sensitive_features=A_train.sex)\n",
 " \n",
 " errors.append(error.gamma(classifier)[0])\n",
 " disparities.append(disparity.gamma(classifier).max())\n",
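The new comment in the first hunk above notes that Fairlearn 0.5.0 exposes the swept predictors under a public attribute. A version-tolerant sketch, using only the two attribute names mentioned in the change:

```python
# Prefer the public attribute introduced in fairlearn 0.5.0; fall back to the
# private attribute that fairlearn 0.4.6 exposes.
predictors = getattr(sweep, "predictors_", None)
if predictors is None:
    predictors = sweep._predictors
```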
@@ -329,7 +390,7 @@
 "source": [
 "FairlearnDashboard(sensitive_features=A_test, \n",
 " sensitive_feature_names=['Sex', 'Race'],\n",
-" y_true=Y_test.tolist(),\n",
+" y_true=y_test.tolist(),\n",
 " y_pred=predictions_dominant)"
 ]
 },
@@ -337,7 +398,7 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"When using sex as the sensitive feature, we see a Pareto front forming - the set of predictors which represent optimal tradeoffs between accuracy and disparity in predictions. In the ideal case, we would have a predictor at (1,0) - perfectly accurate and without any unfairness under demographic parity (with respect to the protected attribute \"sex\"). The Pareto front represents the closest we can come to this ideal based on our data and choice of estimator. Note the range of the axes - the disparity axis covers more values than the accuracy, so we can reduce disparity substantially for a small loss in accuracy. Finally, we also see that the unmitigated model is towards the top right of the plot, with high accuracy, but worst disparity.\n",
+"When using sex as the sensitive feature and accuracy as the metric, we see a Pareto front forming - the set of predictors which represent optimal tradeoffs between accuracy and disparity in predictions. In the ideal case, we would have a predictor at (1,0) - perfectly accurate and without any unfairness under demographic parity (with respect to the protected attribute \"sex\"). The Pareto front represents the closest we can come to this ideal based on our data and choice of estimator. Note the range of the axes - the disparity axis covers more values than the accuracy, so we can reduce disparity substantially for a small loss in accuracy. Finally, we also see that the unmitigated model is towards the top right of the plot, with high accuracy, but worst disparity.\n",
 "\n",
 "By clicking on individual models on the plot, we can inspect their metrics for disparity and accuracy in greater detail. In a real example, we would then pick the model which represented the best trade-off between accuracy and disparity given the relevant business constraints."
 ]
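The paragraph above reads the accuracy/disparity trade-off off the interactive dashboard. The same Pareto picture can be sketched non-interactively from the `errors` and `disparities` lists the notebook computes earlier; `matplotlib` is an assumption here, it is not among the notebook's listed requirements:

```python
import matplotlib.pyplot as plt

# Treat the swept ErrorRate values as error rates, so 1 - error approximates accuracy.
accuracies = [1 - e for e in errors]
plt.scatter(disparities, accuracies, label="GridSearch predictors")
plt.xlabel("demographic parity disparity")
plt.ylabel("accuracy")
plt.legend()
plt.show()
```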
@@ -444,7 +505,7 @@
 "from fairlearn.metrics._group_metric_set import _create_group_metric_set\n",
 "\n",
 "\n",
-"dash_dict = _create_group_metric_set(y_true=Y_test,\n",
+"dash_dict = _create_group_metric_set(y_true=y_test,\n",
 " predictions=predictions_dominant_ids,\n",
 " sensitive_features=sf,\n",
 " prediction_type='binary_classification')"
@@ -48,7 +48,7 @@
 "Please see the [configuration notebook](../../configuration.ipynb) for information about creating one, if required.\n",
 "This notebook also requires the following packages:\n",
 "* `azureml-contrib-fairness`\n",
-"* `fairlearn==0.4.6`\n",
+"* `fairlearn==0.4.6` (should also work with v0.5.0)\n",
 "* `joblib`\n",
 "* `shap`\n",
 "\n",
@@ -64,13 +64,20 @@
 "# !pip install --upgrade scikit-learn>=0.22.1"
 ]
 },
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"Finally, please ensure that when you downloaded this notebook, you also downloaded the `fairness_nb_utils.py` file from the same location, and placed it in the same directory as this notebook."
+]
+},
 {
 "cell_type": "markdown",
 "metadata": {},
 "source": [
 "<a id=\"LoadingData\"></a>\n",
 "## Loading the Data\n",
-"We use the well-known `adult` census dataset, which we load using `shap` (for convenience). We start with a fairly unremarkable set of imports:"
+"We use the well-known `adult` census dataset, which we fetch from the OpenML website. We start with a fairly unremarkable set of imports:"
 ]
 },
 {
@@ -80,9 +87,14 @@
 "outputs": [],
 "source": [
 "from sklearn import svm\n",
-"from sklearn.preprocessing import LabelEncoder, StandardScaler\n",
+"from sklearn.compose import ColumnTransformer\n",
+"from sklearn.datasets import fetch_openml\n",
+"from sklearn.impute import SimpleImputer\n",
 "from sklearn.linear_model import LogisticRegression\n",
-"import pandas as pd"
+"from sklearn.model_selection import train_test_split\n",
+"from sklearn.preprocessing import StandardScaler, OneHotEncoder\n",
+"from sklearn.compose import make_column_selector as selector\n",
+"from sklearn.pipeline import Pipeline"
 ]
 },
 {
@@ -98,13 +110,13 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from utilities import fetch_openml_with_retries\n",
+"from fairness_nb_utils import fetch_openml_with_retries\n",
 "\n",
 "data = fetch_openml_with_retries(data_id=1590)\n",
 " \n",
 "# Extract the items we want\n",
 "X_raw = data.data\n",
-"Y = (data.target == '>50K') * 1"
+"y = (data.target == '>50K') * 1"
 ]
 },
 {
@@ -130,7 +142,7 @@
 "<a id=\"ProcessingData\"></a>\n",
 "## Processing the Data\n",
 "\n",
-"With the data loaded, we process it for our needs. First, we extract the sensitive features of interest into `A` (conventionally used in the literature) and put the rest of the feature data into `X`:"
+"With the data loaded, we process it for our needs. First, we extract the sensitive features of interest into `A` (conventionally used in the literature) and leave the rest of the feature data in `X_raw`:"
 ]
 },
 {
@@ -140,15 +152,14 @@
 "outputs": [],
 "source": [
 "A = X_raw[['sex','race']]\n",
-"X = X_raw.drop(labels=['sex', 'race'],axis = 1)\n",
-"X_dummies = pd.get_dummies(X)"
+"X_raw = X_raw.drop(labels=['sex', 'race'],axis = 1)"
 ]
 },
 {
 "cell_type": "markdown",
 "metadata": {},
 "source": [
-"Next, we apply a standard set of scalings:"
+"We now preprocess our data. To avoid the problem of data leakage, we split our data into training and test sets before performing any other transformations. Subsequent transformations (such as scalings) will be fit to the training data set, and then applied to the test dataset."
 ]
 },
 {
@@ -157,42 +168,76 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"sc = StandardScaler()\n",
-"X_scaled = sc.fit_transform(X_dummies)\n",
-"X_scaled = pd.DataFrame(X_scaled, columns=X_dummies.columns)\n",
+"(X_train, X_test, y_train, y_test, A_train, A_test) = train_test_split(\n",
+" X_raw, y, A, test_size=0.3, random_state=12345, stratify=y\n",
+")\n",
 "\n",
-"le = LabelEncoder()\n",
-"Y = le.fit_transform(Y)"
-]
-},
-{
-"cell_type": "markdown",
-"metadata": {},
-"source": [
-"Finally, we can then split our data into training and test sets, and also make the labels on our test portion of `A` human-readable:"
-]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": [
-"from sklearn.model_selection import train_test_split\n",
-"X_train, X_test, Y_train, Y_test, A_train, A_test = train_test_split(X_scaled, \n",
-" Y, \n",
-" A,\n",
-" test_size = 0.2,\n",
-" random_state=0,\n",
-" stratify=Y)\n",
+"# Ensure indices are aligned between X, y and A,\n",
+"# after all the slicing and splitting of DataFrames\n",
+"# and Series\n",
 "\n",
-"# Work around indexing issue\n",
 "X_train = X_train.reset_index(drop=True)\n",
-"A_train = A_train.reset_index(drop=True)\n",
 "X_test = X_test.reset_index(drop=True)\n",
+"y_train = y_train.reset_index(drop=True)\n",
+"y_test = y_test.reset_index(drop=True)\n",
+"A_train = A_train.reset_index(drop=True)\n",
 "A_test = A_test.reset_index(drop=True)"
 ]
 },
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"We have two types of column in the dataset - categorical columns which will need to be one-hot encoded, and numeric ones which will need to be rescaled. We also need to take care of missing values. We use a simple approach here, but please bear in mind that this is another way that bias could be introduced (especially if one subgroup tends to have more missing values).\n",
+"\n",
+"For this preprocessing, we make use of `Pipeline` objects from `sklearn`:"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"numeric_transformer = Pipeline(\n",
+" steps=[\n",
+" (\"impute\", SimpleImputer()),\n",
+" (\"scaler\", StandardScaler()),\n",
+" ]\n",
+")\n",
+"\n",
+"categorical_transformer = Pipeline(\n",
+" [\n",
+" (\"impute\", SimpleImputer(strategy=\"most_frequent\")),\n",
+" (\"ohe\", OneHotEncoder(handle_unknown=\"ignore\", sparse=False)),\n",
+" ]\n",
+")\n",
+"\n",
+"preprocessor = ColumnTransformer(\n",
+" transformers=[\n",
+" (\"num\", numeric_transformer, selector(dtype_exclude=\"category\")),\n",
+" (\"cat\", categorical_transformer, selector(dtype_include=\"category\")),\n",
+" ]\n",
+")"
+]
+},
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"Now, the preprocessing pipeline is defined, we can run it on our training data, and apply the generated transform to our test data:"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"X_train = preprocessor.fit_transform(X_train)\n",
+"X_test = preprocessor.transform(X_test)"
+]
+},
 {
 "cell_type": "markdown",
 "metadata": {},
@@ -211,7 +256,7 @@
 "source": [
 "lr_predictor = LogisticRegression(solver='liblinear', fit_intercept=True)\n",
 "\n",
-"lr_predictor.fit(X_train, Y_train)"
+"lr_predictor.fit(X_train, y_train)"
 ]
 },
 {
@@ -229,7 +274,7 @@
 "source": [
 "svm_predictor = svm.SVC()\n",
 "\n",
-"svm_predictor.fit(X_train, Y_train)"
+"svm_predictor.fit(X_train, y_train)"
 ]
 },
 {
@@ -348,7 +393,7 @@
 "\n",
 "FairlearnDashboard(sensitive_features=A_test, \n",
 " sensitive_feature_names=['Sex', 'Race'],\n",
-" y_true=Y_test.tolist(),\n",
+" y_true=y_test.tolist(),\n",
 " y_pred=ys_pred)"
 ]
 },
@@ -378,7 +423,7 @@
 "\n",
 "from fairlearn.metrics._group_metric_set import _create_group_metric_set\n",
 "\n",
-"dash_dict = _create_group_metric_set(y_true=Y_test,\n",
+"dash_dict = _create_group_metric_set(y_true=y_test,\n",
 " predictions=ys_pred,\n",
 " sensitive_features=sf,\n",
 " prediction_type='binary_classification')"
@@ -2,7 +2,7 @@ name: azure_automl
 dependencies:
   # The python interpreter version.
   # Currently Azure ML only supports 3.5.2 and later.
-- pip<=19.3.1
+- pip==20.2.4
 - python>=3.5.2,<3.8
 - nb_conda
 - boto3==1.15.18
@@ -21,8 +21,8 @@ dependencies:

 - pip:
   # Required packages for AzureML execution, history, and data preparation.
-  - azureml-widgets~=1.19.0
+  - azureml-widgets~=1.21.0
   - pytorch-transformers==1.0.0
   - spacy==2.1.8
   - https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
-  - -r https://automlcesdkdataresources.blob.core.windows.net/validated-requirements/1.19.0/validated_win32_requirements.txt [--no-deps]
+  - -r https://automlcesdkdataresources.blob.core.windows.net/validated-requirements/1.21.0/validated_win32_requirements.txt [--no-deps]

@@ -2,7 +2,7 @@ name: azure_automl
 dependencies:
   # The python interpreter version.
   # Currently Azure ML only supports 3.5.2 and later.
-- pip<=19.3.1
+- pip==20.2.4
 - python>=3.5.2,<3.8
 - nb_conda
 - boto3==1.15.18
@@ -21,9 +21,9 @@ dependencies:

 - pip:
   # Required packages for AzureML execution, history, and data preparation.
-  - azureml-widgets~=1.19.0
+  - azureml-widgets~=1.21.0
   - pytorch-transformers==1.0.0
   - spacy==2.1.8
   - https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
-  - -r https://automlcesdkdataresources.blob.core.windows.net/validated-requirements/1.19.0/validated_linux_requirements.txt [--no-deps]
+  - -r https://automlcesdkdataresources.blob.core.windows.net/validated-requirements/1.21.0/validated_linux_requirements.txt [--no-deps]

@@ -2,7 +2,7 @@ name: azure_automl
 dependencies:
   # The python interpreter version.
   # Currently Azure ML only supports 3.5.2 and later.
-- pip<=19.3.1
+- pip==20.2.4
 - nomkl
 - python>=3.5.2,<3.8
 - nb_conda
@@ -22,8 +22,8 @@ dependencies:

 - pip:
   # Required packages for AzureML execution, history, and data preparation.
-  - azureml-widgets~=1.19.0
+  - azureml-widgets~=1.21.0
   - pytorch-transformers==1.0.0
   - spacy==2.1.8
   - https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
-  - -r https://automlcesdkdataresources.blob.core.windows.net/validated-requirements/1.19.0/validated_darwin_requirements.txt [--no-deps]
+  - -r https://automlcesdkdataresources.blob.core.windows.net/validated-requirements/1.21.0/validated_darwin_requirements.txt [--no-deps]
@@ -105,7 +105,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"print(\"This notebook was created using version 1.19.0 of the Azure ML SDK\")\n",
+"print(\"This notebook was created using version 1.21.0 of the Azure ML SDK\")\n",
 "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
 ]
 },
@@ -167,7 +167,7 @@
 "You will need to create a compute target for your AutoML run. In this tutorial, you create AmlCompute as your training compute resource.\n",
 "#### Creation of AmlCompute takes approximately 5 minutes. \n",
 "If the AmlCompute with that name is already in your workspace this code will skip the creation process.\n",
-"As with other Azure services, there are limits on certain resources (e.g. AmlCompute) associated with the Azure Machine Learning service. Please read this article on the default limits and how to request more quota."
+"As with other Azure services, there are limits on certain resources (e.g. AmlCompute) associated with the Azure Machine Learning service. Please read [this article](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota."
 ]
 },
 {
@@ -1,4 +0,0 @@
-name: auto-ml-classification-bank-marketing-all-features
-dependencies:
-- pip:
-  - azureml-sdk
@@ -93,7 +93,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"print(\"This notebook was created using version 1.19.0 of the Azure ML SDK\")\n",
+"print(\"This notebook was created using version 1.21.0 of the Azure ML SDK\")\n",
 "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
 ]
 },
@@ -1,4 +0,0 @@
-name: auto-ml-classification-credit-card-fraud
-dependencies:
-- pip:
-  - azureml-sdk
@@ -96,7 +96,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"print(\"This notebook was created using version 1.19.0 of the Azure ML SDK\")\n",
+"print(\"This notebook was created using version 1.21.0 of the Azure ML SDK\")\n",
 "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
 ]
 },
@@ -1,4 +0,0 @@
-name: auto-ml-classification-text-dnn
-dependencies:
-- pip:
-  - azureml-sdk
@@ -81,7 +81,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"print(\"This notebook was created using version 1.19.0 of the Azure ML SDK\")\n",
+"print(\"This notebook was created using version 1.21.0 of the Azure ML SDK\")\n",
 "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
 ]
 },
@@ -143,7 +143,7 @@
 "You will need to create a compute target for your AutoML run. In this tutorial, you create AmlCompute as your training compute resource.\n",
 "#### Creation of AmlCompute takes approximately 5 minutes. \n",
 "If the AmlCompute with that name is already in your workspace this code will skip the creation process.\n",
-"As with other Azure services, there are limits on certain resources (e.g. AmlCompute) associated with the Azure Machine Learning service. Please read this article on the default limits and how to request more quota."
+"As with other Azure services, there are limits on certain resources (e.g. AmlCompute) associated with the Azure Machine Learning service. Please read [this article](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota."
 ]
 },
 {
@@ -1,4 +0,0 @@
-name: auto-ml-continuous-retraining
-dependencies:
-- pip:
-  - azureml-sdk
@@ -18,3 +18,4 @@ dependencies:
   - azureml-sdk
   - azureml-widgets
   - azureml-explain-model
+  - PyJWT < 2.0.0
@@ -19,3 +19,4 @@ dependencies:
   - azureml-sdk
   - azureml-widgets
   - azureml-explain-model
+  - PyJWT < 2.0.0
@@ -93,7 +93,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"print(\"This notebook was created using version 1.19.0 of the Azure ML SDK\")\n",
+"print(\"This notebook was created using version 1.21.0 of the Azure ML SDK\")\n",
 "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
 ]
 },
@@ -1,4 +0,0 @@
-name: auto-ml-regression-model-proxy
-dependencies:
-- pip:
-  - azureml-sdk
@@ -113,7 +113,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"print(\"This notebook was created using version 1.19.0 of the Azure ML SDK\")\n",
+"print(\"This notebook was created using version 1.21.0 of the Azure ML SDK\")\n",
 "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
 ]
 },
@@ -218,6 +218,8 @@
 "\n",
 "**Time series identifier columns** are identified by values of the columns listed `time_series_id_column_names`, for example \"store\" and \"item\" if your data has multiple time series of sales, one series for each combination of store and item sold.\n",
 "\n",
+"**Forecast frequency (freq)** This optional parameter represents the period with which the forecast is desired, for example, daily, weekly, yearly, etc. Use this parameter for the correction of time series containing irregular data points or for padding of short time series. The frequency needs to be a pandas offset alias. Please refer to [pandas documentation](https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects) for more information.\n",
+"\n",
 "This dataset has only one time series. Please see the [orange juice notebook](https://github.com/Azure/MachineLearningNotebooks/tree/master/how-to-use-azureml/automated-machine-learning/forecasting-orange-juice-sales) for an example of a multi-time series dataset."
 ]
 },
@@ -1,4 +0,0 @@
-name: auto-ml-forecasting-beer-remote
-dependencies:
-- pip:
-  - azureml-sdk
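The new paragraph above states that `freq` must be a pandas offset alias. A small, notebook-independent illustration of what such aliases look like in pandas itself:

```python
import pandas as pd

# 'D' = calendar day, 'W' = weekly, 'MS' = month start, 'H' = hourly, and so on.
weekly = pd.date_range("2021-01-01", periods=4, freq="W")
monthly = pd.date_range("2021-01-01", periods=4, freq="MS")
print(weekly)
print(monthly)
```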
@@ -87,7 +87,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"print(\"This notebook was created using version 1.19.0 of the Azure ML SDK\")\n",
+"print(\"This notebook was created using version 1.21.0 of the Azure ML SDK\")\n",
 "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
 ]
 },
@@ -131,7 +131,7 @@
 "You will need to create a [compute target](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-set-up-training-targets#amlcompute) for your AutoML run. In this tutorial, you create AmlCompute as your training compute resource.\n",
 "#### Creation of AmlCompute takes approximately 5 minutes. \n",
 "If the AmlCompute with that name is already in your workspace this code will skip the creation process.\n",
-"As with other Azure services, there are limits on certain resources (e.g. AmlCompute) associated with the Azure Machine Learning service. Please read this article on the default limits and how to request more quota."
+"As with other Azure services, there are limits on certain resources (e.g. AmlCompute) associated with the Azure Machine Learning service. Please read [this article](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota."
 ]
 },
 {
@@ -251,7 +251,8 @@
 "|**forecast_horizon**|The forecast horizon is how many periods forward you would like to forecast. This integer horizon is in units of the timeseries frequency (e.g. daily, weekly).|\n",
 "|**country_or_region_for_holidays**|The country/region used to generate holiday features. These should be ISO 3166 two-letter country/region codes (i.e. 'US', 'GB').|\n",
 "|**target_lags**|The target_lags specifies how far back we will construct the lags of the target variable.|\n",
-"|**drop_column_names**|Name(s) of columns to drop prior to modeling|"
+"|**drop_column_names**|Name(s) of columns to drop prior to modeling|\n",
+"|**freq**|Forecast frequency. This optional parameter represents the period with which the forecast is desired, for example, daily, weekly, yearly, etc. Use this parameter for the correction of time series containing irregular data points or for padding of short time series. The frequency needs to be a pandas offset alias. Please refer to [pandas documentation](https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects) for more information."
 ]
 },
 {
@@ -548,6 +549,9 @@
 "cell_type": "markdown",
 "metadata": {},
 "source": [
+"For more details on what metrics are included and how they are calculated, please refer to [supported metrics](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-understand-automated-ml#regressionforecasting-metrics). You could also calculate residuals, like described [here](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-understand-automated-ml#residuals).\n",
+"\n",
+"\n",
 "Since we did a rolling evaluation on the test set, we can analyze the predictions by their forecast horizon relative to the rolling origin. The model was initially trained at a forecast horizon of 14, so each prediction from the model is associated with a horizon value from 1 to 14. The horizon values are in a column named, \"horizon_origin,\" in the prediction set. For example, we can calculate some of the error metrics grouped by the horizon:"
 ]
 },
@@ -1,4 +0,0 @@
-name: auto-ml-forecasting-bike-share
-dependencies:
-- pip:
-  - azureml-sdk
@@ -97,7 +97,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"print(\"This notebook was created using version 1.19.0 of the Azure ML SDK\")\n",
+"print(\"This notebook was created using version 1.21.0 of the Azure ML SDK\")\n",
 "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
 ]
 },
@@ -301,7 +301,8 @@
 "|Property|Description|\n",
 "|-|-|\n",
 "|**time_column_name**|The name of your time column.|\n",
-"|**forecast_horizon**|The forecast horizon is how many periods forward you would like to forecast. This integer horizon is in units of the timeseries frequency (e.g. daily, weekly).|"
+"|**forecast_horizon**|The forecast horizon is how many periods forward you would like to forecast. This integer horizon is in units of the timeseries frequency (e.g. daily, weekly).|\n",
+"|**freq**|Forecast frequency. This optional parameter represents the period with which the forecast is desired, for example, daily, weekly, yearly, etc. Use this parameter for the correction of time series containing irregular data points or for padding of short time series. The frequency needs to be a pandas offset alias. Please refer to [pandas documentation](https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects) for more information."
 ]
 },
 {
@@ -497,7 +498,7 @@
 "metadata": {},
 "source": [
 "### Evaluate\n",
-"To evaluate the accuracy of the forecast, we'll compare against the actual sales quantities for some select metrics, included the mean absolute percentage error (MAPE).\n",
+"To evaluate the accuracy of the forecast, we'll compare against the actual sales quantities for some select metrics, included the mean absolute percentage error (MAPE). For more metrics that can be used for evaluation after training, please see [supported metrics](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-understand-automated-ml#regressionforecasting-metrics), and [how to calculate residuals](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-understand-automated-ml#residuals).\n",
 "\n",
 "It is a good practice to always align the output explicitly to the input, as the count and order of the rows may have changed during transformations that span multiple rows."
 ]
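The Evaluate text above leans on MAPE. A self-contained sketch of that metric for reference; the variable names below are placeholders for illustration, not taken from the notebooks:

```python
import numpy as np

def mape(actual, predicted):
    """Mean absolute percentage error, skipping zero actuals to avoid division by zero."""
    actual = np.asarray(actual, dtype=float)
    predicted = np.asarray(predicted, dtype=float)
    mask = actual != 0
    return np.mean(np.abs((actual[mask] - predicted[mask]) / actual[mask])) * 100

print(mape([100, 250, 0, 400], [110, 240, 5, 380]))  # ~6.3 (percent)
```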
@@ -1,4 +0,0 @@
-name: auto-ml-forecasting-energy-demand
-dependencies:
-- pip:
-  - azureml-sdk
@@ -94,7 +94,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"print(\"This notebook was created using version 1.19.0 of the Azure ML SDK\")\n",
+"print(\"This notebook was created using version 1.21.0 of the Azure ML SDK\")\n",
 "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
 ]
 },
@@ -302,7 +302,8 @@
 "* Set early termination to True, so the iterations through the models will stop when no improvements in accuracy score will be made.\n",
 "* Set limitations on the length of experiment run to 15 minutes.\n",
 "* Finally, we set the task to be forecasting.\n",
-"* We apply the lag lead operator to the target value i.e. we use the previous values as a predictor for the future ones."
+"* We apply the lag lead operator to the target value i.e. we use the previous values as a predictor for the future ones.\n",
+"* [Optional] Forecast frequency parameter (freq) represents the period with which the forecast is desired, for example, daily, weekly, yearly, etc. Use this parameter for the correction of time series containing irregular data points or for padding of short time series. The frequency needs to be a pandas offset alias. Please refer to [pandas documentation](https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects) for more information."
 ]
 },
 {
@@ -1,4 +0,0 @@
-name: auto-ml-forecasting-function
-dependencies:
-- pip:
-  - azureml-sdk
@@ -82,7 +82,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"print(\"This notebook was created using version 1.19.0 of the Azure ML SDK\")\n",
+"print(\"This notebook was created using version 1.21.0 of the Azure ML SDK\")\n",
 "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
 ]
 },
@@ -126,7 +126,7 @@
 "You will need to create a [compute target](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-set-up-training-targets#amlcompute) for your AutoML run. In this tutorial, you create AmlCompute as your training compute resource.\n",
 "#### Creation of AmlCompute takes approximately 5 minutes. \n",
 "If the AmlCompute with that name is already in your workspace this code will skip the creation process.\n",
-"As with other Azure services, there are limits on certain resources (e.g. AmlCompute) associated with the Azure Machine Learning service. Please read this article on the default limits and how to request more quota."
+"As with other Azure services, there are limits on certain resources (e.g. AmlCompute) associated with the Azure Machine Learning service. Please read [this article](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota."
 ]
 },
 {
@@ -366,7 +366,8 @@
 "|-|-|\n",
 "|**time_column_name**|The name of your time column.|\n",
 "|**forecast_horizon**|The forecast horizon is how many periods forward you would like to forecast. This integer horizon is in units of the timeseries frequency (e.g. daily, weekly).|\n",
-"|**time_series_id_column_names**|The column names used to uniquely identify the time series in data that has multiple rows with the same timestamp. If the time series identifiers are not defined, the data set is assumed to be one time series.|"
+"|**time_series_id_column_names**|The column names used to uniquely identify the time series in data that has multiple rows with the same timestamp. If the time series identifiers are not defined, the data set is assumed to be one time series.|\n",
+"|**freq**|Forecast frequency. This optional parameter represents the period with which the forecast is desired, for example, daily, weekly, yearly, etc. Use this parameter for the correction of time series containing irregular data points or for padding of short time series. The frequency needs to be a pandas offset alias. Please refer to [pandas documentation](https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects) for more information."
 ]
 },
 {
@@ -571,7 +572,7 @@
 "source": [
 "# Evaluate\n",
 "\n",
-"To evaluate the accuracy of the forecast, we'll compare against the actual sales quantities for some select metrics, included the mean absolute percentage error (MAPE). \n",
+"To evaluate the accuracy of the forecast, we'll compare against the actual sales quantities for some select metrics, included the mean absolute percentage error (MAPE). For more metrics that can be used for evaluation after training, please see [supported metrics](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-understand-automated-ml#regressionforecasting-metrics), and [how to calculate residuals](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-understand-automated-ml#residuals).\n",
 "\n",
 "We'll add predictions and actuals into a single dataframe for convenience in calculating the metrics."
 ]
@@ -1,4 +0,0 @@
|
|||||||
name: auto-ml-forecasting-orange-juice-sales
|
|
||||||
dependencies:
|
|
||||||
- pip:
|
|
||||||
- azureml-sdk
|
|
||||||
@@ -96,7 +96,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.19.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.21.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},
@@ -1,4 +0,0 @@
name: auto-ml-classification-credit-card-fraud-local
dependencies:
- pip:
  - azureml-sdk
@@ -96,7 +96,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.19.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.21.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},
@@ -1,4 +0,0 @@
name: auto-ml-regression-explanation-featurization
dependencies:
- pip:
  - azureml-sdk
@@ -66,7 +66,8 @@ engineered_explanations = explainer.explain(['local', 'global'], tag='engineered
# Compute the raw explanations
raw_explanations = explainer.explain(['local', 'global'], get_raw=True, tag='raw explanations',
                                     raw_feature_names=automl_explainer_setup_obj.raw_feature_names,
                                     eval_dataset=automl_explainer_setup_obj.X_test_transform)
                                     eval_dataset=automl_explainer_setup_obj.X_test_transform,
                                     raw_eval_dataset=automl_explainer_setup_obj.X_test_raw)

print("Engineered and raw explanations computed successfully")
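If it helps to see how explanations computed this way are usually retrieved later, here is a hypothetical sketch using azureml-interpret's ExplanationClient; the run object and the exact keyword arguments are assumptions and may differ by SDK version.

from azureml.interpret import ExplanationClient

# Assumed: `automl_run` is the completed run that the explanations were uploaded to.
client = ExplanationClient.from_run(automl_run)

# Engineered-feature explanation (raw=False) and raw-feature explanation (raw=True).
engineered_explanation = client.download_model_explanation(raw=False)
raw_explanation = client.download_model_explanation(raw=True)

print(engineered_explanation.get_feature_importance_dict())
print(raw_explanation.get_feature_importance_dict())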
@@ -92,7 +92,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.19.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.21.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},
@@ -1,4 +0,0 @@
name: auto-ml-regression
dependencies:
- pip:
  - azureml-sdk
@@ -1,6 +0,0 @@
name: multi-model-register-and-deploy
dependencies:
- pip:
  - azureml-sdk
  - numpy
  - scikit-learn
@@ -1,6 +0,0 @@
name: model-register-and-deploy
dependencies:
- pip:
  - azureml-sdk
  - numpy
  - scikit-learn
@@ -1,4 +0,0 @@
name: deploy-aks-with-controlled-rollout
dependencies:
- pip:
  - azureml-sdk
@@ -1,4 +0,0 @@
name: enable-app-insights-in-production-service
dependencies:
- pip:
  - azureml-sdk
@@ -1,8 +0,0 @@
name: onnx-convert-aml-deploy-tinyyolo
dependencies:
- pip:
  - azureml-sdk
  - numpy
  - git+https://github.com/apple/coremltools@v2.1
  - onnx<1.7.0
  - onnxmltools
@@ -1,9 +0,0 @@
name: onnx-inference-facial-expression-recognition-deploy
dependencies:
- pip:
  - azureml-sdk
  - azureml-widgets
  - matplotlib
  - numpy
  - onnx<1.7.0
  - opencv-python-headless
@@ -1,9 +0,0 @@
name: onnx-inference-mnist-deploy
dependencies:
- pip:
  - azureml-sdk
  - azureml-widgets
  - matplotlib
  - numpy
  - onnx<1.7.0
  - opencv-python-headless
@@ -1,4 +0,0 @@
name: onnx-model-register-and-deploy
dependencies:
- pip:
  - azureml-sdk
@@ -1,4 +0,0 @@
name: onnx-modelzoo-aml-deploy-resnet50
dependencies:
- pip:
  - azureml-sdk
@@ -1,5 +0,0 @@
name: onnx-train-pytorch-aml-deploy-mnist
dependencies:
- pip:
  - azureml-sdk
  - azureml-widgets
@@ -1,5 +0,0 @@
name: production-deploy-to-aks-gpu
dependencies:
- pip:
  - azureml-sdk
  - tensorflow
@@ -226,7 +226,7 @@
"# Leaf domain label generates a name using the formula\n",
"# \"<leaf-domain-label>######.<azure-region>.cloudapp.azure.net\"\n",
"# where \"######\" is a random series of characters\n",
"provisioning_config.enable_ssl(leaf_domain_label = \"contoso\")\n",
"provisioning_config.enable_ssl(leaf_domain_label = \"contoso\", overwrite_existing_domain = True)\n",
"\n",
"aks_name = 'my-aks-ssl-1' \n",
"# Create the cluster\n",
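For context, a minimal sketch of how such a provisioning configuration is typically used end to end; the workspace object and cluster name here are assumptions rather than values from this change.

from azureml.core.compute import AksCompute, ComputeTarget

# Assumed: `ws` is an existing Workspace object.
provisioning_config = AksCompute.provisioning_configuration()
# Request a Microsoft-generated certificate for the leaf domain; overwrite any previously used domain.
provisioning_config.enable_ssl(leaf_domain_label="contoso", overwrite_existing_domain=True)

aks_name = "my-aks-ssl-1"
aks_target = ComputeTarget.create(workspace=ws,
                                  name=aks_name,
                                  provisioning_configuration=provisioning_config)
aks_target.wait_for_completion(show_output=True)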
@@ -1,8 +0,0 @@
name: production-deploy-to-aks-ssl
dependencies:
- pip:
  - azureml-sdk
  - matplotlib
  - tqdm
  - scipy
  - sklearn
@@ -1,8 +0,0 @@
name: production-deploy-to-aks
dependencies:
- pip:
  - azureml-sdk
  - matplotlib
  - tqdm
  - scipy
  - sklearn
@@ -1,4 +0,0 @@
name: model-register-and-deploy-spark
dependencies:
- pip:
  - azureml-sdk
@@ -1,11 +0,0 @@
name: explain-model-on-amlcompute
dependencies:
- pip:
  - azureml-sdk
  - azureml-interpret
  - interpret-community[visualization]
  - matplotlib
  - azureml-contrib-interpret
  - sklearn-pandas<2.0.0
  - azureml-dataset-runtime
  - ipywidgets
@@ -1,9 +0,0 @@
name: save-retrieve-explanations-run-history
dependencies:
- pip:
  - azureml-sdk
  - azureml-interpret
  - interpret-community[visualization]
  - matplotlib
  - azureml-contrib-interpret
  - ipywidgets
@@ -1,10 +0,0 @@
name: train-explain-model-locally-and-deploy
dependencies:
- pip:
  - azureml-sdk
  - azureml-interpret
  - interpret-community[visualization]
  - matplotlib
  - azureml-contrib-interpret
  - sklearn-pandas<2.0.0
  - ipywidgets
@@ -1,12 +0,0 @@
name: train-explain-model-on-amlcompute-and-deploy
dependencies:
- pip:
  - azureml-sdk
  - azureml-interpret
  - interpret-community[visualization]
  - matplotlib
  - azureml-contrib-interpret
  - sklearn-pandas<2.0.0
  - azureml-dataset-runtime
  - azureml-core
  - ipywidgets
@@ -1,5 +0,0 @@
name: aml-pipelines-data-transfer
dependencies:
- pip:
  - azureml-sdk
  - azureml-widgets
@@ -1,5 +0,0 @@
name: aml-pipelines-getting-started
dependencies:
- pip:
  - azureml-sdk
  - azureml-widgets
@@ -168,7 +168,7 @@
"def_blob_store = Datastore(ws, \"workspaceblobstore\")\n",
"\n",
"#upload input data to workspaceblobstore\n",
"def_blob_store.upload_files(files=['20news.pkl'], target_path='20newsgroups')"
"def_blob_store.upload_files(files=['20news.pkl'], target_path='20newsgroups', overwrite=True)"
]
},
{
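As a hypothetical illustration of how data uploaded this way is typically consumed by a pipeline step (the script name, cluster name, and folder layout below are assumptions, not part of this change):

from azureml.core import Workspace, Datastore
from azureml.data.data_reference import DataReference
from azureml.pipeline.steps import PythonScriptStep

ws = Workspace.from_config()
def_blob_store = Datastore(ws, "workspaceblobstore")

# Reference the folder populated by upload_files() so a step can consume it.
raw_data = DataReference(datastore=def_blob_store,
                         data_reference_name="raw_20newsgroups",
                         path_on_datastore="20newsgroups")

# Hypothetical step that reads the uploaded pickle file.
prep_step = PythonScriptStep(name="prepare data",
                             script_name="prepare.py",
                             arguments=["--input", raw_data],
                             inputs=[raw_data],
                             compute_target="cpu-cluster",
                             source_directory="scripts")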
@@ -1,5 +0,0 @@
name: aml-pipelines-how-to-use-estimatorstep
dependencies:
- pip:
  - azureml-sdk
  - azureml-widgets
@@ -1,5 +0,0 @@
name: aml-pipelines-how-to-use-modulestep
dependencies:
- pip:
  - azureml-sdk
  - azureml-widgets
@@ -1,5 +0,0 @@
name: aml-pipelines-how-to-use-pipeline-drafts
dependencies:
- pip:
  - azureml-sdk
  - azureml-widgets
@@ -1,9 +0,0 @@
name: aml-pipelines-parameter-tuning-with-hyperdrive
dependencies:
- pip:
  - azureml-sdk
  - azureml-widgets
  - matplotlib
  - numpy
  - pandas_ml
  - azureml-dataset-runtime[pandas,fuse]
@@ -1,6 +0,0 @@
name: aml-pipelines-publish-and-run-using-rest-endpoint
dependencies:
- pip:
  - azureml-sdk
  - azureml-widgets
  - requests
@@ -1,5 +0,0 @@
name: aml-pipelines-setup-schedule-for-a-published-pipeline
dependencies:
- pip:
  - azureml-sdk
  - azureml-widgets
@@ -1,6 +0,0 @@
name: aml-pipelines-setup-versioned-pipeline-endpoints
dependencies:
- pip:
  - azureml-sdk
  - azureml-widgets
  - requests
@@ -1,5 +0,0 @@
name: aml-pipelines-showcasing-datapath-and-pipelineparameter
dependencies:
- pip:
  - azureml-sdk
  - azureml-widgets
@@ -1,5 +0,0 @@
name: aml-pipelines-showcasing-dataset-and-pipelineparameter
dependencies:
- pip:
  - azureml-sdk
  - azureml-widgets
@@ -1,4 +0,0 @@
name: aml-pipelines-with-automated-machine-learning-step
dependencies:
- pip:
  - azureml-sdk
@@ -1,5 +0,0 @@
name: aml-pipelines-with-data-dependency-steps
dependencies:
- pip:
  - azureml-sdk
  - azureml-widgets
@@ -1,6 +0,0 @@
name: aml-pipelines-with-notebook-runner-step
dependencies:
- pip:
  - azureml-sdk
  - azureml-widgets
  - azureml-contrib-notebook
@@ -1,10 +0,0 @@
name: nyc-taxi-data-regression-model-building
dependencies:
- pip:
  - azureml-sdk
  - azureml-widgets
  - azureml-opendatasets
  - azureml-train-automl
  - matplotlib
  - pandas
  - pyarrow
@@ -1,7 +0,0 @@
name: file-dataset-image-inference-mnist
dependencies:
- pip:
  - azureml-sdk
  - azureml-pipeline-steps
  - azureml-widgets
  - pandas
@@ -1,7 +0,0 @@
name: tabular-dataset-inference-iris
dependencies:
- pip:
  - azureml-sdk
  - azureml-pipeline-steps
  - azureml-widgets
  - pandas
@@ -1,185 +0,0 @@
|
|||||||
# Original source: https://github.com/pytorch/examples/blob/master/fast_neural_style/neural_style/neural_style.py
|
|
||||||
import argparse
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import re
|
|
||||||
|
|
||||||
from PIL import Image
|
|
||||||
import torch
|
|
||||||
from torchvision import transforms
|
|
||||||
|
|
||||||
|
|
||||||
def load_image(filename, size=None, scale=None):
|
|
||||||
img = Image.open(filename)
|
|
||||||
if size is not None:
|
|
||||||
img = img.resize((size, size), Image.ANTIALIAS)
|
|
||||||
elif scale is not None:
|
|
||||||
img = img.resize((int(img.size[0] / scale), int(img.size[1] / scale)), Image.ANTIALIAS)
|
|
||||||
return img
|
|
||||||
|
|
||||||
|
|
||||||
def save_image(filename, data):
|
|
||||||
img = data.clone().clamp(0, 255).numpy()
|
|
||||||
img = img.transpose(1, 2, 0).astype("uint8")
|
|
||||||
img = Image.fromarray(img)
|
|
||||||
img.save(filename)
|
|
||||||
|
|
||||||
|
|
||||||
class TransformerNet(torch.nn.Module):
|
|
||||||
def __init__(self):
|
|
||||||
super(TransformerNet, self).__init__()
|
|
||||||
# Initial convolution layers
|
|
||||||
self.conv1 = ConvLayer(3, 32, kernel_size=9, stride=1)
|
|
||||||
self.in1 = torch.nn.InstanceNorm2d(32, affine=True)
|
|
||||||
self.conv2 = ConvLayer(32, 64, kernel_size=3, stride=2)
|
|
||||||
self.in2 = torch.nn.InstanceNorm2d(64, affine=True)
|
|
||||||
self.conv3 = ConvLayer(64, 128, kernel_size=3, stride=2)
|
|
||||||
self.in3 = torch.nn.InstanceNorm2d(128, affine=True)
|
|
||||||
# Residual layers
|
|
||||||
self.res1 = ResidualBlock(128)
|
|
||||||
self.res2 = ResidualBlock(128)
|
|
||||||
self.res3 = ResidualBlock(128)
|
|
||||||
self.res4 = ResidualBlock(128)
|
|
||||||
self.res5 = ResidualBlock(128)
|
|
||||||
# Upsampling Layers
|
|
||||||
self.deconv1 = UpsampleConvLayer(128, 64, kernel_size=3, stride=1, upsample=2)
|
|
||||||
self.in4 = torch.nn.InstanceNorm2d(64, affine=True)
|
|
||||||
self.deconv2 = UpsampleConvLayer(64, 32, kernel_size=3, stride=1, upsample=2)
|
|
||||||
self.in5 = torch.nn.InstanceNorm2d(32, affine=True)
|
|
||||||
self.deconv3 = ConvLayer(32, 3, kernel_size=9, stride=1)
|
|
||||||
# Non-linearities
|
|
||||||
self.relu = torch.nn.ReLU()
|
|
||||||
|
|
||||||
def forward(self, X):
|
|
||||||
y = self.relu(self.in1(self.conv1(X)))
|
|
||||||
y = self.relu(self.in2(self.conv2(y)))
|
|
||||||
y = self.relu(self.in3(self.conv3(y)))
|
|
||||||
y = self.res1(y)
|
|
||||||
y = self.res2(y)
|
|
||||||
y = self.res3(y)
|
|
||||||
y = self.res4(y)
|
|
||||||
y = self.res5(y)
|
|
||||||
y = self.relu(self.in4(self.deconv1(y)))
|
|
||||||
y = self.relu(self.in5(self.deconv2(y)))
|
|
||||||
y = self.deconv3(y)
|
|
||||||
return y
|
|
||||||
|
|
||||||
|
|
||||||
class ConvLayer(torch.nn.Module):
|
|
||||||
def __init__(self, in_channels, out_channels, kernel_size, stride):
|
|
||||||
super(ConvLayer, self).__init__()
|
|
||||||
reflection_padding = kernel_size // 2
|
|
||||||
self.reflection_pad = torch.nn.ReflectionPad2d(reflection_padding)
|
|
||||||
self.conv2d = torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride)
|
|
||||||
|
|
||||||
def forward(self, x):
|
|
||||||
out = self.reflection_pad(x)
|
|
||||||
out = self.conv2d(out)
|
|
||||||
return out
|
|
||||||
|
|
||||||
|
|
||||||
class ResidualBlock(torch.nn.Module):
|
|
||||||
"""ResidualBlock
|
|
||||||
introduced in: https://arxiv.org/abs/1512.03385
|
|
||||||
recommended architecture: http://torch.ch/blog/2016/02/04/resnets.html
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, channels):
|
|
||||||
super(ResidualBlock, self).__init__()
|
|
||||||
self.conv1 = ConvLayer(channels, channels, kernel_size=3, stride=1)
|
|
||||||
self.in1 = torch.nn.InstanceNorm2d(channels, affine=True)
|
|
||||||
self.conv2 = ConvLayer(channels, channels, kernel_size=3, stride=1)
|
|
||||||
self.in2 = torch.nn.InstanceNorm2d(channels, affine=True)
|
|
||||||
self.relu = torch.nn.ReLU()
|
|
||||||
|
|
||||||
def forward(self, x):
|
|
||||||
residual = x
|
|
||||||
out = self.relu(self.in1(self.conv1(x)))
|
|
||||||
out = self.in2(self.conv2(out))
|
|
||||||
out = out + residual
|
|
||||||
return out
|
|
||||||
|
|
||||||
|
|
||||||
class UpsampleConvLayer(torch.nn.Module):
|
|
||||||
"""UpsampleConvLayer
|
|
||||||
Upsamples the input and then does a convolution. This method gives better results
|
|
||||||
compared to ConvTranspose2d.
|
|
||||||
ref: http://distill.pub/2016/deconv-checkerboard/
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, in_channels, out_channels, kernel_size, stride, upsample=None):
|
|
||||||
super(UpsampleConvLayer, self).__init__()
|
|
||||||
self.upsample = upsample
|
|
||||||
if upsample:
|
|
||||||
self.upsample_layer = torch.nn.Upsample(mode='nearest', scale_factor=upsample)
|
|
||||||
reflection_padding = kernel_size // 2
|
|
||||||
self.reflection_pad = torch.nn.ReflectionPad2d(reflection_padding)
|
|
||||||
self.conv2d = torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride)
|
|
||||||
|
|
||||||
def forward(self, x):
|
|
||||||
x_in = x
|
|
||||||
if self.upsample:
|
|
||||||
x_in = self.upsample_layer(x_in)
|
|
||||||
out = self.reflection_pad(x_in)
|
|
||||||
out = self.conv2d(out)
|
|
||||||
return out
|
|
||||||
|
|
||||||
|
|
||||||
def stylize(args):
|
|
||||||
device = torch.device("cuda" if args.cuda else "cpu")
|
|
||||||
with torch.no_grad():
|
|
||||||
style_model = TransformerNet()
|
|
||||||
state_dict = torch.load(os.path.join(args.model_dir, args.style + ".pth"))
|
|
||||||
# remove saved deprecated running_* keys in InstanceNorm from the checkpoint
|
|
||||||
for k in list(state_dict.keys()):
|
|
||||||
if re.search(r'in\d+\.running_(mean|var)$', k):
|
|
||||||
del state_dict[k]
|
|
||||||
style_model.load_state_dict(state_dict)
|
|
||||||
style_model.to(device)
|
|
||||||
|
|
||||||
filenames = os.listdir(args.content_dir)
|
|
||||||
|
|
||||||
for filename in filenames:
|
|
||||||
print("Processing {}".format(filename))
|
|
||||||
full_path = os.path.join(args.content_dir, filename)
|
|
||||||
content_image = load_image(full_path, scale=args.content_scale)
|
|
||||||
content_transform = transforms.Compose([
|
|
||||||
transforms.ToTensor(),
|
|
||||||
transforms.Lambda(lambda x: x.mul(255))
|
|
||||||
])
|
|
||||||
content_image = content_transform(content_image)
|
|
||||||
content_image = content_image.unsqueeze(0).to(device)
|
|
||||||
|
|
||||||
output = style_model(content_image).cpu()
|
|
||||||
|
|
||||||
output_path = os.path.join(args.output_dir, filename)
|
|
||||||
save_image(output_path, output[0])
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
arg_parser = argparse.ArgumentParser(description="parser for fast-neural-style")
|
|
||||||
|
|
||||||
arg_parser.add_argument("--content-scale", type=float, default=None,
|
|
||||||
help="factor for scaling down the content image")
|
|
||||||
arg_parser.add_argument("--model-dir", type=str, required=True,
|
|
||||||
help="saved model to be used for stylizing the image.")
|
|
||||||
arg_parser.add_argument("--cuda", type=int, required=True,
|
|
||||||
help="set it to 1 for running on GPU, 0 for CPU")
|
|
||||||
arg_parser.add_argument("--style", type=str,
|
|
||||||
help="style name")
|
|
||||||
|
|
||||||
arg_parser.add_argument("--content-dir", type=str, required=True,
|
|
||||||
help="directory holding the images")
|
|
||||||
arg_parser.add_argument("--output-dir", type=str, required=True,
|
|
||||||
help="directory holding the output images")
|
|
||||||
args = arg_parser.parse_args()
|
|
||||||
|
|
||||||
if args.cuda and not torch.cuda.is_available():
|
|
||||||
print("ERROR: cuda is not available, try running on CPU")
|
|
||||||
sys.exit(1)
|
|
||||||
os.makedirs(args.output_dir, exist_ok=True)
|
|
||||||
stylize(args)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
@@ -1,207 +0,0 @@
|
|||||||
# Original source: https://github.com/pytorch/examples/blob/master/fast_neural_style/neural_style/neural_style.py
|
|
||||||
import argparse
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import re
|
|
||||||
|
|
||||||
from PIL import Image
|
|
||||||
import torch
|
|
||||||
from torchvision import transforms
|
|
||||||
|
|
||||||
from mpi4py import MPI
|
|
||||||
|
|
||||||
|
|
||||||
def load_image(filename, size=None, scale=None):
|
|
||||||
img = Image.open(filename)
|
|
||||||
if size is not None:
|
|
||||||
img = img.resize((size, size), Image.ANTIALIAS)
|
|
||||||
elif scale is not None:
|
|
||||||
img = img.resize((int(img.size[0] / scale), int(img.size[1] / scale)), Image.ANTIALIAS)
|
|
||||||
return img
|
|
||||||
|
|
||||||
|
|
||||||
def save_image(filename, data):
|
|
||||||
img = data.clone().clamp(0, 255).numpy()
|
|
||||||
img = img.transpose(1, 2, 0).astype("uint8")
|
|
||||||
img = Image.fromarray(img)
|
|
||||||
img.save(filename)
|
|
||||||
|
|
||||||
|
|
||||||
class TransformerNet(torch.nn.Module):
|
|
||||||
def __init__(self):
|
|
||||||
super(TransformerNet, self).__init__()
|
|
||||||
# Initial convolution layers
|
|
||||||
self.conv1 = ConvLayer(3, 32, kernel_size=9, stride=1)
|
|
||||||
self.in1 = torch.nn.InstanceNorm2d(32, affine=True)
|
|
||||||
self.conv2 = ConvLayer(32, 64, kernel_size=3, stride=2)
|
|
||||||
self.in2 = torch.nn.InstanceNorm2d(64, affine=True)
|
|
||||||
self.conv3 = ConvLayer(64, 128, kernel_size=3, stride=2)
|
|
||||||
self.in3 = torch.nn.InstanceNorm2d(128, affine=True)
|
|
||||||
# Residual layers
|
|
||||||
self.res1 = ResidualBlock(128)
|
|
||||||
self.res2 = ResidualBlock(128)
|
|
||||||
self.res3 = ResidualBlock(128)
|
|
||||||
self.res4 = ResidualBlock(128)
|
|
||||||
self.res5 = ResidualBlock(128)
|
|
||||||
# Upsampling Layers
|
|
||||||
self.deconv1 = UpsampleConvLayer(128, 64, kernel_size=3, stride=1, upsample=2)
|
|
||||||
self.in4 = torch.nn.InstanceNorm2d(64, affine=True)
|
|
||||||
self.deconv2 = UpsampleConvLayer(64, 32, kernel_size=3, stride=1, upsample=2)
|
|
||||||
self.in5 = torch.nn.InstanceNorm2d(32, affine=True)
|
|
||||||
self.deconv3 = ConvLayer(32, 3, kernel_size=9, stride=1)
|
|
||||||
# Non-linearities
|
|
||||||
self.relu = torch.nn.ReLU()
|
|
||||||
|
|
||||||
def forward(self, X):
|
|
||||||
y = self.relu(self.in1(self.conv1(X)))
|
|
||||||
y = self.relu(self.in2(self.conv2(y)))
|
|
||||||
y = self.relu(self.in3(self.conv3(y)))
|
|
||||||
y = self.res1(y)
|
|
||||||
y = self.res2(y)
|
|
||||||
y = self.res3(y)
|
|
||||||
y = self.res4(y)
|
|
||||||
y = self.res5(y)
|
|
||||||
y = self.relu(self.in4(self.deconv1(y)))
|
|
||||||
y = self.relu(self.in5(self.deconv2(y)))
|
|
||||||
y = self.deconv3(y)
|
|
||||||
return y
|
|
||||||
|
|
||||||
|
|
||||||
class ConvLayer(torch.nn.Module):
|
|
||||||
def __init__(self, in_channels, out_channels, kernel_size, stride):
|
|
||||||
super(ConvLayer, self).__init__()
|
|
||||||
reflection_padding = kernel_size // 2
|
|
||||||
self.reflection_pad = torch.nn.ReflectionPad2d(reflection_padding)
|
|
||||||
self.conv2d = torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride)
|
|
||||||
|
|
||||||
def forward(self, x):
|
|
||||||
out = self.reflection_pad(x)
|
|
||||||
out = self.conv2d(out)
|
|
||||||
return out
|
|
||||||
|
|
||||||
|
|
||||||
class ResidualBlock(torch.nn.Module):
|
|
||||||
"""ResidualBlock
|
|
||||||
introduced in: https://arxiv.org/abs/1512.03385
|
|
||||||
recommended architecture: http://torch.ch/blog/2016/02/04/resnets.html
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, channels):
|
|
||||||
super(ResidualBlock, self).__init__()
|
|
||||||
self.conv1 = ConvLayer(channels, channels, kernel_size=3, stride=1)
|
|
||||||
self.in1 = torch.nn.InstanceNorm2d(channels, affine=True)
|
|
||||||
self.conv2 = ConvLayer(channels, channels, kernel_size=3, stride=1)
|
|
||||||
self.in2 = torch.nn.InstanceNorm2d(channels, affine=True)
|
|
||||||
self.relu = torch.nn.ReLU()
|
|
||||||
|
|
||||||
def forward(self, x):
|
|
||||||
residual = x
|
|
||||||
out = self.relu(self.in1(self.conv1(x)))
|
|
||||||
out = self.in2(self.conv2(out))
|
|
||||||
out = out + residual
|
|
||||||
return out
|
|
||||||
|
|
||||||
|
|
||||||
class UpsampleConvLayer(torch.nn.Module):
|
|
||||||
"""UpsampleConvLayer
|
|
||||||
Upsamples the input and then does a convolution. This method gives better results
|
|
||||||
compared to ConvTranspose2d.
|
|
||||||
ref: http://distill.pub/2016/deconv-checkerboard/
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, in_channels, out_channels, kernel_size, stride, upsample=None):
|
|
||||||
super(UpsampleConvLayer, self).__init__()
|
|
||||||
self.upsample = upsample
|
|
||||||
if upsample:
|
|
||||||
self.upsample_layer = torch.nn.Upsample(mode='nearest', scale_factor=upsample)
|
|
||||||
reflection_padding = kernel_size // 2
|
|
||||||
self.reflection_pad = torch.nn.ReflectionPad2d(reflection_padding)
|
|
||||||
self.conv2d = torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride)
|
|
||||||
|
|
||||||
def forward(self, x):
|
|
||||||
x_in = x
|
|
||||||
if self.upsample:
|
|
||||||
x_in = self.upsample_layer(x_in)
|
|
||||||
out = self.reflection_pad(x_in)
|
|
||||||
out = self.conv2d(out)
|
|
||||||
return out
|
|
||||||
|
|
||||||
|
|
||||||
def stylize(args, comm):
|
|
||||||
|
|
||||||
rank = comm.Get_rank()
|
|
||||||
size = comm.Get_size()
|
|
||||||
|
|
||||||
device = torch.device("cuda" if args.cuda else "cpu")
|
|
||||||
with torch.no_grad():
|
|
||||||
style_model = TransformerNet()
|
|
||||||
state_dict = torch.load(os.path.join(args.model_dir, args.style + ".pth"))
|
|
||||||
# remove saved deprecated running_* keys in InstanceNorm from the checkpoint
|
|
||||||
for k in list(state_dict.keys()):
|
|
||||||
if re.search(r'in\d+\.running_(mean|var)$', k):
|
|
||||||
del state_dict[k]
|
|
||||||
style_model.load_state_dict(state_dict)
|
|
||||||
style_model.to(device)
|
|
||||||
|
|
||||||
filenames = os.listdir(args.content_dir)
|
|
||||||
filenames = sorted(filenames)
|
|
||||||
partition_size = len(filenames) // size
|
|
||||||
partitioned_filenames = filenames[rank * partition_size: (rank + 1) * partition_size]
|
|
||||||
print("RANK {} - is processing {} images out of the total {}".format(rank, len(partitioned_filenames),
|
|
||||||
len(filenames)))
|
|
||||||
|
|
||||||
output_paths = []
|
|
||||||
for filename in partitioned_filenames:
|
|
||||||
# print("Processing {}".format(filename))
|
|
||||||
full_path = os.path.join(args.content_dir, filename)
|
|
||||||
content_image = load_image(full_path, scale=args.content_scale)
|
|
||||||
content_transform = transforms.Compose([
|
|
||||||
transforms.ToTensor(),
|
|
||||||
transforms.Lambda(lambda x: x.mul(255))
|
|
||||||
])
|
|
||||||
content_image = content_transform(content_image)
|
|
||||||
content_image = content_image.unsqueeze(0).to(device)
|
|
||||||
|
|
||||||
output = style_model(content_image).cpu()
|
|
||||||
|
|
||||||
output_path = os.path.join(args.output_dir, filename)
|
|
||||||
save_image(output_path, output[0])
|
|
||||||
|
|
||||||
output_paths.append(output_path)
|
|
||||||
|
|
||||||
print("RANK {} - number of pre-aggregated output files {}".format(rank, len(output_paths)))
|
|
||||||
|
|
||||||
output_paths_list = comm.gather(output_paths, root=0)
|
|
||||||
|
|
||||||
if rank == 0:
|
|
||||||
print("RANK {} - number of aggregated output files {}".format(rank, len(output_paths_list)))
|
|
||||||
print("RANK {} - end".format(rank))
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
arg_parser = argparse.ArgumentParser(description="parser for fast-neural-style")
|
|
||||||
|
|
||||||
arg_parser.add_argument("--content-scale", type=float, default=None,
|
|
||||||
help="factor for scaling down the content image")
|
|
||||||
arg_parser.add_argument("--model-dir", type=str, required=True,
|
|
||||||
help="saved model to be used for stylizing the image.")
|
|
||||||
arg_parser.add_argument("--cuda", type=int, required=True,
|
|
||||||
help="set it to 1 for running on GPU, 0 for CPU")
|
|
||||||
arg_parser.add_argument("--style", type=str, help="style name")
|
|
||||||
arg_parser.add_argument("--content-dir", type=str, required=True,
|
|
||||||
help="directory holding the images")
|
|
||||||
arg_parser.add_argument("--output-dir", type=str, required=True,
|
|
||||||
help="directory holding the output images")
|
|
||||||
args = arg_parser.parse_args()
|
|
||||||
|
|
||||||
comm = MPI.COMM_WORLD
|
|
||||||
|
|
||||||
if args.cuda and not torch.cuda.is_available():
|
|
||||||
print("ERROR: cuda is not available, try running on CPU")
|
|
||||||
sys.exit(1)
|
|
||||||
os.makedirs(args.output_dir, exist_ok=True)
|
|
||||||
stylize(args, comm)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
@@ -1,728 +0,0 @@
|
|||||||
{
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
|
||||||
"\n",
|
|
||||||
"Licensed under the MIT License."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
""
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# Neural style transfer on video\n",
|
|
||||||
"Using modified code from `pytorch`'s neural style [example](https://pytorch.org/tutorials/advanced/neural_style_tutorial.html), we show how to setup a pipeline for doing style transfer on video. The pipeline has following steps:\n",
|
|
||||||
"1. Split a video into images\n",
|
|
||||||
"2. Run neural style on each image using one of the provided models (from `pytorch` pretrained models for this example).\n",
|
|
||||||
"3. Stitch the image back into a video."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Prerequisites\n",
|
|
||||||
"If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, make sure you go through the configuration Notebook located at https://github.com/Azure/MachineLearningNotebooks first if you haven't. This sets you up with a working config file that has information on your workspace, subscription id, etc. "
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Initialize Workspace\n",
|
|
||||||
"\n",
|
|
||||||
"Initialize a workspace object from persisted configuration."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import os\n",
|
|
||||||
"from azureml.core import Workspace, Experiment\n",
|
|
||||||
"\n",
|
|
||||||
"ws = Workspace.from_config()\n",
|
|
||||||
"print('Workspace name: ' + ws.name, \n",
|
|
||||||
" 'Azure region: ' + ws.location, \n",
|
|
||||||
" 'Subscription id: ' + ws.subscription_id, \n",
|
|
||||||
" 'Resource group: ' + ws.resource_group, sep = '\\n')\n",
|
|
||||||
"\n",
|
|
||||||
"scripts_folder = \"mpi_scripts\"\n",
|
|
||||||
"\n",
|
|
||||||
"if not os.path.isdir(scripts_folder):\n",
|
|
||||||
" os.mkdir(scripts_folder)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.compute import AmlCompute, ComputeTarget\n",
|
|
||||||
"from azureml.core.datastore import Datastore\n",
|
|
||||||
"from azureml.data.data_reference import DataReference\n",
|
|
||||||
"from azureml.pipeline.core import Pipeline, PipelineData\n",
|
|
||||||
"from azureml.pipeline.steps import PythonScriptStep, MpiStep\n",
|
|
||||||
"from azureml.core.runconfig import CondaDependencies, RunConfiguration\n",
|
|
||||||
"from azureml.core.compute_target import ComputeTargetException"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# Create or use existing compute"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# AmlCompute\n",
|
|
||||||
"cpu_cluster_name = \"cpu-cluster\"\n",
|
|
||||||
"try:\n",
|
|
||||||
" cpu_cluster = AmlCompute(ws, cpu_cluster_name)\n",
|
|
||||||
" print(\"found existing cluster.\")\n",
|
|
||||||
"except ComputeTargetException:\n",
|
|
||||||
" print(\"creating new cluster\")\n",
|
|
||||||
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_D2_v2\",\n",
|
|
||||||
" max_nodes = 1)\n",
|
|
||||||
"\n",
|
|
||||||
" # create the cluster\n",
|
|
||||||
" cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, provisioning_config)\n",
|
|
||||||
" cpu_cluster.wait_for_completion(show_output=True)\n",
|
|
||||||
" \n",
|
|
||||||
"# AmlCompute\n",
|
|
||||||
"gpu_cluster_name = \"gpu-cluster\"\n",
|
|
||||||
"try:\n",
|
|
||||||
" gpu_cluster = AmlCompute(ws, gpu_cluster_name)\n",
|
|
||||||
" print(\"found existing cluster.\")\n",
|
|
||||||
"except ComputeTargetException:\n",
|
|
||||||
" print(\"creating new cluster\")\n",
|
|
||||||
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_NC6\",\n",
|
|
||||||
" max_nodes = 3)\n",
|
|
||||||
"\n",
|
|
||||||
" # create the cluster\n",
|
|
||||||
" gpu_cluster = ComputeTarget.create(ws, gpu_cluster_name, provisioning_config)\n",
|
|
||||||
" gpu_cluster.wait_for_completion(show_output=True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# Python Scripts\n",
|
|
||||||
"We use an edited version of `neural_style_mpi.py` (original is [here](https://github.com/pytorch/examples/blob/master/fast_neural_style/neural_style/neural_style.py)). Scripts to split and stitch the video are thin wrappers to calls to `ffmpeg`. These scripts are also located in the \"scripts_folder\".\n",
|
|
||||||
"\n",
|
|
||||||
"We install `ffmpeg` through conda dependencies."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"%%writefile $scripts_folder/process_video.py\n",
|
|
||||||
"import argparse\n",
|
|
||||||
"import glob\n",
|
|
||||||
"import os\n",
|
|
||||||
"import subprocess\n",
|
|
||||||
"\n",
|
|
||||||
"parser = argparse.ArgumentParser(description=\"Process input video\")\n",
|
|
||||||
"parser.add_argument('--input_video', required=True)\n",
|
|
||||||
"parser.add_argument('--output_audio', required=True)\n",
|
|
||||||
"parser.add_argument('--output_images', required=True)\n",
|
|
||||||
"\n",
|
|
||||||
"args = parser.parse_args()\n",
|
|
||||||
"\n",
|
|
||||||
"os.makedirs(args.output_audio, exist_ok=True)\n",
|
|
||||||
"os.makedirs(args.output_images, exist_ok=True)\n",
|
|
||||||
"\n",
|
|
||||||
"subprocess.run(\"ffmpeg -i {} {}/video.aac\"\n",
|
|
||||||
" .format(args.input_video, args.output_audio),\n",
|
|
||||||
" shell=True, check=True\n",
|
|
||||||
" )\n",
|
|
||||||
"\n",
|
|
||||||
"subprocess.run(\"ffmpeg -i {} {}/%05d_video.jpg -hide_banner\"\n",
|
|
||||||
" .format(args.input_video, args.output_images),\n",
|
|
||||||
" shell=True, check=True\n",
|
|
||||||
" )"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"%%writefile $scripts_folder/stitch_video.py\n",
|
|
||||||
"import argparse\n",
|
|
||||||
"import os\n",
|
|
||||||
"import subprocess\n",
|
|
||||||
"\n",
|
|
||||||
"parser = argparse.ArgumentParser(description=\"Process input video\")\n",
|
|
||||||
"parser.add_argument('--images_dir', required=True)\n",
|
|
||||||
"parser.add_argument('--input_audio', required=True)\n",
|
|
||||||
"parser.add_argument('--output_dir', required=True)\n",
|
|
||||||
"\n",
|
|
||||||
"args = parser.parse_args()\n",
|
|
||||||
"\n",
|
|
||||||
"os.makedirs(args.output_dir, exist_ok=True)\n",
|
|
||||||
"\n",
|
|
||||||
"subprocess.run(\"ffmpeg -framerate 30 -i {}/%05d_video.jpg -c:v libx264 -profile:v high -crf 20 -pix_fmt yuv420p \"\n",
|
|
||||||
" \"-y {}/video_without_audio.mp4\"\n",
|
|
||||||
" .format(args.images_dir, args.output_dir),\n",
|
|
||||||
" shell=True, check=True\n",
|
|
||||||
" )\n",
|
|
||||||
"\n",
|
|
||||||
"subprocess.run(\"ffmpeg -i {}/video_without_audio.mp4 -i {}/video.aac -map 0:0 -map 1:0 -vcodec \"\n",
|
|
||||||
" \"copy -acodec copy -y {}/video_with_audio.mp4\"\n",
|
|
||||||
" .format(args.output_dir, args.input_audio, args.output_dir),\n",
|
|
||||||
" shell=True, check=True\n",
|
|
||||||
" )"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"The sample video **organutan.mp4** is stored at a publicly shared datastore. We are registering the datastore below. If you want to take a look at the original video, click here. (https://pipelinedata.blob.core.windows.net/sample-videos/orangutan.mp4)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# datastore for input video\n",
|
|
||||||
"account_name = \"pipelinedata\"\n",
|
|
||||||
"video_ds = Datastore.register_azure_blob_container(ws, \"videos\", \"sample-videos\",\n",
|
|
||||||
" account_name=account_name, overwrite=True)\n",
|
|
||||||
"\n",
|
|
||||||
"# datastore for models\n",
|
|
||||||
"models_ds = Datastore.register_azure_blob_container(ws, \"models\", \"styletransfer\", \n",
|
|
||||||
" account_name=\"pipelinedata\", \n",
|
|
||||||
" overwrite=True)\n",
|
|
||||||
" \n",
|
|
||||||
"# downloaded models from https://pytorch.org/tutorials/advanced/neural_style_tutorial.html are kept here\n",
|
|
||||||
"models_dir = DataReference(data_reference_name=\"models\", datastore=models_ds, \n",
|
|
||||||
" path_on_datastore=\"saved_models\", mode=\"download\")\n",
|
|
||||||
"\n",
|
|
||||||
"# the default blob store attached to a workspace\n",
|
|
||||||
"default_datastore = ws.get_default_datastore()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# Sample video"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"video_name=os.getenv(\"STYLE_TRANSFER_VIDEO_NAME\", \"orangutan.mp4\") \n",
|
|
||||||
"orangutan_video = DataReference(datastore=video_ds,\n",
|
|
||||||
" data_reference_name=\"video\",\n",
|
|
||||||
" path_on_datastore=video_name, mode=\"download\")"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"cd = CondaDependencies()\n",
|
|
||||||
"\n",
|
|
||||||
"cd.add_channel(\"conda-forge\")\n",
|
|
||||||
"cd.add_conda_package(\"ffmpeg\")\n",
|
|
||||||
"\n",
|
|
||||||
"cd.add_channel(\"pytorch\")\n",
|
|
||||||
"cd.add_conda_package(\"pytorch\")\n",
|
|
||||||
"cd.add_conda_package(\"torchvision\")\n",
|
|
||||||
"\n",
|
|
||||||
"# Runconfig\n",
|
|
||||||
"amlcompute_run_config = RunConfiguration(conda_dependencies=cd)\n",
|
|
||||||
"amlcompute_run_config.environment.docker.enabled = True\n",
|
|
||||||
"amlcompute_run_config.environment.docker.base_image = \"pytorch/pytorch\"\n",
|
|
||||||
"amlcompute_run_config.environment.spark.precache_packages = False"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"ffmpeg_audio = PipelineData(name=\"ffmpeg_audio\", datastore=default_datastore)\n",
|
|
||||||
"ffmpeg_images = PipelineData(name=\"ffmpeg_images\", datastore=default_datastore)\n",
|
|
||||||
"processed_images = PipelineData(name=\"processed_images\", datastore=default_datastore)\n",
|
|
||||||
"output_video = PipelineData(name=\"output_video\", datastore=default_datastore)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# Define tweakable parameters to pipeline\n",
|
|
||||||
"These parameters can be changed when the pipeline is published and rerun from a REST call"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.pipeline.core.graph import PipelineParameter\n",
|
|
||||||
"# create a parameter for style (one of \"candy\", \"mosaic\", \"rain_princess\", \"udnie\") to transfer the images to\n",
|
|
||||||
"style_param = PipelineParameter(name=\"style\", default_value=\"mosaic\")\n",
|
|
||||||
"# create a parameter for the number of nodes to use in step no. 2 (style transfer)\n",
|
|
||||||
"nodecount_param = PipelineParameter(name=\"nodecount\", default_value=1)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"split_video_step = PythonScriptStep(\n",
|
|
||||||
" name=\"split video\",\n",
|
|
||||||
" script_name=\"process_video.py\",\n",
|
|
||||||
" arguments=[\"--input_video\", orangutan_video,\n",
|
|
||||||
" \"--output_audio\", ffmpeg_audio,\n",
|
|
||||||
" \"--output_images\", ffmpeg_images,\n",
|
|
||||||
" ],\n",
|
|
||||||
" compute_target=cpu_cluster,\n",
|
|
||||||
" inputs=[orangutan_video],\n",
|
|
||||||
" outputs=[ffmpeg_images, ffmpeg_audio],\n",
|
|
||||||
" runconfig=amlcompute_run_config,\n",
|
|
||||||
" source_directory=scripts_folder\n",
|
|
||||||
")\n",
|
|
||||||
"\n",
|
|
||||||
"# create a MPI step for distributing style transfer step across multiple nodes in AmlCompute \n",
|
|
||||||
"# using 'nodecount_param' PipelineParameter\n",
|
|
||||||
"distributed_style_transfer_step = MpiStep(\n",
|
|
||||||
" name=\"mpi style transfer\",\n",
|
|
||||||
" script_name=\"neural_style_mpi.py\",\n",
|
|
||||||
" arguments=[\"--content-dir\", ffmpeg_images,\n",
|
|
||||||
" \"--output-dir\", processed_images,\n",
|
|
||||||
" \"--model-dir\", models_dir,\n",
|
|
||||||
" \"--style\", style_param,\n",
|
|
||||||
" \"--cuda\", 1\n",
|
|
||||||
" ],\n",
|
|
||||||
" compute_target=gpu_cluster,\n",
|
|
||||||
" node_count=nodecount_param, \n",
|
|
||||||
" process_count_per_node=1,\n",
|
|
||||||
" inputs=[models_dir, ffmpeg_images],\n",
|
|
||||||
" outputs=[processed_images],\n",
|
|
||||||
" pip_packages=[\"mpi4py\", \"torch\", \"torchvision\"],\n",
|
|
||||||
" use_gpu=True,\n",
|
|
||||||
" source_directory=scripts_folder\n",
|
|
||||||
")\n",
|
|
||||||
"\n",
|
|
||||||
"stitch_video_step = PythonScriptStep(\n",
|
|
||||||
" name=\"stitch\",\n",
|
|
||||||
" script_name=\"stitch_video.py\",\n",
|
|
||||||
" arguments=[\"--images_dir\", processed_images, \n",
|
|
||||||
" \"--input_audio\", ffmpeg_audio, \n",
|
|
||||||
" \"--output_dir\", output_video],\n",
|
|
||||||
" compute_target=cpu_cluster,\n",
|
|
||||||
" inputs=[processed_images, ffmpeg_audio],\n",
|
|
||||||
" outputs=[output_video],\n",
|
|
||||||
" runconfig=amlcompute_run_config,\n",
|
|
||||||
" source_directory=scripts_folder\n",
|
|
||||||
")"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# Run the pipeline"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"pipeline = Pipeline(workspace=ws, steps=[stitch_video_step])\n",
|
|
||||||
"# submit the pipeline and provide values for the PipelineParameters used in the pipeline\n",
|
|
||||||
"pipeline_run = Experiment(ws, 'style_transfer').submit(pipeline, pipeline_parameters={\"style\": \"mosaic\", \"nodecount\": 3})"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# Monitor using widget"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.widgets import RunDetails\n",
|
|
||||||
"RunDetails(pipeline_run).show()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Downloads the video in `output_video` folder"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# Download output video"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"def download_video(run, target_dir=None):\n",
|
|
||||||
" stitch_run = run.find_step_run(\"stitch\")[0]\n",
|
|
||||||
" port_data = stitch_run.get_output_data(\"output_video\")\n",
|
|
||||||
" port_data.download(target_dir, show_progress=True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"pipeline_run.wait_for_completion()\n",
|
|
||||||
"download_video(pipeline_run, \"output_video_mosaic\")"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# Publish pipeline"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"published_pipeline = pipeline_run.publish_pipeline(\n",
|
|
||||||
" name=\"batch score style transfer\", description=\"style transfer\", version=\"1.0\")\n",
|
|
||||||
"\n",
|
|
||||||
"published_pipeline"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Get published pipeline\n",
|
|
||||||
"\n",
|
|
||||||
"You can get the published pipeline using **pipeline id**.\n",
|
|
||||||
"\n",
|
|
||||||
"To get all the published pipelines for a given workspace(ws): \n",
|
|
||||||
"```css\n",
|
|
||||||
"all_pub_pipelines = PublishedPipeline.get_all(ws)\n",
|
|
||||||
"```"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.pipeline.core import PublishedPipeline\n",
|
|
||||||
"\n",
|
|
||||||
"pipeline_id = published_pipeline.id # use your published pipeline id\n",
|
|
||||||
"published_pipeline = PublishedPipeline.get(ws, pipeline_id)\n",
|
|
||||||
"\n",
|
|
||||||
"published_pipeline"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# Re-run pipeline through REST calls for other styles"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Get AAD token\n",
|
|
||||||
"[This notebook](https://aka.ms/pl-restep-auth) shows how to authenticate to AML workspace."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.core.authentication import InteractiveLoginAuthentication\n",
|
|
||||||
"import requests\n",
|
|
||||||
"\n",
|
|
||||||
"auth = InteractiveLoginAuthentication()\n",
|
|
||||||
"aad_token = auth.get_authentication_header()\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Get endpoint URL"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"rest_endpoint = published_pipeline.endpoint"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Send request and monitor"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Run the pipeline using PipelineParameter values style='candy' and nodecount=2"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"response = requests.post(rest_endpoint, \n",
|
|
||||||
" headers=aad_token,\n",
|
|
||||||
" json={\"ExperimentName\": \"style_transfer\",\n",
|
|
||||||
" \"ParameterAssignments\": {\"style\": \"candy\", \"nodecount\": 2}})"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"try:\n",
|
|
||||||
" response.raise_for_status()\n",
|
|
||||||
"except Exception: \n",
|
|
||||||
" raise Exception('Received bad response from the endpoint: {}\\n'\n",
|
|
||||||
" 'Response Code: {}\\n'\n",
|
|
||||||
" 'Headers: {}\\n'\n",
|
|
||||||
" 'Content: {}'.format(rest_endpoint, response.status_code, response.headers, response.content))\n",
|
|
||||||
"\n",
|
|
||||||
"run_id = response.json().get('Id')\n",
|
|
||||||
"print('Submitted pipeline run: ', run_id)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from azureml.pipeline.core.run import PipelineRun\n",
|
|
||||||
"published_pipeline_run_candy = PipelineRun(ws.experiments[\"style_transfer\"], run_id)\n",
|
|
||||||
"RunDetails(published_pipeline_run_candy).show()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Run the pipeline using PipelineParameter values style='rain_princess' and nodecount=3"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"response = requests.post(rest_endpoint, \n",
|
|
||||||
" headers=aad_token,\n",
|
|
||||||
" json={\"ExperimentName\": \"style_transfer\",\n",
|
|
||||||
" \"ParameterAssignments\": {\"style\": \"rain_princess\", \"nodecount\": 3}})"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"try:\n",
|
|
||||||
" response.raise_for_status()\n",
|
|
||||||
"except Exception: \n",
|
|
||||||
" raise Exception('Received bad response from the endpoint: {}\\n'\n",
|
|
||||||
" 'Response Code: {}\\n'\n",
|
|
||||||
" 'Headers: {}\\n'\n",
|
|
||||||
" 'Content: {}'.format(rest_endpoint, response.status_code, response.headers, response.content))\n",
|
|
||||||
"\n",
|
|
||||||
"run_id = response.json().get('Id')\n",
|
|
||||||
"print('Submitted pipeline run: ', run_id)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"published_pipeline_run_rain = PipelineRun(ws.experiments[\"style_transfer\"], run_id)\n",
|
|
||||||
"RunDetails(published_pipeline_run_rain).show()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Run the pipeline using PipelineParameter values style='udnie' and nodecount=4"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"response = requests.post(rest_endpoint, \n",
|
|
||||||
" headers=aad_token,\n",
|
|
||||||
" json={\"ExperimentName\": \"style_transfer\",\n",
|
|
||||||
" \"ParameterAssignments\": {\"style\": \"udnie\", \"nodecount\": 3}})\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"try:\n",
|
|
||||||
" response.raise_for_status()\n",
|
|
||||||
"except Exception: \n",
|
|
||||||
" raise Exception('Received bad response from the endpoint: {}\\n'\n",
|
|
||||||
" 'Response Code: {}\\n'\n",
|
|
||||||
" 'Headers: {}\\n'\n",
|
|
||||||
" 'Content: {}'.format(rest_endpoint, response.status_code, response.headers, response.content))\n",
|
|
||||||
"\n",
|
|
||||||
"run_id = response.json().get('Id')\n",
|
|
||||||
"print('Submitted pipeline run: ', run_id)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"published_pipeline_run_udnie = PipelineRun(ws.experiments[\"style_transfer\"], run_id)\n",
|
|
||||||
"RunDetails(published_pipeline_run_udnie).show()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Download output from re-run"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"published_pipeline_run_candy.wait_for_completion()\n",
|
|
||||||
"published_pipeline_run_rain.wait_for_completion()\n",
|
|
||||||
"published_pipeline_run_udnie.wait_for_completion()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"download_video(published_pipeline_run_candy, target_dir=\"output_video_candy\")\n",
|
|
||||||
"download_video(published_pipeline_run_rain, target_dir=\"output_video_rain_princess\")\n",
|
|
||||||
"download_video(published_pipeline_run_udnie, target_dir=\"output_video_udnie\")"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"authors": [
|
|
||||||
{
|
|
||||||
"name": "balapv mabables"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3.6",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python36"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.6.7"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 2
|
|
||||||
}
|
|
||||||
@@ -1,7 +0,0 @@
name: pipeline-style-transfer-mpi
dependencies:
- pip:
  - azureml-sdk
  - azureml-pipeline-steps
  - azureml-widgets
  - requests
@@ -1,7 +0,0 @@
name: pipeline-style-transfer-parallel-run
dependencies:
- pip:
  - azureml-sdk
  - azureml-pipeline-steps
  - azureml-widgets
  - requests
@@ -1,5 +0,0 @@
name: distributed-chainer
dependencies:
- pip:
  - azureml-sdk
  - azureml-widgets
@@ -1,12 +0,0 @@
name: train-hyperparameter-tune-deploy-with-chainer
dependencies:
- pip:
  - azureml-sdk
  - azureml-widgets
  - numpy
  - matplotlib
  - json
  - urllib
  - gzip
  - struct
  - requests
@@ -1,5 +0,0 @@
name: fastai-with-custom-docker
dependencies:
- pip:
  - azureml-sdk
  - fastai==1.0.61
@@ -1,8 +0,0 @@
name: train-hyperparameter-tune-deploy-with-keras
dependencies:
- pip:
  - azureml-sdk
  - azureml-widgets
  - tensorflow
  - keras<=2.3.1
  - matplotlib
@@ -1,5 +0,0 @@
|
|||||||
name: distributed-pytorch-with-horovod
|
|
||||||
dependencies:
|
|
||||||
- pip:
|
|
||||||
- azureml-sdk
|
|
||||||
- azureml-widgets
|
|
||||||
@@ -1,5 +0,0 @@
|
|||||||
name: distributed-pytorch-with-nccl-gloo
|
|
||||||
dependencies:
|
|
||||||
- pip:
|
|
||||||
- azureml-sdk
|
|
||||||
- azureml-widgets
|
|
||||||
@@ -1,9 +0,0 @@
|
|||||||
name: train-hyperparameter-tune-deploy-with-pytorch
|
|
||||||
dependencies:
|
|
||||||
- pip:
|
|
||||||
- azureml-sdk
|
|
||||||
- azureml-widgets
|
|
||||||
- pillow==5.4.1
|
|
||||||
- matplotlib
|
|
||||||
- https://download.pytorch.org/whl/cpu/torch-1.1.0-cp35-cp35m-win_amd64.whl
|
|
||||||
- https://download.pytorch.org/whl/cpu/torchvision-0.3.0-cp35-cp35m-win_amd64.whl
|
|
||||||
@@ -1,6 +0,0 @@
|
|||||||
name: train-hyperparameter-tune-deploy-with-sklearn
|
|
||||||
dependencies:
|
|
||||||
- pip:
|
|
||||||
- azureml-sdk
|
|
||||||
- azureml-widgets
|
|
||||||
- numpy
|
|
||||||
@@ -1,11 +0,0 @@
|
|||||||
name: distributed-tensorflow-with-horovod
|
|
||||||
dependencies:
|
|
||||||
- pip:
|
|
||||||
- azureml-sdk
|
|
||||||
- azureml-widgets
|
|
||||||
- keras
|
|
||||||
- tensorflow-gpu==1.13.2
|
|
||||||
- horovod==0.19.1
|
|
||||||
- matplotlib
|
|
||||||
- pandas
|
|
||||||
- fuse
|
|
||||||
@@ -1,5 +0,0 @@
|
|||||||
name: distributed-tensorflow-with-parameter-server
|
|
||||||
dependencies:
|
|
||||||
- pip:
|
|
||||||
- azureml-sdk
|
|
||||||
- azureml-widgets
|
|
||||||
@@ -1,12 +0,0 @@
|
|||||||
name: train-hyperparameter-tune-deploy-with-tensorflow
|
|
||||||
dependencies:
|
|
||||||
- numpy
|
|
||||||
- matplotlib
|
|
||||||
- pip:
|
|
||||||
- azureml-sdk
|
|
||||||
- azureml-widgets
|
|
||||||
- pandas
|
|
||||||
- keras
|
|
||||||
- tensorflow==2.0.0
|
|
||||||
- matplotlib
|
|
||||||
- fuse
|
|
||||||
@@ -1,7 +0,0 @@
|
|||||||
name: pong_rllib
|
|
||||||
dependencies:
|
|
||||||
- pip:
|
|
||||||
- azureml-sdk
|
|
||||||
- azureml-contrib-reinforcementlearning
|
|
||||||
- azureml-widgets
|
|
||||||
- matplotlib
|
|
||||||
@@ -1,6 +0,0 @@
|
|||||||
name: cartpole_ci
|
|
||||||
dependencies:
|
|
||||||
- pip:
|
|
||||||
- azureml-sdk
|
|
||||||
- azureml-contrib-reinforcementlearning
|
|
||||||
- azureml-widgets
|
|
||||||
@@ -1,6 +0,0 @@
|
|||||||
name: cartpole_sc
|
|
||||||
dependencies:
|
|
||||||
- pip:
|
|
||||||
- azureml-sdk
|
|
||||||
- azureml-contrib-reinforcementlearning
|
|
||||||
- azureml-widgets
|
|
||||||
@@ -1,8 +0,0 @@
|
|||||||
name: minecraft
|
|
||||||
dependencies:
|
|
||||||
- pip:
|
|
||||||
- azureml-sdk
|
|
||||||
- azureml-contrib-reinforcementlearning
|
|
||||||
- azureml-widgets
|
|
||||||
- tensorboard
|
|
||||||
- azureml-tensorboard
|
|
||||||
@@ -1,9 +0,0 @@
|
|||||||
name: particle
|
|
||||||
dependencies:
|
|
||||||
- pip:
|
|
||||||
- azureml-sdk
|
|
||||||
- azureml-contrib-reinforcementlearning
|
|
||||||
- azureml-widgets
|
|
||||||
- tensorboard
|
|
||||||
- azureml-tensorboard
|
|
||||||
- ipython
|
|
||||||
17
how-to-use-azureml/responsible-ai/README.md
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
# AzureML Responsible AI
|
||||||
|
|
||||||
|
AzureML Responsible AI empowers data scientists and developers to innovate responsibly, with a growing set of tools for model interpretability and fairness assessment.
|
||||||
|
|
||||||
|
Follow these sample notebooks to learn about the model interpretability and fairness integration in Azure:
|
||||||
|
|
||||||
|
<a name="samples"></a>
|
||||||
|
|
||||||
|
# Responsible AI Sample Notebooks
|
||||||
|
|
||||||
|
- **Visualize fairness metrics and model explanations**
|
||||||
|
- Dataset: [UCI Adult](https://archive.ics.uci.edu/ml/datasets/Adult)
|
||||||
|
- **[Jupyter Notebook](visualize-upload-loan-decision/rai-loan-decision.ipynb)**
|
||||||
|
- Train a model to predict annual income
|
||||||
|
- Generate fairness and interpretability explanations for the trained model
|
||||||
|
- Visualize the explanations in the notebook widget dashboard
|
||||||
|
- Upload the explanations to Azure to be viewed in AzureML studio
|
||||||
@@ -0,0 +1,720 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||||
|
"\n",
|
||||||
|
"Licensed under the MIT License."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
""
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Assess Fairness, Explore Interpretability, and Mitigate Fairness Issues \n",
|
||||||
|
"\n",
|
||||||
|
"This notebook demonstrates how to use [InterpretML](interpret.ml), [Fairlearn](fairlearn.org), and the [Responsible AI Widget's](https://github.com/microsoft/responsible-ai-widgets/) Fairness and Interpretability dashboards to understand a model trained on the Census dataset. This dataset is a classification problem - given a range of data about 32,000 individuals, predict whether their annual income is above or below fifty thousand dollars per year.\n",
|
||||||
|
"\n",
|
||||||
|
"For the purposes of this notebook, we shall treat this as a loan decision problem. We will pretend that the label indicates whether or not each individual repaid a loan in the past. We will use the data to train a predictor to predict whether previously unseen individuals will repay a loan or not. The assumption is that the model predictions are used to decide whether an individual should be offered a loan.\n",
|
||||||
|
"\n",
|
||||||
|
"We will first train a fairness-unaware predictor, load its global and local explanations, and use the interpretability and fairness dashboards to demonstrate how this model leads to unfair decisions (under a specific notion of fairness called *demographic parity*). We then mitigate unfairness by applying the `GridSearch` algorithm from `Fairlearn` package.\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Install required packages"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"%pip install --upgrade fairlearn\n",
|
||||||
|
"%pip install --upgrade interpret-community\n",
|
||||||
|
"%pip install --upgrade raiwidgets"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"After installing packages, you must close and reopen the notebook as well as restarting the kernel."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Load and preprocess the dataset\n",
|
||||||
|
"\n",
|
||||||
|
"For simplicity, we import the dataset from the `shap` package, which contains the data in a cleaned format. We start by importing the various modules we're going to use:"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from fairlearn.reductions import GridSearch\n",
|
||||||
|
"from fairlearn.reductions import DemographicParity, ErrorRate\n",
|
||||||
|
"from fairlearn.datasets import fetch_adult\n",
|
||||||
|
"from fairlearn.metrics import MetricFrame, selection_rate\n",
|
||||||
|
"\n",
|
||||||
|
"from sklearn import svm, neighbors, tree\n",
|
||||||
|
"from sklearn.compose import ColumnTransformer, make_column_selector\n",
|
||||||
|
"from sklearn.preprocessing import LabelEncoder,StandardScaler\n",
|
||||||
|
"from sklearn.linear_model import LogisticRegression\n",
|
||||||
|
"from sklearn.pipeline import Pipeline\n",
|
||||||
|
"from sklearn.impute import SimpleImputer\n",
|
||||||
|
"from sklearn.preprocessing import StandardScaler, OneHotEncoder\n",
|
||||||
|
"from sklearn.svm import SVC\n",
|
||||||
|
"from sklearn.metrics import accuracy_score\n",
|
||||||
|
"\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"import numpy as np\n",
|
||||||
|
"\n",
|
||||||
|
"# SHAP Tabular Explainer\n",
|
||||||
|
"from interpret.ext.blackbox import KernelExplainer\n",
|
||||||
|
"from interpret.ext.blackbox import MimicExplainer\n",
|
||||||
|
"from interpret.ext.glassbox import LGBMExplainableModel"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"We can now load and inspect the data:"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"dataset = fetch_adult(as_frame=True)\n",
|
||||||
|
"X_raw, y = dataset['data'], dataset['target']\n",
|
||||||
|
"X_raw[\"race\"].value_counts().to_dict()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"We are going to treat the sex of each individual as a protected attribute (where 0 indicates female and 1 indicates male), and in this particular case we are going separate this attribute out and drop it from the main data. We then perform some standard data preprocessing steps to convert the data into a format suitable for the ML algorithms"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"sensitive_features = X_raw[['sex','race']]\n",
|
||||||
|
"\n",
|
||||||
|
"le = LabelEncoder()\n",
|
||||||
|
"y = le.fit_transform(y)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Finally, we split the data into training and test sets:"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from sklearn.model_selection import train_test_split\n",
|
||||||
|
"X_train, X_test, y_train, y_test, sensitive_features_train, sensitive_features_test = \\\n",
|
||||||
|
" train_test_split(X_raw, y, sensitive_features,\n",
|
||||||
|
" test_size = 0.2, random_state=0, stratify=y)\n",
|
||||||
|
"\n",
|
||||||
|
"# Work around indexing bug\n",
|
||||||
|
"X_train = X_train.reset_index(drop=True)\n",
|
||||||
|
"sensitive_features_train = sensitive_features_train.reset_index(drop=True)\n",
|
||||||
|
"X_test = X_test.reset_index(drop=True)\n",
|
||||||
|
"sensitive_features_test = sensitive_features_test.reset_index(drop=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Training a fairness-unaware predictor\n",
|
||||||
|
"\n",
|
||||||
|
"To show the effect of `Fairlearn` we will first train a standard ML predictor that does not incorporate fairness. For speed of demonstration, we use a simple logistic regression estimator from `sklearn`:"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"numeric_transformer = Pipeline(\n",
|
||||||
|
" steps=[\n",
|
||||||
|
" (\"impute\", SimpleImputer()),\n",
|
||||||
|
" (\"scaler\", StandardScaler()),\n",
|
||||||
|
" ]\n",
|
||||||
|
")\n",
|
||||||
|
"categorical_transformer = Pipeline(\n",
|
||||||
|
" [\n",
|
||||||
|
" (\"impute\", SimpleImputer(strategy=\"most_frequent\")),\n",
|
||||||
|
" (\"ohe\", OneHotEncoder(handle_unknown=\"ignore\")),\n",
|
||||||
|
" ]\n",
|
||||||
|
")\n",
|
||||||
|
"preprocessor = ColumnTransformer(\n",
|
||||||
|
" transformers=[\n",
|
||||||
|
" (\"num\", numeric_transformer, make_column_selector(dtype_exclude=\"category\")),\n",
|
||||||
|
" (\"cat\", categorical_transformer, make_column_selector(dtype_include=\"category\")),\n",
|
||||||
|
" ]\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"model = Pipeline(\n",
|
||||||
|
" steps=[\n",
|
||||||
|
" (\"preprocessor\", preprocessor),\n",
|
||||||
|
" (\n",
|
||||||
|
" \"classifier\",\n",
|
||||||
|
" LogisticRegression(solver=\"liblinear\", fit_intercept=True),\n",
|
||||||
|
" ),\n",
|
||||||
|
" ]\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"model.fit(X_train, y_train)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Generate model explanations"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Using SHAP KernelExplainer\n",
|
||||||
|
"# clf.steps[-1][1] returns the trained classification model\n",
|
||||||
|
"explainer = MimicExplainer(model.steps[-1][1], \n",
|
||||||
|
" X_train,\n",
|
||||||
|
" LGBMExplainableModel,\n",
|
||||||
|
" features=X_raw.columns, \n",
|
||||||
|
" classes=['Rejected', 'Approved'],\n",
|
||||||
|
" transformations=preprocessor)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Generate global explanations\n",
|
||||||
|
"Explain overall model predictions (global explanation)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Explain the model based on a subset of 1000 rows\n",
|
||||||
|
"global_explanation = explainer.explain_global(X_test[:1000])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"global_explanation.get_feature_importance_dict()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Generate local explanations\n",
|
||||||
|
"Explain local data points (individual instances)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# You can pass a specific data point or a group of data points to the explain_local function\n",
|
||||||
|
"# E.g., Explain the first data point in the test set\n",
|
||||||
|
"instance_num = 1\n",
|
||||||
|
"local_explanation = explainer.explain_local(X_test[:instance_num])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Get the prediction for the first member of the test set and explain why model made that prediction\n",
|
||||||
|
"prediction_value = model.predict(X_test)[instance_num]\n",
|
||||||
|
"\n",
|
||||||
|
"sorted_local_importance_values = local_explanation.get_ranked_local_values()[prediction_value]\n",
|
||||||
|
"sorted_local_importance_names = local_explanation.get_ranked_local_names()[prediction_value]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"print('local importance values: {}'.format(sorted_local_importance_values))\n",
|
||||||
|
"print('local importance names: {}'.format(sorted_local_importance_names))"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Visualize model explanations\n",
|
||||||
|
"Load the interpretability visualization dashboard"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from raiwidgets import ExplanationDashboard"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"ExplanationDashboard(global_explanation, model, dataset=X_test[:1000], true_y=y_test[:1000])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"We can load this predictor into the Fairness dashboard, and examine how it is unfair:"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Assess model fairness \n",
|
||||||
|
"Load the fairness visualization dashboard"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from raiwidgets import FairnessDashboard\n",
|
||||||
|
"\n",
|
||||||
|
"y_pred = model.predict(X_test)\n",
|
||||||
|
"\n",
|
||||||
|
"FairnessDashboard(sensitive_features=sensitive_features_test,\n",
|
||||||
|
" y_true=y_test,\n",
|
||||||
|
" y_pred=y_pred)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Looking at the disparity in accuracy, we see that males have an error rate about three times greater than the females. More interesting is the disparity in opportunitiy - males are offered loans at three times the rate of females.\n",
|
||||||
|
"\n",
|
||||||
|
"Despite the fact that we removed the feature from the training data, our predictor still discriminates based on sex. This demonstrates that simply ignoring a protected attribute when fitting a predictor rarely eliminates unfairness. There will generally be enough other features correlated with the removed attribute to lead to disparate impact."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
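As an aside, the group metrics behind these dashboard figures can also be computed directly with Fairlearn's `MetricFrame`. The sketch below assumes the `model`, `X_test`, `y_test`, and `sensitive_features_test` objects defined above; depending on your Fairlearn version the arguments may need to be passed as keywords (`metrics=`, `y_true=`, `y_pred=`).

```python
from fairlearn.metrics import MetricFrame, selection_rate
from sklearn.metrics import accuracy_score

y_pred = model.predict(X_test)

# Accuracy and selection rate broken out by the 'sex' sensitive feature
acc_frame = MetricFrame(accuracy_score, y_test, y_pred,
                        sensitive_features=sensitive_features_test.sex)
sel_frame = MetricFrame(selection_rate, y_test, y_pred,
                        sensitive_features=sensitive_features_test.sex)

print(acc_frame.by_group)       # per-group accuracy
print(sel_frame.by_group)       # per-group selection (loan approval) rate
print(sel_frame.difference())   # demographic parity difference
```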
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Mitigation with Fairlearn (GridSearch)\n",
|
||||||
|
"\n",
|
||||||
|
"The `GridSearch` class in `Fairlearn` implements a simplified version of the exponentiated gradient reduction of [Agarwal et al. 2018](https://arxiv.org/abs/1803.02453). The user supplies a standard ML estimator, which is treated as a blackbox. `GridSearch` works by generating a sequence of relabellings and reweightings, and trains a predictor for each.\n",
|
||||||
|
"\n",
|
||||||
|
"For this example, we specify demographic parity (on the protected attribute of sex) as the fairness metric. Demographic parity requires that individuals are offered the opportunity (are approved for a loan in this example) independent of membership in the protected class (i.e., females and males should be offered loans at the same rate). We are using this metric for the sake of simplicity; in general, the appropriate fairness metric will not be obvious."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
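For intuition, with a binary sensitive attribute $A$ and predictions $\hat{y}$, demographic parity asks that the selection rates of the two groups match; the quantity the dashboard reports as the demographic parity difference is

$$\Delta_{DP} \;=\; \bigl|\, \Pr(\hat{y}=1 \mid A=\text{female}) \;-\; \Pr(\hat{y}=1 \mid A=\text{male}) \,\bigr|,$$

and the `DemographicParity` constraint drives this difference toward zero while `GridSearch` trades it off against overall error. (This formula is a gloss added here for intuition; see the Fairlearn documentation for the precise constraint formulation used by the reduction.)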
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Fairlearn is not yet fully compatible with Pipelines, so we have to pass the estimator only\n",
|
||||||
|
"X_train_prep = preprocessor.transform(X_train).toarray()\n",
|
||||||
|
"X_test_prep = preprocessor.transform(X_test).toarray()\n",
|
||||||
|
"\n",
|
||||||
|
"sweep = GridSearch(LogisticRegression(solver=\"liblinear\", fit_intercept=True),\n",
|
||||||
|
" constraints=DemographicParity(),\n",
|
||||||
|
" grid_size=70)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Our algorithms provide `fit()` and `predict()` methods, so they behave in a similar manner to other ML packages in Python. We do however have to specify two extra arguments to `fit()` - the column of protected attribute labels, and also the number of predictors to generate in our sweep.\n",
|
||||||
|
"\n",
|
||||||
|
"After `fit()` completes, we extract the full set of predictors from the `GridSearch` object."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"sweep.fit(X_train_prep, y_train,\n",
|
||||||
|
" sensitive_features=sensitive_features_train.sex)\n",
|
||||||
|
"\n",
|
||||||
|
"predictors = sweep.predictors_"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"We could load these predictors into the Fairness dashboard now. However, the plot would be somewhat confusing due to their number. In this case, we are going to remove the predictors which are dominated in the error-disparity space by others from the sweep (note that the disparity will only be calculated for the sensitive feature). In general, one might not want to do this, since there may be other considerations beyond the strict optimization of error and disparity (of the given protected attribute)."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"accuracies, disparities = [], []\n",
|
||||||
|
"\n",
|
||||||
|
"for predictor in predictors:\n",
|
||||||
|
" accuracy_metric_frame = MetricFrame(accuracy_score, y_train, predictor.predict(X_train_prep), sensitive_features=sensitive_features_train.sex)\n",
|
||||||
|
" selection_rate_metric_frame = MetricFrame(selection_rate, y_train, predictor.predict(X_train_prep), sensitive_features=sensitive_features_train.sex)\n",
|
||||||
|
" accuracies.append(accuracy_metric_frame.overall)\n",
|
||||||
|
" disparities.append(selection_rate_metric_frame.difference())\n",
|
||||||
|
" \n",
|
||||||
|
"all_results = pd.DataFrame({\"predictor\": predictors, \"accuracy\": accuracies, \"disparity\": disparities})\n",
|
||||||
|
"\n",
|
||||||
|
"all_models_dict = {\"unmitigated\": model.steps[-1][1]}\n",
|
||||||
|
"dominant_models_dict = {\"unmitigated\": model.steps[-1][1]}\n",
|
||||||
|
"base_name_format = \"grid_{0}\"\n",
|
||||||
|
"row_id = 0\n",
|
||||||
|
"for row in all_results.itertuples():\n",
|
||||||
|
" model_name = base_name_format.format(row_id)\n",
|
||||||
|
" all_models_dict[model_name] = row.predictor\n",
|
||||||
|
" accuracy_for_lower_or_eq_disparity = all_results[\"accuracy\"][all_results[\"disparity\"] <= row.disparity]\n",
|
||||||
|
" if row.accuracy >= accuracy_for_lower_or_eq_disparity.max():\n",
|
||||||
|
" dominant_models_dict[model_name] = row.predictor\n",
|
||||||
|
" row_id = row_id + 1"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"We can construct predictions for all the models, and also for the dominant models:"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from raiwidgets import FairnessDashboard\n",
|
||||||
|
"\n",
|
||||||
|
"dashboard_all = {}\n",
|
||||||
|
"for name, predictor in all_models_dict.items():\n",
|
||||||
|
" value = predictor.predict(X_test_prep)\n",
|
||||||
|
" dashboard_all[name] = value\n",
|
||||||
|
" \n",
|
||||||
|
"dominant_all = {}\n",
|
||||||
|
"for name, predictor in dominant_models_dict.items():\n",
|
||||||
|
" dominant_all[name] = predictor.predict(X_test_prep)\n",
|
||||||
|
"\n",
|
||||||
|
"FairnessDashboard(sensitive_features=sensitive_features_test, \n",
|
||||||
|
" y_true=y_test,\n",
|
||||||
|
" y_pred=dominant_all)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"We can look at just the dominant models in the dashboard:"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"We see a Pareto front forming - the set of predictors which represent optimal tradeoffs between accuracy and disparity in predictions. In the ideal case, we would have a predictor at (1,0) - perfectly accurate and without any unfairness under demographic parity (with respect to the protected attribute \"sex\"). The Pareto front represents the closest we can come to this ideal based on our data and choice of estimator. Note the range of the axes - the disparity axis covers more values than the accuracy, so we can reduce disparity substantially for a small loss in accuracy.\n",
|
||||||
|
"\n",
|
||||||
|
"By clicking on individual models on the plot, we can inspect their metrics for disparity and accuracy in greater detail. In a real example, we would then pick the model which represented the best trade-off between accuracy and disparity given the relevant business constraints."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
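If you prefer a static view of the same trade-off outside the dashboard, a simple scatter plot of the sweep gives the same picture. This is a minimal sketch that assumes `matplotlib` is installed and reuses the `all_results` DataFrame built above:

```python
import matplotlib.pyplot as plt

# Each point is one predictor from the GridSearch sweep
plt.scatter(all_results["disparity"], all_results["accuracy"], alpha=0.6)
plt.xlabel("selection rate difference (sex)")
plt.ylabel("accuracy")
plt.title("Accuracy vs. disparity across the sweep")
plt.show()
```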
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# AzureML integration\n",
|
||||||
|
"\n",
|
||||||
|
"We will now go through a brief example of the AzureML integration.\n",
|
||||||
|
"\n",
|
||||||
|
"The required package can be installed via:\n",
|
||||||
|
"\n",
|
||||||
|
"```\n",
|
||||||
|
"pip install azureml-contrib-fairness\n",
|
||||||
|
"pip install azureml-interpret\n",
|
||||||
|
"```"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Connect to workspace\n",
|
||||||
|
"\n",
|
||||||
|
"Just like in the previous tutorials, we will need to connect to a [workspace](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.workspace(class)?view=azure-ml-py).\n",
|
||||||
|
"\n",
|
||||||
|
"The following code will allow you to create a workspace if you don't already have one created. You must have an Azure subscription to create a workspace:\n",
|
||||||
|
"\n",
|
||||||
|
"```python\n",
|
||||||
|
"from azureml.core import Workspace\n",
|
||||||
|
"ws = Workspace.create(name='myworkspace',\n",
|
||||||
|
" subscription_id='<azure-subscription-id>',\n",
|
||||||
|
" resource_group='myresourcegroup',\n",
|
||||||
|
" create_resource_group=True,\n",
|
||||||
|
" location='eastus2')\n",
|
||||||
|
"```\n",
|
||||||
|
"\n",
|
||||||
|
"**If you are running this on a Notebook VM, you can import the existing workspace.**"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.core import Workspace\n",
|
||||||
|
"\n",
|
||||||
|
"ws = Workspace.from_config()\n",
|
||||||
|
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep='\\n')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Registering models\n",
|
||||||
|
"\n",
|
||||||
|
"The fairness dashboard is designed to integrate with registered models, so we need to do this for the models we want in the Studio portal. The assumption is that the names of the models specified in the dashboard dictionary correspond to the `id`s (i.e. `<name>:<version>` pairs) of registered models in the workspace."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Next, we register each of the models in the `dashboard_predicted` dictionary into the workspace. For this, we have to save each model to a file, and then register that file:"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import joblib\n",
|
||||||
|
"import os\n",
|
||||||
|
"from azureml.core import Model, Experiment, Run\n",
|
||||||
|
"\n",
|
||||||
|
"os.makedirs('models', exist_ok=True)\n",
|
||||||
|
"def register_model(name, model):\n",
|
||||||
|
" print(\"Registering \", name)\n",
|
||||||
|
" model_path = \"models/{0}.pkl\".format(name)\n",
|
||||||
|
" joblib.dump(value=model, filename=model_path)\n",
|
||||||
|
" registered_model = Model.register(model_path=model_path,\n",
|
||||||
|
" model_name=name,\n",
|
||||||
|
" workspace=ws)\n",
|
||||||
|
" print(\"Registered \", registered_model.id)\n",
|
||||||
|
" return registered_model.id\n",
|
||||||
|
"\n",
|
||||||
|
"model_name_id_mapping = dict()\n",
|
||||||
|
"for name, model in dashboard_all.items():\n",
|
||||||
|
" m_id = register_model(name, model)\n",
|
||||||
|
" model_name_id_mapping[name] = m_id"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Now, produce new predictions dictionaries, with the updated names:"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"dashboard_all_ids = dict()\n",
|
||||||
|
"for name, y_pred in dashboard_all.items():\n",
|
||||||
|
" dashboard_all_ids[model_name_id_mapping[name]] = y_pred"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Uploading a dashboard\n",
|
||||||
|
"\n",
|
||||||
|
"We create a _dashboard dictionary_ using Fairlearn's `metrics` package. The `_create_group_metric_set` method has arguments similar to the Dashboard constructor, except that the sensitive features are passed as a dictionary (to ensure that names are available), and we must specify the type of prediction. Note that we use the `dashboard_registered` dictionary we just created:"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"sf = { 'sex': sensitive_features_test.sex, 'race': sensitive_features_test.race }\n",
|
||||||
|
"\n",
|
||||||
|
"from fairlearn.metrics._group_metric_set import _create_group_metric_set\n",
|
||||||
|
"\n",
|
||||||
|
"dash_dict_all = _create_group_metric_set(y_true=y_test,\n",
|
||||||
|
" predictions=dashboard_all_ids,\n",
|
||||||
|
" sensitive_features=sf,\n",
|
||||||
|
" prediction_type='binary_classification')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Now, we import our `contrib` package which contains the routine to perform the upload:"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.contrib.fairness import upload_dashboard_dictionary, download_dashboard_by_upload_id"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Now we can create an Experiment, then a Run, and upload our dashboard to it:"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"exp = Experiment(ws, 'responsible-ai-loan-decision')\n",
|
||||||
|
"print(exp)\n",
|
||||||
|
"\n",
|
||||||
|
"run = exp.start_logging()\n",
|
||||||
|
"try:\n",
|
||||||
|
" dashboard_title = \"Upload MultiAsset from Grid Search with Census Data Notebook\"\n",
|
||||||
|
" upload_id = upload_dashboard_dictionary(run,\n",
|
||||||
|
" dash_dict_all,\n",
|
||||||
|
" dashboard_name=dashboard_title)\n",
|
||||||
|
" print(\"\\nUploaded to id: {0}\\n\".format(upload_id))\n",
|
||||||
|
"\n",
|
||||||
|
" downloaded_dict = download_dashboard_by_upload_id(run, upload_id)\n",
|
||||||
|
"finally:\n",
|
||||||
|
" run.complete()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Uploading explanations\n",
|
||||||
|
"\n",
|
||||||
|
"\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from azureml.interpret import ExplanationClient\n",
|
||||||
|
"\n",
|
||||||
|
"client = ExplanationClient.from_run(run)\n",
|
||||||
|
"client.upload_model_explanation(global_explanation, comment = \"census data global explanation\")"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"authors": [
|
||||||
|
{
|
||||||
|
"name": "chgrego"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3.6",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python36"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.7.9"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
||||||
@@ -100,7 +100,7 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"# Check core SDK version number\n",
|
"# Check core SDK version number\n",
|
||||||
"\n",
|
"\n",
|
||||||
"print(\"This notebook was created using SDK version 1.19.0, you are currently running version\", azureml.core.VERSION)"
|
"print(\"This notebook was created using SDK version 1.21.0, you are currently running version\", azureml.core.VERSION)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -1,8 +0,0 @@
|
|||||||
name: logging-api
|
|
||||||
dependencies:
|
|
||||||
- numpy
|
|
||||||
- matplotlib
|
|
||||||
- tqdm
|
|
||||||
- pip:
|
|
||||||
- azureml-sdk
|
|
||||||
- azureml-widgets
|
|
||||||
@@ -1,4 +0,0 @@
|
|||||||
name: manage-runs
|
|
||||||
dependencies:
|
|
||||||
- pip:
|
|
||||||
- azureml-sdk
|
|
||||||
@@ -1,10 +0,0 @@
|
|||||||
name: export-run-history-to-tensorboard
|
|
||||||
dependencies:
|
|
||||||
- pip:
|
|
||||||
- azureml-sdk
|
|
||||||
- azureml-tensorboard
|
|
||||||
- tensorflow
|
|
||||||
- tqdm
|
|
||||||
- scipy
|
|
||||||
- sklearn
|
|
||||||
- setuptools>=41.0.0
|
|
||||||
Some files were not shown because too many files have changed in this diff