Merged notebook changes from release 1.0.45

2025-12-23 11:02:39 -05:00 · 2019-06-26 14:39:09 -04:00
parent 4a6bcebccc
commit 249bcac3c7
74 changed files with 11362 additions and 6761 deletions
--- a/how-to-use-azureml/explain-model/explain-tabular-data-raw-features/explain-sklearn-raw-features.ipynb
+++ b/how-to-use-azureml/explain-model/explain-tabular-data-raw-features/explain-sklearn-raw-features.ipynb
@@ -36,22 +36,6 @@
        "4. Visualize the global and local explanations with the visualization dashboard."
      ]
    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "This example needs sklearn-pandas. If it is not installed, uncomment and run the following line."
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "#!pip install sklearn-pandas"
-      ]
-    },
    {
      "cell_type": "code",
      "execution_count": null,
@@ -63,7 +47,6 @@
        "from sklearn.preprocessing import StandardScaler, OneHotEncoder\n",
        "from sklearn.linear_model import LogisticRegression\n",
        "from azureml.explain.model.tabular_explainer import TabularExplainer\n",
-        "from sklearn_pandas import DataFrameMapper\n",
        "import pandas as pd\n",
        "import numpy as np"
      ]
@@ -113,6 +96,13 @@
        "x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)"
      ]
    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Import the scikit-learn pipeline, imputation, and preprocessing utilities used to build the model."
+      ]
+    },
    {
      "cell_type": "code",
      "execution_count": null,
@@ -121,7 +111,51 @@
      "source": [
        "from sklearn.pipeline import Pipeline\n",
        "from sklearn.impute import SimpleImputer\n",
-        "from sklearn.preprocessing import StandardScaler, OneHotEncoder\n",
+        "from sklearn.preprocessing import StandardScaler, OneHotEncoder"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "We can explain raw features by using either a `sklearn.compose.ColumnTransformer` or a list of fitted transformer tuples. The cell below uses `sklearn.compose.ColumnTransformer`. To run the example with the list of fitted transformer tuples instead, comment out the cell below and uncomment the cell that follows it."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "from sklearn.compose import ColumnTransformer\n",
+        "\n",
+        "transformations = ColumnTransformer([\n",
+        "    (\"age_fare\", Pipeline(steps=[\n",
+        "        ('imputer', SimpleImputer(strategy='median')),\n",
+        "        ('scaler', StandardScaler())\n",
+        "    ]), [\"age\", \"fare\"]),\n",
+        "    (\"embarked\", Pipeline(steps=[\n",
+        "        (\"imputer\", SimpleImputer(strategy='constant', fill_value='missing')), \n",
+        "        (\"encoder\", OneHotEncoder(sparse=False))]), [\"embarked\"]),\n",
+        "    (\"sex_pclass\", OneHotEncoder(sparse=False), [\"sex\", \"pclass\"])    \n",
+        "])\n",
+        "\n",
+        "\n",
+        "# Append classifier to preprocessing pipeline.\n",
+        "# Now we have a full prediction pipeline.\n",
+        "clf = Pipeline(steps=[('preprocessor', transformations),\n",
+        "                      ('classifier', LogisticRegression(solver='lbfgs'))])\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "'''\n",
+        "# Uncomment below if sklearn-pandas is not installed\n",
+        "#!pip install sklearn-pandas\n",
        "from sklearn_pandas import DataFrameMapper\n",
        "\n",
        "# Impute, standardize the numeric features and one-hot encode the categorical features.    \n",
@@ -141,7 +175,8 @@
        "# Append classifier to preprocessing pipeline.\n",
        "# Now we have a full prediction pipeline.\n",
        "clf = Pipeline(steps=[('preprocessor', DataFrameMapper(transformations)),\n",
-        "                      ('classifier', LogisticRegression(solver='lbfgs'))])"
+        "                      ('classifier', LogisticRegression(solver='lbfgs'))])\n",
+        "'''"
      ]
    },
    {