mirror of
https://github.com/Azure/MachineLearningNotebooks.git
synced 2025-12-19 17:17:04 -05:00
101 lines
3.6 KiB
Python
101 lines
3.6 KiB
Python
# Copyright (c) Microsoft. All rights reserved.
|
|
# Licensed under the MIT license.
|
|
import os
|
|
import joblib
|
|
|
|
from interpret.ext.glassbox import LGBMExplainableModel
|
|
from azureml.automl.core.shared.constants import MODEL_PATH
|
|
from azureml.core.experiment import Experiment
|
|
from azureml.core.dataset import Dataset
|
|
from azureml.core.run import Run
|
|
from azureml.interpret.mimic_wrapper import MimicWrapper
|
|
from azureml.interpret.scoring.scoring_explainer import TreeScoringExplainer
|
|
from azureml.train.automl.runtime.automl_explain_utilities import (
|
|
automl_setup_model_explanations,
|
|
automl_check_model_if_explainable,
|
|
)
|
|
|
|
|
|
# Local folder for this script's outputs; it is created at import time and
# an already-existing folder is not treated as an error.
OUTPUT_DIR = "./outputs/"
os.makedirs(name=OUTPUT_DIR, exist_ok=True)
|
|
|
|
# Resolve the workspace through the run this script is executing under
run = Run.get_context()
ws = run.experiment.workspace

# Build a handle to the AutoML run from its experiment name and run id.
# NOTE(review): the "<<...>>" values look like template placeholders that are
# presumably substituted before the script runs — confirm with the caller.
experiment = Experiment(workspace=ws, name="<<experiment_name>>")
automl_run = Run(experiment, "<<run_id>>")
|
|
|
|
# Abort early when explanations cannot be produced for this AutoML model
if not automl_check_model_if_explainable(automl_run):
    # The "run_algorithm" property may be absent, in which case a plain
    # .get() returns None and string concatenation would raise TypeError,
    # hiding the real error. Fall back to a marker and coerce to str so the
    # intended exception is always the one that surfaces.
    run_algorithm = automl_run.get_properties().get("run_algorithm", "<unknown>")
    raise Exception(
        "Model explanations are currently not supported for " + str(run_algorithm)
    )
|
|
|
|
# Fetch the model artifact of the AutoML run into the working directory
automl_run.download_file(MODEL_PATH, "model.pkl")

# Deserialize the downloaded model.
# NOTE(review): joblib.load unpickles the file; this is only safe as long as
# the run's artifact store is trusted.
fitted_model = joblib.load(filename="model.pkl")
|
|
|
|
# Look up the training dataset registered in the workspace
train_dataset = Dataset.get_by_name(ws, "<<train_dataset_name>>")
# Features: every column except the target column
X_train = train_dataset.drop_columns(["<<target_column_name>>"])
# Labels: only the target column
y_train = train_dataset.keep_columns(["<<target_column_name>>"], validate=True)

# Look up the test dataset registered in the workspace
test_dataset = Dataset.get_by_name(ws, "<<test_dataset_name>>")
# Test features: every column except the target column
X_test = test_dataset.drop_columns(["<<target_column_name>>"])
|
|
|
|
# Build the setup object whose attributes feed the explainers created below
automl_explainer_setup_obj = automl_setup_model_explanations(
    fitted_model,
    "<<task>>",
    X=X_train,
    X_test=X_test,
    y=y_train,
    automl_run=automl_run,
)
|
|
|
|
# Keyword options for the Mimic explainer, all taken from the setup object
_mimic_options = {
    "init_dataset": automl_explainer_setup_obj.X_transform,
    "run": automl_explainer_setup_obj.automl_run,
    "features": automl_explainer_setup_obj.engineered_feature_names,
    "feature_maps": [automl_explainer_setup_obj.feature_map],
    "classes": automl_explainer_setup_obj.classes,
}

# Create the Mimic explainer for the AutoML estimator, using
# LGBMExplainableModel as the explainable model
explainer = MimicWrapper(
    ws,
    automl_explainer_setup_obj.automl_estimator,
    LGBMExplainableModel,
    **_mimic_options,
)
|
|
|
|
# Engineered explanations: local and global, evaluated on the transformed
# test data
_engineered_options = {
    "tag": "engineered explanations",
    "eval_dataset": automl_explainer_setup_obj.X_test_transform,
}
engineered_explanations = explainer.explain(["local", "global"], **_engineered_options)

# Raw explanations: same explanation types with get_raw=True, additionally
# passing the raw feature names and the raw test data
_raw_options = {
    "get_raw": True,
    "tag": "raw explanations",
    "raw_feature_names": automl_explainer_setup_obj.raw_feature_names,
    "eval_dataset": automl_explainer_setup_obj.X_test_transform,
    "raw_eval_dataset": automl_explainer_setup_obj.X_test_raw,
}
raw_explanations = explainer.explain(["local", "global"], **_raw_options)

print("Engineered and raw explanations computed successfully")
|
|
|
|
# Wrap the explainer held by the MimicWrapper in a scoring explainer
scoring_explainer = TreeScoringExplainer(
    explainer.explainer,
    feature_maps=[automl_explainer_setup_obj.feature_map],
)

# Serialize the scoring explainer to a file in the current working directory
with open("scoring_explainer.pkl", mode="wb") as stream:
    joblib.dump(scoring_explainer, stream)

# Upload the local pickle to the AutoML run; `name` is the destination on the
# run, `path_or_stream` is the local file written above
automl_run.upload_file(
    name="outputs/scoring_explainer.pkl",
    path_or_stream="scoring_explainer.pkl",
)
|