From 75610ec31cc83471e0eea10f13937a1e13a2b5ad Mon Sep 17 00:00:00 2001 From: vizhur Date: Wed, 27 Nov 2019 21:02:21 +0000 Subject: [PATCH] update samples - test --- configuration.ipynb | 2 +- .../automl_setup.cmd | 2 +- .../automl_setup_linux.sh | 2 +- .../automl_setup_mac.sh | 2 +- ...fication-bank-marketing-all-features.ipynb | 85 +++- .../auto-ml-forecasting-bike-share.ipynb | 2 +- .../auto-ml-forecasting-energy-demand.ipynb | 26 +- .../forecasting-grouping/build.py | 4 +- .../forecasting-grouping/deploy/score.py | 2 +- .../automl-forecasting-function.ipynb | 21 +- ...to-ml-forecasting-orange-juice-sales.ipynb | 23 +- .../train_explainer.py | 7 +- .../deploy-aks-with-controlled-rollout.ipynb | 369 ++++++++++++++++++ .../deploy-aks-with-controlled-rollout.yml | 4 + .../deploy-with-controlled-rollout/score.py | 28 ++ .../sklearn_regression_model.pkl | Bin 0 -> 658 bytes ...-publish-and-run-using-rest-endpoint.ipynb | 21 +- ...s-setup-versioned-pipeline-endpoints.ipynb | 23 +- ...asing-datapath-and-pipelineparameter.ipynb | 30 +- .../pipeline-batch-scoring.ipynb | 28 +- .../pipeline-style-transfer.ipynb | 137 +++++-- ...erparameter-tune-deploy-with-chainer.ipynb | 2 +- .../pytorch_train.py | 2 +- ...erparameter-tune-deploy-with-pytorch.ipynb | 4 +- ...erparameter-tune-deploy-with-sklearn.ipynb | 2 +- ...arameter-tune-deploy-with-tensorflow.ipynb | 2 +- ...-tune-and-warm-start-with-tensorflow.ipynb | 9 +- .../logging-api/logging-api.ipynb | 2 +- ...yperparameter-tune-deploy-with-keras.ipynb | 2 +- .../using-environments.ipynb | 12 +- index.md | 103 ++++- setup-environment/configuration.ipynb | 2 +- ...ipeline-batch-scoring-classification.ipynb | 7 + 33 files changed, 839 insertions(+), 128 deletions(-) create mode 100644 how-to-use-azureml/deployment/deploy-with-controlled-rollout/deploy-aks-with-controlled-rollout.ipynb create mode 100644 how-to-use-azureml/deployment/deploy-with-controlled-rollout/deploy-aks-with-controlled-rollout.yml create mode 100644 
how-to-use-azureml/deployment/deploy-with-controlled-rollout/score.py create mode 100644 how-to-use-azureml/deployment/deploy-with-controlled-rollout/sklearn_regression_model.pkl diff --git a/configuration.ipynb b/configuration.ipynb index 6d56b183..35401ea3 100644 --- a/configuration.ipynb +++ b/configuration.ipynb @@ -103,7 +103,7 @@ "source": [ "import azureml.core\n", "\n", - "print(\"This notebook was created using version 1.0.76.1 of the Azure ML SDK\")\n", + "print(\"This notebook was created using version 1.0.76.2 of the Azure ML SDK\")\n", "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")" ] }, diff --git a/how-to-use-azureml/automated-machine-learning/automl_setup.cmd b/how-to-use-azureml/automated-machine-learning/automl_setup.cmd index dd94a314..8debd7d5 100644 --- a/how-to-use-azureml/automated-machine-learning/automl_setup.cmd +++ b/how-to-use-azureml/automated-machine-learning/automl_setup.cmd @@ -14,7 +14,7 @@ IF "%CONDA_EXE%"=="" GOTO CondaMissing call conda activate %conda_env_name% 2>nul: if not errorlevel 1 ( - echo Upgrading azureml-sdk[automl,notebooks,explain] in existing conda environment %conda_env_name% + echo Upgrading existing conda environment %conda_env_name% call pip uninstall azureml-train-automl -y -q call conda env update --name %conda_env_name% --file %automl_env_file% if errorlevel 1 goto ErrorExit diff --git a/how-to-use-azureml/automated-machine-learning/automl_setup_linux.sh b/how-to-use-azureml/automated-machine-learning/automl_setup_linux.sh index 629da7b8..3674a6cf 100644 --- a/how-to-use-azureml/automated-machine-learning/automl_setup_linux.sh +++ b/how-to-use-azureml/automated-machine-learning/automl_setup_linux.sh @@ -22,7 +22,7 @@ fi if source activate $CONDA_ENV_NAME 2> /dev/null then - echo "Upgrading azureml-sdk[automl,notebooks,explain] in existing conda environment" $CONDA_ENV_NAME + echo "Upgrading existing conda environment" $CONDA_ENV_NAME pip uninstall 
azureml-train-automl -y -q conda env update --name $CONDA_ENV_NAME --file $AUTOML_ENV_FILE && jupyter nbextension uninstall --user --py azureml.widgets diff --git a/how-to-use-azureml/automated-machine-learning/automl_setup_mac.sh b/how-to-use-azureml/automated-machine-learning/automl_setup_mac.sh index 56f86eb0..d81d2438 100644 --- a/how-to-use-azureml/automated-machine-learning/automl_setup_mac.sh +++ b/how-to-use-azureml/automated-machine-learning/automl_setup_mac.sh @@ -22,7 +22,7 @@ fi if source activate $CONDA_ENV_NAME 2> /dev/null then - echo "Upgrading azureml-sdk[automl,notebooks,explain] in existing conda environment" $CONDA_ENV_NAME + echo "Upgrading existing conda environment" $CONDA_ENV_NAME pip uninstall azureml-train-automl -y -q conda env update --name $CONDA_ENV_NAME --file $AUTOML_ENV_FILE && jupyter nbextension uninstall --user --py azureml.widgets diff --git a/how-to-use-azureml/automated-machine-learning/classification-bank-marketing-all-features/auto-ml-classification-bank-marketing-all-features.ipynb b/how-to-use-azureml/automated-machine-learning/classification-bank-marketing-all-features/auto-ml-classification-bank-marketing-all-features.ipynb index eaa6aa4a..1423925a 100644 --- a/how-to-use-azureml/automated-machine-learning/classification-bank-marketing-all-features/auto-ml-classification-bank-marketing-all-features.ipynb +++ b/how-to-use-azureml/automated-machine-learning/classification-bank-marketing-all-features/auto-ml-classification-bank-marketing-all-features.ipynb @@ -285,7 +285,8 @@ "|**task**|classification or regression or forecasting|\n", "|**primary_metric**|This is the metric that you want to optimize. Classification supports the following primary metrics:
accuracy
AUC_weighted
average_precision_score_weighted
norm_macro_recall
precision_score_weighted|\n", "|**iteration_timeout_minutes**|Time limit in minutes for each iteration.|\n", - "|**blacklist_models** or **whitelist_models** |*List* of *strings* indicating machine learning algorithms for AutoML to avoid in this run.

Allowed values for **Classification**
LogisticRegression
SGD
MultinomialNaiveBayes
BernoulliNaiveBayes
SVM
LinearSVM
KNN
DecisionTree
RandomForest
ExtremeRandomTrees
LightGBM
GradientBoosting
TensorFlowDNN
TensorFlowLinearClassifier

Allowed values for **Regression**
ElasticNet
GradientBoosting
DecisionTree
KNN
LassoLars
SGD
RandomForest
ExtremeRandomTrees
LightGBM
TensorFlowLinearRegressor
TensorFlowDNN

Allowed values for **Forecasting**
ElasticNet
GradientBoosting
DecisionTree
KNN
LassoLars
SGD
RandomForest
ExtremeRandomTrees
LightGBM
TensorFlowLinearRegressor
TensorFlowDNN
Arima
Prophet|\n", + "|**blacklist_models** | *List* of *strings* indicating machine learning algorithms for AutoML to avoid in this run.

Allowed values for **Classification**
LogisticRegression
SGD
MultinomialNaiveBayes
BernoulliNaiveBayes
SVM
LinearSVM
KNN
DecisionTree
RandomForest
ExtremeRandomTrees
LightGBM
GradientBoosting
TensorFlowDNN
TensorFlowLinearClassifier

Allowed values for **Regression**
ElasticNet
GradientBoosting
DecisionTree
KNN
LassoLars
SGD
RandomForest
ExtremeRandomTrees
LightGBM
TensorFlowLinearRegressor
TensorFlowDNN

Allowed values for **Forecasting**
ElasticNet
GradientBoosting
DecisionTree
KNN
LassoLars
SGD
RandomForest
ExtremeRandomTrees
LightGBM
TensorFlowLinearRegressor
TensorFlowDNN
Arima
Prophet|\n", + "| **whitelist_models** | *List* of *strings* indicating machine learning algorithms for AutoML to use in this run. Same values listed above for **blacklist_models** allowed for **whitelist_models**.|\n", "|**experiment_exit_score**| Value indicating the target for *primary_metric*.
Once the target is surpassed the run terminates.|\n", "|**experiment_timeout_minutes**| Maximum amount of time in minutes that all iterations combined can take before the experiment terminates.|\n", "|**enable_early_stopping**| Flag to enble early termination if the score is not improving in the short term.|\n", @@ -293,6 +294,7 @@ "|**n_cross_validations**|Number of cross validation splits.|\n", "|**training_data**|Input dataset, containing both features and label column.|\n", "|**label_column_name**|The name of the label column.|\n", + "|**model_explainability**|Indicate to explain each trained pipeline or not.|\n", "\n", "**_You can find more information about primary metrics_** [here](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-configure-auto-train#primary-metric)" ] @@ -324,6 +326,7 @@ " training_data = train_data,\n", " label_column_name = label,\n", " validation_data = validation_dataset,\n", + " model_explainability=True,\n", " **automl_settings\n", " )" ] @@ -456,6 +459,72 @@ "RunDetails(remote_run).show() " ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Retrieve the Best Model's explanation\n", + "Retrieve the explanation from the best_run which includes explanations for engineered features and raw features. Make sure that the run for generating explanations for the best model is completed." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Wait for the best model explanation run to complete\n", + "from azureml.train.automl.run import AutoMLRun\n", + "model_explainability_run_id = remote_run.get_properties().get('ModelExplainRunId')\n", + "print(model_explainability_run_id)\n", + "if model_explainability_run_id is not None:\n", + " model_explainability_run = AutoMLRun(experiment=experiment, run_id=model_explainability_run_id)\n", + " model_explainability_run.wait_for_completion()\n", + "\n", + "# Get the best run object\n", + "best_run, fitted_model = remote_run.get_output()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Download engineered feature importance from artifact store\n", + "You can use ExplanationClient to download the engineered feature explanations from the artifact store of the best_run." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "client = ExplanationClient.from_run(best_run)\n", + "engineered_explanations = client.download_model_explanation(raw=False)\n", + "exp_data = engineered_explanations.get_feature_importance_dict()\n", + "exp_data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Download raw feature importance from artifact store\n", + "You can use ExplanationClient to download the raw feature explanations from the artifact store of the best_run." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "client = ExplanationClient.from_run(best_run)\n", + "engineered_explanations = client.download_model_explanation(raw=True)\n", + "exp_data = engineered_explanations.get_feature_importance_dict()\n", + "exp_data" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -572,20 +641,6 @@ "best_run, fitted_model = remote_run.get_output()" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import shutil\n", - "\n", - "sript_folder = os.path.join(os.getcwd(), 'inference')\n", - "project_folder = '/inference'\n", - "os.makedirs(project_folder, exist_ok=True)" - ] - }, { "cell_type": "code", "execution_count": null, diff --git a/how-to-use-azureml/automated-machine-learning/forecasting-bike-share/auto-ml-forecasting-bike-share.ipynb b/how-to-use-azureml/automated-machine-learning/forecasting-bike-share/auto-ml-forecasting-bike-share.ipynb index c49eb280..2432dfd1 100644 --- a/how-to-use-azureml/automated-machine-learning/forecasting-bike-share/auto-ml-forecasting-bike-share.ipynb +++ b/how-to-use-azureml/automated-machine-learning/forecasting-bike-share/auto-ml-forecasting-bike-share.ipynb @@ -42,7 +42,7 @@ "\n", "AutoML highlights here include built-in holiday featurization, accessing engineered feature names, and working with the `forecast` function. Please also look at the additional forecasting notebooks, which document lagging, rolling windows, forecast quantiles, other ways to use the forecast function, and forecaster deployment.\n", "\n", - "Make sure you have executed the [configuration](../configuration.ipynb) before running this notebook.\n", + "Make sure you have executed the [configuration notebook](../../../configuration.ipynb) before running this notebook.\n", "\n", "Notebook synopsis:\n", "1. 
Creating an Experiment in an existing Workspace\n", diff --git a/how-to-use-azureml/automated-machine-learning/forecasting-energy-demand/auto-ml-forecasting-energy-demand.ipynb b/how-to-use-azureml/automated-machine-learning/forecasting-energy-demand/auto-ml-forecasting-energy-demand.ipynb index 43c13d9e..266dde25 100644 --- a/how-to-use-azureml/automated-machine-learning/forecasting-energy-demand/auto-ml-forecasting-energy-demand.ipynb +++ b/how-to-use-azureml/automated-machine-learning/forecasting-energy-demand/auto-ml-forecasting-energy-demand.ipynb @@ -31,8 +31,8 @@ "1. [Results](#Results)\n", "\n", "Advanced Forecasting\n", - "1. [Advanced Training](#Advanced Training)\n", - "1. [Advanced Results](#Advanced Results)" + "1. [Advanced Training](#advanced_training)\n", + "1. [Advanced Results](#advanced Results)" ] }, { @@ -463,11 +463,7 @@ "metadata": {}, "source": [ "### Forecast Function\n", - "For forecasting, we will use the forecast function instead of the predict function. There are two reasons for this.\n", - "\n", - "We need to pass the recent values of the target variable y, whereas the scikit-compatible predict function only takes the non-target variables 'test'. In our case, the test data immediately follows the training data, and we fill the target variable with NaN. The NaN serves as a question mark for the forecaster to fill with the actuals. Using the forecast function will produce forecasts using the shortest possible forecast horizon. The last time at which a definite (non-NaN) value is seen is the forecast origin - the last time when the value of the target is known.\n", - "\n", - "Using the predict method would result in getting predictions for EVERY horizon the forecaster can predict at. This is useful when training and evaluating the performance of the forecaster at various horizons, but the level of detail is excessive for normal use." + "For forecasting, we will use the forecast function instead of the predict function. 
Using the predict method would result in getting predictions for EVERY horizon the forecaster can predict at. This is useful when training and evaluating the performance of the forecaster at various horizons, but the level of detail is excessive for normal use. Forecast function also can handle more complicated scenarios, see notebook on [high frequency forecasting](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/forecasting-high-frequency/automl-forecasting-function.ipynb)." ] }, { @@ -476,15 +472,10 @@ "metadata": {}, "outputs": [], "source": [ - "# Replace ALL values in y by NaN.\n", - "# The forecast origin will be at the beginning of the first forecast period.\n", - "# (Which is the same time as the end of the last training period.)\n", - "y_query = y_test.copy().astype(np.float)\n", - "y_query.fill(np.nan)\n", "# The featurized data, aligned to y, will also be returned.\n", "# This contains the assumptions that were made in the forecast\n", "# and helps align the forecast to the original data\n", - "y_predictions, X_trans = fitted_model.forecast(X_test, y_query)" + "y_predictions, X_trans = fitted_model.forecast(X_test)" ] }, { @@ -557,7 +548,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Advanced Training\n", + "## Advanced Training \n", "We did not use lags in the previous model specification. In effect, the prediction was the result of a simple regression on date, grain and any additional features. This is often a very good prediction as common time series patterns like seasonality and trends can be captured in this manner. Such simple regression is horizon-less: it doesn't matter how far into the future we are predicting, because we are not using past data. In the previous example, the horizon was only used to split the data for cross-validation." 
] }, @@ -652,15 +643,10 @@ "metadata": {}, "outputs": [], "source": [ - "# Replace ALL values in y by NaN.\n", - "# The forecast origin will be at the beginning of the first forecast period.\n", - "# (Which is the same time as the end of the last training period.)\n", - "y_query = y_test.copy().astype(np.float)\n", - "y_query.fill(np.nan)\n", "# The featurized data, aligned to y, will also be returned.\n", "# This contains the assumptions that were made in the forecast\n", "# and helps align the forecast to the original data\n", - "y_predictions, X_trans = fitted_model_lags.forecast(X_test, y_query)" + "y_predictions, X_trans = fitted_model_lags.forecast(X_test)" ] }, { diff --git a/how-to-use-azureml/automated-machine-learning/forecasting-grouping/build.py b/how-to-use-azureml/automated-machine-learning/forecasting-grouping/build.py index 3a31d3d6..b6a4a8b5 100644 --- a/how-to-use-azureml/automated-machine-learning/forecasting-grouping/build.py +++ b/how-to-use-azureml/automated-machine-learning/forecasting-grouping/build.py @@ -8,6 +8,7 @@ from azureml.core import RunConfiguration from azureml.core.compute import ComputeTarget from azureml.core.conda_dependencies import CondaDependencies from azureml.core.dataset import Dataset +from azureml.data import TabularDataset from azureml.pipeline.core import PipelineData, PipelineParameter, TrainingOutput, StepSequence from azureml.pipeline.steps import PythonScriptStep from azureml.train.automl import AutoMLConfig @@ -34,8 +35,9 @@ def _get_configs(automlconfig: AutoMLConfig, group_name = valid_chars.sub('', group_name) for key in group.index: single = single._dataflow.filter(data._dataflow[key] == group[key]) + t_dataset = TabularDataset._create(single) group_conf = copy.deepcopy(automlconfig) - group_conf.user_settings['training_data'] = single + group_conf.user_settings['training_data'] = t_dataset group_conf.user_settings['label_column_name'] = target_column group_conf.user_settings['compute_target'] = 
compute_target configs[group_name] = group_conf diff --git a/how-to-use-azureml/automated-machine-learning/forecasting-grouping/deploy/score.py b/how-to-use-azureml/automated-machine-learning/forecasting-grouping/deploy/score.py index 2fc754b5..42e89392 100644 --- a/how-to-use-azureml/automated-machine-learning/forecasting-grouping/deploy/score.py +++ b/how-to-use-azureml/automated-machine-learning/forecasting-grouping/deploy/score.py @@ -44,7 +44,7 @@ def run(raw_data): model_path = Model.get_model_path(cur_group) model = joblib.load(model_path) models[cur_group] = model - _, xtrans = models[cur_group].forecast(df_one, np.repeat(np.nan, len(df_one))) + _, xtrans = models[cur_group].forecast(df_one) dfs.append(xtrans) df_ret = pd.concat(dfs) df_ret.reset_index(drop=False, inplace=True) diff --git a/how-to-use-azureml/automated-machine-learning/forecasting-high-frequency/automl-forecasting-function.ipynb b/how-to-use-azureml/automated-machine-learning/forecasting-high-frequency/automl-forecasting-function.ipynb index 21825bbd..f2708ee0 100644 --- a/how-to-use-azureml/automated-machine-learning/forecasting-high-frequency/automl-forecasting-function.ipynb +++ b/how-to-use-azureml/automated-machine-learning/forecasting-high-frequency/automl-forecasting-function.ipynb @@ -377,9 +377,7 @@ "\n", "![Forecasting after training](forecast_function_at_train.png)\n", "\n", - "The `X_test` and `y_query` below, taken together, form the **forecast request**. The two are interpreted as aligned - `y_query` could actally be a column in `X_test`. `NaN`s in `y_query` are the question marks. These will be filled with the forecasts.\n", - "\n", - "When the forecast period immediately follows the training period, the models retain the last few points of data. You can simply fill `y_query` filled with question marks - the model has the data for the lookback already.\n" + "We use `X_test` as a **forecast request** to generate the predictions." 
] }, { @@ -408,8 +406,7 @@ "metadata": {}, "outputs": [], "source": [ - "y_query = np.repeat(np.NaN, X_test.shape[0])\n", - "y_pred_no_gap, xy_nogap = fitted_model.forecast(X_test, y_query)\n", + "y_pred_no_gap, xy_nogap = fitted_model.forecast(X_test)\n", "\n", "# xy_nogap contains the predictions in the _automl_target_col column.\n", "# Those same numbers are output in y_pred_no_gap\n", @@ -437,7 +434,7 @@ "metadata": {}, "outputs": [], "source": [ - "quantiles = fitted_model.forecast_quantiles(X_test, y_query)\n", + "quantiles = fitted_model.forecast_quantiles(X_test)\n", "quantiles" ] }, @@ -460,10 +457,10 @@ "# specify which quantiles you would like \n", "fitted_model.quantiles = [0.01, 0.5, 0.95]\n", "# use forecast_quantiles function, not the forecast() one\n", - "y_pred_quantiles = fitted_model.forecast_quantiles(X_test, y_query)\n", + "y_pred_quantiles = fitted_model.forecast_quantiles(X_test)\n", "\n", "# it all nicely aligns column-wise\n", - "pd.concat([X_test.reset_index(), pd.DataFrame({'query' : y_query}), y_pred_quantiles], axis=1)" + "pd.concat([X_test.reset_index(), y_pred_quantiles], axis=1)" ] }, { @@ -539,9 +536,7 @@ "outputs": [], "source": [ "try: \n", - " y_query = y_away.copy()\n", - " y_query.fill(np.NaN)\n", - " y_pred_away, xy_away = fitted_model.forecast(X_away, y_query)\n", + " y_pred_away, xy_away = fitted_model.forecast(X_away)\n", " xy_away\n", "except Exception as e:\n", " print(e)" @@ -551,7 +546,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "How should we read that eror message? The forecast origin is at the last time the model saw an actual value of `y` (the target). That was at the end of the training data! Because the model received all `NaN` (and not an actual target value), it is attempting to forecast from the end of training data. But the requested forecast periods are past the maximum horizon. We need to provide a define `y` value to establish the forecast origin.\n", + "How should we read that eror message? 
The forecast origin is at the last time the model saw an actual value of `y` (the target). That was at the end of the training data! The model is attempting to forecast from the end of training data. But the requested forecast periods are past the maximum horizon. We need to provide a definite `y` value to establish the forecast origin.\n", "\n", "We will use this helper function to take the required amount of context from the data preceding the testing data. It's definition is intentionally simplified to keep the idea in the clear." ] }, @@ -740,7 +735,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.7" + "version": "3.6.8" }, "tags": [ "Forecasting", diff --git a/how-to-use-azureml/automated-machine-learning/forecasting-orange-juice-sales/auto-ml-forecasting-orange-juice-sales.ipynb b/how-to-use-azureml/automated-machine-learning/forecasting-orange-juice-sales/auto-ml-forecasting-orange-juice-sales.ipynb index 6cd118d6..e8d3a49b 100644 --- a/how-to-use-azureml/automated-machine-learning/forecasting-orange-juice-sales/auto-ml-forecasting-orange-juice-sales.ipynb +++ b/how-to-use-azureml/automated-machine-learning/forecasting-orange-juice-sales/auto-ml-forecasting-orange-juice-sales.ipynb @@ -40,7 +40,7 @@ "## Introduction\n", "In this example, we use AutoML to train, select, and operationalize a time-series forecasting model for multiple time-series.\n", "\n", - "Make sure you have executed the [configuration notebook](../configuration.ipynb) before running this notebook.\n", + "Make sure you have executed the [configuration notebook](../../../configuration.ipynb) before running this notebook.\n", "\n", "The examples in the follow code samples use the University of Chicago's Dominick's Finer Foods dataset to forecast orange juice sales. Dominick's was a grocery chain in the Chicago metropolitan area." 
] @@ -454,9 +454,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "To produce predictions on the test set, we need to know the feature values at all dates in the test set. This requirement is somewhat reasonable for the OJ sales data since the features mainly consist of price, which is usually set in advance, and customer demographics which are approximately constant for each store over the 20 week forecast horizon in the testing data. \n", - "\n", - "We will first create a query `y_query`, which is aligned index-for-index to `X_test`. This is a vector of target values where each `NaN` serves the function of the question mark to be replaced by forecast. Passing definite values in the `y` argument allows the `forecast` function to make predictions on data that does not immediately follow the train data which contains `y`. In each grain, the last time point where the model sees a definite value of `y` is that grain's _forecast origin_." + "To produce predictions on the test set, we need to know the feature values at all dates in the test set. This requirement is somewhat reasonable for the OJ sales data since the features mainly consist of price, which is usually set in advance, and customer demographics which are approximately constant for each store over the 20 week forecast horizon in the testing data." 
] }, { @@ -465,15 +463,10 @@ "metadata": {}, "outputs": [], "source": [ - "# Replace ALL values in y by NaN.\n", - "# The forecast origin will be at the beginning of the first forecast period.\n", - "# (Which is the same time as the end of the last training period.)\n", - "y_query = y_test.copy().astype(np.float)\n", - "y_query.fill(np.nan)\n", "# The featurized data, aligned to y, will also be returned.\n", "# This contains the assumptions that were made in the forecast\n", "# and helps align the forecast to the original data\n", - "y_predictions, X_trans = fitted_model.forecast(X_test, y_query)" + "y_predictions, X_trans = fitted_model.forecast(X_test)" ] }, { @@ -640,7 +633,7 @@ "import json\n", "# The request data frame needs to have y_query column which corresponds to query.\n", "X_query = X_test.copy()\n", - "X_query['y_query'] = y_query\n", + "X_query['y_query'] = np.NaN\n", "# We have to convert datetime to string, because Timestamps cannot be serialized to JSON.\n", "X_query[time_column_name] = X_query[time_column_name].astype(str)\n", "# The Service object accept the complex dictionary, which is internally converted to JSON string.\n", @@ -705,9 +698,6 @@ "framework": [ "Azure ML AutoML" ], - "tags": [ - "None" - ], "friendly_name": "Forecasting orange juice sales with deployment", "index_order": 1, "kernelspec": { @@ -725,8 +715,11 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.7" + "version": "3.6.8" }, + "tags": [ + "None" + ], "task": "Forecasting" }, "nbformat": 4, diff --git a/how-to-use-azureml/automated-machine-learning/regression-hardware-performance-explanation-and-featurization/train_explainer.py b/how-to-use-azureml/automated-machine-learning/regression-hardware-performance-explanation-and-featurization/train_explainer.py index 9d3b8ca5..47360464 100644 --- a/how-to-use-azureml/automated-machine-learning/regression-hardware-performance-explanation-and-featurization/train_explainer.py +++ 
b/how-to-use-azureml/automated-machine-learning/regression-hardware-performance-explanation-and-featurization/train_explainer.py @@ -7,7 +7,7 @@ from azureml.core.experiment import Experiment from sklearn.externals import joblib from azureml.core.dataset import Dataset from azureml.train.automl.runtime.automl_explain_utilities import AutoMLExplainerSetupClass, \ - automl_setup_model_explanations, automl_check_model_if_explainable + automl_setup_model_explanations from azureml.explain.model.mimic.models.lightgbm_model import LGBMExplainableModel from azureml.explain.model.mimic_wrapper import MimicWrapper from automl.client.core.common.constants import MODEL_PATH @@ -25,11 +25,6 @@ ws = run.experiment.workspace experiment = Experiment(ws, '<>') automl_run = Run(experiment=experiment, run_id='<>') -# Check if this AutoML model is explainable -if not automl_check_model_if_explainable(automl_run): - raise Exception("Model explanations is currently not supported for " + automl_run.get_properties().get( - 'run_algorithm')) - # Download the best model from the artifact store automl_run.download_file(name=MODEL_PATH, output_file_path='model.pkl') diff --git a/how-to-use-azureml/deployment/deploy-with-controlled-rollout/deploy-aks-with-controlled-rollout.ipynb b/how-to-use-azureml/deployment/deploy-with-controlled-rollout/deploy-aks-with-controlled-rollout.ipynb new file mode 100644 index 00000000..303f6be7 --- /dev/null +++ b/how-to-use-azureml/deployment/deploy-with-controlled-rollout/deploy-aks-with-controlled-rollout.ipynb @@ -0,0 +1,369 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/deployment/production-deploy-to-aks/production-deploy-to-aks.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Deploy models to Azure Kubernetes Service (AKS) using controlled roll out\n", + "This notebook will show you how to deploy multiple AKS webservices with the same scoring endpoint and how to roll out your models in a controlled manner by configuring % of scoring traffic going to each webservice. If you are using a Notebook VM, you are all set. Otherwise, go through the [configuration notebook](../../../configuration.ipynb) to install the Azure Machine Learning Python SDK and create an Azure ML Workspace." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check for latest version\n", + "import azureml.core\n", + "print(azureml.core.VERSION)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Initialize workspace\n", + "Create a [Workspace](https://docs.microsoft.com/python/api/azureml-core/azureml.core.workspace%28class%29?view=azure-ml-py) object from your persisted configuration." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.workspace import Workspace\n", + "\n", + "ws = Workspace.from_config()\n", + "print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Register the model\n", + "Register a file or folder as a model by calling [Model.register()](https://docs.microsoft.com/python/api/azureml-core/azureml.core.model.model?view=azure-ml-py#register-workspace--model-path--model-name--tags-none--properties-none--description-none--datasets-none--model-framework-none--model-framework-version-none--child-paths-none-).\n", + "In addition to the content of the model file itself, your registered model will also store model metadata -- model description, tags, and framework information -- that will be useful when managing and deploying models in your workspace. Using tags, for instance, you can categorize your models and apply filters when listing models in your workspace." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core import Model\n", + "\n", + "model = Model.register(workspace=ws,\n", + " model_name='sklearn_regression_model.pkl', # Name of the registered model in your workspace.\n", + " model_path='./sklearn_regression_model.pkl', # Local file to upload and register as a model.\n", + " model_framework=Model.Framework.SCIKITLEARN, # Framework used to create the model.\n", + " model_framework_version='0.19.1', # Version of scikit-learn used to create the model.\n", + " description='Ridge regression model to predict diabetes progression.',\n", + " tags={'area': 'diabetes', 'type': 'regression'})\n", + "\n", + "print('Name:', model.name)\n", + "print('Version:', model.version)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Register an environment (for all models)\n", + "\n", + "If you control over how your model is run, or if it has special runtime requirements, you can specify your own environment and scoring method.\n", + "\n", + "Specify the model's runtime environment by creating an [Environment](https://docs.microsoft.com/python/api/azureml-core/azureml.core.environment%28class%29?view=azure-ml-py) object and providing the [CondaDependencies](https://docs.microsoft.com/python/api/azureml-core/azureml.core.conda_dependencies.condadependencies?view=azure-ml-py) needed by your model." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core import Environment\n", + "from azureml.core.conda_dependencies import CondaDependencies\n", + "\n", + "environment=Environment('my-sklearn-environment')\n", + "environment.python.conda_dependencies = CondaDependencies.create(pip_packages=[\n", + " 'azureml-defaults',\n", + " 'inference-schema[numpy-support]',\n", + " 'joblib',\n", + " 'numpy',\n", + " 'scikit-learn'\n", + "])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "When using a custom environment, you must also provide Python code for initializing and running your model. An example script is included with this notebook." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with open('score.py') as f:\n", + " print(f.read())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create the InferenceConfig\n", + "Create the inference configuration to reference your environment and entry script during deployment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.model import InferenceConfig\n", + "\n", + "inference_config = InferenceConfig(entry_script='score.py', \n", + " source_directory='.',\n", + " environment=environment)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Provision the AKS Cluster\n", + "If you already have an AKS cluster attached to this workspace, skip the step below and provide the name of the cluster." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.compute import AksCompute\n", + "from azureml.core.compute import ComputeTarget\n", + "# Use the default configuration (can also provide parameters to customize)\n", + "prov_config = AksCompute.provisioning_configuration()\n", + "\n", + "aks_name = 'my-aks' \n", + "# Create the cluster\n", + "aks_target = ComputeTarget.create(workspace = ws, \n", + " name = aks_name, \n", + " provisioning_configuration = prov_config) \n", + "aks_target.wait_for_completion(show_output=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create an Endpoint and add a version (AKS service)\n", + "This creates a new endpoint and adds a version behind it. By default the first version added is the default version. You can specify the traffic percentile a version takes behind an endpoint. \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# deploying the model and create a new endpoint\n", + "from azureml.core.webservice import AksEndpoint\n", + "# from azureml.core.compute import ComputeTarget\n", + "\n", + "#select a created compute\n", + "compute = ComputeTarget(ws, 'my-aks')\n", + "namespace_name=\"endpointnamespace\"\n", + "# define the endpoint name\n", + "endpoint_name = \"myendpoint1\"\n", + "# define the service name\n", + "version_name= \"versiona\"\n", + "\n", + "endpoint_deployment_config = AksEndpoint.deploy_configuration(tags = {'modelVersion':'firstversion', 'department':'finance'}, \n", + " description = \"my first version\", namespace = namespace_name, \n", + " version_name = version_name, traffic_percentile = 40)\n", + "\n", + "endpoint = Model.deploy(ws, endpoint_name, [model], inference_config, endpoint_deployment_config, compute)\n", + "endpoint.wait_for_deployment(True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"metadata": {}, + "outputs": [], + "source": [ + "endpoint.get_logs()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Add another version of the service to an existing endpoint\n", + "This adds another version behind an existing endpoint. You can specify the traffic percentile the new version takes. If no traffic_percentile is specified then it defaults to 0. All the unspecified traffic percentile (in this example 50) across all versions goes to default version." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Adding a new version to an existing Endpoint.\n", + "version_name_add=\"versionb\" \n", + "\n", + "endpoint.create_version(version_name = version_name_add, inference_config=inference_config, models=[model], tags = {'modelVersion':'secondversion', 'department':'finance'}, \n", + " description = \"my second version\", traffic_percentile = 10)\n", + "endpoint.wait_for_deployment(True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Update an existing version in an endpoint\n", + "There are two types of versions: control and treatment. An endpoint contains one or more treatment versions but only one control version. This categorization helps compare the different versions against the defined control version." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "endpoint.update_version(version_name=endpoint.versions[version_name_add].name, description=\"my second version update\", traffic_percentile=40, is_default=True, is_control_version_type=True)\n", + "endpoint.wait_for_deployment(True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Test the web service using run method\n", + "Test the web service by passing in data. Run() method retrieves API keys behind the scenes to make sure that the call is authenticated."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Scoring on endpoint\n", + "import json\n", + "test_sample = json.dumps({'data': [\n", + " [1,2,3,4,5,6,7,8,9,10], \n", + " [10,9,8,7,6,5,4,3,2,1]\n", + "]})\n", + "\n", + "test_sample_encoded = bytes(test_sample, encoding='utf8')\n", + "prediction = endpoint.run(input_data=test_sample_encoded)\n", + "print(prediction)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Delete Resources" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# deleting a version in an endpoint\n", + "endpoint.delete_version(version_name=version_name)\n", + "endpoint.wait_for_deployment(True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# deleting an endpoint, this will delete all versions in the endpoint and the endpoint itself\n", + "endpoint.delete()" + ] + } + ], + "metadata": { + "authors": [ + { + "name": "shipatel" + } + ], + "category": "deployment", + "compute": [ + "None" + ], + "datasets": [ + "Diabetes" + ], + "deployment": [ + "Azure Kubernetes Service" + ], + "exclude_from_index": false, + "framework": [ + "Scikit-learn" + ], + "friendly_name": "Deploy models to AKS using controlled roll out", + "index_order": 3, + "kernelspec": { + "display_name": "Python 3.6", + "language": "python", + "name": "python36" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.0" + }, + "star_tag": [ + "featured" + ], + "tags": [ + "None" + ], + "task": "Deploy a model with Azure Machine Learning" + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git 
a/how-to-use-azureml/deployment/deploy-with-controlled-rollout/deploy-aks-with-controlled-rollout.yml b/how-to-use-azureml/deployment/deploy-with-controlled-rollout/deploy-aks-with-controlled-rollout.yml new file mode 100644 index 00000000..0bf2d3a4 --- /dev/null +++ b/how-to-use-azureml/deployment/deploy-with-controlled-rollout/deploy-aks-with-controlled-rollout.yml @@ -0,0 +1,4 @@ +name: deploy-aks-with-controlled-rollout +dependencies: +- pip: + - azureml-sdk diff --git a/how-to-use-azureml/deployment/deploy-with-controlled-rollout/score.py b/how-to-use-azureml/deployment/deploy-with-controlled-rollout/score.py new file mode 100644 index 00000000..50cdc3ff --- /dev/null +++ b/how-to-use-azureml/deployment/deploy-with-controlled-rollout/score.py @@ -0,0 +1,28 @@ +import pickle +import json +import numpy +from sklearn.externals import joblib +from sklearn.linear_model import Ridge +from azureml.core.model import Model + + +def init(): + global model + # note here "sklearn_regression_model.pkl" is the name of the model registered under + # this is a different behavior than before when the code is run locally, even though the code is the same. 
+ model_path = Model.get_model_path('sklearn_regression_model.pkl') + # deserialize the model file back into a sklearn model + model = joblib.load(model_path) + + +# note you can pass in multiple rows for scoring +def run(raw_data): + try: + data = json.loads(raw_data)['data'] + data = numpy.array(data) + result = model.predict(data) + # you can return any data type as long as it is JSON-serializable + return result.tolist() + except Exception as e: + error = str(e) + return error diff --git a/how-to-use-azureml/deployment/deploy-with-controlled-rollout/sklearn_regression_model.pkl b/how-to-use-azureml/deployment/deploy-with-controlled-rollout/sklearn_regression_model.pkl new file mode 100644 index 0000000000000000000000000000000000000000..d10309b6cf4c8e87846850edfe8b8d54a1aa64f2 GIT binary patch literal 658 zcmX|<&rcLF6vt=T1%{DT5XE00;?FfP6EJauKjMzrgOLdnGx0~`rRmHRcC<6?n;*&= zN%R0L{srpQlL>dP>%oH_y>T~@%bJ)Nz2J{-A-uzD`+a?Vub=jL(N7;SN|M-QVJt@+ z@qjWj34;Y{xXOce{sk14pr?X*HBBQ-Gzb*^IFCfr^m#(fC}&wnl7uvk)F+H229&nr zMvyfHHJ}&u$kh26=(9DuunPSy=oPz&3R1lW1CHa&{*$Jhtz}?%b^Xoju5Hv{&k78> zP)6nM5n+bIIHQSAMFx9YXh4cFPa?v?rxf!-0Au_Kjv^vpvXy(kXKN2W-78z0 z>vNZ`Pm(rkKN~H7ZCY%7rZV=8Sr`9mTen_aZBJFl-Q~M?+P^-#owpkc?F;ts_YsV( zD-%xj=Voq+)eoJuDzZ?&tPv}u7*0B>