From 44a7481ed17b9365960f43392ca79847619f55b5 Mon Sep 17 00:00:00 2001 From: vizhur Date: Mon, 19 Aug 2019 23:33:44 +0000 Subject: [PATCH] update samples from Release-141 as a part of 1.0.57 SDK release --- configuration.ipynb | 2 +- how-to-use-azureml/README.md | 2 +- .../automated-machine-learning/README.md | 20 +- .../automated-machine-learning/automl_env.yml | 5 +- .../automl_env_mac.yml | 5 +- ...uto-ml-classification-bank-marketing.ipynb | 53 +- .../auto-ml-classification-bank-marketing.yml | 2 + ...-ml-classification-credit-card-fraud.ipynb | 37 +- ...to-ml-classification-credit-card-fraud.yml | 2 + ...to-ml-classification-with-deployment.ipynb | 6 +- .../auto-ml-dataset-remote-execution.ipynb | 509 ++ .../auto-ml-dataset-remote-execution.yml | 10 + .../dataset/auto-ml-dataset.ipynb | 402 ++ .../dataset/auto-ml-dataset.yml | 8 + .../auto-ml-forecasting-energy-demand.ipynb | 3 +- ...to-ml-forecasting-orange-juice-sales.ipynb | 12 +- ...auto-ml-regression-concrete-strength.ipynb | 36 +- .../auto-ml-regression-concrete-strength.yml | 2 + ...o-ml-regression-hardware-performance.ipynb | 36 +- ...uto-ml-regression-hardware-performance.yml | 2 + .../auto-ml-remote-amlcompute-with-onnx.ipynb | 34 +- .../auto-ml-remote-amlcompute-with-onnx.yml | 2 + .../auto-ml-remote-amlcompute.ipynb | 29 +- .../auto-ml-remote-amlcompute.yml | 2 + .../sql-server/setup/auto-ml-sql-setup.ipynb | 1 - .../automl/automl-databricks-local-01.ipynb | 29 +- ...oml-databricks-local-with-deployment.ipynb | 29 +- .../model-register-and-deploy.ipynb | 35 +- .../accelerated-models-object-detection.ipynb | 2 +- .../accelerated-models-quickstart.ipynb | 2 +- .../accelerated-models-training.ipynb | 2 +- ...e-app-insights-in-production-service.ipynb | 46 +- .../onnx-convert-aml-deploy-tinyyolo.ipynb | 56 +- ...facial-expression-recognition-deploy.ipynb | 72 +- .../onnx/onnx-inference-mnist-deploy.ipynb | 57 +- .../onnx-modelzoo-aml-deploy-resnet50.ipynb | 54 +- 
.../onnx-train-pytorch-aml-deploy-mnist.ipynb | 55 +- .../production-deploy-to-aks.ipynb | 222 +- .../explain-model-on-amlcompute.ipynb | 748 +++ .../explain-model-on-amlcompute.yml | 8 + .../remote-explanation/train_explain.py | 63 + ...explain-model-on-amlcompute-and-deploy.yml | 2 +- .../aml-pipelines-getting-started.ipynb | 2 +- ...nes-parameter-tuning-with-hyperdrive.ipynb | 6 +- ...nes-use-databricks-as-compute-target.ipynb | 2 +- ...with-automated-machine-learning-step.ipynb | 39 +- ...-taxi-data-regression-model-building.ipynb | 21 + .../pipeline-batch-scoring.ipynb | 8 +- .../pipeline-style-transfer.ipynb | 2 +- .../authentication-in-azureml.ipynb | 173 +- .../track-and-monitor-experiments/README.md | 19 + .../logging-api/img/run_details.PNG | Bin 0 -> 30114 bytes .../logging-api/img/run_history.PNG | Bin 0 -> 32412 bytes .../logging-api/logging-api.ipynb | 545 ++ .../logging-api/logging-api.yml | 8 + .../manage-runs/hello.py | 7 + .../manage-runs/hello_with_children.py | 11 + .../manage-runs/hello_with_delay.py | 8 + .../manage-runs/manage-runs.ipynb | 602 +++ .../manage-runs/manage-runs.yml | 4 + .../tensorboard/tensorboard.ipynb | 562 +++ .../tensorboard/tensorboard.yml | 6 + .../deploy-model/deploy-model.ipynb | 322 ++ .../deploy-model/deploy-model.yml | 8 + .../train-deploy-pytorch/scripts/train.py | 150 + .../train-and-deploy-pytorch.ipynb | 481 ++ .../train-and-deploy-pytorch.yml | 8 + .../train-local/train-local.ipynb | 248 + .../using-mlflow/train-local/train-local.yml | 7 + .../train-remote/train-remote.ipynb | 318 ++ .../train-remote/train-remote.yml | 4 + .../train-remote/train_diabetes.py | 46 + .../distributed-cntk-with-custom-docker.ipynb | 6 +- ...erparameter-tune-deploy-with-chainer.ipynb | 6 +- ...erparameter-tune-deploy-with-pytorch.ipynb | 6 +- ...arameter-tune-deploy-with-tensorflow.ipynb | 6 +- how-to-use-azureml/training/README.md | 4 +- ...erparameter-tune-deploy-with-sklearn.ipynb | 6 +- .../using-environments.ipynb | 6 +- 
how-to-use-azureml/work-with-data/README.md | 9 + .../work-with-data/dataprep/README.md | 300 ++ .../new-york-taxi/new-york-taxi.ipynb | 513 ++ .../new-york-taxi_scale-out.ipynb | 135 + .../dataprep/data/ADLSgen2-datapreptest.crt | 45 + .../dataprep/data/adls-dpreptestfiles.crt | 45 + .../dataprep/data/chicago-aldermen-2015.csv | 54 + .../dataprep/data/crime-dirty.csv | 15 + .../dataprep/data/crime-full.csv | 1001 ++++ .../dataprep/data/crime-spring.csv | 11 + .../dataprep/data/crime-winter.csv | 11 + .../work-with-data/dataprep/data/crime.dprep | 204 + .../dataprep/data/crime.parquet | Bin 0 -> 3607 bytes .../work-with-data/dataprep/data/crime.txt | 10 + .../work-with-data/dataprep/data/crime.xlsx | Bin 0 -> 16109 bytes .../work-with-data/dataprep/data/crime.zip | Bin 0 -> 3685 bytes .../dataprep/data/crime_duplicate_headers.csv | 12 + .../dataprep/data/crime_fixed_width_file.txt | 10 + .../data/crime_multiple_separators.csv | 11 + .../dataprep/data/crime_partfiles/_SUCCESS | 0 ...8e77b-f17a-4c20-972c-aa382e830fca-c000.csv | 914 ++++ ...8e77b-f17a-4c20-972c-aa382e830fca-c000.csv | 921 ++++ ...8e77b-f17a-4c20-972c-aa382e830fca-c000.csv | 930 ++++ ...8e77b-f17a-4c20-972c-aa382e830fca-c000.csv | 953 ++++ ...8e77b-f17a-4c20-972c-aa382e830fca-c000.csv | 923 ++++ ...8e77b-f17a-4c20-972c-aa382e830fca-c000.csv | 887 ++++ ...8e77b-f17a-4c20-972c-aa382e830fca-c000.csv | 971 ++++ ...8e77b-f17a-4c20-972c-aa382e830fca-c000.csv | 759 +++ .../work-with-data/dataprep/data/json.json | 1306 +++++ .../dataprep/data/large_dflow.json | 4415 +++++++++++++++++ .../work-with-data/dataprep/data/map_func.py | 4 + .../dataprep/data/median_income.csv | 251 + .../data/median_income_transformed.csv | 251 + .../dataprep/data/parquet.parquet | Bin 0 -> 3091 bytes ...7a7-c3cd-4926-92b2-ba2dcd3f95b7.gz.parquet | Bin 0 -> 6078 bytes ...7a7-c3cd-4926-92b2-ba2dcd3f95b7.gz.parquet | Bin 0 -> 5083 bytes .../dataprep/data/secrets.dprep | 63 + .../dataprep/data/stream-path.csv | 11 + 
.../add-column-using-expression.ipynb | 360 ++ .../append-columns-and-rows.ipynb | 251 + .../dataprep/how-to-guides/assertions.ipynb | 133 + .../how-to-guides/auto-read-file.ipynb | 189 + .../dataprep/how-to-guides/cache.ipynb | 194 + .../how-to-guides/column-manipulations.ipynb | 563 +++ .../column-type-transforms.ipynb | 473 ++ .../custom-python-transforms.ipynb | 231 + .../how-to-guides/data-ingestion.ipynb | 1111 +++++ .../dataprep/how-to-guides/data-profile.ipynb | 179 + .../dataprep/how-to-guides/datastore.ipynb | 246 + .../derive-column-by-example.ipynb | 187 + .../how-to-guides/external-references.ipynb | 118 + .../dataprep/how-to-guides/filtering.ipynb | 220 + .../dataprep/how-to-guides/fuzzy-group.ipynb | 211 + .../how-to-guides/impute-missing-values.ipynb | 147 + .../dataprep/how-to-guides/join.ipynb | 265 + .../how-to-guides/label-encoder.ipynb | 168 + .../how-to-guides/min-max-scaler.ipynb | 239 + .../how-to-guides/one-hot-encoder.ipynb | 179 + .../how-to-guides/open-save-dataflows.ipynb | 171 + .../quantile-transformation.ipynb | 91 + .../dataprep/how-to-guides/random-split.ipynb | 145 + ...replace-datasource-replace-reference.ipynb | 130 + .../how-to-guides/replace-fill-error.ipynb | 239 + .../dataprep/how-to-guides/secrets.ipynb | 140 + .../how-to-guides/semantic-types.ipynb | 164 + .../split-column-by-example.ipynb | 220 + .../how-to-guides/subsetting-sampling.ipynb | 217 + .../dataprep/how-to-guides/summarize.ipynb | 590 +++ .../working-with-file-streams.ipynb | 192 + .../dataprep/how-to-guides/writing-data.ipynb | 183 + .../getting-started/getting-started.ipynb | 433 ++ .../work-with-data/datasets/README.md | 20 + .../datasets/dataset-api-change-notice.md | 57 + .../datasets-diff/datasets-diff.ipynb | 796 +++ .../tabular-dataset-tutorial.ipynb | 312 ++ .../train-dataset/Titanic.csv | 892 ++++ .../datasets-tutorial/train-dataset/train.py | 43 + setup-environment/configuration.ipynb | 2 +- tutorials/regression-part1-data-prep.ipynb | 7 +- 158 files 
changed, 32395 insertions(+), 619 deletions(-) create mode 100644 how-to-use-azureml/automated-machine-learning/dataset-remote-execution/auto-ml-dataset-remote-execution.ipynb create mode 100644 how-to-use-azureml/automated-machine-learning/dataset-remote-execution/auto-ml-dataset-remote-execution.yml create mode 100644 how-to-use-azureml/automated-machine-learning/dataset/auto-ml-dataset.ipynb create mode 100644 how-to-use-azureml/automated-machine-learning/dataset/auto-ml-dataset.yml create mode 100644 how-to-use-azureml/explain-model/azure-integration/remote-explanation/explain-model-on-amlcompute.ipynb create mode 100644 how-to-use-azureml/explain-model/azure-integration/remote-explanation/explain-model-on-amlcompute.yml create mode 100644 how-to-use-azureml/explain-model/azure-integration/remote-explanation/train_explain.py create mode 100644 how-to-use-azureml/track-and-monitor-experiments/README.md create mode 100644 how-to-use-azureml/track-and-monitor-experiments/logging-api/img/run_details.PNG create mode 100644 how-to-use-azureml/track-and-monitor-experiments/logging-api/img/run_history.PNG create mode 100644 how-to-use-azureml/track-and-monitor-experiments/logging-api/logging-api.ipynb create mode 100644 how-to-use-azureml/track-and-monitor-experiments/logging-api/logging-api.yml create mode 100644 how-to-use-azureml/track-and-monitor-experiments/manage-runs/hello.py create mode 100644 how-to-use-azureml/track-and-monitor-experiments/manage-runs/hello_with_children.py create mode 100644 how-to-use-azureml/track-and-monitor-experiments/manage-runs/hello_with_delay.py create mode 100644 how-to-use-azureml/track-and-monitor-experiments/manage-runs/manage-runs.ipynb create mode 100644 how-to-use-azureml/track-and-monitor-experiments/manage-runs/manage-runs.yml create mode 100644 how-to-use-azureml/track-and-monitor-experiments/tensorboard/tensorboard.ipynb create mode 100644 how-to-use-azureml/track-and-monitor-experiments/tensorboard/tensorboard.yml create 
mode 100644 how-to-use-azureml/track-and-monitor-experiments/using-mlflow/deploy-model/deploy-model.ipynb create mode 100644 how-to-use-azureml/track-and-monitor-experiments/using-mlflow/deploy-model/deploy-model.yml create mode 100644 how-to-use-azureml/track-and-monitor-experiments/using-mlflow/train-deploy-pytorch/scripts/train.py create mode 100644 how-to-use-azureml/track-and-monitor-experiments/using-mlflow/train-deploy-pytorch/train-and-deploy-pytorch.ipynb create mode 100644 how-to-use-azureml/track-and-monitor-experiments/using-mlflow/train-deploy-pytorch/train-and-deploy-pytorch.yml create mode 100644 how-to-use-azureml/track-and-monitor-experiments/using-mlflow/train-local/train-local.ipynb create mode 100644 how-to-use-azureml/track-and-monitor-experiments/using-mlflow/train-local/train-local.yml create mode 100644 how-to-use-azureml/track-and-monitor-experiments/using-mlflow/train-remote/train-remote.ipynb create mode 100644 how-to-use-azureml/track-and-monitor-experiments/using-mlflow/train-remote/train-remote.yml create mode 100644 how-to-use-azureml/track-and-monitor-experiments/using-mlflow/train-remote/train_diabetes.py create mode 100644 how-to-use-azureml/work-with-data/README.md create mode 100644 how-to-use-azureml/work-with-data/dataprep/README.md create mode 100644 how-to-use-azureml/work-with-data/dataprep/case-studies/new-york-taxi/new-york-taxi.ipynb create mode 100644 how-to-use-azureml/work-with-data/dataprep/case-studies/new-york-taxi/new-york-taxi_scale-out.ipynb create mode 100644 how-to-use-azureml/work-with-data/dataprep/data/ADLSgen2-datapreptest.crt create mode 100644 how-to-use-azureml/work-with-data/dataprep/data/adls-dpreptestfiles.crt create mode 100644 how-to-use-azureml/work-with-data/dataprep/data/chicago-aldermen-2015.csv create mode 100644 how-to-use-azureml/work-with-data/dataprep/data/crime-dirty.csv create mode 100644 how-to-use-azureml/work-with-data/dataprep/data/crime-full.csv create mode 100644 
how-to-use-azureml/work-with-data/dataprep/data/crime-spring.csv create mode 100644 how-to-use-azureml/work-with-data/dataprep/data/crime-winter.csv create mode 100644 how-to-use-azureml/work-with-data/dataprep/data/crime.dprep create mode 100644 how-to-use-azureml/work-with-data/dataprep/data/crime.parquet create mode 100644 how-to-use-azureml/work-with-data/dataprep/data/crime.txt create mode 100644 how-to-use-azureml/work-with-data/dataprep/data/crime.xlsx create mode 100644 how-to-use-azureml/work-with-data/dataprep/data/crime.zip create mode 100644 how-to-use-azureml/work-with-data/dataprep/data/crime_duplicate_headers.csv create mode 100644 how-to-use-azureml/work-with-data/dataprep/data/crime_fixed_width_file.txt create mode 100644 how-to-use-azureml/work-with-data/dataprep/data/crime_multiple_separators.csv create mode 100644 how-to-use-azureml/work-with-data/dataprep/data/crime_partfiles/_SUCCESS create mode 100644 how-to-use-azureml/work-with-data/dataprep/data/crime_partfiles/part-00000-0b08e77b-f17a-4c20-972c-aa382e830fca-c000.csv create mode 100644 how-to-use-azureml/work-with-data/dataprep/data/crime_partfiles/part-00001-0b08e77b-f17a-4c20-972c-aa382e830fca-c000.csv create mode 100644 how-to-use-azureml/work-with-data/dataprep/data/crime_partfiles/part-00002-0b08e77b-f17a-4c20-972c-aa382e830fca-c000.csv create mode 100644 how-to-use-azureml/work-with-data/dataprep/data/crime_partfiles/part-00003-0b08e77b-f17a-4c20-972c-aa382e830fca-c000.csv create mode 100644 how-to-use-azureml/work-with-data/dataprep/data/crime_partfiles/part-00004-0b08e77b-f17a-4c20-972c-aa382e830fca-c000.csv create mode 100644 how-to-use-azureml/work-with-data/dataprep/data/crime_partfiles/part-00005-0b08e77b-f17a-4c20-972c-aa382e830fca-c000.csv create mode 100644 how-to-use-azureml/work-with-data/dataprep/data/crime_partfiles/part-00006-0b08e77b-f17a-4c20-972c-aa382e830fca-c000.csv create mode 100644 
how-to-use-azureml/work-with-data/dataprep/data/crime_partfiles/part-00007-0b08e77b-f17a-4c20-972c-aa382e830fca-c000.csv create mode 100644 how-to-use-azureml/work-with-data/dataprep/data/json.json create mode 100644 how-to-use-azureml/work-with-data/dataprep/data/large_dflow.json create mode 100644 how-to-use-azureml/work-with-data/dataprep/data/map_func.py create mode 100644 how-to-use-azureml/work-with-data/dataprep/data/median_income.csv create mode 100644 how-to-use-azureml/work-with-data/dataprep/data/median_income_transformed.csv create mode 100644 how-to-use-azureml/work-with-data/dataprep/data/parquet.parquet create mode 100644 how-to-use-azureml/work-with-data/dataprep/data/parquet_dataset/Arrest=false/part-00000-34f8a7a7-c3cd-4926-92b2-ba2dcd3f95b7.gz.parquet create mode 100644 how-to-use-azureml/work-with-data/dataprep/data/parquet_dataset/Arrest=true/part-00000-34f8a7a7-c3cd-4926-92b2-ba2dcd3f95b7.gz.parquet create mode 100644 how-to-use-azureml/work-with-data/dataprep/data/secrets.dprep create mode 100644 how-to-use-azureml/work-with-data/dataprep/data/stream-path.csv create mode 100644 how-to-use-azureml/work-with-data/dataprep/how-to-guides/add-column-using-expression.ipynb create mode 100644 how-to-use-azureml/work-with-data/dataprep/how-to-guides/append-columns-and-rows.ipynb create mode 100644 how-to-use-azureml/work-with-data/dataprep/how-to-guides/assertions.ipynb create mode 100644 how-to-use-azureml/work-with-data/dataprep/how-to-guides/auto-read-file.ipynb create mode 100644 how-to-use-azureml/work-with-data/dataprep/how-to-guides/cache.ipynb create mode 100644 how-to-use-azureml/work-with-data/dataprep/how-to-guides/column-manipulations.ipynb create mode 100644 how-to-use-azureml/work-with-data/dataprep/how-to-guides/column-type-transforms.ipynb create mode 100644 how-to-use-azureml/work-with-data/dataprep/how-to-guides/custom-python-transforms.ipynb create mode 100644 
how-to-use-azureml/work-with-data/dataprep/how-to-guides/data-ingestion.ipynb create mode 100644 how-to-use-azureml/work-with-data/dataprep/how-to-guides/data-profile.ipynb create mode 100644 how-to-use-azureml/work-with-data/dataprep/how-to-guides/datastore.ipynb create mode 100644 how-to-use-azureml/work-with-data/dataprep/how-to-guides/derive-column-by-example.ipynb create mode 100644 how-to-use-azureml/work-with-data/dataprep/how-to-guides/external-references.ipynb create mode 100644 how-to-use-azureml/work-with-data/dataprep/how-to-guides/filtering.ipynb create mode 100644 how-to-use-azureml/work-with-data/dataprep/how-to-guides/fuzzy-group.ipynb create mode 100644 how-to-use-azureml/work-with-data/dataprep/how-to-guides/impute-missing-values.ipynb create mode 100644 how-to-use-azureml/work-with-data/dataprep/how-to-guides/join.ipynb create mode 100644 how-to-use-azureml/work-with-data/dataprep/how-to-guides/label-encoder.ipynb create mode 100644 how-to-use-azureml/work-with-data/dataprep/how-to-guides/min-max-scaler.ipynb create mode 100644 how-to-use-azureml/work-with-data/dataprep/how-to-guides/one-hot-encoder.ipynb create mode 100644 how-to-use-azureml/work-with-data/dataprep/how-to-guides/open-save-dataflows.ipynb create mode 100644 how-to-use-azureml/work-with-data/dataprep/how-to-guides/quantile-transformation.ipynb create mode 100644 how-to-use-azureml/work-with-data/dataprep/how-to-guides/random-split.ipynb create mode 100644 how-to-use-azureml/work-with-data/dataprep/how-to-guides/replace-datasource-replace-reference.ipynb create mode 100644 how-to-use-azureml/work-with-data/dataprep/how-to-guides/replace-fill-error.ipynb create mode 100644 how-to-use-azureml/work-with-data/dataprep/how-to-guides/secrets.ipynb create mode 100644 how-to-use-azureml/work-with-data/dataprep/how-to-guides/semantic-types.ipynb create mode 100644 how-to-use-azureml/work-with-data/dataprep/how-to-guides/split-column-by-example.ipynb create mode 100644 
how-to-use-azureml/work-with-data/dataprep/how-to-guides/subsetting-sampling.ipynb create mode 100644 how-to-use-azureml/work-with-data/dataprep/how-to-guides/summarize.ipynb create mode 100644 how-to-use-azureml/work-with-data/dataprep/how-to-guides/working-with-file-streams.ipynb create mode 100644 how-to-use-azureml/work-with-data/dataprep/how-to-guides/writing-data.ipynb create mode 100644 how-to-use-azureml/work-with-data/dataprep/tutorials/getting-started/getting-started.ipynb create mode 100644 how-to-use-azureml/work-with-data/datasets/README.md create mode 100644 how-to-use-azureml/work-with-data/datasets/dataset-api-change-notice.md create mode 100644 how-to-use-azureml/work-with-data/datasets/datasets-diff/datasets-diff.ipynb create mode 100644 how-to-use-azureml/work-with-data/datasets/datasets-tutorial/tabular-dataset-tutorial.ipynb create mode 100644 how-to-use-azureml/work-with-data/datasets/datasets-tutorial/train-dataset/Titanic.csv create mode 100644 how-to-use-azureml/work-with-data/datasets/datasets-tutorial/train-dataset/train.py diff --git a/configuration.ipynb b/configuration.ipynb index b89b6e00..d82c6131 100644 --- a/configuration.ipynb +++ b/configuration.ipynb @@ -103,7 +103,7 @@ "source": [ "import azureml.core\n", "\n", - "print(\"This notebook was created using version 1.0.55 of the Azure ML SDK\")\n", + "print(\"This notebook was created using version 1.0.57 of the Azure ML SDK\")\n", "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")" ] }, diff --git a/how-to-use-azureml/README.md b/how-to-use-azureml/README.md index cedd4581..ee4829e0 100644 --- a/how-to-use-azureml/README.md +++ b/how-to-use-azureml/README.md @@ -8,7 +8,7 @@ As a pre-requisite, run the [configuration Notebook](../configuration.ipynb) not * [train-on-local](./training/train-on-local): Learn how to submit a run to local computer and use Azure ML managed run configuration. 
* [train-on-amlcompute](./training/train-on-amlcompute): Use a 1-n node Azure ML managed compute cluster for remote runs on Azure CPU or GPU infrastructure. * [train-on-remote-vm](./training/train-on-remote-vm): Use Data Science Virtual Machine as a target for remote runs. -* [logging-api](./training/logging-api): Learn about the details of logging metrics to run history. +* [logging-api](./track-and-monitor-experiments/logging-api): Learn about the details of logging metrics to run history. * [register-model-create-image-deploy-service](./deployment/register-model-create-image-deploy-service): Learn about the details of model management. * [production-deploy-to-aks](./deployment/production-deploy-to-aks) Deploy a model to production at scale on Azure Kubernetes Service. * [enable-data-collection-for-models-in-aks](./deployment/enable-data-collection-for-models-in-aks) Learn about data collection APIs for deployed model. diff --git a/how-to-use-azureml/automated-machine-learning/README.md b/how-to-use-azureml/automated-machine-learning/README.md index 4d95e16a..adc4a1d6 100644 --- a/how-to-use-azureml/automated-machine-learning/README.md +++ b/how-to-use-azureml/automated-machine-learning/README.md @@ -155,11 +155,11 @@ jupyter notebook - [auto-ml-subsampling-local.ipynb](subsampling/auto-ml-subsampling-local.ipynb) - How to enable subsampling -- [auto-ml-dataprep.ipynb](dataprep/auto-ml-dataprep.ipynb) - - Using DataPrep for reading data +- [auto-ml-dataset.ipynb](dataprep/auto-ml-dataset.ipynb) + - Using Dataset for reading data -- [auto-ml-dataprep-remote-execution.ipynb](dataprep-remote-execution/auto-ml-dataprep-remote-execution.ipynb) - - Using DataPrep for reading data with remote execution +- [auto-ml-dataset-remote-execution.ipynb](dataprep-remote-execution/auto-ml-dataset-remote-execution.ipynb) + - Using Dataset for reading data with remote execution - 
[auto-ml-classification-with-whitelisting.ipynb](classification-with-whitelisting/auto-ml-classification-with-whitelisting.ipynb) - Dataset: scikit learn's [digit dataset](http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html#sklearn.datasets.load_digits) @@ -229,7 +229,7 @@ The main code of the file must be indented so that it is under this condition. 2. Check that you have conda 64-bit installed rather than 32-bit. You can check this with the command `conda info`. The `platform` should be `win-64` for Windows or `osx-64` for Mac. 3. Check that you have conda 4.4.10 or later. You can check the version with the command `conda -V`. If you have a previous version installed, you can update it using the command: `conda update conda`. 4. On Linux, if the error is `gcc: error trying to exec 'cc1plus': execvp: No such file or directory`, install build essentials using the command `sudo apt-get install build-essential`. -5. Pass a new name as the first parameter to automl_setup so that it creates a new conda environment. You can view existing conda environments using `conda env list` and remove them with `conda env remove -n `. +5. Pass a new name as the first parameter to automl_setup so that it creates a new conda environment. You can view existing conda environments using `conda env list` and remove them with `conda env remove -n `. ## automl_setup_linux.sh fails If automl_setup_linux.sh fails on Ubuntu Linux with the error: `unable to execute 'gcc': No such file or directory` @@ -264,13 +264,13 @@ Some Windows environments see an error loading numpy with the latest Python vers Check the tensorflow version in the automated ml conda environment. Supported versions are < 1.13. 
Uninstall tensorflow from the environment if version is >= 1.13 You may check the version of tensorflow and uninstall as follows 1) start a command shell, activate conda environment where automated ml packages are installed -2) enter `pip freeze` and look for `tensorflow` , if found, the version listed should be < 1.13 -3) If the listed version is a not a supported version, `pip uninstall tensorflow` in the command shell and enter y for confirmation. +2) enter `pip freeze` and look for `tensorflow` , if found, the version listed should be < 1.13 +3) If the listed version is a not a supported version, `pip uninstall tensorflow` in the command shell and enter y for confirmation. -## Remote run: DsvmCompute.create fails +## Remote run: DsvmCompute.create fails There are several reasons why the DsvmCompute.create can fail. The reason is usually in the error message but you have to look at the end of the error message for the detailed reason. Some common reasons are: 1) `Compute name is invalid, it should start with a letter, be between 2 and 16 character, and only include letters (a-zA-Z), numbers (0-9) and \'-\'.` Note that underscore is not allowed in the name. -2) `The requested VM size xxxxx is not available in the current region.` You can select a different region or vm_size. +2) `The requested VM size xxxxx is not available in the current region.` You can select a different region or vm_size. ## Remote run: Unable to establish SSH connection Automated ML uses the SSH protocol to communicate with remote DSVMs. This defaults to port 22. Possible causes for this error are: @@ -296,4 +296,4 @@ To resolve this issue, allocate a DSVM with more memory or reduce the value spec ## Remote run: Iterations show as "Not Responding" in the RunDetails widget. This can be caused by too many concurrent iterations for a remote DSVM. Each concurrent iteration usually takes 100% of a core when it is running. Some iterations can use multiple cores. 
So, the max_concurrent_iterations setting should always be less than the number of cores of the DSVM. -To resolve this issue, try reducing the value specified for the max_concurrent_iterations setting. \ No newline at end of file +To resolve this issue, try reducing the value specified for the max_concurrent_iterations setting. diff --git a/how-to-use-azureml/automated-machine-learning/automl_env.yml b/how-to-use-azureml/automated-machine-learning/automl_env.yml index 07b7c974..5e280f0c 100644 --- a/how-to-use-azureml/automated-machine-learning/automl_env.yml +++ b/how-to-use-azureml/automated-machine-learning/automl_env.yml @@ -13,10 +13,13 @@ dependencies: - scikit-learn>=0.19.0,<=0.20.3 - pandas>=0.22.0,<=0.23.4 - py-xgboost<=0.80 +- pyarrow>=0.11.0 - pip: # Required packages for AzureML execution, history, and data preparation. - - azureml-sdk[automl,explain] + - azureml-defaults + - azureml-train-automl - azureml-widgets + - azureml-explain-model - pandas_ml diff --git a/how-to-use-azureml/automated-machine-learning/automl_env_mac.yml b/how-to-use-azureml/automated-machine-learning/automl_env_mac.yml index 2ea6f3ea..3a2c2498 100644 --- a/how-to-use-azureml/automated-machine-learning/automl_env_mac.yml +++ b/how-to-use-azureml/automated-machine-learning/automl_env_mac.yml @@ -14,10 +14,13 @@ dependencies: - scikit-learn>=0.19.0,<=0.20.3 - pandas>=0.22.0,<0.23.0 - py-xgboost<=0.80 +- pyarrow>=0.11.0 - pip: # Required packages for AzureML execution, history, and data preparation. 
- - azureml-sdk[automl,explain] + - azureml-defaults + - azureml-train-automl - azureml-widgets + - azureml-explain-model - pandas_ml diff --git a/how-to-use-azureml/automated-machine-learning/classification-bank-marketing/auto-ml-classification-bank-marketing.ipynb b/how-to-use-azureml/automated-machine-learning/classification-bank-marketing/auto-ml-classification-bank-marketing.ipynb index 8827a394..64750a56 100644 --- a/how-to-use-azureml/automated-machine-learning/classification-bank-marketing/auto-ml-classification-bank-marketing.ipynb +++ b/how-to-use-azureml/automated-machine-learning/classification-bank-marketing/auto-ml-classification-bank-marketing.ipynb @@ -69,22 +69,17 @@ "metadata": {}, "outputs": [], "source": [ - "import json\n", "import logging\n", "\n", "from matplotlib import pyplot as plt\n", - "import numpy as np\n", "import pandas as pd\n", "import os\n", - "from sklearn import datasets\n", - "import azureml.dataprep as dprep\n", - "from sklearn.model_selection import train_test_split\n", "\n", "import azureml.core\n", "from azureml.core.experiment import Experiment\n", "from azureml.core.workspace import Workspace\n", - "from azureml.train.automl import AutoMLConfig\n", - "from azureml.train.automl.run import AutoMLRun" + "from azureml.core.dataset import Dataset\n", + "from azureml.train.automl import AutoMLConfig" ] }, { @@ -155,11 +150,12 @@ " # Create the cluster.\n", " compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)\n", " \n", - " # Can poll for a minimum number of nodes and for a specific timeout.\n", - " # If no min_node_count is provided, it will use the scale settings for the cluster.\n", - " compute_target.wait_for_completion(show_output = True, min_node_count = None, timeout_in_minutes = 20)\n", + "print('Checking cluster status...')\n", + "# Can poll for a minimum number of nodes and for a specific timeout.\n", + "# If no min_node_count is provided, it will use the scale settings for the 
cluster.\n", + "compute_target.wait_for_completion(show_output = True, min_node_count = None, timeout_in_minutes = 20)\n", " \n", - " # For a more detailed view of current AmlCompute status, use get_status()." + "# For a more detailed view of current AmlCompute status, use get_status()." ] }, { @@ -200,11 +196,8 @@ "# Set compute target to AmlCompute\n", "conda_run_config.target = compute_target\n", "conda_run_config.environment.docker.enabled = True\n", - "conda_run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n", "\n", - "dprep_dependency = 'azureml-dataprep==' + pkg_resources.get_distribution(\"azureml-dataprep\").version\n", - "\n", - "cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]', dprep_dependency], conda_packages=['numpy','py-xgboost<=0.80'])\n", + "cd = CondaDependencies.create(conda_packages=['numpy','py-xgboost<=0.80'])\n", "conda_run_config.environment.python.conda_dependencies = cd" ] }, @@ -224,11 +217,10 @@ "outputs": [], "source": [ "data = \"https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv\"\n", - "dflow = dprep.read_csv(data, infer_column_types=True)\n", - "dflow.get_profile()\n", - "X_train = dflow.drop_columns(columns=['y'])\n", - "y_train = dflow.keep_columns(columns=['y'], validate_column_exists=True)\n", - "dflow.head()" + "dataset = Dataset.Tabular.from_delimited_files(data)\n", + "X_train = dataset.drop_columns(columns=['y'])\n", + "y_train = dataset.keep_columns(columns=['y'], validate=True)\n", + "dataset.take(5).to_pandas_dataframe()" ] }, { @@ -406,7 +398,7 @@ "def run(rawdata):\n", " try:\n", " data = json.loads(rawdata)['data']\n", - " data = numpy.array(data)\n", + " data = np.array(data)\n", " result = model.predict(data)\n", " except Exception as e:\n", " result = str(e)\n", @@ -443,7 +435,7 @@ "metadata": {}, "outputs": [], "source": [ - "for p in ['azureml-train-automl', 'azureml-sdk', 'azureml-core']:\n", + "for p in 
['azureml-train-automl', 'azureml-core']:\n", " print('{}\\t{}'.format(p, dependencies[p]))" ] }, @@ -453,10 +445,8 @@ "metadata": {}, "outputs": [], "source": [ - "from azureml.core.conda_dependencies import CondaDependencies\n", - "\n", "myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn','py-xgboost<=0.80'],\n", - " pip_packages=['azureml-sdk[automl]'])\n", + " pip_packages=['azureml-train-automl'])\n", "\n", "conda_env_file_name = 'myenv.yml'\n", "myenv.save_to_file('.', conda_env_file_name)" @@ -476,7 +466,7 @@ " content = cefr.read()\n", "\n", "with open(conda_env_file_name, 'w') as cefw:\n", - " cefw.write(content.replace(azureml.core.VERSION, dependencies['azureml-sdk']))\n", + " cefw.write(content.replace(azureml.core.VERSION, dependencies['azureml-train-automl']))\n", "\n", "# Substitute the actual model id in the script file.\n", "\n", @@ -618,8 +608,6 @@ "outputs": [], "source": [ "# Load the bank marketing datasets.\n", - "from sklearn.datasets import load_diabetes\n", - "from sklearn.model_selection import train_test_split\n", "from numpy import array" ] }, @@ -630,11 +618,10 @@ "outputs": [], "source": [ "data = \"https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_validate.csv\"\n", - "dflow = dprep.read_csv(data, infer_column_types=True)\n", - "dflow.get_profile()\n", - "X_test = dflow.drop_columns(columns=['y'])\n", - "y_test = dflow.keep_columns(columns=['y'], validate_column_exists=True)\n", - "dflow.head()" + "dataset = Dataset.Tabular.from_delimited_files(data)\n", + "X_test = dataset.drop_columns(columns=['y'])\n", + "y_test = dataset.keep_columns(columns=['y'], validate=True)\n", + "dataset.take(5).to_pandas_dataframe()" ] }, { diff --git a/how-to-use-azureml/automated-machine-learning/classification-bank-marketing/auto-ml-classification-bank-marketing.yml b/how-to-use-azureml/automated-machine-learning/classification-bank-marketing/auto-ml-classification-bank-marketing.yml 
index a46c905b..4c8a39ca 100644 --- a/how-to-use-azureml/automated-machine-learning/classification-bank-marketing/auto-ml-classification-bank-marketing.yml +++ b/how-to-use-azureml/automated-machine-learning/classification-bank-marketing/auto-ml-classification-bank-marketing.yml @@ -2,6 +2,8 @@ name: auto-ml-classification-bank-marketing dependencies: - pip: - azureml-sdk + - azureml-defaults + - azureml-explain-model - azureml-train-automl - azureml-widgets - matplotlib diff --git a/how-to-use-azureml/automated-machine-learning/classification-credit-card-fraud/auto-ml-classification-credit-card-fraud.ipynb b/how-to-use-azureml/automated-machine-learning/classification-credit-card-fraud/auto-ml-classification-credit-card-fraud.ipynb index 73c96856..952e9de4 100644 --- a/how-to-use-azureml/automated-machine-learning/classification-credit-card-fraud/auto-ml-classification-credit-card-fraud.ipynb +++ b/how-to-use-azureml/automated-machine-learning/classification-credit-card-fraud/auto-ml-classification-credit-card-fraud.ipynb @@ -74,14 +74,12 @@ "from matplotlib import pyplot as plt\n", "import pandas as pd\n", "import os\n", - "from sklearn.model_selection import train_test_split\n", - "import azureml.dataprep as dprep\n", "\n", "import azureml.core\n", "from azureml.core.experiment import Experiment\n", "from azureml.core.workspace import Workspace\n", - "from azureml.train.automl import AutoMLConfig\n", - "from azureml.train.automl.run import AutoMLRun" + "from azureml.core.dataset import Dataset\n", + "from azureml.train.automl import AutoMLConfig" ] }, { @@ -152,11 +150,12 @@ " # Create the cluster.\n", " compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)\n", " \n", - " # Can poll for a minimum number of nodes and for a specific timeout.\n", - " # If no min_node_count is provided, it will use the scale settings for the cluster.\n", - " compute_target.wait_for_completion(show_output = True, min_node_count = None, 
timeout_in_minutes = 20)\n", - " \n", - " # For a more detailed view of current AmlCompute status, use get_status()." + "print('Checking cluster status...')\n", + "# Can poll for a minimum number of nodes and for a specific timeout.\n", + "# If no min_node_count is provided, it will use the scale settings for the cluster.\n", + "compute_target.wait_for_completion(show_output = True, min_node_count = None, timeout_in_minutes = 20)\n", + "\n", + "# For a more detailed view of current AmlCompute status, use get_status()." ] }, { @@ -197,11 +196,8 @@ "# Set compute target to AmlCompute\n", "conda_run_config.target = compute_target\n", "conda_run_config.environment.docker.enabled = True\n", - "conda_run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n", "\n", - "dprep_dependency = 'azureml-dataprep==' + pkg_resources.get_distribution(\"azureml-dataprep\").version\n", - "\n", - "cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]', dprep_dependency], conda_packages=['numpy','py-xgboost<=0.80'])\n", + "cd = CondaDependencies.create(conda_packages=['numpy','py-xgboost<=0.80'])\n", "conda_run_config.environment.python.conda_dependencies = cd" ] }, @@ -211,7 +207,7 @@ "source": [ "### Load Data\n", "\n", - "Here create the script to be run in azure compute for loading the data, load the credit card dataset into cards and store the Class column (y) in the y variable and store the remaining data in the x variable. Next split the data using train_test_split and return X_train and y_train for training the model." + "Here create the script to be run in azure compute for loading the data, load the credit card dataset into cards and store the Class column (y) in the y variable and store the remaining data in the x variable. Next split the data using random_split and return X_train and y_train for training the model." 
] }, { @@ -221,10 +217,9 @@ "outputs": [], "source": [ "data = \"https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/creditcard.csv\"\n", - "dflow = dprep.read_csv(data, infer_column_types=True)\n", - "dflow.get_profile()\n", - "X = dflow.drop_columns(columns=['Class'])\n", - "y = dflow.keep_columns(columns=['Class'], validate_column_exists=True)\n", + "dataset = Dataset.Tabular.from_delimited_files(data)\n", + "X = dataset.drop_columns(columns=['Class'])\n", + "y = dataset.keep_columns(columns=['Class'], validate=True)\n", "X_train, X_test = X.random_split(percentage=0.8, seed=223)\n", "y_train, y_test = y.random_split(percentage=0.8, seed=223)" ] @@ -447,7 +442,7 @@ "metadata": {}, "outputs": [], "source": [ - "for p in ['azureml-train-automl', 'azureml-sdk', 'azureml-core']:\n", + "for p in ['azureml-train-automl', 'azureml-core']:\n", " print('{}\\t{}'.format(p, dependencies[p]))" ] }, @@ -458,7 +453,7 @@ "outputs": [], "source": [ "myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn','py-xgboost<=0.80'],\n", - " pip_packages=['azureml-sdk[automl]'])\n", + " pip_packages=['azureml-train-automl'])\n", "\n", "conda_env_file_name = 'myenv.yml'\n", "myenv.save_to_file('.', conda_env_file_name)" @@ -478,7 +473,7 @@ " content = cefr.read()\n", "\n", "with open(conda_env_file_name, 'w') as cefw:\n", - " cefw.write(content.replace(azureml.core.VERSION, dependencies['azureml-sdk']))\n", + " cefw.write(content.replace(azureml.core.VERSION, dependencies['azureml-train-automl']))\n", "\n", "# Substitute the actual model id in the script file.\n", "\n", diff --git a/how-to-use-azureml/automated-machine-learning/classification-credit-card-fraud/auto-ml-classification-credit-card-fraud.yml b/how-to-use-azureml/automated-machine-learning/classification-credit-card-fraud/auto-ml-classification-credit-card-fraud.yml index 14c8fe46..f4a3601e 100644 --- 
a/how-to-use-azureml/automated-machine-learning/classification-credit-card-fraud/auto-ml-classification-credit-card-fraud.yml +++ b/how-to-use-azureml/automated-machine-learning/classification-credit-card-fraud/auto-ml-classification-credit-card-fraud.yml @@ -2,6 +2,8 @@ name: auto-ml-classification-credit-card-fraud dependencies: - pip: - azureml-sdk + - azureml-defaults + - azureml-explain-model - azureml-train-automl - azureml-widgets - matplotlib diff --git a/how-to-use-azureml/automated-machine-learning/classification-with-deployment/auto-ml-classification-with-deployment.ipynb b/how-to-use-azureml/automated-machine-learning/classification-with-deployment/auto-ml-classification-with-deployment.ipynb index 2e00e9c3..3dd3b13f 100644 --- a/how-to-use-azureml/automated-machine-learning/classification-with-deployment/auto-ml-classification-with-deployment.ipynb +++ b/how-to-use-azureml/automated-machine-learning/classification-with-deployment/auto-ml-classification-with-deployment.ipynb @@ -297,7 +297,7 @@ "metadata": {}, "outputs": [], "source": [ - "for p in ['azureml-train-automl', 'azureml-sdk', 'azureml-core']:\n", + "for p in ['azureml-train-automl', 'azureml-core']:\n", " print('{}\\t{}'.format(p, dependencies[p]))" ] }, @@ -310,7 +310,7 @@ "from azureml.core.conda_dependencies import CondaDependencies\n", "\n", "myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn','py-xgboost<=0.80'],\n", - " pip_packages=['azureml-sdk[automl]'])\n", + " pip_packages=['azureml-train-automl'])\n", "\n", "conda_env_file_name = 'myenv.yml'\n", "myenv.save_to_file('.', conda_env_file_name)" @@ -330,7 +330,7 @@ " content = cefr.read()\n", "\n", "with open(conda_env_file_name, 'w') as cefw:\n", - " cefw.write(content.replace(azureml.core.VERSION, dependencies['azureml-sdk']))\n", + " cefw.write(content.replace(azureml.core.VERSION, dependencies['azureml-train-automl']))\n", "\n", "# Substitute the actual model id in the script file.\n", "\n", diff --git 
a/how-to-use-azureml/automated-machine-learning/dataset-remote-execution/auto-ml-dataset-remote-execution.ipynb b/how-to-use-azureml/automated-machine-learning/dataset-remote-execution/auto-ml-dataset-remote-execution.ipynb new file mode 100644 index 00000000..39742e9b --- /dev/null +++ b/how-to-use-azureml/automated-machine-learning/dataset-remote-execution/auto-ml-dataset-remote-execution.ipynb @@ -0,0 +1,509 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/automated-machine-learning/dataprep-remote-execution/auto-ml-dataprep-remote-execution.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Automated Machine Learning\n", + "_**Load Data using `TabularDataset` for Remote Execution (AmlCompute)**_\n", + "\n", + "## Contents\n", + "1. [Introduction](#Introduction)\n", + "1. [Setup](#Setup)\n", + "1. [Data](#Data)\n", + "1. [Train](#Train)\n", + "1. [Results](#Results)\n", + "1. [Test](#Test)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Introduction\n", + "In this example we showcase how you can use AzureML Dataset to load data for AutoML.\n", + "\n", + "Make sure you have executed the [configuration](../../../configuration.ipynb) before running this notebook.\n", + "\n", + "In this notebook you will learn how to:\n", + "1. Create a `TabularDataset` pointing to the training data.\n", + "2. Pass the `TabularDataset` to AutoML for a remote run." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As part of the setup you have already created an Azure ML `Workspace` object. For AutoML you will need to create an `Experiment` object, which is a named object in a `Workspace` used to run experiments." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import logging\n", + "\n", + "import pandas as pd\n", + "\n", + "import azureml.core\n", + "from azureml.core.experiment import Experiment\n", + "from azureml.core.workspace import Workspace\n", + "from azureml.core.dataset import Dataset\n", + "from azureml.train.automl import AutoMLConfig" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ws = Workspace.from_config()\n", + "\n", + "# choose a name for experiment\n", + "experiment_name = 'automl-dataset-remote-bai'\n", + "# project folder\n", + "project_folder = './sample_projects/automl-dataprep-remote-bai'\n", + " \n", + "experiment = Experiment(ws, experiment_name)\n", + " \n", + "output = {}\n", + "output['SDK version'] = azureml.core.VERSION\n", + "output['Subscription ID'] = ws.subscription_id\n", + "output['Workspace Name'] = ws.name\n", + "output['Resource Group'] = ws.resource_group\n", + "output['Location'] = ws.location\n", + "output['Project Directory'] = project_folder\n", + "output['Experiment Name'] = experiment.name\n", + "pd.set_option('display.max_colwidth', -1)\n", + "outputDf = pd.DataFrame(data = output, index = [''])\n", + "outputDf.T" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# The data referenced here was a 1MB simple random sample of the Chicago Crime data into a local temporary directory.\n", + 
"example_data = 'https://dprepdata.blob.core.windows.net/demo/crime0-random.csv'\n", + "dataset = Dataset.Tabular.from_delimited_files(example_data)\n", + "dataset.take(5).to_pandas_dataframe()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Review the data\n", + "\n", + "You can peek the result of a `TabularDataset` at any range using `skip(i)` and `take(j).to_pandas_dataframe()`. Doing so evaluates only `j` records, which makes it fast even against large datasets.\n", + "\n", + "`TabularDataset` objects are immutable and are composed of a list of subsetting transformations (optional)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "X = dataset.drop_columns(columns=['Primary Type', 'FBI Code'])\n", + "y = dataset.keep_columns(columns=['Primary Type'], validate=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Train\n", + "\n", + "This creates a general AutoML settings object applicable for both local and remote runs." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "automl_settings = {\n", + " \"iteration_timeout_minutes\" : 10,\n", + " \"iterations\" : 2,\n", + " \"primary_metric\" : 'AUC_weighted',\n", + " \"preprocess\" : True,\n", + " \"verbosity\" : logging.INFO\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create or Attach an AmlCompute cluster" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.compute import AmlCompute\n", + "from azureml.core.compute import ComputeTarget\n", + "\n", + "# Choose a name for your cluster.\n", + "amlcompute_cluster_name = \"automlc2\"\n", + "\n", + "found = False\n", + "\n", + "# Check if this compute target already exists in the workspace.\n", + "\n", + "cts = ws.compute_targets\n", + "if amlcompute_cluster_name in cts and cts[amlcompute_cluster_name].type == 'AmlCompute':\n", + " found = True\n", + " print('Found existing compute target.')\n", + " compute_target = cts[amlcompute_cluster_name]\n", + "\n", + "if not found:\n", + " print('Creating a new compute target...')\n", + " provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\", # for GPU, use \"STANDARD_NC6\"\n", + " #vm_priority = 'lowpriority', # optional\n", + " max_nodes = 6)\n", + "\n", + " # Create the cluster.\n", + " compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)\n", + "\n", + "print('Checking cluster status...')\n", + "# Can poll for a minimum number of nodes and for a specific timeout.\n", + "# If no min_node_count is provided, it will use the scale settings for the cluster.\n", + "compute_target.wait_for_completion(show_output = True, min_node_count = None, timeout_in_minutes = 20)\n", + "\n", + "# For a more detailed view of current AmlCompute status, use get_status()."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.runconfig import RunConfiguration\n", + "from azureml.core.conda_dependencies import CondaDependencies\n", + "import pkg_resources\n", + "\n", + "# create a new RunConfig object\n", + "conda_run_config = RunConfiguration(framework=\"python\")\n", + "\n", + "# Set compute target to AmlCompute\n", + "conda_run_config.target = compute_target\n", + "conda_run_config.environment.docker.enabled = True\n", + "\n", + "cd = CondaDependencies.create(conda_packages=['numpy','py-xgboost<=0.80'])\n", + "conda_run_config.environment.python.conda_dependencies = cd" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Pass Data with `TabularDataset` Objects\n", + "\n", + "The `TabularDataset` objects captured above can also be passed to the `submit` method for a remote run. AutoML will serialize the `TabularDataset` object and send it to the remote compute target. The `TabularDataset` will not be evaluated locally." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "automl_config = AutoMLConfig(task = 'classification',\n", + " debug_log = 'automl_errors.log',\n", + " path = project_folder,\n", + " run_configuration=conda_run_config,\n", + " X = X,\n", + " y = y,\n", + " **automl_settings)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "remote_run = experiment.submit(automl_config, show_output = True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "remote_run" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Pre-process cache cleanup\n", + "The preprocess data gets cache at user default file store. 
When the run is complete, the cache can be cleaned by running the cell below." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "remote_run.clean_preprocessor_cache()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Cancelling Runs\n", + "You can cancel ongoing remote runs using the `cancel` and `cancel_iteration` functions." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Cancel the ongoing experiment and stop scheduling new iterations.\n", + "# remote_run.cancel()\n", + "\n", + "# Cancel iteration 1 and move onto iteration 2.\n", + "# remote_run.cancel_iteration(1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Results" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Widget for Monitoring Runs\n", + "\n", + "The widget will first report a \"loading\" status while running the first iteration. After completing the first iteration, an auto-updating graph and table will be shown. The widget will refresh once per minute, so you should see the graph update as child runs complete.\n", + "\n", + "**Note:** The widget displays a link at the bottom. Use this link to open a web interface to explore the individual run details." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.widgets import RunDetails\n", + "RunDetails(remote_run).show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Retrieve All Child Runs\n", + "You can also use SDK methods to fetch all the child runs and see individual metrics that we log." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "children = list(remote_run.get_children())\n", + "metricslist = {}\n", + "for run in children:\n", + " properties = run.get_properties()\n", + " metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)}\n", + " metricslist[int(properties['iteration'])] = metrics\n", + " \n", + "rundata = pd.DataFrame(metricslist).sort_index(1)\n", + "rundata" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Retrieve the Best Model\n", + "\n", + "Below we select the best pipeline from our iterations. The `get_output` method returns the best run and the fitted model. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "best_run, fitted_model = remote_run.get_output()\n", + "print(best_run)\n", + "print(fitted_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Best Model Based on Any Other Metric\n", + "Show the run and the model that has the smallest `log_loss` value:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "lookup_metric = \"log_loss\"\n", + "best_run, fitted_model = remote_run.get_output(metric = lookup_metric)\n", + "print(best_run)\n", + "print(fitted_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Model from a Specific Iteration\n", + "Show the run and the model from the first iteration:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "iteration = 0\n", + "best_run, fitted_model = remote_run.get_output(iteration = iteration)\n", + "print(best_run)\n", + "print(fitted_model)" + ] + }, + { + "cell_type": 
"markdown", + "metadata": {}, + "source": [ + "## Test\n", + "\n", + "#### Load Test Data\n", + "The test data should go through the same preparation steps as the training data. Otherwise it might fail at the preprocessing step." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dataset_test = Dataset.Tabular.from_delimited_files(path='https://dprepdata.blob.core.windows.net/demo/crime0-test.csv')\n", + "\n", + "df_test = dataset_test.to_pandas_dataframe()\n", + "df_test = df_test[pd.notnull(df_test['Primary Type'])]\n", + "\n", + "y_test = df_test[['Primary Type']]\n", + "X_test = df_test.drop(['Primary Type', 'FBI Code'], axis=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Testing Our Best Fitted Model\n", + "We will use a confusion matrix to see how our model performs." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pandas_ml import ConfusionMatrix\n", + "\n", + "ypred = fitted_model.predict(X_test)\n", + "\n", + "cm = ConfusionMatrix(y_test['Primary Type'], ypred)\n", + "\n", + "print(cm)\n", + "\n", + "cm.plot()" + ] + } + ], + "metadata": { + "authors": [ + { + "name": "savitam" + } + ], + "kernelspec": { + "display_name": "Python 3.6", + "language": "python", + "name": "python36" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/how-to-use-azureml/automated-machine-learning/dataset-remote-execution/auto-ml-dataset-remote-execution.yml b/how-to-use-azureml/automated-machine-learning/dataset-remote-execution/auto-ml-dataset-remote-execution.yml new file mode 100644 index 00000000..aa6e4e65 --- 
/dev/null +++ b/how-to-use-azureml/automated-machine-learning/dataset-remote-execution/auto-ml-dataset-remote-execution.yml @@ -0,0 +1,10 @@ +name: auto-ml-dataset-remote-execution +dependencies: +- pip: + - azureml-sdk + - azureml-defaults + - azureml-explain-model + - azureml-train-automl + - azureml-widgets + - matplotlib + - pandas_ml diff --git a/how-to-use-azureml/automated-machine-learning/dataset/auto-ml-dataset.ipynb b/how-to-use-azureml/automated-machine-learning/dataset/auto-ml-dataset.ipynb new file mode 100644 index 00000000..03499dad --- /dev/null +++ b/how-to-use-azureml/automated-machine-learning/dataset/auto-ml-dataset.ipynb @@ -0,0 +1,402 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/automated-machine-learning/dataprep/auto-ml-dataprep.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Automated Machine Learning\n", + "_**Load Data using `TabularDataset` for Local Execution**_\n", + "\n", + "## Contents\n", + "1. [Introduction](#Introduction)\n", + "1. [Setup](#Setup)\n", + "1. [Data](#Data)\n", + "1. [Train](#Train)\n", + "1. [Results](#Results)\n", + "1. [Test](#Test)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Introduction\n", + "In this example we showcase how you can use AzureML Dataset to load data for AutoML.\n", + "\n", + "Make sure you have executed the [configuration](../../../configuration.ipynb) before running this notebook.\n", + "\n", + "In this notebook you will learn how to:\n", + "1. Create a `TabularDataset` pointing to the training data.\n", + "2. Pass the `TabularDataset` to AutoML for a local run." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As part of the setup you have already created an Azure ML `Workspace` object. For AutoML you will need to create an `Experiment` object, which is a named object in a `Workspace` used to run experiments." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import logging\n", + "\n", + "import pandas as pd\n", + "\n", + "import azureml.core\n", + "from azureml.core.experiment import Experiment\n", + "from azureml.core.workspace import Workspace\n", + "from azureml.core.dataset import Dataset\n", + "from azureml.train.automl import AutoMLConfig" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ws = Workspace.from_config()\n", + " \n", + "# choose a name for experiment\n", + "experiment_name = 'automl-dataset-local'\n", + "# project folder\n", + "project_folder = './sample_projects/automl-dataset-local'\n", + " \n", + "experiment = Experiment(ws, experiment_name)\n", + " \n", + "output = {}\n", + "output['SDK version'] = azureml.core.VERSION\n", + "output['Subscription ID'] = ws.subscription_id\n", + "output['Workspace Name'] = ws.name\n", + "output['Resource Group'] = ws.resource_group\n", + "output['Location'] = ws.location\n", + "output['Project Directory'] = project_folder\n", + "output['Experiment Name'] = experiment.name\n", + "pd.set_option('display.max_colwidth', -1)\n", + "outputDf = pd.DataFrame(data = output, index = [''])\n", + "outputDf.T" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# The data referenced here was a 1MB simple random sample of the Chicago Crime data into a local temporary directory.\n", + "example_data = 
'https://dprepdata.blob.core.windows.net/demo/crime0-random.csv'\n", + "dataset = Dataset.Tabular.from_delimited_files(example_data)\n", + "dataset.take(5).to_pandas_dataframe()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Review the data\n", + "\n", + "You can peek the result of a `TabularDataset` at any range using `skip(i)` and `take(j).to_pandas_dataframe()`. Doing so evaluates only `j` records, which makes it fast even against large datasets.\n", + "\n", + "`TabularDataset` objects are immutable and are composed of a list of subsetting transformations (optional)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "X = dataset.drop_columns(columns=['Primary Type', 'FBI Code'])\n", + "y = dataset.keep_columns(columns=['Primary Type'], validate=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Train\n", + "\n", + "This creates a general AutoML settings object applicable for both local and remote runs." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "automl_settings = {\n", + " \"iteration_timeout_minutes\" : 10,\n", + " \"iterations\" : 2,\n", + " \"primary_metric\" : 'AUC_weighted',\n", + " \"preprocess\" : True,\n", + " \"verbosity\" : logging.INFO\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Pass Data with `TabularDataset` Objects\n", + "\n", + "The `TabularDataset` objects captured above can be passed to the `submit` method for a local run. AutoML will retrieve the results from the `TabularDataset` for model training." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "automl_config = AutoMLConfig(task = 'classification',\n", + " debug_log = 'automl_errors.log',\n", + " X = X,\n", + " y = y,\n", + " **automl_settings)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "local_run = experiment.submit(automl_config, show_output = True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "local_run" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Results" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Widget for Monitoring Runs\n", + "\n", + "The widget will first report a \"loading\" status while running the first iteration. After completing the first iteration, an auto-updating graph and table will be shown. The widget will refresh once per minute, so you should see the graph update as child runs complete.\n", + "\n", + "**Note:** The widget displays a link at the bottom. Use this link to open a web interface to explore the individual run details." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.widgets import RunDetails\n", + "RunDetails(local_run).show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Retrieve All Child Runs\n", + "You can also use SDK methods to fetch all the child runs and see individual metrics that we log." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "children = list(local_run.get_children())\n", + "metricslist = {}\n", + "for run in children:\n", + " properties = run.get_properties()\n", + " metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)}\n", + " metricslist[int(properties['iteration'])] = metrics\n", + " \n", + "rundata = pd.DataFrame(metricslist).sort_index(1)\n", + "rundata" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Retrieve the Best Model\n", + "\n", + "Below we select the best pipeline from our iterations. The `get_output` method returns the best run and the fitted model. Overloads on `get_output` allow you to retrieve the best run and fitted model for *any* logged metric or for a particular *iteration*." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "best_run, fitted_model = local_run.get_output()\n", + "print(best_run)\n", + "print(fitted_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Best Model Based on Any Other Metric\n", + "Show the run and the model that has the smallest `log_loss` value:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "lookup_metric = \"log_loss\"\n", + "best_run, fitted_model = local_run.get_output(metric = lookup_metric)\n", + "print(best_run)\n", + "print(fitted_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Model from a Specific Iteration\n", + "Show the run and the model from the first iteration:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "iteration = 0\n", + "best_run, fitted_model = local_run.get_output(iteration = iteration)\n", + "print(best_run)\n", + "print(fitted_model)" + ] + }, + { + "cell_type": 
"markdown", + "metadata": {}, + "source": [ + "## Test\n", + "\n", + "#### Load Test Data\n", + "The test data should go through the same preparation steps as the training data. Otherwise it might fail at the preprocessing step." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dataset_test = Dataset.Tabular.from_delimited_files(path='https://dprepdata.blob.core.windows.net/demo/crime0-test.csv')\n", + "\n", + "df_test = dataset_test.to_pandas_dataframe()\n", + "df_test = df_test[pd.notnull(df_test['Primary Type'])]\n", + "\n", + "y_test = df_test[['Primary Type']]\n", + "X_test = df_test.drop(['Primary Type', 'FBI Code'], axis=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Testing Our Best Fitted Model\n", + "We will use a confusion matrix to see how our model performs." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pandas_ml import ConfusionMatrix\n", + "\n", + "ypred = fitted_model.predict(X_test)\n", + "\n", + "cm = ConfusionMatrix(y_test['Primary Type'], ypred)\n", + "\n", + "print(cm)\n", + "\n", + "cm.plot()" + ] + } + ], + "metadata": { + "authors": [ + { + "name": "savitam" + } + ], + "kernelspec": { + "display_name": "Python 3.6", + "language": "python", + "name": "python36" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/how-to-use-azureml/automated-machine-learning/dataset/auto-ml-dataset.yml b/how-to-use-azureml/automated-machine-learning/dataset/auto-ml-dataset.yml new file mode 100644 index 00000000..87242fe5 --- /dev/null +++ 
b/how-to-use-azureml/automated-machine-learning/dataset/auto-ml-dataset.yml @@ -0,0 +1,8 @@ +name: auto-ml-dataset +dependencies: +- pip: + - azureml-sdk + - azureml-train-automl + - azureml-widgets + - matplotlib + - pandas_ml diff --git a/how-to-use-azureml/automated-machine-learning/forecasting-energy-demand/auto-ml-forecasting-energy-demand.ipynb b/how-to-use-azureml/automated-machine-learning/forecasting-energy-demand/auto-ml-forecasting-energy-demand.ipynb index 12d6ae16..042ee804 100644 --- a/how-to-use-azureml/automated-machine-learning/forecasting-energy-demand/auto-ml-forecasting-energy-demand.ipynb +++ b/how-to-use-azureml/automated-machine-learning/forecasting-energy-demand/auto-ml-forecasting-energy-demand.ipynb @@ -231,6 +231,7 @@ "automl_config = AutoMLConfig(task='forecasting',\n", " debug_log='automl_nyc_energy_errors.log',\n", " primary_metric='normalized_root_mean_squared_error',\n", + " blacklist_models = ['ExtremeRandomTrees'],\n", " iterations=10,\n", " iteration_timeout_minutes=5,\n", " X=X_train,\n", @@ -481,7 +482,7 @@ "automl_config_lags = AutoMLConfig(task='forecasting',\n", " debug_log='automl_nyc_energy_errors.log',\n", " primary_metric='normalized_root_mean_squared_error',\n", - " blacklist_models=['ElasticNet'],\n", + " blacklist_models=['ElasticNet','ExtremeRandomTrees','GradientBoosting'],\n", " iterations=10,\n", " iteration_timeout_minutes=10,\n", " X=X_train,\n", diff --git a/how-to-use-azureml/automated-machine-learning/forecasting-orange-juice-sales/auto-ml-forecasting-orange-juice-sales.ipynb b/how-to-use-azureml/automated-machine-learning/forecasting-orange-juice-sales/auto-ml-forecasting-orange-juice-sales.ipynb index 629edb02..23c13fc9 100644 --- a/how-to-use-azureml/automated-machine-learning/forecasting-orange-juice-sales/auto-ml-forecasting-orange-juice-sales.ipynb +++ b/how-to-use-azureml/automated-machine-learning/forecasting-orange-juice-sales/auto-ml-forecasting-orange-juice-sales.ipynb @@ -244,7 +244,8 @@ 
"|**X**|Training matrix of features as a pandas DataFrame, shape = [n_training_samples, n_features]|\n", "|**y**|Target values as a numpy.ndarray, shape = [n_training_samples, ]|\n", "|**n_cross_validations**|Number of cross-validation folds to use for model/pipeline selection|\n", - "|**enable_ensembling**|Allow AutoML to create ensembles of the best performing models\n", + "|**enable_voting_ensemble**|Allow AutoML to create a Voting ensemble of the best performing models\n", + "|**enable_stack_ensemble**|Allow AutoML to create a Stack ensemble of the best performing models\n", "|**debug_log**|Log file path for writing debugging information\n", "|**path**|Relative path to the project folder. AutoML stores configuration files for the experiment under this folder. You can specify a new empty folder.|\n", "|**time_column_name**|Name of the datetime column in the input data|\n", @@ -273,7 +274,8 @@ " X=X_train,\n", " y=y_train,\n", " n_cross_validations=3,\n", - " enable_ensembling=False,\n", + " enable_voting_ensemble=False,\n", + " enable_stack_ensemble=False,\n", " path=project_folder,\n", " verbosity=logging.INFO,\n", " **time_series_settings)" @@ -663,10 +665,10 @@ "conda_env_file_name = 'fcast_env.yml'\n", "\n", "dependencies = ml_run.get_run_sdk_dependencies(iteration = best_iteration)\n", - "for p in ['azureml-train-automl', 'azureml-sdk', 'azureml-core']:\n", + "for p in ['azureml-train-automl', 'azureml-core']:\n", " print('{}\\t{}'.format(p, dependencies[p]))\n", "\n", - "myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn'], pip_packages=['azureml-sdk[automl]'])\n", + "myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn'], pip_packages=['azureml-train-automl'])\n", "\n", "myenv.save_to_file('.', conda_env_file_name)" ] @@ -688,7 +690,7 @@ " content = cefr.read()\n", "\n", "with open(conda_env_file_name, 'w') as cefw:\n", - " cefw.write(content.replace(azureml.core.VERSION, dependencies['azureml-sdk']))\n", + " 
cefw.write(content.replace(azureml.core.VERSION, dependencies['azureml-train-automl']))\n", "\n", "# Substitute the actual model id in the script file.\n", "\n", diff --git a/how-to-use-azureml/automated-machine-learning/regression-concrete-strength/auto-ml-regression-concrete-strength.ipynb b/how-to-use-azureml/automated-machine-learning/regression-concrete-strength/auto-ml-regression-concrete-strength.ipynb index 4868b9ea..bdf37d20 100644 --- a/how-to-use-azureml/automated-machine-learning/regression-concrete-strength/auto-ml-regression-concrete-strength.ipynb +++ b/how-to-use-azureml/automated-machine-learning/regression-concrete-strength/auto-ml-regression-concrete-strength.ipynb @@ -70,13 +70,12 @@ "import numpy as np\n", "import pandas as pd\n", "import os\n", - "from sklearn.model_selection import train_test_split\n", - "import azureml.dataprep as dprep\n", " \n", "\n", "import azureml.core\n", "from azureml.core.experiment import Experiment\n", "from azureml.core.workspace import Workspace\n", + "from azureml.core.dataset import Dataset\n", "from azureml.train.automl import AutoMLConfig" ] }, @@ -147,11 +146,12 @@ " # Create the cluster.\n", " compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)\n", " \n", - " # Can poll for a minimum number of nodes and for a specific timeout.\n", - " # If no min_node_count is provided, it will use the scale settings for the cluster.\n", - " compute_target.wait_for_completion(show_output = True, min_node_count = None, timeout_in_minutes = 20)\n", + "print('Checking cluster status...')\n", + "# Can poll for a minimum number of nodes and for a specific timeout.\n", + "# If no min_node_count is provided, it will use the scale settings for the cluster.\n", + "compute_target.wait_for_completion(show_output = True, min_node_count = None, timeout_in_minutes = 20)\n", " \n", - " # For a more detailed view of current AmlCompute status, use get_status()." 
+ "# For a more detailed view of current AmlCompute status, use get_status()." ] }, { @@ -192,11 +192,8 @@ "# Set compute target to AmlCompute\n", "conda_run_config.target = compute_target\n", "conda_run_config.environment.docker.enabled = True\n", - "conda_run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n", "\n", - "dprep_dependency = 'azureml-dataprep==' + pkg_resources.get_distribution(\"azureml-dataprep\").version\n", - "\n", - "cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]', dprep_dependency], conda_packages=['numpy'])\n", + "cd = CondaDependencies.create(conda_packages=['numpy', 'py-xgboost<=0.80'])\n", "conda_run_config.environment.python.conda_dependencies = cd" ] }, @@ -206,7 +203,7 @@ "source": [ "### Load Data\n", "\n", - "Here create the script to be run in azure compute for loading the data, load the concrete strength dataset into the X and y variables. Next, split the data using train_test_split and return X_train and y_train for training the model. Finally, return X_train and y_train for training the model." + "Here, create the script to be run in Azure compute for loading the data, and load the concrete strength dataset into the X and y variables. Next, split the data using random_split and return X_train and y_train for training the model."
] }, { @@ -216,13 +213,12 @@ "outputs": [], "source": [ "data = \"https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/compresive_strength_concrete.csv\"\n", - "dflow = dprep.read_csv(data, infer_column_types=True)\n", - "dflow.get_profile()\n", - "X = dflow.drop_columns(columns=['CONCRETE'])\n", - "y = dflow.keep_columns(columns=['CONCRETE'], validate_column_exists=True)\n", + "dataset = Dataset.Tabular.from_delimited_files(data)\n", + "X = dataset.drop_columns(columns=['CONCRETE'])\n", + "y = dataset.keep_columns(columns=['CONCRETE'], validate=True)\n", "X_train, X_test = X.random_split(percentage=0.8, seed=223)\n", "y_train, y_test = y.random_split(percentage=0.8, seed=223) \n", - "dflow.head()" + "dataset.take(5).to_pandas_dataframe()" ] }, { @@ -484,7 +480,7 @@ "metadata": {}, "outputs": [], "source": [ - "for p in ['azureml-train-automl', 'azureml-sdk', 'azureml-core']:\n", + "for p in ['azureml-train-automl', 'azureml-core']:\n", " print('{}\\t{}'.format(p, dependencies[p]))" ] }, @@ -494,9 +490,7 @@ "metadata": {}, "outputs": [], "source": [ - "from azureml.core.conda_dependencies import CondaDependencies\n", - "\n", - "myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn'], pip_packages=['azureml-sdk[automl]'])\n", + "myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn','py-xgboost==0.80'], pip_packages=['azureml-train-automl'])\n", "\n", "conda_env_file_name = 'myenv.yml'\n", "myenv.save_to_file('.', conda_env_file_name)" @@ -516,7 +510,7 @@ " content = cefr.read()\n", "\n", "with open(conda_env_file_name, 'w') as cefw:\n", - " cefw.write(content.replace(azureml.core.VERSION, dependencies['azureml-sdk']))\n", + " cefw.write(content.replace(azureml.core.VERSION, dependencies['azureml-train-automl']))\n", "\n", "# Substitute the actual model id in the script file.\n", "\n", diff --git 
a/how-to-use-azureml/automated-machine-learning/regression-concrete-strength/auto-ml-regression-concrete-strength.yml b/how-to-use-azureml/automated-machine-learning/regression-concrete-strength/auto-ml-regression-concrete-strength.yml index eb39aa20..e29c5b3e 100644 --- a/how-to-use-azureml/automated-machine-learning/regression-concrete-strength/auto-ml-regression-concrete-strength.yml +++ b/how-to-use-azureml/automated-machine-learning/regression-concrete-strength/auto-ml-regression-concrete-strength.yml @@ -2,6 +2,8 @@ name: auto-ml-regression-concrete-strength dependencies: - pip: - azureml-sdk + - azureml-defaults + - azureml-explain-model - azureml-train-automl - azureml-widgets - matplotlib diff --git a/how-to-use-azureml/automated-machine-learning/regression-hardware-performance/auto-ml-regression-hardware-performance.ipynb b/how-to-use-azureml/automated-machine-learning/regression-hardware-performance/auto-ml-regression-hardware-performance.ipynb index cb0dd394..84d88ed4 100644 --- a/how-to-use-azureml/automated-machine-learning/regression-hardware-performance/auto-ml-regression-hardware-performance.ipynb +++ b/how-to-use-azureml/automated-machine-learning/regression-hardware-performance/auto-ml-regression-hardware-performance.ipynb @@ -70,13 +70,12 @@ "import numpy as np\n", "import pandas as pd\n", "import os\n", - "from sklearn.model_selection import train_test_split\n", - "import azureml.dataprep as dprep\n", " \n", "\n", "import azureml.core\n", "from azureml.core.experiment import Experiment\n", "from azureml.core.workspace import Workspace\n", + "from azureml.core.dataset import Dataset\n", "from azureml.train.automl import AutoMLConfig" ] }, @@ -147,11 +146,12 @@ " # Create the cluster.\n", " compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)\n", " \n", - " # Can poll for a minimum number of nodes and for a specific timeout.\n", - " # If no min_node_count is provided, it will use the scale settings for the 
cluster.\n", - " compute_target.wait_for_completion(show_output = True, min_node_count = None, timeout_in_minutes = 20)\n", + "print('Checking cluster status...')\n", + "# Can poll for a minimum number of nodes and for a specific timeout.\n", + "# If no min_node_count is provided, it will use the scale settings for the cluster.\n", + "compute_target.wait_for_completion(show_output = True, min_node_count = None, timeout_in_minutes = 20)\n", " \n", - " # For a more detailed view of current AmlCompute status, use get_status()." + "# For a more detailed view of current AmlCompute status, use get_status()." ] }, { @@ -192,11 +192,8 @@ "# Set compute target to AmlCompute\n", "conda_run_config.target = compute_target\n", "conda_run_config.environment.docker.enabled = True\n", - "conda_run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n", "\n", - "dprep_dependency = 'azureml-dataprep==' + pkg_resources.get_distribution(\"azureml-dataprep\").version\n", - "\n", - "cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]', dprep_dependency], conda_packages=['numpy'])\n", + "cd = CondaDependencies.create(conda_packages=['numpy', 'py-xgboost<=0.80'])\n", "conda_run_config.environment.python.conda_dependencies = cd" ] }, @@ -206,7 +203,7 @@ "source": [ "### Load Data\n", "\n", - "Here create the script to be run in azure compute for loading the data, load the hardware dataset into the X and y variables. Next split the data using train_test_split and return X_train and y_train for training the model." + "Here, create the script to be run in Azure compute for loading the data, and load the hardware dataset into the X and y variables. Next, split the data using random_split and return X_train and y_train for training the model."
] }, { @@ -216,13 +213,12 @@ "outputs": [], "source": [ "data = \"https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/machineData.csv\"\n", - "dflow = dprep.read_csv(data, infer_column_types=True)\n", - "dflow.get_profile()\n", - "X = dflow.drop_columns(columns=['ERP'])\n", - "y = dflow.keep_columns(columns=['ERP'], validate_column_exists=True)\n", + "dataset = Dataset.Tabular.from_delimited_files(data)\n", + "X = dataset.drop_columns(columns=['ERP'])\n", + "y = dataset.keep_columns(columns=['ERP'], validate=True)\n", "X_train, X_test = X.random_split(percentage=0.8, seed=223)\n", - "y_train, y_test = y.random_split(percentage=0.8, seed=223) \n", - "dflow.head()" + "y_train, y_test = y.random_split(percentage=0.8, seed=223)\n", + "dataset.take(5).to_pandas_dataframe()" ] }, { @@ -502,7 +498,7 @@ "metadata": {}, "outputs": [], "source": [ - "for p in ['azureml-train-automl', 'azureml-sdk', 'azureml-core']:\n", + "for p in ['azureml-train-automl', 'azureml-core']:\n", " print('{}\\t{}'.format(p, dependencies[p]))" ] }, @@ -512,7 +508,7 @@ "metadata": {}, "outputs": [], "source": [ - "myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn'], pip_packages=['azureml-sdk[automl]'])\n", + "myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn','py-xgboost==0.80'], pip_packages=['azureml-train-automl'])\n", "\n", "conda_env_file_name = 'myenv.yml'\n", "myenv.save_to_file('.', conda_env_file_name)" @@ -532,7 +528,7 @@ " content = cefr.read()\n", "\n", "with open(conda_env_file_name, 'w') as cefw:\n", - " cefw.write(content.replace(azureml.core.VERSION, dependencies['azureml-sdk']))\n", + " cefw.write(content.replace(azureml.core.VERSION, dependencies['azureml-train-automl']))\n", "\n", "# Substitute the actual model id in the script file.\n", "\n", diff --git a/how-to-use-azureml/automated-machine-learning/regression-hardware-performance/auto-ml-regression-hardware-performance.yml 
b/how-to-use-azureml/automated-machine-learning/regression-hardware-performance/auto-ml-regression-hardware-performance.yml index ddc29fa8..94323586 100644 --- a/how-to-use-azureml/automated-machine-learning/regression-hardware-performance/auto-ml-regression-hardware-performance.yml +++ b/how-to-use-azureml/automated-machine-learning/regression-hardware-performance/auto-ml-regression-hardware-performance.yml @@ -2,6 +2,8 @@ name: auto-ml-regression-hardware-performance dependencies: - pip: - azureml-sdk + - azureml-defaults + - azureml-explain-model - azureml-train-automl - azureml-widgets - matplotlib diff --git a/how-to-use-azureml/automated-machine-learning/remote-amlcompute-with-onnx/auto-ml-remote-amlcompute-with-onnx.ipynb b/how-to-use-azureml/automated-machine-learning/remote-amlcompute-with-onnx/auto-ml-remote-amlcompute-with-onnx.ipynb index 5ca334e9..32c06d56 100644 --- a/how-to-use-azureml/automated-machine-learning/remote-amlcompute-with-onnx/auto-ml-remote-amlcompute-with-onnx.ipynb +++ b/how-to-use-azureml/automated-machine-learning/remote-amlcompute-with-onnx/auto-ml-remote-amlcompute-with-onnx.ipynb @@ -73,10 +73,7 @@ "source": [ "import logging\n", "import os\n", - "import csv\n", "\n", - "from matplotlib import pyplot as plt\n", - "import numpy as np\n", "import pandas as pd\n", "from sklearn import datasets\n", "from sklearn.model_selection import train_test_split\n", @@ -84,8 +81,8 @@ "import azureml.core\n", "from azureml.core.experiment import Experiment\n", "from azureml.core.workspace import Workspace\n", - "from azureml.train.automl import AutoMLConfig\n", - "import azureml.dataprep as dprep" + "from azureml.core.dataset import Dataset\n", + "from azureml.train.automl import AutoMLConfig" ] }, { @@ -137,7 +134,7 @@ "from azureml.core.compute import ComputeTarget\n", "\n", "# Choose a name for your cluster.\n", - "amlcompute_cluster_name = \"cpu-cluster\"\n", + "amlcompute_cluster_name = \"automlc2\"\n", "\n", "found = False\n", "# Check if 
this compute target already exists in the workspace.\n", @@ -156,11 +153,12 @@ " # Create the cluster.\\n\",\n", " compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)\n", "\n", - " # Can poll for a minimum number of nodes and for a specific timeout.\n", - " # If no min_node_count is provided, it will use the scale settings for the cluster.\n", - " compute_target.wait_for_completion(show_output = True, min_node_count = None, timeout_in_minutes = 20)\n", + "print('Checking cluster status...')\n", + "# Can poll for a minimum number of nodes and for a specific timeout.\n", + "# If no min_node_count is provided, it will use the scale settings for the cluster.\n", + "compute_target.wait_for_completion(show_output = True, min_node_count = None, timeout_in_minutes = 20)\n", "\n", - " # For a more detailed view of current AmlCompute status, use get_status()." + "# For a more detailed view of current AmlCompute status, use get_status()." ] }, { @@ -236,11 +234,8 @@ "# Set compute target to AmlCompute\n", "conda_run_config.target = compute_target\n", "conda_run_config.environment.docker.enabled = True\n", - "conda_run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n", "\n", - "dprep_dependency = 'azureml-dataprep==' + pkg_resources.get_distribution(\"azureml-dataprep\").version\n", - "\n", - "cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]', dprep_dependency], conda_packages=['numpy','py-xgboost<=0.80'])\n", + "cd = CondaDependencies.create(conda_packages=['numpy','py-xgboost<=0.80'])\n", "conda_run_config.environment.python.conda_dependencies = cd" ] }, @@ -248,9 +243,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Dprep reference\n", + "### Creating a TabularDataset\n", "\n", - "Defined X and y as dprep references, which are passed to automated machine learning in the AutoMLConfig." 
+ "Defined X and y as `TabularDataset`s, which are passed to automated machine learning in the AutoMLConfig." ] }, { @@ -259,8 +254,8 @@ "metadata": {}, "outputs": [], "source": [ - "X = dprep.read_csv(path=ds.path('irisdata/X_train.csv'), infer_column_types=True)\n", - "y = dprep.read_csv(path=ds.path('irisdata/y_train.csv'), infer_column_types=True)" + "X = Dataset.Tabular.from_delimited_files(path=ds.path('irisdata/X_train.csv'))\n", + "y = Dataset.Tabular.from_delimited_files(path=ds.path('irisdata/y_train.csv'))" ] }, { @@ -498,8 +493,7 @@ " res_path = 'onnx_resource.json'\n", " run.download_file(name=constants.MODEL_RESOURCE_PATH_ONNX, output_file_path=res_path)\n", " with open(res_path) as f:\n", - " onnx_res = json.load(f)\n", - " return onnx_res\n", + " return json.load(f)\n", "\n", "if onnxrt_present and python_version_compatible: \n", " mdl_bytes = onnx_mdl.SerializeToString()\n", diff --git a/how-to-use-azureml/automated-machine-learning/remote-amlcompute-with-onnx/auto-ml-remote-amlcompute-with-onnx.yml b/how-to-use-azureml/automated-machine-learning/remote-amlcompute-with-onnx/auto-ml-remote-amlcompute-with-onnx.yml index 6beced4e..22bad59a 100644 --- a/how-to-use-azureml/automated-machine-learning/remote-amlcompute-with-onnx/auto-ml-remote-amlcompute-with-onnx.yml +++ b/how-to-use-azureml/automated-machine-learning/remote-amlcompute-with-onnx/auto-ml-remote-amlcompute-with-onnx.yml @@ -2,6 +2,8 @@ name: auto-ml-remote-amlcompute-with-onnx dependencies: - pip: - azureml-sdk + - azureml-defaults + - azureml-explain-model - azureml-train-automl - azureml-widgets - matplotlib diff --git a/how-to-use-azureml/automated-machine-learning/remote-amlcompute/auto-ml-remote-amlcompute.ipynb b/how-to-use-azureml/automated-machine-learning/remote-amlcompute/auto-ml-remote-amlcompute.ipynb index 82d2b610..c3591826 100644 --- a/how-to-use-azureml/automated-machine-learning/remote-amlcompute/auto-ml-remote-amlcompute.ipynb +++ 
b/how-to-use-azureml/automated-machine-learning/remote-amlcompute/auto-ml-remote-amlcompute.ipynb @@ -74,7 +74,6 @@ "source": [ "import logging\n", "import os\n", - "import csv\n", "\n", "from matplotlib import pyplot as plt\n", "import numpy as np\n", @@ -84,8 +83,8 @@ "import azureml.core\n", "from azureml.core.experiment import Experiment\n", "from azureml.core.workspace import Workspace\n", - "from azureml.train.automl import AutoMLConfig\n", - "import azureml.dataprep as dprep" + "from azureml.core.dataset import Dataset\n", + "from azureml.train.automl import AutoMLConfig" ] }, { @@ -137,7 +136,7 @@ "from azureml.core.compute import ComputeTarget\n", "\n", "# Choose a name for your cluster.\n", - "amlcompute_cluster_name = \"cpu-cluster\"\n", + "amlcompute_cluster_name = \"automlc2\"\n", "\n", "found = False\n", "# Check if this compute target already exists in the workspace.\n", @@ -156,11 +155,12 @@ " # Create the cluster.\\n\",\n", " compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)\n", "\n", - " # Can poll for a minimum number of nodes and for a specific timeout.\n", - " # If no min_node_count is provided, it will use the scale settings for the cluster.\n", - " compute_target.wait_for_completion(show_output = True, min_node_count = None, timeout_in_minutes = 20)\n", + "print('Checking cluster status...')\n", + "# Can poll for a minimum number of nodes and for a specific timeout.\n", + "# If no min_node_count is provided, it will use the scale settings for the cluster.\n", + "compute_target.wait_for_completion(show_output = True, min_node_count = None, timeout_in_minutes = 20)\n", "\n", - " # For a more detailed view of current AmlCompute status, use get_status()." + "# For a more detailed view of current AmlCompute status, use get_status()." 
] }, { @@ -210,11 +210,8 @@ "# Set compute target to AmlCompute\n", "conda_run_config.target = compute_target\n", "conda_run_config.environment.docker.enabled = True\n", - "conda_run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n", "\n", - "dprep_dependency = 'azureml-dataprep==' + pkg_resources.get_distribution(\"azureml-dataprep\").version\n", - "\n", - "cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]', dprep_dependency], conda_packages=['numpy','py-xgboost<=0.80'])\n", + "cd = CondaDependencies.create(conda_packages=['numpy','py-xgboost<=0.80'])\n", "conda_run_config.environment.python.conda_dependencies = cd" ] }, @@ -222,9 +219,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Dprep reference\n", + "### Creating TabularDataset\n", "\n", - "Defined X and y as dprep references, which are passed to automated machine learning in the AutoMLConfig." + "Defined X and y as `TabularDataset`s, which are passed to Automated ML in the AutoMLConfig. By default, `from_delimited_files` sets `infer_column_types` to true, which infers the column types automatically. If you wish to set the column types manually, use the `set_column_types` argument to specify the type of each column."
] }, { @@ -233,8 +230,8 @@ "metadata": {}, "outputs": [], "source": [ - "X = dprep.read_csv(path=ds.path('digitsdata/X_train.csv'), infer_column_types=True)\n", - "y = dprep.read_csv(path=ds.path('digitsdata/y_train.csv'), infer_column_types=True)" + "X = Dataset.Tabular.from_delimited_files(path=ds.path('digitsdata/X_train.csv'))\n", + "y = Dataset.Tabular.from_delimited_files(path=ds.path('digitsdata/y_train.csv'))" ] }, { diff --git a/how-to-use-azureml/automated-machine-learning/remote-amlcompute/auto-ml-remote-amlcompute.yml b/how-to-use-azureml/automated-machine-learning/remote-amlcompute/auto-ml-remote-amlcompute.yml index 41b4f214..6ec4511a 100644 --- a/how-to-use-azureml/automated-machine-learning/remote-amlcompute/auto-ml-remote-amlcompute.yml +++ b/how-to-use-azureml/automated-machine-learning/remote-amlcompute/auto-ml-remote-amlcompute.yml @@ -2,6 +2,8 @@ name: auto-ml-remote-amlcompute dependencies: - pip: - azureml-sdk + - azureml-defaults + - azureml-explain-model - azureml-train-automl - azureml-widgets - matplotlib diff --git a/how-to-use-azureml/automated-machine-learning/sql-server/setup/auto-ml-sql-setup.ipynb b/how-to-use-azureml/automated-machine-learning/sql-server/setup/auto-ml-sql-setup.ipynb index 8bf4e0d3..cb227bcd 100644 --- a/how-to-use-azureml/automated-machine-learning/sql-server/setup/auto-ml-sql-setup.ipynb +++ b/how-to-use-azureml/automated-machine-learning/sql-server/setup/auto-ml-sql-setup.ipynb @@ -342,7 +342,6 @@ " n_cross_validations = n_cross_validations, \r\n", " preprocess = preprocess,\r\n", " verbosity = logging.INFO, \r\n", - " enable_ensembling = False,\r\n", " X = X_train, \r\n", " y = y_train, \r\n", " path = project_folder,\r\n", diff --git a/how-to-use-azureml/azure-databricks/automl/automl-databricks-local-01.ipynb b/how-to-use-azureml/azure-databricks/automl/automl-databricks-local-01.ipynb index 04223832..23a79fda 100644 --- a/how-to-use-azureml/azure-databricks/automl/automl-databricks-local-01.ipynb +++ 
b/how-to-use-azureml/azure-databricks/automl/automl-databricks-local-01.ipynb @@ -314,25 +314,18 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Load Training Data Using DataPrep" + "## Load Training Data Using Dataset" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Automated ML takes a Dataflow as input.\n", + "Automated ML takes a `TabularDataset` as input.\n", "\n", - "If you are familiar with Pandas and have done your data preparation work in Pandas already, you can use the `read_pandas_dataframe` method in dprep to convert the DataFrame to a Dataflow.\n", - "```python\n", - "df = pd.read_csv(...)\n", - "# apply some transforms\n", - "dprep.read_pandas_dataframe(df, temp_folder='/path/accessible/by/both/driver/and/worker')\n", - "```\n", + "You are free to use the data preparation libraries/tools of your choice to do the required preparation. Once you are done, you can write the result to a datastore and create a TabularDataset from it.\n", "\n", - "If you just need to ingest data without doing any preparation, you can directly use AzureML Data Prep (Data Prep) to do so. The code below demonstrates this scenario. Data Prep also has data preparation capabilities, we have many [sample notebooks](https://github.com/Microsoft/AMLDataPrepDocs) demonstrating the capabilities.\n", - "\n", - "You will get the datastore you registered previously and pass it to Data Prep for reading. The data comes from the digits dataset: `sklearn.datasets.load_digits()`. `DataPath` points to a specific location within a datastore. " + "You will get the datastore you registered previously and pass it to Dataset for reading. The data comes from the digits dataset: `sklearn.datasets.load_digits()`. `DataPath` points to a specific location within a datastore.
" ] }, { @@ -341,21 +334,21 @@ "metadata": {}, "outputs": [], "source": [ - "import azureml.dataprep as dprep\n", + "from azureml.core.dataset import Dataset\n", "from azureml.data.datapath import DataPath\n", "\n", "datastore = Datastore.get(workspace = ws, datastore_name = datastore_name)\n", "\n", - "X_train = dprep.read_csv(datastore.path('X.csv'))\n", - "y_train = dprep.read_csv(datastore.path('y.csv')).to_long(dprep.ColumnSelector(term='.*', use_regex = True))" + "X_train = Dataset.Tabular.from_delimited_files(datastore.path('X.csv'))\n", + "y_train = Dataset.Tabular.from_delimited_files(datastore.path('y.csv'))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Review the Data Preparation Result\n", - "You can peek the result of a Dataflow at any range using `skip(i)` and `head(j)`. Doing so evaluates only j records for all the steps in the Dataflow, which makes it fast even against large datasets." + "## Review the TabularDataset\n", + "You can peek the result of a TabularDataset at any range using `skip(i)` and `take(j).to_pandas_dataframe()`. Doing so evaluates only j records for all the steps in the TabularDataset, which makes it fast even against large datasets." 
] }, { @@ -364,7 +357,7 @@ "metadata": {}, "outputs": [], "source": [ - "X_train.get_profile()" + "X_train.take(5).to_pandas_dataframe()" ] }, { @@ -373,7 +366,7 @@ "metadata": {}, "outputs": [], "source": [ - "y_train.get_profile()" + "y_train.take(5).to_pandas_dataframe()" ] }, { diff --git a/how-to-use-azureml/azure-databricks/automl/automl-databricks-local-with-deployment.ipynb b/how-to-use-azureml/azure-databricks/automl/automl-databricks-local-with-deployment.ipynb index 56b23696..f765cccf 100644 --- a/how-to-use-azureml/azure-databricks/automl/automl-databricks-local-with-deployment.ipynb +++ b/how-to-use-azureml/azure-databricks/automl/automl-databricks-local-with-deployment.ipynb @@ -331,25 +331,18 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Load Training Data Using DataPrep" + "## Load Training Data Using Dataset" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Automated ML takes a Dataflow as input.\n", + "Automated ML takes a `TabularDataset` as input.\n", "\n", - "If you are familiar with Pandas and have done your data preparation work in Pandas already, you can use the `read_pandas_dataframe` method in dprep to convert the DataFrame to a Dataflow.\n", - "```python\n", - "df = pd.read_csv(...)\n", - "# apply some transforms\n", - "dprep.read_pandas_dataframe(df, temp_folder='/path/accessible/by/both/driver/and/worker')\n", - "```\n", + "You are free to use the data preparation libraries/tools of your choice to do the required preparation. Once you are done, you can write the result to a datastore and create a TabularDataset from it.\n", "\n", - "If you just need to ingest data without doing any preparation, you can directly use AzureML Data Prep (Data Prep) to do so. The code below demonstrates this scenario.
Data Prep also has data preparation capabilities, we have many [sample notebooks](https://github.com/Microsoft/AMLDataPrepDocs) demonstrating the capabilities.\n", - "\n", - "You will get the datastore you registered previously and pass it to Data Prep for reading. The data comes from the digits dataset: `sklearn.datasets.load_digits()`. `DataPath` points to a specific location within a datastore. " + "You will get the datastore you registered previously and pass it to Dataset for reading. The data comes from the digits dataset: `sklearn.datasets.load_digits()`. `DataPath` points to a specific location within a datastore. " ] }, { @@ -358,21 +351,21 @@ "metadata": {}, "outputs": [], "source": [ - "import azureml.dataprep as dprep\n", + "from azureml.core.dataset import Dataset\n", "from azureml.data.datapath import DataPath\n", "\n", "datastore = Datastore.get(workspace = ws, datastore_name = datastore_name)\n", "\n", - "X_train = dprep.read_csv(datastore.path('X.csv'))\n", - "y_train = dprep.read_csv(datastore.path('y.csv')).to_long(dprep.ColumnSelector(term='.*', use_regex = True))" + "X_train = Dataset.Tabular.from_delimited_files(datastore.path('X.csv'))\n", + "y_train = Dataset.Tabular.from_delimited_files(datastore.path('y.csv'))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Review the Data Preparation Result\n", - "You can peek the result of a Dataflow at any range using skip(i) and head(j). Doing so evaluates only j records for all the steps in the Dataflow, which makes it fast even against large datasets." + "## Review the TabularDataset\n", + "You can peek the result of a TabularDataset at any range using `skip(i)` and `take(j).to_pandas_dataframe()`. Doing so evaluates only j records for all the steps in the TabularDataset, which makes it fast even against large datasets." 
] }, { @@ -381,7 +374,7 @@ "metadata": {}, "outputs": [], "source": [ - "X_train.get_profile()" + "X_train.take(5).to_pandas_dataframe()" ] }, { @@ -390,7 +383,7 @@ "metadata": {}, "outputs": [], "source": [ - "y_train.get_profile()" + "y_train.take(5).to_pandas_dataframe()" ] }, { diff --git a/how-to-use-azureml/deploy-to-cloud/model-register-and-deploy.ipynb b/how-to-use-azureml/deploy-to-cloud/model-register-and-deploy.ipynb index 1175ddfd..2836a25e 100644 --- a/how-to-use-azureml/deploy-to-cloud/model-register-and-deploy.ipynb +++ b/how-to-use-azureml/deploy-to-cloud/model-register-and-deploy.ipynb @@ -115,6 +115,36 @@ " workspace=ws)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create Environment" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can now create and/or use an Environment object when deploying a Webservice. The Environment can have been previously registered with your Workspace, or it will be registered with it as a part of the Webservice deployment. Only Environments that were created using azureml-defaults version 1.0.48 or later will work with this new handling however.\n", + "\n", + "More information can be found in our [using environments notebook](../training/using-environments/using-environments.ipynb)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core import Environment\n", + "\n", + "env = Environment.from_conda_specification(name='deploytocloudenv', file_path='myenv.yml')\n", + "\n", + "# This is optional at this point\n", + "# env.register(workspace=ws)" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -153,10 +183,7 @@ "source": [ "from azureml.core.model import InferenceConfig\n", "\n", - "inference_config = InferenceConfig(runtime= \"python\", \n", - " entry_script=\"score.py\",\n", - " conda_file=\"myenv.yml\", \n", - " extra_docker_file_steps=\"helloworld.txt\")" + "inference_config = InferenceConfig(entry_script=\"score.py\", environment=env)" ] }, { diff --git a/how-to-use-azureml/deployment/accelerated-models/accelerated-models-object-detection.ipynb b/how-to-use-azureml/deployment/accelerated-models/accelerated-models-object-detection.ipynb index d1af93d4..ab65977d 100644 --- a/how-to-use-azureml/deployment/accelerated-models/accelerated-models-object-detection.ipynb +++ b/how-to-use-azureml/deployment/accelerated-models/accelerated-models-object-detection.ipynb @@ -336,7 +336,7 @@ " num_replicas=1,\n", " auth_enabled = False)\n", "\n", - "aks_service_name ='my-aks-service'\n", + "aks_service_name ='my-aks-service-3'\n", "\n", "aks_service = Webservice.deploy_from_image(workspace = ws,\n", " name = aks_service_name,\n", diff --git a/how-to-use-azureml/deployment/accelerated-models/accelerated-models-quickstart.ipynb b/how-to-use-azureml/deployment/accelerated-models/accelerated-models-quickstart.ipynb index a840d28a..0f7f20b4 100644 --- a/how-to-use-azureml/deployment/accelerated-models/accelerated-models-quickstart.ipynb +++ b/how-to-use-azureml/deployment/accelerated-models/accelerated-models-quickstart.ipynb @@ -404,7 +404,7 @@ " num_replicas=1,\n", " auth_enabled = False)\n", "\n", - "aks_service_name ='my-aks-service'\n", + "aks_service_name 
='my-aks-service-1'\n", "\n", "aks_service = Webservice.deploy_from_image(workspace = ws,\n", " name = aks_service_name,\n", diff --git a/how-to-use-azureml/deployment/accelerated-models/accelerated-models-training.ipynb b/how-to-use-azureml/deployment/accelerated-models/accelerated-models-training.ipynb index aa637f1b..88918c22 100644 --- a/how-to-use-azureml/deployment/accelerated-models/accelerated-models-training.ipynb +++ b/how-to-use-azureml/deployment/accelerated-models/accelerated-models-training.ipynb @@ -694,7 +694,7 @@ " num_replicas=1,\n", " auth_enabled = False)\n", "\n", - "aks_service_name ='my-aks-service'\n", + "aks_service_name ='my-aks-service-2'\n", "\n", "aks_service = Webservice.deploy_from_image(workspace = ws,\n", " name = aks_service_name,\n", diff --git a/how-to-use-azureml/deployment/enable-app-insights-in-production-service/enable-app-insights-in-production-service.ipynb b/how-to-use-azureml/deployment/enable-app-insights-in-production-service/enable-app-insights-in-production-service.ipynb index 79d98eac..7a1831c5 100644 --- a/how-to-use-azureml/deployment/enable-app-insights-in-production-service/enable-app-insights-in-production-service.ipynb +++ b/how-to-use-azureml/deployment/enable-app-insights-in-production-service/enable-app-insights-in-production-service.ipynb @@ -22,7 +22,7 @@ "If you want to log custom traces, you will follow the standard deplyment process for AKS and you will:\n", "1. Update scoring file.\n", "2. Update aks configuration.\n", - "3. Build new image and deploy it. " + "3. Deploy the model with this new configuration. " ] }, { @@ -178,7 +178,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## 6. Create your new Image" + "## 6. 
Create Inference Configuration" ] }, { @@ -187,22 +187,11 @@ "metadata": {}, "outputs": [], "source": [ - "from azureml.core.image import ContainerImage\n", + "from azureml.core.model import InferenceConfig\n", "\n", - "image_config = ContainerImage.image_configuration(execution_script = \"score.py\",\n", - " runtime = \"python\",\n", - " conda_file = \"myenv.yml\",\n", - " description = \"Image with ridge regression model\",\n", - " tags = {'area': \"diabetes\", 'type': \"regression\"}\n", - " )\n", - "\n", - "image = ContainerImage.create(name = \"myimage1\",\n", - " # this is the model object\n", - " models = [model],\n", - " image_config = image_config,\n", - " workspace = ws)\n", - "\n", - "image.wait_for_creation(show_output = True)" + "inference_config = InferenceConfig(runtime= \"python\", \n", + " entry_script=\"score.py\",\n", + " conda_file=\"myenv.yml\")" ] }, { @@ -220,7 +209,7 @@ "source": [ "from azureml.core.webservice import AciWebservice\n", "\n", - "aciconfig = AciWebservice.deploy_configuration(cpu_cores = 1, \n", + "aci_deployment_config = AciWebservice.deploy_configuration(cpu_cores = 1, \n", " memory_gb = 1, \n", " tags = {'area': \"diabetes\", 'type': \"regression\"}, \n", " description = 'Predict diabetes using regression model',\n", @@ -236,11 +225,7 @@ "from azureml.core.webservice import Webservice\n", "\n", "aci_service_name = 'my-aci-service-4'\n", - "print(aci_service_name)\n", - "aci_service = Webservice.deploy_from_image(deployment_config = aciconfig,\n", - " image = image,\n", - " name = aci_service_name,\n", - " workspace = ws)\n", + "aci_service = Model.deploy(ws, aci_service_name, [model], inference_config, aci_deployment_config)\n", "aci_service.wait_for_deployment(True)\n", "print(aci_service.state)" ] @@ -361,7 +346,7 @@ "outputs": [], "source": [ "#Set the web service configuration\n", - "aks_config = AksWebservice.deploy_configuration(enable_app_insights=True)" + "aks_deployment_config = 
AksWebservice.deploy_configuration(enable_app_insights=True)" ] }, { @@ -379,12 +364,12 @@ "source": [ "if aks_target.provisioning_state== \"Succeeded\": \n", " aks_service_name ='aks-w-dc5'\n", - " aks_service = Webservice.deploy_from_image(workspace = ws, \n", - " name = aks_service_name,\n", - " image = image,\n", - " deployment_config = aks_config,\n", - " deployment_target = aks_target\n", - " )\n", + " aks_service = Model.deploy(ws,\n", + " aks_service_name, \n", + " [model], \n", + " inference_config, \n", + " aks_deployment_config, \n", + " deployment_target = aks_target) \n", " aks_service.wait_for_deployment(show_output = True)\n", " print(aks_service.state)\n", "else:\n", @@ -464,7 +449,6 @@ "%%time\n", "aks_service.delete()\n", "aci_service.delete()\n", - "image.delete()\n", "model.delete()" ] } diff --git a/how-to-use-azureml/deployment/onnx/onnx-convert-aml-deploy-tinyyolo.ipynb b/how-to-use-azureml/deployment/onnx/onnx-convert-aml-deploy-tinyyolo.ipynb index 9c147c3c..b7f35149 100644 --- a/how-to-use-azureml/deployment/onnx/onnx-convert-aml-deploy-tinyyolo.ipynb +++ b/how-to-use-azureml/deployment/onnx/onnx-convert-aml-deploy-tinyyolo.ipynb @@ -243,7 +243,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Create container image\n", + "### Setting up inference configuration\n", "First we create a YAML file that specifies which dependencies we would like to see in our container." ] }, @@ -265,7 +265,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Then we have Azure ML create the container. This step will likely take a few minutes." + "Then we create the inference configuration." 
] }, { @@ -274,48 +274,19 @@ "metadata": {}, "outputs": [], "source": [ - "from azureml.core.image import ContainerImage\n", + "from azureml.core.model import InferenceConfig\n", "\n", - "image_config = ContainerImage.image_configuration(execution_script = \"score.py\",\n", - " runtime = \"python\",\n", - " conda_file = \"myenv.yml\",\n", - " docker_file = \"Dockerfile\",\n", - " description = \"TinyYOLO ONNX Demo\",\n", - " tags = {\"demo\": \"onnx\"}\n", - " )\n", - "\n", - "\n", - "image = ContainerImage.create(name = \"onnxyolo\",\n", - " models = [model],\n", - " image_config = image_config,\n", - " workspace = ws)\n", - "\n", - "image.wait_for_creation(show_output = True)" + "inference_config = InferenceConfig(runtime= \"python\", \n", + " entry_script=\"score.py\",\n", + " conda_file=\"myenv.yml\",\n", + " extra_docker_file_steps = \"Dockerfile\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "In case you need to debug your code, the next line of code accesses the log file." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(image.image_build_log_uri)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We're all set! Let's get our model chugging.\n", - "\n", - "### Deploy the container image" + "### Deploy the model" ] }, { @@ -336,7 +307,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The following cell will likely take a few minutes to run as well." + "The following cell will take a few minutes to run as the model gets packaged up and deployed to ACI." 
] }, { @@ -348,14 +319,9 @@ "from azureml.core.webservice import Webservice\n", "from random import randint\n", "\n", - "aci_service_name = 'onnx-tinyyolo'+str(randint(0,100))\n", + "aci_service_name = 'my-aci-service-15ad'\n", "print(\"Service\", aci_service_name)\n", - "\n", - "aci_service = Webservice.deploy_from_image(deployment_config = aciconfig,\n", - " image = image,\n", - " name = aci_service_name,\n", - " workspace = ws)\n", - "\n", + "aci_service = Model.deploy(ws, aci_service_name, [model], inference_config, aciconfig)\n", "aci_service.wait_for_deployment(True)\n", "print(aci_service.state)" ] diff --git a/how-to-use-azureml/deployment/onnx/onnx-inference-facial-expression-recognition-deploy.ipynb b/how-to-use-azureml/deployment/onnx/onnx-inference-facial-expression-recognition-deploy.ipynb index 3f3a0fd9..f57f927c 100644 --- a/how-to-use-azureml/deployment/onnx/onnx-inference-facial-expression-recognition-deploy.ipynb +++ b/how-to-use-azureml/deployment/onnx/onnx-inference-facial-expression-recognition-deploy.ipynb @@ -54,7 +54,7 @@ "\n", "### 3. Download sample data and pre-trained ONNX model from ONNX Model Zoo.\n", "\n", - "In the following lines of code, we download [the trained ONNX Emotion FER+ model and corresponding test data](https://github.com/onnx/models/tree/master/emotion_ferplus) and place them in the same folder as this tutorial notebook. For more information about the FER+ dataset, please visit Microsoft Researcher Emad Barsoum's [FER+ source data repository](https://github.com/ebarsoum/FERPlus)." + "In the following lines of code, we download [the trained ONNX Emotion FER+ model and corresponding test data](https://github.com/onnx/models/tree/master/vision/body_analysis/emotion_ferplus) and place them in the same folder as this tutorial notebook. For more information about the FER+ dataset, please visit Microsoft Researcher Emad Barsoum's [FER+ source data repository](https://github.com/ebarsoum/FERPlus)." 
] }, { @@ -176,7 +176,7 @@ "source": [ "### ONNX FER+ Model Methodology\n", "\n", - "The image classification model we are using is pre-trained using Microsoft's deep learning cognitive toolkit, [CNTK](https://github.com/Microsoft/CNTK), from the [ONNX model zoo](http://github.com/onnx/models). The model zoo has many other models that can be deployed on cloud providers like AzureML without any additional training. To ensure that our cloud deployed model works, we use testing data from the well-known FER+ data set, provided as part of the [trained Emotion Recognition model](https://github.com/onnx/models/tree/master/emotion_ferplus) in the ONNX model zoo.\n", + "The image classification model we are using is pre-trained using Microsoft's deep learning cognitive toolkit, [CNTK](https://github.com/Microsoft/CNTK), from the [ONNX model zoo](http://github.com/onnx/models). The model zoo has many other models that can be deployed on cloud providers like AzureML without any additional training. To ensure that our cloud deployed model works, we use testing data from the well-known FER+ data set, provided as part of the [trained Emotion Recognition model](https://github.com/onnx/models/tree/master/vision/body_analysis/emotion_ferplus) in the ONNX model zoo.\n", "\n", "The original Facial Emotion Recognition (FER) Dataset was released in 2013 by Pierre-Luc Carrier and Aaron Courville as part of a [Kaggle Competition](https://www.kaggle.com/c/challenges-in-representation-learning-facial-expression-recognition-challenge/data), but some of the labels are not entirely appropriate for the expression. In the FER+ Dataset, each photo was evaluated by at least 10 croud sourced reviewers, creating a more accurate basis for ground truth. \n", "\n", @@ -341,9 +341,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Create the Container Image\n", - "\n", - "This step will likely take a few minutes." 
+ "### Setup inference configuration" ] }, { @@ -352,48 +350,19 @@ "metadata": {}, "outputs": [], "source": [ - "from azureml.core.image import ContainerImage\n", + "from azureml.core.model import InferenceConfig\n", "\n", - "image_config = ContainerImage.image_configuration(execution_script = \"score.py\",\n", - " runtime = \"python\",\n", - " conda_file = \"myenv.yml\",\n", - " docker_file = \"Dockerfile\",\n", - " description = \"Emotion ONNX Runtime container\",\n", - " tags = {\"demo\": \"onnx\"})\n", - "\n", - "\n", - "image = ContainerImage.create(name = \"onnximage\",\n", - " # this is the model object\n", - " models = [model],\n", - " image_config = image_config,\n", - " workspace = ws)\n", - "\n", - "image.wait_for_creation(show_output = True)" + "inference_config = InferenceConfig(runtime= \"python\", \n", + " entry_script=\"score.py\",\n", + " conda_file=\"myenv.yml\",\n", + " extra_docker_file_steps = \"Dockerfile\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "In case you need to debug your code, the next line of code accesses the log file." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(image.image_build_log_uri)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We're all done specifying what we want our virtual machine to do. Let's configure and deploy our container image.\n", - "\n", - "### Deploy the container image" + "### Deploy the model" ] }, { @@ -410,6 +379,13 @@ " description = 'ONNX for emotion recognition model')" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The following cell will likely take a few minutes to run as well." 
+ ] + }, { "cell_type": "code", "execution_count": null, @@ -420,23 +396,11 @@ "\n", "aci_service_name = 'onnx-demo-emotion'\n", "print(\"Service\", aci_service_name)\n", - "\n", - "aci_service = Webservice.deploy_from_image(deployment_config = aciconfig,\n", - " image = image,\n", - " name = aci_service_name,\n", - " workspace = ws)\n", - "\n", + "aci_service = Model.deploy(ws, aci_service_name, [model], inference_config, aciconfig)\n", "aci_service.wait_for_deployment(True)\n", "print(aci_service.state)" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The following cell will likely take a few minutes to run as well." - ] - }, { "cell_type": "code", "execution_count": null, @@ -470,7 +434,7 @@ "\n", "### Useful Helper Functions\n", "\n", - "We preprocess and postprocess our data (see score.py file) using the helper functions specified in the [ONNX FER+ Model page in the Model Zoo repository](https://github.com/onnx/models/tree/master/emotion_ferplus)." + "We preprocess and postprocess our data (see score.py file) using the helper functions specified in the [ONNX FER+ Model page in the Model Zoo repository](https://github.com/onnx/models/tree/master/vision/body_analysis/emotion_ferplus)." ] }, { diff --git a/how-to-use-azureml/deployment/onnx/onnx-inference-mnist-deploy.ipynb b/how-to-use-azureml/deployment/onnx/onnx-inference-mnist-deploy.ipynb index 43c22a09..4e3e83cc 100644 --- a/how-to-use-azureml/deployment/onnx/onnx-inference-mnist-deploy.ipynb +++ b/how-to-use-azureml/deployment/onnx/onnx-inference-mnist-deploy.ipynb @@ -54,7 +54,7 @@ "\n", "### 3. Download sample data and pre-trained ONNX model from ONNX Model Zoo.\n", "\n", - "In the following lines of code, we download [the trained ONNX MNIST model and corresponding test data](https://github.com/onnx/models/tree/master/mnist) and place them in the same folder as this tutorial notebook. 
For more information about the MNIST dataset, please visit [Yan LeCun's website](http://yann.lecun.com/exdb/mnist/)." + "In the following lines of code, we download [the trained ONNX MNIST model and corresponding test data](https://github.com/onnx/models/tree/master/vision/classification/mnist) and place them in the same folder as this tutorial notebook. For more information about the MNIST dataset, please visit [Yan LeCun's website](http://yann.lecun.com/exdb/mnist/)." ] }, { @@ -187,7 +187,7 @@ "source": [ "### ONNX MNIST Model Methodology\n", "\n", - "The image classification model we are using is pre-trained using Microsoft's deep learning cognitive toolkit, [CNTK](https://github.com/Microsoft/CNTK), from the [ONNX model zoo](http://github.com/onnx/models). The model zoo has many other models that can be deployed on cloud providers like AzureML without any additional training. To ensure that our cloud deployed model works, we use testing data from the famous MNIST data set, provided as part of the [trained MNIST model](https://github.com/onnx/models/tree/master/mnist) in the ONNX model zoo.\n", + "The image classification model we are using is pre-trained using Microsoft's deep learning cognitive toolkit, [CNTK](https://github.com/Microsoft/CNTK), from the [ONNX model zoo](http://github.com/onnx/models). The model zoo has many other models that can be deployed on cloud providers like AzureML without any additional training. To ensure that our cloud deployed model works, we use testing data from the famous MNIST data set, provided as part of the [trained MNIST model](https://github.com/onnx/models/tree/master/vision/classification/mnist) in the ONNX model zoo.\n", "\n", "***Input: Handwritten Images from MNIST Dataset***\n", "\n", @@ -325,8 +325,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Create the Container Image\n", - "This step will likely take a few minutes." 
+ "### Create Inference Configuration" ] }, { @@ -335,48 +334,19 @@ "metadata": {}, "outputs": [], "source": [ - "from azureml.core.image import ContainerImage\n", + "from azureml.core.model import InferenceConfig\n", "\n", - "image_config = ContainerImage.image_configuration(execution_script = \"score.py\",\n", - " runtime = \"python\",\n", - " conda_file = \"myenv.yml\",\n", - " docker_file = \"Dockerfile\",\n", - " description = \"MNIST ONNX Runtime container\",\n", - " tags = {\"demo\": \"onnx\"}) \n", - "\n", - "\n", - "image = ContainerImage.create(name = \"onnximage\",\n", - " # this is the model object\n", - " models = [model],\n", - " image_config = image_config,\n", - " workspace = ws)\n", - "\n", - "image.wait_for_creation(show_output = True)" + "inference_config = InferenceConfig(runtime= \"python\", \n", + " entry_script=\"score.py\",\n", + " extra_docker_file_steps = \"Dockerfile\",\n", + " conda_file=\"myenv.yml\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "In case you need to debug your code, the next line of code accesses the log file." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(image.image_build_log_uri)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We're all done specifying what we want our virtual machine to do. Let's configure and deploy our container image.\n", - "\n", - "### Deploy the container image" + "### Deploy the model" ] }, { @@ -397,7 +367,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The following cell will likely take a few minutes to run as well." + "The following cell will likely take a few minutes to run." 
] }, { @@ -410,12 +380,7 @@ "\n", "aci_service_name = 'onnx-demo-mnist'\n", "print(\"Service\", aci_service_name)\n", - "\n", - "aci_service = Webservice.deploy_from_image(deployment_config = aciconfig,\n", - " image = image,\n", - " name = aci_service_name,\n", - " workspace = ws)\n", - "\n", + "aci_service = Model.deploy(ws, aci_service_name, [model], inference_config, aciconfig)\n", "aci_service.wait_for_deployment(True)\n", "print(aci_service.state)" ] diff --git a/how-to-use-azureml/deployment/onnx/onnx-modelzoo-aml-deploy-resnet50.ipynb b/how-to-use-azureml/deployment/onnx/onnx-modelzoo-aml-deploy-resnet50.ipynb index 1213c12f..af3f5f1c 100644 --- a/how-to-use-azureml/deployment/onnx/onnx-modelzoo-aml-deploy-resnet50.ipynb +++ b/how-to-use-azureml/deployment/onnx/onnx-modelzoo-aml-deploy-resnet50.ipynb @@ -28,7 +28,7 @@ "ONNX is an open format for representing machine learning and deep learning models. ONNX enables open and interoperable AI by enabling data scientists and developers to use the tools of their choice without worrying about lock-in and flexibility to deploy to a variety of platforms. ONNX is developed and supported by a community of partners including Microsoft, Facebook, and Amazon. For more information, explore the [ONNX website](http://onnx.ai).\n", "\n", "## ResNet50 Details\n", - "ResNet classifies the major object in an input image into a set of 1000 pre-defined classes. For more information about the ResNet50 model and how it was created can be found on the [ONNX Model Zoo github](https://github.com/onnx/models/tree/master/models/image_classification/resnet). " + "ResNet classifies the major object in an input image into a set of 1000 pre-defined classes. More information about the ResNet50 model and how it was created can be found on the [ONNX Model Zoo github](https://github.com/onnx/models/tree/master/vision/classification/resnet). 
" ] }, { @@ -221,7 +221,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Create container image" + "### Create inference configuration" ] }, { @@ -249,7 +249,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Then we have Azure ML create the container. This step will likely take a few minutes." + "Create the inference configuration object" ] }, { @@ -258,48 +258,19 @@ "metadata": {}, "outputs": [], "source": [ - "from azureml.core.image import ContainerImage\n", + "from azureml.core.model import InferenceConfig\n", "\n", - "image_config = ContainerImage.image_configuration(execution_script = \"score.py\",\n", - " runtime = \"python\",\n", - " conda_file = \"myenv.yml\",\n", - " docker_file = \"Dockerfile\",\n", - " description = \"ONNX ResNet50 Demo\",\n", - " tags = {\"demo\": \"onnx\"}\n", - " )\n", - "\n", - "\n", - "image = ContainerImage.create(name = \"onnxresnet50v2\",\n", - " models = [model],\n", - " image_config = image_config,\n", - " workspace = ws)\n", - "\n", - "image.wait_for_creation(show_output = True)" + "inference_config = InferenceConfig(runtime= \"python\", \n", + " entry_script=\"score.py\",\n", + " conda_file=\"myenv.yml\",\n", + " extra_docker_file_steps = \"Dockerfile\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "In case you need to debug your code, the next line of code accesses the log file." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(image.image_build_log_uri)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We're all set! 
Let's get our model chugging.\n", - "\n", - "### Deploy the container image" + "### Deploy the model" ] }, { @@ -334,12 +305,7 @@ "\n", "aci_service_name = 'onnx-demo-resnet50'+str(randint(0,100))\n", "print(\"Service\", aci_service_name)\n", - "\n", - "aci_service = Webservice.deploy_from_image(deployment_config = aciconfig,\n", - " image = image,\n", - " name = aci_service_name,\n", - " workspace = ws)\n", - "\n", + "aci_service = Model.deploy(ws, aci_service_name, [model], inference_config, aciconfig)\n", "aci_service.wait_for_deployment(True)\n", "print(aci_service.state)" ] diff --git a/how-to-use-azureml/deployment/onnx/onnx-train-pytorch-aml-deploy-mnist.ipynb b/how-to-use-azureml/deployment/onnx/onnx-train-pytorch-aml-deploy-mnist.ipynb index 9a7f2035..a8a18aa2 100644 --- a/how-to-use-azureml/deployment/onnx/onnx-train-pytorch-aml-deploy-mnist.ipynb +++ b/how-to-use-azureml/deployment/onnx/onnx-train-pytorch-aml-deploy-mnist.ipynb @@ -28,7 +28,7 @@ "ONNX is an open format for representing machine learning and deep learning models. ONNX enables open and interoperable AI by enabling data scientists and developers to use the tools of their choice without worrying about lock-in and flexibility to deploy to a variety of platforms. ONNX is developed and supported by a community of partners including Microsoft, Facebook, and Amazon. For more information, explore the [ONNX website](http://onnx.ai).\n", "\n", "## MNIST Details\n", - "The Modified National Institute of Standards and Technology (MNIST) dataset consists of 70,000 grayscale images. Each image is a handwritten digit of 28x28 pixels, representing numbers from 0 to 9. For more information about the MNIST dataset, please visit [Yan LeCun's website](http://yann.lecun.com/exdb/mnist/). For more information about the MNIST model and how it was created can be found on the [ONNX Model Zoo github](https://github.com/onnx/models/tree/master/mnist). 
" + "The Modified National Institute of Standards and Technology (MNIST) dataset consists of 70,000 grayscale images. Each image is a handwritten digit of 28x28 pixels, representing numbers from 0 to 9. For more information about the MNIST dataset, please visit [Yan LeCun's website](http://yann.lecun.com/exdb/mnist/). For more information about the MNIST model and how it was created can be found on the [ONNX Model Zoo github](https://github.com/onnx/models/tree/master/vision/classification/mnist). " ] }, { @@ -401,7 +401,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Create container image\n", + "### Create inference configuration\n", "First we create a YAML file that specifies which dependencies we would like to see in our container." ] }, @@ -423,7 +423,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Then we have Azure ML create the container. This step will likely take a few minutes." + "Then we setup the inference configuration " ] }, { @@ -432,48 +432,19 @@ "metadata": {}, "outputs": [], "source": [ - "from azureml.core.image import ContainerImage\n", + "from azureml.core.model import InferenceConfig\n", "\n", - "image_config = ContainerImage.image_configuration(execution_script = \"score.py\",\n", - " runtime = \"python\",\n", - " conda_file = \"myenv.yml\",\n", - " docker_file = \"Dockerfile\",\n", - " description = \"MNIST ONNX Demo\",\n", - " tags = {\"demo\": \"onnx\"}\n", - " )\n", - "\n", - "\n", - "image = ContainerImage.create(name = \"onnxmnistdemo\",\n", - " models = [model],\n", - " image_config = image_config,\n", - " workspace = ws)\n", - "\n", - "image.wait_for_creation(show_output = True)" + "inference_config = InferenceConfig(runtime= \"python\", \n", + " entry_script=\"score.py\",\n", + " conda_file=\"myenv.yml\",\n", + " extra_docker_file_steps = \"Dockerfile\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "In case you need to debug your code, the next line of code accesses the log file." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(image.image_build_log_uri)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We're all set! Let's get our model chugging.\n", - "\n", - "### Deploy the container image" + "### Deploy the model" ] }, { @@ -504,16 +475,12 @@ "outputs": [], "source": [ "from azureml.core.webservice import Webservice\n", + "from azureml.core.model import Model\n", "from random import randint\n", "\n", "aci_service_name = 'onnx-demo-mnist'+str(randint(0,100))\n", "print(\"Service\", aci_service_name)\n", - "\n", - "aci_service = Webservice.deploy_from_image(deployment_config = aciconfig,\n", - " image = image,\n", - " name = aci_service_name,\n", - " workspace = ws)\n", - "\n", + "aci_service = Model.deploy(ws, aci_service_name, [model], inference_config, aciconfig)\n", "aci_service.wait_for_deployment(True)\n", "print(aci_service.state)" ] diff --git a/how-to-use-azureml/deployment/production-deploy-to-aks/production-deploy-to-aks.ipynb b/how-to-use-azureml/deployment/production-deploy-to-aks/production-deploy-to-aks.ipynb index 292eb621..0d934276 100644 --- a/how-to-use-azureml/deployment/production-deploy-to-aks/production-deploy-to-aks.ipynb +++ b/how-to-use-azureml/deployment/production-deploy-to-aks/production-deploy-to-aks.ipynb @@ -34,7 +34,6 @@ "from azureml.core import Workspace\n", "from azureml.core.compute import AksCompute, ComputeTarget\n", "from azureml.core.webservice import Webservice, AksWebservice\n", - "from azureml.core.image import Image\n", "from azureml.core.model import Model" ] }, @@ -97,8 +96,51 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Create an image\n", - "Create an image using the registered model the script that will load and run the model." 
+ "# Create the Environment\n", + "Create an environment that the model will be deployed with" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core import Environment\n", + "from azureml.core.conda_dependencies import CondaDependencies \n", + "\n", + "conda_deps = CondaDependencies.create(conda_packages=['numpy','scikit-learn'], pip_packages=['azureml-defaults'])\n", + "myenv = Environment(name='myenv')\n", + "myenv.python.conda_dependencies = conda_deps" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Use a custom Docker image\n", + "\n", + "You can also specify a custom Docker image to be used as base image if you don't want to use the default base image provided by Azure ML. Please make sure the custom Docker image has Ubuntu >= 16.04, Conda >= 4.5.\\* and Python(3.5.\\* or 3.6.\\*).\n", + "\n", + "Only supported with `python` runtime.\n", + "```python\n", + "# use an image available in public Container Registry without authentication\n", + "myenv.docker.base_image = \"mcr.microsoft.com/azureml/o16n-sample-user-base/ubuntu-miniconda\"\n", + "\n", + "# or, use an image available in a private Container Registry\n", + "myenv.docker.base_image = \"myregistry.azurecr.io/mycustomimage:1.0\"\n", + "myenv.docker.base_image_registry.address = \"myregistry.azurecr.io\"\n", + "myenv.docker.base_image_registry.username = \"username\"\n", + "myenv.docker.base_image_registry.password = \"password\"\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Write the Entry Script\n", + "Write the script that will be used to predict on your model" ] }, { @@ -136,67 +178,23 @@ " return error" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from azureml.core.conda_dependencies import CondaDependencies \n", - "\n", - "myenv = 
CondaDependencies.create(conda_packages=['numpy','scikit-learn'])\n", - "\n", - "with open(\"myenv.yml\",\"w\") as f:\n", - " f.write(myenv.serialize_to_string())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from azureml.core.image import ContainerImage\n", - "\n", - "image_config = ContainerImage.image_configuration(execution_script = \"score.py\",\n", - " runtime = \"python\",\n", - " conda_file = \"myenv.yml\",\n", - " description = \"Image with ridge regression model\",\n", - " tags = {'area': \"diabetes\", 'type': \"regression\"}\n", - " )\n", - "\n", - "image = ContainerImage.create(name = \"myimage1\",\n", - " # this is the model object\n", - " models = [model],\n", - " image_config = image_config,\n", - " workspace = ws)\n", - "\n", - "image.wait_for_creation(show_output = True)" - ] - }, { "cell_type": "markdown", "metadata": {}, "source": [ - "#### Use a custom Docker image\n", + "# Create the InferenceConfig\n", + "Create the inference config that will be used when deploying the model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.model import InferenceConfig\n", "\n", - "You can also specify a custom Docker image to be used as base image if you don't want to use the default base image provided by Azure ML. 
Please make sure the custom Docker image has Ubuntu >= 16.04, Conda >= 4.5.\\* and Python(3.5.\\* or 3.6.\\*).\n", - "\n", - "Only Supported for `ContainerImage`(from azureml.core.image) with `python` runtime.\n", - "```python\n", - "# use an image available in public Container Registry without authentication\n", - "image_config.base_image = \"mcr.microsoft.com/azureml/o16n-sample-user-base/ubuntu-miniconda\"\n", - "\n", - "# or, use an image available in a private Container Registry\n", - "image_config.base_image = \"myregistry.azurecr.io/mycustomimage:1.0\"\n", - "image_config.base_image_registry.address = \"myregistry.azurecr.io\"\n", - "image_config.base_image_registry.username = \"username\"\n", - "image_config.base_image_registry.password = \"password\"\n", - "\n", - "# or, use an image built during training.\n", - "image_config.base_image = run.properties[\"AzureML.DerivedImageName\"]\n", - "```\n", - "You can get the address of training image from the properties of a Run object. Only new runs submitted with azureml-sdk>=1.0.22 to AMLCompute targets will have the 'AzureML.DerivedImageName' property. Instructions on how to get a Run can be found in [manage-runs](../../training/manage-runs/manage-runs.ipynb). 
\n" + "inf_config = InferenceConfig(entry_script='score.py', environment=myenv)" ] }, { @@ -237,23 +235,21 @@ "metadata": {}, "outputs": [], "source": [ - "'''\n", - "from azureml.core.compute import ComputeTarget, AksCompute\n", + "# from azureml.core.compute import ComputeTarget, AksCompute\n", "\n", - "# Create the compute configuration and set virtual network information\n", - "config = AksCompute.provisioning_configuration(location=\"eastus2\")\n", - "config.vnet_resourcegroup_name = \"mygroup\"\n", - "config.vnet_name = \"mynetwork\"\n", - "config.subnet_name = \"default\"\n", - "config.service_cidr = \"10.0.0.0/16\"\n", - "config.dns_service_ip = \"10.0.0.10\"\n", - "config.docker_bridge_cidr = \"172.17.0.1/16\"\n", + "# # Create the compute configuration and set virtual network information\n", + "# config = AksCompute.provisioning_configuration(location=\"eastus2\")\n", + "# config.vnet_resourcegroup_name = \"mygroup\"\n", + "# config.vnet_name = \"mynetwork\"\n", + "# config.subnet_name = \"default\"\n", + "# config.service_cidr = \"10.0.0.0/16\"\n", + "# config.dns_service_ip = \"10.0.0.10\"\n", + "# config.docker_bridge_cidr = \"172.17.0.1/16\"\n", "\n", - "# Create the compute target\n", - "aks_target = ComputeTarget.create(workspace = ws,\n", - " name = \"myaks\",\n", - " provisioning_configuration = config)\n", - "'''" + "# # Create the compute target\n", + "# aks_target = ComputeTarget.create(workspace = ws,\n", + "# name = \"myaks\",\n", + "# provisioning_configuration = config)" ] }, { @@ -300,17 +296,15 @@ "metadata": {}, "outputs": [], "source": [ - "'''\n", - "# Use the default configuration (can also provide parameters to customize)\n", - "resource_id = '/subscriptions/92c76a2f-0e1c-4216-b65e-abf7a3f34c1e/resourcegroups/raymondsdk0604/providers/Microsoft.ContainerService/managedClusters/my-aks-0605d37425356b7d01'\n", + "# # Use the default configuration (can also provide parameters to customize)\n", + "# resource_id = 
'/subscriptions/92c76a2f-0e1c-4216-b65e-abf7a3f34c1e/resourcegroups/raymondsdk0604/providers/Microsoft.ContainerService/managedClusters/my-aks-0605d37425356b7d01'\n", "\n", - "create_name='my-existing-aks' \n", - "# Create the cluster\n", - "attach_config = AksCompute.attach_configuration(resource_id=resource_id)\n", - "aks_target = ComputeTarget.attach(workspace=ws, name=create_name, attach_configuration=attach_config)\n", - "# Wait for the operation to complete\n", - "aks_target.wait_for_completion(True)\n", - "'''" + "# create_name='my-existing-aks' \n", + "# # Create the cluster\n", + "# attach_config = AksCompute.attach_configuration(resource_id=resource_id)\n", + "# aks_target = ComputeTarget.attach(workspace=ws, name=create_name, attach_configuration=attach_config)\n", + "# # Wait for the operation to complete\n", + "# aks_target.wait_for_completion(True)" ] }, { @@ -326,8 +320,11 @@ "metadata": {}, "outputs": [], "source": [ - "#Set the web service configuration (using default here)\n", - "aks_config = AksWebservice.deploy_configuration()" + "# Set the web service configuration (using default here)\n", + "aks_config = AksWebservice.deploy_configuration()\n", + "\n", + "# # Enable token auth and disable (key) auth on the webservice\n", + "# aks_config = AksWebservice.deploy_configuration(token_auth_enabled=True, auth_enabled=False)\n" ] }, { @@ -339,11 +336,13 @@ "%%time\n", "aks_service_name ='aks-service-1'\n", "\n", - "aks_service = Webservice.deploy_from_image(workspace = ws, \n", - " name = aks_service_name,\n", - " image = image,\n", - " deployment_config = aks_config,\n", - " deployment_target = aks_target)\n", + "aks_service = Model.deploy(workspace=ws,\n", + " name=aks_service_name,\n", + " models=[model],\n", + " inference_config=inf_config,\n", + " deployment_config=aks_config,\n", + " deployment_target=aks_target)\n", + "\n", "aks_service.wait_for_deployment(show_output = True)\n", "print(aks_service.state)" ] @@ -390,11 +389,12 @@ "metadata": 
{}, "outputs": [], "source": [ - "# retreive the API keys. AML generates two keys.\n", - "'''\n", - "key1, Key2 = aks_service.get_keys()\n", - "print(key1)\n", - "'''" + "# # if (key) auth is enabled, retrieve the API keys. AML generates two keys.\n", + "# key1, Key2 = aks_service.get_keys()\n", + "# print(key1)\n", + "\n", + "# # if token auth is enabled, retrieve the token.\n", + "# access_token, refresh_after = aks_service.get_token()" ] }, { @@ -404,27 +404,28 @@ "outputs": [], "source": [ "# construct raw HTTP request and send to the service\n", - "'''\n", - "%%time\n", + "# %%time\n", "\n", - "import requests\n", + "# import requests\n", "\n", - "import json\n", + "# import json\n", "\n", - "test_sample = json.dumps({'data': [\n", - " [1,2,3,4,5,6,7,8,9,10], \n", - " [10,9,8,7,6,5,4,3,2,1]\n", - "]})\n", - "test_sample = bytes(test_sample,encoding = 'utf8')\n", + "# test_sample = json.dumps({'data': [\n", + "# [1,2,3,4,5,6,7,8,9,10], \n", + "# [10,9,8,7,6,5,4,3,2,1]\n", + "# ]})\n", + "# test_sample = bytes(test_sample,encoding = 'utf8')\n", "\n", - "# Don't forget to add key to the HTTP header.\n", - "headers = {'Content-Type':'application/json', 'Authorization': 'Bearer ' + key1}\n", + "# # If (key) auth is enabled, don't forget to add key to the HTTP header.\n", + "# headers = {'Content-Type':'application/json', 'Authorization': 'Bearer ' + key1}\n", "\n", - "resp = requests.post(aks_service.scoring_uri, test_sample, headers=headers)\n", + "# # If token auth is enabled, don't forget to add token to the HTTP header.\n", + "# headers = {'Content-Type':'application/json', 'Authorization': 'Bearer ' + access_token}\n", + "\n", + "# resp = requests.post(aks_service.scoring_uri, test_sample, headers=headers)\n", "\n", "\n", - "print(\"prediction:\", resp.text)\n", - "'''" + "# print(\"prediction:\", resp.text)" ] }, { @@ -443,7 +444,6 @@ "source": [ "%%time\n", "aks_service.delete()\n", - "image.delete()\n", "model.delete()" ] } diff --git 
a/how-to-use-azureml/explain-model/azure-integration/remote-explanation/explain-model-on-amlcompute.ipynb b/how-to-use-azureml/explain-model/azure-integration/remote-explanation/explain-model-on-amlcompute.ipynb new file mode 100644 index 00000000..2760c29f --- /dev/null +++ b/how-to-use-azureml/explain-model/azure-integration/remote-explanation/explain-model-on-amlcompute.ipynb @@ -0,0 +1,748 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) Microsoft Corporation. All rights reserved.\n", + "\n", + "Licensed under the MIT License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/explain-model/azure-integration/remote-explanation/explain-model-on-amlcompute.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Train and explain models remotely via Azure Machine Learning Compute\n", + "\n", + "\n", + "_**This notebook showcases how to use the Azure Machine Learning Interpretability SDK to train and explain a regression model remotely on an Azure Machine Learning Compute Target (AMLCompute).**_\n", + "\n", + "\n", + "\n", + "\n", + "## Table of Contents\n", + "\n", + "1. [Introduction](#Introduction)\n", + "1. [Setup](#Setup)\n", + " 1. Initialize a Workspace\n", + " 1. Create an Experiment\n", + " 1. Introduction to AmlCompute\n", + " 1. Submit an AmlCompute run in a few different ways\n", + " 1. Option 1: Provision as a run based compute target \n", + " 1. Option 2: Provision as a persistent compute target (Basic)\n", + " 1. Option 3: Provision as a persistent compute target (Advanced)\n", + "1. Additional operations to perform on AmlCompute\n", + "1. [Download model explanations from Azure Machine Learning Run History](#Download)\n", + "1. [Visualize explanations](#Visualize)\n", + "1. 
[Next steps](#Next)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Introduction\n", + "\n", + "This notebook showcases how to train and explain a regression model remotely via Azure Machine Learning Compute (AMLCompute), and download the calculated explanations locally for visualization.\n", + "It demonstrates the API calls that you need to make to submit a run for training and explaining a model to AMLCompute, download the remotely computed explanations, and visualize the global and local explanations via a visualization dashboard that provides an interactive way of discovering patterns in model predictions and downloaded explanations.\n", + "\n", + "We will showcase one of the tabular data explainers: TabularExplainer (SHAP).\n", + "\n", + "Problem: Boston Housing Price Prediction with scikit-learn (train a model and run an explainer remotely via AMLCompute, and download and visualize the remotely-calculated explanations.)\n", + "\n", + "| ![explanations-run-history](./img/explanations-run-history.PNG) |\n", + "|:--:|\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup\n", + "If you are using an Azure Machine Learning Notebook VM, you are all set. 
Otherwise, make sure you go through the [configuration notebook](../../../configuration.ipynb) first if you haven't.\n", + "\n", + "\n", + "You will need to have extensions enabled before the Jupyter kernel starts in order to see the visualization dashboard.\n", + "```\n", + "(myenv) $ jupyter nbextension install --py --sys-prefix azureml.contrib.explain.model.visualize\n", + "(myenv) $ jupyter nbextension enable --py --sys-prefix azureml.contrib.explain.model.visualize\n", + "```\n", + "Or\n", + "\n", + "```\n", + "(myenv) $ jupyter nbextension install azureml.contrib.explain.model.visualize --user --py\n", + "(myenv) $ jupyter nbextension enable azureml.contrib.explain.model.visualize --user --py\n", + "```\n", + "\n", + "If you are using JupyterLab, run the following commands instead:\n", + "```\n", + "(myenv) $ jupyter labextension install @jupyter-widgets/jupyterlab-manager\n", + "(myenv) $ jupyter labextension install microsoft-mli-widget\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check core SDK version number\n", + "import azureml.core\n", + "\n", + "print(\"SDK version:\", azureml.core.VERSION)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Initialize a Workspace\n", + "\n", + "Initialize a workspace object from persisted configuration" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [ + "create workspace" + ] + }, + "outputs": [], + "source": [ + "from azureml.core import Workspace\n", + "\n", + "ws = Workspace.from_config()\n", + "print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep='\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create An Experiment\n", + "\n", + "**Experiment** is a logical container in an Azure ML Workspace. It hosts run records which can include run metrics and output artifacts from your experiments." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core import Experiment\n", + "experiment_name = 'explainer-remote-run-on-amlcompute'\n", + "experiment = Experiment(workspace=ws, name=experiment_name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Introduction to AmlCompute\n", + "\n", + "Azure Machine Learning Compute is managed compute infrastructure that allows the user to easily create single to multi-node compute of the appropriate VM Family. It is created **within your workspace region** and is a resource that can be used by other users in your workspace. It autoscales by default to the max_nodes, when a job is submitted, and executes in a containerized environment packaging the dependencies as specified by the user. \n", + "\n", + "Since it is managed compute, job scheduling and cluster management are handled internally by Azure Machine Learning service. \n", + "\n", + "For more information on Azure Machine Learning Compute, please read [this article](https://docs.microsoft.com/azure/machine-learning/service/how-to-set-up-training-targets#amlcompute)\n", + "\n", + "If you are an existing BatchAI customer who is migrating to Azure Machine Learning, please read [this article](https://aka.ms/batchai-retirement)\n", + "\n", + "**Note**: As with other Azure services, there are limits on certain resources (e.g., AmlCompute quota) associated with the Azure Machine Learning service. Please read [this article](https://docs.microsoft.com/azure/machine-learning/service/how-to-manage-quotas) on the default limits and how to request more quota.\n", + "\n", + "\n", + "The training script `train_explain.py` is already created for you. Let's have a look." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Submit an AmlCompute run in a few different ways\n", + "\n", + "First let's check which VM families are available in your region. 
Azure is a regional service and some specialized SKUs (especially GPUs) are only available in certain regions. Since AmlCompute is created in the region of your workspace, we will use the supported_vmsizes() function to see if the VM family we want to use ('STANDARD_D2_V2') is supported.\n", + "\n", + "You can also pass a different region to check availability and then re-create your workspace in that region through the [configuration notebook](../../../configuration.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.compute import ComputeTarget, AmlCompute\n", + "\n", + "AmlCompute.supported_vmsizes(workspace=ws)\n", + "# AmlCompute.supported_vmsizes(workspace=ws, location='southcentralus')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create project directory\n", + "\n", + "Create a directory that will contain all the necessary code from your local machine that you will need access to on the remote resource. This includes the training script, and any additional files your training script depends on" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import shutil\n", + "\n", + "project_folder = './explainer-remote-run-on-amlcompute'\n", + "os.makedirs(project_folder, exist_ok=True)\n", + "shutil.copy('train_explain.py', project_folder)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Option 1: Provision as a run based compute target\n", + "\n", + "You can provision AmlCompute as a compute target at run-time. In this case, the compute is auto-created for your run, scales up to max_nodes that you specify, and then **deleted automatically** after the run completes." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.runconfig import RunConfiguration\n", + "from azureml.core.conda_dependencies import CondaDependencies\n", + "from azureml.core.runconfig import DEFAULT_CPU_IMAGE\n", + "\n", + "# create a new runconfig object\n", + "run_config = RunConfiguration()\n", + "\n", + "# signal that you want to use AmlCompute to execute script.\n", + "run_config.target = \"amlcompute\"\n", + "\n", + "# AmlCompute will be created in the same region as workspace\n", + "# Set vm size for AmlCompute\n", + "run_config.amlcompute.vm_size = 'STANDARD_D2_V2'\n", + "\n", + "# enable Docker \n", + "run_config.environment.docker.enabled = True\n", + "\n", + "# set Docker base image to the default CPU-based image\n", + "run_config.environment.docker.base_image = DEFAULT_CPU_IMAGE\n", + "\n", + "# use conda_dependencies.yml to create a conda environment in the Docker image for execution\n", + "run_config.environment.python.user_managed_dependencies = False\n", + "\n", + "azureml_pip_packages = [\n", + " 'azureml-defaults', 'azureml-contrib-explain-model', 'azureml-core', 'azureml-telemetry',\n", + " 'azureml-explain-model', 'sklearn-pandas', 'azureml-dataprep'\n", + "]\n", + "\n", + "# specify CondaDependencies obj\n", + "run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'],\n", + " pip_packages=azureml_pip_packages)\n", + "\n", + "# Now submit a run on AmlCompute\n", + "from azureml.core.script_run_config import ScriptRunConfig\n", + "\n", + "script_run_config = ScriptRunConfig(source_directory=project_folder,\n", + " script='train_explain.py',\n", + " run_config=run_config)\n", + "\n", + "run = experiment.submit(script_run_config)\n", + "\n", + "# Show run details\n", + "run" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note: if you need to cancel a run, you can follow [these 
instructions](https://aka.ms/aml-docs-cancel-run)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "# Shows output of the run on stdout.\n", + "run.wait_for_completion(show_output=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Option 2: Provision as a persistent compute target (Basic)\n", + "\n", + "You can provision a persistent AmlCompute resource by simply defining two parameters thanks to smart defaults. By default it autoscales from 0 nodes and provisions dedicated VMs to run your job in a container. This is useful when you want to continuously re-use the same target, debug it between jobs or simply share the resource with other users of your workspace.\n", + "\n", + "* `vm_size`: VM family of the nodes provisioned by AmlCompute. Simply choose from the supported_vmsizes() above\n", + "* `max_nodes`: Maximum nodes to autoscale to while running a job on AmlCompute" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.compute import ComputeTarget, AmlCompute\n", + "from azureml.core.compute_target import ComputeTargetException\n", + "\n", + "# Choose a name for your CPU cluster\n", + "cpu_cluster_name = \"cpu-cluster\"\n", + "\n", + "# Verify that cluster does not exist already\n", + "try:\n", + " cpu_cluster = ComputeTarget(workspace=ws, name=cpu_cluster_name)\n", + " print('Found existing cluster, use it.')\n", + "except ComputeTargetException:\n", + " compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',\n", + " max_nodes=4)\n", + " cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, compute_config)\n", + "\n", + "cpu_cluster.wait_for_completion(show_output=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Configure & Run" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": 
{}, + "outputs": [], + "source": [ + "from azureml.core.runconfig import RunConfiguration\n", + "from azureml.core.conda_dependencies import CondaDependencies\n", + "\n", + "# create a new RunConfig object\n", + "run_config = RunConfiguration(framework=\"python\")\n", + "\n", + "# Set compute target to AmlCompute target created in previous step\n", + "run_config.target = cpu_cluster.name\n", + "\n", + "# enable Docker \n", + "run_config.environment.docker.enabled = True\n", + "\n", + "azureml_pip_packages = [\n", + " 'azureml-defaults', 'azureml-contrib-explain-model', 'azureml-core', 'azureml-telemetry',\n", + " 'azureml-explain-model', 'azureml-dataprep'\n", + "]\n", + "\n", + "# specify CondaDependencies obj\n", + "run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'],\n", + " pip_packages=azureml_pip_packages)\n", + "\n", + "from azureml.core import Run\n", + "from azureml.core import ScriptRunConfig\n", + "\n", + "src = ScriptRunConfig(source_directory=project_folder, \n", + " script='train_explain.py', \n", + " run_config=run_config) \n", + "run = experiment.submit(config=src)\n", + "run" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "# Shows output of the run on stdout.\n", + "run.wait_for_completion(show_output=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run.get_metrics()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Option 3: Provision as a persistent compute target (Advanced)\n", + "\n", + "You can also specify additional properties or change defaults while provisioning AmlCompute using a more advanced configuration. 
This is useful when you want a dedicated cluster of 4 nodes (for example you can set the min_nodes and max_nodes to 4), or want the compute to be within an existing VNet in your subscription.\n", + "\n", + "In addition to `vm_size` and `max_nodes`, you can specify:\n", + "* `min_nodes`: Minimum nodes (default 0 nodes) to downscale to while running a job on AmlCompute\n", + "* `vm_priority`: Choose between 'dedicated' (default) and 'lowpriority' VMs when provisioning AmlCompute. Low Priority VMs use Azure's excess capacity and are thus cheaper but risk your run being pre-empted\n", + "* `idle_seconds_before_scaledown`: Idle time (default 120 seconds) to wait after run completion before auto-scaling to min_nodes\n", + "* `vnet_resourcegroup_name`: Resource group of the **existing** VNet within which AmlCompute should be provisioned\n", + "* `vnet_name`: Name of VNet\n", + "* `subnet_name`: Name of SubNet within the VNet" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.compute import ComputeTarget, AmlCompute\n", + "from azureml.core.compute_target import ComputeTargetException\n", + "\n", + "# Choose a name for your CPU cluster\n", + "cpu_cluster_name = \"cpu-cluster\"\n", + "\n", + "# Verify that cluster does not exist already\n", + "try:\n", + " cpu_cluster = ComputeTarget(workspace=ws, name=cpu_cluster_name)\n", + " print('Found existing cluster, use it.')\n", + "except ComputeTargetException:\n", + " compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',\n", + " vm_priority='lowpriority',\n", + " min_nodes=2,\n", + " max_nodes=4,\n", + " idle_seconds_before_scaledown='300',\n", + " vnet_resourcegroup_name='',\n", + " vnet_name='',\n", + " subnet_name='')\n", + " cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, compute_config)\n", + "\n", + "cpu_cluster.wait_for_completion(show_output=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": 
{}, + "source": [ + "### Configure & Run" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.runconfig import RunConfiguration\n", + "from azureml.core.conda_dependencies import CondaDependencies\n", + "\n", + "# create a new RunConfig object\n", + "run_config = RunConfiguration(framework=\"python\")\n", + "\n", + "# Set compute target to AmlCompute target created in previous step\n", + "run_config.target = cpu_cluster.name\n", + "\n", + "# enable Docker \n", + "run_config.environment.docker.enabled = True\n", + "\n", + "azureml_pip_packages = [\n", + " 'azureml-defaults', 'azureml-contrib-explain-model', 'azureml-core', 'azureml-telemetry',\n", + " 'azureml-explain-model', 'azureml-dataprep'\n", + "]\n", + "\n", + "\n", + "\n", + "# specify CondaDependencies obj\n", + "run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'],\n", + " pip_packages=azureml_pip_packages)\n", + "\n", + "from azureml.core import Run\n", + "from azureml.core import ScriptRunConfig\n", + "\n", + "src = ScriptRunConfig(source_directory=project_folder, \n", + " script='train_explain.py', \n", + " run_config=run_config) \n", + "run = experiment.submit(config=src)\n", + "run" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "# Shows output of the run on stdout.\n", + "run.wait_for_completion(show_output=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "run.get_metrics()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.contrib.explain.model.explanation.explanation_client import ExplanationClient\n", + "\n", + "client = ExplanationClient.from_run(run)\n", + "# Get the top k (e.g., 4) most important features with their importance 
values\n", + "explanation = client.download_model_explanation(top_k=4)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Additional operations to perform on AmlCompute\n", + "\n", + "You can perform more operations on AmlCompute such as updating the node counts or deleting the compute. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Get_status () gets the latest status of the AmlCompute target\n", + "cpu_cluster.get_status().serialize()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Update () takes in the min_nodes, max_nodes and idle_seconds_before_scaledown and updates the AmlCompute target\n", + "# cpu_cluster.update(min_nodes=1)\n", + "# cpu_cluster.update(max_nodes=10)\n", + "cpu_cluster.update(idle_seconds_before_scaledown=300)\n", + "# cpu_cluster.update(min_nodes=2, max_nodes=4, idle_seconds_before_scaledown=600)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Delete () is used to deprovision and delete the AmlCompute target. Useful if you want to re-use the compute name \n", + "# 'cpu-cluster' in this case but use a different VM family for instance.\n", + "\n", + "# cpu_cluster.delete()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Download \n", + "1. Download model explanation data." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.contrib.explain.model.explanation.explanation_client import ExplanationClient\n", + "\n", + "# Get model explanation data\n", + "client = ExplanationClient.from_run(run)\n", + "global_explanation = client.download_model_explanation()\n", + "local_importance_values = global_explanation.local_importance_values\n", + "expected_values = global_explanation.expected_values\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Or you can use the saved run.id to retrieve the feature importance values\n", + "client = ExplanationClient.from_run_id(ws, experiment_name, run.id)\n", + "global_explanation = client.download_model_explanation()\n", + "local_importance_values = global_explanation.local_importance_values\n", + "expected_values = global_explanation.expected_values" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Get the top k (e.g., 4) most important features with their importance values\n", + "global_explanation_topk = client.download_model_explanation(top_k=4)\n", + "global_importance_values = global_explanation_topk.get_ranked_global_values()\n", + "global_importance_names = global_explanation_topk.get_ranked_global_names()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print('global importance values: {}'.format(global_importance_values))\n", + "print('global importance names: {}'.format(global_importance_names))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "2. Download model file." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# retrieve model for visualization and deployment\n", + "from azureml.core.model import Model\n", + "from sklearn.externals import joblib\n", + "original_model = Model(ws, 'original_model')\n", + "model_path = original_model.download(exist_ok=True)\n", + "original_model = joblib.load(model_path)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "3. Download test dataset." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# retrieve x_test for visualization\n", + "from sklearn.externals import joblib\n", + "x_test_path = './x_test_boston_housing.pkl'\n", + "run.download_file('x_test_boston_housing.pkl', output_file_path=x_test_path)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "x_test = joblib.load('x_test_boston_housing.pkl')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Visualize\n", + "Load the visualization dashboard" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.contrib.explain.model.visualize import ExplanationDashboard" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ExplanationDashboard(global_explanation, original_model, x_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Next\n", + "Learn about other use cases of the explain package on a:\n", + "1. [Training time: regression problem](../../tabular-data/explain-binary-classification-local.ipynb) \n", + "1. [Training time: binary classification problem](../../tabular-data/explain-binary-classification-local.ipynb)\n", + "1. 
[Training time: multiclass classification problem](../../tabular-data/explain-multiclass-classification-local.ipynb)\n", + "1. Explain models with engineered features:\n", + " 1. [Simple feature transformations](../../tabular-data/simple-feature-transformations-explain-local.ipynb)\n", + " 1. [Advanced feature transformations](../../tabular-data/advanced-feature-transformations-explain-local.ipynb)\n", + "1. [Save model explanations via Azure Machine Learning Run History](../run-history/save-retrieve-explanations-run-history.ipynb)\n", + "1. Inferencing time: deploy a classification model and explainer:\n", + " 1. [Deploy a locally-trained model and explainer](../scoring-time/train-explain-model-locally-and-deploy.ipynb)\n", + " 1. [Deploy a remotely-trained model and explainer](../scoring-time/train-explain-model-on-amlcompute-and-deploy.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "authors": [ + { + "name": "mesameki" + } + ], + "kernelspec": { + "display_name": "Python 3.6", + "language": "python", + "name": "python36" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/how-to-use-azureml/explain-model/azure-integration/remote-explanation/explain-model-on-amlcompute.yml b/how-to-use-azureml/explain-model/azure-integration/remote-explanation/explain-model-on-amlcompute.yml new file mode 100644 index 00000000..53d58768 --- /dev/null +++ b/how-to-use-azureml/explain-model/azure-integration/remote-explanation/explain-model-on-amlcompute.yml @@ -0,0 +1,8 @@ +name: explain-model-on-amlcompute +dependencies: +- pip: + - azureml-sdk + - azureml-explain-model + - 
azureml-contrib-explain-model + - sklearn-pandas + - azureml-dataprep diff --git a/how-to-use-azureml/explain-model/azure-integration/remote-explanation/train_explain.py b/how-to-use-azureml/explain-model/azure-integration/remote-explanation/train_explain.py new file mode 100644 index 00000000..beefff8e --- /dev/null +++ b/how-to-use-azureml/explain-model/azure-integration/remote-explanation/train_explain.py @@ -0,0 +1,63 @@ +# Copyright (c) Microsoft. All rights reserved. +# Licensed under the MIT license. + +from sklearn import datasets +from sklearn.linear_model import Ridge +from azureml.explain.model.tabular_explainer import TabularExplainer +from azureml.contrib.explain.model.explanation.explanation_client import ExplanationClient +from sklearn.model_selection import train_test_split +from azureml.core.run import Run +from sklearn.externals import joblib +import os +import numpy as np + +OUTPUT_DIR = './outputs/' +os.makedirs(OUTPUT_DIR, exist_ok=True) + +boston_data = datasets.load_boston() + +run = Run.get_context() +client = ExplanationClient.from_run(run) + +X_train, X_test, y_train, y_test = train_test_split(boston_data.data, + boston_data.target, + test_size=0.2, + random_state=0) +# write x_test out as a pickle file for later visualization +x_test_pkl = 'x_test.pkl' +with open(x_test_pkl, 'wb') as file: + joblib.dump(value=X_test, filename=os.path.join(OUTPUT_DIR, x_test_pkl)) +run.upload_file('x_test_boston_housing.pkl', os.path.join(OUTPUT_DIR, x_test_pkl)) + + +alpha = 0.5 +# Use Ridge algorithm to create a regression model +reg = Ridge(alpha) +model = reg.fit(X_train, y_train) + +preds = reg.predict(X_test) +run.log('alpha', alpha) + +model_file_name = 'ridge_{0:.2f}.pkl'.format(alpha) +# save model in the outputs folder so it automatically get uploaded +with open(model_file_name, 'wb') as file: + joblib.dump(value=reg, filename=os.path.join(OUTPUT_DIR, + model_file_name)) + +# register the model +run.upload_file('original_model.pkl', 
os.path.join('./outputs/', model_file_name)) +original_model = run.register_model(model_name='original_model', model_path='original_model.pkl') + +# Explain predictions on your local machine +tabular_explainer = TabularExplainer(model, X_train, features=boston_data.feature_names) + +# Explain overall model predictions (global explanation) +# Passing in test dataset for evaluation examples - note it must be a representative sample of the original data +# x_train can be passed as well, but with more examples explanations it will +# take longer although they may be more accurate +global_explanation = tabular_explainer.explain_global(X_test) + +# Uploading model explanation data for storage or visualization in webUX +# The explanation can then be downloaded on any compute +comment = 'Global explanation on regression model trained on boston dataset' +client.upload_model_explanation(global_explanation, comment=comment) diff --git a/how-to-use-azureml/explain-model/azure-integration/scoring-time/train-explain-model-on-amlcompute-and-deploy.yml b/how-to-use-azureml/explain-model/azure-integration/scoring-time/train-explain-model-on-amlcompute-and-deploy.yml index b4d008e1..5657cbe3 100644 --- a/how-to-use-azureml/explain-model/azure-integration/scoring-time/train-explain-model-on-amlcompute-and-deploy.yml +++ b/how-to-use-azureml/explain-model/azure-integration/scoring-time/train-explain-model-on-amlcompute-and-deploy.yml @@ -4,5 +4,5 @@ dependencies: - azureml-sdk - azureml-explain-model - azureml-contrib-explain-model - - azureml-dataprep - sklearn-pandas + - azureml-dataprep diff --git a/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-getting-started.ipynb b/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-getting-started.ipynb index 474d9496..490113df 100644 --- a/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-getting-started.ipynb +++ 
b/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-getting-started.ipynb @@ -460,7 +460,7 @@ "source": [ "# Submit syntax\n", "# submit(experiment_name, \n", - "# pipeline_params=None, \n", + "# pipeline_parameters=None, \n", "# continue_on_step_failure=False, \n", "# regenerate_outputs=False)\n", "\n", diff --git a/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-parameter-tuning-with-hyperdrive.ipynb b/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-parameter-tuning-with-hyperdrive.ipynb index a7e1dabe..a0f413dc 100644 --- a/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-parameter-tuning-with-hyperdrive.ipynb +++ b/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-parameter-tuning-with-hyperdrive.ipynb @@ -321,7 +321,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [ + "hyperdriveconfig-remarks-sample" + ] + }, "outputs": [], "source": [ "hd_config = HyperDriveConfig(estimator=est, \n", diff --git a/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-use-databricks-as-compute-target.ipynb b/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-use-databricks-as-compute-target.ipynb index 8b6d280f..afbd1082 100644 --- a/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-use-databricks-as-compute-target.ipynb +++ b/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-use-databricks-as-compute-target.ipynb @@ -299,7 +299,7 @@ "metadata": {}, "outputs": [], "source": [ - "from azureml.pipelince.core import PipelineParameter\n", + "from azureml.pipeline.core import PipelineParameter\n", "\n", "# Use the default blob storage\n", "def_blob_store = Datastore(ws, \"workspaceblobstore\")\n", diff --git 
a/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-with-automated-machine-learning-step.ipynb b/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-with-automated-machine-learning-step.ipynb index cd42882b..aa943c48 100644 --- a/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-with-automated-machine-learning-step.ipynb +++ b/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-with-automated-machine-learning-step.ipynb @@ -28,14 +28,14 @@ "metadata": {}, "source": [ "## Introduction\n", - "In this example we showcase how you can use the `azureml.dataprep` SDK to load and prepare data for AutoML via AML Pipeline. `azureml.dataprep` can also be used standalone; full documentation can be found [here](https://github.com/Microsoft/PendletonDocs).\n", + "In this example we showcase how you can use AzureML Dataset to load data for AutoML via AML Pipeline. \n", "\n", "If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, make sure you have executed the [configuration](https://aka.ms/pl-config) before running this notebook.\n", "\n", "In this notebook you will learn how to:\n", "1. Create an `Experiment` in an existing `Workspace`.\n", "2. Create or Attach existing AmlCompute to a workspace.\n", - "3. Define data loading and preparation steps in a `Dataflow` using `azureml.dataprep`.\n", + "3. Define data loading in a `TabularDataset`.\n", "4. Configure AutoML using `AutoMLConfig`.\n", "5. Use AutoMLStep\n", "6. 
Train the model using AmlCompute\n", @@ -65,7 +65,6 @@ "import pandas as pd\n", "from sklearn import datasets\n", "import pkg_resources\n", - "import azureml.dataprep as dprep\n", "\n", "import azureml.core\n", "from azureml.core.experiment import Experiment\n", @@ -73,6 +72,7 @@ "from azureml.train.automl import AutoMLConfig\n", "from azureml.core.compute import AmlCompute\n", "from azureml.core.compute import ComputeTarget\n", + "from azureml.core.dataset import Dataset\n", "from azureml.core.runconfig import RunConfiguration\n", "from azureml.core.conda_dependencies import CondaDependencies\n", "\n", @@ -197,13 +197,10 @@ "metadata": {}, "outputs": [], "source": [ - "# You can use `auto_read_file` which intelligently figures out delimiters and datatypes of a file.\n", "# The data referenced here was a 1MB simple random sample of the Chicago Crime data into a local temporary directory.\n", - "# You can also use `read_csv` and `to_*` transformations to read (with overridable delimiter)\n", - "# and convert column types manually.\n", "example_data = 'https://dprepdata.blob.core.windows.net/demo/crime0-random.csv'\n", - "dflow = dprep.auto_read_file(example_data).skip(1) # Remove the header row.\n", - "dflow.get_profile()" + "dataset = Dataset.Tabular.from_delimited_files(example_data)\n", + "dataset.to_pandas_dataframe().describe()" ] }, { @@ -212,20 +209,18 @@ "metadata": {}, "outputs": [], "source": [ - "# As `Primary Type` is our y data, we need to drop the values those are null in this column.\n", - "dflow = dflow.drop_nulls('Primary Type')\n", - "dflow.head(5)" + "dataset.take(5).to_pandas_dataframe()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Review the Data Preparation Result\n", + "### Review the Dataset Result\n", "\n", - "You can peek the result of a Dataflow at any range using `skip(i)` and `head(j)`. 
Doing so evaluates only `j` records for all the steps in the Dataflow, which makes it fast even against large datasets.\n", + "You can peek the result of a TabularDataset at any range using `skip(i)` and `take(j).to_pandas_dataframe()`. Doing so evaluates only `j` records for all the steps in the TabularDataset, which makes it fast even against large datasets.\n", "\n", - "`Dataflow` objects are immutable and are composed of a list of data preparation steps. A `Dataflow` object can be branched at any point for further usage." + "`TabularDataset` objects are composed of a list of transformation steps (optional)." ] }, { @@ -234,8 +229,8 @@ "metadata": {}, "outputs": [], "source": [ - "X = dflow.drop_columns(columns=['Primary Type', 'FBI Code'])\n", - "y = dflow.keep_columns(columns=['Primary Type'], validate_column_exists=True)\n", + "X = dataset.drop_columns(columns=['Primary Type', 'FBI Code'])\n", + "y = dataset.keep_columns(columns=['Primary Type'], validate=True)\n", "print('X and y are ready!')" ] }, @@ -441,8 +436,12 @@ "metadata": {}, "outputs": [], "source": [ - "dflow_test = dprep.auto_read_file(path='https://dprepdata.blob.core.windows.net/demo/crime0-test.csv').skip(1)\n", - "dflow_test = dflow_test.drop_nulls('Primary Type')" + "dataset_test = Dataset.Tabular.from_delimited_files(path='https://dprepdata.blob.core.windows.net/demo/crime0-test.csv')\n", + "df_test = dataset_test.to_pandas_dataframe()\n", + "df_test = df_test[pd.notnull(df_test['Primary Type'])]\n", + "\n", + "y_test = df_test[['Primary Type']]\n", + "X_test = df_test.drop(['Primary Type', 'FBI Code'], axis=1)" ] }, { @@ -462,10 +461,6 @@ "source": [ "from pandas_ml import ConfusionMatrix\n", "\n", - "y_test = dflow_test.keep_columns(columns=['Primary Type']).to_pandas_dataframe()\n", - "X_test = dflow_test.drop_columns(columns=['Primary Type', 'FBI Code']).to_pandas_dataframe()\n", - "\n", - "\n", "ypred = best_model.predict(X_test)\n", "\n", "cm = ConfusionMatrix(y_test['Primary Type'], ypred)\n",
diff --git a/how-to-use-azureml/machine-learning-pipelines/nyc-taxi-data-regression-model-building/nyc-taxi-data-regression-model-building.ipynb b/how-to-use-azureml/machine-learning-pipelines/nyc-taxi-data-regression-model-building/nyc-taxi-data-regression-model-building.ipynb index 7b4fedf5..fa5d1121 100644 --- a/how-to-use-azureml/machine-learning-pipelines/nyc-taxi-data-regression-model-building/nyc-taxi-data-regression-model-building.ipynb +++ b/how-to-use-azureml/machine-learning-pipelines/nyc-taxi-data-regression-model-building/nyc-taxi-data-regression-model-building.ipynb @@ -1,5 +1,12 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/machine-learning-pipelines/nyc-taxi-data-regression-model-building/nyc-taxi-data-regression-model-building.png)" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -187,7 +194,19 @@ "metadata": {}, "outputs": [], "source": [ + "from azureml.core.compute import AmlCompute\n", + "from azureml.core.compute import ComputeTarget\n", + "\n", "aml_compute = ws.get_default_compute_target(\"CPU\")\n", + "\n", + "if aml_compute is None:\n", + " amlcompute_cluster_name = \"cpu-cluster\"\n", + " provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\",\n", + " max_nodes = 4)\n", + "\n", + " aml_compute = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)\n", + " aml_compute.wait_for_completion(show_output = True, min_node_count = None, timeout_in_minutes = 20)\n", + "\n", "aml_compute" ] }, @@ -735,6 +754,8 @@ "outputs": [], "source": [ "%%writefile $train_model_folder/get_data.py\n", + "import os\n", + "import pandas as pd\n", "\n", "def get_data():\n", " print(\"In get_data\")\n", diff --git a/how-to-use-azureml/machine-learning-pipelines/pipeline-batch-scoring/pipeline-batch-scoring.ipynb 
b/how-to-use-azureml/machine-learning-pipelines/pipeline-batch-scoring/pipeline-batch-scoring.ipynb index bf2a4dae..a10f9297 100644 --- a/how-to-use-azureml/machine-learning-pipelines/pipeline-batch-scoring/pipeline-batch-scoring.ipynb +++ b/how-to-use-azureml/machine-learning-pipelines/pipeline-batch-scoring/pipeline-batch-scoring.ipynb @@ -387,11 +387,15 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [ + "pipelineparameterssample" + ] + }, "outputs": [], "source": [ "pipeline = Pipeline(workspace=ws, steps=[batch_score_step])\n", - "pipeline_run = Experiment(ws, 'batch_scoring').submit(pipeline, pipeline_params={\"param_batch_size\": 20})" + "pipeline_run = Experiment(ws, 'batch_scoring').submit(pipeline, pipeline_parameters={\"param_batch_size\": 20})" ] }, { diff --git a/how-to-use-azureml/machine-learning-pipelines/pipeline-style-transfer/pipeline-style-transfer.ipynb b/how-to-use-azureml/machine-learning-pipelines/pipeline-style-transfer/pipeline-style-transfer.ipynb index b0deef24..6ddaba93 100644 --- a/how-to-use-azureml/machine-learning-pipelines/pipeline-style-transfer/pipeline-style-transfer.ipynb +++ b/how-to-use-azureml/machine-learning-pipelines/pipeline-style-transfer/pipeline-style-transfer.ipynb @@ -384,7 +384,7 @@ "source": [ "pipeline = Pipeline(workspace=ws, steps=[stitch_video_step])\n", "# submit the pipeline and provide values for the PipelineParameters used in the pipeline\n", - "pipeline_run = Experiment(ws, 'style_transfer').submit(pipeline, pipeline_params={\"style\": \"mosaic\", \"nodecount\": 3})" + "pipeline_run = Experiment(ws, 'style_transfer').submit(pipeline, pipeline_parameters={\"style\": \"mosaic\", \"nodecount\": 3})" ] }, { diff --git a/how-to-use-azureml/manage-azureml-service/authentication-in-azureml/authentication-in-azureml.ipynb b/how-to-use-azureml/manage-azureml-service/authentication-in-azureml/authentication-in-azureml.ipynb index 047068f3..5b4618b9 100644 --- 
a/how-to-use-azureml/manage-azureml-service/authentication-in-azureml/authentication-in-azureml.ipynb +++ b/how-to-use-azureml/manage-azureml-service/authentication-in-azureml/authentication-in-azureml.ipynb @@ -26,9 +26,10 @@ "\n", " 1. Interactive Login Authentication\n", " 2. Azure CLI Authentication\n", - " 3. Service Principal Authentication\n", + " 3. Managed Service Identity (MSI) Authentication\n", + " 4. Service Principal Authentication\n", " \n", - "The interactive authentication is suitable for local experimentation on your own computer. Azure CLI authentication is suitable if you are already using Azure CLI for managing Azure resources, and want to sign in only once. The Service Principal authentication is suitable for automated workflows, for example as part of Azure Devops build." + "The interactive authentication is suitable for local experimentation on your own computer. Azure CLI authentication is suitable if you are already using Azure CLI for managing Azure resources, and want to sign in only once. The MSI and Service Principal authentication are suitable for automated workflows, for example as part of Azure Devops build." ] }, { @@ -145,6 +146,43 @@ "print(\"Found workspace {} at location {}\".format(ws.name, ws.location))" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### MSI Authentication\n", + "\n", + "__Note__: _MSI authentication is supported only when using SDK from Azure Virtual Machine. The code below will fail on local computer._\n", + "\n", + "When using Azure ML SDK on Azure Virtual Machine (VM), you can use Managed Service Identity (MSI) based authentication. 
This mode allows the VM to connect to the Workspace without storing credentials in the Python code.\n", + "\n", + "As a pre-requisite, enable System-assigned Managed Identity for your VM as described in [this document](https://docs.microsoft.com/en-us/azure/active-directory/managed-identities-azure-resources/qs-configure-portal-windows-vm).\n", + "\n", + "Then, assign the VM access to your Workspace. For example from Azure Portal, navigate to your workspace, select __Access Control (IAM)__, __Add Role Assignment__, specify __Virtual Machine__ for __Assign Access To__ dropdown, and select your VM's identity.\n", + "\n", + "![msi assignment](images/msiaccess.PNG)\n", + "\n", + "After completing these steps, you can authenticate using an MsiAuthentication instance." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core.authentication import MsiAuthentication\n", + "\n", + "msi_auth = MsiAuthentication()\n", + "\n", + "ws = Workspace(subscription_id=\"my-subscription-id\",\n", + " resource_group=\"my-ml-rg\",\n", + " workspace_name=\"my-ml-workspace\",\n", + " auth=msi_auth)\n", + "\n", + "print(\"Found workspace {} at location {}\".format(ws.name, ws.location))" + ] + }, + { "cell_type": "markdown", "metadata": {}, @@ -238,6 +276,135 @@ "See [Register an application with the Microsoft identity platform](https://docs.microsoft.com/en-us/azure/active-directory/develop/quickstart-register-app) quickstart for more details about application registrations. " ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Using Secrets in Remote Runs\n", + "\n", + "Sometimes, you may have to pass a secret to a remote run, for example a username and password to authenticate against an external data source.\n", + "\n", + "Azure ML SDK enables this use case through Key Vault associated with your workspace.
The workflow for adding a secret is as follows.\n", + "\n", + "On local computer:\n", + "\n", + " 1. Read in a local secret, for example from an environment variable or user input. To keep them secret, do not insert secret values into code as hard-coded strings.\n", + " 2. Obtain a reference to the keyvault\n", + " 3. Add the secret name-value pair in the key vault.\n", + " \n", + "The secret is then available for remote runs as shown further below.\n", + "\n", + "__Note__: The _azureml.core.keyvault.Keyvault_ is different from _azure.keyvault_ library. It is intended as a simplified wrapper for setting, getting and listing user secrets in Workspace Key Vault." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os, uuid\n", + "\n", + "local_secret = os.environ.get(\"LOCAL_SECRET\", default = str(uuid.uuid4())) # Use random UUID as a substitute for real secret.\n", + "keyvault = ws.get_default_keyvault()\n", + "keyvault.set_secret(name=\"secret-name\", value = local_secret)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The _set_secret_ method adds a new secret if one doesn't exist, or updates an existing one with a new value.\n", + "\n", + "You can list secret names you've added. This method doesn't return the values of the secrets." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "keyvault.list_secrets()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can retrieve the value of the secret, and validate that it matches the original value. \n", + "\n", + "__Note__: This method returns the secret value. Take care not to write the secret value to output."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "retrieved_secret = keyvault.get_secret(name=\"secret-name\")\n", + "local_secret==retrieved_secret" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In submitted runs on local and remote compute, you can use the get_secret method of Run instance to get the secret value from Key Vault. \n", + "\n", + "The method gives you a simple shortcut: the Run instance is aware of its Workspace and Keyvault, so it can directly obtain the secret without you having to instantiate the Workspace and Keyvault within remote run.\n", + "\n", + "__Note__: This method returns the secret value. Take care not to write the secret to output.\n", + "\n", + "For example, let's create a simple script _get_secret.py_ that gets the secret we set earlier. In an actual application, you would use the secret, for example to access a database or other password-protected resource." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile get_secret.py\n", + "\n", + "from azureml.core import Run\n", + "\n", + "run = Run.get_context()\n", + "secret_value = run.get_secret(name=\"secret-name\")\n", + "print(\"Got secret value {} , but don't write it out!\".format(len(secret_value) * \"*\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Then, submit the script as a regular script run, and find the obfuscated secret value in run output. You can use the same approach for other kinds of runs, such as Estimator ones."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from azureml.core import Experiment, Run\n", + "from azureml.core.script_run_config import ScriptRunConfig\n", + "\n", + "exp = Experiment(workspace = ws, name=\"try-secret\")\n", + "src = ScriptRunConfig(source_directory=\".\", script=\"get_secret.py\")\n", + "\n", + "run = exp.submit(src)\n", + "run.wait_for_completion(show_output=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Furthermore, you can set and get multiple secrets using the set_secrets and get_secrets methods." + ] + }, { "cell_type": "code", "execution_count": null, @@ -267,7 +434,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.4" + "version": "3.6.9" } }, "nbformat": 4, diff --git a/how-to-use-azureml/track-and-monitor-experiments/README.md b/how-to-use-azureml/track-and-monitor-experiments/README.md new file mode 100644 index 00000000..6f46406a --- /dev/null +++ b/how-to-use-azureml/track-and-monitor-experiments/README.md @@ -0,0 +1,19 @@ + +## Follow these sample notebooks to learn: + +1. [Logging API](./logging-api/logging-api.ipynb): experiment with various logging functions to create runs and automatically generate graphs. +2. [Manage runs](./manage-runs/manage-runs.ipynb): learn different ways to start runs and child runs, monitor them, and cancel them. +1. [Tensorboard to monitor runs](./tensorboard/tensorboard.ipynb) + +## Use MLflow with Azure Machine Learning service (Preview) + +[MLflow](https://mlflow.org/) is an open-source platform for tracking machine learning experiments and managing models. You can use MLflow logging APIs with Azure Machine Learning service: the metrics and artifacts are logged to your Azure ML Workspace. + +Try out the sample notebooks: +1. [Use MLflow with Azure Machine Learning for Local Training Run](./train-local/train-local.ipynb) +1.
[Use MLflow with Azure Machine Learning for Remote Training Run](./train-remote/train-remote.ipynb) +1. [Deploy Model as Azure Machine Learning Web Service using MLflow](./deploy-model/deploy-model.ipynb) +1. [Train and Deploy PyTorch Image Classifier](./train-deploy-pytorch/train-deploy-pytorch.ipynb) + + ![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/track-and-monitor-experiments/README.png) + diff --git a/how-to-use-azureml/track-and-monitor-experiments/logging-api/img/run_details.PNG b/how-to-use-azureml/track-and-monitor-experiments/logging-api/img/run_details.PNG new file mode 100644 index 0000000000000000000000000000000000000000..9bfe60fd12296121925588ee3e8a2fc42f6e06e8 GIT binary patch literal 30114 zcmb5VWl&sEw=GH_xVuBp#wEB1ch}%9!QH)a3r=ti1c%`6uEE_UXyc9F&UenOI`_S* zTkrko>e_qvmR@_!ImR4gt|%2HX;dU4Bq%5-R9P7bH7KYLU69WY1USf^duk0U$kzu~ zHEA)Z>M4@Hkd2R4qKcwWP<08&FD9^%ZA2%TudYx~XaoN~A5t07h@qg|d1NI-HM|VZ zH(-siM~UD;fg8vTGW06z8ykG<8?QnAeCw|u-~`lW6+`1lp;5OwkZ1fEc6&tQ}Zl2o(Im(BzQK9I@a846#V1f*xrE z3W-vcZ7Lu}p*kH1Qw(929tW(kzwyWcQpCanVu*2SmG>Mu*Ak=oARuZ+ISrrWGy?}} zQulq)-(CG^isw%!5vBdG5fLF5Ct}px=HMJh{xUwbOXNeo)LPq}8 zu+HXwE^lHO5ctNWnIS(pGmiu3BNhUAj-fG-R_R{3&GD=+)cuyC0g9i8C8>|cd7PPY zz;-VOz6m`5L@0HC8ihC>Pr<(ZVKdBi9K{q9*LI-B!^Z`G*OBFT>oz^Z zAUCXO0$UWDZ{D1VU5%usgCBE#^GACMqwO8NI-Fq@NjU}>?l-Bg2dpav)%gdR# zm>koapLH-epym7bxjGAh0ZJ1?!qTQ!=G?iWU~nu*2=aQHo|U7fdp^pv@ZiEO3QKXx zkJe)u;P>A4b2+dOkoUFiJ+!rg4`2g`qu#2JKsRPQo&5)s-8VPuL}v!8R!eY$sp%=B-xa3Mzh>4??Gh_m|&xZ2o>K;gtuIP=^_q<~Hb`roKy8TJUuUaF* zNIbqDxVS)_OC+g{5Ijvv*fP)iq^3KbilSW1_dcCx87j*#1cftR|WLDI{1?j z``i6^7l1=2ihK&CzVtupF*@dT`n1Y*JX-qRYz!41OBN|_6!Z{ZarIg9Z}o5ny-t@m z|E^AACWe?x&^w$k)%IO~g7sI5yK)aWNerxOguIGH>@=B-AM%vqNu$zlnqxDov5bmH zh@wSuC1HG0C>4E&U%qn)|8!M261Sac--5cmTti-)a#js>^&b{w(t8%E)NRDbQf>*_ z7*W7k_)-!5@-3Kj3=i#N(#qB+Z%~t^8xIPBAWmnS=ODl%fpakL==D$_UYR&7u|LA} zc&6Z37PMWJyE8KWU$FWJT<2rA}fG}p0&H-%A=EG1YD28#N(k#2L 
zp{2Z?M{Fz`{4wahMwGKhjBTtNVWAOd{T8MH>(_T-rG@}~em?^39G>l$DyNo+wY{GB z{$m{!x-Cbz@*_+%7Tl6AwE!Rn>O)>ndJ2Muy*^&^mb29y4hyw+|Ikkoq>e`>rbLAM zS9Z7t2GskJ@sVw=T$hs6K-_?{BZXKKq~eHOD7wvf4K=0iNjMOK%*H4Y^&=Tsno6GoB0d zXRCaOn%66!n4}i9{FEs6hdTgGf3K+8D(;Wv9=6GyuOl|XkB%+z1i?D(&@bq;N~$qK zz;+ec-FQ`9wNH|7PYcu>B&q{E)N9(oo!_O%#0Nfx?X~U6#S*Ziygn&Jc&n;wR2k?m z2SKJY#{ifY)?#rvZ^hR@#ny>lyl92N%|D~+~+Zin3`$o z=2z%k=isv;;>O%jiMra_M2WPXhqFV52*zI)a@jyoK2$YW4_Ebp^(H3z-3#$e z`PCa5e%iG}t>1hAX-m1DE_kpvkeY_NzK_IlDA?e5A%IrWZon1`pMAyB;yUGbDcM^1KUX%X?#=yY6Td zU$4@0ylP$QHGObx9;jQd2Qa?j1Rff`=L*WKTMguyYL$lLpRfpLU8BqG4gN^oWx-xV zrmyo_D5lu$3Js5{Y5QHyZ0Zbd*V_&5Y>ygdUhOaSE?^xn5OBlnmQ^2>usld>so8yY zq#ni^?!+f|vfy$U*yST#jXJvQrh40dRAY*Nq3e3rR6C`)kI4cfKZkxax@-rf+7*lC zZeRxP+1?LCOE)k8h=sa;h7*MB1wD90`yF7lncX?(A@C!(8QI9~| z)qW>glU^^;({^rVaebT@*T#QKZSC@o2bUD*&US}v3^*huC?Qbjvc|y7KADXi3-Q*P z#eaWW+RqiQy`TS3Kp9Q>N5s-cUrQl;Gv%%f(4G&L&n9C>kZd9qBPFZ?K!}hvS{TgM zO6QI9_$WU6lN1IZbNdp8xHs4EQBXMMPTH}Tl@pG8(R7~dmIrPjS#W7g!Dl#S>9w^# zXytk4c8j09dwqv2N9Alpl5=cmTONUe+Vv~b6@)fXtR9Fo@*`3Jzh}q(f1y~AXF0|} z@(bbF+G9Vr{v;AOCzKhu;4UouXHz~9^*%sJNeOgwgPjI}&y!J7aIV(?h2ia1)5TSn z5&@SVXTZqu0C;ZB>OD3om&~XXn}FQ~PNdqU@*cF=(Z^61%iTugR`D5!^FbIzB->dy}w_a z-05cI^H~dsMCJuJYqqpA3a<=&X-SaYN^a2sA>9#qh`LRn$C9yQT~eF%?5ci8ygxHU zeUE9T7(!q7z5WQeKkw;a%lza%9GqX+x!ZWZ{OB*MDn=MkAn6wwaXNQ{Xp?%t=ZgcES!eUPk|Nfrf^Jo5=ds1yf_o|B+48EVPW@D;HVGa? 
z2DUHS34nu1<6J@9xg3?)G7jspj1AvSwgAYNyZoN!dBjry?1 zX#l!(Ivt9G9dq&90XC%=EnXzy$x$ee+dJ%mr zVj*2@wlGv#uU`9YS!Ce0rPUC1K8i%-IYO;^UI5V(lqe>c5VwS3dnjvQNY37# zv#`V>^=Z0>V@S8}g+9y)EE^kY>$z2%P*m<9nK?Vl9LkaRVqVs|uGvHy2X#0N7t?3$ zWOb9tNn{V)CD^SCZBHc{$VgC!E0du7{>%)vaCSyy<9>`j9d^jq*(6v8!I53$!VX)p zmuXdDyeJUm%Ui%K>o_0XWe^PDjl%SQ^v4--jH+Gc)u{Ijl6~dYAMMAWX*ISpP6b|i z#&6fCNs`wTUgH8pU?}nxX2D?&)vRSX7rl4;|?e`60TI-SESJvS zy*j|o(we@Ja=OAoCY73R>=MJ~9?3Zvg%zCD$tPvf70$yZaQyPfc-V-PtPS@N4Q63Qy;WMEv+cD3`_*g-$N~DZG8;mr-|D zEd*aQBBZQjdKVbcc(w+5C~7d_xc8$o_*pnPalEhhG1X4+$;qeFZ{+nznT)!V8R2P(R?dPA6evda!=j$-1KOpgBNi|XGrqxt z!y@^Lr>kGYsB;BDA;(AdB>q^@aGLT6(uwvfNaU5dsvk+KOzXyl5NRoU7+5ebyMcvc zFQ441tfPw_LP3PIwMcPNEPB{p-qJhMG=|{pX_UN+@V<=!49y!NBLFP?fS+KGPSfS_fqWq@|{ixjU$JWOiV;G6I=UbeJTpJg^I8XgfKPKxGc+G2FXQ z&A>LLsgTSCLP`XR^>*iYIQQYP8?FD4M@`@F*4R5@IJuCcWV-jB@NInG>O$DfJ@1vT zA$M{?4@oT{4Dj(Ta7g}{1Y@3WO=B|hL(EbJgc@8SO_|g+{He8)rU`8Xe)jnb=h?T2 z<8+a$ZG(^R6bAZxGo?9PKbJToY*nE7)3{Pes9+9%CW$z%Um5G?S|*fQz0{o;l2oHL z^of-cM853JH2g&DR+PGb{fg1w^b%C6&FLVSVQj9zD;6883TP$()JSZP{AH`m_?AEn zA4n<4hKbs0M*9@WW+EZHE^ zD)obV_1OIOo#yGyKJ@cfsn%GNRqA`dt)X9vSL=@=#L0xnq(mN<=@5J+Hd%YJ@5B6~ zt%(!!Q{aW&$@^3ItxL~dMDX!AECAVzL%)p>!HB?Q@*YY*mP_27iv?d@5B~4*dk8h#I_(_*ode_4#ICz z+A47E)&Ip!D6C3}!)l5^h3&uYjS-1YDMT;?=;v1c40jGXplsv2Ws0r4mx0=~N9i4N z`+4|t<^fqcFEyZSZXPKhuiyDZuW-Ky5-~D`%&%L}CI8pTm zNG7^83n?9k=l8w!iD6+l5WP=PDn|qRUpW5VJfVi1>{6&fBP=8&WTNCAj!GOJhKx_i zAQ#*O1vwO%Ia8(dFA~EX+#!nLKRNLK9i?IaKPc}12=4#eY5Qc6-2D8R zG{VAUlF>MrYQMt5!mdu~$uY$OV80$wwZa&gno?pxq)8l#`nsSYmLv#@0+?i(1qDf@ zq@?nL-w`QJ(l#Gu3iNW^>M4JicvJuO6MKxrxMnQ&6fU&#`}a6$ zkdOm5Iqy2?AI1)uCwgg&O};^?3r{N2!m%A1#A%~naHr}m=a&;JcRwu^Q z8I#-^?(pT>6jk3{hk3xTyM#VbFkYyY*~p)&TCg)prxN`3(?|gUSX#NVi!cgs6VbcU z10B(>6rSfz5a6gzRI>IGZ+(N%Fg1r!F|{t5K>VU%d4DM9k-p@a_|6jrS@5mkBVerBk-oYrwV+{C0Gfh=8ij$e?e`#xmg=(kk+qf_mp%9c}g8~ zxik-1w+v?+w?I^N`7Eb3F^mgc)x#VT)=0Q(9APgnWxAhaUwfc7ls4p4%M5qpXNeHY zCOu6%_?=D;Z!>QnK|goqh97N7?Y%$j#!7`5y?LUaus<9nG^QD`J1R!gfRiV1X!@wW 
z7MLpTjPv>T&@pq&Y~a4>{J2#-b&=EjV-@N|lEjM#5F-h*1UfG_k|j_N%`LVKVQ~``A9vs+XLn&}++=5fqsUuV|v%p`xSruL;K)DJNC~(4B1$w^MW3 zQK%yzDa(zJX&2#!Q0`F9MpjnV?@k4V3Z}#Tx(5_C$6oBpxBhM&%@8(WVzmANZrwva z7(;R_HKdCU(^4|4&?}quY%DdjPF7fh^&&we{h~jw{jkVyBc7M1ivz_+LG=0hKJrM; zZKyRmzqpDf*Kjpx{OyZc;d;f_dE)x@ctX;*M9?@bfhd==hiPx~zs}egeJL5isM8JSEfSiipGzzS( zDh9pW8yg$<=uME>^C?PTmN3id8PKaEf+!33lZ|~|+RqMUIY(aWN-{eoD5FKn^K=TuTe)DoAw>#ePnO33;xLRyY^`EN#8IkJ`O)#27v;r zeo9&nURdl;k@Y{4<38%L9TO8%)Fb_w`kbDksK?6qi}Ef4ML7*4x9qgEjjw}hS&h=- z0uBYC31OV3Lw#c-Zd&os(~0@R+QG!Xxk>M_Y2rz3v5{&%j)ccVS4_Q+=2uWqFp?ax zzyqK*V`@2=%F&Z4k%FYCLNq%JSdR;n)T8A+oJ5&F9Z{NN3Tv`ljR*h+=@LFM84u$U zArI-lgE9CowyS^gaQ;J?_@FCL11{G2biRJQs$pVA{@*)+U%LbRQ&^3111)U6{+G4m zbJj%b+ckWGQ5!UDSTg>n@IUs?sic%t-_lZgUclSSX!jfVD%2=fD-C+<)hy8LBvuoI z{)9HIVp6c(uf7-xfPhJ1z@MJskX$!Yn^d?i;E_PxnI>$do)C6=+I`Qm@TXl4N)!DV=l+}VlmH{!d1j}b5QD8s{C`;7lG###Fb1=Hwqxw|}& zPwCdmOA`5p-cCQulo*0NSZ8UprzBM8)D@OiX{%TExy!`Wk59XS=l;%X-=KYPuZP~= zs|G9ls8m@%v_li8@loSlW@eXAm3Os4@DFAq+XXK#*Q&1|3G;|l zAyT&I6W3*F-Ce6UBpoB+kH0tJi|ih!%8bt=>ZMGU!E9Ks@*mCQC5U62TH?loX==;G zKisaoVjLi+^`8uO-wbPLN!j21lpVQ-@1^8hb}Bj3sgsb>t@UAY)qjVZO3< z;dujsK}Tx*ps3JIlfwoP%8x0ggV?2Q=q-#RGHgKbdz3*8ULmJ+g)@b0k1Js{Q7t(kw+=p zV+e)igCo@3zPU!ZD#_rZE3l(8p*WORJ^e?Zk zv%k~dYDtE1FNa$N4JP+W@!wiAEQ4q3K}s&PXDaPKy)8 z#YX$EK`JtRZzz+RzDbJu$=e{=-w|BHur8lfhBbTxb zD9lM?HHwFZMFNSk($d16uQeZ~Cy?-A*)Galu#oaP1-)+w2@0}#Uz-Q~Xl~~5xjhbE zUe?L@@uR@7)8~cvyBQ=b(wOzY_A*nF!3ox!kV+nfT$rR(Id5n*8Su#1lAr&H$*_aq z_4#2qj+h5*U#?XZ4dJ#X2V<$r4K{NkAF99pwtQI-V6-6<@Q4-pRkv4*O@{n)t;v3G z3Geh$YU?M0aOTP30P&R#hn6T)YvFBi*o zFp62+k_BR6VE^mG$?+PqJ-MGec389j3eDjZdUaz5hbe6XE1_FGJ-wU9&;uBFc=(LW z%tEiL-P(qR{Ro|8EoZC8umg&;wEIJ4d@(+9SmDB1BCc<@)A>!EK6kroC$($YXD2zS z_p1#y{YF*ot*r$H?H&YzI%>9F5W9&$$cGb;Sr@2UB-OjQnHwm~Yro3b_3`-VC}J{; z=Vq-A%|6P*#l=NzsoKE#;lh9#04TCwYtm`=VD`9JA591A2vnQ&hhGK93H4W~7QyA^ zvFNw-7%3QZ`rxKAXzsECvvYFT%||eVr@v>%F*IYO7F~g5JWNYDJ!bo%$=dy8-9jl- z^&r`NXJ_XH7Z(Bm+KXpRrmL6xp$I1071eo=*4WcQvhdPZ74JEz5w5$;STUl``&d1q0xNp$KT#_G7mo0OOH=Z^&x2 
z-*u^7NoN1(jS)0x050}2B;EHx@uzh>I^t124r}jvVItdfJ9rZz!}hrwdcbit^}8f$Y$Ce4_2i+-DT8^ksth_Jo6w9^Rna`7gt}&;AL`8!l-0PFlE6@Lw*FgYPOE z*4r_(r?|IY3)W0t&9_Q!5Wv{u=j!ipkNH0QOdvK1%~mU`tUxr6jjUZGedM5y zL41dEbI39T`g{85-w<{z{fRq=cXZm(uo0ay5g?U76ywC=@3BDH5oc~2QtS1^04_x^ zR}JIe2Deb3@jHJk}Tn^+$h+w)0inO_0 zx;ys!@osoX_M0H9zLpZ_N(gN4Uv1TyCJ1y3g|M29rlr5wf$QR01||2jkK*_Ix6uO# zDSmtnI1mzqDFUV(nl^Hiiv%zgo6n`l;b->D-*)e{cD$suz5fwU=%!@*tR(Y6q~G+a zTQ3ioG7!F&cQryKBO`0=e~`AEK}VpMf%MBO#hw=f#&%%G+b@9Ab_FYUx~7N8k>1PS zJ7Fq^-|S`Lt;K7XjzQBdRt@d)YOI+(P1z_~U-ey!Dn3S8A{Ier@u&-2Os!$fMnJ5E z4%D2R1y^*UYE1|Puj4WlS+wr{l4HqaQB@}Qms{JJnk*{gu>LfGf?O2jOm{ER5`iwd zpqhBe@R{}SW#t3CKsc)S}N{s{7 z)JUZYWa~cq-Lb(Cn-32RA;sw~h9?el0heq;+a6s^PLJ1gxC^nmUrQF!zfVtKwu?aH zM#P1IFYNu~IHkUd!vqLX5b|wbHdrL+z#cW?lnRrc){Nq|}I-Zlza(54Z&*$A2uqrvP zpwL#o9pgk#c`-iK>r3&F^!K5&9)(P%`h{c11YCSb+-FyRd+Xho*ypp#MTSfW=zu7O}gsQU0;ZYluH@NT==KfRfMD%U#{9Fv7fXoYsQ zmPNN<^Qo`@aTh>ANTk+Hr@*98YbA3+#?FaJY`wUUDn$03(?Eu=4hw;JFqq4motyh@ z`l(XsY`H$o%KbAtyXxqX|1s%Q>O|MJzcI;!EG| z>C}3VrI`UDdeE0a${V{*IfgxR)SyVasU%M9F#0nKboSYoPPt0@YOS4b+g0~?X z4KZHb4a|71!H%-&(S5p6Ed$viNa&D)AhM6l4^0dP$xrtng1B~QA6qXKPfg|~pZh|{Y*F-UjYUml zAuve6Wp6iCyB7>_`9W0RND6Nf>T}CQu2N z(=_(WLKPB;9j9luxa9tA50BTh`pax3^~v~Gxuvik>1O_@u~Bd4RGLbOzvt(uJUl$Y zWh*PPxq7vKEXSfug!b8NmMk6yl%UUk*J*z}{q1U+C%f9IF?O&{pMKdq+dD+vKds0q z?qx@57=WHEaQqhuLYf#meIL;7P8LEtJB5_;gyWR+s7d$ht4r0CVsDYy&fCs%ms44v z?ank8pU$$Et&%)gQ(gE3EM%%}Z#iDpsh>99qTs$MC_zZze0vby1c!trujJeY+qoovgOocP_}w zsgk6LF}h5YW+O@;1r$fp5-)Y&{Pd^{Dr{PvFL|#Kh4cVY@>orOnr9XDcl#D<>-bvR zzg%q_IJXOtg%}%~F9=Ti+B5 z>4+O&jhguY#t1AekxCR~5hJzxU}*r_eZ&Sj#~54)_9)j#FSojLZ2J23*2BhJ@v##z zO2rPH`9I9;H(S13w5Lt?RNG#*7>yuHqhAj08l*oSNHi99I_@PXy-5Vod6hohI30CEB&bEtx^vBfTt`H`xe?}a#CyQBp@Mw*tv+{6ee>L5BF?ZK6++z7?{4C!jz{`7XR*7D%ddw80iK&$V= zGk(Ir{qGvE1+W-*Je88ft_-ID6^%bo^io$6h$Aci*aJ9>#*jKI_(jew#3S9VJBx6{ z0{037vm%$W!@}*6kR8V&Zq=)X6Ey-d9&obN0boQSQr32Tx{YHtG0y-(!`d6?0uX0a z*To8TX(j7?-O-s=+wmX7o=sv?$pa87c5hL3>O8sQ4FY4rtm4g%TedJuh68BC-cA#x zF^JNCViGn4TN$G;m5D64(UXH+Qo|08#C2*91*eQQQ_BB*kc