Compare commits

...

16 Commits

Author SHA1 Message Date
vizhur
c95f970dc8 update samples from Release-167 as a part of 1.0.83 SDK release 2020-01-06 20:16:21 +00:00
Shané Winner
9b9d112719 Update index.md 2019-12-24 07:40:48 -08:00
vizhur
fe8fcd4b48 Merge pull request #712 from Azure/release_update/Release-31
update samples - test
2019-12-23 20:28:02 -05:00
vizhur
296ae01587 update samples - test 2019-12-24 00:42:48 +00:00
Shané Winner
8f4efe15eb Update index.md 2019-12-10 09:05:23 -08:00
vizhur
d179080467 Merge pull request #690 from Azure/release_update/Release-163
update samples from Release-163 as a part of 1.0.79 SDK release
2019-12-09 15:41:03 -05:00
vizhur
0040644e7a update samples from Release-163 as a part of 1.0.79 SDK release 2019-12-09 20:09:30 +00:00
Shané Winner
8aa04307fb Update index.md 2019-12-03 10:24:18 -08:00
Shané Winner
a525da4488 Update index.md 2019-11-27 13:08:21 -08:00
Shané Winner
e149565a8a Merge pull request #679 from Azure/release_update/Release-30
update samples - test
2019-11-27 13:05:00 -08:00
vizhur
75610ec31c update samples - test 2019-11-27 21:02:21 +00:00
Shané Winner
0c2c450b6b Update index.md 2019-11-25 14:34:48 -08:00
Shané Winner
0d548eabff Merge pull request #677 from Azure/release_update/Release-29
update samples - test
2019-11-25 14:31:50 -08:00
vizhur
e4029801e6 update samples - test 2019-11-25 22:24:09 +00:00
Shané Winner
156974ee7b Update index.md 2019-11-25 11:42:53 -08:00
Shané Winner
1f05157d24 Merge pull request #676 from Azure/release_update/Release-160
update samples from Release-160 as a part of 1.0.76 SDK release
2019-11-25 11:39:27 -08:00
109 changed files with 3433 additions and 1018 deletions

View File

@@ -103,7 +103,7 @@
"source": [ "source": [
"import azureml.core\n", "import azureml.core\n",
"\n", "\n",
"print(\"This notebook was created using version 1.0.76 of the Azure ML SDK\")\n", "print(\"This notebook was created using version 1.0.83 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")" "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
] ]
}, },

View File

@@ -13,7 +13,6 @@ dependencies:
- scikit-learn>=0.19.0,<=0.20.3 - scikit-learn>=0.19.0,<=0.20.3
- pandas>=0.22.0,<=0.23.4 - pandas>=0.22.0,<=0.23.4
- py-xgboost<=0.80 - py-xgboost<=0.80
- pyarrow>=0.11.0
- fbprophet==0.5 - fbprophet==0.5
- pytorch=1.1.0 - pytorch=1.1.0
- cudatoolkit=9.0 - cudatoolkit=9.0
@@ -30,7 +29,7 @@ dependencies:
- pytorch-transformers==1.0.0 - pytorch-transformers==1.0.0
- spacy==2.1.8 - spacy==2.1.8
- joblib - joblib
- onnxruntime==0.4.0 - onnxruntime==1.0.0
- https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz - https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
channels: channels:

View File

@@ -14,7 +14,6 @@ dependencies:
- scikit-learn>=0.19.0,<=0.20.3 - scikit-learn>=0.19.0,<=0.20.3
- pandas>=0.22.0,<0.23.0 - pandas>=0.22.0,<0.23.0
- py-xgboost<=0.80 - py-xgboost<=0.80
- pyarrow>=0.11.0
- fbprophet==0.5 - fbprophet==0.5
- pytorch=1.1.0 - pytorch=1.1.0
- cudatoolkit=9.0 - cudatoolkit=9.0
@@ -31,7 +30,7 @@ dependencies:
- pytorch-transformers==1.0.0 - pytorch-transformers==1.0.0
- spacy==2.1.8 - spacy==2.1.8
- joblib - joblib
- onnxruntime==0.4.0 - onnxruntime==1.0.0
- https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz - https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
channels: channels:

View File

@@ -14,8 +14,9 @@ IF "%CONDA_EXE%"=="" GOTO CondaMissing
call conda activate %conda_env_name% 2>nul: call conda activate %conda_env_name% 2>nul:
if not errorlevel 1 ( if not errorlevel 1 (
echo Upgrading azureml-sdk[automl,notebooks,explain] in existing conda environment %conda_env_name% echo Upgrading existing conda environment %conda_env_name%
call pip install --upgrade azureml-sdk[automl,notebooks,explain] call pip uninstall azureml-train-automl -y -q
call conda env update --name %conda_env_name% --file %automl_env_file%
if errorlevel 1 goto ErrorExit if errorlevel 1 goto ErrorExit
) else ( ) else (
call conda env create -f %automl_env_file% -n %conda_env_name% call conda env create -f %automl_env_file% -n %conda_env_name%

View File

@@ -22,8 +22,9 @@ fi
if source activate $CONDA_ENV_NAME 2> /dev/null if source activate $CONDA_ENV_NAME 2> /dev/null
then then
echo "Upgrading azureml-sdk[automl,notebooks,explain] in existing conda environment" $CONDA_ENV_NAME echo "Upgrading existing conda environment" $CONDA_ENV_NAME
pip install --upgrade azureml-sdk[automl,notebooks,explain] && pip uninstall azureml-train-automl -y -q
conda env update --name $CONDA_ENV_NAME --file $AUTOML_ENV_FILE &&
jupyter nbextension uninstall --user --py azureml.widgets jupyter nbextension uninstall --user --py azureml.widgets
else else
conda env create -f $AUTOML_ENV_FILE -n $CONDA_ENV_NAME && conda env create -f $AUTOML_ENV_FILE -n $CONDA_ENV_NAME &&

View File

@@ -22,8 +22,9 @@ fi
if source activate $CONDA_ENV_NAME 2> /dev/null if source activate $CONDA_ENV_NAME 2> /dev/null
then then
echo "Upgrading azureml-sdk[automl,notebooks,explain] in existing conda environment" $CONDA_ENV_NAME echo "Upgrading existing conda environment" $CONDA_ENV_NAME
pip install --upgrade azureml-sdk[automl,notebooks,explain] && pip uninstall azureml-train-automl -y -q
conda env update --name $CONDA_ENV_NAME --file $AUTOML_ENV_FILE &&
jupyter nbextension uninstall --user --py azureml.widgets jupyter nbextension uninstall --user --py azureml.widgets
else else
conda env create -f $AUTOML_ENV_FILE -n $CONDA_ENV_NAME && conda env create -f $AUTOML_ENV_FILE -n $CONDA_ENV_NAME &&

View File

@@ -285,14 +285,16 @@
"|**task**|classification or regression or forecasting|\n", "|**task**|classification or regression or forecasting|\n",
"|**primary_metric**|This is the metric that you want to optimize. Classification supports the following primary metrics: <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>average_precision_score_weighted</i><br><i>norm_macro_recall</i><br><i>precision_score_weighted</i>|\n", "|**primary_metric**|This is the metric that you want to optimize. Classification supports the following primary metrics: <br><i>accuracy</i><br><i>AUC_weighted</i><br><i>average_precision_score_weighted</i><br><i>norm_macro_recall</i><br><i>precision_score_weighted</i>|\n",
"|**iteration_timeout_minutes**|Time limit in minutes for each iteration.|\n", "|**iteration_timeout_minutes**|Time limit in minutes for each iteration.|\n",
"|**blacklist_models** or **whitelist_models** |*List* of *strings* indicating machine learning algorithms for AutoML to avoid in this run.<br><br> Allowed values for **Classification**<br><i>LogisticRegression</i><br><i>SGD</i><br><i>MultinomialNaiveBayes</i><br><i>BernoulliNaiveBayes</i><br><i>SVM</i><br><i>LinearSVM</i><br><i>KNN</i><br><i>DecisionTree</i><br><i>RandomForest</i><br><i>ExtremeRandomTrees</i><br><i>LightGBM</i><br><i>GradientBoosting</i><br><i>TensorFlowDNN</i><br><i>TensorFlowLinearClassifier</i><br><br>Allowed values for **Regression**<br><i>ElasticNet</i><br><i>GradientBoosting</i><br><i>DecisionTree</i><br><i>KNN</i><br><i>LassoLars</i><br><i>SGD</i><br><i>RandomForest</i><br><i>ExtremeRandomTrees</i><br><i>LightGBM</i><br><i>TensorFlowLinearRegressor</i><br><i>TensorFlowDNN</i><br><br>Allowed values for **Forecasting**<br><i>ElasticNet</i><br><i>GradientBoosting</i><br><i>DecisionTree</i><br><i>KNN</i><br><i>LassoLars</i><br><i>SGD</i><br><i>RandomForest</i><br><i>ExtremeRandomTrees</i><br><i>LightGBM</i><br><i>TensorFlowLinearRegressor</i><br><i>TensorFlowDNN</i><br><i>Arima</i><br><i>Prophet</i>|\n", "|**blacklist_models** | *List* of *strings* indicating machine learning algorithms for AutoML to avoid in this run. 
<br><br> Allowed values for **Classification**<br><i>LogisticRegression</i><br><i>SGD</i><br><i>MultinomialNaiveBayes</i><br><i>BernoulliNaiveBayes</i><br><i>SVM</i><br><i>LinearSVM</i><br><i>KNN</i><br><i>DecisionTree</i><br><i>RandomForest</i><br><i>ExtremeRandomTrees</i><br><i>LightGBM</i><br><i>GradientBoosting</i><br><i>TensorFlowDNN</i><br><i>TensorFlowLinearClassifier</i><br><br>Allowed values for **Regression**<br><i>ElasticNet</i><br><i>GradientBoosting</i><br><i>DecisionTree</i><br><i>KNN</i><br><i>LassoLars</i><br><i>SGD</i><br><i>RandomForest</i><br><i>ExtremeRandomTrees</i><br><i>LightGBM</i><br><i>TensorFlowLinearRegressor</i><br><i>TensorFlowDNN</i><br><br>Allowed values for **Forecasting**<br><i>ElasticNet</i><br><i>GradientBoosting</i><br><i>DecisionTree</i><br><i>KNN</i><br><i>LassoLars</i><br><i>SGD</i><br><i>RandomForest</i><br><i>ExtremeRandomTrees</i><br><i>LightGBM</i><br><i>TensorFlowLinearRegressor</i><br><i>TensorFlowDNN</i><br><i>Arima</i><br><i>Prophet</i>|\n",
"| **whitelist_models** | *List* of *strings* indicating machine learning algorithms for AutoML to use in this run. Same values listed above for **blacklist_models** allowed for **whitelist_models**.|\n",
"|**experiment_exit_score**| Value indicating the target for *primary_metric*. <br>Once the target is surpassed the run terminates.|\n", "|**experiment_exit_score**| Value indicating the target for *primary_metric*. <br>Once the target is surpassed the run terminates.|\n",
"|**experiment_timeout_minutes**| Maximum amount of time in minutes that all iterations combined can take before the experiment terminates.|\n", "|**experiment_timeout_hours**| Maximum amount of time in hours that all iterations combined can take before the experiment terminates.|\n",
"|**enable_early_stopping**| Flag to enble early termination if the score is not improving in the short term.|\n", "|**enable_early_stopping**| Flag to enble early termination if the score is not improving in the short term.|\n",
"|**featurization**| 'auto' / 'off' Indicator for whether featurization step should be done automatically or not. Note: If the input data is sparse, featurization cannot be turned on.|\n", "|**featurization**| 'auto' / 'off' Indicator for whether featurization step should be done automatically or not. Note: If the input data is sparse, featurization cannot be turned on.|\n",
"|**n_cross_validations**|Number of cross validation splits.|\n", "|**n_cross_validations**|Number of cross validation splits.|\n",
"|**training_data**|Input dataset, containing both features and label column.|\n", "|**training_data**|Input dataset, containing both features and label column.|\n",
"|**label_column_name**|The name of the label column.|\n", "|**label_column_name**|The name of the label column.|\n",
"|**model_explainability**|Indicate to explain each trained pipeline or not.|\n",
"\n", "\n",
"**_You can find more information about primary metrics_** [here](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-configure-auto-train#primary-metric)" "**_You can find more information about primary metrics_** [here](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-configure-auto-train#primary-metric)"
] ]
@@ -304,7 +306,7 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"automl_settings = {\n", "automl_settings = {\n",
" \"experiment_timeout_minutes\" : 20,\n", " \"experiment_timeout_hours\" : 0.3,\n",
" \"enable_early_stopping\" : True,\n", " \"enable_early_stopping\" : True,\n",
" \"iteration_timeout_minutes\": 5,\n", " \"iteration_timeout_minutes\": 5,\n",
" \"max_concurrent_iterations\": 4,\n", " \"max_concurrent_iterations\": 4,\n",
@@ -324,6 +326,7 @@
" training_data = train_data,\n", " training_data = train_data,\n",
" label_column_name = label,\n", " label_column_name = label,\n",
" validation_data = validation_dataset,\n", " validation_data = validation_dataset,\n",
" model_explainability=True,\n",
" **automl_settings\n", " **automl_settings\n",
" )" " )"
] ]
@@ -456,6 +459,72 @@
"RunDetails(remote_run).show() " "RunDetails(remote_run).show() "
] ]
}, },
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Retrieve the Best Model's explanation\n",
"Retrieve the explanation from the best_run which includes explanations for engineered features and raw features. Make sure that the run for generating explanations for the best model is completed."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Wait for the best model explanation run to complete\n",
"from azureml.train.automl.run import AutoMLRun\n",
"model_explainability_run_id = remote_run.get_properties().get('ModelExplainRunId')\n",
"print(model_explainability_run_id)\n",
"if model_explainability_run_id is not None:\n",
" model_explainability_run = AutoMLRun(experiment=experiment, run_id=model_explainability_run_id)\n",
" model_explainability_run.wait_for_completion()\n",
"\n",
"# Get the best run object\n",
"best_run, fitted_model = remote_run.get_output()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Download engineered feature importance from artifact store\n",
"You can use ExplanationClient to download the engineered feature explanations from the artifact store of the best_run."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"client = ExplanationClient.from_run(best_run)\n",
"engineered_explanations = client.download_model_explanation(raw=False)\n",
"exp_data = engineered_explanations.get_feature_importance_dict()\n",
"exp_data"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Download raw feature importance from artifact store\n",
"You can use ExplanationClient to download the raw feature explanations from the artifact store of the best_run."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"client = ExplanationClient.from_run(best_run)\n",
"engineered_explanations = client.download_model_explanation(raw=True)\n",
"exp_data = engineered_explanations.get_feature_importance_dict()\n",
"exp_data"
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
@@ -572,20 +641,6 @@
"best_run, fitted_model = remote_run.get_output()" "best_run, fitted_model = remote_run.get_output()"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import shutil\n",
"\n",
"sript_folder = os.path.join(os.getcwd(), 'inference')\n",
"project_folder = '/inference'\n",
"os.makedirs(project_folder, exist_ok=True)"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
@@ -639,10 +694,10 @@
"from azureml.core.webservice import AciWebservice\n", "from azureml.core.webservice import AciWebservice\n",
"from azureml.core.webservice import Webservice\n", "from azureml.core.webservice import Webservice\n",
"from azureml.core.model import Model\n", "from azureml.core.model import Model\n",
"from azureml.core.environment import Environment\n",
"\n", "\n",
"inference_config = InferenceConfig(runtime = \"python\", \n", "myenv = Environment.from_conda_specification(name=\"myenv\", file_path=conda_env_file_name)\n",
" entry_script = script_file_name,\n", "inference_config = InferenceConfig(entry_script=script_file_name, environment=myenv)\n",
" conda_file = conda_env_file_name)\n",
"\n", "\n",
"aciconfig = AciWebservice.deploy_configuration(cpu_cores = 1, \n", "aciconfig = AciWebservice.deploy_configuration(cpu_cores = 1, \n",
" memory_gb = 1, \n", " memory_gb = 1, \n",

View File

@@ -2,12 +2,3 @@ name: auto-ml-classification-bank-marketing-all-features
dependencies: dependencies:
- pip: - pip:
- azureml-sdk - azureml-sdk
- interpret
- azureml-defaults
- azureml-train-automl
- azureml-widgets
- matplotlib
- pandas_ml
- onnxruntime==0.4.0
- azureml-explain-model
- azureml-contrib-interpret

View File

@@ -213,7 +213,7 @@
" \"preprocess\": True,\n", " \"preprocess\": True,\n",
" \"enable_early_stopping\": True,\n", " \"enable_early_stopping\": True,\n",
" \"max_concurrent_iterations\": 2, # This is a limit for testing purpose, please increase it as per cluster size\n", " \"max_concurrent_iterations\": 2, # This is a limit for testing purpose, please increase it as per cluster size\n",
" \"experiment_timeout_minutes\": 10, # This is a time limit for testing purposes, remove it for real use cases, this will drastically limit ablity to find the best model possible\n", " \"experiment_timeout_hours\": 0.2, # This is a time limit for testing purposes, remove it for real use cases, this will drastically limit ablity to find the best model possible\n",
" \"verbosity\": logging.INFO,\n", " \"verbosity\": logging.INFO,\n",
"}\n", "}\n",
"\n", "\n",
@@ -305,7 +305,7 @@
"source": [ "source": [
"#### Explain model\n", "#### Explain model\n",
"\n", "\n",
"Automated ML models can be explained and visualized using the SDK Explainability library. [Learn how to use the explainer](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/model-explanation-remote-amlcompute/auto-ml-model-explanations-remote-compute.ipynb)." "Automated ML models can be explained and visualized using the SDK Explainability library. "
] ]
}, },
{ {
@@ -334,17 +334,7 @@
"metadata": {}, "metadata": {},
"source": [ "source": [
"#### Print the properties of the model\n", "#### Print the properties of the model\n",
"The fitted_model is a python object and you can read the different properties of the object.\n", "The fitted_model is a python object and you can read the different properties of the object.\n"
"See *Print the properties of the model* section in [this sample notebook](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/classification/auto-ml-classification.ipynb)."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Deploy\n",
"\n",
"To deploy the model into a web service endpoint, see _Deploy_ section in [this sample notebook](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/classification-with-deployment/auto-ml-classification-with-deployment.ipynb)"
] ]
}, },
{ {

View File

@@ -2,10 +2,3 @@ name: auto-ml-classification-credit-card-fraud
dependencies: dependencies:
- pip: - pip:
- azureml-sdk - azureml-sdk
- interpret
- azureml-defaults
- azureml-explain-model
- azureml-train-automl
- azureml-widgets
- matplotlib
- pandas_ml

View File

@@ -519,12 +519,12 @@
"name": "anshirga" "name": "anshirga"
} }
], ],
"datasets": [
"None"
],
"compute": [ "compute": [
"AML Compute" "AML Compute"
], ],
"datasets": [
"None"
],
"deployment": [ "deployment": [
"None" "None"
], ],

View File

@@ -2,9 +2,3 @@ name: auto-ml-classification-text-dnn
dependencies: dependencies:
- pip: - pip:
- azureml-sdk - azureml-sdk
- azureml-train-automl
- azureml-train
- azureml-widgets
- matplotlib
- pandas_ml
- statsmodels

View File

@@ -210,7 +210,24 @@
"metadata": {}, "metadata": {},
"source": [ "source": [
"## Data Ingestion Pipeline \n", "## Data Ingestion Pipeline \n",
"For this demo, we will use NOAA weather data from [Azure Open Datasets](https://azure.microsoft.com/services/open-datasets/). You can replace this with your own dataset, or you can skip this pipeline if you already have a time-series based `TabularDataset`.\n", "For this demo, we will use NOAA weather data from [Azure Open Datasets](https://azure.microsoft.com/services/open-datasets/). You can replace this with your own dataset, or you can skip this pipeline if you already have a time-series based `TabularDataset`.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# The name and target column of the Dataset to create \n",
"dataset = \"NOAA-Weather-DS4\"\n",
"target_column_name = \"temperature\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n", "\n",
"### Upload Data Step\n", "### Upload Data Step\n",
"The data ingestion pipeline has a single step with a script to query the latest weather data and upload it to the blob store. During the first run, the script will create and register a time-series based `TabularDataset` with the past one week of weather data. For each subsequent run, the script will create a partition in the blob store by querying NOAA for new weather data since the last modified time of the dataset (`dataset.data_changed_time`) and creating a data.csv file." "The data ingestion pipeline has a single step with a script to query the latest weather data and upload it to the blob store. During the first run, the script will create and register a time-series based `TabularDataset` with the past one week of weather data. For each subsequent run, the script will create a partition in the blob store by querying NOAA for new weather data since the last modified time of the dataset (`dataset.data_changed_time`) and creating a data.csv file."
@@ -225,8 +242,6 @@
"from azureml.pipeline.core import Pipeline, PipelineParameter\n", "from azureml.pipeline.core import Pipeline, PipelineParameter\n",
"from azureml.pipeline.steps import PythonScriptStep\n", "from azureml.pipeline.steps import PythonScriptStep\n",
"\n", "\n",
"# The name of the Dataset to create \n",
"dataset = \"NOAA-Weather-DS4\"\n",
"ds_name = PipelineParameter(name=\"ds_name\", default_value=dataset)\n", "ds_name = PipelineParameter(name=\"ds_name\", default_value=dataset)\n",
"upload_data_step = PythonScriptStep(script_name=\"upload_weather_data.py\", \n", "upload_data_step = PythonScriptStep(script_name=\"upload_weather_data.py\", \n",
" allow_reuse=False,\n", " allow_reuse=False,\n",
@@ -272,7 +287,7 @@
"## Training Pipeline\n", "## Training Pipeline\n",
"### Prepare Training Data Step\n", "### Prepare Training Data Step\n",
"\n", "\n",
"Script to bring data into common X,y format. We need to set allow_reuse flag to False to allow the pipeline to run even when inputs don't change. We also need the name of the model to check the time the model was last trained." "Script to check if new data is available since the model was last trained. If no new data is available, we cancel the remaining pipeline steps. We need to set allow_reuse flag to False to allow the pipeline to run even when inputs don't change. We also need the name of the model to check the time the model was last trained."
] ]
}, },
{ {
@@ -283,11 +298,8 @@
"source": [ "source": [
"from azureml.pipeline.core import PipelineData\n", "from azureml.pipeline.core import PipelineData\n",
"\n", "\n",
"target_column = PipelineParameter(\"target_column\", default_value=\"y\")\n",
"# The model name with which to register the trained model in the workspace.\n", "# The model name with which to register the trained model in the workspace.\n",
"model_name = PipelineParameter(\"model_name\", default_value=\"y\")\n", "model_name = PipelineParameter(\"model_name\", default_value=\"noaaweatherds\")"
"output_x = PipelineData(\"output_x\", datastore=dstor)\n",
"output_y = PipelineData(\"output_y\", datastore=dstor)"
] ]
}, },
{ {
@@ -299,16 +311,23 @@
"data_prep_step = PythonScriptStep(script_name=\"check_data.py\", \n", "data_prep_step = PythonScriptStep(script_name=\"check_data.py\", \n",
" allow_reuse=False,\n", " allow_reuse=False,\n",
" name=\"check_data\",\n", " name=\"check_data\",\n",
" arguments=[\"--target_column\", target_column,\n", " arguments=[\"--ds_name\", ds_name,\n",
" \"--output_x\", output_x,\n",
" \"--output_y\", output_y,\n",
" \"--ds_name\", ds_name,\n",
" \"--model_name\", model_name],\n", " \"--model_name\", model_name],\n",
" outputs=[output_x, output_y], \n",
" compute_target=compute_target, \n", " compute_target=compute_target, \n",
" runconfig=conda_run_config)" " runconfig=conda_run_config)"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core import Dataset\n",
"train_ds = Dataset.get_by_name(ws, dataset)\n",
"train_ds = train_ds.drop_columns([\"partition_date\"])"
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
@@ -324,11 +343,11 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"from azureml.train.automl import AutoMLConfig\n", "from azureml.train.automl import AutoMLConfig\n",
"from azureml.train.automl.runtime import AutoMLStep\n", "from azureml.train.automl import AutoMLStep\n",
"\n", "\n",
"automl_settings = {\n", "automl_settings = {\n",
" \"iteration_timeout_minutes\": 20,\n", " \"iteration_timeout_minutes\": 10,\n",
" \"experiment_timeout_minutes\": 30,\n", " \"experiment_timeout_hours\": 0.2,\n",
" \"n_cross_validations\": 3,\n", " \"n_cross_validations\": 3,\n",
" \"primary_metric\": 'r2_score',\n", " \"primary_metric\": 'r2_score',\n",
" \"preprocess\": True,\n", " \"preprocess\": True,\n",
@@ -342,8 +361,8 @@
" debug_log = 'automl_errors.log',\n", " debug_log = 'automl_errors.log',\n",
" path = \".\",\n", " path = \".\",\n",
" compute_target=compute_target,\n", " compute_target=compute_target,\n",
" run_configuration=conda_run_config,\n", " training_data = train_ds,\n",
" data_script = \"get_data.py\",\n", " label_column_name = target_column_name,\n",
" **automl_settings\n", " **automl_settings\n",
" )" " )"
] ]
@@ -359,7 +378,7 @@
"metrics_output_name = 'metrics_output'\n", "metrics_output_name = 'metrics_output'\n",
"best_model_output_name = 'best_model_output'\n", "best_model_output_name = 'best_model_output'\n",
"\n", "\n",
"metirics_data = PipelineData(name='metrics_data',\n", "metrics_data = PipelineData(name='metrics_data',\n",
" datastore=dstor,\n", " datastore=dstor,\n",
" pipeline_output_name=metrics_output_name,\n", " pipeline_output_name=metrics_output_name,\n",
" training_output=TrainingOutput(type='Metrics'))\n", " training_output=TrainingOutput(type='Metrics'))\n",
@@ -378,8 +397,7 @@
"automl_step = AutoMLStep(\n", "automl_step = AutoMLStep(\n",
" name='automl_module',\n", " name='automl_module',\n",
" automl_config=automl_config,\n", " automl_config=automl_config,\n",
" inputs=[output_x, output_y],\n", " outputs=[metrics_data, model_data],\n",
" outputs=[metirics_data, model_data],\n",
" allow_reuse=False)" " allow_reuse=False)"
] ]
}, },
@@ -432,7 +450,7 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"training_pipeline_run = experiment.submit(training_pipeline, pipeline_parameters={\n", "training_pipeline_run = experiment.submit(training_pipeline, pipeline_parameters={\n",
" \"target_column\": \"temperature\", \"ds_name\": dataset, \"model_name\": \"noaaweatherds\"})" " \"ds_name\": dataset, \"model_name\": \"noaaweatherds\"})"
] ]
}, },
{ {
@@ -441,7 +459,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"training_pipeline_run.wait_for_completion()" "training_pipeline_run.wait_for_completion(show_output=False)"
] ]
}, },
{ {
@@ -475,7 +493,7 @@
"source": [ "source": [
"from azureml.pipeline.core import Schedule\n", "from azureml.pipeline.core import Schedule\n",
"schedule = Schedule.create(workspace=ws, name=\"RetrainingSchedule\",\n", "schedule = Schedule.create(workspace=ws, name=\"RetrainingSchedule\",\n",
" pipeline_parameters={\"target_column\": \"temperature\",\"ds_name\": dataset, \"model_name\": \"noaaweatherds\"},\n", " pipeline_parameters={\"ds_name\": dataset, \"model_name\": \"noaaweatherds\"},\n",
" pipeline_id=published_pipeline.id, \n", " pipeline_id=published_pipeline.id, \n",
" experiment_name=experiment_name, \n", " experiment_name=experiment_name, \n",
" datastore=dstor,\n", " datastore=dstor,\n",

View File

@@ -2,8 +2,3 @@ name: auto-ml-continuous-retraining
dependencies: dependencies:
- pip: - pip:
- azureml-sdk - azureml-sdk
- azureml-train-automl
- azureml-pipeline
- azureml-widgets
- matplotlib
- pandas_ml

View File

@@ -15,32 +15,16 @@ if type(run) == _OfflineRun:
else: else:
ws = run.experiment.workspace ws = run.experiment.workspace
print("Check for new data.")
def write_output(df, path):
os.makedirs(path, exist_ok=True)
print("%s created" % path)
df.to_csv(path + "/part-00000", index=False)
print("Check for new data and prepare the data")
parser = argparse.ArgumentParser("split") parser = argparse.ArgumentParser("split")
parser.add_argument("--target_column", type=str, help="input split features")
parser.add_argument("--ds_name", help="input dataset name") parser.add_argument("--ds_name", help="input dataset name")
parser.add_argument("--model_name", help="name of the deployed model") parser.add_argument("--model_name", help="name of the deployed model")
parser.add_argument("--output_x", type=str,
help="output features")
parser.add_argument("--output_y", type=str,
help="output labels")
args = parser.parse_args() args = parser.parse_args()
print("Argument 1(ds_name): %s" % args.ds_name) print("Argument 1(ds_name): %s" % args.ds_name)
print("Argument 2(target_column): %s" % args.target_column) print("Argument 2(model_name): %s" % args.model_name)
print("Argument 3(model_name): %s" % args.model_name)
print("Argument 4(output_x): %s" % args.output_x)
print("Argument 5(output_y): %s" % args.output_y)
# Get the latest registered model # Get the latest registered model
try: try:
@@ -54,22 +38,9 @@ except Exception as e:
train_ds = Dataset.get_by_name(ws, args.ds_name) train_ds = Dataset.get_by_name(ws, args.ds_name)
dataset_changed_time = train_ds.data_changed_time dataset_changed_time = train_ds.data_changed_time
if dataset_changed_time > last_train_time: if not dataset_changed_time > last_train_time:
# New data is available since the model was last trained
print("Dataset was last updated on {0}. Retraining...".format(dataset_changed_time))
train_ds = train_ds.drop_columns(["partition_date"])
X_train = train_ds.drop_columns(
columns=[args.target_column]).to_pandas_dataframe()
y_train = train_ds.keep_columns(
columns=[args.target_column]).to_pandas_dataframe()
non_null = y_train[args.target_column].notnull()
y = y_train[non_null]
X = X_train[non_null]
if not (args.output_x is None and args.output_y is None):
write_output(X, args.output_x)
write_output(y, args.output_y)
else:
print("Cancelling run since there is no new data.") print("Cancelling run since there is no new data.")
run.parent.cancel() run.parent.cancel()
else:
# New data is available since the model was last trained
print("Dataset was last updated on {0}. Retraining...".format(dataset_changed_time))

View File

@@ -1,15 +0,0 @@
import os
import pandas as pd
def get_data():
print("In get_data")
print(os.environ['AZUREML_DATAREFERENCE_output_x'])
X_train = pd.read_csv(
os.environ['AZUREML_DATAREFERENCE_output_x'] + "/part-00000")
y_train = pd.read_csv(
os.environ['AZUREML_DATAREFERENCE_output_y'] + "/part-00000")
print(X_train.head(3))
return {"X": X_train.values, "y": y_train.values.flatten()}

View File

@@ -58,7 +58,7 @@ except Exception as e:
print(traceback.format_exc()) print(traceback.format_exc())
print("Dataset with name {0} not found, registering new dataset.".format(args.ds_name)) print("Dataset with name {0} not found, registering new dataset.".format(args.ds_name))
register_dataset = True register_dataset = True
end_time_last_slice = datetime.today() - relativedelta(weeks=1) end_time_last_slice = datetime.today() - relativedelta(weeks=2)
end_time = datetime.utcnow() end_time = datetime.utcnow()
train_df = get_noaa_data(end_time_last_slice, end_time) train_df = get_noaa_data(end_time_last_slice, end_time)
@@ -80,10 +80,10 @@ if train_df.size > 0:
target_path=folder_name, target_path=folder_name,
overwrite=True, overwrite=True,
show_progress=True) show_progress=True)
if register_dataset:
ds = Dataset.Tabular.from_delimited_files(dstor.path("{}/**/*.csv".format(
args.ds_name)), partition_format='/{partition_date:yyyy/MM/dd/hh/mm/ss}/data.csv')
ds.register(ws, name=args.ds_name)
else: else:
print("No new data since {0}.".format(end_time_last_slice)) print("No new data since {0}.".format(end_time_last_slice))
if register_dataset:
ds = Dataset.Tabular.from_delimited_files(dstor.path("{}/**/*.csv".format(
args.ds_name)), partition_format='/{partition_date:yyyy/MM/dd/HH/mm/ss}/data.csv')
ds.register(ws, name=args.ds_name)

View File

@@ -358,7 +358,7 @@
"\n", "\n",
"automl_config = AutoMLConfig(task='forecasting', \n", "automl_config = AutoMLConfig(task='forecasting', \n",
" primary_metric='normalized_root_mean_squared_error',\n", " primary_metric='normalized_root_mean_squared_error',\n",
" experiment_timeout_minutes = 60,\n", " experiment_timeout_hours = 1,\n",
" training_data=train_dataset,\n", " training_data=train_dataset,\n",
" label_column_name=target_column_name,\n", " label_column_name=target_column_name,\n",
" validation_data=valid_dataset, \n", " validation_data=valid_dataset, \n",

View File

@@ -1,12 +1,4 @@
name: auto-ml-forecasting-beer-remote name: auto-ml-forecasting-beer-remote
dependencies: dependencies:
- fbprophet==0.5
- py-xgboost<=0.80
- pip: - pip:
- azureml-sdk - azureml-sdk
- azureml-train-automl
- azureml-train
- azureml-widgets
- matplotlib
- pandas_ml
- statsmodels

View File

@@ -42,7 +42,7 @@
"\n", "\n",
"AutoML highlights here include built-in holiday featurization, accessing engineered feature names, and working with the `forecast` function. Please also look at the additional forecasting notebooks, which document lagging, rolling windows, forecast quantiles, other ways to use the forecast function, and forecaster deployment.\n", "AutoML highlights here include built-in holiday featurization, accessing engineered feature names, and working with the `forecast` function. Please also look at the additional forecasting notebooks, which document lagging, rolling windows, forecast quantiles, other ways to use the forecast function, and forecaster deployment.\n",
"\n", "\n",
"Make sure you have executed the [configuration](../configuration.ipynb) before running this notebook.\n", "Make sure you have executed the [configuration notebook](../../../configuration.ipynb) before running this notebook.\n",
"\n", "\n",
"Notebook synopsis:\n", "Notebook synopsis:\n",
"1. Creating an Experiment in an existing Workspace\n", "1. Creating an Experiment in an existing Workspace\n",
@@ -202,7 +202,7 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"dataset = Dataset.Tabular.from_delimited_files(path = [(datastore, 'dataset/bike-no.csv')]).with_timestamp_columns(fine_grain_timestamp=time_column_name) \n", "dataset = Dataset.Tabular.from_delimited_files(path = [(datastore, 'dataset/bike-no.csv')]).with_timestamp_columns(fine_grain_timestamp=time_column_name) \n",
"dataset.take(5).to_pandas_dataframe()" "dataset.take(5).to_pandas_dataframe().reset_index(drop=True)"
] ]
}, },
{ {
@@ -221,8 +221,8 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"# select data that occurs before a specified date\n", "# select data that occurs before a specified date\n",
"train = dataset.time_before(datetime(2012, 9, 1))\n", "train = dataset.time_before(datetime(2012, 8, 31), include_boundary=True)\n",
"train.to_pandas_dataframe().tail(5)" "train.to_pandas_dataframe().tail(5).reset_index(drop=True)"
] ]
}, },
{ {
@@ -231,8 +231,8 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"test = dataset.time_after(datetime(2012, 8, 31))\n", "test = dataset.time_after(datetime(2012, 9, 1), include_boundary=True)\n",
"test.to_pandas_dataframe().head(5)" "test.to_pandas_dataframe().head(5).reset_index(drop=True)"
] ]
}, },
{ {
@@ -247,8 +247,8 @@
"|-|-|\n", "|-|-|\n",
"|**task**|forecasting|\n", "|**task**|forecasting|\n",
"|**primary_metric**|This is the metric that you want to optimize.<br> Forecasting supports the following primary metrics <br><i>spearman_correlation</i><br><i>normalized_root_mean_squared_error</i><br><i>r2_score</i><br><i>normalized_mean_absolute_error</i>\n", "|**primary_metric**|This is the metric that you want to optimize.<br> Forecasting supports the following primary metrics <br><i>spearman_correlation</i><br><i>normalized_root_mean_squared_error</i><br><i>r2_score</i><br><i>normalized_mean_absolute_error</i>\n",
"|**blacklist_models**|Models in blacklist won't be used by AutoML. All supported models can be found at [here](https://docs.microsoft.com/en-us/python/api/azureml-train-automl/azureml.train.automl.constants.supportedmodels.regression?view=azure-ml-py).|\n", "|**blacklist_models**|Models in blacklist won't be used by AutoML. All supported models can be found at [here](https://docs.microsoft.com/en-us/python/api/azureml-train-automl-client/azureml.train.automl.constants.supportedmodels.forecasting?view=azure-ml-py).|\n",
"|**experiment_timeout_minutes**|Experimentation timeout in minutes.|\n", "|**experiment_timeout_hours**|Experimentation timeout in hours.|\n",
"|**training_data**|Input dataset, containing both features and label column.|\n", "|**training_data**|Input dataset, containing both features and label column.|\n",
"|**label_column_name**|The name of the label column.|\n", "|**label_column_name**|The name of the label column.|\n",
"|**compute_target**|The remote compute for training.|\n", "|**compute_target**|The remote compute for training.|\n",
@@ -260,7 +260,7 @@
"|**target_lags**|The target_lags specifies how far back we will construct the lags of the target variable.|\n", "|**target_lags**|The target_lags specifies how far back we will construct the lags of the target variable.|\n",
"|**drop_column_names**|Name(s) of columns to drop prior to modeling|\n", "|**drop_column_names**|Name(s) of columns to drop prior to modeling|\n",
"\n", "\n",
"This notebook uses the blacklist_models parameter to exclude some models that take a longer time to train on this dataset. You can choose to remove models from the blacklist_models list but you may need to increase the experiment_timeout_minutes parameter value to get results." "This notebook uses the blacklist_models parameter to exclude some models that take a longer time to train on this dataset. You can choose to remove models from the blacklist_models list but you may need to increase the experiment_timeout_hours parameter value to get results."
] ]
}, },
{ {
@@ -305,7 +305,7 @@
"automl_config = AutoMLConfig(task='forecasting', \n", "automl_config = AutoMLConfig(task='forecasting', \n",
" primary_metric='normalized_root_mean_squared_error',\n", " primary_metric='normalized_root_mean_squared_error',\n",
" blacklist_models = ['ExtremeRandomTrees'], \n", " blacklist_models = ['ExtremeRandomTrees'], \n",
" experiment_timeout_minutes=20,\n", " experiment_timeout_hours=0.3,\n",
" training_data=train,\n", " training_data=train,\n",
" label_column_name=target_column_name,\n", " label_column_name=target_column_name,\n",
" compute_target=compute_target,\n", " compute_target=compute_target,\n",

View File

@@ -1,11 +1,4 @@
name: auto-ml-forecasting-bike-share name: auto-ml-forecasting-bike-share
dependencies: dependencies:
- fbprophet==0.5
- py-xgboost<=0.80
- pip: - pip:
- azureml-sdk - azureml-sdk
- azureml-train-automl
- azureml-widgets
- matplotlib
- pandas_ml
- statsmodels

View File

@@ -32,18 +32,17 @@ test_dataset = run.input_datasets['test_data']
grain_column_names = [] grain_column_names = []
df = test_dataset.to_pandas_dataframe() df = test_dataset.to_pandas_dataframe().reset_index(drop=True)
X_test_df = test_dataset.drop_columns(columns=[target_column_name]) X_test_df = test_dataset.drop_columns(columns=[target_column_name]).to_pandas_dataframe().reset_index(drop=True)
y_test_df = test_dataset.with_timestamp_columns( y_test_df = test_dataset.with_timestamp_columns(None).keep_columns(columns=[target_column_name]).to_pandas_dataframe()
None).keep_columns(columns=[target_column_name])
fitted_model = joblib.load('model.pkl') fitted_model = joblib.load('model.pkl')
df_all = forecasting_helper.do_rolling_forecast( df_all = forecasting_helper.do_rolling_forecast(
fitted_model, fitted_model,
X_test_df.to_pandas_dataframe(), X_test_df,
y_test_df.to_pandas_dataframe().values.T[0], y_test_df.values.T[0],
target_column_name, target_column_name,
time_column_name, time_column_name,
max_horizon, max_horizon,

View File

@@ -31,8 +31,8 @@
"1. [Results](#Results)\n", "1. [Results](#Results)\n",
"\n", "\n",
"Advanced Forecasting\n", "Advanced Forecasting\n",
"1. [Advanced Training](#Advanced Training)\n", "1. [Advanced Training](#advanced_training)\n",
"1. [Advanced Results](#Advanced Results)" "1. [Advanced Results](#advanced_results)"
] ]
}, },
{ {
@@ -211,7 +211,7 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"dataset = Dataset.Tabular.from_delimited_files(path = \"https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/nyc_energy.csv\").with_timestamp_columns(fine_grain_timestamp=time_column_name) \n", "dataset = Dataset.Tabular.from_delimited_files(path = \"https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/nyc_energy.csv\").with_timestamp_columns(fine_grain_timestamp=time_column_name) \n",
"dataset.take(5).to_pandas_dataframe()" "dataset.take(5).to_pandas_dataframe().reset_index(drop=True)"
] ]
}, },
{ {
@@ -253,7 +253,7 @@
"source": [ "source": [
"# split into train based on time\n", "# split into train based on time\n",
"train = dataset.time_before(datetime(2017, 8, 8, 5), include_boundary=True)\n", "train = dataset.time_before(datetime(2017, 8, 8, 5), include_boundary=True)\n",
"train.to_pandas_dataframe().sort_values(time_column_name).tail(5)" "train.to_pandas_dataframe().sort_values(time_column_name).tail(5).reset_index(drop=True)"
] ]
}, },
{ {
@@ -263,8 +263,8 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"# split into test based on time\n", "# split into test based on time\n",
"test = dataset.time_between(datetime(2017, 8, 8, 5), datetime(2017, 8, 10, 5))\n", "test = dataset.time_between(datetime(2017, 8, 8, 6), datetime(2017, 8, 10, 5))\n",
"test.to_pandas_dataframe().head(5)" "test.to_pandas_dataframe().head(5).reset_index(drop=True)"
] ]
}, },
{ {
@@ -301,8 +301,8 @@
"|-|-|\n", "|-|-|\n",
"|**task**|forecasting|\n", "|**task**|forecasting|\n",
"|**primary_metric**|This is the metric that you want to optimize.<br> Forecasting supports the following primary metrics <br><i>spearman_correlation</i><br><i>normalized_root_mean_squared_error</i><br><i>r2_score</i><br><i>normalized_mean_absolute_error</i>|\n", "|**primary_metric**|This is the metric that you want to optimize.<br> Forecasting supports the following primary metrics <br><i>spearman_correlation</i><br><i>normalized_root_mean_squared_error</i><br><i>r2_score</i><br><i>normalized_mean_absolute_error</i>|\n",
"|**blacklist_models**|Models in blacklist won't be used by AutoML. All supported models can be found at [here](https://docs.microsoft.com/en-us/python/api/azureml-train-automl/azureml.train.automl.constants.supportedmodels.regression?view=azure-ml-py).|\n", "|**blacklist_models**|Models in blacklist won't be used by AutoML. All supported models can be found at [here](https://docs.microsoft.com/en-us/python/api/azureml-train-automl-client/azureml.train.automl.constants.supportedmodels.forecasting?view=azure-ml-py).|\n",
"|**experiment_timeout_minutes**|Maximum amount of time in minutes that the experiment take before it terminates.|\n", "|**experiment_timeout_hours**|Maximum amount of time in hours that the experiment take before it terminates.|\n",
"|**training_data**|The training data to be used within the experiment.|\n", "|**training_data**|The training data to be used within the experiment.|\n",
"|**label_column_name**|The name of the label column.|\n", "|**label_column_name**|The name of the label column.|\n",
"|**compute_target**|The remote compute for training.|\n", "|**compute_target**|The remote compute for training.|\n",
@@ -316,7 +316,7 @@
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": [ "source": [
"This notebook uses the blacklist_models parameter to exclude some models that take a longer time to train on this dataset. You can choose to remove models from the blacklist_models list but you may need to increase the experiment_timeout_minutes parameter value to get results." "This notebook uses the blacklist_models parameter to exclude some models that take a longer time to train on this dataset. You can choose to remove models from the blacklist_models list but you may need to increase the experiment_timeout_hours parameter value to get results."
] ]
}, },
{ {
@@ -333,7 +333,7 @@
"automl_config = AutoMLConfig(task='forecasting', \n", "automl_config = AutoMLConfig(task='forecasting', \n",
" primary_metric='normalized_root_mean_squared_error',\n", " primary_metric='normalized_root_mean_squared_error',\n",
" blacklist_models = ['ExtremeRandomTrees', 'AutoArima', 'Prophet'], \n", " blacklist_models = ['ExtremeRandomTrees', 'AutoArima', 'Prophet'], \n",
" experiment_timeout_minutes=20,\n", " experiment_timeout_hours=0.3,\n",
" training_data=train,\n", " training_data=train,\n",
" label_column_name=target_column_name,\n", " label_column_name=target_column_name,\n",
" compute_target=compute_target,\n", " compute_target=compute_target,\n",
@@ -454,7 +454,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"X_test = test.to_pandas_dataframe()\n", "X_test = test.to_pandas_dataframe().reset_index(drop=True)\n",
"y_test = X_test.pop(target_column_name).values" "y_test = X_test.pop(target_column_name).values"
] ]
}, },
@@ -463,11 +463,7 @@
"metadata": {}, "metadata": {},
"source": [ "source": [
"### Forecast Function\n", "### Forecast Function\n",
"For forecasting, we will use the forecast function instead of the predict function. There are two reasons for this.\n", "For forecasting, we will use the forecast function instead of the predict function. Using the predict method would result in getting predictions for EVERY horizon the forecaster can predict at. This is useful when training and evaluating the performance of the forecaster at various horizons, but the level of detail is excessive for normal use. Forecast function also can handle more complicated scenarios, see notebook on [high frequency forecasting](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/forecasting-high-frequency/automl-forecasting-function.ipynb)."
"\n",
"We need to pass the recent values of the target variable y, whereas the scikit-compatible predict function only takes the non-target variables 'test'. In our case, the test data immediately follows the training data, and we fill the target variable with NaN. The NaN serves as a question mark for the forecaster to fill with the actuals. Using the forecast function will produce forecasts using the shortest possible forecast horizon. The last time at which a definite (non-NaN) value is seen is the forecast origin - the last time when the value of the target is known.\n",
"\n",
"Using the predict method would result in getting predictions for EVERY horizon the forecaster can predict at. This is useful when training and evaluating the performance of the forecaster at various horizons, but the level of detail is excessive for normal use."
] ]
}, },
{ {
@@ -476,15 +472,10 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"# Replace ALL values in y by NaN.\n",
"# The forecast origin will be at the beginning of the first forecast period.\n",
"# (Which is the same time as the end of the last training period.)\n",
"y_query = y_test.copy().astype(np.float)\n",
"y_query.fill(np.nan)\n",
"# The featurized data, aligned to y, will also be returned.\n", "# The featurized data, aligned to y, will also be returned.\n",
"# This contains the assumptions that were made in the forecast\n", "# This contains the assumptions that were made in the forecast\n",
"# and helps align the forecast to the original data\n", "# and helps align the forecast to the original data\n",
"y_predictions, X_trans = fitted_model.forecast(X_test, y_query)" "y_predictions, X_trans = fitted_model.forecast(X_test)"
] ]
}, },
{ {
@@ -557,7 +548,7 @@
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": [ "source": [
"## Advanced Training\n", "## Advanced Training <a id=\"advanced_training\"></a>\n",
"We did not use lags in the previous model specification. In effect, the prediction was the result of a simple regression on date, grain and any additional features. This is often a very good prediction as common time series patterns like seasonality and trends can be captured in this manner. Such simple regression is horizon-less: it doesn't matter how far into the future we are predicting, because we are not using past data. In the previous example, the horizon was only used to split the data for cross-validation." "We did not use lags in the previous model specification. In effect, the prediction was the result of a simple regression on date, grain and any additional features. This is often a very good prediction as common time series patterns like seasonality and trends can be captured in this manner. Such simple regression is horizon-less: it doesn't matter how far into the future we are predicting, because we are not using past data. In the previous example, the horizon was only used to split the data for cross-validation."
] ]
}, },
@@ -587,7 +578,7 @@
"automl_config = AutoMLConfig(task='forecasting', \n", "automl_config = AutoMLConfig(task='forecasting', \n",
" primary_metric='normalized_root_mean_squared_error',\n", " primary_metric='normalized_root_mean_squared_error',\n",
" blacklist_models = ['ElasticNet','ExtremeRandomTrees','GradientBoosting','XGBoostRegressor','ExtremeRandomTrees', 'AutoArima', 'Prophet'], #These models are blacklisted for tutorial purposes, remove this for real use cases. \n", " blacklist_models = ['ElasticNet','ExtremeRandomTrees','GradientBoosting','XGBoostRegressor','ExtremeRandomTrees', 'AutoArima', 'Prophet'], #These models are blacklisted for tutorial purposes, remove this for real use cases. \n",
" experiment_timeout_minutes=20,\n", " experiment_timeout_hours=0.3,\n",
" training_data=train,\n", " training_data=train,\n",
" label_column_name=target_column_name,\n", " label_column_name=target_column_name,\n",
" compute_target=compute_target,\n", " compute_target=compute_target,\n",
@@ -642,7 +633,7 @@
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": [ "source": [
"## Advanced Results\n", "## Advanced Results<a id=\"advanced_results\"></a>\n",
"We did not use lags in the previous model specification. In effect, the prediction was the result of a simple regression on date, grain and any additional features. This is often a very good prediction as common time series patterns like seasonality and trends can be captured in this manner. Such simple regression is horizon-less: it doesn't matter how far into the future we are predicting, because we are not using past data. In the previous example, the horizon was only used to split the data for cross-validation." "We did not use lags in the previous model specification. In effect, the prediction was the result of a simple regression on date, grain and any additional features. This is often a very good prediction as common time series patterns like seasonality and trends can be captured in this manner. Such simple regression is horizon-less: it doesn't matter how far into the future we are predicting, because we are not using past data. In the previous example, the horizon was only used to split the data for cross-validation."
] ]
}, },
@@ -652,15 +643,10 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"# Replace ALL values in y by NaN.\n",
"# The forecast origin will be at the beginning of the first forecast period.\n",
"# (Which is the same time as the end of the last training period.)\n",
"y_query = y_test.copy().astype(np.float)\n",
"y_query.fill(np.nan)\n",
"# The featurized data, aligned to y, will also be returned.\n", "# The featurized data, aligned to y, will also be returned.\n",
"# This contains the assumptions that were made in the forecast\n", "# This contains the assumptions that were made in the forecast\n",
"# and helps align the forecast to the original data\n", "# and helps align the forecast to the original data\n",
"y_predictions, X_trans = fitted_model_lags.forecast(X_test, y_query)" "y_predictions, X_trans = fitted_model_lags.forecast(X_test)"
] ]
}, },
{ {

View File

@@ -2,11 +2,3 @@ name: auto-ml-forecasting-energy-demand
dependencies: dependencies:
- pip: - pip:
- azureml-sdk - azureml-sdk
- interpret
- azureml-train-automl
- azureml-widgets
- matplotlib
- pandas_ml
- statsmodels
- azureml-explain-model
- azureml-contrib-interpret

View File

@@ -251,7 +251,7 @@
"source": [ "source": [
"automl_settings = {\n", "automl_settings = {\n",
" \"iteration_timeout_minutes\" : 5,\n", " \"iteration_timeout_minutes\" : 5,\n",
" \"experiment_timeout_minutes\" : 15,\n", " \"experiment_timeout_hours\" : 0.25,\n",
" \"primary_metric\" : 'normalized_mean_absolute_error',\n", " \"primary_metric\" : 'normalized_mean_absolute_error',\n",
" \"time_column_name\": time_column_name,\n", " \"time_column_name\": time_column_name,\n",
" \"grain_column_names\": grain_column_names,\n", " \"grain_column_names\": grain_column_names,\n",

View File

@@ -2,9 +2,3 @@ name: auto-ml-forecasting-grouping
dependencies: dependencies:
- pip: - pip:
- azureml-sdk - azureml-sdk
- azureml-train-automl
- azureml-pipeline
- azureml-widgets
- pandas_ml
- statsmodels
- matplotlib

View File

@@ -8,6 +8,7 @@ from azureml.core import RunConfiguration
from azureml.core.compute import ComputeTarget from azureml.core.compute import ComputeTarget
from azureml.core.conda_dependencies import CondaDependencies from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.dataset import Dataset from azureml.core.dataset import Dataset
from azureml.data import TabularDataset
from azureml.pipeline.core import PipelineData, PipelineParameter, TrainingOutput, StepSequence from azureml.pipeline.core import PipelineData, PipelineParameter, TrainingOutput, StepSequence
from azureml.pipeline.steps import PythonScriptStep from azureml.pipeline.steps import PythonScriptStep
from azureml.train.automl import AutoMLConfig from azureml.train.automl import AutoMLConfig
@@ -29,13 +30,14 @@ def _get_configs(automlconfig: AutoMLConfig,
groups = _get_groups(data, group_column_names) groups = _get_groups(data, group_column_names)
configs = {} configs = {}
for i, group in groups.iterrows(): for i, group in groups.iterrows():
single = data single = data._dataflow
group_name = "#####".join(str(x) for x in group.values) group_name = "#####".join(str(x) for x in group.values)
group_name = valid_chars.sub('', group_name) group_name = valid_chars.sub('', group_name)
for key in group.index: for key in group.index:
single = single._dataflow.filter(data._dataflow[key] == group[key]) single = single.filter(data._dataflow[key] == group[key])
t_dataset = TabularDataset._create(single)
group_conf = copy.deepcopy(automlconfig) group_conf = copy.deepcopy(automlconfig)
group_conf.user_settings['training_data'] = single group_conf.user_settings['training_data'] = t_dataset
group_conf.user_settings['label_column_name'] = target_column group_conf.user_settings['label_column_name'] = target_column
group_conf.user_settings['compute_target'] = compute_target group_conf.user_settings['compute_target'] = compute_target
configs[group_name] = group_conf configs[group_name] = group_conf
@@ -69,7 +71,7 @@ def build_pipeline_steps(automlconfig: AutoMLConfig,
# create each automl step end-to-end (train, register) # create each automl step end-to-end (train, register)
for group_name, conf in configs.items(): for group_name, conf in configs.items():
# create automl metrics output # create automl metrics output
metirics_data = PipelineData( metrics_data = PipelineData(
name='metrics_data_{}'.format(group_name), name='metrics_data_{}'.format(group_name),
pipeline_output_name=metrics_output_name.format(group_name), pipeline_output_name=metrics_output_name.format(group_name),
training_output=TrainingOutput(type='Metrics')) training_output=TrainingOutput(type='Metrics'))
@@ -82,7 +84,7 @@ def build_pipeline_steps(automlconfig: AutoMLConfig,
automl_step = AutoMLStep( automl_step = AutoMLStep(
name='automl_{}'.format(group_name), name='automl_{}'.format(group_name),
automl_config=conf, automl_config=conf,
outputs=[metirics_data, model_data], outputs=[metrics_data, model_data],
allow_reuse=True) allow_reuse=True)
steps.append(automl_step) steps.append(automl_step)

View File

@@ -44,7 +44,7 @@ def run(raw_data):
model_path = Model.get_model_path(cur_group) model_path = Model.get_model_path(cur_group)
model = joblib.load(model_path) model = joblib.load(model_path)
models[cur_group] = model models[cur_group] = model
_, xtrans = models[cur_group].forecast(df_one, np.repeat(np.nan, len(df_one))) _, xtrans = models[cur_group].forecast(df_one)
dfs.append(xtrans) dfs.append(xtrans)
df_ret = pd.concat(dfs) df_ret = pd.concat(dfs)
df_ret.reset_index(drop=False, inplace=True) df_ret.reset_index(drop=False, inplace=True)

View File

@@ -335,7 +335,7 @@
"automl_config = AutoMLConfig(task='forecasting',\n", "automl_config = AutoMLConfig(task='forecasting',\n",
" debug_log='automl_forecasting_function.log',\n", " debug_log='automl_forecasting_function.log',\n",
" primary_metric='normalized_root_mean_squared_error',\n", " primary_metric='normalized_root_mean_squared_error',\n",
" experiment_timeout_minutes=15,\n", " experiment_timeout_hours=0.25,\n",
" enable_early_stopping=True,\n", " enable_early_stopping=True,\n",
" training_data=train_data,\n", " training_data=train_data,\n",
" compute_target=compute_target,\n", " compute_target=compute_target,\n",
@@ -377,9 +377,7 @@
"\n", "\n",
"![Forecasting after training](forecast_function_at_train.png)\n", "![Forecasting after training](forecast_function_at_train.png)\n",
"\n", "\n",
"The `X_test` and `y_query` below, taken together, form the **forecast request**. The two are interpreted as aligned - `y_query` could actally be a column in `X_test`. `NaN`s in `y_query` are the question marks. These will be filled with the forecasts.\n", "We use `X_test` as a **forecast request** to generate the predictions."
"\n",
"When the forecast period immediately follows the training period, the models retain the last few points of data. You can simply fill `y_query` filled with question marks - the model has the data for the lookback already.\n"
] ]
}, },
{ {
@@ -408,8 +406,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"y_query = np.repeat(np.NaN, X_test.shape[0])\n", "y_pred_no_gap, xy_nogap = fitted_model.forecast(X_test)\n",
"y_pred_no_gap, xy_nogap = fitted_model.forecast(X_test, y_query)\n",
"\n", "\n",
"# xy_nogap contains the predictions in the _automl_target_col column.\n", "# xy_nogap contains the predictions in the _automl_target_col column.\n",
"# Those same numbers are output in y_pred_no_gap\n", "# Those same numbers are output in y_pred_no_gap\n",
@@ -437,7 +434,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"quantiles = fitted_model.forecast_quantiles(X_test, y_query)\n", "quantiles = fitted_model.forecast_quantiles(X_test)\n",
"quantiles" "quantiles"
] ]
}, },
@@ -460,10 +457,10 @@
"# specify which quantiles you would like \n", "# specify which quantiles you would like \n",
"fitted_model.quantiles = [0.01, 0.5, 0.95]\n", "fitted_model.quantiles = [0.01, 0.5, 0.95]\n",
"# use forecast_quantiles function, not the forecast() one\n", "# use forecast_quantiles function, not the forecast() one\n",
"y_pred_quantiles = fitted_model.forecast_quantiles(X_test, y_query)\n", "y_pred_quantiles = fitted_model.forecast_quantiles(X_test)\n",
"\n", "\n",
"# it all nicely aligns column-wise\n", "# it all nicely aligns column-wise\n",
"pd.concat([X_test.reset_index(), pd.DataFrame({'query' : y_query}), y_pred_quantiles], axis=1)" "pd.concat([X_test.reset_index(), y_pred_quantiles], axis=1)"
] ]
}, },
{ {
@@ -539,9 +536,7 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"try: \n", "try: \n",
" y_query = y_away.copy()\n", " y_pred_away, xy_away = fitted_model.forecast(X_away)\n",
" y_query.fill(np.NaN)\n",
" y_pred_away, xy_away = fitted_model.forecast(X_away, y_query)\n",
" xy_away\n", " xy_away\n",
"except Exception as e:\n", "except Exception as e:\n",
" print(e)" " print(e)"
@@ -551,7 +546,7 @@
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": [ "source": [
"How should we read that eror message? The forecast origin is at the last time the model saw an actual value of `y` (the target). That was at the end of the training data! Because the model received all `NaN` (and not an actual target value), it is attempting to forecast from the end of training data. But the requested forecast periods are past the maximum horizon. We need to provide a define `y` value to establish the forecast origin.\n", "How should we read that eror message? The forecast origin is at the last time the model saw an actual value of `y` (the target). That was at the end of the training data! The model is attempting to forecast from the end of training data. But the requested forecast periods are past the maximum horizon. We need to provide a define `y` value to establish the forecast origin.\n",
"\n", "\n",
"We will use this helper function to take the required amount of context from the data preceding the testing data. It's definition is intentionally simplified to keep the idea in the clear." "We will use this helper function to take the required amount of context from the data preceding the testing data. It's definition is intentionally simplified to keep the idea in the clear."
] ]
@@ -740,7 +735,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.6.7" "version": "3.6.8"
}, },
"tags": [ "tags": [
"Forecasting", "Forecasting",

View File

@@ -1,11 +1,4 @@
name: automl-forecasting-function name: automl-forecasting-function
dependencies: dependencies:
- fbprophet==0.5
- py-xgboost<=0.80
- pip: - pip:
- azureml-sdk - azureml-sdk
- azureml-train-automl
- azureml-widgets
- pandas_ml
- statsmodels
- matplotlib

View File

@@ -40,7 +40,7 @@
"## Introduction\n", "## Introduction\n",
"In this example, we use AutoML to train, select, and operationalize a time-series forecasting model for multiple time-series.\n", "In this example, we use AutoML to train, select, and operationalize a time-series forecasting model for multiple time-series.\n",
"\n", "\n",
"Make sure you have executed the [configuration notebook](../configuration.ipynb) before running this notebook.\n", "Make sure you have executed the [configuration notebook](../../../configuration.ipynb) before running this notebook.\n",
"\n", "\n",
"The examples in the follow code samples use the University of Chicago's Dominick's Finer Foods dataset to forecast orange juice sales. Dominick's was a grocery chain in the Chicago metropolitan area." "The examples in the follow code samples use the University of Chicago's Dominick's Finer Foods dataset to forecast orange juice sales. Dominick's was a grocery chain in the Chicago metropolitan area."
] ]
@@ -335,7 +335,7 @@
"|-|-|\n", "|-|-|\n",
"|**task**|forecasting|\n", "|**task**|forecasting|\n",
"|**primary_metric**|This is the metric that you want to optimize.<br> Forecasting supports the following primary metrics <br><i>spearman_correlation</i><br><i>normalized_root_mean_squared_error</i><br><i>r2_score</i><br><i>normalized_mean_absolute_error</i>\n", "|**primary_metric**|This is the metric that you want to optimize.<br> Forecasting supports the following primary metrics <br><i>spearman_correlation</i><br><i>normalized_root_mean_squared_error</i><br><i>r2_score</i><br><i>normalized_mean_absolute_error</i>\n",
"|**experiment_timeout_minutes**|Experimentation timeout in minutes.|\n", "|**experiment_timeout_hours**|Experimentation timeout in hours.|\n",
"|**enable_early_stopping**|If early stopping is on, training will stop when the primary metric is no longer improving.|\n", "|**enable_early_stopping**|If early stopping is on, training will stop when the primary metric is no longer improving.|\n",
"|**training_data**|Input dataset, containing both features and label column.|\n", "|**training_data**|Input dataset, containing both features and label column.|\n",
"|**label_column_name**|The name of the label column.|\n", "|**label_column_name**|The name of the label column.|\n",
@@ -366,7 +366,7 @@
"automl_config = AutoMLConfig(task='forecasting',\n", "automl_config = AutoMLConfig(task='forecasting',\n",
" debug_log='automl_oj_sales_errors.log',\n", " debug_log='automl_oj_sales_errors.log',\n",
" primary_metric='normalized_mean_absolute_error',\n", " primary_metric='normalized_mean_absolute_error',\n",
" experiment_timeout_minutes=15,\n", " experiment_timeout_hours=0.25,\n",
" training_data=train_dataset,\n", " training_data=train_dataset,\n",
" label_column_name=target_column_name,\n", " label_column_name=target_column_name,\n",
" compute_target=compute_target,\n", " compute_target=compute_target,\n",
@@ -454,9 +454,7 @@
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": [ "source": [
"To produce predictions on the test set, we need to know the feature values at all dates in the test set. This requirement is somewhat reasonable for the OJ sales data since the features mainly consist of price, which is usually set in advance, and customer demographics which are approximately constant for each store over the 20 week forecast horizon in the testing data. \n", "To produce predictions on the test set, we need to know the feature values at all dates in the test set. This requirement is somewhat reasonable for the OJ sales data since the features mainly consist of price, which is usually set in advance, and customer demographics which are approximately constant for each store over the 20 week forecast horizon in the testing data."
"\n",
"We will first create a query `y_query`, which is aligned index-for-index to `X_test`. This is a vector of target values where each `NaN` serves the function of the question mark to be replaced by forecast. Passing definite values in the `y` argument allows the `forecast` function to make predictions on data that does not immediately follow the train data which contains `y`. In each grain, the last time point where the model sees a definite value of `y` is that grain's _forecast origin_."
] ]
}, },
{ {
@@ -465,15 +463,10 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"# Replace ALL values in y by NaN.\n",
"# The forecast origin will be at the beginning of the first forecast period.\n",
"# (Which is the same time as the end of the last training period.)\n",
"y_query = y_test.copy().astype(np.float)\n",
"y_query.fill(np.nan)\n",
"# The featurized data, aligned to y, will also be returned.\n", "# The featurized data, aligned to y, will also be returned.\n",
"# This contains the assumptions that were made in the forecast\n", "# This contains the assumptions that were made in the forecast\n",
"# and helps align the forecast to the original data\n", "# and helps align the forecast to the original data\n",
"y_predictions, X_trans = fitted_model.forecast(X_test, y_query)" "y_predictions, X_trans = fitted_model.forecast(X_test)"
] ]
}, },
{ {
@@ -640,7 +633,7 @@
"import json\n", "import json\n",
"# The request data frame needs to have y_query column which corresponds to query.\n", "# The request data frame needs to have y_query column which corresponds to query.\n",
"X_query = X_test.copy()\n", "X_query = X_test.copy()\n",
"X_query['y_query'] = y_query\n", "X_query['y_query'] = np.NaN\n",
"# We have to convert datetime to string, because Timestamps cannot be serialized to JSON.\n", "# We have to convert datetime to string, because Timestamps cannot be serialized to JSON.\n",
"X_query[time_column_name] = X_query[time_column_name].astype(str)\n", "X_query[time_column_name] = X_query[time_column_name].astype(str)\n",
"# The Service object accept the complex dictionary, which is internally converted to JSON string.\n", "# The Service object accept the complex dictionary, which is internally converted to JSON string.\n",
@@ -705,9 +698,6 @@
"framework": [ "framework": [
"Azure ML AutoML" "Azure ML AutoML"
], ],
"tags": [
"None"
],
"friendly_name": "Forecasting orange juice sales with deployment", "friendly_name": "Forecasting orange juice sales with deployment",
"index_order": 1, "index_order": 1,
"kernelspec": { "kernelspec": {
@@ -725,8 +715,11 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.6.7" "version": "3.6.8"
}, },
"tags": [
"None"
],
"task": "Forecasting" "task": "Forecasting"
}, },
"nbformat": 4, "nbformat": 4,

View File

@@ -1,11 +1,4 @@
name: auto-ml-forecasting-orange-juice-sales name: auto-ml-forecasting-orange-juice-sales
dependencies: dependencies:
- fbprophet==0.5
- py-xgboost<=0.80
- pip: - pip:
- azureml-sdk - azureml-sdk
- azureml-train-automl
- azureml-widgets
- matplotlib
- pandas_ml
- statsmodels

View File

@@ -156,7 +156,7 @@
" \"n_cross_validations\": 3,\n", " \"n_cross_validations\": 3,\n",
" \"primary_metric\": 'average_precision_score_weighted',\n", " \"primary_metric\": 'average_precision_score_weighted',\n",
" \"preprocess\": True,\n", " \"preprocess\": True,\n",
" \"experiment_timeout_minutes\": 10, # This is a time limit for testing purposes, remove it for real use cases, this will drastically limit ablity to find the best model possible\n", " \"experiment_timeout_hours\": 0.2, # This is a time limit for testing purposes, remove it for real use cases, this will drastically limit ability to find the best model possible\n",
" \"verbosity\": logging.INFO,\n", " \"verbosity\": logging.INFO,\n",
" \"enable_stack_ensemble\": False\n", " \"enable_stack_ensemble\": False\n",
"}\n", "}\n",
@@ -260,17 +260,7 @@
"metadata": {}, "metadata": {},
"source": [ "source": [
"#### Print the properties of the model\n", "#### Print the properties of the model\n",
"The fitted_model is a python object and you can read the different properties of the object.\n", "The fitted_model is a python object and you can read the different properties of the object.\n"
"See *Print the properties of the model* section in [this sample notebook](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/classification/auto-ml-classification.ipynb)."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Deploy\n",
"\n",
"To deploy the model into a web service endpoint, see _Deploy_ section in [this sample notebook](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/classification-with-deployment/auto-ml-classification-with-deployment.ipynb)"
] ]
}, },
{ {

View File

@@ -2,10 +2,3 @@ name: auto-ml-classification-credit-card-fraud-local
dependencies: dependencies:
- pip: - pip:
- azureml-sdk - azureml-sdk
- interpret
- azureml-defaults
- azureml-explain-model
- azureml-train-automl
- azureml-widgets
- matplotlib
- pandas_ml

View File

@@ -206,7 +206,7 @@
"|-|-|\n", "|-|-|\n",
"|**task**|classification, regression or forecasting|\n", "|**task**|classification, regression or forecasting|\n",
"|**primary_metric**|This is the metric that you want to optimize. Regression supports the following primary metrics: <br><i>spearman_correlation</i><br><i>normalized_root_mean_squared_error</i><br><i>r2_score</i><br><i>normalized_mean_absolute_error</i>|\n", "|**primary_metric**|This is the metric that you want to optimize. Regression supports the following primary metrics: <br><i>spearman_correlation</i><br><i>normalized_root_mean_squared_error</i><br><i>r2_score</i><br><i>normalized_mean_absolute_error</i>|\n",
"|**experiment_timeout_minutes**| Maximum amount of time in minutes that all iterations combined can take before the experiment terminates.|\n", "|**experiment_timeout_hours**| Maximum amount of time in hours that all iterations combined can take before the experiment terminates.|\n",
"|**enable_early_stopping**| Flag to enble early termination if the score is not improving in the short term.|\n", "|**enable_early_stopping**| Flag to enble early termination if the score is not improving in the short term.|\n",
"|**featurization**| 'auto' / 'off' / FeaturizationConfig Indicator for whether featurization step should be done automatically or not, or whether customized featurization should be used. Note: If the input data is sparse, featurization cannot be turned on.|\n", "|**featurization**| 'auto' / 'off' / FeaturizationConfig Indicator for whether featurization step should be done automatically or not, or whether customized featurization should be used. Note: If the input data is sparse, featurization cannot be turned on.|\n",
"|**n_cross_validations**|Number of cross validation splits.|\n", "|**n_cross_validations**|Number of cross validation splits.|\n",
@@ -262,7 +262,7 @@
"source": [ "source": [
"automl_settings = {\n", "automl_settings = {\n",
" \"enable_early_stopping\": True, \n", " \"enable_early_stopping\": True, \n",
" \"experiment_timeout_minutes\" : 10,\n", " \"experiment_timeout_hours\" : 0.2,\n",
" \"max_concurrent_iterations\": 4,\n", " \"max_concurrent_iterations\": 4,\n",
" \"max_cores_per_iteration\": -1,\n", " \"max_cores_per_iteration\": -1,\n",
" \"n_cross_validations\": 5,\n", " \"n_cross_validations\": 5,\n",
@@ -558,7 +558,6 @@
"\n", "\n",
"# specify CondaDependencies obj\n", "# specify CondaDependencies obj\n",
"conda_run_config.environment.python.conda_dependencies = CondaDependencies.create(\n", "conda_run_config.environment.python.conda_dependencies = CondaDependencies.create(\n",
" conda_packages=['scikit-learn', 'numpy','py-xgboost<=0.80'],\n",
" pip_packages=azureml_pip_packages)" " pip_packages=azureml_pip_packages)"
] ]
}, },
@@ -718,17 +717,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from azureml.core.conda_dependencies import CondaDependencies \n", "myenv = automl_run.get_environment().python.conda_dependencies\n",
"\n",
"azureml_pip_packages = [\n",
" 'azureml-explain-model', 'azureml-train-automl', 'azureml-defaults'\n",
"]\n",
" \n",
"\n",
"# specify CondaDependencies obj\n",
"myenv = CondaDependencies.create(conda_packages=['scikit-learn', 'pandas', 'numpy', 'py-xgboost<=0.80'],\n",
" pip_packages=azureml_pip_packages,\n",
" pin_sdk_version=True)\n",
"\n", "\n",
"with open(\"myenv.yml\",\"w\") as f:\n", "with open(\"myenv.yml\",\"w\") as f:\n",
" f.write(myenv.serialize_to_string())\n", " f.write(myenv.serialize_to_string())\n",

View File

@@ -2,12 +2,3 @@ name: auto-ml-regression-hardware-performance-explanation-and-featurization
dependencies: dependencies:
- pip: - pip:
- azureml-sdk - azureml-sdk
- interpret
- azureml-defaults
- azureml-explain-model
- azureml-train-automl
- azureml-widgets
- matplotlib
- pandas_ml
- azureml-explain-model
- azureml-contrib-interpret

View File

@@ -7,7 +7,7 @@ from azureml.core.experiment import Experiment
from sklearn.externals import joblib from sklearn.externals import joblib
from azureml.core.dataset import Dataset from azureml.core.dataset import Dataset
from azureml.train.automl.runtime.automl_explain_utilities import AutoMLExplainerSetupClass, \ from azureml.train.automl.runtime.automl_explain_utilities import AutoMLExplainerSetupClass, \
automl_setup_model_explanations automl_setup_model_explanations, automl_check_model_if_explainable
from azureml.explain.model.mimic.models.lightgbm_model import LGBMExplainableModel from azureml.explain.model.mimic.models.lightgbm_model import LGBMExplainableModel
from azureml.explain.model.mimic_wrapper import MimicWrapper from azureml.explain.model.mimic_wrapper import MimicWrapper
from automl.client.core.common.constants import MODEL_PATH from automl.client.core.common.constants import MODEL_PATH
@@ -25,6 +25,11 @@ ws = run.experiment.workspace
experiment = Experiment(ws, '<<experimnet_name>>') experiment = Experiment(ws, '<<experimnet_name>>')
automl_run = Run(experiment=experiment, run_id='<<run_id>>') automl_run = Run(experiment=experiment, run_id='<<run_id>>')
# Check if this AutoML model is explainable
if not automl_check_model_if_explainable(automl_run):
raise Exception("Model explanations is currently not supported for " + automl_run.get_properties().get(
'run_algorithm'))
# Download the best model from the artifact store # Download the best model from the artifact store
automl_run.download_file(name=MODEL_PATH, output_file_path='model.pkl') automl_run.download_file(name=MODEL_PATH, output_file_path='model.pkl')

View File

@@ -188,7 +188,11 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": {}, "metadata": {
"tags": [
"automlconfig-remarks-sample"
]
},
"outputs": [], "outputs": [],
"source": [ "source": [
"automl_settings = {\n", "automl_settings = {\n",
@@ -196,7 +200,7 @@
" \"primary_metric\": 'r2_score',\n", " \"primary_metric\": 'r2_score',\n",
" \"preprocess\": True,\n", " \"preprocess\": True,\n",
" \"enable_early_stopping\": True, \n", " \"enable_early_stopping\": True, \n",
" \"experiment_timeout_minutes\": 20, #for real scenarios we reccommend a timeout of at least one hour \n", " \"experiment_timeout_hours\": 0.3, #for real scenarios we reccommend a timeout of at least one hour \n",
" \"max_concurrent_iterations\": 4,\n", " \"max_concurrent_iterations\": 4,\n",
" \"max_cores_per_iteration\": -1,\n", " \"max_cores_per_iteration\": -1,\n",
" \"verbosity\": logging.INFO,\n", " \"verbosity\": logging.INFO,\n",

View File

@@ -2,8 +2,3 @@ name: auto-ml-regression
dependencies: dependencies:
- pip: - pip:
- azureml-sdk - azureml-sdk
- azureml-train-automl
- azureml-widgets
- matplotlib
- pandas_ml
- paramiko<2.5.0

View File

@@ -56,7 +56,7 @@ CREATE OR ALTER PROCEDURE [dbo].[AutoMLTrain]
@task NVARCHAR(40)='classification', -- The type of task. Can be classification, regression or forecasting. @task NVARCHAR(40)='classification', -- The type of task. Can be classification, regression or forecasting.
@experiment_name NVARCHAR(32)='automl-sql-test', -- This can be used to find the experiment in the Azure Portal. @experiment_name NVARCHAR(32)='automl-sql-test', -- This can be used to find the experiment in the Azure Portal.
@iteration_timeout_minutes INT = 15, -- The maximum time in minutes for training a single pipeline. @iteration_timeout_minutes INT = 15, -- The maximum time in minutes for training a single pipeline.
@experiment_timeout_minutes INT = 60, -- The maximum time in minutes for training all pipelines. @experiment_timeout_hours FLOAT = 1, -- The maximum time in hours for training all pipelines.
@n_cross_validations INT = 3, -- The number of cross validations. @n_cross_validations INT = 3, -- The number of cross validations.
@blacklist_models NVARCHAR(MAX) = '', -- A comma separated list of algos that will not be used. @blacklist_models NVARCHAR(MAX) = '', -- A comma separated list of algos that will not be used.
-- The list of possible models can be found at: -- The list of possible models can be found at:
@@ -131,8 +131,8 @@ if __name__.startswith("sqlindb"):
X_train = data_train X_train = data_train
if experiment_timeout_minutes == 0: if experiment_timeout_hours == 0:
experiment_timeout_minutes = None experiment_timeout_hours = None
if experiment_exit_score == 0: if experiment_exit_score == 0:
experiment_exit_score = None experiment_exit_score = None
@@ -163,7 +163,7 @@ if __name__.startswith("sqlindb"):
debug_log = log_file_name, debug_log = log_file_name,
primary_metric = primary_metric, primary_metric = primary_metric,
iteration_timeout_minutes = iteration_timeout_minutes, iteration_timeout_minutes = iteration_timeout_minutes,
experiment_timeout_minutes = experiment_timeout_minutes, experiment_timeout_hours = experiment_timeout_hours,
iterations = iterations, iterations = iterations,
n_cross_validations = n_cross_validations, n_cross_validations = n_cross_validations,
preprocess = preprocess, preprocess = preprocess,
@@ -204,7 +204,7 @@ if __name__.startswith("sqlindb"):
@iterations INT, @task NVARCHAR(40), @iterations INT, @task NVARCHAR(40),
@experiment_name NVARCHAR(32), @experiment_name NVARCHAR(32),
@iteration_timeout_minutes INT, @iteration_timeout_minutes INT,
@experiment_timeout_minutes INT, @experiment_timeout_hours FLOAT,
@n_cross_validations INT, @n_cross_validations INT,
@blacklist_models NVARCHAR(MAX), @blacklist_models NVARCHAR(MAX),
@whitelist_models NVARCHAR(MAX), @whitelist_models NVARCHAR(MAX),
@@ -223,7 +223,7 @@ if __name__.startswith("sqlindb"):
, @task = @task , @task = @task
, @experiment_name = @experiment_name , @experiment_name = @experiment_name
, @iteration_timeout_minutes = @iteration_timeout_minutes , @iteration_timeout_minutes = @iteration_timeout_minutes
, @experiment_timeout_minutes = @experiment_timeout_minutes , @experiment_timeout_hours = @experiment_timeout_hours
, @n_cross_validations = @n_cross_validations , @n_cross_validations = @n_cross_validations
, @blacklist_models = @blacklist_models , @blacklist_models = @blacklist_models
, @whitelist_models = @whitelist_models , @whitelist_models = @whitelist_models

View File

@@ -235,7 +235,7 @@
" @task NVARCHAR(40)='classification', -- The type of task. Can be classification, regression or forecasting.\r\n", " @task NVARCHAR(40)='classification', -- The type of task. Can be classification, regression or forecasting.\r\n",
" @experiment_name NVARCHAR(32)='automl-sql-test', -- This can be used to find the experiment in the Azure Portal.\r\n", " @experiment_name NVARCHAR(32)='automl-sql-test', -- This can be used to find the experiment in the Azure Portal.\r\n",
" @iteration_timeout_minutes INT = 15, -- The maximum time in minutes for training a single pipeline. \r\n", " @iteration_timeout_minutes INT = 15, -- The maximum time in minutes for training a single pipeline. \r\n",
" @experiment_timeout_minutes INT = 60, -- The maximum time in minutes for training all pipelines.\r\n", " @experiment_timeout_hours FLOAT = 1, -- The maximum time in hours for training all pipelines.\r\n",
" @n_cross_validations INT = 3, -- The number of cross validations.\r\n", " @n_cross_validations INT = 3, -- The number of cross validations.\r\n",
" @blacklist_models NVARCHAR(MAX) = '', -- A comma separated list of algos that will not be used.\r\n", " @blacklist_models NVARCHAR(MAX) = '', -- A comma separated list of algos that will not be used.\r\n",
" -- The list of possible models can be found at:\r\n", " -- The list of possible models can be found at:\r\n",
@@ -307,8 +307,8 @@
"\r\n", "\r\n",
" X_train = data_train\r\n", " X_train = data_train\r\n",
"\r\n", "\r\n",
" if experiment_timeout_minutes == 0:\r\n", " if experiment_timeout_hours == 0:\r\n",
" experiment_timeout_minutes = None\r\n", " experiment_timeout_hours = None\r\n",
"\r\n", "\r\n",
" if experiment_exit_score == 0:\r\n", " if experiment_exit_score == 0:\r\n",
" experiment_exit_score = None\r\n", " experiment_exit_score = None\r\n",
@@ -337,7 +337,7 @@
" debug_log = log_file_name, \r\n", " debug_log = log_file_name, \r\n",
" primary_metric = primary_metric, \r\n", " primary_metric = primary_metric, \r\n",
" iteration_timeout_minutes = iteration_timeout_minutes, \r\n", " iteration_timeout_minutes = iteration_timeout_minutes, \r\n",
" experiment_timeout_minutes = experiment_timeout_minutes,\r\n", " experiment_timeout_hours = experiment_timeout_hours,\r\n",
" iterations = iterations, \r\n", " iterations = iterations, \r\n",
" n_cross_validations = n_cross_validations, \r\n", " n_cross_validations = n_cross_validations, \r\n",
" preprocess = preprocess,\r\n", " preprocess = preprocess,\r\n",
@@ -378,7 +378,7 @@
"\t\t\t\t @iterations INT, @task NVARCHAR(40),\r\n", "\t\t\t\t @iterations INT, @task NVARCHAR(40),\r\n",
"\t\t\t\t @experiment_name NVARCHAR(32),\r\n", "\t\t\t\t @experiment_name NVARCHAR(32),\r\n",
"\t\t\t\t @iteration_timeout_minutes INT,\r\n", "\t\t\t\t @iteration_timeout_minutes INT,\r\n",
"\t\t\t\t @experiment_timeout_minutes INT,\r\n", "\t\t\t\t @experiment_timeout_hours FLOAT,\r\n",
"\t\t\t\t @n_cross_validations INT,\r\n", "\t\t\t\t @n_cross_validations INT,\r\n",
"\t\t\t\t @blacklist_models NVARCHAR(MAX),\r\n", "\t\t\t\t @blacklist_models NVARCHAR(MAX),\r\n",
"\t\t\t\t @whitelist_models NVARCHAR(MAX),\r\n", "\t\t\t\t @whitelist_models NVARCHAR(MAX),\r\n",
@@ -396,7 +396,7 @@
"\t, @task = @task\r\n", "\t, @task = @task\r\n",
"\t, @experiment_name = @experiment_name\r\n", "\t, @experiment_name = @experiment_name\r\n",
"\t, @iteration_timeout_minutes = @iteration_timeout_minutes\r\n", "\t, @iteration_timeout_minutes = @iteration_timeout_minutes\r\n",
"\t, @experiment_timeout_minutes = @experiment_timeout_minutes\r\n", "\t, @experiment_timeout_hours = @experiment_timeout_hours\r\n",
"\t, @n_cross_validations = @n_cross_validations\r\n", "\t, @n_cross_validations = @n_cross_validations\r\n",
"\t, @blacklist_models = @blacklist_models\r\n", "\t, @blacklist_models = @blacklist_models\r\n",
"\t, @whitelist_models = @whitelist_models\r\n", "\t, @whitelist_models = @whitelist_models\r\n",
@@ -560,9 +560,6 @@
"framework": [ "framework": [
"Azure ML AutoML" "Azure ML AutoML"
], ],
"tags": [
""
],
"friendly_name": "Setup automated ML SQL integration", "friendly_name": "Setup automated ML SQL integration",
"index_order": 1, "index_order": 1,
"kernelspec": { "kernelspec": {
@@ -574,6 +571,9 @@
"name": "sql", "name": "sql",
"version": "" "version": ""
}, },
"tags": [
""
],
"task": "None" "task": "None"
}, },
"nbformat": 4, "nbformat": 4,

View File

@@ -161,9 +161,9 @@
"source": [ "source": [
"from azureml.core.conda_dependencies import CondaDependencies \n", "from azureml.core.conda_dependencies import CondaDependencies \n",
"\n", "\n",
"myacienv = CondaDependencies.create(conda_packages=['scikit-learn','numpy','pandas']) #showing how to add libs as an eg. - not needed for this model.\n", "myacienv = CondaDependencies.create(conda_packages=['scikit-learn','numpy','pandas']) # showing how to add libs as an eg. - not needed for this model.\n",
"\n", "\n",
"with open(\"mydeployenv.yml\",\"w\") as f:\n", "with open(\"myenv.yml\",\"w\") as f:\n",
" f.write(myacienv.serialize_to_string())" " f.write(myacienv.serialize_to_string())"
] ]
}, },
@@ -177,6 +177,9 @@
"from azureml.core.webservice import AciWebservice, Webservice\n", "from azureml.core.webservice import AciWebservice, Webservice\n",
"from azureml.exceptions import WebserviceException\n", "from azureml.exceptions import WebserviceException\n",
"from azureml.core.model import InferenceConfig\n", "from azureml.core.model import InferenceConfig\n",
"from azureml.core.environment import Environment\n",
"from azureml.core.conda_dependencies import CondaDependencies\n",
"\n",
"\n", "\n",
"myaci_config = AciWebservice.deploy_configuration(cpu_cores = 2, \n", "myaci_config = AciWebservice.deploy_configuration(cpu_cores = 2, \n",
" memory_gb = 2, \n", " memory_gb = 2, \n",
@@ -191,9 +194,16 @@
"except WebserviceException:\n", "except WebserviceException:\n",
" pass\n", " pass\n",
"\n", "\n",
"inference_config = InferenceConfig(runtime= 'spark-py', \n", "myenv = Environment.get(ws, name='AzureML-PySpark-MmlSpark-0.15')\n",
" entry_script='score_sparkml.py',\n", "# we need to add extra packages to procured environment\n",
" conda_file='mydeployenv.yml')\n", "# in order to deploy amended environment we need to rename it\n",
"myenv.name = 'myenv'\n",
"model_dependencies = CondaDependencies('myenv.yml')\n",
"for pip_dep in model_dependencies.pip_packages:\n",
" myenv.python.conda_dependencies.add_pip_package(pip_dep)\n",
"for conda_dep in model_dependencies.conda_packages:\n",
" myenv.python.conda_dependencies.add_conda_package(conda_dep)\n",
"inference_config = InferenceConfig(entry_script='score_sparkml.py', environment=myenv)\n",
"\n", "\n",
"myservice = Model.deploy(ws, service_name, [mymodel], inference_config, myaci_config)\n", "myservice = Model.deploy(ws, service_name, [mymodel], inference_config, myaci_config)\n",
"myservice.wait_for_deployment(show_output=True)" "myservice.wait_for_deployment(show_output=True)"

View File

@@ -640,7 +640,7 @@
"\n", "\n",
"myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn'], pip_packages=['azureml-defaults', 'azureml-sdk[automl]'])\n", "myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn'], pip_packages=['azureml-defaults', 'azureml-sdk[automl]'])\n",
"\n", "\n",
"conda_env_file_name = 'mydeployenv.yml'\n", "conda_env_file_name = 'myenv.yml'\n",
"myenv.save_to_file('.', conda_env_file_name)" "myenv.save_to_file('.', conda_env_file_name)"
] ]
}, },
@@ -664,17 +664,27 @@
"from azureml.exceptions import WebserviceException\n", "from azureml.exceptions import WebserviceException\n",
"from azureml.core.model import InferenceConfig\n", "from azureml.core.model import InferenceConfig\n",
"from azureml.core.model import Model\n", "from azureml.core.model import Model\n",
"from azureml.core.environment import Environment\n",
"from azureml.core.conda_dependencies import CondaDependencies\n",
"import uuid\n", "import uuid\n",
"\n", "\n",
"\n",
"myaci_config = AciWebservice.deploy_configuration(\n", "myaci_config = AciWebservice.deploy_configuration(\n",
" cpu_cores = 2, \n", " cpu_cores = 2, \n",
" memory_gb = 2, \n", " memory_gb = 2, \n",
" tags = {'name':'Databricks Azure ML ACI'}, \n", " tags = {'name':'Databricks Azure ML ACI'}, \n",
" description = 'This is for ADB and AutoML example.')\n", " description = 'This is for ADB and AutoML example.')\n",
"\n", "\n",
"inference_config = InferenceConfig(runtime= 'spark-py', \n", "myenv = Environment.get(ws, name='AzureML-PySpark-MmlSpark-0.15')\n",
" entry_script='score.py',\n", "# we need to add extra packages to procured environment\n",
" conda_file='mydeployenv.yml')\n", "# in order to deploy amended environment we need to rename it\n",
"myenv.name = 'myenv'\n",
"model_dependencies = CondaDependencies('myenv.yml')\n",
"for pip_dep in model_dependencies.pip_packages:\n",
" myenv.python.conda_dependencies.add_pip_package(pip_dep)\n",
"for conda_dep in model_dependencies.conda_packages:\n",
" myenv.python.conda_dependencies.add_conda_package(conda_dep)\n",
"inference_config = InferenceConfig(entry_script='score_sparkml.py', environment=myenv)\n",
"\n", "\n",
"guid = str(uuid.uuid4()).split(\"-\")[0]\n", "guid = str(uuid.uuid4()).split(\"-\")[0]\n",
"service_name = \"myservice-{}\".format(guid)\n", "service_name = \"myservice-{}\".format(guid)\n",

View File

@@ -195,7 +195,7 @@
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": [ "source": [
"You can now create and/or use an Environment object when deploying a Webservice. The Environment can have been previously registered with your Workspace, or it will be registered with it as a part of the Webservice deployment. Only Environments that were created using azureml-defaults version 1.0.48 or later will work with this new handling however.\n", "You can now create and/or use an Environment object when deploying a Webservice. The Environment can have been previously registered with your Workspace, or it will be registered with it as a part of the Webservice deployment. Please note that your environment must include azureml-defaults with verion >= 1.0.45 as a pip dependency, because it contains the functionality needed to host the model as a web service.\n",
"\n", "\n",
"More information can be found in our [using environments notebook](../training/using-environments/using-environments.ipynb)." "More information can be found in our [using environments notebook](../training/using-environments/using-environments.ipynb)."
] ]
@@ -221,23 +221,30 @@
"## Create Inference Configuration\n", "## Create Inference Configuration\n",
"\n", "\n",
"There is now support for a source directory, you can upload an entire folder from your local machine as dependencies for the Webservice.\n", "There is now support for a source directory, you can upload an entire folder from your local machine as dependencies for the Webservice.\n",
"Note: in that case, your entry_script, conda_file, and extra_docker_file_steps paths are relative paths to the source_directory path.\n", "Note: in that case, environments's entry_script and file_path are relative paths to the source_directory path; myenv.docker.base_dockerfile is a string containing extra docker steps or contents of the docker file.\n",
"\n", "\n",
"Sample code for using a source directory:\n", "Sample code for using a source directory:\n",
"\n", "\n",
"```python\n", "```python\n",
"from azureml.core.environment import Environment\n",
"from azureml.core.model import InferenceConfig\n",
"\n",
"myenv = Environment.from_conda_specification(name='myenv', file_path='env/myenv.yml')\n",
"\n",
"# explicitly set base_image to None when setting base_dockerfile\n",
"myenv.docker.base_image = None\n",
"# add extra docker commends to execute\n",
"myenv.docker.base_dockerfile = \"FROM ubuntu\\n RUN echo \\\"hello\\\"\"\n",
"\n",
"inference_config = InferenceConfig(source_directory=\"C:/abc\",\n", "inference_config = InferenceConfig(source_directory=\"C:/abc\",\n",
" runtime= \"python\", \n",
" entry_script=\"x/y/score.py\",\n", " entry_script=\"x/y/score.py\",\n",
" conda_file=\"env/myenv.yml\", \n", " environment=myenv)\n",
" extra_docker_file_steps=\"helloworld.txt\")\n",
"```\n", "```\n",
"\n", "\n",
" - source_directory = holds source path as string, this entire folder gets added in image so its really easy to access any files within this folder or subfolder\n", " - file_path: input parameter to Environment constructor. Manages conda and python package dependencies.\n",
" - runtime = Which runtime to use for the image. Current supported runtimes are 'spark-py' and 'python\n", " - env.docker.base_dockerfile: any extra steps you want to inject into docker file\n",
" - entry_script = contains logic specific to initializing your model and running predictions\n", " - source_directory: holds source path as string, this entire folder gets added in image so its really easy to access any files within this folder or subfolder\n",
" - conda_file = manages conda and python package dependencies.\n", " - entry_script: contains logic specific to initializing your model and running predictions"
" - extra_docker_file_steps = optional: any extra steps you want to inject into docker file"
] ]
}, },
{ {

View File

@@ -405,7 +405,7 @@
"\n", "\n",
" - To run a production-ready web service, see the [notebook on deployment to Azure Kubernetes Service](../production-deploy-to-aks/production-deploy-to-aks.ipynb).\n", " - To run a production-ready web service, see the [notebook on deployment to Azure Kubernetes Service](../production-deploy-to-aks/production-deploy-to-aks.ipynb).\n",
" - To run a local web service, see the [notebook on deployment to a local Docker container](../deploy-to-local/register-model-deploy-local.ipynb).\n", " - To run a local web service, see the [notebook on deployment to a local Docker container](../deploy-to-local/register-model-deploy-local.ipynb).\n",
" - For more information on datasets, see the [notebook on training with datasets](../../work-with-data/datasets-tutorial/train-with-datasets.ipynb).\n", " - For more information on datasets, see the [notebook on training with datasets](../../work-with-data/datasets-tutorial/train-with-datasets/train-with-datasets.ipynb).\n",
" - For more information on environments, see the [notebook on using environments](../../training/using-environments/using-environments.ipynb).\n", " - For more information on environments, see the [notebook on using environments](../../training/using-environments/using-environments.ipynb).\n",
" - For information on all the available deployment targets, see [&ldquo;How and where to deploy models&rdquo;](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-deploy-and-where#choose-a-compute-target)." " - For information on all the available deployment targets, see [&ldquo;How and where to deploy models&rdquo;](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-deploy-and-where#choose-a-compute-target)."
] ]

View File

@@ -189,6 +189,15 @@
" return error" " return error"
] ]
}, },
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Please note that you must indicate azureml-defaults with version >= 1.0.45 as a pip dependency for your environment. This package contains the functionality needed to host the model as a web service."
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
@@ -206,16 +215,6 @@
" - inference-schema[numpy-support]" " - inference-schema[numpy-support]"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%writefile C:/abc/dockerstep/customDockerStep.txt\n",
"RUN echo \"this is test\""
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
@@ -240,11 +239,10 @@
"source": [ "source": [
"## Create Inference Configuration\n", "## Create Inference Configuration\n",
"\n", "\n",
" - source_directory = holds source path as string, this entire folder gets added in image so its really easy to access any files within this folder or subfolder\n", " - file_path: input parameter to Environment constructor. Manages conda and python package dependencies.\n",
" - runtime = Which runtime to use for the image. Current supported runtimes are 'spark-py' and 'python\n", " - env.docker.base_dockerfile: any extra steps you want to inject into docker file\n",
" - entry_script = contains logic specific to initializing your model and running predictions\n", " - source_directory: holds source path as string, this entire folder gets added in image so it's really easy to access any files within this folder or subfolder\n",
" - conda_file = manages conda and python package dependencies.\n", " - entry_script: contains logic specific to initializing your model and running predictions"
" - extra_docker_file_steps = optional: any extra steps you want to inject into docker file"
] ]
}, },
{ {
@@ -253,13 +251,19 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from azureml.core.environment import Environment\n",
"from azureml.core.model import InferenceConfig\n", "from azureml.core.model import InferenceConfig\n",
"\n", "\n",
"\n",
"myenv = Environment.from_conda_specification(name='myenv', file_path='env/myenv.yml')\n",
"\n",
"# explicitly set base_image to None when setting base_dockerfile\n",
"myenv.docker.base_image = None\n",
"myenv.docker.base_dockerfile = \"RUN echo \\\"this is test\\\"\"\n",
"\n",
"inference_config = InferenceConfig(source_directory=\"C:/abc\",\n", "inference_config = InferenceConfig(source_directory=\"C:/abc\",\n",
" runtime=\"python\", \n",
" entry_script=\"x/y/score.py\",\n", " entry_script=\"x/y/score.py\",\n",
" conda_file=\"env/myenv.yml\", \n", " environment=myenv)\n"
" extra_docker_file_steps=\"dockerstep/customDockerStep.txt\")"
] ]
}, },
{ {

View File

@@ -0,0 +1,369 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
"\n",
"Licensed under the MIT License."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/deployment/production-deploy-to-aks/production-deploy-to-aks.png)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Deploy models to Azure Kubernetes Service (AKS) using controlled roll out\n",
"This notebook will show you how to deploy multiple AKS webservices with the same scoring endpoint and how to roll out your models in a controlled manner by configuring the % of scoring traffic going to each webservice. If you are using a Notebook VM, you are all set. Otherwise, go through the [configuration notebook](../../../configuration.ipynb) to install the Azure Machine Learning Python SDK and create an Azure ML Workspace."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Check for latest version\n",
"import azureml.core\n",
"print(azureml.core.VERSION)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Initialize workspace\n",
"Create a [Workspace](https://docs.microsoft.com/python/api/azureml-core/azureml.core.workspace%28class%29?view=azure-ml-py) object from your persisted configuration."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.workspace import Workspace\n",
"\n",
"ws = Workspace.from_config()\n",
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Register the model\n",
"Register a file or folder as a model by calling [Model.register()](https://docs.microsoft.com/python/api/azureml-core/azureml.core.model.model?view=azure-ml-py#register-workspace--model-path--model-name--tags-none--properties-none--description-none--datasets-none--model-framework-none--model-framework-version-none--child-paths-none-).\n",
"In addition to the content of the model file itself, your registered model will also store model metadata -- model description, tags, and framework information -- that will be useful when managing and deploying models in your workspace. Using tags, for instance, you can categorize your models and apply filters when listing models in your workspace."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core import Model\n",
"\n",
"model = Model.register(workspace=ws,\n",
" model_name='sklearn_regression_model.pkl', # Name of the registered model in your workspace.\n",
" model_path='./sklearn_regression_model.pkl', # Local file to upload and register as a model.\n",
" model_framework=Model.Framework.SCIKITLEARN, # Framework used to create the model.\n",
" model_framework_version='0.19.1', # Version of scikit-learn used to create the model.\n",
" description='Ridge regression model to predict diabetes progression.',\n",
" tags={'area': 'diabetes', 'type': 'regression'})\n",
"\n",
"print('Name:', model.name)\n",
"print('Version:', model.version)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Register an environment (for all models)\n",
"\n",
"If you want control over how your model is run, or if it has special runtime requirements, you can specify your own environment and scoring method.\n",
"\n",
"Specify the model's runtime environment by creating an [Environment](https://docs.microsoft.com/python/api/azureml-core/azureml.core.environment%28class%29?view=azure-ml-py) object and providing the [CondaDependencies](https://docs.microsoft.com/python/api/azureml-core/azureml.core.conda_dependencies.condadependencies?view=azure-ml-py) needed by your model."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core import Environment\n",
"from azureml.core.conda_dependencies import CondaDependencies\n",
"\n",
"environment=Environment('my-sklearn-environment')\n",
"environment.python.conda_dependencies = CondaDependencies.create(pip_packages=[\n",
" 'azureml-defaults',\n",
" 'inference-schema[numpy-support]',\n",
" 'joblib',\n",
" 'numpy',\n",
" 'scikit-learn'\n",
"])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"When using a custom environment, you must also provide Python code for initializing and running your model. An example script is included with this notebook."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"with open('score.py') as f:\n",
" print(f.read())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create the InferenceConfig\n",
"Create the inference configuration to reference your environment and entry script during deployment"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.model import InferenceConfig\n",
"\n",
"inference_config = InferenceConfig(entry_script='score.py', \n",
" source_directory='.',\n",
" environment=environment)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Provision the AKS Cluster\n",
"If you already have an AKS cluster attached to this workspace, skip the step below and provide the name of the cluster."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.compute import AksCompute\n",
"from azureml.core.compute import ComputeTarget\n",
"# Use the default configuration (can also provide parameters to customize)\n",
"prov_config = AksCompute.provisioning_configuration()\n",
"\n",
"aks_name = 'my-aks' \n",
"# Create the cluster\n",
"aks_target = ComputeTarget.create(workspace = ws, \n",
" name = aks_name, \n",
" provisioning_configuration = prov_config) \n",
"aks_target.wait_for_completion(show_output=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create an Endpoint and add a version (AKS service)\n",
"This creates a new endpoint and adds a version behind it. By default the first version added is the default version. You can specify the traffic percentile a version takes behind an endpoint. \n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# deploying the model and create a new endpoint\n",
"from azureml.core.webservice import AksEndpoint\n",
"# from azureml.core.compute import ComputeTarget\n",
"\n",
"#select a created compute\n",
"compute = ComputeTarget(ws, 'my-aks')\n",
"namespace_name=\"endpointnamespace\"\n",
"# define the endpoint name\n",
"endpoint_name = \"myendpoint1\"\n",
"# define the service name\n",
"version_name= \"versiona\"\n",
"\n",
"endpoint_deployment_config = AksEndpoint.deploy_configuration(tags = {'modelVersion':'firstversion', 'department':'finance'}, \n",
" description = \"my first version\", namespace = namespace_name, \n",
" version_name = version_name, traffic_percentile = 40)\n",
"\n",
"endpoint = Model.deploy(ws, endpoint_name, [model], inference_config, endpoint_deployment_config, compute)\n",
"endpoint.wait_for_deployment(True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"endpoint.get_logs()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Add another version of the service to an existing endpoint\n",
"This adds another version behind an existing endpoint. You can specify the traffic percentile the new version takes. If no traffic_percentile is specified then it defaults to 0. All the unspecified traffic percentile (in this example 50) across all versions goes to default version."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Adding a new version to an existing Endpoint.\n",
"version_name_add=\"versionb\" \n",
"\n",
"endpoint.create_version(version_name = version_name_add, inference_config=inference_config, models=[model], tags = {'modelVersion':'secondversion', 'department':'finance'}, \n",
" description = \"my second version\", traffic_percentile = 10)\n",
"endpoint.wait_for_deployment(True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Update an existing version in an endpoint\n",
"There are two types of versions: control and treatment. An endpoint contains one or more treatment versions but only one control version. This categorization helps compare the different versions against the defined control version."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"endpoint.update_version(version_name=endpoint.versions[version_name_add].name, description=\"my second version update\", traffic_percentile=40, is_default=True, is_control_version_type=True)\n",
"endpoint.wait_for_deployment(True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Test the web service using run method\n",
"Test the web service by passing in data. The run() method retrieves API keys behind the scenes to make sure that the call is authenticated."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Scoring on endpoint\n",
"import json\n",
"test_sample = json.dumps({'data': [\n",
" [1,2,3,4,5,6,7,8,9,10], \n",
" [10,9,8,7,6,5,4,3,2,1]\n",
"]})\n",
"\n",
"test_sample_encoded = bytes(test_sample, encoding='utf8')\n",
"prediction = endpoint.run(input_data=test_sample_encoded)\n",
"print(prediction)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Delete Resources"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# deleting a version in an endpoint\n",
"endpoint.delete_version(version_name=version_name)\n",
"endpoint.wait_for_deployment(True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# deleting an endpoint, this will delete all versions in the endpoint and the endpoint itself\n",
"endpoint.delete()"
]
}
],
"metadata": {
"authors": [
{
"name": "shipatel"
}
],
"category": "deployment",
"compute": [
"None"
],
"datasets": [
"Diabetes"
],
"deployment": [
"Azure Kubernetes Service"
],
"exclude_from_index": false,
"framework": [
"Scikit-learn"
],
"friendly_name": "Deploy models to AKS using controlled roll out",
"index_order": 3,
"kernelspec": {
"display_name": "Python 3.6",
"language": "python",
"name": "python36"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.0"
},
"star_tag": [
"featured"
],
"tags": [
"None"
],
"task": "Deploy a model with Azure Machine Learning"
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -0,0 +1,4 @@
name: deploy-aks-with-controlled-rollout
dependencies:
- pip:
- azureml-sdk

View File

@@ -0,0 +1,28 @@
import pickle
import json
import numpy
from sklearn.externals import joblib
from sklearn.linear_model import Ridge
from azureml.core.model import Model
def init():
    """Load the registered model into the module-level ``model`` variable.

    The path is resolved with ``Model.get_model_path`` using
    'sklearn_regression_model.pkl', the name the model was registered under,
    and the file at that path is deserialized with ``joblib`` back into a
    scikit-learn model.
    """
    global model
    # NOTE: the lookup is by registered model name, not by a local file path;
    # per the original author this differs from how the same code resolved
    # the model when it was run locally.
    model = joblib.load(Model.get_model_path('sklearn_regression_model.pkl'))
# note you can pass in multiple rows for scoring
def run(raw_data):
    """Score a JSON request with the module-level ``model``.

    ``raw_data`` is parsed as JSON and its 'data' entry is converted to a
    numpy array and passed to ``model.predict``. The prediction is returned
    as a plain list. If any step raises, the exception's message text is
    returned instead of being propagated.
    """
    try:
        payload = json.loads(raw_data)
        rows = numpy.array(payload['data'])
        # Any JSON-serializable value may be returned; tolist() turns the
        # prediction array into a plain Python list.
        return model.predict(rows).tolist()
    except Exception as exc:
        # Best-effort contract: report the failure as text to the caller.
        return str(exc)

View File

@@ -158,7 +158,8 @@
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": [ "source": [
"## 5. *Create myenv.yml file*" "## 5. *Create myenv.yml file*\n",
"Please note that you must indicate azureml-defaults with version >= 1.0.45 as a pip dependency, because it contains the functionality needed to host the model as a web service."
] ]
}, },
{ {
@@ -169,7 +170,8 @@
"source": [ "source": [
"from azureml.core.conda_dependencies import CondaDependencies \n", "from azureml.core.conda_dependencies import CondaDependencies \n",
"\n", "\n",
"myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn'])\n", "myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn'],\n",
" pip_packages=['azureml-defaults'])\n",
"\n", "\n",
"with open(\"myenv.yml\",\"w\") as f:\n", "with open(\"myenv.yml\",\"w\") as f:\n",
" f.write(myenv.serialize_to_string())" " f.write(myenv.serialize_to_string())"
@@ -189,10 +191,11 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"from azureml.core.model import InferenceConfig\n", "from azureml.core.model import InferenceConfig\n",
"from azureml.core.environment import Environment\n",
"\n", "\n",
"inference_config = InferenceConfig(runtime= \"python\", \n", "\n",
" entry_script=\"score.py\",\n", "myenv = Environment.from_conda_specification(name=\"myenv\", file_path=\"myenv.yml\")\n",
" conda_file=\"myenv.yml\")" "inference_config = InferenceConfig(entry_script=\"score.py\", environment=myenv)"
] ]
}, },
{ {

View File

@@ -244,7 +244,7 @@
"metadata": {}, "metadata": {},
"source": [ "source": [
"### Setting up inference configuration\n", "### Setting up inference configuration\n",
"First we create a YAML file that specifies which dependencies we would like to see in our container." "First we create a YAML file that specifies which dependencies we would like to see in our container. Please note that you must include azureml-defaults with version >= 1.0.45 as a pip dependency, because it contains the functionality needed to host the model as a web service."
] ]
}, },
{ {
@@ -255,7 +255,7 @@
"source": [ "source": [
"from azureml.core.conda_dependencies import CondaDependencies \n", "from azureml.core.conda_dependencies import CondaDependencies \n",
"\n", "\n",
"myenv = CondaDependencies.create(pip_packages=[\"numpy\",\"onnxruntime==0.4.0\",\"azureml-core\"])\n", "myenv = CondaDependencies.create(pip_packages=[\"numpy\", \"onnxruntime==0.4.0\", \"azureml-core\", \"azureml-defaults\"])\n",
"\n", "\n",
"with open(\"myenv.yml\",\"w\") as f:\n", "with open(\"myenv.yml\",\"w\") as f:\n",
" f.write(myenv.serialize_to_string())" " f.write(myenv.serialize_to_string())"
@@ -275,11 +275,11 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"from azureml.core.model import InferenceConfig\n", "from azureml.core.model import InferenceConfig\n",
"from azureml.core.environment import Environment\n",
"\n", "\n",
"inference_config = InferenceConfig(runtime= \"python\", \n", "\n",
" entry_script=\"score.py\",\n", "myenv = Environment.from_conda_specification(name=\"myenv\", file_path=\"myenv.yml\")\n",
" conda_file=\"myenv.yml\",\n", "inference_config = InferenceConfig(entry_script=\"score.py\", environment=myenv)"
" extra_docker_file_steps = \"Dockerfile\")"
] ]
}, },
{ {
@@ -373,7 +373,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"#aci_service.delete()" "aci_service.delete()"
] ]
} }
], ],

View File

@@ -319,7 +319,8 @@
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": [ "source": [
"### Write Environment File" "### Write Environment File\n",
"Please note that you must indicate azureml-defaults with version >= 1.0.45 as a pip dependency, because it contains the functionality needed to host the model as a web service."
] ]
}, },
{ {
@@ -330,7 +331,8 @@
"source": [ "source": [
"from azureml.core.conda_dependencies import CondaDependencies \n", "from azureml.core.conda_dependencies import CondaDependencies \n",
"\n", "\n",
"myenv = CondaDependencies.create(pip_packages=[\"numpy\", \"onnxruntime\", \"azureml-core\"])\n", "\n",
"myenv = CondaDependencies.create(pip_packages=[\"numpy\", \"onnxruntime\", \"azureml-core\", \"azureml-defaults\"])\n",
"\n", "\n",
"with open(\"myenv.yml\",\"w\") as f:\n", "with open(\"myenv.yml\",\"w\") as f:\n",
" f.write(myenv.serialize_to_string())" " f.write(myenv.serialize_to_string())"
@@ -350,11 +352,11 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"from azureml.core.model import InferenceConfig\n", "from azureml.core.model import InferenceConfig\n",
"from azureml.core.environment import Environment\n",
"\n", "\n",
"inference_config = InferenceConfig(runtime= \"python\", \n", "\n",
" entry_script=\"score.py\",\n", "myenv = Environment.from_conda_specification(name=\"myenv\", file_path=\"myenv.yml\")\n",
" conda_file=\"myenv.yml\",\n", "inference_config = InferenceConfig(entry_script=\"score.py\", environment=myenv)"
" extra_docker_file_steps = \"Dockerfile\")"
] ]
}, },
{ {
@@ -724,7 +726,7 @@
"source": [ "source": [
"# remember to delete your service after you are done using it!\n", "# remember to delete your service after you are done using it!\n",
"\n", "\n",
"# aci_service.delete()" "aci_service.delete()"
] ]
}, },
{ {

View File

@@ -306,7 +306,7 @@
"source": [ "source": [
"### Write Environment File\n", "### Write Environment File\n",
"\n", "\n",
"This step creates a YAML environment file that specifies which dependencies we would like to see in our Linux Virtual Machine." "This step creates a YAML environment file that specifies which dependencies we would like to see in our Linux Virtual Machine. Please note that you must indicate azureml-defaults with version >= 1.0.45 as a pip dependency, because it contains the functionality needed to host the model as a web service."
] ]
}, },
{ {
@@ -317,7 +317,7 @@
"source": [ "source": [
"from azureml.core.conda_dependencies import CondaDependencies \n", "from azureml.core.conda_dependencies import CondaDependencies \n",
"\n", "\n",
"myenv = CondaDependencies.create(pip_packages=[\"numpy\", \"onnxruntime\", \"azureml-core\"])\n", "myenv = CondaDependencies.create(pip_packages=[\"numpy\", \"onnxruntime\", \"azureml-core\", \"azureml-defaults\"])\n",
"\n", "\n",
"with open(\"myenv.yml\",\"w\") as f:\n", "with open(\"myenv.yml\",\"w\") as f:\n",
" f.write(myenv.serialize_to_string())" " f.write(myenv.serialize_to_string())"
@@ -337,11 +337,11 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"from azureml.core.model import InferenceConfig\n", "from azureml.core.model import InferenceConfig\n",
"from azureml.core.environment import Environment\n",
"\n", "\n",
"inference_config = InferenceConfig(runtime= \"python\", \n", "\n",
" entry_script=\"score.py\",\n", "myenv = Environment.from_conda_specification(name=\"myenv\", file_path=\"myenv.yml\")\n",
" extra_docker_file_steps = \"Dockerfile\",\n", "inference_config = InferenceConfig(entry_script=\"score.py\", environment=myenv)"
" conda_file=\"myenv.yml\")"
] ]
}, },
{ {
@@ -733,7 +733,7 @@
"source": [ "source": [
"# remember to delete your service after you are done using it!\n", "# remember to delete your service after you are done using it!\n",
"\n", "\n",
"# aci_service.delete()" "aci_service.delete()"
] ]
}, },
{ {

View File

@@ -241,7 +241,8 @@
"source": [ "source": [
"from azureml.core.conda_dependencies import CondaDependencies \n", "from azureml.core.conda_dependencies import CondaDependencies \n",
"\n", "\n",
"myenv = CondaDependencies.create(pip_packages=[\"numpy\",\"onnxruntime\",\"azureml-core\"])\n", "\n",
"myenv = CondaDependencies.create(pip_packages=[\"numpy\", \"onnxruntime\", \"azureml-core\", \"azureml-defaults\"])\n",
"\n", "\n",
"with open(\"myenv.yml\",\"w\") as f:\n", "with open(\"myenv.yml\",\"w\") as f:\n",
" f.write(myenv.serialize_to_string())" " f.write(myenv.serialize_to_string())"
@@ -251,7 +252,7 @@
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Create the inference configuration object" "Create the inference configuration object. Please note that you must indicate azureml-defaults with version >= 1.0.45 as a pip dependency, because it contains the functionality needed to host the model as a web service."
] ]
}, },
{ {
@@ -261,11 +262,11 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"from azureml.core.model import InferenceConfig\n", "from azureml.core.model import InferenceConfig\n",
"from azureml.core.environment import Environment\n",
"\n", "\n",
"inference_config = InferenceConfig(runtime= \"python\", \n", "\n",
" entry_script=\"score.py\",\n", "myenv = Environment.from_conda_specification(name=\"myenv\", file_path=\"myenv.yml\")\n",
" conda_file=\"myenv.yml\",\n", "inference_config = InferenceConfig(entry_script=\"score.py\", environment=myenv)"
" extra_docker_file_steps = \"Dockerfile\")"
] ]
}, },
{ {
@@ -361,7 +362,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"#aci_service.delete()" "aci_service.delete()"
] ]
} }
], ],

View File

@@ -405,7 +405,7 @@
"metadata": {}, "metadata": {},
"source": [ "source": [
"### Create inference configuration\n", "### Create inference configuration\n",
"First we create a YAML file that specifies which dependencies we would like to see in our container." "First we create a YAML file that specifies which dependencies we would like to see in our container. Please note that you must indicate azureml-defaults with version >= 1.0.45 as a pip dependency, because it contains the functionality needed to host the model as a web service."
] ]
}, },
{ {
@@ -416,7 +416,7 @@
"source": [ "source": [
"from azureml.core.conda_dependencies import CondaDependencies \n", "from azureml.core.conda_dependencies import CondaDependencies \n",
"\n", "\n",
"myenv = CondaDependencies.create(pip_packages=[\"numpy\",\"onnxruntime\",\"azureml-core\"])\n", "myenv = CondaDependencies.create(pip_packages=[\"numpy\",\"onnxruntime\",\"azureml-core\", \"azureml-defaults\"])\n",
"\n", "\n",
"with open(\"myenv.yml\",\"w\") as f:\n", "with open(\"myenv.yml\",\"w\") as f:\n",
" f.write(myenv.serialize_to_string())" " f.write(myenv.serialize_to_string())"
@@ -436,11 +436,11 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"from azureml.core.model import InferenceConfig\n", "from azureml.core.model import InferenceConfig\n",
"from azureml.core.environment import Environment\n",
"\n", "\n",
"inference_config = InferenceConfig(runtime= \"python\", \n", "\n",
" entry_script=\"score.py\",\n", "myenv = Environment.from_conda_specification(name=\"myenv\", file_path=\"myenv.yml\")\n",
" conda_file=\"myenv.yml\",\n", "inference_config = InferenceConfig(entry_script=\"score.py\", environment=myenv)"
" extra_docker_file_steps = \"Dockerfile\")"
] ]
}, },
{ {
@@ -537,7 +537,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"#aci_service.delete()" "aci_service.delete()"
] ]
} }
], ],

View File

@@ -308,7 +308,9 @@
"source": [ "source": [
"## Deploy \n", "## Deploy \n",
"\n", "\n",
"Deploy Model and ScoringExplainer" "Deploy Model and ScoringExplainer.\n",
"\n",
"Please note that you must indicate azureml-defaults with version >= 1.0.45 as a pip dependency, because it contains the functionality needed to host the model as a web service."
] ]
}, },
{ {
@@ -319,7 +321,7 @@
"source": [ "source": [
"from azureml.core.conda_dependencies import CondaDependencies \n", "from azureml.core.conda_dependencies import CondaDependencies \n",
"\n", "\n",
"# WARNING: to install this, g++ needs to be available on the Docker image and is not by default (look at the next cell)\n", "# azureml-defaults is required to host the model as a web service.\n",
"azureml_pip_packages = [\n", "azureml_pip_packages = [\n",
" 'azureml-defaults', 'azureml-contrib-interpret', 'azureml-core', 'azureml-telemetry',\n", " 'azureml-defaults', 'azureml-contrib-interpret', 'azureml-core', 'azureml-telemetry',\n",
" 'azureml-interpret'\n", " 'azureml-interpret'\n",
@@ -338,16 +340,6 @@
" print(f.read())" " print(f.read())"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%writefile dockerfile\n",
"RUN apt-get update && apt-get install -y g++ "
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
@@ -369,6 +361,8 @@
"from azureml.core.model import InferenceConfig\n", "from azureml.core.model import InferenceConfig\n",
"from azureml.core.webservice import AciWebservice\n", "from azureml.core.webservice import AciWebservice\n",
"from azureml.core.model import Model\n", "from azureml.core.model import Model\n",
"from azureml.core.environment import Environment\n",
"\n",
"\n", "\n",
"aciconfig = AciWebservice.deploy_configuration(cpu_cores=1, \n", "aciconfig = AciWebservice.deploy_configuration(cpu_cores=1, \n",
" memory_gb=1, \n", " memory_gb=1, \n",
@@ -376,10 +370,8 @@
" \"method\" : \"local_explanation\"}, \n", " \"method\" : \"local_explanation\"}, \n",
" description='Get local explanations for IBM Employee Attrition data')\n", " description='Get local explanations for IBM Employee Attrition data')\n",
"\n", "\n",
"inference_config = InferenceConfig(runtime= \"python\", \n", "myenv = Environment.from_conda_specification(name=\"myenv\", file_path=\"myenv.yml\")\n",
" entry_script=\"score_local_explain.py\",\n", "inference_config = InferenceConfig(entry_script=\"score_local_explain.py\", environment=myenv)\n",
" conda_file=\"myenv.yml\",\n",
" extra_docker_file_steps=\"dockerfile\")\n",
"\n", "\n",
"# Use configs and models generated above\n", "# Use configs and models generated above\n",
"service = Model.deploy(ws, 'model-scoring-deploy-local', [scoring_explainer_model, original_model], inference_config, aciconfig)\n", "service = Model.deploy(ws, 'model-scoring-deploy-local', [scoring_explainer_model, original_model], inference_config, aciconfig)\n",

View File

@@ -246,7 +246,7 @@
"metadata": {}, "metadata": {},
"source": [ "source": [
"## Create TensorFlow estimator\n", "## Create TensorFlow estimator\n",
"Next, we construct an [TensorFlow](https://docs.microsoft.com/en-us/python/api/azureml-train-core/azureml.train.dnn.tensorflow?view=azure-ml-py) estimator object.\n", "Next, we construct an [TensorFlow](https://docs.microsoft.com/python/api/azureml-train-core/azureml.train.dnn.tensorflow?view=azure-ml-py) estimator object.\n",
"The TensorFlow estimator is providing a simple way of launching a TensorFlow training job on a compute target. It will automatically provide a docker image that has TensorFlow installed -- if additional pip or conda packages are required, their names can be passed in via the `pip_packages` and `conda_packages` arguments and they will be included in the resulting docker.\n", "The TensorFlow estimator is providing a simple way of launching a TensorFlow training job on a compute target. It will automatically provide a docker image that has TensorFlow installed -- if additional pip or conda packages are required, their names can be passed in via the `pip_packages` and `conda_packages` arguments and they will be included in the resulting docker.\n",
"\n", "\n",
"The TensorFlow estimator also takes a `framework_version` parameter -- if no version is provided, the estimator will default to the latest version supported by AzureML. Use `TensorFlow.get_supported_versions()` to get a list of all versions supported by your current SDK version or see the [SDK documentation](https://docs.microsoft.com/en-us/python/api/azureml-train-core/azureml.train.dnn?view=azure-ml-py) for the versions supported in the most current release.\n", "The TensorFlow estimator also takes a `framework_version` parameter -- if no version is provided, the estimator will default to the latest version supported by AzureML. Use `TensorFlow.get_supported_versions()` to get a list of all versions supported by your current SDK version or see the [SDK documentation](https://docs.microsoft.com/en-us/python/api/azureml-train-core/azureml.train.dnn?view=azure-ml-py) for the versions supported in the most current release.\n",
@@ -385,7 +385,7 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"metrics_output_name = 'metrics_output'\n", "metrics_output_name = 'metrics_output'\n",
"metirics_data = PipelineData(name='metrics_data',\n", "metrics_data = PipelineData(name='metrics_data',\n",
" datastore=ds,\n", " datastore=ds,\n",
" pipeline_output_name=metrics_output_name)\n", " pipeline_output_name=metrics_output_name)\n",
"\n", "\n",
@@ -395,7 +395,7 @@
" hyperdrive_config=hd_config,\n", " hyperdrive_config=hd_config,\n",
" estimator_entry_script_arguments=['--data-folder', data_folder],\n", " estimator_entry_script_arguments=['--data-folder', data_folder],\n",
" inputs=[data_folder],\n", " inputs=[data_folder],\n",
" metrics_output=metirics_data)" " metrics_output=metrics_data)"
] ]
}, },
{ {

View File

@@ -382,10 +382,25 @@
" headers=aad_token, \n", " headers=aad_token, \n",
" json={\"ExperimentName\": \"My_Pipeline1\",\n", " json={\"ExperimentName\": \"My_Pipeline1\",\n",
" \"RunSource\": \"SDK\",\n", " \"RunSource\": \"SDK\",\n",
" \"ParameterAssignments\": {\"pipeline_arg\": 45}})\n", " \"ParameterAssignments\": {\"pipeline_arg\": 45}})"
"run_id = response.json()[\"Id\"]\n", ]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"try:\n",
" response.raise_for_status()\n",
"except Exception: \n",
" raise Exception('Received bad response from the endpoint: {}\\n'\n",
" 'Response Code: {}\\n'\n",
" 'Headers: {}\\n'\n",
" 'Content: {}'.format(rest_endpoint, response.status_code, response.headers, response.content))\n",
"\n", "\n",
"print(run_id)" "run_id = response.json().get('Id')\n",
"print('Submitted pipeline run: ', run_id)"
] ]
}, },
{ {

View File

@@ -180,7 +180,7 @@
"# just get the published pipeline object that you have the ID for.\n", "# just get the published pipeline object that you have the ID for.\n",
"\n", "\n",
"# Get all published pipeline objects in the workspace\n", "# Get all published pipeline objects in the workspace\n",
"all_pub_pipelines = PublishedPipeline.get_all(ws)\n", "all_pub_pipelines = PublishedPipeline.list(ws)\n",
"\n", "\n",
"# We will iterate through the list of published pipelines and \n", "# We will iterate through the list of published pipelines and \n",
"# use the last ID in the list for Schedule operations: \n", "# use the last ID in the list for Schedule operations: \n",
@@ -244,7 +244,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"schedules = Schedule.get_all(ws, pipeline_id=pub_pipeline_id)\n", "schedules = Schedule.list(ws, pipeline_id=pub_pipeline_id)\n",
"\n", "\n",
"# We will iterate through the list of schedules and \n", "# We will iterate through the list of schedules and \n",
"# use the last recurrence schedule in the list for further operations: \n", "# use the last recurrence schedule in the list for further operations: \n",
@@ -272,7 +272,7 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"# Use active_only=False to get all schedules including disabled schedules\n", "# Use active_only=False to get all schedules including disabled schedules\n",
"schedules = Schedule.get_all(ws, active_only=True) \n", "schedules = Schedule.list(ws, active_only=True) \n",
"print(\"Your workspace has the following schedules set up:\")\n", "print(\"Your workspace has the following schedules set up:\")\n",
"for schedule in schedules:\n", "for schedule in schedules:\n",
" print(\"{} (Published pipeline: {}\".format(schedule.id, schedule.pipeline_id))" " print(\"{} (Published pipeline: {}\".format(schedule.id, schedule.pipeline_id))"

View File

@@ -230,7 +230,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"endpoint_list = PipelineEndpoint.get_all(workspace=ws, active_only=True)\n", "endpoint_list = PipelineEndpoint.list(workspace=ws, active_only=True)\n",
"endpoint_list" "endpoint_list"
] ]
}, },
@@ -360,7 +360,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"versions = pipeline_endpoint_by_name.get_all_versions()\n", "versions = pipeline_endpoint_by_name.list_versions()\n",
"\n", "\n",
"for ve in versions:\n", "for ve in versions:\n",
" print(ve.version)\n", " print(ve.version)\n",
@@ -381,7 +381,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"pipelines = pipeline_endpoint_by_name.get_all_pipelines(active_only=True)\n", "pipelines = pipeline_endpoint_by_name.list_pipelines(active_only=True)\n",
"pipelines" "pipelines"
] ]
}, },
@@ -494,10 +494,25 @@
" headers=aad_token, \n", " headers=aad_token, \n",
" json={\"ExperimentName\": \"default_pipeline\",\n", " json={\"ExperimentName\": \"default_pipeline\",\n",
" \"RunSource\": \"SDK\",\n", " \"RunSource\": \"SDK\",\n",
" \"ParameterAssignments\": {\"1\": \"united\", \"2\":\"city\"}})\n", " \"ParameterAssignments\": {\"1\": \"united\", \"2\":\"city\"}})"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"try:\n",
" response.raise_for_status()\n",
"except Exception: \n",
" raise Exception('Received bad response from the endpoint: {}\\n'\n",
" 'Response Code: {}\\n'\n",
" 'Headers: {}\\n'\n",
" 'Content: {}'.format(rest_endpoint, response.status_code, response.headers, response.content))\n",
"\n", "\n",
"run_id = response.json()[\"Id\"]\n", "run_id = response.json().get('Id')\n",
"print(run_id)" "print('Submitted pipeline run: ', run_id)"
] ]
}, },
{ {
@@ -578,7 +593,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.6.2" "version": "3.6.7"
}, },
"order_index": 12, "order_index": 12,
"tags": [ "tags": [

View File

@@ -366,8 +366,15 @@
"\n", "\n",
"rest_endpoint = published_pipeline.endpoint\n", "rest_endpoint = published_pipeline.endpoint\n",
"\n", "\n",
"print(\"You can perform HTTP POST on URL {} to trigger this pipeline\".format(rest_endpoint))\n", "print(\"You can perform HTTP POST on URL {} to trigger this pipeline\".format(rest_endpoint))"
"\n", ]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# specify the param when running the pipeline\n", "# specify the param when running the pipeline\n",
"response = requests.post(rest_endpoint, \n", "response = requests.post(rest_endpoint, \n",
" headers=aad_token, \n", " headers=aad_token, \n",
@@ -381,9 +388,24 @@
" },\n", " },\n",
" \"ParameterAssignments\": {\"input_string\": \"sample_string3\"}\n", " \"ParameterAssignments\": {\"input_string\": \"sample_string3\"}\n",
" }\n", " }\n",
" )\n", " )"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"try:\n",
" response.raise_for_status()\n",
"except Exception: \n",
" raise Exception('Received bad response from the endpoint: {}\\n'\n",
" 'Response Code: {}\\n'\n",
" 'Headers: {}\\n'\n",
" 'Content: {}'.format(rest_endpoint, response.status_code, response.headers, response.content))\n",
"\n", "\n",
"run_id = response.json()[\"Id\"]\n", "run_id = response.json().get('Id')\n",
"print('Submitted pipeline run: ', run_id)" "print('Submitted pipeline run: ', run_id)"
] ]
}, },

View File

@@ -285,7 +285,7 @@
"metrics_output_name = 'metrics_output'\n", "metrics_output_name = 'metrics_output'\n",
"best_model_output_name = 'best_model_output'\n", "best_model_output_name = 'best_model_output'\n",
"\n", "\n",
"metirics_data = PipelineData(name='metrics_data',\n", "metrics_data = PipelineData(name='metrics_data',\n",
" datastore=ds,\n", " datastore=ds,\n",
" pipeline_output_name=metrics_output_name,\n", " pipeline_output_name=metrics_output_name,\n",
" training_output=TrainingOutput(type='Metrics'))\n", " training_output=TrainingOutput(type='Metrics'))\n",
@@ -311,7 +311,7 @@
"automl_step = AutoMLStep(\n", "automl_step = AutoMLStep(\n",
" name='automl_module',\n", " name='automl_module',\n",
" automl_config=automl_config,\n", " automl_config=automl_config,\n",
" outputs=[metirics_data, model_data],\n", " outputs=[metrics_data, model_data],\n",
" allow_reuse=True)" " allow_reuse=True)"
] ]
}, },

View File

@@ -0,0 +1,436 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Copyright (c) Microsoft Corporation. All rights reserved. \n",
"Licensed under the MIT License."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-getting-started.png)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Azure Machine Learning Pipeline with NotebookRunnerStep\n",
"This notebook demonstrates the use of `NotebookRunnerStep`. It allows you to run a local notebook as a step in Azure Machine Learning Pipeline."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Introduction\n",
"In this example we showcase how you can run another notebook `notebook_runner/training_notebook.ipynb` as a step in Azure Machine Learning Pipeline.\n",
"\n",
"If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, make sure you have executed the [configuration](https://aka.ms/pl-config) before running this notebook.\n",
"\n",
"In this notebook you will learn how to:\n",
"1. Create an `Experiment` in an existing `Workspace`.\n",
"2. Create or Attach existing AmlCompute to a workspace.\n",
"3. Configure NotebookRun using `NotebookRunConfig`.\n",
"4. Use NotebookRunnerStep.\n",
"5. Run the notebook on `AmlCompute` as a pipeline step consuming the output of a python script step.\n",
"\n",
"Advantages of running your notebook as a step in pipeline:\n",
"1. Run your notebook like a python script without converting into .py files, leveraging complete end to end experience of Azure Machine Learning Pipelines.\n",
"2. Use pipeline intermediate data to and from the notebook along with other steps in pipeline.\n",
"3. Parameterize your notebook with [Pipeline Parameters](./aml-pipelines-publish-and-run-using-rest-endpoint.ipynb).\n",
"\n",
"Try some more [quick start notebooks](https://github.com/microsoft/recommenders/tree/master/notebooks/00_quick_start) with `NotebookRunnerStep`."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Azure Machine Learning and Pipeline SDK-specific imports"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"import azureml.core\n",
"\n",
"from azureml.core.compute import AmlCompute, ComputeTarget\n",
"from azureml.core.runconfig import RunConfiguration\n",
"from azureml.data.data_reference import DataReference\n",
"from azureml.pipeline.core import PipelineData\n",
"from azureml.core.datastore import Datastore\n",
"\n",
"from azureml.widgets import RunDetails\n",
"\n",
"from azureml.core import Workspace, Experiment\n",
"from azureml.contrib.notebook import NotebookRunConfig, AzureMLNotebookHandler\n",
"\n",
"from azureml.pipeline.core import Pipeline\n",
"from azureml.pipeline.steps import PythonScriptStep\n",
"from azureml.contrib.notebook import NotebookRunnerStep\n",
"\n",
"# Check core SDK version number\n",
"print(\"SDK version:\", azureml.core.VERSION)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Initialize Workspace\n",
"\n",
"Initialize a [workspace](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.workspace(class%29) object from persisted configuration."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ws = Workspace.from_config()\n",
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')\n",
"ws.set_default_datastore(\"workspaceblobstore\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Upload data to datastore"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"Datastore.get(ws, \"workspaceblobstore\").upload_files([\"./20news.pkl\"], target_path=\"20newsgroups\", overwrite=True)\n",
"print(\"Upload call completed\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create an Azure ML experiment\n",
"Let's create an experiment named \"notebook-step-run-example\" and a folder to hold the notebook and other scripts. The script runs will be recorded under the experiment in Azure.\n",
"\n",
"The best practice is to use a separate folder for each step's script and its dependent files, and to specify that folder as the `source_directory` for the step. This helps reduce the size of the snapshot created for the step (only the specific folder is snapshotted). Since a change to any file in the `source_directory` triggers a re-upload of the snapshot, this also allows the step to be reused when nothing in its `source_directory` has changed."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Choose a name for the run history container in the workspace.\n",
"experiment_name = 'notebook-step-run-example'\n",
"source_directory = 'notebook_runner'\n",
"\n",
"experiment = Experiment(ws, experiment_name)\n",
"experiment"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Create or Attach an AmlCompute cluster\n",
"You will need a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for your pipeline run. In this tutorial, you reuse the `AmlCompute` cluster named `cpu-cluster` if it already exists in the workspace, or create it otherwise."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Choose a name for your cluster.\n",
"amlcompute_cluster_name = \"cpu-cluster\"\n",
"\n",
"found = False\n",
"# Check if this compute target already exists in the workspace.\n",
"cts = ws.compute_targets\n",
"if amlcompute_cluster_name in cts and cts[amlcompute_cluster_name].type == 'AmlCompute':\n",
" found = True\n",
" print('Found existing compute target.')\n",
" compute_target = cts[amlcompute_cluster_name]\n",
" \n",
"if not found:\n",
" print('Creating a new compute target...')\n",
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\", # for GPU, use \"STANDARD_NC6\"\n",
" #vm_priority = 'lowpriority', # optional\n",
" max_nodes = 4)\n",
"\n",
" # Create the cluster.\n",
" compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)\n",
" \n",
" # Can poll for a minimum number of nodes and for a specific timeout.\n",
" # If no min_node_count is provided, it will use the scale settings for the cluster.\n",
" compute_target.wait_for_completion(show_output = True, min_node_count = 1, timeout_in_minutes = 10)\n",
" \n",
" # For a more detailed view of current AmlCompute status, use get_status()."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Create a new RunConfig object"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.conda_dependencies import CondaDependencies\n",
"\n",
"conda_run_config = RunConfiguration(framework=\"python\")\n",
"\n",
"conda_run_config.environment.docker.enabled = True\n",
"conda_run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n",
"\n",
"cd = CondaDependencies.create(pip_packages=['azureml-sdk'], pin_sdk_version=False)\n",
"conda_run_config.environment.python.conda_dependencies = cd\n",
"\n",
"print('run config is ready')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Define input and outputs"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"input_data = DataReference(\n",
" datastore=Datastore.get(ws, \"workspaceblobstore\"),\n",
" data_reference_name=\"blob_test_data\",\n",
" path_on_datastore=\"20newsgroups/20news.pkl\")\n",
"\n",
"output_data = PipelineData(name=\"processed_data\",\n",
" datastore=Datastore.get(ws, \"workspaceblobstore\"))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Create notebook run configuration and set parameters values"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"handler = AzureMLNotebookHandler(timeout=600, progress_bar=False, log_output=True)\n",
"\n",
"cfg = NotebookRunConfig(source_directory=source_directory, notebook=\"training_notebook.ipynb\",\n",
" handler = handler,\n",
" parameters={\"arg1\": \"Machine Learning\"},\n",
" run_config=conda_run_config)\n",
"\n",
"print(\"Notebook Run Config is created.\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Define PythonScriptStep"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print('Source directory for the step is {}.'.format(os.path.realpath('./train')))\n",
"python_script_step = PythonScriptStep(\n",
" script_name=\"train.py\",\n",
" arguments=[\"--input_data\", input_data],\n",
" inputs=[input_data],\n",
" outputs=[output_data],\n",
" compute_target=compute_target, \n",
" source_directory=\"./train\",\n",
" allow_reuse=True)\n",
"print(\"python_script_step created\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Define NotebookRunnerStep\n",
"\n",
"This step will consume intermediate output produced by `python_script_step` as an input.\n",
"\n",
"Optionally, set `output_notebook_pipeline_data_name` on the `NotebookRunnerStep` to expose the `output_notebook` of the notebook run as a step output of type `PipelineData`, which can then be passed further along the pipeline."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.pipeline.core import PipelineParameter, TrainingOutput\n",
"\n",
"output_from_notebook = PipelineData(name=\"notebook_processed_data\",\n",
" datastore=Datastore.get(ws, \"workspaceblobstore\"))\n",
"\n",
"my_pipeline_param = PipelineParameter(name=\"pipeline_param\", default_value=\"my_param\")\n",
"\n",
"print('Source directory for the step is {}.'.format(os.path.realpath(source_directory)))\n",
"notebook_runner_step = NotebookRunnerStep(name=\"training_notebook_step\",\n",
" notebook_run_config=cfg,\n",
" params={\"my_pipeline_param\": my_pipeline_param},\n",
" inputs=[output_data],\n",
" outputs=[output_from_notebook],\n",
" allow_reuse=True,\n",
" compute_target=compute_target,\n",
" output_notebook_pipeline_data_name=\"notebook_result\")\n",
"\n",
"print(\"Notebook Runner Step is Created.\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Build Pipeline\n",
"\n",
"Once we have the steps (or steps collection), we can build the [pipeline](https://docs.microsoft.com/en-us/python/api/azureml-pipeline-core/azureml.pipeline.core.pipeline.pipeline?view=azure-ml-py). By default, all these steps will run in **parallel** once we submit the pipeline for run.\n",
"\n",
"A pipeline is created with a list of steps and a workspace. Submit a pipeline using [submit](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.experiment(class)?view=azure-ml-py#submit-config--tags-none----kwargs-). When submit is called, a [PipelineRun](https://docs.microsoft.com/en-us/python/api/azureml-pipeline-core/azureml.pipeline.core.pipelinerun?view=azure-ml-py) is created which in turn creates [StepRun](https://docs.microsoft.com/en-us/python/api/azureml-pipeline-core/azureml.pipeline.core.steprun?view=azure-ml-py) objects for each step in the workflow."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pipeline1 = Pipeline(workspace=ws, steps=[notebook_runner_step])\n",
"\n",
"pipeline1.validate()\n",
"print(\"Pipeline validation complete\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pipeline_run1 = experiment.submit(pipeline1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"RunDetails(pipeline_run1).show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Download output notebook\n",
"\n",
"`output_notebook` can be retrieved via pipeline step output if `output_notebook_pipeline_data_name` is provided to the `NotebookRunnerStep`"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pipeline_run1.wait_for_completion()\n",
"# Retrieve the step run by name `training_notebook_step`\n",
"train_step = pipeline_run1.find_step_run('training_notebook_step')\n",
"\n",
"if train_step:\n",
" train_step_obj = train_step[0] # since we have only one step by name `training_notebook_step`\n",
" train_step_obj.get_output_data('notebook_result').download(source_directory) # download the output to source_directory"
]
}
],
"metadata": {
"authors": [
{
"name": "sanpil"
}
],
"category": "tutorial",
"compute": [
"AML Compute"
],
"datasets": [
"Custom"
],
"deployment": [
"None"
],
"exclude_from_index": false,
"framework": [
"Azure ML"
],
"friendly_name": "How to run a notebook as a step in AML Pipelines",
"kernelspec": {
"display_name": "Python 3.6",
"language": "python",
"name": "python36"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
},
"order_index": 12,
"star_tag": [
"None"
],
"tags": [
"None"
],
"task": "Demonstrates the use of NotebookRunnerStep"
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -0,0 +1,6 @@
name: aml-pipelines-with-notebook-runner-step
dependencies:
- pip:
- azureml-sdk
- azureml-widgets
- azureml-contrib-notebook

View File

@@ -0,0 +1,106 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Copyright (c) Microsoft Corporation. All rights reserved. \n",
"Licensed under the MIT License."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/notebook_runner/training_notebook.png)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(\"In training_notebook.ipynb\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"parameters"
]
},
"outputs": [],
"source": [
"# declaring parameters to override\n",
"\n",
"arg1 = \"Azure\"\n",
"processed_data = None\n",
"notebook_processed_data = None\n",
"my_pipeline_param = None"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Final parameter values\n",
"\n",
"print(\"arg1: %s\" % arg1)\n",
"print(\"input from previous step: %s\" % processed_data)\n",
"print(\"output from notebook: %s\" % notebook_processed_data)\n",
"print(\"pipeline_parameter: %s\" % my_pipeline_param)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"if not (notebook_processed_data is None):\n",
" os.makedirs(notebook_processed_data, exist_ok=True)\n",
" print(\"%s created\" % notebook_processed_data)"
]
}
],
"metadata": {
"authors": [
{
"name": "sanpil"
}
],
"kernelspec": {
"display_name": "Python 3.6",
"language": "python",
"name": "python36"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -17,7 +17,7 @@ pip install azureml-contrib-pipeline-steps
``` ```
### Creation of Azure Machine Learning Workspace ### Creation of Azure Machine Learning Workspace
If you do not already have a Azure ML Workspace, please run the [configuration Notebook](../../configuration.ipynb). If you do not already have a Azure ML Workspace, please run the [configuration Notebook](https://aka.ms/pl-config).
## Configure a Batch Inference job ## Configure a Batch Inference job
@@ -124,4 +124,4 @@ pipeline_run.wait_for_completion(show_output=True)
- [file-dataset-image-inference-mnist.ipynb](./file-dataset-image-inference-mnist.ipynb) demonstrates how to run batch inference on an MNIST dataset. - [file-dataset-image-inference-mnist.ipynb](./file-dataset-image-inference-mnist.ipynb) demonstrates how to run batch inference on an MNIST dataset.
- [tabular-dataset-inference-iris.ipynb](./tabular-dataset-inference-iris.ipynb) demonstrates how to run batch inference on an IRIS dataset. - [tabular-dataset-inference-iris.ipynb](./tabular-dataset-inference-iris.ipynb) demonstrates how to run batch inference on an IRIS dataset.
![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/contrib/batch_inferencing/README.png) ![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/machine-learning-pipelines/parallel-run/README.png)

View File

@@ -12,7 +12,7 @@
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": [ "source": [
"![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/machine-learning-pipelines/contrib/batch_inferencing/file-dataset-image-inference-mnist.png)" "![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/machine-learning-pipelines/parallel-run/file-dataset-image-inference-mnist.png)"
] ]
}, },
{ {
@@ -23,6 +23,11 @@
"\n", "\n",
"In this notebook, we will demonstrate how to make predictions on large quantities of data asynchronously using the ML pipelines with Azure Machine Learning. Batch inference (or batch scoring) provides cost-effective inference, with unparalleled throughput for asynchronous applications. Batch prediction pipelines can scale to perform inference on terabytes of production data. Batch prediction is optimized for high throughput, fire-and-forget predictions for a large collection of data.\n", "In this notebook, we will demonstrate how to make predictions on large quantities of data asynchronously using the ML pipelines with Azure Machine Learning. Batch inference (or batch scoring) provides cost-effective inference, with unparalleled throughput for asynchronous applications. Batch prediction pipelines can scale to perform inference on terabytes of production data. Batch prediction is optimized for high throughput, fire-and-forget predictions for a large collection of data.\n",
"\n", "\n",
"> **Note**\n",
"This notebook uses public preview functionality (ParallelRunStep). Please install the azureml-contrib-pipeline-steps package before running this notebook.\n",
"```\n",
"pip install azureml-contrib-pipeline-steps\n",
"```\n",
"> **Tip**\n", "> **Tip**\n",
"If your system requires low-latency processing (to process a single document or small set of documents quickly), use [real-time scoring](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-consume-web-service) instead of batch prediction.\n", "If your system requires low-latency processing (to process a single document or small set of documents quickly), use [real-time scoring](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-consume-web-service) instead of batch prediction.\n",
"\n", "\n",
@@ -519,9 +524,6 @@
"name": "tracych" "name": "tracych"
} }
], ],
"friendly_name": "MNIST data inferencing using ParallelRunStep",
"exclude_from_index": false,
"index_order": 1,
"category": "Other notebooks", "category": "Other notebooks",
"compute": [ "compute": [
"AML Compute" "AML Compute"
@@ -532,14 +534,12 @@
"deployment": [ "deployment": [
"None" "None"
], ],
"exclude_from_index": false,
"framework": [ "framework": [
"None" "None"
], ],
"tags": [ "friendly_name": "MNIST data inferencing using ParallelRunStep",
"Batch Inferencing", "index_order": 1,
"Pipeline"
],
"task": "Digit identification",
"kernelspec": { "kernelspec": {
"display_name": "Python 3.6", "display_name": "Python 3.6",
"language": "python", "language": "python",
@@ -556,7 +556,12 @@
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.6.9" "version": "3.6.9"
} },
"tags": [
"Batch Inferencing",
"Pipeline"
],
"task": "Digit identification"
}, },
"nbformat": 4, "nbformat": 4,
"nbformat_minor": 2 "nbformat_minor": 2

View File

@@ -4,7 +4,7 @@
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Copyright (c) Microsoft Corporation. All rights reserved. \n", "Copyright (c) Microsoft Corporation. All rights reserved.\n",
"Licensed under the MIT License." "Licensed under the MIT License."
] ]
}, },
@@ -12,7 +12,7 @@
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": [ "source": [
"![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/machine-learning-pipelines/contrib/batch_inferencing/tabular-dataset-inference-iris.png)" "![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/machine-learning-pipelines/parallel-run/tabular-dataset-inference-iris.png)"
] ]
}, },
{ {
@@ -23,6 +23,11 @@
"\n", "\n",
"In this notebook, we will demonstrate how to make predictions on large quantities of data asynchronously using the ML pipelines with Azure Machine Learning. Batch inference (or batch scoring) provides cost-effective inference, with unparalleled throughput for asynchronous applications. Batch prediction pipelines can scale to perform inference on terabytes of production data. Batch prediction is optimized for high throughput, fire-and-forget predictions for a large collection of data.\n", "In this notebook, we will demonstrate how to make predictions on large quantities of data asynchronously using the ML pipelines with Azure Machine Learning. Batch inference (or batch scoring) provides cost-effective inference, with unparalleled throughput for asynchronous applications. Batch prediction pipelines can scale to perform inference on terabytes of production data. Batch prediction is optimized for high throughput, fire-and-forget predictions for a large collection of data.\n",
"\n", "\n",
"> **Note**\n",
"This notebook uses public preview functionality (ParallelRunStep). Please install the azureml-contrib-pipeline-steps package before running this notebook.\n",
"```\n",
"pip install azureml-contrib-pipeline-steps\n",
"```\n",
"> **Tip**\n", "> **Tip**\n",
"If your system requires low-latency processing (to process a single document or small set of documents quickly), use [real-time scoring](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-consume-web-service) instead of batch prediction.\n", "If your system requires low-latency processing (to process a single document or small set of documents quickly), use [real-time scoring](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-consume-web-service) instead of batch prediction.\n",
"\n", "\n",
@@ -494,9 +499,6 @@
"name": "tracych" "name": "tracych"
} }
], ],
"friendly_name": "IRIS data inferencing using ParallelRunStep",
"exclude_from_index": false,
"index_order": 1,
"category": "Other notebooks", "category": "Other notebooks",
"compute": [ "compute": [
"AML Compute" "AML Compute"
@@ -507,14 +509,12 @@
"deployment": [ "deployment": [
"None" "None"
], ],
"exclude_from_index": false,
"framework": [ "framework": [
"None" "None"
], ],
"tags": [ "friendly_name": "IRIS data inferencing using ParallelRunStep",
"Batch Inferencing", "index_order": 1,
"Pipeline"
],
"task": "Recognize flower type",
"kernelspec": { "kernelspec": {
"display_name": "Python 3.6", "display_name": "Python 3.6",
"language": "python", "language": "python",
@@ -531,7 +531,12 @@
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.6.2" "version": "3.6.2"
} },
"tags": [
"Batch Inferencing",
"Pipeline"
],
"task": "Recognize flower type"
}, },
"nbformat": 4, "nbformat": 4,
"nbformat_minor": 2 "nbformat_minor": 2

View File

@@ -15,6 +15,13 @@
"![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/machine-learning-pipelines/pipeline-batch-scoring/pipeline-batch-scoring.png)" "![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/machine-learning-pipelines/pipeline-batch-scoring/pipeline-batch-scoring.png)"
] ]
}, },
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Note**: Azure Machine Learning recently released ParallelRunStep for public preview. It lets you parallelize your workload across many compute nodes without the difficulty of orchestrating worker pools and queues. See the [batch inference notebooks](../../../contrib/batch_inferencing/) for examples on how to get started."
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
@@ -553,8 +560,25 @@
"response = requests.post(rest_endpoint, \n", "response = requests.post(rest_endpoint, \n",
" headers=aad_token, \n", " headers=aad_token, \n",
" json={\"ExperimentName\": \"batch_scoring\",\n", " json={\"ExperimentName\": \"batch_scoring\",\n",
" \"ParameterAssignments\": {\"param_batch_size\": 50}})\n", " \"ParameterAssignments\": {\"param_batch_size\": 50}})"
"run_id = response.json()[\"Id\"]" ]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"try:\n",
" response.raise_for_status()\n",
"except Exception: \n",
" raise Exception('Received bad response from the endpoint: {}\\n'\n",
" 'Response Code: {}\\n'\n",
" 'Headers: {}\\n'\n",
" 'Content: {}'.format(rest_endpoint, response.status_code, response.headers, response.content))\n",
"\n",
"run_id = response.json().get('Id')\n",
"print('Submitted pipeline run: ', run_id)"
] ]
}, },
{ {

View File

@@ -1,207 +0,0 @@
# Original source: https://github.com/pytorch/examples/blob/master/fast_neural_style/neural_style/neural_style.py
import argparse
import os
import sys
import re
from PIL import Image
import torch
from torchvision import transforms
from mpi4py import MPI
def load_image(filename, size=None, scale=None):
    """Open an image file and optionally resize it.

    Args:
        filename: Path of the image, passed to ``Image.open``.
        size: If not None, the image is resized to a ``size`` x ``size``
            square. Takes precedence over ``scale``.
        scale: If not None (and ``size`` is None), both dimensions are
            divided by this factor and truncated to ``int``.

    Returns:
        The opened image, resized when ``size`` or ``scale`` was given.
    """
    img = Image.open(filename)
    # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same
    # resampling filter under its current name.
    resample = Image.LANCZOS
    if size is not None:
        img = img.resize((size, size), resample)
    elif scale is not None:
        img = img.resize((int(img.size[0] / scale), int(img.size[1] / scale)), resample)
    return img
def save_image(filename, data):
    """Write a tensor to ``filename`` as an image.

    The values are clamped to [0, 255] on a copy of ``data``, converted to a
    NumPy array, transposed with axes (1, 2, 0) and cast to ``uint8`` before
    being handed to ``Image.fromarray``.

    Args:
        filename: Destination path for the image.
        data: Tensor to save; presumably channels-first (C, H, W), given the
            (1, 2, 0) transpose -- confirm against callers.
    """
    pixels = data.clone().clamp(0, 255).numpy()
    pixels = pixels.transpose(1, 2, 0).astype("uint8")
    Image.fromarray(pixels).save(filename)
class TransformerNet(torch.nn.Module):
    """Image transformation network used for style transfer.

    Three convolution stages, five residual blocks, two upsampling
    convolution stages and a final convolution. Every stage except the
    residual blocks and the final convolution is followed by an instance
    norm and a ReLU.
    """

    def __init__(self):
        super().__init__()
        # NOTE: attribute names double as state_dict keys, so they must stay
        # exactly as they are for saved checkpoints to load.
        # Initial convolution layers
        self.conv1 = ConvLayer(3, 32, kernel_size=9, stride=1)
        self.in1 = torch.nn.InstanceNorm2d(32, affine=True)
        self.conv2 = ConvLayer(32, 64, kernel_size=3, stride=2)
        self.in2 = torch.nn.InstanceNorm2d(64, affine=True)
        self.conv3 = ConvLayer(64, 128, kernel_size=3, stride=2)
        self.in3 = torch.nn.InstanceNorm2d(128, affine=True)
        # Residual layers
        self.res1 = ResidualBlock(128)
        self.res2 = ResidualBlock(128)
        self.res3 = ResidualBlock(128)
        self.res4 = ResidualBlock(128)
        self.res5 = ResidualBlock(128)
        # Upsampling layers
        self.deconv1 = UpsampleConvLayer(128, 64, kernel_size=3, stride=1, upsample=2)
        self.in4 = torch.nn.InstanceNorm2d(64, affine=True)
        self.deconv2 = UpsampleConvLayer(64, 32, kernel_size=3, stride=1, upsample=2)
        self.in5 = torch.nn.InstanceNorm2d(32, affine=True)
        self.deconv3 = ConvLayer(32, 3, kernel_size=9, stride=1)
        # Non-linearity shared by all stages
        self.relu = torch.nn.ReLU()

    def forward(self, X):
        """Run ``X`` through the encoder, residual and decoder stages."""
        encoder = (
            (self.conv1, self.in1),
            (self.conv2, self.in2),
            (self.conv3, self.in3),
        )
        residuals = (self.res1, self.res2, self.res3, self.res4, self.res5)
        decoder = (
            (self.deconv1, self.in4),
            (self.deconv2, self.in5),
        )

        y = X
        for conv, norm in encoder:
            y = self.relu(norm(conv(y)))
        for block in residuals:
            y = block(y)
        for deconv, norm in decoder:
            y = self.relu(norm(deconv(y)))
        # the last convolution has no normalisation or activation
        return self.deconv3(y)
class ConvLayer(torch.nn.Module):
    """Reflection-pad the input by ``kernel_size // 2``, then convolve it.

    Args:
        in_channels: Number of input channels of the convolution.
        out_channels: Number of output channels of the convolution.
        kernel_size: Convolution kernel size; also determines the padding.
        stride: Convolution stride.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride):
        super().__init__()
        self.reflection_pad = torch.nn.ReflectionPad2d(kernel_size // 2)
        self.conv2d = torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride)

    def forward(self, x):
        """Return the convolution of the reflection-padded ``x``."""
        return self.conv2d(self.reflection_pad(x))
class ResidualBlock(torch.nn.Module):
    """Residual block: two conv + instance-norm stages plus a skip connection.

    introduced in: https://arxiv.org/abs/1512.03385
    recommended architecture: http://torch.ch/blog/2016/02/04/resnets.html
    """

    def __init__(self, channels):
        super().__init__()
        self.conv1 = ConvLayer(channels, channels, kernel_size=3, stride=1)
        self.in1 = torch.nn.InstanceNorm2d(channels, affine=True)
        self.conv2 = ConvLayer(channels, channels, kernel_size=3, stride=1)
        self.in2 = torch.nn.InstanceNorm2d(channels, affine=True)
        self.relu = torch.nn.ReLU()

    def forward(self, x):
        """Return ``x`` plus the output of the two conv stages applied to it."""
        out = self.conv1(x)
        out = self.in1(out)
        out = self.relu(out)
        # the second stage has no activation before the skip connection
        out = self.conv2(out)
        out = self.in2(out)
        return out + x
class UpsampleConvLayer(torch.nn.Module):
    """Optionally upsample the input, then reflection-pad and convolve it.

    Upsampling followed by a convolution gives better results than
    ConvTranspose2d.
    ref: http://distill.pub/2016/deconv-checkerboard/

    Args:
        in_channels: Number of input channels of the convolution.
        out_channels: Number of output channels of the convolution.
        kernel_size: Convolution kernel size; the padding is ``kernel_size // 2``.
        stride: Convolution stride.
        upsample: Nearest-neighbour scale factor; when falsy, no upsampling
            layer is created or applied.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, upsample=None):
        super().__init__()
        self.upsample = upsample
        if upsample:
            self.upsample_layer = torch.nn.Upsample(mode='nearest', scale_factor=upsample)
        self.reflection_pad = torch.nn.ReflectionPad2d(kernel_size // 2)
        self.conv2d = torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride)

    def forward(self, x):
        """Return the convolution of the (optionally upsampled) padded input."""
        if self.upsample:
            x = self.upsample_layer(x)
        return self.conv2d(self.reflection_pad(x))
def stylize(args, comm):
    """Stylize this rank's share of the images in ``args.content_dir``.

    The sorted file list is split into contiguous slices, one per rank. Each
    rank runs its slice through the style model, writes the results to
    ``args.output_dir`` under the same file names, and the output paths are
    gathered on rank 0.

    Args:
        args: Parsed command-line arguments. Reads ``cuda``, ``model_dir``,
            ``style``, ``content_dir``, ``content_scale`` and ``output_dir``.
        comm: Communicator providing ``Get_rank``, ``Get_size`` and ``gather``
            (``MPI.COMM_WORLD`` in ``main``).
    """
    rank = comm.Get_rank()
    size = comm.Get_size()
    device = torch.device("cuda" if args.cuda else "cpu")

    # The transform does not depend on the image, so build it once instead of
    # once per file. ToTensor output is multiplied by 255 before the model.
    content_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Lambda(lambda x: x.mul(255))
    ])

    with torch.no_grad():
        style_model = TransformerNet()
        # map_location lets a checkpoint saved on one device load on another
        state_dict = torch.load(os.path.join(args.model_dir, args.style + ".pth"),
                                map_location=device)
        # remove saved deprecated running_* keys in InstanceNorm from the checkpoint
        for k in list(state_dict.keys()):
            if re.search(r'in\d+\.running_(mean|var)$', k):
                del state_dict[k]
        style_model.load_state_dict(state_dict)
        style_model.to(device)

        filenames = sorted(os.listdir(args.content_dir))
        # Contiguous slices; the first `extra` ranks take one more file each so
        # that the remainder of len(filenames) / size is not silently dropped.
        base, extra = divmod(len(filenames), size)
        start = rank * base + min(rank, extra)
        stop = start + base + (1 if rank < extra else 0)
        partitioned_filenames = filenames[start:stop]
        print("RANK {} - is processing {} images out of the total {}".format(rank, len(partitioned_filenames),
                                                                             len(filenames)))

        output_paths = []
        for filename in partitioned_filenames:
            full_path = os.path.join(args.content_dir, filename)
            content_image = load_image(full_path, scale=args.content_scale)
            content_image = content_transform(content_image)
            content_image = content_image.unsqueeze(0).to(device)

            output = style_model(content_image).cpu()

            output_path = os.path.join(args.output_dir, filename)
            save_image(output_path, output[0])
            output_paths.append(output_path)

    print("RANK {} - number of pre-aggregated output files {}".format(rank, len(output_paths)))

    # gather returns one list of paths per rank on the root
    output_paths_list = comm.gather(output_paths, root=0)
    if rank == 0:
        # count files, not per-rank lists
        total_outputs = sum(len(paths) for paths in output_paths_list)
        print("RANK {} - number of aggregated output files {}".format(rank, total_outputs))

    print("RANK {} - end".format(rank))
def main():
    """Parse the command line and run style transfer across the MPI ranks.

    Exits with status 1 when ``--cuda`` is non-zero but CUDA is unavailable.
    Creates ``--output-dir`` if needed before calling ``stylize``.
    """
    arg_parser = argparse.ArgumentParser(description="parser for fast-neural-style")
    arg_parser.add_argument("--content-scale", type=float, default=None,
                            help="factor for scaling down the content image")
    arg_parser.add_argument("--model-dir", type=str, required=True,
                            help="saved model to be used for stylizing the image.")
    arg_parser.add_argument("--cuda", type=int, required=True,
                            help="set it to 1 for running on GPU, 0 for CPU")
    # stylize() builds the checkpoint file name from the style, so omitting it
    # used to fail later with a TypeError; reject it at parse time instead.
    arg_parser.add_argument("--style", type=str, required=True, help="style name")
    arg_parser.add_argument("--content-dir", type=str, required=True,
                            help="directory holding the images")
    arg_parser.add_argument("--output-dir", type=str, required=True,
                            help="directory holding the output images")
    args = arg_parser.parse_args()

    comm = MPI.COMM_WORLD

    if args.cuda and not torch.cuda.is_available():
        print("ERROR: cuda is not available, try running on CPU")
        sys.exit(1)

    os.makedirs(args.output_dir, exist_ok=True)
    stylize(args, comm)


if __name__ == "__main__":
    main()

View File

@@ -24,7 +24,13 @@
"Using modified code from `pytorch`'s neural style [example](https://pytorch.org/tutorials/advanced/neural_style_tutorial.html), we show how to setup a pipeline for doing style transfer on video. The pipeline has following steps:\n", "Using modified code from `pytorch`'s neural style [example](https://pytorch.org/tutorials/advanced/neural_style_tutorial.html), we show how to setup a pipeline for doing style transfer on video. The pipeline has following steps:\n",
"1. Split a video into images\n", "1. Split a video into images\n",
"2. Run neural style on each image using one of the provided models (from `pytorch` pretrained models for this example).\n", "2. Run neural style on each image using one of the provided models (from `pytorch` pretrained models for this example).\n",
"3. Stitch the image back into a video." "3. Stitch the image back into a video.\n",
"\n",
"> **Note**\n",
"This notebook uses public preview functionality (ParallelRunStep). Please install the `azureml-contrib-pipeline-steps` package before running this notebook.\n",
"```\n",
"pip install azureml-contrib-pipeline-steps\n",
"```"
] ]
}, },
{ {
@@ -50,19 +56,25 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"import os\n", "# Check core SDK version number\n",
"import azureml.core\n",
"\n",
"print(\"SDK version:\", azureml.core.VERSION)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core import Workspace, Experiment\n", "from azureml.core import Workspace, Experiment\n",
"\n", "\n",
"ws = Workspace.from_config()\n", "ws = Workspace.from_config()\n",
"print('Workspace name: ' + ws.name, \n", "print('Workspace name: ' + ws.name, \n",
" 'Azure region: ' + ws.location, \n", " 'Azure region: ' + ws.location, \n",
" 'Subscription id: ' + ws.subscription_id, \n", " 'Subscription id: ' + ws.subscription_id, \n",
" 'Resource group: ' + ws.resource_group, sep = '\\n')\n", " 'Resource group: ' + ws.resource_group, sep = '\\n')"
"\n",
"scripts_folder = \"scripts_folder\"\n",
"\n",
"if not os.path.isdir(scripts_folder):\n",
" os.mkdir(scripts_folder)"
] ]
}, },
{ {
@@ -75,11 +87,96 @@
"from azureml.core.datastore import Datastore\n", "from azureml.core.datastore import Datastore\n",
"from azureml.data.data_reference import DataReference\n", "from azureml.data.data_reference import DataReference\n",
"from azureml.pipeline.core import Pipeline, PipelineData\n", "from azureml.pipeline.core import Pipeline, PipelineData\n",
"from azureml.pipeline.steps import PythonScriptStep, MpiStep\n", "from azureml.pipeline.steps import PythonScriptStep\n",
"from azureml.core.runconfig import CondaDependencies, RunConfiguration\n", "from azureml.core.runconfig import CondaDependencies, RunConfiguration\n",
"from azureml.core.compute_target import ComputeTargetException" "from azureml.core.compute_target import ComputeTargetException"
] ]
}, },
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Download models"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"# create directory for model\n",
"model_dir = 'models'\n",
"if not os.path.isdir(model_dir):\n",
" os.mkdir(model_dir)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import urllib.request\n",
"\n",
"def download_model(model_name):\n",
" # downloaded models from https://pytorch.org/tutorials/advanced/neural_style_tutorial.html are kept here\n",
" url=\"https://pipelinedata.blob.core.windows.net/styletransfer/saved_models/\" + model_name\n",
" local_path = os.path.join(model_dir, model_name)\n",
" urllib.request.urlretrieve(url, local_path)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Register all Models"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.model import Model\n",
"mosaic_model = None\n",
"candy_model = None\n",
"\n",
"models = Model.list(workspace=ws, tags=['scenario'])\n",
"for m in models:\n",
" print(\"Name:\", m.name,\"\\tVersion:\", m.version, \"\\tDescription:\", m.description, m.tags)\n",
" if m.name == 'mosaic' and mosaic_model is None:\n",
" mosaic_model = m\n",
" elif m.name == 'candy' and candy_model is None:\n",
" candy_model = m\n",
"\n",
"if mosaic_model is None:\n",
" print('Mosaic model does not exist, registering it')\n",
" download_model('mosaic.pth')\n",
" mosaic_model = Model.register(model_path = os.path.join(model_dir, \"mosaic.pth\"),\n",
" model_name = \"mosaic\",\n",
" tags = {'type': \"mosaic\", 'scenario': \"Style transfer using batch inference\"},\n",
" description = \"Style transfer - Mosaic\",\n",
" workspace = ws)\n",
"else:\n",
" print('Reusing existing mosaic model')\n",
" \n",
"\n",
"if candy_model is None:\n",
" print('Candy model does not exist, registering it')\n",
" download_model('candy.pth')\n",
" candy_model = Model.register(model_path = os.path.join(model_dir, \"candy.pth\"),\n",
" model_name = \"candy\",\n",
" tags = {'type': \"candy\", 'scenario': \"Style transfer using batch inference\"},\n",
" description = \"Style transfer - Candy\",\n",
" workspace = ws)\n",
"else:\n",
" print('Reusing existing candy model')"
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
@@ -138,8 +235,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"import shutil\n", "scripts_folder = \"scripts\""
"shutil.copy(\"neural_style_mpi.py\", scripts_folder)"
] ]
}, },
{ {
@@ -148,31 +244,11 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"%%writefile $scripts_folder/process_video.py\n", "process_video_script_file = \"process_video.py\"\n",
"import argparse\n",
"import glob\n",
"import os\n",
"import subprocess\n",
"\n", "\n",
"parser = argparse.ArgumentParser(description=\"Process input video\")\n", "# peek at contents\n",
"parser.add_argument('--input_video', required=True)\n", "with open(os.path.join(scripts_folder, process_video_script_file)) as process_video_file:\n",
"parser.add_argument('--output_audio', required=True)\n", " print(process_video_file.read())"
"parser.add_argument('--output_images', required=True)\n",
"\n",
"args = parser.parse_args()\n",
"\n",
"os.makedirs(args.output_audio, exist_ok=True)\n",
"os.makedirs(args.output_images, exist_ok=True)\n",
"\n",
"subprocess.run(\"ffmpeg -i {} {}/video.aac\"\n",
" .format(args.input_video, args.output_audio),\n",
" shell=True, check=True\n",
" )\n",
"\n",
"subprocess.run(\"ffmpeg -i {} {}/%05d_video.jpg -hide_banner\"\n",
" .format(args.input_video, args.output_images),\n",
" shell=True, check=True\n",
" )"
] ]
}, },
{ {
@@ -181,31 +257,11 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"%%writefile $scripts_folder/stitch_video.py\n", "stitch_video_script_file = \"stitch_video.py\"\n",
"import argparse\n",
"import os\n",
"import subprocess\n",
"\n", "\n",
"parser = argparse.ArgumentParser(description=\"Process input video\")\n", "# peek at contents\n",
"parser.add_argument('--images_dir', required=True)\n", "with open(os.path.join(scripts_folder, stitch_video_script_file)) as stitch_video_file:\n",
"parser.add_argument('--input_audio', required=True)\n", " print(stitch_video_file.read())"
"parser.add_argument('--output_dir', required=True)\n",
"\n",
"args = parser.parse_args()\n",
"\n",
"os.makedirs(args.output_dir, exist_ok=True)\n",
"\n",
"subprocess.run(\"ffmpeg -framerate 30 -i {}/%05d_video.jpg -c:v libx264 -profile:v high -crf 20 -pix_fmt yuv420p \"\n",
" \"-y {}/video_without_audio.mp4\"\n",
" .format(args.images_dir, args.output_dir),\n",
" shell=True, check=True\n",
" )\n",
"\n",
"subprocess.run(\"ffmpeg -i {}/video_without_audio.mp4 -i {}/video.aac -map 0:0 -map 1:0 -vcodec \"\n",
" \"copy -acodec copy -y {}/video_with_audio.mp4\"\n",
" .format(args.output_dir, args.input_audio, args.output_dir),\n",
" shell=True, check=True\n",
" )"
] ]
}, },
{ {
@@ -226,15 +282,6 @@
"video_ds = Datastore.register_azure_blob_container(ws, \"videos\", \"sample-videos\",\n", "video_ds = Datastore.register_azure_blob_container(ws, \"videos\", \"sample-videos\",\n",
" account_name=account_name, overwrite=True)\n", " account_name=account_name, overwrite=True)\n",
"\n", "\n",
"# datastore for models\n",
"models_ds = Datastore.register_azure_blob_container(ws, \"models\", \"styletransfer\", \n",
" account_name=\"pipelinedata\", \n",
" overwrite=True)\n",
" \n",
"# downloaded models from https://pytorch.org/tutorials/advanced/neural_style_tutorial.html are kept here\n",
"models_dir = DataReference(data_reference_name=\"models\", datastore=models_ds, \n",
" path_on_datastore=\"saved_models\", mode=\"download\")\n",
"\n",
"# the default blob store attached to a workspace\n", "# the default blob store attached to a workspace\n",
"default_datastore = ws.get_default_datastore()" "default_datastore = ws.get_default_datastore()"
] ]
@@ -269,14 +316,8 @@
"cd.add_channel(\"conda-forge\")\n", "cd.add_channel(\"conda-forge\")\n",
"cd.add_conda_package(\"ffmpeg\")\n", "cd.add_conda_package(\"ffmpeg\")\n",
"\n", "\n",
"cd.add_channel(\"pytorch\")\n",
"cd.add_conda_package(\"pytorch\")\n",
"cd.add_conda_package(\"torchvision\")\n",
"\n",
"# Runconfig\n", "# Runconfig\n",
"amlcompute_run_config = RunConfiguration(conda_dependencies=cd)\n", "amlcompute_run_config = RunConfiguration(conda_dependencies=cd)\n",
"amlcompute_run_config.environment.docker.enabled = True\n",
"amlcompute_run_config.environment.docker.gpu_support = True\n",
"amlcompute_run_config.environment.docker.base_image = \"pytorch/pytorch\"\n", "amlcompute_run_config.environment.docker.base_image = \"pytorch/pytorch\"\n",
"amlcompute_run_config.environment.spark.precache_packages = False" "amlcompute_run_config.environment.spark.precache_packages = False"
] ]
@@ -288,9 +329,13 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"ffmpeg_audio = PipelineData(name=\"ffmpeg_audio\", datastore=default_datastore)\n", "ffmpeg_audio = PipelineData(name=\"ffmpeg_audio\", datastore=default_datastore)\n",
"ffmpeg_images = PipelineData(name=\"ffmpeg_images\", datastore=default_datastore)\n",
"processed_images = PipelineData(name=\"processed_images\", datastore=default_datastore)\n", "processed_images = PipelineData(name=\"processed_images\", datastore=default_datastore)\n",
"output_video = PipelineData(name=\"output_video\", datastore=default_datastore)" "output_video = PipelineData(name=\"output_video\", datastore=default_datastore)\n",
"\n",
"ffmpeg_images_ds_name = \"ffmpeg_images_data\"\n",
"ffmpeg_images = PipelineData(name=\"ffmpeg_images\", datastore=default_datastore)\n",
"ffmpeg_images_file_dataset = ffmpeg_images.as_dataset()\n",
"ffmpeg_images_named_file_dataset = ffmpeg_images_file_dataset.as_named_input(ffmpeg_images_ds_name)"
] ]
}, },
{ {
@@ -298,7 +343,10 @@
"metadata": {}, "metadata": {},
"source": [ "source": [
"# Define tweakable parameters to pipeline\n", "# Define tweakable parameters to pipeline\n",
"These parameters can be changed when the pipeline is published and rerun from a REST call" "These parameters can be changed when the pipeline is published and rerun from a REST call.\n",
"As part of ParallelRunStep, the following two pipeline parameters will be created, which can be used to override values.\n",
" node_count\n",
" process_count_per_node"
] ]
}, },
{ {
@@ -308,10 +356,8 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"from azureml.pipeline.core.graph import PipelineParameter\n", "from azureml.pipeline.core.graph import PipelineParameter\n",
"# create a parameter for style (one of \"candy\", \"mosaic\", \"rain_princess\", \"udnie\") to transfer the images to\n", "# create a parameter for style (one of \"candy\", \"mosaic\") to transfer the images to\n",
"style_param = PipelineParameter(name=\"style\", default_value=\"mosaic\")\n", "style_param = PipelineParameter(name=\"style\", default_value=\"mosaic\")"
"# create a parameter for the number of nodes to use in step no. 2 (style transfer)\n",
"nodecount_param = PipelineParameter(name=\"nodecount\", default_value=1)"
] ]
}, },
{ {
@@ -334,27 +380,6 @@
" source_directory=scripts_folder\n", " source_directory=scripts_folder\n",
")\n", ")\n",
"\n", "\n",
"# create a MPI step for distributing style transfer step across multiple nodes in AmlCompute \n",
"# using 'nodecount_param' PipelineParameter\n",
"distributed_style_transfer_step = MpiStep(\n",
" name=\"mpi style transfer\",\n",
" script_name=\"neural_style_mpi.py\",\n",
" arguments=[\"--content-dir\", ffmpeg_images,\n",
" \"--output-dir\", processed_images,\n",
" \"--model-dir\", models_dir,\n",
" \"--style\", style_param,\n",
" \"--cuda\", 1\n",
" ],\n",
" compute_target=gpu_cluster,\n",
" node_count=nodecount_param, \n",
" process_count_per_node=1,\n",
" inputs=[models_dir, ffmpeg_images],\n",
" outputs=[processed_images],\n",
" pip_packages=[\"mpi4py\", \"torch\", \"torchvision\"],\n",
" use_gpu=True,\n",
" source_directory=scripts_folder\n",
")\n",
"\n",
"stitch_video_step = PythonScriptStep(\n", "stitch_video_step = PythonScriptStep(\n",
" name=\"stitch\",\n", " name=\"stitch\",\n",
" script_name=\"stitch_video.py\",\n", " script_name=\"stitch_video.py\",\n",
@@ -369,6 +394,76 @@
")" ")"
] ]
}, },
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Create environment, parallel step run config and parallel run step"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core import Environment\n",
"from azureml.core.runconfig import DEFAULT_GPU_IMAGE\n",
"\n",
"parallel_cd = CondaDependencies()\n",
"\n",
"parallel_cd.add_channel(\"pytorch\")\n",
"parallel_cd.add_conda_package(\"pytorch\")\n",
"parallel_cd.add_conda_package(\"torchvision\")\n",
"\n",
"styleenvironment = Environment(name=\"styleenvironment\")\n",
"styleenvironment.python.conda_dependencies=parallel_cd\n",
"styleenvironment.docker.base_image = DEFAULT_GPU_IMAGE"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.contrib.pipeline.steps import ParallelRunConfig\n",
"\n",
"parallel_run_config = ParallelRunConfig(\n",
" environment=styleenvironment,\n",
" entry_script='transform.py',\n",
" output_action='summary_only',\n",
" mini_batch_size=\"1\",\n",
" error_threshold=1,\n",
" source_directory=scripts_folder,\n",
" compute_target=gpu_cluster, \n",
" node_count=3)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.contrib.pipeline.steps import ParallelRunStep\n",
"from datetime import datetime\n",
"\n",
"parallel_step_name = 'styletransfer-' + datetime.now().strftime('%Y%m%d%H%M')\n",
"\n",
"distributed_style_transfer_step = ParallelRunStep(\n",
" name=parallel_step_name,\n",
" inputs=[ffmpeg_images_named_file_dataset], # Input file share/blob container/file dataset\n",
" output=processed_images, # Output file share/blob container\n",
" models=[mosaic_model, candy_model],\n",
" tags = {'scenario': \"batch inference\", 'type': \"demo\"},\n",
" properties = {'area': \"style transfer\"},\n",
" arguments=[\"--style\", style_param],\n",
" parallel_run_config=parallel_run_config,\n",
" allow_reuse=True #[optional - default value True]\n",
")"
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
@@ -383,8 +478,18 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"pipeline = Pipeline(workspace=ws, steps=[stitch_video_step])\n", "pipeline = Pipeline(workspace=ws, steps=[stitch_video_step])\n",
"\n",
"pipeline.validate()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# submit the pipeline and provide values for the PipelineParameters used in the pipeline\n", "# submit the pipeline and provide values for the PipelineParameters used in the pipeline\n",
"pipeline_run = Experiment(ws, 'style_transfer').submit(pipeline, pipeline_parameters={\"style\": \"mosaic\", \"nodecount\": 3})" "pipeline_run = Experiment(ws, 'styletransfer_parallel_mosaic').submit(pipeline)"
] ]
}, },
{ {
@@ -400,10 +505,20 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"# Track pipeline run progress\n",
"from azureml.widgets import RunDetails\n", "from azureml.widgets import RunDetails\n",
"RunDetails(pipeline_run).show()" "RunDetails(pipeline_run).show()"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pipeline_run.wait_for_completion()"
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
@@ -453,24 +568,21 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"published_pipeline = pipeline_run.publish_pipeline(\n", "pipeline_name = \"style-transfer-batch-inference\"\n",
" name=\"batch score style transfer\", description=\"style transfer\", version=\"1.0\")\n", "print(pipeline_name)\n",
"\n", "\n",
"published_pipeline" "published_pipeline = pipeline.publish(\n",
" name=pipeline_name, \n",
" description=pipeline_name)\n",
"print(\"Newly published pipeline id: {}\".format(published_pipeline.id))"
] ]
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": [ "source": [
"## Get published pipeline\n", "# Get published pipeline\n",
"\n", "This is another way to get the published pipeline."
"You can get the published pipeline using **pipeline id**.\n",
"\n",
"To get all the published pipelines for a given workspace(ws): \n",
"```css\n",
"all_pub_pipelines = PublishedPipeline.get_all(ws)\n",
"```"
] ]
}, },
{ {
@@ -481,25 +593,30 @@
"source": [ "source": [
"from azureml.pipeline.core import PublishedPipeline\n", "from azureml.pipeline.core import PublishedPipeline\n",
"\n", "\n",
"pipeline_id = published_pipeline.id # use your published pipeline id\n", "# You could retrieve all pipelines that are published, or \n",
"published_pipeline = PublishedPipeline.get(ws, pipeline_id)\n", "# just get the published pipeline object that you have the ID for.\n",
"\n", "\n",
"published_pipeline" "# Get all published pipeline objects in the workspace\n",
"all_pub_pipelines = PublishedPipeline.list(ws)\n",
"\n",
"# We will iterate through the list of published pipelines and \n",
"# use the last ID in the list for Schedule operations: \n",
"print(\"Published pipelines found in the workspace:\")\n",
"for pub_pipeline in all_pub_pipelines:\n",
" print(\"Name:\", pub_pipeline.name,\"\\tDescription:\", pub_pipeline.description, \"\\tId:\", pub_pipeline.id, \"\\tStatus:\", pub_pipeline.status)\n",
" if(pub_pipeline.name == pipeline_name):\n",
" published_pipeline = pub_pipeline\n",
"\n",
"print(\"Published pipeline id: {}\".format(published_pipeline.id))"
] ]
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": [ "source": [
"# Re-run pipeline through REST calls for other styles" "# Run pipeline through REST calls for other styles\n",
] "\n",
}, "# Get AAD token"
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Get AAD token\n",
"[This notebook](https://aka.ms/pl-restep-auth) shows how to authenticate to AML workspace."
] ]
}, },
{ {
@@ -512,14 +629,14 @@
"import requests\n", "import requests\n",
"\n", "\n",
"auth = InteractiveLoginAuthentication()\n", "auth = InteractiveLoginAuthentication()\n",
"aad_token = auth.get_authentication_header()\n" "aad_token = auth.get_authentication_header()"
] ]
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": [ "source": [
"## Get endpoint URL" "# Get endpoint URL"
] ]
}, },
{ {
@@ -528,14 +645,15 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"rest_endpoint = published_pipeline.endpoint" "rest_endpoint = published_pipeline.endpoint\n",
"print(\"Pipeline REST endpoing: {}\".format(rest_endpoint))"
] ]
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": [ "source": [
"## Send request and monitor" "# Send request and monitor"
] ]
}, },
{ {
@@ -544,60 +662,24 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"# run the pipeline using PipelineParameter values style='candy' and nodecount=2\n", "experiment_name = 'styletransfer_parallel_candy'\n",
"response = requests.post(rest_endpoint, \n", "response = requests.post(rest_endpoint, \n",
" headers=aad_token,\n", " headers=aad_token,\n",
" json={\"ExperimentName\": \"style_transfer\",\n", " json={\"ExperimentName\": experiment_name,\n",
" \"ParameterAssignments\": {\"style\": \"candy\", \"nodecount\": 2}}) \n", " \"ParameterAssignments\": {\"style\": \"candy\", \"aml_node_count\": 2}})\n",
"run_id = response.json()[\"Id\"]\n", "run_id = response.json()[\"Id\"]\n",
"\n", "\n",
"from azureml.pipeline.core.run import PipelineRun\n", "from azureml.pipeline.core.run import PipelineRun\n",
"published_pipeline_run_candy = PipelineRun(ws.experiments[\"style_transfer\"], run_id)\n", "published_pipeline_run_candy = PipelineRun(ws.experiments[experiment_name], run_id)\n",
"\n", "\n",
"RunDetails(published_pipeline_run_candy).show()" "RunDetails(published_pipeline_run_candy).show()"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# run the pipeline using PipelineParameter values style='rain_princess' and nodecount=3\n",
"response = requests.post(rest_endpoint, \n",
" headers=aad_token,\n",
" json={\"ExperimentName\": \"style_transfer\",\n",
" \"ParameterAssignments\": {\"style\": \"rain_princess\", \"nodecount\": 3}}) \n",
"run_id = response.json()[\"Id\"]\n",
"\n",
"published_pipeline_run_rain = PipelineRun(ws.experiments[\"style_transfer\"], run_id)\n",
"\n",
"RunDetails(published_pipeline_run_rain).show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# run the pipeline using PipelineParameter values style='udnie' and nodecount=4\n",
"response = requests.post(rest_endpoint, \n",
" headers=aad_token,\n",
" json={\"ExperimentName\": \"style_transfer\",\n",
" \"ParameterAssignments\": {\"style\": \"udnie\", \"nodecount\": 3}}) \n",
"run_id = response.json()[\"Id\"]\n",
"\n",
"published_pipeline_run_udnie = PipelineRun(ws.experiments[\"style_transfer\"], run_id)\n",
"\n",
"RunDetails(published_pipeline_run_udnie).show()"
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
"source": [ "source": [
"## Download output from re-run" "# Download output from re-run"
] ]
}, },
{ {
@@ -606,9 +688,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"published_pipeline_run_candy.wait_for_completion()\n", "published_pipeline_run_candy.wait_for_completion()"
"published_pipeline_run_rain.wait_for_completion()\n",
"published_pipeline_run_udnie.wait_for_completion()"
] ]
}, },
{ {
@@ -617,18 +697,30 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"download_video(published_pipeline_run_candy, target_dir=\"output_video_candy\")\n", "download_video(published_pipeline_run_candy, target_dir=\"output_video_candy\")"
"download_video(published_pipeline_run_rain, target_dir=\"output_video_rain_princess\")\n",
"download_video(published_pipeline_run_udnie, target_dir=\"output_video_udnie\")"
] ]
} }
], ],
"metadata": { "metadata": {
"authors": [ "authors": [
{ {
"name": "sanpil" "name": "sanpil joringer asraniwa pansav tracych"
} }
], ],
"category": "Other notebooks",
"compute": [
"AML Compute"
],
"datasets": [],
"deployment": [
"None"
],
"exclude_from_index": true,
"framework": [
"None"
],
"friendly_name": "Style transfer using ParallelRunStep",
"index_order": 1,
"kernelspec": { "kernelspec": {
"display_name": "Python 3.6", "display_name": "Python 3.6",
"language": "python", "language": "python",
@@ -644,8 +736,13 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.6.7" "version": "3.6.9"
} },
"tags": [
"Batch Inferencing",
"Pipeline"
],
"task": "Style transfer"
}, },
"nbformat": 4, "nbformat": 4,
"nbformat_minor": 2 "nbformat_minor": 2

View File

@@ -2,5 +2,6 @@ name: pipeline-style-transfer
dependencies: dependencies:
- pip: - pip:
- azureml-sdk - azureml-sdk
- azureml-contrib-pipeline-steps
- azureml-widgets - azureml-widgets
- requests - requests

View File

@@ -0,0 +1,22 @@
import argparse
import glob
import os
import subprocess
# Split an input video into an AAC audio file and a numbered sequence of JPEG
# frames by invoking ffmpeg twice.
#
# Command-line arguments:
#   --input_video    path of the video file handed to ffmpeg via -i
#   --output_audio   directory that receives video.aac
#   --output_images  directory that receives the %05d_video.jpg frames
parser = argparse.ArgumentParser(description="Process input video")
parser.add_argument('--input_video', required=True)
parser.add_argument('--output_audio', required=True)
parser.add_argument('--output_images', required=True)
args = parser.parse_args()

# Make sure both output directories exist before ffmpeg writes into them.
os.makedirs(args.output_audio, exist_ok=True)
os.makedirs(args.output_images, exist_ok=True)

# The commands are passed as argument lists (no shell), so paths containing
# spaces or shell metacharacters reach ffmpeg verbatim instead of being
# re-parsed by /bin/sh. check=True raises CalledProcessError on a non-zero
# ffmpeg exit status.
audio_target = "{}/video.aac".format(args.output_audio)
subprocess.run(["ffmpeg", "-i", args.input_video, audio_target],
               check=True)

# %05d is expanded by ffmpeg itself into a zero-padded frame counter.
frames_pattern = "{}/%05d_video.jpg".format(args.output_images)
subprocess.run(["ffmpeg", "-i", args.input_video, frames_pattern, "-hide_banner"],
               check=True)

View File

@@ -0,0 +1,22 @@
import argparse
import os
import subprocess
# Re-assemble a video from a directory of numbered JPEG frames and then mux an
# AAC audio file into it, by invoking ffmpeg twice.
#
# Command-line arguments:
#   --images_dir   directory containing the %05d_video.jpg frames
#   --input_audio  directory containing video.aac
#   --output_dir   directory that receives video_without_audio.mp4 and
#                  video_with_audio.mp4
parser = argparse.ArgumentParser(description="Process input video")
parser.add_argument('--images_dir', required=True)
parser.add_argument('--input_audio', required=True)
parser.add_argument('--output_dir', required=True)
args = parser.parse_args()

# Make sure the output directory exists before ffmpeg writes into it.
os.makedirs(args.output_dir, exist_ok=True)

silent_video = "{}/video_without_audio.mp4".format(args.output_dir)
final_video = "{}/video_with_audio.mp4".format(args.output_dir)

# The commands are passed as argument lists (no shell), so paths containing
# spaces or shell metacharacters reach ffmpeg verbatim instead of being
# re-parsed by /bin/sh. check=True raises CalledProcessError on a non-zero
# ffmpeg exit status.

# Step 1: encode the frame sequence; %05d is expanded by ffmpeg itself.
subprocess.run(["ffmpeg", "-framerate", "30",
                "-i", "{}/%05d_video.jpg".format(args.images_dir),
                "-c:v", "libx264", "-profile:v", "high", "-crf", "20",
                "-pix_fmt", "yuv420p",
                "-y", silent_video],
               check=True)

# Step 2: copy the video and audio streams into one container without
# re-encoding either of them.
subprocess.run(["ffmpeg",
                "-i", silent_video,
                "-i", "{}/video.aac".format(args.input_audio),
                "-map", "0:0", "-map", "1:0",
                "-vcodec", "copy", "-acodec", "copy",
                "-y", final_video],
               check=True)

View File

@@ -1,28 +1,17 @@
# Original source: https://github.com/pytorch/examples/blob/master/fast_neural_style/neural_style/neural_style.py
import argparse import argparse
import os import os
import sys import sys
import re import re
import json
import traceback
from PIL import Image from PIL import Image
import torch import torch
from torchvision import transforms from torchvision import transforms
from azureml.core.model import Model
def load_image(filename, size=None, scale=None): style_model = None
img = Image.open(filename)
if size is not None:
img = img.resize((size, size), Image.ANTIALIAS)
elif scale is not None:
img = img.resize((int(img.size[0] / scale), int(img.size[1] / scale)), Image.ANTIALIAS)
return img
def save_image(filename, data):
img = data.clone().clamp(0, 255).numpy()
img = img.transpose(1, 2, 0).astype("uint8")
img = Image.fromarray(img)
img.save(filename)
class TransformerNet(torch.nn.Module): class TransformerNet(torch.nn.Module):
@@ -125,60 +114,59 @@ class UpsampleConvLayer(torch.nn.Module):
return out return out
def stylize(args): def load_image(filename):
device = torch.device("cuda" if args.cuda else "cpu") img = Image.open(filename)
return img
def save_image(filename, data):
img = data.clone().clamp(0, 255).numpy()
img = img.transpose(1, 2, 0).astype("uint8")
img = Image.fromarray(img)
img.save(filename)
def init():
global output_path, args
global style_model, device
output_path = os.environ['AZUREML_BI_OUTPUT_PATH']
print(f'output path: {output_path}')
print(f'Cuda available? {torch.cuda.is_available()}')
arg_parser = argparse.ArgumentParser(description="parser for fast-neural-style")
arg_parser.add_argument("--style", type=str, help="style name")
args, unknown_args = arg_parser.parse_known_args()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
with torch.no_grad(): with torch.no_grad():
style_model = TransformerNet() style_model = TransformerNet()
state_dict = torch.load(os.path.join(args.model_dir, args.style+".pth")) model_path = Model.get_model_path(args.style)
state_dict = torch.load(os.path.join(model_path))
# remove saved deprecated running_* keys in InstanceNorm from the checkpoint # remove saved deprecated running_* keys in InstanceNorm from the checkpoint
for k in list(state_dict.keys()): for k in list(state_dict.keys()):
if re.search(r'in\d+\.running_(mean|var)$', k): if re.search(r'in\d+\.running_(mean|var)$', k):
del state_dict[k] del state_dict[k]
style_model.load_state_dict(state_dict) style_model.load_state_dict(state_dict)
style_model.to(device) style_model.to(device)
print(f'Model loaded successfully. Path: {model_path}')
filenames = os.listdir(args.content_dir)
for filename in filenames: def run(mini_batch):
print("Processing {}".format(filename))
full_path = os.path.join(args.content_dir, filename) result = []
content_image = load_image(full_path, scale=args.content_scale) for image_file_path in mini_batch:
img = load_image(image_file_path)
with torch.no_grad():
content_transform = transforms.Compose([ content_transform = transforms.Compose([
transforms.ToTensor(), transforms.ToTensor(),
transforms.Lambda(lambda x: x.mul(255)) transforms.Lambda(lambda x: x.mul(255))
]) ])
content_image = content_transform(content_image) content_image = content_transform(img)
content_image = content_image.unsqueeze(0).to(device) content_image = content_image.unsqueeze(0).to(device)
output = style_model(content_image).cpu() output = style_model(content_image).cpu()
output_file_path = os.path.join(output_path, os.path.basename(image_file_path))
save_image(output_file_path, output[0])
result.append(output_file_path)
output_path = os.path.join(args.output_dir, filename) return result
save_image(output_path, output[0])
def main():
arg_parser = argparse.ArgumentParser(description="parser for fast-neural-style")
arg_parser.add_argument("--content-scale", type=float, default=None,
help="factor for scaling down the content image")
arg_parser.add_argument("--model-dir", type=str, required=True,
help="saved model to be used for stylizing the image.")
arg_parser.add_argument("--cuda", type=int, required=True,
help="set it to 1 for running on GPU, 0 for CPU")
arg_parser.add_argument("--style", type=str,
help="style name")
arg_parser.add_argument("--content-dir", type=str, required=True,
help="directory holding the images")
arg_parser.add_argument("--output-dir", type=str, required=True,
help="directory holding the output images")
args = arg_parser.parse_args()
if args.cuda and not torch.cuda.is_available():
print("ERROR: cuda is not available, try running on CPU")
sys.exit(1)
os.makedirs(args.output_dir, exist_ok=True)
stylize(args)
if __name__ == "__main__":
main()

View File

@@ -507,7 +507,7 @@
"metadata": {}, "metadata": {},
"source": [ "source": [
"### Create myenv.yml\n", "### Create myenv.yml\n",
"We also need to create an environment file so that Azure Machine Learning can install the necessary packages in the Docker image which are required by your scoring script. In this case, we need to specify conda packages `numpy` and `chainer`." "We also need to create an environment file so that Azure Machine Learning can install the necessary packages in the Docker image which are required by your scoring script. In this case, we need to specify conda packages `numpy` and `chainer`. Please note that you must indicate azureml-defaults with version >= 1.0.45 as a pip dependency, because it contains the functionality needed to host the model as a web service."
] ]
}, },
{ {
@@ -521,6 +521,7 @@
"cd = CondaDependencies.create()\n", "cd = CondaDependencies.create()\n",
"cd.add_conda_package('numpy')\n", "cd.add_conda_package('numpy')\n",
"cd.add_conda_package('chainer')\n", "cd.add_conda_package('chainer')\n",
"cd.add_pip_package(\"azureml-defaults\")\n",
"cd.save_to_file(base_directory='./', conda_file_path='myenv.yml')\n", "cd.save_to_file(base_directory='./', conda_file_path='myenv.yml')\n",
"\n", "\n",
"print(cd.serialize_to_string())" "print(cd.serialize_to_string())"
@@ -544,10 +545,11 @@
"from azureml.core.model import InferenceConfig\n", "from azureml.core.model import InferenceConfig\n",
"from azureml.core.webservice import Webservice\n", "from azureml.core.webservice import Webservice\n",
"from azureml.core.model import Model\n", "from azureml.core.model import Model\n",
"from azureml.core.environment import Environment\n",
"\n", "\n",
"inference_config = InferenceConfig(runtime= \"python\", \n", "\n",
" entry_script=\"chainer_score.py\",\n", "myenv = Environment.from_conda_specification(name=\"myenv\", file_path=\"myenv.yml\")\n",
" conda_file=\"myenv.yml\")\n", "inference_config = InferenceConfig(entry_script=\"chainer_score.py\", environment=myenv)\n",
"\n", "\n",
"aciconfig = AciWebservice.deploy_configuration(cpu_cores=1,\n", "aciconfig = AciWebservice.deploy_configuration(cpu_cores=1,\n",
" auth_enabled=True, # this flag generates API keys to secure access\n", " auth_enabled=True, # this flag generates API keys to secure access\n",
@@ -707,7 +709,7 @@
"metadata": { "metadata": {
"authors": [ "authors": [
{ {
"name": "dipeck" "name": "swatig"
} }
], ],
"category": "training", "category": "training",

View File

@@ -166,7 +166,7 @@ def download_data():
from zipfile import ZipFile from zipfile import ZipFile
# download data # download data
data_file = './fowl_data.zip' data_file = './fowl_data.zip'
download_url = 'https://msdocsdatasets.blob.core.windows.net/pytorchfowl/fowl_data.zip' download_url = 'https://azureopendatastorage.blob.core.windows.net/testpublic/temp/fowl_data.zip'
urllib.request.urlretrieve(download_url, filename=data_file) urllib.request.urlretrieve(download_url, filename=data_file)
# extract files # extract files

View File

@@ -174,7 +174,7 @@
"metadata": {}, "metadata": {},
"source": [ "source": [
"### Download training data\n", "### Download training data\n",
"The dataset we will use (located on a public blob [here](https://msdocsdatasets.blob.core.windows.net/pytorchfowl/fowl_data.zip) as a zip file) consists of about 120 training images each for turkeys and chickens, with 100 validation images for each class. The images are a subset of the [Open Images v5 Dataset](https://storage.googleapis.com/openimages/web/index.html). We will download and extract the dataset as part of our training script `pytorch_train.py`" "The dataset we will use (located on a public blob [here](https://azureopendatastorage.blob.core.windows.net/testpublic/temp/fowl_data.zip) as a zip file) consists of about 120 training images each for turkeys and chickens, with 100 validation images for each class. The images are a subset of the [Open Images v5 Dataset](https://storage.googleapis.com/openimages/web/index.html). We will download and extract the dataset as part of our training script `pytorch_train.py`"
] ]
}, },
{ {
@@ -561,10 +561,11 @@
"from azureml.core.model import InferenceConfig\n", "from azureml.core.model import InferenceConfig\n",
"from azureml.core.webservice import Webservice\n", "from azureml.core.webservice import Webservice\n",
"from azureml.core.model import Model\n", "from azureml.core.model import Model\n",
"from azureml.core.environment import Environment\n",
"\n", "\n",
"inference_config = InferenceConfig(runtime= \"python\", \n", "\n",
" entry_script=\"pytorch_score.py\",\n", "myenv = Environment.from_conda_specification(name=\"myenv\", file_path=\"myenv.yml\")\n",
" conda_file=\"myenv.yml\")\n", "inference_config = InferenceConfig(entry_script=\"pytorch_score.py\", environment=myenv)\n",
"\n", "\n",
"aciconfig = AciWebservice.deploy_configuration(cpu_cores=1, \n", "aciconfig = AciWebservice.deploy_configuration(cpu_cores=1, \n",
" memory_gb=1, \n", " memory_gb=1, \n",
@@ -698,7 +699,7 @@
"metadata": { "metadata": {
"authors": [ "authors": [
{ {
"name": "ninhu" "name": "swatig"
} }
], ],
"category": "training", "category": "training",

View File

@@ -550,7 +550,7 @@
"metadata": { "metadata": {
"authors": [ "authors": [
{ {
"name": "dipeck" "name": "swatig"
} }
], ],
"category": "training", "category": "training",

View File

@@ -908,13 +908,16 @@
"def init():\n", "def init():\n",
" global X, output, sess\n", " global X, output, sess\n",
" tf.reset_default_graph()\n", " tf.reset_default_graph()\n",
" model_root = Model.get_model_path('tf-dnn-mnist')\n", " model_root = os.getenv('AZUREML_MODEL_DIR')\n",
" saver = tf.train.import_meta_graph(os.path.join(model_root, 'mnist-tf.model.meta'))\n", " # the name of the folder in which to look for tensorflow model files\n",
" tf_model_folder = 'model'\n",
" saver = tf.train.import_meta_graph(\n",
" os.path.join(model_root, tf_model_folder, 'mnist-tf.model.meta'))\n",
" X = tf.get_default_graph().get_tensor_by_name(\"network/X:0\")\n", " X = tf.get_default_graph().get_tensor_by_name(\"network/X:0\")\n",
" output = tf.get_default_graph().get_tensor_by_name(\"network/output/MatMul:0\")\n", " output = tf.get_default_graph().get_tensor_by_name(\"network/output/MatMul:0\")\n",
" \n", "\n",
" sess = tf.Session()\n", " sess = tf.Session()\n",
" saver.restore(sess, os.path.join(model_root, 'mnist-tf.model'))\n", " saver.restore(sess, os.path.join(model_root, tf_model_folder, 'mnist-tf.model'))\n",
"\n", "\n",
"def run(raw_data):\n", "def run(raw_data):\n",
" data = np.array(json.loads(raw_data)['data'])\n", " data = np.array(json.loads(raw_data)['data'])\n",
@@ -943,6 +946,7 @@
"cd = CondaDependencies.create()\n", "cd = CondaDependencies.create()\n",
"cd.add_conda_package('numpy')\n", "cd.add_conda_package('numpy')\n",
"cd.add_tensorflow_conda_package()\n", "cd.add_tensorflow_conda_package()\n",
"cd.add_pip_package(\"azureml-defaults\")\n",
"cd.save_to_file(base_directory='./', conda_file_path='myenv.yml')\n", "cd.save_to_file(base_directory='./', conda_file_path='myenv.yml')\n",
"\n", "\n",
"print(cd.serialize_to_string())" "print(cd.serialize_to_string())"
@@ -966,10 +970,11 @@
"from azureml.core.model import InferenceConfig\n", "from azureml.core.model import InferenceConfig\n",
"from azureml.core.webservice import Webservice\n", "from azureml.core.webservice import Webservice\n",
"from azureml.core.model import Model\n", "from azureml.core.model import Model\n",
"from azureml.core.environment import Environment\n",
"\n", "\n",
"inference_config = InferenceConfig(runtime= \"python\", \n", "\n",
" entry_script=\"score.py\",\n", "myenv = Environment.from_conda_specification(name=\"myenv\", file_path=\"myenv.yml\")\n",
" conda_file=\"myenv.yml\")\n", "inference_config = InferenceConfig(entry_script=\"score.py\", environment=myenv)\n",
"\n", "\n",
"aciconfig = AciWebservice.deploy_configuration(cpu_cores=1, \n", "aciconfig = AciWebservice.deploy_configuration(cpu_cores=1, \n",
" memory_gb=1, \n", " memory_gb=1, \n",
@@ -1140,7 +1145,7 @@
"metadata": { "metadata": {
"authors": [ "authors": [
{ {
"name": "ninhu" "name": "swatig"
} }
], ],
"category": "training", "category": "training",

View File

@@ -517,7 +517,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from azureml.train.hyperdrive import *\n", "from azureml.train.hyperdrive import RandomParameterSampling, choice, loguniform\n",
"\n", "\n",
"ps = RandomParameterSampling(\n", "ps = RandomParameterSampling(\n",
" {\n", " {\n",
@@ -562,6 +562,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from azureml.train.hyperdrive import TruncationSelectionPolicy\n",
"policy = TruncationSelectionPolicy(evaluation_interval=2, truncation_percentage=25)" "policy = TruncationSelectionPolicy(evaluation_interval=2, truncation_percentage=25)"
] ]
}, },
@@ -578,12 +579,13 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from azureml.train.hyperdrive import HyperDriveConfig, PrimaryMetricGoal\n",
"htc = HyperDriveConfig(estimator=est, \n", "htc = HyperDriveConfig(estimator=est, \n",
" hyperparameter_sampling=ps, \n", " hyperparameter_sampling=ps, \n",
" policy=policy, \n", " policy=policy, \n",
" primary_metric_name='validation_acc', \n", " primary_metric_name='validation_acc', \n",
" primary_metric_goal=PrimaryMetricGoal.MAXIMIZE, \n", " primary_metric_goal=PrimaryMetricGoal.MAXIMIZE, \n",
" max_total_runs=20,\n", " max_total_runs=15,\n",
" max_concurrent_runs=4)" " max_concurrent_runs=4)"
] ]
}, },
@@ -616,7 +618,6 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from azureml.widgets import RunDetails\n",
"RunDetails(htr).show()" "RunDetails(htr).show()"
] ]
}, },
@@ -721,7 +722,6 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from azureml.widgets import RunDetails\n",
"RunDetails(warm_start_htr).show()" "RunDetails(warm_start_htr).show()"
] ]
}, },
@@ -820,7 +820,6 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from azureml.widgets import RunDetails\n",
"RunDetails(resume_child_runs_htr).show()" "RunDetails(resume_child_runs_htr).show()"
] ]
}, },

View File

@@ -0,0 +1,346 @@
latitude,longitude,temperature,windAngle,windSpeed,elevation
26.536,-81.755,17.8,10.0,2.1,9.0
26.536,-81.755,16.7,360.0,1.5,9.0
26.536,-81.755,16.1,350.0,1.5,9.0
26.536,-81.755,15.0,0.0,0.0,9.0
26.536,-81.755,14.4,350.0,1.5,9.0
26.536,-81.755,0.0,0.0,0.0,9.0
26.536,-81.755,13.9,360.0,2.1,9.0
26.536,-81.755,13.3,350.0,1.5,9.0
26.536,-81.755,13.3,10.0,2.1,9.0
26.536,-81.755,13.3,360.0,1.5,9.0
26.536,-81.755,13.3,0.0,0.0,9.0
26.536,-81.755,12.2,0.0,0.0,9.0
26.536,-81.755,11.7,0.0,0.0,9.0
26.536,-81.755,14.4,0.0,0.0,9.0
26.536,-81.755,17.2,10.0,2.6,9.0
26.536,-81.755,20.0,20.0,2.6,9.0
26.536,-81.755,22.2,10.0,3.6,9.0
26.536,-81.755,23.3,30.0,4.6,9.0
26.536,-81.755,23.3,330.0,2.6,9.0
26.536,-81.755,24.4,0.0,0.0,9.0
26.536,-81.755,25.0,360.0,3.1,9.0
26.536,-81.755,24.4,20.0,4.1,9.0
26.536,-81.755,23.3,10.0,2.6,9.0
26.536,-81.755,21.1,30.0,2.1,9.0
26.536,-81.755,18.3,0.0,0.0,9.0
26.536,-81.755,17.2,30.0,2.1,9.0
26.536,-81.755,15.6,60.0,2.6,9.0
26.536,-81.755,15.6,0.0,0.0,9.0
26.536,-81.755,13.9,60.0,2.6,9.0
26.536,-81.755,12.8,70.0,2.6,9.0
26.536,-81.755,0.0,0.0,0.0,9.0
26.536,-81.755,11.7,70.0,2.1,9.0
26.536,-81.755,12.2,20.0,2.1,9.0
26.536,-81.755,11.7,30.0,1.5,9.0
26.536,-81.755,11.1,40.0,2.1,9.0
26.536,-81.755,12.2,40.0,2.6,9.0
26.536,-81.755,12.2,30.0,2.6,9.0
26.536,-81.755,12.2,0.0,0.0,9.0
26.536,-81.755,15.0,30.0,6.2,9.0
26.536,-81.755,17.2,50.0,3.6,9.0
26.536,-81.755,20.6,60.0,5.1,9.0
26.536,-81.755,22.8,50.0,4.6,9.0
26.536,-81.755,24.4,80.0,6.2,9.0
26.536,-81.755,25.0,100.0,5.7,9.0
26.536,-81.755,25.6,60.0,3.1,9.0
26.536,-81.755,25.6,80.0,4.6,9.0
26.536,-81.755,25.0,90.0,5.1,9.0
26.536,-81.755,24.4,80.0,5.1,9.0
26.536,-81.755,21.1,60.0,2.6,9.0
26.536,-81.755,19.4,70.0,3.6,9.0
26.536,-81.755,18.3,70.0,2.6,9.0
26.536,-81.755,18.3,80.0,2.6,9.0
26.536,-81.755,17.2,60.0,1.5,9.0
26.536,-81.755,16.1,70.0,2.6,9.0
26.536,-81.755,15.6,70.0,2.6,9.0
26.536,-81.755,0.0,0.0,0.0,9.0
26.536,-81.755,16.1,50.0,2.6,9.0
26.536,-81.755,15.6,50.0,2.1,9.0
26.536,-81.755,15.0,50.0,1.5,9.0
26.536,-81.755,15.0,0.0,0.0,9.0
26.536,-81.755,15.0,0.0,0.0,9.0
26.536,-81.755,14.4,0.0,0.0,9.0
26.536,-81.755,14.4,30.0,4.1,9.0
26.536,-81.755,16.1,40.0,1.5,9.0
26.536,-81.755,19.4,0.0,1.5,9.0
26.536,-81.755,22.8,90.0,2.6,9.0
26.536,-81.755,24.4,130.0,3.6,9.0
26.536,-81.755,25.6,100.0,4.6,9.0
26.536,-81.755,26.1,120.0,3.1,9.0
26.536,-81.755,26.7,0.0,2.6,9.0
26.536,-81.755,27.2,0.0,0.0,9.0
26.536,-81.755,27.2,40.0,3.1,9.0
26.536,-81.755,26.1,30.0,1.5,9.0
26.536,-81.755,22.8,310.0,2.1,9.0
26.536,-81.755,23.3,330.0,2.1,9.0
-34.067,-56.238,17.5,30.0,3.1,68.0
-34.067,-56.238,21.2,30.0,5.7,68.0
-34.067,-56.238,24.5,30.0,3.1,68.0
-34.067,-56.238,27.5,330.0,3.6,68.0
-34.067,-56.238,29.2,30.0,4.1,68.0
-34.067,-56.238,31.0,20.0,4.6,68.0
-34.067,-56.238,33.0,360.0,2.6,68.0
-34.067,-56.238,33.6,60.0,3.1,68.0
-34.067,-56.238,33.6,30.0,3.6,68.0
-34.067,-56.238,18.6,40.0,3.1,68.0
-34.067,-56.238,22.0,120.0,1.5,68.0
-34.067,-56.238,25.0,120.0,2.6,68.0
-34.067,-56.238,28.6,50.0,3.1,68.0
-34.067,-56.238,30.6,50.0,4.1,68.0
-34.067,-56.238,31.5,30.0,6.7,68.0
-34.067,-56.238,32.0,40.0,7.2,68.0
-34.067,-56.238,33.0,30.0,5.7,68.0
-34.067,-56.238,33.2,360.0,3.6,68.0
-34.067,-56.238,20.6,30.0,3.1,68.0
-34.067,-56.238,21.2,0.0,0.0,68.0
-34.067,-56.238,22.0,210.0,3.1,68.0
-34.067,-56.238,23.0,210.0,3.6,68.0
-34.067,-56.238,24.0,180.0,6.7,68.0
-34.067,-56.238,24.5,210.0,7.2,68.0
-34.067,-56.238,21.0,180.0,8.2,68.0
-34.067,-56.238,20.0,180.0,6.7,68.0
-34.083,-56.233,20.2,180.0,7.2,68.0
-29.917,-71.2,16.6,290.0,4.1,146.0
-29.916,-71.2,17.0,290.0,4.1,147.0
-29.916,-71.2,16.0,310.0,3.1,147.0
-29.916,-71.2,16.0,300.0,2.1,147.0
-29.917,-71.2,15.1,0.0,0.0,146.0
-29.916,-71.2,15.0,0.0,1.0,147.0
-29.916,-71.2,15.0,160.0,1.0,147.0
-29.916,-71.2,15.0,120.0,1.0,147.0
-29.917,-71.2,14.3,190.0,1.0,146.0
-29.916,-71.2,14.0,190.0,1.0,147.0
-29.916,-71.2,14.0,0.0,0.0,147.0
-29.916,-71.2,14.0,100.0,3.1,147.0
-29.917,-71.2,12.9,0.0,0.0,146.0
-29.916,-71.2,13.0,0.0,1.0,147.0
-29.916,-71.2,14.0,0.0,0.5,147.0
-29.916,-71.2,15.0,0.0,0.5,147.0
-29.917,-71.2,15.9,0.0,0.0,146.0
-29.916,-71.2,16.0,0.0,0.0,147.0
-29.916,-71.2,17.0,270.0,4.6,147.0
-29.916,-71.2,19.0,260.0,4.1,147.0
-29.917,-71.2,18.1,270.0,6.2,146.0
-29.916,-71.2,18.0,270.0,6.2,147.0
-29.916,-71.2,19.0,270.0,6.2,147.0
-29.916,-71.2,20.0,260.0,5.1,147.0
-29.917,-71.2,19.6,280.0,6.2,146.0
-29.916,-71.2,20.0,280.0,6.2,147.0
-29.916,-71.2,20.0,270.0,6.2,147.0
-29.916,-71.2,19.0,280.0,6.7,147.0
-29.917,-71.2,18.3,270.0,5.7,146.0
-29.916,-71.2,18.0,270.0,5.7,147.0
-29.916,-71.2,18.0,0.0,0.0,147.0
-29.916,-71.2,17.0,280.0,4.6,147.0
-29.917,-71.2,15.9,280.0,4.1,146.0
-29.916,-71.2,16.0,280.0,4.1,147.0
-29.916,-71.2,15.0,280.0,3.6,147.0
-29.916,-71.2,15.0,280.0,3.6,147.0
-29.917,-71.2,15.4,280.0,4.1,146.0
-29.916,-71.2,15.0,280.0,4.1,147.0
-29.916,-71.2,16.0,240.0,2.1,147.0
-29.916,-71.2,15.0,0.0,0.5,147.0
-29.917,-71.2,15.8,80.0,3.6,146.0
-29.916,-71.2,16.0,80.0,3.6,147.0
-29.916,-71.2,16.0,10.0,1.5,147.0
-29.916,-71.2,16.0,100.0,1.5,147.0
-29.917,-71.2,15.3,130.0,1.5,146.0
-29.916,-71.2,15.0,130.0,1.5,147.0
-29.916,-71.2,15.0,110.0,1.0,147.0
-29.916,-71.2,16.0,280.0,6.2,147.0
-29.917,-71.2,15.9,240.0,3.6,146.0
-29.916,-71.2,16.0,240.0,3.6,147.0
-29.916,-71.2,16.0,240.0,3.1,147.0
-29.916,-71.2,16.0,220.0,3.1,147.0
-29.917,-71.2,16.4,260.0,3.1,146.0
-29.916,-71.2,16.0,260.0,3.1,147.0
-29.916,-71.2,17.0,230.0,2.6,147.0
-29.916,-71.2,18.0,0.0,1.5,147.0
-29.917,-71.2,20.3,340.0,2.6,146.0
-29.916,-71.2,20.0,340.0,2.6,147.0
-29.916,-71.2,21.0,270.0,5.1,147.0
-29.916,-71.2,20.0,270.0,6.7,147.0
-29.917,-71.2,19.2,280.0,6.7,146.0
-29.916,-71.2,19.0,280.0,6.7,147.0
-29.916,-71.2,19.0,310.0,2.6,147.0
-29.916,-71.2,18.0,270.0,5.1,147.0
-29.917,-71.2,17.0,300.0,4.6,146.0
-29.916,-71.2,17.0,300.0,4.6,147.0
-29.916,-71.2,17.0,300.0,3.6,147.0
-29.916,-71.2,17.0,290.0,3.1,147.0
-29.917,-71.2,16.3,290.0,2.1,146.0
-29.916,-71.2,16.0,290.0,2.1,147.0
-29.916,-71.2,17.0,270.0,1.0,147.0
-29.916,-71.2,17.0,0.0,0.5,147.0
-29.917,-71.2,16.5,160.0,2.1,146.0
-29.916,-71.2,17.0,160.0,2.1,147.0
-29.916,-71.2,15.0,120.0,3.1,147.0
-29.916,-71.2,16.0,180.0,1.5,147.0
-29.917,-71.2,14.7,0.0,0.0,146.0
-29.916,-71.2,15.0,0.0,1.0,147.0
-29.916,-71.2,15.0,300.0,1.0,147.0
-29.916,-71.2,16.0,0.0,0.0,147.0
-29.917,-71.2,18.5,110.0,1.0,146.0
-29.916,-71.2,19.0,110.0,1.0,147.0
-29.916,-71.2,20.0,270.0,3.6,147.0
-29.916,-71.2,20.0,270.0,5.7,147.0
-29.917,-71.2,20.0,280.0,6.2,146.0
-29.916,-71.2,20.0,280.0,6.2,147.0
-29.916,-71.2,21.0,290.0,6.7,147.0
-29.916,-71.2,20.0,270.0,6.2,147.0
-29.917,-71.2,21.0,260.0,6.7,146.0
-29.916,-71.2,21.0,260.0,6.7,147.0
-29.916,-71.2,20.0,270.0,6.2,147.0
-29.916,-71.2,19.0,260.0,5.1,147.0
-29.916,-71.2,18.0,280.0,4.6,147.0
-29.917,-71.2,17.5,280.0,3.1,146.0
-29.916,-71.2,18.0,280.0,3.1,147.0
30.349,-85.788,11.1,0.0,0.0,21.0
30.349,-85.788,11.1,0.0,0.0,21.0
30.349,-85.788,9.4,0.0,0.0,21.0
30.349,-85.788,9.4,0.0,0.0,21.0
30.349,-85.788,8.3,300.0,2.1,21.0
30.349,-85.788,11.1,280.0,1.5,21.0
30.349,-85.788,0.0,0.0,0.0,21.0
30.349,-85.788,10.6,320.0,3.1,21.0
30.349,-85.788,9.4,310.0,3.1,21.0
30.349,-85.788,7.8,320.0,2.6,21.0
30.349,-85.788,6.1,340.0,2.1,21.0
30.349,-85.788,6.7,330.0,2.6,21.0
30.349,-85.788,6.1,310.0,1.5,21.0
30.349,-85.788,7.2,310.0,2.1,21.0
30.349,-85.788,12.8,360.0,3.1,21.0
30.349,-85.788,15.0,0.0,3.1,21.0
30.349,-85.788,16.7,20.0,4.6,21.0
30.349,-85.788,18.9,30.0,5.1,21.0
30.349,-85.788,19.4,10.0,4.1,21.0
30.349,-85.788,21.1,330.0,2.6,21.0
30.349,-85.788,21.1,10.0,4.6,21.0
30.349,-85.788,21.7,360.0,4.1,21.0
30.349,-85.788,21.7,30.0,2.1,21.0
30.349,-85.788,21.7,330.0,2.6,21.0
30.349,-85.788,16.1,350.0,2.1,21.0
30.349,-85.788,11.7,0.0,0.0,21.0
30.349,-85.788,8.9,0.0,0.0,21.0
30.349,-85.788,9.4,0.0,0.0,21.0
30.349,-85.788,7.8,0.0,0.0,21.0
30.349,-85.788,11.1,30.0,3.1,21.0
30.349,-85.788,7.2,0.0,0.0,21.0
30.349,-85.788,7.2,0.0,0.0,21.0
30.349,-85.788,0.0,0.0,0.0,21.0
30.349,-85.788,7.8,30.0,2.1,21.0
30.349,-85.788,8.3,40.0,2.6,21.0
30.349,-85.788,7.2,50.0,1.5,21.0
30.349,-85.788,8.3,60.0,1.5,21.0
30.349,-85.788,5.6,40.0,2.1,21.0
30.349,-85.788,6.7,40.0,2.1,21.0
30.349,-85.788,7.8,50.0,3.1,21.0
30.349,-85.788,11.7,70.0,2.6,21.0
30.349,-85.788,15.6,70.0,3.1,21.0
30.349,-85.788,18.9,100.0,3.6,21.0
30.349,-85.788,20.0,130.0,3.6,21.0
30.349,-85.788,21.1,140.0,4.1,21.0
30.349,-85.788,21.7,150.0,4.1,21.0
30.349,-85.788,21.7,170.0,3.1,21.0
30.349,-85.788,22.2,170.0,3.1,21.0
30.349,-85.788,20.6,0.0,0.0,21.0
30.349,-85.788,17.2,0.0,0.0,21.0
30.349,-85.788,14.4,0.0,0.0,21.0
30.349,-85.788,12.8,100.0,1.5,21.0
30.349,-85.788,13.3,100.0,1.5,21.0
30.349,-85.788,10.6,0.0,0.0,21.0
30.349,-85.788,9.4,0.0,0.0,21.0
30.349,-85.788,7.8,0.0,0.0,21.0
30.358,-85.799,8.3,0.0,0.0,21.0
30.349,-85.788,0.0,0.0,0.0,21.0
30.358,-85.799,6.7,0.0,0.0,21.0
30.358,-85.799,7.2,0.0,0.0,21.0
30.358,-85.799,7.2,0.0,0.0,21.0
30.358,-85.799,8.3,50.0,1.5,21.0
30.358,-85.799,9.4,0.0,0.0,21.0
30.358,-85.799,8.9,0.0,0.0,21.0
30.358,-85.799,10.0,340.0,1.5,21.0
30.358,-85.799,12.8,40.0,1.5,21.0
30.358,-85.799,16.7,100.0,2.1,21.0
30.358,-85.799,21.1,100.0,1.5,21.0
30.358,-85.799,23.3,0.0,0.0,21.0
30.358,-85.799,25.0,180.0,4.6,21.0
30.358,-85.799,24.4,230.0,3.6,21.0
30.358,-85.799,25.0,210.0,4.1,21.0
30.358,-85.799,23.9,170.0,4.1,21.0
30.358,-85.799,22.8,0.0,0.0,21.0
30.358,-85.799,19.4,0.0,0.0,21.0
30.358,-85.799,17.8,140.0,2.1,21.0
60.383,5.333,-0.7,0.0,0.0,36.0
60.383,5.333,0.6,270.0,2.0,36.0
60.383,5.333,-0.9,120.0,1.0,36.0
60.383,5.333,-1.6,130.0,2.0,36.0
60.383,5.333,-1.4,150.0,1.0,36.0
60.383,5.333,-1.7,0.0,0.0,36.0
60.383,5.333,-1.7,140.0,1.0,36.0
60.383,5.333,-1.4,0.0,0.0,36.0
60.383,5.333,-1.0,0.0,0.0,36.0
60.383,5.333,-1.0,150.0,1.0,36.0
60.383,5.333,-0.7,140.0,1.0,36.0
60.383,5.333,0.5,150.0,1.0,36.0
60.383,5.333,1.9,0.0,0.0,36.0
60.383,5.333,1.7,0.0,0.0,36.0
60.383,5.333,2.1,310.0,2.0,36.0
60.383,5.333,1.5,90.0,1.0,36.0
60.383,5.333,1.9,290.0,1.0,36.0
60.383,5.333,2.0,320.0,1.0,36.0
60.383,5.333,1.9,330.0,1.0,36.0
60.383,5.333,1.3,350.0,1.0,36.0
60.383,5.333,1.5,120.0,1.0,36.0
60.383,5.333,1.3,150.0,2.0,36.0
60.383,5.333,0.8,140.0,1.0,36.0
60.383,5.333,0.3,300.0,1.0,36.0
60.383,5.333,0.2,140.0,1.0,36.0
60.383,5.333,0.4,140.0,1.0,36.0
60.383,5.333,0.5,320.0,1.0,36.0
60.383,5.333,1.5,330.0,1.0,36.0
60.383,5.333,1.8,40.0,1.0,36.0
60.383,5.333,2.3,170.0,1.0,36.0
60.383,5.333,2.7,140.0,1.0,36.0
60.383,5.333,3.1,330.0,1.0,36.0
60.383,5.333,3.8,350.0,1.0,36.0
60.383,5.333,3.8,140.0,1.0,36.0
60.383,5.333,4.1,150.0,1.0,36.0
60.383,5.333,4.4,180.0,1.0,36.0
60.383,5.333,4.9,300.0,1.0,36.0
60.383,5.333,5.2,320.0,1.0,36.0
60.383,5.333,6.7,340.0,1.0,36.0
60.383,5.333,6.9,250.0,1.0,36.0
60.383,5.333,7.9,300.0,2.0,36.0
60.383,5.333,5.5,140.0,1.0,36.0
60.383,5.333,7.1,140.0,2.0,36.0
60.383,5.333,7.0,280.0,2.0,36.0
60.383,5.333,4.6,170.0,1.0,36.0
60.383,5.333,4.8,330.0,1.0,36.0
60.383,5.333,6.4,260.0,2.0,36.0
60.383,5.333,6.2,340.0,1.0,36.0
60.383,5.333,5.7,320.0,2.0,36.0
60.383,5.333,5.2,100.0,1.0,36.0
60.383,5.333,5.1,310.0,1.0,36.0
60.383,5.333,4.9,290.0,2.0,36.0
60.383,5.333,4.9,310.0,2.0,36.0
60.383,5.333,6.1,320.0,2.0,36.0
60.383,5.333,7.0,250.0,1.0,36.0
60.383,5.333,5.3,140.0,1.0,36.0
60.383,5.333,6.9,350.0,1.0,36.0
60.383,5.333,9.7,110.0,3.0,36.0
60.383,5.333,10.3,300.0,3.0,36.0
60.383,5.333,8.7,310.0,1.0,36.0
60.383,5.333,9.0,270.0,3.0,36.0
60.383,5.333,11.6,80.0,3.0,36.0
60.383,5.333,11.4,80.0,4.0,36.0
60.383,5.333,9.7,70.0,5.0,36.0
60.383,5.333,9.5,80.0,6.0,36.0
60.383,5.333,8.7,80.0,5.0,36.0
60.383,5.333,7.7,80.0,5.0,36.0
60.383,5.333,8.2,80.0,4.0,36.0
60.383,5.333,7.7,30.0,1.0,36.0
60.383,5.333,7.2,310.0,1.0,36.0
60.383,5.333,6.8,300.0,2.0,36.0
60.383,5.333,6.7,140.0,1.0,36.0
1 latitude longitude temperature windAngle windSpeed elevation
2 26.536 -81.755 17.8 10.0 2.1 9.0
3 26.536 -81.755 16.7 360.0 1.5 9.0
4 26.536 -81.755 16.1 350.0 1.5 9.0
5 26.536 -81.755 15.0 0.0 0.0 9.0
6 26.536 -81.755 14.4 350.0 1.5 9.0
7 26.536 -81.755 0.0 0.0 0.0 9.0
8 26.536 -81.755 13.9 360.0 2.1 9.0
9 26.536 -81.755 13.3 350.0 1.5 9.0
10 26.536 -81.755 13.3 10.0 2.1 9.0
11 26.536 -81.755 13.3 360.0 1.5 9.0
12 26.536 -81.755 13.3 0.0 0.0 9.0
13 26.536 -81.755 12.2 0.0 0.0 9.0
14 26.536 -81.755 11.7 0.0 0.0 9.0
15 26.536 -81.755 14.4 0.0 0.0 9.0
16 26.536 -81.755 17.2 10.0 2.6 9.0
17 26.536 -81.755 20.0 20.0 2.6 9.0
18 26.536 -81.755 22.2 10.0 3.6 9.0
19 26.536 -81.755 23.3 30.0 4.6 9.0
20 26.536 -81.755 23.3 330.0 2.6 9.0
21 26.536 -81.755 24.4 0.0 0.0 9.0
22 26.536 -81.755 25.0 360.0 3.1 9.0
23 26.536 -81.755 24.4 20.0 4.1 9.0
24 26.536 -81.755 23.3 10.0 2.6 9.0
25 26.536 -81.755 21.1 30.0 2.1 9.0
26 26.536 -81.755 18.3 0.0 0.0 9.0
27 26.536 -81.755 17.2 30.0 2.1 9.0
28 26.536 -81.755 15.6 60.0 2.6 9.0
29 26.536 -81.755 15.6 0.0 0.0 9.0
30 26.536 -81.755 13.9 60.0 2.6 9.0
31 26.536 -81.755 12.8 70.0 2.6 9.0
32 26.536 -81.755 0.0 0.0 0.0 9.0
33 26.536 -81.755 11.7 70.0 2.1 9.0
34 26.536 -81.755 12.2 20.0 2.1 9.0
35 26.536 -81.755 11.7 30.0 1.5 9.0
36 26.536 -81.755 11.1 40.0 2.1 9.0
37 26.536 -81.755 12.2 40.0 2.6 9.0
38 26.536 -81.755 12.2 30.0 2.6 9.0
39 26.536 -81.755 12.2 0.0 0.0 9.0
40 26.536 -81.755 15.0 30.0 6.2 9.0
41 26.536 -81.755 17.2 50.0 3.6 9.0
42 26.536 -81.755 20.6 60.0 5.1 9.0
43 26.536 -81.755 22.8 50.0 4.6 9.0
44 26.536 -81.755 24.4 80.0 6.2 9.0
45 26.536 -81.755 25.0 100.0 5.7 9.0
46 26.536 -81.755 25.6 60.0 3.1 9.0
47 26.536 -81.755 25.6 80.0 4.6 9.0
48 26.536 -81.755 25.0 90.0 5.1 9.0
49 26.536 -81.755 24.4 80.0 5.1 9.0
50 26.536 -81.755 21.1 60.0 2.6 9.0
51 26.536 -81.755 19.4 70.0 3.6 9.0
52 26.536 -81.755 18.3 70.0 2.6 9.0
53 26.536 -81.755 18.3 80.0 2.6 9.0
54 26.536 -81.755 17.2 60.0 1.5 9.0
55 26.536 -81.755 16.1 70.0 2.6 9.0
56 26.536 -81.755 15.6 70.0 2.6 9.0
57 26.536 -81.755 0.0 0.0 0.0 9.0
58 26.536 -81.755 16.1 50.0 2.6 9.0
59 26.536 -81.755 15.6 50.0 2.1 9.0
60 26.536 -81.755 15.0 50.0 1.5 9.0
61 26.536 -81.755 15.0 0.0 0.0 9.0
62 26.536 -81.755 15.0 0.0 0.0 9.0
63 26.536 -81.755 14.4 0.0 0.0 9.0
64 26.536 -81.755 14.4 30.0 4.1 9.0
65 26.536 -81.755 16.1 40.0 1.5 9.0
66 26.536 -81.755 19.4 0.0 1.5 9.0
67 26.536 -81.755 22.8 90.0 2.6 9.0
68 26.536 -81.755 24.4 130.0 3.6 9.0
69 26.536 -81.755 25.6 100.0 4.6 9.0
70 26.536 -81.755 26.1 120.0 3.1 9.0
71 26.536 -81.755 26.7 0.0 2.6 9.0
72 26.536 -81.755 27.2 0.0 0.0 9.0
73 26.536 -81.755 27.2 40.0 3.1 9.0
74 26.536 -81.755 26.1 30.0 1.5 9.0
75 26.536 -81.755 22.8 310.0 2.1 9.0
76 26.536 -81.755 23.3 330.0 2.1 9.0
77 -34.067 -56.238 17.5 30.0 3.1 68.0
78 -34.067 -56.238 21.2 30.0 5.7 68.0
79 -34.067 -56.238 24.5 30.0 3.1 68.0
80 -34.067 -56.238 27.5 330.0 3.6 68.0
81 -34.067 -56.238 29.2 30.0 4.1 68.0
82 -34.067 -56.238 31.0 20.0 4.6 68.0
83 -34.067 -56.238 33.0 360.0 2.6 68.0
84 -34.067 -56.238 33.6 60.0 3.1 68.0
85 -34.067 -56.238 33.6 30.0 3.6 68.0
86 -34.067 -56.238 18.6 40.0 3.1 68.0
87 -34.067 -56.238 22.0 120.0 1.5 68.0
88 -34.067 -56.238 25.0 120.0 2.6 68.0
89 -34.067 -56.238 28.6 50.0 3.1 68.0
90 -34.067 -56.238 30.6 50.0 4.1 68.0
91 -34.067 -56.238 31.5 30.0 6.7 68.0
92 -34.067 -56.238 32.0 40.0 7.2 68.0
93 -34.067 -56.238 33.0 30.0 5.7 68.0
94 -34.067 -56.238 33.2 360.0 3.6 68.0
95 -34.067 -56.238 20.6 30.0 3.1 68.0
96 -34.067 -56.238 21.2 0.0 0.0 68.0
97 -34.067 -56.238 22.0 210.0 3.1 68.0
98 -34.067 -56.238 23.0 210.0 3.6 68.0
99 -34.067 -56.238 24.0 180.0 6.7 68.0
100 -34.067 -56.238 24.5 210.0 7.2 68.0
101 -34.067 -56.238 21.0 180.0 8.2 68.0
102 -34.067 -56.238 20.0 180.0 6.7 68.0
103 -34.083 -56.233 20.2 180.0 7.2 68.0
104 -29.917 -71.2 16.6 290.0 4.1 146.0
105 -29.916 -71.2 17.0 290.0 4.1 147.0
106 -29.916 -71.2 16.0 310.0 3.1 147.0
107 -29.916 -71.2 16.0 300.0 2.1 147.0
108 -29.917 -71.2 15.1 0.0 0.0 146.0
109 -29.916 -71.2 15.0 0.0 1.0 147.0
110 -29.916 -71.2 15.0 160.0 1.0 147.0
111 -29.916 -71.2 15.0 120.0 1.0 147.0
112 -29.917 -71.2 14.3 190.0 1.0 146.0
113 -29.916 -71.2 14.0 190.0 1.0 147.0
114 -29.916 -71.2 14.0 0.0 0.0 147.0
115 -29.916 -71.2 14.0 100.0 3.1 147.0
116 -29.917 -71.2 12.9 0.0 0.0 146.0
117 -29.916 -71.2 13.0 0.0 1.0 147.0
118 -29.916 -71.2 14.0 0.0 0.5 147.0
119 -29.916 -71.2 15.0 0.0 0.5 147.0
120 -29.917 -71.2 15.9 0.0 0.0 146.0
121 -29.916 -71.2 16.0 0.0 0.0 147.0
122 -29.916 -71.2 17.0 270.0 4.6 147.0
123 -29.916 -71.2 19.0 260.0 4.1 147.0
124 -29.917 -71.2 18.1 270.0 6.2 146.0
125 -29.916 -71.2 18.0 270.0 6.2 147.0
126 -29.916 -71.2 19.0 270.0 6.2 147.0
127 -29.916 -71.2 20.0 260.0 5.1 147.0
128 -29.917 -71.2 19.6 280.0 6.2 146.0
129 -29.916 -71.2 20.0 280.0 6.2 147.0
130 -29.916 -71.2 20.0 270.0 6.2 147.0
131 -29.916 -71.2 19.0 280.0 6.7 147.0
132 -29.917 -71.2 18.3 270.0 5.7 146.0
133 -29.916 -71.2 18.0 270.0 5.7 147.0
134 -29.916 -71.2 18.0 0.0 0.0 147.0
135 -29.916 -71.2 17.0 280.0 4.6 147.0
136 -29.917 -71.2 15.9 280.0 4.1 146.0
137 -29.916 -71.2 16.0 280.0 4.1 147.0
138 -29.916 -71.2 15.0 280.0 3.6 147.0
139 -29.916 -71.2 15.0 280.0 3.6 147.0
140 -29.917 -71.2 15.4 280.0 4.1 146.0
141 -29.916 -71.2 15.0 280.0 4.1 147.0
142 -29.916 -71.2 16.0 240.0 2.1 147.0
143 -29.916 -71.2 15.0 0.0 0.5 147.0
144 -29.917 -71.2 15.8 80.0 3.6 146.0
145 -29.916 -71.2 16.0 80.0 3.6 147.0
146 -29.916 -71.2 16.0 10.0 1.5 147.0
147 -29.916 -71.2 16.0 100.0 1.5 147.0
148 -29.917 -71.2 15.3 130.0 1.5 146.0
149 -29.916 -71.2 15.0 130.0 1.5 147.0
150 -29.916 -71.2 15.0 110.0 1.0 147.0
151 -29.916 -71.2 16.0 280.0 6.2 147.0
152 -29.917 -71.2 15.9 240.0 3.6 146.0
153 -29.916 -71.2 16.0 240.0 3.6 147.0
154 -29.916 -71.2 16.0 240.0 3.1 147.0
155 -29.916 -71.2 16.0 220.0 3.1 147.0
156 -29.917 -71.2 16.4 260.0 3.1 146.0
157 -29.916 -71.2 16.0 260.0 3.1 147.0
158 -29.916 -71.2 17.0 230.0 2.6 147.0
159 -29.916 -71.2 18.0 0.0 1.5 147.0
160 -29.917 -71.2 20.3 340.0 2.6 146.0
161 -29.916 -71.2 20.0 340.0 2.6 147.0
162 -29.916 -71.2 21.0 270.0 5.1 147.0
163 -29.916 -71.2 20.0 270.0 6.7 147.0
164 -29.917 -71.2 19.2 280.0 6.7 146.0
165 -29.916 -71.2 19.0 280.0 6.7 147.0
166 -29.916 -71.2 19.0 310.0 2.6 147.0
167 -29.916 -71.2 18.0 270.0 5.1 147.0
168 -29.917 -71.2 17.0 300.0 4.6 146.0
169 -29.916 -71.2 17.0 300.0 4.6 147.0
170 -29.916 -71.2 17.0 300.0 3.6 147.0
171 -29.916 -71.2 17.0 290.0 3.1 147.0
172 -29.917 -71.2 16.3 290.0 2.1 146.0
173 -29.916 -71.2 16.0 290.0 2.1 147.0
174 -29.916 -71.2 17.0 270.0 1.0 147.0
175 -29.916 -71.2 17.0 0.0 0.5 147.0
176 -29.917 -71.2 16.5 160.0 2.1 146.0
177 -29.916 -71.2 17.0 160.0 2.1 147.0
178 -29.916 -71.2 15.0 120.0 3.1 147.0
179 -29.916 -71.2 16.0 180.0 1.5 147.0
180 -29.917 -71.2 14.7 0.0 0.0 146.0
181 -29.916 -71.2 15.0 0.0 1.0 147.0
182 -29.916 -71.2 15.0 300.0 1.0 147.0
183 -29.916 -71.2 16.0 0.0 0.0 147.0
184 -29.917 -71.2 18.5 110.0 1.0 146.0
185 -29.916 -71.2 19.0 110.0 1.0 147.0
186 -29.916 -71.2 20.0 270.0 3.6 147.0
187 -29.916 -71.2 20.0 270.0 5.7 147.0
188 -29.917 -71.2 20.0 280.0 6.2 146.0
189 -29.916 -71.2 20.0 280.0 6.2 147.0
190 -29.916 -71.2 21.0 290.0 6.7 147.0
191 -29.916 -71.2 20.0 270.0 6.2 147.0
192 -29.917 -71.2 21.0 260.0 6.7 146.0
193 -29.916 -71.2 21.0 260.0 6.7 147.0
194 -29.916 -71.2 20.0 270.0 6.2 147.0
195 -29.916 -71.2 19.0 260.0 5.1 147.0
196 -29.916 -71.2 18.0 280.0 4.6 147.0
197 -29.917 -71.2 17.5 280.0 3.1 146.0
198 -29.916 -71.2 18.0 280.0 3.1 147.0
199 30.349 -85.788 11.1 0.0 0.0 21.0
200 30.349 -85.788 11.1 0.0 0.0 21.0
201 30.349 -85.788 9.4 0.0 0.0 21.0
202 30.349 -85.788 9.4 0.0 0.0 21.0
203 30.349 -85.788 8.3 300.0 2.1 21.0
204 30.349 -85.788 11.1 280.0 1.5 21.0
205 30.349 -85.788 0.0 0.0 0.0 21.0
206 30.349 -85.788 10.6 320.0 3.1 21.0
207 30.349 -85.788 9.4 310.0 3.1 21.0
208 30.349 -85.788 7.8 320.0 2.6 21.0
209 30.349 -85.788 6.1 340.0 2.1 21.0
210 30.349 -85.788 6.7 330.0 2.6 21.0
211 30.349 -85.788 6.1 310.0 1.5 21.0
212 30.349 -85.788 7.2 310.0 2.1 21.0
213 30.349 -85.788 12.8 360.0 3.1 21.0
214 30.349 -85.788 15.0 0.0 3.1 21.0
215 30.349 -85.788 16.7 20.0 4.6 21.0
216 30.349 -85.788 18.9 30.0 5.1 21.0
217 30.349 -85.788 19.4 10.0 4.1 21.0
218 30.349 -85.788 21.1 330.0 2.6 21.0
219 30.349 -85.788 21.1 10.0 4.6 21.0
220 30.349 -85.788 21.7 360.0 4.1 21.0
221 30.349 -85.788 21.7 30.0 2.1 21.0
222 30.349 -85.788 21.7 330.0 2.6 21.0
223 30.349 -85.788 16.1 350.0 2.1 21.0
224 30.349 -85.788 11.7 0.0 0.0 21.0
225 30.349 -85.788 8.9 0.0 0.0 21.0
226 30.349 -85.788 9.4 0.0 0.0 21.0
227 30.349 -85.788 7.8 0.0 0.0 21.0
228 30.349 -85.788 11.1 30.0 3.1 21.0
229 30.349 -85.788 7.2 0.0 0.0 21.0
230 30.349 -85.788 7.2 0.0 0.0 21.0
231 30.349 -85.788 0.0 0.0 0.0 21.0
232 30.349 -85.788 7.8 30.0 2.1 21.0
233 30.349 -85.788 8.3 40.0 2.6 21.0
234 30.349 -85.788 7.2 50.0 1.5 21.0
235 30.349 -85.788 8.3 60.0 1.5 21.0
236 30.349 -85.788 5.6 40.0 2.1 21.0
237 30.349 -85.788 6.7 40.0 2.1 21.0
238 30.349 -85.788 7.8 50.0 3.1 21.0
239 30.349 -85.788 11.7 70.0 2.6 21.0
240 30.349 -85.788 15.6 70.0 3.1 21.0
241 30.349 -85.788 18.9 100.0 3.6 21.0
242 30.349 -85.788 20.0 130.0 3.6 21.0
243 30.349 -85.788 21.1 140.0 4.1 21.0
244 30.349 -85.788 21.7 150.0 4.1 21.0
245 30.349 -85.788 21.7 170.0 3.1 21.0
246 30.349 -85.788 22.2 170.0 3.1 21.0
247 30.349 -85.788 20.6 0.0 0.0 21.0
248 30.349 -85.788 17.2 0.0 0.0 21.0
249 30.349 -85.788 14.4 0.0 0.0 21.0
250 30.349 -85.788 12.8 100.0 1.5 21.0
251 30.349 -85.788 13.3 100.0 1.5 21.0
252 30.349 -85.788 10.6 0.0 0.0 21.0
253 30.349 -85.788 9.4 0.0 0.0 21.0
254 30.349 -85.788 7.8 0.0 0.0 21.0
255 30.358 -85.799 8.3 0.0 0.0 21.0
256 30.349 -85.788 0.0 0.0 0.0 21.0
257 30.358 -85.799 6.7 0.0 0.0 21.0
258 30.358 -85.799 7.2 0.0 0.0 21.0
259 30.358 -85.799 7.2 0.0 0.0 21.0
260 30.358 -85.799 8.3 50.0 1.5 21.0
261 30.358 -85.799 9.4 0.0 0.0 21.0
262 30.358 -85.799 8.9 0.0 0.0 21.0
263 30.358 -85.799 10.0 340.0 1.5 21.0
264 30.358 -85.799 12.8 40.0 1.5 21.0
265 30.358 -85.799 16.7 100.0 2.1 21.0
266 30.358 -85.799 21.1 100.0 1.5 21.0
267 30.358 -85.799 23.3 0.0 0.0 21.0
268 30.358 -85.799 25.0 180.0 4.6 21.0
269 30.358 -85.799 24.4 230.0 3.6 21.0
270 30.358 -85.799 25.0 210.0 4.1 21.0
271 30.358 -85.799 23.9 170.0 4.1 21.0
272 30.358 -85.799 22.8 0.0 0.0 21.0
273 30.358 -85.799 19.4 0.0 0.0 21.0
274 30.358 -85.799 17.8 140.0 2.1 21.0
275 60.383 5.333 -0.7 0.0 0.0 36.0
276 60.383 5.333 0.6 270.0 2.0 36.0
277 60.383 5.333 -0.9 120.0 1.0 36.0
278 60.383 5.333 -1.6 130.0 2.0 36.0
279 60.383 5.333 -1.4 150.0 1.0 36.0
280 60.383 5.333 -1.7 0.0 0.0 36.0
281 60.383 5.333 -1.7 140.0 1.0 36.0
282 60.383 5.333 -1.4 0.0 0.0 36.0
283 60.383 5.333 -1.0 0.0 0.0 36.0
284 60.383 5.333 -1.0 150.0 1.0 36.0
285 60.383 5.333 -0.7 140.0 1.0 36.0
286 60.383 5.333 0.5 150.0 1.0 36.0
287 60.383 5.333 1.9 0.0 0.0 36.0
288 60.383 5.333 1.7 0.0 0.0 36.0
289 60.383 5.333 2.1 310.0 2.0 36.0
290 60.383 5.333 1.5 90.0 1.0 36.0
291 60.383 5.333 1.9 290.0 1.0 36.0
292 60.383 5.333 2.0 320.0 1.0 36.0
293 60.383 5.333 1.9 330.0 1.0 36.0
294 60.383 5.333 1.3 350.0 1.0 36.0
295 60.383 5.333 1.5 120.0 1.0 36.0
296 60.383 5.333 1.3 150.0 2.0 36.0
297 60.383 5.333 0.8 140.0 1.0 36.0
298 60.383 5.333 0.3 300.0 1.0 36.0
299 60.383 5.333 0.2 140.0 1.0 36.0
300 60.383 5.333 0.4 140.0 1.0 36.0
301 60.383 5.333 0.5 320.0 1.0 36.0
302 60.383 5.333 1.5 330.0 1.0 36.0
303 60.383 5.333 1.8 40.0 1.0 36.0
304 60.383 5.333 2.3 170.0 1.0 36.0
305 60.383 5.333 2.7 140.0 1.0 36.0
306 60.383 5.333 3.1 330.0 1.0 36.0
307 60.383 5.333 3.8 350.0 1.0 36.0
308 60.383 5.333 3.8 140.0 1.0 36.0
309 60.383 5.333 4.1 150.0 1.0 36.0
310 60.383 5.333 4.4 180.0 1.0 36.0
311 60.383 5.333 4.9 300.0 1.0 36.0
312 60.383 5.333 5.2 320.0 1.0 36.0
313 60.383 5.333 6.7 340.0 1.0 36.0
314 60.383 5.333 6.9 250.0 1.0 36.0
315 60.383 5.333 7.9 300.0 2.0 36.0
316 60.383 5.333 5.5 140.0 1.0 36.0
317 60.383 5.333 7.1 140.0 2.0 36.0
318 60.383 5.333 7.0 280.0 2.0 36.0
319 60.383 5.333 4.6 170.0 1.0 36.0
320 60.383 5.333 4.8 330.0 1.0 36.0
321 60.383 5.333 6.4 260.0 2.0 36.0
322 60.383 5.333 6.2 340.0 1.0 36.0
323 60.383 5.333 5.7 320.0 2.0 36.0
324 60.383 5.333 5.2 100.0 1.0 36.0
325 60.383 5.333 5.1 310.0 1.0 36.0
326 60.383 5.333 4.9 290.0 2.0 36.0
327 60.383 5.333 4.9 310.0 2.0 36.0
328 60.383 5.333 6.1 320.0 2.0 36.0
329 60.383 5.333 7.0 250.0 1.0 36.0
330 60.383 5.333 5.3 140.0 1.0 36.0
331 60.383 5.333 6.9 350.0 1.0 36.0
332 60.383 5.333 9.7 110.0 3.0 36.0
333 60.383 5.333 10.3 300.0 3.0 36.0
334 60.383 5.333 8.7 310.0 1.0 36.0
335 60.383 5.333 9.0 270.0 3.0 36.0
336 60.383 5.333 11.6 80.0 3.0 36.0
337 60.383 5.333 11.4 80.0 4.0 36.0
338 60.383 5.333 9.7 70.0 5.0 36.0
339 60.383 5.333 9.5 80.0 6.0 36.0
340 60.383 5.333 8.7 80.0 5.0 36.0
341 60.383 5.333 7.7 80.0 5.0 36.0
342 60.383 5.333 8.2 80.0 4.0 36.0
343 60.383 5.333 7.7 30.0 1.0 36.0
344 60.383 5.333 7.2 310.0 1.0 36.0
345 60.383 5.333 6.8 300.0 2.0 36.0
346 60.383 5.333 6.7 140.0 1.0 36.0

View File

@@ -92,7 +92,7 @@
"dstore = ws.get_default_datastore()\n", "dstore = ws.get_default_datastore()\n",
"\n", "\n",
"# upload weather data\n", "# upload weather data\n",
"dstore.upload('training-dataset', 'drift-on-aks-data', overwrite=True, show_progress=False)" "dstore.upload('dataset', 'drift-on-aks-data', overwrite=True, show_progress=False)"
] ]
}, },
{ {
@@ -229,7 +229,7 @@
"source": [ "source": [
"## Run recent weather data through the webservice \n", "## Run recent weather data through the webservice \n",
"\n", "\n",
"The below cells take the past 2 days of weather data, filter and transform using the same processes as the training dataset, and runs the data through the service." "The below cells take the weather data of Florida from 2019-11-20 to 2019-11-12, filter and transform using the same processes as the training dataset, and runs the data through the service."
] ]
}, },
{ {
@@ -238,16 +238,10 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from datetime import datetime, timedelta\n", "# create dataset \n",
"from azureml.opendatasets import NoaaIsdWeather\n", "tset = Dataset.Tabular.from_delimited_files(dstore.path('drift-on-aks-data/testing.csv'))\n",
"\n", "\n",
"start = datetime.today() - timedelta(days=2)\n", "df = tset.to_pandas_dataframe().fillna(0)\n",
"end = datetime.today()\n",
"\n",
"isd = NoaaIsdWeather(start, end)\n",
"\n",
"df = isd.to_pandas_dataframe().fillna(0)\n",
"df = df[df['stationName'].str.contains('FLORIDA', regex=True, na=False)]\n",
"\n", "\n",
"X_features = ['latitude', 'longitude', 'temperature', 'windAngle', 'windSpeed']\n", "X_features = ['latitude', 'longitude', 'temperature', 'windAngle', 'windSpeed']\n",
"y_features = ['elevation']\n", "y_features = ['elevation']\n",
@@ -264,9 +258,9 @@
"source": [ "source": [
"import json\n", "import json\n",
"\n", "\n",
"today_data = json.dumps({'data': X.values.tolist()})\n", "data = json.dumps({'data': X.values.tolist()})\n",
"\n", "\n",
"data_encoded = bytes(today_data, encoding='utf8')\n", "data_encoded = bytes(data, encoding='utf8')\n",
"prediction = service.run(input_data=data_encoded)\n", "prediction = service.run(input_data=data_encoded)\n",
"print(prediction)" "print(prediction)"
] ]
@@ -342,6 +336,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from datetime import datetime, timedelta\n",
"from azureml.datadrift import DataDriftDetector, AlertConfiguration\n", "from azureml.datadrift import DataDriftDetector, AlertConfiguration\n",
"\n", "\n",
"services = [service_name]\n", "services = [service_name]\n",

View File

@@ -100,7 +100,7 @@
"\n", "\n",
"# Check core SDK version number\n", "# Check core SDK version number\n",
"\n", "\n",
"print(\"This notebook was created using SDK version 1.0.76, you are currently running version\", azureml.core.VERSION)" "print(\"This notebook was created using SDK version 1.0.83, you are currently running version\", azureml.core.VERSION)"
] ]
}, },
{ {

View File

@@ -925,6 +925,7 @@
"cd = CondaDependencies.create()\n", "cd = CondaDependencies.create()\n",
"cd.add_tensorflow_conda_package()\n", "cd.add_tensorflow_conda_package()\n",
"cd.add_conda_package('keras==2.2.5')\n", "cd.add_conda_package('keras==2.2.5')\n",
"cd.add_pip_package(\"azureml-defaults\")\n",
"cd.save_to_file(base_directory='./', conda_file_path='myenv.yml')\n", "cd.save_to_file(base_directory='./', conda_file_path='myenv.yml')\n",
"\n", "\n",
"print(cd.serialize_to_string())" "print(cd.serialize_to_string())"
@@ -947,10 +948,11 @@
"from azureml.core.webservice import AciWebservice\n", "from azureml.core.webservice import AciWebservice\n",
"from azureml.core.model import InferenceConfig\n", "from azureml.core.model import InferenceConfig\n",
"from azureml.core.model import Model\n", "from azureml.core.model import Model\n",
"from azureml.core.environment import Environment\n",
"\n", "\n",
"inference_config = InferenceConfig(runtime= \"python\", \n", "\n",
" entry_script=\"score.py\",\n", "myenv = Environment.from_conda_specification(name=\"myenv\", file_path=\"myenv.yml\")\n",
" conda_file=\"myenv.yml\")\n", "inference_config = InferenceConfig(entry_script=\"score.py\", environment=myenv)\n",
"\n", "\n",
"aciconfig = AciWebservice.deploy_configuration(cpu_cores=1,\n", "aciconfig = AciWebservice.deploy_configuration(cpu_cores=1,\n",
" auth_enabled=True, # this flag generates API keys to secure access\n", " auth_enabled=True, # this flag generates API keys to secure access\n",
@@ -1131,7 +1133,7 @@
"metadata": { "metadata": {
"authors": [ "authors": [
{ {
"name": "ninhu" "name": "swatig"
} }
], ],
"category": "training", "category": "training",

View File

@@ -149,7 +149,11 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": {}, "metadata": {
"tags": [
"condadependencies-remarks-sample"
]
},
"outputs": [], "outputs": [],
"source": [ "source": [
"from azureml.core.environment import CondaDependencies\n", "from azureml.core.environment import CondaDependencies\n",
@@ -169,7 +173,11 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": {}, "metadata": {
"tags": [
"condadependencies-remarks-sample2"
]
},
"outputs": [], "outputs": [],
"source": [ "source": [
"conda_dep.add_pip_package(\"pillow==5.4.1\")\n", "conda_dep.add_pip_package(\"pillow==5.4.1\")\n",

View File

@@ -10,7 +10,7 @@ With Azure Machine Learning datasets, you can:
## Learn how to use Azure Machine Learning datasets ## Learn how to use Azure Machine Learning datasets
* [Create and register datasets](https://aka.ms/azureml/howto/createdatasets) * [Create and register datasets](https://aka.ms/azureml/howto/createdatasets)
* Use [Datasets in training](datasets-tutorial/train-with-datasets.ipynb) * Use [Datasets in training](datasets-tutorial/train-with-datasets/train-with-datasets.ipynb)
* Use TabularDatasets in [automated machine learning training](https://aka.ms/automl-dataset) * Use TabularDatasets in [automated machine learning training](https://aka.ms/automl-dataset)
* Use FileDatasets in [image classification](https://aka.ms/filedataset-samplenotebook) * Use FileDatasets in [image classification](https://aka.ms/filedataset-samplenotebook)
* Use FileDatasets in [deep learning with hyperparameter tuning](https://aka.ms/filedataset-hyperdrive) * Use FileDatasets in [deep learning with hyperparameter tuning](https://aka.ms/filedataset-hyperdrive)

View File

@@ -206,7 +206,11 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": {}, "metadata": {
"tags": [
"datadrift-remarks-sample"
]
},
"outputs": [], "outputs": [],
"source": [ "source": [
"from azureml.datadrift import DataDriftDetector, AlertConfiguration\n", "from azureml.datadrift import DataDriftDetector, AlertConfiguration\n",
@@ -290,7 +294,9 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"# backfill for one month\n", "# backfill for one month\n",
"backfill = monitor.backfill(datetime(2019, 9, 1), datetime(2019, 10, 1))\n", "backfill_start_date = datetime(2019, 9, 1)\n",
"backfill_end_date = datetime(2019, 10, 1)\n",
"backfill = monitor.backfill(backfill_start_date, backfill_end_date)\n",
"backfill" "backfill"
] ]
}, },
@@ -353,7 +359,7 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"# plot the results from Python SDK \n", "# plot the results from Python SDK \n",
"monitor.show()" "monitor.show(backfill_start_date, backfill_end_date)"
] ]
}, },
{ {
@@ -371,7 +377,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"link = 'https://ml.azure.com/data/monitor/{}?wsid=/subscriptions/{}/resourcegroups/{}/workspaces/{}'.format(monitor.name, ws.subscription_id, ws.resource_group, ws.name)\n", "link = 'https://ml.azure.com/data/monitor/{}?wsid=/subscriptions/{}/resourcegroups/{}/workspaces/{}&startDate={}&endDate={}'.format(monitor.name, ws.subscription_id, ws.resource_group, ws.name, backfill_start_date.strftime('%Y-%m-%d'), backfill_end_date .strftime('%Y-%m-%d'))\n",
"print(link)" "print(link)"
] ]
}, },

View File

@@ -0,0 +1,403 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
"\n",
"Licensed under the MIT License."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/work-with-data/datasets-tutorial/labeled-datasets/labeled-datasets.png)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Introduction to labeled datasets\n",
"\n",
"Labeled datasets are the output of Azure Machine Learning [labeling projects](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-create-labeling-projects). They capture references to the data (e.g. image files) and the associated labels. \n",
"\n",
"This tutorial introduces the capabilities of labeled datasets and how to use them in training.\n",
"\n",
"Learn how-to:\n",
"\n",
"> * Set up your development environment\n",
"> * Explore labeled datasets\n",
"> * Train a simple deep learning neural network on a remote cluster\n",
"\n",
"## Prerequisite:\n",
"* Understand the [architecture and terms](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture) introduced by Azure Machine Learning\n",
"* Go through Azure Machine Learning [labeling projects](https://docs.microsoft.com/azure/machine-learning/service/how-to-create-labeling-projects) and export the labels as an Azure Machine Learning dataset\n",
"* Go through the [configuration notebook](../../../configuration.ipynb) to:\n",
" * install the latest version of azureml-sdk\n",
" * install the latest version of azureml-contrib-dataset\n",
" * install [PyTorch](https://pytorch.org/)\n",
" * create a workspace and its configuration file (`config.json`)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Set up your development environment\n",
"\n",
"All the setup for your development work can be accomplished in a Python notebook. Setup includes:\n",
"\n",
"* Importing Python packages\n",
"* Connecting to a workspace to enable communication between your local computer and remote resources\n",
"* Creating an experiment to track all your runs\n",
"* Creating a remote compute target to use for training\n",
"\n",
"### Import packages\n",
"\n",
"Import Python packages you need in this session. Also display the Azure Machine Learning SDK version."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import azureml.core\n",
"import azureml.contrib.dataset\n",
"from azureml.core import Dataset, Workspace, Experiment\n",
"from azureml.contrib.dataset import FileHandlingOption\n",
"\n",
"# check core SDK version number\n",
"print(\"Azure ML SDK Version: \", azureml.core.VERSION)\n",
"print(\"Azure ML Contrib Version\", azureml.contrib.dataset.VERSION)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Connect to workspace\n",
"\n",
"Create a workspace object from the existing workspace. `Workspace.from_config()` reads the file **config.json** and loads the details into an object named `workspace`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# load workspace\n",
"workspace = Workspace.from_config()\n",
"print('Workspace name: ' + workspace.name, \n",
" 'Azure region: ' + workspace.location, \n",
" 'Subscription id: ' + workspace.subscription_id, \n",
" 'Resource group: ' + workspace.resource_group, sep='\\n')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Create experiment and a directory\n",
"\n",
"Create an experiment to track the runs in your workspace and a directory to deliver the necessary code from your computer to the remote resource."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# create an ML experiment\n",
"exp = Experiment(workspace=workspace, name='labeled-datasets')\n",
"\n",
"# create a directory\n",
"script_folder = './labeled-datasets'\n",
"os.makedirs(script_folder, exist_ok=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Create or Attach existing compute resource\n",
"By using Azure Machine Learning Compute, a managed service, data scientists can train machine learning models on clusters of Azure virtual machines. Examples include VMs with GPU support. In this tutorial, you will create Azure Machine Learning Compute as your training environment. The code below creates the compute clusters for you if they don't already exist in your workspace.\n",
"\n",
"**Creation of compute takes approximately 5 minutes.** If the AmlCompute with that name is already in your workspace the code will skip the creation process."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
"from azureml.core.compute_target import ComputeTargetException\n",
"\n",
"# choose a name for your cluster\n",
"cluster_name = \"openhack\"\n",
"\n",
"try:\n",
" compute_target = ComputeTarget(workspace=workspace, name=cluster_name)\n",
" print('Found existing compute target')\n",
"except ComputeTargetException:\n",
" print('Creating a new compute target...')\n",
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6', \n",
" max_nodes=4)\n",
"\n",
" # create the cluster\n",
" compute_target = ComputeTarget.create(workspace, cluster_name, compute_config)\n",
"\n",
" # can poll for a minimum number of nodes and for a specific timeout. \n",
" # if no min node count is provided it uses the scale settings for the cluster\n",
" compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)\n",
"\n",
"# use get_status() to get a detailed status for the current cluster. \n",
"print(compute_target.get_status().serialize())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Explore labeled datasets\n",
"\n",
"**Note**: How to create labeled datasets is not covered in this tutorial. To create labeled datasets, you can go through [labeling projects](https://docs.microsoft.com/azure/machine-learning/service/how-to-create-labeling-projects) and export the output labels as Azure Machine Learning datasets. \n",
"\n",
"`animal_labels` used in this tutorial section is the output from a labeling project, with the task type of \"Object Identification\"."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# get animal_labels dataset from the workspace\n",
"animal_labels = Dataset.get_by_name(workspace, 'animal_labels')\n",
"animal_labels"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"You can load labeled datasets into a pandas DataFrame. There are 3 file handling options that you can choose from to load the data files referenced by the labeled datasets:\n",
"* Streaming: The default option to load data files.\n",
"* Download: Download your data files to a local path.\n",
"* Mount: Mount your data files to a mount point. Mount only works for Linux-based compute, including Azure Machine Learning notebook VM and Azure Machine Learning Compute."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"animal_pd = animal_labels.to_pandas_dataframe(file_handling_option=FileHandlingOption.DOWNLOAD, target_path='./download/', overwrite_download=True)\n",
"animal_pd"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"import matplotlib.image as mpimg\n",
"\n",
"# read images from downloaded path\n",
"img = mpimg.imread(animal_pd.loc[0,'image_url'])\n",
"imgplot = plt.imshow(img)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"You can also load labeled datasets into [torchvision datasets](https://pytorch.org/docs/stable/torchvision/datasets.html), so that you can leverage the open source libraries provided by PyTorch for image transformation and training."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from torchvision.transforms import functional as F\n",
"\n",
"# load animal_labels dataset into torchvision dataset\n",
"pytorch_dataset = animal_labels.to_torchvision()\n",
"img = pytorch_dataset[0][0]\n",
"print(type(img))\n",
"\n",
"# use methods from torchvision to transform the img into grayscale\n",
"pil_image = F.to_pil_image(img)\n",
"gray_image = F.to_grayscale(pil_image, num_output_channels=3)\n",
"\n",
"imgplot = plt.imshow(gray_image)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Train an image classification model\n",
"\n",
"The `crack_labels` dataset used in this tutorial section is the output from a labeling project, with the task type of \"Image Classification Multi-class\". We will use this dataset to train an image classification model that classifies whether an image has cracks or not."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# get crack_labels dataset from the workspace\n",
"crack_labels = Dataset.get_by_name(workspace, 'crack_labels')\n",
"crack_labels"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Configure Estimator for training\n",
"\n",
"You can ask the system to build a conda environment based on your dependency specification. Once the environment is built, and if you don't change your dependencies, it will be reused in subsequent runs."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core import Environment\n",
"from azureml.core.conda_dependencies import CondaDependencies\n",
"\n",
"conda_env = Environment('conda-env')\n",
"conda_env.python.conda_dependencies = CondaDependencies.create(pip_packages=['azureml-sdk',\n",
" 'azureml-contrib-dataset',\n",
" 'torch','torchvision',\n",
" 'azureml-dataprep[pandas]'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"An estimator object is used to submit the run. Azure Machine Learning has pre-configured estimators for common machine learning frameworks, as well as a generic Estimator. Create a generic estimator by specifying:\n",
"\n",
"* The name of the estimator object, `est`\n",
"* The directory that contains your scripts. All the files in this directory are uploaded into the cluster nodes for execution. \n",
"* The training script name, train.py\n",
"* The input dataset for training\n",
"* The compute target. In this case you will use the AmlCompute you created\n",
"* The environment definition for the experiment"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.train.estimator import Estimator\n",
"\n",
"est = Estimator(source_directory=script_folder, \n",
" entry_script='train.py',\n",
" inputs=[crack_labels.as_named_input('crack_labels')],\n",
" compute_target=compute_target,\n",
" environment_definition= conda_env)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Submit job to run\n",
"\n",
"Submit the estimator to the Azure ML experiment to kick off the execution."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"run = exp.submit(est)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"run.wait_for_completion(show_output=True)"
]
}
],
"metadata": {
"authors": [
{
"name": "sihhu"
}
],
"category": "tutorial",
"compute": [
"Remote"
],
"deployment": [
"None"
],
"exclude_from_index": false,
"framework": [
"Azure ML"
],
"friendly_name": "Introduction to labeled datasets",
"index_order": 1,
"kernelspec": {
"display_name": "Python 3.6",
"language": "python",
"name": "python36"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
},
"nteract": {
"version": "nteract-front-end@1.0.0"
},
"star_tag": [
"featured"
],
"tags": [
"Dataset",
"label",
"Estimator"
],
"task": "Train"
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -0,0 +1,106 @@
import os
import torchvision
import torchvision.transforms as transforms
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from azureml.core import Dataset, Run
import azureml.contrib.dataset
from azureml.contrib.dataset import FileHandlingOption, LabeledDatasetTask
# Handle to the run this script is executing in; it exposes the datasets
# that were attached to the run as named inputs.
run = Run.get_context()

# Look up the labeled dataset by its input name and convert it into a
# torchvision-style dataset.
labeled_dataset = run.input_datasets['crack_labels']
pytorch_dataset = labeled_dataset.to_torchvision()

# Shuffle the sample indices once, then take the first 40 for training and
# the last 10 for testing.
# NOTE(review): the two slices overlap when the dataset holds fewer than 50
# samples -- confirm the dataset size before trusting the reported accuracy.
indices = torch.randperm(len(pytorch_dataset)).tolist()
dataset_train = torch.utils.data.Subset(pytorch_dataset, indices[:40])
dataset_test = torch.utils.data.Subset(pytorch_dataset, indices[-10:])

# Both loaders share the same batching options.
loader_options = dict(batch_size=4, shuffle=True, num_workers=0)
trainloader = torch.utils.data.DataLoader(dataset_train, **loader_options)
testloader = torch.utils.data.DataLoader(dataset_test, **loader_options)
class Net(nn.Module):
    """Small convolutional classifier: two conv/pool stages and three linear layers.

    The first linear layer is sized for 16 feature maps of 71 x 71, which is
    what the two ``Conv2d(kernel_size=5)`` + ``MaxPool2d(2, 2)`` stages produce
    for 3-channel inputs of 296 x 296 pixels
    (296 -> 292 -> 146 -> 142 -> 71).

    Args:
        num_classes: Number of output logits. Defaults to 10, the value that
            was previously hard-coded, so ``Net()`` builds the same
            architecture as before.
    """

    # Flattened size of the feature maps fed into the first linear layer.
    # Defined once so that __init__ and forward cannot drift apart.
    _FLAT_FEATURES = 16 * 71 * 71

    def __init__(self, num_classes=10):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(self._FLAT_FEATURES, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)

    def forward(self, x):
        """Return the raw class logits for a batch of images ``x``."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten every sample to a vector while keeping the batch dimension.
        x = x.view(x.size(0), self._FLAT_FEATURES)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
# --- Training -------------------------------------------------------------
net = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

for epoch in range(2):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 5 == 4:  # print every 5 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 5))
            running_loss = 0.0

print('Finished Training')

# Label names exposed by the dataset wrapped inside the training Subset; not
# used further in this part of the script.
classes = trainloader.dataset.dataset.labels

# --- Save and reload the weights -------------------------------------------
PATH = './cifar_net.pth'
torch.save(net.state_dict(), PATH)

dataiter = iter(testloader)
# Use the built-in next(): DataLoader iterators implement __next__, while the
# legacy ``.next()`` alias is not available in current PyTorch releases.
images, labels = next(dataiter)

# Start from a fresh model so the evaluation uses the saved weights.
net = Net()
net.load_state_dict(torch.load(PATH))

# Run a single test batch through the reloaded model; ``predicted`` is
# overwritten by the evaluation loop below.
outputs = net(images)
_, predicted = torch.max(outputs, 1)

# --- Evaluation ------------------------------------------------------------
correct = 0
total = 0
with torch.no_grad():  # no gradients are needed for evaluation
    for data in testloader:
        images, labels = data
        outputs = net(images)
        # The index of the highest score is the predicted class.
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10 test images: %d %%' % (100 * correct / total))

View File

@@ -0,0 +1,35 @@
import os
def convert(imgf, labelf, outf, n):
    """Convert a pair of binary image/label files into one CSV file.

    Skips the first 16 bytes of the image file and the first 8 bytes of the
    label file, then writes ``n`` lines to ``outf``. Each line holds one label
    byte followed by the 28 * 28 pixel bytes of the matching image, all as
    decimal integers separated by commas.

    Rows are written as they are read, so memory use does not grow with ``n``,
    and all three files are closed even when an error occurs.

    Args:
        imgf: Path of the binary image file.
        labelf: Path of the binary label file.
        outf: Path of the CSV file to create (overwritten if it exists).
        n: Number of records to convert.

    Raises:
        ValueError: If either input file ends before ``n`` records were read.
            Rows converted up to that point remain in ``outf``.
    """
    pixels_per_image = 28 * 28
    with open(imgf, "rb") as images, \
            open(labelf, "rb") as labels, \
            open(outf, "w") as out:
        # Skip the fixed-size headers that precede the raw data.
        images.read(16)
        labels.read(8)
        for index in range(n):
            label = labels.read(1)
            # One read per image instead of one read per pixel.
            pixels = images.read(pixels_per_image)
            if len(label) != 1 or len(pixels) != pixels_per_image:
                raise ValueError(
                    "input ended after %d of %d records" % (index, n))
            # Iterating over bytes yields ints, so this gives label,p0,p1,...
            out.write(",".join(map(str, label + pixels)) + "\n")
# The input and output directories are handed to the script through
# environment variables; a missing variable raises KeyError.
mounted_input_path = os.environ['fashion_ds']
mounted_output_path = os.environ['AZUREML_DATAREFERENCE_prepared_fashion_ds']
os.makedirs(mounted_output_path, exist_ok=True)

# (image file, label file, output CSV, number of records), converted in order:
# the training split first, then the test split.
for images_name, labels_name, csv_name, record_count in (
    ('train-images-idx3-ubyte', 'train-labels-idx1-ubyte',
     'mnist_train.csv', 60000),
    ('t10k-images-idx3-ubyte', 't10k-labels-idx1-ubyte',
     'mnist_test.csv', 10000),
):
    convert(os.path.join(mounted_input_path, images_name),
            os.path.join(mounted_input_path, labels_name),
            os.path.join(mounted_output_path, csv_name),
            record_count)

Some files were not shown because too many files have changed in this diff Show More