mirror of
https://github.com/Azure/MachineLearningNotebooks.git
synced 2025-12-23 20:00:06 -05:00
Compare commits
10 Commits
azureml-sd
...
azureml-sd
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c95f970dc8 | ||
|
|
9b9d112719 | ||
|
|
fe8fcd4b48 | ||
|
|
296ae01587 | ||
|
|
8f4efe15eb | ||
|
|
d179080467 | ||
|
|
0040644e7a | ||
|
|
8aa04307fb | ||
|
|
a525da4488 | ||
|
|
e149565a8a |
@@ -103,7 +103,7 @@
|
||||
"source": [
|
||||
"import azureml.core\n",
|
||||
"\n",
|
||||
"print(\"This notebook was created using version 1.0.76.2 of the Azure ML SDK\")\n",
|
||||
"print(\"This notebook was created using version 1.0.83 of the Azure ML SDK\")\n",
|
||||
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -13,7 +13,6 @@ dependencies:
|
||||
- scikit-learn>=0.19.0,<=0.20.3
|
||||
- pandas>=0.22.0,<=0.23.4
|
||||
- py-xgboost<=0.80
|
||||
- pyarrow>=0.11.0
|
||||
- fbprophet==0.5
|
||||
- pytorch=1.1.0
|
||||
- cudatoolkit=9.0
|
||||
@@ -30,7 +29,7 @@ dependencies:
|
||||
- pytorch-transformers==1.0.0
|
||||
- spacy==2.1.8
|
||||
- joblib
|
||||
- onnxruntime==0.4.0
|
||||
- onnxruntime==1.0.0
|
||||
- https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
|
||||
|
||||
channels:
|
||||
|
||||
@@ -14,7 +14,6 @@ dependencies:
|
||||
- scikit-learn>=0.19.0,<=0.20.3
|
||||
- pandas>=0.22.0,<0.23.0
|
||||
- py-xgboost<=0.80
|
||||
- pyarrow>=0.11.0
|
||||
- fbprophet==0.5
|
||||
- pytorch=1.1.0
|
||||
- cudatoolkit=9.0
|
||||
@@ -31,7 +30,7 @@ dependencies:
|
||||
- pytorch-transformers==1.0.0
|
||||
- spacy==2.1.8
|
||||
- joblib
|
||||
- onnxruntime==0.4.0
|
||||
- onnxruntime==1.0.0
|
||||
- https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
|
||||
|
||||
channels:
|
||||
|
||||
@@ -288,7 +288,7 @@
|
||||
"|**blacklist_models** | *List* of *strings* indicating machine learning algorithms for AutoML to avoid in this run. <br><br> Allowed values for **Classification**<br><i>LogisticRegression</i><br><i>SGD</i><br><i>MultinomialNaiveBayes</i><br><i>BernoulliNaiveBayes</i><br><i>SVM</i><br><i>LinearSVM</i><br><i>KNN</i><br><i>DecisionTree</i><br><i>RandomForest</i><br><i>ExtremeRandomTrees</i><br><i>LightGBM</i><br><i>GradientBoosting</i><br><i>TensorFlowDNN</i><br><i>TensorFlowLinearClassifier</i><br><br>Allowed values for **Regression**<br><i>ElasticNet</i><br><i>GradientBoosting</i><br><i>DecisionTree</i><br><i>KNN</i><br><i>LassoLars</i><br><i>SGD</i><br><i>RandomForest</i><br><i>ExtremeRandomTrees</i><br><i>LightGBM</i><br><i>TensorFlowLinearRegressor</i><br><i>TensorFlowDNN</i><br><br>Allowed values for **Forecasting**<br><i>ElasticNet</i><br><i>GradientBoosting</i><br><i>DecisionTree</i><br><i>KNN</i><br><i>LassoLars</i><br><i>SGD</i><br><i>RandomForest</i><br><i>ExtremeRandomTrees</i><br><i>LightGBM</i><br><i>TensorFlowLinearRegressor</i><br><i>TensorFlowDNN</i><br><i>Arima</i><br><i>Prophet</i>|\n",
|
||||
"| **whitelist_models** | *List* of *strings* indicating machine learning algorithms for AutoML to use in this run. Same values listed above for **blacklist_models** allowed for **whitelist_models**.|\n",
|
||||
"|**experiment_exit_score**| Value indicating the target for *primary_metric*. <br>Once the target is surpassed the run terminates.|\n",
|
||||
"|**experiment_timeout_minutes**| Maximum amount of time in minutes that all iterations combined can take before the experiment terminates.|\n",
|
||||
"|**experiment_timeout_hours**| Maximum amount of time in hours that all iterations combined can take before the experiment terminates.|\n",
|
||||
"|**enable_early_stopping**| Flag to enable early termination if the score is not improving in the short term.|\n",
|
||||
"|**featurization**| 'auto' / 'off' Indicator for whether featurization step should be done automatically or not. Note: If the input data is sparse, featurization cannot be turned on.|\n",
|
||||
"|**n_cross_validations**|Number of cross validation splits.|\n",
|
||||
@@ -306,7 +306,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"automl_settings = {\n",
|
||||
" \"experiment_timeout_minutes\" : 20,\n",
|
||||
" \"experiment_timeout_hours\" : 0.3,\n",
|
||||
" \"enable_early_stopping\" : True,\n",
|
||||
" \"iteration_timeout_minutes\": 5,\n",
|
||||
" \"max_concurrent_iterations\": 4,\n",
|
||||
@@ -694,10 +694,10 @@
|
||||
"from azureml.core.webservice import AciWebservice\n",
|
||||
"from azureml.core.webservice import Webservice\n",
|
||||
"from azureml.core.model import Model\n",
|
||||
"from azureml.core.environment import Environment\n",
|
||||
"\n",
|
||||
"inference_config = InferenceConfig(runtime = \"python\", \n",
|
||||
" entry_script = script_file_name,\n",
|
||||
" conda_file = conda_env_file_name)\n",
|
||||
"myenv = Environment.from_conda_specification(name=\"myenv\", file_path=conda_env_file_name)\n",
|
||||
"inference_config = InferenceConfig(entry_script=script_file_name, environment=myenv)\n",
|
||||
"\n",
|
||||
"aciconfig = AciWebservice.deploy_configuration(cpu_cores = 1, \n",
|
||||
" memory_gb = 1, \n",
|
||||
|
||||
@@ -2,12 +2,3 @@ name: auto-ml-classification-bank-marketing-all-features
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- interpret
|
||||
- azureml-defaults
|
||||
- azureml-train-automl
|
||||
- azureml-widgets
|
||||
- matplotlib
|
||||
- pandas_ml
|
||||
- onnxruntime==0.4.0
|
||||
- azureml-explain-model
|
||||
- azureml-contrib-interpret
|
||||
|
||||
@@ -213,7 +213,7 @@
|
||||
" \"preprocess\": True,\n",
|
||||
" \"enable_early_stopping\": True,\n",
|
||||
" \"max_concurrent_iterations\": 2, # This is a limit for testing purpose, please increase it as per cluster size\n",
|
||||
" \"experiment_timeout_minutes\": 10, # This is a time limit for testing purposes, remove it for real use cases, this will drastically limit ability to find the best model possible\n",
|
||||
" \"experiment_timeout_hours\": 0.2, # This is a time limit for testing purposes, remove it for real use cases, this will drastically limit ability to find the best model possible\n",
|
||||
" \"verbosity\": logging.INFO,\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
@@ -305,7 +305,7 @@
|
||||
"source": [
|
||||
"#### Explain model\n",
|
||||
"\n",
|
||||
"Automated ML models can be explained and visualized using the SDK Explainability library. [Learn how to use the explainer](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/model-explanation-remote-amlcompute/auto-ml-model-explanations-remote-compute.ipynb)."
|
||||
"Automated ML models can be explained and visualized using the SDK Explainability library. "
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -334,17 +334,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Print the properties of the model\n",
|
||||
"The fitted_model is a python object and you can read the different properties of the object.\n",
|
||||
"See *Print the properties of the model* section in [this sample notebook](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/classification/auto-ml-classification.ipynb)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Deploy\n",
|
||||
"\n",
|
||||
"To deploy the model into a web service endpoint, see _Deploy_ section in [this sample notebook](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/classification-with-deployment/auto-ml-classification-with-deployment.ipynb)"
|
||||
"The fitted_model is a python object and you can read the different properties of the object.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -2,10 +2,3 @@ name: auto-ml-classification-credit-card-fraud
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- interpret
|
||||
- azureml-defaults
|
||||
- azureml-explain-model
|
||||
- azureml-train-automl
|
||||
- azureml-widgets
|
||||
- matplotlib
|
||||
- pandas_ml
|
||||
|
||||
@@ -519,12 +519,12 @@
|
||||
"name": "anshirga"
|
||||
}
|
||||
],
|
||||
"datasets": [
|
||||
"None"
|
||||
],
|
||||
"compute": [
|
||||
"AML Compute"
|
||||
],
|
||||
"datasets": [
|
||||
"None"
|
||||
],
|
||||
"deployment": [
|
||||
"None"
|
||||
],
|
||||
|
||||
@@ -2,9 +2,3 @@ name: auto-ml-classification-text-dnn
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-train-automl
|
||||
- azureml-train
|
||||
- azureml-widgets
|
||||
- matplotlib
|
||||
- pandas_ml
|
||||
- statsmodels
|
||||
|
||||
@@ -210,7 +210,24 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Data Ingestion Pipeline \n",
|
||||
"For this demo, we will use NOAA weather data from [Azure Open Datasets](https://azure.microsoft.com/services/open-datasets/). You can replace this with your own dataset, or you can skip this pipeline if you already have a time-series based `TabularDataset`.\n",
|
||||
"For this demo, we will use NOAA weather data from [Azure Open Datasets](https://azure.microsoft.com/services/open-datasets/). You can replace this with your own dataset, or you can skip this pipeline if you already have a time-series based `TabularDataset`.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# The name and target column of the Dataset to create \n",
|
||||
"dataset = \"NOAA-Weather-DS4\"\n",
|
||||
"target_column_name = \"temperature\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"\n",
|
||||
"### Upload Data Step\n",
|
||||
"The data ingestion pipeline has a single step with a script to query the latest weather data and upload it to the blob store. During the first run, the script will create and register a time-series based `TabularDataset` with the past one week of weather data. For each subsequent run, the script will create a partition in the blob store by querying NOAA for new weather data since the last modified time of the dataset (`dataset.data_changed_time`) and creating a data.csv file."
|
||||
@@ -225,8 +242,6 @@
|
||||
"from azureml.pipeline.core import Pipeline, PipelineParameter\n",
|
||||
"from azureml.pipeline.steps import PythonScriptStep\n",
|
||||
"\n",
|
||||
"# The name of the Dataset to create \n",
|
||||
"dataset = \"NOAA-Weather-DS4\"\n",
|
||||
"ds_name = PipelineParameter(name=\"ds_name\", default_value=dataset)\n",
|
||||
"upload_data_step = PythonScriptStep(script_name=\"upload_weather_data.py\", \n",
|
||||
" allow_reuse=False,\n",
|
||||
@@ -272,7 +287,7 @@
|
||||
"## Training Pipeline\n",
|
||||
"### Prepare Training Data Step\n",
|
||||
"\n",
|
||||
"Script to bring data into common X,y format. We need to set allow_reuse flag to False to allow the pipeline to run even when inputs don't change. We also need the name of the model to check the time the model was last trained."
|
||||
"Script to check if new data is available since the model was last trained. If no new data is available, we cancel the remaining pipeline steps. We need to set allow_reuse flag to False to allow the pipeline to run even when inputs don't change. We also need the name of the model to check the time the model was last trained."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -283,11 +298,8 @@
|
||||
"source": [
|
||||
"from azureml.pipeline.core import PipelineData\n",
|
||||
"\n",
|
||||
"target_column = PipelineParameter(\"target_column\", default_value=\"y\")\n",
|
||||
"# The model name with which to register the trained model in the workspace.\n",
|
||||
"model_name = PipelineParameter(\"model_name\", default_value=\"y\")\n",
|
||||
"output_x = PipelineData(\"output_x\", datastore=dstor)\n",
|
||||
"output_y = PipelineData(\"output_y\", datastore=dstor)"
|
||||
"model_name = PipelineParameter(\"model_name\", default_value=\"noaaweatherds\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -299,16 +311,23 @@
|
||||
"data_prep_step = PythonScriptStep(script_name=\"check_data.py\", \n",
|
||||
" allow_reuse=False,\n",
|
||||
" name=\"check_data\",\n",
|
||||
" arguments=[\"--target_column\", target_column,\n",
|
||||
" \"--output_x\", output_x,\n",
|
||||
" \"--output_y\", output_y,\n",
|
||||
" \"--ds_name\", ds_name,\n",
|
||||
" \"--model_name\", model_name],\n",
|
||||
" outputs=[output_x, output_y], \n",
|
||||
" arguments=[\"--ds_name\", ds_name,\n",
|
||||
" \"--model_name\", model_name],\n",
|
||||
" compute_target=compute_target, \n",
|
||||
" runconfig=conda_run_config)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core import Dataset\n",
|
||||
"train_ds = Dataset.get_by_name(ws, dataset)\n",
|
||||
"train_ds = train_ds.drop_columns([\"partition_date\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -324,11 +343,11 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.train.automl import AutoMLConfig\n",
|
||||
"from azureml.train.automl.runtime import AutoMLStep\n",
|
||||
"from azureml.train.automl import AutoMLStep\n",
|
||||
"\n",
|
||||
"automl_settings = {\n",
|
||||
" \"iteration_timeout_minutes\": 20,\n",
|
||||
" \"experiment_timeout_minutes\": 30,\n",
|
||||
" \"iteration_timeout_minutes\": 10,\n",
|
||||
" \"experiment_timeout_hours\": 0.2,\n",
|
||||
" \"n_cross_validations\": 3,\n",
|
||||
" \"primary_metric\": 'r2_score',\n",
|
||||
" \"preprocess\": True,\n",
|
||||
@@ -342,8 +361,8 @@
|
||||
" debug_log = 'automl_errors.log',\n",
|
||||
" path = \".\",\n",
|
||||
" compute_target=compute_target,\n",
|
||||
" run_configuration=conda_run_config,\n",
|
||||
" data_script = \"get_data.py\",\n",
|
||||
" training_data = train_ds,\n",
|
||||
" label_column_name = target_column_name,\n",
|
||||
" **automl_settings\n",
|
||||
" )"
|
||||
]
|
||||
@@ -359,7 +378,7 @@
|
||||
"metrics_output_name = 'metrics_output'\n",
|
||||
"best_model_output_name = 'best_model_output'\n",
|
||||
"\n",
|
||||
"metirics_data = PipelineData(name='metrics_data',\n",
|
||||
"metrics_data = PipelineData(name='metrics_data',\n",
|
||||
" datastore=dstor,\n",
|
||||
" pipeline_output_name=metrics_output_name,\n",
|
||||
" training_output=TrainingOutput(type='Metrics'))\n",
|
||||
@@ -378,8 +397,7 @@
|
||||
"automl_step = AutoMLStep(\n",
|
||||
" name='automl_module',\n",
|
||||
" automl_config=automl_config,\n",
|
||||
" inputs=[output_x, output_y],\n",
|
||||
" outputs=[metirics_data, model_data],\n",
|
||||
" outputs=[metrics_data, model_data],\n",
|
||||
" allow_reuse=False)"
|
||||
]
|
||||
},
|
||||
@@ -432,7 +450,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"training_pipeline_run = experiment.submit(training_pipeline, pipeline_parameters={\n",
|
||||
" \"target_column\": \"temperature\", \"ds_name\": dataset, \"model_name\": \"noaaweatherds\"})"
|
||||
" \"ds_name\": dataset, \"model_name\": \"noaaweatherds\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -475,7 +493,7 @@
|
||||
"source": [
|
||||
"from azureml.pipeline.core import Schedule\n",
|
||||
"schedule = Schedule.create(workspace=ws, name=\"RetrainingSchedule\",\n",
|
||||
" pipeline_parameters={\"target_column\": \"temperature\",\"ds_name\": dataset, \"model_name\": \"noaaweatherds\"},\n",
|
||||
" pipeline_parameters={\"ds_name\": dataset, \"model_name\": \"noaaweatherds\"},\n",
|
||||
" pipeline_id=published_pipeline.id, \n",
|
||||
" experiment_name=experiment_name, \n",
|
||||
" datastore=dstor,\n",
|
||||
|
||||
@@ -2,8 +2,3 @@ name: auto-ml-continuous-retraining
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-train-automl
|
||||
- azureml-pipeline
|
||||
- azureml-widgets
|
||||
- matplotlib
|
||||
- pandas_ml
|
||||
|
||||
@@ -15,32 +15,16 @@ if type(run) == _OfflineRun:
|
||||
else:
|
||||
ws = run.experiment.workspace
|
||||
|
||||
|
||||
def write_output(df, path):
|
||||
os.makedirs(path, exist_ok=True)
|
||||
print("%s created" % path)
|
||||
df.to_csv(path + "/part-00000", index=False)
|
||||
|
||||
|
||||
print("Check for new data and prepare the data")
|
||||
print("Check for new data.")
|
||||
|
||||
parser = argparse.ArgumentParser("split")
|
||||
parser.add_argument("--target_column", type=str, help="input split features")
|
||||
parser.add_argument("--ds_name", help="input dataset name")
|
||||
parser.add_argument("--model_name", help="name of the deployed model")
|
||||
parser.add_argument("--output_x", type=str,
|
||||
help="output features")
|
||||
parser.add_argument("--output_y", type=str,
|
||||
help="output labels")
|
||||
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
print("Argument 1(ds_name): %s" % args.ds_name)
|
||||
print("Argument 2(target_column): %s" % args.target_column)
|
||||
print("Argument 3(model_name): %s" % args.model_name)
|
||||
print("Argument 4(output_x): %s" % args.output_x)
|
||||
print("Argument 5(output_y): %s" % args.output_y)
|
||||
print("Argument 2(model_name): %s" % args.model_name)
|
||||
|
||||
# Get the latest registered model
|
||||
try:
|
||||
@@ -54,22 +38,9 @@ except Exception as e:
|
||||
train_ds = Dataset.get_by_name(ws, args.ds_name)
|
||||
dataset_changed_time = train_ds.data_changed_time
|
||||
|
||||
if dataset_changed_time > last_train_time:
|
||||
# New data is available since the model was last trained
|
||||
print("Dataset was last updated on {0}. Retraining...".format(dataset_changed_time))
|
||||
train_ds = train_ds.drop_columns(["partition_date"])
|
||||
X_train = train_ds.drop_columns(
|
||||
columns=[args.target_column]).to_pandas_dataframe()
|
||||
y_train = train_ds.keep_columns(
|
||||
columns=[args.target_column]).to_pandas_dataframe()
|
||||
|
||||
non_null = y_train[args.target_column].notnull()
|
||||
y = y_train[non_null]
|
||||
X = X_train[non_null]
|
||||
|
||||
if not (args.output_x is None and args.output_y is None):
|
||||
write_output(X, args.output_x)
|
||||
write_output(y, args.output_y)
|
||||
else:
|
||||
if not dataset_changed_time > last_train_time:
|
||||
print("Cancelling run since there is no new data.")
|
||||
run.parent.cancel()
|
||||
else:
|
||||
# New data is available since the model was last trained
|
||||
print("Dataset was last updated on {0}. Retraining...".format(dataset_changed_time))
|
||||
|
||||
@@ -1,15 +0,0 @@
|
||||
import os
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def get_data():
|
||||
print("In get_data")
|
||||
print(os.environ['AZUREML_DATAREFERENCE_output_x'])
|
||||
X_train = pd.read_csv(
|
||||
os.environ['AZUREML_DATAREFERENCE_output_x'] + "/part-00000")
|
||||
y_train = pd.read_csv(
|
||||
os.environ['AZUREML_DATAREFERENCE_output_y'] + "/part-00000")
|
||||
|
||||
print(X_train.head(3))
|
||||
|
||||
return {"X": X_train.values, "y": y_train.values.flatten()}
|
||||
@@ -58,7 +58,7 @@ except Exception as e:
|
||||
print(traceback.format_exc())
|
||||
print("Dataset with name {0} not found, registering new dataset.".format(args.ds_name))
|
||||
register_dataset = True
|
||||
end_time_last_slice = datetime.today() - relativedelta(weeks=1)
|
||||
end_time_last_slice = datetime.today() - relativedelta(weeks=2)
|
||||
|
||||
end_time = datetime.utcnow()
|
||||
train_df = get_noaa_data(end_time_last_slice, end_time)
|
||||
@@ -80,10 +80,10 @@ if train_df.size > 0:
|
||||
target_path=folder_name,
|
||||
overwrite=True,
|
||||
show_progress=True)
|
||||
|
||||
if register_dataset:
|
||||
ds = Dataset.Tabular.from_delimited_files(dstor.path("{}/**/*.csv".format(
|
||||
args.ds_name)), partition_format='/{partition_date:yyyy/MM/dd/hh/mm/ss}/data.csv')
|
||||
ds.register(ws, name=args.ds_name)
|
||||
else:
|
||||
print("No new data since {0}.".format(end_time_last_slice))
|
||||
|
||||
if register_dataset:
|
||||
ds = Dataset.Tabular.from_delimited_files(dstor.path("{}/**/*.csv".format(
|
||||
args.ds_name)), partition_format='/{partition_date:yyyy/MM/dd/HH/mm/ss}/data.csv')
|
||||
ds.register(ws, name=args.ds_name)
|
||||
|
||||
@@ -358,7 +358,7 @@
|
||||
"\n",
|
||||
"automl_config = AutoMLConfig(task='forecasting', \n",
|
||||
" primary_metric='normalized_root_mean_squared_error',\n",
|
||||
" experiment_timeout_minutes = 60,\n",
|
||||
" experiment_timeout_hours = 1,\n",
|
||||
" training_data=train_dataset,\n",
|
||||
" label_column_name=target_column_name,\n",
|
||||
" validation_data=valid_dataset, \n",
|
||||
|
||||
@@ -1,12 +1,4 @@
|
||||
name: auto-ml-forecasting-beer-remote
|
||||
dependencies:
|
||||
- fbprophet==0.5
|
||||
- py-xgboost<=0.80
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-train-automl
|
||||
- azureml-train
|
||||
- azureml-widgets
|
||||
- matplotlib
|
||||
- pandas_ml
|
||||
- statsmodels
|
||||
|
||||
@@ -202,7 +202,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dataset = Dataset.Tabular.from_delimited_files(path = [(datastore, 'dataset/bike-no.csv')]).with_timestamp_columns(fine_grain_timestamp=time_column_name) \n",
|
||||
"dataset.take(5).to_pandas_dataframe()"
|
||||
"dataset.take(5).to_pandas_dataframe().reset_index(drop=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -221,8 +221,8 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# select data that occurs before a specified date\n",
|
||||
"train = dataset.time_before(datetime(2012, 9, 1))\n",
|
||||
"train.to_pandas_dataframe().tail(5)"
|
||||
"train = dataset.time_before(datetime(2012, 8, 31), include_boundary=True)\n",
|
||||
"train.to_pandas_dataframe().tail(5).reset_index(drop=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -231,8 +231,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"test = dataset.time_after(datetime(2012, 8, 31))\n",
|
||||
"test.to_pandas_dataframe().head(5)"
|
||||
"test = dataset.time_after(datetime(2012, 9, 1), include_boundary=True)\n",
|
||||
"test.to_pandas_dataframe().head(5).reset_index(drop=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -247,8 +247,8 @@
|
||||
"|-|-|\n",
|
||||
"|**task**|forecasting|\n",
|
||||
"|**primary_metric**|This is the metric that you want to optimize.<br> Forecasting supports the following primary metrics <br><i>spearman_correlation</i><br><i>normalized_root_mean_squared_error</i><br><i>r2_score</i><br><i>normalized_mean_absolute_error</i>\n",
|
||||
"|**blacklist_models**|Models in blacklist won't be used by AutoML. All supported models can be found [here](https://docs.microsoft.com/en-us/python/api/azureml-train-automl/azureml.train.automl.constants.supportedmodels.regression?view=azure-ml-py).|\n",
|
||||
"|**experiment_timeout_minutes**|Experimentation timeout in minutes.|\n",
|
||||
"|**blacklist_models**|Models in blacklist won't be used by AutoML. All supported models can be found [here](https://docs.microsoft.com/en-us/python/api/azureml-train-automl-client/azureml.train.automl.constants.supportedmodels.forecasting?view=azure-ml-py).|\n",
|
||||
"|**experiment_timeout_hours**|Experimentation timeout in hours.|\n",
|
||||
"|**training_data**|Input dataset, containing both features and label column.|\n",
|
||||
"|**label_column_name**|The name of the label column.|\n",
|
||||
"|**compute_target**|The remote compute for training.|\n",
|
||||
@@ -260,7 +260,7 @@
|
||||
"|**target_lags**|The target_lags specifies how far back we will construct the lags of the target variable.|\n",
|
||||
"|**drop_column_names**|Name(s) of columns to drop prior to modeling|\n",
|
||||
"\n",
|
||||
"This notebook uses the blacklist_models parameter to exclude some models that take a longer time to train on this dataset. You can choose to remove models from the blacklist_models list but you may need to increase the experiment_timeout_minutes parameter value to get results."
|
||||
"This notebook uses the blacklist_models parameter to exclude some models that take a longer time to train on this dataset. You can choose to remove models from the blacklist_models list but you may need to increase the experiment_timeout_hours parameter value to get results."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -305,7 +305,7 @@
|
||||
"automl_config = AutoMLConfig(task='forecasting', \n",
|
||||
" primary_metric='normalized_root_mean_squared_error',\n",
|
||||
" blacklist_models = ['ExtremeRandomTrees'], \n",
|
||||
" experiment_timeout_minutes=20,\n",
|
||||
" experiment_timeout_hours=0.3,\n",
|
||||
" training_data=train,\n",
|
||||
" label_column_name=target_column_name,\n",
|
||||
" compute_target=compute_target,\n",
|
||||
|
||||
@@ -1,11 +1,4 @@
|
||||
name: auto-ml-forecasting-bike-share
|
||||
dependencies:
|
||||
- fbprophet==0.5
|
||||
- py-xgboost<=0.80
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-train-automl
|
||||
- azureml-widgets
|
||||
- matplotlib
|
||||
- pandas_ml
|
||||
- statsmodels
|
||||
|
||||
@@ -32,18 +32,17 @@ test_dataset = run.input_datasets['test_data']
|
||||
|
||||
grain_column_names = []
|
||||
|
||||
df = test_dataset.to_pandas_dataframe()
|
||||
df = test_dataset.to_pandas_dataframe().reset_index(drop=True)
|
||||
|
||||
X_test_df = test_dataset.drop_columns(columns=[target_column_name])
|
||||
y_test_df = test_dataset.with_timestamp_columns(
|
||||
None).keep_columns(columns=[target_column_name])
|
||||
X_test_df = test_dataset.drop_columns(columns=[target_column_name]).to_pandas_dataframe().reset_index(drop=True)
|
||||
y_test_df = test_dataset.with_timestamp_columns(None).keep_columns(columns=[target_column_name]).to_pandas_dataframe()
|
||||
|
||||
fitted_model = joblib.load('model.pkl')
|
||||
|
||||
df_all = forecasting_helper.do_rolling_forecast(
|
||||
fitted_model,
|
||||
X_test_df.to_pandas_dataframe(),
|
||||
y_test_df.to_pandas_dataframe().values.T[0],
|
||||
X_test_df,
|
||||
y_test_df.values.T[0],
|
||||
target_column_name,
|
||||
time_column_name,
|
||||
max_horizon,
|
||||
|
||||
@@ -32,7 +32,7 @@
|
||||
"\n",
|
||||
"Advanced Forecasting\n",
|
||||
"1. [Advanced Training](#advanced_training)\n",
|
||||
"1. [Advanced Results](#advanced Results)"
|
||||
"1. [Advanced Results](#advanced_results)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -211,7 +211,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dataset = Dataset.Tabular.from_delimited_files(path = \"https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/nyc_energy.csv\").with_timestamp_columns(fine_grain_timestamp=time_column_name) \n",
|
||||
"dataset.take(5).to_pandas_dataframe()"
|
||||
"dataset.take(5).to_pandas_dataframe().reset_index(drop=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -253,7 +253,7 @@
|
||||
"source": [
|
||||
"# split into train based on time\n",
|
||||
"train = dataset.time_before(datetime(2017, 8, 8, 5), include_boundary=True)\n",
|
||||
"train.to_pandas_dataframe().sort_values(time_column_name).tail(5)"
|
||||
"train.to_pandas_dataframe().sort_values(time_column_name).tail(5).reset_index(drop=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -263,8 +263,8 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# split into test based on time\n",
|
||||
"test = dataset.time_between(datetime(2017, 8, 8, 5), datetime(2017, 8, 10, 5))\n",
|
||||
"test.to_pandas_dataframe().head(5)"
|
||||
"test = dataset.time_between(datetime(2017, 8, 8, 6), datetime(2017, 8, 10, 5))\n",
|
||||
"test.to_pandas_dataframe().head(5).reset_index(drop=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -301,8 +301,8 @@
|
||||
"|-|-|\n",
|
||||
"|**task**|forecasting|\n",
|
||||
"|**primary_metric**|This is the metric that you want to optimize.<br> Forecasting supports the following primary metrics <br><i>spearman_correlation</i><br><i>normalized_root_mean_squared_error</i><br><i>r2_score</i><br><i>normalized_mean_absolute_error</i>|\n",
|
||||
"|**blacklist_models**|Models in blacklist won't be used by AutoML. All supported models can be found [here](https://docs.microsoft.com/en-us/python/api/azureml-train-automl/azureml.train.automl.constants.supportedmodels.regression?view=azure-ml-py).|\n",
|
||||
"|**experiment_timeout_minutes**|Maximum amount of time in minutes that the experiment can take before it terminates.|\n",
|
||||
"|**blacklist_models**|Models in blacklist won't be used by AutoML. All supported models can be found [here](https://docs.microsoft.com/en-us/python/api/azureml-train-automl-client/azureml.train.automl.constants.supportedmodels.forecasting?view=azure-ml-py).|\n",
|
||||
"|**experiment_timeout_hours**|Maximum amount of time in hours that the experiment can take before it terminates.|\n",
|
||||
"|**training_data**|The training data to be used within the experiment.|\n",
|
||||
"|**label_column_name**|The name of the label column.|\n",
|
||||
"|**compute_target**|The remote compute for training.|\n",
|
||||
@@ -316,7 +316,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This notebook uses the blacklist_models parameter to exclude some models that take a longer time to train on this dataset. You can choose to remove models from the blacklist_models list but you may need to increase the experiment_timeout_minutes parameter value to get results."
|
||||
"This notebook uses the blacklist_models parameter to exclude some models that take a longer time to train on this dataset. You can choose to remove models from the blacklist_models list but you may need to increase the experiment_timeout_hours parameter value to get results."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -333,7 +333,7 @@
|
||||
"automl_config = AutoMLConfig(task='forecasting', \n",
|
||||
" primary_metric='normalized_root_mean_squared_error',\n",
|
||||
" blacklist_models = ['ExtremeRandomTrees', 'AutoArima', 'Prophet'], \n",
|
||||
" experiment_timeout_minutes=20,\n",
|
||||
" experiment_timeout_hours=0.3,\n",
|
||||
" training_data=train,\n",
|
||||
" label_column_name=target_column_name,\n",
|
||||
" compute_target=compute_target,\n",
|
||||
@@ -454,7 +454,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"X_test = test.to_pandas_dataframe()\n",
|
||||
"X_test = test.to_pandas_dataframe().reset_index(drop=True)\n",
|
||||
"y_test = X_test.pop(target_column_name).values"
|
||||
]
|
||||
},
|
||||
@@ -578,7 +578,7 @@
|
||||
"automl_config = AutoMLConfig(task='forecasting', \n",
|
||||
" primary_metric='normalized_root_mean_squared_error',\n",
|
||||
" blacklist_models = ['ElasticNet','ExtremeRandomTrees','GradientBoosting','XGBoostRegressor','ExtremeRandomTrees', 'AutoArima', 'Prophet'], #These models are blacklisted for tutorial purposes, remove this for real use cases. \n",
|
||||
" experiment_timeout_minutes=20,\n",
|
||||
" experiment_timeout_hours=0.3,\n",
|
||||
" training_data=train,\n",
|
||||
" label_column_name=target_column_name,\n",
|
||||
" compute_target=compute_target,\n",
|
||||
@@ -633,7 +633,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Advanced Results\n",
|
||||
"## Advanced Results<a id=\"advanced_results\"></a>\n",
|
||||
"We did not use lags in the previous model specification. In effect, the prediction was the result of a simple regression on date, grain and any additional features. This is often a very good prediction as common time series patterns like seasonality and trends can be captured in this manner. Such simple regression is horizon-less: it doesn't matter how far into the future we are predicting, because we are not using past data. In the previous example, the horizon was only used to split the data for cross-validation."
|
||||
]
|
||||
},
|
||||
|
||||
@@ -2,11 +2,3 @@ name: auto-ml-forecasting-energy-demand
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- interpret
|
||||
- azureml-train-automl
|
||||
- azureml-widgets
|
||||
- matplotlib
|
||||
- pandas_ml
|
||||
- statsmodels
|
||||
- azureml-explain-model
|
||||
- azureml-contrib-interpret
|
||||
|
||||
@@ -251,7 +251,7 @@
|
||||
"source": [
|
||||
"automl_settings = {\n",
|
||||
" \"iteration_timeout_minutes\" : 5,\n",
|
||||
" \"experiment_timeout_minutes\" : 15,\n",
|
||||
" \"experiment_timeout_hours\" : 0.25,\n",
|
||||
" \"primary_metric\" : 'normalized_mean_absolute_error',\n",
|
||||
" \"time_column_name\": time_column_name,\n",
|
||||
" \"grain_column_names\": grain_column_names,\n",
|
||||
|
||||
@@ -2,9 +2,3 @@ name: auto-ml-forecasting-grouping
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-train-automl
|
||||
- azureml-pipeline
|
||||
- azureml-widgets
|
||||
- pandas_ml
|
||||
- statsmodels
|
||||
- matplotlib
|
||||
|
||||
@@ -30,11 +30,11 @@ def _get_configs(automlconfig: AutoMLConfig,
|
||||
groups = _get_groups(data, group_column_names)
|
||||
configs = {}
|
||||
for i, group in groups.iterrows():
|
||||
single = data
|
||||
single = data._dataflow
|
||||
group_name = "#####".join(str(x) for x in group.values)
|
||||
group_name = valid_chars.sub('', group_name)
|
||||
for key in group.index:
|
||||
single = single._dataflow.filter(data._dataflow[key] == group[key])
|
||||
single = single.filter(data._dataflow[key] == group[key])
|
||||
t_dataset = TabularDataset._create(single)
|
||||
group_conf = copy.deepcopy(automlconfig)
|
||||
group_conf.user_settings['training_data'] = t_dataset
|
||||
@@ -71,7 +71,7 @@ def build_pipeline_steps(automlconfig: AutoMLConfig,
|
||||
# create each automl step end-to-end (train, register)
|
||||
for group_name, conf in configs.items():
|
||||
# create automl metrics output
|
||||
metirics_data = PipelineData(
|
||||
metrics_data = PipelineData(
|
||||
name='metrics_data_{}'.format(group_name),
|
||||
pipeline_output_name=metrics_output_name.format(group_name),
|
||||
training_output=TrainingOutput(type='Metrics'))
|
||||
@@ -84,7 +84,7 @@ def build_pipeline_steps(automlconfig: AutoMLConfig,
|
||||
automl_step = AutoMLStep(
|
||||
name='automl_{}'.format(group_name),
|
||||
automl_config=conf,
|
||||
outputs=[metirics_data, model_data],
|
||||
outputs=[metrics_data, model_data],
|
||||
allow_reuse=True)
|
||||
steps.append(automl_step)
|
||||
|
||||
|
||||
@@ -335,7 +335,7 @@
|
||||
"automl_config = AutoMLConfig(task='forecasting',\n",
|
||||
" debug_log='automl_forecasting_function.log',\n",
|
||||
" primary_metric='normalized_root_mean_squared_error',\n",
|
||||
" experiment_timeout_minutes=15,\n",
|
||||
" experiment_timeout_hours=0.25,\n",
|
||||
" enable_early_stopping=True,\n",
|
||||
" training_data=train_data,\n",
|
||||
" compute_target=compute_target,\n",
|
||||
|
||||
@@ -1,11 +1,4 @@
|
||||
name: automl-forecasting-function
|
||||
dependencies:
|
||||
- fbprophet==0.5
|
||||
- py-xgboost<=0.80
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-train-automl
|
||||
- azureml-widgets
|
||||
- pandas_ml
|
||||
- statsmodels
|
||||
- matplotlib
|
||||
|
||||
@@ -335,7 +335,7 @@
|
||||
"|-|-|\n",
|
||||
"|**task**|forecasting|\n",
|
||||
"|**primary_metric**|This is the metric that you want to optimize.<br> Forecasting supports the following primary metrics <br><i>spearman_correlation</i><br><i>normalized_root_mean_squared_error</i><br><i>r2_score</i><br><i>normalized_mean_absolute_error</i>\n",
|
||||
"|**experiment_timeout_minutes**|Experimentation timeout in minutes.|\n",
|
||||
"|**experiment_timeout_hours**|Experimentation timeout in hours.|\n",
|
||||
"|**enable_early_stopping**|If early stopping is on, training will stop when the primary metric is no longer improving.|\n",
|
||||
"|**training_data**|Input dataset, containing both features and label column.|\n",
|
||||
"|**label_column_name**|The name of the label column.|\n",
|
||||
@@ -366,7 +366,7 @@
|
||||
"automl_config = AutoMLConfig(task='forecasting',\n",
|
||||
" debug_log='automl_oj_sales_errors.log',\n",
|
||||
" primary_metric='normalized_mean_absolute_error',\n",
|
||||
" experiment_timeout_minutes=15,\n",
|
||||
" experiment_timeout_hours=0.25,\n",
|
||||
" training_data=train_dataset,\n",
|
||||
" label_column_name=target_column_name,\n",
|
||||
" compute_target=compute_target,\n",
|
||||
|
||||
@@ -1,11 +1,4 @@
|
||||
name: auto-ml-forecasting-orange-juice-sales
|
||||
dependencies:
|
||||
- fbprophet==0.5
|
||||
- py-xgboost<=0.80
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-train-automl
|
||||
- azureml-widgets
|
||||
- matplotlib
|
||||
- pandas_ml
|
||||
- statsmodels
|
||||
|
||||
@@ -156,7 +156,7 @@
|
||||
" \"n_cross_validations\": 3,\n",
|
||||
" \"primary_metric\": 'average_precision_score_weighted',\n",
|
||||
" \"preprocess\": True,\n",
|
||||
" \"experiment_timeout_minutes\": 10, # This is a time limit for testing purposes, remove it for real use cases, this will drastically limit ablity to find the best model possible\n",
|
||||
" \"experiment_timeout_hours\": 0.2, # This is a time limit for testing purposes, remove it for real use cases, this will drastically limit ability to find the best model possible\n",
|
||||
" \"verbosity\": logging.INFO,\n",
|
||||
" \"enable_stack_ensemble\": False\n",
|
||||
"}\n",
|
||||
@@ -260,17 +260,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Print the properties of the model\n",
|
||||
"The fitted_model is a python object and you can read the different properties of the object.\n",
|
||||
"See *Print the properties of the model* section in [this sample notebook](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/classification/auto-ml-classification.ipynb)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Deploy\n",
|
||||
"\n",
|
||||
"To deploy the model into a web service endpoint, see _Deploy_ section in [this sample notebook](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/classification-with-deployment/auto-ml-classification-with-deployment.ipynb)"
|
||||
"The fitted_model is a python object and you can read the different properties of the object.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -2,10 +2,3 @@ name: auto-ml-classification-credit-card-fraud-local
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- interpret
|
||||
- azureml-defaults
|
||||
- azureml-explain-model
|
||||
- azureml-train-automl
|
||||
- azureml-widgets
|
||||
- matplotlib
|
||||
- pandas_ml
|
||||
|
||||
@@ -206,7 +206,7 @@
|
||||
"|-|-|\n",
|
||||
"|**task**|classification, regression or forecasting|\n",
|
||||
"|**primary_metric**|This is the metric that you want to optimize. Regression supports the following primary metrics: <br><i>spearman_correlation</i><br><i>normalized_root_mean_squared_error</i><br><i>r2_score</i><br><i>normalized_mean_absolute_error</i>|\n",
|
||||
"|**experiment_timeout_minutes**| Maximum amount of time in minutes that all iterations combined can take before the experiment terminates.|\n",
|
||||
"|**experiment_timeout_hours**| Maximum amount of time in hours that all iterations combined can take before the experiment terminates.|\n",
|
||||
"|**enable_early_stopping**| Flag to enable early termination if the score is not improving in the short term.|\n",
|
||||
"|**featurization**| 'auto' / 'off' / FeaturizationConfig Indicator for whether featurization step should be done automatically or not, or whether customized featurization should be used. Note: If the input data is sparse, featurization cannot be turned on.|\n",
|
||||
"|**n_cross_validations**|Number of cross validation splits.|\n",
|
||||
@@ -262,7 +262,7 @@
|
||||
"source": [
|
||||
"automl_settings = {\n",
|
||||
" \"enable_early_stopping\": True, \n",
|
||||
" \"experiment_timeout_minutes\" : 10,\n",
|
||||
" \"experiment_timeout_hours\" : 0.2,\n",
|
||||
" \"max_concurrent_iterations\": 4,\n",
|
||||
" \"max_cores_per_iteration\": -1,\n",
|
||||
" \"n_cross_validations\": 5,\n",
|
||||
@@ -558,7 +558,6 @@
|
||||
"\n",
|
||||
"# specify CondaDependencies obj\n",
|
||||
"conda_run_config.environment.python.conda_dependencies = CondaDependencies.create(\n",
|
||||
" conda_packages=['scikit-learn', 'numpy','py-xgboost<=0.80'],\n",
|
||||
" pip_packages=azureml_pip_packages)"
|
||||
]
|
||||
},
|
||||
@@ -718,17 +717,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.conda_dependencies import CondaDependencies \n",
|
||||
"\n",
|
||||
"azureml_pip_packages = [\n",
|
||||
" 'azureml-explain-model', 'azureml-train-automl', 'azureml-defaults'\n",
|
||||
"]\n",
|
||||
" \n",
|
||||
"\n",
|
||||
"# specify CondaDependencies obj\n",
|
||||
"myenv = CondaDependencies.create(conda_packages=['scikit-learn', 'pandas', 'numpy', 'py-xgboost<=0.80'],\n",
|
||||
" pip_packages=azureml_pip_packages,\n",
|
||||
" pin_sdk_version=True)\n",
|
||||
"myenv = automl_run.get_environment().python.conda_dependencies\n",
|
||||
"\n",
|
||||
"with open(\"myenv.yml\",\"w\") as f:\n",
|
||||
" f.write(myenv.serialize_to_string())\n",
|
||||
|
||||
@@ -2,12 +2,3 @@ name: auto-ml-regression-hardware-performance-explanation-and-featurization
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- interpret
|
||||
- azureml-defaults
|
||||
- azureml-explain-model
|
||||
- azureml-train-automl
|
||||
- azureml-widgets
|
||||
- matplotlib
|
||||
- pandas_ml
|
||||
- azureml-explain-model
|
||||
- azureml-contrib-interpret
|
||||
|
||||
@@ -7,7 +7,7 @@ from azureml.core.experiment import Experiment
|
||||
from sklearn.externals import joblib
|
||||
from azureml.core.dataset import Dataset
|
||||
from azureml.train.automl.runtime.automl_explain_utilities import AutoMLExplainerSetupClass, \
|
||||
automl_setup_model_explanations
|
||||
automl_setup_model_explanations, automl_check_model_if_explainable
|
||||
from azureml.explain.model.mimic.models.lightgbm_model import LGBMExplainableModel
|
||||
from azureml.explain.model.mimic_wrapper import MimicWrapper
|
||||
from automl.client.core.common.constants import MODEL_PATH
|
||||
@@ -25,6 +25,11 @@ ws = run.experiment.workspace
|
||||
experiment = Experiment(ws, '<<experimnet_name>>')
|
||||
automl_run = Run(experiment=experiment, run_id='<<run_id>>')
|
||||
|
||||
# Check if this AutoML model is explainable
|
||||
if not automl_check_model_if_explainable(automl_run):
|
||||
raise Exception("Model explanations is currently not supported for " + automl_run.get_properties().get(
|
||||
'run_algorithm'))
|
||||
|
||||
# Download the best model from the artifact store
|
||||
automl_run.download_file(name=MODEL_PATH, output_file_path='model.pkl')
|
||||
|
||||
|
||||
@@ -188,7 +188,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"automlconfig-remarks-sample"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"automl_settings = {\n",
|
||||
@@ -196,7 +200,7 @@
|
||||
" \"primary_metric\": 'r2_score',\n",
|
||||
" \"preprocess\": True,\n",
|
||||
" \"enable_early_stopping\": True, \n",
|
||||
" \"experiment_timeout_minutes\": 20, #for real scenarios we reccommend a timeout of at least one hour \n",
|
||||
"    \"experiment_timeout_hours\": 0.3, #for real scenarios we recommend a timeout of at least one hour \n",
|
||||
" \"max_concurrent_iterations\": 4,\n",
|
||||
" \"max_cores_per_iteration\": -1,\n",
|
||||
" \"verbosity\": logging.INFO,\n",
|
||||
|
||||
@@ -2,8 +2,3 @@ name: auto-ml-regression
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-train-automl
|
||||
- azureml-widgets
|
||||
- matplotlib
|
||||
- pandas_ml
|
||||
- paramiko<2.5.0
|
||||
|
||||
@@ -56,7 +56,7 @@ CREATE OR ALTER PROCEDURE [dbo].[AutoMLTrain]
|
||||
@task NVARCHAR(40)='classification', -- The type of task. Can be classification, regression or forecasting.
|
||||
@experiment_name NVARCHAR(32)='automl-sql-test', -- This can be used to find the experiment in the Azure Portal.
|
||||
@iteration_timeout_minutes INT = 15, -- The maximum time in minutes for training a single pipeline.
|
||||
@experiment_timeout_minutes INT = 60, -- The maximum time in minutes for training all pipelines.
|
||||
@experiment_timeout_hours FLOAT = 1, -- The maximum time in hours for training all pipelines.
|
||||
@n_cross_validations INT = 3, -- The number of cross validations.
|
||||
@blacklist_models NVARCHAR(MAX) = '', -- A comma separated list of algos that will not be used.
|
||||
-- The list of possible models can be found at:
|
||||
@@ -131,8 +131,8 @@ if __name__.startswith("sqlindb"):
|
||||
|
||||
X_train = data_train
|
||||
|
||||
if experiment_timeout_minutes == 0:
|
||||
experiment_timeout_minutes = None
|
||||
if experiment_timeout_hours == 0:
|
||||
experiment_timeout_hours = None
|
||||
|
||||
if experiment_exit_score == 0:
|
||||
experiment_exit_score = None
|
||||
@@ -163,7 +163,7 @@ if __name__.startswith("sqlindb"):
|
||||
debug_log = log_file_name,
|
||||
primary_metric = primary_metric,
|
||||
iteration_timeout_minutes = iteration_timeout_minutes,
|
||||
experiment_timeout_minutes = experiment_timeout_minutes,
|
||||
experiment_timeout_hours = experiment_timeout_hours,
|
||||
iterations = iterations,
|
||||
n_cross_validations = n_cross_validations,
|
||||
preprocess = preprocess,
|
||||
@@ -204,7 +204,7 @@ if __name__.startswith("sqlindb"):
|
||||
@iterations INT, @task NVARCHAR(40),
|
||||
@experiment_name NVARCHAR(32),
|
||||
@iteration_timeout_minutes INT,
|
||||
@experiment_timeout_minutes INT,
|
||||
@experiment_timeout_hours FLOAT,
|
||||
@n_cross_validations INT,
|
||||
@blacklist_models NVARCHAR(MAX),
|
||||
@whitelist_models NVARCHAR(MAX),
|
||||
@@ -223,7 +223,7 @@ if __name__.startswith("sqlindb"):
|
||||
, @task = @task
|
||||
, @experiment_name = @experiment_name
|
||||
, @iteration_timeout_minutes = @iteration_timeout_minutes
|
||||
, @experiment_timeout_minutes = @experiment_timeout_minutes
|
||||
, @experiment_timeout_hours = @experiment_timeout_hours
|
||||
, @n_cross_validations = @n_cross_validations
|
||||
, @blacklist_models = @blacklist_models
|
||||
, @whitelist_models = @whitelist_models
|
||||
|
||||
@@ -235,7 +235,7 @@
|
||||
" @task NVARCHAR(40)='classification', -- The type of task. Can be classification, regression or forecasting.\r\n",
|
||||
" @experiment_name NVARCHAR(32)='automl-sql-test', -- This can be used to find the experiment in the Azure Portal.\r\n",
|
||||
" @iteration_timeout_minutes INT = 15, -- The maximum time in minutes for training a single pipeline. \r\n",
|
||||
" @experiment_timeout_minutes INT = 60, -- The maximum time in minutes for training all pipelines.\r\n",
|
||||
" @experiment_timeout_hours FLOAT = 1, -- The maximum time in hours for training all pipelines.\r\n",
|
||||
" @n_cross_validations INT = 3, -- The number of cross validations.\r\n",
|
||||
" @blacklist_models NVARCHAR(MAX) = '', -- A comma separated list of algos that will not be used.\r\n",
|
||||
" -- The list of possible models can be found at:\r\n",
|
||||
@@ -307,8 +307,8 @@
|
||||
"\r\n",
|
||||
" X_train = data_train\r\n",
|
||||
"\r\n",
|
||||
" if experiment_timeout_minutes == 0:\r\n",
|
||||
" experiment_timeout_minutes = None\r\n",
|
||||
" if experiment_timeout_hours == 0:\r\n",
|
||||
" experiment_timeout_hours = None\r\n",
|
||||
"\r\n",
|
||||
" if experiment_exit_score == 0:\r\n",
|
||||
" experiment_exit_score = None\r\n",
|
||||
@@ -337,7 +337,7 @@
|
||||
" debug_log = log_file_name, \r\n",
|
||||
" primary_metric = primary_metric, \r\n",
|
||||
" iteration_timeout_minutes = iteration_timeout_minutes, \r\n",
|
||||
" experiment_timeout_minutes = experiment_timeout_minutes,\r\n",
|
||||
" experiment_timeout_hours = experiment_timeout_hours,\r\n",
|
||||
" iterations = iterations, \r\n",
|
||||
" n_cross_validations = n_cross_validations, \r\n",
|
||||
" preprocess = preprocess,\r\n",
|
||||
@@ -378,7 +378,7 @@
|
||||
"\t\t\t\t @iterations INT, @task NVARCHAR(40),\r\n",
|
||||
"\t\t\t\t @experiment_name NVARCHAR(32),\r\n",
|
||||
"\t\t\t\t @iteration_timeout_minutes INT,\r\n",
|
||||
"\t\t\t\t @experiment_timeout_minutes INT,\r\n",
|
||||
"\t\t\t\t @experiment_timeout_hours FLOAT,\r\n",
|
||||
"\t\t\t\t @n_cross_validations INT,\r\n",
|
||||
"\t\t\t\t @blacklist_models NVARCHAR(MAX),\r\n",
|
||||
"\t\t\t\t @whitelist_models NVARCHAR(MAX),\r\n",
|
||||
@@ -396,7 +396,7 @@
|
||||
"\t, @task = @task\r\n",
|
||||
"\t, @experiment_name = @experiment_name\r\n",
|
||||
"\t, @iteration_timeout_minutes = @iteration_timeout_minutes\r\n",
|
||||
"\t, @experiment_timeout_minutes = @experiment_timeout_minutes\r\n",
|
||||
"\t, @experiment_timeout_hours = @experiment_timeout_hours\r\n",
|
||||
"\t, @n_cross_validations = @n_cross_validations\r\n",
|
||||
"\t, @blacklist_models = @blacklist_models\r\n",
|
||||
"\t, @whitelist_models = @whitelist_models\r\n",
|
||||
@@ -560,9 +560,6 @@
|
||||
"framework": [
|
||||
"Azure ML AutoML"
|
||||
],
|
||||
"tags": [
|
||||
""
|
||||
],
|
||||
"friendly_name": "Setup automated ML SQL integration",
|
||||
"index_order": 1,
|
||||
"kernelspec": {
|
||||
@@ -574,6 +571,9 @@
|
||||
"name": "sql",
|
||||
"version": ""
|
||||
},
|
||||
"tags": [
|
||||
""
|
||||
],
|
||||
"task": "None"
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -161,9 +161,9 @@
|
||||
"source": [
|
||||
"from azureml.core.conda_dependencies import CondaDependencies \n",
|
||||
"\n",
|
||||
"myacienv = CondaDependencies.create(conda_packages=['scikit-learn','numpy','pandas']) #showing how to add libs as an eg. - not needed for this model.\n",
|
||||
"myacienv = CondaDependencies.create(conda_packages=['scikit-learn','numpy','pandas']) # showing how to add libs as an eg. - not needed for this model.\n",
|
||||
"\n",
|
||||
"with open(\"mydeployenv.yml\",\"w\") as f:\n",
|
||||
"with open(\"myenv.yml\",\"w\") as f:\n",
|
||||
" f.write(myacienv.serialize_to_string())"
|
||||
]
|
||||
},
|
||||
@@ -177,6 +177,9 @@
|
||||
"from azureml.core.webservice import AciWebservice, Webservice\n",
|
||||
"from azureml.exceptions import WebserviceException\n",
|
||||
"from azureml.core.model import InferenceConfig\n",
|
||||
"from azureml.core.environment import Environment\n",
|
||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"myaci_config = AciWebservice.deploy_configuration(cpu_cores = 2, \n",
|
||||
" memory_gb = 2, \n",
|
||||
@@ -191,9 +194,16 @@
|
||||
"except WebserviceException:\n",
|
||||
" pass\n",
|
||||
"\n",
|
||||
"inference_config = InferenceConfig(runtime= 'spark-py', \n",
|
||||
" entry_script='score_sparkml.py',\n",
|
||||
" conda_file='mydeployenv.yml')\n",
|
||||
"myenv = Environment.get(ws, name='AzureML-PySpark-MmlSpark-0.15')\n",
|
||||
"# we need to add extra packages to procured environment\n",
|
||||
"# in order to deploy amended environment we need to rename it\n",
|
||||
"myenv.name = 'myenv'\n",
|
||||
"model_dependencies = CondaDependencies('myenv.yml')\n",
|
||||
"for pip_dep in model_dependencies.pip_packages:\n",
|
||||
" myenv.python.conda_dependencies.add_pip_package(pip_dep)\n",
|
||||
"for conda_dep in model_dependencies.conda_packages:\n",
|
||||
" myenv.python.conda_dependencies.add_conda_package(conda_dep)\n",
|
||||
"inference_config = InferenceConfig(entry_script='score_sparkml.py', environment=myenv)\n",
|
||||
"\n",
|
||||
"myservice = Model.deploy(ws, service_name, [mymodel], inference_config, myaci_config)\n",
|
||||
"myservice.wait_for_deployment(show_output=True)"
|
||||
|
||||
@@ -640,7 +640,7 @@
|
||||
"\n",
|
||||
"myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn'], pip_packages=['azureml-defaults', 'azureml-sdk[automl]'])\n",
|
||||
"\n",
|
||||
"conda_env_file_name = 'mydeployenv.yml'\n",
|
||||
"conda_env_file_name = 'myenv.yml'\n",
|
||||
"myenv.save_to_file('.', conda_env_file_name)"
|
||||
]
|
||||
},
|
||||
@@ -664,17 +664,27 @@
|
||||
"from azureml.exceptions import WebserviceException\n",
|
||||
"from azureml.core.model import InferenceConfig\n",
|
||||
"from azureml.core.model import Model\n",
|
||||
"from azureml.core.environment import Environment\n",
|
||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||
"import uuid\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"myaci_config = AciWebservice.deploy_configuration(\n",
|
||||
" cpu_cores = 2, \n",
|
||||
" memory_gb = 2, \n",
|
||||
" tags = {'name':'Databricks Azure ML ACI'}, \n",
|
||||
" description = 'This is for ADB and AutoML example.')\n",
|
||||
"\n",
|
||||
"inference_config = InferenceConfig(runtime= 'spark-py', \n",
|
||||
" entry_script='score.py',\n",
|
||||
" conda_file='mydeployenv.yml')\n",
|
||||
"myenv = Environment.get(ws, name='AzureML-PySpark-MmlSpark-0.15')\n",
|
||||
"# we need to add extra packages to procured environment\n",
|
||||
"# in order to deploy amended environment we need to rename it\n",
|
||||
"myenv.name = 'myenv'\n",
|
||||
"model_dependencies = CondaDependencies('myenv.yml')\n",
|
||||
"for pip_dep in model_dependencies.pip_packages:\n",
|
||||
" myenv.python.conda_dependencies.add_pip_package(pip_dep)\n",
|
||||
"for conda_dep in model_dependencies.conda_packages:\n",
|
||||
" myenv.python.conda_dependencies.add_conda_package(conda_dep)\n",
|
||||
"inference_config = InferenceConfig(entry_script='score_sparkml.py', environment=myenv)\n",
|
||||
"\n",
|
||||
"guid = str(uuid.uuid4()).split(\"-\")[0]\n",
|
||||
"service_name = \"myservice-{}\".format(guid)\n",
|
||||
|
||||
@@ -195,7 +195,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can now create and/or use an Environment object when deploying a Webservice. The Environment can have been previously registered with your Workspace, or it will be registered with it as a part of the Webservice deployment. Only Environments that were created using azureml-defaults version 1.0.48 or later will work with this new handling however.\n",
|
||||
"You can now create and/or use an Environment object when deploying a Webservice. The Environment can have been previously registered with your Workspace, or it will be registered with it as a part of the Webservice deployment. Please note that your environment must include azureml-defaults with version >= 1.0.45 as a pip dependency, because it contains the functionality needed to host the model as a web service.\n",
|
||||
"\n",
|
||||
"More information can be found in our [using environments notebook](../training/using-environments/using-environments.ipynb)."
|
||||
]
|
||||
@@ -221,23 +221,30 @@
|
||||
"## Create Inference Configuration\n",
|
||||
"\n",
|
||||
"There is now support for a source directory, you can upload an entire folder from your local machine as dependencies for the Webservice.\n",
|
||||
"Note: in that case, your entry_script, conda_file, and extra_docker_file_steps paths are relative paths to the source_directory path.\n",
|
||||
"Note: in that case, the environment's entry_script and file_path are relative paths to the source_directory path; myenv.docker.base_dockerfile is a string containing extra docker steps or contents of the docker file.\n",
|
||||
"\n",
|
||||
"Sample code for using a source directory:\n",
|
||||
"\n",
|
||||
"```python\n",
|
||||
"from azureml.core.environment import Environment\n",
|
||||
"from azureml.core.model import InferenceConfig\n",
|
||||
"\n",
|
||||
"myenv = Environment.from_conda_specification(name='myenv', file_path='env/myenv.yml')\n",
|
||||
"\n",
|
||||
"# explicitly set base_image to None when setting base_dockerfile\n",
|
||||
"myenv.docker.base_image = None\n",
|
||||
"# add extra docker commands to execute\n",
|
||||
"myenv.docker.base_dockerfile = \"FROM ubuntu\\n RUN echo \\\"hello\\\"\"\n",
|
||||
"\n",
|
||||
"inference_config = InferenceConfig(source_directory=\"C:/abc\",\n",
|
||||
" runtime= \"python\", \n",
|
||||
" entry_script=\"x/y/score.py\",\n",
|
||||
" conda_file=\"env/myenv.yml\", \n",
|
||||
" extra_docker_file_steps=\"helloworld.txt\")\n",
|
||||
" environment=myenv)\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
" - source_directory = holds source path as string, this entire folder gets added in image so its really easy to access any files within this folder or subfolder\n",
|
||||
" - runtime = Which runtime to use for the image. Current supported runtimes are 'spark-py' and 'python\n",
|
||||
" - entry_script = contains logic specific to initializing your model and running predictions\n",
|
||||
" - conda_file = manages conda and python package dependencies.\n",
|
||||
" - extra_docker_file_steps = optional: any extra steps you want to inject into docker file"
|
||||
" - file_path: input parameter to Environment constructor. Manages conda and python package dependencies.\n",
|
||||
" - env.docker.base_dockerfile: any extra steps you want to inject into docker file\n",
|
||||
" - source_directory: holds source path as string, this entire folder gets added in image so it's really easy to access any files within this folder or subfolder\n",
|
||||
" - entry_script: contains logic specific to initializing your model and running predictions"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -405,7 +405,7 @@
|
||||
"\n",
|
||||
" - To run a production-ready web service, see the [notebook on deployment to Azure Kubernetes Service](../production-deploy-to-aks/production-deploy-to-aks.ipynb).\n",
|
||||
" - To run a local web service, see the [notebook on deployment to a local Docker container](../deploy-to-local/register-model-deploy-local.ipynb).\n",
|
||||
" - For more information on datasets, see the [notebook on training with datasets](../../work-with-data/datasets-tutorial/train-with-datasets.ipynb).\n",
|
||||
" - For more information on datasets, see the [notebook on training with datasets](../../work-with-data/datasets-tutorial/train-with-datasets/train-with-datasets.ipynb).\n",
|
||||
" - For more information on environments, see the [notebook on using environments](../../training/using-environments/using-environments.ipynb).\n",
|
||||
" - For information on all the available deployment targets, see [“How and where to deploy models”](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-deploy-and-where#choose-a-compute-target)."
|
||||
]
|
||||
|
||||
@@ -189,6 +189,15 @@
|
||||
" return error"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"Please note that you must include azureml-defaults with version >= 1.0.45 as a pip dependency for your environment. This package contains the functionality needed to host the model as a web service."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@@ -206,16 +215,6 @@
|
||||
" - inference-schema[numpy-support]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%writefile C:/abc/dockerstep/customDockerStep.txt\n",
|
||||
"RUN echo \"this is test\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@@ -240,11 +239,10 @@
|
||||
"source": [
|
||||
"## Create Inference Configuration\n",
|
||||
"\n",
|
||||
" - source_directory = holds source path as string, this entire folder gets added in image so its really easy to access any files within this folder or subfolder\n",
|
||||
" - runtime = Which runtime to use for the image. Current supported runtimes are 'spark-py' and 'python\n",
|
||||
" - entry_script = contains logic specific to initializing your model and running predictions\n",
|
||||
" - conda_file = manages conda and python package dependencies.\n",
|
||||
" - extra_docker_file_steps = optional: any extra steps you want to inject into docker file"
|
||||
" - file_path: input parameter to Environment constructor. Manages conda and python package dependencies.\n",
|
||||
" - env.docker.base_dockerfile: any extra steps you want to inject into docker file\n",
|
||||
" - source_directory: holds source path as string, this entire folder gets added in image so it's really easy to access any files within this folder or subfolder\n",
|
||||
" - entry_script: contains logic specific to initializing your model and running predictions"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -253,13 +251,19 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.environment import Environment\n",
|
||||
"from azureml.core.model import InferenceConfig\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"myenv = Environment.from_conda_specification(name='myenv', file_path='env/myenv.yml')\n",
|
||||
"\n",
|
||||
"# explicitly set base_image to None when setting base_dockerfile\n",
|
||||
"myenv.docker.base_image = None\n",
|
||||
"myenv.docker.base_dockerfile = \"RUN echo \\\"this is test\\\"\"\n",
|
||||
"\n",
|
||||
"inference_config = InferenceConfig(source_directory=\"C:/abc\",\n",
|
||||
" runtime=\"python\", \n",
|
||||
" entry_script=\"x/y/score.py\",\n",
|
||||
" conda_file=\"env/myenv.yml\", \n",
|
||||
" extra_docker_file_steps=\"dockerstep/customDockerStep.txt\")"
|
||||
" environment=myenv)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -158,7 +158,8 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 5. *Create myenv.yml file*"
|
||||
"## 5. *Create myenv.yml file*\n",
|
||||
"Please note that you must include azureml-defaults with version >= 1.0.45 as a pip dependency, because it contains the functionality needed to host the model as a web service."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -169,7 +170,8 @@
|
||||
"source": [
|
||||
"from azureml.core.conda_dependencies import CondaDependencies \n",
|
||||
"\n",
|
||||
"myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn'])\n",
|
||||
"myenv = CondaDependencies.create(conda_packages=['numpy','scikit-learn'],\n",
|
||||
" pip_packages=['azureml-defaults'])\n",
|
||||
"\n",
|
||||
"with open(\"myenv.yml\",\"w\") as f:\n",
|
||||
" f.write(myenv.serialize_to_string())"
|
||||
@@ -189,10 +191,11 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.model import InferenceConfig\n",
|
||||
"from azureml.core.environment import Environment\n",
|
||||
"\n",
|
||||
"inference_config = InferenceConfig(runtime= \"python\", \n",
|
||||
" entry_script=\"score.py\",\n",
|
||||
" conda_file=\"myenv.yml\")"
|
||||
"\n",
|
||||
"myenv = Environment.from_conda_specification(name=\"myenv\", file_path=\"myenv.yml\")\n",
|
||||
"inference_config = InferenceConfig(entry_script=\"score.py\", environment=myenv)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -244,7 +244,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Setting up inference configuration\n",
|
||||
"First we create a YAML file that specifies which dependencies we would like to see in our container."
|
||||
"First we create a YAML file that specifies which dependencies we would like to see in our container. Please note that you must include azureml-defaults with version >= 1.0.45 as a pip dependency, because it contains the functionality needed to host the model as a web service."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -255,7 +255,7 @@
|
||||
"source": [
|
||||
"from azureml.core.conda_dependencies import CondaDependencies \n",
|
||||
"\n",
|
||||
"myenv = CondaDependencies.create(pip_packages=[\"numpy\",\"onnxruntime==0.4.0\",\"azureml-core\"])\n",
|
||||
"myenv = CondaDependencies.create(pip_packages=[\"numpy\", \"onnxruntime==0.4.0\", \"azureml-core\", \"azureml-defaults\"])\n",
|
||||
"\n",
|
||||
"with open(\"myenv.yml\",\"w\") as f:\n",
|
||||
" f.write(myenv.serialize_to_string())"
|
||||
@@ -275,11 +275,11 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.model import InferenceConfig\n",
|
||||
"from azureml.core.environment import Environment\n",
|
||||
"\n",
|
||||
"inference_config = InferenceConfig(runtime= \"python\", \n",
|
||||
" entry_script=\"score.py\",\n",
|
||||
" conda_file=\"myenv.yml\",\n",
|
||||
" extra_docker_file_steps = \"Dockerfile\")"
|
||||
"\n",
|
||||
"myenv = Environment.from_conda_specification(name=\"myenv\", file_path=\"myenv.yml\")\n",
|
||||
"inference_config = InferenceConfig(entry_script=\"score.py\", environment=myenv)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -373,7 +373,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#aci_service.delete()"
|
||||
"aci_service.delete()"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
||||
@@ -319,7 +319,8 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Write Environment File"
|
||||
"### Write Environment File\n",
|
||||
"Please note that you must include azureml-defaults with version >= 1.0.45 as a pip dependency, because it contains the functionality needed to host the model as a web service."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -330,7 +331,8 @@
|
||||
"source": [
|
||||
"from azureml.core.conda_dependencies import CondaDependencies \n",
|
||||
"\n",
|
||||
"myenv = CondaDependencies.create(pip_packages=[\"numpy\", \"onnxruntime\", \"azureml-core\"])\n",
|
||||
"\n",
|
||||
"myenv = CondaDependencies.create(pip_packages=[\"numpy\", \"onnxruntime\", \"azureml-core\", \"azureml-defaults\"])\n",
|
||||
"\n",
|
||||
"with open(\"myenv.yml\",\"w\") as f:\n",
|
||||
" f.write(myenv.serialize_to_string())"
|
||||
@@ -350,11 +352,11 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.model import InferenceConfig\n",
|
||||
"from azureml.core.environment import Environment\n",
|
||||
"\n",
|
||||
"inference_config = InferenceConfig(runtime= \"python\", \n",
|
||||
" entry_script=\"score.py\",\n",
|
||||
" conda_file=\"myenv.yml\",\n",
|
||||
" extra_docker_file_steps = \"Dockerfile\")"
|
||||
"\n",
|
||||
"myenv = Environment.from_conda_specification(name=\"myenv\", file_path=\"myenv.yml\")\n",
|
||||
"inference_config = InferenceConfig(entry_script=\"score.py\", environment=myenv)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -724,7 +726,7 @@
|
||||
"source": [
|
||||
"# remember to delete your service after you are done using it!\n",
|
||||
"\n",
|
||||
"# aci_service.delete()"
|
||||
"aci_service.delete()"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -306,7 +306,7 @@
|
||||
"source": [
|
||||
"### Write Environment File\n",
|
||||
"\n",
|
||||
"This step creates a YAML environment file that specifies which dependencies we would like to see in our Linux Virtual Machine."
|
||||
"This step creates a YAML environment file that specifies which dependencies we would like to see in our Linux Virtual Machine. Please note that you must include azureml-defaults with version >= 1.0.45 as a pip dependency, because it contains the functionality needed to host the model as a web service."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -317,7 +317,7 @@
|
||||
"source": [
|
||||
"from azureml.core.conda_dependencies import CondaDependencies \n",
|
||||
"\n",
|
||||
"myenv = CondaDependencies.create(pip_packages=[\"numpy\", \"onnxruntime\", \"azureml-core\"])\n",
|
||||
"myenv = CondaDependencies.create(pip_packages=[\"numpy\", \"onnxruntime\", \"azureml-core\", \"azureml-defaults\"])\n",
|
||||
"\n",
|
||||
"with open(\"myenv.yml\",\"w\") as f:\n",
|
||||
" f.write(myenv.serialize_to_string())"
|
||||
@@ -337,11 +337,11 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.model import InferenceConfig\n",
|
||||
"from azureml.core.environment import Environment\n",
|
||||
"\n",
|
||||
"inference_config = InferenceConfig(runtime= \"python\", \n",
|
||||
" entry_script=\"score.py\",\n",
|
||||
" extra_docker_file_steps = \"Dockerfile\",\n",
|
||||
" conda_file=\"myenv.yml\")"
|
||||
"\n",
|
||||
"myenv = Environment.from_conda_specification(name=\"myenv\", file_path=\"myenv.yml\")\n",
|
||||
"inference_config = InferenceConfig(entry_script=\"score.py\", environment=myenv)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -733,7 +733,7 @@
|
||||
"source": [
|
||||
"# remember to delete your service after you are done using it!\n",
|
||||
"\n",
|
||||
"# aci_service.delete()"
|
||||
"aci_service.delete()"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -241,7 +241,8 @@
|
||||
"source": [
|
||||
"from azureml.core.conda_dependencies import CondaDependencies \n",
|
||||
"\n",
|
||||
"myenv = CondaDependencies.create(pip_packages=[\"numpy\",\"onnxruntime\",\"azureml-core\"])\n",
|
||||
"\n",
|
||||
"myenv = CondaDependencies.create(pip_packages=[\"numpy\", \"onnxruntime\", \"azureml-core\", \"azureml-defaults\"])\n",
|
||||
"\n",
|
||||
"with open(\"myenv.yml\",\"w\") as f:\n",
|
||||
" f.write(myenv.serialize_to_string())"
|
||||
@@ -251,7 +252,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Create the inference configuration object"
|
||||
"Create the inference configuration object. Please note that you must include azureml-defaults with version >= 1.0.45 as a pip dependency, because it contains the functionality needed to host the model as a web service."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -261,11 +262,11 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.model import InferenceConfig\n",
|
||||
"from azureml.core.environment import Environment\n",
|
||||
"\n",
|
||||
"inference_config = InferenceConfig(runtime= \"python\", \n",
|
||||
" entry_script=\"score.py\",\n",
|
||||
" conda_file=\"myenv.yml\",\n",
|
||||
" extra_docker_file_steps = \"Dockerfile\")"
|
||||
"\n",
|
||||
"myenv = Environment.from_conda_specification(name=\"myenv\", file_path=\"myenv.yml\")\n",
|
||||
"inference_config = InferenceConfig(entry_script=\"score.py\", environment=myenv)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -361,7 +362,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#aci_service.delete()"
|
||||
"aci_service.delete()"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
||||
@@ -405,7 +405,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create inference configuration\n",
|
||||
"First we create a YAML file that specifies which dependencies we would like to see in our container."
|
||||
"First we create a YAML file that specifies which dependencies we would like to see in our container. Please note that you must include azureml-defaults with version >= 1.0.45 as a pip dependency, because it contains the functionality needed to host the model as a web service."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -416,7 +416,7 @@
|
||||
"source": [
|
||||
"from azureml.core.conda_dependencies import CondaDependencies \n",
|
||||
"\n",
|
||||
"myenv = CondaDependencies.create(pip_packages=[\"numpy\",\"onnxruntime\",\"azureml-core\"])\n",
|
||||
"myenv = CondaDependencies.create(pip_packages=[\"numpy\",\"onnxruntime\",\"azureml-core\", \"azureml-defaults\"])\n",
|
||||
"\n",
|
||||
"with open(\"myenv.yml\",\"w\") as f:\n",
|
||||
" f.write(myenv.serialize_to_string())"
|
||||
@@ -436,11 +436,11 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.model import InferenceConfig\n",
|
||||
"from azureml.core.environment import Environment\n",
|
||||
"\n",
|
||||
"inference_config = InferenceConfig(runtime= \"python\", \n",
|
||||
" entry_script=\"score.py\",\n",
|
||||
" conda_file=\"myenv.yml\",\n",
|
||||
" extra_docker_file_steps = \"Dockerfile\")"
|
||||
"\n",
|
||||
"myenv = Environment.from_conda_specification(name=\"myenv\", file_path=\"myenv.yml\")\n",
|
||||
"inference_config = InferenceConfig(entry_script=\"score.py\", environment=myenv)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -537,7 +537,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#aci_service.delete()"
|
||||
"aci_service.delete()"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
||||
@@ -308,7 +308,9 @@
|
||||
"source": [
|
||||
"## Deploy \n",
|
||||
"\n",
|
||||
"Deploy Model and ScoringExplainer"
|
||||
"Deploy Model and ScoringExplainer.\n",
|
||||
"\n",
|
||||
"Please note that you must include azureml-defaults with version >= 1.0.45 as a pip dependency, because it contains the functionality needed to host the model as a web service."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -319,7 +321,7 @@
|
||||
"source": [
|
||||
"from azureml.core.conda_dependencies import CondaDependencies \n",
|
||||
"\n",
|
||||
"# WARNING: to install this, g++ needs to be available on the Docker image and is not by default (look at the next cell)\n",
|
||||
"# azureml-defaults is required to host the model as a web service.\n",
|
||||
"azureml_pip_packages = [\n",
|
||||
" 'azureml-defaults', 'azureml-contrib-interpret', 'azureml-core', 'azureml-telemetry',\n",
|
||||
" 'azureml-interpret'\n",
|
||||
@@ -338,16 +340,6 @@
|
||||
" print(f.read())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%writefile dockerfile\n",
|
||||
"RUN apt-get update && apt-get install -y g++ "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@@ -369,6 +361,8 @@
|
||||
"from azureml.core.model import InferenceConfig\n",
|
||||
"from azureml.core.webservice import AciWebservice\n",
|
||||
"from azureml.core.model import Model\n",
|
||||
"from azureml.core.environment import Environment\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"aciconfig = AciWebservice.deploy_configuration(cpu_cores=1, \n",
|
||||
" memory_gb=1, \n",
|
||||
@@ -376,10 +370,8 @@
|
||||
" \"method\" : \"local_explanation\"}, \n",
|
||||
" description='Get local explanations for IBM Employee Attrition data')\n",
|
||||
"\n",
|
||||
"inference_config = InferenceConfig(runtime= \"python\", \n",
|
||||
" entry_script=\"score_local_explain.py\",\n",
|
||||
" conda_file=\"myenv.yml\",\n",
|
||||
" extra_docker_file_steps=\"dockerfile\")\n",
|
||||
"myenv = Environment.from_conda_specification(name=\"myenv\", file_path=\"myenv.yml\")\n",
|
||||
"inference_config = InferenceConfig(entry_script=\"score_local_explain.py\", environment=myenv)\n",
|
||||
"\n",
|
||||
"# Use configs and models generated above\n",
|
||||
"service = Model.deploy(ws, 'model-scoring-deploy-local', [scoring_explainer_model, original_model], inference_config, aciconfig)\n",
|
||||
|
||||
@@ -246,7 +246,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create TensorFlow estimator\n",
|
||||
"Next, we construct an [TensorFlow](https://docs.microsoft.com/en-us/python/api/azureml-train-core/azureml.train.dnn.tensorflow?view=azure-ml-py) estimator object.\n",
|
||||
"Next, we construct a [TensorFlow](https://docs.microsoft.com/python/api/azureml-train-core/azureml.train.dnn.tensorflow?view=azure-ml-py) estimator object.\n",
|
||||
"The TensorFlow estimator is providing a simple way of launching a TensorFlow training job on a compute target. It will automatically provide a docker image that has TensorFlow installed -- if additional pip or conda packages are required, their names can be passed in via the `pip_packages` and `conda_packages` arguments and they will be included in the resulting docker.\n",
|
||||
"\n",
|
||||
"The TensorFlow estimator also takes a `framework_version` parameter -- if no version is provided, the estimator will default to the latest version supported by AzureML. Use `TensorFlow.get_supported_versions()` to get a list of all versions supported by your current SDK version or see the [SDK documentation](https://docs.microsoft.com/en-us/python/api/azureml-train-core/azureml.train.dnn?view=azure-ml-py) for the versions supported in the most current release.\n",
|
||||
@@ -385,7 +385,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"metrics_output_name = 'metrics_output'\n",
|
||||
"metirics_data = PipelineData(name='metrics_data',\n",
|
||||
"metrics_data = PipelineData(name='metrics_data',\n",
|
||||
" datastore=ds,\n",
|
||||
" pipeline_output_name=metrics_output_name)\n",
|
||||
"\n",
|
||||
@@ -395,7 +395,7 @@
|
||||
" hyperdrive_config=hd_config,\n",
|
||||
" estimator_entry_script_arguments=['--data-folder', data_folder],\n",
|
||||
" inputs=[data_folder],\n",
|
||||
" metrics_output=metirics_data)"
|
||||
" metrics_output=metrics_data)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -180,7 +180,7 @@
|
||||
"# just get the published pipeline object that you have the ID for.\n",
|
||||
"\n",
|
||||
"# Get all published pipeline objects in the workspace\n",
|
||||
"all_pub_pipelines = PublishedPipeline.get_all(ws)\n",
|
||||
"all_pub_pipelines = PublishedPipeline.list(ws)\n",
|
||||
"\n",
|
||||
"# We will iterate through the list of published pipelines and \n",
|
||||
"# use the last ID in the list for Schedule operations: \n",
|
||||
@@ -244,7 +244,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"schedules = Schedule.get_all(ws, pipeline_id=pub_pipeline_id)\n",
|
||||
"schedules = Schedule.list(ws, pipeline_id=pub_pipeline_id)\n",
|
||||
"\n",
|
||||
"# We will iterate through the list of schedules and \n",
|
||||
"# use the last recurrence schedule in the list for further operations: \n",
|
||||
@@ -272,7 +272,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Use active_only=False to get all schedules including disabled schedules\n",
|
||||
"schedules = Schedule.get_all(ws, active_only=True) \n",
|
||||
"schedules = Schedule.list(ws, active_only=True) \n",
|
||||
"print(\"Your workspace has the following schedules set up:\")\n",
|
||||
"for schedule in schedules:\n",
|
||||
" print(\"{} (Published pipeline: {}\".format(schedule.id, schedule.pipeline_id))"
|
||||
|
||||
@@ -230,7 +230,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"endpoint_list = PipelineEndpoint.get_all(workspace=ws, active_only=True)\n",
|
||||
"endpoint_list = PipelineEndpoint.list(workspace=ws, active_only=True)\n",
|
||||
"endpoint_list"
|
||||
]
|
||||
},
|
||||
@@ -360,7 +360,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"versions = pipeline_endpoint_by_name.get_all_versions()\n",
|
||||
"versions = pipeline_endpoint_by_name.list_versions()\n",
|
||||
"\n",
|
||||
"for ve in versions:\n",
|
||||
" print(ve.version)\n",
|
||||
@@ -381,7 +381,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pipelines = pipeline_endpoint_by_name.get_all_pipelines(active_only=True)\n",
|
||||
"pipelines = pipeline_endpoint_by_name.list_pipelines(active_only=True)\n",
|
||||
"pipelines"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -285,7 +285,7 @@
|
||||
"metrics_output_name = 'metrics_output'\n",
|
||||
"best_model_output_name = 'best_model_output'\n",
|
||||
"\n",
|
||||
"metirics_data = PipelineData(name='metrics_data',\n",
|
||||
"metrics_data = PipelineData(name='metrics_data',\n",
|
||||
" datastore=ds,\n",
|
||||
" pipeline_output_name=metrics_output_name,\n",
|
||||
" training_output=TrainingOutput(type='Metrics'))\n",
|
||||
@@ -311,7 +311,7 @@
|
||||
"automl_step = AutoMLStep(\n",
|
||||
" name='automl_module',\n",
|
||||
" automl_config=automl_config,\n",
|
||||
" outputs=[metirics_data, model_data],\n",
|
||||
" outputs=[metrics_data, model_data],\n",
|
||||
" allow_reuse=True)"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -0,0 +1,436 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved. \n",
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Azure Machine Learning Pipeline with NotebookRunnerStep\n",
|
||||
"This notebook demonstrates the use of `NotebookRunnerStep`. It allows you to run a local notebook as a step in Azure Machine Learning Pipeline."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Introduction\n",
|
||||
"In this example we showcase how you can run another notebook `notebook_runner/training_notebook.ipynb` as a step in Azure Machine Learning Pipeline.\n",
|
||||
"\n",
|
||||
"If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, make sure you have executed the [configuration](https://aka.ms/pl-config) before running this notebook.\n",
|
||||
"\n",
|
||||
"In this notebook you will learn how to:\n",
|
||||
"1. Create an `Experiment` in an existing `Workspace`.\n",
|
||||
"2. Create or Attach existing AmlCompute to a workspace.\n",
|
||||
"3. Configure NotebookRun using `NotebookRunConfig`.\n",
|
||||
"4. Use NotebookRunnerStep.\n",
|
||||
"5. Run the notebook on `AmlCompute` as a pipeline step consuming the output of a python script step.\n",
|
||||
"\n",
|
||||
"Advantages of running your notebook as a step in pipeline:\n",
|
||||
"1. Run your notebook like a python script without converting into .py files, leveraging complete end to end experience of Azure Machine Learning Pipelines.\n",
|
||||
"2. Use pipeline intermediate data to and from the notebook along with other steps in pipeline.\n",
|
||||
"3. Parameterize your notebook with [Pipeline Parameters](./aml-pipelines-publish-and-run-using-rest-endpoint.ipynb).\n",
|
||||
"\n",
|
||||
"Try some more [quick start notebooks](https://github.com/microsoft/recommenders/tree/master/notebooks/00_quick_start) with `NotebookRunnerStep`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Azure Machine Learning and Pipeline SDK-specific imports"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"import azureml.core\n",
|
||||
"\n",
|
||||
"from azureml.core.compute import AmlCompute, ComputeTarget\n",
|
||||
"from azureml.core.runconfig import RunConfiguration\n",
|
||||
"from azureml.data.data_reference import DataReference\n",
|
||||
"from azureml.pipeline.core import PipelineData\n",
|
||||
"from azureml.core.datastore import Datastore\n",
|
||||
"\n",
|
||||
"from azureml.widgets import RunDetails\n",
|
||||
"\n",
|
||||
"from azureml.core import Workspace, Experiment\n",
|
||||
"from azureml.contrib.notebook import NotebookRunConfig, AzureMLNotebookHandler\n",
|
||||
"\n",
|
||||
"from azureml.pipeline.core import Pipeline\n",
|
||||
"from azureml.pipeline.steps import PythonScriptStep\n",
|
||||
"from azureml.contrib.notebook import NotebookRunnerStep\n",
|
||||
"\n",
|
||||
"# Check core SDK version number\n",
|
||||
"print(\"SDK version:\", azureml.core.VERSION)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Initialize Workspace\n",
|
||||
"\n",
|
||||
"Initialize a [workspace](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.workspace(class%29) object from persisted configuration."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ws = Workspace.from_config()\n",
|
||||
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')\n",
|
||||
"ws.set_default_datastore(\"workspaceblobstore\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Upload data to datastore"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"Datastore.get(ws, \"workspaceblobstore\").upload_files([\"./20news.pkl\"], target_path=\"20newsgroups\", overwrite=True)\n",
|
||||
"print(\"Upload call completed\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create an Azure ML experiment\n",
|
||||
"Let's create an experiment named \"notebook-step-run-example\" and a folder to hold the notebook and other scripts. The script runs will be recorded under the experiment in Azure.\n",
|
||||
"\n",
|
||||
"The best practice is to use separate folders for scripts and its dependent files for each step and specify that folder as the `source_directory` for the step. This helps reduce the size of the snapshot created for the step (only the specific folder is snapshotted). Since changes in any files in the `source_directory` would trigger a re-upload of the snapshot, this helps keep the reuse of the step when there are no changes in the `source_directory` of the step."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Choose a name for the run history container in the workspace.\n",
|
||||
"experiment_name = 'notebook-step-run-example'\n",
|
||||
"source_directory = 'notebook_runner'\n",
|
||||
"\n",
|
||||
"experiment = Experiment(ws, experiment_name)\n",
|
||||
"experiment"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create or Attach an AmlCompute cluster\n",
|
||||
"You will need to create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for your pipeline run. In this tutorial, you get the default `AmlCompute` as your training compute resource."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Choose a name for your cluster.\n",
|
||||
"amlcompute_cluster_name = \"cpu-cluster\"\n",
|
||||
"\n",
|
||||
"found = False\n",
|
||||
"# Check if this compute target already exists in the workspace.\n",
|
||||
"cts = ws.compute_targets\n",
|
||||
"if amlcompute_cluster_name in cts and cts[amlcompute_cluster_name].type == 'AmlCompute':\n",
|
||||
" found = True\n",
|
||||
" print('Found existing compute target.')\n",
|
||||
" compute_target = cts[amlcompute_cluster_name]\n",
|
||||
" \n",
|
||||
"if not found:\n",
|
||||
" print('Creating a new compute target...')\n",
|
||||
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\", # for GPU, use \"STANDARD_NC6\"\n",
|
||||
" #vm_priority = 'lowpriority', # optional\n",
|
||||
" max_nodes = 4)\n",
|
||||
"\n",
|
||||
" # Create the cluster.\n",
|
||||
" compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)\n",
|
||||
" \n",
|
||||
" # Can poll for a minimum number of nodes and for a specific timeout.\n",
|
||||
" # If no min_node_count is provided, it will use the scale settings for the cluster.\n",
|
||||
" compute_target.wait_for_completion(show_output = True, min_node_count = 1, timeout_in_minutes = 10)\n",
|
||||
" \n",
|
||||
" # For a more detailed view of current AmlCompute status, use get_status()."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create a new RunConfig object"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||
"\n",
|
||||
"conda_run_config = RunConfiguration(framework=\"python\")\n",
|
||||
"\n",
|
||||
"conda_run_config.environment.docker.enabled = True\n",
|
||||
"conda_run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n",
|
||||
"\n",
|
||||
"cd = CondaDependencies.create(pip_packages=['azureml-sdk'], pin_sdk_version=False)\n",
|
||||
"conda_run_config.environment.python.conda_dependencies = cd\n",
|
||||
"\n",
|
||||
"print('run config is ready')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Define input and outputs"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"input_data = DataReference(\n",
|
||||
" datastore=Datastore.get(ws, \"workspaceblobstore\"),\n",
|
||||
" data_reference_name=\"blob_test_data\",\n",
|
||||
" path_on_datastore=\"20newsgroups/20news.pkl\")\n",
|
||||
"\n",
|
||||
"output_data = PipelineData(name=\"processed_data\",\n",
|
||||
" datastore=Datastore.get(ws, \"workspaceblobstore\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create notebook run configuration and set parameters values"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"handler = AzureMLNotebookHandler(timeout=600, progress_bar=False, log_output=True)\n",
|
||||
"\n",
|
||||
"cfg = NotebookRunConfig(source_directory=source_directory, notebook=\"training_notebook.ipynb\",\n",
|
||||
" handler = handler,\n",
|
||||
" parameters={\"arg1\": \"Machine Learning\"},\n",
|
||||
" run_config=conda_run_config)\n",
|
||||
"\n",
|
||||
"print(\"Notebook Run Config is created.\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Define PythonScriptStep"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print('Source directory for the step is {}.'.format(os.path.realpath('./train')))\n",
|
||||
"python_script_step = PythonScriptStep(\n",
|
||||
" script_name=\"train.py\",\n",
|
||||
" arguments=[\"--input_data\", input_data],\n",
|
||||
" inputs=[input_data],\n",
|
||||
" outputs=[output_data],\n",
|
||||
" compute_target=compute_target, \n",
|
||||
" source_directory=\"./train\",\n",
|
||||
" allow_reuse=True)\n",
|
||||
"print(\"python_script_step created\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Define NotebookRunnerStep\n",
|
||||
"\n",
|
||||
"This step will consume intermediate output produced by `python_script_step` as an input.\n",
|
||||
"\n",
|
||||
"Optionally, an output name can be passed as `output_notebook_pipeline_data_name` to the `NotebookRunnerStep`; the `output_notebook` of the notebook run is then produced as a `PipelineData` step output under that name and can be passed further along the pipeline."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.pipeline.core import PipelineParameter, TrainingOutput\n",
|
||||
"\n",
|
||||
"output_from_notebook = PipelineData(name=\"notebook_processed_data\",\n",
|
||||
" datastore=Datastore.get(ws, \"workspaceblobstore\"))\n",
|
||||
"\n",
|
||||
"my_pipeline_param = PipelineParameter(name=\"pipeline_param\", default_value=\"my_param\")\n",
|
||||
"\n",
|
||||
"print('Source directory for the step is {}.'.format(os.path.realpath(source_directory)))\n",
|
||||
"notebook_runner_step = NotebookRunnerStep(name=\"training_notebook_step\",\n",
|
||||
" notebook_run_config=cfg,\n",
|
||||
" params={\"my_pipeline_param\": my_pipeline_param},\n",
|
||||
" inputs=[output_data],\n",
|
||||
" outputs=[output_from_notebook],\n",
|
||||
" allow_reuse=True,\n",
|
||||
" compute_target=compute_target,\n",
|
||||
" output_notebook_pipeline_data_name=\"notebook_result\")\n",
|
||||
"\n",
|
||||
"print(\"Notebook Runner Step is Created.\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Build Pipeline\n",
|
||||
"\n",
|
||||
"Once we have the steps (or steps collection), we can build the [pipeline](https://docs.microsoft.com/en-us/python/api/azureml-pipeline-core/azureml.pipeline.core.pipeline.pipeline?view=azure-ml-py). By default, all these steps will run in **parallel** once we submit the pipeline for run.\n",
|
||||
"\n",
|
||||
"A pipeline is created with a list of steps and a workspace. Submit a pipeline using [submit](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.experiment(class)?view=azure-ml-py#submit-config--tags-none----kwargs-). When submit is called, a [PipelineRun](https://docs.microsoft.com/en-us/python/api/azureml-pipeline-core/azureml.pipeline.core.pipelinerun?view=azure-ml-py) is created which in turn creates [StepRun](https://docs.microsoft.com/en-us/python/api/azureml-pipeline-core/azureml.pipeline.core.steprun?view=azure-ml-py) objects for each step in the workflow."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pipeline1 = Pipeline(workspace=ws, steps=[notebook_runner_step])\n",
|
||||
"\n",
|
||||
"pipeline1.validate()\n",
|
||||
"print(\"Pipeline validation complete\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pipeline_run1 = experiment.submit(pipeline1)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"RunDetails(pipeline_run1).show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Download output notebook\n",
|
||||
"\n",
|
||||
"`output_notebook` can be retrieved via pipeline step output if `output_notebook_pipeline_data_name` is provided to the `NotebookRunnerStep`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pipeline_run1.wait_for_completion()\n",
|
||||
"# Retrieve the step runs by name `training_notebook_step`\n",
|
||||
"train_step = pipeline_run1.find_step_run('training_notebook_step')\n",
|
||||
"\n",
|
||||
"if train_step:\n",
|
||||
" train_step_obj = train_step[0] # since we have only one step by name `training_notebook_step`\n",
|
||||
" train_step_obj.get_output_data('notebook_result').download(source_directory) # download the output to source_directory"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "sanpil"
|
||||
}
|
||||
],
|
||||
"category": "tutorial",
|
||||
"compute": [
|
||||
"AML Compute"
|
||||
],
|
||||
"datasets": [
|
||||
"Custom"
|
||||
],
|
||||
"deployment": [
|
||||
"None"
|
||||
],
|
||||
"exclude_from_index": false,
|
||||
"framework": [
|
||||
"Azure ML"
|
||||
],
|
||||
"friendly_name": "How to run a notebook as a step in AML Pipelines",
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
"name": "python36"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.3"
|
||||
},
|
||||
"order_index": 12,
|
||||
"star_tag": [
|
||||
"None"
|
||||
],
|
||||
"tags": [
|
||||
"None"
|
||||
],
|
||||
"task": "Demonstrates the use of NotebookRunnerStep"
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -0,0 +1,6 @@
|
||||
name: aml-pipelines-with-notebook-runner-step
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-widgets
|
||||
- azureml-contrib-notebook
|
||||
@@ -0,0 +1,106 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved. \n",
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(\"In training_notebook.ipynb\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"parameters"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# declaring parameters to override\n",
|
||||
"\n",
|
||||
"arg1 = \"Azure\"\n",
|
||||
"processed_data = None\n",
|
||||
"notebook_processed_data = None\n",
|
||||
"my_pipeline_param = None"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Final parameter values\n",
|
||||
"\n",
|
||||
"print(\"arg1: %s\" % arg1)\n",
|
||||
"print(\"input from previous step: %s\" % processed_data)\n",
|
||||
"print(\"output from notebook: %s\" % notebook_processed_data)\n",
|
||||
"print(\"pipeline_parameter: %s\" % my_pipeline_param)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"if not (notebook_processed_data is None):\n",
|
||||
" os.makedirs(notebook_processed_data, exist_ok=True)\n",
|
||||
" print(\"%s created\" % notebook_processed_data)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "sanpil"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
"name": "python36"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -11,13 +11,13 @@ Batch inference public preview offers a platform in which to do large inference
|
||||
### Python package installation
|
||||
Following the convention of most AzureML Public Preview features, Batch Inference SDK is currently available as a contrib package.
|
||||
|
||||
If you're unfamiliar with creating a new Python environment, you may follow this example for [creating a conda environment](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-configure-environment#local). Batch Inference package can be installed through the following pip command.
|
||||
If you're unfamiliar with creating a new Python environment, you may follow this example for [creating a conda environment](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-configure-environment#local). Batch Inference package can be installed through the following pip command.
|
||||
```
|
||||
pip install azureml-contrib-pipeline-steps
|
||||
```
|
||||
|
||||
### Creation of Azure Machine Learning Workspace
|
||||
If you do not already have a Azure ML Workspace, please run the [configuration Notebook](../../configuration.ipynb).
|
||||
If you do not already have an Azure ML Workspace, please run the [configuration Notebook](https://aka.ms/pl-config).
|
||||
|
||||
## Configure a Batch Inference job
|
||||
|
||||
@@ -124,4 +124,4 @@ pipeline_run.wait_for_completion(show_output=True)
|
||||
- [file-dataset-image-inference-mnist.ipynb](./file-dataset-image-inference-mnist.ipynb) demonstrates how to run batch inference on an MNIST dataset.
|
||||
- [tabular-dataset-inference-iris.ipynb](./tabular-dataset-inference-iris.ipynb) demonstrates how to run batch inference on an IRIS dataset.
|
||||
|
||||

|
||||

|
||||
@@ -12,7 +12,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -23,6 +23,11 @@
|
||||
"\n",
|
||||
"In this notebook, we will demonstrate how to make predictions on large quantities of data asynchronously using the ML pipelines with Azure Machine Learning. Batch inference (or batch scoring) provides cost-effective inference, with unparalleled throughput for asynchronous applications. Batch prediction pipelines can scale to perform inference on terabytes of production data. Batch prediction is optimized for high throughput, fire-and-forget predictions for a large collection of data.\n",
|
||||
"\n",
|
||||
"> **Note**\n",
|
||||
"This notebook uses public preview functionality (ParallelRunStep). Please install azureml-contrib-pipeline-steps package before running this notebook.\n",
|
||||
"```\n",
|
||||
"pip install azureml-contrib-pipeline-steps\n",
|
||||
"```\n",
|
||||
"> **Tip**\n",
|
||||
"If your system requires low-latency processing (to process a single document or small set of documents quickly), use [real-time scoring](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-consume-web-service) instead of batch prediction.\n",
|
||||
"\n",
|
||||
@@ -519,9 +524,6 @@
|
||||
"name": "tracych"
|
||||
}
|
||||
],
|
||||
"friendly_name": "MNIST data inferencing using ParallelRunStep",
|
||||
"exclude_from_index": false,
|
||||
"index_order": 1,
|
||||
"category": "Other notebooks",
|
||||
"compute": [
|
||||
"AML Compute"
|
||||
@@ -532,14 +534,12 @@
|
||||
"deployment": [
|
||||
"None"
|
||||
],
|
||||
"exclude_from_index": false,
|
||||
"framework": [
|
||||
"None"
|
||||
],
|
||||
"tags": [
|
||||
"Batch Inferencing",
|
||||
"Pipeline"
|
||||
],
|
||||
"task": "Digit identification",
|
||||
"friendly_name": "MNIST data inferencing using ParallelRunStep",
|
||||
"index_order": 1,
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
@@ -556,7 +556,12 @@
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.9"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Batch Inferencing",
|
||||
"Pipeline"
|
||||
],
|
||||
"task": "Digit identification"
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
@@ -4,7 +4,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved. \n",
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
@@ -12,7 +12,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -23,6 +23,11 @@
|
||||
"\n",
|
||||
"In this notebook, we will demonstrate how to make predictions on large quantities of data asynchronously using the ML pipelines with Azure Machine Learning. Batch inference (or batch scoring) provides cost-effective inference, with unparalleled throughput for asynchronous applications. Batch prediction pipelines can scale to perform inference on terabytes of production data. Batch prediction is optimized for high throughput, fire-and-forget predictions for a large collection of data.\n",
|
||||
"\n",
|
||||
"> **Note**\n",
|
||||
"This notebook uses public preview functionality (ParallelRunStep). Please install azureml-contrib-pipeline-steps package before running this notebook.\n",
|
||||
"```\n",
|
||||
"pip install azureml-contrib-pipeline-steps\n",
|
||||
"```\n",
|
||||
"> **Tip**\n",
|
||||
"If your system requires low-latency processing (to process a single document or small set of documents quickly), use [real-time scoring](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-consume-web-service) instead of batch prediction.\n",
|
||||
"\n",
|
||||
@@ -494,9 +499,6 @@
|
||||
"name": "tracych"
|
||||
}
|
||||
],
|
||||
"friendly_name": "IRIS data inferencing using ParallelRunStep",
|
||||
"exclude_from_index": false,
|
||||
"index_order": 1,
|
||||
"category": "Other notebooks",
|
||||
"compute": [
|
||||
"AML Compute"
|
||||
@@ -507,14 +509,12 @@
|
||||
"deployment": [
|
||||
"None"
|
||||
],
|
||||
"exclude_from_index": false,
|
||||
"framework": [
|
||||
"None"
|
||||
],
|
||||
"tags": [
|
||||
"Batch Inferencing",
|
||||
"Pipeline"
|
||||
],
|
||||
"task": "Recognize flower type",
|
||||
"friendly_name": "IRIS data inferencing using ParallelRunStep",
|
||||
"index_order": 1,
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
@@ -531,7 +531,12 @@
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.2"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Batch Inferencing",
|
||||
"Pipeline"
|
||||
],
|
||||
"task": "Recognize flower type"
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
@@ -1,207 +0,0 @@
|
||||
# Original source: https://github.com/pytorch/examples/blob/master/fast_neural_style/neural_style/neural_style.py
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
import re
|
||||
|
||||
from PIL import Image
|
||||
import torch
|
||||
from torchvision import transforms
|
||||
|
||||
from mpi4py import MPI
|
||||
|
||||
|
||||
def load_image(filename, size=None, scale=None):
    """Open an image file and optionally resize it.

    Args:
        filename: Path handed to ``PIL.Image.open``.
        size: When not None, the image is resized to a ``size`` x ``size``
            square (the aspect ratio is not preserved). Takes precedence
            over ``scale``.
        scale: When not None (and ``size`` is None), both dimensions are
            divided by this factor and truncated to integers.

    Returns:
        The opened PIL image, resized when ``size`` or ``scale`` was given.
    """
    img = Image.open(filename)
    # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same
    # resampling filter under its current name.
    if size is not None:
        img = img.resize((size, size), Image.LANCZOS)
    elif scale is not None:
        img = img.resize((int(img.size[0] / scale), int(img.size[1] / scale)), Image.LANCZOS)
    return img
|
||||
|
||||
|
||||
def save_image(filename, data):
    """Save an image tensor to ``filename`` through PIL.

    ``data`` is cloned, clamped to the range [0, 255], converted to a NumPy
    array, its axes reordered from (0, 1, 2) to (1, 2, 0) and cast to
    ``uint8`` before being written.
    NOTE(review): presumably ``data`` is a CPU tensor laid out as
    (channels, height, width) -- confirm against the caller.
    """
    pixels = data.clone().clamp(0, 255).numpy()
    pixels = pixels.transpose(1, 2, 0).astype("uint8")
    Image.fromarray(pixels).save(filename)
|
||||
|
||||
|
||||
class TransformerNet(torch.nn.Module):
    """Style-transfer network built from convolution, residual and upsampling layers.

    The layer attribute names are part of the checkpoint format (they are
    the state_dict keys), so they must not be renamed.
    """

    def __init__(self):
        super().__init__()
        # Initial convolutions, each followed by an affine instance norm
        self.conv1 = ConvLayer(3, 32, kernel_size=9, stride=1)
        self.in1 = torch.nn.InstanceNorm2d(32, affine=True)
        self.conv2 = ConvLayer(32, 64, kernel_size=3, stride=2)
        self.in2 = torch.nn.InstanceNorm2d(64, affine=True)
        self.conv3 = ConvLayer(64, 128, kernel_size=3, stride=2)
        self.in3 = torch.nn.InstanceNorm2d(128, affine=True)
        # Five residual blocks at 128 channels
        self.res1 = ResidualBlock(128)
        self.res2 = ResidualBlock(128)
        self.res3 = ResidualBlock(128)
        self.res4 = ResidualBlock(128)
        self.res5 = ResidualBlock(128)
        # Upsampling layers back down to 3 output channels
        self.deconv1 = UpsampleConvLayer(128, 64, kernel_size=3, stride=1, upsample=2)
        self.in4 = torch.nn.InstanceNorm2d(64, affine=True)
        self.deconv2 = UpsampleConvLayer(64, 32, kernel_size=3, stride=1, upsample=2)
        self.in5 = torch.nn.InstanceNorm2d(32, affine=True)
        self.deconv3 = ConvLayer(32, 3, kernel_size=9, stride=1)
        # Shared non-linearity
        self.relu = torch.nn.ReLU()

    def forward(self, X):
        """Run ``X`` through the encoder, the residual blocks and the decoder."""
        encoder = (
            (self.conv1, self.in1),
            (self.conv2, self.in2),
            (self.conv3, self.in3),
        )
        decoder = (
            (self.deconv1, self.in4),
            (self.deconv2, self.in5),
        )

        out = X
        for conv, norm in encoder:
            out = self.relu(norm(conv(out)))
        for block in (self.res1, self.res2, self.res3, self.res4, self.res5):
            out = block(out)
        for deconv, norm in decoder:
            out = self.relu(norm(deconv(out)))
        # The last layer has no normalisation or activation.
        return self.deconv3(out)
|
||||
|
||||
|
||||
class ConvLayer(torch.nn.Module):
    """Reflection-pad the input by ``kernel_size // 2`` and apply a 2-D convolution."""

    def __init__(self, in_channels, out_channels, kernel_size, stride):
        super().__init__()
        pad = kernel_size // 2
        self.reflection_pad = torch.nn.ReflectionPad2d(pad)
        self.conv2d = torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride)

    def forward(self, x):
        """Return the convolution of the reflection-padded ``x``."""
        return self.conv2d(self.reflection_pad(x))
|
||||
|
||||
|
||||
class ResidualBlock(torch.nn.Module):
    """Residual block: two conv + instance-norm stages with a skip connection.

    introduced in: https://arxiv.org/abs/1512.03385
    recommended architecture: http://torch.ch/blog/2016/02/04/resnets.html
    """

    def __init__(self, channels):
        super().__init__()
        self.conv1 = ConvLayer(channels, channels, kernel_size=3, stride=1)
        self.in1 = torch.nn.InstanceNorm2d(channels, affine=True)
        self.conv2 = ConvLayer(channels, channels, kernel_size=3, stride=1)
        self.in2 = torch.nn.InstanceNorm2d(channels, affine=True)
        self.relu = torch.nn.ReLU()

    def forward(self, x):
        """Return the two-stage transform of ``x`` plus ``x`` itself."""
        hidden = self.relu(self.in1(self.conv1(x)))
        # No activation after the second stage; the input is added back as-is.
        return self.in2(self.conv2(hidden)) + x
|
||||
|
||||
|
||||
class UpsampleConvLayer(torch.nn.Module):
    """Optionally upsample the input, then reflection-pad and convolve it.

    Upsampling followed by a convolution gives better results than
    ConvTranspose2d.
    ref: http://distill.pub/2016/deconv-checkerboard/
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, upsample=None):
        super().__init__()
        self.upsample = upsample
        # The upsampling module only exists when a truthy factor was given.
        if upsample:
            self.upsample_layer = torch.nn.Upsample(mode='nearest', scale_factor=upsample)
        pad = kernel_size // 2
        self.reflection_pad = torch.nn.ReflectionPad2d(pad)
        self.conv2d = torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride)

    def forward(self, x):
        """Return the convolution of the (optionally upsampled) padded input."""
        upsampled = self.upsample_layer(x) if self.upsample else x
        return self.conv2d(self.reflection_pad(upsampled))
|
||||
|
||||
|
||||
def stylize(args, comm):
    """Stylize this MPI rank's share of the images in ``args.content_dir``.

    The checkpoint ``<args.model_dir>/<args.style>.pth`` is loaded into a
    ``TransformerNet``; every file name assigned to this rank is loaded,
    passed through the model and saved under the same name in
    ``args.output_dir``. The per-rank lists of output paths are gathered on
    rank 0, which prints the total number of files written.

    Args:
        args: Parsed command-line namespace; reads ``cuda``, ``model_dir``,
            ``style``, ``content_dir``, ``content_scale`` and ``output_dir``.
        comm: MPI communicator providing ``Get_rank``, ``Get_size`` and
            ``gather``.
    """
    rank = comm.Get_rank()
    size = comm.Get_size()

    device = torch.device("cuda" if args.cuda else "cpu")
    with torch.no_grad():
        style_model = TransformerNet()
        # map_location lets a checkpoint that was saved on a GPU be loaded
        # when running on CPU.
        state_dict = torch.load(os.path.join(args.model_dir, args.style + ".pth"),
                                map_location=device)
        # remove saved deprecated running_* keys in InstanceNorm from the checkpoint
        for k in list(state_dict.keys()):
            if re.search(r'in\d+\.running_(mean|var)$', k):
                del state_dict[k]
        style_model.load_state_dict(state_dict)
        style_model.to(device)

        filenames = sorted(os.listdir(args.content_dir))
        # Strided split over the sorted names: every file is assigned to
        # exactly one rank, including the remainder when the file count is
        # not a multiple of the number of ranks (a floor-division split
        # would silently skip those files).
        partitioned_filenames = filenames[rank::size]
        print("RANK {} - is processing {} images out of the total {}".format(rank, len(partitioned_filenames),
                                                                             len(filenames)))

        # The transform does not depend on the file, so build it once.
        content_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Lambda(lambda x: x.mul(255))
        ])

        output_paths = []
        for filename in partitioned_filenames:
            full_path = os.path.join(args.content_dir, filename)
            content_image = load_image(full_path, scale=args.content_scale)
            content_image = content_transform(content_image)
            content_image = content_image.unsqueeze(0).to(device)

            output = style_model(content_image).cpu()

            output_path = os.path.join(args.output_dir, filename)
            save_image(output_path, output[0])

            output_paths.append(output_path)

    print("RANK {} - number of pre-aggregated output files {}".format(rank, len(output_paths)))

    output_paths_list = comm.gather(output_paths, root=0)

    if rank == 0:
        # gather() returns one list per rank; count the files, not the ranks.
        total_outputs = sum(len(paths) for paths in output_paths_list)
        print("RANK {} - number of aggregated output files {}".format(rank, total_outputs))
    print("RANK {} - end".format(rank))
|
||||
|
||||
|
||||
def main():
    """Parse the command line, verify CUDA availability and run ``stylize``.

    Exits with status 1 when ``--cuda`` is non-zero but CUDA is not
    available. Creates ``--output-dir`` if it does not exist yet.
    """
    arg_parser = argparse.ArgumentParser(description="parser for fast-neural-style")

    arg_parser.add_argument("--content-scale", type=float, default=None,
                            help="factor for scaling down the content image")
    arg_parser.add_argument("--model-dir", type=str, required=True,
                            help="saved model to be used for stylizing the image.")
    arg_parser.add_argument("--cuda", type=int, required=True,
                            help="set it to 1 for running on GPU, 0 for CPU")
    # The style name is used to build the checkpoint file name, so a missing
    # value must be rejected here instead of failing later with a TypeError.
    arg_parser.add_argument("--style", type=str, required=True, help="style name")
    arg_parser.add_argument("--content-dir", type=str, required=True,
                            help="directory holding the images")
    arg_parser.add_argument("--output-dir", type=str, required=True,
                            help="directory holding the output images")
    args = arg_parser.parse_args()

    comm = MPI.COMM_WORLD

    if args.cuda and not torch.cuda.is_available():
        print("ERROR: cuda is not available, try running on CPU")
        sys.exit(1)
    os.makedirs(args.output_dir, exist_ok=True)
    stylize(args, comm)


if __name__ == "__main__":
    main()
|
||||
@@ -16,13 +16,6 @@
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**Note**: Azure Machine Learning recently released ParallelRunStep for public preview, this will allow for parallelization of your workload across many compute nodes without the difficulty of orchestrating worker pools and queues. See the [batch inference notebooks](../../../contrib/batch_inferencing/) for examples on how to get started."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -31,7 +24,13 @@
|
||||
"Using modified code from `pytorch`'s neural style [example](https://pytorch.org/tutorials/advanced/neural_style_tutorial.html), we show how to setup a pipeline for doing style transfer on video. The pipeline has following steps:\n",
|
||||
"1. Split a video into images\n",
|
||||
"2. Run neural style on each image using one of the provided models (from `pytorch` pretrained models for this example).\n",
|
||||
"3. Stitch the image back into a video."
|
||||
"3. Stitch the image back into a video.\n",
|
||||
"\n",
|
||||
"> **Note**\n",
|
||||
"This notebook uses public preview functionality (ParallelRunStep). Please install azureml-contrib-pipeline-steps package before running this notebook.\n",
|
||||
"```\n",
|
||||
"pip install azureml-contrib-pipeline-steps\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -57,19 +56,25 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"# Check core SDK version number\n",
|
||||
"import azureml.core\n",
|
||||
"\n",
|
||||
"print(\"SDK version:\", azureml.core.VERSION)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core import Workspace, Experiment\n",
|
||||
"\n",
|
||||
"ws = Workspace.from_config()\n",
|
||||
"print('Workspace name: ' + ws.name, \n",
|
||||
" 'Azure region: ' + ws.location, \n",
|
||||
" 'Subscription id: ' + ws.subscription_id, \n",
|
||||
" 'Resource group: ' + ws.resource_group, sep = '\\n')\n",
|
||||
"\n",
|
||||
"scripts_folder = \"scripts_folder\"\n",
|
||||
"\n",
|
||||
"if not os.path.isdir(scripts_folder):\n",
|
||||
" os.mkdir(scripts_folder)"
|
||||
" 'Resource group: ' + ws.resource_group, sep = '\\n')"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -82,11 +87,96 @@
|
||||
"from azureml.core.datastore import Datastore\n",
|
||||
"from azureml.data.data_reference import DataReference\n",
|
||||
"from azureml.pipeline.core import Pipeline, PipelineData\n",
|
||||
"from azureml.pipeline.steps import PythonScriptStep, MpiStep\n",
|
||||
"from azureml.pipeline.steps import PythonScriptStep\n",
|
||||
"from azureml.core.runconfig import CondaDependencies, RunConfiguration\n",
|
||||
"from azureml.core.compute_target import ComputeTargetException"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Download models"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"# create directory for model\n",
|
||||
"model_dir = 'models'\n",
|
||||
"if not os.path.isdir(model_dir):\n",
|
||||
" os.mkdir(model_dir)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import urllib.request\n",
|
||||
"\n",
|
||||
"def download_model(model_name):\n",
|
||||
" # downloaded models from https://pytorch.org/tutorials/advanced/neural_style_tutorial.html are kept here\n",
|
||||
" url=\"https://pipelinedata.blob.core.windows.net/styletransfer/saved_models/\" + model_name\n",
|
||||
" local_path = os.path.join(model_dir, model_name)\n",
|
||||
" urllib.request.urlretrieve(url, local_path)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Register all Models"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.model import Model\n",
|
||||
"mosaic_model = None\n",
|
||||
"candy_model = None\n",
|
||||
"\n",
|
||||
"models = Model.list(workspace=ws, tags=['scenario'])\n",
|
||||
"for m in models:\n",
|
||||
" print(\"Name:\", m.name,\"\\tVersion:\", m.version, \"\\tDescription:\", m.description, m.tags)\n",
|
||||
" if m.name == 'mosaic' and mosaic_model is None:\n",
|
||||
" mosaic_model = m\n",
|
||||
" elif m.name == 'candy' and candy_model is None:\n",
|
||||
" candy_model = m\n",
|
||||
"\n",
|
||||
"if mosaic_model is None:\n",
|
||||
" print('Mosaic model does not exist, registering it')\n",
|
||||
" download_model('mosaic.pth')\n",
|
||||
" mosaic_model = Model.register(model_path = os.path.join(model_dir, \"mosaic.pth\"),\n",
|
||||
" model_name = \"mosaic\",\n",
|
||||
" tags = {'type': \"mosaic\", 'scenario': \"Style transfer using batch inference\"},\n",
|
||||
" description = \"Style transfer - Mosaic\",\n",
|
||||
" workspace = ws)\n",
|
||||
"else:\n",
|
||||
" print('Reusing existing mosaic model')\n",
|
||||
" \n",
|
||||
"\n",
|
||||
"if candy_model is None:\n",
|
||||
" print('Candy model does not exist, registering it')\n",
|
||||
" download_model('candy.pth')\n",
|
||||
" candy_model = Model.register(model_path = os.path.join(model_dir, \"candy.pth\"),\n",
|
||||
" model_name = \"candy\",\n",
|
||||
" tags = {'type': \"candy\", 'scenario': \"Style transfer using batch inference\"},\n",
|
||||
" description = \"Style transfer - Candy\",\n",
|
||||
" workspace = ws)\n",
|
||||
"else:\n",
|
||||
" print('Reusing existing candy model')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -122,7 +212,7 @@
|
||||
"except ComputeTargetException:\n",
|
||||
" print(\"creating new cluster\")\n",
|
||||
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_NC6\",\n",
|
||||
" max_nodes = 3)\n",
|
||||
" max_nodes = 3)\n",
|
||||
"\n",
|
||||
" # create the cluster\n",
|
||||
" gpu_cluster = ComputeTarget.create(ws, gpu_cluster_name, provisioning_config)\n",
|
||||
@@ -145,8 +235,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import shutil\n",
|
||||
"shutil.copy(\"neural_style_mpi.py\", scripts_folder)"
|
||||
"scripts_folder = \"scripts\""
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -155,31 +244,11 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%writefile $scripts_folder/process_video.py\n",
|
||||
"import argparse\n",
|
||||
"import glob\n",
|
||||
"import os\n",
|
||||
"import subprocess\n",
|
||||
"process_video_script_file = \"process_video.py\"\n",
|
||||
"\n",
|
||||
"parser = argparse.ArgumentParser(description=\"Process input video\")\n",
|
||||
"parser.add_argument('--input_video', required=True)\n",
|
||||
"parser.add_argument('--output_audio', required=True)\n",
|
||||
"parser.add_argument('--output_images', required=True)\n",
|
||||
"\n",
|
||||
"args = parser.parse_args()\n",
|
||||
"\n",
|
||||
"os.makedirs(args.output_audio, exist_ok=True)\n",
|
||||
"os.makedirs(args.output_images, exist_ok=True)\n",
|
||||
"\n",
|
||||
"subprocess.run(\"ffmpeg -i {} {}/video.aac\"\n",
|
||||
" .format(args.input_video, args.output_audio),\n",
|
||||
" shell=True, check=True\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"subprocess.run(\"ffmpeg -i {} {}/%05d_video.jpg -hide_banner\"\n",
|
||||
" .format(args.input_video, args.output_images),\n",
|
||||
" shell=True, check=True\n",
|
||||
" )"
|
||||
"# peek at contents\n",
|
||||
"with open(os.path.join(scripts_folder, process_video_script_file)) as process_video_file:\n",
|
||||
" print(process_video_file.read())"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -188,31 +257,11 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%writefile $scripts_folder/stitch_video.py\n",
|
||||
"import argparse\n",
|
||||
"import os\n",
|
||||
"import subprocess\n",
|
||||
"stitch_video_script_file = \"stitch_video.py\"\n",
|
||||
"\n",
|
||||
"parser = argparse.ArgumentParser(description=\"Process input video\")\n",
|
||||
"parser.add_argument('--images_dir', required=True)\n",
|
||||
"parser.add_argument('--input_audio', required=True)\n",
|
||||
"parser.add_argument('--output_dir', required=True)\n",
|
||||
"\n",
|
||||
"args = parser.parse_args()\n",
|
||||
"\n",
|
||||
"os.makedirs(args.output_dir, exist_ok=True)\n",
|
||||
"\n",
|
||||
"subprocess.run(\"ffmpeg -framerate 30 -i {}/%05d_video.jpg -c:v libx264 -profile:v high -crf 20 -pix_fmt yuv420p \"\n",
|
||||
" \"-y {}/video_without_audio.mp4\"\n",
|
||||
" .format(args.images_dir, args.output_dir),\n",
|
||||
" shell=True, check=True\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"subprocess.run(\"ffmpeg -i {}/video_without_audio.mp4 -i {}/video.aac -map 0:0 -map 1:0 -vcodec \"\n",
|
||||
" \"copy -acodec copy -y {}/video_with_audio.mp4\"\n",
|
||||
" .format(args.output_dir, args.input_audio, args.output_dir),\n",
|
||||
" shell=True, check=True\n",
|
||||
" )"
|
||||
"# peek at contents\n",
|
||||
"with open(os.path.join(scripts_folder, stitch_video_script_file)) as stitch_video_file:\n",
|
||||
" print(stitch_video_file.read())"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -233,15 +282,6 @@
|
||||
"video_ds = Datastore.register_azure_blob_container(ws, \"videos\", \"sample-videos\",\n",
|
||||
" account_name=account_name, overwrite=True)\n",
|
||||
"\n",
|
||||
"# datastore for models\n",
|
||||
"models_ds = Datastore.register_azure_blob_container(ws, \"models\", \"styletransfer\", \n",
|
||||
" account_name=\"pipelinedata\", \n",
|
||||
" overwrite=True)\n",
|
||||
" \n",
|
||||
"# downloaded models from https://pytorch.org/tutorials/advanced/neural_style_tutorial.html are kept here\n",
|
||||
"models_dir = DataReference(data_reference_name=\"models\", datastore=models_ds, \n",
|
||||
" path_on_datastore=\"saved_models\", mode=\"download\")\n",
|
||||
"\n",
|
||||
"# the default blob store attached to a workspace\n",
|
||||
"default_datastore = ws.get_default_datastore()"
|
||||
]
|
||||
@@ -276,13 +316,8 @@
|
||||
"cd.add_channel(\"conda-forge\")\n",
|
||||
"cd.add_conda_package(\"ffmpeg\")\n",
|
||||
"\n",
|
||||
"cd.add_channel(\"pytorch\")\n",
|
||||
"cd.add_conda_package(\"pytorch\")\n",
|
||||
"cd.add_conda_package(\"torchvision\")\n",
|
||||
"\n",
|
||||
"# Runconfig\n",
|
||||
"amlcompute_run_config = RunConfiguration(conda_dependencies=cd)\n",
|
||||
"amlcompute_run_config.environment.docker.enabled = True\n",
|
||||
"amlcompute_run_config.environment.docker.base_image = \"pytorch/pytorch\"\n",
|
||||
"amlcompute_run_config.environment.spark.precache_packages = False"
|
||||
]
|
||||
@@ -294,9 +329,13 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ffmpeg_audio = PipelineData(name=\"ffmpeg_audio\", datastore=default_datastore)\n",
|
||||
"ffmpeg_images = PipelineData(name=\"ffmpeg_images\", datastore=default_datastore)\n",
|
||||
"processed_images = PipelineData(name=\"processed_images\", datastore=default_datastore)\n",
|
||||
"output_video = PipelineData(name=\"output_video\", datastore=default_datastore)"
|
||||
"output_video = PipelineData(name=\"output_video\", datastore=default_datastore)\n",
|
||||
"\n",
|
||||
"ffmpeg_images_ds_name = \"ffmpeg_images_data\"\n",
|
||||
"ffmpeg_images = PipelineData(name=\"ffmpeg_images\", datastore=default_datastore)\n",
|
||||
"ffmpeg_images_file_dataset = ffmpeg_images.as_dataset()\n",
|
||||
"ffmpeg_images_named_file_dataset = ffmpeg_images_file_dataset.as_named_input(ffmpeg_images_ds_name)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -304,7 +343,10 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Define tweakable parameters to pipeline\n",
|
||||
"These parameters can be changed when the pipeline is published and rerun from a REST call"
|
||||
"These parameters can be changed when the pipeline is published and rerun from a REST call.\n",
|
||||
"As part of ParallelRunStep following 2 pipeline parameters will be created which can be used to override values.\n",
|
||||
" node_count\n",
|
||||
" process_count_per_node"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -314,10 +356,8 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.pipeline.core.graph import PipelineParameter\n",
|
||||
"# create a parameter for style (one of \"candy\", \"mosaic\", \"rain_princess\", \"udnie\") to transfer the images to\n",
|
||||
"style_param = PipelineParameter(name=\"style\", default_value=\"mosaic\")\n",
|
||||
"# create a parameter for the number of nodes to use in step no. 2 (style transfer)\n",
|
||||
"nodecount_param = PipelineParameter(name=\"nodecount\", default_value=1)"
|
||||
"# create a parameter for style (one of \"candy\", \"mosaic\") to transfer the images to\n",
|
||||
"style_param = PipelineParameter(name=\"style\", default_value=\"mosaic\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -340,27 +380,6 @@
|
||||
" source_directory=scripts_folder\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# create a MPI step for distributing style transfer step across multiple nodes in AmlCompute \n",
|
||||
"# using 'nodecount_param' PipelineParameter\n",
|
||||
"distributed_style_transfer_step = MpiStep(\n",
|
||||
" name=\"mpi style transfer\",\n",
|
||||
" script_name=\"neural_style_mpi.py\",\n",
|
||||
" arguments=[\"--content-dir\", ffmpeg_images,\n",
|
||||
" \"--output-dir\", processed_images,\n",
|
||||
" \"--model-dir\", models_dir,\n",
|
||||
" \"--style\", style_param,\n",
|
||||
" \"--cuda\", 1\n",
|
||||
" ],\n",
|
||||
" compute_target=gpu_cluster,\n",
|
||||
" node_count=nodecount_param, \n",
|
||||
" process_count_per_node=1,\n",
|
||||
" inputs=[models_dir, ffmpeg_images],\n",
|
||||
" outputs=[processed_images],\n",
|
||||
" pip_packages=[\"mpi4py\", \"torch\", \"torchvision\"],\n",
|
||||
" use_gpu=True,\n",
|
||||
" source_directory=scripts_folder\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"stitch_video_step = PythonScriptStep(\n",
|
||||
" name=\"stitch\",\n",
|
||||
" script_name=\"stitch_video.py\",\n",
|
||||
@@ -375,6 +394,76 @@
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Create environment, parallel step run config and parallel run step"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core import Environment\n",
|
||||
"from azureml.core.runconfig import DEFAULT_GPU_IMAGE\n",
|
||||
"\n",
|
||||
"parallel_cd = CondaDependencies()\n",
|
||||
"\n",
|
||||
"parallel_cd.add_channel(\"pytorch\")\n",
|
||||
"parallel_cd.add_conda_package(\"pytorch\")\n",
|
||||
"parallel_cd.add_conda_package(\"torchvision\")\n",
|
||||
"\n",
|
||||
"styleenvironment = Environment(name=\"styleenvironment\")\n",
|
||||
"styleenvironment.python.conda_dependencies=parallel_cd\n",
|
||||
"styleenvironment.docker.base_image = DEFAULT_GPU_IMAGE"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.contrib.pipeline.steps import ParallelRunConfig\n",
|
||||
"\n",
|
||||
"parallel_run_config = ParallelRunConfig(\n",
|
||||
" environment=styleenvironment,\n",
|
||||
" entry_script='transform.py',\n",
|
||||
" output_action='summary_only',\n",
|
||||
" mini_batch_size=\"1\",\n",
|
||||
" error_threshold=1,\n",
|
||||
" source_directory=scripts_folder,\n",
|
||||
" compute_target=gpu_cluster, \n",
|
||||
" node_count=3)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.contrib.pipeline.steps import ParallelRunStep\n",
|
||||
"from datetime import datetime\n",
|
||||
"\n",
|
||||
"parallel_step_name = 'styletransfer-' + datetime.now().strftime('%Y%m%d%H%M')\n",
|
||||
"\n",
|
||||
"distributed_style_transfer_step = ParallelRunStep(\n",
|
||||
" name=parallel_step_name,\n",
|
||||
" inputs=[ffmpeg_images_named_file_dataset], # Input file share/blob container/file dataset\n",
|
||||
" output=processed_images, # Output file share/blob container\n",
|
||||
" models=[mosaic_model, candy_model],\n",
|
||||
" tags = {'scenario': \"batch inference\", 'type': \"demo\"},\n",
|
||||
" properties = {'area': \"style transfer\"},\n",
|
||||
" arguments=[\"--style\", style_param],\n",
|
||||
" parallel_run_config=parallel_run_config,\n",
|
||||
" allow_reuse=True #[optional - default value True]\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -389,8 +478,18 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pipeline = Pipeline(workspace=ws, steps=[stitch_video_step])\n",
|
||||
"\n",
|
||||
"pipeline.validate()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# submit the pipeline and provide values for the PipelineParameters used in the pipeline\n",
|
||||
"pipeline_run = Experiment(ws, 'style_transfer').submit(pipeline, pipeline_parameters={\"style\": \"mosaic\", \"nodecount\": 3})"
|
||||
"pipeline_run = Experiment(ws, 'styletransfer_parallel_mosaic').submit(pipeline)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -406,10 +505,20 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Track pipeline run progress\n",
|
||||
"from azureml.widgets import RunDetails\n",
|
||||
"RunDetails(pipeline_run).show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pipeline_run.wait_for_completion()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -459,24 +568,21 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"published_pipeline = pipeline_run.publish_pipeline(\n",
|
||||
" name=\"batch score style transfer\", description=\"style transfer\", version=\"1.0\")\n",
|
||||
"pipeline_name = \"style-transfer-batch-inference\"\n",
|
||||
"print(pipeline_name)\n",
|
||||
"\n",
|
||||
"published_pipeline"
|
||||
"published_pipeline = pipeline.publish(\n",
|
||||
" name=pipeline_name, \n",
|
||||
" description=pipeline_name)\n",
|
||||
"print(\"Newly published pipeline id: {}\".format(published_pipeline.id))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Get published pipeline\n",
|
||||
"\n",
|
||||
"You can get the published pipeline using **pipeline id**.\n",
|
||||
"\n",
|
||||
"To get all the published pipelines for a given workspace(ws): \n",
|
||||
"```css\n",
|
||||
"all_pub_pipelines = PublishedPipeline.get_all(ws)\n",
|
||||
"```"
|
||||
"# Get published pipeline\n",
|
||||
"This is another way to get the published pipeline."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -487,25 +593,30 @@
|
||||
"source": [
|
||||
"from azureml.pipeline.core import PublishedPipeline\n",
|
||||
"\n",
|
||||
"pipeline_id = published_pipeline.id # use your published pipeline id\n",
|
||||
"published_pipeline = PublishedPipeline.get(ws, pipeline_id)\n",
|
||||
"# You could retrieve all pipelines that are published, or \n",
|
||||
"# just get the published pipeline object that you have the ID for.\n",
|
||||
"\n",
|
||||
"published_pipeline"
|
||||
"# Get all published pipeline objects in the workspace\n",
|
||||
"all_pub_pipelines = PublishedPipeline.list(ws)\n",
|
||||
"\n",
|
||||
"# We will iterate through the list of published pipelines and \n",
|
||||
"# use the last ID in the list for Schelue operations: \n",
|
||||
"print(\"Published pipelines found in the workspace:\")\n",
|
||||
"for pub_pipeline in all_pub_pipelines:\n",
|
||||
" print(\"Name:\", pub_pipeline.name,\"\\tDescription:\", pub_pipeline.description, \"\\tId:\", pub_pipeline.id, \"\\tStatus:\", pub_pipeline.status)\n",
|
||||
" if(pub_pipeline.name == pipeline_name):\n",
|
||||
" published_pipeline = pub_pipeline\n",
|
||||
"\n",
|
||||
"print(\"Published pipeline id: {}\".format(published_pipeline.id))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Re-run pipeline through REST calls for other styles"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Get AAD token\n",
|
||||
"[This notebook](https://aka.ms/pl-restep-auth) shows how to authenticate to AML workspace."
|
||||
"# Run pipeline through REST calls for other styles\n",
|
||||
"\n",
|
||||
"# Get AAD token"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -518,14 +629,14 @@
|
||||
"import requests\n",
|
||||
"\n",
|
||||
"auth = InteractiveLoginAuthentication()\n",
|
||||
"aad_token = auth.get_authentication_header()\n"
|
||||
"aad_token = auth.get_authentication_header()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Get endpoint URL"
|
||||
"# Get endpoint URL"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -534,21 +645,15 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"rest_endpoint = published_pipeline.endpoint"
|
||||
"rest_endpoint = published_pipeline.endpoint\n",
|
||||
"print(\"Pipeline REST endpoing: {}\".format(rest_endpoint))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Send request and monitor"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Run the pipeline using PipelineParameter values style='candy' and nodecount=2"
|
||||
"# Send request and monitor"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -557,38 +662,16 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"experiment_name = 'styletransfer_parallel_candy'\n",
|
||||
"response = requests.post(rest_endpoint, \n",
|
||||
" headers=aad_token,\n",
|
||||
" json={\"ExperimentName\": \"style_transfer\",\n",
|
||||
" \"ParameterAssignments\": {\"style\": \"candy\", \"nodecount\": 2}})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"try:\n",
|
||||
" response.raise_for_status()\n",
|
||||
"except Exception: \n",
|
||||
" raise Exception('Received bad response from the endpoint: {}\\n'\n",
|
||||
" 'Response Code: {}\\n'\n",
|
||||
" 'Headers: {}\\n'\n",
|
||||
" 'Content: {}'.format(rest_endpoint, response.status_code, response.headers, response.content))\n",
|
||||
" json={\"ExperimentName\": experiment_name,\n",
|
||||
" \"ParameterAssignments\": {\"style\": \"candy\", \"aml_node_count\": 2}})\n",
|
||||
"run_id = response.json()[\"Id\"]\n",
|
||||
"\n",
|
||||
"run_id = response.json().get('Id')\n",
|
||||
"print('Submitted pipeline run: ', run_id)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.pipeline.core.run import PipelineRun\n",
|
||||
"published_pipeline_run_candy = PipelineRun(ws.experiments[\"style_transfer\"], run_id)\n",
|
||||
"published_pipeline_run_candy = PipelineRun(ws.experiments[experiment_name], run_id)\n",
|
||||
"\n",
|
||||
"RunDetails(published_pipeline_run_candy).show()"
|
||||
]
|
||||
},
|
||||
@@ -596,7 +679,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Run the pipeline using PipelineParameter values style='rain_princess' and nodecount=3"
|
||||
"# Download output from re-run"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -605,10 +688,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"response = requests.post(rest_endpoint, \n",
|
||||
" headers=aad_token,\n",
|
||||
" json={\"ExperimentName\": \"style_transfer\",\n",
|
||||
" \"ParameterAssignments\": {\"style\": \"rain_princess\", \"nodecount\": 3}})"
|
||||
"published_pipeline_run_candy.wait_for_completion()"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -617,111 +697,30 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"try:\n",
|
||||
" response.raise_for_status()\n",
|
||||
"except Exception: \n",
|
||||
" raise Exception('Received bad response from the endpoint: {}\\n'\n",
|
||||
" 'Response Code: {}\\n'\n",
|
||||
" 'Headers: {}\\n'\n",
|
||||
" 'Content: {}'.format(rest_endpoint, response.status_code, response.headers, response.content))\n",
|
||||
"\n",
|
||||
"run_id = response.json().get('Id')\n",
|
||||
"print('Submitted pipeline run: ', run_id)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"published_pipeline_run_rain = PipelineRun(ws.experiments[\"style_transfer\"], run_id)\n",
|
||||
"RunDetails(published_pipeline_run_rain).show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Run the pipeline using PipelineParameter values style='udnie' and nodecount=4"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"response = requests.post(rest_endpoint, \n",
|
||||
" headers=aad_token,\n",
|
||||
" json={\"ExperimentName\": \"style_transfer\",\n",
|
||||
" \"ParameterAssignments\": {\"style\": \"udnie\", \"nodecount\": 3}})\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"try:\n",
|
||||
" response.raise_for_status()\n",
|
||||
"except Exception: \n",
|
||||
" raise Exception('Received bad response from the endpoint: {}\\n'\n",
|
||||
" 'Response Code: {}\\n'\n",
|
||||
" 'Headers: {}\\n'\n",
|
||||
" 'Content: {}'.format(rest_endpoint, response.status_code, response.headers, response.content))\n",
|
||||
"\n",
|
||||
"run_id = response.json().get('Id')\n",
|
||||
"print('Submitted pipeline run: ', run_id)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"published_pipeline_run_udnie = PipelineRun(ws.experiments[\"style_transfer\"], run_id)\n",
|
||||
"RunDetails(published_pipeline_run_udnie).show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Download output from re-run"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"published_pipeline_run_candy.wait_for_completion()\n",
|
||||
"published_pipeline_run_rain.wait_for_completion()\n",
|
||||
"published_pipeline_run_udnie.wait_for_completion()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"download_video(published_pipeline_run_candy, target_dir=\"output_video_candy\")\n",
|
||||
"download_video(published_pipeline_run_rain, target_dir=\"output_video_rain_princess\")\n",
|
||||
"download_video(published_pipeline_run_udnie, target_dir=\"output_video_udnie\")"
|
||||
"download_video(published_pipeline_run_candy, target_dir=\"output_video_candy\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "sanpil"
|
||||
"name": "sanpil joringer asraniwa pansav tracych"
|
||||
}
|
||||
],
|
||||
"category": "Other notebooks",
|
||||
"compute": [
|
||||
"AML Compute"
|
||||
],
|
||||
"datasets": [],
|
||||
"deployment": [
|
||||
"None"
|
||||
],
|
||||
"exclude_from_index": true,
|
||||
"framework": [
|
||||
"None"
|
||||
],
|
||||
"friendly_name": "Style transfer using ParallelRunStep",
|
||||
"index_order": 1,
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
@@ -737,8 +736,13 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.7"
|
||||
}
|
||||
"version": "3.6.9"
|
||||
},
|
||||
"tags": [
|
||||
"Batch Inferencing",
|
||||
"Pipeline"
|
||||
],
|
||||
"task": "Style transfer"
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
|
||||
@@ -2,5 +2,6 @@ name: pipeline-style-transfer
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-contrib-pipeline-steps
|
||||
- azureml-widgets
|
||||
- requests
|
||||
|
||||
@@ -0,0 +1,22 @@
|
||||
import argparse
|
||||
import glob
|
||||
import os
|
||||
import subprocess
|
||||
|
||||
parser = argparse.ArgumentParser(description="Process input video")
|
||||
parser.add_argument('--input_video', required=True)
|
||||
parser.add_argument('--output_audio', required=True)
|
||||
parser.add_argument('--output_images', required=True)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
os.makedirs(args.output_audio, exist_ok=True)
|
||||
os.makedirs(args.output_images, exist_ok=True)
|
||||
|
||||
subprocess.run("ffmpeg -i {} {}/video.aac".format(args.input_video, args.output_audio),
|
||||
shell=True,
|
||||
check=True)
|
||||
|
||||
subprocess.run("ffmpeg -i {} {}/%05d_video.jpg -hide_banner".format(args.input_video, args.output_images),
|
||||
shell=True,
|
||||
check=True)
|
||||
@@ -0,0 +1,22 @@
|
||||
import argparse
|
||||
import os
|
||||
import subprocess
|
||||
|
||||
parser = argparse.ArgumentParser(description="Process input video")
|
||||
parser.add_argument('--images_dir', required=True)
|
||||
parser.add_argument('--input_audio', required=True)
|
||||
parser.add_argument('--output_dir', required=True)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
os.makedirs(args.output_dir, exist_ok=True)
|
||||
|
||||
subprocess.run("ffmpeg -framerate 30 -i {}/%05d_video.jpg -c:v libx264 -profile:v high -crf 20 -pix_fmt yuv420p "
|
||||
"-y {}/video_without_audio.mp4"
|
||||
.format(args.images_dir, args.output_dir),
|
||||
shell=True, check=True)
|
||||
|
||||
subprocess.run("ffmpeg -i {}/video_without_audio.mp4 -i {}/video.aac -map 0:0 -map 1:0 -vcodec "
|
||||
"copy -acodec copy -y {}/video_with_audio.mp4"
|
||||
.format(args.output_dir, args.input_audio, args.output_dir),
|
||||
shell=True, check=True)
|
||||
@@ -1,28 +1,17 @@
|
||||
# Original source: https://github.com/pytorch/examples/blob/master/fast_neural_style/neural_style/neural_style.py
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
import re
|
||||
|
||||
import json
|
||||
import traceback
|
||||
from PIL import Image
|
||||
|
||||
import torch
|
||||
from torchvision import transforms
|
||||
|
||||
from azureml.core.model import Model
|
||||
|
||||
def load_image(filename, size=None, scale=None):
|
||||
img = Image.open(filename)
|
||||
if size is not None:
|
||||
img = img.resize((size, size), Image.ANTIALIAS)
|
||||
elif scale is not None:
|
||||
img = img.resize((int(img.size[0] / scale), int(img.size[1] / scale)), Image.ANTIALIAS)
|
||||
return img
|
||||
|
||||
|
||||
def save_image(filename, data):
|
||||
img = data.clone().clamp(0, 255).numpy()
|
||||
img = img.transpose(1, 2, 0).astype("uint8")
|
||||
img = Image.fromarray(img)
|
||||
img.save(filename)
|
||||
style_model = None
|
||||
|
||||
|
||||
class TransformerNet(torch.nn.Module):
|
||||
@@ -123,62 +112,61 @@ class UpsampleConvLayer(torch.nn.Module):
|
||||
out = self.reflection_pad(x_in)
|
||||
out = self.conv2d(out)
|
||||
return out
|
||||
|
||||
|
||||
def stylize(args):
|
||||
device = torch.device("cuda" if args.cuda else "cpu")
|
||||
|
||||
def load_image(filename):
|
||||
img = Image.open(filename)
|
||||
return img
|
||||
|
||||
|
||||
def save_image(filename, data):
|
||||
img = data.clone().clamp(0, 255).numpy()
|
||||
img = img.transpose(1, 2, 0).astype("uint8")
|
||||
img = Image.fromarray(img)
|
||||
img.save(filename)
|
||||
|
||||
|
||||
def init():
|
||||
global output_path, args
|
||||
global style_model, device
|
||||
output_path = os.environ['AZUREML_BI_OUTPUT_PATH']
|
||||
print(f'output path: {output_path}')
|
||||
print(f'Cuda available? {torch.cuda.is_available()}')
|
||||
|
||||
arg_parser = argparse.ArgumentParser(description="parser for fast-neural-style")
|
||||
arg_parser.add_argument("--style", type=str, help="style name")
|
||||
args, unknown_args = arg_parser.parse_known_args()
|
||||
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
||||
with torch.no_grad():
|
||||
style_model = TransformerNet()
|
||||
state_dict = torch.load(os.path.join(args.model_dir, args.style+".pth"))
|
||||
model_path = Model.get_model_path(args.style)
|
||||
state_dict = torch.load(os.path.join(model_path))
|
||||
# remove saved deprecated running_* keys in InstanceNorm from the checkpoint
|
||||
for k in list(state_dict.keys()):
|
||||
if re.search(r'in\d+\.running_(mean|var)$', k):
|
||||
del state_dict[k]
|
||||
style_model.load_state_dict(state_dict)
|
||||
style_model.to(device)
|
||||
print(f'Model loaded successfully. Path: {model_path}')
|
||||
|
||||
filenames = os.listdir(args.content_dir)
|
||||
|
||||
for filename in filenames:
|
||||
print("Processing {}".format(filename))
|
||||
full_path = os.path.join(args.content_dir, filename)
|
||||
content_image = load_image(full_path, scale=args.content_scale)
|
||||
def run(mini_batch):
|
||||
|
||||
result = []
|
||||
for image_file_path in mini_batch:
|
||||
img = load_image(image_file_path)
|
||||
|
||||
with torch.no_grad():
|
||||
content_transform = transforms.Compose([
|
||||
transforms.ToTensor(),
|
||||
transforms.Lambda(lambda x: x.mul(255))
|
||||
])
|
||||
content_image = content_transform(content_image)
|
||||
content_image = content_transform(img)
|
||||
content_image = content_image.unsqueeze(0).to(device)
|
||||
|
||||
output = style_model(content_image).cpu()
|
||||
output_file_path = os.path.join(output_path, os.path.basename(image_file_path))
|
||||
save_image(output_file_path, output[0])
|
||||
result.append(output_file_path)
|
||||
|
||||
output_path = os.path.join(args.output_dir, filename)
|
||||
save_image(output_path, output[0])
|
||||
|
||||
def main():
|
||||
arg_parser = argparse.ArgumentParser(description="parser for fast-neural-style")
|
||||
|
||||
arg_parser.add_argument("--content-scale", type=float, default=None,
|
||||
help="factor for scaling down the content image")
|
||||
arg_parser.add_argument("--model-dir", type=str, required=True,
|
||||
help="saved model to be used for stylizing the image.")
|
||||
arg_parser.add_argument("--cuda", type=int, required=True,
|
||||
help="set it to 1 for running on GPU, 0 for CPU")
|
||||
arg_parser.add_argument("--style", type=str,
|
||||
help="style name")
|
||||
|
||||
arg_parser.add_argument("--content-dir", type=str, required=True,
|
||||
help="directory holding the images")
|
||||
arg_parser.add_argument("--output-dir", type=str, required=True,
|
||||
help="directory holding the output images")
|
||||
args = arg_parser.parse_args()
|
||||
|
||||
if args.cuda and not torch.cuda.is_available():
|
||||
print("ERROR: cuda is not available, try running on CPU")
|
||||
sys.exit(1)
|
||||
os.makedirs(args.output_dir, exist_ok=True)
|
||||
stylize(args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
return result
|
||||
@@ -507,7 +507,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create myenv.yml\n",
|
||||
"We also need to create an environment file so that Azure Machine Learning can install the necessary packages in the Docker image which are required by your scoring script. In this case, we need to specify conda packages `numpy` and `chainer`."
|
||||
"We also need to create an environment file so that Azure Machine Learning can install the necessary packages in the Docker image which are required by your scoring script. In this case, we need to specify conda packages `numpy` and `chainer`. Please note that you must indicate azureml-defaults with verion >= 1.0.45 as a pip dependency, because it contains the functionality needed to host the model as a web service."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -521,6 +521,7 @@
|
||||
"cd = CondaDependencies.create()\n",
|
||||
"cd.add_conda_package('numpy')\n",
|
||||
"cd.add_conda_package('chainer')\n",
|
||||
"cd.add_pip_package(\"azureml-defaults\")\n",
|
||||
"cd.save_to_file(base_directory='./', conda_file_path='myenv.yml')\n",
|
||||
"\n",
|
||||
"print(cd.serialize_to_string())"
|
||||
@@ -544,10 +545,11 @@
|
||||
"from azureml.core.model import InferenceConfig\n",
|
||||
"from azureml.core.webservice import Webservice\n",
|
||||
"from azureml.core.model import Model\n",
|
||||
"from azureml.core.environment import Environment\n",
|
||||
"\n",
|
||||
"inference_config = InferenceConfig(runtime= \"python\", \n",
|
||||
" entry_script=\"chainer_score.py\",\n",
|
||||
" conda_file=\"myenv.yml\")\n",
|
||||
"\n",
|
||||
"myenv = Environment.from_conda_specification(name=\"myenv\", file_path=\"myenv.yml\")\n",
|
||||
"inference_config = InferenceConfig(entry_script=\"chainer_score.py\", environment=myenv)\n",
|
||||
"\n",
|
||||
"aciconfig = AciWebservice.deploy_configuration(cpu_cores=1,\n",
|
||||
" auth_enabled=True, # this flag generates API keys to secure access\n",
|
||||
|
||||
@@ -561,10 +561,11 @@
|
||||
"from azureml.core.model import InferenceConfig\n",
|
||||
"from azureml.core.webservice import Webservice\n",
|
||||
"from azureml.core.model import Model\n",
|
||||
"from azureml.core.environment import Environment\n",
|
||||
"\n",
|
||||
"inference_config = InferenceConfig(runtime= \"python\", \n",
|
||||
" entry_script=\"pytorch_score.py\",\n",
|
||||
" conda_file=\"myenv.yml\")\n",
|
||||
"\n",
|
||||
"myenv = Environment.from_conda_specification(name=\"myenv\", file_path=\"myenv.yml\")\n",
|
||||
"inference_config = InferenceConfig(entry_script=\"pytorch_score.py\", environment=myenv)\n",
|
||||
"\n",
|
||||
"aciconfig = AciWebservice.deploy_configuration(cpu_cores=1, \n",
|
||||
" memory_gb=1, \n",
|
||||
|
||||
@@ -908,13 +908,16 @@
|
||||
"def init():\n",
|
||||
" global X, output, sess\n",
|
||||
" tf.reset_default_graph()\n",
|
||||
" model_root = Model.get_model_path('tf-dnn-mnist')\n",
|
||||
" saver = tf.train.import_meta_graph(os.path.join(model_root, 'mnist-tf.model.meta'))\n",
|
||||
" model_root = os.getenv('AZUREML_MODEL_DIR')\n",
|
||||
" # the name of the folder in which to look for tensorflow model files\n",
|
||||
" tf_model_folder = 'model'\n",
|
||||
" saver = tf.train.import_meta_graph(\n",
|
||||
" os.path.join(model_root, tf_model_folder, 'mnist-tf.model.meta'))\n",
|
||||
" X = tf.get_default_graph().get_tensor_by_name(\"network/X:0\")\n",
|
||||
" output = tf.get_default_graph().get_tensor_by_name(\"network/output/MatMul:0\")\n",
|
||||
" \n",
|
||||
"\n",
|
||||
" sess = tf.Session()\n",
|
||||
" saver.restore(sess, os.path.join(model_root, 'mnist-tf.model'))\n",
|
||||
" saver.restore(sess, os.path.join(model_root, tf_model_folder, 'mnist-tf.model'))\n",
|
||||
"\n",
|
||||
"def run(raw_data):\n",
|
||||
" data = np.array(json.loads(raw_data)['data'])\n",
|
||||
@@ -943,6 +946,7 @@
|
||||
"cd = CondaDependencies.create()\n",
|
||||
"cd.add_conda_package('numpy')\n",
|
||||
"cd.add_tensorflow_conda_package()\n",
|
||||
"cd.add_pip_package(\"azureml-defaults\")\n",
|
||||
"cd.save_to_file(base_directory='./', conda_file_path='myenv.yml')\n",
|
||||
"\n",
|
||||
"print(cd.serialize_to_string())"
|
||||
@@ -966,10 +970,11 @@
|
||||
"from azureml.core.model import InferenceConfig\n",
|
||||
"from azureml.core.webservice import Webservice\n",
|
||||
"from azureml.core.model import Model\n",
|
||||
"from azureml.core.environment import Environment\n",
|
||||
"\n",
|
||||
"inference_config = InferenceConfig(runtime= \"python\", \n",
|
||||
" entry_script=\"score.py\",\n",
|
||||
" conda_file=\"myenv.yml\")\n",
|
||||
"\n",
|
||||
"myenv = Environment.from_conda_specification(name=\"myenv\", file_path=\"myenv.yml\")\n",
|
||||
"inference_config = InferenceConfig(entry_script=\"score.py\", environment=myenv)\n",
|
||||
"\n",
|
||||
"aciconfig = AciWebservice.deploy_configuration(cpu_cores=1, \n",
|
||||
" memory_gb=1, \n",
|
||||
|
||||
346
how-to-use-azureml/monitor-models/data-drift/dataset/testing.csv
Normal file
346
how-to-use-azureml/monitor-models/data-drift/dataset/testing.csv
Normal file
@@ -0,0 +1,346 @@
|
||||
latitude,longitude,temperature,windAngle,windSpeed,elevation
|
||||
26.536,-81.755,17.8,10.0,2.1,9.0
|
||||
26.536,-81.755,16.7,360.0,1.5,9.0
|
||||
26.536,-81.755,16.1,350.0,1.5,9.0
|
||||
26.536,-81.755,15.0,0.0,0.0,9.0
|
||||
26.536,-81.755,14.4,350.0,1.5,9.0
|
||||
26.536,-81.755,0.0,0.0,0.0,9.0
|
||||
26.536,-81.755,13.9,360.0,2.1,9.0
|
||||
26.536,-81.755,13.3,350.0,1.5,9.0
|
||||
26.536,-81.755,13.3,10.0,2.1,9.0
|
||||
26.536,-81.755,13.3,360.0,1.5,9.0
|
||||
26.536,-81.755,13.3,0.0,0.0,9.0
|
||||
26.536,-81.755,12.2,0.0,0.0,9.0
|
||||
26.536,-81.755,11.7,0.0,0.0,9.0
|
||||
26.536,-81.755,14.4,0.0,0.0,9.0
|
||||
26.536,-81.755,17.2,10.0,2.6,9.0
|
||||
26.536,-81.755,20.0,20.0,2.6,9.0
|
||||
26.536,-81.755,22.2,10.0,3.6,9.0
|
||||
26.536,-81.755,23.3,30.0,4.6,9.0
|
||||
26.536,-81.755,23.3,330.0,2.6,9.0
|
||||
26.536,-81.755,24.4,0.0,0.0,9.0
|
||||
26.536,-81.755,25.0,360.0,3.1,9.0
|
||||
26.536,-81.755,24.4,20.0,4.1,9.0
|
||||
26.536,-81.755,23.3,10.0,2.6,9.0
|
||||
26.536,-81.755,21.1,30.0,2.1,9.0
|
||||
26.536,-81.755,18.3,0.0,0.0,9.0
|
||||
26.536,-81.755,17.2,30.0,2.1,9.0
|
||||
26.536,-81.755,15.6,60.0,2.6,9.0
|
||||
26.536,-81.755,15.6,0.0,0.0,9.0
|
||||
26.536,-81.755,13.9,60.0,2.6,9.0
|
||||
26.536,-81.755,12.8,70.0,2.6,9.0
|
||||
26.536,-81.755,0.0,0.0,0.0,9.0
|
||||
26.536,-81.755,11.7,70.0,2.1,9.0
|
||||
26.536,-81.755,12.2,20.0,2.1,9.0
|
||||
26.536,-81.755,11.7,30.0,1.5,9.0
|
||||
26.536,-81.755,11.1,40.0,2.1,9.0
|
||||
26.536,-81.755,12.2,40.0,2.6,9.0
|
||||
26.536,-81.755,12.2,30.0,2.6,9.0
|
||||
26.536,-81.755,12.2,0.0,0.0,9.0
|
||||
26.536,-81.755,15.0,30.0,6.2,9.0
|
||||
26.536,-81.755,17.2,50.0,3.6,9.0
|
||||
26.536,-81.755,20.6,60.0,5.1,9.0
|
||||
26.536,-81.755,22.8,50.0,4.6,9.0
|
||||
26.536,-81.755,24.4,80.0,6.2,9.0
|
||||
26.536,-81.755,25.0,100.0,5.7,9.0
|
||||
26.536,-81.755,25.6,60.0,3.1,9.0
|
||||
26.536,-81.755,25.6,80.0,4.6,9.0
|
||||
26.536,-81.755,25.0,90.0,5.1,9.0
|
||||
26.536,-81.755,24.4,80.0,5.1,9.0
|
||||
26.536,-81.755,21.1,60.0,2.6,9.0
|
||||
26.536,-81.755,19.4,70.0,3.6,9.0
|
||||
26.536,-81.755,18.3,70.0,2.6,9.0
|
||||
26.536,-81.755,18.3,80.0,2.6,9.0
|
||||
26.536,-81.755,17.2,60.0,1.5,9.0
|
||||
26.536,-81.755,16.1,70.0,2.6,9.0
|
||||
26.536,-81.755,15.6,70.0,2.6,9.0
|
||||
26.536,-81.755,0.0,0.0,0.0,9.0
|
||||
26.536,-81.755,16.1,50.0,2.6,9.0
|
||||
26.536,-81.755,15.6,50.0,2.1,9.0
|
||||
26.536,-81.755,15.0,50.0,1.5,9.0
|
||||
26.536,-81.755,15.0,0.0,0.0,9.0
|
||||
26.536,-81.755,15.0,0.0,0.0,9.0
|
||||
26.536,-81.755,14.4,0.0,0.0,9.0
|
||||
26.536,-81.755,14.4,30.0,4.1,9.0
|
||||
26.536,-81.755,16.1,40.0,1.5,9.0
|
||||
26.536,-81.755,19.4,0.0,1.5,9.0
|
||||
26.536,-81.755,22.8,90.0,2.6,9.0
|
||||
26.536,-81.755,24.4,130.0,3.6,9.0
|
||||
26.536,-81.755,25.6,100.0,4.6,9.0
|
||||
26.536,-81.755,26.1,120.0,3.1,9.0
|
||||
26.536,-81.755,26.7,0.0,2.6,9.0
|
||||
26.536,-81.755,27.2,0.0,0.0,9.0
|
||||
26.536,-81.755,27.2,40.0,3.1,9.0
|
||||
26.536,-81.755,26.1,30.0,1.5,9.0
|
||||
26.536,-81.755,22.8,310.0,2.1,9.0
|
||||
26.536,-81.755,23.3,330.0,2.1,9.0
|
||||
-34.067,-56.238,17.5,30.0,3.1,68.0
|
||||
-34.067,-56.238,21.2,30.0,5.7,68.0
|
||||
-34.067,-56.238,24.5,30.0,3.1,68.0
|
||||
-34.067,-56.238,27.5,330.0,3.6,68.0
|
||||
-34.067,-56.238,29.2,30.0,4.1,68.0
|
||||
-34.067,-56.238,31.0,20.0,4.6,68.0
|
||||
-34.067,-56.238,33.0,360.0,2.6,68.0
|
||||
-34.067,-56.238,33.6,60.0,3.1,68.0
|
||||
-34.067,-56.238,33.6,30.0,3.6,68.0
|
||||
-34.067,-56.238,18.6,40.0,3.1,68.0
|
||||
-34.067,-56.238,22.0,120.0,1.5,68.0
|
||||
-34.067,-56.238,25.0,120.0,2.6,68.0
|
||||
-34.067,-56.238,28.6,50.0,3.1,68.0
|
||||
-34.067,-56.238,30.6,50.0,4.1,68.0
|
||||
-34.067,-56.238,31.5,30.0,6.7,68.0
|
||||
-34.067,-56.238,32.0,40.0,7.2,68.0
|
||||
-34.067,-56.238,33.0,30.0,5.7,68.0
|
||||
-34.067,-56.238,33.2,360.0,3.6,68.0
|
||||
-34.067,-56.238,20.6,30.0,3.1,68.0
|
||||
-34.067,-56.238,21.2,0.0,0.0,68.0
|
||||
-34.067,-56.238,22.0,210.0,3.1,68.0
|
||||
-34.067,-56.238,23.0,210.0,3.6,68.0
|
||||
-34.067,-56.238,24.0,180.0,6.7,68.0
|
||||
-34.067,-56.238,24.5,210.0,7.2,68.0
|
||||
-34.067,-56.238,21.0,180.0,8.2,68.0
|
||||
-34.067,-56.238,20.0,180.0,6.7,68.0
|
||||
-34.083,-56.233,20.2,180.0,7.2,68.0
|
||||
-29.917,-71.2,16.6,290.0,4.1,146.0
|
||||
-29.916,-71.2,17.0,290.0,4.1,147.0
|
||||
-29.916,-71.2,16.0,310.0,3.1,147.0
|
||||
-29.916,-71.2,16.0,300.0,2.1,147.0
|
||||
-29.917,-71.2,15.1,0.0,0.0,146.0
|
||||
-29.916,-71.2,15.0,0.0,1.0,147.0
|
||||
-29.916,-71.2,15.0,160.0,1.0,147.0
|
||||
-29.916,-71.2,15.0,120.0,1.0,147.0
|
||||
-29.917,-71.2,14.3,190.0,1.0,146.0
|
||||
-29.916,-71.2,14.0,190.0,1.0,147.0
|
||||
-29.916,-71.2,14.0,0.0,0.0,147.0
|
||||
-29.916,-71.2,14.0,100.0,3.1,147.0
|
||||
-29.917,-71.2,12.9,0.0,0.0,146.0
|
||||
-29.916,-71.2,13.0,0.0,1.0,147.0
|
||||
-29.916,-71.2,14.0,0.0,0.5,147.0
|
||||
-29.916,-71.2,15.0,0.0,0.5,147.0
|
||||
-29.917,-71.2,15.9,0.0,0.0,146.0
|
||||
-29.916,-71.2,16.0,0.0,0.0,147.0
|
||||
-29.916,-71.2,17.0,270.0,4.6,147.0
|
||||
-29.916,-71.2,19.0,260.0,4.1,147.0
|
||||
-29.917,-71.2,18.1,270.0,6.2,146.0
|
||||
-29.916,-71.2,18.0,270.0,6.2,147.0
|
||||
-29.916,-71.2,19.0,270.0,6.2,147.0
|
||||
-29.916,-71.2,20.0,260.0,5.1,147.0
|
||||
-29.917,-71.2,19.6,280.0,6.2,146.0
|
||||
-29.916,-71.2,20.0,280.0,6.2,147.0
|
||||
-29.916,-71.2,20.0,270.0,6.2,147.0
|
||||
-29.916,-71.2,19.0,280.0,6.7,147.0
|
||||
-29.917,-71.2,18.3,270.0,5.7,146.0
|
||||
-29.916,-71.2,18.0,270.0,5.7,147.0
|
||||
-29.916,-71.2,18.0,0.0,0.0,147.0
|
||||
-29.916,-71.2,17.0,280.0,4.6,147.0
|
||||
-29.917,-71.2,15.9,280.0,4.1,146.0
|
||||
-29.916,-71.2,16.0,280.0,4.1,147.0
|
||||
-29.916,-71.2,15.0,280.0,3.6,147.0
|
||||
-29.916,-71.2,15.0,280.0,3.6,147.0
|
||||
-29.917,-71.2,15.4,280.0,4.1,146.0
|
||||
-29.916,-71.2,15.0,280.0,4.1,147.0
|
||||
-29.916,-71.2,16.0,240.0,2.1,147.0
|
||||
-29.916,-71.2,15.0,0.0,0.5,147.0
|
||||
-29.917,-71.2,15.8,80.0,3.6,146.0
|
||||
-29.916,-71.2,16.0,80.0,3.6,147.0
|
||||
-29.916,-71.2,16.0,10.0,1.5,147.0
|
||||
-29.916,-71.2,16.0,100.0,1.5,147.0
|
||||
-29.917,-71.2,15.3,130.0,1.5,146.0
|
||||
-29.916,-71.2,15.0,130.0,1.5,147.0
|
||||
-29.916,-71.2,15.0,110.0,1.0,147.0
|
||||
-29.916,-71.2,16.0,280.0,6.2,147.0
|
||||
-29.917,-71.2,15.9,240.0,3.6,146.0
|
||||
-29.916,-71.2,16.0,240.0,3.6,147.0
|
||||
-29.916,-71.2,16.0,240.0,3.1,147.0
|
||||
-29.916,-71.2,16.0,220.0,3.1,147.0
|
||||
-29.917,-71.2,16.4,260.0,3.1,146.0
|
||||
-29.916,-71.2,16.0,260.0,3.1,147.0
|
||||
-29.916,-71.2,17.0,230.0,2.6,147.0
|
||||
-29.916,-71.2,18.0,0.0,1.5,147.0
|
||||
-29.917,-71.2,20.3,340.0,2.6,146.0
|
||||
-29.916,-71.2,20.0,340.0,2.6,147.0
|
||||
-29.916,-71.2,21.0,270.0,5.1,147.0
|
||||
-29.916,-71.2,20.0,270.0,6.7,147.0
|
||||
-29.917,-71.2,19.2,280.0,6.7,146.0
|
||||
-29.916,-71.2,19.0,280.0,6.7,147.0
|
||||
-29.916,-71.2,19.0,310.0,2.6,147.0
|
||||
-29.916,-71.2,18.0,270.0,5.1,147.0
|
||||
-29.917,-71.2,17.0,300.0,4.6,146.0
|
||||
-29.916,-71.2,17.0,300.0,4.6,147.0
|
||||
-29.916,-71.2,17.0,300.0,3.6,147.0
|
||||
-29.916,-71.2,17.0,290.0,3.1,147.0
|
||||
-29.917,-71.2,16.3,290.0,2.1,146.0
|
||||
-29.916,-71.2,16.0,290.0,2.1,147.0
|
||||
-29.916,-71.2,17.0,270.0,1.0,147.0
|
||||
-29.916,-71.2,17.0,0.0,0.5,147.0
|
||||
-29.917,-71.2,16.5,160.0,2.1,146.0
|
||||
-29.916,-71.2,17.0,160.0,2.1,147.0
|
||||
-29.916,-71.2,15.0,120.0,3.1,147.0
|
||||
-29.916,-71.2,16.0,180.0,1.5,147.0
|
||||
-29.917,-71.2,14.7,0.0,0.0,146.0
|
||||
-29.916,-71.2,15.0,0.0,1.0,147.0
|
||||
-29.916,-71.2,15.0,300.0,1.0,147.0
|
||||
-29.916,-71.2,16.0,0.0,0.0,147.0
|
||||
-29.917,-71.2,18.5,110.0,1.0,146.0
|
||||
-29.916,-71.2,19.0,110.0,1.0,147.0
|
||||
-29.916,-71.2,20.0,270.0,3.6,147.0
|
||||
-29.916,-71.2,20.0,270.0,5.7,147.0
|
||||
-29.917,-71.2,20.0,280.0,6.2,146.0
|
||||
-29.916,-71.2,20.0,280.0,6.2,147.0
|
||||
-29.916,-71.2,21.0,290.0,6.7,147.0
|
||||
-29.916,-71.2,20.0,270.0,6.2,147.0
|
||||
-29.917,-71.2,21.0,260.0,6.7,146.0
|
||||
-29.916,-71.2,21.0,260.0,6.7,147.0
|
||||
-29.916,-71.2,20.0,270.0,6.2,147.0
|
||||
-29.916,-71.2,19.0,260.0,5.1,147.0
|
||||
-29.916,-71.2,18.0,280.0,4.6,147.0
|
||||
-29.917,-71.2,17.5,280.0,3.1,146.0
|
||||
-29.916,-71.2,18.0,280.0,3.1,147.0
|
||||
30.349,-85.788,11.1,0.0,0.0,21.0
|
||||
30.349,-85.788,11.1,0.0,0.0,21.0
|
||||
30.349,-85.788,9.4,0.0,0.0,21.0
|
||||
30.349,-85.788,9.4,0.0,0.0,21.0
|
||||
30.349,-85.788,8.3,300.0,2.1,21.0
|
||||
30.349,-85.788,11.1,280.0,1.5,21.0
|
||||
30.349,-85.788,0.0,0.0,0.0,21.0
|
||||
30.349,-85.788,10.6,320.0,3.1,21.0
|
||||
30.349,-85.788,9.4,310.0,3.1,21.0
|
||||
30.349,-85.788,7.8,320.0,2.6,21.0
|
||||
30.349,-85.788,6.1,340.0,2.1,21.0
|
||||
30.349,-85.788,6.7,330.0,2.6,21.0
|
||||
30.349,-85.788,6.1,310.0,1.5,21.0
|
||||
30.349,-85.788,7.2,310.0,2.1,21.0
|
||||
30.349,-85.788,12.8,360.0,3.1,21.0
|
||||
30.349,-85.788,15.0,0.0,3.1,21.0
|
||||
30.349,-85.788,16.7,20.0,4.6,21.0
|
||||
30.349,-85.788,18.9,30.0,5.1,21.0
|
||||
30.349,-85.788,19.4,10.0,4.1,21.0
|
||||
30.349,-85.788,21.1,330.0,2.6,21.0
|
||||
30.349,-85.788,21.1,10.0,4.6,21.0
|
||||
30.349,-85.788,21.7,360.0,4.1,21.0
|
||||
30.349,-85.788,21.7,30.0,2.1,21.0
|
||||
30.349,-85.788,21.7,330.0,2.6,21.0
|
||||
30.349,-85.788,16.1,350.0,2.1,21.0
|
||||
30.349,-85.788,11.7,0.0,0.0,21.0
|
||||
30.349,-85.788,8.9,0.0,0.0,21.0
|
||||
30.349,-85.788,9.4,0.0,0.0,21.0
|
||||
30.349,-85.788,7.8,0.0,0.0,21.0
|
||||
30.349,-85.788,11.1,30.0,3.1,21.0
|
||||
30.349,-85.788,7.2,0.0,0.0,21.0
|
||||
30.349,-85.788,7.2,0.0,0.0,21.0
|
||||
30.349,-85.788,0.0,0.0,0.0,21.0
|
||||
30.349,-85.788,7.8,30.0,2.1,21.0
|
||||
30.349,-85.788,8.3,40.0,2.6,21.0
|
||||
30.349,-85.788,7.2,50.0,1.5,21.0
|
||||
30.349,-85.788,8.3,60.0,1.5,21.0
|
||||
30.349,-85.788,5.6,40.0,2.1,21.0
|
||||
30.349,-85.788,6.7,40.0,2.1,21.0
|
||||
30.349,-85.788,7.8,50.0,3.1,21.0
|
||||
30.349,-85.788,11.7,70.0,2.6,21.0
|
||||
30.349,-85.788,15.6,70.0,3.1,21.0
|
||||
30.349,-85.788,18.9,100.0,3.6,21.0
|
||||
30.349,-85.788,20.0,130.0,3.6,21.0
|
||||
30.349,-85.788,21.1,140.0,4.1,21.0
|
||||
30.349,-85.788,21.7,150.0,4.1,21.0
|
||||
30.349,-85.788,21.7,170.0,3.1,21.0
|
||||
30.349,-85.788,22.2,170.0,3.1,21.0
|
||||
30.349,-85.788,20.6,0.0,0.0,21.0
|
||||
30.349,-85.788,17.2,0.0,0.0,21.0
|
||||
30.349,-85.788,14.4,0.0,0.0,21.0
|
||||
30.349,-85.788,12.8,100.0,1.5,21.0
|
||||
30.349,-85.788,13.3,100.0,1.5,21.0
|
||||
30.349,-85.788,10.6,0.0,0.0,21.0
|
||||
30.349,-85.788,9.4,0.0,0.0,21.0
|
||||
30.349,-85.788,7.8,0.0,0.0,21.0
|
||||
30.358,-85.799,8.3,0.0,0.0,21.0
|
||||
30.349,-85.788,0.0,0.0,0.0,21.0
|
||||
30.358,-85.799,6.7,0.0,0.0,21.0
|
||||
30.358,-85.799,7.2,0.0,0.0,21.0
|
||||
30.358,-85.799,7.2,0.0,0.0,21.0
|
||||
30.358,-85.799,8.3,50.0,1.5,21.0
|
||||
30.358,-85.799,9.4,0.0,0.0,21.0
|
||||
30.358,-85.799,8.9,0.0,0.0,21.0
|
||||
30.358,-85.799,10.0,340.0,1.5,21.0
|
||||
30.358,-85.799,12.8,40.0,1.5,21.0
|
||||
30.358,-85.799,16.7,100.0,2.1,21.0
|
||||
30.358,-85.799,21.1,100.0,1.5,21.0
|
||||
30.358,-85.799,23.3,0.0,0.0,21.0
|
||||
30.358,-85.799,25.0,180.0,4.6,21.0
|
||||
30.358,-85.799,24.4,230.0,3.6,21.0
|
||||
30.358,-85.799,25.0,210.0,4.1,21.0
|
||||
30.358,-85.799,23.9,170.0,4.1,21.0
|
||||
30.358,-85.799,22.8,0.0,0.0,21.0
|
||||
30.358,-85.799,19.4,0.0,0.0,21.0
|
||||
30.358,-85.799,17.8,140.0,2.1,21.0
|
||||
60.383,5.333,-0.7,0.0,0.0,36.0
|
||||
60.383,5.333,0.6,270.0,2.0,36.0
|
||||
60.383,5.333,-0.9,120.0,1.0,36.0
|
||||
60.383,5.333,-1.6,130.0,2.0,36.0
|
||||
60.383,5.333,-1.4,150.0,1.0,36.0
|
||||
60.383,5.333,-1.7,0.0,0.0,36.0
|
||||
60.383,5.333,-1.7,140.0,1.0,36.0
|
||||
60.383,5.333,-1.4,0.0,0.0,36.0
|
||||
60.383,5.333,-1.0,0.0,0.0,36.0
|
||||
60.383,5.333,-1.0,150.0,1.0,36.0
|
||||
60.383,5.333,-0.7,140.0,1.0,36.0
|
||||
60.383,5.333,0.5,150.0,1.0,36.0
|
||||
60.383,5.333,1.9,0.0,0.0,36.0
|
||||
60.383,5.333,1.7,0.0,0.0,36.0
|
||||
60.383,5.333,2.1,310.0,2.0,36.0
|
||||
60.383,5.333,1.5,90.0,1.0,36.0
|
||||
60.383,5.333,1.9,290.0,1.0,36.0
|
||||
60.383,5.333,2.0,320.0,1.0,36.0
|
||||
60.383,5.333,1.9,330.0,1.0,36.0
|
||||
60.383,5.333,1.3,350.0,1.0,36.0
|
||||
60.383,5.333,1.5,120.0,1.0,36.0
|
||||
60.383,5.333,1.3,150.0,2.0,36.0
|
||||
60.383,5.333,0.8,140.0,1.0,36.0
|
||||
60.383,5.333,0.3,300.0,1.0,36.0
|
||||
60.383,5.333,0.2,140.0,1.0,36.0
|
||||
60.383,5.333,0.4,140.0,1.0,36.0
|
||||
60.383,5.333,0.5,320.0,1.0,36.0
|
||||
60.383,5.333,1.5,330.0,1.0,36.0
|
||||
60.383,5.333,1.8,40.0,1.0,36.0
|
||||
60.383,5.333,2.3,170.0,1.0,36.0
|
||||
60.383,5.333,2.7,140.0,1.0,36.0
|
||||
60.383,5.333,3.1,330.0,1.0,36.0
|
||||
60.383,5.333,3.8,350.0,1.0,36.0
|
||||
60.383,5.333,3.8,140.0,1.0,36.0
|
||||
60.383,5.333,4.1,150.0,1.0,36.0
|
||||
60.383,5.333,4.4,180.0,1.0,36.0
|
||||
60.383,5.333,4.9,300.0,1.0,36.0
|
||||
60.383,5.333,5.2,320.0,1.0,36.0
|
||||
60.383,5.333,6.7,340.0,1.0,36.0
|
||||
60.383,5.333,6.9,250.0,1.0,36.0
|
||||
60.383,5.333,7.9,300.0,2.0,36.0
|
||||
60.383,5.333,5.5,140.0,1.0,36.0
|
||||
60.383,5.333,7.1,140.0,2.0,36.0
|
||||
60.383,5.333,7.0,280.0,2.0,36.0
|
||||
60.383,5.333,4.6,170.0,1.0,36.0
|
||||
60.383,5.333,4.8,330.0,1.0,36.0
|
||||
60.383,5.333,6.4,260.0,2.0,36.0
|
||||
60.383,5.333,6.2,340.0,1.0,36.0
|
||||
60.383,5.333,5.7,320.0,2.0,36.0
|
||||
60.383,5.333,5.2,100.0,1.0,36.0
|
||||
60.383,5.333,5.1,310.0,1.0,36.0
|
||||
60.383,5.333,4.9,290.0,2.0,36.0
|
||||
60.383,5.333,4.9,310.0,2.0,36.0
|
||||
60.383,5.333,6.1,320.0,2.0,36.0
|
||||
60.383,5.333,7.0,250.0,1.0,36.0
|
||||
60.383,5.333,5.3,140.0,1.0,36.0
|
||||
60.383,5.333,6.9,350.0,1.0,36.0
|
||||
60.383,5.333,9.7,110.0,3.0,36.0
|
||||
60.383,5.333,10.3,300.0,3.0,36.0
|
||||
60.383,5.333,8.7,310.0,1.0,36.0
|
||||
60.383,5.333,9.0,270.0,3.0,36.0
|
||||
60.383,5.333,11.6,80.0,3.0,36.0
|
||||
60.383,5.333,11.4,80.0,4.0,36.0
|
||||
60.383,5.333,9.7,70.0,5.0,36.0
|
||||
60.383,5.333,9.5,80.0,6.0,36.0
|
||||
60.383,5.333,8.7,80.0,5.0,36.0
|
||||
60.383,5.333,7.7,80.0,5.0,36.0
|
||||
60.383,5.333,8.2,80.0,4.0,36.0
|
||||
60.383,5.333,7.7,30.0,1.0,36.0
|
||||
60.383,5.333,7.2,310.0,1.0,36.0
|
||||
60.383,5.333,6.8,300.0,2.0,36.0
|
||||
60.383,5.333,6.7,140.0,1.0,36.0
|
||||
|
@@ -92,7 +92,7 @@
|
||||
"dstore = ws.get_default_datastore()\n",
|
||||
"\n",
|
||||
"# upload weather data\n",
|
||||
"dstore.upload('training-dataset', 'drift-on-aks-data', overwrite=True, show_progress=False)"
|
||||
"dstore.upload('dataset', 'drift-on-aks-data', overwrite=True, show_progress=False)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -229,7 +229,7 @@
|
||||
"source": [
|
||||
"## Run recent weather data through the webservice \n",
|
||||
"\n",
|
||||
"The below cells take the past 2 days of weather data, filter and transform using the same processes as the training dataset, and runs the data through the service."
|
||||
"The below cells take the weather data of Florida from 2019-11-20 to 2019-11-12, filter and transform it using the same processes as the training dataset, and run the data through the service."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -238,16 +238,10 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from datetime import datetime, timedelta\n",
|
||||
"from azureml.opendatasets import NoaaIsdWeather\n",
|
||||
"# create dataset \n",
|
||||
"tset = Dataset.Tabular.from_delimited_files(dstore.path('drift-on-aks-data/testing.csv'))\n",
|
||||
"\n",
|
||||
"start = datetime.today() - timedelta(days=2)\n",
|
||||
"end = datetime.today()\n",
|
||||
"\n",
|
||||
"isd = NoaaIsdWeather(start, end)\n",
|
||||
"\n",
|
||||
"df = isd.to_pandas_dataframe().fillna(0)\n",
|
||||
"df = df[df['stationName'].str.contains('FLORIDA', regex=True, na=False)]\n",
|
||||
"df = tset.to_pandas_dataframe().fillna(0)\n",
|
||||
"\n",
|
||||
"X_features = ['latitude', 'longitude', 'temperature', 'windAngle', 'windSpeed']\n",
|
||||
"y_features = ['elevation']\n",
|
||||
@@ -264,9 +258,9 @@
|
||||
"source": [
|
||||
"import json\n",
|
||||
"\n",
|
||||
"today_data = json.dumps({'data': X.values.tolist()})\n",
|
||||
"data = json.dumps({'data': X.values.tolist()})\n",
|
||||
"\n",
|
||||
"data_encoded = bytes(today_data, encoding='utf8')\n",
|
||||
"data_encoded = bytes(data, encoding='utf8')\n",
|
||||
"prediction = service.run(input_data=data_encoded)\n",
|
||||
"print(prediction)"
|
||||
]
|
||||
@@ -342,6 +336,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from datetime import datetime, timedelta\n",
|
||||
"from azureml.datadrift import DataDriftDetector, AlertConfiguration\n",
|
||||
"\n",
|
||||
"services = [service_name]\n",
|
||||
|
||||
@@ -100,7 +100,7 @@
|
||||
"\n",
|
||||
"# Check core SDK version number\n",
|
||||
"\n",
|
||||
"print(\"This notebook was created using SDK version 1.0.76.2, you are currently running version\", azureml.core.VERSION)"
|
||||
"print(\"This notebook was created using SDK version 1.0.83, you are currently running version\", azureml.core.VERSION)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -925,6 +925,7 @@
|
||||
"cd = CondaDependencies.create()\n",
|
||||
"cd.add_tensorflow_conda_package()\n",
|
||||
"cd.add_conda_package('keras==2.2.5')\n",
|
||||
"cd.add_pip_package(\"azureml-defaults\")\n",
|
||||
"cd.save_to_file(base_directory='./', conda_file_path='myenv.yml')\n",
|
||||
"\n",
|
||||
"print(cd.serialize_to_string())"
|
||||
@@ -947,10 +948,11 @@
|
||||
"from azureml.core.webservice import AciWebservice\n",
|
||||
"from azureml.core.model import InferenceConfig\n",
|
||||
"from azureml.core.model import Model\n",
|
||||
"from azureml.core.environment import Environment\n",
|
||||
"\n",
|
||||
"inference_config = InferenceConfig(runtime= \"python\", \n",
|
||||
" entry_script=\"score.py\",\n",
|
||||
" conda_file=\"myenv.yml\")\n",
|
||||
"\n",
|
||||
"myenv = Environment.from_conda_specification(name=\"myenv\", file_path=\"myenv.yml\")\n",
|
||||
"inference_config = InferenceConfig(entry_script=\"score.py\", environment=myenv)\n",
|
||||
"\n",
|
||||
"aciconfig = AciWebservice.deploy_configuration(cpu_cores=1,\n",
|
||||
" auth_enabled=True, # this flag generates API keys to secure access\n",
|
||||
|
||||
@@ -10,7 +10,7 @@ With Azure Machine Learning datasets, you can:
|
||||
|
||||
## Learn how to use Azure Machine Learning datasets
|
||||
* [Create and register datasets](https://aka.ms/azureml/howto/createdatasets)
|
||||
* Use [Datasets in training](datasets-tutorial/train-with-datasets.ipynb)
|
||||
* Use [Datasets in training](datasets-tutorial/train-with-datasets/train-with-datasets.ipynb)
|
||||
* Use TabularDatasets in [automated machine learning training](https://aka.ms/automl-dataset)
|
||||
* Use FileDatasets in [image classification](https://aka.ms/filedataset-samplenotebook)
|
||||
* Use FileDatasets in [deep learning with hyperparameter tuning](https://aka.ms/filedataset-hyperdrive)
|
||||
|
||||
@@ -206,7 +206,11 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"datadrift-remarks-sample"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.datadrift import DataDriftDetector, AlertConfiguration\n",
|
||||
@@ -290,7 +294,9 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# backfill for one month\n",
|
||||
"backfill = monitor.backfill(datetime(2019, 9, 1), datetime(2019, 10, 1))\n",
|
||||
"backfill_start_date = datetime(2019, 9, 1)\n",
|
||||
"backfill_end_date = datetime(2019, 10, 1)\n",
|
||||
"backfill = monitor.backfill(backfill_start_date, backfill_end_date)\n",
|
||||
"backfill"
|
||||
]
|
||||
},
|
||||
@@ -353,7 +359,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# plot the results from Python SDK \n",
|
||||
"monitor.show()"
|
||||
"monitor.show(backfill_start_date, backfill_end_date)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -371,7 +377,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"link = 'https://ml.azure.com/data/monitor/{}?wsid=/subscriptions/{}/resourcegroups/{}/workspaces/{}'.format(monitor.name, ws.subscription_id, ws.resource_group, ws.name)\n",
|
||||
"link = 'https://ml.azure.com/data/monitor/{}?wsid=/subscriptions/{}/resourcegroups/{}/workspaces/{}&startDate={}&endDate={}'.format(monitor.name, ws.subscription_id, ws.resource_group, ws.name, backfill_start_date.strftime('%Y-%m-%d'), backfill_end_date .strftime('%Y-%m-%d'))\n",
|
||||
"print(link)"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -0,0 +1,403 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||
"\n",
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Introduction to labeled datasets\n",
|
||||
"\n",
|
||||
"Labeled datasets are output from Azure Machine Learning [labeling projects](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-create-labeling-projects). They capture the reference to the data (e.g. image files) and its labels. \n",
|
||||
"\n",
|
||||
"This tutorial introduces the capabilities of labeled datasets and how to use them in training.\n",
|
||||
"\n",
|
||||
"Learn how-to:\n",
|
||||
"\n",
|
||||
"> * Set up your development environment\n",
|
||||
"> * Explore labeled datasets\n",
|
||||
"> * Train a simple deep learning neural network on a remote cluster\n",
|
||||
"\n",
|
||||
"## Prerequisite:\n",
|
||||
"* Understand the [architecture and terms](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture) introduced by Azure Machine Learning\n",
|
||||
"* Go through Azure Machine Learning [labeling projects](https://docs.microsoft.com/azure/machine-learning/service/how-to-create-labeling-projects) and export the labels as an Azure Machine Learning dataset\n",
|
||||
"* Go through the [configuration notebook](../../../configuration.ipynb) to:\n",
|
||||
" * install the latest version of azureml-sdk\n",
|
||||
" * install the latest version of azureml-contrib-dataset\n",
|
||||
" * install [PyTorch](https://pytorch.org/)\n",
|
||||
" * create a workspace and its configuration file (`config.json`)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Set up your development environment\n",
|
||||
"\n",
|
||||
"All the setup for your development work can be accomplished in a Python notebook. Setup includes:\n",
|
||||
"\n",
|
||||
"* Importing Python packages\n",
|
||||
"* Connecting to a workspace to enable communication between your local computer and remote resources\n",
|
||||
"* Creating an experiment to track all your runs\n",
|
||||
"* Creating a remote compute target to use for training\n",
|
||||
"\n",
|
||||
"### Import packages\n",
|
||||
"\n",
|
||||
"Import Python packages you need in this session. Also display the Azure Machine Learning SDK version."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import azureml.core\n",
|
||||
"import azureml.contrib.dataset\n",
|
||||
"from azureml.core import Dataset, Workspace, Experiment\n",
|
||||
"from azureml.contrib.dataset import FileHandlingOption\n",
|
||||
"\n",
|
||||
"# check core SDK version number\n",
|
||||
"print(\"Azure ML SDK Version: \", azureml.core.VERSION)\n",
|
||||
"print(\"Azure ML Contrib Version\", azureml.contrib.dataset.VERSION)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Connect to workspace\n",
|
||||
"\n",
|
||||
"Create a workspace object from the existing workspace. `Workspace.from_config()` reads the file **config.json** and loads the details into an object named `workspace`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# load workspace\n",
|
||||
"workspace = Workspace.from_config()\n",
|
||||
"print('Workspace name: ' + workspace.name, \n",
|
||||
" 'Azure region: ' + workspace.location, \n",
|
||||
" 'Subscription id: ' + workspace.subscription_id, \n",
|
||||
" 'Resource group: ' + workspace.resource_group, sep='\\n')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create experiment and a directory\n",
|
||||
"\n",
|
||||
"Create an experiment to track the runs in your workspace and a directory to deliver the necessary code from your computer to the remote resource."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# create an ML experiment\n",
|
||||
"exp = Experiment(workspace=workspace, name='labeled-datasets')\n",
|
||||
"\n",
|
||||
"# create a directory\n",
|
||||
"script_folder = './labeled-datasets'\n",
|
||||
"os.makedirs(script_folder, exist_ok=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create or Attach existing compute resource\n",
|
||||
"By using Azure Machine Learning Compute, a managed service, data scientists can train machine learning models on clusters of Azure virtual machines. Examples include VMs with GPU support. In this tutorial, you will create Azure Machine Learning Compute as your training environment. The code below creates the compute clusters for you if they don't already exist in your workspace.\n",
|
||||
"\n",
|
||||
"**Creation of compute takes approximately 5 minutes.** If the AmlCompute with that name is already in your workspace the code will skip the creation process."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
|
||||
"from azureml.core.compute_target import ComputeTargetException\n",
|
||||
"\n",
|
||||
"# choose a name for your cluster\n",
|
||||
"cluster_name = \"openhack\"\n",
|
||||
"\n",
|
||||
"try:\n",
|
||||
" compute_target = ComputeTarget(workspace=workspace, name=cluster_name)\n",
|
||||
" print('Found existing compute target')\n",
|
||||
"except ComputeTargetException:\n",
|
||||
" print('Creating a new compute target...')\n",
|
||||
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6', \n",
|
||||
" max_nodes=4)\n",
|
||||
"\n",
|
||||
" # create the cluster\n",
|
||||
" compute_target = ComputeTarget.create(workspace, cluster_name, compute_config)\n",
|
||||
"\n",
|
||||
" # can poll for a minimum number of nodes and for a specific timeout. \n",
|
||||
" # if no min node count is provided it uses the scale settings for the cluster\n",
|
||||
" compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)\n",
|
||||
"\n",
|
||||
"# use get_status() to get a detailed status for the current cluster. \n",
|
||||
"print(compute_target.get_status().serialize())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Explore labeled datasets\n",
|
||||
"\n",
|
||||
"**Note**: How to create labeled datasets is not covered in this tutorial. To create labeled datasets, you can go through [labeling projects](https://docs.microsoft.com/azure/machine-learning/service/how-to-create-labeling-projects) and export the output labels as Azure Machine Learning datasets. \n",
|
||||
"\n",
|
||||
"`animal_labels` used in this tutorial section is the output from a labeling project, with the task type of \"Object Identification\"."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# get animal_labels dataset from the workspace\n",
|
||||
"animal_labels = Dataset.get_by_name(workspace, 'animal_labels')\n",
|
||||
"animal_labels"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can load labeled datasets into a pandas DataFrame. There are 3 file handling options that you can choose from to load the data files referenced by the labeled datasets:\n",
|
||||
"* Streaming: The default option to load data files.\n",
|
||||
"* Download: Download your data files to a local path.\n",
|
||||
"* Mount: Mount your data files to a mount point. Mount only works for Linux-based compute, including Azure Machine Learning notebook VM and Azure Machine Learning Compute."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"animal_pd = animal_labels.to_pandas_dataframe(file_handling_option=FileHandlingOption.DOWNLOAD, target_path='./download/', overwrite_download=True)\n",
|
||||
"animal_pd"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import matplotlib.image as mpimg\n",
|
||||
"\n",
|
||||
"# read images from downloaded path\n",
|
||||
"img = mpimg.imread(animal_pd.loc[0,'image_url'])\n",
|
||||
"imgplot = plt.imshow(img)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can also load labeled datasets into [torchvision datasets](https://pytorch.org/docs/stable/torchvision/datasets.html), so that you can leverage the open source libraries provided by PyTorch for image transformation and training."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from torchvision.transforms import functional as F\n",
|
||||
"\n",
|
||||
"# load animal_labels dataset into torchvision dataset\n",
|
||||
"pytorch_dataset = animal_labels.to_torchvision()\n",
|
||||
"img = pytorch_dataset[0][0]\n",
|
||||
"print(type(img))\n",
|
||||
"\n",
|
||||
"# use methods from torchvision to transform the img into grayscale\n",
|
||||
"pil_image = F.to_pil_image(img)\n",
|
||||
"gray_image = F.to_grayscale(pil_image, num_output_channels=3)\n",
|
||||
"\n",
|
||||
"imgplot = plt.imshow(gray_image)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Train an image classification model\n",
|
||||
"\n",
|
||||
" `crack_labels` dataset used in this tutorial section is the output from a labeling project, with the task type of \"Image Classification Multi-class\". We will use this dataset to train an image classification model that classifies whether an image has cracks or not."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# get crack_labels dataset from the workspace\n",
|
||||
"crack_labels = Dataset.get_by_name(workspace, 'crack_labels')\n",
|
||||
"crack_labels"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Configure Estimator for training\n",
|
||||
"\n",
|
||||
"You can ask the system to build a conda environment based on your dependency specification. Once the environment is built, and if you don't change your dependencies, it will be reused in subsequent runs."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core import Environment\n",
|
||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||
"\n",
|
||||
"conda_env = Environment('conda-env')\n",
|
||||
"conda_env.python.conda_dependencies = CondaDependencies.create(pip_packages=['azureml-sdk',\n",
|
||||
" 'azureml-contrib-dataset',\n",
|
||||
" 'torch','torchvision',\n",
|
||||
" 'azureml-dataprep[pandas]'])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"An estimator object is used to submit the run. Azure Machine Learning has pre-configured estimators for common machine learning frameworks, as well as a generic Estimator. Create a generic estimator by specifying\n",
|
||||
"\n",
|
||||
"* The name of the estimator object, `est`\n",
|
||||
"* The directory that contains your scripts. All the files in this directory are uploaded into the cluster nodes for execution. \n",
|
||||
"* The training script name, train.py\n",
|
||||
"* The input dataset for training\n",
|
||||
"* The compute target. In this case you will use the AmlCompute you created\n",
|
||||
"* The environment definition for the experiment"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.train.estimator import Estimator\n",
|
||||
"\n",
|
||||
"est = Estimator(source_directory=script_folder, \n",
|
||||
" entry_script='train.py',\n",
|
||||
" inputs=[crack_labels.as_named_input('crack_labels')],\n",
|
||||
" compute_target=compute_target,\n",
|
||||
" environment_definition= conda_env)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Submit job to run\n",
|
||||
"\n",
|
||||
"Submit the estimator to the Azure ML experiment to kick off the execution."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run = exp.submit(est)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run.wait_for_completion(show_output=True)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "sihhu"
|
||||
}
|
||||
],
|
||||
"category": "tutorial",
|
||||
"compute": [
|
||||
"Remote"
|
||||
],
|
||||
"deployment": [
|
||||
"None"
|
||||
],
|
||||
"exclude_from_index": false,
|
||||
"framework": [
|
||||
"Azure ML"
|
||||
],
|
||||
"friendly_name": "Introduction to labeled datasets",
|
||||
"index_order": 1,
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
"name": "python36"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.9"
|
||||
},
|
||||
"nteract": {
|
||||
"version": "nteract-front-end@1.0.0"
|
||||
},
|
||||
"star_tag": [
|
||||
"featured"
|
||||
],
|
||||
"tags": [
|
||||
"Dataset",
|
||||
"label",
|
||||
"Estimator"
|
||||
],
|
||||
"task": "Train"
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -0,0 +1,106 @@
|
||||
import os
|
||||
import torchvision
|
||||
import torchvision.transforms as transforms
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
import torch.optim as optim
|
||||
|
||||
from azureml.core import Dataset, Run
|
||||
import azureml.contrib.dataset
|
||||
from azureml.contrib.dataset import FileHandlingOption, LabeledDatasetTask
|
||||
|
||||
# Handle to the Azure ML run that is executing this script.
run = Run.get_context()

# get input dataset by name, then convert it with to_torchvision()
labeled_dataset = run.input_datasets['crack_labels']
pytorch_dataset = labeled_dataset.to_torchvision()

# Shuffle all sample indices once; the first 40 become the training subset and
# the last 10 the test subset.
# NOTE(review): with fewer than 50 samples these two slices overlap -- confirm
# the labeled dataset is always large enough.
indices = torch.randperm(len(pytorch_dataset)).tolist()
train_indices = indices[:40]
test_indices = indices[-10:]
dataset_train = torch.utils.data.Subset(pytorch_dataset, train_indices)
dataset_test = torch.utils.data.Subset(pytorch_dataset, test_indices)

# Both loaders use identical settings: batches of 4, shuffled, no worker
# subprocesses.
loader_options = dict(batch_size=4, shuffle=True, num_workers=0)
trainloader = torch.utils.data.DataLoader(dataset_train, **loader_options)
testloader = torch.utils.data.DataLoader(dataset_test, **loader_options)
|
||||
|
||||
|
||||
class Net(nn.Module):
    """Small convolutional classifier: two conv/pool stages, then three linear layers.

    The first linear layer expects 16 * 71 * 71 features per sample, which is
    what two (5x5 convolution + 2x2 max-pool) stages produce for 3-channel
    inputs of e.g. 296x296 pixels. The final layer emits 10 scores per sample.
    """

    # Feature count after the second pooling stage (channels * height * width).
    _FLAT_FEATURES = 16 * 71 * 71

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor; a single pooling layer is reused
        # after each convolution.
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # Fully connected head.
        self.fc1 = nn.Linear(self._FLAT_FEATURES, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return the raw (un-normalized) class scores for a batch of images."""
        for conv in (self.conv1, self.conv2):
            x = self.pool(F.relu(conv(x)))
        # Flatten everything except the batch dimension.
        x = x.view(x.size(0), self._FLAT_FEATURES)
        for hidden in (self.fc1, self.fc2):
            x = F.relu(hidden(x))
        return self.fc3(x)
|
||||
|
||||
|
||||
net = Net()

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)


for epoch in range(2):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 5 == 4:    # print every 5 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 5))
            running_loss = 0.0

print('Finished Training')
classes = trainloader.dataset.dataset.labels

# Persist the trained weights, then reload them into a fresh network so the
# evaluation below exercises the saved checkpoint.
PATH = './cifar_net.pth'
torch.save(net.state_dict(), PATH)

# Use the built-in next(): loader iterators are plain Python iterators and do
# not reliably expose a .next() method.
dataiter = iter(testloader)
images, labels = next(dataiter)

net = Net()
net.load_state_dict(torch.load(PATH))

# Predict a single batch with the reloaded network.
outputs = net(images)

_, predicted = torch.max(outputs, 1)

# Count correct predictions over the whole test loader; gradients are not
# needed for evaluation.
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10 test images: %d %%' % (100 * correct / total))
|
||||
@@ -0,0 +1,35 @@
|
||||
import os
|
||||
|
||||
|
||||
def convert(imgf, labelf, outf, n):
    """Convert a binary image file and its label file into one CSV file.

    Every output row has the form ``label,pixel_0,...,pixel_783``: one label
    byte followed by the 28 * 28 pixel bytes of the matching image, all
    written as decimal integers.

    Args:
        imgf: Path of the image file; its first 16 bytes are skipped as header.
        labelf: Path of the label file; its first 8 bytes are skipped as header.
        outf: Path of the CSV file to write (opened in ``"w"`` mode).
        n: Number of records to convert.

    Raises:
        ValueError: If either input file ends before ``n`` records were read.
    """
    pixels_per_image = 28 * 28

    # Context managers guarantee all three files are closed even on failure.
    with open(imgf, "rb") as images, open(labelf, "rb") as labels, \
            open(outf, "w") as out:
        # Skip the fixed-size headers that precede the raw data.
        images.read(16)
        labels.read(8)

        for index in range(n):
            # Read one whole record at a time instead of byte by byte, and
            # write each row immediately rather than buffering every image.
            label = labels.read(1)
            pixels = images.read(pixels_per_image)
            if len(label) != 1 or len(pixels) != pixels_per_image:
                raise ValueError(
                    "input ended after %d of %d records" % (index, n))
            # Iterating over bytes yields integers, label first.
            out.write(",".join(map(str, label + pixels)) + "\n")
|
||||
|
||||
|
||||
# Input and output locations are taken from environment variables.
mounted_input_path = os.environ['fashion_ds']
mounted_output_path = os.environ['AZUREML_DATAREFERENCE_prepared_fashion_ds']
os.makedirs(mounted_output_path, exist_ok=True)

# One entry per split: (image file, label file, output CSV, record count).
for image_name, label_name, csv_name, record_count in (
        ('train-images-idx3-ubyte', 'train-labels-idx1-ubyte', 'mnist_train.csv', 60000),
        ('t10k-images-idx3-ubyte', 't10k-labels-idx1-ubyte', 'mnist_test.csv', 10000),
):
    convert(os.path.join(mounted_input_path, image_name),
            os.path.join(mounted_input_path, label_name),
            os.path.join(mounted_output_path, csv_name),
            record_count)
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,120 @@
|
||||
import keras
|
||||
from keras.models import Sequential
|
||||
from keras.layers import Dense, Dropout, Flatten
|
||||
from keras.layers import Conv2D, MaxPooling2D
|
||||
from keras.layers.normalization import BatchNormalization
|
||||
from keras.utils import to_categorical
|
||||
from keras.callbacks import Callback
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import os
|
||||
import matplotlib.pyplot as plt
|
||||
from sklearn.model_selection import train_test_split
|
||||
from azureml.core import Run
|
||||
|
||||
# dataset object from the run
run = Run.get_context()
dataset = run.input_datasets['prepared_fashion_ds']

# split dataset into train and test set
train_dataset, test_dataset = dataset.random_split(percentage=0.8, seed=111)

# load dataset into pandas dataframe
data_train = train_dataset.to_pandas_dataframe()
data_test = test_dataset.to_pandas_dataframe()

img_rows, img_cols = 28, 28
input_shape = (img_rows, img_cols, 1)


def _features_and_labels(frame):
    """Return (pixel array, one-hot labels); column 0 is the label, the rest pixels."""
    pixels = np.array(frame.iloc[:, 1:])
    labels = to_categorical(np.array(frame.iloc[:, 0]))
    return pixels, labels


def _as_image_batch(pixels):
    """Reshape flat rows to (samples, img_rows, img_cols, 1) float32 and divide by 255."""
    return pixels.reshape(pixels.shape[0], img_rows, img_cols, 1).astype('float32') / 255


X, y = _features_and_labels(data_train)

# hold out validation data to monitor the classifier during training
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=13)

# test data
X_test, y_test = _features_and_labels(data_test)

X_train = _as_image_batch(X_train)
X_test = _as_image_batch(X_test)
X_val = _as_image_batch(X_val)

batch_size = 256
num_classes = 10
epochs = 10
|
||||
|
||||
# construct the neural network: three convolution stages with dropout,
# followed by a dense classifier head
model = Sequential([
    Conv2D(32, kernel_size=(3, 3),
           activation='relu',
           kernel_initializer='he_normal',
           input_shape=input_shape),
    MaxPooling2D((2, 2)),
    Dropout(0.25),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Conv2D(128, (3, 3), activation='relu'),
    Dropout(0.4),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(num_classes, activation='softmax'),
])

model.compile(
    loss=keras.losses.categorical_crossentropy,
    optimizer=keras.optimizers.Adam(),
    metrics=['accuracy'],
)
|
||||
|
||||
# start an Azure ML run
run = Run.get_context()


class LogRunMetrics(Callback):
    """Keras callback that logs the training loss and accuracy to the run."""

    # callback at the end of every epoch
    def on_epoch_end(self, epoch, log):
        # logging the same metric name on every epoch builds up a list of values
        for metric_name, log_key in (('Loss', 'loss'), ('Accuracy', 'accuracy')):
            run.log(metric_name, log[log_key])


metric_logger = LogRunMetrics()
history = model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=(X_val, y_val),
    callbacks=[metric_logger],
)
|
||||
|
||||
score = model.evaluate(X_test, y_test, verbose=0)

# log a single value
test_loss = score[0]
run.log("Final test loss", test_loss)
print('Test loss:', test_loss)

test_accuracy = score[1]
run.log('Final test accuracy', test_accuracy)
print('Test accuracy:', test_accuracy)

# plot the per-epoch training accuracy and loss on one figure
plt.figure(figsize=(6, 3))
plt.title('Fashion MNIST with Keras ({} epochs)'.format(epochs), fontsize=14)
for history_key, line_format, legend_label in (
        ('accuracy', 'b-', 'Accuracy'),
        ('loss', 'r--', 'Loss'),
):
    plt.plot(history.history[history_key], line_format,
             label=legend_label, lw=4, alpha=0.5)
plt.legend(fontsize=12)
plt.grid(True)

# log an image
run.log_image('Loss v.s. Accuracy', plot=plt)

# create a ./outputs/model folder in the compute target
# files saved in the "./outputs" folder are automatically uploaded into run history
os.makedirs('./outputs/model', exist_ok=True)

# serialize NN architecture to JSON and save it
model_json = model.to_json()
with open('./outputs/model/model.json', 'w') as f:
    f.write(model_json)

# save model weights
model.save_weights('./outputs/model/model.h5')
print("model saved in ./outputs/model folder")
|
||||
@@ -0,0 +1,488 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||
"\n",
|
||||
"Licensed under the MIT License [2017] Zalando SE, https://tech.zalando.com"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Build a simple ML pipeline for image classification\n",
|
||||
"\n",
|
||||
"## Introduction\n",
|
||||
"This tutorial shows how to train a simple deep neural network using the [Fashion MNIST](https://github.com/zalandoresearch/fashion-mnist) dataset and Keras on Azure Machine Learning. Fashion-MNIST is a dataset of Zalando's article images\u2014consisting of a training set of 60,000 examples and a test set of 10,000 examples. Each example is a 28x28 grayscale image, associated with a label from 10 classes.\n",
|
||||
"\n",
|
||||
"Learn how to:\n",
|
||||
"\n",
|
||||
"> * Set up your development environment\n",
|
||||
"> * Create the Fashion MNIST dataset\n",
|
||||
"> * Create a machine learning pipeline to train a simple deep learning neural network on a remote cluster\n",
|
||||
"> * Retrieve input datasets from the experiment and register the output model with datasets\n",
|
||||
"\n",
|
||||
"## Prerequisite:\n",
|
||||
"* Understand the [architecture and terms](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture) introduced by Azure Machine Learning\n",
|
||||
"* If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [configuration notebook](../../../configuration.ipynb) to:\n",
|
||||
" * install the latest version of AzureML SDK\n",
|
||||
" * create a workspace and its configuration file (`config.json`)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Set up your development environment\n",
|
||||
"\n",
|
||||
"All the setup for your development work can be accomplished in a Python notebook. Setup includes:\n",
|
||||
"\n",
|
||||
"* Importing Python packages\n",
|
||||
"* Connecting to a workspace to enable communication between your local computer and remote resources\n",
|
||||
"* Creating an experiment to track all your runs\n",
|
||||
"* Creating a remote compute target to use for training\n",
|
||||
"\n",
|
||||
"### Import packages\n",
|
||||
"\n",
|
||||
"Import Python packages you need in this session. Also display the Azure Machine Learning SDK version."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import azureml.core\n",
|
||||
"from azureml.core import Workspace, Dataset, Datastore, ComputeTarget, RunConfiguration, Experiment\n",
|
||||
"from azureml.core.runconfig import CondaDependencies\n",
|
||||
"from azureml.pipeline.steps import PythonScriptStep, EstimatorStep\n",
|
||||
"from azureml.pipeline.core import Pipeline, PipelineData\n",
|
||||
"from azureml.train.dnn import TensorFlow\n",
|
||||
"\n",
|
||||
"# check core SDK version number\n",
|
||||
"print(\"Azure ML SDK Version: \", azureml.core.VERSION)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Connect to workspace\n",
|
||||
"\n",
|
||||
"Create a workspace object from the existing workspace. `Workspace.from_config()` reads the file **config.json** and loads the details into an object named `workspace`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# load workspace\n",
|
||||
"workspace = Workspace.from_config()\n",
|
||||
"print('Workspace name: ' + workspace.name, \n",
|
||||
" 'Azure region: ' + workspace.location, \n",
|
||||
" 'Subscription id: ' + workspace.subscription_id, \n",
|
||||
" 'Resource group: ' + workspace.resource_group, sep='\\n')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create experiment and a directory\n",
|
||||
"\n",
|
||||
"Create an experiment to track the runs in your workspace and a directory to deliver the necessary code from your computer to the remote resource."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# create an ML experiment\n",
|
||||
"exp = Experiment(workspace=workspace, name='keras-mnist-fashion')\n",
|
||||
"\n",
|
||||
"# create a directory\n",
|
||||
"script_folder = './keras-mnist-fashion'\n",
|
||||
"os.makedirs(script_folder, exist_ok=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create or Attach existing compute resource\n",
|
||||
"By using Azure Machine Learning Compute, a managed service, data scientists can train machine learning models on clusters of Azure virtual machines. Examples include VMs with GPU support. In this tutorial, you create Azure Machine Learning Compute as your training environment. The code below creates the compute clusters for you if they don't already exist in your workspace.\n",
|
||||
"\n",
|
||||
"**Creation of compute takes approximately 5 minutes.** If the AmlCompute with that name is already in your workspace the code will skip the creation process."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
|
||||
"from azureml.core.compute_target import ComputeTargetException\n",
|
||||
"\n",
|
||||
"# choose a name for your cluster\n",
|
||||
"cluster_name = \"your-cluster-name\"\n",
|
||||
"\n",
|
||||
"try:\n",
|
||||
" compute_target = ComputeTarget(workspace=workspace, name=cluster_name)\n",
|
||||
" print('Found existing compute target')\n",
|
||||
"except ComputeTargetException:\n",
|
||||
" print('Creating a new compute target...')\n",
|
||||
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6', \n",
|
||||
" max_nodes=4)\n",
|
||||
"\n",
|
||||
" # create the cluster\n",
|
||||
" compute_target = ComputeTarget.create(workspace, cluster_name, compute_config)\n",
|
||||
"\n",
|
||||
" # can poll for a minimum number of nodes and for a specific timeout. \n",
|
||||
" # if no min node count is provided it uses the scale settings for the cluster\n",
|
||||
" compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)\n",
|
||||
"\n",
|
||||
"# use get_status() to get a detailed status for the current cluster. \n",
|
||||
"print(compute_target.get_status().serialize())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create the Fashion MNIST dataset\n",
|
||||
"\n",
|
||||
"By creating a dataset, you create a reference to the data source location. If you applied any subsetting transformations to the dataset, they will be stored in the dataset as well. The data remains in its existing location, so no extra storage cost is incurred. \n",
|
||||
"\n",
|
||||
"Every workspace comes with a default [datastore](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-access-data) (and you can register more) which is backed by the Azure blob storage account associated with the workspace. We can use it to transfer data from local to the cloud, and create a dataset from it. We will now upload the [Fashion MNIST](./keras-mnist-fashion) to the default datastore (blob) within your workspace."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"datastore = workspace.get_default_datastore()\n",
|
||||
"datastore.upload_files(files = ['keras-mnist-fashion/t10k-images-idx3-ubyte', 'keras-mnist-fashion/t10k-labels-idx1-ubyte',\n",
|
||||
" 'keras-mnist-fashion/train-images-idx3-ubyte','keras-mnist-fashion/train-labels-idx1-ubyte'],\n",
|
||||
" target_path = 'mnist-fashion',\n",
|
||||
" overwrite = True,\n",
|
||||
" show_progress = True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Then we will create an unregistered FileDataset pointing to the path in the datastore. You can also create a dataset from multiple paths. [Learn More](https://aka.ms/azureml/howto/createdatasets) "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"fashion_ds = Dataset.File.from_files([(datastore, 'mnist-fashion')])\n",
|
||||
"\n",
|
||||
"# list the files referenced by fashion_ds\n",
|
||||
"fashion_ds.to_path()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Build 2-step ML pipeline\n",
|
||||
"\n",
|
||||
"The [Azure Machine Learning Pipeline](https://docs.microsoft.com/en-us/azure/machine-learning/service/concept-ml-pipelines) enables data scientists to create and manage multiple simple and complex workflows concurrently. A typical pipeline would have multiple tasks to prepare data, train, deploy and evaluate models. Individual steps in the pipeline can make use of diverse compute options (for example: CPU for data preparation and GPU for training) and languages. [Learn More](https://github.com/Azure/MachineLearningNotebooks/tree/master/how-to-use-azureml/machine-learning-pipelines)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"### Step 1: data preparation\n",
|
||||
"\n",
|
||||
"In step one, we will load the image and labels from Fashion MNIST dataset into mnist_train.csv and mnist_test.csv\n",
|
||||
"\n",
|
||||
"Each image is 28 pixels in height and 28 pixels in width, for a total of 784 pixels. Each pixel has a single pixel-value associated with it, indicating the lightness or darkness of that pixel, with higher numbers meaning darker. This pixel-value is an integer between 0 and 255. Both mnist_train.csv and mnist_test.csv contain 785 columns. The first column consists of the class labels, which represent the article of clothing. The rest of the columns contain the pixel-values of the associated image."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# set up the compute environment to install required packages\n",
|
||||
"conda = CondaDependencies.create(\n",
|
||||
" pip_packages=['azureml-sdk','azureml-dataprep[fuse,pandas]'],\n",
|
||||
" pin_sdk_version=False)\n",
|
||||
"\n",
|
||||
"conda.set_pip_option('--pre')\n",
|
||||
"\n",
|
||||
"run_config = RunConfiguration()\n",
|
||||
"run_config.environment.python.conda_dependencies = conda"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Intermediate data (or output of a step) is represented by a `PipelineData` object. `prepared_fashion_ds` is produced as the output of step 1, and used as the input of step 2. PipelineData introduces a data dependency between steps, and creates an implicit execution order in the pipeline. You can register a `PipelineData` as a dataset and version the output data automatically. [Learn More](https://docs.microsoft.com/azure/machine-learning/service/how-to-version-track-datasets#version-a-pipeline-output-dataset) "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# define output data\n",
|
||||
"prepared_fashion_ds = PipelineData('prepared_fashion_ds', datastore=datastore).as_dataset()\n",
|
||||
"\n",
|
||||
"# register output data as dataset\n",
|
||||
"prepared_fashion_ds = prepared_fashion_ds.register(name='prepared_fashion_ds', create_new_version=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"A **PythonScriptStep** is a basic, built-in step to run a Python Script on a compute target. It takes a script name and optionally other parameters like arguments for the script, compute target, inputs and outputs. If no compute target is specified, default compute target for the workspace is used. You can also use a [**RunConfiguration**](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.runconfiguration?view=azure-ml-py) to specify requirements for the PythonScriptStep, such as conda dependencies and docker image."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"prep_step = PythonScriptStep(name='prepare step',\n",
|
||||
" script_name=\"prepare.py\",\n",
|
||||
" # mount fashion_ds dataset to the compute_target\n",
|
||||
" inputs=[fashion_ds.as_named_input('fashion_ds').as_mount()],\n",
|
||||
" outputs=[prepared_fashion_ds],\n",
|
||||
" source_directory=script_folder,\n",
|
||||
" compute_target=compute_target,\n",
|
||||
" runconfig=run_config)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Step 2: train CNN with Keras\n",
|
||||
"\n",
|
||||
"Next, we construct an `azureml.train.dnn.TensorFlow` estimator object. The TensorFlow estimator is providing a simple way of launching a TensorFlow training job on a compute target. It will automatically provide a docker image that has TensorFlow installed.\n",
|
||||
"\n",
|
||||
"[EstimatorStep](https://docs.microsoft.com/en-us/python/api/azureml-pipeline-steps/azureml.pipeline.steps.estimator_step.estimatorstep?view=azure-ml-py) adds a step to run Tensorflow Estimator in a Pipeline. It takes a dataset as the input."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# set up training step with Tensorflow estimator\n",
|
||||
"est = TensorFlow(entry_script='train.py',\n",
|
||||
" source_directory=script_folder, \n",
|
||||
" pip_packages = ['azureml-sdk','keras','numpy','scikit-learn', 'matplotlib'],\n",
|
||||
" compute_target=compute_target)\n",
|
||||
"\n",
|
||||
"est_step = EstimatorStep(name='train step',\n",
|
||||
" estimator=est,\n",
|
||||
" estimator_entry_script_arguments=[],\n",
|
||||
" # parse prepared_fashion_ds into TabularDataset and use it as the input\n",
|
||||
" inputs=[prepared_fashion_ds.parse_delimited_files()],\n",
|
||||
" compute_target=compute_target)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Build the pipeline\n",
|
||||
"Once we have the steps (or steps collection), we can build the [pipeline](https://docs.microsoft.com/python/api/azureml-pipeline-core/azureml.pipeline.core.pipeline.pipeline?view=azure-ml-py).\n",
|
||||
"\n",
|
||||
"A pipeline is created with a list of steps and a workspace. Submit a pipeline using [submit](https://docs.microsoft.com/python/api/azureml-core/azureml.core.experiment(class)?view=azure-ml-py#submit-config--tags-none----kwargs-). When submit is called, a [PipelineRun](https://docs.microsoft.com/python/api/azureml-pipeline-core/azureml.pipeline.core.pipelinerun?view=azure-ml-py) is created which in turn creates [StepRun](https://docs.microsoft.com/python/api/azureml-pipeline-core/azureml.pipeline.core.steprun?view=azure-ml-py) objects for each step in the workflow."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# build pipeline & run experiment\n",
|
||||
"pipeline = Pipeline(workspace, steps=[prep_step, est_step])\n",
|
||||
"run = exp.submit(pipeline)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Monitor the PipelineRun"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"inputHidden": false,
|
||||
"outputHidden": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run.wait_for_completion(show_output=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run.find_step_run('train step')[0].get_metrics()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Register the input dataset and the output model"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Azure Machine Learning dataset makes it easy to trace how your data is used in ML. [Learn More](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-version-track-datasets#track-datasets-in-experiments)<br>\n",
|
||||
"For each Machine Learning experiment, you can easily trace the datasets used as the input through `Run` object."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# get input datasets\n",
|
||||
"prep_step = run.find_step_run('prepare step')[0]\n",
|
||||
"inputs = prep_step.get_details()['inputDatasets']\n",
|
||||
"input_dataset = inputs[0]['dataset']\n",
|
||||
"\n",
|
||||
"# list the files referenced by input_dataset\n",
|
||||
"input_dataset.to_path()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Register the input Fashion MNIST dataset with the workspace so that you can reuse it in other experiments or share it with your colleagues who have access to your workspace."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"fashion_ds = input_dataset.register(workspace = workspace,\n",
|
||||
" name = 'fashion_ds',\n",
|
||||
" description = 'image and label files from fashion mnist',\n",
|
||||
" create_new_version = True)\n",
|
||||
"fashion_ds"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Register the output model with dataset"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run.find_step_run('train step')[0].register_model(model_name = 'keras-model', model_path = 'outputs/model/', \n",
|
||||
" datasets =[('train test data',fashion_ds)])"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "sihhu"
|
||||
}
|
||||
],
|
||||
"category": "tutorial",
|
||||
"compute": [
|
||||
"Remote"
|
||||
],
|
||||
"datasets": [
|
||||
"Fashion MNIST"
|
||||
],
|
||||
"deployment": [
|
||||
"None"
|
||||
],
|
||||
"exclude_from_index": false,
|
||||
"framework": [
|
||||
"Azure ML"
|
||||
],
|
||||
"friendly_name": "Datasets with ML Pipeline",
|
||||
"index_order": 1,
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
"name": "python36"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.9"
|
||||
},
|
||||
"nteract": {
|
||||
"version": "nteract-front-end@1.0.0"
|
||||
},
|
||||
"star_tag": [
|
||||
"featured"
|
||||
],
|
||||
"tags": [
|
||||
"Dataset",
|
||||
"Pipeline",
|
||||
"Estimator",
|
||||
"ScriptRun"
|
||||
],
|
||||
"task": "Train"
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -13,23 +13,23 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Train with Azure Machine Learning Datasets\n",
|
||||
"# Train with Azure Machine Learning datasets\n",
|
||||
"Datasets are categorized into TabularDataset and FileDataset based on how users consume them in training. \n",
|
||||
"* A TabularDataset represents data in a tabular format by parsing the provided file or list of files. TabularDataset can be created from csv, tsv, parquet files, SQL query results etc. For the complete list, please visit our [documentation](https://aka.ms/tabulardataset-api-reference). It provides you with the ability to materialize the data into a pandas DataFrame.\n",
|
||||
"* A FileDataset references single or multiple files in your datastores or public urls. This provides you with the ability to download or mount the files to your compute. The files can be of any format, which enables a wider range of machine learning scenarios including deep learning.\n",
|
||||
"\n",
|
||||
"In this tutorial, you will learn how to train with Azure Machine Learning Datasets:\n",
|
||||
"In this tutorial, you will learn how to train with Azure Machine Learning datasets:\n",
|
||||
"\n",
|
||||
"☑ Use Datasets directly in your training script\n",
|
||||
"☑ Use datasets directly in your training script\n",
|
||||
"\n",
|
||||
"☑ Use Datasets to mount files to a remote compute"
|
||||
"☑ Use datasets to mount files to a remote compute"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -149,12 +149,12 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You now have the necessary packages and compute resources to train a model in the cloud.\n",
|
||||
"## Use Datasets directly in training\n",
|
||||
"## Use datasets directly in training\n",
|
||||
"\n",
|
||||
"### Create a TabularDataset\n",
|
||||
"By creating a dataset, you create a reference to the data source location. If you applied any subsetting transformations to the dataset, they will be stored in the dataset as well. The data remains in its existing location, so no extra storage cost is incurred. \n",
|
||||
"\n",
|
||||
"Every workspace comes with a default [datastore](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-access-data) (and you can register more) which is backed by the Azure blob storage account associated with the workspace. We can use it to transfer data from local to the cloud, and create Dataset from it. We will now upload the [Iris data](./train-dataset/Iris.csv) to the default datastore (blob) within your workspace."
|
||||
"Every workspace comes with a default [datastore](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-access-data) (and you can register more) which is backed by the Azure blob storage account associated with the workspace. We can use it to transfer data from local to the cloud, and create dataset from it. We will now upload the [Iris data](./train-dataset/Iris.csv) to the default datastore (blob) within your workspace."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -174,7 +174,9 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Then we will create an unregistered TabularDataset pointing to the path in the datastore. You can also create a Dataset from multiple paths. [learn more](https://aka.ms/azureml/howto/createdatasets) "
|
||||
"Then we will create an unregistered TabularDataset pointing to the path in the datastore. You can also create a dataset from multiple paths. [learn more](https://aka.ms/azureml/howto/createdatasets) \n",
|
||||
"\n",
|
||||
"[TabularDataset](https://docs.microsoft.com/python/api/azureml-core/azureml.data.tabulardataset?view=azure-ml-py) represents data in a tabular format by parsing the provided file or list of files. This provides you with the ability to materialize the data into a Pandas or Spark DataFrame. You can create a TabularDataset object from .csv, .tsv, and parquet files, and from SQL query results. For a complete list, see [TabularDatasetFactory](https://docs.microsoft.com/python/api/azureml-core/azureml.data.dataset_factory.tabulardatasetfactory?view=azure-ml-py) class."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -260,7 +262,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Configure and use Datasets as the input to Estimator"
|
||||
"### Configure and use datasets as the input to Estimator"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -294,7 +296,7 @@
|
||||
"* The name of the estimator object, `est`\n",
|
||||
"* The directory that contains your scripts. All the files in this directory are uploaded into the cluster nodes for execution. \n",
|
||||
"* The training script name, train_titanic.py\n",
|
||||
"* The input Dataset for training\n",
|
||||
"* The input dataset for training\n",
|
||||
"* The compute target. In this case you will use the AmlCompute you created\n",
|
||||
"* The environment definition for the experiment"
|
||||
]
|
||||
@@ -348,9 +350,9 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Use Datasets to mount files to a remote compute\n",
|
||||
"## Use datasets to mount files to a remote compute\n",
|
||||
"\n",
|
||||
"You can use the Dataset object to mount or download files referred by it. When you mount a file system, you attach that file system to a directory (mount point) and make it available to the system. Because mounting load files at the time of processing, it is usually faster than download.<br> \n",
|
||||
"You can use the `Dataset` object to mount or download files referenced by it. When you mount a file system, you attach that file system to a directory (mount point) and make it available to the system. Because mounting loads files at the time of processing, it is usually faster than downloading.<br> \n",
|
||||
"Note: mounting is only available for Linux-based compute (DSVM/VM, AMLCompute, HDInsights)."
|
||||
]
|
||||
},
|
||||
@@ -365,7 +367,6 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.datasets import load_diabetes\n",
|
||||
@@ -396,7 +397,9 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create a FileDataset"
|
||||
"### Create a FileDataset\n",
|
||||
"\n",
|
||||
"[FileDataset](https://docs.microsoft.com/python/api/azureml-core/azureml.data.file_dataset.filedataset?view=azure-ml-py) references single or multiple files in your datastores or public URLs. Using this method, you can download or mount the files to your compute as a FileDataset object. The files can be in any format, which enables a wider range of machine learning scenarios, including deep learning."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -492,7 +495,7 @@
|
||||
"src = ScriptRunConfig(source_directory=script_folder, \n",
|
||||
" script='train_diabetes.py', \n",
|
||||
" # to mount the dataset on the remote compute and pass the mounted path as an argument to the training script\n",
|
||||
" arguments =[dataset.as_named_input('diabetes').as_mount('tmp/dataset')])\n",
|
||||
" arguments =[dataset.as_named_input('diabetes').as_mount()])\n",
|
||||
"\n",
|
||||
"src.run_config.framework = 'python'\n",
|
||||
"src.run_config.environment = conda_env\n",
|
||||
@@ -533,7 +536,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Register Datasets\n",
|
||||
"### Register datasets\n",
|
||||
"Use the register() method to register datasets to your workspace so they can be shared with others, reused across various experiments, and referred to by name in your training script."
|
||||
]
|
||||
},
|
||||
@@ -553,10 +556,10 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Register models with Datasets\n",
|
||||
"## Register models with datasets\n",
|
||||
"The last step in the training script wrote the model files in a directory named `outputs` in the VM of the cluster where the job is executed. `outputs` is a special directory in that all content in this directory is automatically uploaded to your workspace. This content appears in the run record in the experiment under your workspace. Hence, the model file is now also available in your workspace.\n",
|
||||
"\n",
|
||||
"You can register models with Datasets for reproducibility and auditing purpose."
|
||||
"You can register models with datasets for reproducibility and auditing purposes."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -642,9 +645,11 @@
|
||||
"featured"
|
||||
],
|
||||
"tags": [
|
||||
"Dataset"
|
||||
"Dataset",
|
||||
"Estimator",
|
||||
"ScriptRun"
|
||||
],
|
||||
"task": "Filtering"
|
||||
"task": "Train"
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
12
index.md
12
index.md
@@ -36,7 +36,11 @@ Machine Learning notebook samples and encourage efficient retrieval of topics an
|
||||
|
||||
| :star:[Filtering data using Tabular Timeseries Dataset related API](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/datasets-tutorial/tabular-timeseries-dataset-filtering.ipynb) | Filtering | NOAA | Local | None | Azure ML | Dataset, Tabular Timeseries |
|
||||
|
||||
| :star:[Train with Datasets (Tabular and File)](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/datasets-tutorial/train-with-datasets.ipynb) | Filtering | Iris, Diabetes | Remote | None | Azure ML | Dataset |
|
||||
| :star:[Introduction to labeled datasets](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/datasets-tutorial/labeled-datasets/labeled-datasets.ipynb) | Train | | Remote | None | Azure ML | Dataset, label, Estimator |
|
||||
|
||||
| :star:[Datasets with ML Pipeline](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/datasets-tutorial/pipeline-with-datasets/pipeline-for-image-classification.ipynb) | Train | Fashion MNIST | Remote | None | Azure ML | Dataset, Pipeline, Estimator, ScriptRun |
|
||||
|
||||
| :star:[Train with Datasets (Tabular and File)](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/datasets-tutorial/train-with-datasets/train-with-datasets.ipynb) | Train | Iris, Diabetes | Remote | None | Azure ML | Dataset, Estimator, ScriptRun |
|
||||
|
||||
| [Forecasting away from training data](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/forecasting-high-frequency/automl-forecasting-function.ipynb) | Forecasting | None | Remote | None | Azure ML AutoML | Forecasting, Confidence Intervals |
|
||||
|
||||
@@ -78,6 +82,8 @@ Machine Learning notebook samples and encourage efficient retrieval of topics an
|
||||
|
||||
| :star:[Azure Machine Learning Pipelines with Data Dependency](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-with-data-dependency-steps.ipynb) | Demonstrates how to construct a Pipeline with data dependency between steps | Custom | AML Compute | None | Azure ML | None |
|
||||
|
||||
| [How to run a notebook as a step in AML Pipelines](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-with-notebook-runner-step.ipynb) | Demonstrates the use of NotebookRunnerStep | Custom | AML Compute | None | Azure ML | None |
|
||||
|
||||
|
||||
## Training
|
||||
|
||||
@@ -215,12 +221,12 @@ Machine Learning notebook samples and encourage efficient retrieval of topics an
|
||||
|
||||
| [train-explain-model-on-amlcompute-and-deploy](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/explain-model/azure-integration/scoring-time/train-explain-model-on-amlcompute-and-deploy.ipynb) | | | | | | |
|
||||
|
||||
| [training_notebook](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/notebook_runner/training_notebook.ipynb) | | | | | | |
|
||||
|
||||
| [nyc-taxi-data-regression-model-building](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/machine-learning-pipelines/nyc-taxi-data-regression-model-building/nyc-taxi-data-regression-model-building.ipynb) | | | | | | |
|
||||
|
||||
| [pipeline-batch-scoring](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/machine-learning-pipelines/pipeline-batch-scoring/pipeline-batch-scoring.ipynb) | | | | | | |
|
||||
|
||||
| [pipeline-style-transfer](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/machine-learning-pipelines/pipeline-style-transfer/pipeline-style-transfer.ipynb) | | | | | | |
|
||||
|
||||
| [authentication-in-azureml](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/manage-azureml-service/authentication-in-azureml/authentication-in-azureml.ipynb) | | | | | | |
|
||||
|
||||
| [Logging APIs](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/track-and-monitor-experiments/logging-api/logging-api.ipynb) | Logging APIs and analyzing results | None | None | None | None | None |
|
||||
|
||||
@@ -102,7 +102,7 @@
|
||||
"source": [
|
||||
"import azureml.core\n",
|
||||
"\n",
|
||||
"print(\"This notebook was created using version 1.0.76.2 of the Azure ML SDK\")\n",
|
||||
"print(\"This notebook was created using version 1.0.83 of the Azure ML SDK\")\n",
|
||||
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -328,6 +328,7 @@
|
||||
"\n",
|
||||
"myenv = CondaDependencies()\n",
|
||||
"myenv.add_conda_package(\"scikit-learn\")\n",
|
||||
"myenv.add_pip_package(\"azureml-defaults\")\n",
|
||||
"\n",
|
||||
"with open(\"myenv.yml\",\"w\") as f:\n",
|
||||
" f.write(myenv.serialize_to_string())"
|
||||
@@ -387,13 +388,11 @@
|
||||
"\n",
|
||||
"Configure the image and deploy. The following code goes through these steps:\n",
|
||||
"\n",
|
||||
"1. Build an image using:\n",
|
||||
"1. Create environment object containing dependencies needed by the model using the environment file (`myenv.yml`)\n",
|
||||
"1. Create inference configuration necessary to deploy the model as a web service using:\n",
|
||||
" * The scoring file (`score.py`)\n",
|
||||
" * The environment file (`myenv.yml`)\n",
|
||||
" * The model file\n",
|
||||
"1. Register that image under the workspace. \n",
|
||||
"1. Send the image to the ACI container.\n",
|
||||
"1. Start up a container in ACI using the image.\n",
|
||||
"     * The environment object created in the previous step\n",
|
||||
"1. Deploy the model to the ACI container.\n",
|
||||
"1. Get the web service HTTP endpoint."
|
||||
]
|
||||
},
|
||||
@@ -413,10 +412,11 @@
|
||||
"%%time\n",
|
||||
"from azureml.core.webservice import Webservice\n",
|
||||
"from azureml.core.model import InferenceConfig\n",
|
||||
"from azureml.core.environment import Environment\n",
|
||||
"\n",
|
||||
"inference_config = InferenceConfig(runtime= \"python\", \n",
|
||||
" entry_script=\"score.py\",\n",
|
||||
" conda_file=\"myenv.yml\")\n",
|
||||
"\n",
|
||||
"myenv = Environment.from_conda_specification(name=\"myenv\", file_path=\"myenv.yml\")\n",
|
||||
"inference_config = InferenceConfig(entry_script=\"score.py\", environment=myenv)\n",
|
||||
"\n",
|
||||
"service = Model.deploy(workspace=ws, \n",
|
||||
" name='sklearn-mnist-svc', \n",
|
||||
|
||||
Reference in New Issue
Block a user