Compare commits
7 Commits
azureml-sd
...
release_up
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
296ae01587 | ||
|
|
8f4efe15eb | ||
|
|
d179080467 | ||
|
|
0040644e7a | ||
|
|
8aa04307fb | ||
|
|
a525da4488 | ||
|
|
e149565a8a |
@@ -103,7 +103,7 @@
|
||||
"source": [
|
||||
"import azureml.core\n",
|
||||
"\n",
|
||||
"print(\"This notebook was created using version 1.0.76.2 of the Azure ML SDK\")\n",
|
||||
"print(\"This notebook was created using version 1.0.81 of the Azure ML SDK\")\n",
|
||||
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -30,7 +30,7 @@ dependencies:
|
||||
- pytorch-transformers==1.0.0
|
||||
- spacy==2.1.8
|
||||
- joblib
|
||||
- onnxruntime==0.4.0
|
||||
- onnxruntime==1.0.0
|
||||
- https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
|
||||
|
||||
channels:
|
||||
|
||||
@@ -31,7 +31,7 @@ dependencies:
|
||||
- pytorch-transformers==1.0.0
|
||||
- spacy==2.1.8
|
||||
- joblib
|
||||
- onnxruntime==0.4.0
|
||||
- onnxruntime==1.0.0
|
||||
- https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
|
||||
|
||||
channels:
|
||||
|
||||
@@ -288,7 +288,7 @@
|
||||
"|**blacklist_models** | *List* of *strings* indicating machine learning algorithms for AutoML to avoid in this run. <br><br> Allowed values for **Classification**<br><i>LogisticRegression</i><br><i>SGD</i><br><i>MultinomialNaiveBayes</i><br><i>BernoulliNaiveBayes</i><br><i>SVM</i><br><i>LinearSVM</i><br><i>KNN</i><br><i>DecisionTree</i><br><i>RandomForest</i><br><i>ExtremeRandomTrees</i><br><i>LightGBM</i><br><i>GradientBoosting</i><br><i>TensorFlowDNN</i><br><i>TensorFlowLinearClassifier</i><br><br>Allowed values for **Regression**<br><i>ElasticNet</i><br><i>GradientBoosting</i><br><i>DecisionTree</i><br><i>KNN</i><br><i>LassoLars</i><br><i>SGD</i><br><i>RandomForest</i><br><i>ExtremeRandomTrees</i><br><i>LightGBM</i><br><i>TensorFlowLinearRegressor</i><br><i>TensorFlowDNN</i><br><br>Allowed values for **Forecasting**<br><i>ElasticNet</i><br><i>GradientBoosting</i><br><i>DecisionTree</i><br><i>KNN</i><br><i>LassoLars</i><br><i>SGD</i><br><i>RandomForest</i><br><i>ExtremeRandomTrees</i><br><i>LightGBM</i><br><i>TensorFlowLinearRegressor</i><br><i>TensorFlowDNN</i><br><i>Arima</i><br><i>Prophet</i>|\n",
|
||||
"| **whitelist_models** | *List* of *strings* indicating machine learning algorithms for AutoML to use in this run. Same values listed above for **blacklist_models** allowed for **whitelist_models**.|\n",
|
||||
"|**experiment_exit_score**| Value indicating the target for *primary_metric*. <br>Once the target is surpassed the run terminates.|\n",
|
||||
"|**experiment_timeout_minutes**| Maximum amount of time in minutes that all iterations combined can take before the experiment terminates.|\n",
|
||||
"|**experiment_timeout_hours**| Maximum amount of time in hours that all iterations combined can take before the experiment terminates.|\n",
|
||||
"|**enable_early_stopping**| Flag to enable early termination if the score is not improving in the short term.|\n",
|
||||
"|**featurization**| 'auto' / 'off' Indicator for whether featurization step should be done automatically or not. Note: If the input data is sparse, featurization cannot be turned on.|\n",
|
||||
"|**n_cross_validations**|Number of cross validation splits.|\n",
|
||||
@@ -306,7 +306,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"automl_settings = {\n",
|
||||
" \"experiment_timeout_minutes\" : 20,\n",
|
||||
" \"experiment_timeout_hours\" : 0.3,\n",
|
||||
" \"enable_early_stopping\" : True,\n",
|
||||
" \"iteration_timeout_minutes\": 5,\n",
|
||||
" \"max_concurrent_iterations\": 4,\n",
|
||||
@@ -694,10 +694,10 @@
|
||||
"from azureml.core.webservice import AciWebservice\n",
|
||||
"from azureml.core.webservice import Webservice\n",
|
||||
"from azureml.core.model import Model\n",
|
||||
"from azureml.core.environment import Environment\n",
|
||||
"\n",
|
||||
"inference_config = InferenceConfig(runtime = \"python\", \n",
|
||||
" entry_script = script_file_name,\n",
|
||||
" conda_file = conda_env_file_name)\n",
|
||||
"myenv = Environment.from_conda_specification(name=\"myenv\", file_path=conda_env_file_name)\n",
|
||||
"inference_config = InferenceConfig(entry_script=script_file_name, environment=myenv)\n",
|
||||
"\n",
|
||||
"aciconfig = AciWebservice.deploy_configuration(cpu_cores = 1, \n",
|
||||
" memory_gb = 1, \n",
|
||||
|
||||
@@ -8,6 +8,6 @@ dependencies:
|
||||
- azureml-widgets
|
||||
- matplotlib
|
||||
- pandas_ml
|
||||
- onnxruntime==0.4.0
|
||||
- onnxruntime==1.0.0
|
||||
- azureml-explain-model
|
||||
- azureml-contrib-interpret
|
||||
|
||||
@@ -213,7 +213,7 @@
|
||||
" \"preprocess\": True,\n",
|
||||
" \"enable_early_stopping\": True,\n",
|
||||
" \"max_concurrent_iterations\": 2, # This is a limit for testing purpose, please increase it as per cluster size\n",
|
||||
" \"experiment_timeout_minutes\": 10, # This is a time limit for testing purposes, remove it for real use cases, this will drastically limit ability to find the best model possible\n",
|
||||
" \"experiment_timeout_hours\": 0.2, # This is a time limit for testing purposes, remove it for real use cases, this will drastically limit ability to find the best model possible\n",
|
||||
" \"verbosity\": logging.INFO,\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
@@ -305,7 +305,7 @@
|
||||
"source": [
|
||||
"#### Explain model\n",
|
||||
"\n",
|
||||
"Automated ML models can be explained and visualized using the SDK Explainability library. [Learn how to use the explainer](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/model-explanation-remote-amlcompute/auto-ml-model-explanations-remote-compute.ipynb)."
|
||||
"Automated ML models can be explained and visualized using the SDK Explainability library. "
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -334,17 +334,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Print the properties of the model\n",
|
||||
"The fitted_model is a python object and you can read the different properties of the object.\n",
|
||||
"See *Print the properties of the model* section in [this sample notebook](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/classification/auto-ml-classification.ipynb)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Deploy\n",
|
||||
"\n",
|
||||
"To deploy the model into a web service endpoint, see _Deploy_ section in [this sample notebook](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/classification-with-deployment/auto-ml-classification-with-deployment.ipynb)"
|
||||
"The fitted_model is a python object and you can read the different properties of the object.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -210,7 +210,24 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Data Ingestion Pipeline \n",
|
||||
"For this demo, we will use NOAA weather data from [Azure Open Datasets](https://azure.microsoft.com/services/open-datasets/). You can replace this with your own dataset, or you can skip this pipeline if you already have a time-series based `TabularDataset`.\n",
|
||||
"For this demo, we will use NOAA weather data from [Azure Open Datasets](https://azure.microsoft.com/services/open-datasets/). You can replace this with your own dataset, or you can skip this pipeline if you already have a time-series based `TabularDataset`.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# The name and target column of the Dataset to create \n",
|
||||
"dataset = \"NOAA-Weather-DS4\"\n",
|
||||
"target_column_name = \"temperature\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"\n",
|
||||
"### Upload Data Step\n",
|
||||
"The data ingestion pipeline has a single step with a script to query the latest weather data and upload it to the blob store. During the first run, the script will create and register a time-series based `TabularDataset` with the past one week of weather data. For each subsequent run, the script will create a partition in the blob store by querying NOAA for new weather data since the last modified time of the dataset (`dataset.data_changed_time`) and creating a data.csv file."
|
||||
@@ -225,8 +242,6 @@
|
||||
"from azureml.pipeline.core import Pipeline, PipelineParameter\n",
|
||||
"from azureml.pipeline.steps import PythonScriptStep\n",
|
||||
"\n",
|
||||
"# The name of the Dataset to create \n",
|
||||
"dataset = \"NOAA-Weather-DS4\"\n",
|
||||
"ds_name = PipelineParameter(name=\"ds_name\", default_value=dataset)\n",
|
||||
"upload_data_step = PythonScriptStep(script_name=\"upload_weather_data.py\", \n",
|
||||
" allow_reuse=False,\n",
|
||||
@@ -272,7 +287,7 @@
|
||||
"## Training Pipeline\n",
|
||||
"### Prepare Training Data Step\n",
|
||||
"\n",
|
||||
"Script to bring data into common X,y format. We need to set allow_reuse flag to False to allow the pipeline to run even when inputs don't change. We also need the name of the model to check the time the model was last trained."
|
||||
"Script to check if new data is available since the model was last trained. If no new data is available, we cancel the remaining pipeline steps. We need to set allow_reuse flag to False to allow the pipeline to run even when inputs don't change. We also need the name of the model to check the time the model was last trained."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -283,11 +298,8 @@
|
||||
"source": [
|
||||
"from azureml.pipeline.core import PipelineData\n",
|
||||
"\n",
|
||||
"target_column = PipelineParameter(\"target_column\", default_value=\"y\")\n",
|
||||
"# The model name with which to register the trained model in the workspace.\n",
|
||||
"model_name = PipelineParameter(\"model_name\", default_value=\"y\")\n",
|
||||
"output_x = PipelineData(\"output_x\", datastore=dstor)\n",
|
||||
"output_y = PipelineData(\"output_y\", datastore=dstor)"
|
||||
"model_name = PipelineParameter(\"model_name\", default_value=\"noaaweatherds\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -299,16 +311,23 @@
|
||||
"data_prep_step = PythonScriptStep(script_name=\"check_data.py\", \n",
|
||||
" allow_reuse=False,\n",
|
||||
" name=\"check_data\",\n",
|
||||
" arguments=[\"--target_column\", target_column,\n",
|
||||
" \"--output_x\", output_x,\n",
|
||||
" \"--output_y\", output_y,\n",
|
||||
" \"--ds_name\", ds_name,\n",
|
||||
" \"--model_name\", model_name],\n",
|
||||
" outputs=[output_x, output_y], \n",
|
||||
" arguments=[\"--ds_name\", ds_name,\n",
|
||||
" \"--model_name\", model_name],\n",
|
||||
" compute_target=compute_target, \n",
|
||||
" runconfig=conda_run_config)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core import Dataset\n",
|
||||
"train_ds = Dataset.get_by_name(ws, dataset)\n",
|
||||
"train_ds = train_ds.drop_columns([\"partition_date\"])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -324,11 +343,11 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.train.automl import AutoMLConfig\n",
|
||||
"from azureml.train.automl.runtime import AutoMLStep\n",
|
||||
"from azureml.train.automl import AutoMLStep\n",
|
||||
"\n",
|
||||
"automl_settings = {\n",
|
||||
" \"iteration_timeout_minutes\": 20,\n",
|
||||
" \"experiment_timeout_minutes\": 30,\n",
|
||||
" \"iteration_timeout_minutes\": 10,\n",
|
||||
" \"experiment_timeout_minutes\": 10,\n",
|
||||
" \"n_cross_validations\": 3,\n",
|
||||
" \"primary_metric\": 'r2_score',\n",
|
||||
" \"preprocess\": True,\n",
|
||||
@@ -342,8 +361,8 @@
|
||||
" debug_log = 'automl_errors.log',\n",
|
||||
" path = \".\",\n",
|
||||
" compute_target=compute_target,\n",
|
||||
" run_configuration=conda_run_config,\n",
|
||||
" data_script = \"get_data.py\",\n",
|
||||
" training_data = train_ds,\n",
|
||||
" label_column_name = target_column_name,\n",
|
||||
" **automl_settings\n",
|
||||
" )"
|
||||
]
|
||||
@@ -378,7 +397,6 @@
|
||||
"automl_step = AutoMLStep(\n",
|
||||
" name='automl_module',\n",
|
||||
" automl_config=automl_config,\n",
|
||||
" inputs=[output_x, output_y],\n",
|
||||
" outputs=[metirics_data, model_data],\n",
|
||||
" allow_reuse=False)"
|
||||
]
|
||||
@@ -432,7 +450,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"training_pipeline_run = experiment.submit(training_pipeline, pipeline_parameters={\n",
|
||||
" \"target_column\": \"temperature\", \"ds_name\": dataset, \"model_name\": \"noaaweatherds\"})"
|
||||
" \"ds_name\": dataset, \"model_name\": \"noaaweatherds\"})"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -475,7 +493,7 @@
|
||||
"source": [
|
||||
"from azureml.pipeline.core import Schedule\n",
|
||||
"schedule = Schedule.create(workspace=ws, name=\"RetrainingSchedule\",\n",
|
||||
" pipeline_parameters={\"target_column\": \"temperature\",\"ds_name\": dataset, \"model_name\": \"noaaweatherds\"},\n",
|
||||
" pipeline_parameters={\"ds_name\": dataset, \"model_name\": \"noaaweatherds\"},\n",
|
||||
" pipeline_id=published_pipeline.id, \n",
|
||||
" experiment_name=experiment_name, \n",
|
||||
" datastore=dstor,\n",
|
||||
|
||||
@@ -15,32 +15,16 @@ if type(run) == _OfflineRun:
|
||||
else:
|
||||
ws = run.experiment.workspace
|
||||
|
||||
|
||||
def write_output(df, path):
|
||||
os.makedirs(path, exist_ok=True)
|
||||
print("%s created" % path)
|
||||
df.to_csv(path + "/part-00000", index=False)
|
||||
|
||||
|
||||
print("Check for new data and prepare the data")
|
||||
print("Check for new data.")
|
||||
|
||||
parser = argparse.ArgumentParser("split")
|
||||
parser.add_argument("--target_column", type=str, help="input split features")
|
||||
parser.add_argument("--ds_name", help="input dataset name")
|
||||
parser.add_argument("--model_name", help="name of the deployed model")
|
||||
parser.add_argument("--output_x", type=str,
|
||||
help="output features")
|
||||
parser.add_argument("--output_y", type=str,
|
||||
help="output labels")
|
||||
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
print("Argument 1(ds_name): %s" % args.ds_name)
|
||||
print("Argument 2(target_column): %s" % args.target_column)
|
||||
print("Argument 3(model_name): %s" % args.model_name)
|
||||
print("Argument 4(output_x): %s" % args.output_x)
|
||||
print("Argument 5(output_y): %s" % args.output_y)
|
||||
print("Argument 2(model_name): %s" % args.model_name)
|
||||
|
||||
# Get the latest registered model
|
||||
try:
|
||||
@@ -54,22 +38,9 @@ except Exception as e:
|
||||
train_ds = Dataset.get_by_name(ws, args.ds_name)
|
||||
dataset_changed_time = train_ds.data_changed_time
|
||||
|
||||
if dataset_changed_time > last_train_time:
|
||||
# New data is available since the model was last trained
|
||||
print("Dataset was last updated on {0}. Retraining...".format(dataset_changed_time))
|
||||
train_ds = train_ds.drop_columns(["partition_date"])
|
||||
X_train = train_ds.drop_columns(
|
||||
columns=[args.target_column]).to_pandas_dataframe()
|
||||
y_train = train_ds.keep_columns(
|
||||
columns=[args.target_column]).to_pandas_dataframe()
|
||||
|
||||
non_null = y_train[args.target_column].notnull()
|
||||
y = y_train[non_null]
|
||||
X = X_train[non_null]
|
||||
|
||||
if not (args.output_x is None and args.output_y is None):
|
||||
write_output(X, args.output_x)
|
||||
write_output(y, args.output_y)
|
||||
else:
|
||||
if not dataset_changed_time > last_train_time:
|
||||
print("Cancelling run since there is no new data.")
|
||||
run.parent.cancel()
|
||||
else:
|
||||
# New data is available since the model was last trained
|
||||
print("Dataset was last updated on {0}. Retraining...".format(dataset_changed_time))
|
||||
|
||||
@@ -1,15 +0,0 @@
|
||||
import os
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def get_data():
|
||||
print("In get_data")
|
||||
print(os.environ['AZUREML_DATAREFERENCE_output_x'])
|
||||
X_train = pd.read_csv(
|
||||
os.environ['AZUREML_DATAREFERENCE_output_x'] + "/part-00000")
|
||||
y_train = pd.read_csv(
|
||||
os.environ['AZUREML_DATAREFERENCE_output_y'] + "/part-00000")
|
||||
|
||||
print(X_train.head(3))
|
||||
|
||||
return {"X": X_train.values, "y": y_train.values.flatten()}
|
||||
@@ -58,7 +58,7 @@ except Exception as e:
|
||||
print(traceback.format_exc())
|
||||
print("Dataset with name {0} not found, registering new dataset.".format(args.ds_name))
|
||||
register_dataset = True
|
||||
end_time_last_slice = datetime.today() - relativedelta(weeks=1)
|
||||
end_time_last_slice = datetime.today() - relativedelta(weeks=2)
|
||||
|
||||
end_time = datetime.utcnow()
|
||||
train_df = get_noaa_data(end_time_last_slice, end_time)
|
||||
@@ -80,10 +80,10 @@ if train_df.size > 0:
|
||||
target_path=folder_name,
|
||||
overwrite=True,
|
||||
show_progress=True)
|
||||
|
||||
if register_dataset:
|
||||
ds = Dataset.Tabular.from_delimited_files(dstor.path("{}/**/*.csv".format(
|
||||
args.ds_name)), partition_format='/{partition_date:yyyy/MM/dd/hh/mm/ss}/data.csv')
|
||||
ds.register(ws, name=args.ds_name)
|
||||
else:
|
||||
print("No new data since {0}.".format(end_time_last_slice))
|
||||
|
||||
if register_dataset:
|
||||
ds = Dataset.Tabular.from_delimited_files(dstor.path("{}/**/*.csv".format(
|
||||
args.ds_name)), partition_format='/{partition_date:yyyy/MM/dd/HH/mm/ss}/data.csv')
|
||||
ds.register(ws, name=args.ds_name)
|
||||
|
||||
@@ -202,7 +202,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dataset = Dataset.Tabular.from_delimited_files(path = [(datastore, 'dataset/bike-no.csv')]).with_timestamp_columns(fine_grain_timestamp=time_column_name) \n",
|
||||
"dataset.take(5).to_pandas_dataframe()"
|
||||
"dataset.take(5).to_pandas_dataframe().reset_index(drop=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -221,8 +221,8 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# select data that occurs before a specified date\n",
|
||||
"train = dataset.time_before(datetime(2012, 9, 1))\n",
|
||||
"train.to_pandas_dataframe().tail(5)"
|
||||
"train = dataset.time_before(datetime(2012, 8, 31), include_boundary=True)\n",
|
||||
"train.to_pandas_dataframe().tail(5).reset_index(drop=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -231,8 +231,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"test = dataset.time_after(datetime(2012, 8, 31))\n",
|
||||
"test.to_pandas_dataframe().head(5)"
|
||||
"test = dataset.time_after(datetime(2012, 9, 1), include_boundary=True)\n",
|
||||
"test.to_pandas_dataframe().head(5).reset_index(drop=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -247,7 +247,7 @@
|
||||
"|-|-|\n",
|
||||
"|**task**|forecasting|\n",
|
||||
"|**primary_metric**|This is the metric that you want to optimize.<br> Forecasting supports the following primary metrics <br><i>spearman_correlation</i><br><i>normalized_root_mean_squared_error</i><br><i>r2_score</i><br><i>normalized_mean_absolute_error</i>|\n",
|
||||
"|**blacklist_models**|Models in blacklist won't be used by AutoML. All supported models can be found at [here](https://docs.microsoft.com/en-us/python/api/azureml-train-automl/azureml.train.automl.constants.supportedmodels.regression?view=azure-ml-py).|\n",
|
||||
"|**blacklist_models**|Models in blacklist won't be used by AutoML. All supported models can be found at [here](https://docs.microsoft.com/en-us/python/api/azureml-train-automl-client/azureml.train.automl.constants.supportedmodels.forecasting?view=azure-ml-py).|\n",
|
||||
"|**experiment_timeout_minutes**|Experimentation timeout in minutes.|\n",
|
||||
"|**training_data**|Input dataset, containing both features and label column.|\n",
|
||||
"|**label_column_name**|The name of the label column.|\n",
|
||||
|
||||
@@ -32,18 +32,17 @@ test_dataset = run.input_datasets['test_data']
|
||||
|
||||
grain_column_names = []
|
||||
|
||||
df = test_dataset.to_pandas_dataframe()
|
||||
df = test_dataset.to_pandas_dataframe().reset_index(drop=True)
|
||||
|
||||
X_test_df = test_dataset.drop_columns(columns=[target_column_name])
|
||||
y_test_df = test_dataset.with_timestamp_columns(
|
||||
None).keep_columns(columns=[target_column_name])
|
||||
X_test_df = test_dataset.drop_columns(columns=[target_column_name]).to_pandas_dataframe().reset_index(drop=True)
|
||||
y_test_df = test_dataset.with_timestamp_columns(None).keep_columns(columns=[target_column_name]).to_pandas_dataframe()
|
||||
|
||||
fitted_model = joblib.load('model.pkl')
|
||||
|
||||
df_all = forecasting_helper.do_rolling_forecast(
|
||||
fitted_model,
|
||||
X_test_df.to_pandas_dataframe(),
|
||||
y_test_df.to_pandas_dataframe().values.T[0],
|
||||
X_test_df,
|
||||
y_test_df.values.T[0],
|
||||
target_column_name,
|
||||
time_column_name,
|
||||
max_horizon,
|
||||
|
||||
@@ -32,7 +32,7 @@
|
||||
"\n",
|
||||
"Advanced Forecasting\n",
|
||||
"1. [Advanced Training](#advanced_training)\n",
|
||||
"1. [Advanced Results](#advanced Results)"
|
||||
"1. [Advanced Results](#advanced_results)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -211,7 +211,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dataset = Dataset.Tabular.from_delimited_files(path = \"https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/nyc_energy.csv\").with_timestamp_columns(fine_grain_timestamp=time_column_name) \n",
|
||||
"dataset.take(5).to_pandas_dataframe()"
|
||||
"dataset.take(5).to_pandas_dataframe().reset_index(drop=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -253,7 +253,7 @@
|
||||
"source": [
|
||||
"# split into train based on time\n",
|
||||
"train = dataset.time_before(datetime(2017, 8, 8, 5), include_boundary=True)\n",
|
||||
"train.to_pandas_dataframe().sort_values(time_column_name).tail(5)"
|
||||
"train.to_pandas_dataframe().sort_values(time_column_name).tail(5).reset_index(drop=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -263,8 +263,8 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# split into test based on time\n",
|
||||
"test = dataset.time_between(datetime(2017, 8, 8, 5), datetime(2017, 8, 10, 5))\n",
|
||||
"test.to_pandas_dataframe().head(5)"
|
||||
"test = dataset.time_between(datetime(2017, 8, 8, 6), datetime(2017, 8, 10, 5))\n",
|
||||
"test.to_pandas_dataframe().head(5).reset_index(drop=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -301,7 +301,7 @@
|
||||
"|-|-|\n",
|
||||
"|**task**|forecasting|\n",
|
||||
"|**primary_metric**|This is the metric that you want to optimize.<br> Forecasting supports the following primary metrics <br><i>spearman_correlation</i><br><i>normalized_root_mean_squared_error</i><br><i>r2_score</i><br><i>normalized_mean_absolute_error</i>|\n",
|
||||
"|**blacklist_models**|Models in blacklist won't be used by AutoML. All supported models can be found at [here](https://docs.microsoft.com/en-us/python/api/azureml-train-automl/azureml.train.automl.constants.supportedmodels.regression?view=azure-ml-py).|\n",
|
||||
"|**blacklist_models**|Models in blacklist won't be used by AutoML. All supported models can be found at [here](https://docs.microsoft.com/en-us/python/api/azureml-train-automl-client/azureml.train.automl.constants.supportedmodels.forecasting?view=azure-ml-py).|\n",
|
||||
"|**experiment_timeout_minutes**|Maximum amount of time in minutes that the experiment can take before it terminates.|\n",
|
||||
"|**training_data**|The training data to be used within the experiment.|\n",
|
||||
"|**label_column_name**|The name of the label column.|\n",
|
||||
@@ -454,7 +454,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"X_test = test.to_pandas_dataframe()\n",
|
||||
"X_test = test.to_pandas_dataframe().reset_index(drop=True)\n",
|
||||
"y_test = X_test.pop(target_column_name).values"
|
||||
]
|
||||
},
|
||||
@@ -633,7 +633,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Advanced Results\n",
|
||||
"## Advanced Results<a id=\"advanced_results\"></a>\n",
|
||||
"We did not use lags in the previous model specification. In effect, the prediction was the result of a simple regression on date, grain and any additional features. This is often a very good prediction as common time series patterns like seasonality and trends can be captured in this manner. Such simple regression is horizon-less: it doesn't matter how far into the future we are predicting, because we are not using past data. In the previous example, the horizon was only used to split the data for cross-validation."
|
||||
]
|
||||
},
|
||||
|
||||
@@ -30,11 +30,11 @@ def _get_configs(automlconfig: AutoMLConfig,
|
||||
groups = _get_groups(data, group_column_names)
|
||||
configs = {}
|
||||
for i, group in groups.iterrows():
|
||||
single = data
|
||||
single = data._dataflow
|
||||
group_name = "#####".join(str(x) for x in group.values)
|
||||
group_name = valid_chars.sub('', group_name)
|
||||
for key in group.index:
|
||||
single = single._dataflow.filter(data._dataflow[key] == group[key])
|
||||
single = single.filter(data._dataflow[key] == group[key])
|
||||
t_dataset = TabularDataset._create(single)
|
||||
group_conf = copy.deepcopy(automlconfig)
|
||||
group_conf.user_settings['training_data'] = t_dataset
|
||||
|
||||
@@ -558,7 +558,6 @@
|
||||
"\n",
|
||||
"# specify CondaDependencies obj\n",
|
||||
"conda_run_config.environment.python.conda_dependencies = CondaDependencies.create(\n",
|
||||
" conda_packages=['scikit-learn', 'numpy','py-xgboost<=0.80'],\n",
|
||||
" pip_packages=azureml_pip_packages)"
|
||||
]
|
||||
},
|
||||
@@ -718,17 +717,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.conda_dependencies import CondaDependencies \n",
|
||||
"\n",
|
||||
"azureml_pip_packages = [\n",
|
||||
" 'azureml-explain-model', 'azureml-train-automl', 'azureml-defaults'\n",
|
||||
"]\n",
|
||||
" \n",
|
||||
"\n",
|
||||
"# specify CondaDependencies obj\n",
|
||||
"myenv = CondaDependencies.create(conda_packages=['scikit-learn', 'pandas', 'numpy', 'py-xgboost<=0.80'],\n",
|
||||
" pip_packages=azureml_pip_packages,\n",
|
||||
" pin_sdk_version=True)\n",
|
||||
"myenv = automl_run.get_environment().python.conda_dependencies\n",
|
||||
"\n",
|
||||
"with open(\"myenv.yml\",\"w\") as f:\n",
|
||||
" f.write(myenv.serialize_to_string())\n",
|
||||
|
||||
@@ -7,7 +7,7 @@ from azureml.core.experiment import Experiment
|
||||
from sklearn.externals import joblib
|
||||
from azureml.core.dataset import Dataset
|
||||
from azureml.train.automl.runtime.automl_explain_utilities import AutoMLExplainerSetupClass, \
|
||||
automl_setup_model_explanations
|
||||
automl_setup_model_explanations, automl_check_model_if_explainable
|
||||
from azureml.explain.model.mimic.models.lightgbm_model import LGBMExplainableModel
|
||||
from azureml.explain.model.mimic_wrapper import MimicWrapper
|
||||
from automl.client.core.common.constants import MODEL_PATH
|
||||
@@ -25,6 +25,11 @@ ws = run.experiment.workspace
|
||||
experiment = Experiment(ws, '<<experimnet_name>>')
|
||||
automl_run = Run(experiment=experiment, run_id='<<run_id>>')
|
||||
|
||||
# Check if this AutoML model is explainable
|
||||
if not automl_check_model_if_explainable(automl_run):
|
||||
raise Exception("Model explanations is currently not supported for " + automl_run.get_properties().get(
|
||||
'run_algorithm'))
|
||||
|
||||
# Download the best model from the artifact store
|
||||
automl_run.download_file(name=MODEL_PATH, output_file_path='model.pkl')
|
||||
|
||||
|
||||
@@ -405,7 +405,7 @@
|
||||
"\n",
|
||||
" - To run a production-ready web service, see the [notebook on deployment to Azure Kubernetes Service](../production-deploy-to-aks/production-deploy-to-aks.ipynb).\n",
|
||||
" - To run a local web service, see the [notebook on deployment to a local Docker container](../deploy-to-local/register-model-deploy-local.ipynb).\n",
|
||||
" - For more information on datasets, see the [notebook on training with datasets](../../work-with-data/datasets-tutorial/train-with-datasets.ipynb).\n",
|
||||
" - For more information on datasets, see the [notebook on training with datasets](../../work-with-data/datasets-tutorial/train-with-datasets/train-with-datasets.ipynb).\n",
|
||||
" - For more information on environments, see the [notebook on using environments](../../training/using-environments/using-environments.ipynb).\n",
|
||||
" - For information on all the available deployment targets, see [“How and where to deploy models”](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-deploy-and-where#choose-a-compute-target)."
|
||||
]
|
||||
|
||||
@@ -180,7 +180,7 @@
|
||||
"# just get the published pipeline object that you have the ID for.\n",
|
||||
"\n",
|
||||
"# Get all published pipeline objects in the workspace\n",
|
||||
"all_pub_pipelines = PublishedPipeline.get_all(ws)\n",
|
||||
"all_pub_pipelines = PublishedPipeline.list(ws)\n",
|
||||
"\n",
|
||||
"# We will iterate through the list of published pipelines and \n",
|
||||
"# use the last ID in the list for Schedule operations: \n",
|
||||
@@ -244,7 +244,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"schedules = Schedule.get_all(ws, pipeline_id=pub_pipeline_id)\n",
|
||||
"schedules = Schedule.list(ws, pipeline_id=pub_pipeline_id)\n",
|
||||
"\n",
|
||||
"# We will iterate through the list of schedules and \n",
|
||||
"# use the last recurrence schedule in the list for further operations: \n",
|
||||
@@ -272,7 +272,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Use active_only=False to get all schedules including disabled schedules\n",
|
||||
"schedules = Schedule.get_all(ws, active_only=True) \n",
|
||||
"schedules = Schedule.list(ws, active_only=True) \n",
|
||||
"print(\"Your workspace has the following schedules set up:\")\n",
|
||||
"for schedule in schedules:\n",
|
||||
" print(\"{} (Published pipeline: {}\".format(schedule.id, schedule.pipeline_id))"
|
||||
|
||||
@@ -230,7 +230,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"endpoint_list = PipelineEndpoint.get_all(workspace=ws, active_only=True)\n",
|
||||
"endpoint_list = PipelineEndpoint.list(workspace=ws, active_only=True)\n",
|
||||
"endpoint_list"
|
||||
]
|
||||
},
|
||||
@@ -360,7 +360,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"versions = pipeline_endpoint_by_name.get_all_versions()\n",
|
||||
"versions = pipeline_endpoint_by_name.list_versions()\n",
|
||||
"\n",
|
||||
"for ve in versions:\n",
|
||||
" print(ve.version)\n",
|
||||
@@ -381,7 +381,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pipelines = pipeline_endpoint_by_name.get_all_pipelines(active_only=True)\n",
|
||||
"pipelines = pipeline_endpoint_by_name.list_pipelines(active_only=True)\n",
|
||||
"pipelines"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -0,0 +1,436 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved. \n",
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Azure Machine Learning Pipeline with NotebookRunnerStep\n",
|
||||
"This notebook demonstrates the use of `NotebookRunnerStep`. It allows you to run a local notebook as a step in Azure Machine Learning Pipeline."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Introduction\n",
|
||||
"In this example we showcase how you can run another notebook `notebook_runner/training_notebook.ipynb` as a step in Azure Machine Learning Pipeline.\n",
|
||||
"\n",
|
||||
"If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, make sure you have executed the [configuration](https://aka.ms/pl-config) before running this notebook.\n",
|
||||
"\n",
|
||||
"In this notebook you will learn how to:\n",
|
||||
"1. Create an `Experiment` in an existing `Workspace`.\n",
|
||||
"2. Create or Attach existing AmlCompute to a workspace.\n",
|
||||
"3. Configure NotebookRun using `NotebookRunConfig`.\n",
|
||||
"4. Use NotebookRunnerStep.\n",
|
||||
"5. Run the notebook on `AmlCompute` as a pipeline step consuming the output of a python script step.\n",
|
||||
"\n",
|
||||
"Advantages of running your notebook as a step in pipeline:\n",
|
||||
"1. Run your notebook like a python script without converting into .py files, leveraging complete end to end experience of Azure Machine Learning Pipelines.\n",
|
||||
"2. Use pipeline intermediate data to and from the notebook along with other steps in pipeline.\n",
|
||||
"3. Parameterize your notebook with [Pipeline Parameters](./aml-pipelines-publish-and-run-using-rest-endpoint.ipynb).\n",
|
||||
"\n",
|
||||
"Try some more [quick start notebooks](https://github.com/microsoft/recommenders/tree/master/notebooks/00_quick_start) with `NotebookRunnerStep`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Azure Machine Learning and Pipeline SDK-specific imports"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"import azureml.core\n",
|
||||
"\n",
|
||||
"from azureml.core.compute import AmlCompute, ComputeTarget\n",
|
||||
"from azureml.core.runconfig import RunConfiguration\n",
|
||||
"from azureml.data.data_reference import DataReference\n",
|
||||
"from azureml.pipeline.core import PipelineData\n",
|
||||
"from azureml.core.datastore import Datastore\n",
|
||||
"\n",
|
||||
"from azureml.widgets import RunDetails\n",
|
||||
"\n",
|
||||
"from azureml.core import Workspace, Experiment\n",
|
||||
"from azureml.contrib.notebook import NotebookRunConfig, AzureMLNotebookHandler\n",
|
||||
"\n",
|
||||
"from azureml.pipeline.core import Pipeline\n",
|
||||
"from azureml.pipeline.steps import PythonScriptStep\n",
|
||||
"from azureml.contrib.notebook import NotebookRunnerStep\n",
|
||||
"\n",
|
||||
"# Check core SDK version number\n",
|
||||
"print(\"SDK version:\", azureml.core.VERSION)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Initialize Workspace\n",
|
||||
"\n",
|
||||
"Initialize a [workspace](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.workspace(class%29) object from persisted configuration."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ws = Workspace.from_config()\n",
|
||||
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')\n",
|
||||
"ws.set_default_datastore(\"workspaceblobstore\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Upload data to datastore"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"Datastore.get(ws, \"workspaceblobstore\").upload_files([\"./20news.pkl\"], target_path=\"20newsgroups\", overwrite=True)\n",
|
||||
"print(\"Upload call completed\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create an Azure ML experiment\n",
|
||||
"Let's create an experiment named \"notebook-step-run-example\" and a folder to hold the notebook and other scripts. The script runs will be recorded under the experiment in Azure.\n",
|
||||
"\n",
|
||||
"The best practice is to use separate folders for scripts and their dependent files for each step and specify that folder as the `source_directory` for the step. This helps reduce the size of the snapshot created for the step (only the specific folder is snapshotted). Since changes in any files in the `source_directory` would trigger a re-upload of the snapshot, this helps preserve reuse of the step when there are no changes in the `source_directory` of the step."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Choose a name for the run history container in the workspace.\n",
|
||||
"experiment_name = 'notebook-step-run-example'\n",
|
||||
"source_directory = 'notebook_runner'\n",
|
||||
"\n",
|
||||
"experiment = Experiment(ws, experiment_name)\n",
|
||||
"experiment"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create or Attach an AmlCompute cluster\n",
|
||||
"You will need to create a [compute target](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture#compute-target) for your pipeline run. In this tutorial, you get the default `AmlCompute` as your training compute resource."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Choose a name for your cluster.\n",
|
||||
"amlcompute_cluster_name = \"cpu-cluster\"\n",
|
||||
"\n",
|
||||
"found = False\n",
|
||||
"# Check if this compute target already exists in the workspace.\n",
|
||||
"cts = ws.compute_targets\n",
|
||||
"if amlcompute_cluster_name in cts and cts[amlcompute_cluster_name].type == 'AmlCompute':\n",
|
||||
" found = True\n",
|
||||
" print('Found existing compute target.')\n",
|
||||
" compute_target = cts[amlcompute_cluster_name]\n",
|
||||
" \n",
|
||||
"if not found:\n",
|
||||
" print('Creating a new compute target...')\n",
|
||||
" provisioning_config = AmlCompute.provisioning_configuration(vm_size = \"STANDARD_D2_V2\", # for GPU, use \"STANDARD_NC6\"\n",
|
||||
" #vm_priority = 'lowpriority', # optional\n",
|
||||
" max_nodes = 4)\n",
|
||||
"\n",
|
||||
" # Create the cluster.\n",
|
||||
" compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)\n",
|
||||
" \n",
|
||||
" # Can poll for a minimum number of nodes and for a specific timeout.\n",
|
||||
" # If no min_node_count is provided, it will use the scale settings for the cluster.\n",
|
||||
" compute_target.wait_for_completion(show_output = True, min_node_count = 1, timeout_in_minutes = 10)\n",
|
||||
" \n",
|
||||
" # For a more detailed view of current AmlCompute status, use get_status()."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create a new RunConfig object"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||
"\n",
|
||||
"conda_run_config = RunConfiguration(framework=\"python\")\n",
|
||||
"\n",
|
||||
"conda_run_config.environment.docker.enabled = True\n",
|
||||
"conda_run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n",
|
||||
"\n",
|
||||
"cd = CondaDependencies.create(pip_packages=['azureml-sdk'], pin_sdk_version=False)\n",
|
||||
"conda_run_config.environment.python.conda_dependencies = cd\n",
|
||||
"\n",
|
||||
"print('run config is ready')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Define input and outputs"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"input_data = DataReference(\n",
|
||||
" datastore=Datastore.get(ws, \"workspaceblobstore\"),\n",
|
||||
" data_reference_name=\"blob_test_data\",\n",
|
||||
" path_on_datastore=\"20newsgroups/20news.pkl\")\n",
|
||||
"\n",
|
||||
"output_data = PipelineData(name=\"processed_data\",\n",
|
||||
" datastore=Datastore.get(ws, \"workspaceblobstore\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create notebook run configuration and set parameters values"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"handler = AzureMLNotebookHandler(timeout=600, progress_bar=False, log_output=True)\n",
|
||||
"\n",
|
||||
"cfg = NotebookRunConfig(source_directory=source_directory, notebook=\"training_notebook.ipynb\",\n",
|
||||
" handler = handler,\n",
|
||||
" parameters={\"arg1\": \"Machine Learning\"},\n",
|
||||
" run_config=conda_run_config)\n",
|
||||
"\n",
|
||||
"print(\"Notebook Run Config is created.\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Define PythonScriptStep"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print('Source directory for the step is {}.'.format(os.path.realpath('./train')))\n",
|
||||
"python_script_step = PythonScriptStep(\n",
|
||||
" script_name=\"train.py\",\n",
|
||||
" arguments=[\"--input_data\", input_data],\n",
|
||||
" inputs=[input_data],\n",
|
||||
" outputs=[output_data],\n",
|
||||
" compute_target=compute_target, \n",
|
||||
" source_directory=\"./train\",\n",
|
||||
" allow_reuse=True)\n",
|
||||
"print(\"python_script_step created\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Define NotebookRunnerStep\n",
|
||||
"\n",
|
||||
"This step will consume intermediate output produced by `python_script_step` as an input.\n",
|
||||
"\n",
|
||||
"Optionally, an output of type `output_notebook_pipeline_data_name` can be added to the `NotebookRunnerStep` to redirect the `output_notebook` of the notebook run to the `NotebookRunnerStep`'s step output, which is produced as `PipelineData` and can be passed further along the pipeline."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.pipeline.core import PipelineParameter, TrainingOutput\n",
|
||||
"\n",
|
||||
"output_from_notebook = PipelineData(name=\"notebook_processed_data\",\n",
|
||||
" datastore=Datastore.get(ws, \"workspaceblobstore\"))\n",
|
||||
"\n",
|
||||
"my_pipeline_param = PipelineParameter(name=\"pipeline_param\", default_value=\"my_param\")\n",
|
||||
"\n",
|
||||
"print('Source directory for the step is {}.'.format(os.path.realpath(source_directory)))\n",
|
||||
"notebook_runner_step = NotebookRunnerStep(name=\"training_notebook_step\",\n",
|
||||
" notebook_run_config=cfg,\n",
|
||||
" params={\"my_pipeline_param\": my_pipeline_param},\n",
|
||||
" inputs=[output_data],\n",
|
||||
" outputs=[output_from_notebook],\n",
|
||||
" allow_reuse=True,\n",
|
||||
" compute_target=compute_target,\n",
|
||||
" output_notebook_pipeline_data_name=\"notebook_result\")\n",
|
||||
"\n",
|
||||
"print(\"Notebook Runner Step is Created.\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Build Pipeline\n",
|
||||
"\n",
|
||||
"Once we have the steps (or steps collection), we can build the [pipeline](https://docs.microsoft.com/en-us/python/api/azureml-pipeline-core/azureml.pipeline.core.pipeline.pipeline?view=azure-ml-py). By default, all these steps will run in **parallel** once we submit the pipeline for run.\n",
|
||||
"\n",
|
||||
"A pipeline is created with a list of steps and a workspace. Submit a pipeline using [submit](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.experiment(class)?view=azure-ml-py#submit-config--tags-none----kwargs-). When submit is called, a [PipelineRun](https://docs.microsoft.com/en-us/python/api/azureml-pipeline-core/azureml.pipeline.core.pipelinerun?view=azure-ml-py) is created which in turn creates [StepRun](https://docs.microsoft.com/en-us/python/api/azureml-pipeline-core/azureml.pipeline.core.steprun?view=azure-ml-py) objects for each step in the workflow."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pipeline1 = Pipeline(workspace=ws, steps=[notebook_runner_step])\n",
|
||||
"\n",
|
||||
"pipeline1.validate()\n",
|
||||
"print(\"Pipeline validation complete\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pipeline_run1 = experiment.submit(pipeline1)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"RunDetails(pipeline_run1).show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Download output notebook\n",
|
||||
"\n",
|
||||
"`output_notebook` can be retrieved via pipeline step output if `output_notebook_pipeline_data_name` is provided to the `NotebookRunnerStep`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pipeline_run1.wait_for_completion()\n",
|
||||
"# Retrieve the step runs by name `training_notebook_step`\n",
|
||||
"train_step = pipeline_run1.find_step_run('training_notebook_step')\n",
|
||||
"\n",
|
||||
"if train_step:\n",
|
||||
" train_step_obj = train_step[0] # since we have only one step by name `training_notebook_step`\n",
|
||||
" train_step_obj.get_output_data('notebook_result').download(source_directory) # download the output to source_directory"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "sanpil"
|
||||
}
|
||||
],
|
||||
"category": "tutorial",
|
||||
"compute": [
|
||||
"AML Compute"
|
||||
],
|
||||
"datasets": [
|
||||
"Custom"
|
||||
],
|
||||
"deployment": [
|
||||
"None"
|
||||
],
|
||||
"exclude_from_index": false,
|
||||
"framework": [
|
||||
"Azure ML"
|
||||
],
|
||||
"friendly_name": "How to run a notebook as a step in AML Pipelines",
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
"name": "python36"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.3"
|
||||
},
|
||||
"order_index": 12,
|
||||
"star_tag": [
|
||||
"None"
|
||||
],
|
||||
"tags": [
|
||||
"None"
|
||||
],
|
||||
"task": "Demonstrates the use of NotebookRunnerStep"
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -0,0 +1,6 @@
|
||||
name: aml-pipelines-with-notebook-runner-step
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-widgets
|
||||
- azureml-contrib-notebook
|
||||
@@ -0,0 +1,106 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved. \n",
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(\"In training_notebook.ipynb\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": [
|
||||
"parameters"
|
||||
]
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# declaring parameters to override\n",
|
||||
"\n",
|
||||
"arg1 = \"Azure\"\n",
|
||||
"processed_data = None\n",
|
||||
"notebook_processed_data = None\n",
|
||||
"my_pipeline_param = None"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Final parameter values\n",
|
||||
"\n",
|
||||
"print(\"arg1: %s\" % arg1)\n",
|
||||
"print(\"input from previous step: %s\" % processed_data)\n",
|
||||
"print(\"output from notebook: %s\" % notebook_processed_data)\n",
|
||||
"print(\"pipeline_parameter: %s\" % my_pipeline_param)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"if not (notebook_processed_data is None):\n",
|
||||
" os.makedirs(notebook_processed_data, exist_ok=True)\n",
|
||||
" print(\"%s created\" % notebook_processed_data)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "sanpil"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
"name": "python36"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -561,10 +561,11 @@
|
||||
"from azureml.core.model import InferenceConfig\n",
|
||||
"from azureml.core.webservice import Webservice\n",
|
||||
"from azureml.core.model import Model\n",
|
||||
"from azureml.core.environment import Environment\n",
|
||||
"\n",
|
||||
"inference_config = InferenceConfig(runtime= \"python\", \n",
|
||||
" entry_script=\"pytorch_score.py\",\n",
|
||||
" conda_file=\"myenv.yml\")\n",
|
||||
"\n",
|
||||
"myenv = Environment.from_conda_specification(name=\"myenv\", file_path=\"myenv.yml\")\n",
|
||||
"inference_config = InferenceConfig(entry_script=\"pytorch_score.py\", environment=myenv)\n",
|
||||
"\n",
|
||||
"aciconfig = AciWebservice.deploy_configuration(cpu_cores=1, \n",
|
||||
" memory_gb=1, \n",
|
||||
|
||||
@@ -908,13 +908,16 @@
|
||||
"def init():\n",
|
||||
" global X, output, sess\n",
|
||||
" tf.reset_default_graph()\n",
|
||||
" model_root = Model.get_model_path('tf-dnn-mnist')\n",
|
||||
" saver = tf.train.import_meta_graph(os.path.join(model_root, 'mnist-tf.model.meta'))\n",
|
||||
" model_root = os.getenv('AZUREML_MODEL_DIR')\n",
|
||||
" # the name of the folder in which to look for tensorflow model files\n",
|
||||
" tf_model_folder = 'model'\n",
|
||||
" saver = tf.train.import_meta_graph(\n",
|
||||
" os.path.join(model_root, tf_model_folder, 'mnist-tf.model.meta'))\n",
|
||||
" X = tf.get_default_graph().get_tensor_by_name(\"network/X:0\")\n",
|
||||
" output = tf.get_default_graph().get_tensor_by_name(\"network/output/MatMul:0\")\n",
|
||||
" \n",
|
||||
"\n",
|
||||
" sess = tf.Session()\n",
|
||||
" saver.restore(sess, os.path.join(model_root, 'mnist-tf.model'))\n",
|
||||
" saver.restore(sess, os.path.join(model_root, tf_model_folder, 'mnist-tf.model'))\n",
|
||||
"\n",
|
||||
"def run(raw_data):\n",
|
||||
" data = np.array(json.loads(raw_data)['data'])\n",
|
||||
@@ -943,6 +946,7 @@
|
||||
"cd = CondaDependencies.create()\n",
|
||||
"cd.add_conda_package('numpy')\n",
|
||||
"cd.add_tensorflow_conda_package()\n",
|
||||
"cd.add_pip_package(\"azureml-defaults\")\n",
|
||||
"cd.save_to_file(base_directory='./', conda_file_path='myenv.yml')\n",
|
||||
"\n",
|
||||
"print(cd.serialize_to_string())"
|
||||
@@ -966,10 +970,11 @@
|
||||
"from azureml.core.model import InferenceConfig\n",
|
||||
"from azureml.core.webservice import Webservice\n",
|
||||
"from azureml.core.model import Model\n",
|
||||
"from azureml.core.environment import Environment\n",
|
||||
"\n",
|
||||
"inference_config = InferenceConfig(runtime= \"python\", \n",
|
||||
" entry_script=\"score.py\",\n",
|
||||
" conda_file=\"myenv.yml\")\n",
|
||||
"\n",
|
||||
"myenv = Environment.from_conda_specification(name=\"myenv\", file_path=\"myenv.yml\")\n",
|
||||
"inference_config = InferenceConfig(entry_script=\"score.py\", environment=myenv)\n",
|
||||
"\n",
|
||||
"aciconfig = AciWebservice.deploy_configuration(cpu_cores=1, \n",
|
||||
" memory_gb=1, \n",
|
||||
|
||||
346
how-to-use-azureml/monitor-models/data-drift/dataset/testing.csv
Normal file
346
how-to-use-azureml/monitor-models/data-drift/dataset/testing.csv
Normal file
@@ -0,0 +1,346 @@
|
||||
latitude,longitude,temperature,windAngle,windSpeed,elevation
|
||||
26.536,-81.755,17.8,10.0,2.1,9.0
|
||||
26.536,-81.755,16.7,360.0,1.5,9.0
|
||||
26.536,-81.755,16.1,350.0,1.5,9.0
|
||||
26.536,-81.755,15.0,0.0,0.0,9.0
|
||||
26.536,-81.755,14.4,350.0,1.5,9.0
|
||||
26.536,-81.755,0.0,0.0,0.0,9.0
|
||||
26.536,-81.755,13.9,360.0,2.1,9.0
|
||||
26.536,-81.755,13.3,350.0,1.5,9.0
|
||||
26.536,-81.755,13.3,10.0,2.1,9.0
|
||||
26.536,-81.755,13.3,360.0,1.5,9.0
|
||||
26.536,-81.755,13.3,0.0,0.0,9.0
|
||||
26.536,-81.755,12.2,0.0,0.0,9.0
|
||||
26.536,-81.755,11.7,0.0,0.0,9.0
|
||||
26.536,-81.755,14.4,0.0,0.0,9.0
|
||||
26.536,-81.755,17.2,10.0,2.6,9.0
|
||||
26.536,-81.755,20.0,20.0,2.6,9.0
|
||||
26.536,-81.755,22.2,10.0,3.6,9.0
|
||||
26.536,-81.755,23.3,30.0,4.6,9.0
|
||||
26.536,-81.755,23.3,330.0,2.6,9.0
|
||||
26.536,-81.755,24.4,0.0,0.0,9.0
|
||||
26.536,-81.755,25.0,360.0,3.1,9.0
|
||||
26.536,-81.755,24.4,20.0,4.1,9.0
|
||||
26.536,-81.755,23.3,10.0,2.6,9.0
|
||||
26.536,-81.755,21.1,30.0,2.1,9.0
|
||||
26.536,-81.755,18.3,0.0,0.0,9.0
|
||||
26.536,-81.755,17.2,30.0,2.1,9.0
|
||||
26.536,-81.755,15.6,60.0,2.6,9.0
|
||||
26.536,-81.755,15.6,0.0,0.0,9.0
|
||||
26.536,-81.755,13.9,60.0,2.6,9.0
|
||||
26.536,-81.755,12.8,70.0,2.6,9.0
|
||||
26.536,-81.755,0.0,0.0,0.0,9.0
|
||||
26.536,-81.755,11.7,70.0,2.1,9.0
|
||||
26.536,-81.755,12.2,20.0,2.1,9.0
|
||||
26.536,-81.755,11.7,30.0,1.5,9.0
|
||||
26.536,-81.755,11.1,40.0,2.1,9.0
|
||||
26.536,-81.755,12.2,40.0,2.6,9.0
|
||||
26.536,-81.755,12.2,30.0,2.6,9.0
|
||||
26.536,-81.755,12.2,0.0,0.0,9.0
|
||||
26.536,-81.755,15.0,30.0,6.2,9.0
|
||||
26.536,-81.755,17.2,50.0,3.6,9.0
|
||||
26.536,-81.755,20.6,60.0,5.1,9.0
|
||||
26.536,-81.755,22.8,50.0,4.6,9.0
|
||||
26.536,-81.755,24.4,80.0,6.2,9.0
|
||||
26.536,-81.755,25.0,100.0,5.7,9.0
|
||||
26.536,-81.755,25.6,60.0,3.1,9.0
|
||||
26.536,-81.755,25.6,80.0,4.6,9.0
|
||||
26.536,-81.755,25.0,90.0,5.1,9.0
|
||||
26.536,-81.755,24.4,80.0,5.1,9.0
|
||||
26.536,-81.755,21.1,60.0,2.6,9.0
|
||||
26.536,-81.755,19.4,70.0,3.6,9.0
|
||||
26.536,-81.755,18.3,70.0,2.6,9.0
|
||||
26.536,-81.755,18.3,80.0,2.6,9.0
|
||||
26.536,-81.755,17.2,60.0,1.5,9.0
|
||||
26.536,-81.755,16.1,70.0,2.6,9.0
|
||||
26.536,-81.755,15.6,70.0,2.6,9.0
|
||||
26.536,-81.755,0.0,0.0,0.0,9.0
|
||||
26.536,-81.755,16.1,50.0,2.6,9.0
|
||||
26.536,-81.755,15.6,50.0,2.1,9.0
|
||||
26.536,-81.755,15.0,50.0,1.5,9.0
|
||||
26.536,-81.755,15.0,0.0,0.0,9.0
|
||||
26.536,-81.755,15.0,0.0,0.0,9.0
|
||||
26.536,-81.755,14.4,0.0,0.0,9.0
|
||||
26.536,-81.755,14.4,30.0,4.1,9.0
|
||||
26.536,-81.755,16.1,40.0,1.5,9.0
|
||||
26.536,-81.755,19.4,0.0,1.5,9.0
|
||||
26.536,-81.755,22.8,90.0,2.6,9.0
|
||||
26.536,-81.755,24.4,130.0,3.6,9.0
|
||||
26.536,-81.755,25.6,100.0,4.6,9.0
|
||||
26.536,-81.755,26.1,120.0,3.1,9.0
|
||||
26.536,-81.755,26.7,0.0,2.6,9.0
|
||||
26.536,-81.755,27.2,0.0,0.0,9.0
|
||||
26.536,-81.755,27.2,40.0,3.1,9.0
|
||||
26.536,-81.755,26.1,30.0,1.5,9.0
|
||||
26.536,-81.755,22.8,310.0,2.1,9.0
|
||||
26.536,-81.755,23.3,330.0,2.1,9.0
|
||||
-34.067,-56.238,17.5,30.0,3.1,68.0
|
||||
-34.067,-56.238,21.2,30.0,5.7,68.0
|
||||
-34.067,-56.238,24.5,30.0,3.1,68.0
|
||||
-34.067,-56.238,27.5,330.0,3.6,68.0
|
||||
-34.067,-56.238,29.2,30.0,4.1,68.0
|
||||
-34.067,-56.238,31.0,20.0,4.6,68.0
|
||||
-34.067,-56.238,33.0,360.0,2.6,68.0
|
||||
-34.067,-56.238,33.6,60.0,3.1,68.0
|
||||
-34.067,-56.238,33.6,30.0,3.6,68.0
|
||||
-34.067,-56.238,18.6,40.0,3.1,68.0
|
||||
-34.067,-56.238,22.0,120.0,1.5,68.0
|
||||
-34.067,-56.238,25.0,120.0,2.6,68.0
|
||||
-34.067,-56.238,28.6,50.0,3.1,68.0
|
||||
-34.067,-56.238,30.6,50.0,4.1,68.0
|
||||
-34.067,-56.238,31.5,30.0,6.7,68.0
|
||||
-34.067,-56.238,32.0,40.0,7.2,68.0
|
||||
-34.067,-56.238,33.0,30.0,5.7,68.0
|
||||
-34.067,-56.238,33.2,360.0,3.6,68.0
|
||||
-34.067,-56.238,20.6,30.0,3.1,68.0
|
||||
-34.067,-56.238,21.2,0.0,0.0,68.0
|
||||
-34.067,-56.238,22.0,210.0,3.1,68.0
|
||||
-34.067,-56.238,23.0,210.0,3.6,68.0
|
||||
-34.067,-56.238,24.0,180.0,6.7,68.0
|
||||
-34.067,-56.238,24.5,210.0,7.2,68.0
|
||||
-34.067,-56.238,21.0,180.0,8.2,68.0
|
||||
-34.067,-56.238,20.0,180.0,6.7,68.0
|
||||
-34.083,-56.233,20.2,180.0,7.2,68.0
|
||||
-29.917,-71.2,16.6,290.0,4.1,146.0
|
||||
-29.916,-71.2,17.0,290.0,4.1,147.0
|
||||
-29.916,-71.2,16.0,310.0,3.1,147.0
|
||||
-29.916,-71.2,16.0,300.0,2.1,147.0
|
||||
-29.917,-71.2,15.1,0.0,0.0,146.0
|
||||
-29.916,-71.2,15.0,0.0,1.0,147.0
|
||||
-29.916,-71.2,15.0,160.0,1.0,147.0
|
||||
-29.916,-71.2,15.0,120.0,1.0,147.0
|
||||
-29.917,-71.2,14.3,190.0,1.0,146.0
|
||||
-29.916,-71.2,14.0,190.0,1.0,147.0
|
||||
-29.916,-71.2,14.0,0.0,0.0,147.0
|
||||
-29.916,-71.2,14.0,100.0,3.1,147.0
|
||||
-29.917,-71.2,12.9,0.0,0.0,146.0
|
||||
-29.916,-71.2,13.0,0.0,1.0,147.0
|
||||
-29.916,-71.2,14.0,0.0,0.5,147.0
|
||||
-29.916,-71.2,15.0,0.0,0.5,147.0
|
||||
-29.917,-71.2,15.9,0.0,0.0,146.0
|
||||
-29.916,-71.2,16.0,0.0,0.0,147.0
|
||||
-29.916,-71.2,17.0,270.0,4.6,147.0
|
||||
-29.916,-71.2,19.0,260.0,4.1,147.0
|
||||
-29.917,-71.2,18.1,270.0,6.2,146.0
|
||||
-29.916,-71.2,18.0,270.0,6.2,147.0
|
||||
-29.916,-71.2,19.0,270.0,6.2,147.0
|
||||
-29.916,-71.2,20.0,260.0,5.1,147.0
|
||||
-29.917,-71.2,19.6,280.0,6.2,146.0
|
||||
-29.916,-71.2,20.0,280.0,6.2,147.0
|
||||
-29.916,-71.2,20.0,270.0,6.2,147.0
|
||||
-29.916,-71.2,19.0,280.0,6.7,147.0
|
||||
-29.917,-71.2,18.3,270.0,5.7,146.0
|
||||
-29.916,-71.2,18.0,270.0,5.7,147.0
|
||||
-29.916,-71.2,18.0,0.0,0.0,147.0
|
||||
-29.916,-71.2,17.0,280.0,4.6,147.0
|
||||
-29.917,-71.2,15.9,280.0,4.1,146.0
|
||||
-29.916,-71.2,16.0,280.0,4.1,147.0
|
||||
-29.916,-71.2,15.0,280.0,3.6,147.0
|
||||
-29.916,-71.2,15.0,280.0,3.6,147.0
|
||||
-29.917,-71.2,15.4,280.0,4.1,146.0
|
||||
-29.916,-71.2,15.0,280.0,4.1,147.0
|
||||
-29.916,-71.2,16.0,240.0,2.1,147.0
|
||||
-29.916,-71.2,15.0,0.0,0.5,147.0
|
||||
-29.917,-71.2,15.8,80.0,3.6,146.0
|
||||
-29.916,-71.2,16.0,80.0,3.6,147.0
|
||||
-29.916,-71.2,16.0,10.0,1.5,147.0
|
||||
-29.916,-71.2,16.0,100.0,1.5,147.0
|
||||
-29.917,-71.2,15.3,130.0,1.5,146.0
|
||||
-29.916,-71.2,15.0,130.0,1.5,147.0
|
||||
-29.916,-71.2,15.0,110.0,1.0,147.0
|
||||
-29.916,-71.2,16.0,280.0,6.2,147.0
|
||||
-29.917,-71.2,15.9,240.0,3.6,146.0
|
||||
-29.916,-71.2,16.0,240.0,3.6,147.0
|
||||
-29.916,-71.2,16.0,240.0,3.1,147.0
|
||||
-29.916,-71.2,16.0,220.0,3.1,147.0
|
||||
-29.917,-71.2,16.4,260.0,3.1,146.0
|
||||
-29.916,-71.2,16.0,260.0,3.1,147.0
|
||||
-29.916,-71.2,17.0,230.0,2.6,147.0
|
||||
-29.916,-71.2,18.0,0.0,1.5,147.0
|
||||
-29.917,-71.2,20.3,340.0,2.6,146.0
|
||||
-29.916,-71.2,20.0,340.0,2.6,147.0
|
||||
-29.916,-71.2,21.0,270.0,5.1,147.0
|
||||
-29.916,-71.2,20.0,270.0,6.7,147.0
|
||||
-29.917,-71.2,19.2,280.0,6.7,146.0
|
||||
-29.916,-71.2,19.0,280.0,6.7,147.0
|
||||
-29.916,-71.2,19.0,310.0,2.6,147.0
|
||||
-29.916,-71.2,18.0,270.0,5.1,147.0
|
||||
-29.917,-71.2,17.0,300.0,4.6,146.0
|
||||
-29.916,-71.2,17.0,300.0,4.6,147.0
|
||||
-29.916,-71.2,17.0,300.0,3.6,147.0
|
||||
-29.916,-71.2,17.0,290.0,3.1,147.0
|
||||
-29.917,-71.2,16.3,290.0,2.1,146.0
|
||||
-29.916,-71.2,16.0,290.0,2.1,147.0
|
||||
-29.916,-71.2,17.0,270.0,1.0,147.0
|
||||
-29.916,-71.2,17.0,0.0,0.5,147.0
|
||||
-29.917,-71.2,16.5,160.0,2.1,146.0
|
||||
-29.916,-71.2,17.0,160.0,2.1,147.0
|
||||
-29.916,-71.2,15.0,120.0,3.1,147.0
|
||||
-29.916,-71.2,16.0,180.0,1.5,147.0
|
||||
-29.917,-71.2,14.7,0.0,0.0,146.0
|
||||
-29.916,-71.2,15.0,0.0,1.0,147.0
|
||||
-29.916,-71.2,15.0,300.0,1.0,147.0
|
||||
-29.916,-71.2,16.0,0.0,0.0,147.0
|
||||
-29.917,-71.2,18.5,110.0,1.0,146.0
|
||||
-29.916,-71.2,19.0,110.0,1.0,147.0
|
||||
-29.916,-71.2,20.0,270.0,3.6,147.0
|
||||
-29.916,-71.2,20.0,270.0,5.7,147.0
|
||||
-29.917,-71.2,20.0,280.0,6.2,146.0
|
||||
-29.916,-71.2,20.0,280.0,6.2,147.0
|
||||
-29.916,-71.2,21.0,290.0,6.7,147.0
|
||||
-29.916,-71.2,20.0,270.0,6.2,147.0
|
||||
-29.917,-71.2,21.0,260.0,6.7,146.0
|
||||
-29.916,-71.2,21.0,260.0,6.7,147.0
|
||||
-29.916,-71.2,20.0,270.0,6.2,147.0
|
||||
-29.916,-71.2,19.0,260.0,5.1,147.0
|
||||
-29.916,-71.2,18.0,280.0,4.6,147.0
|
||||
-29.917,-71.2,17.5,280.0,3.1,146.0
|
||||
-29.916,-71.2,18.0,280.0,3.1,147.0
|
||||
30.349,-85.788,11.1,0.0,0.0,21.0
|
||||
30.349,-85.788,11.1,0.0,0.0,21.0
|
||||
30.349,-85.788,9.4,0.0,0.0,21.0
|
||||
30.349,-85.788,9.4,0.0,0.0,21.0
|
||||
30.349,-85.788,8.3,300.0,2.1,21.0
|
||||
30.349,-85.788,11.1,280.0,1.5,21.0
|
||||
30.349,-85.788,0.0,0.0,0.0,21.0
|
||||
30.349,-85.788,10.6,320.0,3.1,21.0
|
||||
30.349,-85.788,9.4,310.0,3.1,21.0
|
||||
30.349,-85.788,7.8,320.0,2.6,21.0
|
||||
30.349,-85.788,6.1,340.0,2.1,21.0
|
||||
30.349,-85.788,6.7,330.0,2.6,21.0
|
||||
30.349,-85.788,6.1,310.0,1.5,21.0
|
||||
30.349,-85.788,7.2,310.0,2.1,21.0
|
||||
30.349,-85.788,12.8,360.0,3.1,21.0
|
||||
30.349,-85.788,15.0,0.0,3.1,21.0
|
||||
30.349,-85.788,16.7,20.0,4.6,21.0
|
||||
30.349,-85.788,18.9,30.0,5.1,21.0
|
||||
30.349,-85.788,19.4,10.0,4.1,21.0
|
||||
30.349,-85.788,21.1,330.0,2.6,21.0
|
||||
30.349,-85.788,21.1,10.0,4.6,21.0
|
||||
30.349,-85.788,21.7,360.0,4.1,21.0
|
||||
30.349,-85.788,21.7,30.0,2.1,21.0
|
||||
30.349,-85.788,21.7,330.0,2.6,21.0
|
||||
30.349,-85.788,16.1,350.0,2.1,21.0
|
||||
30.349,-85.788,11.7,0.0,0.0,21.0
|
||||
30.349,-85.788,8.9,0.0,0.0,21.0
|
||||
30.349,-85.788,9.4,0.0,0.0,21.0
|
||||
30.349,-85.788,7.8,0.0,0.0,21.0
|
||||
30.349,-85.788,11.1,30.0,3.1,21.0
|
||||
30.349,-85.788,7.2,0.0,0.0,21.0
|
||||
30.349,-85.788,7.2,0.0,0.0,21.0
|
||||
30.349,-85.788,0.0,0.0,0.0,21.0
|
||||
30.349,-85.788,7.8,30.0,2.1,21.0
|
||||
30.349,-85.788,8.3,40.0,2.6,21.0
|
||||
30.349,-85.788,7.2,50.0,1.5,21.0
|
||||
30.349,-85.788,8.3,60.0,1.5,21.0
|
||||
30.349,-85.788,5.6,40.0,2.1,21.0
|
||||
30.349,-85.788,6.7,40.0,2.1,21.0
|
||||
30.349,-85.788,7.8,50.0,3.1,21.0
|
||||
30.349,-85.788,11.7,70.0,2.6,21.0
|
||||
30.349,-85.788,15.6,70.0,3.1,21.0
|
||||
30.349,-85.788,18.9,100.0,3.6,21.0
|
||||
30.349,-85.788,20.0,130.0,3.6,21.0
|
||||
30.349,-85.788,21.1,140.0,4.1,21.0
|
||||
30.349,-85.788,21.7,150.0,4.1,21.0
|
||||
30.349,-85.788,21.7,170.0,3.1,21.0
|
||||
30.349,-85.788,22.2,170.0,3.1,21.0
|
||||
30.349,-85.788,20.6,0.0,0.0,21.0
|
||||
30.349,-85.788,17.2,0.0,0.0,21.0
|
||||
30.349,-85.788,14.4,0.0,0.0,21.0
|
||||
30.349,-85.788,12.8,100.0,1.5,21.0
|
||||
30.349,-85.788,13.3,100.0,1.5,21.0
|
||||
30.349,-85.788,10.6,0.0,0.0,21.0
|
||||
30.349,-85.788,9.4,0.0,0.0,21.0
|
||||
30.349,-85.788,7.8,0.0,0.0,21.0
|
||||
30.358,-85.799,8.3,0.0,0.0,21.0
|
||||
30.349,-85.788,0.0,0.0,0.0,21.0
|
||||
30.358,-85.799,6.7,0.0,0.0,21.0
|
||||
30.358,-85.799,7.2,0.0,0.0,21.0
|
||||
30.358,-85.799,7.2,0.0,0.0,21.0
|
||||
30.358,-85.799,8.3,50.0,1.5,21.0
|
||||
30.358,-85.799,9.4,0.0,0.0,21.0
|
||||
30.358,-85.799,8.9,0.0,0.0,21.0
|
||||
30.358,-85.799,10.0,340.0,1.5,21.0
|
||||
30.358,-85.799,12.8,40.0,1.5,21.0
|
||||
30.358,-85.799,16.7,100.0,2.1,21.0
|
||||
30.358,-85.799,21.1,100.0,1.5,21.0
|
||||
30.358,-85.799,23.3,0.0,0.0,21.0
|
||||
30.358,-85.799,25.0,180.0,4.6,21.0
|
||||
30.358,-85.799,24.4,230.0,3.6,21.0
|
||||
30.358,-85.799,25.0,210.0,4.1,21.0
|
||||
30.358,-85.799,23.9,170.0,4.1,21.0
|
||||
30.358,-85.799,22.8,0.0,0.0,21.0
|
||||
30.358,-85.799,19.4,0.0,0.0,21.0
|
||||
30.358,-85.799,17.8,140.0,2.1,21.0
|
||||
60.383,5.333,-0.7,0.0,0.0,36.0
|
||||
60.383,5.333,0.6,270.0,2.0,36.0
|
||||
60.383,5.333,-0.9,120.0,1.0,36.0
|
||||
60.383,5.333,-1.6,130.0,2.0,36.0
|
||||
60.383,5.333,-1.4,150.0,1.0,36.0
|
||||
60.383,5.333,-1.7,0.0,0.0,36.0
|
||||
60.383,5.333,-1.7,140.0,1.0,36.0
|
||||
60.383,5.333,-1.4,0.0,0.0,36.0
|
||||
60.383,5.333,-1.0,0.0,0.0,36.0
|
||||
60.383,5.333,-1.0,150.0,1.0,36.0
|
||||
60.383,5.333,-0.7,140.0,1.0,36.0
|
||||
60.383,5.333,0.5,150.0,1.0,36.0
|
||||
60.383,5.333,1.9,0.0,0.0,36.0
|
||||
60.383,5.333,1.7,0.0,0.0,36.0
|
||||
60.383,5.333,2.1,310.0,2.0,36.0
|
||||
60.383,5.333,1.5,90.0,1.0,36.0
|
||||
60.383,5.333,1.9,290.0,1.0,36.0
|
||||
60.383,5.333,2.0,320.0,1.0,36.0
|
||||
60.383,5.333,1.9,330.0,1.0,36.0
|
||||
60.383,5.333,1.3,350.0,1.0,36.0
|
||||
60.383,5.333,1.5,120.0,1.0,36.0
|
||||
60.383,5.333,1.3,150.0,2.0,36.0
|
||||
60.383,5.333,0.8,140.0,1.0,36.0
|
||||
60.383,5.333,0.3,300.0,1.0,36.0
|
||||
60.383,5.333,0.2,140.0,1.0,36.0
|
||||
60.383,5.333,0.4,140.0,1.0,36.0
|
||||
60.383,5.333,0.5,320.0,1.0,36.0
|
||||
60.383,5.333,1.5,330.0,1.0,36.0
|
||||
60.383,5.333,1.8,40.0,1.0,36.0
|
||||
60.383,5.333,2.3,170.0,1.0,36.0
|
||||
60.383,5.333,2.7,140.0,1.0,36.0
|
||||
60.383,5.333,3.1,330.0,1.0,36.0
|
||||
60.383,5.333,3.8,350.0,1.0,36.0
|
||||
60.383,5.333,3.8,140.0,1.0,36.0
|
||||
60.383,5.333,4.1,150.0,1.0,36.0
|
||||
60.383,5.333,4.4,180.0,1.0,36.0
|
||||
60.383,5.333,4.9,300.0,1.0,36.0
|
||||
60.383,5.333,5.2,320.0,1.0,36.0
|
||||
60.383,5.333,6.7,340.0,1.0,36.0
|
||||
60.383,5.333,6.9,250.0,1.0,36.0
|
||||
60.383,5.333,7.9,300.0,2.0,36.0
|
||||
60.383,5.333,5.5,140.0,1.0,36.0
|
||||
60.383,5.333,7.1,140.0,2.0,36.0
|
||||
60.383,5.333,7.0,280.0,2.0,36.0
|
||||
60.383,5.333,4.6,170.0,1.0,36.0
|
||||
60.383,5.333,4.8,330.0,1.0,36.0
|
||||
60.383,5.333,6.4,260.0,2.0,36.0
|
||||
60.383,5.333,6.2,340.0,1.0,36.0
|
||||
60.383,5.333,5.7,320.0,2.0,36.0
|
||||
60.383,5.333,5.2,100.0,1.0,36.0
|
||||
60.383,5.333,5.1,310.0,1.0,36.0
|
||||
60.383,5.333,4.9,290.0,2.0,36.0
|
||||
60.383,5.333,4.9,310.0,2.0,36.0
|
||||
60.383,5.333,6.1,320.0,2.0,36.0
|
||||
60.383,5.333,7.0,250.0,1.0,36.0
|
||||
60.383,5.333,5.3,140.0,1.0,36.0
|
||||
60.383,5.333,6.9,350.0,1.0,36.0
|
||||
60.383,5.333,9.7,110.0,3.0,36.0
|
||||
60.383,5.333,10.3,300.0,3.0,36.0
|
||||
60.383,5.333,8.7,310.0,1.0,36.0
|
||||
60.383,5.333,9.0,270.0,3.0,36.0
|
||||
60.383,5.333,11.6,80.0,3.0,36.0
|
||||
60.383,5.333,11.4,80.0,4.0,36.0
|
||||
60.383,5.333,9.7,70.0,5.0,36.0
|
||||
60.383,5.333,9.5,80.0,6.0,36.0
|
||||
60.383,5.333,8.7,80.0,5.0,36.0
|
||||
60.383,5.333,7.7,80.0,5.0,36.0
|
||||
60.383,5.333,8.2,80.0,4.0,36.0
|
||||
60.383,5.333,7.7,30.0,1.0,36.0
|
||||
60.383,5.333,7.2,310.0,1.0,36.0
|
||||
60.383,5.333,6.8,300.0,2.0,36.0
|
||||
60.383,5.333,6.7,140.0,1.0,36.0
|
||||
|
@@ -92,7 +92,7 @@
|
||||
"dstore = ws.get_default_datastore()\n",
|
||||
"\n",
|
||||
"# upload weather data\n",
|
||||
"dstore.upload('training-dataset', 'drift-on-aks-data', overwrite=True, show_progress=False)"
|
||||
"dstore.upload('dataset', 'drift-on-aks-data', overwrite=True, show_progress=False)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -229,7 +229,7 @@
|
||||
"source": [
|
||||
"## Run recent weather data through the webservice \n",
|
||||
"\n",
|
||||
"The below cells take the past 2 days of weather data, filter and transform using the same processes as the training dataset, and runs the data through the service."
|
||||
"The cells below take the weather data of Florida from 2019-11-20 to 2019-11-12, filter and transform it using the same processes as the training dataset, and run the data through the service."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -238,16 +238,10 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from datetime import datetime, timedelta\n",
|
||||
"from azureml.opendatasets import NoaaIsdWeather\n",
|
||||
"# create dataset \n",
|
||||
"tset = Dataset.Tabular.from_delimited_files(dstore.path('drift-on-aks-data/testing.csv'))\n",
|
||||
"\n",
|
||||
"start = datetime.today() - timedelta(days=2)\n",
|
||||
"end = datetime.today()\n",
|
||||
"\n",
|
||||
"isd = NoaaIsdWeather(start, end)\n",
|
||||
"\n",
|
||||
"df = isd.to_pandas_dataframe().fillna(0)\n",
|
||||
"df = df[df['stationName'].str.contains('FLORIDA', regex=True, na=False)]\n",
|
||||
"df = tset.to_pandas_dataframe().fillna(0)\n",
|
||||
"\n",
|
||||
"X_features = ['latitude', 'longitude', 'temperature', 'windAngle', 'windSpeed']\n",
|
||||
"y_features = ['elevation']\n",
|
||||
@@ -264,9 +258,9 @@
|
||||
"source": [
|
||||
"import json\n",
|
||||
"\n",
|
||||
"today_data = json.dumps({'data': X.values.tolist()})\n",
|
||||
"data = json.dumps({'data': X.values.tolist()})\n",
|
||||
"\n",
|
||||
"data_encoded = bytes(today_data, encoding='utf8')\n",
|
||||
"data_encoded = bytes(data, encoding='utf8')\n",
|
||||
"prediction = service.run(input_data=data_encoded)\n",
|
||||
"print(prediction)"
|
||||
]
|
||||
@@ -342,6 +336,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from datetime import datetime, timedelta\n",
|
||||
"from azureml.datadrift import DataDriftDetector, AlertConfiguration\n",
|
||||
"\n",
|
||||
"services = [service_name]\n",
|
||||
|
||||
@@ -100,7 +100,7 @@
|
||||
"\n",
|
||||
"# Check core SDK version number\n",
|
||||
"\n",
|
||||
"print(\"This notebook was created using SDK version 1.0.76.2, you are currently running version\", azureml.core.VERSION)"
|
||||
"print(\"This notebook was created using SDK version 1.0.81, you are currently running version\", azureml.core.VERSION)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -925,6 +925,7 @@
|
||||
"cd = CondaDependencies.create()\n",
|
||||
"cd.add_tensorflow_conda_package()\n",
|
||||
"cd.add_conda_package('keras==2.2.5')\n",
|
||||
"cd.add_pip_package(\"azureml-defaults\")\n",
|
||||
"cd.save_to_file(base_directory='./', conda_file_path='myenv.yml')\n",
|
||||
"\n",
|
||||
"print(cd.serialize_to_string())"
|
||||
@@ -947,10 +948,11 @@
|
||||
"from azureml.core.webservice import AciWebservice\n",
|
||||
"from azureml.core.model import InferenceConfig\n",
|
||||
"from azureml.core.model import Model\n",
|
||||
"from azureml.core.environment import Environment\n",
|
||||
"\n",
|
||||
"inference_config = InferenceConfig(runtime= \"python\", \n",
|
||||
" entry_script=\"score.py\",\n",
|
||||
" conda_file=\"myenv.yml\")\n",
|
||||
"\n",
|
||||
"myenv = Environment.from_conda_specification(name=\"myenv\", file_path=\"myenv.yml\")\n",
|
||||
"inference_config = InferenceConfig(entry_script=\"score.py\", environment=myenv)\n",
|
||||
"\n",
|
||||
"aciconfig = AciWebservice.deploy_configuration(cpu_cores=1,\n",
|
||||
" auth_enabled=True, # this flag generates API keys to secure access\n",
|
||||
|
||||
@@ -10,7 +10,7 @@ With Azure Machine Learning datasets, you can:
|
||||
|
||||
## Learn how to use Azure Machine Learning datasets
|
||||
* [Create and register datasets](https://aka.ms/azureml/howto/createdatasets)
|
||||
* Use [Datasets in training](datasets-tutorial/train-with-datasets.ipynb)
|
||||
* Use [Datasets in training](datasets-tutorial/train-with-datasets/train-with-datasets.ipynb)
|
||||
* Use TabularDatasets in [automated machine learning training](https://aka.ms/automl-dataset)
|
||||
* Use FileDatasets in [image classification](https://aka.ms/filedataset-samplenotebook)
|
||||
* Use FileDatasets in [deep learning with hyperparameter tuning](https://aka.ms/filedataset-hyperdrive)
|
||||
|
||||
@@ -290,7 +290,9 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# backfill for one month\n",
|
||||
"backfill = monitor.backfill(datetime(2019, 9, 1), datetime(2019, 10, 1))\n",
|
||||
"backfill_start_date = datetime(2019, 9, 1)\n",
|
||||
"backfill_end_date = datetime(2019, 10, 1)\n",
|
||||
"backfill = monitor.backfill(backfill_start_date, backfill_end_date)\n",
|
||||
"backfill"
|
||||
]
|
||||
},
|
||||
@@ -353,7 +355,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# plot the results from Python SDK \n",
|
||||
"monitor.show()"
|
||||
"monitor.show(backfill_start_date, backfill_end_date)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -371,7 +373,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"link = 'https://ml.azure.com/data/monitor/{}?wsid=/subscriptions/{}/resourcegroups/{}/workspaces/{}'.format(monitor.name, ws.subscription_id, ws.resource_group, ws.name)\n",
|
||||
"link = 'https://ml.azure.com/data/monitor/{}?wsid=/subscriptions/{}/resourcegroups/{}/workspaces/{}&startDate={}&endDate={}'.format(monitor.name, ws.subscription_id, ws.resource_group, ws.name, backfill_start_date.strftime('%Y-%m-%d'), backfill_end_date .strftime('%Y-%m-%d'))\n",
|
||||
"print(link)"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -0,0 +1,403 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||
"\n",
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Introduction to labeled datasets\n",
|
||||
"\n",
|
||||
"Labeled datasets are the output of Azure Machine Learning [labeling projects](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-create-labeling-projects). They capture references to the data (e.g. image files) and its labels. \n",
|
||||
"\n",
|
||||
"This tutorial introduces the capabilities of labeled datasets and how to use them in training.\n",
|
||||
"\n",
|
||||
"Learn how-to:\n",
|
||||
"\n",
|
||||
"> * Set up your development environment\n",
|
||||
"> * Explore labeled datasets\n",
|
||||
"> * Train a simple deep learning neural network on a remote cluster\n",
|
||||
"\n",
|
||||
"## Prerequisite:\n",
|
||||
"* Understand the [architecture and terms](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture) introduced by Azure Machine Learning\n",
|
||||
"* Go through Azure Machine Learning [labeling projects](https://docs.microsoft.com/azure/machine-learning/service/how-to-create-labeling-projects) and export the labels as an Azure Machine Learning dataset\n",
|
||||
"* Go through the [configuration notebook](../../../configuration.ipynb) to:\n",
|
||||
" * install the latest version of azureml-sdk\n",
|
||||
" * install the latest version of azureml-contrib-dataset\n",
|
||||
" * install [PyTorch](https://pytorch.org/)\n",
|
||||
" * create a workspace and its configuration file (`config.json`)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Set up your development environment\n",
|
||||
"\n",
|
||||
"All the setup for your development work can be accomplished in a Python notebook. Setup includes:\n",
|
||||
"\n",
|
||||
"* Importing Python packages\n",
|
||||
"* Connecting to a workspace to enable communication between your local computer and remote resources\n",
|
||||
"* Creating an experiment to track all your runs\n",
|
||||
"* Creating a remote compute target to use for training\n",
|
||||
"\n",
|
||||
"### Import packages\n",
|
||||
"\n",
|
||||
"Import Python packages you need in this session. Also display the Azure Machine Learning SDK version."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import azureml.core\n",
|
||||
"import azureml.contrib.dataset\n",
|
||||
"from azureml.core import Dataset, Workspace, Experiment\n",
|
||||
"from azureml.contrib.dataset import FileHandlingOption\n",
|
||||
"\n",
|
||||
"# check core SDK version number\n",
|
||||
"print(\"Azure ML SDK Version: \", azureml.core.VERSION)\n",
|
||||
"print(\"Azure ML Contrib Version\", azureml.contrib.dataset.VERSION)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Connect to workspace\n",
|
||||
"\n",
|
||||
"Create a workspace object from the existing workspace. `Workspace.from_config()` reads the file **config.json** and loads the details into an object named `workspace`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# load workspace\n",
|
||||
"workspace = Workspace.from_config()\n",
|
||||
"print('Workspace name: ' + workspace.name, \n",
|
||||
" 'Azure region: ' + workspace.location, \n",
|
||||
" 'Subscription id: ' + workspace.subscription_id, \n",
|
||||
" 'Resource group: ' + workspace.resource_group, sep='\\n')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create experiment and a directory\n",
|
||||
"\n",
|
||||
"Create an experiment to track the runs in your workspace and a directory to deliver the necessary code from your computer to the remote resource."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# create an ML experiment\n",
|
||||
"exp = Experiment(workspace=workspace, name='labeled-datasets')\n",
|
||||
"\n",
|
||||
"# create a directory\n",
|
||||
"script_folder = './labeled-datasets'\n",
|
||||
"os.makedirs(script_folder, exist_ok=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create or Attach existing compute resource\n",
|
||||
"By using Azure Machine Learning Compute, a managed service, data scientists can train machine learning models on clusters of Azure virtual machines. Examples include VMs with GPU support. In this tutorial, you will create Azure Machine Learning Compute as your training environment. The code below creates the compute clusters for you if they don't already exist in your workspace.\n",
|
||||
"\n",
|
||||
"**Creation of compute takes approximately 5 minutes.** If the AmlCompute with that name is already in your workspace the code will skip the creation process."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
|
||||
"from azureml.core.compute_target import ComputeTargetException\n",
|
||||
"\n",
|
||||
"# choose a name for your cluster\n",
|
||||
"cluster_name = \"openhack\"\n",
|
||||
"\n",
|
||||
"try:\n",
|
||||
" compute_target = ComputeTarget(workspace=workspace, name=cluster_name)\n",
|
||||
" print('Found existing compute target')\n",
|
||||
"except ComputeTargetException:\n",
|
||||
" print('Creating a new compute target...')\n",
|
||||
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6', \n",
|
||||
" max_nodes=4)\n",
|
||||
"\n",
|
||||
" # create the cluster\n",
|
||||
" compute_target = ComputeTarget.create(workspace, cluster_name, compute_config)\n",
|
||||
"\n",
|
||||
" # can poll for a minimum number of nodes and for a specific timeout. \n",
|
||||
" # if no min node count is provided it uses the scale settings for the cluster\n",
|
||||
" compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)\n",
|
||||
"\n",
|
||||
"# use get_status() to get a detailed status for the current cluster. \n",
|
||||
"print(compute_target.get_status().serialize())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Explore labeled datasets\n",
|
||||
"\n",
|
||||
"**Note**: How to create labeled datasets is not covered in this tutorial. To create labeled datasets, you can go through [labeling projects](https://docs.microsoft.com/azure/machine-learning/service/how-to-create-labeling-projects) and export the output labels as Azure Machine Learning datasets. \n",
|
||||
"\n",
|
||||
"`animal_labels` used in this tutorial section is the output from a labeling project, with the task type of \"Object Identification\"."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# get animal_labels dataset from the workspace\n",
|
||||
"animal_labels = Dataset.get_by_name(workspace, 'animal_labels')\n",
|
||||
"animal_labels"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can load labeled datasets into a pandas DataFrame. There are 3 file handling options that you can choose from to load the data files referenced by the labeled datasets:\n",
|
||||
"* Streaming: The default option to load data files.\n",
|
||||
"* Download: Download your data files to a local path.\n",
|
||||
"* Mount: Mount your data files to a mount point. Mount only works for Linux-based compute, including Azure Machine Learning notebook VM and Azure Machine Learning Compute."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"animal_pd = animal_labels.to_pandas_dataframe(file_handling_option=FileHandlingOption.DOWNLOAD, target_path='./download/', overwrite_download=True)\n",
|
||||
"animal_pd"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import matplotlib.image as mpimg\n",
|
||||
"\n",
|
||||
"# read images from downloaded path\n",
|
||||
"img = mpimg.imread(animal_pd.loc[0,'image_url'])\n",
|
||||
"imgplot = plt.imshow(img)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can also load labeled datasets into [torchvision datasets](https://pytorch.org/docs/stable/torchvision/datasets.html), so that you can leverage the open source libraries provided by PyTorch for image transformation and training."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from torchvision.transforms import functional as F\n",
|
||||
"\n",
|
||||
"# load animal_labels dataset into torchvision dataset\n",
|
||||
"pytorch_dataset = animal_labels.to_torchvision()\n",
|
||||
"img = pytorch_dataset[0][0]\n",
|
||||
"print(type(img))\n",
|
||||
"\n",
|
||||
"# use methods from torchvision to transform the img into grayscale\n",
|
||||
"pil_image = F.to_pil_image(img)\n",
|
||||
"gray_image = F.to_grayscale(pil_image, num_output_channels=3)\n",
|
||||
"\n",
|
||||
"imgplot = plt.imshow(gray_image)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Train an image classification model\n",
|
||||
"\n",
|
||||
" The `crack_labels` dataset used in this tutorial section is the output from a labeling project, with the task type of \"Image Classification Multi-class\". We will use this dataset to train an image classification model that classifies whether an image has cracks or not."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# get crack_labels dataset from the workspace\n",
|
||||
"crack_labels = Dataset.get_by_name(workspace, 'crack_labels')\n",
|
||||
"crack_labels"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Configure Estimator for training\n",
|
||||
"\n",
|
||||
"You can ask the system to build a conda environment based on your dependency specification. Once the environment is built, and if you don't change your dependencies, it will be reused in subsequent runs."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core import Environment\n",
|
||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||
"\n",
|
||||
"conda_env = Environment('conda-env')\n",
|
||||
"conda_env.python.conda_dependencies = CondaDependencies.create(pip_packages=['azureml-sdk',\n",
|
||||
" 'azureml-contrib-dataset',\n",
|
||||
" 'torch','torchvision',\n",
|
||||
" 'azureml-dataprep[pandas]'])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"An estimator object is used to submit the run. Azure Machine Learning has pre-configured estimators for common machine learning frameworks, as well as a generic Estimator. Create a generic estimator by specifying\n",
|
||||
"\n",
|
||||
"* The name of the estimator object, `est`\n",
|
||||
"* The directory that contains your scripts. All the files in this directory are uploaded into the cluster nodes for execution. \n",
|
||||
"* The training script name, train.py\n",
|
||||
"* The input dataset for training\n",
|
||||
"* The compute target. In this case you will use the AmlCompute you created\n",
|
||||
"* The environment definition for the experiment"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.train.estimator import Estimator\n",
|
||||
"\n",
|
||||
"est = Estimator(source_directory=script_folder, \n",
|
||||
" entry_script='train.py',\n",
|
||||
" inputs=[crack_labels.as_named_input('crack_labels')],\n",
|
||||
" compute_target=compute_target,\n",
|
||||
" environment_definition= conda_env)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Submit job to run\n",
|
||||
"\n",
|
||||
"Submit the estimator to the Azure ML experiment to kick off the execution."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run = exp.submit(est)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run.wait_for_completion(show_output=True)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "sihhu"
|
||||
}
|
||||
],
|
||||
"category": "tutorial",
|
||||
"compute": [
|
||||
"Remote"
|
||||
],
|
||||
"deployment": [
|
||||
"None"
|
||||
],
|
||||
"exclude_from_index": false,
|
||||
"framework": [
|
||||
"Azure ML"
|
||||
],
|
||||
"friendly_name": "Introduction to labeled datasets",
|
||||
"index_order": 1,
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
"name": "python36"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.9"
|
||||
},
|
||||
"nteract": {
|
||||
"version": "nteract-front-end@1.0.0"
|
||||
},
|
||||
"star_tag": [
|
||||
"featured"
|
||||
],
|
||||
"tags": [
|
||||
"Dataset",
|
||||
"label",
|
||||
"Estimator"
|
||||
],
|
||||
"task": "Train"
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -0,0 +1,106 @@
|
||||
import os
|
||||
import torchvision
|
||||
import torchvision.transforms as transforms
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
import torch.optim as optim
|
||||
|
||||
from azureml.core import Dataset, Run
|
||||
import azureml.contrib.dataset
|
||||
from azureml.contrib.dataset import FileHandlingOption, LabeledDatasetTask
|
||||
|
||||
# Handle to the current Azure ML run; it exposes the datasets passed in as inputs.
run = Run.get_context()

# get input dataset by name and wrap it as a torchvision-style dataset
labeled_dataset = run.input_datasets['crack_labels']
pytorch_dataset = labeled_dataset.to_torchvision()

# Shuffle the sample indices once, then carve out the two subsets:
# the first 40 shuffled samples for training and the last 10 for testing.
# NOTE(review): the subsets only stay disjoint when the dataset holds at
# least 50 samples -- confirm against the registered dataset.
indices = torch.randperm(len(pytorch_dataset)).tolist()
train_indices, test_indices = indices[:40], indices[-10:]
dataset_train = torch.utils.data.Subset(pytorch_dataset, train_indices)
dataset_test = torch.utils.data.Subset(pytorch_dataset, test_indices)

# Both loaders share the same batching configuration.
loader_options = dict(batch_size=4, shuffle=True, num_workers=0)
trainloader = torch.utils.data.DataLoader(dataset_train, **loader_options)
testloader = torch.utils.data.DataLoader(dataset_test, **loader_options)
|
||||
|
||||
|
||||
class Net(nn.Module):
    """Small convolutional classifier for 3-channel images.

    Two unpadded 5x5 convolutions, each followed by ReLU and 2x2 max
    pooling, feed three fully connected layers. The first fully connected
    layer expects 16 feature maps of 71x71, which is what e.g. 296x296
    inputs produce: (296 - 4) / 2 = 146, then (146 - 4) / 2 = 71.

    Args:
        num_classes: Number of output logits. Defaults to 10, the value
            that was previously hard-coded.
    """

    # Flattened size of the second pooling output: 16 channels of 71x71.
    FLAT_FEATURES = 16 * 71 * 71

    def __init__(self, num_classes=10):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(self.FLAT_FEATURES, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)

    def forward(self, x):
        """Return raw class logits of shape (batch, num_classes) for *x*."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten everything except the batch dimension for the dense layers.
        x = x.view(x.size(0), self.FLAT_FEATURES)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)
|
||||
|
||||
|
||||
net = Net()

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

for epoch in range(2):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(trainloader):
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print the average loss of every 5 mini-batches, then reset it
        running_loss += loss.item()
        if i % 5 == 4:
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 5))
            running_loss = 0.0

print('Finished Training')
classes = trainloader.dataset.dataset.labels
PATH = './cifar_net.pth'
torch.save(net.state_dict(), PATH)

# Fetch one test batch. The built-in next() is used because iterator
# objects are not guaranteed to expose a .next() method.
dataiter = iter(testloader)
images, labels = next(dataiter)

# Reload the weights into a fresh network to check the saved file round-trips.
net = Net()
net.load_state_dict(torch.load(PATH))

outputs = net(images)

_, predicted = torch.max(outputs, 1)

# Count correct predictions over the whole test loader without tracking gradients.
correct = 0
total = 0
with torch.no_grad():
    for images, labels in testloader:
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10 test images: %d %%' % (100 * correct / total))
|
||||
@@ -0,0 +1,35 @@
|
||||
import os
|
||||
|
||||
|
||||
def convert(imgf, labelf, outf, n, pixels_per_image=28 * 28):
    """Convert a binary image/label file pair into a headerless CSV file.

    Each output row holds one label followed by the pixel values of the
    matching image, written as decimal integers separated by commas.

    Args:
        imgf: Path of the binary image file; its first 16 bytes are skipped.
        labelf: Path of the binary label file; its first 8 bytes are skipped.
        outf: Path of the CSV file to write (opened in "w" mode).
        n: Number of image/label records to convert.
        pixels_per_image: Number of bytes that make up one image.
            Defaults to 28 * 28.

    Raises:
        ValueError: If either input file ends before ``n`` records were read.
    """
    # Context managers guarantee all three files are closed, even on error.
    with open(imgf, "rb") as images, \
            open(labelf, "rb") as labels, \
            open(outf, "w") as out:
        # Skip the fixed-size headers that precede the raw records.
        images.read(16)
        labels.read(8)

        for index in range(n):
            label = labels.read(1)
            # Read a whole image per call instead of one byte at a time.
            pixels = images.read(pixels_per_image)
            if len(label) != 1 or len(pixels) != pixels_per_image:
                raise ValueError(
                    "input ended after %d of %d records" % (index, n))
            # Iterating over bytes yields ints, so this writes label,pix0,pix1,...
            out.write(",".join(map(str, label + pixels)) + "\n")
|
||||
|
||||
|
||||
# Input and output locations are handed to the script through environment variables.
mounted_input_path = os.environ['fashion_ds']
mounted_output_path = os.environ['AZUREML_DATAREFERENCE_prepared_fashion_ds']
os.makedirs(mounted_output_path, exist_ok=True)

# (image file, label file, CSV file, record count) for the train and test splits.
for image_name, label_name, csv_name, record_count in (
        ('train-images-idx3-ubyte', 'train-labels-idx1-ubyte', 'mnist_train.csv', 60000),
        ('t10k-images-idx3-ubyte', 't10k-labels-idx1-ubyte', 'mnist_test.csv', 10000),
):
    convert(os.path.join(mounted_input_path, image_name),
            os.path.join(mounted_input_path, label_name),
            os.path.join(mounted_output_path, csv_name), record_count)
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,120 @@
|
||||
import keras
|
||||
from keras.models import Sequential
|
||||
from keras.layers import Dense, Dropout, Flatten
|
||||
from keras.layers import Conv2D, MaxPooling2D
|
||||
from keras.layers.normalization import BatchNormalization
|
||||
from keras.utils import to_categorical
|
||||
from keras.callbacks import Callback
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import os
|
||||
import matplotlib.pyplot as plt
|
||||
from sklearn.model_selection import train_test_split
|
||||
from azureml.core import Run
|
||||
|
||||
# dataset object from the run
run = Run.get_context()
dataset = run.input_datasets['prepared_fashion_ds']

# split dataset into train and test set
train_dataset, test_dataset = dataset.random_split(percentage=0.8, seed=111)

# load dataset into pandas dataframe
data_train = train_dataset.to_pandas_dataframe()
data_test = test_dataset.to_pandas_dataframe()

img_rows, img_cols = 28, 28
input_shape = (img_rows, img_cols, 1)


def _as_image_batch(flat_pixels):
    """Reshape rows of flat pixels to (n, img_rows, img_cols, 1) float32 values divided by 255."""
    return flat_pixels.reshape(flat_pixels.shape[0], img_rows, img_cols, 1).astype('float32') / 255


# column 0 is used as the target, the remaining columns as the features
X = np.array(data_train.iloc[:, 1:])
y = to_categorical(np.array(data_train.iloc[:, 0]))

# here we split off validation data to optimize the classifier during training
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=13)

# test data
X_test = np.array(data_test.iloc[:, 1:])
y_test = to_categorical(np.array(data_test.iloc[:, 0]))

X_train = _as_image_batch(X_train)
X_test = _as_image_batch(X_test)
X_val = _as_image_batch(X_val)

batch_size = 256
num_classes = 10
epochs = 10

# construct neural network
model = Sequential([
    Conv2D(32, kernel_size=(3, 3),
           activation='relu',
           kernel_initializer='he_normal',
           input_shape=input_shape),
    MaxPooling2D((2, 2)),
    Dropout(0.25),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Conv2D(128, (3, 3), activation='relu'),
    Dropout(0.4),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(num_classes, activation='softmax'),
])

model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adam(),
              metrics=['accuracy'])
|
||||
|
||||
# start an Azure ML run
|
||||
run = Run.get_context()
|
||||
|
||||
|
||||
class LogRunMetrics(Callback):
    """Keras callback that reports per-epoch training metrics to the Azure ML run."""

    def on_epoch_end(self, epoch, log):
        """Log the loss and accuracy found in ``log`` at the end of an epoch.

        ``epoch`` is accepted to satisfy the callback signature but is not
        used; ``log`` must provide the ``'loss'`` and ``'accuracy'`` keys.
        """
        # Logging a value repeatedly under the same name creates a list on
        # the run, so each epoch appends one entry per metric.
        for metric_name, log_key in (('Loss', 'loss'), ('Accuracy', 'accuracy')):
            run.log(metric_name, log[log_key])
|
||||
|
||||
|
||||
history = model.fit(X_train, y_train,
|
||||
batch_size=batch_size,
|
||||
epochs=epochs,
|
||||
verbose=1,
|
||||
validation_data=(X_val, y_val),
|
||||
callbacks=[LogRunMetrics()])
|
||||
|
||||
score = model.evaluate(X_test, y_test, verbose=0)
|
||||
|
||||
# log a single value
|
||||
run.log("Final test loss", score[0])
|
||||
print('Test loss:', score[0])
|
||||
|
||||
run.log('Final test accuracy', score[1])
|
||||
print('Test accuracy:', score[1])
|
||||
|
||||
plt.figure(figsize=(6, 3))
|
||||
plt.title('Fashion MNIST with Keras ({} epochs)'.format(epochs), fontsize=14)
|
||||
plt.plot(history.history['accuracy'], 'b-', label='Accuracy', lw=4, alpha=0.5)
|
||||
plt.plot(history.history['loss'], 'r--', label='Loss', lw=4, alpha=0.5)
|
||||
plt.legend(fontsize=12)
|
||||
plt.grid(True)
|
||||
|
||||
# log an image
|
||||
run.log_image('Loss v.s. Accuracy', plot=plt)
|
||||
|
||||
# create a ./outputs/model folder in the compute target
|
||||
# files saved in the "./outputs" folder are automatically uploaded into run history
|
||||
os.makedirs('./outputs/model', exist_ok=True)
|
||||
|
||||
# serialize NN architecture to JSON
|
||||
model_json = model.to_json()
|
||||
# save model JSON
|
||||
with open('./outputs/model/model.json', 'w') as f:
|
||||
f.write(model_json)
|
||||
# save model weights
|
||||
model.save_weights('./outputs/model/model.h5')
|
||||
print("model saved in ./outputs/model folder")
|
||||
@@ -0,0 +1,488 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
|
||||
"\n",
|
||||
"Licensed under the MIT License [2017] Zalando SE, https://tech.zalando.com"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Build a simple ML pipeline for image classification\n",
|
||||
"\n",
|
||||
"## Introduction\n",
|
||||
"This tutorial shows how to train a simple deep neural network using the [Fashion MNIST](https://github.com/zalandoresearch/fashion-mnist) dataset and Keras on Azure Machine Learning. Fashion-MNIST is a dataset of Zalando's article images\u2014consisting of a training set of 60,000 examples and a test set of 10,000 examples. Each example is a 28x28 grayscale image, associated with a label from 10 classes.\n",
|
||||
"\n",
|
||||
"Learn how to:\n",
|
||||
"\n",
|
||||
"> * Set up your development environment\n",
|
||||
"> * Create the Fashion MNIST dataset\n",
|
||||
"> * Create a machine learning pipeline to train a simple deep learning neural network on a remote cluster\n",
|
||||
"> * Retrieve input datasets from the experiment and register the output model with datasets\n",
|
||||
"\n",
|
||||
"## Prerequisite:\n",
|
||||
"* Understand the [architecture and terms](https://docs.microsoft.com/azure/machine-learning/service/concept-azure-machine-learning-architecture) introduced by Azure Machine Learning\n",
|
||||
"* If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [configuration notebook](../../../configuration.ipynb) to:\n",
|
||||
" * install the latest version of AzureML SDK\n",
|
||||
" * create a workspace and its configuration file (`config.json`)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Set up your development environment\n",
|
||||
"\n",
|
||||
"All the setup for your development work can be accomplished in a Python notebook. Setup includes:\n",
|
||||
"\n",
|
||||
"* Importing Python packages\n",
|
||||
"* Connecting to a workspace to enable communication between your local computer and remote resources\n",
|
||||
"* Creating an experiment to track all your runs\n",
|
||||
"* Creating a remote compute target to use for training\n",
|
||||
"\n",
|
||||
"### Import packages\n",
|
||||
"\n",
|
||||
"Import Python packages you need in this session. Also display the Azure Machine Learning SDK version."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import azureml.core\n",
|
||||
"from azureml.core import Workspace, Dataset, Datastore, ComputeTarget, RunConfiguration, Experiment\n",
|
||||
"from azureml.core.runconfig import CondaDependencies\n",
|
||||
"from azureml.pipeline.steps import PythonScriptStep, EstimatorStep\n",
|
||||
"from azureml.pipeline.core import Pipeline, PipelineData\n",
|
||||
"from azureml.train.dnn import TensorFlow\n",
|
||||
"\n",
|
||||
"# check core SDK version number\n",
|
||||
"print(\"Azure ML SDK Version: \", azureml.core.VERSION)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Connect to workspace\n",
|
||||
"\n",
|
||||
"Create a workspace object from the existing workspace. `Workspace.from_config()` reads the file **config.json** and loads the details into an object named `workspace`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# load workspace\n",
|
||||
"workspace = Workspace.from_config()\n",
|
||||
"print('Workspace name: ' + workspace.name, \n",
|
||||
" 'Azure region: ' + workspace.location, \n",
|
||||
" 'Subscription id: ' + workspace.subscription_id, \n",
|
||||
" 'Resource group: ' + workspace.resource_group, sep='\\n')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create experiment and a directory\n",
|
||||
"\n",
|
||||
"Create an experiment to track the runs in your workspace and a directory to deliver the necessary code from your computer to the remote resource."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# create an ML experiment\n",
|
||||
"exp = Experiment(workspace=workspace, name='keras-mnist-fashion')\n",
|
||||
"\n",
|
||||
"# create a directory\n",
|
||||
"script_folder = './keras-mnist-fashion'\n",
|
||||
"os.makedirs(script_folder, exist_ok=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create or Attach existing compute resource\n",
|
||||
"By using Azure Machine Learning Compute, a managed service, data scientists can train machine learning models on clusters of Azure virtual machines. Examples include VMs with GPU support. In this tutorial, you create Azure Machine Learning Compute as your training environment. The code below creates the compute clusters for you if they don't already exist in your workspace.\n",
|
||||
"\n",
|
||||
"**Creation of compute takes approximately 5 minutes.** If the AmlCompute with that name is already in your workspace the code will skip the creation process."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
|
||||
"from azureml.core.compute_target import ComputeTargetException\n",
|
||||
"\n",
|
||||
"# choose a name for your cluster\n",
|
||||
"cluster_name = \"your-cluster-name\"\n",
|
||||
"\n",
|
||||
"try:\n",
|
||||
" compute_target = ComputeTarget(workspace=workspace, name=cluster_name)\n",
|
||||
" print('Found existing compute target')\n",
|
||||
"except ComputeTargetException:\n",
|
||||
" print('Creating a new compute target...')\n",
|
||||
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6', \n",
|
||||
" max_nodes=4)\n",
|
||||
"\n",
|
||||
" # create the cluster\n",
|
||||
" compute_target = ComputeTarget.create(workspace, cluster_name, compute_config)\n",
|
||||
"\n",
|
||||
" # can poll for a minimum number of nodes and for a specific timeout. \n",
|
||||
" # if no min node count is provided it uses the scale settings for the cluster\n",
|
||||
" compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)\n",
|
||||
"\n",
|
||||
"# use get_status() to get a detailed status for the current cluster. \n",
|
||||
"print(compute_target.get_status().serialize())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create the Fashion MNIST dataset\n",
|
||||
"\n",
|
||||
"By creating a dataset, you create a reference to the data source location. If you applied any subsetting transformations to the dataset, they will be stored in the dataset as well. The data remains in its existing location, so no extra storage cost is incurred. \n",
|
||||
"\n",
|
||||
"Every workspace comes with a default [datastore](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-access-data) (and you can register more) which is backed by the Azure blob storage account associated with the workspace. We can use it to transfer data from local to the cloud, and create a dataset from it. We will now upload the [Fashion MNIST](./keras-mnist-fashion) to the default datastore (blob) within your workspace."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"datastore = workspace.get_default_datastore()\n",
|
||||
"datastore.upload_files(files = ['keras-mnist-fashion/t10k-images-idx3-ubyte', 'keras-mnist-fashion/t10k-labels-idx1-ubyte',\n",
|
||||
" 'keras-mnist-fashion/train-images-idx3-ubyte','keras-mnist-fashion/train-labels-idx1-ubyte'],\n",
|
||||
" target_path = 'mnist-fashion',\n",
|
||||
" overwrite = True,\n",
|
||||
" show_progress = True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Then we will create an unregistered FileDataset pointing to the path in the datastore. You can also create a dataset from multiple paths. [Learn More](https://aka.ms/azureml/howto/createdatasets) "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"fashion_ds = Dataset.File.from_files([(datastore, 'mnist-fashion')])\n",
|
||||
"\n",
|
||||
"# list the files referenced by fashion_ds\n",
|
||||
"fashion_ds.to_path()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Build 2-step ML pipeline\n",
|
||||
"\n",
|
||||
"The [Azure Machine Learning Pipeline](https://docs.microsoft.com/en-us/azure/machine-learning/service/concept-ml-pipelines) enables data scientists to create and manage multiple simple and complex workflows concurrently. A typical pipeline would have multiple tasks to prepare data, train, deploy and evaluate models. Individual steps in the pipeline can make use of diverse compute options (for example: CPU for data preparation and GPU for training) and languages. [Learn More](https://github.com/Azure/MachineLearningNotebooks/tree/master/how-to-use-azureml/machine-learning-pipelines)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"### Step 1: data preparation\n",
|
||||
"\n",
|
||||
"In step one, we will load the image and labels from Fashion MNIST dataset into mnist_train.csv and mnist_test.csv\n",
|
||||
"\n",
|
||||
"Each image is 28 pixels in height and 28 pixels in width, for a total of 784 pixels. Each pixel has a single pixel-value associated with it, indicating the lightness or darkness of that pixel, with higher numbers meaning darker. This pixel-value is an integer between 0 and 255. Both mnist_train.csv and mnist_test.csv contain 785 columns. The first column consists of the class labels, which represent the article of clothing. The rest of the columns contain the pixel-values of the associated image."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# set up the compute environment to install required packages\n",
|
||||
"conda = CondaDependencies.create(\n",
|
||||
" pip_packages=['azureml-sdk','azureml-dataprep[fuse,pandas]'],\n",
|
||||
" pin_sdk_version=False)\n",
|
||||
"\n",
|
||||
"conda.set_pip_option('--pre')\n",
|
||||
"\n",
|
||||
"run_config = RunConfiguration()\n",
|
||||
"run_config.environment.python.conda_dependencies = conda"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Intermediate data (or output of a step) is represented by a `PipelineData` object. `prepared_fashion_ds` is produced as the output of step 1, and used as the input of step 2. PipelineData introduces a data dependency between steps, and creates an implicit execution order in the pipeline. You can register a `PipelineData` as a dataset and version the output data automatically. [Learn More](https://docs.microsoft.com/azure/machine-learning/service/how-to-version-track-datasets#version-a-pipeline-output-dataset) "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# define output data\n",
|
||||
"prepared_fashion_ds = PipelineData('prepared_fashion_ds', datastore=datastore).as_dataset()\n",
|
||||
"\n",
|
||||
"# register output data as dataset\n",
|
||||
"prepared_fashion_ds = prepared_fashion_ds.register(name='prepared_fashion_ds', create_new_version=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"A **PythonScriptStep** is a basic, built-in step to run a Python Script on a compute target. It takes a script name and optionally other parameters like arguments for the script, compute target, inputs and outputs. If no compute target is specified, default compute target for the workspace is used. You can also use a [**RunConfiguration**](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.runconfiguration?view=azure-ml-py) to specify requirements for the PythonScriptStep, such as conda dependencies and docker image."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"prep_step = PythonScriptStep(name='prepare step',\n",
|
||||
" script_name=\"prepare.py\",\n",
|
||||
" # mount fashion_ds dataset to the compute_target\n",
|
||||
" inputs=[fashion_ds.as_named_input('fashion_ds').as_mount()],\n",
|
||||
" outputs=[prepared_fashion_ds],\n",
|
||||
" source_directory=script_folder,\n",
|
||||
" compute_target=compute_target,\n",
|
||||
" runconfig=run_config)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Step 2: train CNN with Keras\n",
|
||||
"\n",
|
||||
"Next, we construct an `azureml.train.dnn.TensorFlow` estimator object. The TensorFlow estimator provides a simple way of launching a TensorFlow training job on a compute target. It will automatically provide a docker image that has TensorFlow installed.\n",
|
||||
"\n",
|
||||
"[EstimatorStep](https://docs.microsoft.com/en-us/python/api/azureml-pipeline-steps/azureml.pipeline.steps.estimator_step.estimatorstep?view=azure-ml-py) adds a step to run Tensorflow Estimator in a Pipeline. It takes a dataset as the input."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# set up training step with Tensorflow estimator\n",
|
||||
"est = TensorFlow(entry_script='train.py',\n",
|
||||
" source_directory=script_folder, \n",
|
||||
" pip_packages = ['azureml-sdk','keras','numpy','scikit-learn', 'matplotlib'],\n",
|
||||
" compute_target=compute_target)\n",
|
||||
"\n",
|
||||
"est_step = EstimatorStep(name='train step',\n",
|
||||
" estimator=est,\n",
|
||||
" estimator_entry_script_arguments=[],\n",
|
||||
" # parse prepared_fashion_ds into TabularDataset and use it as the input\n",
|
||||
" inputs=[prepared_fashion_ds.parse_delimited_files()],\n",
|
||||
" compute_target=compute_target)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Build the pipeline\n",
|
||||
"Once we have the steps (or steps collection), we can build the [pipeline](https://docs.microsoft.com/python/api/azureml-pipeline-core/azureml.pipeline.core.pipeline.pipeline?view=azure-ml-py).\n",
|
||||
"\n",
|
||||
"A pipeline is created with a list of steps and a workspace. Submit a pipeline using [submit](https://docs.microsoft.com/python/api/azureml-core/azureml.core.experiment(class)?view=azure-ml-py#submit-config--tags-none----kwargs-). When submit is called, a [PipelineRun](https://docs.microsoft.com/python/api/azureml-pipeline-core/azureml.pipeline.core.pipelinerun?view=azure-ml-py) is created which in turn creates [StepRun](https://docs.microsoft.com/python/api/azureml-pipeline-core/azureml.pipeline.core.steprun?view=azure-ml-py) objects for each step in the workflow."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# build pipeline & run experiment\n",
|
||||
"pipeline = Pipeline(workspace, steps=[prep_step, est_step])\n",
|
||||
"run = exp.submit(pipeline)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Monitor the PipelineRun"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"inputHidden": false,
|
||||
"outputHidden": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run.wait_for_completion(show_output=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run.find_step_run('train step')[0].get_metrics()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Register the input dataset and the output model"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Azure Machine Learning dataset makes it easy to trace how your data is used in ML. [Learn More](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-version-track-datasets#track-datasets-in-experiments)<br>\n",
|
||||
"For each Machine Learning experiment, you can easily trace the datasets used as the input through `Run` object."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# get input datasets\n",
|
||||
"prep_step = run.find_step_run('prepare step')[0]\n",
|
||||
"inputs = prep_step.get_details()['inputDatasets']\n",
|
||||
"input_dataset = inputs[0]['dataset']\n",
|
||||
"\n",
|
||||
"# list the files referenced by input_dataset\n",
|
||||
"input_dataset.to_path()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Register the input Fashion MNIST dataset with the workspace so that you can reuse it in other experiments or share it with your colleagues who have access to your workspace."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"fashion_ds = input_dataset.register(workspace = workspace,\n",
|
||||
" name = 'fashion_ds',\n",
|
||||
" description = 'image and label files from fashion mnist',\n",
|
||||
" create_new_version = True)\n",
|
||||
"fashion_ds"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Register the output model with dataset"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run.find_step_run('train step')[0].register_model(model_name = 'keras-model', model_path = 'outputs/model/', \n",
|
||||
" datasets =[('train test data',fashion_ds)])"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "sihhu"
|
||||
}
|
||||
],
|
||||
"category": "tutorial",
|
||||
"compute": [
|
||||
"Remote"
|
||||
],
|
||||
"datasets": [
|
||||
"Fashion MNIST"
|
||||
],
|
||||
"deployment": [
|
||||
"None"
|
||||
],
|
||||
"exclude_from_index": false,
|
||||
"framework": [
|
||||
"Azure ML"
|
||||
],
|
||||
"friendly_name": "Datasets with ML Pipeline",
|
||||
"index_order": 1,
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "python",
|
||||
"name": "python36"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.9"
|
||||
},
|
||||
"nteract": {
|
||||
"version": "nteract-front-end@1.0.0"
|
||||
},
|
||||
"star_tag": [
|
||||
"featured"
|
||||
],
|
||||
"tags": [
|
||||
"Dataset",
|
||||
"Pipeline",
|
||||
"Estimator",
|
||||
"ScriptRun"
|
||||
],
|
||||
"task": "Train"
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -13,23 +13,23 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Train with Azure Machine Learning Datasets\n",
|
||||
"# Train with Azure Machine Learning datasets\n",
|
||||
"Datasets are categorized into TabularDataset and FileDataset based on how users consume them in training. \n",
|
||||
"* A TabularDataset represents data in a tabular format by parsing the provided file or list of files. TabularDataset can be created from csv, tsv, parquet files, SQL query results etc. For the complete list, please visit our [documentation](https://aka.ms/tabulardataset-api-reference). It provides you with the ability to materialize the data into a pandas DataFrame.\n",
|
||||
"* A FileDataset references single or multiple files in your datastores or public urls. This provides you with the ability to download or mount the files to your compute. The files can be of any format, which enables a wider range of machine learning scenarios including deep learning.\n",
|
||||
"\n",
|
||||
"In this tutorial, you will learn how to train with Azure Machine Learning Datasets:\n",
|
||||
"In this tutorial, you will learn how to train with Azure Machine Learning datasets:\n",
|
||||
"\n",
|
||||
"☑ Use Datasets directly in your training script\n",
|
||||
"☑ Use datasets directly in your training script\n",
|
||||
"\n",
|
||||
"☑ Use Datasets to mount files to a remote compute"
|
||||
"☑ Use datasets to mount files to a remote compute"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -149,12 +149,12 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You now have the necessary packages and compute resources to train a model in the cloud.\n",
|
||||
"## Use Datasets directly in training\n",
|
||||
"## Use datasets directly in training\n",
|
||||
"\n",
|
||||
"### Create a TabularDataset\n",
|
||||
"By creating a dataset, you create a reference to the data source location. If you applied any subsetting transformations to the dataset, they will be stored in the dataset as well. The data remains in its existing location, so no extra storage cost is incurred. \n",
|
||||
"\n",
|
||||
"Every workspace comes with a default [datastore](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-access-data) (and you can register more) which is backed by the Azure blob storage account associated with the workspace. We can use it to transfer data from local to the cloud, and create Dataset from it. We will now upload the [Iris data](./train-dataset/Iris.csv) to the default datastore (blob) within your workspace."
|
||||
"Every workspace comes with a default [datastore](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-access-data) (and you can register more) which is backed by the Azure blob storage account associated with the workspace. We can use it to transfer data from local to the cloud, and create dataset from it. We will now upload the [Iris data](./train-dataset/Iris.csv) to the default datastore (blob) within your workspace."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -174,7 +174,9 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Then we will create an unregistered TabularDataset pointing to the path in the datastore. You can also create a Dataset from multiple paths. [learn more](https://aka.ms/azureml/howto/createdatasets) "
|
||||
"Then we will create an unregistered TabularDataset pointing to the path in the datastore. You can also create a dataset from multiple paths. [learn more](https://aka.ms/azureml/howto/createdatasets) \n",
|
||||
"\n",
|
||||
"[TabularDataset](https://docs.microsoft.com/python/api/azureml-core/azureml.data.tabulardataset?view=azure-ml-py) represents data in a tabular format by parsing the provided file or list of files. This provides you with the ability to materialize the data into a Pandas or Spark DataFrame. You can create a TabularDataset object from .csv, .tsv, and parquet files, and from SQL query results. For a complete list, see [TabularDatasetFactory](https://docs.microsoft.com/python/api/azureml-core/azureml.data.dataset_factory.tabulardatasetfactory?view=azure-ml-py) class."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -260,7 +262,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Configure and use Datasets as the input to Estimator"
|
||||
"### Configure and use datasets as the input to Estimator"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -294,7 +296,7 @@
|
||||
"* The name of the estimator object, `est`\n",
|
||||
"* The directory that contains your scripts. All the files in this directory are uploaded into the cluster nodes for execution. \n",
|
||||
"* The training script name, train_titanic.py\n",
|
||||
"* The input Dataset for training\n",
|
||||
"* The input dataset for training\n",
|
||||
"* The compute target. In this case you will use the AmlCompute you created\n",
|
||||
"* The environment definition for the experiment"
|
||||
]
|
||||
@@ -348,9 +350,9 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Use Datasets to mount files to a remote compute\n",
|
||||
"## Use datasets to mount files to a remote compute\n",
|
||||
"\n",
|
||||
"You can use the Dataset object to mount or download files referred by it. When you mount a file system, you attach that file system to a directory (mount point) and make it available to the system. Because mounting load files at the time of processing, it is usually faster than download.<br> \n",
|
||||
"You can use the `Dataset` object to mount or download files referred by it. When you mount a file system, you attach that file system to a directory (mount point) and make it available to the system. Because mounting loads files at the time of processing, it is usually faster than download.<br> \n",
|
||||
"Note: mounting is only available for Linux-based compute (DSVM/VM, AMLCompute, HDInsights)."
|
||||
]
|
||||
},
|
||||
@@ -365,7 +367,6 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from sklearn.datasets import load_diabetes\n",
|
||||
@@ -396,7 +397,9 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create a FileDataset"
|
||||
"### Create a FileDataset\n",
|
||||
"\n",
|
||||
"[FileDataset](https://docs.microsoft.com/python/api/azureml-core/azureml.data.file_dataset.filedataset?view=azure-ml-py) references single or multiple files in your datastores or public URLs. Using this method, you can download or mount the files to your compute as a FileDataset object. The files can be in any format, which enables a wider range of machine learning scenarios, including deep learning."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -492,7 +495,7 @@
|
||||
"src = ScriptRunConfig(source_directory=script_folder, \n",
|
||||
" script='train_diabetes.py', \n",
|
||||
" # to mount the dataset on the remote compute and pass the mounted path as an argument to the training script\n",
|
||||
" arguments =[dataset.as_named_input('diabetes').as_mount('tmp/dataset')])\n",
|
||||
" arguments =[dataset.as_named_input('diabetes').as_mount()])\n",
|
||||
"\n",
|
||||
"src.run_config.framework = 'python'\n",
|
||||
"src.run_config.environment = conda_env\n",
|
||||
@@ -533,7 +536,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Register Datasets\n",
|
||||
"### Register datasets\n",
|
||||
"Use the register() method to register datasets to your workspace so they can be shared with others, reused across various experiments, and referred to by name in your training script."
|
||||
]
|
||||
},
|
||||
@@ -553,10 +556,10 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Register models with Datasets\n",
|
||||
"## Register models with datasets\n",
|
||||
"The last step in the training script wrote the model files in a directory named `outputs` in the VM of the cluster where the job is executed. `outputs` is a special directory in that all content in this directory is automatically uploaded to your workspace. This content appears in the run record in the experiment under your workspace. Hence, the model file is now also available in your workspace.\n",
|
||||
"\n",
|
||||
"You can register models with Datasets for reproducibility and auditing purpose."
|
||||
"You can register models with datasets for reproducibility and auditing purpose."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -642,9 +645,11 @@
|
||||
"featured"
|
||||
],
|
||||
"tags": [
|
||||
"Dataset"
|
||||
"Dataset",
|
||||
"Estimator",
|
||||
"ScriptRun"
|
||||
],
|
||||
"task": "Filtering"
|
||||
"task": "Train"
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
10
index.md
10
index.md
@@ -36,7 +36,11 @@ Machine Learning notebook samples and encourage efficient retrieval of topics an
|
||||
|
||||
| :star:[Filtering data using Tabular Timeseries Dataset related API](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/datasets-tutorial/tabular-timeseries-dataset-filtering.ipynb) | Filtering | NOAA | Local | None | Azure ML | Dataset, Tabular Timeseries |
|
||||
|
||||
| :star:[Train with Datasets (Tabular and File)](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/datasets-tutorial/train-with-datasets.ipynb) | Filtering | Iris, Diabetes | Remote | None | Azure ML | Dataset |
|
||||
| :star:[Introduction to labeled datasets](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/datasets-tutorial/labeled-datasets/labeled-datasets.ipynb) | Train | | Remote | None | Azure ML | Dataset, label, Estimator |
|
||||
|
||||
| :star:[Datasets with ML Pipeline](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/datasets-tutorial/pipeline-with-datasets/pipeline-for-image-classification.ipynb) | Train | Fashion MNIST | Remote | None | Azure ML | Dataset, Pipeline, Estimator, ScriptRun |
|
||||
|
||||
| :star:[Train with Datasets (Tabular and File)](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/work-with-data/datasets-tutorial/train-with-datasets/train-with-datasets.ipynb) | Train | Iris, Diabetes | Remote | None | Azure ML | Dataset, Estimator, ScriptRun |
|
||||
|
||||
| [Forecasting away from training data](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/forecasting-high-frequency/automl-forecasting-function.ipynb) | Forecasting | None | Remote | None | Azure ML AutoML | Forecasting, Confidence Intervals |
|
||||
|
||||
@@ -78,6 +82,8 @@ Machine Learning notebook samples and encourage efficient retrieval of topics an
|
||||
|
||||
| :star:[Azure Machine Learning Pipelines with Data Dependency](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-with-data-dependency-steps.ipynb) | Demonstrates how to construct a Pipeline with data dependency between steps | Custom | AML Compute | None | Azure ML | None |
|
||||
|
||||
| [How to run a notebook as a step in AML Pipelines](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-with-notebook-runner-step.ipynb) | Demonstrates the use of NotebookRunnerStep | Custom | AML Compute | None | Azure ML | None |
|
||||
|
||||
|
||||
## Training
|
||||
|
||||
@@ -215,6 +221,8 @@ Machine Learning notebook samples and encourage efficient retrieval of topics an
|
||||
|
||||
| [train-explain-model-on-amlcompute-and-deploy](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/explain-model/azure-integration/scoring-time/train-explain-model-on-amlcompute-and-deploy.ipynb) | | | | | | |
|
||||
|
||||
| [training_notebook](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/notebook_runner/training_notebook.ipynb) | | | | | | |
|
||||
|
||||
| [nyc-taxi-data-regression-model-building](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/machine-learning-pipelines/nyc-taxi-data-regression-model-building/nyc-taxi-data-regression-model-building.ipynb) | | | | | | |
|
||||
|
||||
| [pipeline-batch-scoring](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/machine-learning-pipelines/pipeline-batch-scoring/pipeline-batch-scoring.ipynb) | | | | | | |
|
||||
|
||||
@@ -102,7 +102,7 @@
|
||||
"source": [
|
||||
"import azureml.core\n",
|
||||
"\n",
|
||||
"print(\"This notebook was created using version 1.0.76.2 of the Azure ML SDK\")\n",
|
||||
"print(\"This notebook was created using version 1.0.81 of the Azure ML SDK\")\n",
|
||||
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -328,6 +328,7 @@
|
||||
"\n",
|
||||
"myenv = CondaDependencies()\n",
|
||||
"myenv.add_conda_package(\"scikit-learn\")\n",
|
||||
"myenv.add_pip_package(\"azureml-defaults\")\n",
|
||||
"\n",
|
||||
"with open(\"myenv.yml\",\"w\") as f:\n",
|
||||
" f.write(myenv.serialize_to_string())"
|
||||
@@ -387,13 +388,11 @@
|
||||
"\n",
|
||||
"Configure the image and deploy. The following code goes through these steps:\n",
|
||||
"\n",
|
||||
"1. Build an image using:\n",
|
||||
"1. Create environment object containing dependencies needed by the model using the environment file (`myenv.yml`)\n",
|
||||
"1. Create inference configuration necessary to deploy the model as a web service using:\n",
|
||||
" * The scoring file (`score.py`)\n",
|
||||
" * The environment file (`myenv.yml`)\n",
|
||||
" * The model file\n",
|
||||
"1. Register that image under the workspace. \n",
|
||||
"1. Send the image to the ACI container.\n",
|
||||
"1. Start up a container in ACI using the image.\n",
|
||||
" * environment object created in previous step\n",
|
||||
"1. Deploy the model to the ACI container.\n",
|
||||
"1. Get the web service HTTP endpoint."
|
||||
]
|
||||
},
|
||||
@@ -413,10 +412,11 @@
|
||||
"%%time\n",
|
||||
"from azureml.core.webservice import Webservice\n",
|
||||
"from azureml.core.model import InferenceConfig\n",
|
||||
"from azureml.core.environment import Environment\n",
|
||||
"\n",
|
||||
"inference_config = InferenceConfig(runtime= \"python\", \n",
|
||||
" entry_script=\"score.py\",\n",
|
||||
" conda_file=\"myenv.yml\")\n",
|
||||
"\n",
|
||||
"myenv = Environment.from_conda_specification(name=\"myenv\", file_path=\"myenv.yml\")\n",
|
||||
"inference_config = InferenceConfig(entry_script=\"score.py\", environment=myenv)\n",
|
||||
"\n",
|
||||
"service = Model.deploy(workspace=ws, \n",
|
||||
" name='sklearn-mnist-svc', \n",
|
||||
|
||||
Reference in New Issue
Block a user