Compare commits

...

4 Commits

| Author | SHA1 | Message | Date |
|---|---|---|---|
| amlrelsa-ms | 24f53f1aa1 | update samples from Release-105 as a part of SDK release | 2021-06-24 23:00:13 +00:00 |
| Harneet Virk | 6fc5d11de2 | Merge pull request #1518 from Azure/release_update/Release-104: update samples from Release-104 as a part of SDK release | 2021-06-21 10:29:53 -07:00 |
| amlrelsa-ms | d17547d890 | update samples from Release-104 as a part of SDK release | 2021-06-21 17:16:09 +00:00 |
| Harneet Virk | 928e0d4327 | Merge pull request #1510 from Azure/release_update/Release-103: update samples from Release-103 as a part of SDK release | 2021-06-14 10:33:34 -07:00 |
28 changed files with 599 additions and 210 deletions

View File

@@ -103,7 +103,7 @@
"source": [
"import azureml.core\n",
"\n",
"print(\"This notebook was created using version 1.30.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.31.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},
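This version banner recurs in almost every notebook touched by this release; only the pinned version string changes from 1.30.0 to 1.31.0. A minimal sketch of a stricter, programmatic check, assuming the common `packaging` library is available:

```python
from packaging.version import Version
import azureml.core

expected = Version("1.31.0")  # the version this release pins
current = Version(azureml.core.VERSION)
if current < expected:
    print(f"SDK {current} is older than the {expected} these samples were validated against.")
```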

View File

@@ -21,8 +21,8 @@ dependencies:
- pip:
# Required packages for AzureML execution, history, and data preparation.
- azureml-widgets~=1.30.0
- azureml-widgets~=1.31.0
- pytorch-transformers==1.0.0
- spacy==2.1.8
- https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
- -r https://automlresources-prod.azureedge.net/validated-requirements/1.30.0/validated_win32_requirements.txt [--no-deps]
- -r https://automlresources-prod.azureedge.net/validated-requirements/1.31.0/validated_win32_requirements.txt [--no-deps]

View File

@@ -21,8 +21,8 @@ dependencies:
- pip:
# Required packages for AzureML execution, history, and data preparation.
- azureml-widgets~=1.30.0
- azureml-widgets~=1.31.0
- pytorch-transformers==1.0.0
- spacy==2.1.8
- https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
- -r https://automlresources-prod.azureedge.net/validated-requirements/1.30.0/validated_linux_requirements.txt [--no-deps]
- -r https://automlresources-prod.azureedge.net/validated-requirements/1.31.0/validated_linux_requirements.txt [--no-deps]

View File

@@ -22,8 +22,8 @@ dependencies:
- pip:
# Required packages for AzureML execution, history, and data preparation.
- azureml-widgets~=1.30.0
- azureml-widgets~=1.31.0
- pytorch-transformers==1.0.0
- spacy==2.1.8
- https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
- -r https://automlresources-prod.azureedge.net/validated-requirements/1.30.0/validated_darwin_requirements.txt [--no-deps]
- -r https://automlresources-prod.azureedge.net/validated-requirements/1.31.0/validated_darwin_requirements.txt [--no-deps]
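The three environment hunks above (win32, linux, darwin) are identical except for which validated-requirements file they pin. A hedged sketch of resolving the matching URL at runtime; this helper is illustrative, not part of the samples:

```python
import sys

# sys.platform is "win32", "linux", or "darwin" on the platforms covered above.
base = "https://automlresources-prod.azureedge.net/validated-requirements/1.31.0"
url = f"{base}/validated_{sys.platform}_requirements.txt"
print(url)
```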

View File

@@ -105,7 +105,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.30.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.31.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -93,7 +93,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.30.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.31.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -96,7 +96,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.30.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.31.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -81,7 +81,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.30.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.31.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -49,6 +49,8 @@ print("Argument 1(ds_name): %s" % args.ds_name)
dstor = ws.get_default_datastore()
register_dataset = False
end_time = datetime.utcnow()
try:
ds = Dataset.get_by_name(ws, args.ds_name)
end_time_last_slice = ds.data_changed_time.replace(tzinfo=None)
@@ -58,9 +60,9 @@ except Exception:
print(traceback.format_exc())
print("Dataset with name {0} not found, registering new dataset.".format(args.ds_name))
register_dataset = True
end_time_last_slice = datetime.today() - relativedelta(weeks=4)
end_time = datetime(2021, 5, 1, 0, 0)
end_time_last_slice = end_time - relativedelta(weeks=2)
end_time = datetime.utcnow()
train_df = get_noaa_data(end_time_last_slice, end_time)
if train_df.size > 0:
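The change above appears to anchor both slice boundaries to a single `utcnow()` reading, so the fallback backfill window ends exactly where the data query ends. A minimal sketch of that windowing logic, under that reading of the hunk:

```python
from datetime import datetime
from dateutil.relativedelta import relativedelta

end_time = datetime.utcnow()
# Fallback when no dataset is registered yet: backfill the last two weeks.
end_time_last_slice = end_time - relativedelta(weeks=2)
print("Training slice:", end_time_last_slice, "->", end_time)
```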

View File

@@ -92,7 +92,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.30.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.31.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -91,7 +91,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.30.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.31.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -113,7 +113,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.30.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.31.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -87,7 +87,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.30.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.31.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -97,7 +97,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.30.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.31.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -94,7 +94,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.30.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.31.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -82,7 +82,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.30.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.31.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},
@@ -728,7 +728,7 @@
"X_query[time_column_name] = X_query[time_column_name].astype(str)\n",
"# The Service object accept the complex dictionary, which is internally converted to JSON string.\n",
"# The section 'data' contains the data frame in the form of dictionary.\n",
"test_sample = json.dumps({'data': X_query.to_dict(orient='records')})\n",
"test_sample = json.dumps({\"data\": json.loads(X_query.to_json(orient=\"records\"))})\n",
"response = aci_service.run(input_data = test_sample)\n",
"# translate from networkese to datascientese\n",
"try: \n",

View File

@@ -96,7 +96,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.30.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.31.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},
@@ -436,7 +436,8 @@
"\n",
"automl_explainer_setup_obj = automl_setup_model_explanations(fitted_model, X=X_train, \n",
" X_test=X_test, y=y_train, \n",
" task='classification')"
" task='classification',\n",
" automl_run=automl_run)"
]
},
{
@@ -453,11 +454,10 @@
"metadata": {},
"outputs": [],
"source": [
"from interpret.ext.glassbox import LGBMExplainableModel\n",
"from azureml.interpret.mimic_wrapper import MimicWrapper\n",
"explainer = MimicWrapper(ws, automl_explainer_setup_obj.automl_estimator,\n",
" explainable_model=automl_explainer_setup_obj.surrogate_model, \n",
" init_dataset=automl_explainer_setup_obj.X_transform, run=automl_run,\n",
" init_dataset=automl_explainer_setup_obj.X_transform, run=automl_explainer_setup_obj.automl_run,\n",
" features=automl_explainer_setup_obj.engineered_feature_names, \n",
" feature_maps=[automl_explainer_setup_obj.feature_map],\n",
" classes=automl_explainer_setup_obj.classes,\n",

View File

@@ -96,7 +96,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.30.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.31.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -50,11 +50,13 @@ X_test = test_dataset.drop_columns(columns=['<<target_column_name>>'])
# Setup the class for explaining the AutoML models
automl_explainer_setup_obj = automl_setup_model_explanations(fitted_model, '<<task>>',
X=X_train, X_test=X_test,
y=y_train)
y=y_train,
automl_run=automl_run)
# Initialize the Mimic Explainer
explainer = MimicWrapper(ws, automl_explainer_setup_obj.automl_estimator, LGBMExplainableModel,
init_dataset=automl_explainer_setup_obj.X_transform, run=automl_run,
init_dataset=automl_explainer_setup_obj.X_transform,
run=automl_explainer_setup_obj.automl_run,
features=automl_explainer_setup_obj.engineered_feature_names,
feature_maps=[automl_explainer_setup_obj.feature_map],
classes=automl_explainer_setup_obj.classes)
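Both explainability hunks make the same move: hand the AutoML run to `automl_setup_model_explanations` and let the returned setup object supply the surrogate model and the run, rather than importing `LGBMExplainableModel` and passing `automl_run` directly. A hedged sketch of the consolidated pattern; the import path follows the AutoML docs of this era, and `fitted_model`, `automl_run`, `X_train`, `X_test`, `y_train`, and `ws` are assumed to come from an earlier AutoML experiment:

```python
from azureml.train.automl.runtime.automl_explain_utilities import automl_setup_model_explanations
from azureml.interpret.mimic_wrapper import MimicWrapper

setup = automl_setup_model_explanations(fitted_model, task="classification",
                                        X=X_train, X_test=X_test, y=y_train,
                                        automl_run=automl_run)
explainer = MimicWrapper(ws, setup.automl_estimator,
                         explainable_model=setup.surrogate_model,
                         init_dataset=setup.X_transform, run=setup.automl_run,
                         features=setup.engineered_feature_names,
                         feature_maps=[setup.feature_map],
                         classes=setup.classes)
```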

View File

@@ -92,7 +92,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.30.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.31.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -0,0 +1,160 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Copyright (c) Microsoft Corporation. All rights reserved. \n",
"\n",
"Licensed under the MIT License."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/azure-arcadia/Synapse_Job_Scala_Support.png)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Get AML workspace which has synapse spark pool attached"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core import Workspace, Experiment, Dataset, Environment\n",
"\n",
"ws = Workspace.from_config()\n",
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Leverage ScriptRunConfig to submit scala job to an attached synapse spark cluster"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.runconfig import RunConfiguration\n",
"from azureml.data import HDFSOutputDatasetConfig\n",
"import uuid\n",
"\n",
"run_config = RunConfiguration(framework=\"pyspark\")\n",
"run_config.target = \"link-pool\"\n",
"run_config.spark.configuration[\"spark.driver.memory\"] = \"2g\"\n",
"run_config.spark.configuration[\"spark.driver.cores\"] = 2\n",
"run_config.spark.configuration[\"spark.executor.memory\"] = \"2g\"\n",
"run_config.spark.configuration[\"spark.executor.cores\"] = 1\n",
"run_config.spark.configuration[\"spark.executor.instances\"] = 1\n",
"\n",
"run_config.spark.configuration[\"spark.yarn.dist.jars\"]=\"wasbs://synapse@azuremlexamples.blob.core.windows.net/shared/wordcount.jar\" # this can be removed if you are using local jars in source folder\n",
"\n",
"dir_name = \"wordcount-{}\".format(str(uuid.uuid4()))\n",
"input = \"wasbs://synapse@azuremlexamples.blob.core.windows.net/shared/shakespeare.txt\"\n",
"output = HDFSOutputDatasetConfig(destination=(ws.get_default_datastore(), \"{}/result\".format(dir_name)))\n",
"\n",
"from azureml.core import ScriptRunConfig\n",
"args = ['--input', input, '--output', output]\n",
"script_run_config = ScriptRunConfig(source_directory = '.',\n",
" script= 'start_script.py',\n",
" arguments= args,\n",
" run_config = run_config)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core import Experiment\n",
"exp = Experiment(workspace=ws, name='synapse-spark')\n",
"run = exp.submit(config=script_run_config)\n",
"run"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Leverage SynapseSparkStep in an AML pipeline to add dataprep step on synapse spark cluster"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.pipeline.core import Pipeline\n",
"from azureml.pipeline.steps import SynapseSparkStep\n",
"\n",
"configs = {}\n",
"configs[\"spark.yarn.dist.jars\"] = \"wasbs://synapse@azuremlexamples.blob.core.windows.net/shared/wordcount.jar\"\n",
"step_1 = SynapseSparkStep(name = 'synapse-spark',\n",
" file = 'start_script.py',\n",
" source_directory=\".\",\n",
" arguments = args,\n",
" compute_target = 'link-pool',\n",
" driver_memory = \"2g\",\n",
" driver_cores = 2,\n",
" executor_memory = \"2g\",\n",
" executor_cores = 1,\n",
" num_executors = 1,\n",
" conf = configs)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pipeline = Pipeline(workspace=ws, steps=[step_1])\n",
"pipeline_run = pipeline.submit('synapse-pipeline', regenerate_outputs=True)"
]
}
],
"metadata": {
"authors": [
{
"name": "feli1"
}
],
"kernelspec": {
"display_name": "Python 3.6",
"language": "python",
"name": "python36"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
},
"nteract": {
"version": "0.28.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
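The run handle returned by `exp.submit` behaves like any other AzureML run, so the Spark job added in this file can be monitored from the notebook. A minimal sketch:

```python
# Block until the submitted Spark job finishes, streaming driver output.
run.wait_for_completion(show_output=True)
print("Status:", run.get_status())
```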

View File

@@ -0,0 +1,240 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Copyright (c) Microsoft Corporation. All rights reserved. \n",
"\n",
"Licensed under the MIT License."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/azure-arcadia/Synapse_Session_Scala_Support.png)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Interactive Spark Session on Synapse Spark Pool"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!pip install -U \"azureml-synapse\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"For JupyterLab, please additionally run:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!jupyter lab build --minimize=False"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## PLEASE restart kernel and then refresh web page before starting spark session."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 0. Magic Usage"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2020-06-05T03:22:14.965395Z",
"iopub.status.busy": "2020-06-05T03:22:14.965395Z",
"iopub.status.idle": "2020-06-05T03:22:14.970398Z",
"shell.execute_reply": "2020-06-05T03:22:14.969397Z",
"shell.execute_reply.started": "2020-06-05T03:22:14.965395Z"
},
"gather": {
"logged": 1615594584642
}
},
"outputs": [],
"source": [
"# show help\n",
"%synapse ?"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 1. Start Synapse Session"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1615577715289
}
},
"outputs": [],
"source": [
"%synapse start -c linktestpool --start-timeout 1000"
]
},
{
"cell_type": "markdown",
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
},
"source": [
"# 2. Use Scala"
]
},
{
"cell_type": "markdown",
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
},
"source": [
"## (1) Read Data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"%%synapse scala\n",
"\n",
"var df = spark.read.option(\"header\", \"true\").csv(\"wasbs://demo@dprepdata.blob.core.windows.net/Titanic.csv\")\n",
"df.show(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## (2) Use Scala Sql"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"%%synapse scala\n",
"\n",
"df.createOrReplaceTempView(\"titanic\")\n",
"var sqlDF = spark.sql(\"SELECT Name, Fare from titanic\")\n",
"sqlDF.show(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Stop Session"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"%synapse stop"
]
}
],
"metadata": {
"authors": [
{
"name": "feli1"
}
],
"kernelspec": {
"display_name": "Python 3.6",
"language": "python",
"name": "python36"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
},
"nteract": {
"version": "0.28.0"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
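The session magic is not limited to Scala. Assuming the `%%synapse` cell magic defaults to PySpark when no language argument is given, the Python equivalent of the read cell above would be a sketch like:

```python
%%synapse

df = spark.read.option("header", "true").csv("wasbs://demo@dprepdata.blob.core.windows.net/Titanic.csv")
df.show(5)
```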

View File

@@ -0,0 +1,18 @@
from pyspark.sql import SparkSession
import argparse

# Parse the --input/--output arguments passed by ScriptRunConfig / SynapseSparkStep.
parser = argparse.ArgumentParser()
parser.add_argument("--input", default="")
parser.add_argument("--output", default="")
args, unparsed = parser.parse_known_args()

spark = SparkSession.builder.getOrCreate()
sc = spark.sparkContext

# Build a java.lang.String[] through the py4j gateway and invoke the main()
# of the WordCount class shipped in wordcount.jar (see spark.yarn.dist.jars).
arr = sc._gateway.new_array(sc._jvm.java.lang.String, 2)
arr[0] = args.input
arr[1] = args.output
obj = sc._jvm.WordCount
obj.main(arr)

View File

@@ -451,9 +451,8 @@
"metadata": {},
"source": [
"### Create a dataset of training artifacts\n",
"To evaluate a trained policy (a checkpoint) we need to make the checkpoint accessible to the rollout script. All the training artifacts are stored in workspace default datastore under **azureml/&lt;run_id&gt;** directory.\n",
"\n",
"Here we create a file dataset from the stored artifacts, and then use this dataset to feed these data to rollout estimator."
"To evaluate a trained policy (a checkpoint) we need to make the checkpoint accessible to the rollout script.\n",
"We can use the Run API to download policy training artifacts (saved model and checkpoints) to local compute."
]
},
{
@@ -462,22 +461,24 @@
"metadata": {},
"outputs": [],
"source": [
"from azureml.core import Dataset\n",
"from os import path\n",
"from distutils import dir_util\n",
"\n",
"run_id = child_run_0.id # Or set to run id of a completed run (e.g. 'rl-cartpole-v0_1587572312_06e04ace_head')\n",
"run_artifacts_path = os.path.join('azureml', run_id)\n",
"print(\"Run artifacts path:\", run_artifacts_path)\n",
"training_artifacts_path = path.join(\"logs\", training_algorithm)\n",
"print(\"Training artifacts path:\", training_artifacts_path)\n",
"\n",
"# Create a file dataset object from the files stored on default datastore\n",
"datastore = ws.get_default_datastore()\n",
"training_artifacts_ds = Dataset.File.from_files(datastore.path(os.path.join(run_artifacts_path, '**')))"
"if path.exists(training_artifacts_path):\n",
" dir_util.remove_tree(training_artifacts_path)\n",
"\n",
"# Download run artifacts to local compute\n",
"child_run_0.download_files(training_artifacts_path)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"To verify, we can print out the number (and paths) of all the files in the dataset, as follows."
"Now let's find the checkpoints and the last checkpoint number."
]
},
{
@@ -486,7 +487,73 @@
"metadata": {},
"outputs": [],
"source": [
"artifacts_paths = training_artifacts_ds.to_path()\n",
"# A helper function to find checkpoint files in a directory\n",
"def find_checkpoints(file_path):\n",
" print(\"Looking in path:\", file_path)\n",
" checkpoints = []\n",
" for root, _, files in os.walk(file_path):\n",
" for name in files:\n",
" if os.path.basename(root).startswith('checkpoint_'):\n",
" checkpoints.append(path.join(root, name))\n",
" return checkpoints"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Find checkpoints and last checkpoint number\n",
"checkpoint_files = find_checkpoints(training_artifacts_path)\n",
"\n",
"checkpoint_numbers = []\n",
"for file in checkpoint_files:\n",
" file = os.path.basename(file)\n",
" if file.startswith('checkpoint-') and not file.endswith('.tune_metadata'):\n",
" checkpoint_numbers.append(int(file.split('-')[1]))\n",
"\n",
"print(\"Checkpoints:\", checkpoint_numbers)\n",
"\n",
"last_checkpoint_number = max(checkpoint_numbers)\n",
"print(\"Last checkpoint number:\", last_checkpoint_number)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now we upload checkpoints to default datastore and create a file dataset. This dataset will be used to pass in the checkpoints to the rollout script."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Upload the checkpoint files and create a DataSet\n",
"from azureml.core import Dataset\n",
"\n",
"datastore = ws.get_default_datastore()\n",
"checkpoint_dataref = datastore.upload_files(checkpoint_files, target_path='cartpole_checkpoints_' + run_id, overwrite=True)\n",
"checkpoint_ds = Dataset.File.from_files(checkpoint_dataref)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"To verify, we can print out the number (and paths) of all the files in the dataset."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"artifacts_paths = checkpoint_ds.to_path()\n",
"print(\"Number of files in dataset:\", len(artifacts_paths))\n",
"\n",
"# Uncomment line below to print all file paths\n",
@@ -505,36 +572,6 @@
"\n",
"The checkpoints dataset will be accessible to the rollout script as a mounted folder. The mounted folder and the checkpoint number, passed in via `checkpoint-number`, will be used to create a path to the checkpoint we are going to evaluate. The created checkpoint path then will be passed into RLlib rollout script for evaluation.\n",
"\n",
"Let's find the checkpoints and the last checkpoint number first."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Find checkpoints and last checkpoint number\n",
"checkpoint_files = [\n",
" os.path.basename(file) for file in training_artifacts_ds.to_path() \\\n",
" if os.path.basename(file).startswith('checkpoint-') and \\\n",
" not os.path.basename(file).endswith('tune_metadata')\n",
"]\n",
"\n",
"checkpoint_numbers = []\n",
"for file in checkpoint_files:\n",
" checkpoint_numbers.append(int(file.split('-')[1]))\n",
"\n",
"print(\"Checkpoints:\", checkpoint_numbers)\n",
"\n",
"last_checkpoint_number = max(checkpoint_numbers)\n",
"print(\"Last checkpoint number:\", last_checkpoint_number)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now let's configure rollout estimator. Note that we use the last checkpoint for evaluation. The assumption is that the last checkpoint points to our best trained agent. You may change this to any of the checkpoint numbers printed above and observe the effect."
]
},
@@ -576,8 +613,8 @@
" \n",
" # Data inputs\n",
" inputs=[\n",
" training_artifacts_ds.as_named_input('artifacts_dataset'),\n",
" training_artifacts_ds.as_named_input('artifacts_path').as_mount()],\n",
" checkpoint_ds.as_named_input('artifacts_dataset'),\n",
" checkpoint_ds.as_named_input('artifacts_path').as_mount()],\n",
" \n",
" # The Azure Machine Learning compute target\n",
" compute_target=compute_target,\n",

View File

@@ -474,61 +474,14 @@
"from os import path\n",
"from distutils import dir_util\n",
"\n",
"path_prefix = path.join(\"logs\", training_algorithm)\n",
"print(\"Path prefix:\", path_prefix)\n",
"training_artifacts_path = path.join(\"logs\", training_algorithm)\n",
"print(\"Training artifacts path:\", training_artifacts_path)\n",
"\n",
"if path.exists(path_prefix):\n",
" dir_util.remove_tree(path_prefix)\n",
"if path.exists(training_artifacts_path):\n",
" dir_util.remove_tree(training_artifacts_path)\n",
"\n",
"# Uncomment line below to download run artifacts to local compute\n",
"#child_run_0.download_files(path_prefix)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Create a dataset of training artifacts\n",
"To evaluate a trained policy (a checkpoint) we need to make the checkpoint accessible to the rollout script. All the training artifacts are stored in workspace default datastore under **azureml/&lt;run_id&gt;** directory.\n",
"\n",
"Here we create a file dataset from the stored artifacts, and then use this dataset to feed these data to rollout estimator."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core import Dataset\n",
"\n",
"run_id = child_run_0.id # Or set to run id of a completed run (e.g. 'rl-cartpole-v0_1587572312_06e04ace_head')\n",
"run_artifacts_path = os.path.join('azureml', run_id)\n",
"print(\"Run artifacts path:\", run_artifacts_path)\n",
"\n",
"# Create a file dataset object from the files stored on default datastore\n",
"datastore = ws.get_default_datastore()\n",
"training_artifacts_ds = Dataset.File.from_files(datastore.path(os.path.join(run_artifacts_path, '**')))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"To verify, we can print out the number (and paths) of all the files in the dataset, as follows."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"artifacts_paths = training_artifacts_ds.to_path()\n",
"print(\"Number of files in dataset:\", len(artifacts_paths))\n",
"\n",
"# Uncomment line below to print all file paths\n",
"#print(\"Artifacts dataset file paths: \", artifacts_paths)"
"# Download run artifacts to local compute\n",
"child_run_0.download_files(training_artifacts_path)"
]
},
{
@@ -550,21 +503,6 @@
"source": [
"import shutil\n",
"\n",
"# A helper function to download movies from a dataset to local directory\n",
"def download_movies(artifacts_ds, movies, destination):\n",
" # Create the local destination directory \n",
" if path.exists(destination):\n",
" dir_util.remove_tree(destination)\n",
" dir_util.mkpath(destination)\n",
"\n",
" for i, artifact in enumerate(artifacts_ds.to_path()):\n",
" if artifact in movies:\n",
" print('Downloading {} ...'.format(artifact))\n",
" artifacts_ds.skip(i).take(1).download(target_path=destination, overwrite=True)\n",
"\n",
" print('Downloading movies completed!')\n",
"\n",
"\n",
"# A helper function to find movies in a directory\n",
"def find_movies(movie_path):\n",
" print(\"Looking in path:\", movie_path)\n",
@@ -590,34 +528,6 @@
" )"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now let's find the first and the last recorded videos in training artifacts dataset and download them to a local directory."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Find first and last movie\n",
"mp4_files = [file for file in training_artifacts_ds.to_path() if file.endswith('.mp4')]\n",
"mp4_files.sort()\n",
"\n",
"first_movie = mp4_files[0] if len(mp4_files) > 0 else None\n",
"last_movie = mp4_files[-1] if len(mp4_files) > 1 else None\n",
"\n",
"print(\"First movie:\", first_movie)\n",
"print(\"Last movie:\", last_movie)\n",
"\n",
"# Download movies\n",
"training_movies_path = path.join(\"training\", \"videos\")\n",
"download_movies(training_artifacts_ds, [first_movie, last_movie], training_movies_path)"
]
},
{
"cell_type": "markdown",
"metadata": {},
@@ -631,7 +541,7 @@
"metadata": {},
"outputs": [],
"source": [
"mp4_files = find_movies(training_movies_path)\n",
"mp4_files = find_movies(training_artifacts_path)\n",
"mp4_files.sort()"
]
},
@@ -704,16 +614,31 @@
"metadata": {},
"outputs": [],
"source": [
"# Find checkpoints and last checkpoint number\n",
"checkpoint_files = [\n",
" os.path.basename(file) for file in training_artifacts_ds.to_path() \\\n",
" if os.path.basename(file).startswith('checkpoint-') and \\\n",
" not os.path.basename(file).endswith('tune_metadata')\n",
"]\n",
"# A helper function to find checkpoint files in a directory\n",
"def find_checkpoints(file_path):\n",
" print(\"Looking in path:\", file_path)\n",
" checkpoints = []\n",
" for root, _, files in os.walk(file_path):\n",
" for name in files:\n",
" if os.path.basename(root).startswith('checkpoint_'):\n",
" checkpoints.append(path.join(root, name))\n",
" return checkpoints\n",
"\n",
"checkpoint_files = find_checkpoints(training_artifacts_path)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Find checkpoints and last checkpoint number\n",
"checkpoint_numbers = []\n",
"for file in checkpoint_files:\n",
" checkpoint_numbers.append(int(file.split('-')[1]))\n",
" file = os.path.basename(file)\n",
" if file.startswith('checkpoint-') and not file.endswith('.tune_metadata'):\n",
" checkpoint_numbers.append(int(file.split('-')[-1]))\n",
"\n",
"print(\"Checkpoints:\", checkpoint_numbers)\n",
"\n",
@@ -721,6 +646,20 @@
"print(\"Last checkpoint number:\", last_checkpoint_number)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Upload the checkpoint files and create a DataSet\n",
"from azureml.core import Dataset\n",
"\n",
"datastore = ws.get_default_datastore()\n",
"checkpoint_dataref = datastore.upload_files(checkpoint_files, target_path='cartpole_checkpoints_' + run_id, overwrite=True)\n",
"checkpoint_ds = Dataset.File.from_files(checkpoint_dataref)"
]
},
{
"cell_type": "markdown",
"metadata": {},
@@ -796,8 +735,8 @@
" \n",
" # Data inputs\n",
" inputs=[\n",
" training_artifacts_ds.as_named_input('artifacts_dataset'),\n",
" training_artifacts_ds.as_named_input('artifacts_path').as_mount()],\n",
" checkpoint_ds.as_named_input('artifacts_dataset'),\n",
" checkpoint_ds.as_named_input('artifacts_path').as_mount()],\n",
" \n",
" # The Azure Machine Learning compute target set up for Ray head nodes\n",
" compute_target=compute_target,\n",
@@ -879,16 +818,15 @@
"print('Number of child runs:', len(child_runs))\n",
"child_run_0 = child_runs[0]\n",
"\n",
"run_id = child_run_0.id # Or set to run id of a completed run (e.g. 'rl-cartpole-v0_1587572312_06e04ace_head')\n",
"run_artifacts_path = os.path.join('azureml', run_id)\n",
"print(\"Run artifacts path:\", run_artifacts_path)\n",
"# Download rollout artifacts\n",
"rollout_artifacts_path = path.join(\"logs\", \"rollout\")\n",
"print(\"Rollout artifacts path:\", rollout_artifacts_path)\n",
"\n",
"# Create a file dataset object from the files stored on default datastore\n",
"datastore = ws.get_default_datastore()\n",
"rollout_artifacts_ds = Dataset.File.from_files(datastore.path(os.path.join(run_artifacts_path, '**')))\n",
"if path.exists(rollout_artifacts_path):\n",
" dir_util.remove_tree(rollout_artifacts_path)\n",
"\n",
"artifacts_paths = rollout_artifacts_ds.to_path()\n",
"print(\"Number of files in dataset:\", len(artifacts_paths))"
"# Download videos to local compute\n",
"child_run_0.download_files(\"logs/video\", output_directory = rollout_artifacts_path)"
]
},
{
@@ -904,20 +842,11 @@
"metadata": {},
"outputs": [],
"source": [
"# Find last movie\n",
"mp4_files = [file for file in rollout_artifacts_ds.to_path() if file.endswith('.mp4')]\n",
"mp4_files.sort()\n",
"\n",
"last_movie = mp4_files[-1] if len(mp4_files) > 1 else None\n",
"print(\"Last movie:\", last_movie)\n",
"\n",
"# Download last movie\n",
"rollout_movies_path = path.join(\"rollout\", \"videos\")\n",
"download_movies(rollout_artifacts_ds, [last_movie], rollout_movies_path)\n",
"\n",
"# Look for the downloaded movie in local directory\n",
"mp4_files = find_movies(rollout_movies_path)\n",
"mp4_files.sort()"
"mp4_files = find_movies(rollout_artifacts_path)\n",
"mp4_files.sort()\n",
"last_movie = mp4_files[-1] if len(mp4_files) > 1 else None\n",
"print(\"Last movie:\", last_movie)"
]
},
{
@@ -960,16 +889,12 @@
"#compute_target.delete()\n",
"\n",
"# To delete downloaded training artifacts\n",
"#if os.path.exists(path_prefix):\n",
"# dir_util.remove_tree(path_prefix)\n",
"\n",
"# To delete downloaded training videos\n",
"#if path.exists(training_movies_path):\n",
"# dir_util.remove_tree(training_movies_path)\n",
"#if os.path.exists(training_artifacts_path):\n",
"# dir_util.remove_tree(training_artifacts_path)\n",
"\n",
"# To delete downloaded rollout videos\n",
"#if path.exists(rollout_movies_path):\n",
"# dir_util.remove_tree(rollout_movies_path)"
"#if path.exists(rollout_artifacts_path):\n",
"# dir_util.remove_tree(rollout_artifacts_path)"
]
},
{
@@ -986,6 +911,9 @@
"authors": [
{
"name": "hoazari"
},
{
"name": "dasommer"
}
],
"kernelspec": {

View File

@@ -100,7 +100,7 @@
"\n",
"# Check core SDK version number\n",
"\n",
"print(\"This notebook was created using SDK version 1.30.0, you are currently running version\", azureml.core.VERSION)"
"print(\"This notebook was created using SDK version 1.31.0, you are currently running version\", azureml.core.VERSION)"
]
},
{

View File

@@ -112,6 +112,8 @@ Machine Learning notebook samples and encourage efficient retrieval of topics an
| [automl-databricks-local-with-deployment](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/azure-databricks/automl/automl-databricks-local-with-deployment.ipynb) | | | | | | |
| [spark_job_on_synapse_spark_pool](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/azure-synapse/spark_job_on_synapse_spark_pool.ipynb) | | | | | | |
| [spark_session_on_synapse_spark_pool](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/azure-synapse/spark_session_on_synapse_spark_pool.ipynb) | | | | | | |
| [Synapse_Job_Scala_Support](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/azure-synapse/Synapse_Job_Scala_Support.ipynb) | | | | | | |
| [Synapse_Session_Scala_Support](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/azure-synapse/Synapse_Session_Scala_Support.ipynb) | | | | | | |
| [multi-model-register-and-deploy](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/deployment/deploy-multi-model/multi-model-register-and-deploy.ipynb) | | | | | | |
| [register-model-deploy-local-advanced](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/deployment/deploy-to-local/register-model-deploy-local-advanced.ipynb) | | | | | | |
| [enable-app-insights-in-production-service](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/deployment/enable-app-insights-in-production-service/enable-app-insights-in-production-service.ipynb) | | | | | | |

View File

@@ -102,7 +102,7 @@
"source": [
"import azureml.core\n",
"\n",
"print(\"This notebook was created using version 1.30.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.31.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},