update samples from Release-104 as a part of SDK release

This commit is contained in:
amlrelsa-ms
2021-06-21 17:16:09 +00:00
parent 928e0d4327
commit d17547d890
26 changed files with 455 additions and 31 deletions

View File

@@ -103,7 +103,7 @@
"source": [
"import azureml.core\n",
"\n",
"print(\"This notebook was created using version 1.30.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.31.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -21,8 +21,8 @@ dependencies:
- pip:
# Required packages for AzureML execution, history, and data preparation.
- azureml-widgets~=1.30.0
- azureml-widgets~=1.31.0
- pytorch-transformers==1.0.0
- spacy==2.1.8
- https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
- -r https://automlresources-prod.azureedge.net/validated-requirements/1.30.0/validated_win32_requirements.txt [--no-deps]
- -r https://automlresources-prod.azureedge.net/validated-requirements/1.31.0/validated_win32_requirements.txt [--no-deps]

View File

@@ -21,8 +21,8 @@ dependencies:
- pip:
# Required packages for AzureML execution, history, and data preparation.
- azureml-widgets~=1.30.0
- azureml-widgets~=1.31.0
- pytorch-transformers==1.0.0
- spacy==2.1.8
- https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
- -r https://automlresources-prod.azureedge.net/validated-requirements/1.30.0/validated_linux_requirements.txt [--no-deps]
- -r https://automlresources-prod.azureedge.net/validated-requirements/1.31.0/validated_linux_requirements.txt [--no-deps]

View File

@@ -22,8 +22,8 @@ dependencies:
- pip:
# Required packages for AzureML execution, history, and data preparation.
- azureml-widgets~=1.30.0
- azureml-widgets~=1.31.0
- pytorch-transformers==1.0.0
- spacy==2.1.8
- https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
- -r https://automlresources-prod.azureedge.net/validated-requirements/1.30.0/validated_darwin_requirements.txt [--no-deps]
- -r https://automlresources-prod.azureedge.net/validated-requirements/1.31.0/validated_darwin_requirements.txt [--no-deps]

View File

@@ -105,7 +105,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.30.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.31.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -93,7 +93,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.30.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.31.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -96,7 +96,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.30.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.31.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -81,7 +81,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.30.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.31.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -49,6 +49,8 @@ print("Argument 1(ds_name): %s" % args.ds_name)
dstor = ws.get_default_datastore()
register_dataset = False
end_time = datetime.utcnow()
try:
ds = Dataset.get_by_name(ws, args.ds_name)
end_time_last_slice = ds.data_changed_time.replace(tzinfo=None)
@@ -58,9 +60,9 @@ except Exception:
print(traceback.format_exc())
print("Dataset with name {0} not found, registering new dataset.".format(args.ds_name))
register_dataset = True
end_time_last_slice = datetime.today() - relativedelta(weeks=4)
end_time = datetime(2021, 5, 1, 0, 0)
end_time_last_slice = end_time - relativedelta(weeks=2)
end_time = datetime.utcnow()
train_df = get_noaa_data(end_time_last_slice, end_time)
if train_df.size > 0:

View File

@@ -92,7 +92,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.30.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.31.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -91,7 +91,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.30.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.31.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -113,7 +113,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.30.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.31.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -87,7 +87,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.30.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.31.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -97,7 +97,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.30.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.31.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -94,7 +94,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.30.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.31.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -82,7 +82,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.30.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.31.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},
@@ -728,7 +728,7 @@
"X_query[time_column_name] = X_query[time_column_name].astype(str)\n",
"# The Service object accepts the complex dictionary, which is internally converted to a JSON string.\n",
"# The section 'data' contains the data frame in the form of dictionary.\n",
"test_sample = json.dumps({'data': X_query.to_dict(orient='records')})\n",
"test_sample = json.dumps({\"data\": json.loads(X_query.to_json(orient=\"records\"))})\n",
"response = aci_service.run(input_data = test_sample)\n",
"# translate from networkese to datascientese\n",
"try: \n",

View File

@@ -96,7 +96,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.30.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.31.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},
@@ -436,7 +436,8 @@
"\n",
"automl_explainer_setup_obj = automl_setup_model_explanations(fitted_model, X=X_train, \n",
" X_test=X_test, y=y_train, \n",
" task='classification')"
" task='classification',\n",
" automl_run=automl_run)"
]
},
{
@@ -453,11 +454,10 @@
"metadata": {},
"outputs": [],
"source": [
"from interpret.ext.glassbox import LGBMExplainableModel\n",
"from azureml.interpret.mimic_wrapper import MimicWrapper\n",
"explainer = MimicWrapper(ws, automl_explainer_setup_obj.automl_estimator,\n",
" explainable_model=automl_explainer_setup_obj.surrogate_model, \n",
" init_dataset=automl_explainer_setup_obj.X_transform, run=automl_run,\n",
" init_dataset=automl_explainer_setup_obj.X_transform, run=automl_explainer_setup_obj.automl_run,\n",
" features=automl_explainer_setup_obj.engineered_feature_names, \n",
" feature_maps=[automl_explainer_setup_obj.feature_map],\n",
" classes=automl_explainer_setup_obj.classes,\n",

View File

@@ -96,7 +96,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.30.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.31.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -50,11 +50,13 @@ X_test = test_dataset.drop_columns(columns=['<<target_column_name>>'])
# Setup the class for explaining the AutoML models
automl_explainer_setup_obj = automl_setup_model_explanations(fitted_model, '<<task>>',
X=X_train, X_test=X_test,
y=y_train)
y=y_train,
automl_run=automl_run)
# Initialize the Mimic Explainer
explainer = MimicWrapper(ws, automl_explainer_setup_obj.automl_estimator, LGBMExplainableModel,
init_dataset=automl_explainer_setup_obj.X_transform, run=automl_run,
init_dataset=automl_explainer_setup_obj.X_transform,
run=automl_explainer_setup_obj.automl_run,
features=automl_explainer_setup_obj.engineered_feature_names,
feature_maps=[automl_explainer_setup_obj.feature_map],
classes=automl_explainer_setup_obj.classes)

View File

@@ -92,7 +92,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.30.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.31.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -0,0 +1,160 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Copyright (c) Microsoft Corporation. All rights reserved. \n",
"\n",
"Licensed under the MIT License."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/azure-arcadia/Synapse_Job_Scala_Support.png)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Get the AML workspace that has a Synapse Spark pool attached"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core import Workspace, Experiment, Dataset, Environment\n",
"\n",
"ws = Workspace.from_config()\n",
"print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\\n')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Leverage ScriptRunConfig to submit a Scala job to an attached Synapse Spark cluster"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core.runconfig import RunConfiguration\n",
"from azureml.data import HDFSOutputDatasetConfig\n",
"import uuid\n",
"\n",
"run_config = RunConfiguration(framework=\"pyspark\")\n",
"run_config.target = \"link-pool\"\n",
"run_config.spark.configuration[\"spark.driver.memory\"] = \"2g\"\n",
"run_config.spark.configuration[\"spark.driver.cores\"] = 2\n",
"run_config.spark.configuration[\"spark.executor.memory\"] = \"2g\"\n",
"run_config.spark.configuration[\"spark.executor.cores\"] = 1\n",
"run_config.spark.configuration[\"spark.executor.instances\"] = 1\n",
"\n",
"run_config.spark.configuration[\"spark.yarn.dist.jars\"]=\"wasbs://synapse@azuremlexamples.blob.core.windows.net/shared/wordcount.jar\" # this can be removed if you are using local jars in source folder\n",
"\n",
"dir_name = \"wordcount-{}\".format(str(uuid.uuid4()))\n",
"input = \"wasbs://synapse@azuremlexamples.blob.core.windows.net/shared/shakespeare.txt\"\n",
"output = HDFSOutputDatasetConfig(destination=(ws.get_default_datastore(), \"{}/result\".format(dir_name)))\n",
"\n",
"from azureml.core import ScriptRunConfig\n",
"args = ['--input', input, '--output', output]\n",
"script_run_config = ScriptRunConfig(source_directory = '.',\n",
" script= 'start_script.py',\n",
" arguments= args,\n",
" run_config = run_config)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.core import Experiment\n",
"exp = Experiment(workspace=ws, name='synapse-spark')\n",
"run = exp.submit(config=script_run_config)\n",
"run"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Leverage SynapseSparkStep in an AML pipeline to add a dataprep step on a Synapse Spark cluster"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from azureml.pipeline.core import Pipeline\n",
"from azureml.pipeline.steps import SynapseSparkStep\n",
"\n",
"configs = {}\n",
"configs[\"spark.yarn.dist.jars\"] = \"wasbs://synapse@azuremlexamples.blob.core.windows.net/shared/wordcount.jar\"\n",
"step_1 = SynapseSparkStep(name = 'synapse-spark',\n",
" file = 'start_script.py',\n",
" source_directory=\".\",\n",
" arguments = args,\n",
" compute_target = 'link-pool',\n",
" driver_memory = \"2g\",\n",
" driver_cores = 2,\n",
" executor_memory = \"2g\",\n",
" executor_cores = 1,\n",
" num_executors = 1,\n",
" conf = configs)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pipeline = Pipeline(workspace=ws, steps=[step_1])\n",
"pipeline_run = pipeline.submit('synapse-pipeline', regenerate_outputs=True)"
]
}
],
"metadata": {
"authors": [
{
"name": "feli1"
}
],
"kernelspec": {
"display_name": "Python 3.6",
"language": "python",
"name": "python36"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
},
"nteract": {
"version": "0.28.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -0,0 +1,240 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Copyright (c) Microsoft Corporation. All rights reserved. \n",
"\n",
"Licensed under the MIT License."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/azure-arcadia/Synapse_Session_Scala_Support.png)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Interactive Spark Session on Synapse Spark Pool"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!pip install -U \"azureml-synapse\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"For JupyterLab, please additionally run:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!jupyter lab build --minimize=False"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## PLEASE restart the kernel and then refresh the web page before starting the Spark session."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 0. Magic Usage"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"execution": {
"iopub.execute_input": "2020-06-05T03:22:14.965395Z",
"iopub.status.busy": "2020-06-05T03:22:14.965395Z",
"iopub.status.idle": "2020-06-05T03:22:14.970398Z",
"shell.execute_reply": "2020-06-05T03:22:14.969397Z",
"shell.execute_reply.started": "2020-06-05T03:22:14.965395Z"
},
"gather": {
"logged": 1615594584642
}
},
"outputs": [],
"source": [
"# show help\n",
"%synapse ?"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 1. Start Synapse Session"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1615577715289
}
},
"outputs": [],
"source": [
"%synapse start -c linktestpool --start-timeout 1000"
]
},
{
"cell_type": "markdown",
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
},
"source": [
"# 2. Use Scala"
]
},
{
"cell_type": "markdown",
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
},
"source": [
"## (1) Read Data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"%%synapse scala\n",
"\n",
"var df = spark.read.option(\"header\", \"true\").csv(\"wasbs://demo@dprepdata.blob.core.windows.net/Titanic.csv\")\n",
"df.show(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## (2) Use Scala SQL"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"%%synapse scala\n",
"\n",
"df.createOrReplaceTempView(\"titanic\")\n",
"var sqlDF = spark.sql(\"SELECT Name, Fare from titanic\")\n",
"sqlDF.show(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Stop Session"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"%synapse stop"
]
}
],
"metadata": {
"authors": [
{
"name": "feli1"
}
],
"kernelspec": {
"display_name": "Python 3.6",
"language": "python",
"name": "python36"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
},
"nteract": {
"version": "0.28.0"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

View File

@@ -0,0 +1,18 @@
"""Driver script: hand ``--input``/``--output`` to the JVM ``WordCount`` entry point.

The script parses two optional string arguments, obtains (or creates) a
SparkSession, copies the two values into a two-element Java ``String[]``
through the Py4J gateway and calls ``WordCount.main`` on the JVM side.

NOTE(review): ``WordCount`` is not defined here; it is presumably supplied by
a jar on the Spark classpath -- confirm against the job configuration.
"""
import argparse

from pyspark.sql import SparkSession

# Both flags default to the empty string; parse_known_args keeps any
# unrecognised command-line arguments from aborting the script.
cli = argparse.ArgumentParser()
for flag in ("--input", "--output"):
    cli.add_argument(flag, default="")
args, _ignored = cli.parse_known_args()

spark = SparkSession.builder.getOrCreate()
sc = spark.sparkContext

# Build the Java String[2] that WordCount.main receives: [input, output].
jvm_argv = sc._gateway.new_array(sc._jvm.java.lang.String, 2)
for slot, value in enumerate((args.input, args.output)):
    jvm_argv[slot] = value

sc._jvm.WordCount.main(jvm_argv)

View File

@@ -100,7 +100,7 @@
"\n",
"# Check core SDK version number\n",
"\n",
"print(\"This notebook was created using SDK version 1.30.0, you are currently running version\", azureml.core.VERSION)"
"print(\"This notebook was created using SDK version 1.31.0, you are currently running version\", azureml.core.VERSION)"
]
},
{

View File

@@ -112,6 +112,8 @@ Machine Learning notebook samples and encourage efficient retrieval of topics an
| [automl-databricks-local-with-deployment](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/azure-databricks/automl/automl-databricks-local-with-deployment.ipynb) | | | | | | |
| [spark_job_on_synapse_spark_pool](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/azure-synapse/spark_job_on_synapse_spark_pool.ipynb) | | | | | | |
| [spark_session_on_synapse_spark_pool](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/azure-synapse/spark_session_on_synapse_spark_pool.ipynb) | | | | | | |
| [Synapse_Job_Scala_Support](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/azure-synapse/Synapse_Job_Scala_Support.ipynb) | | | | | | |
| [Synapse_Session_Scala_Support](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/azure-synapse/Synapse_Session_Scala_Support.ipynb) | | | | | | |
| [multi-model-register-and-deploy](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/deployment/deploy-multi-model/multi-model-register-and-deploy.ipynb) | | | | | | |
| [register-model-deploy-local-advanced](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/deployment/deploy-to-local/register-model-deploy-local-advanced.ipynb) | | | | | | |
| [enable-app-insights-in-production-service](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/deployment/enable-app-insights-in-production-service/enable-app-insights-in-production-service.ipynb) | | | | | | |

View File

@@ -102,7 +102,7 @@
"source": [
"import azureml.core\n",
"\n",
"print(\"This notebook was created using version 1.30.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.31.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},