update samples from Release-168 as a part of SDK release

amlrelsa-ms
2022-12-05 17:52:07 +00:00
parent 38d5743bbb
commit 4404e62f58
44 changed files with 187 additions and 814 deletions

View File

@@ -330,7 +330,7 @@
"- **inputs:** List of input connections for data consumed by this step. Fetch this inside the notebook using dbutils.widgets.get(\"input\")\n",
"- **outputs:** List of output port definitions for outputs produced by this step. Fetch this inside the notebook using dbutils.widgets.get(\"output\")\n",
"- **existing_cluster_id:** Cluster ID of an existing Interactive cluster on the Databricks workspace. If you are providing this, do not provide any of the parameters below that are used to create a new cluster such as spark_version, node_type, etc.\n",
"- **spark_version:** Version of spark for the databricks run cluster. default value: 4.0.x-scala2.11\n",
"- **spark_version:** Version of spark for the databricks run cluster. You can refer to [DataBricks runtime version](https://learn.microsoft.com/azure/databricks/dev-tools/api/#--runtime-version-strings) to specify the spark version. default value: 4.0.x-scala2.11\n",
"- **node_type:** Azure vm node types for the databricks run cluster. default value: Standard_D3_v2\n",
"- **num_workers:** Specifies a static number of workers for the databricks run cluster\n",
"- **min_workers:** Specifies a min number of workers to use for auto-scaling the databricks run cluster\n",

View File

@@ -252,7 +252,7 @@
"# is_directory=None)\n",
"\n",
"# Naming the intermediate data as processed_data1 and assigning it to the variable processed_data1.\n",
"processed_data1 = PipelineData(\"processed_data1\",datastore=def_blob_store)\n",
"processed_data1 = PipelineData(\"processed_data1\",datastore=def_blob_store, is_directory=True)\n",
"print(\"PipelineData object created\")"
]
},
@@ -347,7 +347,7 @@
"source": [
"# step5 to use the intermediate data produced by step4\n",
"# This step also produces an output processed_data2\n",
"processed_data2 = PipelineData(\"processed_data2\", datastore=def_blob_store)\n",
"processed_data2 = PipelineData(\"processed_data2\", datastore=def_blob_store, is_directory=True)\n",
"source_directory = \"data_dependency_run_extract\"\n",
"\n",
"extractStep = PythonScriptStep(\n",
@@ -394,7 +394,7 @@
"outputs": [],
"source": [
"# Now define the compare step which takes two inputs and produces an output\n",
"processed_data3 = PipelineData(\"processed_data3\", datastore=def_blob_store)\n",
"processed_data3 = PipelineData(\"processed_data3\", datastore=def_blob_store, is_directory=True)\n",
"source_directory = \"data_dependency_run_compare\"\n",
"\n",
"compareStep = PythonScriptStep(\n",

View File

@@ -235,7 +235,8 @@
" path_on_datastore=\"titanic/Titanic.csv\")\n",
"\n",
"output_data = PipelineData(name=\"processed_data\",\n",
" datastore=Datastore.get(ws, \"workspaceblobstore\"))"
" datastore=Datastore.get(ws, \"workspaceblobstore\"),\n",
" is_directory=True)"
]
},
{
@@ -306,7 +307,8 @@
"from azureml.pipeline.core import PipelineParameter\n",
"\n",
"output_from_notebook = PipelineData(name=\"notebook_processed_data\",\n",
" datastore=Datastore.get(ws, \"workspaceblobstore\"))\n",
" datastore=Datastore.get(ws, \"workspaceblobstore\"),\n",
" is_directory=True)\n",
"\n",
"my_pipeline_param = PipelineParameter(name=\"pipeline_param\", default_value=\"my_param\")\n",
"\n",

View File

@@ -1,5 +1,5 @@
# DisableDockerDetector "Disabled to unblock PRs until the owner can fix the file. Not used in any prod deployments - only as a documentation for the customers"
-FROM rocker/tidyverse:4.0.0-ubuntu18.04
+FROM rocker/tidyverse:4.0.0-ubuntu20.04
# Install python
RUN apt-get update -qq && \

View File

@@ -363,7 +363,7 @@
"}).replace(\",\", \";\")\n",
"\n",
"# Define output after cleansing step\n",
"cleansed_green_data = PipelineData(\"cleansed_green_data\", datastore=default_store).as_dataset()\n",
"cleansed_green_data = PipelineData(\"cleansed_green_data\", datastore=default_store, is_directory=True).as_dataset()\n",
"\n",
"print('Cleanse script is in {}.'.format(os.path.realpath(prepare_data_folder)))\n",
"\n",
@@ -414,7 +414,7 @@
"}).replace(\",\", \";\")\n",
"\n",
"# Define output after cleansing step\n",
"cleansed_yellow_data = PipelineData(\"cleansed_yellow_data\", datastore=default_store).as_dataset()\n",
"cleansed_yellow_data = PipelineData(\"cleansed_yellow_data\", datastore=default_store, is_directory=True).as_dataset()\n",
"\n",
"print('Cleanse script is in {}.'.format(os.path.realpath(prepare_data_folder)))\n",
"\n",
@@ -452,7 +452,7 @@
"outputs": [],
"source": [
"# Define output after merging step\n",
"merged_data = PipelineData(\"merged_data\", datastore=default_store).as_dataset()\n",
"merged_data = PipelineData(\"merged_data\", datastore=default_store, is_directory=True).as_dataset()\n",
"\n",
"print('Merge script is in {}.'.format(os.path.realpath(prepare_data_folder)))\n",
"\n",
@@ -489,7 +489,7 @@
"outputs": [],
"source": [
"# Define output after merging step\n",
"filtered_data = PipelineData(\"filtered_data\", datastore=default_store).as_dataset()\n",
"filtered_data = PipelineData(\"filtered_data\", datastore=default_store, is_directory=True).as_dataset()\n",
"\n",
"print('Filter script is in {}.'.format(os.path.realpath(prepare_data_folder)))\n",
"\n",
@@ -525,7 +525,7 @@
"outputs": [],
"source": [
"# Define output after normalize step\n",
"normalized_data = PipelineData(\"normalized_data\", datastore=default_store).as_dataset()\n",
"normalized_data = PipelineData(\"normalized_data\", datastore=default_store, is_directory=True).as_dataset()\n",
"\n",
"print('Normalize script is in {}.'.format(os.path.realpath(prepare_data_folder)))\n",
"\n",
@@ -566,7 +566,7 @@
"outputs": [],
"source": [
"# Define output after transform step\n",
"transformed_data = PipelineData(\"transformed_data\", datastore=default_store).as_dataset()\n",
"transformed_data = PipelineData(\"transformed_data\", datastore=default_store, is_directory=True).as_dataset()\n",
"\n",
"print('Transform script is in {}.'.format(os.path.realpath(prepare_data_folder)))\n",
"\n",
@@ -604,8 +604,8 @@
"train_model_folder = './scripts/trainmodel'\n",
"\n",
"# train and test splits output\n",
"output_split_train = PipelineData(\"output_split_train\", datastore=default_store).as_dataset()\n",
"output_split_test = PipelineData(\"output_split_test\", datastore=default_store).as_dataset()\n",
"output_split_train = PipelineData(\"output_split_train\", datastore=default_store, is_directory=True).as_dataset()\n",
"output_split_test = PipelineData(\"output_split_test\", datastore=default_store, is_directory=True).as_dataset()\n",
"\n",
"print('Data spilt script is in {}.'.format(os.path.realpath(train_model_folder)))\n",
"\n",