Mirror of https://github.com/Azure/MachineLearningNotebooks.git, synced 2025-12-20 09:37:04 -05:00

update samples from Release-168 as a part of SDK release
@@ -330,7 +330,7 @@
 "- **inputs:** List of input connections for data consumed by this step. Fetch this inside the notebook using dbutils.widgets.get(\"input\")\n",
 "- **outputs:** List of output port definitions for outputs produced by this step. Fetch this inside the notebook using dbutils.widgets.get(\"output\")\n",
 "- **existing_cluster_id:** Cluster ID of an existing Interactive cluster on the Databricks workspace. If you are providing this, do not provide any of the parameters below that are used to create a new cluster such as spark_version, node_type, etc.\n",
-"- **spark_version:** Version of spark for the databricks run cluster. default value: 4.0.x-scala2.11\n",
+"- **spark_version:** Version of spark for the databricks run cluster. You can refer to [DataBricks runtime version](https://learn.microsoft.com/azure/databricks/dev-tools/api/#--runtime-version-strings) to specify the spark version. default value: 4.0.x-scala2.11\n",
 "- **node_type:** Azure vm node types for the databricks run cluster. default value: Standard_D3_v2\n",
 "- **num_workers:** Specifies a static number of workers for the databricks run cluster\n",
 "- **min_workers:** Specifies a min number of workers to use for auto-scaling the databricks run cluster\n",
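For context, the parameters documented in the hunk above correspond to the DatabricksStep constructor arguments in azureml-pipeline-steps (SDK v1). Below is a minimal sketch of how they fit together, assuming an already-attached Databricks compute named "mydbcompute" and a notebook path that exists in the Databricks workspace (both names are hypothetical):

from azureml.core import Workspace, Datastore
from azureml.core.compute import DatabricksCompute
from azureml.pipeline.core import PipelineData
from azureml.pipeline.steps import DatabricksStep

ws = Workspace.from_config()
def_blob_store = Datastore.get(ws, "workspaceblobstore")
databricks_compute = DatabricksCompute(workspace=ws, name="mydbcompute")  # hypothetical attach name

# Output port; the Databricks notebook reads its path via dbutils.widgets.get(...)
step_output = PipelineData("step_output", datastore=def_blob_store)

# New job cluster: spark_version, node_type and num_workers apply.
db_step = DatabricksStep(
    name="DBNotebookInWS",
    outputs=[step_output],
    notebook_path="/Users/someone@example.com/sample_notebook",  # hypothetical path
    compute_target=databricks_compute,
    spark_version="4.0.x-scala2.11",
    node_type="Standard_D3_v2",
    num_workers=1,
    allow_reuse=True)

# To reuse an existing interactive cluster instead, pass existing_cluster_id and
# omit the new-cluster parameters (spark_version, node_type, num_workers, min_workers, ...).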
@@ -252,7 +252,7 @@
 "# is_directory=None)\n",
 "\n",
 "# Naming the intermediate data as processed_data1 and assigning it to the variable processed_data1.\n",
-"processed_data1 = PipelineData(\"processed_data1\",datastore=def_blob_store)\n",
+"processed_data1 = PipelineData(\"processed_data1\",datastore=def_blob_store, is_directory=True)\n",
 "print(\"PipelineData object created\")"
 ]
 },

@@ -347,7 +347,7 @@
 "source": [
 "# step5 to use the intermediate data produced by step4\n",
 "# This step also produces an output processed_data2\n",
-"processed_data2 = PipelineData(\"processed_data2\", datastore=def_blob_store)\n",
+"processed_data2 = PipelineData(\"processed_data2\", datastore=def_blob_store, is_directory=True)\n",
 "source_directory = \"data_dependency_run_extract\"\n",
 "\n",
 "extractStep = PythonScriptStep(\n",

@@ -394,7 +394,7 @@
 "outputs": [],
 "source": [
 "# Now define the compare step which takes two inputs and produces an output\n",
-"processed_data3 = PipelineData(\"processed_data3\", datastore=def_blob_store)\n",
+"processed_data3 = PipelineData(\"processed_data3\", datastore=def_blob_store, is_directory=True)\n",
 "source_directory = \"data_dependency_run_compare\"\n",
 "\n",
 "compareStep = PythonScriptStep(\n",
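The three hunks above make the same one-argument change: each intermediate PipelineData object in this notebook is now created with is_directory=True. A minimal sketch of the surrounding pattern, assuming a workspace default blob store and a compute target named "cpu-cluster" (script names and compute name are hypothetical; the source_directory values come from the diff):

from azureml.core import Workspace, Datastore
from azureml.pipeline.core import Pipeline, PipelineData
from azureml.pipeline.steps import PythonScriptStep

ws = Workspace.from_config()
def_blob_store = Datastore.get(ws, "workspaceblobstore")

# Intermediate outputs declared as directories up front.
processed_data2 = PipelineData("processed_data2", datastore=def_blob_store, is_directory=True)
processed_data3 = PipelineData("processed_data3", datastore=def_blob_store, is_directory=True)

extractStep = PythonScriptStep(
    script_name="extract.py",                      # hypothetical script name
    arguments=["--output_extract", processed_data2],
    outputs=[processed_data2],
    compute_target="cpu-cluster",                  # hypothetical compute target name
    source_directory="data_dependency_run_extract")

compareStep = PythonScriptStep(
    script_name="compare.py",                      # hypothetical script name
    arguments=["--compare_data", processed_data2, "--output_compare", processed_data3],
    inputs=[processed_data2],
    outputs=[processed_data3],
    compute_target="cpu-cluster",
    source_directory="data_dependency_run_compare")

# The input/output wiring gives the steps their ordering when the pipeline runs.
pipeline = Pipeline(workspace=ws, steps=[extractStep, compareStep])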
@@ -235,7 +235,8 @@
 " path_on_datastore=\"titanic/Titanic.csv\")\n",
 "\n",
 "output_data = PipelineData(name=\"processed_data\",\n",
-" datastore=Datastore.get(ws, \"workspaceblobstore\"))"
+" datastore=Datastore.get(ws, \"workspaceblobstore\"),\n",
+" is_directory=True)"
 ]
 },
 {

@@ -306,7 +307,8 @@
 "from azureml.pipeline.core import PipelineParameter\n",
 "\n",
 "output_from_notebook = PipelineData(name=\"notebook_processed_data\",\n",
-" datastore=Datastore.get(ws, \"workspaceblobstore\"))\n",
+" datastore=Datastore.get(ws, \"workspaceblobstore\"),\n",
+" is_directory=True)\n",
 "\n",
 "my_pipeline_param = PipelineParameter(name=\"pipeline_param\", default_value=\"my_param\")\n",
 "\n",
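In the notebook touched by the two hunks above, the directory-typed output and the PipelineParameter are subsequently handed to a DatabricksStep. A minimal sketch of that wiring, assuming an attached Databricks compute and an existing interactive cluster (the compute name, cluster id, and notebook path are hypothetical):

from azureml.core import Workspace, Datastore
from azureml.core.compute import DatabricksCompute
from azureml.pipeline.core import PipelineData, PipelineParameter
from azureml.pipeline.steps import DatabricksStep

ws = Workspace.from_config()

output_from_notebook = PipelineData(name="notebook_processed_data",
                                    datastore=Datastore.get(ws, "workspaceblobstore"),
                                    is_directory=True)

my_pipeline_param = PipelineParameter(name="pipeline_param", default_value="my_param")

notebook_step = DatabricksStep(
    name="DBNotebookWithParams",
    outputs=[output_from_notebook],
    notebook_path="/Users/someone@example.com/process_titanic",          # hypothetical path
    notebook_params={"pipeline_param": my_pipeline_param},               # resolved per pipeline run
    compute_target=DatabricksCompute(workspace=ws, name="mydbcompute"),  # hypothetical attach name
    existing_cluster_id="0331-121817-abcd123",                           # hypothetical cluster id
    allow_reuse=True)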
@@ -1,5 +1,5 @@
 # DisableDockerDetector "Disabled to unblock PRs until the owner can fix the file. Not used in any prod deployments - only as a documentation for the customers"
-FROM rocker/tidyverse:4.0.0-ubuntu18.04
+FROM rocker/tidyverse:4.0.0-ubuntu20.04
 
 # Install python
 RUN apt-get update -qq && \
@@ -363,7 +363,7 @@
 "}).replace(\",\", \";\")\n",
 "\n",
 "# Define output after cleansing step\n",
-"cleansed_green_data = PipelineData(\"cleansed_green_data\", datastore=default_store).as_dataset()\n",
+"cleansed_green_data = PipelineData(\"cleansed_green_data\", datastore=default_store, is_directory=True).as_dataset()\n",
 "\n",
 "print('Cleanse script is in {}.'.format(os.path.realpath(prepare_data_folder)))\n",
 "\n",

@@ -414,7 +414,7 @@
 "}).replace(\",\", \";\")\n",
 "\n",
 "# Define output after cleansing step\n",
-"cleansed_yellow_data = PipelineData(\"cleansed_yellow_data\", datastore=default_store).as_dataset()\n",
+"cleansed_yellow_data = PipelineData(\"cleansed_yellow_data\", datastore=default_store, is_directory=True).as_dataset()\n",
 "\n",
 "print('Cleanse script is in {}.'.format(os.path.realpath(prepare_data_folder)))\n",
 "\n",

@@ -452,7 +452,7 @@
 "outputs": [],
 "source": [
 "# Define output after merging step\n",
-"merged_data = PipelineData(\"merged_data\", datastore=default_store).as_dataset()\n",
+"merged_data = PipelineData(\"merged_data\", datastore=default_store, is_directory=True).as_dataset()\n",
 "\n",
 "print('Merge script is in {}.'.format(os.path.realpath(prepare_data_folder)))\n",
 "\n",

@@ -489,7 +489,7 @@
 "outputs": [],
 "source": [
 "# Define output after merging step\n",
-"filtered_data = PipelineData(\"filtered_data\", datastore=default_store).as_dataset()\n",
+"filtered_data = PipelineData(\"filtered_data\", datastore=default_store, is_directory=True).as_dataset()\n",
 "\n",
 "print('Filter script is in {}.'.format(os.path.realpath(prepare_data_folder)))\n",
 "\n",

@@ -525,7 +525,7 @@
 "outputs": [],
 "source": [
 "# Define output after normalize step\n",
-"normalized_data = PipelineData(\"normalized_data\", datastore=default_store).as_dataset()\n",
+"normalized_data = PipelineData(\"normalized_data\", datastore=default_store, is_directory=True).as_dataset()\n",
 "\n",
 "print('Normalize script is in {}.'.format(os.path.realpath(prepare_data_folder)))\n",
 "\n",

@@ -566,7 +566,7 @@
 "outputs": [],
 "source": [
 "# Define output after transform step\n",
-"transformed_data = PipelineData(\"transformed_data\", datastore=default_store).as_dataset()\n",
+"transformed_data = PipelineData(\"transformed_data\", datastore=default_store, is_directory=True).as_dataset()\n",
 "\n",
 "print('Transform script is in {}.'.format(os.path.realpath(prepare_data_folder)))\n",
 "\n",

@@ -604,8 +604,8 @@
 "train_model_folder = './scripts/trainmodel'\n",
 "\n",
 "# train and test splits output\n",
-"output_split_train = PipelineData(\"output_split_train\", datastore=default_store).as_dataset()\n",
-"output_split_test = PipelineData(\"output_split_test\", datastore=default_store).as_dataset()\n",
+"output_split_train = PipelineData(\"output_split_train\", datastore=default_store, is_directory=True).as_dataset()\n",
+"output_split_test = PipelineData(\"output_split_test\", datastore=default_store, is_directory=True).as_dataset()\n",
 "\n",
 "print('Data spilt script is in {}.'.format(os.path.realpath(train_model_folder)))\n",
 "\n",
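The hunks in this last notebook all apply the same change: each intermediate PipelineData is created with is_directory=True before being promoted to a dataset via .as_dataset(). A minimal sketch of that pattern for one of the outputs, assuming the workspace default datastore and a data-preparation script folder as in the sample (the script and compute names are hypothetical):

from azureml.core import Workspace
from azureml.pipeline.core import PipelineData
from azureml.pipeline.steps import PythonScriptStep

ws = Workspace.from_config()
default_store = ws.get_default_datastore()

# Declare the step output as a directory, then promote it to a pipeline output dataset.
cleansed_green_data = PipelineData("cleansed_green_data",
                                   datastore=default_store,
                                   is_directory=True).as_dataset()

cleansingStepGreen = PythonScriptStep(
    name="Cleanse Green Taxi Data",
    script_name="cleanse.py",                      # hypothetical script name
    arguments=["--output_cleanse", cleansed_green_data],
    outputs=[cleansed_green_data],
    compute_target="cpu-cluster",                  # hypothetical compute target name
    source_directory="./scripts/prepdata")

# The resulting pipeline output dataset can later be parsed or registered, e.g.:
# cleansed_green_data.parse_delimited_files()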