|
|
|
|
@@ -460,8 +460,8 @@
|
|
|
|
|
" name=\"Merge Taxi Data\",\n",
|
|
|
|
|
" script_name=\"merge.py\", \n",
|
|
|
|
|
" arguments=[\"--output_merge\", merged_data],\n",
|
|
|
|
|
" inputs=[cleansed_green_data.parse_parquet_files(file_extension=None),\n",
|
|
|
|
|
" cleansed_yellow_data.parse_parquet_files(file_extension=None)],\n",
|
|
|
|
|
" inputs=[cleansed_green_data.parse_parquet_files(),\n",
|
|
|
|
|
" cleansed_yellow_data.parse_parquet_files()],\n",
|
|
|
|
|
" outputs=[merged_data],\n",
|
|
|
|
|
" compute_target=aml_compute,\n",
|
|
|
|
|
" runconfig=aml_run_config,\n",
|
|
|
|
|
@@ -497,7 +497,7 @@
|
|
|
|
|
" name=\"Filter Taxi Data\",\n",
|
|
|
|
|
" script_name=\"filter.py\", \n",
|
|
|
|
|
" arguments=[\"--output_filter\", filtered_data],\n",
|
|
|
|
|
" inputs=[merged_data.parse_parquet_files(file_extension=None)],\n",
|
|
|
|
|
" inputs=[merged_data.parse_parquet_files()],\n",
|
|
|
|
|
" outputs=[filtered_data],\n",
|
|
|
|
|
" compute_target=aml_compute,\n",
|
|
|
|
|
" runconfig = aml_run_config,\n",
|
|
|
|
|
@@ -533,7 +533,7 @@
|
|
|
|
|
" name=\"Normalize Taxi Data\",\n",
|
|
|
|
|
" script_name=\"normalize.py\", \n",
|
|
|
|
|
" arguments=[\"--output_normalize\", normalized_data],\n",
|
|
|
|
|
" inputs=[filtered_data.parse_parquet_files(file_extension=None)],\n",
|
|
|
|
|
" inputs=[filtered_data.parse_parquet_files()],\n",
|
|
|
|
|
" outputs=[normalized_data],\n",
|
|
|
|
|
" compute_target=aml_compute,\n",
|
|
|
|
|
" runconfig = aml_run_config,\n",
|
|
|
|
|
@@ -574,7 +574,7 @@
|
|
|
|
|
" name=\"Transform Taxi Data\",\n",
|
|
|
|
|
" script_name=\"transform.py\", \n",
|
|
|
|
|
" arguments=[\"--output_transform\", transformed_data],\n",
|
|
|
|
|
" inputs=[normalized_data.parse_parquet_files(file_extension=None)],\n",
|
|
|
|
|
" inputs=[normalized_data.parse_parquet_files()],\n",
|
|
|
|
|
" outputs=[transformed_data],\n",
|
|
|
|
|
" compute_target=aml_compute,\n",
|
|
|
|
|
" runconfig = aml_run_config,\n",
|
|
|
|
|
@@ -614,7 +614,7 @@
|
|
|
|
|
" script_name=\"train_test_split.py\", \n",
|
|
|
|
|
" arguments=[\"--output_split_train\", output_split_train,\n",
|
|
|
|
|
" \"--output_split_test\", output_split_test],\n",
|
|
|
|
|
" inputs=[transformed_data.parse_parquet_files(file_extension=None)],\n",
|
|
|
|
|
" inputs=[transformed_data.parse_parquet_files()],\n",
|
|
|
|
|
" outputs=[output_split_train, output_split_test],\n",
|
|
|
|
|
" compute_target=aml_compute,\n",
|
|
|
|
|
" runconfig = aml_run_config,\n",
|
|
|
|
|
@@ -690,7 +690,7 @@
|
|
|
|
|
" \"n_cross_validations\": 5\n",
|
|
|
|
|
"}\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"training_dataset = output_split_train.parse_parquet_files(file_extension=None).keep_columns(['pickup_weekday','pickup_hour', 'distance','passengers', 'vendor', 'cost'])\n",
|
|
|
|
|
"training_dataset = output_split_train.parse_parquet_files().keep_columns(['pickup_weekday','pickup_hour', 'distance','passengers', 'vendor', 'cost'])\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"automl_config = AutoMLConfig(task = 'regression',\n",
|
|
|
|
|
" debug_log = 'automated_ml_errors.log',\n",
|
|
|
|
|
|