mirror of
https://github.com/Azure/MachineLearningNotebooks.git
synced 2025-12-21 18:15:13 -05:00
30 lines
1.1 KiB
Python
30 lines
1.1 KiB
Python
|
|
import argparse
|
|
import os
|
|
import azureml.dataprep as dprep
|
|
|
|
# Pipeline step: read the cleaned green and yellow taxi CSV part files,
# append the yellow rows to the green rows, and (when an output directory
# is given) write the combined data back out as CSV.
print("Merge Green and Yellow taxi data")

parser = argparse.ArgumentParser("merge")
# Both input directories are required: each one is concatenated with a glob
# suffix below, which would otherwise fail with an opaque TypeError when the
# argument is omitted (argparse defaults it to None).
parser.add_argument("--input_green_merge", type=str, required=True,
                    help="cleaned green taxi data directory")
parser.add_argument("--input_yellow_merge", type=str, required=True,
                    help="cleaned yellow taxi data directory")
# The output directory stays optional; without it nothing is written.
parser.add_argument("--output_merge", type=str, help="green and yellow taxi data merged")

args = parser.parse_args()

# Echo the resolved arguments so they show up in the step's log output.
print("Argument 1(input green taxi data path): %s" % args.input_green_merge)
print("Argument 2(input yellow taxi data path): %s" % args.input_yellow_merge)
print("Argument 3(output merge taxi data path): %s" % args.output_merge)

# Each input directory is expected to hold files named "part-*".
green_df = dprep.read_csv(args.input_green_merge + '/part-*')
yellow_df = dprep.read_csv(args.input_yellow_merge + '/part-*')

# Appending yellow data to green data
combined_df = green_df.append_rows([yellow_df])

if args.output_merge is not None:
    # Make sure the target directory exists before writing into it.
    os.makedirs(args.output_merge, exist_ok=True)
    print("%s created" % args.output_merge)

    # write_to_csv() returns an object that must be run explicitly;
    # run_local() performs the actual write.
    write_df = combined_df.write_to_csv(directory_path=dprep.LocalFileOutput(args.output_merge))
    write_df.run_local()
|