From 879607e59ba70f4698f831cdce9367279284c22b Mon Sep 17 00:00:00 2001 From: amlrelsa-ms Date: Fri, 28 Jan 2022 07:00:37 +0000 Subject: [PATCH] update samples from Release-120 as a part of SDK release --- .../scripts/prepdata/cleanse.py | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/how-to-use-azureml/machine-learning-pipelines/nyc-taxi-data-regression-model-building/scripts/prepdata/cleanse.py b/how-to-use-azureml/machine-learning-pipelines/nyc-taxi-data-regression-model-building/scripts/prepdata/cleanse.py index 0da693cc..bdbfb465 100644 --- a/how-to-use-azureml/machine-learning-pipelines/nyc-taxi-data-regression-model-building/scripts/prepdata/cleanse.py +++ b/how-to-use-azureml/machine-learning-pipelines/nyc-taxi-data-regression-model-building/scripts/prepdata/cleanse.py @@ -5,17 +5,6 @@ import argparse import os from azureml.core import Run - -def get_dict(dict_str): - pairs = dict_str.strip("{}").split(r'\;') - new_dict = {} - for pair in pairs: - key, value = pair.strip().split(":") - new_dict[key.strip().strip("'")] = value.strip().strip("'") - - return new_dict - - print("Cleans the input data") # Get the input green_taxi_data. To learn more about how to access dataset in your script, please @@ -23,7 +12,6 @@ print("Cleans the input data") run = Run.get_context() raw_data = run.input_datasets["raw_data"] - parser = argparse.ArgumentParser("cleanse") parser.add_argument("--output_cleanse", type=str, help="cleaned taxi data directory") parser.add_argument("--useful_columns", type=str, help="useful columns to keep") @@ -38,8 +26,8 @@ print("Argument 3(output cleansed taxi data path): %s" % args.output_cleanse) # These functions ensure that null data is removed from the dataset, # which will help increase machine learning model accuracy. 
-useful_columns = [s.strip().strip("'") for s in args.useful_columns.strip("[]").split(r'\;')]
-columns = get_dict(args.columns)
+useful_columns = eval(args.useful_columns.replace(';', ','))  # NOTE(review): eval() executes whatever expression --useful_columns carries; consider ast.literal_eval if only literals are expected
+columns = eval(args.columns.replace(';', ','))  # NOTE(review): same eval() concern for --columns; also note ';' inside quoted values is rewritten to ',' before evaluation
 new_df = (raw_data.to_pandas_dataframe()
           .dropna(how='all')