Compare commits

..

1 Commits

Author SHA1 Message Date
amlrelsa-ms
ca76074645 update samples from Release-118 as a part of SDK release 2022-01-24 19:14:42 +00:00
4 changed files with 14 additions and 5 deletions

View File

@@ -5,6 +5,17 @@ import argparse
import os
from azureml.core import Run
def get_dict(dict_str):
    r"""Parse a flat, dict-like string into a ``dict`` of strings without ``eval``.

    The expected shape is ``{'key1': 'value1'\;'key2': 'value2'}``: optional
    surrounding braces, entries separated by a literal backslash-semicolon
    (``\;``), and each entry written as ``key: value`` with optional single
    quotes around the key and the value.

    Args:
        dict_str: The string to parse.

    Returns:
        A dict mapping each key to its value, both as strings with the
        surrounding whitespace and single quotes removed. An empty input
        (``""`` or ``"{}"``) yields an empty dict.

    Raises:
        ValueError: If a non-empty entry contains no ``:`` separator.
    """
    parsed = {}
    for pair in dict_str.strip().strip("{}").split(r'\;'):
        pair = pair.strip()
        if not pair:
            # Empty input or a trailing separator leaves a blank entry; skip it
            # instead of failing on the unpack below.
            continue
        # Split on the FIRST colon only so values may themselves contain ':'.
        key, sep, value = pair.partition(":")
        if not sep:
            raise ValueError("expected 'key: value' but got %r" % pair)
        parsed[key.strip().strip("'")] = value.strip().strip("'")
    return parsed
print("Cleans the input data")
# Get the input green_taxi_data. To learn more about how to access dataset in your script, please
@@ -12,6 +23,7 @@ print("Cleans the input data")
# Get the current run context and look up the input dataset named "raw_data".
run = Run.get_context()
raw_data = run.input_datasets["raw_data"]
# Command-line arguments for the "cleanse" step; both are passed as plain strings.
parser = argparse.ArgumentParser("cleanse")
parser.add_argument("--output_cleanse", type=str, help="cleaned taxi data directory")
parser.add_argument("--useful_columns", type=str, help="useful columns to keep")
@@ -26,8 +38,8 @@ print("Argument 3(output cleansed taxi data path): %s" % args.output_cleanse)
# These functions ensure that null data is removed from the dataset,
# which will help increase machine learning model accuracy.
# NOTE(review): this diff view lists both variants of the argument parsing.
# The two eval()-based lines look like the removed side of the change — confirm
# against the actual file; eval() on command-line input executes arbitrary code.
useful_columns = eval(args.useful_columns.replace(';', ','))
columns = eval(args.columns.replace(';', ','))
# String-splitting variant: entries are separated by a literal backslash-semicolon,
# surrounding brackets and single quotes are stripped, and nothing is evaluated.
useful_columns = [s.strip().strip("'") for s in args.useful_columns.strip("[]").split(r'\;')]
columns = get_dict(args.columns)
new_df = (raw_data.to_pandas_dataframe()
.dropna(how='all')

View File

@@ -254,7 +254,6 @@
"- conda-forge\n",
"dependencies:\n",
"- python=3.6.2\n",
"- pip=21.3.1\n",
"- pip:\n",
" - azureml-defaults\n",
" - azureml-opendatasets\n",

View File

@@ -431,7 +431,6 @@
"- conda-forge\n",
"dependencies:\n",
"- python=3.6.2\n",
"- pip=21.3.1\n",
"- pip:\n",
" - h5py<=2.10.0\n",
" - azureml-defaults\n",

View File

@@ -262,7 +262,6 @@
"- conda-forge\n",
"dependencies:\n",
"- python=3.6.2\n",
"- pip=21.3.1\n",
"- pip:\n",
" - azureml-defaults\n",
" - torch==1.6.0\n",