mirror of
https://github.com/Azure/MachineLearningNotebooks.git
synced 2025-12-20 17:45:10 -05:00
38 lines
1002 B
Python
38 lines
1002 B
Python
from pathlib import Path
|
|
from azureml.core import Run
|
|
import argparse
|
|
|
|
|
|
def main(args):
|
|
output = Path(args.output)
|
|
output.mkdir(parents=True, exist_ok=True)
|
|
|
|
run_context = Run.get_context()
|
|
dataset = run_context.input_datasets["train_10_models"]
|
|
df = dataset.to_pandas_dataframe()
|
|
|
|
# Drop the column "Revenue" from the dataset to avoid information leak as
|
|
# "Quantity" = "Revenue" / "Price". Please modify the logic based on your data.
|
|
drop_column_name = "Revenue"
|
|
if drop_column_name in df.columns:
|
|
df.drop(drop_column_name, axis=1, inplace=True)
|
|
|
|
# Apply any data pre-processing techniques here
|
|
|
|
df.to_parquet(output / "data_prepared_result.parquet", compression=None)
|
|
|
|
|
|
def my_parse_args():
|
|
parser = argparse.ArgumentParser("Test")
|
|
|
|
parser.add_argument("--input", type=str)
|
|
parser.add_argument("--output", type=str)
|
|
|
|
args = parser.parse_args()
|
|
return args
|
|
|
|
|
|
if __name__ == "__main__":
|
|
args = my_parse_args()
|
|
main(args)
|