mirror of
https://github.com/Azure/MachineLearningNotebooks.git
synced 2025-12-20 09:37:04 -05:00
156 lines
4.4 KiB
Python
156 lines
4.4 KiB
Python
import argparse
|
|
from datetime import datetime
|
|
import os
|
|
import uuid
|
|
import numpy as np
|
|
import pandas as pd
|
|
|
|
from pandas.tseries.frequencies import to_offset
|
|
import joblib
|
|
from sklearn.metrics import mean_absolute_error, mean_squared_error
|
|
|
|
from azureml.data.dataset_factory import TabularDatasetFactory
|
|
from azureml.automl.runtime.shared.score import scoring, constants as metrics_constants
|
|
import azureml.automl.core.shared.constants as constants
|
|
from azureml.core import Run, Dataset, Model
|
|
|
|
try:
|
|
import torch
|
|
|
|
_torch_present = True
|
|
except ImportError:
|
|
_torch_present = False
|
|
|
|
|
|
def infer_forecasting_dataset_tcn(
    X_test, y_test, model, output_path, output_dataset_name="results"
):
    """Forecast on the test data, register the result and save it as CSV.

    The data frame returned by ``model.forecast`` is registered as a tabular
    dataset on the workspace's default datastore and is also written to
    ``<output_path>/<output_dataset_name>.csv``.

    Args:
        X_test: Features passed to ``model.forecast``.
        y_test: Target values passed to ``model.forecast``.
        model: Fitted model exposing ``forecast(X, y)`` that returns a
            ``(predictions, dataframe)`` pair.
        output_path: Directory that receives the CSV file. It is created
            when it does not exist yet.
        output_dataset_name: Name of the registered dataset and stem of the
            CSV file name.
    """
    # Only the combined data frame is used; the raw predictions are dropped.
    _, df_all = model.forecast(X_test, y_test)

    run = Run.get_context()

    # The second element of the target is a date stamp followed by the first
    # six characters of a random UUID.
    TabularDatasetFactory.register_pandas_dataframe(
        df_all,
        target=(
            run.experiment.workspace.get_default_datastore(),
            datetime.now().strftime("%Y-%m-%d-") + str(uuid.uuid4())[:6],
        ),
        name=output_dataset_name,
    )

    # DataFrame.to_csv does not create missing directories, so make sure the
    # destination exists first. An empty path means the current directory.
    if output_path:
        os.makedirs(output_path, exist_ok=True)
    df_all.to_csv(os.path.join(output_path, output_dataset_name + ".csv"), index=False)
|
|
|
|
|
|
def map_location_cuda(storage, loc):
    """Return ``storage.cuda()``; usable as a ``map_location`` callable.

    Args:
        storage: Object exposing a ``cuda()`` method.
        loc: Original location tag; accepted but not used.
    """
    on_gpu = storage.cuda()
    return on_gpu
|
|
|
|
|
|
def get_model(model_path, model_file_name):
    """Load the fitted model stored at ``model_path/model_file_name``.

    Files with a ``.pt`` extension are loaded with ``torch.load`` (mapped to
    CUDA when ``torch.cuda.is_available()``, otherwise to the CPU). Every
    other file is loaded with ``joblib.load``.

    Args:
        model_path: Directory containing the model file.
        model_file_name: File name of the serialized model.

    Returns:
        The deserialized model object.

    Raises:
        ImportError: If a ``.pt`` model is requested but torch could not be
            imported.
    """
    model_full_path = os.path.join(model_path, model_file_name)
    print(model_full_path)

    # NOTE: both torch.load and joblib.load unpickle arbitrary objects; only
    # load model files that come from a trusted source.
    if not model_file_name.endswith(".pt"):
        # Load the sklearn pipeline.
        return joblib.load(model_full_path)

    # Load the fc-tcn torch model. An explicit raise is used instead of an
    # assert so the check also runs when Python is started with -O.
    if not _torch_present:
        raise ImportError("Loading DNN models needs torch to be presented.")

    map_location = map_location_cuda if torch.cuda.is_available() else "cpu"
    with open(model_full_path, "rb") as fh:
        return torch.load(fh, map_location=map_location)
|
|
|
|
|
|
def get_args():
    """Parse the command-line arguments of the inference script.

    Returns:
        argparse.Namespace: Holds ``model_name``, ``ouput_dataset_name``,
        ``target_column_name``, ``test_dataset_name`` and ``output_path``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model_name", type=str, dest="model_name", help="Model to be loaded"
    )
    # The misspelled flag and attribute name are kept because existing
    # callers use them; the correctly spelled flag is accepted as an alias
    # and stores into the same attribute.
    parser.add_argument(
        "--ouput_dataset_name",
        "--output_dataset_name",
        type=str,
        dest="ouput_dataset_name",
        default="results",
        help="Dataset name of the final output",
    )
    parser.add_argument(
        "--target_column_name",
        type=str,
        dest="target_column_name",
        help="The target column name.",
    )
    parser.add_argument(
        "--test_dataset_name",
        type=str,
        dest="test_dataset_name",
        default="results",
        help="Dataset name of the test data",
    )
    parser.add_argument(
        "--output_path",
        type=str,
        dest="output_path",
        default="results",
        help="The output path",
    )
    return parser.parse_args()
|
|
|
|
|
|
def get_data(run, fitted_model, target_column_name, test_dataset_name):
    """Fetch the named test dataset and split off the target column.

    Args:
        run: Run whose ``experiment.workspace`` is used to look up the dataset.
        fitted_model: Accepted but not used by this function.
        target_column_name: Column to remove from the frame and return as
            the target values.
        test_dataset_name: Name of the dataset to load.

    Returns:
        tuple: ``(features_frame, y_test)``. When the target column is not
        in the frame, ``y_test`` is an array of NaN with one entry per row.
    """
    # Get the input dataset by name and materialize it as a data frame.
    workspace = run.experiment.workspace
    frame = Dataset.get_by_name(workspace, test_dataset_name).to_pandas_dataframe()

    if target_column_name not in frame:
        return frame, np.full(frame.shape[0], np.nan)

    targets = frame.pop(target_column_name).values
    return frame, targets
|
|
|
|
|
|
def get_model_filename(run, model_name, model_path):
    """Work out the file name of the serialized model.

    Args:
        run: Run whose ``experiment.workspace`` is used to look up the model.
        model_name: Name of the registered model.
        model_path: Directory that is probed for a ``model.pt`` file.

    Returns:
        str: The ``model_file_name`` tag when the model has one; otherwise
        ``"model.pt"`` if the ``algorithm`` tag is ``"TCNForecaster"`` or a
        ``model.pt`` file exists under ``model_path``; else ``"model.pkl"``.
    """
    tags = Model(run.experiment.workspace, model_name).tags

    # An explicit tag wins over any heuristic.
    if "model_file_name" in tags:
        return tags["model_file_name"]

    if tags.get("algorithm") == "TCNForecaster":
        return "model.pt"
    if os.path.exists(os.path.join(model_path, "model.pt")):
        return "model.pt"
    return "model.pkl"
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: parse the arguments, load the model and the test
    # data, then run the forecast and store its output.
    run = Run.get_context()
    args = get_args()

    model_name, ouput_dataset_name = args.model_name, args.ouput_dataset_name
    test_dataset_name, target_column_name = (
        args.test_dataset_name,
        args.target_column_name,
    )

    # Echo the received arguments, one per line.
    print("args passed are: ")
    for value in (
        model_name,
        test_dataset_name,
        ouput_dataset_name,
        target_column_name,
    ):
        print(value)

    model_path = Model.get_model_path(model_name)
    model_file_name = get_model_filename(run, model_name, model_path)
    print(model_file_name)
    fitted_model = get_model(model_path, model_file_name)

    X_test_df, y_test = get_data(
        run, fitted_model, target_column_name, test_dataset_name
    )

    infer_forecasting_dataset_tcn(
        X_test_df,
        y_test,
        fitted_model,
        args.output_path,
        ouput_dataset_name,
    )
|