# MachineLearningNotebooks/how-to-use-azureml/automated-machine-learning/forecasting-pipelines/scripts/infer.py

import argparse
from datetime import datetime
import os
import uuid
import numpy as np
import pandas as pd

from pandas.tseries.frequencies import to_offset
import joblib
from sklearn.metrics import mean_absolute_error, mean_squared_error

from azureml.data.dataset_factory import TabularDatasetFactory
from azureml.automl.runtime.shared.score import scoring, constants as metrics_constants
import azureml.automl.core.shared.constants as constants
from azureml.core import Run, Dataset, Model

try:
    import torch

    _torch_present = True
except ImportError:
    _torch_present = False


def infer_forecasting_dataset_tcn(
    X_test, y_test, model, output_path, output_dataset_name="results"
):
    """Forecast with ``model`` and persist the resulting data frame.

    The data frame returned by ``model.forecast`` is registered as a tabular
    dataset in the default datastore of the current run's workspace and is
    also written as ``<output_dataset_name>.csv`` inside ``output_path``.

    Args:
        X_test: Input passed as the first argument to ``model.forecast``.
        y_test: Input passed as the second argument to ``model.forecast``.
        model: Object exposing ``forecast(X_test, y_test)`` that returns a
            pair whose second element is a pandas data frame.
        output_path: Directory that receives the CSV file. It is created
            when it does not exist yet.
        output_dataset_name: Name of the registered dataset and base name of
            the CSV file.
    """
    # Only the data frame (second element of the forecast result) is used.
    _, df_all = model.forecast(X_test, y_test)

    run = Run.get_context()
    datastore = run.experiment.workspace.get_default_datastore()
    # Date prefix plus a short random suffix, so repeated runs do not write
    # to the same datastore location.
    target_folder = datetime.now().strftime("%Y-%m-%d-") + str(uuid.uuid4())[:6]

    TabularDatasetFactory.register_pandas_dataframe(
        df_all,
        target=(datastore, target_folder),
        name=output_dataset_name,
    )

    # to_csv does not create missing directories; an empty path means the
    # current working directory, which needs no creation.
    if output_path:
        os.makedirs(output_path, exist_ok=True)
    df_all.to_csv(os.path.join(output_path, output_dataset_name + ".csv"), index=False)


def map_location_cuda(storage, loc):
    """Return ``storage.cuda()``; the ``loc`` argument is ignored.

    Used by ``get_model`` as the ``map_location`` callable for ``torch.load``
    when CUDA is available.
    """
    cuda_storage = storage.cuda()
    return cuda_storage


def get_model(model_path, model_file_name):
    """Load a fitted model from ``model_path``/``model_file_name``.

    A file whose name ends in "pt" is loaded with ``torch.load`` (mapped to
    the GPU when CUDA is available, otherwise to the CPU). Any other file is
    loaded with ``joblib.load``. The full path is printed before loading.

    Args:
        model_path: Directory containing the model file.
        model_file_name: Name of the model file inside ``model_path``.

    Returns:
        The deserialized model object.

    Raises:
        ImportError: If a torch model is requested but torch could not be
            imported.
    """
    model_full_path = os.path.join(model_path, model_file_name)
    print(model_full_path)

    # NOTE(security): both joblib.load and torch.load unpickle the file and
    # can therefore execute arbitrary code. Only load model files that come
    # from a trusted source.
    if not model_file_name.endswith("pt"):
        # Load the sklearn pipeline.
        return joblib.load(model_full_path)

    # Load the torch model. An explicit raise is used instead of assert so
    # the check is not stripped when Python runs with -O.
    if not _torch_present:
        raise ImportError("Loading DNN models needs torch to be presented.")

    map_location = map_location_cuda if torch.cuda.is_available() else "cpu"
    with open(model_full_path, "rb") as fh:
        return torch.load(fh, map_location=map_location)


def get_args(argv=None):
    """Parse the command-line arguments of the inference script.

    The misspelled ``--ouput_dataset_name`` flag (and the matching
    ``ouput_dataset_name`` attribute) is kept unchanged so existing
    invocations keep working.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads the process command line.

    Returns:
        argparse.Namespace with the attributes ``model_name``,
        ``ouput_dataset_name``, ``target_column_name``,
        ``test_dataset_name`` and ``output_path``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model_name", type=str, dest="model_name", help="Model to be loaded"
    )
    parser.add_argument(
        "--ouput_dataset_name",
        type=str,
        dest="ouput_dataset_name",
        default="results",
        help="Dataset name of the final output",
    )
    parser.add_argument(
        "--target_column_name",
        type=str,
        dest="target_column_name",
        help="The target column name.",
    )
    parser.add_argument(
        "--test_dataset_name",
        type=str,
        dest="test_dataset_name",
        default="results",
        # The help text used to be a copy of the output dataset's help.
        help="Name of the registered dataset to run inference on",
    )
    parser.add_argument(
        "--output_path",
        type=str,
        dest="output_path",
        default="results",
        help="The output path",
    )
    return parser.parse_args(argv)


def get_data(run, fitted_model, target_column_name, test_dataset_name):
    """Fetch the named test dataset and split off the target column.

    Args:
        run: Run whose experiment workspace is used to look up the dataset.
        fitted_model: Not used by this function.
        target_column_name: Name of the column holding the target values.
        test_dataset_name: Name of the dataset to fetch from the workspace.

    Returns:
        A ``(test_df, y_test)`` tuple. ``test_df`` is the dataset as a pandas
        data frame without the target column. ``y_test`` holds the target
        values, or one NaN per row when the target column is absent.
    """
    workspace = run.experiment.workspace
    features = Dataset.get_by_name(workspace, test_dataset_name).to_pandas_dataframe()

    if target_column_name not in features:
        # No target column in the data: return NaN placeholders instead.
        return features, np.full(features.shape[0], np.nan)

    # pop removes the target column from the frame in place.
    targets = features.pop(target_column_name).values
    return features, targets


def get_model_filename(run, model_name, model_path):
    """Determine the file name of the serialized model.

    Resolution order:
      1. the ``model_file_name`` tag of the registered model, when present;
      2. ``"model.pt"`` when the ``algorithm`` tag equals ``"TCNForecaster"``
         or a ``model.pt`` file exists in ``model_path``;
      3. ``"model.pkl"`` otherwise.

    Args:
        run: Run whose experiment workspace holds the registered model.
        model_name: Name of the registered model.
        model_path: Directory that is checked for a ``model.pt`` file.

    Returns:
        The model file name as a string.
    """
    registered = Model(run.experiment.workspace, model_name)

    if "model_file_name" in registered.tags:
        return registered.tags["model_file_name"]

    if registered.tags.get("algorithm") == "TCNForecaster":
        return "model.pt"

    torch_candidate = os.path.join(model_path, "model.pt")
    return "model.pt" if os.path.exists(torch_candidate) else "model.pkl"


if __name__ == "__main__":
    # Script entry point: parse the arguments, load the registered model,
    # fetch the test data and write the forecast results.
    run = Run.get_context()
    args = get_args()

    # Echo the received arguments, one per line.
    print("args passed are: ")
    for passed_value in (
        args.model_name,
        args.test_dataset_name,
        args.ouput_dataset_name,
        args.target_column_name,
    ):
        print(passed_value)

    # Locate and load the model.
    model_path = Model.get_model_path(args.model_name)
    model_file_name = get_model_filename(run, args.model_name, model_path)
    print(model_file_name)
    fitted_model = get_model(model_path, model_file_name)

    # Fetch the test data, then forecast and persist the output.
    X_test_df, y_test = get_data(
        run, fitted_model, args.target_column_name, args.test_dataset_name
    )
    infer_forecasting_dataset_tcn(
        X_test_df,
        y_test,
        fitted_model,
        args.output_path,
        args.ouput_dataset_name,
    )