mirror of
https://github.com/Azure/MachineLearningNotebooks.git
synced 2025-12-21 18:15:13 -05:00
update samples from Release-141 as a part of SDK release
@@ -0,0 +1,170 @@
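"""Score a test dataset with a trained AutoML forecasting model.

Loads a registered model (a TCNForecaster saved as model.pt or a sklearn pipeline
saved as model.pkl), calls model.forecast on the registered test dataset, registers
the predictions as a tabular dataset, and writes them to a CSV file.
"""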
import argparse
from datetime import datetime
import os
import uuid
import numpy as np
import pandas as pd

from pandas.tseries.frequencies import to_offset
from sklearn.externals import joblib
from sklearn.metrics import mean_absolute_error, mean_squared_error

from azureml.data.dataset_factory import TabularDatasetFactory
from azureml.automl.runtime.shared.score import scoring, constants as metrics_constants
import azureml.automl.core.shared.constants as constants
from azureml.core import Run, Dataset, Model

# torch is only required to load TCN (DNN) forecasting models.
try:
    import torch

    _torch_present = True
except ImportError:
    _torch_present = False

def infer_forecasting_dataset_tcn(
    X_test,
    y_test,
    model,
    output_path,
    output_dataset_name="results",
):
    # Generate forecasts for the test set.
    y_pred, df_all = model.forecast(X_test, y_test)

    run = Run.get_context()

    # Register the predictions as a tabular dataset in the workspace's default datastore.
    registered_dataset = TabularDatasetFactory.register_pandas_dataframe(
        df_all,
        target=(
            run.experiment.workspace.get_default_datastore(),
            datetime.now().strftime("%Y-%m-%d-") + str(uuid.uuid4())[:6],
        ),
        name=output_dataset_name,
    )
    # Also write the predictions to a CSV file in the output path.
    df_all.to_csv(os.path.join(output_path, output_dataset_name + ".csv"), index=False)

def map_location_cuda(storage, loc):
    # Keep tensors on the GPU when deserializing a model saved on CUDA.
    return storage.cuda()


def get_model(model_path, model_file_name):
    model_full_path = os.path.join(model_path, model_file_name)
    print(model_full_path)
    if model_file_name.endswith("pt"):
        # Load the forecasting TCN torch model.
        assert _torch_present, "Loading DNN models requires torch to be installed."
        if torch.cuda.is_available():
            map_location = map_location_cuda
        else:
            map_location = "cpu"
        with open(model_full_path, "rb") as fh:
            fitted_model = torch.load(fh, map_location=map_location)
    else:
        # Load the sklearn pipeline.
        fitted_model = joblib.load(model_full_path)
    return fitted_model

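# Usage sketch (hypothetical paths and values), assuming the model files have been
# downloaded locally and the test data is already loaded:
#   fitted_model = get_model(model_path="outputs", model_file_name="model.pkl")
#   y_pred, df_all = fitted_model.forecast(X_test_df, y_test)
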
def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model_name",
        type=str,
        dest="model_name",
        help="Name of the registered model to load",
    )

    parser.add_argument(
        "--output_dataset_name",
        type=str,
        dest="output_dataset_name",
        default="results",
        help="Dataset name under which the predictions are registered",
    )
    parser.add_argument(
        "--target_column_name",
        type=str,
        dest="target_column_name",
        help="The target column name.",
    )
    parser.add_argument(
        "--test_dataset_name",
        type=str,
        dest="test_dataset_name",
        default="results",
        help="Name of the registered test dataset",
    )
    parser.add_argument(
        "--output_path",
        type=str,
        dest="output_path",
        default="results",
        help="Path to which the predictions CSV is written",
    )
    args = parser.parse_args()
    return args

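# Example invocation (hypothetical values; assumes this file is saved as infer.py):
#   python infer.py \
#       --model_name my-forecasting-model \
#       --test_dataset_name my-test-dataset \
#       --target_column_name demand \
#       --output_path results \
#       --output_dataset_name results
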
def get_data(
    run,
    fitted_model,
    target_column_name,
    test_dataset_name,
):
    # Get the registered test dataset by name.
    test_dataset = Dataset.get_by_name(run.experiment.workspace, test_dataset_name)
    test_df = test_dataset.to_pandas_dataframe()
    # Split off the target column if present; otherwise use NaNs as placeholders.
    if target_column_name in test_df:
        y_test = test_df.pop(target_column_name)
    else:
        y_test = np.full(test_df.shape[0], np.nan)

    return test_df, y_test


def get_model_filename(run, model_name, model_path):
    # Prefer an explicit file name stored in the model tags.
    model = Model(run.experiment.workspace, model_name)
    if "model_file_name" in model.tags:
        return model.tags["model_file_name"]
    # Otherwise infer it: TCNForecaster models are saved as model.pt, all others as model.pkl.
    is_pkl = True
    if model.tags.get("algorithm") == "TCNForecaster" or os.path.exists(
        os.path.join(model_path, "model.pt")
    ):
        is_pkl = False
    return "model.pkl" if is_pkl else "model.pt"

if __name__ == "__main__":
    run = Run.get_context()

    args = get_args()
    model_name = args.model_name
    output_dataset_name = args.output_dataset_name
    test_dataset_name = args.test_dataset_name
    target_column_name = args.target_column_name

    print("Args passed are:")
    print(model_name)
    print(test_dataset_name)
    print(output_dataset_name)
    print(target_column_name)

    # Download the registered model and load it (torch TCN or sklearn pipeline).
    model_path = Model.get_model_path(model_name)
    model_file_name = get_model_filename(run, model_name, model_path)
    print(model_file_name)
    fitted_model = get_model(model_path, model_file_name)

    # Load the test data, score it, and register/write the predictions.
    X_test_df, y_test = get_data(
        run,
        fitted_model,
        target_column_name,
        test_dataset_name,
    )

    infer_forecasting_dataset_tcn(
        X_test_df, y_test, fitted_model, args.output_path, output_dataset_name
    )