""" This is the script that is executed on the compute instance. It relies on the model.pkl file which is uploaded along with this script to the compute instance. """ import argparse from azureml.core import Dataset, Run from azureml.automl.core.shared.constants import TimeSeriesInternal from sklearn.externals import joblib parser = argparse.ArgumentParser() parser.add_argument( "--target_column_name", type=str, dest="target_column_name", help="Target Column Name", ) parser.add_argument( "--test_dataset", type=str, dest="test_dataset", help="Test Dataset" ) args = parser.parse_args() target_column_name = args.target_column_name test_dataset_id = args.test_dataset run = Run.get_context() ws = run.experiment.workspace # get the input dataset by id test_dataset = Dataset.get_by_id(ws, id=test_dataset_id) X_test = ( test_dataset.drop_columns(columns=[target_column_name]) .to_pandas_dataframe() .reset_index(drop=True) ) y_test_df = ( test_dataset.with_timestamp_columns(None) .keep_columns(columns=[target_column_name]) .to_pandas_dataframe() ) # generate forecast fitted_model = joblib.load("model.pkl") # We have default quantiles values set as below(95th percentile) quantiles = [0.025, 0.5, 0.975] predicted_column_name = "predicted" PI = "prediction_interval" fitted_model.quantiles = quantiles pred_quantiles = fitted_model.forecast_quantiles(X_test) pred_quantiles[PI] = pred_quantiles[[min(quantiles), max(quantiles)]].apply( lambda x: "[{}, {}]".format(x[0], x[1]), axis=1 ) X_test[target_column_name] = y_test_df[target_column_name] X_test[PI] = pred_quantiles[PI] X_test[predicted_column_name] = pred_quantiles[0.5] # drop rows where prediction or actuals are nan # happens because of missing actuals # or at edges of time due to lags/rolling windows clean = X_test[ X_test[[target_column_name, predicted_column_name]].notnull().all(axis=1) ] clean.rename(columns={target_column_name: "actual"}, inplace=True) file_name = "outputs/predictions.csv" export_csv = clean.to_csv(file_name, header=True, index=False) # added Index # Upload the predictions into artifacts run.upload_file(name=file_name, path_or_stream=file_name)