update samples from Release-108 as a part of 1.35.0 SDK stable release

This commit is contained in:
amlrelsa-ms
2021-10-11 15:36:08 +00:00
parent e2dddfde85
commit f9892966fd
57 changed files with 28923 additions and 192 deletions

View File

@@ -5,55 +5,10 @@ compute instance.
"""
import argparse
import pandas as pd
import numpy as np
from azureml.core import Dataset, Run
from azureml.automl.core.shared.constants import TimeSeriesInternal
from sklearn.externals import joblib
from pandas.tseries.frequencies import to_offset
def align_outputs(y_predicted, X_trans, X_test, y_test, target_column_name,
predicted_column_name='predicted',
horizon_colname='horizon_origin'):
"""
Demonstrates how to get the output aligned to the inputs
using pandas indexes. Helps understand what happened if
the output's shape differs from the input shape, or if
the data got re-sorted by time and grain during forecasting.
Typical causes of misalignment are:
* we predicted some periods that were missing in actuals -> drop from eval
* model was asked to predict past max_horizon -> increase max horizon
* data at start of X_test was needed for lags -> provide previous periods
"""
if (horizon_colname in X_trans):
df_fcst = pd.DataFrame({predicted_column_name: y_predicted,
horizon_colname: X_trans[horizon_colname]})
else:
df_fcst = pd.DataFrame({predicted_column_name: y_predicted})
# y and X outputs are aligned by forecast() function contract
df_fcst.index = X_trans.index
# align original X_test to y_test
X_test_full = X_test.copy()
X_test_full[target_column_name] = y_test
# X_test_full's index does not include origin, so reset for merge
df_fcst.reset_index(inplace=True)
X_test_full = X_test_full.reset_index().drop(columns='index')
together = df_fcst.merge(X_test_full, how='right')
# drop rows where prediction or actuals are nan
# happens because of missing actuals
# or at edges of time due to lags/rolling windows
clean = together[together[[target_column_name,
predicted_column_name]].notnull().all(axis=1)]
return(clean)
parser = argparse.ArgumentParser()
parser.add_argument(
'--target_column_name', type=str, dest='target_column_name',
@@ -77,13 +32,25 @@ y_test = X_test.pop(target_column_name).values
# generate forecast
fitted_model = joblib.load('model.pkl')
y_predictions, X_trans = fitted_model.forecast(X_test)
# align output
df_all = align_outputs(y_predictions, X_trans, X_test, y_test, target_column_name)
# We have default quantiles values set as below(95th percentile)
quantiles = [0.025, 0.5, 0.975]
predicted_column_name = 'predicted'
PI = 'prediction_interval'
fitted_model.quantiles = quantiles
pred_quantiles = fitted_model.forecast_quantiles(X_test)
pred_quantiles[PI] = pred_quantiles[[min(quantiles), max(quantiles)]].apply(lambda x: '[{}, {}]'.format(x[0],
x[1]), axis=1)
X_test[target_column_name] = y_test
X_test[PI] = pred_quantiles[PI]
X_test[predicted_column_name] = pred_quantiles[0.5]
# drop rows where prediction or actuals are nan
# happens because of missing actuals
# or at edges of time due to lags/rolling windows
clean = X_test[X_test[[target_column_name,
predicted_column_name]].notnull().all(axis=1)]
file_name = 'outputs/predictions.csv'
export_csv = df_all.to_csv(file_name, header=True, index=False) # added Index
export_csv = clean.to_csv(file_name, header=True, index=False) # added Index
# Upload the predictions into artifacts
run.upload_file(name=file_name, path_or_stream=file_name)