mirror of
https://github.com/Azure/MachineLearningNotebooks.git
synced 2025-12-19 17:17:04 -05:00
update samples from Release-132 as a part of 1.0.48 SDK release
This commit is contained in:
@@ -0,0 +1,92 @@
|
||||
-- This procedure forecasts values using a forecasting model returned by AutoMLTrain.
|
||||
-- It returns a dataset with the forecasted values.
|
||||
SET ANSI_NULLS ON
|
||||
GO
|
||||
SET QUOTED_IDENTIFIER ON
|
||||
GO
|
||||
CREATE OR ALTER PROCEDURE [dbo].[AutoMLForecast]
 (
 @input_query NVARCHAR(MAX),                      -- A SQL query returning data to predict on.
 @model NVARCHAR(MAX),                            -- A model returned from AutoMLTrain (base64-encoded pickle).
 @time_column_name NVARCHAR(255)='',              -- The name of the timestamp column for forecasting.
 @label_column NVARCHAR(255)='',                  -- Optional name of the column from input_query, which should be ignored when predicting.
                                                  -- When specified, output rows whose actual value is null are dropped.
 @y_query_column NVARCHAR(255)='',                -- Optional value column that can be used for predicting.
                                                  -- If specified, this can contain values for past times (after the model was trained)
                                                  -- and contain NaN for future times.
 @forecast_column_name NVARCHAR(255) = 'predicted'
                                                  -- The name of the output column containing the forecast value.
 ) AS
BEGIN

    -- The Python script below receives the result of @input_query as the pandas
    -- DataFrame "input_data" and must leave its result in "combined_output".
    -- NOTE: single quotes inside the script are doubled because the script is a T-SQL string literal.
    EXEC sp_execute_external_script @language = N'Python', @script = N'import pandas as pd
import azureml.core
import numpy as np
from azureml.train.automl import AutoMLConfig
import pickle
import codecs

# SECURITY NOTE(review): pickle.loads executes arbitrary code embedded in the
# payload. Only pass models produced by AutoMLTrain from a trusted source.
model_obj = pickle.loads(codecs.decode(model.encode(), "base64"))

test_data = input_data.copy()

# Split the actuals (if a label column was named) away from the features.
if label_column != "" and label_column is not None:
    y_test = test_data.pop(label_column).values
else:
    y_test = None

# Known target values to condition on; all-NaN means "forecast everything".
if y_query_column != "" and y_query_column is not None:
    y_query = test_data.pop(y_query_column).values
else:
    y_query = np.repeat(np.nan, len(test_data))

X_test = test_data

if time_column_name != "" and time_column_name is not None:
    X_test[time_column_name] = pd.to_datetime(X_test[time_column_name])

y_fcst, X_trans = model_obj.forecast(X_test, y_query)

def align_outputs(y_forecast, X_trans, X_test, y_test, forecast_column_name):
    # Demonstrates how to get the output aligned to the inputs
    # using pandas indexes. Helps understand what happened if
    # the output shape differs from the input shape, or if
    # the data got re-sorted by time and grain during forecasting.
    #
    # Typical causes of misalignment are:
    # * we predicted some periods that were missing in actuals -> drop from eval
    # * model was asked to predict past max_horizon -> increase max horizon
    # * data at start of X_test was needed for lags -> provide previous periods
    #
    # Returns the merged frame restricted to rows that have a forecast and,
    # when actuals were supplied (y_test is not None), a non-null actual.
    # Reads label_column from the enclosing script scope.

    df_fcst = pd.DataFrame({forecast_column_name : y_forecast})
    # y and X outputs are aligned by forecast() function contract
    df_fcst.index = X_trans.index

    # align original X_test to y_test
    X_test_full = X_test.copy()
    if y_test is not None:
        X_test_full[label_column] = y_test

    # X_test_full does not include origin, so reset for merge
    df_fcst.reset_index(inplace=True)
    X_test_full = X_test_full.reset_index().drop(columns=''index'')
    together = df_fcst.merge(X_test_full, how=''right'')

    # drop rows where prediction or actuals are nan
    # happens because of missing actuals
    # or at edges of time due to lags/rolling windows
    #
    # Only require the label column when actuals were supplied: with the
    # default empty label_column no such column exists in the frame, and
    # selecting it would raise KeyError.
    required_columns = [forecast_column_name]
    if y_test is not None:
        required_columns.insert(0, label_column)
    clean = together[together[required_columns].notnull().all(axis=1)]
    return clean

combined_output = align_outputs(y_fcst, X_trans, X_test, y_test, forecast_column_name)

'
    , @input_data_1 = @input_query
    , @input_data_1_name = N'input_data'
    , @output_data_1_name = N'combined_output'
    , @params = N'@model NVARCHAR(MAX), @time_column_name NVARCHAR(255), @label_column NVARCHAR(255), @y_query_column NVARCHAR(255), @forecast_column_name NVARCHAR(255)'
    , @model = @model
    , @time_column_name = @time_column_name
    , @label_column = @label_column
    , @y_query_column = @y_query_column
    , @forecast_column_name = @forecast_column_name
END
|
||||
@@ -69,7 +69,10 @@ CREATE OR ALTER PROCEDURE [dbo].[AutoMLTrain]
|
||||
@is_validate_column NVARCHAR(255)='', -- The name of the column in the result of @input_query that indicates if the row is for training or validation.
|
||||
-- In the values of the column, 0 means for training and 1 means for validation.
|
||||
@time_column_name NVARCHAR(255)='', -- The name of the timestamp column for forecasting.
|
||||
@connection_name NVARCHAR(255)='default' -- The AML connection to use.
|
||||
@connection_name NVARCHAR(255)='default', -- The AML connection to use.
|
||||
@max_horizon INT = 0 -- A forecast horizon is a time span into the future (or just beyond the latest date in the training data)
|
||||
-- where forecasts of the target quantity are needed.
|
||||
-- For example, if data is recorded daily and max_horizon is 5, we will predict 5 days ahead.
|
||||
) AS
|
||||
BEGIN
|
||||
|
||||
@@ -151,8 +154,10 @@ if __name__.startswith("sqlindb"):
|
||||
if time_column_name != "" and time_column_name is not None:
|
||||
automl_settings = { "time_column_name": time_column_name }
|
||||
preprocess = False
|
||||
if max_horizon > 0:
|
||||
automl_settings["max_horizon"] = max_horizon
|
||||
|
||||
log_file_name = "automl_errors.log"
|
||||
log_file_name = "automl_sqlindb_errors.log"
|
||||
|
||||
automl_config = AutoMLConfig(task = task,
|
||||
debug_log = log_file_name,
|
||||
@@ -163,7 +168,6 @@ if __name__.startswith("sqlindb"):
|
||||
n_cross_validations = n_cross_validations,
|
||||
preprocess = preprocess,
|
||||
verbosity = logging.INFO,
|
||||
enable_ensembling = False,
|
||||
X = X_train,
|
||||
y = y_train,
|
||||
path = project_folder,
|
||||
@@ -211,7 +215,8 @@ if __name__.startswith("sqlindb"):
|
||||
@tenantid NVARCHAR(255),
|
||||
@appid NVARCHAR(255),
|
||||
@password NVARCHAR(255),
|
||||
@config_file NVARCHAR(255)'
|
||||
@config_file NVARCHAR(255),
|
||||
@max_horizon INT'
|
||||
, @label_column = @label_column
|
||||
, @primary_metric = @primary_metric
|
||||
, @iterations = @iterations
|
||||
@@ -230,5 +235,6 @@ if __name__.startswith("sqlindb"):
|
||||
, @appid = @appid
|
||||
, @password = @password
|
||||
, @config_file = @config_file
|
||||
, @max_horizon = @max_horizon
|
||||
WITH RESULT SETS ((best_run NVARCHAR(250), experiment_name NVARCHAR(100), fitted_model VARCHAR(MAX), log_file_text NVARCHAR(MAX), workspace NVARCHAR(100)))
|
||||
END
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user