update samples from Release-132 as a part of 1.0.48 SDK release

2025-12-19 17:17:04 -05:00 · 2019-07-09 22:02:57 +00:00
parent 9e0fc4f0e7
commit 475ea36106
195 changed files with 31305 additions and 4675 deletions
--- a/how-to-use-azureml/automated-machine-learning/sql-server/setup/AutoMLForecast.sql
+++ b/how-to-use-azureml/automated-machine-learning/sql-server/setup/AutoMLForecast.sql
@@ -0,0 +1,92 @@
+-- This procedure forecasts values using a forecasting model returned by AutoMLTrain.
+-- It returns the input rows merged with a forecast column; rows with a null forecast or label are dropped.
+SET ANSI_NULLS ON
+GO
+SET QUOTED_IDENTIFIER ON
+GO
+CREATE OR ALTER PROCEDURE [dbo].[AutoMLForecast]
+ (
+   @input_query NVARCHAR(MAX),          -- A SQL query returning data to predict on.
+   @model NVARCHAR(MAX),                -- A model returned from AutoMLTrain (base64-encoded pickle).
+   @time_column_name  NVARCHAR(255)='', -- The name of the timestamp column for forecasting.
+   @label_column  NVARCHAR(255)='',     -- Optional name of the column from input_query, which should be ignored when predicting
+   @y_query_column NVARCHAR(255)='',    -- Optional value column that can be used for predicting.
+                                        -- If specified, this can contain values for past times (after the model was trained)
+                                        -- and contain NaN for future times.
+   @forecast_column_name NVARCHAR(255) = 'predicted'
+                                        -- The name of the output column containing the forecast value.
+ ) AS
+BEGIN
+    EXEC sp_execute_external_script @language = N'Python', @script = N'import pandas as pd
+import azureml.core
+import numpy as np
+from azureml.train.automl import AutoMLConfig
+import pickle
+import codecs
+
+# SECURITY: pickle.loads can run arbitrary code, so the model parameter must come from a trusted AutoMLTrain run.
+model_obj = pickle.loads(codecs.decode(model.encode(), "base64"))
+
+test_data = input_data.copy()
+
+# The label column is not a feature: remove it before forecasting and keep it for the output.
+if label_column != "" and label_column is not None:
+    y_test = test_data.pop(label_column).values
+else:
+    y_test = None
+
+# Without a y_query column every target value is passed to the model as NaN.
+if y_query_column != "" and y_query_column is not None:
+    y_query = test_data.pop(y_query_column).values
+else:
+    y_query = np.repeat(np.nan, len(test_data))
+
+X_test = test_data
+if time_column_name != "" and time_column_name is not None:
+    X_test[time_column_name] = pd.to_datetime(X_test[time_column_name])
+
+y_fcst, X_trans = model_obj.forecast(X_test, y_query)
+
+def align_outputs(y_forecast, X_trans, X_test, y_test, forecast_column_name):
+    # Demonstrates how to get the output aligned to the inputs
+    # using pandas indexes. Helps understand what happened if
+    # the output shape differs from the input shape, or if
+    # the data got re-sorted by time and grain during forecasting.
+    #
+    # Typical causes of misalignment are:
+    # * we predicted some periods that were missing in actuals -> drop from eval
+    # * model was asked to predict past max_horizon -> increase max horizon
+    # * data at start of X_test was needed for lags -> provide previous periods
+    df_fcst = pd.DataFrame({forecast_column_name : y_forecast})
+    # y and X outputs are aligned by forecast() function contract
+    df_fcst.index = X_trans.index
+    # Re-attach the actuals (when supplied) so they appear next to the forecast.
+    X_test_full = X_test.copy()
+    required_columns = [forecast_column_name]
+    if y_test is not None:
+        X_test_full[label_column] = y_test
+        required_columns.insert(0, label_column)
+
+    # X_test_full does not include origin, so reset for merge
+    df_fcst.reset_index(inplace=True)
+    X_test_full = X_test_full.reset_index().drop(columns=''index'')
+    together = df_fcst.merge(X_test_full, how=''right'')
+
+    # Drop rows where the forecast (or the actual, when a label column was given) is NaN.
+    # This happens because of missing actuals or at edges of time due to lags/rolling windows.
+    # The label column only exists when it was supplied; selecting it otherwise raises KeyError.
+    clean = together[together[required_columns].notnull().all(axis=1)]
+    return(clean)
+
+combined_output = align_outputs(y_fcst, X_trans, X_test, y_test, forecast_column_name)
+'
+    , @input_data_1 = @input_query
+    , @input_data_1_name = N'input_data'
+    , @output_data_1_name = N'combined_output'
+    , @params = N'@model NVARCHAR(MAX), @time_column_name  NVARCHAR(255), @label_column NVARCHAR(255), @y_query_column NVARCHAR(255), @forecast_column_name NVARCHAR(255)'
+    , @model = @model
+    , @time_column_name = @time_column_name
+    , @label_column = @label_column
+    , @y_query_column = @y_query_column
+    , @forecast_column_name = @forecast_column_name
+END
--- a/how-to-use-azureml/automated-machine-learning/sql-server/setup/AutoMLTrain.sql
+++ b/how-to-use-azureml/automated-machine-learning/sql-server/setup/AutoMLTrain.sql
@@ -69,7 +69,10 @@ CREATE OR ALTER PROCEDURE [dbo].[AutoMLTrain]
    @is_validate_column NVARCHAR(255)='',            -- The name of the column in the result of  @input_query that indicates if the row is for training or validation.
 	                                                 -- In the values of the column, 0 means for training and 1 means for validation.
    @time_column_name  NVARCHAR(255)='',             -- The name of the timestamp column for forecasting.
-	@connection_name NVARCHAR(255)='default'         -- The AML connection to use.
+    @connection_name NVARCHAR(255)='default',        -- The AML connection to use.
+    @max_horizon INT = 0                             -- The forecast horizon: how far past the latest date in the training data forecasts of the target quantity are needed.
+                                                     -- For example, if data is recorded daily and max_horizon is 5, we will predict 5 days ahead.
+                                                     -- It is only passed to AutoML when @time_column_name is set and the value is greater than 0.
 ) AS
 BEGIN

@@ -151,8 +154,10 @@ if __name__.startswith("sqlindb"):
    if time_column_name != "" and time_column_name is not None:
        automl_settings = { "time_column_name": time_column_name }
        preprocess = False
+        if max_horizon > 0:
+            automl_settings["max_horizon"] = max_horizon

-    log_file_name = "automl_errors.log"
+    log_file_name = "automl_sqlindb_errors.log"
 	 
    automl_config = AutoMLConfig(task = task, 
                                 debug_log = log_file_name, 
@@ -163,7 +168,6 @@ if __name__.startswith("sqlindb"):
                                 n_cross_validations = n_cross_validations, 
                                 preprocess = preprocess,
                                 verbosity = logging.INFO, 
-                                 enable_ensembling = False,
                                 X = X_train,  
                                 y = y_train, 
                                 path = project_folder,
@@ -211,7 +215,8 @@ if __name__.startswith("sqlindb"):
 				  @tenantid NVARCHAR(255),
 				  @appid NVARCHAR(255),
 				  @password NVARCHAR(255),
-				  @config_file NVARCHAR(255)'
+				  @config_file NVARCHAR(255),
+				  @max_horizon INT'
 	, @label_column = @label_column
 	, @primary_metric = @primary_metric
 	, @iterations = @iterations
@@ -230,5 +235,6 @@ if __name__.startswith("sqlindb"):
 	, @appid = @appid
 	, @password = @password
 	, @config_file = @config_file
+	, @max_horizon = @max_horizon
 WITH RESULT SETS ((best_run NVARCHAR(250), experiment_name NVARCHAR(100), fitted_model VARCHAR(MAX), log_file_text NVARCHAR(MAX), workspace NVARCHAR(100)))
 END
--- a/how-to-use-azureml/automated-machine-learning/sql-server/setup/auto-ml-sql-setup.ipynb
+++ b/how-to-use-azureml/automated-machine-learning/sql-server/setup/auto-ml-sql-setup.ipynb