diff --git a/how-to-use-azureml/automated-machine-learning/sql-server/README.md b/how-to-use-azureml/automated-machine-learning/sql-server/README.md
new file mode 100644
index 00000000..338262b3
--- /dev/null
+++ b/how-to-use-azureml/automated-machine-learning/sql-server/README.md
@@ -0,0 +1,113 @@
+# Table of Contents
+1. [Introduction](#introduction)
+1. [Setup using Azure Data Studio](#setup-using-azure-data-studio)
+1. [Energy demand example using Azure Data Studio](#energy-demand-example-using-azure-data-studio)
+1. [Setup using SQL Server Management Studio for SQL Server 2017 on Windows](#setup-using-sql-server-management-studio-for-sql-server-2017-on-windows)
+1. [Setup using SQL Server Management Studio for SQL Server 2019 on Linux](#setup-using-sql-server-management-studio-for-sql-server-2019-on-linux)
+1. [Energy demand example using SQL Server Management Studio](#energy-demand-example-using-sql-server-management-studio)
+
+
+
+# Introduction
+SQL Server 2017 or 2019 can call Azure ML automated machine learning to create models trained on data from SQL Server.
+This uses the sp_execute_external_script stored procedure, which can call Python scripts.
+SQL Server 2017 and SQL Server 2019 can both run on Windows or Linux.
+However, this integration is not available for SQL Server 2017 on Linux.
+
+This folder shows how to set up the integration and has a sample that uses the integration to train and predict based on an energy demand dataset.
+
+This integration is part of SQL Server and so can be used from any SQL client.
+These instructions show using it from Azure Data Studio or SQL Server Management Studio.
+
+
+## Setup using Azure Data Studio
+
+These steps show how to set up the integration using Azure Data Studio.
+
+1. If you don't already have SQL Server, you can install it from [https://www.microsoft.com/en-us/sql-server/sql-server-downloads](https://www.microsoft.com/en-us/sql-server/sql-server-downloads)
+1. Install Azure Data Studio from [https://docs.microsoft.com/en-us/sql/azure-data-studio/download?view=sql-server-2017](https://docs.microsoft.com/en-us/sql/azure-data-studio/download?view=sql-server-2017)
+1. Start Azure Data Studio and connect to SQL Server. [https://docs.microsoft.com/en-us/sql/azure-data-studio/sql-notebooks?view=sql-server-2017](https://docs.microsoft.com/en-us/sql/azure-data-studio/sql-notebooks?view=sql-server-2017)
+1. Create a database named "automl".
+1. Open the notebook how-to-use-azureml\automated-machine-learning\sql-server\setup\auto-ml-sql-setup.ipynb and follow the instructions in it.
+
+
+## Energy demand example using Azure Data Studio
+
+Once you have completed the setup, you can try the energy demand sample in the notebook energy-demand\auto-ml-sql-energy-demand.ipynb.
+This has cells to train a model, predict based on the model and show metrics for each pipeline run in training the model.
+
+
+## Setup using SQL Server Management Studio for SQL Server 2017 on Windows
+
+These instructions set up the integration for SQL Server 2017 on Windows.
+
+1. If you don't already have SQL Server, you can install it from [https://www.microsoft.com/en-us/sql-server/sql-server-downloads](https://www.microsoft.com/en-us/sql-server/sql-server-downloads)
+2. Enable external scripts with the following commands:
+```sh
+ sp_configure 'external scripts enabled',1
+ reconfigure with override
+```
+3. Stop SQL Server.
+4. Install the automated machine learning libraries using the following commands from an Administrator command prompt (If you are using a non-default SQL Server instance name, replace MSSQLSERVER in the second command with the instance name)
+```sh
+ cd "C:\Program Files\Microsoft SQL Server"
+ cd "MSSQL14.MSSQLSERVER\PYTHON_SERVICES"
+ python.exe -m pip install azureml-sdk[automl]
+ python.exe -m pip install --upgrade numpy
+ python.exe -m pip install --upgrade sklearn
+```
+5. Start SQL Server and the service "SQL Server Launchpad service".
+6. In Windows Firewall, click on advanced settings and in Outbound Rules, disable "Block network access for R local user accounts in SQL Server instance xxxx".
+7. Execute the files in the setup folder in SQL Server Management Studio: aml_model.sql, aml_connection.sql, AutoMLGetMetrics.sql, AutoMLPredict.sql and AutoMLTrain.sql
+8. Create an Azure Machine Learning Workspace. You can use the instructions at: [https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-workspace ](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-workspace)
+9. Create a config.json file using the subscription id, resource group name and workspace name that you used to create the workspace. The file is described at: [https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-configure-environment#workspace](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-configure-environment#workspace)
+10. Create an Azure service principal. You can do this with the commands:
+```sh
+ az login
+ az account set --subscription subscriptionid
+   az ad sp create-for-rbac --name principalname --password password
+```
+11. Insert the values \<appId\>, \<password\> and \<tenant\> returned by create-for-rbac above into the aml_connection table. Set \<ConfigFile\> as the absolute path to your config.json file. Set the name to "Default".
+
+
+## Setup using SQL Server Management Studio for SQL Server 2019 on Linux
+1. Install SQL Server 2019 from: [https://www.microsoft.com/en-us/sql-server/sql-server-downloads](https://www.microsoft.com/en-us/sql-server/sql-server-downloads)
+2. Install machine learning support from: [https://docs.microsoft.com/en-us/sql/linux/sql-server-linux-setup-machine-learning?view=sqlallproducts-allversions#ubuntu](https://docs.microsoft.com/en-us/sql/linux/sql-server-linux-setup-machine-learning?view=sqlallproducts-allversions#ubuntu)
+3. Then install SQL Server Management Studio from [https://docs.microsoft.com/en-us/sql/ssms/download-sql-server-management-studio-ssms?view=sql-server-2017](https://docs.microsoft.com/en-us/sql/ssms/download-sql-server-management-studio-ssms?view=sql-server-2017)
+4. Enable external scripts with the following commands:
+```sh
+ sp_configure 'external scripts enabled',1
+ reconfigure with override
+```
+5. Stop SQL Server.
+6. Install the automated machine learning libraries using the following commands from a terminal:
+```sh
+ sudo /opt/mssql/mlservices/bin/python/python -m pip install azureml-sdk[automl]
+ sudo /opt/mssql/mlservices/bin/python/python -m pip install --upgrade numpy
+ sudo /opt/mssql/mlservices/bin/python/python -m pip install --upgrade sklearn
+```
+7. Start SQL Server.
+8. Execute the files aml_model.sql, aml_connection.sql, AutoMLGetMetrics.sql, AutoMLPredict.sql and AutoMLTrain.sql in SQL Server Management Studio.
+9. Create an Azure Machine Learning Workspace. You can use the instructions at: [https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-workspace](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-workspace)
+10. Create a config.json file using the subscription id, resource group name and workspace name that you used to create the workspace. The file is described at: [https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-configure-environment#workspace](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-configure-environment#workspace)
+11. Create an Azure service principal. You can do this with the commands:
+```sh
+ az login
+ az account set --subscription subscriptionid
+   az ad sp create-for-rbac --name principalname --password password
+```
+12. Insert the values \<appId\>, \<password\> and \<tenant\> returned by create-for-rbac above into the aml_connection table. Set \<ConfigFile\> as the absolute path to your config.json file. Set the name to "Default".
+
+
+## Energy demand example using SQL Server Management Studio
+
+Once you have completed the setup, you can try the energy demand sample queries.
+First you need to load the sample data in the database.
+1. In SQL Server Management Studio, you can right-click the database, select Tasks, then Import Flat File.
+1. Select the file MachineLearningNotebooks\notebooks\how-to-use-azureml\automated-machine-learning\forecasting-energy-demand\nyc_energy.csv.
+1. When you get to the column definition page, allow nulls for all columns.
+
+You can then run the queries in the energy-demand folder:
+* TrainEnergyDemand.sql runs AutoML, trains multiple models on data and selects the best model.
+* PredictEnergyDemand.sql predicts based on the most recent training run.
+* GetMetrics.sql returns all the metrics for each model in the most recent training run.
diff --git a/how-to-use-azureml/automated-machine-learning/sql-server/energy-demand/GetMetrics.sql b/how-to-use-azureml/automated-machine-learning/sql-server/energy-demand/GetMetrics.sql
new file mode 100644
index 00000000..5f0000cf
--- /dev/null
+++ b/how-to-use-azureml/automated-machine-learning/sql-server/energy-demand/GetMetrics.sql
@@ -0,0 +1,10 @@
+-- This lists all the metrics for all iterations for the most recent run.
+
+DECLARE @RunId NVARCHAR(43)
+DECLARE @ExperimentName NVARCHAR(255)
+
+SELECT TOP 1 @ExperimentName=ExperimentName, @RunId=SUBSTRING(RunId, 1, 43)
+FROM aml_model
+ORDER BY CreatedDate DESC
+
+EXEC dbo.AutoMLGetMetrics @RunId, @ExperimentName
\ No newline at end of file
diff --git a/how-to-use-azureml/automated-machine-learning/sql-server/energy-demand/PredictEnergyDemand.sql b/how-to-use-azureml/automated-machine-learning/sql-server/energy-demand/PredictEnergyDemand.sql
new file mode 100644
index 00000000..1161a758
--- /dev/null
+++ b/how-to-use-azureml/automated-machine-learning/sql-server/energy-demand/PredictEnergyDemand.sql
@@ -0,0 +1,17 @@
+-- This shows using the AutoMLPredict stored procedure to predict using a forecasting model for the nyc_energy dataset.
+
+DECLARE @Model NVARCHAR(MAX) = (SELECT TOP 1 Model FROM dbo.aml_model
+ WHERE ExperimentName = 'automl-sql-forecast'
+ ORDER BY CreatedDate DESC)
+
+EXEC dbo.AutoMLPredict @input_query='
+SELECT CAST(timeStamp AS NVARCHAR(30)) AS timeStamp,
+ demand,
+ precip,
+ temp
+FROM nyc_energy
+WHERE demand IS NOT NULL AND precip IS NOT NULL AND temp IS NOT NULL
+AND timeStamp >= ''2017-02-01''',
+@label_column='demand',
+@model=@model
+WITH RESULT SETS ((timeStamp NVARCHAR(30), actual_demand FLOAT, precip FLOAT, temp FLOAT, predicted_demand FLOAT))
\ No newline at end of file
diff --git a/how-to-use-azureml/automated-machine-learning/sql-server/energy-demand/TrainEnergyDemand.sql b/how-to-use-azureml/automated-machine-learning/sql-server/energy-demand/TrainEnergyDemand.sql
new file mode 100644
index 00000000..a38e5232
--- /dev/null
+++ b/how-to-use-azureml/automated-machine-learning/sql-server/energy-demand/TrainEnergyDemand.sql
@@ -0,0 +1,21 @@
+-- This shows using the AutoMLTrain stored procedure to create a forecasting model for the nyc_energy dataset.
+
+INSERT INTO dbo.aml_model(RunId, ExperimentName, Model, LogFileText, WorkspaceName)
+EXEC dbo.AutoMLTrain @input_query='
+SELECT CAST(timeStamp as NVARCHAR(30)) as timeStamp,
+ demand,
+ precip,
+ temp,
+ CASE WHEN timeStamp < ''2017-01-01'' THEN 0 ELSE 1 END AS is_validate_column
+FROM nyc_energy
+WHERE demand IS NOT NULL AND precip IS NOT NULL AND temp IS NOT NULL
+and timeStamp < ''2017-02-01''',
+@label_column='demand',
+@task='forecasting',
+@iterations=10,
+@iteration_timeout_minutes=5,
+@time_column_name='timeStamp',
+@is_validate_column='is_validate_column',
+@experiment_name='automl-sql-forecast',
+@primary_metric='normalized_root_mean_squared_error'
+
diff --git a/how-to-use-azureml/automated-machine-learning/sql-server/energy-demand/auto-ml-sql-energy-demand.ipynb b/how-to-use-azureml/automated-machine-learning/sql-server/energy-demand/auto-ml-sql-energy-demand.ipynb
new file mode 100644
index 00000000..3136f29f
--- /dev/null
+++ b/how-to-use-azureml/automated-machine-learning/sql-server/energy-demand/auto-ml-sql-energy-demand.ipynb
@@ -0,0 +1,141 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Train a model and use it for prediction\r\n",
+ "\r\n",
+ "Before running this notebook, run the auto-ml-sql-setup.ipynb notebook."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ ""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Set the default database"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "USE [automl]\r\n",
+ "GO"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Use the AutoMLTrain stored procedure to create a forecasting model for the nyc_energy dataset."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "INSERT INTO dbo.aml_model(RunId, ExperimentName, Model, LogFileText, WorkspaceName)\r\n",
+ "EXEC dbo.AutoMLTrain @input_query='\r\n",
+ "SELECT CAST(timeStamp as NVARCHAR(30)) as timeStamp,\r\n",
+ " demand,\r\n",
+ "\t precip,\r\n",
+ "\t temp,\r\n",
+ "\t CASE WHEN timeStamp < ''2017-01-01'' THEN 0 ELSE 1 END AS is_validate_column\r\n",
+ "FROM nyc_energy\r\n",
+ "WHERE demand IS NOT NULL AND precip IS NOT NULL AND temp IS NOT NULL\r\n",
+ "and timeStamp < ''2017-02-01''',\r\n",
+ "@label_column='demand',\r\n",
+ "@task='forecasting',\r\n",
+ "@iterations=10,\r\n",
+ "@iteration_timeout_minutes=5,\r\n",
+ "@time_column_name='timeStamp',\r\n",
+ "@is_validate_column='is_validate_column',\r\n",
+ "@experiment_name='automl-sql-forecast',\r\n",
+ "@primary_metric='normalized_root_mean_squared_error'"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Use the AutoMLPredict stored procedure to predict using the forecasting model for the nyc_energy dataset."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "DECLARE @Model NVARCHAR(MAX) = (SELECT TOP 1 Model FROM dbo.aml_model\r\n",
+ " WHERE ExperimentName = 'automl-sql-forecast'\r\n",
+ "\t\t\t\t\t\t\t\tORDER BY CreatedDate DESC)\r\n",
+ "\r\n",
+ "EXEC dbo.AutoMLPredict @input_query='\r\n",
+ "SELECT CAST(timeStamp AS NVARCHAR(30)) AS timeStamp,\r\n",
+ " demand,\r\n",
+ "\t precip,\r\n",
+ "\t temp\r\n",
+ "FROM nyc_energy\r\n",
+ "WHERE demand IS NOT NULL AND precip IS NOT NULL AND temp IS NOT NULL\r\n",
+ "AND timeStamp >= ''2017-02-01''',\r\n",
+ "@label_column='demand',\r\n",
+ "@model=@model\r\n",
+ "WITH RESULT SETS ((timeStamp NVARCHAR(30), actual_demand FLOAT, precip FLOAT, temp FLOAT, predicted_demand FLOAT))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## List all the metrics for all iterations for the most recent training run."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "DECLARE @RunId NVARCHAR(43)\r\n",
+ "DECLARE @ExperimentName NVARCHAR(255)\r\n",
+ "\r\n",
+ "SELECT TOP 1 @ExperimentName=ExperimentName, @RunId=SUBSTRING(RunId, 1, 43)\r\n",
+ "FROM aml_model\r\n",
+ "ORDER BY CreatedDate DESC\r\n",
+ "\r\n",
+ "EXEC dbo.AutoMLGetMetrics @RunId, @ExperimentName"
+ ]
+ }
+ ],
+ "metadata": {
+ "authors": [
+ {
+ "name": "jeffshep"
+ }
+ ],
+ "kernelspec": {
+ "display_name": "SQL",
+ "language": "sql",
+ "name": "SQL"
+ },
+ "language_info": {
+ "name": "sql",
+ "version": ""
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/how-to-use-azureml/automated-machine-learning/sql-server/setup/AutoMLGetMetrics.sql b/how-to-use-azureml/automated-machine-learning/sql-server/setup/AutoMLGetMetrics.sql
new file mode 100644
index 00000000..555b4de9
--- /dev/null
+++ b/how-to-use-azureml/automated-machine-learning/sql-server/setup/AutoMLGetMetrics.sql
@@ -0,0 +1,70 @@
+-- This procedure returns a list of metrics for each iteration of a run.
+SET ANSI_NULLS ON
+GO
+SET QUOTED_IDENTIFIER ON
+GO
+CREATE OR ALTER PROCEDURE [dbo].[AutoMLGetMetrics]
+ (
+ @run_id NVARCHAR(250), -- The RunId
+ @experiment_name NVARCHAR(32)='automl-sql-test', -- This can be used to find the experiment in the Azure Portal.
+ @connection_name NVARCHAR(255)='default' -- The AML connection to use.
+ ) AS
+BEGIN
+ DECLARE @tenantid NVARCHAR(255)
+ DECLARE @appid NVARCHAR(255)
+ DECLARE @password NVARCHAR(255)
+ DECLARE @config_file NVARCHAR(255)
+
+ SELECT @tenantid=TenantId, @appid=AppId, @password=Password, @config_file=ConfigFile
+ FROM aml_connection
+ WHERE ConnectionName = @connection_name;
+
+ EXEC sp_execute_external_script @language = N'Python', @script = N'import pandas as pd
+import logging
+import azureml.core
+import numpy as np
+from azureml.core.experiment import Experiment
+from azureml.train.automl.run import AutoMLRun
+from azureml.core.authentication import ServicePrincipalAuthentication
+from azureml.core.workspace import Workspace
+
+auth = ServicePrincipalAuthentication(tenantid, appid, password)
+
+ws = Workspace.from_config(path=config_file, auth=auth)
+
+experiment = Experiment(ws, experiment_name)
+
+ml_run = AutoMLRun(experiment = experiment, run_id = run_id)
+
+children = list(ml_run.get_children())
+iterationlist = []
+metricnamelist = []
+metricvaluelist = []
+
+for run in children:
+ properties = run.get_properties()
+ if "iteration" in properties:
+ iteration = int(properties["iteration"])
+ for metric_name, metric_value in run.get_metrics().items():
+ if isinstance(metric_value, float):
+ iterationlist.append(iteration)
+ metricnamelist.append(metric_name)
+ metricvaluelist.append(metric_value)
+
+metrics = pd.DataFrame({"iteration": iterationlist, "metric_name": metricnamelist, "metric_value": metricvaluelist})
+'
+ , @output_data_1_name = N'metrics'
+ , @params = N'@run_id NVARCHAR(250),
+ @experiment_name NVARCHAR(32),
+ @tenantid NVARCHAR(255),
+ @appid NVARCHAR(255),
+ @password NVARCHAR(255),
+ @config_file NVARCHAR(255)'
+ , @run_id = @run_id
+ , @experiment_name = @experiment_name
+ , @tenantid = @tenantid
+ , @appid = @appid
+ , @password = @password
+ , @config_file = @config_file
+WITH RESULT SETS ((iteration INT, metric_name NVARCHAR(100), metric_value FLOAT))
+END
\ No newline at end of file
diff --git a/how-to-use-azureml/automated-machine-learning/sql-server/setup/AutoMLPredict.sql b/how-to-use-azureml/automated-machine-learning/sql-server/setup/AutoMLPredict.sql
new file mode 100644
index 00000000..23d4173a
--- /dev/null
+++ b/how-to-use-azureml/automated-machine-learning/sql-server/setup/AutoMLPredict.sql
@@ -0,0 +1,41 @@
+-- This procedure predicts values based on a model returned by AutoMLTrain and a dataset.
+-- It returns the dataset with a new column added, which is the predicted value.
+SET ANSI_NULLS ON
+GO
+SET QUOTED_IDENTIFIER ON
+GO
+CREATE OR ALTER PROCEDURE [dbo].[AutoMLPredict]
+ (
+ @input_query NVARCHAR(MAX), -- A SQL query returning data to predict on.
+ @model NVARCHAR(MAX), -- A model returned from AutoMLTrain.
+ @label_column NVARCHAR(255)='' -- Optional name of the column from input_query, which should be ignored when predicting
+ ) AS
+BEGIN
+
+ EXEC sp_execute_external_script @language = N'Python', @script = N'import pandas as pd
+import azureml.core
+import numpy as np
+from azureml.train.automl import AutoMLConfig
+import pickle
+import codecs
+
+model_obj = pickle.loads(codecs.decode(model.encode(), "base64"))
+
+test_data = input_data.copy()
+
+if label_column != "" and label_column is not None:
+ y_test = test_data.pop(label_column).values
+X_test = test_data
+
+predicted = model_obj.predict(X_test)
+
+combined_output = input_data.assign(predicted=predicted)
+
+'
+ , @input_data_1 = @input_query
+ , @input_data_1_name = N'input_data'
+ , @output_data_1_name = N'combined_output'
+ , @params = N'@model NVARCHAR(MAX), @label_column NVARCHAR(255)'
+ , @model = @model
+ , @label_column = @label_column
+END
diff --git a/how-to-use-azureml/automated-machine-learning/sql-server/setup/AutoMLTrain.sql b/how-to-use-azureml/automated-machine-learning/sql-server/setup/AutoMLTrain.sql
new file mode 100644
index 00000000..02bc975b
--- /dev/null
+++ b/how-to-use-azureml/automated-machine-learning/sql-server/setup/AutoMLTrain.sql
@@ -0,0 +1,234 @@
+-- This stored procedure uses automated machine learning to train several models
+-- and returns the best model.
+--
+-- The result set has several columns:
+-- best_run - iteration ID for the best model
+-- experiment_name - experiment name pass in with the @experiment_name parameter
+-- fitted_model - best model found
+-- log_file_text - AutoML debug_log contents
+-- workspace - name of the Azure ML workspace where run history is stored
+--
+-- An example call for a classification problem is:
+-- insert into dbo.aml_model(RunId, ExperimentName, Model, LogFileText, WorkspaceName)
+-- exec dbo.AutoMLTrain @input_query='
+-- SELECT top 100000
+-- CAST([pickup_datetime] AS NVARCHAR(30)) AS pickup_datetime
+-- ,CAST([dropoff_datetime] AS NVARCHAR(30)) AS dropoff_datetime
+-- ,[passenger_count]
+-- ,[trip_time_in_secs]
+-- ,[trip_distance]
+-- ,[payment_type]
+-- ,[tip_class]
+-- FROM [dbo].[nyctaxi_sample] order by [hack_license] ',
+-- @label_column = 'tip_class',
+-- @iterations=10
+--
+-- An example call for forecasting is:
+-- insert into dbo.aml_model(RunId, ExperimentName, Model, LogFileText, WorkspaceName)
+-- exec dbo.AutoMLTrain @input_query='
+-- select cast(timeStamp as nvarchar(30)) as timeStamp,
+-- demand,
+-- precip,
+-- temp,
+-- case when timeStamp < ''2017-01-01'' then 0 else 1 end as is_validate_column
+-- from nyc_energy
+-- where demand is not null and precip is not null and temp is not null
+-- and timeStamp < ''2017-02-01''',
+-- @label_column='demand',
+-- @task='forecasting',
+-- @iterations=10,
+-- @iteration_timeout_minutes=5,
+-- @time_column_name='timeStamp',
+-- @is_validate_column='is_validate_column',
+-- @experiment_name='automl-sql-forecast',
+-- @primary_metric='normalized_root_mean_squared_error'
+
+SET ANSI_NULLS ON
+GO
+SET QUOTED_IDENTIFIER ON
+GO
+CREATE OR ALTER PROCEDURE [dbo].[AutoMLTrain]
+ (
+ @input_query NVARCHAR(MAX), -- The SQL Query that will return the data to train and validate the model.
+ @label_column NVARCHAR(255)='Label', -- The name of the column in the result of @input_query that is the label.
+ @primary_metric NVARCHAR(40)='AUC_weighted', -- The metric to optimize.
+ @iterations INT=100, -- The maximum number of pipelines to train.
+ @task NVARCHAR(40)='classification', -- The type of task. Can be classification, regression or forecasting.
+ @experiment_name NVARCHAR(32)='automl-sql-test', -- This can be used to find the experiment in the Azure Portal.
+ @iteration_timeout_minutes INT = 15, -- The maximum time in minutes for training a single pipeline.
+ @experiment_timeout_minutes INT = 60, -- The maximum time in minutes for training all pipelines.
+ @n_cross_validations INT = 3, -- The number of cross validations.
+ @blacklist_models NVARCHAR(MAX) = '', -- A comma separated list of algos that will not be used.
+ -- The list of possible models can be found at:
+ -- https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-configure-auto-train#configure-your-experiment-settings
+ @whitelist_models NVARCHAR(MAX) = '', -- A comma separated list of algos that can be used.
+ -- The list of possible models can be found at:
+ -- https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-configure-auto-train#configure-your-experiment-settings
+  @experiment_exit_score FLOAT = 0,            -- Stop the experiment if this score is achieved.
+ @sample_weight_column NVARCHAR(255)='', -- The name of the column in the result of @input_query that gives a sample weight.
+ @is_validate_column NVARCHAR(255)='', -- The name of the column in the result of @input_query that indicates if the row is for training or validation.
+ -- In the values of the column, 0 means for training and 1 means for validation.
+ @time_column_name NVARCHAR(255)='', -- The name of the timestamp column for forecasting.
+ @connection_name NVARCHAR(255)='default' -- The AML connection to use.
+ ) AS
+BEGIN
+
+ DECLARE @tenantid NVARCHAR(255)
+ DECLARE @appid NVARCHAR(255)
+ DECLARE @password NVARCHAR(255)
+ DECLARE @config_file NVARCHAR(255)
+
+ SELECT @tenantid=TenantId, @appid=AppId, @password=Password, @config_file=ConfigFile
+ FROM aml_connection
+ WHERE ConnectionName = @connection_name;
+
+ EXEC sp_execute_external_script @language = N'Python', @script = N'import pandas as pd
+import logging
+import azureml.core
+import pandas as pd
+import numpy as np
+from azureml.core.experiment import Experiment
+from azureml.train.automl import AutoMLConfig
+from sklearn import datasets
+import pickle
+import codecs
+from azureml.core.authentication import ServicePrincipalAuthentication
+from azureml.core.workspace import Workspace
+
+if __name__.startswith("sqlindb"):
+ auth = ServicePrincipalAuthentication(tenantid, appid, password)
+
+ ws = Workspace.from_config(path=config_file, auth=auth)
+
+ project_folder = "./sample_projects/" + experiment_name
+
+ experiment = Experiment(ws, experiment_name)
+
+ data_train = input_data
+ X_valid = None
+ y_valid = None
+ sample_weight_valid = None
+
+ if is_validate_column != "" and is_validate_column is not None:
+ data_train = input_data[input_data[is_validate_column] <= 0]
+ data_valid = input_data[input_data[is_validate_column] > 0]
+ data_train.pop(is_validate_column)
+ data_valid.pop(is_validate_column)
+ y_valid = data_valid.pop(label_column).values
+ if sample_weight_column != "" and sample_weight_column is not None:
+ sample_weight_valid = data_valid.pop(sample_weight_column).values
+ X_valid = data_valid
+ n_cross_validations = None
+
+ y_train = data_train.pop(label_column).values
+
+ sample_weight = None
+ if sample_weight_column != "" and sample_weight_column is not None:
+ sample_weight = data_train.pop(sample_weight_column).values
+
+ X_train = data_train
+
+ if experiment_timeout_minutes == 0:
+ experiment_timeout_minutes = None
+
+ if experiment_exit_score == 0:
+ experiment_exit_score = None
+
+ if blacklist_models == "":
+ blacklist_models = None
+
+ if blacklist_models is not None:
+ blacklist_models = blacklist_models.replace(" ", "").split(",")
+
+ if whitelist_models == "":
+ whitelist_models = None
+
+ if whitelist_models is not None:
+ whitelist_models = whitelist_models.replace(" ", "").split(",")
+
+ automl_settings = {}
+ preprocess = True
+ if time_column_name != "" and time_column_name is not None:
+ automl_settings = { "time_column_name": time_column_name }
+ preprocess = False
+
+ log_file_name = "automl_errors.log"
+
+ automl_config = AutoMLConfig(task = task,
+ debug_log = log_file_name,
+ primary_metric = primary_metric,
+ iteration_timeout_minutes = iteration_timeout_minutes,
+ experiment_timeout_minutes = experiment_timeout_minutes,
+ iterations = iterations,
+ n_cross_validations = n_cross_validations,
+ preprocess = preprocess,
+ verbosity = logging.INFO,
+ enable_ensembling = False,
+ X = X_train,
+ y = y_train,
+ path = project_folder,
+ blacklist_models = blacklist_models,
+ whitelist_models = whitelist_models,
+ experiment_exit_score = experiment_exit_score,
+ sample_weight = sample_weight,
+ X_valid = X_valid,
+ y_valid = y_valid,
+ sample_weight_valid = sample_weight_valid,
+ **automl_settings)
+
+ local_run = experiment.submit(automl_config, show_output = True)
+
+ best_run, fitted_model = local_run.get_output()
+
+ pickled_model = codecs.encode(pickle.dumps(fitted_model), "base64").decode()
+
+ log_file_text = ""
+
+ try:
+ with open(log_file_name, "r") as log_file:
+ log_file_text = log_file.read()
+ except:
+ log_file_text = "Log file not found"
+
+ returned_model = pd.DataFrame({"best_run": [best_run.id], "experiment_name": [experiment_name], "fitted_model": [pickled_model], "log_file_text": [log_file_text], "workspace": [ws.name]}, dtype=np.dtype(np.str))
+'
+ , @input_data_1 = @input_query
+ , @input_data_1_name = N'input_data'
+ , @output_data_1_name = N'returned_model'
+ , @params = N'@label_column NVARCHAR(255),
+ @primary_metric NVARCHAR(40),
+ @iterations INT, @task NVARCHAR(40),
+ @experiment_name NVARCHAR(32),
+ @iteration_timeout_minutes INT,
+ @experiment_timeout_minutes INT,
+ @n_cross_validations INT,
+ @blacklist_models NVARCHAR(MAX),
+ @whitelist_models NVARCHAR(MAX),
+ @experiment_exit_score FLOAT,
+ @sample_weight_column NVARCHAR(255),
+ @is_validate_column NVARCHAR(255),
+ @time_column_name NVARCHAR(255),
+ @tenantid NVARCHAR(255),
+ @appid NVARCHAR(255),
+ @password NVARCHAR(255),
+ @config_file NVARCHAR(255)'
+ , @label_column = @label_column
+ , @primary_metric = @primary_metric
+ , @iterations = @iterations
+ , @task = @task
+ , @experiment_name = @experiment_name
+ , @iteration_timeout_minutes = @iteration_timeout_minutes
+ , @experiment_timeout_minutes = @experiment_timeout_minutes
+ , @n_cross_validations = @n_cross_validations
+ , @blacklist_models = @blacklist_models
+ , @whitelist_models = @whitelist_models
+ , @experiment_exit_score = @experiment_exit_score
+ , @sample_weight_column = @sample_weight_column
+ , @is_validate_column = @is_validate_column
+ , @time_column_name = @time_column_name
+ , @tenantid = @tenantid
+ , @appid = @appid
+ , @password = @password
+ , @config_file = @config_file
+WITH RESULT SETS ((best_run NVARCHAR(250), experiment_name NVARCHAR(100), fitted_model VARCHAR(MAX), log_file_text NVARCHAR(MAX), workspace NVARCHAR(100)))
+END
\ No newline at end of file
diff --git a/how-to-use-azureml/automated-machine-learning/sql-server/setup/aml_connection.sql b/how-to-use-azureml/automated-machine-learning/sql-server/setup/aml_connection.sql
new file mode 100644
index 00000000..55e9caac
--- /dev/null
+++ b/how-to-use-azureml/automated-machine-learning/sql-server/setup/aml_connection.sql
@@ -0,0 +1,18 @@
+-- This is a table to store the Azure ML connection information.
+SET ANSI_NULLS ON
+GO
+
+SET QUOTED_IDENTIFIER ON
+GO
+
+CREATE TABLE [dbo].[aml_connection](
+ [Id] [int] IDENTITY(1,1) NOT NULL PRIMARY KEY,
+ [ConnectionName] [nvarchar](255) NULL,
+ [TenantId] [nvarchar](255) NULL,
+ [AppId] [nvarchar](255) NULL,
+ [Password] [nvarchar](255) NULL,
+ [ConfigFile] [nvarchar](255) NULL
+) ON [PRIMARY]
+GO
+
+
diff --git a/how-to-use-azureml/automated-machine-learning/sql-server/setup/aml_model.sql b/how-to-use-azureml/automated-machine-learning/sql-server/setup/aml_model.sql
new file mode 100644
index 00000000..f1363afc
--- /dev/null
+++ b/how-to-use-azureml/automated-machine-learning/sql-server/setup/aml_model.sql
@@ -0,0 +1,22 @@
+-- This is a table to hold the results from the AutoMLTrain procedure.
+SET ANSI_NULLS ON
+GO
+
+SET QUOTED_IDENTIFIER ON
+GO
+
+CREATE TABLE [dbo].[aml_model](
+ [Id] [int] IDENTITY(1,1) NOT NULL PRIMARY KEY,
+ [Model] [varchar](max) NOT NULL, -- The model, which can be passed to AutoMLPredict for testing or prediction.
+ [RunId] [nvarchar](250) NULL, -- The RunId, which can be used to view the model in the Azure Portal.
+ [CreatedDate] [datetime] NULL,
+ [ExperimentName] [nvarchar](100) NULL, -- Azure ML Experiment Name
+ [WorkspaceName] [nvarchar](100) NULL, -- Azure ML Workspace Name
+ [LogFileText] [nvarchar](max) NULL
+)
+GO
+
+ALTER TABLE [dbo].[aml_model] ADD DEFAULT (getutcdate()) FOR [CreatedDate]
+GO
+
+
diff --git a/how-to-use-azureml/automated-machine-learning/sql-server/setup/auto-ml-sql-setup.ipynb b/how-to-use-azureml/automated-machine-learning/sql-server/setup/auto-ml-sql-setup.ipynb
new file mode 100644
index 00000000..902818a4
--- /dev/null
+++ b/how-to-use-azureml/automated-machine-learning/sql-server/setup/auto-ml-sql-setup.ipynb
@@ -0,0 +1,562 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Set up Azure ML Automated Machine Learning on SQL Server 2019 CTP 2.4 big data cluster\r\n",
+ "\r\n",
+ "\\# Prerequisites: \r\n",
+ "\\# - An Azure subscription and resource group \r\n",
+ "\\# - An Azure Machine Learning workspace \r\n",
+ "\\# - A SQL Server 2019 CTP 2.4 big data cluster with Internet access and a database named 'automl' \r\n",
+ "\\# - Azure CLI \r\n",
+ "\\# - kubectl command \r\n",
+ "\\# - The https://github.com/Azure/MachineLearningNotebooks repository downloaded (cloned) to your local machine\r\n",
+ "\r\n",
+ "\\# In the 'automl' database, create a table named 'dbo.nyc_energy' as follows: \r\n",
+ "\\# - In SQL Server Management Studio, right-click the 'automl' database, select Tasks, then Import Flat File. \r\n",
+ "\\# - Select the file AzureMlCli\\notebooks\\how-to-use-azureml\\automated-machine-learning\\forecasting-energy-demand\\nyc_energy.csv. \r\n",
+ "\\# - Using the \"Modify Columns\" page, allow nulls for all columns. \r\n",
+ "\r\n",
+ "\\# Create an Azure Machine Learning Workspace using the instructions at https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-workspace \r\n",
+ "\r\n",
+ "\\# Create an Azure service principal. You can do this with the following commands: \r\n",
+ "\r\n",
+ "az login \r\n",
+ "az account set --subscription *subscriptionid* \r\n",
+ "\r\n",
+ "\\# The following command prints out the **appId** and **tenant**, \r\n",
+ "\\# which you insert into the indicated cell later in this notebook \r\n",
+ "\\# to allow AutoML to authenticate with Azure: \r\n",
+ "\r\n",
+ "az ad sp create-for-rbac --name *principalname* --password *password*\r\n",
+ "\r\n",
+ "\\# Log into the master instance of SQL Server 2019 CTP 2.4: \r\n",
+ "kubectl exec -it mssql-master-pool-0 -n *clustername* -c mssql-server -- /bin/bash\r\n",
+ "\r\n",
+ "mkdir /tmp/aml\r\n",
+ "\r\n",
+ "cd /tmp/aml\r\n",
+ "\r\n",
+ "\\# **Modify** the following with your subscription_id, resource_group, and workspace_name: \r\n",
+ "cat > config.json << EOF \r\n",
+ "{ \r\n",
+ " \"subscription_id\": \"123456ab-78cd-0123-45ef-abcd12345678\", \r\n",
+ " \"resource_group\": \"myrg1\", \r\n",
+ " \"workspace_name\": \"myws1\" \r\n",
+ "} \r\n",
+ "EOF\r\n",
+ "\r\n",
+ "\\# The directory referenced below is appropriate for the master instance of SQL Server 2019 CTP 2.4.\r\n",
+ "\r\n",
+ "cd /opt/mssql/mlservices/runtime/python/bin\r\n",
+ "\r\n",
+ "./python -m pip install azureml-sdk[automl]\r\n",
+ "\r\n",
+ "./python -m pip install --upgrade numpy \r\n",
+ "\r\n",
+ "./python -m pip install --upgrade sklearn\r\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ ""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "-- Enable external scripts to allow invoking Python\r\n",
+ "sp_configure 'external scripts enabled',1 \r\n",
+ "reconfigure with override \r\n",
+ "GO\r\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "-- Use database 'automl'\r\n",
+ "USE [automl]\r\n",
+ "GO"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "-- This is a table to hold the Azure ML connection information.\r\n",
+ "SET ANSI_NULLS ON\r\n",
+ "GO\r\n",
+ "\r\n",
+ "SET QUOTED_IDENTIFIER ON\r\n",
+ "GO\r\n",
+ "\r\n",
+ "CREATE TABLE [dbo].[aml_connection](\r\n",
+ " [Id] [int] IDENTITY(1,1) NOT NULL PRIMARY KEY,\r\n",
+ "\t[ConnectionName] [nvarchar](255) NULL,\r\n",
+ "\t[TenantId] [nvarchar](255) NULL,\r\n",
+ "\t[AppId] [nvarchar](255) NULL,\r\n",
+ "\t[Password] [nvarchar](255) NULL,\r\n",
+ "\t[ConfigFile] [nvarchar](255) NULL\r\n",
+ ") ON [PRIMARY]\r\n",
+ "GO"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Copy the values from create-for-rbac above into the cell below"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "-- Use the following values:\r\n",
+ "-- Leave the name as 'Default'\r\n",
+ "-- Insert the tenant value returned by create-for-rbac above\r\n",
+ "-- Insert the appId value returned by create-for-rbac above\r\n",
+ "-- Insert the password used in create-for-rbac above\r\n",
+ "-- Leave as '/tmp/aml/config.json'\r\n",
+ "INSERT INTO [dbo].[aml_connection] \r\n",
+ "VALUES (\r\n",
+ " N'Default', -- Name\r\n",
+ " N'11111111-2222-3333-4444-555555555555', -- Tenant\r\n",
+ " N'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee', -- AppId\r\n",
+ " N'insertpasswordhere', -- Password\r\n",
+ " N'/tmp/aml/config.json' -- Path\r\n",
+ " );\r\n",
+ "GO"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "-- This is a table to hold the results from the AutoMLTrain procedure.\r\n",
+ "SET ANSI_NULLS ON\r\n",
+ "GO\r\n",
+ "\r\n",
+ "SET QUOTED_IDENTIFIER ON\r\n",
+ "GO\r\n",
+ "\r\n",
+ "CREATE TABLE [dbo].[aml_model](\r\n",
+ " [Id] [int] IDENTITY(1,1) NOT NULL PRIMARY KEY,\r\n",
+ " [Model] [varchar](max) NOT NULL, -- The model, which can be passed to AutoMLPredict for testing or prediction.\r\n",
+ " [RunId] [nvarchar](250) NULL, -- The RunId, which can be used to view the model in the Azure Portal.\r\n",
+ " [CreatedDate] [datetime] NULL,\r\n",
+ " [ExperimentName] [nvarchar](100) NULL, -- Azure ML Experiment Name\r\n",
+ " [WorkspaceName] [nvarchar](100) NULL, -- Azure ML Workspace Name\r\n",
+ "\t[LogFileText] [nvarchar](max) NULL\r\n",
+ ") \r\n",
+ "GO\r\n",
+ "\r\n",
+ "ALTER TABLE [dbo].[aml_model] ADD DEFAULT (getutcdate()) FOR [CreatedDate]\r\n",
+ "GO\r\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "-- This stored procedure uses automated machine learning to train several models\r\n",
+ "-- and return the best model.\r\n",
+ "--\r\n",
+ "-- The result set has several columns:\r\n",
+ "-- best_run - ID of the best model found\r\n",
+ "-- experiment_name - training run name\r\n",
+ "-- fitted_model - best model found\r\n",
+ "-- log_file_text - console output\r\n",
+ "-- workspace - name of the Azure ML workspace where run history is stored\r\n",
+ "--\r\n",
+ "-- An example call for a classification problem is:\r\n",
+ "-- insert into dbo.aml_model(RunId, ExperimentName, Model, LogFileText, WorkspaceName)\r\n",
+ "-- exec dbo.AutoMLTrain @input_query='\r\n",
+ "-- SELECT top 100000 \r\n",
+ "-- CAST([pickup_datetime] AS NVARCHAR(30)) AS pickup_datetime\r\n",
+ "-- ,CAST([dropoff_datetime] AS NVARCHAR(30)) AS dropoff_datetime\r\n",
+ "-- ,[passenger_count]\r\n",
+ "-- ,[trip_time_in_secs]\r\n",
+ "-- ,[trip_distance]\r\n",
+ "-- ,[payment_type]\r\n",
+ "-- ,[tip_class]\r\n",
+ "-- FROM [dbo].[nyctaxi_sample] order by [hack_license] ',\r\n",
+ "-- @label_column = 'tip_class',\r\n",
+ "-- @iterations=10\r\n",
+ "-- \r\n",
+ "-- An example call for forecasting is:\r\n",
+ "-- insert into dbo.aml_model(RunId, ExperimentName, Model, LogFileText, WorkspaceName)\r\n",
+ "-- exec dbo.AutoMLTrain @input_query='\r\n",
+ "-- select cast(timeStamp as nvarchar(30)) as timeStamp,\r\n",
+ "-- demand,\r\n",
+ "-- \t precip,\r\n",
+ "-- \t temp,\r\n",
+ "-- case when timeStamp < ''2017-01-01'' then 0 else 1 end as is_validate_column\r\n",
+ "-- from nyc_energy\r\n",
+ "-- where demand is not null and precip is not null and temp is not null\r\n",
+ "-- and timeStamp < ''2017-02-01''',\r\n",
+ "-- @label_column='demand',\r\n",
+ "-- @task='forecasting',\r\n",
+ "-- @iterations=10,\r\n",
+ "-- @iteration_timeout_minutes=5,\r\n",
+ "-- @time_column_name='timeStamp',\r\n",
+ "-- @is_validate_column='is_validate_column',\r\n",
+ "-- @experiment_name='automl-sql-forecast',\r\n",
+ "-- @primary_metric='normalized_root_mean_squared_error'\r\n",
+ "\r\n",
+ "SET ANSI_NULLS ON\r\n",
+ "GO\r\n",
+ "SET QUOTED_IDENTIFIER ON\r\n",
+ "GO\r\n",
+ "CREATE OR ALTER PROCEDURE [dbo].[AutoMLTrain]\r\n",
+ " (\r\n",
+ " @input_query NVARCHAR(MAX), -- The SQL Query that will return the data to train and validate the model.\r\n",
+ " @label_column NVARCHAR(255)='Label', -- The name of the column in the result of @input_query that is the label.\r\n",
+ " @primary_metric NVARCHAR(40)='AUC_weighted', -- The metric to optimize.\r\n",
+ " @iterations INT=100, -- The maximum number of pipelines to train.\r\n",
+ " @task NVARCHAR(40)='classification', -- The type of task. Can be classification, regression or forecasting.\r\n",
+ " @experiment_name NVARCHAR(32)='automl-sql-test', -- This can be used to find the experiment in the Azure Portal.\r\n",
+ " @iteration_timeout_minutes INT = 15, -- The maximum time in minutes for training a single pipeline. \r\n",
+ " @experiment_timeout_minutes INT = 60, -- The maximum time in minutes for training all pipelines.\r\n",
+ " @n_cross_validations INT = 3, -- The number of cross validations.\r\n",
+ " @blacklist_models NVARCHAR(MAX) = '', -- A comma separated list of algos that will not be used.\r\n",
+ " -- The list of possible models can be found at:\r\n",
+ " -- https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-configure-auto-train#configure-your-experiment-settings\r\n",
+ " @whitelist_models NVARCHAR(MAX) = '', -- A comma separated list of algos that can be used.\r\n",
+ " -- The list of possible models can be found at:\r\n",
+ " -- https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-configure-auto-train#configure-your-experiment-settings\r\n",
+ " @experiment_exit_score FLOAT = 0, -- Stop the experiment if this score is achieved.\r\n",
+ " @sample_weight_column NVARCHAR(255)='', -- The name of the column in the result of @input_query that gives a sample weight.\r\n",
+ " @is_validate_column NVARCHAR(255)='', -- The name of the column in the result of @input_query that indicates if the row is for training or validation.\r\n",
+ "\t -- In the values of the column, 0 means for training and 1 means for validation.\r\n",
+ " @time_column_name NVARCHAR(255)='', -- The name of the timestamp column for forecasting.\r\n",
+ "\t@connection_name NVARCHAR(255)='default' -- The AML connection to use.\r\n",
+ " ) AS\r\n",
+ "BEGIN\r\n",
+ "\r\n",
+ " DECLARE @tenantid NVARCHAR(255)\r\n",
+ " DECLARE @appid NVARCHAR(255)\r\n",
+ " DECLARE @password NVARCHAR(255)\r\n",
+ " DECLARE @config_file NVARCHAR(255)\r\n",
+ "\r\n",
+ "\tSELECT @tenantid=TenantId, @appid=AppId, @password=Password, @config_file=ConfigFile\r\n",
+ "\tFROM aml_connection\r\n",
+ "\tWHERE ConnectionName = @connection_name;\r\n",
+ "\r\n",
+ "\tEXEC sp_execute_external_script @language = N'Python', @script = N'import pandas as pd\r\n",
+ "import logging \r\n",
+ "import azureml.core \r\n",
+ "import pandas as pd\r\n",
+ "import numpy as np\r\n",
+ "from azureml.core.experiment import Experiment \r\n",
+ "from azureml.train.automl import AutoMLConfig \r\n",
+ "from sklearn import datasets \r\n",
+ "import pickle\r\n",
+ "import codecs\r\n",
+ "from azureml.core.authentication import ServicePrincipalAuthentication \r\n",
+ "from azureml.core.workspace import Workspace \r\n",
+ "\r\n",
+ "if __name__.startswith(\"sqlindb\"):\r\n",
+ " auth = ServicePrincipalAuthentication(tenantid, appid, password) \r\n",
+ " \r\n",
+ " ws = Workspace.from_config(path=config_file, auth=auth) \r\n",
+ " \r\n",
+ " project_folder = \"./sample_projects/\" + experiment_name\r\n",
+ " \r\n",
+ " experiment = Experiment(ws, experiment_name) \r\n",
+ "\r\n",
+ " data_train = input_data\r\n",
+ " X_valid = None\r\n",
+ " y_valid = None\r\n",
+ " sample_weight_valid = None\r\n",
+ "\r\n",
+ " if is_validate_column != \"\" and is_validate_column is not None:\r\n",
+ " data_train = input_data[input_data[is_validate_column] <= 0]\r\n",
+ " data_valid = input_data[input_data[is_validate_column] > 0]\r\n",
+ " data_train.pop(is_validate_column)\r\n",
+ " data_valid.pop(is_validate_column)\r\n",
+ " y_valid = data_valid.pop(label_column).values\r\n",
+ " if sample_weight_column != \"\" and sample_weight_column is not None:\r\n",
+ " sample_weight_valid = data_valid.pop(sample_weight_column).values\r\n",
+ " X_valid = data_valid\r\n",
+ " n_cross_validations = None\r\n",
+ "\r\n",
+ " y_train = data_train.pop(label_column).values\r\n",
+ "\r\n",
+ " sample_weight = None\r\n",
+ " if sample_weight_column != \"\" and sample_weight_column is not None:\r\n",
+ " sample_weight = data_train.pop(sample_weight_column).values\r\n",
+ "\r\n",
+ " X_train = data_train\r\n",
+ "\r\n",
+ " if experiment_timeout_minutes == 0:\r\n",
+ " experiment_timeout_minutes = None\r\n",
+ "\r\n",
+ " if experiment_exit_score == 0:\r\n",
+ " experiment_exit_score = None\r\n",
+ "\r\n",
+ " if blacklist_models == \"\":\r\n",
+ " blacklist_models = None\r\n",
+ "\r\n",
+ " if blacklist_models is not None:\r\n",
+ " blacklist_models = blacklist_models.replace(\" \", \"\").split(\",\")\r\n",
+ "\r\n",
+ " if whitelist_models == \"\":\r\n",
+ " whitelist_models = None\r\n",
+ "\r\n",
+ " if whitelist_models is not None:\r\n",
+ " whitelist_models = whitelist_models.replace(\" \", \"\").split(\",\")\r\n",
+ "\r\n",
+ " automl_settings = {}\r\n",
+ " preprocess = True\r\n",
+ " if time_column_name != \"\" and time_column_name is not None:\r\n",
+ " automl_settings = { \"time_column_name\": time_column_name }\r\n",
+ " preprocess = False\r\n",
+ "\r\n",
+ " log_file_name = \"automl_errors.log\"\r\n",
+ "\t \r\n",
+ " automl_config = AutoMLConfig(task = task, \r\n",
+ " debug_log = log_file_name, \r\n",
+ " primary_metric = primary_metric, \r\n",
+ " iteration_timeout_minutes = iteration_timeout_minutes, \r\n",
+ " experiment_timeout_minutes = experiment_timeout_minutes,\r\n",
+ " iterations = iterations, \r\n",
+ " n_cross_validations = n_cross_validations, \r\n",
+ " preprocess = preprocess,\r\n",
+ " verbosity = logging.INFO, \r\n",
+ " enable_ensembling = False,\r\n",
+ " X = X_train, \r\n",
+ " y = y_train, \r\n",
+ " path = project_folder,\r\n",
+ " blacklist_models = blacklist_models,\r\n",
+ " whitelist_models = whitelist_models,\r\n",
+ " experiment_exit_score = experiment_exit_score,\r\n",
+ " sample_weight = sample_weight,\r\n",
+ " X_valid = X_valid,\r\n",
+ " y_valid = y_valid,\r\n",
+ " sample_weight_valid = sample_weight_valid,\r\n",
+ " **automl_settings) \r\n",
+ " \r\n",
+ " local_run = experiment.submit(automl_config, show_output = True) \r\n",
+ "\r\n",
+ " best_run, fitted_model = local_run.get_output()\r\n",
+ "\r\n",
+ " pickled_model = codecs.encode(pickle.dumps(fitted_model), \"base64\").decode()\r\n",
+ "\r\n",
+ " log_file_text = \"\"\r\n",
+ "\r\n",
+ " try:\r\n",
+ " with open(log_file_name, \"r\") as log_file:\r\n",
+ " log_file_text = log_file.read()\r\n",
+ " except:\r\n",
+ " log_file_text = \"Log file not found\"\r\n",
+ "\r\n",
+ " returned_model = pd.DataFrame({\"best_run\": [best_run.id], \"experiment_name\": [experiment_name], \"fitted_model\": [pickled_model], \"log_file_text\": [log_file_text], \"workspace\": [ws.name]}, dtype=np.dtype(np.str))\r\n",
+ "'\r\n",
+ "\t, @input_data_1 = @input_query\r\n",
+ "\t, @input_data_1_name = N'input_data'\r\n",
+ "\t, @output_data_1_name = N'returned_model'\r\n",
+ "\t, @params = N'@label_column NVARCHAR(255), \r\n",
+ "\t @primary_metric NVARCHAR(40),\r\n",
+ "\t\t\t\t @iterations INT, @task NVARCHAR(40),\r\n",
+ "\t\t\t\t @experiment_name NVARCHAR(32),\r\n",
+ "\t\t\t\t @iteration_timeout_minutes INT,\r\n",
+ "\t\t\t\t @experiment_timeout_minutes INT,\r\n",
+ "\t\t\t\t @n_cross_validations INT,\r\n",
+ "\t\t\t\t @blacklist_models NVARCHAR(MAX),\r\n",
+ "\t\t\t\t @whitelist_models NVARCHAR(MAX),\r\n",
+ "\t\t\t\t @experiment_exit_score FLOAT,\r\n",
+ "\t\t\t\t @sample_weight_column NVARCHAR(255),\r\n",
+ "\t\t\t\t @is_validate_column NVARCHAR(255),\r\n",
+ "\t\t\t\t @time_column_name NVARCHAR(255),\r\n",
+ "\t\t\t\t @tenantid NVARCHAR(255),\r\n",
+ "\t\t\t\t @appid NVARCHAR(255),\r\n",
+ "\t\t\t\t @password NVARCHAR(255),\r\n",
+ "\t\t\t\t @config_file NVARCHAR(255)'\r\n",
+ "\t, @label_column = @label_column\r\n",
+ "\t, @primary_metric = @primary_metric\r\n",
+ "\t, @iterations = @iterations\r\n",
+ "\t, @task = @task\r\n",
+ "\t, @experiment_name = @experiment_name\r\n",
+ "\t, @iteration_timeout_minutes = @iteration_timeout_minutes\r\n",
+ "\t, @experiment_timeout_minutes = @experiment_timeout_minutes\r\n",
+ "\t, @n_cross_validations = @n_cross_validations\r\n",
+ "\t, @blacklist_models = @blacklist_models\r\n",
+ "\t, @whitelist_models = @whitelist_models\r\n",
+ "\t, @experiment_exit_score = @experiment_exit_score\r\n",
+ "\t, @sample_weight_column = @sample_weight_column\r\n",
+ "\t, @is_validate_column = @is_validate_column\r\n",
+ "\t, @time_column_name = @time_column_name\r\n",
+ "\t, @tenantid = @tenantid\r\n",
+ "\t, @appid = @appid\r\n",
+ "\t, @password = @password\r\n",
+ "\t, @config_file = @config_file\r\n",
+ "WITH RESULT SETS ((best_run NVARCHAR(250), experiment_name NVARCHAR(100), fitted_model VARCHAR(MAX), log_file_text NVARCHAR(MAX), workspace NVARCHAR(100)))\r\n",
+ "END"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "-- This procedure returns a list of metrics for each iteration of a training run.\r\n",
+ "SET ANSI_NULLS ON\r\n",
+ "GO\r\n",
+ "SET QUOTED_IDENTIFIER ON\r\n",
+ "GO\r\n",
+ "CREATE OR ALTER PROCEDURE [dbo].[AutoMLGetMetrics]\r\n",
+ " (\r\n",
+ "\t@run_id NVARCHAR(250), -- The RunId\r\n",
+ " @experiment_name NVARCHAR(32)='automl-sql-test', -- This can be used to find the experiment in the Azure Portal.\r\n",
+ " @connection_name NVARCHAR(255)='default' -- The AML connection to use.\r\n",
+ " ) AS\r\n",
+ "BEGIN\r\n",
+ " DECLARE @tenantid NVARCHAR(255)\r\n",
+ " DECLARE @appid NVARCHAR(255)\r\n",
+ " DECLARE @password NVARCHAR(255)\r\n",
+ " DECLARE @config_file NVARCHAR(255)\r\n",
+ "\r\n",
+ "\tSELECT @tenantid=TenantId, @appid=AppId, @password=Password, @config_file=ConfigFile\r\n",
+ "\tFROM aml_connection\r\n",
+ "\tWHERE ConnectionName = @connection_name;\r\n",
+ "\r\n",
+ " EXEC sp_execute_external_script @language = N'Python', @script = N'import pandas as pd\r\n",
+ "import logging \r\n",
+ "import azureml.core \r\n",
+ "import numpy as np\r\n",
+ "from azureml.core.experiment import Experiment \r\n",
+ "from azureml.train.automl.run import AutoMLRun\r\n",
+ "from azureml.core.authentication import ServicePrincipalAuthentication \r\n",
+ "from azureml.core.workspace import Workspace \r\n",
+ "\r\n",
+ "auth = ServicePrincipalAuthentication(tenantid, appid, password) \r\n",
+ " \r\n",
+ "ws = Workspace.from_config(path=config_file, auth=auth) \r\n",
+ " \r\n",
+ "experiment = Experiment(ws, experiment_name) \r\n",
+ "\r\n",
+ "ml_run = AutoMLRun(experiment = experiment, run_id = run_id)\r\n",
+ "\r\n",
+ "children = list(ml_run.get_children())\r\n",
+ "iterationlist = []\r\n",
+ "metricnamelist = []\r\n",
+ "metricvaluelist = []\r\n",
+ "\r\n",
+ "for run in children:\r\n",
+ " properties = run.get_properties()\r\n",
+ " if \"iteration\" in properties:\r\n",
+ " iteration = int(properties[\"iteration\"])\r\n",
+ " for metric_name, metric_value in run.get_metrics().items():\r\n",
+ " if isinstance(metric_value, float):\r\n",
+ " iterationlist.append(iteration)\r\n",
+ " metricnamelist.append(metric_name)\r\n",
+ " metricvaluelist.append(metric_value)\r\n",
+ " \r\n",
+ "metrics = pd.DataFrame({\"iteration\": iterationlist, \"metric_name\": metricnamelist, \"metric_value\": metricvaluelist})\r\n",
+ "'\r\n",
+ " , @output_data_1_name = N'metrics'\r\n",
+ "\t, @params = N'@run_id NVARCHAR(250), \r\n",
+ "\t\t\t\t @experiment_name NVARCHAR(32),\r\n",
+ " \t\t\t\t @tenantid NVARCHAR(255),\r\n",
+ "\t\t\t\t @appid NVARCHAR(255),\r\n",
+ "\t\t\t\t @password NVARCHAR(255),\r\n",
+ "\t\t\t\t @config_file NVARCHAR(255)'\r\n",
+ " , @run_id = @run_id\r\n",
+ "\t, @experiment_name = @experiment_name\r\n",
+ "\t, @tenantid = @tenantid\r\n",
+ "\t, @appid = @appid\r\n",
+ "\t, @password = @password\r\n",
+ "\t, @config_file = @config_file\r\n",
+ "WITH RESULT SETS ((iteration INT, metric_name NVARCHAR(100), metric_value FLOAT))\r\n",
+ "END"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "-- This procedure predicts values based on a model returned by AutoMLTrain and a dataset.\r\n",
+ "-- It returns the dataset with a new column added, which is the predicted value.\r\n",
+ "SET ANSI_NULLS ON\r\n",
+ "GO\r\n",
+ "SET QUOTED_IDENTIFIER ON\r\n",
+ "GO\r\n",
+ "CREATE OR ALTER PROCEDURE [dbo].[AutoMLPredict]\r\n",
+ " (\r\n",
+ " @input_query NVARCHAR(MAX), -- A SQL query returning data to predict on.\r\n",
+ " @model NVARCHAR(MAX), -- A model returned from AutoMLTrain.\r\n",
+ " @label_column NVARCHAR(255)='' -- Optional name of the column from input_query, which should be ignored when predicting\r\n",
+ " ) AS \r\n",
+ "BEGIN \r\n",
+ " \r\n",
+ " EXEC sp_execute_external_script @language = N'Python', @script = N'import pandas as pd \r\n",
+ "import azureml.core \r\n",
+ "import numpy as np \r\n",
+ "from azureml.train.automl import AutoMLConfig \r\n",
+ "import pickle \r\n",
+ "import codecs \r\n",
+ " \r\n",
+ "model_obj = pickle.loads(codecs.decode(model.encode(), \"base64\")) \r\n",
+ " \r\n",
+ "test_data = input_data.copy() \r\n",
+ "\r\n",
+ "if label_column != \"\" and label_column is not None:\r\n",
+ " y_test = test_data.pop(label_column).values \r\n",
+ "X_test = test_data \r\n",
+ " \r\n",
+ "predicted = model_obj.predict(X_test) \r\n",
+ " \r\n",
+ "combined_output = input_data.assign(predicted=predicted)\r\n",
+ " \r\n",
+ "' \r\n",
+ " , @input_data_1 = @input_query \r\n",
+ " , @input_data_1_name = N'input_data' \r\n",
+ " , @output_data_1_name = N'combined_output' \r\n",
+ " , @params = N'@model NVARCHAR(MAX), @label_column NVARCHAR(255)' \r\n",
+ " , @model = @model \r\n",
+ "\t, @label_column = @label_column\r\n",
+ "END"
+ ]
+ }
+ ],
+ "metadata": {
+ "authors": [
+ {
+ "name": "jeffshep"
+ }
+ ],
+ "kernelspec": {
+ "display_name": "SQL",
+ "language": "sql",
+ "name": "SQL"
+ },
+ "language_info": {
+ "name": "sql",
+ "version": ""
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}