Merge pull request #448 from jeff-shepherd/master

Update new notebooks to use dataprep and add sql files
2025-12-19 17:17:04 -05:00 · 2019-06-27 09:07:47 -04:00
parent cd3c980a6e 61b396be4f
commit 14ecfb0bf3
15 changed files with 4282 additions and 3087 deletions
--- a/how-to-use-azureml/automated-machine-learning/classification-bank-marketing/auto-ml-classification-bank-marketing.ipynb
+++ b/how-to-use-azureml/automated-machine-learning/classification-bank-marketing/auto-ml-classification-bank-marketing.ipynb
@@ -77,6 +77,7 @@
    "import pandas as pd\n",
    "import os\n",
    "from sklearn import datasets\n",
+    "import azureml.dataprep as dprep\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "import azureml.core\n",
@@ -220,30 +221,12 @@
   "metadata": {},
   "outputs": [],
   "source": [
-        "%%writefile $project_folder/get_data.py\n",
-        "\n",
-        "import pandas as pd\n",
-        "from sklearn.model_selection import train_test_split\n",
-        "\n",
-        "def _read_x_y(file_name, label_col):\n",
-        "        df = pd.read_csv(file_name)\n",
-        "        y = None\n",
-        "        if label_col in df.columns:\n",
-        "            y = df.pop(label_col)\n",
-        "            y = y.values[:, None]\n",
-        "        X = df.values\n",
-        "        return X, y\n",
-        "    \n",
-        "def get_data():\n",
-        "    # Load the bank marketing datasets.\n",
-        "    from sklearn.datasets import load_diabetes\n",
-        "    from sklearn.model_selection import train_test_split\n",
-        "\n",
-        "    X_train, y_train =  _read_x_y('https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv', \"y\")\n",
-        "\n",
-        "    columns = ['age','job','marital','education','default','housing','loan','contact','month','day_of_week','duration','campaign','pdays','previous','poutcome','emp.var.rate','cons.price.idx','cons.conf.idx','euribor3m','nr.employed','y']\n",
-        "\n",
-        "    return { \"X\" : X_train, \"y\" : y_train[:,0] }"
+    "data = \"https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv\"\n",
+    "dflow = dprep.auto_read_file(data)\n",
+    "dflow.get_profile()\n",
+    "X_train = dflow.drop_columns(columns=['y'])\n",
+    "y_train = dflow.keep_columns(columns=['y'], validate_column_exists=True)\n",
+    "dflow.head()"
   ]
  },
  {
@@ -288,7 +271,8 @@
    "                             debug_log = 'automl_errors.log',\n",
    "                             path = project_folder,\n",
    "                             run_configuration=conda_run_config,\n",
-        "                             data_script = project_folder + \"/get_data.py\",\n",
+    "                             X = X_train,\n",
+    "                             y = y_train,\n",
    "                             **automl_settings\n",
    "                            )"
   ]
@@ -631,14 +615,10 @@
   "metadata": {},
   "outputs": [],
   "source": [
-        "def _read_x_y(file_name, label_col):\n",
-        "        df = pd.read_csv(file_name)\n",
-        "        y = None\n",
-        "        if label_col in df.columns:\n",
-        "            y = df.pop(label_col)\n",
-        "            y = y.values[:, None]\n",
-        "        X = df.values\n",
-        "        return X, y"
+    "# Load the bank marketing datasets.\n",
+    "from sklearn.datasets import load_diabetes\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "from numpy import array"
   ]
  },
  {
@@ -647,15 +627,22 @@
   "metadata": {},
   "outputs": [],
   "source": [
-        "# Load the bank marketing datasets.\n",
-        "from sklearn.datasets import load_diabetes\n",
-        "from sklearn.model_selection import train_test_split\n",
-        "from numpy import array\n",
-        "\n",
-        "\n",
-        "X_test, y_test =  _read_x_y('https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_validate.csv',\"y\")\n",
-        "\n",
-        "columns = ['age','job','marital','education','default','housing','loan','contact','month','day_of_week','duration','campaign','pdays','previous','poutcome','emp.var.rate','cons.price.idx','cons.conf.idx','euribor3m','nr.employed','y']"
+    "data = \"https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_validate.csv\"\n",
+    "dflow = dprep.auto_read_file(data)\n",
+    "dflow.get_profile()\n",
+    "X_test = dflow.drop_columns(columns=['y'])\n",
+    "y_test = dflow.keep_columns(columns=['y'], validate_column_exists=True)\n",
+    "dflow.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X_test = X_test.to_pandas_dataframe()\n",
+    "y_test = y_test.to_pandas_dataframe()"
   ]
  },
  {
@@ -665,8 +652,9 @@
   "outputs": [],
   "source": [
    "y_pred  = fitted_model.predict(X_test)\n",
-        "actual = array(y_test.tolist())\n",
-        "print(y_pred.shape, \" \", actual[:,0].shape)"
+    "actual = array(y_test)\n",
+    "actual = actual[:,0]\n",
+    "print(y_pred.shape, \" \", actual.shape)"
   ]
  },
  {
@@ -685,10 +673,9 @@
   "metadata": {},
   "outputs": [],
   "source": [
-        "y_test = y_test[:,0]# Plot outputs\n",
    "%matplotlib notebook\n",
-        "test_pred = plt.scatter(y_test, y_pred, color='b')\n",
-        "test_test = plt.scatter(y_test, y_test, color='g')\n",
+    "test_pred = plt.scatter(actual, y_pred, color='b')\n",
+    "test_test = plt.scatter(actual, actual, color='g')\n",
    "plt.legend((test_pred, test_test), ('prediction', 'truth'), loc='upper left', fontsize=8)\n",
    "plt.show()"
   ]
--- a/how-to-use-azureml/automated-machine-learning/classification-credit-card-fraud/auto-ml-classification-credit-card-fraud.ipynb
+++ b/how-to-use-azureml/automated-machine-learning/classification-credit-card-fraud/auto-ml-classification-credit-card-fraud.ipynb
@@ -75,6 +75,7 @@
    "import pandas as pd\n",
    "import os\n",
    "from sklearn.model_selection import train_test_split\n",
+    "import azureml.dataprep as dprep\n",
    "\n",
    "import azureml.core\n",
    "from azureml.core.experiment import Experiment\n",
@@ -217,19 +218,13 @@
   "metadata": {},
   "outputs": [],
   "source": [
-        "%%writefile $project_folder/get_data.py\n",
-        "\n",
-        "import pandas as pd\n",
-        "from sklearn.model_selection import train_test_split\n",
-        "\n",
-        "    \n",
-        "def get_data():\n",
-        "    cards = pd.read_csv(\"https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/creditcard.csv\")\n",
-        "    y = cards.Class\n",
-        "    x = cards.drop('Class', axis=1)\n",
-        "    X_train, X_test, y_train, y_test = train_test_split(x,y,test_size=0.2, random_state=1)\n",
-        "    \n",
-        "    return { \"X\" : X_train, \"y\" : y_train.values}"
+    "data = \"https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/creditcard.csv\"\n",
+    "dflow = dprep.auto_read_file(data)\n",
+    "dflow.get_profile()\n",
+    "X = dflow.drop_columns(columns=['Class'])\n",
+    "y = dflow.keep_columns(columns=['Class'], validate_column_exists=True)\n",
+    "X_train, X_test = X.random_split(percentage=0.8, seed=223)\n",
+    "y_train, y_test = y.random_split(percentage=0.8, seed=223)"
   ]
  },
  {
@@ -281,7 +276,8 @@
    "                             debug_log = 'automl_errors_20190417.log',\n",
    "                             path = project_folder,\n",
    "                             run_configuration=conda_run_config,\n",
-        "                             data_script = project_folder + \"/get_data.py\",\n",
+    "                             X = X_train,\n",
+    "                             y = y_train,\n",
    "                             **automl_settings\n",
    "                            )"
   ]
@@ -621,11 +617,9 @@
   "metadata": {},
   "outputs": [],
   "source": [
-        "cards = pd.read_csv(\"https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/creditcard.csv\")\n",
-        "print(cards.head())\n",
-        "y = cards.Class\n",
-        "x = cards.drop('Class', axis=1)\n",
-        "X_train, X_test, y_train, y_test = train_test_split(x,y,test_size=0.2, random_state=1)\n"
+    "#Randomly select and test\n",
+    "X_test = X_test.to_pandas_dataframe()\n",
+    "y_test = y_test.to_pandas_dataframe()\n"
   ]
  },
  {
@@ -678,14 +672,14 @@
    "This Credit Card fraud Detection dataset is made available under the Open Database License: http://opendatacommons.org/licenses/odbl/1.0/. Any rights in individual contents of the database are licensed under the Database Contents License: http://opendatacommons.org/licenses/dbcl/1.0/ and is available at: https://www.kaggle.com/mlg-ulb/creditcardfraud\n",
    "\n",
    "\n",
-        "The dataset has been collected and analysed during a research collaboration of Worldline and the Machine Learning Group (http://mlg.ulb.ac.be) of ULB (Universit\u00c3\u00a9 Libre de Bruxelles) on big data mining and fraud detection. More details on current and past projects on related topics are available on https://www.researchgate.net/project/Fraud-detection-5 and the page of the DefeatFraud project\n",
+    "The dataset has been collected and analysed during a research collaboration of Worldline and the Machine Learning Group (http://mlg.ulb.ac.be) of ULB (Université Libre de Bruxelles) on big data mining and fraud detection. More details on current and past projects on related topics are available on https://www.researchgate.net/project/Fraud-detection-5 and the page of the DefeatFraud project\n",
    "Please cite the following works: \n",
-        "\u00e2\u20ac\u00a2\tAndrea Dal Pozzolo, Olivier Caelen, Reid A. Johnson and Gianluca Bontempi. Calibrating Probability with Undersampling for Unbalanced Classification. In Symposium on Computational Intelligence and Data Mining (CIDM), IEEE, 2015\n",
-        "\u00e2\u20ac\u00a2\tDal Pozzolo, Andrea; Caelen, Olivier; Le Borgne, Yann-Ael; Waterschoot, Serge; Bontempi, Gianluca. Learned lessons in credit card fraud detection from a practitioner perspective, Expert systems with applications,41,10,4915-4928,2014, Pergamon\n",
-        "\u00e2\u20ac\u00a2\tDal Pozzolo, Andrea; Boracchi, Giacomo; Caelen, Olivier; Alippi, Cesare; Bontempi, Gianluca. Credit card fraud detection: a realistic modeling and a novel learning strategy, IEEE transactions on neural networks and learning systems,29,8,3784-3797,2018,IEEE\n",
+    "•\tAndrea Dal Pozzolo, Olivier Caelen, Reid A. Johnson and Gianluca Bontempi. Calibrating Probability with Undersampling for Unbalanced Classification. In Symposium on Computational Intelligence and Data Mining (CIDM), IEEE, 2015\n",
+    "•\tDal Pozzolo, Andrea; Caelen, Olivier; Le Borgne, Yann-Ael; Waterschoot, Serge; Bontempi, Gianluca. Learned lessons in credit card fraud detection from a practitioner perspective, Expert systems with applications,41,10,4915-4928,2014, Pergamon\n",
+    "•\tDal Pozzolo, Andrea; Boracchi, Giacomo; Caelen, Olivier; Alippi, Cesare; Bontempi, Gianluca. Credit card fraud detection: a realistic modeling and a novel learning strategy, IEEE transactions on neural networks and learning systems,29,8,3784-3797,2018,IEEE\n",
    "o\tDal Pozzolo, Andrea Adaptive Machine learning for credit card fraud detection ULB MLG PhD thesis (supervised by G. Bontempi)\n",
-        "\u00e2\u20ac\u00a2\tCarcillo, Fabrizio; Dal Pozzolo, Andrea; Le Borgne, Yann-A\u00c3\u00abl; Caelen, Olivier; Mazzer, Yannis; Bontempi, Gianluca. Scarff: a scalable framework for streaming credit card fraud detection with Spark, Information fusion,41, 182-194,2018,Elsevier\n",
-        "\u00e2\u20ac\u00a2\tCarcillo, Fabrizio; Le Borgne, Yann-A\u00c3\u00abl; Caelen, Olivier; Bontempi, Gianluca. Streaming active learning strategies for real-life credit card fraud detection: assessment and visualization, International Journal of Data Science and Analytics, 5,4,285-300,2018,Springer International Publishing"
+    "•\tCarcillo, Fabrizio; Dal Pozzolo, Andrea; Le Borgne, Yann-Aël; Caelen, Olivier; Mazzer, Yannis; Bontempi, Gianluca. Scarff: a scalable framework for streaming credit card fraud detection with Spark, Information fusion,41, 182-194,2018,Elsevier\n",
+    "•\tCarcillo, Fabrizio; Le Borgne, Yann-Aël; Caelen, Olivier; Bontempi, Gianluca. Streaming active learning strategies for real-life credit card fraud detection: assessment and visualization, International Journal of Data Science and Analytics, 5,4,285-300,2018,Springer International Publishing"
   ]
  }
 ],
--- a/how-to-use-azureml/automated-machine-learning/regression-concrete-strength/auto-ml-regression-concrete-strength.ipynb
+++ b/how-to-use-azureml/automated-machine-learning/regression-concrete-strength/auto-ml-regression-concrete-strength.ipynb
@@ -71,6 +71,7 @@
    "import pandas as pd\n",
    "import os\n",
    "from sklearn.model_selection import train_test_split\n",
+    "import azureml.dataprep as dprep\n",
    " \n",
    "\n",
    "import azureml.core\n",
@@ -212,25 +213,14 @@
   "metadata": {},
   "outputs": [],
   "source": [
-        "%%writefile $project_folder/get_data.py\n",
-        "\n",
-        "import pandas as pd\n",
-        "from sklearn.model_selection import train_test_split\n",
-        "\n",
-        "def _read_x_y(file_name, label_col):\n",
-        "        df = pd.read_csv(file_name)\n",
-        "        y = None\n",
-        "        if label_col in df.columns:\n",
-        "            y = df.pop(label_col)\n",
-        "            y = y.values[:, None]\n",
-        "        X = df.values\n",
-        "        return X, y\n",
-        "    \n",
-        "def get_data():\n",
-        "    X,y = _read_x_y(\"https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/compresive_strength_concrete.csv\",\"CONCRETE\")\n",
-        "    X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2)\n",
-        "    \n",
-        "    return { \"X\" : X_train, \"y\" : y_train[:,0] }"
+    "data = \"https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/compresive_strength_concrete.csv\"\n",
+    "dflow = dprep.auto_read_file(data)\n",
+    "dflow.get_profile()\n",
+    "X = dflow.drop_columns(columns=['CONCRETE'])\n",
+    "y = dflow.keep_columns(columns=['CONCRETE'], validate_column_exists=True)\n",
+    "X_train, X_test = X.random_split(percentage=0.8, seed=223)\n",
+    "y_train, y_test = y.random_split(percentage=0.8, seed=223) \n",
+    "dflow.head()"
   ]
  },
  {
@@ -282,7 +272,8 @@
    "                             debug_log = 'automl.log',\n",
    "                             path = project_folder,\n",
    "                             run_configuration=conda_run_config,\n",
-        "                             data_script = project_folder + \"/get_data.py\",\n",
+    "                             X = X_train,\n",
+    "                             y = y_train,\n",
    "                             **automl_settings\n",
    "                            )"
   ]
@@ -311,7 +302,7 @@
   "source": [
    "## Results\n",
    "Widget for Monitoring Runs\n",
-        "The widget will first report a \u00e2\u20ac\u0153loading status while running the first iteration. After completing the first iteration, an auto-updating graph and table will be shown. The widget will refresh once per minute, so you should see the graph update as child runs complete.\n",
+    "The widget will first report a “loading” status while running the first iteration. After completing the first iteration, an auto-updating graph and table will be shown. The widget will refresh once per minute, so you should see the graph update as child runs complete.\n",
    "Note: The widget displays a link at the bottom. Use this link to open a web interface to explore the individual run details."
   ]
  },
@@ -664,14 +655,14 @@
   "metadata": {},
   "outputs": [],
   "source": [
-        "def _read_x_y(file_name, label_col):\n",
-        "        df = pd.read_csv(file_name)\n",
-        "        y = None\n",
-        "        if label_col in df.columns:\n",
-        "            y = df.pop(label_col)\n",
-        "            y = y.values[:, None]\n",
-        "        X = df.values\n",
-        "        return X, y"
+    "X_test = X_test.to_pandas_dataframe()\n",
+    "y_test = y_test.to_pandas_dataframe()\n",
+    "y_test = np.array(y_test)\n",
+    "y_test = y_test[:,0]\n",
+    "X_train = X_train.to_pandas_dataframe()\n",
+    "y_train = y_train.to_pandas_dataframe()\n",
+    "y_train = np.array(y_train)\n",
+    "y_train = y_train[:,0]"
   ]
  },
  {
@@ -687,9 +678,6 @@
   "metadata": {},
   "outputs": [],
   "source": [
-        "X,y = _read_x_y(\"https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/compresive_strength_concrete.csv\",\"CONCRETE\")\n",
-        "X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2)\n",
-        "\n",
    "y_pred_train = fitted_model.predict(X_train)\n",
    "y_residual_train = y_train - y_pred_train\n",
    "\n",
--- a/how-to-use-azureml/automated-machine-learning/regression-hardware-performance/auto-ml-regression-hardware-performance.ipynb
+++ b/how-to-use-azureml/automated-machine-learning/regression-hardware-performance/auto-ml-regression-hardware-performance.ipynb
@@ -71,6 +71,7 @@
    "import pandas as pd\n",
    "import os\n",
    "from sklearn.model_selection import train_test_split\n",
+    "import azureml.dataprep as dprep\n",
    " \n",
    "\n",
    "import azureml.core\n",
@@ -212,25 +213,14 @@
   "metadata": {},
   "outputs": [],
   "source": [
-        "%%writefile $project_folder/get_data.py\n",
-        "\n",
-        "import pandas as pd\n",
-        "from sklearn.model_selection import train_test_split\n",
-        "\n",
-        "def _read_x_y(file_name, label_col):\n",
-        "        df = pd.read_csv(file_name)\n",
-        "        y = None\n",
-        "        if label_col in df.columns:\n",
-        "            y = df.pop(label_col)\n",
-        "            y = y.values[:, None]\n",
-        "        X = df.values\n",
-        "        return X, y\n",
-        "    \n",
-        "def get_data():\n",
-        "    X,y = _read_x_y(\"https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/machineData.csv\",\"ERP\")\n",
-        "    X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2)\n",
-        "    \n",
-        "    return { \"X\" : X_train, \"y\" : y_train[:,0] }"
+    "data = \"https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/machineData.csv\"\n",
+    "dflow = dprep.auto_read_file(data)\n",
+    "dflow.get_profile()\n",
+    "X = dflow.drop_columns(columns=['ERP'])\n",
+    "y = dflow.keep_columns(columns=['ERP'], validate_column_exists=True)\n",
+    "X_train, X_test = X.random_split(percentage=0.8, seed=223)\n",
+    "y_train, y_test = y.random_split(percentage=0.8, seed=223) \n",
+    "dflow.head()"
   ]
  },
  {
@@ -283,7 +273,8 @@
    "                             debug_log = 'automl_errors_20190417.log',\n",
    "                             path = project_folder,\n",
    "                             run_configuration=conda_run_config,\n",
-        "                             data_script = project_folder + \"/get_data.py\",\n",
+    "                             X = X_train,\n",
+    "                             y = y_train,\n",
    "                             **automl_settings\n",
    "                            )"
   ]
@@ -334,16 +325,6 @@
    "RunDetails(remote_run).show() "
   ]
  },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "from azureml.train.automl.run import AutoMLRun\n",
-        "setup_run = AutoMLRun(experiment, remote_run.id + \"_setup\")"
-      ]
-    },
  {
   "cell_type": "code",
   "execution_count": null,
@@ -690,18 +671,14 @@
   "metadata": {},
   "outputs": [],
   "source": [
-        "def _read_x_y(file_name, label_col):\n",
-        "    df = pd.read_csv(file_name)\n",
-        "    y_split = None\n",
-        "    if label_col in df.columns:\n",
-        "        y_split = df.pop(label_col)\n",
-        "        y_split = y_split.values[:, None]\n",
-        "    X_split = df.values\n",
-        "    return X_split, y_split\n",
-        "    \n",
-        "\n",
-        "X,y = _read_x_y(\"https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/machineData.csv\",\"ERP\")\n",
-        "X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2)"
+    "X_test = X_test.to_pandas_dataframe()\n",
+    "y_test = y_test.to_pandas_dataframe()\n",
+    "y_test = np.array(y_test)\n",
+    "y_test = y_test[:,0]\n",
+    "X_train = X_train.to_pandas_dataframe()\n",
+    "y_train = y_train.to_pandas_dataframe()\n",
+    "y_train = np.array(y_train)\n",
+    "y_train = y_train[:,0]"
   ]
  },
  {
--- a/how-to-use-azureml/automated-machine-learning/sql-server/README.md
+++ b/how-to-use-azureml/automated-machine-learning/sql-server/README.md
@@ -0,0 +1,113 @@
+# Table of Contents
+1. [Introduction](#introduction)
+1. [Setup using Azure Data Studio](#azuredatastudiosetup)
+1. [Energy demand example using Azure Data Studio](#azuredatastudioenergydemand)
+1. [Setup using SQL Server Management Studio for SQL Server 2017 on Windows](#ssms2017)
+1. [Setup using SQL Server Management Studio for SQL Server 2019 on Linux](#ssms2019)
+1. [Energy demand example using SQL Server Management Studio](#ssmsenergydemand)
+
+
+<a name="introduction"></a>
+# Introduction
+SQL Server 2017 or 2019 can call Azure ML automated machine learning to create models trained on data from SQL Server.
+This uses the sp_execute_external_script stored procedure, which can call Python scripts.
+SQL Server 2017 and SQL Server 2019 can both run on Windows or Linux.
+However, this integration is not available for SQL Server 2017 on Linux. 
+
+This folder shows how to set up the integration and has a sample that uses the integration to train and predict based on an energy demand dataset.
+
+This integration is part of SQL Server and so can be used from any SQL client. 
+These instructions show using it from Azure Data Studio or SQL Server Management Studio.
+
+<a name="azuredatastudiosetup"></a>
+## Setup using Azure Data Studio
+
+These steps show how to set up the integration using Azure Data Studio.
+
+1. If you don't already have SQL Server, you can install it from [https://www.microsoft.com/en-us/sql-server/sql-server-downloads](https://www.microsoft.com/en-us/sql-server/sql-server-downloads)
+1. Install Azure Data Studio from [https://docs.microsoft.com/en-us/sql/azure-data-studio/download?view=sql-server-2017](https://docs.microsoft.com/en-us/sql/azure-data-studio/download?view=sql-server-2017)
+1. Start Azure Data Studio and connect to SQL Server. [https://docs.microsoft.com/en-us/sql/azure-data-studio/sql-notebooks?view=sql-server-2017](https://docs.microsoft.com/en-us/sql/azure-data-studio/sql-notebooks?view=sql-server-2017)
+1. Create a database named "automl".
+1. Open the notebook how-to-use-azureml\automated-machine-learning\sql-server\setup\auto-ml-sql-setup.ipynb and follow the instructions in it.
+
+ <a name="azuredatastudioenergydemand"></a>
+## Energy demand example using Azure Data Studio
+
+Once you have completed the setup, you can try the energy demand sample in the notebook energy-demand\auto-ml-sql-energy-demand.ipynb.
+This has cells to train a model, predict based on the model and show metrics for each pipeline run in training the model.
+
+<a name="ssms2017"></a>
+## Setup using SQL Server Management Studio for SQL Server 2017 on Windows
+
+These instructions set up the integration for SQL Server 2017 on Windows.
+
+1. If you don't already have SQL Server, you can install it from [https://www.microsoft.com/en-us/sql-server/sql-server-downloads](https://www.microsoft.com/en-us/sql-server/sql-server-downloads)
+2. Enable external scripts with the following commands: 
+```sh
+   sp_configure 'external scripts enabled',1 
+   reconfigure with override
+```
+3. Stop SQL Server. 
+4. Install the automated machine learning libraries using the following commands from an Administrator command prompt (if you are using a non-default SQL Server instance name, replace MSSQLSERVER in the second command with the instance name):
+```sh
+   cd "C:\Program Files\Microsoft SQL Server"
+   cd "MSSQL14.MSSQLSERVER\PYTHON_SERVICES"
+   python.exe -m pip install azureml-sdk[automl]
+   python.exe -m pip install --upgrade numpy
+   python.exe -m pip install --upgrade scikit-learn
+```
+5. Start SQL Server and the service "SQL Server Launchpad service". 
+6. In Windows Firewall, click on advanced settings and in Outbound Rules, disable "Block network access for R local user accounts in SQL Server instance xxxx". 
+7. Execute the files in the setup folder in SQL Server Management Studio: aml_model.sql, aml_connection.sql, AutoMLGetMetrics.sql, AutoMLPredict.sql and AutoMLTrain.sql 
+8. Create an Azure Machine Learning Workspace.  You can use the instructions at: [https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-workspace](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-workspace)
+9. Create a config.json file using the subscription id, resource group name and workspace name that you used to create the workspace.  The file is described at: [https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-configure-environment#workspace](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-configure-environment#workspace)
+10. Create an Azure service principal.  You can do this with the commands: 
+```sh
+   az login 
+   az account set --subscription subscriptionid 
+   az ad sp create-for-rbac --name principlename --password password 
+```
+11. Insert the values \<tenant\>, \<AppId\> and \<password\> returned by create-for-rbac above into the aml_connection table.  Set \<path\> as the absolute path to your config.json file. Set the name to "Default".
+ 
+<a name="ssms2019"></a>
+## Setup using SQL Server Management Studio for SQL Server 2019 on Linux
+1. Install SQL Server 2019 from: [https://www.microsoft.com/en-us/sql-server/sql-server-downloads](https://www.microsoft.com/en-us/sql-server/sql-server-downloads)
+2. Install machine learning support from: [https://docs.microsoft.com/en-us/sql/linux/sql-server-linux-setup-machine-learning?view=sqlallproducts-allversions#ubuntu](https://docs.microsoft.com/en-us/sql/linux/sql-server-linux-setup-machine-learning?view=sqlallproducts-allversions#ubuntu)
+3. Then install SQL Server Management Studio from [https://docs.microsoft.com/en-us/sql/ssms/download-sql-server-management-studio-ssms?view=sql-server-2017](https://docs.microsoft.com/en-us/sql/ssms/download-sql-server-management-studio-ssms?view=sql-server-2017)
+4. Enable external scripts with the following commands: 
+```sh
+   sp_configure 'external scripts enabled',1 
+   reconfigure with override 
+```
+5. Stop SQL Server. 
+6. Install the automated machine learning libraries using the following commands from Administrator command (If you are using a non-default SQL Server instance name, replace MSSQLSERVER in the second command with the instance name): 
+```sh
+   sudo /opt/mssql/mlservices/bin/python/python -m pip install azureml-sdk[automl] 
+   sudo /opt/mssql/mlservices/bin/python/python -m pip install --upgrade numpy 
+   sudo /opt/mssql/mlservices/bin/python/python -m pip install --upgrade scikit-learn
+```
+7. Start SQL Server. 
+8. Execute the files aml_model.sql, aml_connection.sql, AutoMLGetMetrics.sql, AutoMLPredict.sql and AutoMLTrain.sql in SQL Server Management Studio. 
+9. Create an Azure Machine Learning Workspace.  You can use the instructions at: [https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-workspace](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-workspace)
+10. Create a config.json file using the subscription id, resource group name and workspace name that you used to create the workspace.  The file is described at: [https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-configure-environment#workspace](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-configure-environment#workspace)
+11. Create an Azure service principal.  You can do this with the commands: 
+```sh
+   az login 
+   az account set --subscription subscriptionid 
+   az ad sp create-for-rbac --name principlename --password password 
+``` 
+12. Insert the values \<tenant\>, \<AppId\> and \<password\> returned by create-for-rbac above into the aml_connection table.  Set \<path\> as the absolute path to your config.json file. Set the name to "Default".
+ 
+<a name="ssmsenergydemand"></a>
+## Energy demand example using SQL Server Management Studio
+
+Once you have completed the setup, you can try the energy demand sample queries.
+First you need to load the sample data in the database.
+1. In SQL Server Management Studio, you can right-click the database, select Tasks, then Import Flat file. 
+1. Select the file MachineLearningNotebooks\notebooks\how-to-use-azureml\automated-machine-learning\forecasting-energy-demand\nyc_energy.csv. 
+1. When you get to the column definition page, allow nulls for all columns. 
+
+You can then run the queries in the energy-demand folder:
+* TrainEnergyDemand.sql runs AutoML, trains multiple models on data and selects the best model.
+* PredictEnergyDemand.sql predicts based on the most recent training run.
+* GetMetrics.sql returns all the metrics for each model in the most recent training run.
--- a/how-to-use-azureml/automated-machine-learning/sql-server/energy-demand/GetMetrics.sql
+++ b/how-to-use-azureml/automated-machine-learning/sql-server/energy-demand/GetMetrics.sql
@@ -0,0 +1,10 @@
+-- Lists all the metrics for all iterations of the most recent run, i.e. the aml_model row with the latest CreatedDate.
+
+DECLARE @RunId NVARCHAR(43)            -- receives the first 43 characters of the stored RunId (see SUBSTRING below)
+DECLARE @ExperimentName NVARCHAR(255)  -- experiment name stored alongside that run
+
+SELECT TOP 1 @ExperimentName=ExperimentName, @RunId=SUBSTRING(RunId, 1, 43)  -- NOTE(review): 43 is presumably the length of the parent run id without any per-iteration suffix; confirm against what AutoMLTrain stores
+FROM aml_model  -- if the table is empty, both variables stay NULL and are passed on as NULL
+ORDER BY CreatedDate DESC  -- newest row first, so TOP 1 picks the most recently created entry
+
+EXEC dbo.AutoMLGetMetrics @RunId, @ExperimentName  -- positional arguments: run id first, experiment name second
--- a/how-to-use-azureml/automated-machine-learning/sql-server/energy-demand/PredictEnergyDemand.sql
+++ b/how-to-use-azureml/automated-machine-learning/sql-server/energy-demand/PredictEnergyDemand.sql
@@ -0,0 +1,17 @@
+-- Uses the AutoMLPredict stored procedure to predict with the most recently created forecasting model for the nyc_energy dataset.
+
+DECLARE @Model NVARCHAR(MAX) = (SELECT TOP 1 Model FROM dbo.aml_model
+                                WHERE ExperimentName = 'automl-sql-forecast'  -- only models stored under this experiment name
+                                ORDER BY CreatedDate DESC)                    -- newest first; @Model is NULL when no such row exists
+
+EXEC dbo.AutoMLPredict @input_query='
+SELECT CAST(timeStamp AS NVARCHAR(30)) AS timeStamp,
+       demand,
+	   precip,
+	   temp
+FROM nyc_energy
+WHERE demand IS NOT NULL AND precip IS NOT NULL AND temp IS NOT NULL
+AND timeStamp >= ''2017-02-01''',  -- input rows: complete records dated 2017-02-01 or later
+@label_column='demand',  -- column of the input query that holds the actual values
+@model=@Model  -- variable spelled exactly as declared, so the script also runs on a case-sensitive instance collation
+WITH RESULT SETS ((timeStamp NVARCHAR(30), actual_demand FLOAT, precip FLOAT, temp FLOAT, predicted_demand FLOAT))  -- names and types of the returned columns
--- a/how-to-use-azureml/automated-machine-learning/sql-server/energy-demand/TrainEnergyDemand.sql
+++ b/how-to-use-azureml/automated-machine-learning/sql-server/energy-demand/TrainEnergyDemand.sql
@@ -0,0 +1,27 @@
+-- Trains a forecasting model for the nyc_energy dataset with the AutoMLTrain
+-- stored procedure and stores the returned row in dbo.aml_model.
+
+INSERT INTO dbo.aml_model (RunId, ExperimentName, Model, LogFileText, WorkspaceName)
+EXEC dbo.AutoMLTrain
+    -- What to run and under which experiment name.
+    @task = 'forecasting',
+    @experiment_name = 'automl-sql-forecast',
+    @primary_metric = 'normalized_root_mean_squared_error',
+    -- Limits: number of pipelines and minutes allowed per pipeline.
+    @iterations = 10,
+    @iteration_timeout_minutes = 5,
+    -- Column roles within the result of @input_query.
+    @label_column = 'demand',
+    @time_column_name = 'timeStamp',
+    @is_validate_column = 'is_validate_column',
+    -- Rows before 2017-01-01 are flagged 0 and the remaining rows 1 in is_validate_column.
+    @input_query = '
+SELECT CAST(timeStamp as NVARCHAR(30)) as timeStamp,
+       demand,
+	   precip,
+	   temp,
+	   CASE WHEN timeStamp < ''2017-01-01'' THEN 0 ELSE 1 END AS is_validate_column
+FROM nyc_energy
+WHERE demand IS NOT NULL AND precip IS NOT NULL AND temp IS NOT NULL
+and timeStamp < ''2017-02-01'''
+
--- a/how-to-use-azureml/automated-machine-learning/sql-server/energy-demand/auto-ml-sql-energy-demand.ipynb
+++ b/how-to-use-azureml/automated-machine-learning/sql-server/energy-demand/auto-ml-sql-energy-demand.ipynb
@@ -0,0 +1,141 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Train a model and use it for prediction\r\n",
+    "\r\n",
+    "Before running this notebook, run the auto-ml-sql-setup.ipynb notebook."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/automated-machine-learning/sql-server/energy-demand/auto-ml-sql-energy-demand.png)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Set the default database"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "USE [automl]\r\n",
+    "GO"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Use the AutoMLTrain stored procedure to create a forecasting model for the nyc_energy dataset."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "INSERT INTO dbo.aml_model(RunId, ExperimentName, Model, LogFileText, WorkspaceName)\r\n",
+    "EXEC dbo.AutoMLTrain @input_query='\r\n",
+    "SELECT CAST(timeStamp as NVARCHAR(30)) as timeStamp,\r\n",
+    "       demand,\r\n",
+    "\t   precip,\r\n",
+    "\t   temp,\r\n",
+    "\t   CASE WHEN timeStamp < ''2017-01-01'' THEN 0 ELSE 1 END AS is_validate_column\r\n",
+    "FROM nyc_energy\r\n",
+    "WHERE demand IS NOT NULL AND precip IS NOT NULL AND temp IS NOT NULL\r\n",
+    "and timeStamp < ''2017-02-01''',\r\n",
+    "@label_column='demand',\r\n",
+    "@task='forecasting',\r\n",
+    "@iterations=10,\r\n",
+    "@iteration_timeout_minutes=5,\r\n",
+    "@time_column_name='timeStamp',\r\n",
+    "@is_validate_column='is_validate_column',\r\n",
+    "@experiment_name='automl-sql-forecast',\r\n",
+    "@primary_metric='normalized_root_mean_squared_error'"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Use the AutoMLPredict stored procedure to predict using the forecasting model for the nyc_energy dataset."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "DECLARE @Model NVARCHAR(MAX) = (SELECT TOP 1 Model FROM dbo.aml_model\r\n",
+    "                                WHERE ExperimentName = 'automl-sql-forecast'\r\n",
+    "\t\t\t\t\t\t\t\tORDER BY CreatedDate DESC)\r\n",
+    "-- @Model is referenced below with its declared casing: variable names are case-sensitive on case-sensitive collations.\r\n",
+    "EXEC dbo.AutoMLPredict @input_query='\r\n",
+    "SELECT CAST(timeStamp AS NVARCHAR(30)) AS timeStamp,\r\n",
+    "       demand,\r\n",
+    "\t   precip,\r\n",
+    "\t   temp\r\n",
+    "FROM nyc_energy\r\n",
+    "WHERE demand IS NOT NULL AND precip IS NOT NULL AND temp IS NOT NULL\r\n",
+    "AND timeStamp >= ''2017-02-01''',\r\n",
+    "@label_column='demand',\r\n",
+    "@model=@Model\r\n",
+    "WITH RESULT SETS ((timeStamp NVARCHAR(30), actual_demand FLOAT, precip FLOAT, temp FLOAT, predicted_demand FLOAT))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## List all the metrics for all iterations for the most recent training run."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "DECLARE @RunId NVARCHAR(43)\r\n",
+    "DECLARE @ExperimentName NVARCHAR(255)\r\n",
+    "\r\n",
+    "SELECT TOP 1 @ExperimentName=ExperimentName, @RunId=SUBSTRING(RunId, 1, 43)\r\n",
+    "FROM aml_model\r\n",
+    "ORDER BY CreatedDate DESC\r\n",
+    "\r\n",
+    "EXEC dbo.AutoMLGetMetrics @RunId, @ExperimentName"
+   ]
+  }
+ ],
+ "metadata": {
+  "authors": [
+   {
+    "name": "jeffshep"
+   }
+  ],
+  "kernelspec": {
+   "display_name": "SQL",
+   "language": "sql",
+   "name": "SQL"
+  },
+  "language_info": {
+   "name": "sql",
+   "version": ""
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
--- a/how-to-use-azureml/automated-machine-learning/sql-server/setup/AutoMLGetMetrics.sql
+++ b/how-to-use-azureml/automated-machine-learning/sql-server/setup/AutoMLGetMetrics.sql
@@ -0,0 +1,85 @@
+-- This procedure returns a list of metrics for each iteration of a run.
+--
+-- Parameters:
+--   @run_id          - Id of the AutoML run whose child runs are inspected.
+--   @experiment_name - Name of the Azure ML experiment that contains the run.
+--   @connection_name - ConnectionName of the aml_connection row that supplies the
+--                      service principal credentials and the workspace config file.
+--
+-- Result set: one row per iteration and metric, limited to metrics whose value is a float.
+SET ANSI_NULLS ON
+GO
+SET QUOTED_IDENTIFIER ON
+GO
+CREATE OR ALTER PROCEDURE [dbo].[AutoMLGetMetrics]
+ (
+    @run_id NVARCHAR(250),                           -- The RunId
+    @experiment_name NVARCHAR(32)='automl-sql-test', -- This can be used to find the experiment in the Azure Portal.
+    @connection_name NVARCHAR(255)='default'         -- The AML connection to use.
+ ) AS
+BEGIN
+    DECLARE @tenantid NVARCHAR(255)
+    DECLARE @appid NVARCHAR(255)
+    DECLARE @password NVARCHAR(255)
+    DECLARE @config_file NVARCHAR(255)
+
+    SELECT @tenantid=TenantId, @appid=AppId, @password=Password, @config_file=ConfigFile
+    FROM aml_connection
+    WHERE ConnectionName = @connection_name;
+
+    -- Without a matching connection row every credential variable stays NULL and the
+    -- Python script below would fail with an obscure error, so stop here instead.
+    IF @@ROWCOUNT = 0
+    BEGIN
+        RAISERROR(N'No row with ConnectionName ''%s'' was found in aml_connection.', 16, 1, @connection_name)
+        RETURN
+    END
+
+    EXEC sp_execute_external_script @language = N'Python', @script = N'import pandas as pd
+import azureml.core
+from azureml.core.experiment import Experiment
+from azureml.train.automl.run import AutoMLRun
+from azureml.core.authentication import ServicePrincipalAuthentication
+from azureml.core.workspace import Workspace
+
+# Authenticate with the service principal read from aml_connection.
+auth = ServicePrincipalAuthentication(tenantid, appid, password)
+ws = Workspace.from_config(path=config_file, auth=auth)
+experiment = Experiment(ws, experiment_name)
+
+ml_run = AutoMLRun(experiment = experiment, run_id = run_id)
+
+# Parallel lists, one entry per (iteration, metric) pair; they become the output columns.
+iterationlist = []
+metricnamelist = []
+metricvaluelist = []
+
+for run in ml_run.get_children():
+    properties = run.get_properties()
+    # Child runs without an "iteration" property are skipped.
+    if "iteration" in properties:
+        iteration = int(properties["iteration"])
+        for metric_name, metric_value in run.get_metrics().items():
+            # Only float values are reported; the metric_value result column is FLOAT.
+            if isinstance(metric_value, float):
+                iterationlist.append(iteration)
+                metricnamelist.append(metric_name)
+                metricvaluelist.append(metric_value)
+
+metrics = pd.DataFrame({"iteration": iterationlist, "metric_name": metricnamelist, "metric_value": metricvaluelist})
+'
+    , @output_data_1_name = N'metrics'
+    , @params = N'@run_id NVARCHAR(250),
+                  @experiment_name NVARCHAR(32),
+                  @tenantid NVARCHAR(255),
+                  @appid NVARCHAR(255),
+                  @password NVARCHAR(255),
+                  @config_file NVARCHAR(255)'
+    , @run_id = @run_id
+    , @experiment_name = @experiment_name
+    , @tenantid = @tenantid
+    , @appid = @appid
+    , @password = @password
+    , @config_file = @config_file
+WITH RESULT SETS ((iteration INT, metric_name NVARCHAR(100), metric_value FLOAT))
+END
--- a/how-to-use-azureml/automated-machine-learning/sql-server/setup/AutoMLPredict.sql
+++ b/how-to-use-azureml/automated-machine-learning/sql-server/setup/AutoMLPredict.sql
@@ -0,0 +1,59 @@
+-- This procedure predicts values based on a model returned by AutoMLTrain and a dataset.
+-- It returns the dataset with a new column added, which is the predicted value.
+--
+-- Parameters:
+--   @input_query  - SQL query returning the rows to predict on.
+--   @model        - Base64-encoded pickled model, as produced in the fitted_model column of AutoMLTrain.
+--   @label_column - Optional column of @input_query that is removed before predicting.
+--
+-- The result set contains every column of @input_query plus a column named "predicted".
+-- The procedure declares no result-set shape itself; callers can add WITH RESULT SETS to the EXEC.
+SET ANSI_NULLS ON
+GO
+SET QUOTED_IDENTIFIER ON
+GO
+CREATE OR ALTER PROCEDURE [dbo].[AutoMLPredict]
+ (
+   @input_query NVARCHAR(MAX),      -- A SQL query returning data to predict on.
+   @model NVARCHAR(MAX),            -- A model returned from AutoMLTrain.
+   @label_column  NVARCHAR(255)=''  -- Optional name of the column from input_query, which should be ignored when predicting
+ ) AS
+BEGIN
+
+    -- A NULL model would only surface as an obscure Python error, so reject it up front.
+    IF @model IS NULL
+    BEGIN
+        RAISERROR(N'@model must not be NULL. Pass a model returned by AutoMLTrain.', 16, 1)
+        RETURN
+    END
+
+    EXEC sp_execute_external_script @language = N'Python', @script = N'import pandas as pd
+import azureml.core
+import numpy as np
+from azureml.train.automl import AutoMLConfig
+import pickle
+import codecs
+
+# NOTE(review): pandas, numpy and azureml are not referenced directly below; they are
+# presumably imported so the pickled model classes resolve - confirm before removing.
+
+# SECURITY: pickle.loads can execute arbitrary code embedded in the payload, so only
+# pass models that AutoMLTrain produced and that were stored in a trusted table.
+model_obj = pickle.loads(codecs.decode(model.encode(), "base64"))
+
+# Work on a copy so input_data still contains the label column for the output.
+X_test = input_data.copy()
+if label_column != "" and label_column is not None:
+    X_test.pop(label_column)
+
+predicted = model_obj.predict(X_test)
+
+combined_output = input_data.assign(predicted=predicted)
+'
+    , @input_data_1 = @input_query
+    , @input_data_1_name = N'input_data'
+    , @output_data_1_name = N'combined_output'
+    , @params = N'@model NVARCHAR(MAX), @label_column  NVARCHAR(255)'
+    , @model = @model
+    , @label_column = @label_column
+END
--- a/how-to-use-azureml/automated-machine-learning/sql-server/setup/AutoMLTrain.sql
+++ b/how-to-use-azureml/automated-machine-learning/sql-server/setup/AutoMLTrain.sql
@@ -0,0 +1,250 @@
+-- This stored procedure uses automated machine learning to train several models
+-- and returns the best model.
+--
+-- The result set has several columns:
+--   best_run - iteration ID for the best model
+--   experiment_name - experiment name passed in with the @experiment_name parameter
+--   fitted_model - best model found
+--   log_file_text - AutoML debug_log contents
+--   workspace - name of the Azure ML workspace where run history is stored
+--
+-- An example call for a classification problem is:
+--    insert into dbo.aml_model(RunId, ExperimentName, Model, LogFileText, WorkspaceName)
+--    exec dbo.AutoMLTrain @input_query='
+--    SELECT top 100000 
+--          CAST([pickup_datetime] AS NVARCHAR(30)) AS pickup_datetime
+--          ,CAST([dropoff_datetime] AS NVARCHAR(30)) AS dropoff_datetime
+--          ,[passenger_count]
+--          ,[trip_time_in_secs]
+--          ,[trip_distance]
+--          ,[payment_type]
+--          ,[tip_class]
+--      FROM [dbo].[nyctaxi_sample] order by [hack_license] ',
+--      @label_column = 'tip_class',
+--      @iterations=10
+-- 
+-- An example call for forecasting is:
+--      insert into dbo.aml_model(RunId, ExperimentName, Model, LogFileText, WorkspaceName)
+--      exec dbo.AutoMLTrain @input_query='
+--      select cast(timeStamp as nvarchar(30)) as timeStamp,
+--             demand,
+--      	   precip,
+--      	   temp,
+--             case when timeStamp < ''2017-01-01'' then 0 else 1 end as is_validate_column
+--      from nyc_energy
+--      where demand is not null and precip is not null and temp is not null
+--      and timeStamp < ''2017-02-01''',
+--      @label_column='demand',
+--      @task='forecasting',
+--      @iterations=10,
+--      @iteration_timeout_minutes=5,
+--      @time_column_name='timeStamp',
+--      @is_validate_column='is_validate_column',
+--      @experiment_name='automl-sql-forecast',
+--      @primary_metric='normalized_root_mean_squared_error'
+
+SET ANSI_NULLS ON
+GO
+SET QUOTED_IDENTIFIER ON
+GO
+CREATE OR ALTER PROCEDURE [dbo].[AutoMLTrain]
+ (
+    @input_query NVARCHAR(MAX),                      -- The SQL Query that will return the data to train and validate the model.
+    @label_column NVARCHAR(255)='Label',             -- The name of the column in the result of @input_query that is the label.
+    @primary_metric NVARCHAR(40)='AUC_weighted',     -- The metric to optimize.
+    @iterations INT=100,                             -- The maximum number of pipelines to train.
+    @task NVARCHAR(40)='classification',             -- The type of task.  Can be classification, regression or forecasting.
+    @experiment_name NVARCHAR(32)='automl-sql-test', -- This can be used to find the experiment in the Azure Portal.
+    @iteration_timeout_minutes INT = 15,             -- The maximum time in minutes for training a single pipeline.
+    @experiment_timeout_minutes INT = 60,            -- The maximum time in minutes for training all pipelines.
+    @n_cross_validations INT = 3,                    -- The number of cross validations.
+    @blacklist_models NVARCHAR(MAX) = '',            -- A comma separated list of algos that will not be used.
+                                                     -- The list of possible models can be found at:
+                                                     -- https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-configure-auto-train#configure-your-experiment-settings
+    @whitelist_models NVARCHAR(MAX) = '',            -- A comma separated list of algos that can be used.
+                                                     -- The list of possible models can be found at:
+                                                     -- https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-configure-auto-train#configure-your-experiment-settings
+    @experiment_exit_score FLOAT = 0,                -- Stop the experiment if this score is achieved.
+    @sample_weight_column NVARCHAR(255)='',          -- The name of the column in the result of  @input_query that gives a sample weight.
+    @is_validate_column NVARCHAR(255)='',            -- The name of the column in the result of  @input_query that indicates if the row is for training or validation.
+                                                     -- In the values of the column, 0 means for training and 1 means for validation.
+    @time_column_name  NVARCHAR(255)='',             -- The name of the timestamp column for forecasting.
+    @connection_name NVARCHAR(255)='default'         -- The AML connection to use.
+ ) AS
+BEGIN
+
+    DECLARE @tenantid NVARCHAR(255)
+    DECLARE @appid NVARCHAR(255)
+    DECLARE @password NVARCHAR(255)
+    DECLARE @config_file NVARCHAR(255)
+
+    SELECT @tenantid=TenantId, @appid=AppId, @password=Password, @config_file=ConfigFile
+    FROM aml_connection
+    WHERE ConnectionName = @connection_name;
+
+    -- Without a matching connection row every credential variable stays NULL and the
+    -- Python script below would fail with an obscure error, so stop here instead.
+    IF @@ROWCOUNT = 0
+    BEGIN
+        RAISERROR(N'No row with ConnectionName ''%s'' was found in aml_connection.', 16, 1, @connection_name)
+        RETURN
+    END
+
+    EXEC sp_execute_external_script @language = N'Python', @script = N'import pandas as pd
+import logging
+import azureml.core
+import numpy as np
+from azureml.core.experiment import Experiment
+from azureml.train.automl import AutoMLConfig
+import pickle
+import codecs
+from azureml.core.authentication import ServicePrincipalAuthentication
+from azureml.core.workspace import Workspace
+
+# NOTE(review): presumably __name__ starts with "sqlindb" only when the script runs
+# inside the SQL Server Python runtime - confirm.
+if __name__.startswith("sqlindb"):
+    auth = ServicePrincipalAuthentication(tenantid, appid, password)
+
+    ws = Workspace.from_config(path=config_file, auth=auth)
+
+    project_folder = "./sample_projects/" + experiment_name
+
+    experiment = Experiment(ws, experiment_name)
+
+    data_train = input_data
+    X_valid = None
+    y_valid = None
+    sample_weight_valid = None
+
+    # Rows with a value <= 0 in is_validate_column train the model; rows with a value > 0 validate it.
+    if is_validate_column != "" and is_validate_column is not None:
+        data_train = input_data[input_data[is_validate_column] <= 0]
+        data_valid = input_data[input_data[is_validate_column] > 0]
+        data_train.pop(is_validate_column)
+        data_valid.pop(is_validate_column)
+        y_valid = data_valid.pop(label_column).values
+        if sample_weight_column != "" and sample_weight_column is not None:
+            sample_weight_valid = data_valid.pop(sample_weight_column).values
+        X_valid = data_valid
+        # With an explicit validation set, cross validation is switched off.
+        n_cross_validations = None
+
+    y_train = data_train.pop(label_column).values
+
+    sample_weight = None
+    if sample_weight_column != "" and sample_weight_column is not None:
+        sample_weight = data_train.pop(sample_weight_column).values
+
+    X_train = data_train
+
+    # A value of 0 or an empty string means "not set"; AutoMLConfig then receives None.
+    if experiment_timeout_minutes == 0:
+        experiment_timeout_minutes = None
+
+    if experiment_exit_score == 0:
+        experiment_exit_score = None
+
+    if blacklist_models == "":
+        blacklist_models = None
+
+    if blacklist_models is not None:
+        blacklist_models = blacklist_models.replace(" ", "").split(",")
+
+    if whitelist_models == "":
+        whitelist_models = None
+
+    if whitelist_models is not None:
+        whitelist_models = whitelist_models.replace(" ", "").split(",")
+
+    # When a time column is named it is forwarded as time_column_name and preprocess is turned off.
+    automl_settings = {}
+    preprocess = True
+    if time_column_name != "" and time_column_name is not None:
+        automl_settings = { "time_column_name": time_column_name }
+        preprocess = False
+
+    log_file_name = "automl_errors.log"
+
+    automl_config = AutoMLConfig(task = task,
+                                 debug_log = log_file_name,
+                                 primary_metric = primary_metric,
+                                 iteration_timeout_minutes = iteration_timeout_minutes,
+                                 experiment_timeout_minutes = experiment_timeout_minutes,
+                                 iterations = iterations,
+                                 n_cross_validations = n_cross_validations,
+                                 preprocess = preprocess,
+                                 verbosity = logging.INFO,
+                                 enable_ensembling = False,
+                                 X = X_train,
+                                 y = y_train,
+                                 path = project_folder,
+                                 blacklist_models = blacklist_models,
+                                 whitelist_models = whitelist_models,
+                                 experiment_exit_score = experiment_exit_score,
+                                 sample_weight = sample_weight,
+                                 X_valid = X_valid,
+                                 y_valid = y_valid,
+                                 sample_weight_valid = sample_weight_valid,
+                                 **automl_settings)
+
+    local_run = experiment.submit(automl_config, show_output = True)
+
+    best_run, fitted_model = local_run.get_output()
+
+    # The pickled model is base64-encoded so it can be returned as text in fitted_model.
+    pickled_model = codecs.encode(pickle.dumps(fitted_model), "base64").decode()
+
+    log_file_text = ""
+
+    try:
+        with open(log_file_name, "r") as log_file:
+            log_file_text = log_file.read()
+    except Exception:
+        # Best effort only: an unreadable log must not discard the trained model.
+        log_file_text = "Log file not found"
+
+    # np.str was only an alias of the builtin str and no longer exists in NumPy 1.24+,
+    # so the builtin is used to build the same string dtype.
+    returned_model = pd.DataFrame({"best_run": [best_run.id], "experiment_name": [experiment_name], "fitted_model": [pickled_model], "log_file_text": [log_file_text], "workspace": [ws.name]}, dtype=np.dtype(str))
+'
+    , @input_data_1 = @input_query
+    , @input_data_1_name = N'input_data'
+    , @output_data_1_name = N'returned_model'
+    , @params = N'@label_column NVARCHAR(255),
+                  @primary_metric NVARCHAR(40),
+                  @iterations INT, @task NVARCHAR(40),
+                  @experiment_name NVARCHAR(32),
+                  @iteration_timeout_minutes INT,
+                  @experiment_timeout_minutes INT,
+                  @n_cross_validations INT,
+                  @blacklist_models NVARCHAR(MAX),
+                  @whitelist_models NVARCHAR(MAX),
+                  @experiment_exit_score FLOAT,
+                  @sample_weight_column NVARCHAR(255),
+                  @is_validate_column NVARCHAR(255),
+                  @time_column_name  NVARCHAR(255),
+                  @tenantid NVARCHAR(255),
+                  @appid NVARCHAR(255),
+                  @password NVARCHAR(255),
+                  @config_file NVARCHAR(255)'
+    , @label_column = @label_column
+    , @primary_metric = @primary_metric
+    , @iterations = @iterations
+    , @task = @task
+    , @experiment_name = @experiment_name
+    , @iteration_timeout_minutes = @iteration_timeout_minutes
+    , @experiment_timeout_minutes = @experiment_timeout_minutes
+    , @n_cross_validations = @n_cross_validations
+    , @blacklist_models = @blacklist_models
+    , @whitelist_models = @whitelist_models
+    , @experiment_exit_score = @experiment_exit_score
+    , @sample_weight_column = @sample_weight_column
+    , @is_validate_column = @is_validate_column
+    , @time_column_name = @time_column_name
+    , @tenantid = @tenantid
+    , @appid = @appid
+    , @password = @password
+    , @config_file = @config_file
+WITH RESULT SETS ((best_run NVARCHAR(250), experiment_name NVARCHAR(100), fitted_model VARCHAR(MAX), log_file_text NVARCHAR(MAX), workspace NVARCHAR(100)))
+END
--- a/how-to-use-azureml/automated-machine-learning/sql-server/setup/aml_connection.sql
+++ b/how-to-use-azureml/automated-machine-learning/sql-server/setup/aml_connection.sql
@@ -0,0 +1,20 @@
+-- Table that stores the Azure ML connection information.
+SET ANSI_NULLS ON
+GO
+
+SET QUOTED_IDENTIFIER ON
+GO
+
+CREATE TABLE [dbo].[aml_connection]
+(
+    [Id]             INT IDENTITY(1,1) NOT NULL,
+    [ConnectionName] NVARCHAR(255)     NULL,
+    [TenantId]       NVARCHAR(255)     NULL,
+    [AppId]          NVARCHAR(255)     NULL,
+    [Password]       NVARCHAR(255)     NULL,   -- NOTE(review): kept as plain text; consider restricting access to this table.
+    [ConfigFile]     NVARCHAR(255)     NULL,
+    PRIMARY KEY ([Id])
+) ON [PRIMARY]
+GO
+
+
--- a/how-to-use-azureml/automated-machine-learning/sql-server/setup/aml_model.sql
+++ b/how-to-use-azureml/automated-machine-learning/sql-server/setup/aml_model.sql
@@ -0,0 +1,21 @@
+-- Table that stores the results returned by the AutoMLTrain procedure.
+SET ANSI_NULLS ON
+GO
+
+SET QUOTED_IDENTIFIER ON
+GO
+
+CREATE TABLE [dbo].[aml_model]
+(
+    [Id]             INT IDENTITY(1,1) NOT NULL,
+    [Model]          VARCHAR(MAX)      NOT NULL,                    -- The model, which can be passed to AutoMLPredict for testing or prediction.
+    [RunId]          NVARCHAR(250)     NULL,                        -- The RunId, which can be used to view the model in the Azure Portal.
+    [CreatedDate]    DATETIME          NULL DEFAULT (GETUTCDATE()), -- UTC time of the insert unless a value is supplied.
+    [ExperimentName] NVARCHAR(100)     NULL,                        -- Azure ML Experiment Name
+    [WorkspaceName]  NVARCHAR(100)     NULL,                        -- Azure ML Workspace Name
+    [LogFileText]    NVARCHAR(MAX)     NULL,
+    PRIMARY KEY ([Id])
+)
+GO
+
+
--- a/how-to-use-azureml/automated-machine-learning/sql-server/setup/auto-ml-sql-setup.ipynb
+++ b/how-to-use-azureml/automated-machine-learning/sql-server/setup/auto-ml-sql-setup.ipynb
@@ -0,0 +1,562 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Set up Azure ML Automated Machine Learning on SQL Server 2019 CTP 2.4 big data cluster\r\n",
+    "\r\n",
+    "\\# Prerequisites:  \r\n",
+    "\\# - An Azure subscription and resource group  \r\n",
+    "\\# - An Azure Machine Learning workspace  \r\n",
+    "\\# - A SQL Server 2019 CTP 2.4 big data cluster with Internet access and a database named 'automl'  \r\n",
+    "\\# - Azure CLI  \r\n",
+    "\\# - kubectl command  \r\n",
+    "\\# - The https://github.com/Azure/MachineLearningNotebooks repository downloaded (cloned) to your local machine\r\n",
+    "\r\n",
+    "\\# In the 'automl' database, create a table named 'dbo.nyc_energy' as follows:  \r\n",
+    "\\# - In SQL Server Management Studio, right-click the 'automl' database, select Tasks, then Import Flat File.  \r\n",
+    "\\# - Select the file AzureMlCli\\notebooks\\how-to-use-azureml\\automated-machine-learning\\forecasting-energy-demand\\nyc_energy.csv.  \r\n",
+    "\\# - Using the \"Modify Columns\" page, allow nulls for all columns. \r\n",
+    "\r\n",
+    "\\# Create an Azure Machine Learning Workspace using the instructions at https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-workspace \r\n",
+    "\r\n",
+    "\\# Create an Azure service principal.  You can do this with the following commands: \r\n",
+    "\r\n",
+    "az login  \r\n",
+    "az account set --subscription *subscriptionid*  \r\n",
+    "\r\n",
+    "\\# The following command prints out the **appId** and **tenant**,  \r\n",
+    "\\# which you insert into the indicated cell later in this notebook  \r\n",
+    "\\# to allow AutoML to authenticate with Azure:  \r\n",
+    "\r\n",
+    "az ad sp create-for-rbac --name *principalname* --password *password*\r\n",
+    "\r\n",
+    "\\# Log into the master instance of SQL Server 2019 CTP 2.4:  \r\n",
+    "kubectl exec -it mssql-master-pool-0 -n *clustername* -c mssql-server -- /bin/bash\r\n",
+    "\r\n",
+    "mkdir /tmp/aml\r\n",
+    "\r\n",
+    "cd /tmp/aml\r\n",
+    "\r\n",
+    "\\# **Modify** the following with your subscription_id, resource_group, and workspace_name:  \r\n",
+    "cat > config.json << EOF  \r\n",
+    "{  \r\n",
+    "    \"subscription_id\": \"123456ab-78cd-0123-45ef-abcd12345678\",  \r\n",
+    "    \"resource_group\": \"myrg1\",  \r\n",
+    "    \"workspace_name\": \"myws1\"  \r\n",
+    "}  \r\n",
+    "EOF\r\n",
+    "\r\n",
+    "\\# The directory referenced below is appropriate for the master instance of SQL Server 2019 CTP 2.4.\r\n",
+    "\r\n",
+    "cd /opt/mssql/mlservices/runtime/python/bin\r\n",
+    "\r\n",
+    "./python -m pip install azureml-sdk[automl]\r\n",
+    "\r\n",
+    "./python -m pip install --upgrade numpy \r\n",
+    "\r\n",
+    "./python -m pip install --upgrade scikit-learn\r\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/automated-machine-learning/sql-server/setup/auto-ml-sql-setup.png)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "-- Enable external scripts to allow invoking Python\r\n",
+    "sp_configure 'external scripts enabled',1 \r\n",
+    "reconfigure with override \r\n",
+    "GO\r\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "-- Use database 'automl'\r\n",
+    "USE [automl]\r\n",
+    "GO"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "-- This is a table to hold the Azure ML connection information.\r\n",
+    "SET ANSI_NULLS ON\r\n",
+    "GO\r\n",
+    "\r\n",
+    "SET QUOTED_IDENTIFIER ON\r\n",
+    "GO\r\n",
+    "\r\n",
+    "CREATE TABLE [dbo].[aml_connection](\r\n",
+    "    [Id] [int] IDENTITY(1,1) NOT NULL PRIMARY KEY,\r\n",
+    "\t[ConnectionName] [nvarchar](255) NULL,\r\n",
+    "\t[TenantId] [nvarchar](255) NULL,\r\n",
+    "\t[AppId] [nvarchar](255) NULL,\r\n",
+    "\t[Password] [nvarchar](255) NULL,\r\n",
+    "\t[ConfigFile] [nvarchar](255) NULL\r\n",
+    ") ON [PRIMARY]\r\n",
+    "GO"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Copy the values from create-for-rbac above into the cell below"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "-- Use the following values:\r\n",
+    "-- Leave the name as 'default'; it matches the default @connection_name of the AutoML stored procedures\r\n",
+    "-- Insert <tenant> returned by create-for-rbac above\r\n",
+    "-- Insert <AppId> returned by create-for-rbac above\r\n",
+    "-- Insert <password> used in create-for-rbac above\r\n",
+    "-- Leave <path> as '/tmp/aml/config.json'\r\n",
+    "INSERT INTO [dbo].[aml_connection] ([ConnectionName], [TenantId], [AppId], [Password], [ConfigFile])\r\n",
+    "VALUES (\r\n",
+    "    N'default', -- Name\r\n",
+    "    N'11111111-2222-3333-4444-555555555555', -- Tenant\r\n",
+    "    N'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee', -- AppId\r\n",
+    "    N'insertpasswordhere', -- Password\r\n",
+    "    N'/tmp/aml/config.json' -- Path\r\n",
+    "    );\r\n",
+    "GO"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "-- This is a table to hold the results from the AutoMLTrain procedure.\r\n",
+    "SET ANSI_NULLS ON\r\n",
+    "GO\r\n",
+    "\r\n",
+    "SET QUOTED_IDENTIFIER ON\r\n",
+    "GO\r\n",
+    "\r\n",
+    "CREATE TABLE [dbo].[aml_model](\r\n",
+    "    [Id] [int] IDENTITY(1,1) NOT NULL PRIMARY KEY,\r\n",
+    "    [Model] [varchar](max) NOT NULL,        -- The model, which can be passed to AutoMLPredict for testing or prediction.\r\n",
+    "    [RunId] [nvarchar](250) NULL,           -- The RunId, which can be used to view the model in the Azure Portal.\r\n",
+    "    [CreatedDate] [datetime] NULL,\r\n",
+    "    [ExperimentName] [nvarchar](100) NULL,  -- Azure ML Experiment Name\r\n",
+    "    [WorkspaceName] [nvarchar](100) NULL,   -- Azure ML Workspace Name\r\n",
+    "\t[LogFileText] [nvarchar](max) NULL\r\n",
+    ") \r\n",
+    "GO\r\n",
+    "\r\n",
+    "ALTER TABLE [dbo].[aml_model] ADD  DEFAULT (getutcdate()) FOR [CreatedDate]\r\n",
+    "GO\r\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "-- This stored procedure uses automated machine learning to train several models\r\n",
+    "-- and return the best model.\r\n",
+    "--\r\n",
+    "-- The result set has several columns:\r\n",
+    "--   best_run - ID of the best model found\r\n",
+    "--   experiment_name - training run name\r\n",
+    "--   fitted_model - best model found\r\n",
+    "--   log_file_text - console output\r\n",
+    "--   workspace - name of the Azure ML workspace where run history is stored\r\n",
+    "--\r\n",
+    "-- An example call for a classification problem is:\r\n",
+    "--    insert into dbo.aml_model(RunId, ExperimentName, Model, LogFileText, WorkspaceName)\r\n",
+    "--    exec dbo.AutoMLTrain @input_query='\r\n",
+    "--    SELECT top 100000 \r\n",
+    "--          CAST([pickup_datetime] AS NVARCHAR(30)) AS pickup_datetime\r\n",
+    "--          ,CAST([dropoff_datetime] AS NVARCHAR(30)) AS dropoff_datetime\r\n",
+    "--          ,[passenger_count]\r\n",
+    "--          ,[trip_time_in_secs]\r\n",
+    "--          ,[trip_distance]\r\n",
+    "--          ,[payment_type]\r\n",
+    "--          ,[tip_class]\r\n",
+    "--      FROM [dbo].[nyctaxi_sample] order by [hack_license] ',\r\n",
+    "--      @label_column = 'tip_class',\r\n",
+    "--      @iterations=10\r\n",
+    "-- \r\n",
+    "-- An example call for forecasting is:\r\n",
+    "--      insert into dbo.aml_model(RunId, ExperimentName, Model, LogFileText, WorkspaceName)\r\n",
+    "--      exec dbo.AutoMLTrain @input_query='\r\n",
+    "--      select cast(timeStamp as nvarchar(30)) as timeStamp,\r\n",
+    "--             demand,\r\n",
+    "--      \t   precip,\r\n",
+    "--      \t   temp,\r\n",
+    "--             case when timeStamp < ''2017-01-01'' then 0 else 1 end as is_validate_column\r\n",
+    "--      from nyc_energy\r\n",
+    "--      where demand is not null and precip is not null and temp is not null\r\n",
+    "--      and timeStamp < ''2017-02-01''',\r\n",
+    "--      @label_column='demand',\r\n",
+    "--      @task='forecasting',\r\n",
+    "--      @iterations=10,\r\n",
+    "--      @iteration_timeout_minutes=5,\r\n",
+    "--      @time_column_name='timeStamp',\r\n",
+    "--      @is_validate_column='is_validate_column',\r\n",
+    "--      @experiment_name='automl-sql-forecast',\r\n",
+    "--      @primary_metric='normalized_root_mean_squared_error'\r\n",
+    "\r\n",
+    "SET ANSI_NULLS ON\r\n",
+    "GO\r\n",
+    "SET QUOTED_IDENTIFIER ON\r\n",
+    "GO\r\n",
+    "CREATE OR ALTER PROCEDURE [dbo].[AutoMLTrain]\r\n",
+    " (\r\n",
+    "    @input_query NVARCHAR(MAX),                      -- The SQL Query that will return the data to train and validate the model.\r\n",
+    "    @label_column NVARCHAR(255)='Label',             -- The name of the column in the result of @input_query that is the label.\r\n",
+    "    @primary_metric NVARCHAR(40)='AUC_weighted',     -- The metric to optimize.\r\n",
+    "    @iterations INT=100,                             -- The maximum number of pipelines to train.\r\n",
+    "    @task NVARCHAR(40)='classification',             -- The type of task.  Can be classification, regression or forecasting.\r\n",
+    "    @experiment_name NVARCHAR(32)='automl-sql-test', -- This can be used to find the experiment in the Azure Portal.\r\n",
+    "    @iteration_timeout_minutes INT = 15,             -- The maximum time in minutes for training a single pipeline. \r\n",
+    "    @experiment_timeout_minutes INT = 60,            -- The maximum time in minutes for training all pipelines.\r\n",
+    "    @n_cross_validations INT = 3,                    -- The number of cross validations.\r\n",
+    "    @blacklist_models NVARCHAR(MAX) = '',            -- A comma separated list of algos that will not be used.\r\n",
+    "                                                     -- The list of possible models can be found at:\r\n",
+    "                                                     -- https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-configure-auto-train#configure-your-experiment-settings\r\n",
+    "    @whitelist_models NVARCHAR(MAX) = '',            -- A comma separated list of algos that can be used.\r\n",
+    "                                                     -- The list of possible models can be found at:\r\n",
+    "                                                     -- https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-configure-auto-train#configure-your-experiment-settings\r\n",
+    "    @experiment_exit_score FLOAT = 0,                -- Stop the experiment if this score is achieved.\r\n",
+    "    @sample_weight_column NVARCHAR(255)='',          -- The name of the column in the result of  @input_query that gives a sample weight.\r\n",
+    "    @is_validate_column NVARCHAR(255)='',            -- The name of the column in the result of  @input_query that indicates if the row is for training or validation.\r\n",
+    "                                                     -- In the values of the column, 0 means for training and 1 means for validation.\r\n",
+    "    @time_column_name  NVARCHAR(255)='',             -- The name of the timestamp column for forecasting.\r\n",
+    "    @connection_name NVARCHAR(255)='default'         -- The AML connection to use.\r\n",
+    " ) AS\r\n",
+    "BEGIN\r\n",
+    "\r\n",
+    "    DECLARE @tenantid NVARCHAR(255)\r\n",
+    "    DECLARE @appid NVARCHAR(255)\r\n",
+    "    DECLARE @password NVARCHAR(255)\r\n",
+    "    DECLARE @config_file NVARCHAR(255)\r\n",
+    "\r\n",
+    "    -- Load the service principal credentials and the workspace config file path\r\n",
+    "    -- stored in aml_connection for the requested connection name.\r\n",
+    "    SELECT @tenantid=TenantId, @appid=AppId, @password=Password, @config_file=ConfigFile\r\n",
+    "    FROM aml_connection\r\n",
+    "    WHERE ConnectionName = @connection_name;\r\n",
+    "\r\n",
+    "    -- The Python script below receives the result of @input_query as the data frame input_data\r\n",
+    "    -- and must leave its single-row result in the data frame returned_model.\r\n",
+    "    -- It is embedded in a T-SQL string literal, so it must not contain single quotes.\r\n",
+    "    EXEC sp_execute_external_script @language = N'Python', @script = N'import pandas as pd\r\n",
+    "import logging\r\n",
+    "import azureml.core\r\n",
+    "import numpy as np\r\n",
+    "from azureml.core.experiment import Experiment\r\n",
+    "from azureml.train.automl import AutoMLConfig\r\n",
+    "from sklearn import datasets\r\n",
+    "import pickle\r\n",
+    "import codecs\r\n",
+    "from azureml.core.authentication import ServicePrincipalAuthentication\r\n",
+    "from azureml.core.workspace import Workspace\r\n",
+    "\r\n",
+    "# NOTE(review): presumably SQL Server runs external scripts under a module name starting with sqlindb; confirm.\r\n",
+    "if __name__.startswith(\"sqlindb\"):\r\n",
+    "    auth = ServicePrincipalAuthentication(tenantid, appid, password)\r\n",
+    "\r\n",
+    "    ws = Workspace.from_config(path=config_file, auth=auth)\r\n",
+    "\r\n",
+    "    project_folder = \"./sample_projects/\" + experiment_name\r\n",
+    "\r\n",
+    "    experiment = Experiment(ws, experiment_name)\r\n",
+    "\r\n",
+    "    data_train = input_data\r\n",
+    "    X_valid = None\r\n",
+    "    y_valid = None\r\n",
+    "    sample_weight_valid = None\r\n",
+    "\r\n",
+    "    if is_validate_column is not None and is_validate_column != \"\":\r\n",
+    "        # Rows with a flag value <= 0 are used for training, rows with a value > 0 for validation.\r\n",
+    "        # Copy each slice so the pops below modify independent frames rather than\r\n",
+    "        # slices of input_data (which triggers SettingWithCopyWarning).\r\n",
+    "        data_train = input_data[input_data[is_validate_column] <= 0].copy()\r\n",
+    "        data_valid = input_data[input_data[is_validate_column] > 0].copy()\r\n",
+    "        data_train.pop(is_validate_column)\r\n",
+    "        data_valid.pop(is_validate_column)\r\n",
+    "        y_valid = data_valid.pop(label_column).values\r\n",
+    "        if sample_weight_column is not None and sample_weight_column != \"\":\r\n",
+    "            sample_weight_valid = data_valid.pop(sample_weight_column).values\r\n",
+    "        X_valid = data_valid\r\n",
+    "        # An explicit validation set is supplied, so cross validation is turned off.\r\n",
+    "        n_cross_validations = None\r\n",
+    "\r\n",
+    "    y_train = data_train.pop(label_column).values\r\n",
+    "\r\n",
+    "    sample_weight = None\r\n",
+    "    if sample_weight_column is not None and sample_weight_column != \"\":\r\n",
+    "        sample_weight = data_train.pop(sample_weight_column).values\r\n",
+    "\r\n",
+    "    # Whatever columns remain after the pops above are the training features.\r\n",
+    "    X_train = data_train\r\n",
+    "\r\n",
+    "    # Map the SQL-side placeholder values (0 and empty string) to None before building the config.\r\n",
+    "    if experiment_timeout_minutes == 0:\r\n",
+    "        experiment_timeout_minutes = None\r\n",
+    "\r\n",
+    "    if experiment_exit_score == 0:\r\n",
+    "        experiment_exit_score = None\r\n",
+    "\r\n",
+    "    if blacklist_models == \"\":\r\n",
+    "        blacklist_models = None\r\n",
+    "\r\n",
+    "    # Comma separated text becomes a list of names; spaces are removed first.\r\n",
+    "    if blacklist_models is not None:\r\n",
+    "        blacklist_models = blacklist_models.replace(\" \", \"\").split(\",\")\r\n",
+    "\r\n",
+    "    if whitelist_models == \"\":\r\n",
+    "        whitelist_models = None\r\n",
+    "\r\n",
+    "    if whitelist_models is not None:\r\n",
+    "        whitelist_models = whitelist_models.replace(\" \", \"\").split(\",\")\r\n",
+    "\r\n",
+    "    # When a time column is given it is passed through as an extra setting and preprocess is switched off.\r\n",
+    "    automl_settings = {}\r\n",
+    "    preprocess = True\r\n",
+    "    if time_column_name is not None and time_column_name != \"\":\r\n",
+    "        automl_settings = { \"time_column_name\": time_column_name }\r\n",
+    "        preprocess = False\r\n",
+    "\r\n",
+    "    log_file_name = \"automl_errors.log\"\r\n",
+    "\r\n",
+    "    automl_config = AutoMLConfig(task = task,\r\n",
+    "                                 debug_log = log_file_name,\r\n",
+    "                                 primary_metric = primary_metric,\r\n",
+    "                                 iteration_timeout_minutes = iteration_timeout_minutes,\r\n",
+    "                                 experiment_timeout_minutes = experiment_timeout_minutes,\r\n",
+    "                                 iterations = iterations,\r\n",
+    "                                 n_cross_validations = n_cross_validations,\r\n",
+    "                                 preprocess = preprocess,\r\n",
+    "                                 verbosity = logging.INFO,\r\n",
+    "                                 enable_ensembling = False,\r\n",
+    "                                 X = X_train,\r\n",
+    "                                 y = y_train,\r\n",
+    "                                 path = project_folder,\r\n",
+    "                                 blacklist_models = blacklist_models,\r\n",
+    "                                 whitelist_models = whitelist_models,\r\n",
+    "                                 experiment_exit_score = experiment_exit_score,\r\n",
+    "                                 sample_weight = sample_weight,\r\n",
+    "                                 X_valid = X_valid,\r\n",
+    "                                 y_valid = y_valid,\r\n",
+    "                                 sample_weight_valid = sample_weight_valid,\r\n",
+    "                                 **automl_settings)\r\n",
+    "\r\n",
+    "    local_run = experiment.submit(automl_config, show_output = True)\r\n",
+    "\r\n",
+    "    best_run, fitted_model = local_run.get_output()\r\n",
+    "\r\n",
+    "    # The fitted model is pickled and base64 encoded so it can be returned as text.\r\n",
+    "    pickled_model = codecs.encode(pickle.dumps(fitted_model), \"base64\").decode()\r\n",
+    "\r\n",
+    "    log_file_text = \"\"\r\n",
+    "\r\n",
+    "    # Reading the log is best effort: a failure here must not discard the trained model,\r\n",
+    "    # but the reported text should say what actually went wrong.\r\n",
+    "    try:\r\n",
+    "        with open(log_file_name, \"r\") as log_file:\r\n",
+    "            log_file_text = log_file.read()\r\n",
+    "    except FileNotFoundError:\r\n",
+    "        log_file_text = \"Log file not found\"\r\n",
+    "    except Exception as ex:\r\n",
+    "        log_file_text = \"Log file could not be read: \" + str(ex)\r\n",
+    "\r\n",
+    "    # np.dtype(str) is the same dtype the removed np.str alias produced.\r\n",
+    "    returned_model = pd.DataFrame({\"best_run\": [best_run.id], \"experiment_name\": [experiment_name], \"fitted_model\": [pickled_model], \"log_file_text\": [log_file_text], \"workspace\": [ws.name]}, dtype=np.dtype(str))\r\n",
+    "'\r\n",
+    "\t, @input_data_1 = @input_query\r\n",
+    "\t, @input_data_1_name = N'input_data'\r\n",
+    "\t, @output_data_1_name = N'returned_model'\r\n",
+    "\t, @params = N'@label_column NVARCHAR(255), \r\n",
+    "\t              @primary_metric NVARCHAR(40),\r\n",
+    "\t\t\t\t  @iterations INT, @task NVARCHAR(40),\r\n",
+    "\t\t\t\t  @experiment_name NVARCHAR(32),\r\n",
+    "\t\t\t\t  @iteration_timeout_minutes INT,\r\n",
+    "\t\t\t\t  @experiment_timeout_minutes INT,\r\n",
+    "\t\t\t\t  @n_cross_validations INT,\r\n",
+    "\t\t\t\t  @blacklist_models NVARCHAR(MAX),\r\n",
+    "\t\t\t\t  @whitelist_models NVARCHAR(MAX),\r\n",
+    "\t\t\t\t  @experiment_exit_score FLOAT,\r\n",
+    "\t\t\t\t  @sample_weight_column NVARCHAR(255),\r\n",
+    "\t\t\t\t  @is_validate_column NVARCHAR(255),\r\n",
+    "\t\t\t\t  @time_column_name  NVARCHAR(255),\r\n",
+    "\t\t\t\t  @tenantid NVARCHAR(255),\r\n",
+    "\t\t\t\t  @appid NVARCHAR(255),\r\n",
+    "\t\t\t\t  @password NVARCHAR(255),\r\n",
+    "\t\t\t\t  @config_file NVARCHAR(255)'\r\n",
+    "\t, @label_column = @label_column\r\n",
+    "\t, @primary_metric = @primary_metric\r\n",
+    "\t, @iterations = @iterations\r\n",
+    "\t, @task = @task\r\n",
+    "\t, @experiment_name = @experiment_name\r\n",
+    "\t, @iteration_timeout_minutes = @iteration_timeout_minutes\r\n",
+    "\t, @experiment_timeout_minutes = @experiment_timeout_minutes\r\n",
+    "\t, @n_cross_validations = @n_cross_validations\r\n",
+    "\t, @blacklist_models = @blacklist_models\r\n",
+    "\t, @whitelist_models = @whitelist_models\r\n",
+    "\t, @experiment_exit_score = @experiment_exit_score\r\n",
+    "\t, @sample_weight_column = @sample_weight_column\r\n",
+    "\t, @is_validate_column = @is_validate_column\r\n",
+    "\t, @time_column_name = @time_column_name\r\n",
+    "\t, @tenantid = @tenantid\r\n",
+    "\t, @appid = @appid\r\n",
+    "\t, @password = @password\r\n",
+    "\t, @config_file = @config_file\r\n",
+    "WITH RESULT SETS ((best_run NVARCHAR(250), experiment_name NVARCHAR(100), fitted_model VARCHAR(MAX), log_file_text NVARCHAR(MAX), workspace NVARCHAR(100)))\r\n",
+    "END"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "-- Returns one row per (iteration, metric) for the child runs of an AutoML training run.\r\n",
+    "-- Only metrics whose value is a float are included in the result set.\r\n",
+    "SET ANSI_NULLS ON\r\n",
+    "GO\r\n",
+    "SET QUOTED_IDENTIFIER ON\r\n",
+    "GO\r\n",
+    "CREATE OR ALTER PROCEDURE [dbo].[AutoMLGetMetrics]\r\n",
+    " (\r\n",
+    "    @run_id NVARCHAR(250),                           -- The RunId\r\n",
+    "    @experiment_name NVARCHAR(32)='automl-sql-test', -- This can be used to find the experiment in the Azure Portal.\r\n",
+    "    @connection_name NVARCHAR(255)='default'         -- The AML connection to use.\r\n",
+    " ) AS\r\n",
+    "BEGIN\r\n",
+    "    DECLARE @tenantid NVARCHAR(255),\r\n",
+    "            @appid NVARCHAR(255),\r\n",
+    "            @password NVARCHAR(255),\r\n",
+    "            @config_file NVARCHAR(255)\r\n",
+    "\r\n",
+    "    -- Fetch the credentials and config file path registered for this connection name.\r\n",
+    "    SELECT @tenantid=TenantId, @appid=AppId, @password=Password, @config_file=ConfigFile\r\n",
+    "    FROM aml_connection\r\n",
+    "    WHERE ConnectionName = @connection_name;\r\n",
+    "\r\n",
+    "    -- The script is embedded in a T-SQL string literal, so it must not contain single quotes.\r\n",
+    "    EXEC sp_execute_external_script @language = N'Python', @script = N'import pandas as pd\r\n",
+    "import logging\r\n",
+    "import azureml.core\r\n",
+    "import numpy as np\r\n",
+    "from azureml.core.experiment import Experiment\r\n",
+    "from azureml.train.automl.run import AutoMLRun\r\n",
+    "from azureml.core.authentication import ServicePrincipalAuthentication\r\n",
+    "from azureml.core.workspace import Workspace\r\n",
+    "\r\n",
+    "# Open the workspace with the service principal, then attach to the requested run.\r\n",
+    "sp_auth = ServicePrincipalAuthentication(tenantid, appid, password)\r\n",
+    "workspace = Workspace.from_config(path=config_file, auth=sp_auth)\r\n",
+    "parent_experiment = Experiment(workspace, experiment_name)\r\n",
+    "parent_run = AutoMLRun(experiment = parent_experiment, run_id = run_id)\r\n",
+    "\r\n",
+    "# Three parallel lists, one per output column.\r\n",
+    "iteration_column = []\r\n",
+    "name_column = []\r\n",
+    "value_column = []\r\n",
+    "\r\n",
+    "for child_run in list(parent_run.get_children()):\r\n",
+    "    child_properties = child_run.get_properties()\r\n",
+    "    # Child runs without an iteration property are skipped.\r\n",
+    "    if \"iteration\" not in child_properties:\r\n",
+    "        continue\r\n",
+    "    iteration_number = int(child_properties[\"iteration\"])\r\n",
+    "    for name, value in child_run.get_metrics().items():\r\n",
+    "        # Keep float valued metrics only.\r\n",
+    "        if not isinstance(value, float):\r\n",
+    "            continue\r\n",
+    "        iteration_column.append(iteration_number)\r\n",
+    "        name_column.append(name)\r\n",
+    "        value_column.append(value)\r\n",
+    "\r\n",
+    "metrics = pd.DataFrame({\"iteration\": iteration_column, \"metric_name\": name_column, \"metric_value\": value_column})\r\n",
+    "'\r\n",
+    "    , @output_data_1_name = N'metrics'\r\n",
+    "    , @params = N'@run_id NVARCHAR(250), \r\n",
+    "                  @experiment_name NVARCHAR(32),\r\n",
+    "                  @tenantid NVARCHAR(255),\r\n",
+    "                  @appid NVARCHAR(255),\r\n",
+    "                  @password NVARCHAR(255),\r\n",
+    "                  @config_file NVARCHAR(255)'\r\n",
+    "    , @run_id = @run_id\r\n",
+    "    , @experiment_name = @experiment_name\r\n",
+    "    , @tenantid = @tenantid\r\n",
+    "    , @appid = @appid\r\n",
+    "    , @password = @password\r\n",
+    "    , @config_file = @config_file\r\n",
+    "WITH RESULT SETS ((iteration INT, metric_name NVARCHAR(100), metric_value FLOAT))\r\n",
+    "END"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "-- Scores the rows returned by @input_query with a model produced by AutoMLTrain.\r\n",
+    "-- The result is the input rows plus one extra column named predicted.\r\n",
+    "SET ANSI_NULLS ON\r\n",
+    "GO\r\n",
+    "SET QUOTED_IDENTIFIER ON\r\n",
+    "GO\r\n",
+    "CREATE OR ALTER PROCEDURE [dbo].[AutoMLPredict]\r\n",
+    " (\r\n",
+    "   @input_query NVARCHAR(MAX),      -- A SQL query returning data to predict on.\r\n",
+    "   @model NVARCHAR(MAX),            -- A model returned from AutoMLTrain.\r\n",
+    "   @label_column  NVARCHAR(255)=''  -- Optional name of the column from input_query, which should be ignored when predicting\r\n",
+    " ) AS\r\n",
+    "BEGIN\r\n",
+    "\r\n",
+    "    -- The script is embedded in a T-SQL string literal, so it must not contain single quotes.\r\n",
+    "    EXEC sp_execute_external_script @language = N'Python', @script = N'import pandas as pd\r\n",
+    "import azureml.core\r\n",
+    "import numpy as np\r\n",
+    "from azureml.train.automl import AutoMLConfig\r\n",
+    "import pickle\r\n",
+    "import codecs\r\n",
+    "\r\n",
+    "# The model parameter is base64 text wrapping a pickled object.\r\n",
+    "# NOTE(review): pickle.loads can execute arbitrary code from its input; only pass models from a trusted source.\r\n",
+    "fitted_model = pickle.loads(codecs.decode(model.encode(), \"base64\"))\r\n",
+    "\r\n",
+    "# Score a copy so the label column can be removed while input_data stays intact for the output.\r\n",
+    "features = input_data.copy()\r\n",
+    "if label_column is not None and label_column != \"\":\r\n",
+    "    features.pop(label_column)\r\n",
+    "\r\n",
+    "predictions = fitted_model.predict(features)\r\n",
+    "\r\n",
+    "combined_output = input_data.assign(predicted=predictions)\r\n",
+    "\r\n",
+    "'\r\n",
+    "    , @input_data_1 = @input_query\r\n",
+    "    , @input_data_1_name = N'input_data'\r\n",
+    "    , @output_data_1_name = N'combined_output'\r\n",
+    "    , @params = N'@model NVARCHAR(MAX), @label_column  NVARCHAR(255)'\r\n",
+    "    , @model = @model\r\n",
+    "    , @label_column = @label_column\r\n",
+    "END"
+   ]
+  }
+ ],
+ "metadata": {
+  "authors": [
+   {
+    "name": "jeffshep"
+   }
+  ],
+  "kernelspec": {
+   "display_name": "SQL",
+   "language": "sql",
+   "name": "SQL"
+  },
+  "language_info": {
+   "name": "sql",
+   "version": ""
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}