Compare commits
10 Commits
release_up
...
release_up
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
18cd152591 | ||
|
|
4170a394ed | ||
|
|
475ea36106 | ||
|
|
9e0fc4f0e7 | ||
|
|
e97e4742ba | ||
|
|
14ecfb0bf3 | ||
|
|
61b396be4f | ||
|
|
3d2552174d | ||
|
|
cd3c980a6e | ||
|
|
249bcac3c7 |
@@ -58,7 +58,7 @@
|
||||
"\n",
|
||||
"### What is an Azure Machine Learning workspace\n",
|
||||
"\n",
|
||||
"An Azure ML Workspace is an Azure resource that organizes and coordinates the actions of many other Azure resources to assist in executing and sharing machine learning workflows. In particular, an Azure ML Workspace coordinates storage, databases, and compute resources providing added functionality for machine learning experimentation, deployment, inferencing, and the monitoring of deployed models."
|
||||
"An Azure ML Workspace is an Azure resource that organizes and coordinates the actions of many other Azure resources to assist in executing and sharing machine learning workflows. In particular, an Azure ML Workspace coordinates storage, databases, and compute resources providing added functionality for machine learning experimentation, deployment, inference, and the monitoring of deployed models."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -103,7 +103,7 @@
|
||||
"source": [
|
||||
"import azureml.core\n",
|
||||
"\n",
|
||||
"print(\"This notebook was created using version of the Azure ML SDK\")\n",
|
||||
"print(\"This notebook was created using version 1.0.48\r\n of the Azure ML SDK\")\n",
|
||||
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -33,10 +33,9 @@
|
||||
"source": [
|
||||
"## Install the DataDrift package\n",
|
||||
"\n",
|
||||
"Install the azureml-contrib-datadrift, azureml-contrib-opendatasets and lightgbm packages before running this notebook.\n",
|
||||
"Install the azureml-contrib-datadrift, azureml-opendatasets and lightgbm packages before running this notebook.\n",
|
||||
"```\n",
|
||||
"pip install azureml-contrib-datadrift\n",
|
||||
"pip install azureml-contrib-datasets\n",
|
||||
"pip install lightgbm\n",
|
||||
"```"
|
||||
]
|
||||
@@ -63,7 +62,7 @@
|
||||
"import pandas as pd\n",
|
||||
"import requests\n",
|
||||
"from azureml.contrib.datadrift import DataDriftDetector, AlertConfiguration\n",
|
||||
"from azureml.contrib.opendatasets import NoaaIsdWeather\n",
|
||||
"from azureml.opendatasets import NoaaIsdWeather\n",
|
||||
"from azureml.core import Dataset, Workspace, Run\n",
|
||||
"from azureml.core.compute import AksCompute, ComputeTarget\n",
|
||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||
@@ -259,8 +258,7 @@
|
||||
"trainingDataset = Dataset.auto_read_files(dpath, include_path=True)\n",
|
||||
"trainingDataset = trainingDataset.register(workspace=ws, name=dataset_name, description=\"dset\", exist_ok=True)\n",
|
||||
"\n",
|
||||
"trainingDataSnapshot = trainingDataset.create_snapshot(snapshot_name=snapshot_name, compute_target=None, create_data_snapshot=True)\n",
|
||||
"datasets = [(Dataset.Scenario.TRAINING, trainingDataSnapshot)]\n",
|
||||
"datasets = [(Dataset.Scenario.TRAINING, trainingDataset)]\n",
|
||||
"print(\"dataset registration done.\\n\")\n",
|
||||
"datasets"
|
||||
]
|
||||
@@ -574,6 +572,22 @@
|
||||
" time.sleep(3)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We need to wait up to 10 minutes for the Model Data Collector to dump the model input and inference data to storage in the Workspace, where it's used by the DataDriftDetector job."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"time.sleep(600)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
||||
@@ -3,6 +3,6 @@ dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-contrib-datadrift
|
||||
- azureml-contrib-opendatasets
|
||||
- azureml-opendatasets
|
||||
- lightgbm
|
||||
- azureml-widgets
|
||||
|
||||
@@ -2,6 +2,7 @@ name: azure_automl
|
||||
dependencies:
|
||||
# The python interpreter version.
|
||||
# Currently Azure ML only supports 3.5.2 and later.
|
||||
- pip
|
||||
- python>=3.5.2,<3.6.8
|
||||
- nb_conda
|
||||
- matplotlib==2.1.0
|
||||
|
||||
@@ -192,6 +192,7 @@
|
||||
"source": [
|
||||
"from azureml.core.runconfig import RunConfiguration\n",
|
||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||
"import pkg_resources\n",
|
||||
"\n",
|
||||
"# create a new RunConfig object\n",
|
||||
"conda_run_config = RunConfiguration(framework=\"python\")\n",
|
||||
@@ -201,8 +202,9 @@
|
||||
"conda_run_config.environment.docker.enabled = True\n",
|
||||
"conda_run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n",
|
||||
"\n",
|
||||
"dprep_dependency = 'azureml-dataprep==' + pkg_resources.get_distribution(\"azureml-dataprep\").version\n",
|
||||
"\n",
|
||||
"cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]'], conda_packages=['numpy','py-xgboost<=0.80'])\n",
|
||||
"cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]', dprep_dependency], conda_packages=['numpy','py-xgboost<=0.80'])\n",
|
||||
"conda_run_config.environment.python.conda_dependencies = cd"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -189,6 +189,7 @@
|
||||
"source": [
|
||||
"from azureml.core.runconfig import RunConfiguration\n",
|
||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||
"import pkg_resources\n",
|
||||
"\n",
|
||||
"# create a new RunConfig object\n",
|
||||
"conda_run_config = RunConfiguration(framework=\"python\")\n",
|
||||
@@ -198,8 +199,9 @@
|
||||
"conda_run_config.environment.docker.enabled = True\n",
|
||||
"conda_run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n",
|
||||
"\n",
|
||||
"dprep_dependency = 'azureml-dataprep==' + pkg_resources.get_distribution(\"azureml-dataprep\").version\n",
|
||||
"\n",
|
||||
"cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]'], conda_packages=['numpy','py-xgboost<=0.80'])\n",
|
||||
"cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]', dprep_dependency], conda_packages=['numpy','py-xgboost<=0.80'])\n",
|
||||
"conda_run_config.environment.python.conda_dependencies = cd"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -241,6 +241,7 @@
|
||||
"source": [
|
||||
"from azureml.core.runconfig import RunConfiguration\n",
|
||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||
"import pkg_resources\n",
|
||||
"\n",
|
||||
"# create a new RunConfig object\n",
|
||||
"conda_run_config = RunConfiguration(framework=\"python\")\n",
|
||||
@@ -250,7 +251,9 @@
|
||||
"conda_run_config.environment.docker.enabled = True\n",
|
||||
"conda_run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n",
|
||||
"\n",
|
||||
"cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]'], conda_packages=['numpy','py-xgboost<=0.80'])\n",
|
||||
"dprep_dependency = 'azureml-dataprep==' + pkg_resources.get_distribution(\"azureml-dataprep\").version\n",
|
||||
"\n",
|
||||
"cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]', dprep_dependency], conda_packages=['numpy','py-xgboost<=0.80'])\n",
|
||||
"conda_run_config.environment.python.conda_dependencies = cd"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -9,13 +16,6 @@
|
||||
"Licensed under the MIT License."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
||||
@@ -184,6 +184,7 @@
|
||||
"source": [
|
||||
"from azureml.core.runconfig import RunConfiguration\n",
|
||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||
"import pkg_resources\n",
|
||||
"\n",
|
||||
"# create a new RunConfig object\n",
|
||||
"conda_run_config = RunConfiguration(framework=\"python\")\n",
|
||||
@@ -193,8 +194,9 @@
|
||||
"conda_run_config.environment.docker.enabled = True\n",
|
||||
"conda_run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n",
|
||||
"\n",
|
||||
"dprep_dependency = 'azureml-dataprep==' + pkg_resources.get_distribution(\"azureml-dataprep\").version\n",
|
||||
"\n",
|
||||
"cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]'], conda_packages=['numpy'])\n",
|
||||
"cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]', dprep_dependency], conda_packages=['numpy'])\n",
|
||||
"conda_run_config.environment.python.conda_dependencies = cd"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -184,6 +184,7 @@
|
||||
"source": [
|
||||
"from azureml.core.runconfig import RunConfiguration\n",
|
||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||
"import pkg_resources\n",
|
||||
"\n",
|
||||
"# create a new RunConfig object\n",
|
||||
"conda_run_config = RunConfiguration(framework=\"python\")\n",
|
||||
@@ -193,8 +194,9 @@
|
||||
"conda_run_config.environment.docker.enabled = True\n",
|
||||
"conda_run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n",
|
||||
"\n",
|
||||
"dprep_dependency = 'azureml-dataprep==' + pkg_resources.get_distribution(\"azureml-dataprep\").version\n",
|
||||
"\n",
|
||||
"cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]'], conda_packages=['numpy'])\n",
|
||||
"cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]', dprep_dependency], conda_packages=['numpy'])\n",
|
||||
"conda_run_config.environment.python.conda_dependencies = cd"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -84,7 +84,8 @@
|
||||
"import azureml.core\n",
|
||||
"from azureml.core.experiment import Experiment\n",
|
||||
"from azureml.core.workspace import Workspace\n",
|
||||
"from azureml.train.automl import AutoMLConfig"
|
||||
"from azureml.train.automl import AutoMLConfig\n",
|
||||
"import azureml.dataprep as dprep"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -186,18 +187,11 @@
|
||||
"if not os.path.exists(project_folder):\n",
|
||||
" os.makedirs(project_folder)\n",
|
||||
" \n",
|
||||
"pd.DataFrame(data_train.data).to_csv(\"data/X_train.tsv\", index=False, header=False, quoting=csv.QUOTE_ALL, sep=\"\\t\")\n",
|
||||
"pd.DataFrame(data_train.target).to_csv(\"data/y_train.tsv\", index=False, header=False, sep=\"\\t\")\n",
|
||||
"pd.DataFrame(data_train.data[100:,:]).to_csv(\"data/X_train.csv\", index=False)\n",
|
||||
"pd.DataFrame(data_train.target[100:]).to_csv(\"data/y_train.csv\", index=False)\n",
|
||||
"\n",
|
||||
"ds = ws.get_default_datastore()\n",
|
||||
"ds.upload(src_dir='./data', target_path='bai_data', overwrite=True, show_progress=True)\n",
|
||||
"\n",
|
||||
"from azureml.core.runconfig import DataReferenceConfiguration\n",
|
||||
"dr = DataReferenceConfiguration(datastore_name=ds.name, \n",
|
||||
" path_on_datastore='bai_data', \n",
|
||||
" path_on_compute='/tmp/azureml_runs',\n",
|
||||
" mode='download', # download files from datastore to compute target\n",
|
||||
" overwrite=False)"
|
||||
"ds.upload(src_dir='./data', target_path='digitsdata', overwrite=True, show_progress=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -208,6 +202,7 @@
|
||||
"source": [
|
||||
"from azureml.core.runconfig import RunConfiguration\n",
|
||||
"from azureml.core.conda_dependencies import CondaDependencies\n",
|
||||
"import pkg_resources\n",
|
||||
"\n",
|
||||
"# create a new RunConfig object\n",
|
||||
"conda_run_config = RunConfiguration(framework=\"python\")\n",
|
||||
@@ -217,28 +212,29 @@
|
||||
"conda_run_config.environment.docker.enabled = True\n",
|
||||
"conda_run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE\n",
|
||||
"\n",
|
||||
"# set the data reference of the run coonfiguration\n",
|
||||
"conda_run_config.data_references = {ds.name: dr}\n",
|
||||
"dprep_dependency = 'azureml-dataprep==' + pkg_resources.get_distribution(\"azureml-dataprep\").version\n",
|
||||
"\n",
|
||||
"cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]'], conda_packages=['numpy','py-xgboost<=0.80'])\n",
|
||||
"cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]', dprep_dependency], conda_packages=['numpy','py-xgboost<=0.80'])\n",
|
||||
"conda_run_config.environment.python.conda_dependencies = cd"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Dprep reference\n",
|
||||
"\n",
|
||||
"Defined X and y as dprep references, which are passed to automated machine learning in the AutoMLConfig."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%writefile $project_folder/get_data.py\n",
|
||||
"\n",
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"def get_data():\n",
|
||||
" X_train = pd.read_csv(\"/tmp/azureml_runs/bai_data/X_train.tsv\", delimiter=\"\\t\", header=None, quotechar='\"')\n",
|
||||
" y_train = pd.read_csv(\"/tmp/azureml_runs/bai_data/y_train.tsv\", delimiter=\"\\t\", header=None, quotechar='\"')\n",
|
||||
"\n",
|
||||
" return { \"X\" : X_train.values, \"y\" : y_train[0].values }\n"
|
||||
"X = dprep.auto_read_file(path=ds.path('digitsdata/X_train.csv'))\n",
|
||||
"y = dprep.auto_read_file(path=ds.path('digitsdata/y_train.csv'))"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -280,7 +276,8 @@
|
||||
" debug_log = 'automl_errors.log',\n",
|
||||
" path = project_folder,\n",
|
||||
" run_configuration=conda_run_config,\n",
|
||||
" data_script = project_folder + \"/get_data.py\",\n",
|
||||
" X = X,\n",
|
||||
" y = y,\n",
|
||||
" **automl_settings\n",
|
||||
" )\n"
|
||||
]
|
||||
|
||||
@@ -0,0 +1,113 @@
|
||||
# Table of Contents
|
||||
1. [Introduction](#introduction)
|
||||
1. [Setup using Azure Data Studio](#azuredatastudiosetup)
|
||||
1. [Energy demand example using Azure Data Studio](#azuredatastudioenergydemand)
|
||||
1. [Set using SQL Server Management Studio for SQL Server 2017 on Windows](#ssms2017)
|
||||
1. [Set using SQL Server Management Studio for SQL Server 2019 on Linux](#ssms2019)
|
||||
1. [Energy demand example using SQL Server Management Studio](#ssmsenergydemand)
|
||||
|
||||
|
||||
<a name="introduction"></a>
|
||||
# Introduction
|
||||
SQL Server 2017 or 2019 can call Azure ML automated machine learning to create models trained on data from SQL Server.
|
||||
This uses the sp_execute_external_script stored procedure, which can call Python scripts.
|
||||
SQL Server 2017 and SQL Server 2019 can both run on Windows or Linux.
|
||||
However, this integration is not available for SQL Server 2017 on Linux.
|
||||
|
||||
This folder shows how to setup the integration and has a sample that uses the integration to train and predict based on an energy demand dataset.
|
||||
|
||||
This integration is part of SQL Server and so can be used from any SQL client.
|
||||
These instructions show using it from Azure Data Studio or SQL Server Managment Studio.
|
||||
|
||||
<a name="azuredatastudiosetup"></a>
|
||||
## Setup using Azure Data Studio
|
||||
|
||||
These step show setting up the integration using Azure Data Studio.
|
||||
|
||||
1. If you don't already have SQL Server, you can install it from [https://www.microsoft.com/en-us/sql-server/sql-server-downloads](https://www.microsoft.com/en-us/sql-server/sql-server-downloads)
|
||||
1. Install Azure Data Studio from [https://docs.microsoft.com/en-us/sql/azure-data-studio/download?view=sql-server-2017](https://docs.microsoft.com/en-us/sql/azure-data-studio/download?view=sql-server-2017)
|
||||
1. Start Azure Data Studio and connect to SQL Server. [https://docs.microsoft.com/en-us/sql/azure-data-studio/sql-notebooks?view=sql-server-2017](https://docs.microsoft.com/en-us/sql/azure-data-studio/sql-notebooks?view=sql-server-2017)
|
||||
1. Create a database named "automl".
|
||||
1. Open the notebook how-to-use-azureml\automated-machine-learning\sql-server\setup\auto-ml-sql-setup.ipynb and follow the instructions in it.
|
||||
|
||||
<a name="azuredatastudioenergydemand"></a>
|
||||
## Energy demand example using Azure Data Studio
|
||||
|
||||
Once you have completed the setup, you can try the energy demand sample in the notebook energy-demand\auto-ml-sql-energy-demand.ipynb.
|
||||
This has cells to train a model, predict based on the model and show metrics for each pipeline run in training the model.
|
||||
|
||||
<a name="ssms2017"></a>
|
||||
## Setup using SQL Server Management Studio for SQL Server 2017 on Windows
|
||||
|
||||
These instruction setup the integration for SQL Server 2017 on Windows.
|
||||
|
||||
1. If you don't already have SQL Server, you can install it from [https://www.microsoft.com/en-us/sql-server/sql-server-downloads](https://www.microsoft.com/en-us/sql-server/sql-server-downloads)
|
||||
2. Enable external scripts with the following commands:
|
||||
```sh
|
||||
sp_configure 'external scripts enabled',1
|
||||
reconfigure with override
|
||||
```
|
||||
3. Stop SQL Server.
|
||||
4. Install the automated machine learning libraries using the following commands from Administrator command prompt (If you are using a non-default SQL Server instance name, replace MSSQLSERVER in the second command with the instance name)
|
||||
```sh
|
||||
cd "C:\Program Files\Microsoft SQL Server"
|
||||
cd "MSSQL14.MSSQLSERVER\PYTHON_SERVICES"
|
||||
python.exe -m pip install azureml-sdk[automl]
|
||||
python.exe -m pip install --upgrade numpy
|
||||
python.exe -m pip install --upgrade sklearn
|
||||
```
|
||||
5. Start SQL Server and the service "SQL Server Launchpad service".
|
||||
6. In Windows Firewall, click on advanced settings and in Outbound Rules, disable "Block network access for R local user accounts in SQL Server instance xxxx".
|
||||
7. Execute the files in the setup folder in SQL Server Management Studio: aml_model.sql, aml_connection.sql, AutoMLGetMetrics.sql, AutoMLPredict.sql and AutoMLTrain.sql
|
||||
8. Create an Azure Machine Learning Workspace. You can use the instructions at: [https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-workspace ](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-workspace)
|
||||
9. Create a config.json file file using the subscription id, resource group name and workspace name that you used to create the workspace. The file is described at: [https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-configure-environment#workspace](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-configure-environment#workspace)
|
||||
10. Create an Azure service principal. You can do this with the commands:
|
||||
```sh
|
||||
az login
|
||||
az account set --subscription subscriptionid
|
||||
az ad sp create-for-rbac --name principlename --password password
|
||||
```
|
||||
11. Insert the values \<tenant\>, \<AppId\> and \<password\> returned by create-for-rbac above into the aml_connection table. Set \<path\> as the absolute path to your config.json file. Set the name to “Default”.
|
||||
|
||||
<a name="ssms2019"></a>
|
||||
## Setup using SQL Server Management Studio for SQL Server 2019 on Linux
|
||||
1. Install SQL Server 2019 from: [https://www.microsoft.com/en-us/sql-server/sql-server-downloads](https://www.microsoft.com/en-us/sql-server/sql-server-downloads)
|
||||
2. Install machine learning support from: [https://docs.microsoft.com/en-us/sql/linux/sql-server-linux-setup-machine-learning?view=sqlallproducts-allversions#ubuntu](https://docs.microsoft.com/en-us/sql/linux/sql-server-linux-setup-machine-learning?view=sqlallproducts-allversions#ubuntu)
|
||||
3. Then install SQL Server management Studio from [https://docs.microsoft.com/en-us/sql/ssms/download-sql-server-management-studio-ssms?view=sql-server-2017](https://docs.microsoft.com/en-us/sql/ssms/download-sql-server-management-studio-ssms?view=sql-server-2017)
|
||||
4. Enable external scripts with the following commands:
|
||||
```sh
|
||||
sp_configure 'external scripts enabled',1
|
||||
reconfigure with override
|
||||
```
|
||||
5. Stop SQL Server.
|
||||
6. Install the automated machine learning libraries using the following commands from Administrator command (If you are using a non-default SQL Server instance name, replace MSSQLSERVER in the second command with the instance name):
|
||||
```sh
|
||||
sudo /opt/mssql/mlservices/bin/python/python -m pip install azureml-sdk[automl]
|
||||
sudo /opt/mssql/mlservices/bin/python/python -m pip install --upgrade numpy
|
||||
sudo /opt/mssql/mlservices/bin/python/python -m pip install --upgrade sklearn
|
||||
```
|
||||
7. Start SQL Server.
|
||||
8. Execute the files aml_model.sql, aml_connection.sql, AutoMLGetMetrics.sql, AutoMLPredict.sql and AutoMLTrain.sql in SQL Server Management Studio.
|
||||
9. Create an Azure Machine Learning Workspace. You can use the instructions at: [https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-workspace](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-workspace)
|
||||
10. Create a config.json file file using the subscription id, resource group name and workspace name that you use to create the workspace. The file is described at: [https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-configure-environment#workspace](https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-configure-environment#workspace)
|
||||
11. Create an Azure service principal. You can do this with the commands:
|
||||
```sh
|
||||
az login
|
||||
az account set --subscription subscriptionid
|
||||
az ad sp create-for-rbac --name principlename --password password
|
||||
```
|
||||
12. Insert the values \<tenant\>, \<AppId\> and \<password\> returned by create-for-rbac above into the aml_connection table. Set \<path\> as the absolute path to your config.json file. Set the name to “Default”.
|
||||
|
||||
<a name="ssmsenergydemand"></a>
|
||||
## Energy demand example using SQL Server Management Studio
|
||||
|
||||
Once you have completed the setup, you can try the energy demand sample queries.
|
||||
First you need to load the sample data in the database.
|
||||
1. In SQL Server Management Studio, you can right-click the database, select Tasks, then Import Flat file.
|
||||
1. Select the file MachineLearningNotebooks\notebooks\how-to-use-azureml\automated-machine-learning\forecasting-energy-demand\nyc_energy.csv.
|
||||
1. When you get to the column definition page, allow nulls for all columns.
|
||||
|
||||
You can then run the queries in the energy-demand folder:
|
||||
* TrainEnergyDemand.sql runs AutoML, trains multiple models on data and selects the best model.
|
||||
* PredictEnergyDemand.sql predicts based on the most recent training run.
|
||||
* GetMetrics.sql returns all the metrics for each model in the most recent training run.
|
||||
@@ -0,0 +1,23 @@
|
||||
-- This shows using the AutoMLForecast stored procedure to predict using a forecasting model for the nyc_energy dataset.
|
||||
|
||||
DECLARE @Model NVARCHAR(MAX) = (SELECT TOP 1 Model FROM dbo.aml_model
|
||||
WHERE ExperimentName = 'automl-sql-forecast'
|
||||
ORDER BY CreatedDate DESC)
|
||||
|
||||
DECLARE @max_horizon INT = 48
|
||||
DECLARE @split_time NVARCHAR(22) = (SELECT DATEADD(hour, -@max_horizon, MAX(timeStamp)) FROM nyc_energy WHERE demand IS NOT NULL)
|
||||
|
||||
DECLARE @TestDataQuery NVARCHAR(MAX) = '
|
||||
SELECT CAST(timeStamp AS NVARCHAR(30)) AS timeStamp,
|
||||
demand,
|
||||
precip,
|
||||
temp
|
||||
FROM nyc_energy
|
||||
WHERE demand IS NOT NULL AND precip IS NOT NULL AND temp IS NOT NULL
|
||||
AND timeStamp > ''' + @split_time + ''''
|
||||
|
||||
EXEC dbo.AutoMLForecast @input_query=@TestDataQuery,
|
||||
@label_column='demand',
|
||||
@time_column_name='timeStamp',
|
||||
@model=@model
|
||||
WITH RESULT SETS ((timeStamp DATETIME, grain NVARCHAR(255), predicted_demand FLOAT, precip FLOAT, temp FLOAT, actual_demand FLOAT))
|
||||
@@ -0,0 +1,10 @@
|
||||
-- This lists all the metrics for all iterations for the most recent run.
|
||||
|
||||
DECLARE @RunId NVARCHAR(43)
|
||||
DECLARE @ExperimentName NVARCHAR(255)
|
||||
|
||||
SELECT TOP 1 @ExperimentName=ExperimentName, @RunId=SUBSTRING(RunId, 1, 43)
|
||||
FROM aml_model
|
||||
ORDER BY CreatedDate DESC
|
||||
|
||||
EXEC dbo.AutoMLGetMetrics @RunId, @ExperimentName
|
||||
@@ -0,0 +1,17 @@
|
||||
-- This shows using the AutoMLPredict stored procedure to predict using a forecasting model for the nyc_energy dataset.
|
||||
|
||||
DECLARE @Model NVARCHAR(MAX) = (SELECT TOP 1 Model FROM dbo.aml_model
|
||||
WHERE ExperimentName = 'automl-sql-forecast'
|
||||
ORDER BY CreatedDate DESC)
|
||||
|
||||
EXEC dbo.AutoMLPredict @input_query='
|
||||
SELECT CAST(timeStamp AS NVARCHAR(30)) AS timeStamp,
|
||||
demand,
|
||||
precip,
|
||||
temp
|
||||
FROM nyc_energy
|
||||
WHERE demand IS NOT NULL AND precip IS NOT NULL AND temp IS NOT NULL
|
||||
AND timeStamp >= ''2017-02-01''',
|
||||
@label_column='demand',
|
||||
@model=@model
|
||||
WITH RESULT SETS ((timeStamp NVARCHAR(30), actual_demand FLOAT, precip FLOAT, temp FLOAT, predicted_demand FLOAT))
|
||||
@@ -0,0 +1,25 @@
|
||||
-- This shows using the AutoMLTrain stored procedure to create a forecasting model for the nyc_energy dataset.
|
||||
|
||||
DECLARE @max_horizon INT = 48
|
||||
DECLARE @split_time NVARCHAR(22) = (SELECT DATEADD(hour, -@max_horizon, MAX(timeStamp)) FROM nyc_energy WHERE demand IS NOT NULL)
|
||||
|
||||
DECLARE @TrainDataQuery NVARCHAR(MAX) = '
|
||||
SELECT CAST(timeStamp as NVARCHAR(30)) as timeStamp,
|
||||
demand,
|
||||
precip,
|
||||
temp
|
||||
FROM nyc_energy
|
||||
WHERE demand IS NOT NULL AND precip IS NOT NULL AND temp IS NOT NULL
|
||||
and timeStamp < ''' + @split_time + ''''
|
||||
|
||||
INSERT INTO dbo.aml_model(RunId, ExperimentName, Model, LogFileText, WorkspaceName)
|
||||
EXEC dbo.AutoMLTrain @input_query= @TrainDataQuery,
|
||||
@label_column='demand',
|
||||
@task='forecasting',
|
||||
@iterations=10,
|
||||
@iteration_timeout_minutes=5,
|
||||
@time_column_name='timeStamp',
|
||||
@max_horizon=@max_horizon,
|
||||
@experiment_name='automl-sql-forecast',
|
||||
@primary_metric='normalized_root_mean_squared_error'
|
||||
|
||||
@@ -0,0 +1,141 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Train a model and use it for prediction\r\n",
|
||||
"\r\n",
|
||||
"Before running this notebook, run the auto-ml-sql-setup.ipynb notebook."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Set the default database"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"USE [automl]\r\n",
|
||||
"GO"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Use the AutoMLTrain stored procedure to create a forecasting model for the nyc_energy dataset."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"INSERT INTO dbo.aml_model(RunId, ExperimentName, Model, LogFileText, WorkspaceName)\r\n",
|
||||
"EXEC dbo.AutoMLTrain @input_query='\r\n",
|
||||
"SELECT CAST(timeStamp as NVARCHAR(30)) as timeStamp,\r\n",
|
||||
" demand,\r\n",
|
||||
"\t precip,\r\n",
|
||||
"\t temp,\r\n",
|
||||
"\t CASE WHEN timeStamp < ''2017-01-01'' THEN 0 ELSE 1 END AS is_validate_column\r\n",
|
||||
"FROM nyc_energy\r\n",
|
||||
"WHERE demand IS NOT NULL AND precip IS NOT NULL AND temp IS NOT NULL\r\n",
|
||||
"and timeStamp < ''2017-02-01''',\r\n",
|
||||
"@label_column='demand',\r\n",
|
||||
"@task='forecasting',\r\n",
|
||||
"@iterations=10,\r\n",
|
||||
"@iteration_timeout_minutes=5,\r\n",
|
||||
"@time_column_name='timeStamp',\r\n",
|
||||
"@is_validate_column='is_validate_column',\r\n",
|
||||
"@experiment_name='automl-sql-forecast',\r\n",
|
||||
"@primary_metric='normalized_root_mean_squared_error'"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Use the AutoMLPredict stored procedure to predict using the forecasting model for the nyc_energy dataset."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"DECLARE @Model NVARCHAR(MAX) = (SELECT TOP 1 Model FROM dbo.aml_model\r\n",
|
||||
" WHERE ExperimentName = 'automl-sql-forecast'\r\n",
|
||||
"\t\t\t\t\t\t\t\tORDER BY CreatedDate DESC)\r\n",
|
||||
"\r\n",
|
||||
"EXEC dbo.AutoMLPredict @input_query='\r\n",
|
||||
"SELECT CAST(timeStamp AS NVARCHAR(30)) AS timeStamp,\r\n",
|
||||
" demand,\r\n",
|
||||
"\t precip,\r\n",
|
||||
"\t temp\r\n",
|
||||
"FROM nyc_energy\r\n",
|
||||
"WHERE demand IS NOT NULL AND precip IS NOT NULL AND temp IS NOT NULL\r\n",
|
||||
"AND timeStamp >= ''2017-02-01''',\r\n",
|
||||
"@label_column='demand',\r\n",
|
||||
"@model=@model\r\n",
|
||||
"WITH RESULT SETS ((timeStamp NVARCHAR(30), actual_demand FLOAT, precip FLOAT, temp FLOAT, predicted_demand FLOAT))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## List all the metrics for all iterations for the most recent training run."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"DECLARE @RunId NVARCHAR(43)\r\n",
|
||||
"DECLARE @ExperimentName NVARCHAR(255)\r\n",
|
||||
"\r\n",
|
||||
"SELECT TOP 1 @ExperimentName=ExperimentName, @RunId=SUBSTRING(RunId, 1, 43)\r\n",
|
||||
"FROM aml_model\r\n",
|
||||
"ORDER BY CreatedDate DESC\r\n",
|
||||
"\r\n",
|
||||
"EXEC dbo.AutoMLGetMetrics @RunId, @ExperimentName"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "jeffshep"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "sql",
|
||||
"name": "python36"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "sql",
|
||||
"version": ""
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -0,0 +1,92 @@
|
||||
-- This procedure forecast values based on a forecasting model returned by AutoMLTrain.
|
||||
-- It returns a dataset with the forecasted values.
|
||||
SET ANSI_NULLS ON
|
||||
GO
|
||||
SET QUOTED_IDENTIFIER ON
|
||||
GO
|
||||
CREATE OR ALTER PROCEDURE [dbo].[AutoMLForecast]
|
||||
(
|
||||
@input_query NVARCHAR(MAX), -- A SQL query returning data to predict on.
|
||||
@model NVARCHAR(MAX), -- A model returned from AutoMLTrain.
|
||||
@time_column_name NVARCHAR(255)='', -- The name of the timestamp column for forecasting.
|
||||
@label_column NVARCHAR(255)='', -- Optional name of the column from input_query, which should be ignored when predicting
|
||||
@y_query_column NVARCHAR(255)='', -- Optional value column that can be used for predicting.
|
||||
-- If specified, this can contain values for past times (after the model was trained)
|
||||
-- and contain Nan for future times.
|
||||
@forecast_column_name NVARCHAR(255) = 'predicted'
|
||||
-- The name of the output column containing the forecast value.
|
||||
) AS
|
||||
BEGIN
|
||||
|
||||
EXEC sp_execute_external_script @language = N'Python', @script = N'import pandas as pd
|
||||
import azureml.core
|
||||
import numpy as np
|
||||
from azureml.train.automl import AutoMLConfig
|
||||
import pickle
|
||||
import codecs
|
||||
|
||||
model_obj = pickle.loads(codecs.decode(model.encode(), "base64"))
|
||||
|
||||
test_data = input_data.copy()
|
||||
|
||||
if label_column != "" and label_column is not None:
|
||||
y_test = test_data.pop(label_column).values
|
||||
else:
|
||||
y_test = None
|
||||
|
||||
if y_query_column != "" and y_query_column is not None:
|
||||
y_query = test_data.pop(y_query_column).values
|
||||
else:
|
||||
y_query = np.repeat(np.nan, len(test_data))
|
||||
|
||||
X_test = test_data
|
||||
|
||||
if time_column_name != "" and time_column_name is not None:
|
||||
X_test[time_column_name] = pd.to_datetime(X_test[time_column_name])
|
||||
|
||||
y_fcst, X_trans = model_obj.forecast(X_test, y_query)
|
||||
|
||||
def align_outputs(y_forecast, X_trans, X_test, y_test, forecast_column_name):
|
||||
# Demonstrates how to get the output aligned to the inputs
|
||||
# using pandas indexes. Helps understand what happened if
|
||||
# the output shape differs from the input shape, or if
|
||||
# the data got re-sorted by time and grain during forecasting.
|
||||
|
||||
# Typical causes of misalignment are:
|
||||
# * we predicted some periods that were missing in actuals -> drop from eval
|
||||
# * model was asked to predict past max_horizon -> increase max horizon
|
||||
# * data at start of X_test was needed for lags -> provide previous periods
|
||||
|
||||
df_fcst = pd.DataFrame({forecast_column_name : y_forecast})
|
||||
# y and X outputs are aligned by forecast() function contract
|
||||
df_fcst.index = X_trans.index
|
||||
|
||||
# align original X_test to y_test
|
||||
X_test_full = X_test.copy()
|
||||
if y_test is not None:
|
||||
X_test_full[label_column] = y_test
|
||||
|
||||
# X_test_full does not include origin, so reset for merge
|
||||
df_fcst.reset_index(inplace=True)
|
||||
X_test_full = X_test_full.reset_index().drop(columns=''index'')
|
||||
together = df_fcst.merge(X_test_full, how=''right'')
|
||||
|
||||
# drop rows where prediction or actuals are nan
|
||||
# happens because of missing actuals
|
||||
# or at edges of time due to lags/rolling windows
|
||||
clean = together[together[[label_column, forecast_column_name]].notnull().all(axis=1)]
|
||||
return(clean)
|
||||
|
||||
combined_output = align_outputs(y_fcst, X_trans, X_test, y_test, forecast_column_name)
|
||||
|
||||
'
|
||||
, @input_data_1 = @input_query
|
||||
, @input_data_1_name = N'input_data'
|
||||
, @output_data_1_name = N'combined_output'
|
||||
, @params = N'@model NVARCHAR(MAX), @time_column_name NVARCHAR(255), @label_column NVARCHAR(255), @y_query_column NVARCHAR(255), @forecast_column_name NVARCHAR(255)'
|
||||
, @model = @model
|
||||
, @time_column_name = @time_column_name
|
||||
, @label_column = @label_column
|
||||
, @y_query_column = @y_query_column
|
||||
, @forecast_column_name = @forecast_column_name
|
||||
END
|
||||
@@ -0,0 +1,70 @@
|
||||
-- This procedure returns a list of metrics for each iteration of a run.
|
||||
SET ANSI_NULLS ON
|
||||
GO
|
||||
SET QUOTED_IDENTIFIER ON
|
||||
GO
|
||||
CREATE OR ALTER PROCEDURE [dbo].[AutoMLGetMetrics]
|
||||
(
|
||||
@run_id NVARCHAR(250), -- The RunId
|
||||
@experiment_name NVARCHAR(32)='automl-sql-test', -- This can be used to find the experiment in the Azure Portal.
|
||||
@connection_name NVARCHAR(255)='default' -- The AML connection to use.
|
||||
) AS
|
||||
BEGIN
|
||||
DECLARE @tenantid NVARCHAR(255)
|
||||
DECLARE @appid NVARCHAR(255)
|
||||
DECLARE @password NVARCHAR(255)
|
||||
DECLARE @config_file NVARCHAR(255)
|
||||
|
||||
SELECT @tenantid=TenantId, @appid=AppId, @password=Password, @config_file=ConfigFile
|
||||
FROM aml_connection
|
||||
WHERE ConnectionName = @connection_name;
|
||||
|
||||
EXEC sp_execute_external_script @language = N'Python', @script = N'import pandas as pd
|
||||
import logging
|
||||
import azureml.core
|
||||
import numpy as np
|
||||
from azureml.core.experiment import Experiment
|
||||
from azureml.train.automl.run import AutoMLRun
|
||||
from azureml.core.authentication import ServicePrincipalAuthentication
|
||||
from azureml.core.workspace import Workspace
|
||||
|
||||
auth = ServicePrincipalAuthentication(tenantid, appid, password)
|
||||
|
||||
ws = Workspace.from_config(path=config_file, auth=auth)
|
||||
|
||||
experiment = Experiment(ws, experiment_name)
|
||||
|
||||
ml_run = AutoMLRun(experiment = experiment, run_id = run_id)
|
||||
|
||||
children = list(ml_run.get_children())
|
||||
iterationlist = []
|
||||
metricnamelist = []
|
||||
metricvaluelist = []
|
||||
|
||||
for run in children:
|
||||
properties = run.get_properties()
|
||||
if "iteration" in properties:
|
||||
iteration = int(properties["iteration"])
|
||||
for metric_name, metric_value in run.get_metrics().items():
|
||||
if isinstance(metric_value, float):
|
||||
iterationlist.append(iteration)
|
||||
metricnamelist.append(metric_name)
|
||||
metricvaluelist.append(metric_value)
|
||||
|
||||
metrics = pd.DataFrame({"iteration": iterationlist, "metric_name": metricnamelist, "metric_value": metricvaluelist})
|
||||
'
|
||||
, @output_data_1_name = N'metrics'
|
||||
, @params = N'@run_id NVARCHAR(250),
|
||||
@experiment_name NVARCHAR(32),
|
||||
@tenantid NVARCHAR(255),
|
||||
@appid NVARCHAR(255),
|
||||
@password NVARCHAR(255),
|
||||
@config_file NVARCHAR(255)'
|
||||
, @run_id = @run_id
|
||||
, @experiment_name = @experiment_name
|
||||
, @tenantid = @tenantid
|
||||
, @appid = @appid
|
||||
, @password = @password
|
||||
, @config_file = @config_file
|
||||
WITH RESULT SETS ((iteration INT, metric_name NVARCHAR(100), metric_value FLOAT))
|
||||
END
|
||||
@@ -0,0 +1,41 @@
|
||||
-- This procedure predicts values based on a model returned by AutoMLTrain and a dataset.
|
||||
-- It returns the dataset with a new column added, which is the predicted value.
|
||||
SET ANSI_NULLS ON
|
||||
GO
|
||||
SET QUOTED_IDENTIFIER ON
|
||||
GO
|
||||
CREATE OR ALTER PROCEDURE [dbo].[AutoMLPredict]
|
||||
(
|
||||
@input_query NVARCHAR(MAX), -- A SQL query returning data to predict on.
|
||||
@model NVARCHAR(MAX), -- A model returned from AutoMLTrain.
|
||||
@label_column NVARCHAR(255)='' -- Optional name of the column from input_query, which should be ignored when predicting
|
||||
) AS
|
||||
BEGIN
|
||||
|
||||
EXEC sp_execute_external_script @language = N'Python', @script = N'import pandas as pd
|
||||
import azureml.core
|
||||
import numpy as np
|
||||
from azureml.train.automl import AutoMLConfig
|
||||
import pickle
|
||||
import codecs
|
||||
|
||||
model_obj = pickle.loads(codecs.decode(model.encode(), "base64"))
|
||||
|
||||
test_data = input_data.copy()
|
||||
|
||||
if label_column != "" and label_column is not None:
|
||||
y_test = test_data.pop(label_column).values
|
||||
X_test = test_data
|
||||
|
||||
predicted = model_obj.predict(X_test)
|
||||
|
||||
combined_output = input_data.assign(predicted=predicted)
|
||||
|
||||
'
|
||||
, @input_data_1 = @input_query
|
||||
, @input_data_1_name = N'input_data'
|
||||
, @output_data_1_name = N'combined_output'
|
||||
, @params = N'@model NVARCHAR(MAX), @label_column NVARCHAR(255)'
|
||||
, @model = @model
|
||||
, @label_column = @label_column
|
||||
END
|
||||
@@ -0,0 +1,240 @@
|
||||
-- This stored procedure uses automated machine learning to train several models
|
||||
-- and returns the best model.
|
||||
--
|
||||
-- The result set has several columns:
|
||||
-- best_run - iteration ID for the best model
|
||||
-- experiment_name - experiment name pass in with the @experiment_name parameter
|
||||
-- fitted_model - best model found
|
||||
-- log_file_text - AutoML debug_log contents
|
||||
-- workspace - name of the Azure ML workspace where run history is stored
|
||||
--
|
||||
-- An example call for a classification problem is:
|
||||
-- insert into dbo.aml_model(RunId, ExperimentName, Model, LogFileText, WorkspaceName)
|
||||
-- exec dbo.AutoMLTrain @input_query='
|
||||
-- SELECT top 100000
|
||||
-- CAST([pickup_datetime] AS NVARCHAR(30)) AS pickup_datetime
|
||||
-- ,CAST([dropoff_datetime] AS NVARCHAR(30)) AS dropoff_datetime
|
||||
-- ,[passenger_count]
|
||||
-- ,[trip_time_in_secs]
|
||||
-- ,[trip_distance]
|
||||
-- ,[payment_type]
|
||||
-- ,[tip_class]
|
||||
-- FROM [dbo].[nyctaxi_sample] order by [hack_license] ',
|
||||
-- @label_column = 'tip_class',
|
||||
-- @iterations=10
|
||||
--
|
||||
-- An example call for forecasting is:
|
||||
-- insert into dbo.aml_model(RunId, ExperimentName, Model, LogFileText, WorkspaceName)
|
||||
-- exec dbo.AutoMLTrain @input_query='
|
||||
-- select cast(timeStamp as nvarchar(30)) as timeStamp,
|
||||
-- demand,
|
||||
-- precip,
|
||||
-- temp,
|
||||
-- case when timeStamp < ''2017-01-01'' then 0 else 1 end as is_validate_column
|
||||
-- from nyc_energy
|
||||
-- where demand is not null and precip is not null and temp is not null
|
||||
-- and timeStamp < ''2017-02-01''',
|
||||
-- @label_column='demand',
|
||||
-- @task='forecasting',
|
||||
-- @iterations=10,
|
||||
-- @iteration_timeout_minutes=5,
|
||||
-- @time_column_name='timeStamp',
|
||||
-- @is_validate_column='is_validate_column',
|
||||
-- @experiment_name='automl-sql-forecast',
|
||||
-- @primary_metric='normalized_root_mean_squared_error'
|
||||
|
||||
SET ANSI_NULLS ON
|
||||
GO
|
||||
SET QUOTED_IDENTIFIER ON
|
||||
GO
|
||||
CREATE OR ALTER PROCEDURE [dbo].[AutoMLTrain]
|
||||
(
|
||||
@input_query NVARCHAR(MAX), -- The SQL Query that will return the data to train and validate the model.
|
||||
@label_column NVARCHAR(255)='Label', -- The name of the column in the result of @input_query that is the label.
|
||||
@primary_metric NVARCHAR(40)='AUC_weighted', -- The metric to optimize.
|
||||
@iterations INT=100, -- The maximum number of pipelines to train.
|
||||
@task NVARCHAR(40)='classification', -- The type of task. Can be classification, regression or forecasting.
|
||||
@experiment_name NVARCHAR(32)='automl-sql-test', -- This can be used to find the experiment in the Azure Portal.
|
||||
@iteration_timeout_minutes INT = 15, -- The maximum time in minutes for training a single pipeline.
|
||||
@experiment_timeout_minutes INT = 60, -- The maximum time in minutes for training all pipelines.
|
||||
@n_cross_validations INT = 3, -- The number of cross validations.
|
||||
@blacklist_models NVARCHAR(MAX) = '', -- A comma separated list of algos that will not be used.
|
||||
-- The list of possible models can be found at:
|
||||
-- https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-configure-auto-train#configure-your-experiment-settings
|
||||
@whitelist_models NVARCHAR(MAX) = '', -- A comma separated list of algos that can be used.
|
||||
-- The list of possible models can be found at:
|
||||
-- https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-configure-auto-train#configure-your-experiment-settings
|
||||
@experiment_exit_score FLOAT = 0, -- Stop the experiment if this score is acheived.
|
||||
@sample_weight_column NVARCHAR(255)='', -- The name of the column in the result of @input_query that gives a sample weight.
|
||||
@is_validate_column NVARCHAR(255)='', -- The name of the column in the result of @input_query that indicates if the row is for training or validation.
|
||||
-- In the values of the column, 0 means for training and 1 means for validation.
|
||||
@time_column_name NVARCHAR(255)='', -- The name of the timestamp column for forecasting.
|
||||
@connection_name NVARCHAR(255)='default', -- The AML connection to use.
|
||||
@max_horizon INT = 0 -- A forecast horizon is a time span into the future (or just beyond the latest date in the training data)
|
||||
-- where forecasts of the target quantity are needed.
|
||||
-- For example, if data is recorded daily and max_horizon is 5, we will predict 5 days ahead.
|
||||
) AS
|
||||
BEGIN
|
||||
|
||||
DECLARE @tenantid NVARCHAR(255)
|
||||
DECLARE @appid NVARCHAR(255)
|
||||
DECLARE @password NVARCHAR(255)
|
||||
DECLARE @config_file NVARCHAR(255)
|
||||
|
||||
SELECT @tenantid=TenantId, @appid=AppId, @password=Password, @config_file=ConfigFile
|
||||
FROM aml_connection
|
||||
WHERE ConnectionName = @connection_name;
|
||||
|
||||
EXEC sp_execute_external_script @language = N'Python', @script = N'import pandas as pd
|
||||
import logging
|
||||
import azureml.core
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from azureml.core.experiment import Experiment
|
||||
from azureml.train.automl import AutoMLConfig
|
||||
from sklearn import datasets
|
||||
import pickle
|
||||
import codecs
|
||||
from azureml.core.authentication import ServicePrincipalAuthentication
|
||||
from azureml.core.workspace import Workspace
|
||||
|
||||
if __name__.startswith("sqlindb"):
|
||||
auth = ServicePrincipalAuthentication(tenantid, appid, password)
|
||||
|
||||
ws = Workspace.from_config(path=config_file, auth=auth)
|
||||
|
||||
project_folder = "./sample_projects/" + experiment_name
|
||||
|
||||
experiment = Experiment(ws, experiment_name)
|
||||
|
||||
data_train = input_data
|
||||
X_valid = None
|
||||
y_valid = None
|
||||
sample_weight_valid = None
|
||||
|
||||
if is_validate_column != "" and is_validate_column is not None:
|
||||
data_train = input_data[input_data[is_validate_column] <= 0]
|
||||
data_valid = input_data[input_data[is_validate_column] > 0]
|
||||
data_train.pop(is_validate_column)
|
||||
data_valid.pop(is_validate_column)
|
||||
y_valid = data_valid.pop(label_column).values
|
||||
if sample_weight_column != "" and sample_weight_column is not None:
|
||||
sample_weight_valid = data_valid.pop(sample_weight_column).values
|
||||
X_valid = data_valid
|
||||
n_cross_validations = None
|
||||
|
||||
y_train = data_train.pop(label_column).values
|
||||
|
||||
sample_weight = None
|
||||
if sample_weight_column != "" and sample_weight_column is not None:
|
||||
sample_weight = data_train.pop(sample_weight_column).values
|
||||
|
||||
X_train = data_train
|
||||
|
||||
if experiment_timeout_minutes == 0:
|
||||
experiment_timeout_minutes = None
|
||||
|
||||
if experiment_exit_score == 0:
|
||||
experiment_exit_score = None
|
||||
|
||||
if blacklist_models == "":
|
||||
blacklist_models = None
|
||||
|
||||
if blacklist_models is not None:
|
||||
blacklist_models = blacklist_models.replace(" ", "").split(",")
|
||||
|
||||
if whitelist_models == "":
|
||||
whitelist_models = None
|
||||
|
||||
if whitelist_models is not None:
|
||||
whitelist_models = whitelist_models.replace(" ", "").split(",")
|
||||
|
||||
automl_settings = {}
|
||||
preprocess = True
|
||||
if time_column_name != "" and time_column_name is not None:
|
||||
automl_settings = { "time_column_name": time_column_name }
|
||||
preprocess = False
|
||||
if max_horizon > 0:
|
||||
automl_settings["max_horizon"] = max_horizon
|
||||
|
||||
log_file_name = "automl_sqlindb_errors.log"
|
||||
|
||||
automl_config = AutoMLConfig(task = task,
|
||||
debug_log = log_file_name,
|
||||
primary_metric = primary_metric,
|
||||
iteration_timeout_minutes = iteration_timeout_minutes,
|
||||
experiment_timeout_minutes = experiment_timeout_minutes,
|
||||
iterations = iterations,
|
||||
n_cross_validations = n_cross_validations,
|
||||
preprocess = preprocess,
|
||||
verbosity = logging.INFO,
|
||||
X = X_train,
|
||||
y = y_train,
|
||||
path = project_folder,
|
||||
blacklist_models = blacklist_models,
|
||||
whitelist_models = whitelist_models,
|
||||
experiment_exit_score = experiment_exit_score,
|
||||
sample_weight = sample_weight,
|
||||
X_valid = X_valid,
|
||||
y_valid = y_valid,
|
||||
sample_weight_valid = sample_weight_valid,
|
||||
**automl_settings)
|
||||
|
||||
local_run = experiment.submit(automl_config, show_output = True)
|
||||
|
||||
best_run, fitted_model = local_run.get_output()
|
||||
|
||||
pickled_model = codecs.encode(pickle.dumps(fitted_model), "base64").decode()
|
||||
|
||||
log_file_text = ""
|
||||
|
||||
try:
|
||||
with open(log_file_name, "r") as log_file:
|
||||
log_file_text = log_file.read()
|
||||
except:
|
||||
log_file_text = "Log file not found"
|
||||
|
||||
returned_model = pd.DataFrame({"best_run": [best_run.id], "experiment_name": [experiment_name], "fitted_model": [pickled_model], "log_file_text": [log_file_text], "workspace": [ws.name]}, dtype=np.dtype(np.str))
|
||||
'
|
||||
, @input_data_1 = @input_query
|
||||
, @input_data_1_name = N'input_data'
|
||||
, @output_data_1_name = N'returned_model'
|
||||
, @params = N'@label_column NVARCHAR(255),
|
||||
@primary_metric NVARCHAR(40),
|
||||
@iterations INT, @task NVARCHAR(40),
|
||||
@experiment_name NVARCHAR(32),
|
||||
@iteration_timeout_minutes INT,
|
||||
@experiment_timeout_minutes INT,
|
||||
@n_cross_validations INT,
|
||||
@blacklist_models NVARCHAR(MAX),
|
||||
@whitelist_models NVARCHAR(MAX),
|
||||
@experiment_exit_score FLOAT,
|
||||
@sample_weight_column NVARCHAR(255),
|
||||
@is_validate_column NVARCHAR(255),
|
||||
@time_column_name NVARCHAR(255),
|
||||
@tenantid NVARCHAR(255),
|
||||
@appid NVARCHAR(255),
|
||||
@password NVARCHAR(255),
|
||||
@config_file NVARCHAR(255),
|
||||
@max_horizon INT'
|
||||
, @label_column = @label_column
|
||||
, @primary_metric = @primary_metric
|
||||
, @iterations = @iterations
|
||||
, @task = @task
|
||||
, @experiment_name = @experiment_name
|
||||
, @iteration_timeout_minutes = @iteration_timeout_minutes
|
||||
, @experiment_timeout_minutes = @experiment_timeout_minutes
|
||||
, @n_cross_validations = @n_cross_validations
|
||||
, @blacklist_models = @blacklist_models
|
||||
, @whitelist_models = @whitelist_models
|
||||
, @experiment_exit_score = @experiment_exit_score
|
||||
, @sample_weight_column = @sample_weight_column
|
||||
, @is_validate_column = @is_validate_column
|
||||
, @time_column_name = @time_column_name
|
||||
, @tenantid = @tenantid
|
||||
, @appid = @appid
|
||||
, @password = @password
|
||||
, @config_file = @config_file
|
||||
, @max_horizon = @max_horizon
|
||||
WITH RESULT SETS ((best_run NVARCHAR(250), experiment_name NVARCHAR(100), fitted_model VARCHAR(MAX), log_file_text NVARCHAR(MAX), workspace NVARCHAR(100)))
|
||||
END
|
||||
@@ -0,0 +1,18 @@
|
||||
-- This is a table to store the Azure ML connection information.
|
||||
SET ANSI_NULLS ON
|
||||
GO
|
||||
|
||||
SET QUOTED_IDENTIFIER ON
|
||||
GO
|
||||
|
||||
CREATE TABLE [dbo].[aml_connection](
|
||||
[Id] [int] IDENTITY(1,1) NOT NULL PRIMARY KEY,
|
||||
[ConnectionName] [nvarchar](255) NULL,
|
||||
[TenantId] [nvarchar](255) NULL,
|
||||
[AppId] [nvarchar](255) NULL,
|
||||
[Password] [nvarchar](255) NULL,
|
||||
[ConfigFile] [nvarchar](255) NULL
|
||||
) ON [PRIMARY]
|
||||
GO
|
||||
|
||||
|
||||
@@ -0,0 +1,22 @@
|
||||
-- This is a table to hold the results from the AutoMLTrain procedure.
|
||||
SET ANSI_NULLS ON
|
||||
GO
|
||||
|
||||
SET QUOTED_IDENTIFIER ON
|
||||
GO
|
||||
|
||||
CREATE TABLE [dbo].[aml_model](
|
||||
[Id] [int] IDENTITY(1,1) NOT NULL PRIMARY KEY,
|
||||
[Model] [varchar](max) NOT NULL, -- The model, which can be passed to AutoMLPredict for testing or prediction.
|
||||
[RunId] [nvarchar](250) NULL, -- The RunId, which can be used to view the model in the Azure Portal.
|
||||
[CreatedDate] [datetime] NULL,
|
||||
[ExperimentName] [nvarchar](100) NULL, -- Azure ML Experiment Name
|
||||
[WorkspaceName] [nvarchar](100) NULL, -- Azure ML Workspace Name
|
||||
[LogFileText] [nvarchar](max) NULL
|
||||
)
|
||||
GO
|
||||
|
||||
ALTER TABLE [dbo].[aml_model] ADD DEFAULT (getutcdate()) FOR [CreatedDate]
|
||||
GO
|
||||
|
||||
|
||||
@@ -0,0 +1,562 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Set up Azure ML Automated Machine Learning on SQL Server 2019 CTP 2.4 big data cluster\r\n",
|
||||
"\r\n",
|
||||
"\\# Prerequisites: \r\n",
|
||||
"\\# - An Azure subscription and resource group \r\n",
|
||||
"\\# - An Azure Machine Learning workspace \r\n",
|
||||
"\\# - A SQL Server 2019 CTP 2.4 big data cluster with Internet access and a database named 'automl' \r\n",
|
||||
"\\# - Azure CLI \r\n",
|
||||
"\\# - kubectl command \r\n",
|
||||
"\\# - The https://github.com/Azure/MachineLearningNotebooks repository downloaded (cloned) to your local machine\r\n",
|
||||
"\r\n",
|
||||
"\\# In the 'automl' database, create a table named 'dbo.nyc_energy' as follows: \r\n",
|
||||
"\\# - In SQL Server Management Studio, right-click the 'automl' database, select Tasks, then Import Flat File. \r\n",
|
||||
"\\# - Select the file AzureMlCli\\notebooks\\how-to-use-azureml\\automated-machine-learning\\forecasting-energy-demand\\nyc_energy.csv. \r\n",
|
||||
"\\# - Using the \"Modify Columns\" page, allow nulls for all columns. \r\n",
|
||||
"\r\n",
|
||||
"\\# Create an Azure Machine Learning Workspace using the instructions at https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-manage-workspace \r\n",
|
||||
"\r\n",
|
||||
"\\# Create an Azure service principal. You can do this with the following commands: \r\n",
|
||||
"\r\n",
|
||||
"az login \r\n",
|
||||
"az account set --subscription *subscriptionid* \r\n",
|
||||
"\r\n",
|
||||
"\\# The following command prints out the **appId** and **tenant**, \r\n",
|
||||
"\\# which you insert into the indicated cell later in this notebook \r\n",
|
||||
"\\# to allow AutoML to authenticate with Azure: \r\n",
|
||||
"\r\n",
|
||||
"az ad sp create-for-rbac --name *principlename* --password *password*\r\n",
|
||||
"\r\n",
|
||||
"\\# Log into the master instance of SQL Server 2019 CTP 2.4: \r\n",
|
||||
"kubectl exec -it mssql-master-pool-0 -n *clustername* -c mssql-server -- /bin/bash\r\n",
|
||||
"\r\n",
|
||||
"mkdir /tmp/aml\r\n",
|
||||
"\r\n",
|
||||
"cd /tmp/aml\r\n",
|
||||
"\r\n",
|
||||
"\\# **Modify** the following with your subscription_id, resource_group, and workspace_name: \r\n",
|
||||
"cat > config.json << EOF \r\n",
|
||||
"{ \r\n",
|
||||
" \"subscription_id\": \"123456ab-78cd-0123-45ef-abcd12345678\", \r\n",
|
||||
" \"resource_group\": \"myrg1\", \r\n",
|
||||
" \"workspace_name\": \"myws1\" \r\n",
|
||||
"} \r\n",
|
||||
"EOF\r\n",
|
||||
"\r\n",
|
||||
"\\# The directory referenced below is appropriate for the master instance of SQL Server 2019 CTP 2.4.\r\n",
|
||||
"\r\n",
|
||||
"cd /opt/mssql/mlservices/runtime/python/bin\r\n",
|
||||
"\r\n",
|
||||
"./python -m pip install azureml-sdk[automl]\r\n",
|
||||
"\r\n",
|
||||
"./python -m pip install --upgrade numpy \r\n",
|
||||
"\r\n",
|
||||
"./python -m pip install --upgrade sklearn\r\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"-- Enable external scripts to allow invoking Python\r\n",
|
||||
"sp_configure 'external scripts enabled',1 \r\n",
|
||||
"reconfigure with override \r\n",
|
||||
"GO\r\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"-- Use database 'automl'\r\n",
|
||||
"USE [automl]\r\n",
|
||||
"GO"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"-- This is a table to hold the Azure ML connection information.\r\n",
|
||||
"SET ANSI_NULLS ON\r\n",
|
||||
"GO\r\n",
|
||||
"\r\n",
|
||||
"SET QUOTED_IDENTIFIER ON\r\n",
|
||||
"GO\r\n",
|
||||
"\r\n",
|
||||
"CREATE TABLE [dbo].[aml_connection](\r\n",
|
||||
" [Id] [int] IDENTITY(1,1) NOT NULL PRIMARY KEY,\r\n",
|
||||
"\t[ConnectionName] [nvarchar](255) NULL,\r\n",
|
||||
"\t[TenantId] [nvarchar](255) NULL,\r\n",
|
||||
"\t[AppId] [nvarchar](255) NULL,\r\n",
|
||||
"\t[Password] [nvarchar](255) NULL,\r\n",
|
||||
"\t[ConfigFile] [nvarchar](255) NULL\r\n",
|
||||
") ON [PRIMARY]\r\n",
|
||||
"GO"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Copy the values from create-for-rbac above into the cell below"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"-- Use the following values:\r\n",
|
||||
"-- Leave the name as 'Default'\r\n",
|
||||
"-- Insert <tenant> returned by create-for-rbac above\r\n",
|
||||
"-- Insert <AppId> returned by create-for-rbac above\r\n",
|
||||
"-- Insert <password> used in create-for-rbac above\r\n",
|
||||
"-- Leave <path> as '/tmp/aml/config.json'\r\n",
|
||||
"INSERT INTO [dbo].[aml_connection] \r\n",
|
||||
"VALUES (\r\n",
|
||||
" N'Default', -- Name\r\n",
|
||||
" N'11111111-2222-3333-4444-555555555555', -- Tenant\r\n",
|
||||
" N'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee', -- AppId\r\n",
|
||||
" N'insertpasswordhere', -- Password\r\n",
|
||||
" N'/tmp/aml/config.json' -- Path\r\n",
|
||||
" );\r\n",
|
||||
"GO"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"-- This is a table to hold the results from the AutoMLTrain procedure.\r\n",
|
||||
"SET ANSI_NULLS ON\r\n",
|
||||
"GO\r\n",
|
||||
"\r\n",
|
||||
"SET QUOTED_IDENTIFIER ON\r\n",
|
||||
"GO\r\n",
|
||||
"\r\n",
|
||||
"CREATE TABLE [dbo].[aml_model](\r\n",
|
||||
" [Id] [int] IDENTITY(1,1) NOT NULL PRIMARY KEY,\r\n",
|
||||
" [Model] [varchar](max) NOT NULL, -- The model, which can be passed to AutoMLPredict for testing or prediction.\r\n",
|
||||
" [RunId] [nvarchar](250) NULL, -- The RunId, which can be used to view the model in the Azure Portal.\r\n",
|
||||
" [CreatedDate] [datetime] NULL,\r\n",
|
||||
" [ExperimentName] [nvarchar](100) NULL, -- Azure ML Experiment Name\r\n",
|
||||
" [WorkspaceName] [nvarchar](100) NULL, -- Azure ML Workspace Name\r\n",
|
||||
"\t[LogFileText] [nvarchar](max) NULL\r\n",
|
||||
") \r\n",
|
||||
"GO\r\n",
|
||||
"\r\n",
|
||||
"ALTER TABLE [dbo].[aml_model] ADD DEFAULT (getutcdate()) FOR [CreatedDate]\r\n",
|
||||
"GO\r\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"-- This stored procedure uses automated machine learning to train several models\r\n",
|
||||
"-- and return the best model.\r\n",
|
||||
"--\r\n",
|
||||
"-- The result set has several columns:\r\n",
|
||||
"-- best_run - ID of the best model found\r\n",
|
||||
"-- experiment_name - training run name\r\n",
|
||||
"-- fitted_model - best model found\r\n",
|
||||
"-- log_file_text - console output\r\n",
|
||||
"-- workspace - name of the Azure ML workspace where run history is stored\r\n",
|
||||
"--\r\n",
|
||||
"-- An example call for a classification problem is:\r\n",
|
||||
"-- insert into dbo.aml_model(RunId, ExperimentName, Model, LogFileText, WorkspaceName)\r\n",
|
||||
"-- exec dbo.AutoMLTrain @input_query='\r\n",
|
||||
"-- SELECT top 100000 \r\n",
|
||||
"-- CAST([pickup_datetime] AS NVARCHAR(30)) AS pickup_datetime\r\n",
|
||||
"-- ,CAST([dropoff_datetime] AS NVARCHAR(30)) AS dropoff_datetime\r\n",
|
||||
"-- ,[passenger_count]\r\n",
|
||||
"-- ,[trip_time_in_secs]\r\n",
|
||||
"-- ,[trip_distance]\r\n",
|
||||
"-- ,[payment_type]\r\n",
|
||||
"-- ,[tip_class]\r\n",
|
||||
"-- FROM [dbo].[nyctaxi_sample] order by [hack_license] ',\r\n",
|
||||
"-- @label_column = 'tip_class',\r\n",
|
||||
"-- @iterations=10\r\n",
|
||||
"-- \r\n",
|
||||
"-- An example call for forecasting is:\r\n",
|
||||
"-- insert into dbo.aml_model(RunId, ExperimentName, Model, LogFileText, WorkspaceName)\r\n",
|
||||
"-- exec dbo.AutoMLTrain @input_query='\r\n",
|
||||
"-- select cast(timeStamp as nvarchar(30)) as timeStamp,\r\n",
|
||||
"-- demand,\r\n",
|
||||
"-- \t precip,\r\n",
|
||||
"-- \t temp,\r\n",
|
||||
"-- case when timeStamp < ''2017-01-01'' then 0 else 1 end as is_validate_column\r\n",
|
||||
"-- from nyc_energy\r\n",
|
||||
"-- where demand is not null and precip is not null and temp is not null\r\n",
|
||||
"-- and timeStamp < ''2017-02-01''',\r\n",
|
||||
"-- @label_column='demand',\r\n",
|
||||
"-- @task='forecasting',\r\n",
|
||||
"-- @iterations=10,\r\n",
|
||||
"-- @iteration_timeout_minutes=5,\r\n",
|
||||
"-- @time_column_name='timeStamp',\r\n",
|
||||
"-- @is_validate_column='is_validate_column',\r\n",
|
||||
"-- @experiment_name='automl-sql-forecast',\r\n",
|
||||
"-- @primary_metric='normalized_root_mean_squared_error'\r\n",
|
||||
"\r\n",
|
||||
"SET ANSI_NULLS ON\r\n",
|
||||
"GO\r\n",
|
||||
"SET QUOTED_IDENTIFIER ON\r\n",
|
||||
"GO\r\n",
|
||||
"CREATE OR ALTER PROCEDURE [dbo].[AutoMLTrain]\r\n",
|
||||
" (\r\n",
|
||||
" @input_query NVARCHAR(MAX), -- The SQL Query that will return the data to train and validate the model.\r\n",
|
||||
" @label_column NVARCHAR(255)='Label', -- The name of the column in the result of @input_query that is the label.\r\n",
|
||||
" @primary_metric NVARCHAR(40)='AUC_weighted', -- The metric to optimize.\r\n",
|
||||
" @iterations INT=100, -- The maximum number of pipelines to train.\r\n",
|
||||
" @task NVARCHAR(40)='classification', -- The type of task. Can be classification, regression or forecasting.\r\n",
|
||||
" @experiment_name NVARCHAR(32)='automl-sql-test', -- This can be used to find the experiment in the Azure Portal.\r\n",
|
||||
" @iteration_timeout_minutes INT = 15, -- The maximum time in minutes for training a single pipeline. \r\n",
|
||||
" @experiment_timeout_minutes INT = 60, -- The maximum time in minutes for training all pipelines.\r\n",
|
||||
" @n_cross_validations INT = 3, -- The number of cross validations.\r\n",
|
||||
" @blacklist_models NVARCHAR(MAX) = '', -- A comma separated list of algos that will not be used.\r\n",
|
||||
" -- The list of possible models can be found at:\r\n",
|
||||
" -- https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-configure-auto-train#configure-your-experiment-settings\r\n",
|
||||
" @whitelist_models NVARCHAR(MAX) = '', -- A comma separated list of algos that can be used.\r\n",
|
||||
" -- The list of possible models can be found at:\r\n",
|
||||
" -- https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-configure-auto-train#configure-your-experiment-settings\r\n",
|
||||
" @experiment_exit_score FLOAT = 0, -- Stop the experiment if this score is acheived.\r\n",
|
||||
" @sample_weight_column NVARCHAR(255)='', -- The name of the column in the result of @input_query that gives a sample weight.\r\n",
|
||||
" @is_validate_column NVARCHAR(255)='', -- The name of the column in the result of @input_query that indicates if the row is for training or validation.\r\n",
|
||||
"\t -- In the values of the column, 0 means for training and 1 means for validation.\r\n",
|
||||
" @time_column_name NVARCHAR(255)='', -- The name of the timestamp column for forecasting.\r\n",
|
||||
"\t@connection_name NVARCHAR(255)='default' -- The AML connection to use.\r\n",
|
||||
" ) AS\r\n",
|
||||
"BEGIN\r\n",
|
||||
"\r\n",
|
||||
" DECLARE @tenantid NVARCHAR(255)\r\n",
|
||||
" DECLARE @appid NVARCHAR(255)\r\n",
|
||||
" DECLARE @password NVARCHAR(255)\r\n",
|
||||
" DECLARE @config_file NVARCHAR(255)\r\n",
|
||||
"\r\n",
|
||||
"\tSELECT @tenantid=TenantId, @appid=AppId, @password=Password, @config_file=ConfigFile\r\n",
|
||||
"\tFROM aml_connection\r\n",
|
||||
"\tWHERE ConnectionName = @connection_name;\r\n",
|
||||
"\r\n",
|
||||
"\tEXEC sp_execute_external_script @language = N'Python', @script = N'import pandas as pd\r\n",
|
||||
"import logging \r\n",
|
||||
"import azureml.core \r\n",
|
||||
"import pandas as pd\r\n",
|
||||
"import numpy as np\r\n",
|
||||
"from azureml.core.experiment import Experiment \r\n",
|
||||
"from azureml.train.automl import AutoMLConfig \r\n",
|
||||
"from sklearn import datasets \r\n",
|
||||
"import pickle\r\n",
|
||||
"import codecs\r\n",
|
||||
"from azureml.core.authentication import ServicePrincipalAuthentication \r\n",
|
||||
"from azureml.core.workspace import Workspace \r\n",
|
||||
"\r\n",
|
||||
"if __name__.startswith(\"sqlindb\"):\r\n",
|
||||
" auth = ServicePrincipalAuthentication(tenantid, appid, password) \r\n",
|
||||
" \r\n",
|
||||
" ws = Workspace.from_config(path=config_file, auth=auth) \r\n",
|
||||
" \r\n",
|
||||
" project_folder = \"./sample_projects/\" + experiment_name\r\n",
|
||||
" \r\n",
|
||||
" experiment = Experiment(ws, experiment_name) \r\n",
|
||||
"\r\n",
|
||||
" data_train = input_data\r\n",
|
||||
" X_valid = None\r\n",
|
||||
" y_valid = None\r\n",
|
||||
" sample_weight_valid = None\r\n",
|
||||
"\r\n",
|
||||
" if is_validate_column != \"\" and is_validate_column is not None:\r\n",
|
||||
" data_train = input_data[input_data[is_validate_column] <= 0]\r\n",
|
||||
" data_valid = input_data[input_data[is_validate_column] > 0]\r\n",
|
||||
" data_train.pop(is_validate_column)\r\n",
|
||||
" data_valid.pop(is_validate_column)\r\n",
|
||||
" y_valid = data_valid.pop(label_column).values\r\n",
|
||||
" if sample_weight_column != \"\" and sample_weight_column is not None:\r\n",
|
||||
" sample_weight_valid = data_valid.pop(sample_weight_column).values\r\n",
|
||||
" X_valid = data_valid\r\n",
|
||||
" n_cross_validations = None\r\n",
|
||||
"\r\n",
|
||||
" y_train = data_train.pop(label_column).values\r\n",
|
||||
"\r\n",
|
||||
" sample_weight = None\r\n",
|
||||
" if sample_weight_column != \"\" and sample_weight_column is not None:\r\n",
|
||||
" sample_weight = data_train.pop(sample_weight_column).values\r\n",
|
||||
"\r\n",
|
||||
" X_train = data_train\r\n",
|
||||
"\r\n",
|
||||
" if experiment_timeout_minutes == 0:\r\n",
|
||||
" experiment_timeout_minutes = None\r\n",
|
||||
"\r\n",
|
||||
" if experiment_exit_score == 0:\r\n",
|
||||
" experiment_exit_score = None\r\n",
|
||||
"\r\n",
|
||||
" if blacklist_models == \"\":\r\n",
|
||||
" blacklist_models = None\r\n",
|
||||
"\r\n",
|
||||
" if blacklist_models is not None:\r\n",
|
||||
" blacklist_models = blacklist_models.replace(\" \", \"\").split(\",\")\r\n",
|
||||
"\r\n",
|
||||
" if whitelist_models == \"\":\r\n",
|
||||
" whitelist_models = None\r\n",
|
||||
"\r\n",
|
||||
" if whitelist_models is not None:\r\n",
|
||||
" whitelist_models = whitelist_models.replace(\" \", \"\").split(\",\")\r\n",
|
||||
"\r\n",
|
||||
" automl_settings = {}\r\n",
|
||||
" preprocess = True\r\n",
|
||||
" if time_column_name != \"\" and time_column_name is not None:\r\n",
|
||||
" automl_settings = { \"time_column_name\": time_column_name }\r\n",
|
||||
" preprocess = False\r\n",
|
||||
"\r\n",
|
||||
" log_file_name = \"automl_errors.log\"\r\n",
|
||||
"\t \r\n",
|
||||
" automl_config = AutoMLConfig(task = task, \r\n",
|
||||
" debug_log = log_file_name, \r\n",
|
||||
" primary_metric = primary_metric, \r\n",
|
||||
" iteration_timeout_minutes = iteration_timeout_minutes, \r\n",
|
||||
" experiment_timeout_minutes = experiment_timeout_minutes,\r\n",
|
||||
" iterations = iterations, \r\n",
|
||||
" n_cross_validations = n_cross_validations, \r\n",
|
||||
" preprocess = preprocess,\r\n",
|
||||
" verbosity = logging.INFO, \r\n",
|
||||
" enable_ensembling = False,\r\n",
|
||||
" X = X_train, \r\n",
|
||||
" y = y_train, \r\n",
|
||||
" path = project_folder,\r\n",
|
||||
" blacklist_models = blacklist_models,\r\n",
|
||||
" whitelist_models = whitelist_models,\r\n",
|
||||
" experiment_exit_score = experiment_exit_score,\r\n",
|
||||
" sample_weight = sample_weight,\r\n",
|
||||
" X_valid = X_valid,\r\n",
|
||||
" y_valid = y_valid,\r\n",
|
||||
" sample_weight_valid = sample_weight_valid,\r\n",
|
||||
" **automl_settings) \r\n",
|
||||
" \r\n",
|
||||
" local_run = experiment.submit(automl_config, show_output = True) \r\n",
|
||||
"\r\n",
|
||||
" best_run, fitted_model = local_run.get_output()\r\n",
|
||||
"\r\n",
|
||||
" pickled_model = codecs.encode(pickle.dumps(fitted_model), \"base64\").decode()\r\n",
|
||||
"\r\n",
|
||||
" log_file_text = \"\"\r\n",
|
||||
"\r\n",
|
||||
" try:\r\n",
|
||||
" with open(log_file_name, \"r\") as log_file:\r\n",
|
||||
" log_file_text = log_file.read()\r\n",
|
||||
" except:\r\n",
|
||||
" log_file_text = \"Log file not found\"\r\n",
|
||||
"\r\n",
|
||||
" returned_model = pd.DataFrame({\"best_run\": [best_run.id], \"experiment_name\": [experiment_name], \"fitted_model\": [pickled_model], \"log_file_text\": [log_file_text], \"workspace\": [ws.name]}, dtype=np.dtype(np.str))\r\n",
|
||||
"'\r\n",
|
||||
"\t, @input_data_1 = @input_query\r\n",
|
||||
"\t, @input_data_1_name = N'input_data'\r\n",
|
||||
"\t, @output_data_1_name = N'returned_model'\r\n",
|
||||
"\t, @params = N'@label_column NVARCHAR(255), \r\n",
|
||||
"\t @primary_metric NVARCHAR(40),\r\n",
|
||||
"\t\t\t\t @iterations INT, @task NVARCHAR(40),\r\n",
|
||||
"\t\t\t\t @experiment_name NVARCHAR(32),\r\n",
|
||||
"\t\t\t\t @iteration_timeout_minutes INT,\r\n",
|
||||
"\t\t\t\t @experiment_timeout_minutes INT,\r\n",
|
||||
"\t\t\t\t @n_cross_validations INT,\r\n",
|
||||
"\t\t\t\t @blacklist_models NVARCHAR(MAX),\r\n",
|
||||
"\t\t\t\t @whitelist_models NVARCHAR(MAX),\r\n",
|
||||
"\t\t\t\t @experiment_exit_score FLOAT,\r\n",
|
||||
"\t\t\t\t @sample_weight_column NVARCHAR(255),\r\n",
|
||||
"\t\t\t\t @is_validate_column NVARCHAR(255),\r\n",
|
||||
"\t\t\t\t @time_column_name NVARCHAR(255),\r\n",
|
||||
"\t\t\t\t @tenantid NVARCHAR(255),\r\n",
|
||||
"\t\t\t\t @appid NVARCHAR(255),\r\n",
|
||||
"\t\t\t\t @password NVARCHAR(255),\r\n",
|
||||
"\t\t\t\t @config_file NVARCHAR(255)'\r\n",
|
||||
"\t, @label_column = @label_column\r\n",
|
||||
"\t, @primary_metric = @primary_metric\r\n",
|
||||
"\t, @iterations = @iterations\r\n",
|
||||
"\t, @task = @task\r\n",
|
||||
"\t, @experiment_name = @experiment_name\r\n",
|
||||
"\t, @iteration_timeout_minutes = @iteration_timeout_minutes\r\n",
|
||||
"\t, @experiment_timeout_minutes = @experiment_timeout_minutes\r\n",
|
||||
"\t, @n_cross_validations = @n_cross_validations\r\n",
|
||||
"\t, @blacklist_models = @blacklist_models\r\n",
|
||||
"\t, @whitelist_models = @whitelist_models\r\n",
|
||||
"\t, @experiment_exit_score = @experiment_exit_score\r\n",
|
||||
"\t, @sample_weight_column = @sample_weight_column\r\n",
|
||||
"\t, @is_validate_column = @is_validate_column\r\n",
|
||||
"\t, @time_column_name = @time_column_name\r\n",
|
||||
"\t, @tenantid = @tenantid\r\n",
|
||||
"\t, @appid = @appid\r\n",
|
||||
"\t, @password = @password\r\n",
|
||||
"\t, @config_file = @config_file\r\n",
|
||||
"WITH RESULT SETS ((best_run NVARCHAR(250), experiment_name NVARCHAR(100), fitted_model VARCHAR(MAX), log_file_text NVARCHAR(MAX), workspace NVARCHAR(100)))\r\n",
|
||||
"END"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"-- This procedure returns a list of metrics for each iteration of a training run.\r\n",
|
||||
"SET ANSI_NULLS ON\r\n",
|
||||
"GO\r\n",
|
||||
"SET QUOTED_IDENTIFIER ON\r\n",
|
||||
"GO\r\n",
|
||||
"CREATE OR ALTER PROCEDURE [dbo].[AutoMLGetMetrics]\r\n",
|
||||
" (\r\n",
|
||||
"\t@run_id NVARCHAR(250), -- The RunId\r\n",
|
||||
" @experiment_name NVARCHAR(32)='automl-sql-test', -- This can be used to find the experiment in the Azure Portal.\r\n",
|
||||
" @connection_name NVARCHAR(255)='default' -- The AML connection to use.\r\n",
|
||||
" ) AS\r\n",
|
||||
"BEGIN\r\n",
|
||||
" DECLARE @tenantid NVARCHAR(255)\r\n",
|
||||
" DECLARE @appid NVARCHAR(255)\r\n",
|
||||
" DECLARE @password NVARCHAR(255)\r\n",
|
||||
" DECLARE @config_file NVARCHAR(255)\r\n",
|
||||
"\r\n",
|
||||
"\tSELECT @tenantid=TenantId, @appid=AppId, @password=Password, @config_file=ConfigFile\r\n",
|
||||
"\tFROM aml_connection\r\n",
|
||||
"\tWHERE ConnectionName = @connection_name;\r\n",
|
||||
"\r\n",
|
||||
" EXEC sp_execute_external_script @language = N'Python', @script = N'import pandas as pd\r\n",
|
||||
"import logging \r\n",
|
||||
"import azureml.core \r\n",
|
||||
"import numpy as np\r\n",
|
||||
"from azureml.core.experiment import Experiment \r\n",
|
||||
"from azureml.train.automl.run import AutoMLRun\r\n",
|
||||
"from azureml.core.authentication import ServicePrincipalAuthentication \r\n",
|
||||
"from azureml.core.workspace import Workspace \r\n",
|
||||
"\r\n",
|
||||
"auth = ServicePrincipalAuthentication(tenantid, appid, password) \r\n",
|
||||
" \r\n",
|
||||
"ws = Workspace.from_config(path=config_file, auth=auth) \r\n",
|
||||
" \r\n",
|
||||
"experiment = Experiment(ws, experiment_name) \r\n",
|
||||
"\r\n",
|
||||
"ml_run = AutoMLRun(experiment = experiment, run_id = run_id)\r\n",
|
||||
"\r\n",
|
||||
"children = list(ml_run.get_children())\r\n",
|
||||
"iterationlist = []\r\n",
|
||||
"metricnamelist = []\r\n",
|
||||
"metricvaluelist = []\r\n",
|
||||
"\r\n",
|
||||
"for run in children:\r\n",
|
||||
" properties = run.get_properties()\r\n",
|
||||
" if \"iteration\" in properties:\r\n",
|
||||
" iteration = int(properties[\"iteration\"])\r\n",
|
||||
" for metric_name, metric_value in run.get_metrics().items():\r\n",
|
||||
" if isinstance(metric_value, float):\r\n",
|
||||
" iterationlist.append(iteration)\r\n",
|
||||
" metricnamelist.append(metric_name)\r\n",
|
||||
" metricvaluelist.append(metric_value)\r\n",
|
||||
" \r\n",
|
||||
"metrics = pd.DataFrame({\"iteration\": iterationlist, \"metric_name\": metricnamelist, \"metric_value\": metricvaluelist})\r\n",
|
||||
"'\r\n",
|
||||
" , @output_data_1_name = N'metrics'\r\n",
|
||||
"\t, @params = N'@run_id NVARCHAR(250), \r\n",
|
||||
"\t\t\t\t @experiment_name NVARCHAR(32),\r\n",
|
||||
" \t\t\t\t @tenantid NVARCHAR(255),\r\n",
|
||||
"\t\t\t\t @appid NVARCHAR(255),\r\n",
|
||||
"\t\t\t\t @password NVARCHAR(255),\r\n",
|
||||
"\t\t\t\t @config_file NVARCHAR(255)'\r\n",
|
||||
" , @run_id = @run_id\r\n",
|
||||
"\t, @experiment_name = @experiment_name\r\n",
|
||||
"\t, @tenantid = @tenantid\r\n",
|
||||
"\t, @appid = @appid\r\n",
|
||||
"\t, @password = @password\r\n",
|
||||
"\t, @config_file = @config_file\r\n",
|
||||
"WITH RESULT SETS ((iteration INT, metric_name NVARCHAR(100), metric_value FLOAT))\r\n",
|
||||
"END"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"-- This procedure predicts values based on a model returned by AutoMLTrain and a dataset.\r\n",
|
||||
"-- It returns the dataset with a new column added, which is the predicted value.\r\n",
|
||||
"SET ANSI_NULLS ON\r\n",
|
||||
"GO\r\n",
|
||||
"SET QUOTED_IDENTIFIER ON\r\n",
|
||||
"GO\r\n",
|
||||
"CREATE OR ALTER PROCEDURE [dbo].[AutoMLPredict]\r\n",
|
||||
" (\r\n",
|
||||
" @input_query NVARCHAR(MAX), -- A SQL query returning data to predict on.\r\n",
|
||||
" @model NVARCHAR(MAX), -- A model returned from AutoMLTrain.\r\n",
|
||||
" @label_column NVARCHAR(255)='' -- Optional name of the column from input_query, which should be ignored when predicting\r\n",
|
||||
" ) AS \r\n",
|
||||
"BEGIN \r\n",
|
||||
" \r\n",
|
||||
" EXEC sp_execute_external_script @language = N'Python', @script = N'import pandas as pd \r\n",
|
||||
"import azureml.core \r\n",
|
||||
"import numpy as np \r\n",
|
||||
"from azureml.train.automl import AutoMLConfig \r\n",
|
||||
"import pickle \r\n",
|
||||
"import codecs \r\n",
|
||||
" \r\n",
|
||||
"model_obj = pickle.loads(codecs.decode(model.encode(), \"base64\")) \r\n",
|
||||
" \r\n",
|
||||
"test_data = input_data.copy() \r\n",
|
||||
"\r\n",
|
||||
"if label_column != \"\" and label_column is not None:\r\n",
|
||||
" y_test = test_data.pop(label_column).values \r\n",
|
||||
"X_test = test_data \r\n",
|
||||
" \r\n",
|
||||
"predicted = model_obj.predict(X_test) \r\n",
|
||||
" \r\n",
|
||||
"combined_output = input_data.assign(predicted=predicted)\r\n",
|
||||
" \r\n",
|
||||
"' \r\n",
|
||||
" , @input_data_1 = @input_query \r\n",
|
||||
" , @input_data_1_name = N'input_data' \r\n",
|
||||
" , @output_data_1_name = N'combined_output' \r\n",
|
||||
" , @params = N'@model NVARCHAR(MAX), @label_column NVARCHAR(255)' \r\n",
|
||||
" , @model = @model \r\n",
|
||||
"\t, @label_column = @label_column\r\n",
|
||||
"END"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "jeffshep"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.6",
|
||||
"language": "sql",
|
||||
"name": "python36"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "sql",
|
||||
"version": ""
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -82,7 +82,7 @@
|
||||
"source": [
|
||||
"## Set up Configuraton and Create Azure ML Workspace\n",
|
||||
"\n",
|
||||
"If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [configuration notebook](../../../configuration.ipynb) first if you haven't already to establish your connection to the AzureML Workspace."
|
||||
"If you are using an Azure Machine Learning Notebook VM, you are all set. Otherwise, go through the [configuration notebook](../../configuration.ipynb) first if you haven't already to establish your connection to the AzureML Workspace."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -140,26 +140,26 @@
|
||||
"\n",
|
||||
"columns = ['usaf', 'wban', 'datetime', 'latitude', 'longitude', 'elevation', 'windAngle', 'windSpeed', 'temperature', 'stationName', 'p_k']\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def enrich_weather_noaa_data(noaa_df):\n",
|
||||
" hours_in_day = 23\n",
|
||||
" week_in_year = 52\n",
|
||||
" \n",
|
||||
" noaa_df[\"hour\"] = noaa_df[\"datetime\"].dt.hour\n",
|
||||
" noaa_df[\"weekofyear\"] = noaa_df[\"datetime\"].dt.week\n",
|
||||
" \n",
|
||||
" noaa_df[\"sine_weekofyear\"] = noaa_df['datetime'].transform(lambda x: np.sin((2*np.pi*x.dt.week-1)/week_in_year))\n",
|
||||
" noaa_df[\"cosine_weekofyear\"] = noaa_df['datetime'].transform(lambda x: np.cos((2*np.pi*x.dt.week-1)/week_in_year))\n",
|
||||
"\n",
|
||||
" noaa_df[\"sine_hourofday\"] = noaa_df['datetime'].transform(lambda x: np.sin(2*np.pi*x.dt.hour/hours_in_day))\n",
|
||||
" noaa_df[\"cosine_hourofday\"] = noaa_df['datetime'].transform(lambda x: np.cos(2*np.pi*x.dt.hour/hours_in_day))\n",
|
||||
" noaa_df = noaa_df.assign(hour=noaa_df[\"datetime\"].dt.hour,\n",
|
||||
" weekofyear=noaa_df[\"datetime\"].dt.week,\n",
|
||||
" sine_weekofyear=noaa_df['datetime'].transform(lambda x: np.sin((2*np.pi*x.dt.week-1)/week_in_year)),\n",
|
||||
" cosine_weekofyear=noaa_df['datetime'].transform(lambda x: np.cos((2*np.pi*x.dt.week-1)/week_in_year)),\n",
|
||||
" sine_hourofday=noaa_df['datetime'].transform(lambda x: np.sin(2*np.pi*x.dt.hour/hours_in_day)),\n",
|
||||
" cosine_hourofday=noaa_df['datetime'].transform(lambda x: np.cos(2*np.pi*x.dt.hour/hours_in_day))\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" return noaa_df\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def add_window_col(input_df):\n",
|
||||
" shift_interval = pd.Timedelta('-7 days') # your X days interval\n",
|
||||
" df_shifted = input_df.copy()\n",
|
||||
" df_shifted['datetime'] = df_shifted['datetime'] - shift_interval\n",
|
||||
" df_shifted.loc[:,'datetime'] = df_shifted['datetime'] - shift_interval\n",
|
||||
" df_shifted.drop(list(input_df.columns.difference(['datetime', 'usaf', 'wban', 'sine_hourofday', 'temperature'])), axis=1, inplace=True)\n",
|
||||
"\n",
|
||||
" # merge, keeping only observations where -1 lag is present\n",
|
||||
|
||||
@@ -26,7 +26,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Prepare data for regression modeling\n",
|
||||
"First, we will prepare data for regression modeling. We will leverage the convenience of Azure Open Datasets along with the power of Azure Machine Learning service to create a regression model to predict NYC taxi fare prices. Perform `pip install azureml-contrib-opendatasets` to get the open dataset package. The Open Datasets package contains a class representing each data source (NycTlcGreen and NycTlcYellow) to easily filter date parameters before downloading.\n",
|
||||
"First, we will prepare data for regression modeling. We will leverage the convenience of Azure Open Datasets along with the power of Azure Machine Learning service to create a regression model to predict NYC taxi fare prices. Perform `pip install azureml-opendatasets` to get the open dataset package. The Open Datasets package contains a class representing each data source (NycTlcGreen and NycTlcYellow) to easily filter date parameters before downloading.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"### Load data\n",
|
||||
@@ -52,7 +52,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.contrib.opendatasets import NycTlcGreen, NycTlcYellow\n",
|
||||
"from azureml.opendatasets import NycTlcGreen, NycTlcYellow\n",
|
||||
"import pandas as pd\n",
|
||||
"from datetime import datetime\n",
|
||||
"from dateutil.relativedelta import relativedelta\n",
|
||||
|
||||
@@ -3,7 +3,7 @@ dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-widgets
|
||||
- azureml-contrib-opendatasets
|
||||
- azureml-opendatasets
|
||||
- azureml-dataprep
|
||||
- azureml-train-automl
|
||||
- matplotlib
|
||||
|
||||
@@ -1,6 +1,3 @@
|
||||
# Copyright (c) Microsoft. All rights reserved.
|
||||
# Licensed under the MIT license.
|
||||
|
||||
# Original source: https://github.com/pytorch/examples/blob/master/fast_neural_style/neural_style/neural_style.py
|
||||
import argparse
|
||||
import os
|
||||
|
||||
@@ -295,7 +295,7 @@
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "minxia"
|
||||
"name": "ninhu"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
@@ -314,8 +314,7 @@
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.6"
|
||||
},
|
||||
"msauthor": "minxia"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
|
||||
@@ -375,7 +375,7 @@
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "minxia"
|
||||
"name": "ninhu"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
|
||||
@@ -315,7 +315,7 @@
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "minxia"
|
||||
"name": "ninhu"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
@@ -334,8 +334,7 @@
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.6"
|
||||
},
|
||||
"msauthor": "minxia"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
|
||||
@@ -403,8 +403,7 @@
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.6"
|
||||
},
|
||||
"msauthor": "minxia"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
|
||||
@@ -299,7 +299,7 @@
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "minxia"
|
||||
"name": "ninhu"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
@@ -318,8 +318,7 @@
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.6"
|
||||
},
|
||||
"msauthor": "minxia"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
|
||||
@@ -144,7 +144,7 @@
|
||||
" print('Found existing compute target')\n",
|
||||
"except ComputeTargetException:\n",
|
||||
" print('Creating a new compute target...')\n",
|
||||
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6', max_nodes=4)\n",
|
||||
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2', max_nodes=4)\n",
|
||||
"\n",
|
||||
" # create the cluster\n",
|
||||
" cpu_cluster = ComputeTarget.create(ws, cluster_name, compute_config)\n",
|
||||
@@ -515,7 +515,7 @@
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "maxluk"
|
||||
"name": "ninhu"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
@@ -534,8 +534,7 @@
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.8"
|
||||
},
|
||||
"msauthor": "jingywa"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
|
||||
@@ -405,7 +405,7 @@
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "minxia"
|
||||
"name": "ninhu"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
@@ -424,8 +424,7 @@
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.6"
|
||||
},
|
||||
"msauthor": "minxia"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
|
||||
@@ -700,7 +700,7 @@
|
||||
"est = TensorFlow(source_directory=script_folder,\n",
|
||||
" script_params={'--data-folder': ds.path('mnist').as_mount()},\n",
|
||||
" compute_target=compute_target,\n",
|
||||
" conda_packages=['keras', 'matplotlib'],\n",
|
||||
" pip_packages=['keras', 'matplotlib'],\n",
|
||||
" entry_script='keras_mnist.py', \n",
|
||||
" use_gpu=True)"
|
||||
]
|
||||
@@ -1151,7 +1151,7 @@
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "maxluk"
|
||||
"name": "ninhu"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
@@ -1170,8 +1170,7 @@
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.7"
|
||||
},
|
||||
"msauthor": "maxluk"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
|
||||
@@ -719,7 +719,7 @@
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "minxia"
|
||||
"name": "ninhu"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
@@ -738,8 +738,7 @@
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.6"
|
||||
},
|
||||
"msauthor": "minxia"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
|
||||
@@ -0,0 +1,9 @@
|
||||
name: train-hyperparameter-tune-deploy-with-pytorch
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-widgets
|
||||
- pillow==5.4.1
|
||||
- matplotlib
|
||||
- https://download.pytorch.org/whl/cpu/torch-1.1.0-cp35-cp35m-win_amd64.whl
|
||||
- https://download.pytorch.org/whl/cpu/torchvision-0.3.0-cp35-cp35m-win_amd64.whl
|
||||
@@ -1151,7 +1151,7 @@
|
||||
"metadata": {
|
||||
"authors": [
|
||||
{
|
||||
"name": "minxia"
|
||||
"name": "ninhu"
|
||||
}
|
||||
],
|
||||
"kernelspec": {
|
||||
@@ -1170,8 +1170,7 @@
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.8"
|
||||
},
|
||||
"msauthor": "minxia"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
|
||||
@@ -100,7 +100,7 @@
|
||||
"\n",
|
||||
"# Check core SDK version number\n",
|
||||
"\n",
|
||||
"print(\"This notebook was created using SDK version , you are currently running version\", azureml.core.VERSION)"
|
||||
"print(\"This notebook was created using SDK version 1.0.48\r\n, you are currently running version\", azureml.core.VERSION)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -126,24 +126,13 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core.compute import ComputeTarget, AmlCompute\n",
|
||||
"from azureml.core.compute_target import ComputeTargetException\n",
|
||||
"from azureml.core.compute import ComputeTarget\n",
|
||||
"\n",
|
||||
"# choose a name for your cluster\n",
|
||||
"cluster_name = \"gpu-cluster\"\n",
|
||||
"cluster_name = \"cpu-cluster\"\n",
|
||||
"\n",
|
||||
"try:\n",
|
||||
" compute_target = ComputeTarget(workspace=ws, name=cluster_name)\n",
|
||||
" print('Found existing compute target.')\n",
|
||||
"except ComputeTargetException:\n",
|
||||
" print('Creating a new compute target...')\n",
|
||||
" compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6',\n",
|
||||
" max_nodes=4)\n",
|
||||
"\n",
|
||||
" # create the cluster\n",
|
||||
" compute_target = ComputeTarget.create(ws, cluster_name, compute_config)\n",
|
||||
"\n",
|
||||
" compute_target.wait_for_completion(show_output=True)\n",
|
||||
"compute_target = ComputeTarget(workspace=ws, name=cluster_name)\n",
|
||||
"print('Found existing compute target.')\n",
|
||||
"\n",
|
||||
"# use get_status() to get a detailed status for the current cluster. \n",
|
||||
"print(compute_target.get_status().serialize())"
|
||||
@@ -153,7 +142,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The above code creates GPU compute. If you instead want to create CPU compute, provide a different VM size to the `vm_size` parameter, such as `STANDARD_D2_V2`."
|
||||
"The above code retrieves an existing CPU compute target. Scikit-learn does not support GPU computing."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -167,7 +156,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now that you have your data and training script prepared, you are ready to train on your remote compute cluster. You can take advantage of Azure compute to leverage GPUs to cut down your training time."
|
||||
"Now that you have your data and training script prepared, you are ready to train on your remote compute. You can take advantage of Azure compute to leverage a CPU cluster."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -299,7 +288,8 @@
|
||||
"estimator = SKLearn(source_directory=project_folder, \n",
|
||||
" script_params=script_params,\n",
|
||||
" compute_target=compute_target,\n",
|
||||
" entry_script='train_iris.py'\n",
|
||||
" entry_script='train_iris.py',\n",
|
||||
" pip_packages=['joblib']\n",
|
||||
" )"
|
||||
]
|
||||
},
|
||||
@@ -307,7 +297,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The `script_params` parameter is a dictionary containing the command-line arguments to your training script `entry_script`. To leverage the Azure VM's GPU for training, we set `use_gpu=True`."
|
||||
"The `script_params` parameter is a dictionary containing the command-line arguments to your training script `entry_script`."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -467,7 +457,57 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"run.wait_for_completion(show_output=True)"
|
||||
"hyperdrive_run.wait_for_completion(show_output=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Find and register best model\n",
|
||||
"When all jobs finish, we can find out the one that has the highest accuracy."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"best_run = hyperdrive_run.get_best_run_by_primary_metric()\n",
|
||||
"print(best_run.get_details()['runDefinition']['arguments'])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now, let's list the model files uploaded during the run."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(best_run.get_file_names())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We can then register the folder (and all files in it) as a model named `sklearn-iris` under the workspace for deployment"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model = best_run.register_model(model_name='sklearn-iris', model_path='model.joblib')"
|
||||
]
|
||||
}
|
||||
],
|
||||
@@ -492,7 +532,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.5.2"
|
||||
"version": "3.6.6"
|
||||
},
|
||||
"msauthor": "dipeck"
|
||||
},
|
||||
|
||||
@@ -9,6 +9,8 @@ from sklearn import datasets
|
||||
from sklearn.metrics import confusion_matrix
|
||||
from sklearn.model_selection import train_test_split
|
||||
|
||||
import joblib
|
||||
|
||||
from azureml.core.run import Run
|
||||
run = Run.get_context()
|
||||
|
||||
@@ -48,6 +50,9 @@ def main():
|
||||
cm = confusion_matrix(y_test, svm_predictions)
|
||||
print(cm)
|
||||
|
||||
# save model
|
||||
joblib.dump(svm_model_linear, 'model.joblib')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
||||
@@ -309,7 +309,7 @@
|
||||
"source": [
|
||||
"---\n",
|
||||
"## Deploy\n",
|
||||
"Now that we have trained a set of models and identified the run containing the best model, we want to deploy the model for real time inferencing. The process of deploying a model involves\n",
|
||||
"Now that we have trained a set of models and identified the run containing the best model, we want to deploy the model for real time inference. The process of deploying a model involves\n",
|
||||
"* registering a model in your workspace\n",
|
||||
"* creating a scoring file containing init and run methods\n",
|
||||
"* creating an environment dependency file describing packages necessary for your scoring file\n",
|
||||
@@ -386,7 +386,7 @@
|
||||
"source": [
|
||||
"### Describe your environment\n",
|
||||
"\n",
|
||||
"Each modelling process may require a unique set of packages. Therefore we need to create a dependency file providing instructions to AML on how to contstruct a docker image that can support the models and any other objects required for inferencing. In the following cell, we create a environment dependency file, *myenv.yml* that specifies which libraries are needed by the scoring script. You can create this file manually, or use the `CondaDependencies` class to create it for you.\n",
|
||||
"Each modelling process may require a unique set of packages. Therefore we need to create a dependency file providing instructions to AML on how to construct a docker image that can support the models and any other objects required for inference. In the following cell, we create a environment dependency file, *myenv.yml* that specifies which libraries are needed by the scoring script. You can create this file manually, or use the `CondaDependencies` class to create it for you.\n",
|
||||
"\n",
|
||||
"Next we use this environment file to describe the docker container that we need to create in order to deploy our model. This container is created using our environment description and includes our scoring script."
|
||||
]
|
||||
|
||||
@@ -282,7 +282,7 @@
|
||||
"source": [
|
||||
"## Next Steps\n",
|
||||
"\n",
|
||||
"Learn about [model management and inferencing in Azure Machine Learning service](https://docs.microsoft.com/en-us/azure/machine-learning/service/concept-model-management-and-deployment)."
|
||||
"Learn about [model management and inference in Azure Machine Learning service](https://docs.microsoft.com/en-us/azure/machine-learning/service/concept-model-management-and-deployment)."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -4,5 +4,5 @@ dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
- azureml-mlflow
|
||||
- https://download.pytorch.org/whl/cpu/torch-1.1.0-cp36-cp36m-linux_x86_64.whl
|
||||
- https://download.pytorch.org/whl/cpu/torchvision-0.3.0-cp36-cp36m-linux_x86_64.whl
|
||||
- https://download.pytorch.org/whl/cpu/torch-1.1.0-cp35-cp35m-win_amd64.whl
|
||||
- https://download.pytorch.org/whl/cpu/torchvision-0.3.0-cp35-cp35m-win_amd64.whl
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
|
||||
@@ -399,6 +399,13 @@
|
||||
"source": [
|
||||
"You have now finished using a dataset from start to finish of your experiment!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@@ -422,7 +429,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.3"
|
||||
"version": "3.6.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -50,7 +57,7 @@
|
||||
"\n",
|
||||
"### What is an Azure Machine Learning workspace\n",
|
||||
"\n",
|
||||
"An Azure ML Workspace is an Azure resource that organizes and coordinates the actions of many other Azure resources to assist in executing and sharing machine learning workflows. In particular, an Azure ML Workspace coordinates storage, databases, and compute resources providing added functionality for machine learning experimentation, deployment, inferencing, and the monitoring of deployed models."
|
||||
"An Azure ML Workspace is an Azure resource that organizes and coordinates the actions of many other Azure resources to assist in executing and sharing machine learning workflows. In particular, an Azure ML Workspace coordinates storage, databases, and compute resources providing added functionality for machine learning experimentation, deployment, inference, and the monitoring of deployed models."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -95,7 +102,7 @@
|
||||
"source": [
|
||||
"import azureml.core\n",
|
||||
"\n",
|
||||
"print(\"This notebook was created using version of the Azure ML SDK\")\n",
|
||||
"print(\"This notebook was created using version 1.0.48\r\n of the Azure ML SDK\")\n",
|
||||
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
||||
]
|
||||
},
|
||||
@@ -253,13 +260,6 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
""
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
@@ -58,7 +58,7 @@ Bug fixes and improvements
|
||||
- Now does not fail when attempting to set a date column to be date type.
|
||||
- Improved JoinType types and accompanying reference documentation. When joining two dataflows, you can now specify one of these types of join:
|
||||
- NONE, MATCH, INNER, UNMATCHLEFT, LEFTANTI, LEFTOUTER, UNMATCHRIGHT, RIGHTANTI, RIGHTOUTER, FULLANTI, FULL.
|
||||
- Improved data type inferencing to recognize more date formats.
|
||||
- Improved data type inference to recognize more date formats.
|
||||
|
||||
### 2019-04-17 (version 1.1.2)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user