mirror of
https://github.com/Azure/MachineLearningNotebooks.git
synced 2025-12-23 11:02:39 -05:00
Compare commits
15 Commits
azureml-sd
...
azureml-sd
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
dadc93cfe5 | ||
|
|
c7076bf95c | ||
|
|
ebdffd5626 | ||
|
|
d123880562 | ||
|
|
4864e8ea60 | ||
|
|
c86db0d7fd | ||
|
|
ccfbbb3b14 | ||
|
|
c42ba64b15 | ||
|
|
6d8bf32243 | ||
|
|
9094da4085 | ||
|
|
ebf9d2855c | ||
|
|
1bbd78eb33 | ||
|
|
77f5a69e04 | ||
|
|
ce82af2ab0 | ||
|
|
2a2d2efa17 |
@@ -103,7 +103,7 @@
|
||||
"source": [
|
||||
"import azureml.core\n",
|
||||
"\n",
|
||||
"print(\"This notebook was created using version 1.37.0 of the Azure ML SDK\")\n",
|
||||
"print(\"This notebook was created using version 1.39.0 of the Azure ML SDK\")\n",
|
||||
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -188,13 +188,6 @@
|
||||
"### Script to process data and train model"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The _process_data.py_ script used in the step below is a slightly modified implementation of [RAPIDS Mortgage E2E example](https://github.com/rapidsai/notebooks-contrib/blob/master/intermediate_notebooks/E2E/mortgage/mortgage_e2e.ipynb)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@@ -373,7 +366,7 @@
|
||||
"run_config.target = gpu_cluster_name\n",
|
||||
"run_config.environment.docker.enabled = True\n",
|
||||
"run_config.environment.docker.gpu_support = True\n",
|
||||
"run_config.environment.docker.base_image = \"mcr.microsoft.com/azureml/base-gpu:intelmpi2018.3-cuda10.0-cudnn7-ubuntu16.04\"\n",
|
||||
"run_config.environment.docker.base_image = \"mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.1-cudnn8-ubuntu20.04\"\n",
|
||||
"run_config.environment.spark.precache_packages = False\n",
|
||||
"run_config.data_references={'data':data_ref.to_config()}"
|
||||
]
|
||||
|
||||
@@ -49,7 +49,7 @@
|
||||
"* `fairlearn>=0.6.2` (pre-v0.5.0 will work with minor modifications)\n",
|
||||
"* `joblib`\n",
|
||||
"* `liac-arff`\n",
|
||||
"* `raiwidgets~=0.7.0`\n",
|
||||
"* `raiwidgets`\n",
|
||||
"\n",
|
||||
"Fairlearn relies on features introduced in v0.22.1 of `scikit-learn`. If you have an older version already installed, please uncomment and run the following cell:"
|
||||
]
|
||||
|
||||
@@ -6,4 +6,4 @@ dependencies:
|
||||
- fairlearn>=0.6.2
|
||||
- joblib
|
||||
- liac-arff
|
||||
- raiwidgets~=0.15.0
|
||||
- raiwidgets~=0.17.0
|
||||
|
||||
@@ -51,7 +51,7 @@
|
||||
"* `fairlearn>=0.6.2` (also works for pre-v0.5.0 with slight modifications)\n",
|
||||
"* `joblib`\n",
|
||||
"* `liac-arff`\n",
|
||||
"* `raiwidgets~=0.7.0`\n",
|
||||
"* `raiwidgets`\n",
|
||||
"\n",
|
||||
"Fairlearn relies on features introduced in v0.22.1 of `scikit-learn`. If you have an older version already installed, please uncomment and run the following cell:"
|
||||
]
|
||||
|
||||
@@ -6,4 +6,4 @@ dependencies:
|
||||
- fairlearn>=0.6.2
|
||||
- joblib
|
||||
- liac-arff
|
||||
- raiwidgets~=0.15.0
|
||||
- raiwidgets~=0.17.0
|
||||
|
||||
@@ -1,29 +1,30 @@
|
||||
name: azure_automl
|
||||
channels:
|
||||
- conda-forge
|
||||
- pytorch
|
||||
- main
|
||||
dependencies:
|
||||
# The python interpreter version.
|
||||
# Currently Azure ML only supports 3.5.2 and later.
|
||||
- pip==21.1.2
|
||||
- python>=3.5.2,<3.8
|
||||
- boto3==1.15.18
|
||||
- matplotlib==2.1.0
|
||||
- numpy==1.18.5
|
||||
- cython
|
||||
- urllib3<1.24
|
||||
- scipy>=1.4.1,<=1.5.2
|
||||
- scikit-learn==0.22.1
|
||||
- pandas==0.25.1
|
||||
- py-xgboost<=0.90
|
||||
- conda-forge::fbprophet==0.5
|
||||
- holidays==0.9.11
|
||||
# Currently Azure ML only supports 3.6.0 and later.
|
||||
- pip==20.2.4
|
||||
- python>=3.6,<3.9
|
||||
- matplotlib==3.3.4
|
||||
- py-xgboost==1.3.3
|
||||
- pytorch::pytorch=1.4.0
|
||||
- conda-forge::fbprophet==0.7.1
|
||||
- cudatoolkit=10.1.243
|
||||
- tornado==6.1.0
|
||||
- tqdm==4.62.3
|
||||
- notebook
|
||||
- pywin32==225
|
||||
- PySocks==1.7.1
|
||||
- conda-forge::pyqt==5.12.3
|
||||
|
||||
- pip:
|
||||
# Required packages for AzureML execution, history, and data preparation.
|
||||
- azureml-widgets~=1.37.0
|
||||
- azureml-widgets~=1.39.0
|
||||
- pytorch-transformers==1.0.0
|
||||
- spacy==2.1.8
|
||||
- spacy==2.2.4
|
||||
- pystan==2.19.1.1
|
||||
- https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
|
||||
- -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/1.37.0/validated_win32_requirements.txt [--no-deps]
|
||||
- -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/1.39.0/validated_win32_requirements.txt [--no-deps]
|
||||
- arch==4.14
|
||||
|
||||
@@ -1,30 +1,33 @@
|
||||
name: azure_automl
|
||||
channels:
|
||||
- conda-forge
|
||||
- pytorch
|
||||
- main
|
||||
dependencies:
|
||||
# The python interpreter version.
|
||||
# Currently Azure ML only supports 3.5.2 and later.
|
||||
- pip==21.1.2
|
||||
- python>=3.5.2,<3.8
|
||||
- nb_conda
|
||||
- boto3==1.15.18
|
||||
- matplotlib==2.1.0
|
||||
- numpy==1.18.5
|
||||
- cython
|
||||
- urllib3<1.24
|
||||
# Currently Azure ML only supports 3.6.0 and later.
|
||||
- pip==20.2.4
|
||||
- python>=3.6,<3.9
|
||||
- boto3==1.20.19
|
||||
- botocore<=1.23.19
|
||||
- matplotlib==3.3.4
|
||||
- numpy==1.19.5
|
||||
- cython==0.29.14
|
||||
- urllib3==1.26.7
|
||||
- scipy>=1.4.1,<=1.5.2
|
||||
- scikit-learn==0.22.1
|
||||
- pandas==0.25.1
|
||||
- py-xgboost<=0.90
|
||||
- conda-forge::fbprophet==0.5
|
||||
- holidays==0.9.11
|
||||
- py-xgboost<=1.3.3
|
||||
- holidays==0.10.3
|
||||
- conda-forge::fbprophet==0.7.1
|
||||
- pytorch::pytorch=1.4.0
|
||||
- cudatoolkit=10.1.243
|
||||
- tornado==6.1.0
|
||||
|
||||
- pip:
|
||||
# Required packages for AzureML execution, history, and data preparation.
|
||||
- azureml-widgets~=1.37.0
|
||||
- azureml-widgets~=1.39.0
|
||||
- pytorch-transformers==1.0.0
|
||||
- spacy==2.1.8
|
||||
- spacy==2.2.4
|
||||
- pystan==2.19.1.1
|
||||
- https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
|
||||
- -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/1.37.0/validated_linux_requirements.txt [--no-deps]
|
||||
- -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/1.39.0/validated_linux_requirements.txt [--no-deps]
|
||||
- arch==4.14
|
||||
|
||||
@@ -1,31 +1,34 @@
|
||||
name: azure_automl
|
||||
channels:
|
||||
- conda-forge
|
||||
- pytorch
|
||||
- main
|
||||
dependencies:
|
||||
# The python interpreter version.
|
||||
# Currently Azure ML only supports 3.5.2 and later.
|
||||
- pip==21.1.2
|
||||
# Currently Azure ML only supports 3.6.0 and later.
|
||||
- pip==20.2.4
|
||||
- nomkl
|
||||
- python>=3.5.2,<3.8
|
||||
- nb_conda
|
||||
- boto3==1.15.18
|
||||
- matplotlib==2.1.0
|
||||
- numpy==1.18.5
|
||||
- cython
|
||||
- urllib3<1.24
|
||||
- python>=3.6,<3.9
|
||||
- boto3==1.20.19
|
||||
- botocore<=1.23.19
|
||||
- matplotlib==3.3.4
|
||||
- numpy==1.19.5
|
||||
- cython==0.29.14
|
||||
- urllib3==1.26.7
|
||||
- scipy>=1.4.1,<=1.5.2
|
||||
- scikit-learn==0.22.1
|
||||
- pandas==0.25.1
|
||||
- py-xgboost<=0.90
|
||||
- conda-forge::fbprophet==0.5
|
||||
- holidays==0.9.11
|
||||
- py-xgboost<=1.3.3
|
||||
- holidays==0.10.3
|
||||
- conda-forge::fbprophet==0.7.1
|
||||
- pytorch::pytorch=1.4.0
|
||||
- cudatoolkit=9.0
|
||||
- tornado==6.1.0
|
||||
|
||||
- pip:
|
||||
# Required packages for AzureML execution, history, and data preparation.
|
||||
- azureml-widgets~=1.37.0
|
||||
- azureml-widgets~=1.39.0
|
||||
- pytorch-transformers==1.0.0
|
||||
- spacy==2.1.8
|
||||
- spacy==2.2.4
|
||||
- pystan==2.19.1.1
|
||||
- https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
|
||||
- -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/1.37.0/validated_darwin_requirements.txt [--no-deps]
|
||||
- -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/1.39.0/validated_darwin_requirements.txt [--no-deps]
|
||||
- arch==4.14
|
||||
|
||||
@@ -105,7 +105,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(\"This notebook was created using version 1.37.0 of the Azure ML SDK\")\n",
|
||||
"print(\"This notebook was created using version 1.39.0 of the Azure ML SDK\")\n",
|
||||
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
||||
]
|
||||
},
|
||||
@@ -154,7 +154,7 @@
|
||||
"output['Resource Group'] = ws.resource_group\n",
|
||||
"output['Location'] = ws.location\n",
|
||||
"output['Experiment Name'] = experiment.name\n",
|
||||
"pd.set_option('display.max_colwidth', -1)\n",
|
||||
"pd.set_option('display.max_colwidth', None)\n",
|
||||
"outputDf = pd.DataFrame(data = output, index = [''])\n",
|
||||
"outputDf.T"
|
||||
]
|
||||
@@ -430,7 +430,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Download the featuurization summary JSON file locally\n",
|
||||
"# Download the featurization summary JSON file locally\n",
|
||||
"best_run.download_file(\"outputs/featurization_summary.json\", \"featurization_summary.json\")\n",
|
||||
"\n",
|
||||
"# Render the JSON as a pandas DataFrame\n",
|
||||
|
||||
@@ -93,7 +93,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(\"This notebook was created using version 1.37.0 of the Azure ML SDK\")\n",
|
||||
"print(\"This notebook was created using version 1.39.0 of the Azure ML SDK\")\n",
|
||||
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
||||
]
|
||||
},
|
||||
@@ -116,7 +116,7 @@
|
||||
"output['Resource Group'] = ws.resource_group\n",
|
||||
"output['Location'] = ws.location\n",
|
||||
"output['Experiment Name'] = experiment.name\n",
|
||||
"pd.set_option('display.max_colwidth', -1)\n",
|
||||
"pd.set_option('display.max_colwidth', None)\n",
|
||||
"outputDf = pd.DataFrame(data = output, index = [''])\n",
|
||||
"outputDf.T"
|
||||
]
|
||||
|
||||
@@ -97,7 +97,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(\"This notebook was created using version 1.37.0 of the Azure ML SDK\")\n",
|
||||
"print(\"This notebook was created using version 1.39.0 of the Azure ML SDK\")\n",
|
||||
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
||||
]
|
||||
},
|
||||
@@ -127,7 +127,7 @@
|
||||
"output['Resource Group'] = ws.resource_group\n",
|
||||
"output['Location'] = ws.location\n",
|
||||
"output['Experiment Name'] = experiment.name\n",
|
||||
"pd.set_option('display.max_colwidth', -1)\n",
|
||||
"pd.set_option('display.max_colwidth', None)\n",
|
||||
"outputDf = pd.DataFrame(data = output, index = [''])\n",
|
||||
"outputDf.T"
|
||||
]
|
||||
@@ -167,6 +167,7 @@
|
||||
" # To use BERT (this is recommended for best performance), select a GPU such as \"STANDARD_NC6\" \n",
|
||||
" # or similar GPU option\n",
|
||||
" # available in your workspace\n",
|
||||
" idle_seconds_before_scaledown = 60,\n",
|
||||
" max_nodes = num_nodes)\n",
|
||||
" compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, compute_config)\n",
|
||||
"\n",
|
||||
@@ -369,7 +370,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Download the featuurization summary JSON file locally\n",
|
||||
"# Download the featurization summary JSON file locally\n",
|
||||
"best_run.download_file(\"outputs/featurization_summary.json\", \"featurization_summary.json\")\n",
|
||||
"\n",
|
||||
"# Render the JSON as a pandas DataFrame\n",
|
||||
|
||||
@@ -81,7 +81,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(\"This notebook was created using version 1.37.0 of the Azure ML SDK\")\n",
|
||||
"print(\"This notebook was created using version 1.39.0 of the Azure ML SDK\")\n",
|
||||
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
||||
]
|
||||
},
|
||||
@@ -127,7 +127,7 @@
|
||||
"output['Resource Group'] = ws.resource_group\n",
|
||||
"output['Location'] = ws.location\n",
|
||||
"output['Run History Name'] = experiment_name\n",
|
||||
"pd.set_option('display.max_colwidth', -1)\n",
|
||||
"pd.set_option('display.max_colwidth', None)\n",
|
||||
"outputDf = pd.DataFrame(data = output, index = [''])\n",
|
||||
"outputDf.T"
|
||||
]
|
||||
|
||||
@@ -1,12 +1,11 @@
|
||||
name: azure_automl_experimental
|
||||
dependencies:
|
||||
# The python interpreter version.
|
||||
# Currently Azure ML only supports 3.5.2 and later.
|
||||
- pip<=19.3.1
|
||||
- python>=3.5.2,<3.8
|
||||
- nb_conda
|
||||
- cython
|
||||
- urllib3<1.24
|
||||
# Currently Azure ML only supports 3.6.0 and later.
|
||||
- pip<=20.2.4
|
||||
- python>=3.6.0,<3.9
|
||||
- cython==0.29.14
|
||||
- urllib3==1.26.7
|
||||
- PyJWT < 2.0.0
|
||||
- numpy==1.18.5
|
||||
|
||||
|
||||
@@ -1,15 +1,16 @@
|
||||
name: azure_automl_experimental
|
||||
channels:
|
||||
- conda-forge
|
||||
- main
|
||||
dependencies:
|
||||
# The python interpreter version.
|
||||
# Currently Azure ML only supports 3.5.2 and later.
|
||||
- pip<=19.3.1
|
||||
# Currently Azure ML only supports 3.6.0 and later.
|
||||
- pip<=20.2.4
|
||||
- nomkl
|
||||
- python>=3.5.2,<3.8
|
||||
- nb_conda
|
||||
- cython
|
||||
- urllib3<1.24
|
||||
- python>=3.6.0,<3.9
|
||||
- urllib3==1.26.7
|
||||
- PyJWT < 2.0.0
|
||||
- numpy==1.18.5
|
||||
- numpy==1.19.5
|
||||
|
||||
- pip:
|
||||
# Required packages for AzureML execution, history, and data preparation.
|
||||
|
||||
@@ -92,7 +92,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(\"This notebook was created using version 1.37.0 of the Azure ML SDK\")\n",
|
||||
"print(\"This notebook was created using version 1.39.0 of the Azure ML SDK\")\n",
|
||||
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -91,7 +91,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(\"This notebook was created using version 1.37.0 of the Azure ML SDK\")\n",
|
||||
"print(\"This notebook was created using version 1.39.0 of the Azure ML SDK\")\n",
|
||||
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -86,7 +86,7 @@
|
||||
"output[\"Resource Group\"] = ws.resource_group\n",
|
||||
"output[\"Location\"] = ws.location\n",
|
||||
"output[\"Default datastore name\"] = dstore.name\n",
|
||||
"pd.set_option(\"display.max_colwidth\", -1)\n",
|
||||
"pd.set_option(\"display.max_colwidth\", None)\n",
|
||||
"outputDf = pd.DataFrame(data=output, index=[\"\"])\n",
|
||||
"outputDf.T"
|
||||
]
|
||||
|
||||
@@ -100,7 +100,7 @@
|
||||
"output[\"SKU\"] = ws.sku\n",
|
||||
"output[\"Resource Group\"] = ws.resource_group\n",
|
||||
"output[\"Location\"] = ws.location\n",
|
||||
"pd.set_option(\"display.max_colwidth\", -1)\n",
|
||||
"pd.set_option(\"display.max_colwidth\", None)\n",
|
||||
"outputDf = pd.DataFrame(data=output, index=[\"\"])\n",
|
||||
"outputDf.T"
|
||||
]
|
||||
|
||||
@@ -1,20 +0,0 @@
|
||||
DATE,grain,BeerProduction
|
||||
2017-01-01,grain,9049
|
||||
2017-02-01,grain,10458
|
||||
2017-03-01,grain,12489
|
||||
2017-04-01,grain,11499
|
||||
2017-05-01,grain,13553
|
||||
2017-06-01,grain,14740
|
||||
2017-07-01,grain,11424
|
||||
2017-08-01,grain,13412
|
||||
2017-09-01,grain,11917
|
||||
2017-10-01,grain,12721
|
||||
2017-11-01,grain,13272
|
||||
2017-12-01,grain,14278
|
||||
2018-01-01,grain,9572
|
||||
2018-02-01,grain,10423
|
||||
2018-03-01,grain,12667
|
||||
2018-04-01,grain,11904
|
||||
2018-05-01,grain,14120
|
||||
2018-06-01,grain,14565
|
||||
2018-07-01,grain,12622
|
||||
|
@@ -1,301 +0,0 @@
|
||||
DATE,grain,BeerProduction
|
||||
1992-01-01,grain,3459
|
||||
1992-02-01,grain,3458
|
||||
1992-03-01,grain,4002
|
||||
1992-04-01,grain,4564
|
||||
1992-05-01,grain,4221
|
||||
1992-06-01,grain,4529
|
||||
1992-07-01,grain,4466
|
||||
1992-08-01,grain,4137
|
||||
1992-09-01,grain,4126
|
||||
1992-10-01,grain,4259
|
||||
1992-11-01,grain,4240
|
||||
1992-12-01,grain,4936
|
||||
1993-01-01,grain,3031
|
||||
1993-02-01,grain,3261
|
||||
1993-03-01,grain,4160
|
||||
1993-04-01,grain,4377
|
||||
1993-05-01,grain,4307
|
||||
1993-06-01,grain,4696
|
||||
1993-07-01,grain,4458
|
||||
1993-08-01,grain,4457
|
||||
1993-09-01,grain,4364
|
||||
1993-10-01,grain,4236
|
||||
1993-11-01,grain,4500
|
||||
1993-12-01,grain,4974
|
||||
1994-01-01,grain,3075
|
||||
1994-02-01,grain,3377
|
||||
1994-03-01,grain,4443
|
||||
1994-04-01,grain,4261
|
||||
1994-05-01,grain,4460
|
||||
1994-06-01,grain,4985
|
||||
1994-07-01,grain,4324
|
||||
1994-08-01,grain,4719
|
||||
1994-09-01,grain,4374
|
||||
1994-10-01,grain,4248
|
||||
1994-11-01,grain,4784
|
||||
1994-12-01,grain,4971
|
||||
1995-01-01,grain,3370
|
||||
1995-02-01,grain,3484
|
||||
1995-03-01,grain,4269
|
||||
1995-04-01,grain,3994
|
||||
1995-05-01,grain,4715
|
||||
1995-06-01,grain,4974
|
||||
1995-07-01,grain,4223
|
||||
1995-08-01,grain,5000
|
||||
1995-09-01,grain,4235
|
||||
1995-10-01,grain,4554
|
||||
1995-11-01,grain,4851
|
||||
1995-12-01,grain,4826
|
||||
1996-01-01,grain,3699
|
||||
1996-02-01,grain,3983
|
||||
1996-03-01,grain,4262
|
||||
1996-04-01,grain,4619
|
||||
1996-05-01,grain,5219
|
||||
1996-06-01,grain,4836
|
||||
1996-07-01,grain,4941
|
||||
1996-08-01,grain,5062
|
||||
1996-09-01,grain,4365
|
||||
1996-10-01,grain,5012
|
||||
1996-11-01,grain,4850
|
||||
1996-12-01,grain,5097
|
||||
1997-01-01,grain,3758
|
||||
1997-02-01,grain,3825
|
||||
1997-03-01,grain,4454
|
||||
1997-04-01,grain,4635
|
||||
1997-05-01,grain,5210
|
||||
1997-06-01,grain,5057
|
||||
1997-07-01,grain,5231
|
||||
1997-08-01,grain,5034
|
||||
1997-09-01,grain,4970
|
||||
1997-10-01,grain,5342
|
||||
1997-11-01,grain,4831
|
||||
1997-12-01,grain,5965
|
||||
1998-01-01,grain,3796
|
||||
1998-02-01,grain,4019
|
||||
1998-03-01,grain,4898
|
||||
1998-04-01,grain,5090
|
||||
1998-05-01,grain,5237
|
||||
1998-06-01,grain,5447
|
||||
1998-07-01,grain,5435
|
||||
1998-08-01,grain,5107
|
||||
1998-09-01,grain,5515
|
||||
1998-10-01,grain,5583
|
||||
1998-11-01,grain,5346
|
||||
1998-12-01,grain,6286
|
||||
1999-01-01,grain,4032
|
||||
1999-02-01,grain,4435
|
||||
1999-03-01,grain,5479
|
||||
1999-04-01,grain,5483
|
||||
1999-05-01,grain,5587
|
||||
1999-06-01,grain,6176
|
||||
1999-07-01,grain,5621
|
||||
1999-08-01,grain,5889
|
||||
1999-09-01,grain,5828
|
||||
1999-10-01,grain,5849
|
||||
1999-11-01,grain,6180
|
||||
1999-12-01,grain,6771
|
||||
2000-01-01,grain,4243
|
||||
2000-02-01,grain,4952
|
||||
2000-03-01,grain,6008
|
||||
2000-04-01,grain,5353
|
||||
2000-05-01,grain,6435
|
||||
2000-06-01,grain,6673
|
||||
2000-07-01,grain,5636
|
||||
2000-08-01,grain,6630
|
||||
2000-09-01,grain,5887
|
||||
2000-10-01,grain,6322
|
||||
2000-11-01,grain,6520
|
||||
2000-12-01,grain,6678
|
||||
2001-01-01,grain,5082
|
||||
2001-02-01,grain,5216
|
||||
2001-03-01,grain,5893
|
||||
2001-04-01,grain,5894
|
||||
2001-05-01,grain,6799
|
||||
2001-06-01,grain,6667
|
||||
2001-07-01,grain,6374
|
||||
2001-08-01,grain,6840
|
||||
2001-09-01,grain,5575
|
||||
2001-10-01,grain,6545
|
||||
2001-11-01,grain,6789
|
||||
2001-12-01,grain,7180
|
||||
2002-01-01,grain,5117
|
||||
2002-02-01,grain,5442
|
||||
2002-03-01,grain,6337
|
||||
2002-04-01,grain,6525
|
||||
2002-05-01,grain,7216
|
||||
2002-06-01,grain,6761
|
||||
2002-07-01,grain,6958
|
||||
2002-08-01,grain,7070
|
||||
2002-09-01,grain,6148
|
||||
2002-10-01,grain,6924
|
||||
2002-11-01,grain,6716
|
||||
2002-12-01,grain,7975
|
||||
2003-01-01,grain,5326
|
||||
2003-02-01,grain,5609
|
||||
2003-03-01,grain,6414
|
||||
2003-04-01,grain,6741
|
||||
2003-05-01,grain,7144
|
||||
2003-06-01,grain,7133
|
||||
2003-07-01,grain,7568
|
||||
2003-08-01,grain,7266
|
||||
2003-09-01,grain,6634
|
||||
2003-10-01,grain,7626
|
||||
2003-11-01,grain,6843
|
||||
2003-12-01,grain,8540
|
||||
2004-01-01,grain,5629
|
||||
2004-02-01,grain,5898
|
||||
2004-03-01,grain,7045
|
||||
2004-04-01,grain,7094
|
||||
2004-05-01,grain,7333
|
||||
2004-06-01,grain,7918
|
||||
2004-07-01,grain,7289
|
||||
2004-08-01,grain,7396
|
||||
2004-09-01,grain,7259
|
||||
2004-10-01,grain,7268
|
||||
2004-11-01,grain,7731
|
||||
2004-12-01,grain,9058
|
||||
2005-01-01,grain,5557
|
||||
2005-02-01,grain,6237
|
||||
2005-03-01,grain,7723
|
||||
2005-04-01,grain,7262
|
||||
2005-05-01,grain,8241
|
||||
2005-06-01,grain,8757
|
||||
2005-07-01,grain,7352
|
||||
2005-08-01,grain,8496
|
||||
2005-09-01,grain,7741
|
||||
2005-10-01,grain,7710
|
||||
2005-11-01,grain,8247
|
||||
2005-12-01,grain,8902
|
||||
2006-01-01,grain,6066
|
||||
2006-02-01,grain,6590
|
||||
2006-03-01,grain,7923
|
||||
2006-04-01,grain,7335
|
||||
2006-05-01,grain,8843
|
||||
2006-06-01,grain,9327
|
||||
2006-07-01,grain,7792
|
||||
2006-08-01,grain,9156
|
||||
2006-09-01,grain,8037
|
||||
2006-10-01,grain,8640
|
||||
2006-11-01,grain,9128
|
||||
2006-12-01,grain,9545
|
||||
2007-01-01,grain,6627
|
||||
2007-02-01,grain,6743
|
||||
2007-03-01,grain,8195
|
||||
2007-04-01,grain,7828
|
||||
2007-05-01,grain,9570
|
||||
2007-06-01,grain,9484
|
||||
2007-07-01,grain,8608
|
||||
2007-08-01,grain,9543
|
||||
2007-09-01,grain,8123
|
||||
2007-10-01,grain,9649
|
||||
2007-11-01,grain,9390
|
||||
2007-12-01,grain,10065
|
||||
2008-01-01,grain,7093
|
||||
2008-02-01,grain,7483
|
||||
2008-03-01,grain,8365
|
||||
2008-04-01,grain,8895
|
||||
2008-05-01,grain,9794
|
||||
2008-06-01,grain,9977
|
||||
2008-07-01,grain,9553
|
||||
2008-08-01,grain,9375
|
||||
2008-09-01,grain,9225
|
||||
2008-10-01,grain,9948
|
||||
2008-11-01,grain,8758
|
||||
2008-12-01,grain,10839
|
||||
2009-01-01,grain,7266
|
||||
2009-02-01,grain,7578
|
||||
2009-03-01,grain,8688
|
||||
2009-04-01,grain,9162
|
||||
2009-05-01,grain,9369
|
||||
2009-06-01,grain,10167
|
||||
2009-07-01,grain,9507
|
||||
2009-08-01,grain,8923
|
||||
2009-09-01,grain,9272
|
||||
2009-10-01,grain,9075
|
||||
2009-11-01,grain,8949
|
||||
2009-12-01,grain,10843
|
||||
2010-01-01,grain,6558
|
||||
2010-02-01,grain,7481
|
||||
2010-03-01,grain,9475
|
||||
2010-04-01,grain,9424
|
||||
2010-05-01,grain,9351
|
||||
2010-06-01,grain,10552
|
||||
2010-07-01,grain,9077
|
||||
2010-08-01,grain,9273
|
||||
2010-09-01,grain,9420
|
||||
2010-10-01,grain,9413
|
||||
2010-11-01,grain,9866
|
||||
2010-12-01,grain,11455
|
||||
2011-01-01,grain,6901
|
||||
2011-02-01,grain,8014
|
||||
2011-03-01,grain,9832
|
||||
2011-04-01,grain,9281
|
||||
2011-05-01,grain,9967
|
||||
2011-06-01,grain,11344
|
||||
2011-07-01,grain,9106
|
||||
2011-08-01,grain,10469
|
||||
2011-09-01,grain,10085
|
||||
2011-10-01,grain,9612
|
||||
2011-11-01,grain,10328
|
||||
2011-12-01,grain,11483
|
||||
2012-01-01,grain,7486
|
||||
2012-02-01,grain,8641
|
||||
2012-03-01,grain,9709
|
||||
2012-04-01,grain,9423
|
||||
2012-05-01,grain,11342
|
||||
2012-06-01,grain,11274
|
||||
2012-07-01,grain,9845
|
||||
2012-08-01,grain,11163
|
||||
2012-09-01,grain,9532
|
||||
2012-10-01,grain,10754
|
||||
2012-11-01,grain,10953
|
||||
2012-12-01,grain,11922
|
||||
2013-01-01,grain,8395
|
||||
2013-02-01,grain,8888
|
||||
2013-03-01,grain,10110
|
||||
2013-04-01,grain,10493
|
||||
2013-05-01,grain,12218
|
||||
2013-06-01,grain,11385
|
||||
2013-07-01,grain,11186
|
||||
2013-08-01,grain,11462
|
||||
2013-09-01,grain,10494
|
||||
2013-10-01,grain,11540
|
||||
2013-11-01,grain,11138
|
||||
2013-12-01,grain,12709
|
||||
2014-01-01,grain,8557
|
||||
2014-02-01,grain,9059
|
||||
2014-03-01,grain,10055
|
||||
2014-04-01,grain,10977
|
||||
2014-05-01,grain,11792
|
||||
2014-06-01,grain,11904
|
||||
2014-07-01,grain,10965
|
||||
2014-08-01,grain,10981
|
||||
2014-09-01,grain,10828
|
||||
2014-10-01,grain,11817
|
||||
2014-11-01,grain,10470
|
||||
2014-12-01,grain,13310
|
||||
2015-01-01,grain,8400
|
||||
2015-02-01,grain,9062
|
||||
2015-03-01,grain,10722
|
||||
2015-04-01,grain,11107
|
||||
2015-05-01,grain,11508
|
||||
2015-06-01,grain,12904
|
||||
2015-07-01,grain,11869
|
||||
2015-08-01,grain,11224
|
||||
2015-09-01,grain,12022
|
||||
2015-10-01,grain,11983
|
||||
2015-11-01,grain,11506
|
||||
2015-12-01,grain,14183
|
||||
2016-01-01,grain,8650
|
||||
2016-02-01,grain,10323
|
||||
2016-03-01,grain,12110
|
||||
2016-04-01,grain,11424
|
||||
2016-05-01,grain,12243
|
||||
2016-06-01,grain,13686
|
||||
2016-07-01,grain,10956
|
||||
2016-08-01,grain,12706
|
||||
2016-09-01,grain,12279
|
||||
2016-10-01,grain,11914
|
||||
2016-11-01,grain,13025
|
||||
2016-12-01,grain,14431
|
||||
|
@@ -1,4 +0,0 @@
|
||||
name: auto-ml-forecasting-beer-remote
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
@@ -64,15 +64,16 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import azureml.core\n",
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"import json\n",
|
||||
"import logging\n",
|
||||
"\n",
|
||||
"from azureml.core import Workspace, Experiment, Dataset\n",
|
||||
"from azureml.train.automl import AutoMLConfig\n",
|
||||
"from datetime import datetime\n",
|
||||
"from azureml.automl.core.featurization import FeaturizationConfig"
|
||||
"\n",
|
||||
"import azureml.core\n",
|
||||
"import numpy as np\n",
|
||||
"import pandas as pd\n",
|
||||
"from azureml.automl.core.featurization import FeaturizationConfig\n",
|
||||
"from azureml.core import Dataset, Experiment, Workspace\n",
|
||||
"from azureml.train.automl import AutoMLConfig\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -88,7 +89,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(\"This notebook was created using version 1.37.0 of the Azure ML SDK\")\n",
|
||||
"print(\"This notebook was created using version 1.39.0 of the Azure ML SDK\")\n",
|
||||
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
||||
]
|
||||
},
|
||||
@@ -119,7 +120,7 @@
|
||||
"output[\"Resource Group\"] = ws.resource_group\n",
|
||||
"output[\"Location\"] = ws.location\n",
|
||||
"output[\"Run History Name\"] = experiment_name\n",
|
||||
"pd.set_option(\"display.max_colwidth\", -1)\n",
|
||||
"pd.set_option(\"display.max_colwidth\", None)\n",
|
||||
"outputDf = pd.DataFrame(data=output, index=[\"\"])\n",
|
||||
"outputDf.T"
|
||||
]
|
||||
@@ -398,8 +399,8 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Retrieve the Best Model\n",
|
||||
"Below we select the best model from all the training iterations using get_output method."
|
||||
"### Retrieve the Best Run details\n",
|
||||
"Below we retrieve the best Run object from among all the runs in the experiment."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -408,8 +409,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"best_run, fitted_model = remote_run.get_output()\n",
|
||||
"fitted_model.steps"
|
||||
"best_run = remote_run.get_best_child()\n",
|
||||
"best_run"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -418,7 +419,7 @@
|
||||
"source": [
|
||||
"## Featurization\n",
|
||||
"\n",
|
||||
"You can access the engineered feature names generated in time-series featurization. Note that a number of named holiday periods are represented. We recommend that you have at least one year of data when using this feature to ensure that all yearly holidays are captured in the training featurization."
|
||||
"We can look at the engineered feature names generated in time-series featurization via. the JSON file named 'engineered_feature_names.json' under the run outputs. Note that a number of named holiday periods are represented. We recommend that you have at least one year of data when using this feature to ensure that all yearly holidays are captured in the training featurization."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -427,7 +428,12 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"fitted_model.named_steps[\"timeseriestransformer\"].get_engineered_feature_names()"
|
||||
"# Download the JSON file locally\n",
|
||||
"best_run.download_file(\"outputs/engineered_feature_names.json\", \"engineered_feature_names.json\")\n",
|
||||
"with open(\"engineered_feature_names.json\", \"r\") as f:\n",
|
||||
" records = json.load(f)\n",
|
||||
"\n",
|
||||
"records"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -451,12 +457,16 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Get the featurization summary as a list of JSON\n",
|
||||
"featurization_summary = fitted_model.named_steps[\n",
|
||||
" \"timeseriestransformer\"\n",
|
||||
"].get_featurization_summary()\n",
|
||||
"# View the featurization summary as a pandas dataframe\n",
|
||||
"pd.DataFrame.from_records(featurization_summary)"
|
||||
"# Download the featurization summary JSON file locally\n",
|
||||
"best_run.download_file(\"outputs/featurization_summary.json\", \"featurization_summary.json\")\n",
|
||||
"\n",
|
||||
"# Render the JSON as a pandas DataFrame\n",
|
||||
"with open(\"featurization_summary.json\", \"r\") as f:\n",
|
||||
" records = json.load(f)\n",
|
||||
"fs = pd.DataFrame.from_records(records)\n",
|
||||
"\n",
|
||||
"# View a summary of the featurization \n",
|
||||
"fs[[\"RawFeatureName\", \"TypeDetected\", \"Dropped\", \"EngineeredFeatureCount\", \"Transformations\"]]"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -68,6 +68,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import json\n",
|
||||
"import logging\n",
|
||||
"\n",
|
||||
"from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score\n",
|
||||
@@ -99,7 +100,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(\"This notebook was created using version 1.37.0 of the Azure ML SDK\")\n",
|
||||
"print(\"This notebook was created using version 1.39.0 of the Azure ML SDK\")\n",
|
||||
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
||||
]
|
||||
},
|
||||
@@ -398,8 +399,8 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Retrieve the Best Model\n",
|
||||
"Below we select the best model from all the training iterations using get_output method."
|
||||
"### Retrieve the Best Run details\n",
|
||||
"Below we retrieve the best Run object from among all the runs in the experiment."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -408,8 +409,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"best_run, fitted_model = remote_run.get_output()\n",
|
||||
"fitted_model.steps"
|
||||
"best_run = remote_run.get_best_child()\n",
|
||||
"best_run"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -417,7 +418,7 @@
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Featurization\n",
|
||||
"You can access the engineered feature names generated in time-series featurization."
|
||||
"We can look at the engineered feature names generated in time-series featurization via. the JSON file named 'engineered_feature_names.json' under the run outputs. "
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -426,7 +427,12 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"fitted_model.named_steps[\"timeseriestransformer\"].get_engineered_feature_names()"
|
||||
"# Download the JSON file locally\n",
|
||||
"best_run.download_file(\"outputs/engineered_feature_names.json\", \"engineered_feature_names.json\")\n",
|
||||
"with open(\"engineered_feature_names.json\", \"r\") as f:\n",
|
||||
" records = json.load(f)\n",
|
||||
"\n",
|
||||
"records"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -449,12 +455,16 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Get the featurization summary as a list of JSON\n",
|
||||
"featurization_summary = fitted_model.named_steps[\n",
|
||||
" \"timeseriestransformer\"\n",
|
||||
"].get_featurization_summary()\n",
|
||||
"# View the featurization summary as a pandas dataframe\n",
|
||||
"pd.DataFrame.from_records(featurization_summary)"
|
||||
"# Download the featurization summary JSON file locally\n",
|
||||
"best_run.download_file(\"outputs/featurization_summary.json\", \"featurization_summary.json\")\n",
|
||||
"\n",
|
||||
"# Render the JSON as a pandas DataFrame\n",
|
||||
"with open(\"featurization_summary.json\", \"r\") as f:\n",
|
||||
" records = json.load(f)\n",
|
||||
"fs = pd.DataFrame.from_records(records)\n",
|
||||
"\n",
|
||||
"# View a summary of the featurization \n",
|
||||
"fs[[\"RawFeatureName\", \"TypeDetected\", \"Dropped\", \"EngineeredFeatureCount\", \"Transformations\"]]"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -641,7 +651,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Retrieve the Best Model"
|
||||
"### Retrieve the Best Run details"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -650,7 +660,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"best_run_lags, fitted_model_lags = advanced_remote_run.get_output()"
|
||||
"best_run_lags = remote_run.get_best_child()\n",
|
||||
"best_run_lags"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -94,7 +94,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(\"This notebook was created using version 1.37.0 of the Azure ML SDK\")\n",
|
||||
"print(\"This notebook was created using version 1.39.0 of the Azure ML SDK\")\n",
|
||||
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -30,7 +30,7 @@
|
||||
},
|
||||
"source": [
|
||||
"# Automated Machine Learning\n",
|
||||
"**Beer Production Forecasting**\n",
|
||||
"**Github DAU Forecasting**\n",
|
||||
"\n",
|
||||
"## Contents\n",
|
||||
"1. [Introduction](#Introduction)\n",
|
||||
@@ -48,7 +48,7 @@
|
||||
},
|
||||
"source": [
|
||||
"## Introduction\n",
|
||||
"This notebook demonstrates demand forecasting for Beer Production Dataset using AutoML.\n",
|
||||
"This notebook demonstrates demand forecasting for Github Daily Active Users Dataset using AutoML.\n",
|
||||
"\n",
|
||||
"AutoML highlights here include using Deep Learning forecasts, Arima, Prophet, Remote Execution and Remote Inferencing, and working with the `forecast` function. Please also look at the additional forecasting notebooks, which document lagging, rolling windows, forecast quantiles, other ways to use the forecast function, and forecaster deployment.\n",
|
||||
"\n",
|
||||
@@ -104,7 +104,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This sample notebook may use features that are not available in previous versions of the Azure ML SDK."
|
||||
"This notebook is compatible with Azure ML SDK version 1.35.0 or later."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -113,7 +113,6 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(\"This notebook was created using version 1.37.0 of the Azure ML SDK\")\n",
|
||||
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
|
||||
]
|
||||
},
|
||||
@@ -139,7 +138,7 @@
|
||||
"ws = Workspace.from_config()\n",
|
||||
"\n",
|
||||
"# choose a name for the run history container in the workspace\n",
|
||||
"experiment_name = \"beer-remote-cpu\"\n",
|
||||
"experiment_name = \"github-remote-cpu\"\n",
|
||||
"\n",
|
||||
"experiment = Experiment(ws, experiment_name)\n",
|
||||
"\n",
|
||||
@@ -149,7 +148,7 @@
|
||||
"output[\"Resource Group\"] = ws.resource_group\n",
|
||||
"output[\"Location\"] = ws.location\n",
|
||||
"output[\"Run History Name\"] = experiment_name\n",
|
||||
"pd.set_option(\"display.max_colwidth\", -1)\n",
|
||||
"pd.set_option(\"display.max_colwidth\", None)\n",
|
||||
"outputDf = pd.DataFrame(data=output, index=[\"\"])\n",
|
||||
"outputDf.T"
|
||||
]
|
||||
@@ -180,7 +179,7 @@
|
||||
"from azureml.core.compute_target import ComputeTargetException\n",
|
||||
"\n",
|
||||
"# Choose a name for your CPU cluster\n",
|
||||
"cpu_cluster_name = \"beer-cluster\"\n",
|
||||
"cpu_cluster_name = \"github-cluster\"\n",
|
||||
"\n",
|
||||
"# Verify that cluster does not exist already\n",
|
||||
"try:\n",
|
||||
@@ -203,7 +202,7 @@
|
||||
},
|
||||
"source": [
|
||||
"## Data\n",
|
||||
"Read Beer demand data from file, and preview data."
|
||||
"Read Github DAU data from file, and preview data."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -246,21 +245,19 @@
|
||||
"plt.tight_layout()\n",
|
||||
"\n",
|
||||
"plt.subplot(2, 1, 1)\n",
|
||||
"plt.title(\"Beer Production By Year\")\n",
|
||||
"df = pd.read_csv(\n",
|
||||
" \"Beer_no_valid_split_train.csv\", parse_dates=True, index_col=\"DATE\"\n",
|
||||
").drop(columns=\"grain\")\n",
|
||||
"plt.title(\"Github Daily Active User By Year\")\n",
|
||||
"df = pd.read_csv(\"github_dau_2011-2018_train.csv\", parse_dates=True, index_col=\"date\")\n",
|
||||
"test_df = pd.read_csv(\n",
|
||||
" \"Beer_no_valid_split_test.csv\", parse_dates=True, index_col=\"DATE\"\n",
|
||||
").drop(columns=\"grain\")\n",
|
||||
" \"github_dau_2011-2018_test.csv\", parse_dates=True, index_col=\"date\"\n",
|
||||
")\n",
|
||||
"plt.plot(df)\n",
|
||||
"\n",
|
||||
"plt.subplot(2, 1, 2)\n",
|
||||
"plt.title(\"Beer Production By Month\")\n",
|
||||
"plt.title(\"Github Daily Active User By Month\")\n",
|
||||
"groups = df.groupby(df.index.month)\n",
|
||||
"months = concat([DataFrame(x[1].values) for x in groups], axis=1)\n",
|
||||
"months = DataFrame(months)\n",
|
||||
"months.columns = range(1, 13)\n",
|
||||
"months.columns = range(1, 49)\n",
|
||||
"months.boxplot()\n",
|
||||
"\n",
|
||||
"plt.show()"
|
||||
@@ -275,10 +272,10 @@
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"target_column_name = \"BeerProduction\"\n",
|
||||
"time_column_name = \"DATE\"\n",
|
||||
"target_column_name = \"count\"\n",
|
||||
"time_column_name = \"date\"\n",
|
||||
"time_series_id_column_names = []\n",
|
||||
"freq = \"M\" # Monthly data"
|
||||
"freq = \"D\" # Daily data"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -308,19 +305,19 @@
|
||||
"datastore = ws.get_default_datastore()\n",
|
||||
"datastore.upload_files(\n",
|
||||
" files=[\"./train.csv\"],\n",
|
||||
" target_path=\"beer-dataset/tabular/\",\n",
|
||||
" target_path=\"github-dataset/tabular/\",\n",
|
||||
" overwrite=True,\n",
|
||||
" show_progress=True,\n",
|
||||
")\n",
|
||||
"datastore.upload_files(\n",
|
||||
" files=[\"./valid.csv\"],\n",
|
||||
" target_path=\"beer-dataset/tabular/\",\n",
|
||||
" target_path=\"github-dataset/tabular/\",\n",
|
||||
" overwrite=True,\n",
|
||||
" show_progress=True,\n",
|
||||
")\n",
|
||||
"datastore.upload_files(\n",
|
||||
" files=[\"./test.csv\"],\n",
|
||||
" target_path=\"beer-dataset/tabular/\",\n",
|
||||
" target_path=\"github-dataset/tabular/\",\n",
|
||||
" overwrite=True,\n",
|
||||
" show_progress=True,\n",
|
||||
")\n",
|
||||
@@ -328,13 +325,13 @@
|
||||
"from azureml.core import Dataset\n",
|
||||
"\n",
|
||||
"train_dataset = Dataset.Tabular.from_delimited_files(\n",
|
||||
" path=[(datastore, \"beer-dataset/tabular/train.csv\")]\n",
|
||||
" path=[(datastore, \"github-dataset/tabular/train.csv\")]\n",
|
||||
")\n",
|
||||
"valid_dataset = Dataset.Tabular.from_delimited_files(\n",
|
||||
" path=[(datastore, \"beer-dataset/tabular/valid.csv\")]\n",
|
||||
" path=[(datastore, \"github-dataset/tabular/valid.csv\")]\n",
|
||||
")\n",
|
||||
"test_dataset = Dataset.Tabular.from_delimited_files(\n",
|
||||
" path=[(datastore, \"beer-dataset/tabular/test.csv\")]\n",
|
||||
" path=[(datastore, \"github-dataset/tabular/test.csv\")]\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -397,7 +394,7 @@
|
||||
"forecasting_parameters = ForecastingParameters(\n",
|
||||
" time_column_name=time_column_name,\n",
|
||||
" forecast_horizon=forecast_horizon,\n",
|
||||
" freq=\"MS\", # Set the forecast frequency to be monthly (start of the month)\n",
|
||||
" freq=\"D\", # Set the forecast frequency to be daily\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# We will disable the enable_early_stopping flag to ensure the DNN model is recommended for demonstration purpose.\n",
|
||||
@@ -570,7 +567,7 @@
|
||||
"from azureml.core import Dataset\n",
|
||||
"\n",
|
||||
"test_dataset = Dataset.Tabular.from_delimited_files(\n",
|
||||
" path=[(datastore, \"beer-dataset/tabular/test.csv\")]\n",
|
||||
" path=[(datastore, \"github-dataset/tabular/test.csv\")]\n",
|
||||
")\n",
|
||||
"# preview the first 3 rows of the dataset\n",
|
||||
"test_dataset.take(5).to_pandas_dataframe()"
|
||||
@@ -582,7 +579,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"compute_target = ws.compute_targets[\"beer-cluster\"]\n",
|
||||
"compute_target = ws.compute_targets[\"github-cluster\"]\n",
|
||||
"test_experiment = Experiment(ws, experiment_name + \"_test\")"
|
||||
]
|
||||
},
|
||||
@@ -0,0 +1,4 @@
|
||||
name: auto-ml-forecasting-github-dau
|
||||
dependencies:
|
||||
- pip:
|
||||
- azureml-sdk
|
||||
@@ -0,0 +1,455 @@
|
||||
date,count,day_of_week,month_of_year,holiday
|
||||
2017-06-04,104663,6.0,5.0,0.0
|
||||
2017-06-05,155824,0.0,5.0,0.0
|
||||
2017-06-06,164908,1.0,5.0,0.0
|
||||
2017-06-07,170309,2.0,5.0,0.0
|
||||
2017-06-08,164256,3.0,5.0,0.0
|
||||
2017-06-09,153406,4.0,5.0,0.0
|
||||
2017-06-10,97024,5.0,5.0,0.0
|
||||
2017-06-11,103442,6.0,5.0,0.0
|
||||
2017-06-12,160768,0.0,5.0,0.0
|
||||
2017-06-13,166288,1.0,5.0,0.0
|
||||
2017-06-14,163819,2.0,5.0,0.0
|
||||
2017-06-15,157593,3.0,5.0,0.0
|
||||
2017-06-16,149259,4.0,5.0,0.0
|
||||
2017-06-17,95579,5.0,5.0,0.0
|
||||
2017-06-18,98723,6.0,5.0,0.0
|
||||
2017-06-19,159076,0.0,5.0,0.0
|
||||
2017-06-20,163340,1.0,5.0,0.0
|
||||
2017-06-21,163344,2.0,5.0,0.0
|
||||
2017-06-22,159528,3.0,5.0,0.0
|
||||
2017-06-23,146563,4.0,5.0,0.0
|
||||
2017-06-24,92631,5.0,5.0,0.0
|
||||
2017-06-25,96549,6.0,5.0,0.0
|
||||
2017-06-26,153249,0.0,5.0,0.0
|
||||
2017-06-27,160357,1.0,5.0,0.0
|
||||
2017-06-28,159941,2.0,5.0,0.0
|
||||
2017-06-29,156781,3.0,5.0,0.0
|
||||
2017-06-30,144709,4.0,5.0,0.0
|
||||
2017-07-01,89101,5.0,6.0,0.0
|
||||
2017-07-02,93046,6.0,6.0,0.0
|
||||
2017-07-03,144113,0.0,6.0,0.0
|
||||
2017-07-04,143061,1.0,6.0,1.0
|
||||
2017-07-05,154603,2.0,6.0,0.0
|
||||
2017-07-06,157200,3.0,6.0,0.0
|
||||
2017-07-07,147213,4.0,6.0,0.0
|
||||
2017-07-08,92348,5.0,6.0,0.0
|
||||
2017-07-09,97018,6.0,6.0,0.0
|
||||
2017-07-10,157192,0.0,6.0,0.0
|
||||
2017-07-11,161819,1.0,6.0,0.0
|
||||
2017-07-12,161998,2.0,6.0,0.0
|
||||
2017-07-13,160280,3.0,6.0,0.0
|
||||
2017-07-14,146818,4.0,6.0,0.0
|
||||
2017-07-15,93041,5.0,6.0,0.0
|
||||
2017-07-16,97505,6.0,6.0,0.0
|
||||
2017-07-17,156167,0.0,6.0,0.0
|
||||
2017-07-18,162855,1.0,6.0,0.0
|
||||
2017-07-19,162519,2.0,6.0,0.0
|
||||
2017-07-20,159941,3.0,6.0,0.0
|
||||
2017-07-21,148460,4.0,6.0,0.0
|
||||
2017-07-22,93431,5.0,6.0,0.0
|
||||
2017-07-23,98553,6.0,6.0,0.0
|
||||
2017-07-24,156202,0.0,6.0,0.0
|
||||
2017-07-25,162503,1.0,6.0,0.0
|
||||
2017-07-26,158479,2.0,6.0,0.0
|
||||
2017-07-27,158192,3.0,6.0,0.0
|
||||
2017-07-28,147108,4.0,6.0,0.0
|
||||
2017-07-29,93799,5.0,6.0,0.0
|
||||
2017-07-30,97920,6.0,6.0,0.0
|
||||
2017-07-31,152197,0.0,6.0,0.0
|
||||
2017-08-01,158477,1.0,7.0,0.0
|
||||
2017-08-02,159089,2.0,7.0,0.0
|
||||
2017-08-03,157182,3.0,7.0,0.0
|
||||
2017-08-04,146345,4.0,7.0,0.0
|
||||
2017-08-05,92534,5.0,7.0,0.0
|
||||
2017-08-06,97128,6.0,7.0,0.0
|
||||
2017-08-07,151359,0.0,7.0,0.0
|
||||
2017-08-08,159895,1.0,7.0,0.0
|
||||
2017-08-09,158329,2.0,7.0,0.0
|
||||
2017-08-10,155468,3.0,7.0,0.0
|
||||
2017-08-11,144914,4.0,7.0,0.0
|
||||
2017-08-12,92258,5.0,7.0,0.0
|
||||
2017-08-13,95933,6.0,7.0,0.0
|
||||
2017-08-14,147706,0.0,7.0,0.0
|
||||
2017-08-15,151115,1.0,7.0,0.0
|
||||
2017-08-16,157640,2.0,7.0,0.0
|
||||
2017-08-17,156600,3.0,7.0,0.0
|
||||
2017-08-18,146980,4.0,7.0,0.0
|
||||
2017-08-19,94592,5.0,7.0,0.0
|
||||
2017-08-20,99320,6.0,7.0,0.0
|
||||
2017-08-21,145727,0.0,7.0,0.0
|
||||
2017-08-22,160260,1.0,7.0,0.0
|
||||
2017-08-23,160440,2.0,7.0,0.0
|
||||
2017-08-24,157830,3.0,7.0,0.0
|
||||
2017-08-25,145822,4.0,7.0,0.0
|
||||
2017-08-26,94706,5.0,7.0,0.0
|
||||
2017-08-27,99047,6.0,7.0,0.0
|
||||
2017-08-28,152112,0.0,7.0,0.0
|
||||
2017-08-29,162440,1.0,7.0,0.0
|
||||
2017-08-30,162902,2.0,7.0,0.0
|
||||
2017-08-31,159498,3.0,7.0,0.0
|
||||
2017-09-01,145689,4.0,8.0,0.0
|
||||
2017-09-02,93589,5.0,8.0,0.0
|
||||
2017-09-03,100058,6.0,8.0,0.0
|
||||
2017-09-04,140865,0.0,8.0,1.0
|
||||
2017-09-05,165715,1.0,8.0,0.0
|
||||
2017-09-06,167463,2.0,8.0,0.0
|
||||
2017-09-07,164811,3.0,8.0,0.0
|
||||
2017-09-08,156157,4.0,8.0,0.0
|
||||
2017-09-09,101358,5.0,8.0,0.0
|
||||
2017-09-10,107915,6.0,8.0,0.0
|
||||
2017-09-11,167845,0.0,8.0,0.0
|
||||
2017-09-12,172756,1.0,8.0,0.0
|
||||
2017-09-13,172851,2.0,8.0,0.0
|
||||
2017-09-14,171675,3.0,8.0,0.0
|
||||
2017-09-15,159266,4.0,8.0,0.0
|
||||
2017-09-16,103547,5.0,8.0,0.0
|
||||
2017-09-17,110964,6.0,8.0,0.0
|
||||
2017-09-18,170976,0.0,8.0,0.0
|
||||
2017-09-19,177864,1.0,8.0,0.0
|
||||
2017-09-20,173567,2.0,8.0,0.0
|
||||
2017-09-21,172017,3.0,8.0,0.0
|
||||
2017-09-22,161357,4.0,8.0,0.0
|
||||
2017-09-23,104681,5.0,8.0,0.0
|
||||
2017-09-24,111711,6.0,8.0,0.0
|
||||
2017-09-25,173517,0.0,8.0,0.0
|
||||
2017-09-26,180049,1.0,8.0,0.0
|
||||
2017-09-27,178307,2.0,8.0,0.0
|
||||
2017-09-28,174157,3.0,8.0,0.0
|
||||
2017-09-29,161707,4.0,8.0,0.0
|
||||
2017-09-30,110536,5.0,8.0,0.0
|
||||
2017-10-01,106505,6.0,9.0,0.0
|
||||
2017-10-02,157565,0.0,9.0,0.0
|
||||
2017-10-03,164764,1.0,9.0,0.0
|
||||
2017-10-04,163383,2.0,9.0,0.0
|
||||
2017-10-05,162847,3.0,9.0,0.0
|
||||
2017-10-06,153575,4.0,9.0,0.0
|
||||
2017-10-07,107472,5.0,9.0,0.0
|
||||
2017-10-08,116127,6.0,9.0,0.0
|
||||
2017-10-09,174457,0.0,9.0,1.0
|
||||
2017-10-10,185217,1.0,9.0,0.0
|
||||
2017-10-11,185120,2.0,9.0,0.0
|
||||
2017-10-12,180844,3.0,9.0,0.0
|
||||
2017-10-13,170178,4.0,9.0,0.0
|
||||
2017-10-14,112754,5.0,9.0,0.0
|
||||
2017-10-15,121251,6.0,9.0,0.0
|
||||
2017-10-16,183906,0.0,9.0,0.0
|
||||
2017-10-17,188945,1.0,9.0,0.0
|
||||
2017-10-18,187297,2.0,9.0,0.0
|
||||
2017-10-19,183867,3.0,9.0,0.0
|
||||
2017-10-20,173021,4.0,9.0,0.0
|
||||
2017-10-21,115851,5.0,9.0,0.0
|
||||
2017-10-22,126088,6.0,9.0,0.0
|
||||
2017-10-23,189452,0.0,9.0,0.0
|
||||
2017-10-24,194412,1.0,9.0,0.0
|
||||
2017-10-25,192293,2.0,9.0,0.0
|
||||
2017-10-26,190163,3.0,9.0,0.0
|
||||
2017-10-27,177053,4.0,9.0,0.0
|
||||
2017-10-28,114934,5.0,9.0,0.0
|
||||
2017-10-29,125289,6.0,9.0,0.0
|
||||
2017-10-30,189245,0.0,9.0,0.0
|
||||
2017-10-31,191480,1.0,9.0,0.0
|
||||
2017-11-01,182281,2.0,10.0,0.0
|
||||
2017-11-02,186351,3.0,10.0,0.0
|
||||
2017-11-03,175422,4.0,10.0,0.0
|
||||
2017-11-04,118160,5.0,10.0,0.0
|
||||
2017-11-05,127602,6.0,10.0,0.0
|
||||
2017-11-06,191067,0.0,10.0,0.0
|
||||
2017-11-07,197083,1.0,10.0,0.0
|
||||
2017-11-08,194333,2.0,10.0,0.0
|
||||
2017-11-09,193914,3.0,10.0,0.0
|
||||
2017-11-10,179933,4.0,10.0,1.0
|
||||
2017-11-11,121346,5.0,10.0,0.0
|
||||
2017-11-12,131900,6.0,10.0,0.0
|
||||
2017-11-13,196969,0.0,10.0,0.0
|
||||
2017-11-14,201949,1.0,10.0,0.0
|
||||
2017-11-15,198424,2.0,10.0,0.0
|
||||
2017-11-16,196902,3.0,10.0,0.0
|
||||
2017-11-17,183893,4.0,10.0,0.0
|
||||
2017-11-18,122767,5.0,10.0,0.0
|
||||
2017-11-19,130890,6.0,10.0,0.0
|
||||
2017-11-20,194515,0.0,10.0,0.0
|
||||
2017-11-21,198601,1.0,10.0,0.0
|
||||
2017-11-22,191041,2.0,10.0,0.0
|
||||
2017-11-23,170321,3.0,10.0,1.0
|
||||
2017-11-24,155623,4.0,10.0,0.0
|
||||
2017-11-25,115759,5.0,10.0,0.0
|
||||
2017-11-26,128771,6.0,10.0,0.0
|
||||
2017-11-27,199419,0.0,10.0,0.0
|
||||
2017-11-28,207253,1.0,10.0,0.0
|
||||
2017-11-29,205406,2.0,10.0,0.0
|
||||
2017-11-30,200674,3.0,10.0,0.0
|
||||
2017-12-01,187017,4.0,11.0,0.0
|
||||
2017-12-02,129735,5.0,11.0,0.0
|
||||
2017-12-03,139120,6.0,11.0,0.0
|
||||
2017-12-04,205505,0.0,11.0,0.0
|
||||
2017-12-05,208218,1.0,11.0,0.0
|
||||
2017-12-06,202480,2.0,11.0,0.0
|
||||
2017-12-07,197822,3.0,11.0,0.0
|
||||
2017-12-08,180686,4.0,11.0,0.0
|
||||
2017-12-09,123667,5.0,11.0,0.0
|
||||
2017-12-10,130987,6.0,11.0,0.0
|
||||
2017-12-11,193901,0.0,11.0,0.0
|
||||
2017-12-12,194997,1.0,11.0,0.0
|
||||
2017-12-13,192063,2.0,11.0,0.0
|
||||
2017-12-14,186496,3.0,11.0,0.0
|
||||
2017-12-15,170812,4.0,11.0,0.0
|
||||
2017-12-16,110474,5.0,11.0,0.0
|
||||
2017-12-17,118165,6.0,11.0,0.0
|
||||
2017-12-18,176843,0.0,11.0,0.0
|
||||
2017-12-19,179550,1.0,11.0,0.0
|
||||
2017-12-20,173506,2.0,11.0,0.0
|
||||
2017-12-21,165910,3.0,11.0,0.0
|
||||
2017-12-22,145886,4.0,11.0,0.0
|
||||
2017-12-23,95246,5.0,11.0,0.0
|
||||
2017-12-24,88781,6.0,11.0,0.0
|
||||
2017-12-25,98189,0.0,11.0,1.0
|
||||
2017-12-26,121383,1.0,11.0,0.0
|
||||
2017-12-27,135300,2.0,11.0,0.0
|
||||
2017-12-28,136827,3.0,11.0,0.0
|
||||
2017-12-29,127700,4.0,11.0,0.0
|
||||
2017-12-30,93014,5.0,11.0,0.0
|
||||
2017-12-31,82878,6.0,11.0,0.0
|
||||
2018-01-01,86419,0.0,0.0,1.0
|
||||
2018-01-02,147428,1.0,0.0,0.0
|
||||
2018-01-03,162193,2.0,0.0,0.0
|
||||
2018-01-04,163784,3.0,0.0,0.0
|
||||
2018-01-05,158606,4.0,0.0,0.0
|
||||
2018-01-06,113467,5.0,0.0,0.0
|
||||
2018-01-07,118313,6.0,0.0,0.0
|
||||
2018-01-08,175623,0.0,0.0,0.0
|
||||
2018-01-09,183880,1.0,0.0,0.0
|
||||
2018-01-10,183945,2.0,0.0,0.0
|
||||
2018-01-11,181769,3.0,0.0,0.0
|
||||
2018-01-12,170552,4.0,0.0,0.0
|
||||
2018-01-13,115707,5.0,0.0,0.0
|
||||
2018-01-14,121191,6.0,0.0,0.0
|
||||
2018-01-15,176127,0.0,0.0,1.0
|
||||
2018-01-16,188032,1.0,0.0,0.0
|
||||
2018-01-17,189871,2.0,0.0,0.0
|
||||
2018-01-18,189348,3.0,0.0,0.0
|
||||
2018-01-19,177456,4.0,0.0,0.0
|
||||
2018-01-20,123321,5.0,0.0,0.0
|
||||
2018-01-21,128306,6.0,0.0,0.0
|
||||
2018-01-22,186132,0.0,0.0,0.0
|
||||
2018-01-23,197618,1.0,0.0,0.0
|
||||
2018-01-24,196402,2.0,0.0,0.0
|
||||
2018-01-25,192722,3.0,0.0,0.0
|
||||
2018-01-26,179415,4.0,0.0,0.0
|
||||
2018-01-27,125769,5.0,0.0,0.0
|
||||
2018-01-28,133306,6.0,0.0,0.0
|
||||
2018-01-29,194151,0.0,0.0,0.0
|
||||
2018-01-30,198680,1.0,0.0,0.0
|
||||
2018-01-31,198652,2.0,0.0,0.0
|
||||
2018-02-01,195472,3.0,1.0,0.0
|
||||
2018-02-02,183173,4.0,1.0,0.0
|
||||
2018-02-03,124276,5.0,1.0,0.0
|
||||
2018-02-04,129054,6.0,1.0,0.0
|
||||
2018-02-05,190024,0.0,1.0,0.0
|
||||
2018-02-06,198658,1.0,1.0,0.0
|
||||
2018-02-07,198272,2.0,1.0,0.0
|
||||
2018-02-08,195339,3.0,1.0,0.0
|
||||
2018-02-09,183086,4.0,1.0,0.0
|
||||
2018-02-10,122536,5.0,1.0,0.0
|
||||
2018-02-11,133033,6.0,1.0,0.0
|
||||
2018-02-12,185386,0.0,1.0,0.0
|
||||
2018-02-13,184789,1.0,1.0,0.0
|
||||
2018-02-14,176089,2.0,1.0,0.0
|
||||
2018-02-15,171317,3.0,1.0,0.0
|
||||
2018-02-16,162693,4.0,1.0,0.0
|
||||
2018-02-17,116342,5.0,1.0,0.0
|
||||
2018-02-18,122466,6.0,1.0,0.0
|
||||
2018-02-19,172364,0.0,1.0,1.0
|
||||
2018-02-20,185896,1.0,1.0,0.0
|
||||
2018-02-21,188166,2.0,1.0,0.0
|
||||
2018-02-22,189427,3.0,1.0,0.0
|
||||
2018-02-23,178732,4.0,1.0,0.0
|
||||
2018-02-24,132664,5.0,1.0,0.0
|
||||
2018-02-25,134008,6.0,1.0,0.0
|
||||
2018-02-26,200075,0.0,1.0,0.0
|
||||
2018-02-27,207996,1.0,1.0,0.0
|
||||
2018-02-28,204416,2.0,1.0,0.0
|
||||
2018-03-01,201320,3.0,2.0,0.0
|
||||
2018-03-02,188205,4.0,2.0,0.0
|
||||
2018-03-03,131162,5.0,2.0,0.0
|
||||
2018-03-04,138320,6.0,2.0,0.0
|
||||
2018-03-05,207326,0.0,2.0,0.0
|
||||
2018-03-06,212462,1.0,2.0,0.0
|
||||
2018-03-07,209357,2.0,2.0,0.0
|
||||
2018-03-08,194876,3.0,2.0,0.0
|
||||
2018-03-09,193761,4.0,2.0,0.0
|
||||
2018-03-10,133449,5.0,2.0,0.0
|
||||
2018-03-11,142258,6.0,2.0,0.0
|
||||
2018-03-12,208753,0.0,2.0,0.0
|
||||
2018-03-13,210602,1.0,2.0,0.0
|
||||
2018-03-14,214236,2.0,2.0,0.0
|
||||
2018-03-15,210761,3.0,2.0,0.0
|
||||
2018-03-16,196619,4.0,2.0,0.0
|
||||
2018-03-17,133056,5.0,2.0,0.0
|
||||
2018-03-18,141335,6.0,2.0,0.0
|
||||
2018-03-19,211580,0.0,2.0,0.0
|
||||
2018-03-20,219051,1.0,2.0,0.0
|
||||
2018-03-21,215435,2.0,2.0,0.0
|
||||
2018-03-22,211961,3.0,2.0,0.0
|
||||
2018-03-23,196009,4.0,2.0,0.0
|
||||
2018-03-24,132390,5.0,2.0,0.0
|
||||
2018-03-25,140021,6.0,2.0,0.0
|
||||
2018-03-26,205273,0.0,2.0,0.0
|
||||
2018-03-27,212686,1.0,2.0,0.0
|
||||
2018-03-28,210683,2.0,2.0,0.0
|
||||
2018-03-29,189044,3.0,2.0,0.0
|
||||
2018-03-30,170256,4.0,2.0,0.0
|
||||
2018-03-31,125999,5.0,2.0,0.0
|
||||
2018-04-01,126749,6.0,3.0,0.0
|
||||
2018-04-02,186546,0.0,3.0,0.0
|
||||
2018-04-03,207905,1.0,3.0,0.0
|
||||
2018-04-04,201528,2.0,3.0,0.0
|
||||
2018-04-05,188580,3.0,3.0,0.0
|
||||
2018-04-06,173714,4.0,3.0,0.0
|
||||
2018-04-07,125723,5.0,3.0,0.0
|
||||
2018-04-08,142545,6.0,3.0,0.0
|
||||
2018-04-09,204767,0.0,3.0,0.0
|
||||
2018-04-10,212048,1.0,3.0,0.0
|
||||
2018-04-11,210517,2.0,3.0,0.0
|
||||
2018-04-12,206924,3.0,3.0,0.0
|
||||
2018-04-13,191679,4.0,3.0,0.0
|
||||
2018-04-14,126394,5.0,3.0,0.0
|
||||
2018-04-15,137279,6.0,3.0,0.0
|
||||
2018-04-16,208085,0.0,3.0,0.0
|
||||
2018-04-17,213273,1.0,3.0,0.0
|
||||
2018-04-18,211580,2.0,3.0,0.0
|
||||
2018-04-19,206037,3.0,3.0,0.0
|
||||
2018-04-20,191211,4.0,3.0,0.0
|
||||
2018-04-21,125564,5.0,3.0,0.0
|
||||
2018-04-22,136469,6.0,3.0,0.0
|
||||
2018-04-23,206288,0.0,3.0,0.0
|
||||
2018-04-24,212115,1.0,3.0,0.0
|
||||
2018-04-25,207948,2.0,3.0,0.0
|
||||
2018-04-26,205759,3.0,3.0,0.0
|
||||
2018-04-27,181330,4.0,3.0,0.0
|
||||
2018-04-28,130046,5.0,3.0,0.0
2018-04-29,120802,6.0,3.0,0.0
2018-04-30,170390,0.0,3.0,0.0
2018-05-01,169054,1.0,4.0,0.0
2018-05-02,197891,2.0,4.0,0.0
2018-05-03,199820,3.0,4.0,0.0
2018-05-04,186783,4.0,4.0,0.0
2018-05-05,124420,5.0,4.0,0.0
2018-05-06,130666,6.0,4.0,0.0
2018-05-07,196014,0.0,4.0,0.0
2018-05-08,203058,1.0,4.0,0.0
2018-05-09,198582,2.0,4.0,0.0
2018-05-10,191321,3.0,4.0,0.0
2018-05-11,183639,4.0,4.0,0.0
2018-05-12,122023,5.0,4.0,0.0
2018-05-13,128775,6.0,4.0,0.0
2018-05-14,199104,0.0,4.0,0.0
2018-05-15,200658,1.0,4.0,0.0
2018-05-16,201541,2.0,4.0,0.0
2018-05-17,196886,3.0,4.0,0.0
2018-05-18,188597,4.0,4.0,0.0
2018-05-19,121392,5.0,4.0,0.0
2018-05-20,126981,6.0,4.0,0.0
2018-05-21,189291,0.0,4.0,0.0
2018-05-22,203038,1.0,4.0,0.0
2018-05-23,205330,2.0,4.0,0.0
2018-05-24,199208,3.0,4.0,0.0
2018-05-25,187768,4.0,4.0,0.0
2018-05-26,117635,5.0,4.0,0.0
2018-05-27,124352,6.0,4.0,0.0
2018-05-28,180398,0.0,4.0,1.0
2018-05-29,194170,1.0,4.0,0.0
2018-05-30,200281,2.0,4.0,0.0
2018-05-31,197244,3.0,4.0,0.0
2018-06-01,184037,4.0,5.0,0.0
2018-06-02,121135,5.0,5.0,0.0
2018-06-03,129389,6.0,5.0,0.0
2018-06-04,200331,0.0,5.0,0.0
2018-06-05,207735,1.0,5.0,0.0
2018-06-06,203354,2.0,5.0,0.0
2018-06-07,200520,3.0,5.0,0.0
2018-06-08,182038,4.0,5.0,0.0
2018-06-09,120164,5.0,5.0,0.0
2018-06-10,125256,6.0,5.0,0.0
2018-06-11,194786,0.0,5.0,0.0
2018-06-12,200815,1.0,5.0,0.0
2018-06-13,197740,2.0,5.0,0.0
2018-06-14,192294,3.0,5.0,0.0
2018-06-15,173587,4.0,5.0,0.0
2018-06-16,105955,5.0,5.0,0.0
2018-06-17,110780,6.0,5.0,0.0
2018-06-18,174582,0.0,5.0,0.0
2018-06-19,193310,1.0,5.0,0.0
2018-06-20,193062,2.0,5.0,0.0
2018-06-21,187986,3.0,5.0,0.0
2018-06-22,173606,4.0,5.0,0.0
2018-06-23,111795,5.0,5.0,0.0
2018-06-24,116134,6.0,5.0,0.0
2018-06-25,185919,0.0,5.0,0.0
2018-06-26,193142,1.0,5.0,0.0
2018-06-27,188114,2.0,5.0,0.0
2018-06-28,183737,3.0,5.0,0.0
2018-06-29,171496,4.0,5.0,0.0
2018-06-30,107210,5.0,5.0,0.0
2018-07-01,111053,6.0,6.0,0.0
2018-07-02,176198,0.0,6.0,0.0
2018-07-03,184040,1.0,6.0,0.0
2018-07-04,169783,2.0,6.0,1.0
2018-07-05,177996,3.0,6.0,0.0
2018-07-06,167378,4.0,6.0,0.0
2018-07-07,106401,5.0,6.0,0.0
2018-07-08,112327,6.0,6.0,0.0
2018-07-09,182835,0.0,6.0,0.0
2018-07-10,187694,1.0,6.0,0.0
2018-07-11,185762,2.0,6.0,0.0
2018-07-12,184099,3.0,6.0,0.0
2018-07-13,170860,4.0,6.0,0.0
2018-07-14,106799,5.0,6.0,0.0
2018-07-15,108475,6.0,6.0,0.0
2018-07-16,175704,0.0,6.0,0.0
2018-07-17,183596,1.0,6.0,0.0
2018-07-18,179897,2.0,6.0,0.0
2018-07-19,183373,3.0,6.0,0.0
2018-07-20,169626,4.0,6.0,0.0
2018-07-21,106785,5.0,6.0,0.0
2018-07-22,112387,6.0,6.0,0.0
2018-07-23,180572,0.0,6.0,0.0
2018-07-24,186943,1.0,6.0,0.0
2018-07-25,185744,2.0,6.0,0.0
2018-07-26,183117,3.0,6.0,0.0
2018-07-27,168526,4.0,6.0,0.0
2018-07-28,105936,5.0,6.0,0.0
2018-07-29,111708,6.0,6.0,0.0
2018-07-30,179950,0.0,6.0,0.0
2018-07-31,185930,1.0,6.0,0.0
2018-08-01,183366,2.0,7.0,0.0
2018-08-02,182412,3.0,7.0,0.0
2018-08-03,173429,4.0,7.0,0.0
2018-08-04,106108,5.0,7.0,0.0
2018-08-05,110059,6.0,7.0,0.0
2018-08-06,178355,0.0,7.0,0.0
2018-08-07,185518,1.0,7.0,0.0
2018-08-08,183204,2.0,7.0,0.0
2018-08-09,181276,3.0,7.0,0.0
2018-08-10,168297,4.0,7.0,0.0
2018-08-11,106488,5.0,7.0,0.0
2018-08-12,111786,6.0,7.0,0.0
2018-08-13,178620,0.0,7.0,0.0
2018-08-14,181922,1.0,7.0,0.0
2018-08-15,172198,2.0,7.0,0.0
2018-08-16,177367,3.0,7.0,0.0
2018-08-17,166550,4.0,7.0,0.0
2018-08-18,107011,5.0,7.0,0.0
2018-08-19,112299,6.0,7.0,0.0
2018-08-20,176718,0.0,7.0,0.0
2018-08-21,182562,1.0,7.0,0.0
2018-08-22,181484,2.0,7.0,0.0
2018-08-23,180317,3.0,7.0,0.0
2018-08-24,170197,4.0,7.0,0.0
2018-08-25,109383,5.0,7.0,0.0
2018-08-26,113373,6.0,7.0,0.0
2018-08-27,180142,0.0,7.0,0.0
2018-08-28,191628,1.0,7.0,0.0
2018-08-29,191149,2.0,7.0,0.0
2018-08-30,187503,3.0,7.0,0.0
2018-08-31,172280,4.0,7.0,0.0

File diff suppressed because it is too large
@@ -105,13 +105,8 @@ def run_inference(
    train_run.download_file(
        "outputs/{}".format(model_base_name), "inference/{}".format(model_base_name)
    )
    train_run.download_file("outputs/conda_env_v_1_0_0.yml", "inference/condafile.yml")

    inference_env = Environment("myenv")
    inference_env.docker.enabled = True
    inference_env.python.conda_dependencies = CondaDependencies(
        conda_dependencies_file_path="inference/condafile.yml"
    )
    inference_env = train_run.get_environment()

    est = Estimator(
        source_directory=script_folder,

@@ -78,7 +78,7 @@
"output[\"Resource Group\"] = ws.resource_group\n",
"output[\"Location\"] = ws.location\n",
"output[\"Default datastore name\"] = dstore.name\n",
"pd.set_option(\"display.max_colwidth\", -1)\n",
"pd.set_option(\"display.max_colwidth\", None)\n",
"outputDf = pd.DataFrame(data=output, index=[\"\"])\n",
"outputDf.T"
]

@@ -78,7 +78,7 @@
"output[\"Resource Group\"] = ws.resource_group\n",
"output[\"Location\"] = ws.location\n",
"output[\"Default datastore name\"] = dstore.name\n",
"pd.set_option(\"display.max_colwidth\", -1)\n",
"pd.set_option(\"display.max_colwidth\", None)\n",
"outputDf = pd.DataFrame(data=output, index=[\"\"])\n",
"outputDf.T"
]
@@ -234,11 +234,14 @@
"input_ds_small = Dataset.Tabular.from_delimited_files(\n",
"    path=oj_datastore.path(ds_name_small + \"/\"), validate=False\n",
")\n",
"# Drop the columns 'Revenue' as this column contains leak feature.\n",
"input_ds_small = input_ds_small.drop_columns(columns=[\"Revenue\"])\n",
"\n",
"inference_name_small = \"oj-inference-small-tabular\"\n",
"inference_ds_small = Dataset.Tabular.from_delimited_files(\n",
"    path=oj_datastore.path(inference_name_small + \"/\"), validate=False\n",
")"
")\n",
"inference_ds_small = inference_ds_small.drop_columns(columns=[\"Revenue\"])"
]
},
{
@@ -354,7 +357,6 @@
"    \"label_column_name\": \"Quantity\",\n",
"    \"n_cross_validations\": 3,\n",
"    \"time_column_name\": \"WeekStarting\",\n",
"    \"drop_column_names\": \"Revenue\",\n",
"    \"max_horizon\": 6,\n",
"    \"grain_column_names\": partition_column_names,\n",
"    \"track_child_runs\": False,\n",
@@ -649,7 +651,6 @@
"    \"Quantity\",\n",
"    \"Advert\",\n",
"    \"Price\",\n",
"    \"Revenue\",\n",
"    \"Predicted\",\n",
"]\n",
"print(\n",

@@ -58,14 +58,15 @@
"metadata": {},
"outputs": [],
"source": [
"import azureml.core\n",
"import pandas as pd\n",
"import json\n",
"import logging\n",
"\n",
"from azureml.core.workspace import Workspace\n",
"import azureml.core\n",
"import pandas as pd\n",
"from azureml.automl.core.featurization import FeaturizationConfig\n",
"from azureml.core.experiment import Experiment\n",
"from azureml.train.automl import AutoMLConfig\n",
"from azureml.automl.core.featurization import FeaturizationConfig"
"from azureml.core.workspace import Workspace\n",
"from azureml.train.automl import AutoMLConfig\n"
]
},
{
@@ -81,7 +82,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.37.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.39.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},
@@ -112,7 +113,7 @@
"output[\"Resource Group\"] = ws.resource_group\n",
"output[\"Location\"] = ws.location\n",
"output[\"Run History Name\"] = experiment_name\n",
"pd.set_option(\"display.max_colwidth\", -1)\n",
"pd.set_option(\"display.max_colwidth\", None)\n",
"outputDf = pd.DataFrame(data=output, index=[\"\"])\n",
"outputDf.T"
]
@@ -366,7 +367,7 @@
"|-|-|\n",
"|**time_column_name**|The name of your time column.|\n",
"|**forecast_horizon**|The forecast horizon is how many periods forward you would like to forecast. This integer horizon is in units of the timeseries frequency (e.g. daily, weekly).|\n",
"|**time_series_id_column_names**|The column names used to uniquely identify the time series in data that has multiple rows with the same timestamp. If the time series identifiers are not defined, the data set is assumed to be one time series.|\n",
"|**time_series_id_column_names**|This optional parameter represents the column names used to uniquely identify the time series in data that has multiple rows with the same timestamp. If the time series identifiers are missing or incorrectly defined, AutoML creates them automatically where they can be detected in the data.|\n",
"|**freq**|Forecast frequency. This optional parameter represents the period with which the forecast is desired, for example, daily, weekly, yearly, etc. Use this parameter for the correction of time series containing irregular data points or for padding of short time series. The frequency needs to be a pandas offset alias. Please refer to [pandas documentation](https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects) for more information."
]
},
@@ -378,7 +379,7 @@
"\n",
"The [AutoMLConfig](https://docs.microsoft.com/en-us/python/api/azureml-train-automl-client/azureml.train.automl.automlconfig.automlconfig?view=azure-ml-py) object defines the settings and data for an AutoML training job. Here, we set necessary inputs like the task type, the number of AutoML iterations to try, the training data, and cross-validation parameters.\n",
"\n",
"For forecasting tasks, there are some additional parameters that can be set in the `ForecastingParameters` class: the name of the column holding the date/time, the timeseries id column names, and the maximum forecast horizon. A time column is required for forecasting, while the time_series_id is optional. If time_series_id columns are not given, AutoML assumes that the whole dataset is a single time-series. We also pass a list of columns to drop prior to modeling. The _logQuantity_ column is completely correlated with the target quantity, so it must be removed to prevent a target leak.\n",
"For forecasting tasks, there are some additional parameters that can be set in the `ForecastingParameters` class: the name of the column holding the date/time, the timeseries id column names, and the maximum forecast horizon. A time column is required for forecasting, while the time_series_id is optional. If time_series_id columns are not given, or are given incorrectly, AutoML creates time_series_id columns automatically where they can be detected in the data. We also pass a list of columns to drop prior to modeling. The _logQuantity_ column is completely correlated with the target quantity, so it must be removed to prevent a target leak.\n",
"\n",
"The forecast horizon is given in units of the time-series frequency; for instance, the OJ series frequency is weekly, so a horizon of 20 means that a trained model will estimate sales up to 20 weeks beyond the latest date in the training data for each series. In this example, we set the forecast horizon to the number of samples per series in the test set (n_test_periods). Generally, the value of this parameter will be dictated by business needs. For example, a demand planning application that estimates the next month of sales should set the horizon according to suitable planning time-scales. Please see the [energy_demand notebook](https://github.com/Azure/MachineLearningNotebooks/tree/master/how-to-use-azureml/automated-machine-learning/forecasting-energy-demand) for more discussion of forecast horizon.\n",
"\n",
@@ -421,7 +422,6 @@
"forecasting_parameters = ForecastingParameters(\n",
"    time_column_name=time_column_name,\n",
"    forecast_horizon=n_test_periods,\n",
"    time_series_id_column_names=time_series_id_column_names,\n",
"    freq=\"W-THU\",  # Set the forecast frequency to be weekly (start on each Thursday)\n",
")\n",
"\n",
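Not part of the diff: a minimal sketch of how a `ForecastingParameters` object like the one above typically feeds into `AutoMLConfig`; `train_dataset`, `compute_target`, and the metric/timeout values here are assumptions, not taken from the notebooks being compared.

```python
from azureml.train.automl import AutoMLConfig

# Sketch only: train_dataset, compute_target and forecasting_parameters are
# assumed to be defined earlier, as in the notebook this hunk modifies.
automl_config = AutoMLConfig(
    task="forecasting",
    primary_metric="normalized_root_mean_squared_error",
    experiment_timeout_hours=0.3,
    training_data=train_dataset,
    label_column_name="Quantity",
    compute_target=compute_target,
    n_cross_validations=3,
    forecasting_parameters=forecasting_parameters,
)
```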
@@ -472,8 +472,8 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"### Retrieve the Best Model\n",
"Each run within an Experiment stores serialized (i.e. pickled) pipelines from the AutoML iterations. We can now retrieve the pipeline with the best performance on the validation dataset:"
"### Retrieve the Best Run details\n",
"Below we retrieve the best Run object from among all the runs in the experiment."
]
},
{
@@ -482,9 +482,9 @@
"metadata": {},
"outputs": [],
"source": [
"best_run, fitted_model = remote_run.get_output()\n",
"print(fitted_model.steps)\n",
"model_name = best_run.properties[\"model_name\"]"
"best_run = remote_run.get_best_child()\n",
"model_name = best_run.properties[\"model_name\"]\n",
"best_run"
]
},
{
@@ -502,16 +502,16 @@
"metadata": {},
"outputs": [],
"source": [
"custom_featurizer = fitted_model.named_steps[\"timeseriestransformer\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"custom_featurizer.get_featurization_summary()"
"# Download the featurization summary JSON file locally\n",
"best_run.download_file(\"outputs/featurization_summary.json\", \"featurization_summary.json\")\n",
"\n",
"# Render the JSON as a pandas DataFrame\n",
"with open(\"featurization_summary.json\", \"r\") as f:\n",
"    records = json.load(f)\n",
"fs = pd.DataFrame.from_records(records)\n",
"\n",
"# View a summary of the featurization\n",
"fs[[\"RawFeatureName\", \"TypeDetected\", \"Dropped\", \"EngineeredFeatureCount\", \"Transformations\"]]"
]
},
{

@@ -229,7 +229,7 @@
"output[\"Resource Group\"] = ws.resource_group\n",
"output[\"Location\"] = ws.location\n",
"output[\"Run History Name\"] = experiment_name\n",
"pd.set_option(\"display.max_colwidth\", -1)\n",
"pd.set_option(\"display.max_colwidth\", None)\n",
"outputDf = pd.DataFrame(data=output, index=[\"\"])\n",
"print(outputDf.T)"
]

@@ -46,11 +46,11 @@ def kpss_test(series, **kw):
    """
    if kw["store"]:
        statistic, p_value, critical_values, rstore = stattools.kpss(
            series, regression=kw["reg_type"], lags=kw["lags"], store=kw["store"]
            series, regression=kw["reg_type"], nlags=kw["lags"], store=kw["store"]
        )
    else:
        statistic, p_value, lags, critical_values = stattools.kpss(
            series, regression=kw["reg_type"], lags=kw["lags"]
            series, regression=kw["reg_type"], nlags=kw["lags"]
        )
    output = {
        "statistic": statistic,
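Not part of the diff: a hypothetical call to the `kpss_test` helper this hunk fixes, assuming the function returns the `output` dict it builds; the series values and keyword choices are invented (statsmodels accepts `nlags="auto"`).

```python
import pandas as pd

# Assumed example series; any 1-D numeric pandas Series works here.
series = pd.Series(
    [130046, 120802, 170390, 169054, 197891, 199820,
     186783, 124420, 130666, 196014, 203058, 198582]
)

# kpss_test forwards reg_type/lags/store to statsmodels.tsa.stattools.kpss,
# now via the renamed nlags argument.
result = kpss_test(series, reg_type="c", lags="auto", store=False)
print(result["statistic"])
```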
@@ -96,7 +96,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.37.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.39.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},
@@ -119,7 +119,7 @@
"output['Resource Group'] = ws.resource_group\n",
"output['Location'] = ws.location\n",
"output['Experiment Name'] = experiment.name\n",
"pd.set_option('display.max_colwidth', -1)\n",
"pd.set_option('display.max_colwidth', None)\n",
"outputDf = pd.DataFrame(data = output, index = [''])\n",
"outputDf.T"
]

@@ -96,7 +96,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.37.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.39.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},
@@ -118,7 +118,7 @@
"output['Resource Group'] = ws.resource_group\n",
"output['Location'] = ws.location\n",
"output['Experiment Name'] = experiment.name\n",
"pd.set_option('display.max_colwidth', -1)\n",
"pd.set_option('display.max_colwidth', None)\n",
"outputDf = pd.DataFrame(data = output, index = [''])\n",
"outputDf.T"
]
@@ -359,7 +359,7 @@
"metadata": {},
"outputs": [],
"source": [
"# Download the featuurization summary JSON file locally\n",
"# Download the featurization summary JSON file locally\n",
"best_run.download_file(\"outputs/featurization_summary.json\", \"featurization_summary.json\")\n",
"\n",
"# Render the JSON as a pandas DataFrame\n",
@@ -847,7 +847,7 @@
"outputs": [],
"source": [
"%matplotlib inline\n",
"test_pred = plt.scatter(y_test, y_pred_test, color='')\n",
"test_pred = plt.scatter(y_test, y_pred_test, color=None)\n",
"test_test = plt.scatter(y_test, y_test, color='g')\n",
"plt.legend((test_pred, test_test), ('prediction', 'truth'), loc='upper left', fontsize=8)\n",
"plt.show()"
@@ -2,6 +2,7 @@ import pandas as pd
import joblib
from azureml.core.model import Model
from azureml.train.automl.runtime.automl_explain_utilities import automl_setup_model_explanations
import scipy as sp


def init():
@@ -18,6 +19,22 @@ def init():
    scoring_explainer = joblib.load(scoring_explainer_path)


def is_multi_dimensional(matrix):
    if hasattr(matrix, 'ndim') and matrix.ndim > 1:
        return True
    # Check the length of shape rather than indexing shape[1] directly,
    # which would raise IndexError for 1-D inputs.
    if hasattr(matrix, 'shape') and len(matrix.shape) > 1:
        return True
    return False


def convert_matrix(matrix):
    if sp.sparse.issparse(matrix):
        matrix = matrix.todense()
    if is_multi_dimensional(matrix):
        matrix = matrix.tolist()
    return matrix


def run(raw_data):
    # Get predictions and explanations for each data point
    data = pd.read_json(raw_data, orient='records')
@@ -28,8 +45,12 @@ def run(raw_data):
        X_test=data, task='regression')
    # Retrieve model explanations for engineered explanations
    engineered_local_importance_values = scoring_explainer.explain(automl_explainer_setup_obj.X_test_transform)
    engineered_local_importance_values = convert_matrix(engineered_local_importance_values)

    # Retrieve model explanations for raw explanations
    raw_local_importance_values = scoring_explainer.explain(automl_explainer_setup_obj.X_test_transform, get_raw=True)
    raw_local_importance_values = convert_matrix(raw_local_importance_values)

    # You can return any data type as long as it is JSON-serializable
    return {'predictions': predictions.tolist(),
            'engineered_local_importance_values': engineered_local_importance_values,
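Not part of the diff: a minimal sketch of what `convert_matrix` from the hunk above does with sparse versus 1-D inputs, assuming the helpers are defined as shown.

```python
import numpy as np
import scipy as sp

# Sparse input: densified, then flattened to nested lists for JSON serialization.
sparse = sp.sparse.csr_matrix(np.eye(2))
print(convert_matrix(sparse))   # [[1.0, 0.0], [0.0, 1.0]]

# 1-D input: not multi-dimensional, so it is returned unchanged.
vector = np.array([0.1, 0.2, 0.3])
print(convert_matrix(vector))   # array([0.1, 0.2, 0.3])
```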
@@ -92,7 +92,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.37.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.39.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},
@@ -115,7 +115,7 @@
"output['Resource Group'] = ws.resource_group\n",
"output['Location'] = ws.location\n",
"output['Run History Name'] = experiment_name\n",
"pd.set_option('display.max_colwidth', -1)\n",
"pd.set_option('display.max_colwidth', None)\n",
"outputDf = pd.DataFrame(data = output, index = [''])\n",
"outputDf.T"
]
@@ -430,7 +430,7 @@
"outputs": [],
"source": [
"%matplotlib inline\n",
"test_pred = plt.scatter(y_test, y_pred_test, color='')\n",
"test_pred = plt.scatter(y_test, y_pred_test, color=None)\n",
"test_test = plt.scatter(y_test, y_test, color='g')\n",
"plt.legend((test_pred, test_test), ('prediction', 'truth'), loc='upper left', fontsize=8)\n",
"plt.show()"

@@ -81,7 +81,7 @@
"source": [
"## Create trained model\n",
"\n",
"For this example, we will train a small model on scikit-learn's [diabetes dataset](https://scikit-learn.org/stable/datasets/index.html#diabetes-dataset). "
"For this example, we will train a small model on scikit-learn's [diabetes dataset](https://scikit-learn.org/stable/datasets/toy_dataset.html#diabetes-dataset). "
]
},
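Not part of the diff: a minimal sketch of the kind of "small model" the markdown cell above refers to; the Ridge estimator and the output file name are assumptions, not taken from the notebook.

```python
import joblib
from sklearn.datasets import load_diabetes
from sklearn.linear_model import Ridge

# Train a small regression model on the diabetes dataset.
X, y = load_diabetes(return_X_y=True)
model = Ridge().fit(X, y)

# Serialize it so it can later be registered as an Azure ML model.
joblib.dump(model, "sklearn_regression_model.pkl")
```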
{
@@ -263,7 +263,7 @@
"\n",
"# explicitly set base_image to None when setting base_dockerfile\n",
"myenv.docker.base_image = None\n",
"myenv.docker.base_dockerfile = \"FROM mcr.microsoft.com/azureml/base:intelmpi2018.3-ubuntu16.04\\nRUN echo \\\"this is test\\\"\"\n",
"myenv.docker.base_dockerfile = \"FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04\\nRUN echo \\\"this is test\\\"\"\n",
"myenv.inferencing_stack_version = \"latest\"\n",
"\n",
"inference_config = InferenceConfig(source_directory=source_directory,\n",

@@ -70,7 +70,7 @@
"\n",
"import urllib.request\n",
"\n",
"onnx_model_url = \"https://github.com/onnx/models/blob/master/vision/body_analysis/emotion_ferplus/model/emotion-ferplus-7.tar.gz?raw=true\"\n",
"onnx_model_url = \"https://github.com/onnx/models/blob/main/vision/body_analysis/emotion_ferplus/model/emotion-ferplus-7.tar.gz?raw=true\"\n",
"\n",
"urllib.request.urlretrieve(onnx_model_url, filename=\"emotion-ferplus-7.tar.gz\")\n",
"\n",

@@ -70,7 +70,7 @@
"\n",
"import urllib.request\n",
"\n",
"onnx_model_url = \"https://github.com/onnx/models/blob/master/vision/classification/mnist/model/mnist-7.tar.gz?raw=true\"\n",
"onnx_model_url = \"https://github.com/onnx/models/blob/main/vision/classification/mnist/model/mnist-7.tar.gz?raw=true\"\n",
"\n",
"urllib.request.urlretrieve(onnx_model_url, filename=\"mnist-7.tar.gz\")"
]

@@ -106,7 +106,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.37.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.39.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

@@ -11,4 +11,4 @@ dependencies:
- matplotlib
- azureml-dataset-runtime
- ipywidgets
- raiwidgets~=0.15.0
- raiwidgets~=0.17.0

@@ -10,4 +10,5 @@ dependencies:
- ipython
- matplotlib
- ipywidgets
- raiwidgets~=0.15.0
- raiwidgets~=0.17.0
- packaging>=20.9

@@ -391,7 +391,7 @@
"\n",
"\n",
"aciconfig = AciWebservice.deploy_configuration(cpu_cores=1, \n",
"                                               memory_gb=1, \n",
"                                               memory_gb=2, \n",
"                                               tags={\"data\": \"IBM_Attrition\", \n",
"                                                     \"method\" : \"local_explanation\"}, \n",
"                                               description='Get local explanations for IBM Employee Attrition data')\n",
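Not part of the diff: a hedged sketch of how a deploy configuration like `aciconfig` is typically consumed; `ws`, `model`, `inference_config`, and the service name are assumed to exist earlier in that notebook.

```python
from azureml.core.model import Model

# Sketch only: ws, model and inference_config are assumptions about the
# surrounding notebook, not part of this hunk.
service = Model.deploy(
    workspace=ws,
    name="attrition-explainer-svc",
    models=[model],
    inference_config=inference_config,
    deployment_config=aciconfig,
)
service.wait_for_deployment(show_output=True)
```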
@@ -10,4 +10,5 @@ dependencies:
- ipython
- matplotlib
- ipywidgets
- raiwidgets~=0.15.0
- raiwidgets~=0.17.0
- packaging>=20.9

@@ -12,4 +12,4 @@ dependencies:
- azureml-dataset-runtime
- azureml-core
- ipywidgets
- raiwidgets~=0.15.0
- raiwidgets~=0.17.0

@@ -5,17 +5,6 @@ import argparse
import os
from azureml.core import Run


def get_dict(dict_str):
    pairs = dict_str.strip("{}").split(r'\;')
    new_dict = {}
    for pair in pairs:
        key, value = pair.strip().split(":")
        new_dict[key.strip().strip("'")] = value.strip().strip("'")

    return new_dict


print("Cleans the input data")

# Get the input green_taxi_data. To learn more about how to access dataset in your script, please
@@ -23,7 +12,6 @@ print("Cleans the input data")
run = Run.get_context()
raw_data = run.input_datasets["raw_data"]


parser = argparse.ArgumentParser("cleanse")
parser.add_argument("--output_cleanse", type=str, help="cleaned taxi data directory")
parser.add_argument("--useful_columns", type=str, help="useful columns to keep")
@@ -38,8 +26,8 @@ print("Argument 3(output cleansed taxi data path): %s" % args.output_cleanse)
# These functions ensure that null data is removed from the dataset,
# which will help increase machine learning model accuracy.

useful_columns = [s.strip().strip("'") for s in args.useful_columns.strip("[]").split(r'\;')]
columns = get_dict(args.columns)
useful_columns = eval(args.useful_columns.replace(';', ','))
columns = eval(args.columns.replace(';', ','))

new_df = (raw_data.to_pandas_dataframe()
    .dropna(how='all')
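Not part of the diff: the replacement code above parses its argument strings with `eval`. For literal list/dict strings like these, Python's `ast.literal_eval` is a safer drop-in, as this hypothetical sketch shows (the argument strings are invented examples):

```python
import ast

# Invented examples of the semicolon-delimited argument strings this script receives.
useful_columns_arg = "['cost'; 'distance'; 'dropoff_datetime']"
columns_arg = "{'vendorID': 'vendor'; 'totalAmount': 'cost'}"

# ast.literal_eval only accepts Python literals, so it cannot execute arbitrary code.
useful_columns = ast.literal_eval(useful_columns_arg.replace(';', ','))
columns = ast.literal_eval(columns_arg.replace(';', ','))
print(useful_columns, columns)
```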
@@ -254,6 +254,7 @@
"- conda-forge\n",
"dependencies:\n",
"- python=3.6.2\n",
"- pip=21.3.1\n",
"- pip:\n",
"  - azureml-defaults\n",
"  - azureml-opendatasets\n",

@@ -163,7 +163,7 @@
"metadata": {},
"outputs": [],
"source": [
"fastai_env.docker.base_image = \"fastdotai/fastai:latest\"\n",
"fastai_env.docker.base_image = \"fastdotai/fastai:2021-02-11\"\n",
"fastai_env.python.user_managed_dependencies = True"
]
},
@@ -199,7 +199,7 @@
"Specify docker steps as a string:\n",
"```python \n",
"dockerfile = r\"\"\" \\\n",
"FROM mcr.microsoft.com/azureml/base:intelmpi2018.3-ubuntu16.04\n",
"FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04\n",
"RUN echo \"Hello from custom container!\" \\\n",
"\"\"\"\n",
"```\n",

@@ -431,6 +431,7 @@
"- conda-forge\n",
"dependencies:\n",
"- python=3.6.2\n",
"- pip=21.3.1\n",
"- pip:\n",
"  - h5py<=2.10.0\n",
"  - azureml-defaults\n",

@@ -262,6 +262,7 @@
"- conda-forge\n",
"dependencies:\n",
"- python=3.6.2\n",
"- pip=21.3.1\n",
"- pip:\n",
"  - azureml-defaults\n",
"  - torch==1.6.0\n",

@@ -6,5 +6,5 @@ dependencies:
- pillow==5.4.1
- matplotlib
- numpy==1.19.3
- https://download.pytorch.org/whl/cpu/torch-1.6.0%2Bcpu-cp36-cp36m-win_amd64.whl
- https://download.pytorch.org/whl/cpu/torchvision-0.7.0%2Bcpu-cp36-cp36m-win_amd64.whl
- https://download.pytorch.org/whl/cpu/torch-1.6.0%2Bcpu-cp38-cp38-win_amd64.whl
- https://download.pytorch.org/whl/cpu/torchvision-0.7.0%2Bcpu-cp38-cp38-win_amd64.whl

@@ -261,7 +261,7 @@
"    \n",
"    # 2. Execute the Python process via the xvfb-run command to set up the headless display driver.\n",
"    xvfb_env.python.user_managed_dependencies = True\n",
"    xvfb_env.python.interpreter_path = \"xvfb-run -s '-screen 0 640x480x16 -ac +extension GLX +render' python\"\n",
"    xvfb_env.python.interpreter_path = \"xvfb-run -s '-screen 0 640x480x24 -ac +extension GLX +render' python\"\n",
"\n",
"\n",
"training_estimator = ReinforcementLearningEstimator(\n",
@@ -718,7 +718,7 @@
"# 2. Execute the Python process via the xvfb-run command to set up the headless display driver.\n",
"xvfb_env.python.user_managed_dependencies = True\n",
"if video_capture:\n",
"    xvfb_env.python.interpreter_path = \"xvfb-run -s '-screen 0 640x480x16 -ac +extension GLX +render' python\"\n",
"    xvfb_env.python.interpreter_path = \"xvfb-run -s '-screen 0 640x480x24 -ac +extension GLX +render' python\"\n",
"\n",
"\n",
"rollout_estimator = ReinforcementLearningEstimator(\n",

@@ -26,6 +26,6 @@ RUN conda install -y conda=4.7.12 python=3.7 && conda clean -ay && \
    ray[rllib,dashboard,tune]==0.8.3 \
    psutil \
    setproctitle \
    gym[atari] && \
    gym[classic_control]==0.22.0 && \
    conda install -y -c conda-forge x264='1!152.20180717' ffmpeg=4.0.2 && \
    conda install -c anaconda opencv

@@ -95,7 +95,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.37.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.39.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

@@ -8,5 +8,6 @@ dependencies:
- matplotlib
- azureml-dataset-runtime
- ipywidgets
- raiwidgets~=0.15.0
- raiwidgets~=0.17.0
- liac-arff
- packaging>=20.9

@@ -100,7 +100,7 @@
"\n",
"# Check core SDK version number\n",
"\n",
"print(\"This notebook was created using SDK version 1.37.0, you are currently running version\", azureml.core.VERSION)"
"print(\"This notebook was created using SDK version 1.39.0, you are currently running version\", azureml.core.VERSION)"
]
},
{

@@ -184,24 +184,6 @@
"myenv.python.conda_dependencies=conda_dep"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Specify environment variables\n",
"\n",
"You can add environment variables to your environment. These then become available using ```os.environ.get``` in your training script."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"myenv.environment_variables = {\"MESSAGE\":\"Hello from Azure Machine Learning\"}"
]
},
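Not part of the diff: the removed markdown above says the variables surface through `os.environ.get`. A minimal sketch of the consuming side of that mechanism, using the `MESSAGE` variable from the removed cell:

```python
# train.py -- reading the variable set via myenv.environment_variables
import os

message = os.environ.get("MESSAGE", "default message")
print(message)  # "Hello from Azure Machine Learning" when set on the environment
```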
{
"cell_type": "markdown",
"metadata": {},

index.md
@@ -108,8 +108,8 @@ Machine Learning notebook samples and encourage efficient retrieval of topics an
| [auto-ml-continuous-retraining](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/continuous-retraining/auto-ml-continuous-retraining.ipynb) | | | | | | |
| [auto-ml-regression-model-proxy](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/experimental/regression-model-proxy/auto-ml-regression-model-proxy.ipynb) | | | | | | |
| [auto-ml-forecasting-backtest-many-models](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/forecasting-backtest-many-models/auto-ml-forecasting-backtest-many-models.ipynb) | | | | | | |
| [auto-ml-forecasting-beer-remote](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/forecasting-beer-remote/auto-ml-forecasting-beer-remote.ipynb) | | | | | | |
| [auto-ml-forecasting-energy-demand](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/forecasting-energy-demand/auto-ml-forecasting-energy-demand.ipynb) | | | | | | |
| [auto-ml-forecasting-github-dau](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/forecasting-github-dau/auto-ml-forecasting-github-dau.ipynb) | | | | | | |
| [auto-ml-forecasting-hierarchical-timeseries](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/forecasting-hierarchical-timeseries/auto-ml-forecasting-hierarchical-timeseries.ipynb) | | | | | | |
| [auto-ml-forecasting-many-models](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/forecasting-many-models/auto-ml-forecasting-many-models.ipynb) | | | | | | |
| [auto-ml-forecasting-univariate-recipe-experiment-settings](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/automated-machine-learning/forecasting-recipes-univariate/auto-ml-forecasting-univariate-recipe-experiment-settings.ipynb) | | | | | | |

@@ -102,7 +102,7 @@
"source": [
"import azureml.core\n",
"\n",
"print(\"This notebook was created using version 1.37.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.39.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

@@ -19,7 +19,7 @@
"source": [
"# Quickstart: Train and deploy a model in Azure Machine Learning in 10 minutes\n",
"\n",
"In this quickstart, learn how to get started with Azure Machine Learning. You'll train an image classification model using the [MNIST](https://azure.microsoft.com/services/open-datasets/catalog/mnist/) dataset.\n",
"In this quickstart, learn how to get started with Azure Machine Learning. You'll train an image classification model using the [MNIST](https://docs.microsoft.com/azure/open-datasets/dataset-mnist) dataset.\n",
"\n",
"You'll learn how to:\n",
"\n",
@@ -280,7 +280,7 @@
"# get a curated environment\n",
"env = Environment.get(\n",
"    workspace=ws, \n",
"    name=\"AzureML-sklearn-0.24.1-ubuntu18.04-py37-cpu-inference\",\n",
"    name=\"AzureML-sklearn-1.0-ubuntu20.04-py38-cpu\",\n",
"    version=1\n",
")\n",
"env.inferencing_stack_version='latest'\n",

@@ -21,7 +21,7 @@
"\n",
"In this quickstart, you learn how to submit a batch training job using the Python SDK. In this example, we submit the job to the 'local' machine (the compute instance you are running this notebook on). However, you can use exactly the same method to submit the job to different compute targets (for example, AKS, Azure Machine Learning Compute Cluster, Synapse, etc) by changing a single line of code. A full list of supported compute targets can be viewed [here](https://docs.microsoft.com/en-us/azure/machine-learning/concept-compute-target). \n",
"\n",
"This quickstart trains a simple logistic regression using the [MNIST](https://azure.microsoft.com/services/open-datasets/catalog/mnist/) dataset and [scikit-learn](http://scikit-learn.org) with Azure Machine Learning. MNIST is a popular dataset consisting of 70,000 grayscale images. Each image is a handwritten digit of 28x28 pixels, representing a number from 0 to 9. The goal is to create a multi-class classifier to identify the digit a given image represents. \n",
"This quickstart trains a simple logistic regression using the [MNIST](https://docs.microsoft.com/azure/open-datasets/dataset-mnist) dataset and [scikit-learn](http://scikit-learn.org) with Azure Machine Learning. MNIST is a popular dataset consisting of 70,000 grayscale images. Each image is a handwritten digit of 28x28 pixels, representing a number from 0 to 9. The goal is to create a multi-class classifier to identify the digit a given image represents. \n",
"\n",
"You will learn how to:\n",
"\n",
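Not part of the diff: the "single line of code" the quickstart mentions is the compute target on the run configuration. A hedged sketch (the directory, script, experiment, and cluster names are assumptions):

```python
from azureml.core import Experiment, ScriptRunConfig, Workspace

ws = Workspace.from_config()

# Switching from local execution to a cluster is one line: compute_target.
src = ScriptRunConfig(
    source_directory="src",
    script="train.py",
    compute_target="local",  # e.g. "cpu-cluster" to run on an AML compute cluster
)
run = Experiment(ws, "quickstart-batch-job").submit(src)
run.wait_for_completion(show_output=True)
```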
@@ -2,7 +2,7 @@ import argparse
import os
import numpy as np
import glob
import joblib
# import joblib
import mlflow

from sklearn.linear_model import LogisticRegression
@@ -30,8 +30,7 @@ X_train = (
        os.path.join(data_folder, "**/train-images-idx3-ubyte.gz"), recursive=True
    )[0],
    False,
    ) /
    255.0
    ) / 255.0
)
X_test = (
    load_data(
@@ -39,8 +38,7 @@ X_test = (
        os.path.join(data_folder, "**/t10k-images-idx3-ubyte.gz"), recursive=True
    )[0],
    False,
    ) /
    255.0
    ) / 255.0
)
y_train = load_data(
    glob.glob(

@@ -17,7 +17,7 @@
"\n",
"In this tutorial, you train a machine learning model on remote compute resources. You'll use the training and deployment workflow for Azure Machine Learning service (preview) in a Python Jupyter notebook. You can then use the notebook as a template to train your own machine learning model with your own data. This tutorial is **part one of a two-part tutorial series**. \n",
"\n",
"This tutorial trains a simple logistic regression using the [MNIST](https://azure.microsoft.com/services/open-datasets/catalog/mnist/) dataset and [scikit-learn](http://scikit-learn.org) with Azure Machine Learning. MNIST is a popular dataset consisting of 70,000 grayscale images. Each image is a handwritten digit of 28x28 pixels, representing a number from 0 to 9. The goal is to create a multi-class classifier to identify the digit a given image represents. \n",
"This tutorial trains a simple logistic regression using the [MNIST](https://docs.microsoft.com/azure/open-datasets/dataset-mnist) dataset and [scikit-learn](http://scikit-learn.org) with Azure Machine Learning. MNIST is a popular dataset consisting of 70,000 grayscale images. Each image is a handwritten digit of 28x28 pixels, representing a number from 0 to 9. The goal is to create a multi-class classifier to identify the digit a given image represents. \n",
"\n",
"Learn how to:\n",
"\n",