update samples from Release-168 as a part of SDK release

This commit is contained in:
amlrelsa-ms
2022-12-05 17:52:07 +00:00
parent 38d5743bbb
commit 4404e62f58
44 changed files with 187 additions and 814 deletions

View File

@@ -5,32 +5,17 @@ channels:
- main
dependencies:
# The python interpreter version.
# Currently Azure ML only supports 3.6.0 and later.
- pip==20.2.4
- python>=3.6,<3.9
- matplotlib==3.2.1
- py-xgboost==1.3.3
- pytorch::pytorch=1.11.0
- conda-forge::fbprophet==0.7.1
- cudatoolkit=10.1.243
- scipy==1.5.3
- notebook
- PySocks==1.7.1
- conda-forge::pyqt==5.12.3
- jinja2<=2.11.2
- markupsafe<2.1.0
- tqdm==4.64.1
- jsonschema==4.16.0
- websocket-client==1.4.1
# Azure ML only supports 3.7.0 and later.
- pip==22.3.1
- python>=3.7,<3.9
- pip:
# Required packages for AzureML execution, history, and data preparation.
- azureml-widgets~=1.47.0
- azureml-defaults~=1.47.0
- pytorch-transformers==1.0.0
- spacy==2.2.4
- pystan==2.19.1.1
- https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
- -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/1.47.0/validated_win32_requirements.txt [--no-deps]
- azureml-widgets~=latest
- azureml-defaults~=latest
- -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/latest/validated_win32_requirements.txt [--no-deps]
- matplotlib==3.6.2
- xgboost==1.3.3
- arch==4.14
- wasabi==0.9.1
- mlflow-skinny==1.30.0

View File

@@ -5,9 +5,9 @@ channels:
- main
dependencies:
# The python interpreter version.
# Currently Azure ML only supports 3.6.0 and later.
# Azure ML only supports 3.7 and later.
- pip==20.1.1
- python>=3.6,<3.9
- python>=3.7,<3.9
- matplotlib==3.2.1
- numpy>=1.21.6,<=1.22.3
- cython==0.29.14
@@ -25,11 +25,11 @@ dependencies:
- pip:
# Required packages for AzureML execution, history, and data preparation.
- azureml-widgets~=1.47.0
- azureml-defaults~=1.47.0
- azureml-widgets~=1.48.0
- azureml-defaults~=1.48.0
- pytorch-transformers==1.0.0
- spacy==2.2.4
- pystan==2.19.1.1
- https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
- -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/1.47.0/validated_linux_requirements.txt [--no-deps]
- -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/1.48.0/validated_linux_requirements.txt [--no-deps]
- arch==4.14

View File

@@ -5,9 +5,9 @@ channels:
- main
dependencies:
# The python interpreter version.
# Currently Azure ML only supports 3.6.0 and later.
# Currently Azure ML only supports 3.7 and later.
- pip==20.1.1
- python>=3.6,<3.9
- python>=3.7,<3.9
- matplotlib==3.2.1
- numpy>=1.21.6,<=1.22.3
- cython==0.29.14
@@ -25,11 +25,11 @@ dependencies:
- pip:
# Required packages for AzureML execution, history, and data preparation.
- azureml-widgets~=1.47.0
- azureml-defaults~=1.47.0
- azureml-widgets~=1.48.0
- azureml-defaults~=1.48.0
- pytorch-transformers==1.0.0
- spacy==2.2.4
- pystan==2.19.1.1
- https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
- -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/1.47.0/validated_darwin_requirements.txt [--no-deps]
- -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/1.48.0/validated_darwin_requirements.txt [--no-deps]
- arch==4.14

View File

@@ -97,7 +97,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.47.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.48.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -97,7 +97,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.47.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.48.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -1,23 +1,15 @@
name: azure_automl_experimental
dependencies:
# The python interpreter version.
# Currently Azure ML only supports 3.6.0 and later.
- pip<=20.2.4
- python>=3.6.0,<3.10
- cython==0.29.14
- urllib3==1.26.7
- PyJWT < 2.0.0
- numpy==1.22.3
- pywin32==227
# Currently Azure ML only supports 3.7.0 and later.
- pip<=22.3.1
- python>=3.7.0,<3.10
- pip:
# Required packages for AzureML execution, history, and data preparation.
- azure-core==1.24.1
- azure-identity==1.7.0
- azureml-defaults
- azureml-sdk
- azureml-widgets
- azureml-mlflow
- pandas
- mlflow
- docker<6.0.0

View File

@@ -92,7 +92,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.47.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.48.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -91,7 +91,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.47.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.48.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -387,6 +387,7 @@
"| **node_count** | The number of compute nodes to be used for running the user script. We recommend to start with 3 and increase the node_count if the training time is taking too long. |\n",
"| **process_count_per_node** | Process count per node, we recommend 2:1 ratio for number of cores: number of processes per node. eg. If node has 16 cores then configure 8 or less process count per node or optimal performance. |\n",
"| **train_pipeline_parameters** | The set of configuration parameters defined in the previous section. |\n",
"| **run_invocation_timeout** | Maximum amount of time in seconds that the ``ParallelRunStep`` class is allowed. This is optional but provides customers with greater control on exit criteria. This must be greater than ``experiment_timeout_hours`` by at least 300 seconds. |\n",
"\n",
"Calling this method will create a new aggregated dataset which is generated dynamically on pipeline execution."
]
@@ -529,6 +530,8 @@
" target_column_name=TARGET_COLNAME,\n",
")\n",
"\n",
"output_file_name = \"parallel_run_step.csv\"\n",
"\n",
"inference_steps = AutoMLPipelineBuilder.get_many_models_batch_inference_steps(\n",
" experiment=experiment,\n",
" inference_data=test_data,\n",
@@ -540,6 +543,7 @@
" train_run_id=training_run.id,\n",
" train_experiment_name=training_run.experiment.name,\n",
" inference_pipeline_parameters=mm_parameters,\n",
" append_row_file_name=output_file_name,\n",
")"
]
},
@@ -587,18 +591,21 @@
"source": [
"from azureml.contrib.automl.pipeline.steps.utilities import get_output_from_mm_pipeline\n",
"\n",
"PREDICTION_COLNAME = \"Predictions\"\n",
"forecasting_results_name = \"forecasting_results\"\n",
"forecasting_output_name = \"many_models_inference_output\"\n",
"forecast_file = get_output_from_mm_pipeline(\n",
" inference_run, forecasting_results_name, forecasting_output_name\n",
" inference_run, forecasting_results_name, forecasting_output_name, output_file_name\n",
")\n",
"df = pd.read_csv(forecast_file, delimiter=\" \", header=None, parse_dates=[0])\n",
"df.columns = list(X_train.columns) + [\"predicted_level\"]\n",
"df = pd.read_csv(forecast_file, parse_dates=[0])\n",
"print(\n",
" \"Prediction has \", df.shape[0], \" rows. Here the first 10 rows are being displayed.\"\n",
")\n",
"# Save the scv file with header to read it in the next step.\n",
"df.rename(columns={TARGET_COLNAME: \"actual_level\"}, inplace=True)\n",
"# Save the csv file to read it in the next step.\n",
"df.rename(\n",
" columns={TARGET_COLNAME: \"actual_level\", PREDICTION_COLNAME: \"predicted_level\"},\n",
" inplace=True,\n",
")\n",
"df.to_csv(os.path.join(forecasting_results_name, \"forecast.csv\"), index=False)\n",
"df.head(10)"
]

View File

@@ -251,8 +251,16 @@
"source": [
"### Set up training parameters\n",
"\n",
"This dictionary defines the AutoML and hierarchy settings. For this forecasting task we need to define several settings inncluding the name of the time column, the maximum forecast horizon, the hierarchy definition, and the level of the hierarchy at which to train.\n",
"We need to provide ``ForecastingParameters``, ``AutoMLConfig`` and ``HTSTrainParameters`` objects. For the forecasting task we need to define several settings including the name of the time column, the maximum forecast horizon, the hierarchy definition, and the level of the hierarchy at which to train.\n",
"\n",
"#### ``ForecastingParameters`` arguments\n",
"| Property | Description|\n",
"| :--------------- | :------------------- |\n",
"| **forecast_horizon** | The forecast horizon is how many periods forward you would like to forecast. This integer horizon is in units of the timeseries frequency (e.g. daily, weekly). Periods are inferred from your data. |\n",
"| **time_column_name** | The name of your time column. |\n",
"| **time_series_id_column_names** | The column names used to uniquely identify timeseries in data that has multiple rows with the same timestamp. |\n",
"\n",
"#### ``AutoMLConfig`` arguments\n",
"| Property | Description|\n",
"| :--------------- | :------------------- |\n",
"| **task** | forecasting |\n",
@@ -262,18 +270,21 @@
"| **iterations** | Number of models to train. This is optional but provides customers with greater control on exit criteria. |\n",
"| **experiment_timeout_hours** | Maximum amount of time in hours that the experiment can take before it terminates. This is optional but provides customers with greater control on exit criteria. |\n",
"| **label_column_name** | The name of the label column. |\n",
"| **forecast_horizon** | The forecast horizon is how many periods forward you would like to forecast. This integer horizon is in units of the timeseries frequency (e.g. daily, weekly). Periods are inferred from your data. |\n",
"|**n_cross_validations**|Number of cross-validation folds to use for model/pipeline selection. The default value is \"auto\", in which case AutoMl determines the number of cross-validations automatically, if a validation set is not provided. Or users could specify an integer value.\n",
"|**cv_step_size**|Number of periods between two consecutive cross-validation folds. The default value is \"auto\", in which case AutoMl determines the cross-validation step size automatically, if a validation set is not provided. Or users could specify an integer value.\n",
"| **n_cross_validations** | Number of cross-validation folds to use for model/pipeline selection. The default value is \\\"auto\\\", in which case AutoMl determines the number of cross-validations automatically, if a validation set is not provided. Or users could specify an integer value. |\n",
"| **cv_step_size** | Number of periods between two consecutive cross-validation folds. The default value is \\\"auto\\\", in which case AutoMl determines the cross-validation step size automatically, if a validation set is not provided. Or users could specify an integer value. |\n",
"| **enable_early_stopping** | Flag to enable early termination if the score is not improving in the short term. |\n",
"| **time_column_name** | The name of your time column. |\n",
"| **hierarchy_column_names** | The names of columns that define the hierarchical structure of the data from highest level to most granular. |\n",
"| **training_level** | The level of the hierarchy to be used for training models. |\n",
"| **enable_engineered_explanations** | Engineered feature explanations will be downloaded if enable_engineered_explanations flag is set to True. By default it is set to False to save storage space. |\n",
"| **time_series_id_column_name** | The column names used to uniquely identify timeseries in data that has multiple rows with the same timestamp. |\n",
"| **track_child_runs** | Flag to disable tracking of child runs. Only best run is tracked if the flag is set to False (this includes the model and metrics of the run). |\n",
"| **pipeline_fetch_max_batch_size** | Determines how many pipelines (training algorithms) to fetch at a time for training, this helps reduce throttling when training at large scale. |\n",
"| **model_explainability** | Flag to disable explaining the best automated ML model at the end of all training iterations. The default is True and will block non-explainable models which may impact the forecast accuracy. For more information, see [Interpretability: model explanations in automated machine learning](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-machine-learning-interpretability-automl). |"
"| **model_explainability** | Flag to disable explaining the best automated ML model at the end of all training iterations. The default is True and will block non-explainable models which may impact the forecast accuracy. For more information, see [Interpretability: model explanations in automated machine learning](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-machine-learning-interpretability-automl). |\n",
"\n",
"#### ``HTSTrainParameters`` arguments\n",
"| Property | Description|\n",
"| :--------------- | :------------------- |\n",
"| **automl_settings** | ``AutoMLConfig`` object.\n",
"| **hierarchy_column_names** | The names of columns that define the hierarchical structure of the data from highest level to most granular. |\n",
"| **training_level** | The level of the hierarchy to be used for training models. |\n",
"| **enable_engineered_explanations** | The switch controls engineered explanations. |"
]
},
{
@@ -287,6 +298,9 @@
"outputs": [],
"source": [
"from azureml.train.automl.runtime._hts.hts_parameters import HTSTrainParameters\n",
"from azureml.automl.core.forecasting_parameters import ForecastingParameters\n",
"from azureml.train.automl.automlconfig import AutoMLConfig\n",
"\n",
"\n",
"model_explainability = True\n",
"\n",
@@ -300,24 +314,26 @@
"label_column_name = \"quantity\"\n",
"forecast_horizon = 7\n",
"\n",
"forecasting_parameters = ForecastingParameters(\n",
" time_column_name=time_column_name,\n",
" forecast_horizon=forecast_horizon,\n",
")\n",
"\n",
"automl_settings = {\n",
" \"task\": \"forecasting\",\n",
" \"primary_metric\": \"normalized_root_mean_squared_error\",\n",
" \"label_column_name\": label_column_name,\n",
" \"time_column_name\": time_column_name,\n",
" \"forecast_horizon\": forecast_horizon,\n",
" \"hierarchy_column_names\": hierarchy,\n",
" \"hierarchy_training_level\": training_level,\n",
" \"track_child_runs\": False,\n",
" \"pipeline_fetch_max_batch_size\": 15,\n",
" \"model_explainability\": model_explainability,\n",
" \"n_cross_validations\": \"auto\", # Feel free to set to a small integer (>=2) if runtime is an issue.\n",
" \"cv_step_size\": \"auto\",\n",
"automl_settings = AutoMLConfig(\n",
" task=\"forecasting\",\n",
" primary_metric=\"normalized_root_mean_squared_error\",\n",
" experiment_timeout_hours=1,\n",
" label_column_name=label_column_name,\n",
" track_child_runs=False,\n",
" forecasting_parameters=forecasting_parameters,\n",
" pipeline_fetch_max_batch_size=15,\n",
" model_explainability=model_explainability,\n",
" n_cross_validations=\"auto\", # Feel free to set to a small integer (>=2) if runtime is an issue.\n",
" cv_step_size=\"auto\",\n",
" # The following settings are specific to this sample and should be adjusted according to your own needs.\n",
" \"iteration_timeout_minutes\": 10,\n",
" \"iterations\": 10,\n",
"}\n",
" iteration_timeout_minutes=10,\n",
" iterations=15,\n",
")\n",
"\n",
"hts_parameters = HTSTrainParameters(\n",
" automl_settings=automl_settings,\n",
@@ -345,6 +361,7 @@
"* **node_count:** The number of compute nodes to be used for running the user script. We recommend to start with 3 and increase the node_count if the training time is taking too long.\n",
"* **process_count_per_node:** Process count per node, we recommend 2:1 ratio for number of cores: number of processes per node. eg. If node has 16 cores then configure 8 or less process count per node or optimal performance.\n",
"* **train_pipeline_parameters:** The set of configuration parameters defined in the previous section. \n",
"* **run_invocation_timeout:** Maximum amount of time in seconds that the ``ParallelRunStep`` class is allowed. This is optional but provides customers with greater control on exit criteria. This must be greater than ``experiment_timeout_hours`` by at least 300 seconds.\n",
"\n",
"Calling this method will create a new aggregated dataset which is generated dynamically on pipeline execution."
]
@@ -635,12 +652,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.13"
},
"vscode": {
"interpreter": {
"hash": "6db9c8d9f0cce2d9127e384e15560d42c3b661994c9f717d0553d1d8985ab1ea"
}
"version": "3.6.8"
}
},
"nbformat": 4,

View File

@@ -379,8 +379,16 @@
"source": [
"### Set up training parameters\n",
"\n",
"This dictionary defines the AutoML and many models settings. For this forecasting task we need to define several settings inncluding the name of the time column, the maximum forecast horizon, and the partition column name definition.\n",
"We need to provide ``ForecastingParameters``, ``AutoMLConfig`` and ``ManyModelsTrainParameters`` objects. For the forecasting task we also need to define several settings including the name of the time column, the maximum forecast horizon, and the partition column name definition.\n",
"\n",
"#### ``ForecastingParameters`` arguments\n",
"| Property | Description|\n",
"| :--------------- | :------------------- |\n",
"| **forecast_horizon** | The forecast horizon is how many periods forward you would like to forecast. This integer horizon is in units of the timeseries frequency (e.g. daily, weekly). Periods are inferred from your data. |\n",
"| **time_column_name** | The name of your time column. |\n",
"| **time_series_id_column_names** | The column names used to uniquely identify timeseries in data that has multiple rows with the same timestamp. |\n",
"\n",
"#### ``AutoMLConfig`` arguments\n",
"| Property | Description|\n",
"| :--------------- | :------------------- |\n",
"| **task** | forecasting |\n",
@@ -390,13 +398,10 @@
"| **iterations** | Number of models to train. This is optional but provides customers with greater control on exit criteria. |\n",
"| **experiment_timeout_hours** | Maximum amount of time in hours that the experiment can take before it terminates. This is optional but provides customers with greater control on exit criteria. |\n",
"| **label_column_name** | The name of the label column. |\n",
"| **forecast_horizon** | The forecast horizon is how many periods forward you would like to forecast. This integer horizon is in units of the timeseries frequency (e.g. daily, weekly). Periods are inferred from your data. |\n",
"| **n_cross_validations** | Number of cross validation splits. The default value is \"auto\", in which case AutoMl determines the number of cross-validations automatically, if a validation set is not provided. Or users could specify an integer value. Rolling Origin Validation is used to split time-series in a temporally consistent way. |\n",
"|**cv_step_size**|Number of periods between two consecutive cross-validation folds. The default value is \"auto\", in which case AutoMl determines the cross-validation step size automatically, if a validation set is not provided. Or users could specify an integer value.\n",
"| **n_cross_validations** | Number of cross validation splits. The default value is \\\"auto\\\", in which case AutoMl determines the number of cross-validations automatically, if a validation set is not provided. Or users could specify an integer value. Rolling Origin Validation is used to split time-series in a temporally consistent way. |\n",
"| **cv_step_size** |Number of periods between two consecutive cross-validation folds. The default value is \\\"auto\\\", in which case AutoMl determines the cross-validation step size automatically, if a validation set is not provided. Or users could specify an integer value. |\n",
"| **enable_early_stopping** | Flag to enable early termination if the score is not improving in the short term. |\n",
"| **time_column_name** | The name of your time column. |\n",
"| **enable_engineered_explanations** | Engineered feature explanations will be downloaded if enable_engineered_explanations flag is set to True. By default it is set to False to save storage space. |\n",
"| **time_series_id_column_names** | The column names used to uniquely identify timeseries in data that has multiple rows with the same timestamp. |\n",
"| **track_child_runs** | Flag to disable tracking of child runs. Only best run is tracked if the flag is set to False (this includes the model and metrics of the run). |\n",
"| **pipeline_fetch_max_batch_size** | Determines how many pipelines (training algorithms) to fetch at a time for training, this helps reduce throttling when training at large scale. |\n",
"| **partition_column_names** | The names of columns used to group your models. For timeseries, the groups must not split up individual time-series. That is, each group must contain one or more whole time-series. |"
@@ -415,23 +420,30 @@
"from azureml.train.automl.runtime._many_models.many_models_parameters import (\n",
" ManyModelsTrainParameters,\n",
")\n",
"from azureml.automl.core.forecasting_parameters import ForecastingParameters\n",
"from azureml.train.automl.automlconfig import AutoMLConfig\n",
"\n",
"partition_column_names = [\"Store\", \"Brand\"]\n",
"automl_settings = {\n",
" \"task\": \"forecasting\",\n",
" \"primary_metric\": \"normalized_root_mean_squared_error\",\n",
" \"iteration_timeout_minutes\": 10, # This needs to be changed based on the dataset. We ask customer to explore how long training is taking before settings this value\n",
" \"iterations\": 15,\n",
" \"experiment_timeout_hours\": 0.25,\n",
" \"label_column_name\": \"Quantity\",\n",
" \"n_cross_validations\": \"auto\", # Feel free to set to a small integer (>=2) if runtime is an issue.\n",
" \"cv_step_size\": \"auto\",\n",
" \"time_column_name\": \"WeekStarting\",\n",
" \"drop_column_names\": \"Revenue\",\n",
" \"forecast_horizon\": 6,\n",
" \"time_series_id_column_names\": partition_column_names,\n",
" \"track_child_runs\": False,\n",
"}\n",
"\n",
"forecasting_parameters = ForecastingParameters(\n",
" time_column_name=\"WeekStarting\",\n",
" drop_column_names=\"Revenue\",\n",
" forecast_horizon=6,\n",
" time_series_id_column_names=partition_column_names,\n",
")\n",
"\n",
"automl_settings = AutoMLConfig(\n",
" task=\"forecasting\",\n",
" primary_metric=\"normalized_root_mean_squared_error\",\n",
" iteration_timeout_minutes=10,\n",
" iterations=15,\n",
" experiment_timeout_hours=0.25,\n",
" label_column_name=\"Quantity\",\n",
" n_cross_validations=\"auto\", # Feel free to set to a small integer (>=2) if runtime is an issue.\n",
" cv_step_size=\"auto\",\n",
" track_child_runs=False,\n",
" forecasting_parameters=forecasting_parameters,\n",
")\n",
"\n",
"mm_paramters = ManyModelsTrainParameters(\n",
" automl_settings=automl_settings, partition_column_names=partition_column_names\n",
@@ -498,6 +510,7 @@
"| **node_count** | The number of compute nodes to be used for running the user script. We recommend to start with 3 and increase the node_count if the training time is taking too long. |\n",
"| **process_count_per_node** | Process count per node, we recommend 2:1 ratio for number of cores: number of processes per node. eg. If node has 16 cores then configure 8 or less process count per node or optimal performance. |\n",
"| **train_pipeline_parameters** | The set of configuration parameters defined in the previous section. |\n",
"| **run_invocation_timeout** | Maximum amount of time in seconds that the ``ParallelRunStep`` class is allowed. This is optional but provides customers with greater control on exit criteria. This must be greater than ``experiment_timeout_hours`` by at least 300 seconds. |\n",
"\n",
"Calling this method will create a new aggregated dataset which is generated dynamically on pipeline execution."
]
@@ -667,9 +680,9 @@
"| :--------------- | :------------------- |\n",
"| **experiment** | The experiment used for inference run. |\n",
"| **inference_data** | The data to use for inferencing. It should be the same schema as used for training.\n",
"| **compute_target** The compute target that runs the inference pipeline.|\n",
"| **compute_target** | The compute target that runs the inference pipeline. |\n",
"| **node_count** | The number of compute nodes to be used for running the user script. We recommend to start with the number of cores per node (varies by compute sku). |\n",
"| **process_count_per_node** The number of processes per node.\n",
"| **process_count_per_node** | The number of processes per node (should be at most half of the number of cores of the compute cluster that will be used for the experiment).\n",
"| **train_run_id** | \\[Optional] The run id of the hierarchy training, by default it is the latest successful training many model run in the experiment. |\n",
"| **train_experiment_name** | \\[Optional] The train experiment that contains the train pipeline. This one is only needed when the train pipeline is not in the same experiement as the inference pipeline. |\n",
"| **process_count_per_node** | \\[Optional] The number of processes per node, by default it's 4. |"
@@ -692,6 +705,8 @@
" target_column_name=\"Quantity\",\n",
")\n",
"\n",
"output_file_name = \"parallel_run_step.csv\"\n",
"\n",
"inference_steps = AutoMLPipelineBuilder.get_many_models_batch_inference_steps(\n",
" experiment=experiment,\n",
" inference_data=inference_ds_small,\n",
@@ -703,6 +718,8 @@
" train_run_id=training_run.id,\n",
" train_experiment_name=training_run.experiment.name,\n",
" inference_pipeline_parameters=mm_parameters,\n",
" append_row_file_name=output_file_name,\n",
" arguments=[\"--forecast_quantiles\", 0.1, 0.9],\n",
")"
]
},
@@ -737,7 +754,7 @@
"\n",
"The following code snippet:\n",
"1. Downloads the contents of the output folder that is passed in the parallel run step \n",
"2. Reads the parallel_run_step.txt file that has the predictions as pandas dataframe and \n",
"2. Reads the output file that has the predictions as pandas dataframe and \n",
"3. Displays the top 10 rows of the predictions"
]
},
@@ -752,19 +769,9 @@
"forecasting_results_name = \"forecasting_results\"\n",
"forecasting_output_name = \"many_models_inference_output\"\n",
"forecast_file = get_output_from_mm_pipeline(\n",
" inference_run, forecasting_results_name, forecasting_output_name\n",
" inference_run, forecasting_results_name, forecasting_output_name, output_file_name\n",
")\n",
"df = pd.read_csv(forecast_file, delimiter=\" \", header=None)\n",
"df.columns = [\n",
" \"Week Starting\",\n",
" \"Store\",\n",
" \"Brand\",\n",
" \"Quantity\",\n",
" \"Advert\",\n",
" \"Price\",\n",
" \"Revenue\",\n",
" \"Predicted\",\n",
"]\n",
"df = pd.read_csv(forecast_file)\n",
"print(\n",
" \"Prediction has \", df.shape[0], \" rows. Here the first 10 rows are being displayed.\"\n",
")\n",

View File

@@ -859,8 +859,8 @@
"outputs": [],
"source": [
"%matplotlib inline\n",
"test_pred = plt.scatter(y_test, y_pred_test, color=\"\")\n",
"test_test = plt.scatter(y_test, y_test, color=\"g\")\n",
"test_pred = plt.scatter(y_test, y_pred_test, c=[\"b\"])\n",
"test_test = plt.scatter(y_test, y_test, c=[\"g\"])\n",
"plt.legend(\n",
" (test_pred, test_test), (\"prediction\", \"truth\"), loc=\"upper left\", fontsize=8\n",
")\n",

View File

@@ -422,8 +422,8 @@
"outputs": [],
"source": [
"%matplotlib inline\n",
"test_pred = plt.scatter(y_test, y_pred_test, color=\"\")\n",
"test_test = plt.scatter(y_test, y_test, color=\"g\")\n",
"test_pred = plt.scatter(y_test, y_pred_test, c=[\"b\"])\n",
"test_test = plt.scatter(y_test, y_test, c=[\"g\"])\n",
"plt.legend(\n",
" (test_pred, test_test), (\"prediction\", \"truth\"), loc=\"upper left\", fontsize=8\n",
")\n",