mirror of
https://github.com/Azure/MachineLearningNotebooks.git
synced 2025-12-19 17:17:04 -05:00
update samples from Release-58 as a part of SDK release
This commit is contained in:
@@ -269,10 +269,7 @@
|
||||
"source": [
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"\n",
|
||||
"y_df = final_df.pop(\"totalAmount\")\n",
|
||||
"x_df = final_df\n",
|
||||
"\n",
|
||||
"x_train, x_test, y_train, y_test = train_test_split(x_df, y_df, test_size=0.2, random_state=223)"
|
||||
"x_train, x_test = train_test_split(final_df, test_size=0.2, random_state=223)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -311,15 +308,16 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Define the experiment parameter and model settings for training. View the full list of [settings](https://docs.microsoft.com/azure/machine-learning/service/how-to-configure-auto-train). Submitting the experiment with these default settings will take approximately 5-10 min, but if you want a shorter run time, reduce the `iterations` parameter.\n",
|
||||
"Define the experiment parameters and model settings for training. View the full list of [settings](https://docs.microsoft.com/azure/machine-learning/service/how-to-configure-auto-train). Submitting the experiment with these default settings will take approximately 20 minutes, but if you want a shorter run time, reduce the `experiment_timeout_hours` parameter.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"|Property| Value in this tutorial |Description|\n",
|
||||
"|----|----|---|\n",
|
||||
"|**iteration_timeout_minutes**|2|Time limit in minutes for each iteration. Reduce this value to decrease total runtime.|\n",
|
||||
"|**iterations**|20|Number of iterations. In each iteration, a new machine learning model is trained with your data. This is the primary value that affects total run time.|\n",
|
||||
"|**experiment_timeout_hours**|0.3|Maximum amount of time in hours that all iterations combined can take before the experiment terminates.|\n",
|
||||
"|**enable_early_stopping**|True|Flag to enable early termination if the score is not improving in the short term.|\n",
|
||||
"|**primary_metric**| spearman_correlation | Metric that you want to optimize. The best-fit model will be chosen based on this metric.|\n",
|
||||
"|**preprocess**| True | By using **True**, the experiment can preprocess the input data (handling missing data, converting text to numeric, etc.)|\n",
|
||||
"|**featurization**| auto | By using auto, the experiment can preprocess the input data (handling missing data, converting text to numeric, etc.)|\n",
|
||||
"|**verbosity**| logging.INFO | Controls the level of logging.|\n",
|
||||
"|**n_cross_validations**|5|Number of cross-validation splits to perform when validation data is not specified.|"
|
||||
]
|
||||
@@ -334,9 +332,10 @@
|
||||
"\n",
|
||||
"automl_settings = {\n",
|
||||
" \"iteration_timeout_minutes\": 2,\n",
|
||||
" \"iterations\": 20,\n",
|
||||
" \"experiment_timeout_hours\": 0.3,\n",
|
||||
" \"enable_early_stopping\": True,\n",
|
||||
" \"primary_metric\": 'spearman_correlation',\n",
|
||||
" \"preprocess\": True,\n",
|
||||
" \"featurization\": 'auto',\n",
|
||||
" \"verbosity\": logging.INFO,\n",
|
||||
" \"n_cross_validations\": 5\n",
|
||||
"}"
|
||||
@@ -359,8 +358,8 @@
|
||||
"\n",
|
||||
"automl_config = AutoMLConfig(task='regression',\n",
|
||||
" debug_log='automated_ml_errors.log',\n",
|
||||
" X=x_train.values,\n",
|
||||
" y=y_train.values.flatten(),\n",
|
||||
" training_data=x_train,\n",
|
||||
" label_column_name=\"totalAmount\",\n",
|
||||
" **automl_settings)"
|
||||
]
|
||||
},
|
||||
@@ -467,7 +466,9 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"y_predict = fitted_model.predict(x_test.values)\n",
|
||||
"y_test = x_test.pop(\"totalAmount\")\n",
|
||||
"\n",
|
||||
"y_predict = fitted_model.predict(x_test)\n",
|
||||
"print(y_predict[:10])"
|
||||
]
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user