update samples from Release-58 as a part of SDK release

This commit is contained in:
amlrelsa-ms
2020-07-20 20:44:42 +00:00
parent f80512a6db
commit d096535e48
37 changed files with 265 additions and 282 deletions

View File

@@ -269,10 +269,7 @@
"source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
"y_df = final_df.pop(\"totalAmount\")\n",
"x_df = final_df\n",
"\n",
"x_train, x_test, y_train, y_test = train_test_split(x_df, y_df, test_size=0.2, random_state=223)"
"x_train, x_test = train_test_split(final_df, test_size=0.2, random_state=223)"
]
},
{
@@ -311,15 +308,16 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Define the experiment parameter and model settings for training. View the full list of [settings](https://docs.microsoft.com/azure/machine-learning/service/how-to-configure-auto-train). Submitting the experiment with these default settings will take approximately 5-10 min, but if you want a shorter run time, reduce the `iterations` parameter.\n",
"Define the experiment parameters and model settings for training. View the full list of [settings](https://docs.microsoft.com/azure/machine-learning/service/how-to-configure-auto-train). Submitting the experiment with these default settings will take approximately 20 minutes, but if you want a shorter run time, reduce the `experiment_timeout_hours` parameter.\n",
"\n",
"\n",
"|Property| Value in this tutorial |Description|\n",
"|----|----|---|\n",
"|**iteration_timeout_minutes**|2|Time limit in minutes for each iteration. Reduce this value to decrease total runtime.|\n",
"|**iterations**|20|Number of iterations. In each iteration, a new machine learning model is trained with your data. This is the primary value that affects total run time.|\n",
"|**experiment_timeout_hours**|0.3|Maximum amount of time in hours that all iterations combined can take before the experiment terminates.|\n",
"|**enable_early_stopping**|True|Flag to enable early termination if the score is not improving in the short term.|\n",
"|**primary_metric**| spearman_correlation | Metric that you want to optimize. The best-fit model will be chosen based on this metric.|\n",
"|**preprocess**| True | By using **True**, the experiment can preprocess the input data (handling missing data, converting text to numeric, etc.)|\n",
"|**featurization**| auto | When set to `auto`, the experiment automatically preprocesses the input data (handling missing data, converting text to numeric, etc.)|\n",
"|**verbosity**| logging.INFO | Controls the level of logging.|\n",
"|**n_cross_validations**|5|Number of cross-validation splits to perform when validation data is not specified.|"
]
@@ -334,9 +332,10 @@
"\n",
"automl_settings = {\n",
" \"iteration_timeout_minutes\": 2,\n",
" \"iterations\": 20,\n",
" \"experiment_timeout_hours\": 0.3,\n",
" \"enable_early_stopping\": True,\n",
" \"primary_metric\": 'spearman_correlation',\n",
" \"preprocess\": True,\n",
" \"featurization\": 'auto',\n",
" \"verbosity\": logging.INFO,\n",
" \"n_cross_validations\": 5\n",
"}"
@@ -359,8 +358,8 @@
"\n",
"automl_config = AutoMLConfig(task='regression',\n",
" debug_log='automated_ml_errors.log',\n",
" X=x_train.values,\n",
" y=y_train.values.flatten(),\n",
" training_data=x_train,\n",
" label_column_name=\"totalAmount\",\n",
" **automl_settings)"
]
},
@@ -467,7 +466,9 @@
"metadata": {},
"outputs": [],
"source": [
"y_predict = fitted_model.predict(x_test.values)\n",
"y_test = x_test.pop(\"totalAmount\")\n",
"\n",
"y_predict = fitted_model.predict(x_test)\n",
"print(y_predict[:10])"
]
},