update automl databricks
This commit is contained in:
@@ -123,13 +123,6 @@
|
||||
"ws.get_details()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -270,15 +263,14 @@
|
||||
"#If your data is in a dataframe, please use read_pandas_dataframe to convert a dataframe to dataflow before usind dprep.\n",
|
||||
"\n",
|
||||
"import azureml.dataprep as dprep\n",
|
||||
"# You can use `auto_read_file` which intelligently figures out delimiters and datatypes of a file.\n",
|
||||
"\n",
|
||||
"# The data referenced here was pulled from `sklearn.datasets.load_digits()`.\n",
|
||||
"simple_example_data_root = 'https://dprepdata.blob.core.windows.net/automl-notebook-data/'\n",
|
||||
"X_train = dprep.auto_read_file(simple_example_data_root + 'X.csv').skip(1) # Remove the header row.\n",
|
||||
"\n",
|
||||
"# You can also use `read_csv` and `to_*` transformations to read (with overridable delimiter)\n",
|
||||
"# and convert column types manually.\n",
|
||||
"# Here we read a comma delimited file and convert all columns to integers.\n",
|
||||
"y_train = dprep.read_csv(simple_example_data_root + 'y.csv').to_long(dprep.ColumnSelector(term='.*', use_regex = True))"
|
||||
"#Convert Pandas DataFrame to DataFlow\n",
|
||||
"#The read_pandas_dataframe reader can take a DataFrame and use it as the data source for a Dataflow.\n",
|
||||
"X_train = dprep.read_pandas_dataframe(pd.read_csv(simple_example_data_root + 'X.csv'), temp_folder='/dbfs/dataset_dataflowX_train') \n",
|
||||
"y_train = dprep.read_pandas_dataframe(pd.read_csv(simple_example_data_root + 'y.csv'), temp_folder='/dbfs/dataset_dataflowy_train').to_long(dprep.ColumnSelector(term='.*', use_regex = True))\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -295,7 +287,16 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"X_train.skip(1).head(5)"
|
||||
"X_train.get_profile()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"y_train.get_profile()"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -333,7 +334,8 @@
|
||||
" debug_log = 'automl_errors.log',\n",
|
||||
" primary_metric = 'AUC_weighted',\n",
|
||||
" iteration_timeout_minutes = 10,\n",
|
||||
" iterations = 30,\n",
|
||||
" iterations = 5,\n",
|
||||
" preprocess = True,\n",
|
||||
" n_cross_validations = 10,\n",
|
||||
" max_concurrent_iterations = 2, #change it based on number of worker nodes\n",
|
||||
" verbosity = logging.INFO,\n",
|
||||
@@ -349,8 +351,7 @@
|
||||
"source": [
|
||||
"## Train the Models\n",
|
||||
"\n",
|
||||
"Call the `submit` method on the experiment object and pass the run configuration. Execution of local runs is synchronous. Depending on the data and the number of iterations this can run for a while.\n",
|
||||
"In this example, we specify `show_output = True` to print currently running iterations to the console."
|
||||
"Call the `submit` method on the experiment object and pass the run configuration. Execution of local runs is synchronous. Depending on the data and the number of iterations this can run for a while."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -359,7 +360,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"local_run = experiment.submit(automl_config, show_output = True) # for higher runs please use show_output=False and use the below"
|
||||
"local_run = experiment.submit(automl_config, show_output = False) # for higher runs please use show_output=False and use the below"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -549,11 +550,11 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.0"
|
||||
"version": "3.6.5"
|
||||
},
|
||||
"name": "auto-ml-classification-local-adb",
|
||||
"notebookId": 817220787969977
|
||||
"notebookId": 587284549713154
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
"nbformat_minor": 1
|
||||
}
|
||||
@@ -99,10 +99,10 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"subscription_id = \"<Your SubscriptionId>\"\n",
|
||||
"resource_group = \"<Resource group - new or existing>\"\n",
|
||||
"workspace_name = \"<workspace to be created>\"\n",
|
||||
"workspace_region = \"<azureregion>\""
|
||||
"subscription_id = \"<Your SubscriptionId>\" #you should be owner or contributor\n",
|
||||
"resource_group = \"<Resource group - new or existing>\" #you should be owner or contributor\n",
|
||||
"workspace_name = \"<workspace to be created>\" #your workspace name\n",
|
||||
"workspace_region = \"<azureregion>\" #your region"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -134,7 +134,7 @@
|
||||
"ws = Workspace.create(name = workspace_name,\n",
|
||||
" subscription_id = subscription_id,\n",
|
||||
" resource_group = resource_group, \n",
|
||||
" location = workspace_region,\n",
|
||||
" location = workspace_region, \n",
|
||||
" exist_ok=True)\n",
|
||||
"ws.get_details()"
|
||||
]
|
||||
@@ -160,7 +160,8 @@
|
||||
" resource_group = resource_group)\n",
|
||||
"\n",
|
||||
"# Persist the subscription id, resource group name, and workspace name in aml_config/config.json.\n",
|
||||
"ws.write_config()"
|
||||
"ws.write_config()\n",
|
||||
"write_config(path=\"/databricks/driver/aml_config/\",file_name=<alias_conf.cfg>)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -262,6 +263,13 @@
|
||||
"set_diagnostics_collection(send_diagnostics = True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Covert Pandas Dataframe to DataFlow"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -276,15 +284,16 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import azureml.dataprep as dprep\n",
|
||||
"# You can use `auto_read_file` which intelligently figures out delimiters and datatypes of a file.\n",
|
||||
"\n",
|
||||
"# The data referenced here was pulled from `sklearn.datasets.load_digits()`.\n",
|
||||
"simple_example_data_root = 'https://dprepdata.blob.core.windows.net/automl-notebook-data/'\n",
|
||||
"X_train = dprep.auto_read_file(simple_example_data_root + 'X.csv').skip(1) # Remove the header row.\n",
|
||||
"\n",
|
||||
"# You can also use `read_csv` and `to_*` transformations to read (with overridable delimiter)\n",
|
||||
"# and convert column types manually.\n",
|
||||
"# Here we read a comma delimited file and convert all columns to integers.\n",
|
||||
"y_train = dprep.read_csv(simple_example_data_root + 'y.csv').to_long(dprep.ColumnSelector(term='.*', use_regex = True))"
|
||||
"#Convert Pandas DataFrame to DataFlow\n",
|
||||
"#The read_pandas_dataframe reader can take a DataFrame and use it as the data source for a Dataflow.\n",
|
||||
"X_train = dprep.read_pandas_dataframe(pd.read_csv(simple_example_data_root + 'X.csv'), temp_folder='/dbfs/dataset_dataflowX_train') \n",
|
||||
"y_train = dprep.read_pandas_dataframe(pd.read_csv(simple_example_data_root + 'y.csv'), temp_folder='/dbfs/dataset_dataflowy_train').to_long(dprep.ColumnSelector(term='.*', use_regex = True))\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -301,7 +310,16 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"X_train.skip(1).head(5)"
|
||||
"X_train.get_profile()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"y_train.get_profile()"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -339,14 +357,14 @@
|
||||
" debug_log = 'automl_errors.log',\n",
|
||||
" primary_metric = 'AUC_weighted',\n",
|
||||
" iteration_timeout_minutes = 10,\n",
|
||||
" iterations = 5,\n",
|
||||
" n_cross_validations = 2,\n",
|
||||
" max_concurrent_iterations = 4, #change it based on number of worker nodes\n",
|
||||
" iterations = 30,\n",
|
||||
" preprocess = True,\n",
|
||||
" n_cross_validations = 10,\n",
|
||||
" max_concurrent_iterations = 2, #change it based on number of worker nodes\n",
|
||||
" verbosity = logging.INFO,\n",
|
||||
" spark_context=sc, #databricks/spark related\n",
|
||||
" X = X_train, \n",
|
||||
" y = y_train,\n",
|
||||
" enable_cache=False,\n",
|
||||
" path = project_folder)"
|
||||
]
|
||||
},
|
||||
@@ -356,8 +374,7 @@
|
||||
"source": [
|
||||
"## Train the Models\n",
|
||||
"\n",
|
||||
"Call the `submit` method on the experiment object and pass the run configuration. Execution of local runs is synchronous. Depending on the data and the number of iterations this can run for a while.\n",
|
||||
"In this example, we specify `show_output = True` to print currently running iterations to the console."
|
||||
"Call the `submit` method on the experiment object and pass the run configuration. Execution of local runs is synchronous. Depending on the data and the number of iterations this can run for a while."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -366,7 +383,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"local_run = experiment.submit(automl_config, show_output = True) # for higher runs please use show_output=False and use the below"
|
||||
"local_run = experiment.submit(automl_config, show_output = False) # for higher runs please use show_output=False and use the below"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -419,6 +436,7 @@
|
||||
"metricslist = {}\n",
|
||||
"for run in children:\n",
|
||||
" properties = run.get_properties()\n",
|
||||
" #print(properties)\n",
|
||||
" metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)} \n",
|
||||
" metricslist[int(properties['iteration'])] = metrics\n",
|
||||
"\n",
|
||||
@@ -694,11 +712,11 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.0"
|
||||
"version": "3.6.5"
|
||||
},
|
||||
"name": "auto-ml-classification-local-adb",
|
||||
"notebookId": 3888835968049288
|
||||
"notebookId": 2733885892129020
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
"nbformat_minor": 1
|
||||
}
|
||||
Reference in New Issue
Block a user