Consolidate test and cluster logs under a single directory.

All logs, test results, and SQL files generated during data
loading and testing are now consolidated under a single new
directory, $IMPALA_HOME/logs. The goal is to simplify log
archiving in Jenkins runs as well as debugging.

The new structure is as follows:

$IMPALA_HOME/logs/cluster
- logs of Hadoop components and Impala

$IMPALA_HOME/logs/data_loading
- logs and SQL files produced in data loading

$IMPALA_HOME/logs/fe_tests
- logs and test output of Frontend unit tests

$IMPALA_HOME/logs/be_tests
- logs and test output of Backend unit tests

$IMPALA_HOME/logs/ee_tests
- logs and test output of end-to-end tests

$IMPALA_HOME/logs/custom_cluster_tests
- logs and test output of custom cluster tests

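For illustration only (not part of this change), a script could resolve its
per-component log directory under the consolidated tree along these lines;
the log_dir() helper below is hypothetical, but the directory names match
the layout above:

import os

# Sketch only: resolve a per-component log directory under the new
# consolidated $IMPALA_HOME/logs tree. The helper itself is hypothetical.
LOGS_ROOT = os.path.join(os.environ['IMPALA_HOME'], 'logs')

def log_dir(component):
  """Return the log directory for one component, creating it if needed."""
  path = os.path.join(LOGS_ROOT, component)
  if not os.path.isdir(path):
    os.makedirs(path)
  return path

data_loading_logs = log_dir('data_loading')  # $IMPALA_HOME/logs/data_loading
ee_test_logs = log_dir('ee_tests')           # $IMPALA_HOME/logs/ee_tests
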
I tested this change with a full data load, which completed
successfully.

Change-Id: Ief1f58f3320ec39d31b3c6bc6ef87f58ff7dfdfa
Reviewed-on: http://gerrit.cloudera.org:8080/2456
Reviewed-by: Alex Behm <alex.behm@cloudera.com>
Tested-by: Internal Jenkins

@@ -60,7 +60,7 @@ parser.add_option("--principal", default=None, dest="principal",
 options, args = parser.parse_args()
-DATA_LOAD_DIR = '/tmp/data-load-files'
+SQL_OUTPUT_DIR = os.environ['IMPALA_DATA_LOADING_SQL_DIR']
 WORKLOAD_DIR = options.workload_dir
 DATASET_DIR = options.dataset_dir
 TESTDATA_BIN_DIR = os.path.join(os.environ['IMPALA_HOME'], 'testdata/bin')
@@ -257,9 +257,11 @@ if __name__ == "__main__":
     start_time = time.time()
     dataset = get_dataset_for_workload(workload)
     generate_schema_statements(workload)
-    assert os.path.isdir(os.path.join(DATA_LOAD_DIR, dataset)), ("Data loading files "
-      "do not exist for (%s)" % dataset)
-    os.chdir(os.path.join(DATA_LOAD_DIR, dataset))
+    sql_dir = os.path.join(SQL_OUTPUT_DIR, dataset)
+    assert os.path.isdir(sql_dir),\
+      ("Could not find the generated SQL files for loading dataset '%s'.\
+      \nExpected to find the SQL files in: %s" % (dataset, sql_dir))
+    os.chdir(os.path.join(SQL_OUTPUT_DIR, dataset))
     copy_avro_schemas_to_hdfs(AVRO_SCHEMA_DIR)
     dataset_dir_contents = os.listdir(os.getcwd())
     load_file_substr = "%s-%s" % (workload, options.exploration_strategy)
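
For context (assumptions flagged below), the environment setup is expected to
export IMPALA_DATA_LOADING_SQL_DIR before this script runs. A minimal sketch;
the 'sql' subdirectory name is an assumption, since the commit only states
that data-loading SQL files now live under $IMPALA_HOME/logs/data_loading:

import os

# Sketch only: default IMPALA_DATA_LOADING_SQL_DIR to a directory under the
# consolidated log tree. The 'sql' subdirectory name is an assumption.
os.environ.setdefault(
    'IMPALA_DATA_LOADING_SQL_DIR',
    os.path.join(os.environ['IMPALA_HOME'], 'logs', 'data_loading', 'sql'))

# The new code path then resolves per-dataset SQL directories as
# <IMPALA_DATA_LOADING_SQL_DIR>/<dataset> and fails fast with a descriptive
# assertion if schema generation did not produce them.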