From 6a795915d678fec79a8bcefd339766d51e884e12 Mon Sep 17 00:00:00 2001 From: Dimitris Tsirogiannis Date: Tue, 24 Jun 2014 15:30:28 -0700 Subject: [PATCH] Fix loading data from snapshot for alltypesagg table. The alltypesagg table was not loaded correctly from a snapshot file due to a missing ALTER TABLE statement, thereby causing some tests to fail. Change-Id: I74066a99529f24fc268bb5779d3fb64fbd4f66b9 Reviewed-on: http://gerrit.ent.cloudera.com:8080/3248 Reviewed-by: Lenni Kuff Tested-by: jenkins Reviewed-on: http://gerrit.ent.cloudera.com:8080/3270 Reviewed-by: Dimitris Tsirogiannis --- testdata/bin/generate-schema-statements.py | 9 ++++++++- .../datasets/functional/functional_schema_template.sql | 3 ++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/testdata/bin/generate-schema-statements.py b/testdata/bin/generate-schema-statements.py index 04ed9e24d..179db4dee 100755 --- a/testdata/bin/generate-schema-statements.py +++ b/testdata/bin/generate-schema-statements.py @@ -537,7 +537,14 @@ def generate_statements(output_name, test_vectors, sections, # moment, it assumes we're only using ALTER for partitioning the table. if alter and file_format != "hbase": use_table = 'USE {db_name};\n'.format(db_name=db) - output.create.append(use_table + alter.format(table_name=table_name)) + if output == hive_output and codec == 'lzo': + if not options.force_reload: + # If this is not a force reload use msck repair to add the partitions + # into the table. This is to work around a problem where the null + # partition cannot be explicitly created in Hive. + output.create.append(use_table + 'msck repair table %s;' % (table_name,)) + else: + output.create.append(use_table + alter.format(table_name=table_name)) # If the directory already exists in HDFS, assume that data files already exist # and skip loading the data. 
Otherwise, the data is generated using either an diff --git a/testdata/datasets/functional/functional_schema_template.sql b/testdata/datasets/functional/functional_schema_template.sql index 53f731fd3..321edf0b0 100644 --- a/testdata/datasets/functional/functional_schema_template.sql +++ b/testdata/datasets/functional/functional_schema_template.sql @@ -457,6 +457,7 @@ ALTER TABLE {table_name} ADD IF NOT EXISTS PARTITION(year=2010, month=1, day=7); ALTER TABLE {table_name} ADD IF NOT EXISTS PARTITION(year=2010, month=1, day=8); ALTER TABLE {table_name} ADD IF NOT EXISTS PARTITION(year=2010, month=1, day=9); ALTER TABLE {table_name} ADD IF NOT EXISTS PARTITION(year=2010, month=1, day=10); +ALTER TABLE {table_name} ADD IF NOT EXISTS PARTITION(year=2010, month=1, day=NULL); ---- DEPENDENT_LOAD INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} partition (year, month, day) SELECT id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month, day @@ -474,7 +475,7 @@ LOAD DATA LOCAL INPATH '{impala_home}/testdata/target/AllTypesAgg/100107.txt' OV LOAD DATA LOCAL INPATH '{impala_home}/testdata/target/AllTypesAgg/100108.txt' OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name} PARTITION(year=2010, month=1, day=8); LOAD DATA LOCAL INPATH '{impala_home}/testdata/target/AllTypesAgg/100109.txt' OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name} PARTITION(year=2010, month=1, day=9); LOAD DATA LOCAL INPATH '{impala_home}/testdata/target/AllTypesAgg/100110.txt' OVERWRITE INTO TABLE {db_name}{db_suffix}.{table_name} PARTITION(year=2010, month=1, day=10); -INSERT INTO TABLE {db_name}{db_suffix}.{table_name} partition (year, month, day) SELECT id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month, tinyint_col as day FROM {db_name}.{table_name} WHERE year=2010 and month=1 and tinyint_col IS NULL order by id; 
+INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} partition (year, month, day) SELECT id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col, timestamp_col, year, month, tinyint_col as day FROM {db_name}.{table_name} WHERE year=2010 and month=1 and day != '__HIVE_DEFAULT_PARTITION__' and tinyint_col IS NULL order by id; ==== ---- DATASET functional