From b770d2d378d642dcc1bdda733c99cc80ca239dc3 Mon Sep 17 00:00:00 2001
From: Zoltan Borok-Nagy
Date: Fri, 10 Apr 2020 12:22:58 +0200
Subject: [PATCH] Put transactional tables into 'managed' directory

HIVE-22794 disallows ACID tables outside of the 'managed' warehouse
directory. This change updates data loading to make it conform to the
new rules.

The following tests had to be modified to use the new paths:
 * AnalyzeDDLTest.TestCreateTableLikeFileOrc()
 * create-table-like-file-orc.test

Change-Id: Id3b65f56bf7f225b1d29aa397f987fdd7eb7176c
Reviewed-on: http://gerrit.cloudera.org:8080/15708
Reviewed-by: Impala Public Jenkins
Tested-by: Impala Public Jenkins
---
 .../impala/analysis/AnalyzeDDLTest.java       |  4 +--
 testdata/bin/generate-schema-statements.py    | 35 +++++++++----------
 .../functional/functional_schema_template.sql |  6 ++++
 .../QueryTest/create-table-like-file-orc.test | 23 ++----------
 4 files changed, 28 insertions(+), 40 deletions(-)

diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
index 7b2709a9e..b5b65fe23 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
@@ -2036,10 +2036,10 @@ public class AnalyzeDDLTest extends FrontendTestBase {
 
     // Inferring primitive and complex types
     AnalyzesOk("create table if not exists newtbl_DNE like orc " +
-        "'/test-warehouse/alltypestiny_orc_def/year=2009/month=1/" +
+        "'/test-warehouse/managed/alltypestiny_orc_def/year=2009/month=1/" +
         "base_0000001/bucket_00000_0'");
     AnalyzesOk("create table if not exists newtbl_DNE like orc " +
-        "'/test-warehouse/complextypestbl_orc_def/base_0000001/bucket_00000_0'");
+        "'/test-warehouse/managed/complextypestbl_orc_def/base_0000001/bucket_00000_0'");
 
     // check invalid paths
     AnalysisError("create table if not exists functional.zipcode_incomes like ORC " +
diff --git a/testdata/bin/generate-schema-statements.py b/testdata/bin/generate-schema-statements.py
index 811349866..590445c70 100755
--- a/testdata/bin/generate-schema-statements.py
+++ b/testdata/bin/generate-schema-statements.py
@@ -315,13 +315,6 @@ def build_table_template(file_format, columns, partition_columns, row_format,
     # Kudu's test tables are managed.
     external = ""
 
-  # ORC tables are full ACID by default.
-  if (HIVE_MAJOR_VERSION == 3 and
-      file_format == 'orc' and
-      'transactional' not in tblproperties):
-    external = ""
-    tblproperties['transactional'] = 'true'
-
   all_tblproperties = []
   for key, value in tblproperties.iteritems():
     all_tblproperties.append("'{0}' = '{1}'".format(key, value))
@@ -655,15 +648,6 @@ def generate_statements(output_name, test_vectors, sections,
       force_reload = options.force_reload or (partition_columns and not alter) or \
          file_format == 'kudu'
 
-      hdfs_location = '{0}.{1}{2}'.format(db_name, table_name, db_suffix)
-      # hdfs file names for functional datasets are stored
-      # directly under /test-warehouse
-      # TODO: We should not need to specify the hdfs file path in the schema file.
-      # This needs to be done programmatically.
-      if data_set == 'functional':
-        hdfs_location = hdfs_location.split('.')[-1]
-      data_path = os.path.join(options.hive_warehouse_dir, hdfs_location)
-
       # Empty tables (tables with no "LOAD" sections) are assumed to be used for insert
       # testing. Since Impala currently only supports inserting into TEXT, PARQUET and
       # HBASE we need to create these tables with a supported insert format.
@@ -677,6 +661,23 @@ def generate_statements(output_name, test_vectors, sections,
         create_file_format = 'text'
       tblproperties = parse_table_properties(create_file_format, table_properties)
+      # ORC tables are full ACID by default.
+      if (HIVE_MAJOR_VERSION == 3 and
+          create_file_format == 'orc' and
+          'transactional' not in tblproperties):
+        tblproperties['transactional'] = 'true'
+
+      hdfs_location = '{0}.{1}{2}'.format(db_name, table_name, db_suffix)
+      # hdfs file names for functional datasets are stored
+      # directly under /test-warehouse
+      # TODO: We should not need to specify the hdfs file path in the schema file.
+      # This needs to be done programmatically.
+      if data_set == 'functional':
+        hdfs_location = hdfs_location.split('.')[-1]
+      # Transactional tables need to be put under the 'managed' directory.
+      if is_transactional(tblproperties):
+        hdfs_location = os.path.join('managed', hdfs_location)
+      data_path = os.path.join(options.hive_warehouse_dir, hdfs_location)
 
       output = impala_create
       if create_hive or file_format == 'hbase':
@@ -684,8 +685,6 @@ def generate_statements(output_name, test_vectors, sections,
       elif codec == 'lzo':
         # Impala CREATE TABLE doesn't allow INPUTFORMAT.
         output = hive_output
-      elif is_transactional(tblproperties):
-        output = hive_output
 
       # TODO: Currently, Kudu does not support partitioned tables via Impala.
       # If a CREATE_KUDU section was provided, assume it handles the partition columns
diff --git a/testdata/datasets/functional/functional_schema_template.sql b/testdata/datasets/functional/functional_schema_template.sql
index 6cf0b4b28..e3dc3dae7 100644
--- a/testdata/datasets/functional/functional_schema_template.sql
+++ b/testdata/datasets/functional/functional_schema_template.sql
@@ -332,6 +332,8 @@ LOCATION '{hdfs_location}';
 ALTER TABLE {table_name}_tmp ADD IF NOT EXISTS PARTITION (year=2009, month=1);
 ALTER TABLE {table_name}_tmp ADD IF NOT EXISTS PARTITION (year=2009, month=2);
 ALTER TABLE {table_name}_tmp ADD IF NOT EXISTS PARTITION (year=2009, month=3);
+---- TABLE_PROPERTIES
+transactional=false
 ---- DEPENDENT_LOAD
 USE {db_name}{db_suffix};
 -- Step 4: Stream the data from tmp text table to desired format tmp table
@@ -458,6 +460,8 @@ USE {db_name}{db_suffix};
 ALTER TABLE {table_name}_tmp ADD IF NOT EXISTS PARTITION (year=2009, month=1);
 ALTER TABLE {table_name}_tmp ADD IF NOT EXISTS PARTITION (year=2009, month=2);
 ALTER TABLE {table_name}_tmp ADD IF NOT EXISTS PARTITION (year=2009, month=3);
+---- TABLE_PROPERTIES
+transactional=false
 ---- DEPENDENT_LOAD
 USE {db_name}{db_suffix};
 -- Step 4: Stream the data from tmp text table to desired format tmp table
@@ -1526,6 +1530,8 @@ CREATE EXTERNAL TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name} (
 ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
 STORED AS {file_format}
 LOCATION '{hdfs_location}';
+---- TABLE_PROPERTIES
+transactional=false
 ---- DEPENDENT_LOAD
 INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT * FROM {db_name}.{table_name};
 ---- LOAD
diff --git a/testdata/workloads/functional-query/queries/QueryTest/create-table-like-file-orc.test b/testdata/workloads/functional-query/queries/QueryTest/create-table-like-file-orc.test
index 5bbd7c156..3d3629972 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/create-table-like-file-orc.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/create-table-like-file-orc.test
@@ -23,7 +23,7 @@ STRING, STRING, STRING
 ====
 ---- QUERY
 create table $DATABASE.temp_chars_table like ORC
-'$FILESYSTEM_PREFIX/test-warehouse/chars_tiny_orc_def/base_0000001/bucket_00000_0'
+'$NAMENODE/$MANAGED_WAREHOUSE_DIR/chars_tiny_orc_def/base_0000001/bucket_00000_0'
 ---- RESULTS
 'Table has been created.'
 ====
@@ -115,9 +115,8 @@ STRING, STRING, STRING
 ====
 ---- QUERY
 create external table transactional_complextypes_clone like ORC
-'$FILESYSTEM_PREFIX/test-warehouse/complextypestbl_orc_def/base_0000001/bucket_00000_0'
-stored as orc
-location '$FILESYSTEM_PREFIX/test-warehouse/complextypestbl_orc_def/';
+'$NAMENODE/$MANAGED_WAREHOUSE_DIR/complextypestbl_orc_def/base_0000001/bucket_00000_0'
+stored as orc;
 ---- RESULTS
 'Table has been created.'
 ====
@@ -133,19 +132,3 @@ describe transactional_complextypes_clone
 ---- TYPES
 STRING, STRING, STRING
 ====
----- QUERY
-select originaltransaction, rowid, `row`.id from transactional_complextypes_clone;
----- LABELS
-originaltransaction, rowid, row.id
----- RESULTS
-1,0,8
-1,0,1
-1,1,2
-1,2,3
-1,3,4
-1,4,5
-1,5,6
-1,6,7
----- TYPES
-BIGINT, BIGINT, BIGINT
-====
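
For reference, below is a minimal standalone sketch of the data-path logic this patch introduces in generate-schema-statements.py, for readers who want to trace it outside the diff. It is not part of the change itself: the helper name compute_data_path(), the module-level HIVE_MAJOR_VERSION constant, and the example arguments are assumptions made for illustration; the real script wires these values up from its options and environment.

#!/usr/bin/env python
# Illustrative sketch (not repository code): mirrors how the schema generator
# now places transactional tables under the 'managed' warehouse directory.
import os

HIVE_MAJOR_VERSION = 3  # assumed here; the real script detects the Hive version


def is_transactional(tblproperties):
  # Approximates the script's helper: a table is transactional when the
  # 'transactional' table property is set to 'true'.
  return tblproperties.get('transactional', '').lower() == 'true'


def compute_data_path(hive_warehouse_dir, db_name, db_suffix, table_name,
                      data_set, create_file_format, tblproperties):
  # ORC tables default to full ACID on Hive 3 unless the schema template
  # overrides it (e.g. the sections above that now set transactional=false).
  if (HIVE_MAJOR_VERSION == 3 and create_file_format == 'orc' and
      'transactional' not in tblproperties):
    tblproperties['transactional'] = 'true'

  hdfs_location = '{0}.{1}{2}'.format(db_name, table_name, db_suffix)
  # Functional-dataset files live directly under the warehouse root.
  if data_set == 'functional':
    hdfs_location = hdfs_location.split('.')[-1]
  # HIVE-22794: transactional tables must live under the 'managed' directory.
  if is_transactional(tblproperties):
    hdfs_location = os.path.join('managed', hdfs_location)
  return os.path.join(hive_warehouse_dir, hdfs_location)


if __name__ == '__main__':
  # Example: a functional-dataset ORC table resolves to a 'managed' path,
  # matching the locations used in the updated tests, e.g.
  # /test-warehouse/managed/alltypestiny_orc_def
  print(compute_data_path('/test-warehouse', 'functional', '_orc_def',
                          'alltypestiny', 'functional', 'orc', {}))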