Files
impala/testdata/workloads/functional-query/queries/QueryTest/acid-compaction.test
Zoltan Borok-Nagy 329bb41294 IMPALA-10115: Impala should check file schema as well to check full ACIDv2 files
Currently Impala checks the file metadata field 'hive.acid.version' to decide
whether a file uses the full ACID schema. There are cases when Hive forgets to
set this value for full ACID files, e.g. query-based compactions.

So it's more robust to check the schema elements instead of the metadata
field. Also, Hive sometimes writes the schema with different character
cases, e.g. originalTransaction vs originaltransaction, so we should
rather compare the column names in a case-insensitive way.

Testing:
* added test for full ACID compaction
* added test_full_acid_schema_without_file_metadata_tag to test full
  ACID file without metadata 'hive.acid.version'

Change-Id: I52642c1755599efd28fa2c90f13396cfe0f5fa14
Reviewed-on: http://gerrit.cloudera.org:8080/16383
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
2020-09-01 22:27:27 +00:00

109 lines
2.0 KiB
Plaintext

====
---- HIVE_QUERY
use $DATABASE;
# Create an insert-only transactional table and load it with three separate inserts.
create table tt (x int) tblproperties (
'transactional'='true',
'transactional_properties'='insert_only');
insert into tt values (1);
insert into tt values (2);
insert into tt values (3);
====
---- QUERY
# Baseline before compaction: all three rows written by Hive must be readable.
invalidate metadata tt;
select * from tt;
---- RESULTS
1
2
3
====
---- HIVE_QUERY
use $DATABASE;
# Run a major compaction in Hive and wait for it to complete.
alter table tt compact 'major' and wait;
====
---- QUERY
# The compacted table must still return exactly the same rows.
refresh tt;
select * from tt;
---- RESULTS
1
2
3
====
---- QUERY
# After the major compaction a single file is expected, located in a
# base_0000003_vN directory.
show files in tt;
---- LABELS
Path,Size,Partition
---- RESULTS
row_regex:'$NAMENODE/$MANAGED_WAREHOUSE_DIR/$DATABASE.db/tt/base_0000003_v\d+/000000_0','\d+B',''
---- TYPES
STRING,STRING,STRING
====
---- HIVE_QUERY
use $DATABASE;
# Start from a table created without any transactional properties.
create table upgraded_table (x int);
insert into upgraded_table values (1);
# Upgrade the table to insert-only ACID when there are already values in it.
# NOTE(review): EXTERNAL is set to FALSE in the same statement, presumably because
# a transactional table has to be a managed table. Confirm against Hive docs.
alter table upgraded_table set tblproperties
('transactional' = 'true', 'transactional_properties' = 'insert_only',
'EXTERNAL'='FALSE');
insert into upgraded_table values (2);
insert into upgraded_table values (3);
====
---- QUERY
# The row inserted before the upgrade and the two rows inserted after it must
# all be readable.
invalidate metadata upgraded_table;
select * from upgraded_table;
---- RESULTS
1
2
3
====
---- HIVE_QUERY
use $DATABASE;
# Run a major compaction in Hive and wait for it to complete.
alter table upgraded_table compact 'major' and wait;
====
---- QUERY
# The compacted table must still return exactly the same rows.
refresh upgraded_table;
select * from upgraded_table;
---- RESULTS
1
2
3
====
---- HIVE_QUERY
use $DATABASE;
# Create a transactional ORC table without the insert_only property and load it
# with three separate inserts.
create table full_acid (x int) stored as orc tblproperties('transactional'='true');
insert into full_acid values (1);
insert into full_acid values (2);
insert into full_acid values (3);
====
---- QUERY
# Baseline before compaction: all three rows written by Hive must be readable.
invalidate metadata full_acid;
select * from full_acid;
---- RESULTS
1
2
3
====
---- HIVE_QUERY
use $DATABASE;
# Run a major compaction in Hive and wait for it to complete.
alter table full_acid compact 'major' and wait;
====
---- QUERY
# The compacted files must still be readable and return exactly the same rows.
refresh full_acid;
select * from full_acid;
---- RESULTS
1
2
3
====
---- QUERY
# After the major compaction a single file is expected in a base_0000003_vN
# directory. The file name itself is not pinned down by the pattern.
show files in full_acid;
---- LABELS
Path,Size,Partition
---- RESULTS
row_regex:'$NAMENODE/$MANAGED_WAREHOUSE_DIR/$DATABASE.db/full_acid/base_0000003_v\d+/.*','\d+B',''
---- TYPES
STRING,STRING,STRING
====