mirror of
https://github.com/apache/impala.git
synced 2026-02-01 21:00:29 -05:00
HiveCatalog is one of Iceberg's catalog implementations. It uses the Hive metastore and it is the recommended catalog implementation when the table data is stored in object stores like S3. This commit updates the Iceberg version to a newer one, and it also retrieves Iceberg from the CDP distribution because that version of Iceberg is built against Hive 3 (Impala is only compatible with Hive 3). This commit makes HiveCatalog the default Iceberg catalog in Impala because it can be used in more environments (e.g. cloud stores), and it is more featureful. Also, other engines that store their table metadata in HMS will probably use HiveCatalog as well. Tables stored in HiveCatalog are similar to Kudu tables with HMS integration, i.e. modifying an Iceberg table via the Iceberg APIs also modifies the HMS table. So in CatalogOpExecutor we handle such Iceberg tables similarly to integrated Kudu tables. Testing: * Added e2e tests for creating, writing, and altering Iceberg tables * Added SHOW CREATE TABLE tests Change-Id: Ie574589a1751aaa9ccbd34a89c6819714d103197 Reviewed-on: http://gerrit.cloudera.org:8080/16721 Reviewed-by: wangsheng <skyyws@163.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
404 lines
10 KiB
Plaintext
404 lines
10 KiB
Plaintext
====
|
|
---- QUERY
|
|
CREATE TABLE iceberg_hadoop_tables(
|
|
level STRING,
|
|
event_time TIMESTAMP,
|
|
register_time DATE,
|
|
message STRING,
|
|
price DECIMAL(8,1),
|
|
map_test MAP <STRING, array <STRING>>,
|
|
struct_test STRUCT <f1: BIGINT, f2: BIGINT>
|
|
)
|
|
PARTITION BY SPEC
|
|
(
|
|
level IDENTITY,
|
|
level TRUNCATE 10,
|
|
event_time IDENTITY,
|
|
event_time HOUR,
|
|
event_time BUCKET 1000,
|
|
register_time DAY
|
|
)
|
|
STORED AS ICEBERG
|
|
TBLPROPERTIES('iceberg.catalog'='hadoop.tables');
|
|
---- RESULTS
|
|
'Table has been created.'
|
|
====
|
|
---- QUERY
|
|
DESCRIBE iceberg_hadoop_tables;
|
|
---- RESULTS
|
|
'level','string',''
|
|
'event_time','timestamp',''
|
|
'register_time','date',''
|
|
'message','string',''
|
|
'price','decimal(8,1)',''
|
|
'map_test','map<string,array<string>>',''
|
|
'struct_test','struct<\n f1:bigint,\n f2:bigint\n>',''
|
|
---- TYPES
|
|
STRING,STRING,STRING
|
|
====
|
|
---- QUERY
|
|
SHOW PARTITIONS iceberg_hadoop_tables;
|
|
---- RESULTS
|
|
0,1,1000,'level','IDENTITY'
|
|
0,1,1001,'level_trunc','TRUNCATE 10'
|
|
0,2,1002,'event_time','IDENTITY'
|
|
0,2,1003,'event_time_hour','HOUR'
|
|
0,2,1004,'event_time_bucket','BUCKET 1000'
|
|
0,3,1005,'register_time_day','DAY'
|
|
---- TYPES
|
|
BIGINT,BIGINT,BIGINT,STRING,STRING
|
|
====
|
|
---- QUERY
|
|
DROP TABLE iceberg_hadoop_tables;
|
|
---- RESULTS
|
|
'Table has been dropped.'
|
|
====
|
|
---- QUERY
|
|
CREATE TABLE iceberg_hadoop_tables(
|
|
level STRING
|
|
)
|
|
STORED AS ICEBERG
|
|
TBLPROPERTIES('iceberg.catalog'='hadoop.tables');
|
|
DESCRIBE iceberg_hadoop_tables;
|
|
---- RESULTS
|
|
'level','string',''
|
|
---- TYPES
|
|
STRING,STRING,STRING
|
|
====
|
|
---- QUERY
|
|
CREATE EXTERNAL TABLE iceberg_hadoop_tbls_external(
|
|
level STRING
|
|
)
|
|
STORED AS ICEBERG
|
|
TBLPROPERTIES('iceberg.catalog'='hadoop.tables');
|
|
---- RESULTS
|
|
'Location is necessary for external iceberg table.'
|
|
====
|
|
---- QUERY
|
|
CREATE TABLE iceberg_hadoop_tbls_with_loc(
|
|
level STRING
|
|
)
|
|
PARTITION BY SPEC
|
|
(
|
|
level IDENTITY,
|
|
level BUCKET 12345,
|
|
level TRUNCATE 15
|
|
)
|
|
STORED AS ICEBERG
|
|
LOCATION '/$DATABASE.iceberg_test_with_location'
|
|
TBLPROPERTIES('iceberg.catalog'='hadoop.tables');
|
|
CREATE EXTERNAL TABLE iceberg_hadoop_tbls_external(
|
|
level STRING
|
|
)
|
|
STORED AS ICEBERG
|
|
LOCATION '/$DATABASE.iceberg_test_with_location'
|
|
TBLPROPERTIES('iceberg.catalog'='hadoop.tables');
|
|
---- RESULTS
|
|
'Table has been created.'
|
|
====
|
|
---- QUERY
|
|
DESCRIBE iceberg_hadoop_tbls_external;
|
|
---- RESULTS
|
|
'level','string',''
|
|
---- TYPES
|
|
STRING,STRING,STRING
|
|
====
|
|
---- QUERY
|
|
SHOW PARTITIONS iceberg_hadoop_tbls_external;
|
|
---- RESULTS
|
|
0,1,1000,'level','IDENTITY'
|
|
0,1,1001,'level_bucket','BUCKET 12345'
|
|
0,1,1002,'level_trunc','TRUNCATE 15'
|
|
---- TYPES
|
|
BIGINT,BIGINT,BIGINT,STRING,STRING
|
|
====
|
|
---- QUERY
|
|
CREATE EXTERNAL TABLE iceberg_hadoop_tbls_external_empty_col
|
|
STORED AS ICEBERG
|
|
LOCATION '/$DATABASE.iceberg_test_with_location'
|
|
TBLPROPERTIES('iceberg.catalog'='hadoop.tables');
|
|
---- RESULTS
|
|
'Table has been created.'
|
|
====
|
|
---- QUERY
|
|
DESCRIBE iceberg_hadoop_tbls_external_empty_col;
|
|
---- RESULTS
|
|
'level','string',''
|
|
---- TYPES
|
|
STRING,STRING,STRING
|
|
====
|
|
---- QUERY
|
|
SHOW PARTITIONS iceberg_hadoop_tbls_external_empty_col;
|
|
---- RESULTS
|
|
0,1,1000,'level','IDENTITY'
|
|
0,1,1001,'level_bucket','BUCKET 12345'
|
|
0,1,1002,'level_trunc','TRUNCATE 15'
|
|
---- TYPES
|
|
BIGINT,BIGINT,BIGINT,STRING,STRING
|
|
====
|
|
---- QUERY
|
|
DROP TABLE iceberg_hadoop_tbls_with_loc;
|
|
DROP TABLE iceberg_hadoop_tbls_external;
|
|
DROP TABLE iceberg_hadoop_tbls_external_empty_col;
|
|
---- RESULTS
|
|
'Table has been dropped.'
|
|
====
|
|
---- QUERY
|
|
CREATE TABLE iceberg_hadoop_catalog(
|
|
level STRING,
|
|
event_time TIMESTAMP,
|
|
register_time DATE,
|
|
message STRING,
|
|
price DECIMAL(8,1),
|
|
map_test MAP <STRING, array <STRING>>,
|
|
struct_test STRUCT <f1: BIGINT, f2: BIGINT>
|
|
)
|
|
PARTITION BY SPEC
|
|
(
|
|
level IDENTITY,
|
|
level TRUNCATE 10,
|
|
event_time IDENTITY,
|
|
event_time HOUR,
|
|
event_time BUCKET 1000,
|
|
register_time DAY
|
|
)
|
|
STORED AS ICEBERG
|
|
TBLPROPERTIES('iceberg.catalog'='hadoop.catalog',
|
|
'iceberg.catalog_location'='/$DATABASE/hadoop_catalog_test');
|
|
---- RESULTS
|
|
'Table has been created.'
|
|
====
|
|
---- QUERY
|
|
DESCRIBE iceberg_hadoop_catalog;
|
|
---- RESULTS
|
|
'level','string',''
|
|
'event_time','timestamp',''
|
|
'register_time','date',''
|
|
'message','string',''
|
|
'price','decimal(8,1)',''
|
|
'map_test','map<string,array<string>>',''
|
|
'struct_test','struct<\n f1:bigint,\n f2:bigint\n>',''
|
|
---- TYPES
|
|
STRING,STRING,STRING
|
|
====
|
|
---- QUERY
|
|
SHOW PARTITIONS iceberg_hadoop_catalog;
|
|
---- RESULTS
|
|
0,1,1000,'level','IDENTITY'
|
|
0,1,1001,'level_trunc','TRUNCATE 10'
|
|
0,2,1002,'event_time','IDENTITY'
|
|
0,2,1003,'event_time_hour','HOUR'
|
|
0,2,1004,'event_time_bucket','BUCKET 1000'
|
|
0,3,1005,'register_time_day','DAY'
|
|
---- TYPES
|
|
BIGINT,BIGINT,BIGINT,STRING,STRING
|
|
====
|
|
---- QUERY
|
|
DROP TABLE iceberg_hadoop_catalog;
|
|
---- RESULTS
|
|
'Table has been dropped.'
|
|
====
|
|
---- QUERY
|
|
CREATE TABLE iceberg_hadoop_catalog(
|
|
level STRING
|
|
)
|
|
PARTITION BY SPEC
|
|
(
|
|
level IDENTITY,
|
|
level BUCKET 12345,
|
|
level TRUNCATE 10
|
|
)
|
|
STORED AS ICEBERG
|
|
TBLPROPERTIES('iceberg.catalog'='hadoop.catalog',
|
|
'iceberg.catalog_location'='/$DATABASE/hadoop_catalog_test');
|
|
CREATE EXTERNAL TABLE iceberg_hadoop_cat_external(
|
|
level STRING
|
|
)
|
|
STORED AS ICEBERG
|
|
TBLPROPERTIES('iceberg.catalog'='hadoop.catalog',
|
|
'iceberg.catalog_location'='/$DATABASE/hadoop_catalog_test', 'iceberg.table_identifier'='$DATABASE.iceberg_hadoop_catalog');
|
|
---- RESULTS
|
|
'Table has been created.'
|
|
====
|
|
---- QUERY
|
|
DESCRIBE iceberg_hadoop_cat_external;
|
|
---- RESULTS
|
|
'level','string',''
|
|
---- TYPES
|
|
STRING,STRING,STRING
|
|
====
|
|
---- QUERY
|
|
SHOW PARTITIONS iceberg_hadoop_cat_external;
|
|
---- RESULTS
|
|
0,1,1000,'level','IDENTITY'
|
|
0,1,1001,'level_bucket','BUCKET 12345'
|
|
0,1,1002,'level_trunc','TRUNCATE 10'
|
|
---- TYPES
|
|
BIGINT,BIGINT,BIGINT,STRING,STRING
|
|
====
|
|
---- QUERY
|
|
DROP TABLE iceberg_hadoop_catalog;
|
|
DROP TABLE iceberg_hadoop_cat_external;
|
|
---- RESULTS
|
|
'Table has been dropped.'
|
|
====
|
|
---- QUERY
|
|
CREATE TABLE iceberg_hadoop_cat_drop(
|
|
level STRING
|
|
)
|
|
STORED AS ICEBERG
|
|
TBLPROPERTIES('iceberg.catalog'='hadoop.catalog',
|
|
'iceberg.catalog_location'='/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test');
|
|
CREATE TABLE iceberg_hadoop_cat_query(
|
|
level STRING
|
|
)
|
|
STORED AS ICEBERG
|
|
TBLPROPERTIES('iceberg.catalog'='hadoop.catalog',
|
|
'iceberg.catalog_location'='/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test');
|
|
DROP TABLE iceberg_hadoop_cat_drop;
|
|
SELECT * FROM iceberg_hadoop_cat_query;
|
|
---- TYPES
|
|
string
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
DESCRIBE FORMATTED iceberg_hadoop_cat_query;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'Location: ','$NAMENODE/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test/$DATABASE/iceberg_hadoop_cat_query','NULL'
|
|
'','iceberg.catalog_location','/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test'
|
|
'','iceberg.file_format ','parquet '
|
|
'','iceberg.catalog ','hadoop.catalog '
|
|
---- TYPES
|
|
string, string, string
|
|
====
|
|
---- QUERY
|
|
CREATE TABLE iceberg_hadoop_cat_with_ident(
|
|
level STRING
|
|
)
|
|
STORED AS ICEBERG
|
|
TBLPROPERTIES('iceberg.catalog'='hadoop.catalog',
|
|
'iceberg.catalog_location'='/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test',
|
|
'iceberg.table_identifier'='org.db.tbl');
|
|
DESCRIBE FORMATTED iceberg_hadoop_cat_with_ident;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'Location: ','$NAMENODE/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test/org/db/tbl','NULL'
|
|
'','iceberg.catalog_location','/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test'
|
|
'','iceberg.file_format ','parquet '
|
|
'','iceberg.catalog ','hadoop.catalog '
|
|
---- TYPES
|
|
string, string, string
|
|
====
|
|
---- QUERY
|
|
CREATE TABLE iceberg_hadoop_cat_with_orc(
|
|
level STRING,
|
|
event_time TIMESTAMP,
|
|
register_time DATE,
|
|
message STRING,
|
|
price DECIMAL(8,1),
|
|
map_test MAP <STRING, array <STRING>>,
|
|
struct_test STRUCT <f1: BIGINT, f2: BIGINT>
|
|
)
|
|
PARTITION BY SPEC
|
|
(
|
|
level IDENTITY,
|
|
event_time IDENTITY,
|
|
event_time HOUR,
|
|
register_time DAY
|
|
)
|
|
STORED AS ICEBERG
|
|
TBLPROPERTIES('iceberg.file_format'='orc','iceberg.catalog'='hadoop.catalog',
|
|
'iceberg.catalog_location'='/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test');
|
|
---- RESULTS
|
|
'Table has been created.'
|
|
====
|
|
---- QUERY
|
|
DESCRIBE iceberg_hadoop_cat_with_orc;
|
|
---- RESULTS
|
|
'level','string',''
|
|
'event_time','timestamp',''
|
|
'register_time','date',''
|
|
'message','string',''
|
|
'price','decimal(8,1)',''
|
|
'map_test','map<string,array<string>>',''
|
|
'struct_test','struct<\n f1:bigint,\n f2:bigint\n>',''
|
|
---- TYPES
|
|
STRING,STRING,STRING
|
|
====
|
|
---- QUERY
|
|
SHOW PARTITIONS iceberg_hadoop_cat_with_orc;
|
|
---- RESULTS
|
|
0,1,1000,'level','IDENTITY'
|
|
0,2,1001,'event_time','IDENTITY'
|
|
0,2,1002,'event_time_hour','HOUR'
|
|
0,3,1003,'register_time_day','DAY'
|
|
---- TYPES
|
|
BIGINT,BIGINT,BIGINT,STRING,STRING
|
|
====
|
|
---- QUERY
|
|
DROP TABLE iceberg_hadoop_cat_with_orc;
|
|
---- RESULTS
|
|
'Table has been dropped.'
|
|
====
|
|
---- QUERY
|
|
# Default transactional property doesn't affect Iceberg tables.
|
|
set default_transactional_type=insert_only;
|
|
create table iceberg_default_trans (i int)
|
|
stored as iceberg
|
|
tblproperties('iceberg.catalog'='hadoop.tables');
|
|
---- RESULTS
|
|
'Table has been created.'
|
|
====
|
|
---- QUERY
|
|
describe formatted iceberg_default_trans
|
|
---- RESULTS: VERIFY_IS_NOT_IN
|
|
'','transactional ','true '
|
|
'','transactional_properties','insert_only '
|
|
---- TYPES
|
|
STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
CREATE TABLE iceberg_part_hive_cat(
|
|
level STRING,
|
|
event_time TIMESTAMP,
|
|
register_time DATE,
|
|
message STRING,
|
|
price DECIMAL(8,1),
|
|
map_test MAP <STRING, array <STRING>>,
|
|
struct_test STRUCT <f1: BIGINT, f2: BIGINT>
|
|
)
|
|
PARTITION BY SPEC
|
|
(
|
|
level IDENTITY,
|
|
event_time IDENTITY,
|
|
event_time HOUR,
|
|
register_time DAY
|
|
)
|
|
STORED AS ICEBERG
|
|
TBLPROPERTIES('iceberg.catalog'='hive.catalog')
|
|
---- RESULTS
|
|
'Table has been created.'
|
|
====
|
|
---- QUERY
|
|
DESCRIBE iceberg_part_hive_cat;
|
|
---- RESULTS
|
|
'level','string',''
|
|
'event_time','timestamp',''
|
|
'register_time','date',''
|
|
'message','string',''
|
|
'price','decimal(8,1)',''
|
|
'map_test','map<string,array<string>>',''
|
|
'struct_test','struct<\n f1:bigint,\n f2:bigint\n>',''
|
|
---- TYPES
|
|
STRING,STRING,STRING
|
|
====
|
|
---- QUERY
|
|
DESCRIBE FORMATTED iceberg_part_hive_cat;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'Location: ','$NAMENODE/test-warehouse/$DATABASE.db/iceberg_part_hive_cat','NULL'
|
|
row_regex:'','metadata_location ','$NAMENODE/test-warehouse/$DATABASE.db/iceberg_part_hive_cat/metadata/.*.metadata.json'
|
|
'','table_type ','ICEBERG '
|
|
'','iceberg.catalog ','hive.catalog '
|
|
---- TYPES
|
|
string, string, string
|
|
====
|