Files
impala/testdata/workloads/functional-query/queries/QueryTest/iceberg-create.test
Zoltan Borok-Nagy 4448b8755b IMPALA-10152: Add support for Iceberg HiveCatalog
HiveCatalog is one of Iceberg's catalog implementations. It uses
the Hive metastore and it is the recommended catalog implementation
when the table data is stored in object stores like S3.

This commit updates the Iceberg version to a newer one, and it also
retrieves Iceberg from the CDP distribution because that version of
Iceberg is built against Hive 3 (Impala is only compatible with
Hive 3).

This commit makes HiveCatalog the default Iceberg catalog in Impala
because it can be used in more environments (e.g. cloud stores),
and it is more featureful. Also, other engines that store their
table metadata in HMS will probably use HiveCatalog as well.

Tables stored in HiveCatalog are similar to Kudu tables with HMS
integration, i.e. modifying an Iceberg table via the Iceberg APIs
also modifies the HMS table. So in CatalogOpExecutor we handle
such Iceberg tables similarly to integrated Kudu tables.

Testing:
 * Added e2e tests for creating, writing, and altering Iceberg
   tables
 * Added SHOW CREATE TABLE tests

Change-Id: Ie574589a1751aaa9ccbd34a89c6819714d103197
Reviewed-on: http://gerrit.cloudera.org:8080/16721
Reviewed-by: wangsheng <skyyws@163.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
2020-11-20 21:40:28 +00:00

404 lines
10 KiB
Plaintext

====
---- QUERY
CREATE TABLE iceberg_hadoop_tables(
level STRING,
event_time TIMESTAMP,
register_time DATE,
message STRING,
price DECIMAL(8,1),
map_test MAP <STRING, array <STRING>>,
struct_test STRUCT <f1: BIGINT, f2: BIGINT>
)
PARTITION BY SPEC
(
level IDENTITY,
level TRUNCATE 10,
event_time IDENTITY,
event_time HOUR,
event_time BUCKET 1000,
register_time DAY
)
STORED AS ICEBERG
TBLPROPERTIES('iceberg.catalog'='hadoop.tables');
---- RESULTS
'Table has been created.'
====
---- QUERY
DESCRIBE iceberg_hadoop_tables;
---- RESULTS
'level','string',''
'event_time','timestamp',''
'register_time','date',''
'message','string',''
'price','decimal(8,1)',''
'map_test','map<string,array<string>>',''
'struct_test','struct<\n f1:bigint,\n f2:bigint\n>',''
---- TYPES
STRING,STRING,STRING
====
---- QUERY
SHOW PARTITIONS iceberg_hadoop_tables;
---- RESULTS
0,1,1000,'level','IDENTITY'
0,1,1001,'level_trunc','TRUNCATE 10'
0,2,1002,'event_time','IDENTITY'
0,2,1003,'event_time_hour','HOUR'
0,2,1004,'event_time_bucket','BUCKET 1000'
0,3,1005,'register_time_day','DAY'
---- TYPES
BIGINT,BIGINT,BIGINT,STRING,STRING
====
---- QUERY
DROP TABLE iceberg_hadoop_tables;
---- RESULTS
'Table has been dropped.'
====
---- QUERY
CREATE TABLE iceberg_hadoop_tables(
level STRING
)
STORED AS ICEBERG
TBLPROPERTIES('iceberg.catalog'='hadoop.tables');
DESCRIBE iceberg_hadoop_tables;
---- RESULTS
'level','string',''
---- TYPES
STRING,STRING,STRING
====
---- QUERY
CREATE EXTERNAL TABLE iceberg_hadoop_tbls_external(
level STRING
)
STORED AS ICEBERG
TBLPROPERTIES('iceberg.catalog'='hadoop.tables');
---- RESULTS
'Location is necessary for external iceberg table.'
====
---- QUERY
CREATE TABLE iceberg_hadoop_tbls_with_loc(
level STRING
)
PARTITION BY SPEC
(
level IDENTITY,
level BUCKET 12345,
level TRUNCATE 15
)
STORED AS ICEBERG
LOCATION '/$DATABASE.iceberg_test_with_location'
TBLPROPERTIES('iceberg.catalog'='hadoop.tables');
CREATE EXTERNAL TABLE iceberg_hadoop_tbls_external(
level STRING
)
STORED AS ICEBERG
LOCATION '/$DATABASE.iceberg_test_with_location'
TBLPROPERTIES('iceberg.catalog'='hadoop.tables');
---- RESULTS
'Table has been created.'
====
---- QUERY
DESCRIBE iceberg_hadoop_tbls_external;
---- RESULTS
'level','string',''
---- TYPES
STRING,STRING,STRING
====
---- QUERY
SHOW PARTITIONS iceberg_hadoop_tbls_external;
---- RESULTS
0,1,1000,'level','IDENTITY'
0,1,1001,'level_bucket','BUCKET 12345'
0,1,1002,'level_trunc','TRUNCATE 15'
---- TYPES
BIGINT,BIGINT,BIGINT,STRING,STRING
====
---- QUERY
CREATE EXTERNAL TABLE iceberg_hadoop_tbls_external_empty_col
STORED AS ICEBERG
LOCATION '/$DATABASE.iceberg_test_with_location'
TBLPROPERTIES('iceberg.catalog'='hadoop.tables');
---- RESULTS
'Table has been created.'
====
---- QUERY
DESCRIBE iceberg_hadoop_tbls_external_empty_col;
---- RESULTS
'level','string',''
---- TYPES
STRING,STRING,STRING
====
---- QUERY
SHOW PARTITIONS iceberg_hadoop_tbls_external_empty_col;
---- RESULTS
0,1,1000,'level','IDENTITY'
0,1,1001,'level_bucket','BUCKET 12345'
0,1,1002,'level_trunc','TRUNCATE 15'
---- TYPES
BIGINT,BIGINT,BIGINT,STRING,STRING
====
---- QUERY
DROP TABLE iceberg_hadoop_tbls_with_loc;
DROP TABLE iceberg_hadoop_tbls_external;
DROP TABLE iceberg_hadoop_tbls_external_empty_col;
---- RESULTS
'Table has been dropped.'
====
---- QUERY
CREATE TABLE iceberg_hadoop_catalog(
level STRING,
event_time TIMESTAMP,
register_time DATE,
message STRING,
price DECIMAL(8,1),
map_test MAP <STRING, array <STRING>>,
struct_test STRUCT <f1: BIGINT, f2: BIGINT>
)
PARTITION BY SPEC
(
level IDENTITY,
level TRUNCATE 10,
event_time IDENTITY,
event_time HOUR,
event_time BUCKET 1000,
register_time DAY
)
STORED AS ICEBERG
TBLPROPERTIES('iceberg.catalog'='hadoop.catalog',
'iceberg.catalog_location'='/$DATABASE/hadoop_catalog_test');
---- RESULTS
'Table has been created.'
====
---- QUERY
DESCRIBE iceberg_hadoop_catalog;
---- RESULTS
'level','string',''
'event_time','timestamp',''
'register_time','date',''
'message','string',''
'price','decimal(8,1)',''
'map_test','map<string,array<string>>',''
'struct_test','struct<\n f1:bigint,\n f2:bigint\n>',''
---- TYPES
STRING,STRING,STRING
====
---- QUERY
SHOW PARTITIONS iceberg_hadoop_catalog;
---- RESULTS
0,1,1000,'level','IDENTITY'
0,1,1001,'level_trunc','TRUNCATE 10'
0,2,1002,'event_time','IDENTITY'
0,2,1003,'event_time_hour','HOUR'
0,2,1004,'event_time_bucket','BUCKET 1000'
0,3,1005,'register_time_day','DAY'
---- TYPES
BIGINT,BIGINT,BIGINT,STRING,STRING
====
---- QUERY
DROP TABLE iceberg_hadoop_catalog;
---- RESULTS
'Table has been dropped.'
====
---- QUERY
CREATE TABLE iceberg_hadoop_catalog(
level STRING
)
PARTITION BY SPEC
(
level IDENTITY,
level BUCKET 12345,
level TRUNCATE 10
)
STORED AS ICEBERG
TBLPROPERTIES('iceberg.catalog'='hadoop.catalog',
'iceberg.catalog_location'='/$DATABASE/hadoop_catalog_test');
CREATE EXTERNAL TABLE iceberg_hadoop_cat_external(
level STRING
)
STORED AS ICEBERG
TBLPROPERTIES('iceberg.catalog'='hadoop.catalog',
'iceberg.catalog_location'='/$DATABASE/hadoop_catalog_test', 'iceberg.table_identifier'='$DATABASE.iceberg_hadoop_catalog');
---- RESULTS
'Table has been created.'
====
---- QUERY
DESCRIBE iceberg_hadoop_cat_external;
---- RESULTS
'level','string',''
---- TYPES
STRING,STRING,STRING
====
---- QUERY
SHOW PARTITIONS iceberg_hadoop_cat_external;
---- RESULTS
0,1,1000,'level','IDENTITY'
0,1,1001,'level_bucket','BUCKET 12345'
0,1,1002,'level_trunc','TRUNCATE 10'
---- TYPES
BIGINT,BIGINT,BIGINT,STRING,STRING
====
---- QUERY
DROP TABLE iceberg_hadoop_catalog;
DROP TABLE iceberg_hadoop_cat_external;
---- RESULTS
'Table has been dropped.'
====
---- QUERY
CREATE TABLE iceberg_hadoop_cat_drop(
level STRING
)
STORED AS ICEBERG
TBLPROPERTIES('iceberg.catalog'='hadoop.catalog',
'iceberg.catalog_location'='/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test');
CREATE TABLE iceberg_hadoop_cat_query(
level STRING
)
STORED AS ICEBERG
TBLPROPERTIES('iceberg.catalog'='hadoop.catalog',
'iceberg.catalog_location'='/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test');
DROP TABLE iceberg_hadoop_cat_drop;
SELECT * FROM iceberg_hadoop_cat_query;
---- TYPES
string
---- RESULTS
====
---- QUERY
DESCRIBE FORMATTED iceberg_hadoop_cat_query;
---- RESULTS: VERIFY_IS_SUBSET
'Location: ','$NAMENODE/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test/$DATABASE/iceberg_hadoop_cat_query','NULL'
'','iceberg.catalog_location','/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test'
'','iceberg.file_format ','parquet '
'','iceberg.catalog ','hadoop.catalog '
---- TYPES
string, string, string
====
---- QUERY
CREATE TABLE iceberg_hadoop_cat_with_ident(
level STRING
)
STORED AS ICEBERG
TBLPROPERTIES('iceberg.catalog'='hadoop.catalog',
'iceberg.catalog_location'='/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test',
'iceberg.table_identifier'='org.db.tbl');
DESCRIBE FORMATTED iceberg_hadoop_cat_with_ident;
---- RESULTS: VERIFY_IS_SUBSET
'Location: ','$NAMENODE/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test/org/db/tbl','NULL'
'','iceberg.catalog_location','/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test'
'','iceberg.file_format ','parquet '
'','iceberg.catalog ','hadoop.catalog '
---- TYPES
string, string, string
====
---- QUERY
CREATE TABLE iceberg_hadoop_cat_with_orc(
level STRING,
event_time TIMESTAMP,
register_time DATE,
message STRING,
price DECIMAL(8,1),
map_test MAP <STRING, array <STRING>>,
struct_test STRUCT <f1: BIGINT, f2: BIGINT>
)
PARTITION BY SPEC
(
level IDENTITY,
event_time IDENTITY,
event_time HOUR,
register_time DAY
)
STORED AS ICEBERG
TBLPROPERTIES('iceberg.file_format'='orc','iceberg.catalog'='hadoop.catalog',
'iceberg.catalog_location'='/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test');
---- RESULTS
'Table has been created.'
====
---- QUERY
DESCRIBE iceberg_hadoop_cat_with_orc;
---- RESULTS
'level','string',''
'event_time','timestamp',''
'register_time','date',''
'message','string',''
'price','decimal(8,1)',''
'map_test','map<string,array<string>>',''
'struct_test','struct<\n f1:bigint,\n f2:bigint\n>',''
---- TYPES
STRING,STRING,STRING
====
---- QUERY
SHOW PARTITIONS iceberg_hadoop_cat_with_orc;
---- RESULTS
0,1,1000,'level','IDENTITY'
0,2,1001,'event_time','IDENTITY'
0,2,1002,'event_time_hour','HOUR'
0,3,1003,'register_time_day','DAY'
---- TYPES
BIGINT,BIGINT,BIGINT,STRING,STRING
====
---- QUERY
DROP TABLE iceberg_hadoop_cat_with_orc;
---- RESULTS
'Table has been dropped.'
====
---- QUERY
# Default transactional property doesn't affect Iceberg tables.
set default_transactional_type=insert_only;
create table iceberg_default_trans (i int)
stored as iceberg
tblproperties('iceberg.catalog'='hadoop.tables');
---- RESULTS
'Table has been created.'
====
---- QUERY
describe formatted iceberg_default_trans
---- RESULTS: VERIFY_IS_NOT_IN
'','transactional ','true '
'','transactional_properties','insert_only '
---- TYPES
STRING, STRING, STRING
====
---- QUERY
CREATE TABLE iceberg_part_hive_cat(
level STRING,
event_time TIMESTAMP,
register_time DATE,
message STRING,
price DECIMAL(8,1),
map_test MAP <STRING, array <STRING>>,
struct_test STRUCT <f1: BIGINT, f2: BIGINT>
)
PARTITION BY SPEC
(
level IDENTITY,
event_time IDENTITY,
event_time HOUR,
register_time DAY
)
STORED AS ICEBERG
TBLPROPERTIES('iceberg.catalog'='hive.catalog')
---- RESULTS
'Table has been created.'
====
---- QUERY
DESCRIBE iceberg_part_hive_cat;
---- RESULTS
'level','string',''
'event_time','timestamp',''
'register_time','date',''
'message','string',''
'price','decimal(8,1)',''
'map_test','map<string,array<string>>',''
'struct_test','struct<\n f1:bigint,\n f2:bigint\n>',''
---- TYPES
STRING,STRING,STRING
====
---- QUERY
DESCRIBE FORMATTED iceberg_part_hive_cat;
---- RESULTS: VERIFY_IS_SUBSET
'Location: ','$NAMENODE/test-warehouse/$DATABASE.db/iceberg_part_hive_cat','NULL'
row_regex:'','metadata_location ','$NAMENODE/test-warehouse/$DATABASE.db/iceberg_part_hive_cat/metadata/.*.metadata.json'
'','table_type ','ICEBERG '
'','iceberg.catalog ','hive.catalog '
---- TYPES
string, string, string
====