Files
impala/testdata/workloads/functional-query/queries/QueryTest/iceberg-insert.test
Zoltan Borok-Nagy 4448b8755b IMPALA-10152: Add support for Iceberg HiveCatalog
HiveCatalog is one of Iceberg's catalog implementations. It uses
the Hive metastore and it is the recommended catalog implementation
when the table data is stored in object stores like S3.

This commit updates the Iceberg version to a newer one, and it also
retrieves Iceberg from the CDP distribution because that version of
Iceberg is built against Hive 3 (Impala is only compatible with
Hive 3).

This commit makes HiveCatalog the default Iceberg catalog in Impala
because it can be used in more environments (e.g. cloud stores),
and it is more featureful. Also, other engines that store their
table metadata in HMS will probably use HiveCatalog as well.

Tables stored in HiveCatalog are similar to Kudu tables with HMS
integration, i.e. modifying an Iceberg table via the Iceberg APIs
also modifies the HMS table. So in CatalogOpExecutor we handle
such Iceberg tables similarly to integrated Kudu tables.

Testing:
 * Added e2e tests for creating, writing, and altering Iceberg
   tables
 * Added SHOW CREATE TABLE tests

Change-Id: Ie574589a1751aaa9ccbd34a89c6819714d103197
Reviewed-on: http://gerrit.cloudera.org:8080/16721
Reviewed-by: wangsheng <skyyws@163.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
2020-11-20 21:40:28 +00:00

295 lines
7.8 KiB
Plaintext

====
---- QUERY
# Create a table that is a subset of 'alltypes' table, i.e. it only
# contains the data types supported by Iceberg.
create table iceberg_alltypes(
id INT COMMENT 'Add a comment',
bool_col BOOLEAN,
int_col INT,
bigint_col BIGINT,
float_col FLOAT,
double_col DOUBLE,
date_col DATE,
string_col STRING,
timestamp_col TIMESTAMP
)
stored as iceberg
tblproperties('iceberg.catalog'='hadoop.tables');
---- RESULTS
'Table has been created.'
====
---- QUERY
insert into iceberg_alltypes
select id, bool_col, int_col, bigint_col, float_col, double_col,
CAST(date_string_col as date FORMAT 'MM/DD/YY'), string_col, timestamp_col
from functional.alltypes
order by id
limit 5;
---- RESULTS
: 5
====
---- QUERY
select * from iceberg_alltypes;
---- RESULTS
0,true,0,0,0,0,2009-01-01,'0',2009-01-01 00:00:00
1,false,1,10,1.100000023841858,10.1,2009-01-01,'1',2009-01-01 00:01:00
2,true,2,20,2.200000047683716,20.2,2009-01-01,'2',2009-01-01 00:02:00.100000000
3,false,3,30,3.299999952316284,30.3,2009-01-01,'3',2009-01-01 00:03:00.300000000
4,true,4,40,4.400000095367432,40.4,2009-01-01,'4',2009-01-01 00:04:00.600000000
---- TYPES
INT, BOOLEAN, INT, BIGINT, FLOAT, DOUBLE, DATE, STRING, TIMESTAMP
====
---- QUERY
# Create table with decimal types
CREATE TABLE decimal_tbl (
d1 DECIMAL(9,0),
d2 DECIMAL(10,0),
d3 DECIMAL(20,10),
d4 DECIMAL(38,38),
d5 DECIMAL(10,5),
d6 DECIMAL(9,0)
)
STORED AS iceberg
TBLPROPERTIES('iceberg.catalog'='hadoop.tables');
---- RESULTS
'Table has been created.'
====
---- QUERY
insert into decimal_tbl select * from functional_parquet.decimal_tbl;
select * from decimal_tbl;
---- RESULTS
1234,2222,1.2345678900,0.12345678900000000000000000000000000000,12345.78900,1
2345,111,12.3456789000,0.12345678900000000000000000000000000000,3.14100,1
12345,333,123.4567890000,0.12345678900000000000000000000000000000,11.22000,1
12345,333,1234.5678900000,0.12345678900000000000000000000000000000,0.10000,1
132842,333,12345.6789000000,0.12345678900000000000000000000000000000,0.77889,1
---- TYPES
DECIMAL, DECIMAL, DECIMAL, DECIMAL, DECIMAL, DECIMAL
====
---- QUERY
# Create table with string types. VARCHAR is not supported by Iceberg, so
# let's substitute it with STRING.
CREATE TABLE chars_formats (
cs CHAR(5),
cl CHAR(140),
vc STRING
)
STORED AS iceberg
TBLPROPERTIES('iceberg.catalog'='hadoop.tables');
---- RESULTS
'Table has been created.'
====
---- QUERY
insert into chars_formats select * from functional_parquet.chars_formats;
select * from chars_formats;
---- RESULTS
'abcde','88db79c70974e02deb3f01cfdcc5daae2078f21517d1021994f12685c0144addae3ce0dbd6a540b55b88af68486251fa6f0c8f9f94b3b1b4bc64c69714e281f388db79c70974','variable length'
'abc ','8d3fffddf79e9a232ffd19f9ccaa4d6b37a6a243dbe0f23137b108a043d9da13121a9b505c804956b22e93c7f93969f4a7ba8ddea45bf4aab0bebc8f814e09918d3fffddf79e','abc'
'abcde','68f8c4575da360c32abb46689e58193a0eeaa905ae6f4a5e6c702a6ae1db35a6f86f8222b7a5489d96eb0466c755b677a64160d074617096a8c6279038bc720468f8c4575da3','b2fe9d4638503a57f93396098f24103a'
---- TYPES
CHAR, CHAR, STRING
====
---- QUERY
# Create non-Iceberg table with INT96 nanos.
create table int96_nanos (ts timestamp) stored as parquet;
====
---- QUERY
# Insert edge values as "normal" int96 timestamps that can represent all values.
set parquet_timestamp_type=INT96_NANOS;
insert into int96_nanos values
("1400-01-01"),
("2019-01-18 00:00:00.000000001"),
("2019-01-18 00:00:00.000001"),
("2019-01-18 00:00:00.001"),
("2019-01-18 23:59:59.999"),
("2019-01-18 23:59:59.999999"),
("2019-01-18 23:59:59.999999999")
====
---- QUERY
# Iceberg should write timestamps as INT64 micros.
create table ts_iceberg (ts timestamp) stored as iceberg
tblproperties('iceberg.catalog'='hadoop.tables');
insert into ts_iceberg select * from int96_nanos;
select * from ts_iceberg;
---- RESULTS
1400-01-01 00:00:00
2019-01-18 00:00:00
2019-01-18 00:00:00.000001000
2019-01-18 00:00:00.001000000
2019-01-18 23:59:59.999000000
2019-01-18 23:59:59.999999000
2019-01-18 23:59:59.999999000
====
---- QUERY
# Insert into hadoop catalog.
create table iceberg_hadoop_cat (i int)
stored as iceberg
tblproperties('iceberg.catalog'='hadoop.catalog',
'iceberg.catalog_location'='/test-warehouse/$DATABASE.db/hadoop_catalog_test');
insert into iceberg_hadoop_cat values (1), (2), (3);
---- RESULTS
: 3
====
---- QUERY
select * from iceberg_hadoop_cat;
---- RESULTS
1
2
3
---- TYPES
INT
====
---- QUERY
show files in iceberg_hadoop_cat;
---- RESULTS: VERIFY_IS_SUBSET
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/hadoop_catalog_test/$DATABASE/iceberg_hadoop_cat/data/.*.0.parq','.*',''
---- TYPES
STRING, STRING, STRING
====
---- QUERY
# Insert into hadoop catalog with custom table identifier.
create table iceberg_hadoop_cat_ti (i int)
stored as iceberg
tblproperties('iceberg.catalog'='hadoop.catalog',
'iceberg.catalog_location'='/test-warehouse/$DATABASE.db/hadoop_catalog_test',
'iceberg.table_identifier'='test.custom_db.int_table');
insert into iceberg_hadoop_cat_ti values (1), (2), (3);
---- RESULTS
: 3
====
---- QUERY
select * from iceberg_hadoop_cat_ti;
---- RESULTS
1
2
3
---- TYPES
INT
====
---- QUERY
show files in iceberg_hadoop_cat_ti;
---- RESULTS: VERIFY_IS_SUBSET
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/hadoop_catalog_test/test/custom_db/int_table/data/.*.0.parq','.*',''
---- TYPES
STRING, STRING, STRING
====
---- QUERY
# Insert into table stored in Iceberg's HiveCatalog
create table iceberg_hive_cat (i int)
stored as iceberg
tblproperties('iceberg.catalog'='hive.catalog');
---- RESULTS
'Table has been created.'
====
---- QUERY
insert into iceberg_hive_cat values (7);
select * from iceberg_hive_cat;
---- RESULTS
7
---- TYPES
INT
====
---- QUERY
# Query external Iceberg table
create external table iceberg_hive_cat_ext (i int)
stored as iceberg
location '/test-warehouse/$DATABASE.db/iceberg_hive_cat'
tblproperties('iceberg.catalog'='hive.catalog',
'iceberg.table_identifier'='$DATABASE.iceberg_hive_cat');
---- RESULTS
'Table has been created.'
====
---- QUERY
select * from iceberg_hive_cat_ext;
---- RESULTS
7
---- TYPES
INT
====
---- QUERY
# INSET INTO external Iceberg table stored in HiveCatalog.
insert into iceberg_hive_cat_ext values (8);
select * from iceberg_hive_cat_ext;
---- RESULTS
7
8
---- TYPES
INT
====
---- QUERY
# Query original table
refresh iceberg_hive_cat;
select * from iceberg_hive_cat;
---- RESULTS
7
8
---- TYPES
INT
====
---- QUERY
# DROP external Iceberg table
drop table iceberg_hive_cat_ext
---- RESULTS
'Table has been dropped.'
====
---- QUERY
# Original table is not affected after external table drop.
refresh iceberg_hive_cat;
select * from iceberg_hive_cat;
---- RESULTS
7
8
---- TYPES
INT
====
---- QUERY
# Create another external Iceberg table
create external table iceberg_hive_cat_ext_2 (i int)
stored as iceberg
location '/test-warehouse/$DATABASE.db/iceberg_hive_cat'
tblproperties('iceberg.catalog'='hive.catalog',
'iceberg.table_identifier'='$DATABASE.iceberg_hive_cat');
select * from iceberg_hive_cat_ext_2
---- RESULTS
7
8
====
---- QUERY
# DROP the synchronized Iceberg table (data is purged).
drop table iceberg_hive_cat
---- RESULTS
'Table has been dropped.'
====
---- QUERY
# The data has been purged, so querying the external table fails.
select * from iceberg_hive_cat_ext_2
---- CATCH
Table does not exist
====
---- QUERY
# Insert into hive catalog with custom location.
create table iceberg_hive_cat_custom_loc (i int)
stored as iceberg
location '/test-warehouse/$DATABASE.db/custom_hive_cat'
tblproperties('iceberg.catalog'='hive.catalog');
insert into iceberg_hive_cat_custom_loc values (1), (2), (3);
---- RESULTS
: 3
====
---- QUERY
select * from iceberg_hive_cat_custom_loc;
---- RESULTS
1
2
3
---- TYPES
INT
====
---- QUERY
show files in iceberg_hive_cat_custom_loc;
---- RESULTS: VERIFY_IS_SUBSET
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/custom_hive_cat/data/.*.0.parq','.*',''
---- TYPES
STRING, STRING, STRING
====