Files
impala/testdata/workloads/functional-query/queries/QueryTest/iceberg-create.test
Attila Jeges fabe994d1f IMPALA-10627: Use standard parquet-related Iceberg table properties
This patch adds support for the following standard Iceberg properties:

write.parquet.compression-codec:
  Parquet compression codec. Supported values are: NONE, GZIP, SNAPPY
  (default value), LZ4, ZSTD. The table property will be ignored if
  COMPRESSION_CODEC query option is set.

write.parquet.compression-level:
  Parquet compression level. Used with ZSTD compression only.
  Supported range is [1, 22]. Default value is 3. The table property
  will be ignored if COMPRESSION_CODEC query option is set.

write.parquet.row-group-size-bytes :
  Parquet row group size in bytes. Supported range is [8388608,
  2146435072] (8MB - 2047MB). The table property will be ignored if
  PARQUET_FILE_SIZE query option is set.
  If neither the table property nor the PARQUET_FILE_SIZE query option
  is set, the way Impala calculates row group size will remain
  unchanged.

write.parquet.page-size-bytes:
  Parquet page size in bytes. Used for PLAIN encoding. Supported range
  is [65536, 1073741824] (64KB - 1GB).
  If the table property is unset, the way Impala calculates page size
  will remain unchanged.

write.parquet.dict-size-bytes:
  Parquet dictionary page size in bytes. Used for dictionary encoding.
  Supported range is [65536, 1073741824] (64KB - 1GB).
  If the table property is unset, the way Impala calculates dictionary
  page size will remain unchanged.

This patch also renames 'iceberg.file_format' table property to
'write.format.default' which is the standard Iceberg name for the
table property.

Change-Id: I3b8aa9a52c13c41b48310d2f7c9c7426e1ff5f23
Reviewed-on: http://gerrit.cloudera.org:8080/17654
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
2021-07-20 23:58:06 +00:00

485 lines
12 KiB
Plaintext

====
---- QUERY
CREATE TABLE iceberg_hadoop_tables(
level STRING,
event_time TIMESTAMP,
register_time DATE,
message STRING,
price DECIMAL(8,1),
map_test MAP <STRING, array <STRING>>,
struct_test STRUCT <f1: BIGINT, f2: BIGINT>
)
PARTITIONED BY SPEC
(
level,
TRUNCATE(10, level),
event_time,
HOURS(event_time),
BUCKET(1000, event_time),
DAY(register_time)
)
STORED AS ICEBERG
TBLPROPERTIES('iceberg.catalog'='hadoop.tables');
---- RESULTS
'Table has been created.'
====
---- QUERY
DESCRIBE iceberg_hadoop_tables;
---- RESULTS
'level','string','','true'
'event_time','timestamp','','true'
'register_time','date','','true'
'message','string','','true'
'price','decimal(8,1)','','true'
'map_test','map<string,array<string>>','','true'
'struct_test','struct<\n f1:bigint,\n f2:bigint\n>','','true'
---- TYPES
STRING,STRING,STRING,STRING
====
---- QUERY
SHOW PARTITIONS iceberg_hadoop_tables;
---- RESULTS
0,1,1000,'level','IDENTITY'
0,1,1001,'level_trunc','TRUNCATE 10'
0,2,1002,'event_time','IDENTITY'
0,2,1003,'event_time_hour','HOUR'
0,2,1004,'event_time_bucket','BUCKET 1000'
0,3,1005,'register_time_day','DAY'
---- TYPES
BIGINT,BIGINT,BIGINT,STRING,STRING
====
---- QUERY
DROP TABLE iceberg_hadoop_tables;
---- RESULTS
'Table has been dropped.'
====
---- QUERY
CREATE TABLE iceberg_hadoop_tables(
level STRING
)
STORED AS ICEBERG
TBLPROPERTIES('iceberg.catalog'='hadoop.tables');
DESCRIBE iceberg_hadoop_tables;
---- RESULTS
'level','string','','true'
---- TYPES
STRING,STRING,STRING,STRING
====
---- QUERY
CREATE EXTERNAL TABLE iceberg_hadoop_tbls_external(
level STRING
)
STORED AS ICEBERG
TBLPROPERTIES('iceberg.catalog'='hadoop.tables');
---- RESULTS
'Location is necessary for external iceberg table.'
====
---- QUERY
CREATE TABLE iceberg_hadoop_tbls_with_loc(
level STRING
)
PARTITIONED BY SPEC
(
level,
BUCKET(12345, level),
TRUNCATE(15, level)
)
STORED AS ICEBERG
LOCATION '/$DATABASE.iceberg_test_with_location'
TBLPROPERTIES('iceberg.catalog'='hadoop.tables');
CREATE EXTERNAL TABLE iceberg_hadoop_tbls_external(
level STRING
)
STORED AS ICEBERG
LOCATION '/$DATABASE.iceberg_test_with_location'
TBLPROPERTIES('iceberg.catalog'='hadoop.tables');
---- RESULTS
'Table has been created.'
====
---- QUERY
DESCRIBE iceberg_hadoop_tbls_external;
---- RESULTS
'level','string','','true'
---- TYPES
STRING,STRING,STRING,STRING
====
---- QUERY
SHOW PARTITIONS iceberg_hadoop_tbls_external;
---- RESULTS
0,1,1000,'level','IDENTITY'
0,1,1001,'level_bucket','BUCKET 12345'
0,1,1002,'level_trunc','TRUNCATE 15'
---- TYPES
BIGINT,BIGINT,BIGINT,STRING,STRING
====
---- QUERY
CREATE EXTERNAL TABLE iceberg_hadoop_tbls_external_empty_col
STORED AS ICEBERG
LOCATION '/$DATABASE.iceberg_test_with_location'
TBLPROPERTIES('iceberg.catalog'='hadoop.tables');
---- RESULTS
'Table has been created.'
====
---- QUERY
DESCRIBE iceberg_hadoop_tbls_external_empty_col;
---- RESULTS
'level','string','','true'
---- TYPES
STRING,STRING,STRING,STRING
====
---- QUERY
SHOW PARTITIONS iceberg_hadoop_tbls_external_empty_col;
---- RESULTS
0,1,1000,'level','IDENTITY'
0,1,1001,'level_bucket','BUCKET 12345'
0,1,1002,'level_trunc','TRUNCATE 15'
---- TYPES
BIGINT,BIGINT,BIGINT,STRING,STRING
====
---- QUERY
DROP TABLE iceberg_hadoop_tbls_with_loc;
DROP TABLE iceberg_hadoop_tbls_external;
DROP TABLE iceberg_hadoop_tbls_external_empty_col;
---- RESULTS
'Table has been dropped.'
====
---- QUERY
CREATE TABLE iceberg_hadoop_catalog(
level STRING,
event_time TIMESTAMP,
register_time DATE,
message STRING,
price DECIMAL(8,1),
map_test MAP <STRING, array <STRING>>,
struct_test STRUCT <f1: BIGINT, f2: BIGINT>
)
PARTITIONED BY SPEC
(
level,
TRUNCATE(10, level),
event_time,
HOUR(event_time),
BUCKET(1000, event_time),
DAYS(register_time)
)
STORED AS ICEBERG
TBLPROPERTIES('iceberg.catalog'='hadoop.catalog',
'iceberg.catalog_location'='/$DATABASE/hadoop_catalog_test');
---- RESULTS
'Table has been created.'
====
---- QUERY
DESCRIBE iceberg_hadoop_catalog;
---- RESULTS
'level','string','','true'
'event_time','timestamp','','true'
'register_time','date','','true'
'message','string','','true'
'price','decimal(8,1)','','true'
'map_test','map<string,array<string>>','','true'
'struct_test','struct<\n f1:bigint,\n f2:bigint\n>','','true'
---- TYPES
STRING,STRING,STRING,STRING
====
---- QUERY
SHOW PARTITIONS iceberg_hadoop_catalog;
---- RESULTS
0,1,1000,'level','IDENTITY'
0,1,1001,'level_trunc','TRUNCATE 10'
0,2,1002,'event_time','IDENTITY'
0,2,1003,'event_time_hour','HOUR'
0,2,1004,'event_time_bucket','BUCKET 1000'
0,3,1005,'register_time_day','DAY'
---- TYPES
BIGINT,BIGINT,BIGINT,STRING,STRING
====
---- QUERY
DROP TABLE iceberg_hadoop_catalog;
---- RESULTS
'Table has been dropped.'
====
---- QUERY
CREATE TABLE iceberg_hadoop_catalog(
level STRING
)
PARTITIONED BY SPEC
(
level,
BUCKET(12345, level),
TRUNCATE(10, level)
)
STORED AS ICEBERG
TBLPROPERTIES('iceberg.catalog'='hadoop.catalog',
'iceberg.catalog_location'='/$DATABASE/hadoop_catalog_test');
CREATE EXTERNAL TABLE iceberg_hadoop_cat_external(
level STRING
)
STORED AS ICEBERG
TBLPROPERTIES('iceberg.catalog'='hadoop.catalog',
'iceberg.catalog_location'='/$DATABASE/hadoop_catalog_test', 'iceberg.table_identifier'='$DATABASE.iceberg_hadoop_catalog');
---- RESULTS
'Table has been created.'
====
---- QUERY
DESCRIBE iceberg_hadoop_cat_external;
---- RESULTS
'level','string','','true'
---- TYPES
STRING,STRING,STRING,STRING
====
---- QUERY
SHOW PARTITIONS iceberg_hadoop_cat_external;
---- RESULTS
0,1,1000,'level','IDENTITY'
0,1,1001,'level_bucket','BUCKET 12345'
0,1,1002,'level_trunc','TRUNCATE 10'
---- TYPES
BIGINT,BIGINT,BIGINT,STRING,STRING
====
---- QUERY
DROP TABLE iceberg_hadoop_catalog;
DROP TABLE iceberg_hadoop_cat_external;
---- RESULTS
'Table has been dropped.'
====
---- QUERY
CREATE TABLE iceberg_hadoop_cat_drop(
level STRING
)
STORED AS ICEBERG
TBLPROPERTIES('iceberg.catalog'='hadoop.catalog',
'iceberg.catalog_location'='/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test');
CREATE TABLE iceberg_hadoop_cat_query(
level STRING
)
STORED AS ICEBERG
TBLPROPERTIES('iceberg.catalog'='hadoop.catalog',
'iceberg.catalog_location'='/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test');
DROP TABLE iceberg_hadoop_cat_drop;
SELECT * FROM iceberg_hadoop_cat_query;
---- TYPES
string
---- RESULTS
====
---- QUERY
DESCRIBE FORMATTED iceberg_hadoop_cat_query;
---- RESULTS: VERIFY_IS_SUBSET
'Location: ','$NAMENODE/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test/$DATABASE/iceberg_hadoop_cat_query','NULL'
'','iceberg.catalog_location','/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test'
'','write.format.default','parquet '
'','iceberg.catalog ','hadoop.catalog '
---- TYPES
string, string, string
====
---- QUERY
CREATE TABLE iceberg_hadoop_cat_with_ident(
level STRING
)
STORED AS ICEBERG
TBLPROPERTIES('iceberg.catalog'='hadoop.catalog',
'iceberg.catalog_location'='/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test',
'iceberg.table_identifier'='org.db.tbl');
DESCRIBE FORMATTED iceberg_hadoop_cat_with_ident;
---- RESULTS: VERIFY_IS_SUBSET
'Location: ','$NAMENODE/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test/org/db/tbl','NULL'
'','iceberg.catalog_location','/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test'
'','write.format.default','parquet '
'','iceberg.catalog ','hadoop.catalog '
---- TYPES
string, string, string
====
---- QUERY
INSERT INTO iceberg_hadoop_cat_with_ident values ("ice");
====
---- QUERY
CREATE EXTERNAL TABLE iceberg_hadoop_cat_with_ident_ext
STORED AS ICEBERG
TBLPROPERTIES('iceberg.catalog'='hadoop.catalog',
'iceberg.catalog_location'='/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test',
'iceberg.table_identifier'='org.db.tbl');
---- RESULTS
'Table has been created.'
====
---- QUERY
DESCRIBE FORMATTED iceberg_hadoop_cat_with_ident_ext;
---- RESULTS: VERIFY_IS_SUBSET
'Location: ','$NAMENODE/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test/org/db/tbl','NULL'
'','iceberg.catalog_location','/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test'
'','write.format.default','parquet '
'','iceberg.catalog ','hadoop.catalog '
---- TYPES
string, string, string
====
---- QUERY
SELECT * FROM iceberg_hadoop_cat_with_ident_ext;
---- RESULTS
'ice'
---- TYPES
STRING
====
---- QUERY
CREATE TABLE iceberg_hadoop_cat_with_orc(
level STRING,
event_time TIMESTAMP,
register_time DATE,
message STRING,
price DECIMAL(8,1),
map_test MAP <STRING, array <STRING>>,
struct_test STRUCT <f1: BIGINT, f2: BIGINT>
)
PARTITIONED BY SPEC
(
level,
event_time,
HOUR(event_time),
DAY(register_time)
)
STORED AS ICEBERG
TBLPROPERTIES('write.format.default'='orc','iceberg.catalog'='hadoop.catalog',
'iceberg.catalog_location'='/test-warehouse/$DATABASE/hadoop_catalog_test/iceberg_test');
---- RESULTS
'Table has been created.'
====
---- QUERY
DESCRIBE iceberg_hadoop_cat_with_orc;
---- RESULTS
'level','string','','true'
'event_time','timestamp','','true'
'register_time','date','','true'
'message','string','','true'
'price','decimal(8,1)','','true'
'map_test','map<string,array<string>>','','true'
'struct_test','struct<\n f1:bigint,\n f2:bigint\n>','','true'
---- TYPES
STRING,STRING,STRING,STRING
====
---- QUERY
SHOW PARTITIONS iceberg_hadoop_cat_with_orc;
---- RESULTS
0,1,1000,'level','IDENTITY'
0,2,1001,'event_time','IDENTITY'
0,2,1002,'event_time_hour','HOUR'
0,3,1003,'register_time_day','DAY'
---- TYPES
BIGINT,BIGINT,BIGINT,STRING,STRING
====
---- QUERY
DROP TABLE iceberg_hadoop_cat_with_orc;
---- RESULTS
'Table has been dropped.'
====
---- QUERY
# Default transactional property doesn't affect Iceberg tables.
set default_transactional_type=insert_only;
create table iceberg_default_trans (i int)
stored as iceberg
tblproperties('iceberg.catalog'='hadoop.tables');
---- RESULTS
'Table has been created.'
====
---- QUERY
describe formatted iceberg_default_trans
---- RESULTS: VERIFY_IS_NOT_IN
'','transactional ','true '
'','transactional_properties','insert_only '
---- TYPES
STRING, STRING, STRING
====
---- QUERY
CREATE TABLE iceberg_part_hive_cat(
level STRING,
event_time TIMESTAMP,
register_time DATE,
message STRING,
price DECIMAL(8,1),
map_test MAP <STRING, array <STRING>>,
struct_test STRUCT <f1: BIGINT, f2: BIGINT>
)
PARTITIONED BY SPEC
(
level,
event_time,
HOUR(event_time),
DAY(register_time)
)
STORED AS ICEBERG
TBLPROPERTIES('iceberg.catalog'='hive.catalog')
---- RESULTS
'Table has been created.'
====
---- QUERY
DESCRIBE iceberg_part_hive_cat;
---- RESULTS
'level','string','','true'
'event_time','timestamp','','true'
'register_time','date','','true'
'message','string','','true'
'price','decimal(8,1)','','true'
'map_test','map<string,array<string>>','','true'
'struct_test','struct<\n f1:bigint,\n f2:bigint\n>','','true'
---- TYPES
STRING,STRING,STRING,STRING
====
---- QUERY
DESCRIBE FORMATTED iceberg_part_hive_cat;
---- RESULTS: VERIFY_IS_SUBSET
'Location: ','$NAMENODE/test-warehouse/$DATABASE.db/iceberg_part_hive_cat','NULL'
row_regex:'','metadata_location ','$NAMENODE/test-warehouse/$DATABASE.db/iceberg_part_hive_cat/metadata/.*.metadata.json'
'','table_type ','ICEBERG '
'','iceberg.catalog ','hive.catalog '
---- TYPES
string, string, string
====
---- QUERY
CREATE TABLE iceberg_nullable_test(
level STRING NOT NULL,
event_time TIMESTAMP NULL,
register_time DATE
)
STORED AS ICEBERG;
DESCRIBE iceberg_nullable_test;
---- RESULTS
'level','string','','false'
'event_time','timestamp','','true'
'register_time','date','','true'
---- TYPES
STRING,STRING,STRING,STRING
====
---- QUERY
CREATE TABLE iceberg_old_style_partitions (
register_time DATE,
message STRING,
price DECIMAL(8,1),
map_test MAP <STRING, array <STRING>>,
struct_test STRUCT <f1: BIGINT, f2: BIGINT>
)
PARTITIONED BY (
level STRING,
event_id INT
)
STORED AS ICEBERG;
---- RESULTS
'Table has been created.'
====
---- QUERY
DESCRIBE iceberg_old_style_partitions;
---- RESULTS
'register_time','date','','true'
'message','string','','true'
'price','decimal(8,1)','','true'
'map_test','map<string,array<string>>','','true'
'struct_test','struct<\n f1:bigint,\n f2:bigint\n>','','true'
'level','string','','true'
'event_id','int','','true'
---- TYPES
STRING,STRING,STRING,STRING
====
---- QUERY
SHOW PARTITIONS iceberg_old_style_partitions;
---- RESULTS
0,6,1000,'level','IDENTITY'
0,7,1001,'event_id','IDENTITY'
---- TYPES
BIGINT,BIGINT,BIGINT,STRING,STRING
====