mirror of
https://github.com/apache/impala.git
synced 2026-02-01 21:00:29 -05:00
With this patch Impala will support partition evolution for
Iceberg tables.
The DDL statement to change the default partition spec is:
ALTER TABLE <tbl> SET PARTITION SPEC(<partition-spec>)
Hive uses the same SQL syntax.
Testing:
- Added FE test to exercise parsing various well-formed and ill-formed
ALTER TABLE SET PARTITION SPEC statements.
- Added e2e tests for:
- ALTER TABLE SET PARTITION SPEC works for tables with HadoopTables
and HadoopCatalog Catalog.
- When evolving partition spec, the old data written with an earlier
spec remains unchanged. New data is written using the new spec in
a new layout. Data written with earlier spec and new spec can be
fetched in a single query.
- Invalid ALTER TABLE SET PARTITION SPEC statements yield the
expected analysis error messages.
Change-Id: I9bd935b8a82e977df9ee90d464b5fe2a7acc83f2
Reviewed-on: http://gerrit.cloudera.org:8080/17723
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
366 lines
10 KiB
Plaintext
366 lines
10 KiB
Plaintext
====
|
|
---- QUERY
|
|
CREATE TABLE iceberg_hadoop_tables(
|
|
level STRING
|
|
)
|
|
STORED AS ICEBERG
|
|
TBLPROPERTIES('iceberg.catalog'='hadoop.tables');
|
|
ALTER TABLE iceberg_hadoop_tables ADD COLUMNS(event_time TIMESTAMP, register_time DATE);
|
|
ALTER TABLE iceberg_hadoop_tables ADD COLUMNS(message STRING, price DECIMAL(8,1));
|
|
ALTER TABLE iceberg_hadoop_tables ADD COLUMNS(map_test MAP <STRING, array <STRING>>, struct_test STRUCT <f1: BIGINT, f2: BIGINT>);
|
|
DESCRIBE iceberg_hadoop_tables;
|
|
---- RESULTS
|
|
'level','string','','true'
|
|
'event_time','timestamp','','true'
|
|
'register_time','date','','true'
|
|
'message','string','','true'
|
|
'price','decimal(8,1)','','true'
|
|
'map_test','map<string,array<string>>','','true'
|
|
'struct_test','struct<\n f1:bigint,\n f2:bigint\n>','','true'
|
|
---- TYPES
|
|
STRING,STRING,STRING,STRING
|
|
====
|
|
---- QUERY
|
|
ALTER TABLE iceberg_hadoop_tables set TBLPROPERTIES('fake_key'='fake_value');
|
|
DESCRIBE FORMATTED iceberg_hadoop_tables;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'','fake_key ','fake_value '
|
|
---- TYPES
|
|
string, string, string
|
|
====
|
|
---- QUERY
|
|
ALTER TABLE iceberg_hadoop_tables set TBLPROPERTIES('fake_key'='fake_value');
|
|
ALTER TABLE iceberg_hadoop_tables unset TBLPROPERTIES('fake_key');
|
|
DESCRIBE FORMATTED iceberg_hadoop_tables;
|
|
---- RESULTS: VERIFY_IS_NOT_IN
|
|
'','fake_key ','fake_value '
|
|
---- TYPES
|
|
string, string, string
|
|
====
|
|
---- QUERY
|
|
ALTER TABLE iceberg_hadoop_tables set OWNER USER fake_user;
|
|
DESCRIBE FORMATTED iceberg_hadoop_tables;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'OwnerType: ','USER ','NULL'
|
|
'Owner: ','fake_user ','NULL'
|
|
---- TYPES
|
|
string, string, string
|
|
====
|
|
---- QUERY
|
|
ALTER TABLE iceberg_hadoop_tables set OWNER ROLE fake_role;
|
|
DESCRIBE FORMATTED iceberg_hadoop_tables;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'OwnerType: ','ROLE ','NULL'
|
|
'Owner: ','fake_role ','NULL'
|
|
---- TYPES
|
|
string, string, string
|
|
====
|
|
---- QUERY
|
|
ALTER TABLE iceberg_hadoop_tables SET PARTITION SPEC (
|
|
level,
|
|
TRUNCATE(5, level),
|
|
HOUR(event_time),
|
|
YEAR(register_time),
|
|
BUCKET(15, message),
|
|
price);
|
|
SHOW PARTITIONS iceberg_hadoop_tables;
|
|
---- RESULTS
|
|
regex:.+,1,1000,'level','IDENTITY'
|
|
regex:.+,1,1001,'level_trunc','TRUNCATE 5'
|
|
regex:.+,2,1002,'event_time_hour','HOUR'
|
|
regex:.+,3,1003,'register_time_year','YEAR'
|
|
regex:.+,4,1004,'message_bucket','BUCKET 15'
|
|
regex:.+,5,1005,'price','IDENTITY'
|
|
---- TYPES
|
|
BIGINT, BIGINT, BIGINT, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
CREATE TABLE iceberg_hadoop_catalog(
|
|
level STRING
|
|
)
|
|
STORED AS ICEBERG
|
|
TBLPROPERTIES('iceberg.catalog'='hadoop.catalog',
|
|
'iceberg.catalog_location'='/$DATABASE/hadoop_catalog_test');
|
|
ALTER TABLE iceberg_hadoop_catalog ADD COLUMNS(event_time TIMESTAMP, register_time DATE);
|
|
ALTER TABLE iceberg_hadoop_catalog ADD COLUMNS(message STRING, price DECIMAL(8,1));
|
|
ALTER TABLE iceberg_hadoop_catalog ADD COLUMNS(map_test MAP <STRING, array <STRING>>, struct_test STRUCT <f1: BIGINT, f2: BIGINT>);
|
|
DESCRIBE iceberg_hadoop_catalog;
|
|
---- RESULTS
|
|
'level','string','','true'
|
|
'event_time','timestamp','','true'
|
|
'register_time','date','','true'
|
|
'message','string','','true'
|
|
'price','decimal(8,1)','','true'
|
|
'map_test','map<string,array<string>>','','true'
|
|
'struct_test','struct<\n f1:bigint,\n f2:bigint\n>','','true'
|
|
---- TYPES
|
|
STRING,STRING,STRING,STRING
|
|
====
|
|
---- QUERY
|
|
ALTER TABLE iceberg_hadoop_catalog set TBLPROPERTIES('test_key'='test_value');
|
|
DESCRIBE FORMATTED iceberg_hadoop_catalog;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'','test_key ','test_value '
|
|
---- TYPES
|
|
string, string, string
|
|
====
|
|
---- QUERY
|
|
ALTER TABLE iceberg_hadoop_catalog set OWNER USER fake_user;
|
|
DESCRIBE FORMATTED iceberg_hadoop_catalog;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'OwnerType: ','USER ','NULL'
|
|
'Owner: ','fake_user ','NULL'
|
|
---- TYPES
|
|
string, string, string
|
|
====
|
|
---- QUERY
|
|
ALTER TABLE iceberg_hadoop_catalog set OWNER ROLE fake_role;
|
|
DESCRIBE FORMATTED iceberg_hadoop_catalog;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'OwnerType: ','ROLE ','NULL'
|
|
'Owner: ','fake_role ','NULL'
|
|
---- TYPES
|
|
string, string, string
|
|
====
|
|
---- QUERY
|
|
ALTER TABLE iceberg_hadoop_catalog SET PARTITION SPEC (
|
|
level,
|
|
TRUNCATE(5, level),
|
|
HOUR(event_time),
|
|
YEAR(register_time),
|
|
BUCKET(15, message),
|
|
price);
|
|
SHOW PARTITIONS iceberg_hadoop_catalog;
|
|
---- RESULTS
|
|
regex:.+,1,1000,'level','IDENTITY'
|
|
regex:.+,1,1001,'level_trunc','TRUNCATE 5'
|
|
regex:.+,2,1002,'event_time_hour','HOUR'
|
|
regex:.+,3,1003,'register_time_year','YEAR'
|
|
regex:.+,4,1004,'message_bucket','BUCKET 15'
|
|
regex:.+,5,1005,'price','IDENTITY'
|
|
---- TYPES
|
|
BIGINT, BIGINT, BIGINT, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
CREATE TABLE iceberg_rename (i int)
|
|
STORED AS ICEBERG
|
|
TBLPROPERTIES('iceberg.catalog'='hive.catalog');
|
|
INSERT INTO iceberg_rename values (42);
|
|
ALTER TABLE iceberg_rename RENAME TO iceberg_rename2;
|
|
SELECT * FROM iceberg_rename2;
|
|
---- RESULTS
|
|
42
|
|
---- TYPES
|
|
INT
|
|
====
|
|
---- QUERY
|
|
SELECT * FROM iceberg_rename;
|
|
---- CATCH
|
|
Could not resolve table reference: 'iceberg_rename'
|
|
====
|
|
---- QUERY
|
|
CREATE TABLE iceberg_changing_fileformats (i int)
|
|
STORED AS ICEBERG
|
|
TBLPROPERTIES('write.format.default'='orc');
|
|
DESCRIBE FORMATTED iceberg_changing_fileformats;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'','write.format.default','orc '
|
|
---- TYPES
|
|
string, string, string
|
|
====
|
|
---- QUERY
|
|
ALTER TABLE iceberg_changing_fileformats set TBLPROPERTIES('write.format.default'='parquet');
|
|
DESCRIBE FORMATTED iceberg_changing_fileformats;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'','write.format.default','parquet '
|
|
---- TYPES
|
|
string, string, string
|
|
====
|
|
---- QUERY
|
|
INSERT INTO iceberg_changing_fileformats values (123);
|
|
SELECT * FROM iceberg_changing_fileformats;
|
|
---- RESULTS
|
|
123
|
|
---- TYPES
|
|
INT
|
|
====
|
|
---- QUERY
|
|
ALTER TABLE iceberg_changing_fileformats set TBLPROPERTIES('write.format.default'='ORC');
|
|
---- CATCH
|
|
Attempt to set Iceberg data file format to ORC
|
|
====
|
|
---- QUERY
|
|
DESCRIBE FORMATTED iceberg_changing_fileformats;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'','write.format.default','parquet '
|
|
---- TYPES
|
|
string, string, string
|
|
====
|
|
---- QUERY
|
|
CREATE TABLE ice_alter_cols (f float)
|
|
STORED AS ICEBERG;
|
|
INSERT INTO ice_alter_cols VALUES (2.718);
|
|
SELECT * FROM ice_alter_cols;
|
|
---- RESULTS
|
|
2.718
|
|
---- TYPES
|
|
FLOAT
|
|
====
|
|
---- QUERY
|
|
# Promoting float -> double is allowed by Iceberg.
|
|
ALTER TABLE ice_alter_cols CHANGE COLUMN f d double;
|
|
DESCRIBE ice_alter_cols;
|
|
---- RESULTS
|
|
'd','double','','true'
|
|
---- TYPES
|
|
STRING,STRING,STRING,STRING
|
|
====
|
|
---- QUERY
|
|
INSERT INTO ice_alter_cols VALUES (3.14);
|
|
SELECT round(d, 3) FROM ice_alter_cols;
|
|
---- RESULTS
|
|
2.718
|
|
3.140
|
|
---- TYPES
|
|
DOUBLE
|
|
====
|
|
---- QUERY
|
|
# Demoting double -> float is not allowed.
|
|
ALTER TABLE ice_alter_cols CHANGE COLUMN d f float;
|
|
---- CATCH
|
|
Failed to ALTER table 'ice_alter_cols'
|
|
====
|
|
---- QUERY
|
|
ALTER TABLE ice_alter_cols ADD COLUMN x decimal(10, 3);
|
|
INSERT INTO ice_alter_cols VALUES (1.618, 1.618);
|
|
SELECT round(d, 3), x FROM ice_alter_cols;
|
|
---- RESULTS
|
|
2.718,NULL
|
|
3.140,NULL
|
|
1.618,1.618
|
|
---- TYPES
|
|
DOUBLE,DECIMAL
|
|
====
|
|
---- QUERY
|
|
# Drop first column and check if data is still correctly read.
|
|
ALTER TABLE ice_alter_cols DROP COLUMN d;
|
|
DESCRIBE ice_alter_cols;
|
|
---- RESULTS
|
|
'x','decimal(10,3)','','true'
|
|
---- TYPES
|
|
STRING,STRING,STRING,STRING
|
|
====
|
|
---- QUERY
|
|
SELECT * FROM ice_alter_cols;
|
|
---- RESULTS
|
|
NULL
|
|
NULL
|
|
1.618
|
|
---- TYPES
|
|
DECIMAL
|
|
====
|
|
---- QUERY
|
|
# Add column with same name as dropped column creates a different,
|
|
# brand new column.
|
|
ALTER TABLE ice_alter_cols ADD COLUMN d double;
|
|
SELECT * FROM ice_alter_cols;
|
|
---- RESULTS
|
|
NULL,NULL
|
|
NULL,NULL
|
|
1.618,NULL
|
|
---- TYPES
|
|
DECIMAL,DOUBLE
|
|
====
|
|
---- QUERY
|
|
# Test type conversion INT -> BIGINT
|
|
# BIGNT -> INT should fail.
|
|
ALTER TABLE ice_alter_cols DROP COLUMN d;
|
|
ALTER TABLE ice_alter_cols ADD COLUMN i int;
|
|
INSERT INTO TABLE ice_alter_cols VALUES (1.11, 11);
|
|
ALTER TABLE ice_alter_cols CHANGE COLUMN i bi bigint;
|
|
INSERT INTO TABLE ice_alter_cols VALUES (2.345, 111222333444555666);
|
|
ALTER TABLE ice_alter_cols CHANGE COLUMN bi i int;
|
|
---- CATCH
|
|
Failed to ALTER table 'ice_alter_cols'
|
|
====
|
|
---- QUERY
|
|
DESCRIBE ice_alter_cols;
|
|
---- RESULTS
|
|
'x','decimal(10,3)','','true'
|
|
'bi','bigint','','true'
|
|
---- TYPES
|
|
STRING,STRING,STRING,STRING
|
|
====
|
|
---- QUERY
|
|
SELECT * FROM ice_alter_cols;
|
|
---- RESULTS
|
|
NULL,NULL
|
|
NULL,NULL
|
|
1.618,NULL
|
|
1.110,11
|
|
2.345,111222333444555666
|
|
---- TYPES
|
|
DECIMAL,BIGINT
|
|
====
|
|
---- QUERY
|
|
# Iceberg allows changing precision to a higher value.
|
|
ALTER TABLE ice_alter_cols DROP COLUMN bi;
|
|
ALTER TABLE ice_alter_cols CHANGE COLUMN x d DECIMAL (22, 3);
|
|
SELECT * FROM ice_alter_cols;
|
|
---- RESULTS
|
|
1.618
|
|
1.110
|
|
2.345
|
|
NULL
|
|
NULL
|
|
---- TYPES
|
|
DECIMAL
|
|
====
|
|
---- QUERY
|
|
DESCRIBE ice_alter_cols;
|
|
---- RESULTS
|
|
'd','decimal(22,3)','','true'
|
|
---- TYPES
|
|
STRING,STRING,STRING,STRING
|
|
====
|
|
---- QUERY
|
|
CREATE TABLE iceberg_changing_parq_tblprops (i int)
|
|
STORED AS ICEBERG
|
|
TBLPROPERTIES (
|
|
'write.parquet.row-group-size-bytes'='134217728',
|
|
'write.parquet.compression-codec'='zstd',
|
|
'write.parquet.compression-level'='12',
|
|
'write.parquet.page-size-bytes'='65536',
|
|
'write.parquet.dict-size-bytes'='131072'
|
|
);
|
|
DESCRIBE FORMATTED iceberg_changing_parq_tblprops;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'','write.format.default','parquet '
|
|
'','write.parquet.row-group-size-bytes','134217728 '
|
|
'','write.parquet.compression-codec','zstd '
|
|
'','write.parquet.compression-level','12 '
|
|
'','write.parquet.page-size-bytes','65536 '
|
|
'','write.parquet.dict-size-bytes','131072 '
|
|
---- TYPES
|
|
string, string, string
|
|
====
|
|
---- QUERY
|
|
ALTER TABLE iceberg_changing_parq_tblprops set TBLPROPERTIES(
|
|
'write.parquet.row-group-size-bytes'='268435456',
|
|
'write.parquet.compression-codec'='snappy',
|
|
'write.parquet.compression-level'='11',
|
|
'write.parquet.page-size-bytes'='131072',
|
|
'write.parquet.dict-size-bytes'='65536'
|
|
);
|
|
DESCRIBE FORMATTED iceberg_changing_parq_tblprops;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'','write.format.default','parquet '
|
|
'','write.parquet.row-group-size-bytes','268435456 '
|
|
'','write.parquet.compression-codec','snappy '
|
|
'','write.parquet.compression-level','11 '
|
|
'','write.parquet.page-size-bytes','131072 '
|
|
'','write.parquet.dict-size-bytes','65536 '
|
|
---- TYPES
|
|
string, string, string
|
|
====
|