mirror of
https://github.com/apache/impala.git
synced 2026-01-26 21:02:23 -05:00
Currently we have a DDL syntax for defining Iceberg partitions that differs from SparkSQL: https://iceberg.apache.org/spark-ddl/#partitioned-by E.g. Impala is using the following syntax: CREATE TABLE ice_t (i int, s string, ts timestamp, d date) PARTITION BY SPEC (i BUCKET 5, ts MONTH, d YEAR) STORED AS ICEBERG; The same in Spark is: CREATE TABLE ice_t (i int, s string, ts timestamp, d date) USING ICEBERG PARTITIONED BY (bucket(5, i), months(ts), years(d)) HIVE-25179 added the following syntax for Hive: CREATE TABLE ice_t (i int, s string, ts timestamp, d date) PARTITIONED BY SPEC (bucket(5, i), months(ts), years(d)) STORED BY ICEBERG; I.e. the same syntax as Spark, but adding the keyword "SPEC". This patch makes Impala use Hive's syntax, i.e. we will also use the PARTITIONED BY SPEC clause + the unified partition transform syntax. Testing: * existing tests have been rewritten with the new syntax Change-Id: Ib72ae445fd68fb0ab75d87b34779dbab922bbc62 Reviewed-on: http://gerrit.cloudera.org:8080/17575 Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
169 lines
3.4 KiB
Plaintext
169 lines
3.4 KiB
Plaintext
====
|
|
---- QUERY
|
|
# Create an unpartitioned Iceberg table used by the INSERT OVERWRITE cases below.
|
|
create table ice_nopart (i int, j int)
|
|
stored as iceberg;
|
|
---- RESULTS
|
|
'Table has been created.'
|
|
====
|
|
---- QUERY
|
|
# Seed the table with a single row and read it back.
INSERT INTO ice_nopart VALUES (1, 2);
|
|
SELECT * FROM ice_nopart;
|
|
---- RESULTS
|
|
1,2
|
|
---- TYPES
|
|
INT,INT
|
|
====
|
|
---- QUERY
|
|
# INSERT OVERWRITE on the unpartitioned table replaces the existing row (1,2)
# with the new row; only (10,20) is expected afterwards.
INSERT OVERWRITE ice_nopart VALUES (10, 20);
|
|
SELECT * FROM ice_nopart;
|
|
---- RESULTS
|
|
10,20
|
|
---- TYPES
|
|
INT,INT
|
|
====
|
|
---- QUERY
|
|
# The overwrite reads from the same table it replaces: each column of the
# existing row is incremented by one, so (10,20) becomes (11,21).
INSERT OVERWRITE ice_nopart select cast(i+1 as int), cast(j+1 as int) from ice_nopart;
|
|
SELECT * FROM ice_nopart;
|
|
---- RESULTS
|
|
11,21
|
|
---- TYPES
|
|
INT,INT
|
|
====
|
|
---- QUERY
|
|
# INSERT OVERWRITE with an empty result set clears the unpartitioned table.
|
|
INSERT OVERWRITE ice_nopart select * from ice_nopart where false;
|
|
select * from ice_nopart;
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Create identity-partitioned table for INSERT OVERWRITE.
# Uses the PARTITIONED BY (col type) clause rather than PARTITIONED BY SPEC.
|
|
create table ice_ident (i int)
|
|
partitioned by (j int)
|
|
stored as iceberg;
|
|
---- RESULTS
|
|
'Table has been created.'
|
|
====
|
|
---- QUERY
|
|
# Seed one row; the second value (2) is the partition column j
# (the later case filters this row with "j = 2").
INSERT INTO ice_ident VALUES (1, 2);
|
|
SELECT * FROM ice_ident;
|
|
---- RESULTS
|
|
1,2
|
|
---- TYPES
|
|
INT,INT
|
|
====
|
|
---- QUERY
|
|
# INSERT OVERWRITE into a new partition (j = 20) keeps the existing partition (j = 2).
|
|
INSERT OVERWRITE ice_ident VALUES (10, 20);
|
|
SELECT * FROM ice_ident;
|
|
---- RESULTS
|
|
1,2
|
|
10,20
|
|
---- TYPES
|
|
INT,INT
|
|
====
|
|
---- QUERY
|
|
# INSERT OVERWRITE only replaces the affected partition (j = 2); the row in j = 20 is kept.
|
|
INSERT OVERWRITE ice_ident select cast(i+1 as int), j from ice_ident where j = 2;
|
|
SELECT * FROM ice_ident;
|
|
---- RESULTS
|
|
2,2
|
|
10,20
|
|
---- TYPES
|
|
INT,INT
|
|
====
|
|
---- QUERY
|
|
# INSERT OVERWRITE with an empty result set has no effect on a partitioned table.
|
|
INSERT OVERWRITE ice_ident select * from ice_ident where false;
|
|
select * from ice_ident;
|
|
---- RESULTS
|
|
2,2
|
|
10,20
|
|
---- TYPES
|
|
INT,INT
|
|
====
|
|
---- QUERY
|
|
# Create DAY-partitioned table for INSERT OVERWRITE
|
|
create table ice_day (ts timestamp)
|
|
partitioned by spec (DAY(ts))
|
|
stored as iceberg;
|
|
---- RESULTS
|
|
'Table has been created.'
|
|
====
|
|
---- QUERY
|
|
# Seed four rows covering three distinct days; 2021-02-02 gets two rows.
# This case has no RESULTS section.
insert into ice_day values ('2021-02-01 16:59:36.630928000');
|
|
insert into ice_day values ('2021-02-02 16:59:36.630928000');
|
|
insert into ice_day values ('2021-02-02 16:59:39.630928000');
|
|
insert into ice_day values ('2021-02-03 16:59:36.630928000');
|
|
====
|
|
---- QUERY
|
|
# Verify that all four seeded rows are visible before any overwrite.
select * from ice_day;
|
|
---- RESULTS
|
|
2021-02-01 16:59:36.630928000
|
|
2021-02-02 16:59:36.630928000
|
|
2021-02-02 16:59:39.630928000
|
|
2021-02-03 16:59:36.630928000
|
|
---- TYPES
|
|
TIMESTAMP
|
|
====
|
|
---- QUERY
|
|
# Overwrite partition '2021-02-02': both of its rows are replaced by the single
# new row, while the 2021-02-01 and 2021-02-03 rows are kept.
|
|
insert overwrite ice_day values ('2021-02-02 00:00:00');
|
|
select * from ice_day;
|
|
---- RESULTS
|
|
2021-02-01 16:59:36.630928000
|
|
2021-02-02 00:00:00
|
|
2021-02-03 16:59:36.630928000
|
|
---- TYPES
|
|
TIMESTAMP
|
|
====
|
|
---- QUERY
|
|
# INSERT OVERWRITE with an empty result set has no effect on a partitioned table.
|
|
INSERT OVERWRITE ice_day select * from ice_day where false;
|
|
select * from ice_day;
|
|
---- RESULTS
|
|
2021-02-01 16:59:36.630928000
|
|
2021-02-02 00:00:00
|
|
2021-02-03 16:59:36.630928000
|
|
---- TYPES
|
|
TIMESTAMP
|
|
====
|
|
---- QUERY
|
|
# Create TRUNCATE-partitioned table for INSERT OVERWRITE
# NOTE(review): per the Iceberg spec, truncate on a decimal presumably applies the
# width to the unscaled value, so TRUNCATE(100, d) on decimal(10, 2) would partition
# by whole units (e.g. 1.11 and 1.12 -> 1.00) -- confirm.
|
|
create table ice_trunc (d decimal(10, 2))
|
|
partitioned by spec (TRUNCATE(100, d))
|
|
stored as iceberg;
|
|
---- RESULTS
|
|
'Table has been created.'
|
|
====
|
|
---- QUERY
|
|
# Seed four rows. This case has no RESULTS section.
insert into ice_trunc values (1.11);
|
|
insert into ice_trunc values (1.12);
|
|
insert into ice_trunc values (2.22);
|
|
insert into ice_trunc values (3.33);
|
|
====
|
|
---- QUERY
|
|
# Verify that all four seeded rows are visible before any overwrite.
select * from ice_trunc;
|
|
---- RESULTS
|
|
1.11
|
|
1.12
|
|
2.22
|
|
3.33
|
|
---- TYPES
|
|
DECIMAL
|
|
====
|
|
---- QUERY
|
|
# Overwrite several partitions in one statement. Expected outcome: the seeded rows
# 1.11, 1.12 and 3.33 are replaced by 1.88, 1.90 and 3.99; 2.22 is kept because no
# new value falls into its partition; 4.44, 4.45 and 5.00 are added as new data.
# NOTE(review): this matches partitions truncated to whole units -- confirm against
# the Iceberg truncate transform for decimals.
insert overwrite ice_trunc values(1.88), (1.9), (3.99), (4.44), (4.45), (5);
|
|
select * from ice_trunc;
|
|
---- RESULTS
|
|
1.88
|
|
1.90
|
|
2.22
|
|
3.99
|
|
4.44
|
|
4.45
|
|
5.00
|
|
---- TYPES
|
|
DECIMAL
|
|
====
|