Files
impala/testdata/workloads/functional-query/queries/QueryTest/iceberg-overwrite.test
Zoltan Borok-Nagy d0749d59de IMPALA-10732: Use consistent DDL for specifying Iceberg partitions
Currently we have a DDL syntax for defining Iceberg partitions that
differs from SparkSQL:
https://iceberg.apache.org/spark-ddl/#partitioned-by

E.g. Impala is using the following syntax:

CREATE TABLE ice_t (i int, s string, ts timestamp, d date)
PARTITION BY SPEC (i BUCKET 5, ts MONTH, d YEAR)
STORED AS ICEBERG;

The same in Spark is:

CREATE TABLE ice_t (i int, s string, ts timestamp, d date)
USING ICEBERG
PARTITIONED BY (bucket(5, i), months(ts), years(d))

HIVE-25179 added the following syntax for Hive:

CREATE TABLE ice_t (i int, s string, ts timestamp, d date)
PARTITIONED BY SPEC (bucket(5, i), months(ts), years(d))
STORED BY ICEBERG;

I.e. the same syntax as Spark, but adding the keyword "SPEC".

This patch makes Impala use Hive's syntax, i.e. we will also
use the PARTITIONED BY SPEC clause + the unified partition
transform syntax.

Testing:
 * existing tests have been rewritten with the new syntax

Change-Id: Ib72ae445fd68fb0ab75d87b34779dbab922bbc62
Reviewed-on: http://gerrit.cloudera.org:8080/17575
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
2021-07-15 15:15:07 +00:00

169 lines
3.4 KiB
Plaintext

====
---- QUERY
# Unpartitioned Iceberg table used as the INSERT OVERWRITE target below.
CREATE TABLE ice_nopart (i INT, j INT)
STORED AS ICEBERG;
---- RESULTS
'Table has been created.'
====
---- QUERY
# Seed the table with a single row.
INSERT INTO ice_nopart VALUES (1, 2);
SELECT * FROM ice_nopart;
---- RESULTS
1,2
---- TYPES
INT,INT
====
---- QUERY
# Overwriting an unpartitioned table replaces its previous content.
INSERT OVERWRITE ice_nopart VALUES (10, 20);
SELECT * FROM ice_nopart;
---- RESULTS
10,20
---- TYPES
INT,INT
====
---- QUERY
# The source of the overwrite may be the target table itself.
INSERT OVERWRITE ice_nopart
SELECT CAST(i + 1 AS INT), CAST(j + 1 AS INT) FROM ice_nopart;
SELECT * FROM ice_nopart;
---- RESULTS
11,21
---- TYPES
INT,INT
====
---- QUERY
# Overwriting with an empty result set leaves the unpartitioned table empty.
INSERT OVERWRITE ice_nopart SELECT * FROM ice_nopart WHERE FALSE;
SELECT * FROM ice_nopart;
---- RESULTS
====
---- QUERY
# Identity-partitioned Iceberg table (partition column j) used as the
# INSERT OVERWRITE target below.
CREATE TABLE ice_ident (i INT)
PARTITIONED BY (j INT)
STORED AS ICEBERG;
---- RESULTS
'Table has been created.'
====
---- QUERY
# Seed partition j=2 with a single row.
INSERT INTO ice_ident VALUES (1, 2);
SELECT * FROM ice_ident;
---- RESULTS
1,2
---- TYPES
INT,INT
====
---- QUERY
# Overwriting into a new partition (j=20) keeps the rows of the existing one (j=2).
INSERT OVERWRITE ice_ident VALUES (10, 20);
SELECT * FROM ice_ident;
---- RESULTS
1,2
10,20
---- TYPES
INT,INT
====
---- QUERY
# Only the partition that receives rows (j=2) is replaced; j=20 stays untouched.
INSERT OVERWRITE ice_ident
SELECT CAST(i + 1 AS INT), j FROM ice_ident WHERE j = 2;
SELECT * FROM ice_ident;
---- RESULTS
2,2
10,20
---- TYPES
INT,INT
====
---- QUERY
# An empty result set overwrites no partition, so the table content is unchanged.
INSERT OVERWRITE ice_ident SELECT * FROM ice_ident WHERE FALSE;
SELECT * FROM ice_ident;
---- RESULTS
2,2
10,20
---- TYPES
INT,INT
====
---- QUERY
# DAY-partitioned Iceberg table used as the INSERT OVERWRITE target below.
CREATE TABLE ice_day (ts TIMESTAMP)
PARTITIONED BY SPEC (DAY(ts))
STORED AS ICEBERG;
---- RESULTS
'Table has been created.'
====
---- QUERY
# Seed three distinct days; 2021-02-02 receives two rows.
INSERT INTO ice_day VALUES ('2021-02-01 16:59:36.630928000');
INSERT INTO ice_day VALUES ('2021-02-02 16:59:36.630928000');
INSERT INTO ice_day VALUES ('2021-02-02 16:59:39.630928000');
INSERT INTO ice_day VALUES ('2021-02-03 16:59:36.630928000');
====
---- QUERY
# Verify the seeded content before overwriting.
SELECT * FROM ice_day;
---- RESULTS
2021-02-01 16:59:36.630928000
2021-02-02 16:59:36.630928000
2021-02-02 16:59:39.630928000
2021-02-03 16:59:36.630928000
---- TYPES
TIMESTAMP
====
---- QUERY
# Replace the rows of day 2021-02-02; the rows of the other days are kept.
INSERT OVERWRITE ice_day VALUES ('2021-02-02 00:00:00');
SELECT * FROM ice_day;
---- RESULTS
2021-02-01 16:59:36.630928000
2021-02-02 00:00:00
2021-02-03 16:59:36.630928000
---- TYPES
TIMESTAMP
====
---- QUERY
# An empty result set overwrites no partition, so the table content is unchanged.
INSERT OVERWRITE ice_day SELECT * FROM ice_day WHERE FALSE;
SELECT * FROM ice_day;
---- RESULTS
2021-02-01 16:59:36.630928000
2021-02-02 00:00:00
2021-02-03 16:59:36.630928000
---- TYPES
TIMESTAMP
====
---- QUERY
# TRUNCATE-partitioned Iceberg table used as the INSERT OVERWRITE target below.
# Per the Iceberg spec, TRUNCATE(100, d) truncates the unscaled decimal value to a
# multiple of 100, so with DECIMAL(10, 2) the positive values used here are
# partitioned by their whole-number part (1.11 and 1.12 share a partition).
CREATE TABLE ice_trunc (d DECIMAL(10, 2))
PARTITIONED BY SPEC (TRUNCATE(100, d))
STORED AS ICEBERG;
---- RESULTS
'Table has been created.'
====
---- QUERY
# Seed the table with four rows.
INSERT INTO ice_trunc VALUES (1.11);
INSERT INTO ice_trunc VALUES (1.12);
INSERT INTO ice_trunc VALUES (2.22);
INSERT INTO ice_trunc VALUES (3.33);
====
---- QUERY
# Verify the seeded content before overwriting.
SELECT * FROM ice_trunc;
---- RESULTS
1.11
1.12
2.22
3.33
---- TYPES
DECIMAL
====
---- QUERY
# Only partitions that receive new rows are replaced: 1.11/1.12 and 3.33 are
# overwritten, 2.22 is kept, and 4.44, 4.45 and 5.00 are added.
INSERT OVERWRITE ice_trunc VALUES (1.88), (1.9), (3.99), (4.44), (4.45), (5);
SELECT * FROM ice_trunc;
---- RESULTS
1.88
1.90
2.22
3.99
4.44
4.45
5.00
---- TYPES
DECIMAL
====
---- QUERY
# INSERT empty result set has no effect on partitioned table.
INSERT OVERWRITE ice_trunc SELECT * FROM ice_trunc WHERE FALSE;
SELECT * FROM ice_trunc;
---- RESULTS
1.88
1.90
2.22
3.99
4.44
4.45
5.00
---- TYPES
DECIMAL
====