mirror of
https://github.com/apache/impala.git
synced 2026-02-02 06:00:36 -05:00
Currently we have a DDL syntax for defining Iceberg partitions that differs from SparkSQL: https://iceberg.apache.org/spark-ddl/#partitioned-by E.g. Impala is using the following syntax: CREATE TABLE ice_t (i int, s string, ts timestamp, d date) PARTITION BY SPEC (i BUCKET 5, ts MONTH, d YEAR) STORED AS ICEBERG; The same in Spark is: CREATE TABLE ice_t (i int, s string, ts timestamp, d date) USING ICEBERG PARTITIONED BY (bucket(5, i), months(ts), years(d)) HIVE-25179 added the following syntax for Hive: CREATE TABLE ice_t (i int, s string, ts timestamp, d date) PARTITIONED BY SPEC (bucket(5, i), months(ts), years(d)) STORED BY ICEBERG; I.e. the same syntax as Spark, but adding the keyword "SPEC". This patch makes Impala use Hive's syntax, i.e. we will also use the PARTITIONED BY SPEC clause + the unified partition transform syntax. Testing: * existing tests have been rewritten with the new syntax Change-Id: Ib72ae445fd68fb0ab75d87b34779dbab922bbc62 Reviewed-on: http://gerrit.cloudera.org:8080/17575 Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
83 lines
1.8 KiB
Plaintext
83 lines
1.8 KiB
Plaintext
====
|
|
---- QUERY
|
|
# Create unpartitioned Iceberg table
|
|
create table ice_nopart (i int)
|
|
stored as iceberg;
|
|
insert into ice_nopart values (1), (2), (3);
|
|
select * from ice_nopart;
|
|
---- RESULTS
|
|
1
|
|
2
|
|
3
|
|
====
|
|
---- QUERY
|
|
# Compute column stats so that a later TRUNCATE has stats to clear.
|
|
compute stats ice_nopart;
|
|
show column stats ice_nopart;
|
|
---- RESULTS
|
|
'i','INT',3,0,4,4,-1,-1
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# TRUNCATE iceberg table
|
|
truncate table ice_nopart;
|
|
====
|
|
---- QUERY
|
|
# SELECT from truncated table
|
|
select * from ice_nopart
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
show column stats ice_nopart;
|
|
---- RESULTS
|
|
'i','INT',-1,-1,4,4,-1,-1
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# Create partitioned Iceberg table
|
|
create table ice_part (i int, s string, t timestamp)
|
|
partitioned by spec (year(t), bucket(10, i))
|
|
stored as iceberg;
|
|
insert into ice_part
|
|
values (1, 'ice', '2021-01-27 18:57:25.155746000'),
|
|
(2, 'berg', '2020-01-27 18:57:25.155746000');
|
|
select * from ice_part;
|
|
---- RESULTS
|
|
1,'ice',2021-01-27 18:57:25.155746000
|
|
2,'berg',2020-01-27 18:57:25.155746000
|
|
---- TYPES
|
|
INT,STRING,TIMESTAMP
|
|
====
|
|
---- QUERY
|
|
# Compute column stats so that a later TRUNCATE has stats to clear.
|
|
compute stats ice_part;
|
|
show column stats ice_part;
|
|
---- RESULTS
|
|
'i','INT',2,0,4,4,-1,-1
|
|
's','STRING',2,0,4,3.5,-1,-1
|
|
't','TIMESTAMP',2,0,16,16,-1,-1
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# TRUNCATE iceberg table
|
|
truncate table ice_part;
|
|
====
|
|
---- QUERY
|
|
# SELECT from truncated table
|
|
select * from ice_part
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Column stats should be cleared by TRUNCATE.
|
|
show column stats ice_part;
|
|
---- RESULTS
|
|
'i','INT',-1,-1,4,4,-1,-1
|
|
's','STRING',-1,-1,-1,-1,-1,-1
|
|
't','TIMESTAMP',-1,-1,16,16,-1,-1
|
|
---- TYPES
|
|
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
|
|
====
|