Files
impala/testdata/workloads/functional-query/queries/QueryTest/iceberg-truncate.test
Zoltan Borok-Nagy d0749d59de IMPALA-10732: Use consistent DDL for specifying Iceberg partitions
Currently we have a DDL syntax for defining Iceberg partitions that
differs from SparkSQL:
https://iceberg.apache.org/spark-ddl/#partitioned-by

E.g. Impala is using the following syntax:

CREATE TABLE ice_t (i int, s string, ts timestamp, d date)
PARTITION BY SPEC (i BUCKET 5, ts MONTH, d YEAR)
STORED AS ICEBERG;

The same in Spark is:

CREATE TABLE ice_t (i int, s string, ts timestamp, d date)
USING ICEBERG
PARTITIONED BY (bucket(5, i), months(ts), years(d))

HIVE-25179 added the following syntax for Hive:

CREATE TABLE ice_t (i int, s string, ts timestamp, d date)
PARTITIONED BY SPEC (bucket(5, i), months(ts), years(d))
STORED BY ICEBERG;

I.e. the same syntax as Spark, but adding the keyword "SPEC".

This patch makes Impala use Hive's syntax, i.e. we will also
use the PARTITIONED BY SPEC clause + the unified partition
transform syntax.

Testing:
 * existing tests have been rewritten with the new syntax

Change-Id: Ib72ae445fd68fb0ab75d87b34779dbab922bbc62
Reviewed-on: http://gerrit.cloudera.org:8080/17575
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
2021-07-15 15:15:07 +00:00

83 lines
1.8 KiB
Plaintext

====
---- QUERY
# Create an unpartitioned Iceberg table, insert three rows and read them back.
create table ice_nopart (i int)
stored as iceberg;
insert into ice_nopart values (1), (2), (3);
select * from ice_nopart;
---- RESULTS
1
2
3
====
---- QUERY
# Compute column stats before the TRUNCATE so that a later SHOW COLUMN STATS
# can verify that TRUNCATE clears them.
compute stats ice_nopart;
show column stats ice_nopart;
---- RESULTS
'i','INT',3,0,4,4,-1,-1
---- TYPES
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
====
---- QUERY
# TRUNCATE the unpartitioned Iceberg table.
truncate table ice_nopart;
====
---- QUERY
# SELECT from the truncated table; no rows are expected.
select * from ice_nopart
---- RESULTS
====
---- QUERY
# Column stats should be cleared by TRUNCATE.
show column stats ice_nopart;
---- RESULTS
'i','INT',-1,-1,4,4,-1,-1
---- TYPES
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
====
---- QUERY
# Create an Iceberg table partitioned by year(t) and bucket(10, i), insert two
# rows and read them back.
create table ice_part (i int, s string, t timestamp)
partitioned by spec (year(t), bucket(10, i))
stored as iceberg;
insert into ice_part
values (1, 'ice', '2021-01-27 18:57:25.155746000'),
(2, 'berg', '2020-01-27 18:57:25.155746000');
select * from ice_part;
---- RESULTS
1,'ice',2021-01-27 18:57:25.155746000
2,'berg',2020-01-27 18:57:25.155746000
---- TYPES
INT,STRING,TIMESTAMP
====
---- QUERY
# Compute column stats before the TRUNCATE so that a later SHOW COLUMN STATS
# can verify that TRUNCATE clears them.
compute stats ice_part;
show column stats ice_part;
---- RESULTS
'i','INT',2,0,4,4,-1,-1
's','STRING',2,0,4,3.5,-1,-1
't','TIMESTAMP',2,0,16,16,-1,-1
---- TYPES
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
====
---- QUERY
# TRUNCATE the partitioned Iceberg table.
truncate table ice_part;
====
---- QUERY
# SELECT from the truncated table; no rows are expected.
select * from ice_part
---- RESULTS
====
---- QUERY
# Column stats should be cleared by TRUNCATE.
show column stats ice_part;
---- RESULTS
'i','INT',-1,-1,4,4,-1,-1
's','STRING',-1,-1,-1,-1,-1,-1
't','TIMESTAMP',-1,-1,16,16,-1,-1
---- TYPES
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
====