Files
impala/testdata/workloads/functional-query/queries/QueryTest/iceberg-partitioned-insert.test
Zoltan Borok-Nagy 301c3cebad IMPALA-11591: Avoid calling planFiles() on Iceberg tables
Iceberg's planFiles() API is very expensive as it needs to read all
the relevant manifest files. It's especially expensive on object
stores like S3.

When there are no predicates on the table and we are not doing
time travel it's possible to avoid calling planFiles() and do the
scan planning from cached metadata. When none of the predicates are
on partition columns there's little benefit of pushing down predicates
to Iceberg. So with this patch we only push down predicates (and
hence invoke planFiles()) when at least one of the predicates are
on partition columns.

This patch introduces a new class to store content files:
IcebergContentFileStore. It separates data, delete, and "old" content
files. "Old" content files are the ones that are not part of the current
snapshot. We add such data files during time travel. Storing "old"
content files in a separate concurrent hash map also fixes a concurrency
bug in the current code.

Testing:
 * executed current e2e tests
 * updated predicate push down tests

Change-Id: Iadb883a28602bb68cf4f61e57cdd691605045ac5
Reviewed-on: http://gerrit.cloudera.org:8080/19043
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
2022-11-02 22:14:47 +00:00

669 lines
21 KiB
Plaintext

====
---- QUERY
# Test partitioned INSERTs with a single column that is also
# the partitioning column.
create table ice_only_part (i int)
partitioned by spec (i)
stored as iceberg;
---- RESULTS
'Table has been created.'
====
---- QUERY
insert into ice_only_part values (1), (2), (3);
insert into ice_only_part values (cast(4 as tinyint));
insert into ice_only_part values (NULL);
select * from ice_only_part;
---- RESULTS
1
2
3
4
NULL
---- TYPES
INT
====
---- QUERY
show files in ice_only_part;
---- LABELS
Path,Size,Partition
---- RESULTS
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_only_part/data/i=1/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_only_part/data/i=2/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_only_part/data/i=3/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_only_part/data/i=4/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_only_part/data/i=__HIVE_DEFAULT_PARTITION__/.*.0.parq','.*',''
---- TYPES
STRING, STRING, STRING
====
---- QUERY
SHOW PARTITIONS ice_only_part;
---- RESULTS
'{"i":"1"}',1,1
'{"i":"2"}',1,1
'{"i":"3"}',1,1
'{"i":"4"}',1,1
'{"i":null}',1,1
---- TYPES
STRING, BIGINT, BIGINT
====
---- QUERY
# Test partition pruning with RUNTIME_PROFILE.
select * from ice_only_part
where i = 1;
---- RESULTS
1
---- TYPES
INT
---- RUNTIME_PROFILE
aggregation(SUM, RowsRead): 1
====
---- QUERY
# Test inserts with multiple partition columns.
create table ice_multi_part (i int, d date, s string)
partitioned by spec(i, d)
stored as iceberg;
---- RESULTS
'Table has been created.'
====
---- QUERY
insert into ice_multi_part
values (1, '2020-12-07', 'first'), (2, '2020-12-08', 'second');
select * from ice_multi_part;
---- RESULTS
1,2020-12-07,'first'
2,2020-12-08,'second'
---- TYPES
INT, DATE, STRING
====
---- QUERY
insert into
ice_multi_part (d, s)
values
('2022-07-26', 'third'),
('2022-07-27', 'fourth'),
('2022-07-28', 'fifth');
select * from ice_multi_part order by d;
---- RESULTS
1,2020-12-07,'first'
2,2020-12-08,'second'
NULL,2022-07-26,'third'
NULL,2022-07-27,'fourth'
NULL,2022-07-28,'fifth'
---- TYPES
INT, DATE, STRING
====
---- QUERY
insert into
ice_multi_part (i, s)
values
(6, 'sixth'),
(7, 'seventh');
select * from ice_multi_part order by d, i;
---- RESULTS
1,2020-12-07,'first'
2,2020-12-08,'second'
NULL,2022-07-26,'third'
NULL,2022-07-27,'fourth'
NULL,2022-07-28,'fifth'
6,NULL,'sixth'
7,NULL,'seventh'
---- TYPES
INT, DATE, STRING
====
---- QUERY
show files in ice_multi_part;
---- LABELS
Path,Size,Partition
---- RESULTS
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_multi_part/data/i=1/d=2020-12-07/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_multi_part/data/i=2/d=2020-12-08/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_multi_part/data/i=6/d=__HIVE_DEFAULT_PARTITION__/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_multi_part/data/i=7/d=__HIVE_DEFAULT_PARTITION__/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_multi_part/data/i=__HIVE_DEFAULT_PARTITION__/d=2022-07-26/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_multi_part/data/i=__HIVE_DEFAULT_PARTITION__/d=2022-07-27/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_multi_part/data/i=__HIVE_DEFAULT_PARTITION__/d=2022-07-28/.*.0.parq','.*',''
---- TYPES
STRING, STRING, STRING
====
---- QUERY
select * from ice_multi_part
where d = '2020-12-08';
---- RESULTS
2,2020-12-08,'second'
---- TYPES
INT, DATE, STRING
---- RUNTIME_PROFILE
aggregation(SUM, RowsRead): 1
====
---- QUERY
select
*
from
ice_multi_part
where
i is null
order by
d;
---- RESULTS
NULL,2022-07-26,'third'
NULL,2022-07-27,'fourth'
NULL,2022-07-28,'fifth'
---- TYPES
INT, DATE, STRING
---- RUNTIME_PROFILE
aggregation(SUM, RowsRead): 3
====
---- QUERY
# Test that Impala only writes one file per partition.
create table ice_bigints (i BIGINT, j BIGINT, k BIGINT)
partitioned by spec (i, j)
stored as iceberg;
---- RESULTS
'Table has been created.'
====
---- QUERY
insert into ice_bigints select id % 2, id % 3, id from functional.alltypes;
select count(*) from ice_bigints;
---- RESULTS
7300
---- TYPES
BIGINT
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 0
aggregation(SUM, NumFileMetadataRead): 0
====
---- QUERY
select count(*) from ice_bigints
where i = 0 and j = 0;
---- RESULTS
1217
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 1
aggregation(SUM, RowsRead): 1217
====
---- QUERY
# Test partitioning with all supported types.
# Timestamp-based partitioning is the only unsupported case.
# Also test partition pruning with all of the supported types.
create table alltypes_part (
id INT,
bool_col BOOLEAN,
int_col INT,
bigint_col BIGINT,
float_col FLOAT,
double_col DOUBLE,
date_col DATE,
string_col STRING,
timestamp_col TIMESTAMP)
partitioned by spec (
id,
bool_col,
int_col,
bigint_col,
float_col,
double_col,
date_col,
string_col)
stored as iceberg;
---- RESULTS
'Table has been created.'
====
---- QUERY
insert into alltypes_part
select id, bool_col, int_col, bigint_col, float_col, double_col,
CAST(date_string_col as date FORMAT 'MM/DD/YY'), string_col, timestamp_col
from functional.alltypestiny;
select count(*) from alltypes_part;
---- RESULTS
8
---- TYPES
BIGINT
====
---- QUERY
show files in alltypes_part;
---- LABELS
Path,Size,Partition
---- RESULTS
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/alltypes_part/data/id=0/bool_col=true/int_col=0/bigint_col=0/float_col=0/double_col=0/date_col=2009-01-01/string_col=0/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/alltypes_part/data/id=1/bool_col=false/int_col=1/bigint_col=10/float_col=1.100000023841858/double_col=10.1/date_col=2009-01-01/string_col=1/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/alltypes_part/data/id=2/bool_col=true/int_col=0/bigint_col=0/float_col=0/double_col=0/date_col=2009-02-01/string_col=0/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/alltypes_part/data/id=3/bool_col=false/int_col=1/bigint_col=10/float_col=1.100000023841858/double_col=10.1/date_col=2009-02-01/string_col=1/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/alltypes_part/data/id=4/bool_col=true/int_col=0/bigint_col=0/float_col=0/double_col=0/date_col=2009-03-01/string_col=0/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/alltypes_part/data/id=5/bool_col=false/int_col=1/bigint_col=10/float_col=1.100000023841858/double_col=10.1/date_col=2009-03-01/string_col=1/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/alltypes_part/data/id=6/bool_col=true/int_col=0/bigint_col=0/float_col=0/double_col=0/date_col=2009-04-01/string_col=0/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/alltypes_part/data/id=7/bool_col=false/int_col=1/bigint_col=10/float_col=1.100000023841858/double_col=10.1/date_col=2009-04-01/string_col=1/.*.0.parq','.*',''
---- TYPES
STRING, STRING, STRING
====
---- QUERY
SHOW PARTITIONS alltypes_part;
---- RESULTS
'{"id":"0","bool_col":"true","int_col":"0","bigint_col":"0","float_col":"0.0","double_col":"0.0","date_col":"14245","string_col":"0"}',1,1
'{"id":"1","bool_col":"false","int_col":"1","bigint_col":"10","float_col":"1.1","double_col":"10.1","date_col":"14245","string_col":"1"}',1,1
'{"id":"2","bool_col":"true","int_col":"0","bigint_col":"0","float_col":"0.0","double_col":"0.0","date_col":"14276","string_col":"0"}',1,1
'{"id":"3","bool_col":"false","int_col":"1","bigint_col":"10","float_col":"1.1","double_col":"10.1","date_col":"14276","string_col":"1"}',1,1
'{"id":"4","bool_col":"true","int_col":"0","bigint_col":"0","float_col":"0.0","double_col":"0.0","date_col":"14304","string_col":"0"}',1,1
'{"id":"5","bool_col":"false","int_col":"1","bigint_col":"10","float_col":"1.1","double_col":"10.1","date_col":"14304","string_col":"1"}',1,1
'{"id":"6","bool_col":"true","int_col":"0","bigint_col":"0","float_col":"0.0","double_col":"0.0","date_col":"14335","string_col":"0"}',1,1
'{"id":"7","bool_col":"false","int_col":"1","bigint_col":"10","float_col":"1.1","double_col":"10.1","date_col":"14335","string_col":"1"}',1,1
---- TYPES
STRING, BIGINT, BIGINT
====
---- QUERY
# INSERTs with wrong value orderings are rejected.
insert into alltypes_part
select bool_col, id, int_col, bigint_col, float_col, double_col,
CAST(date_string_col as date FORMAT 'MM/DD/YY'), string_col, timestamp_col
from functional.alltypestiny;
select count(*) from alltypes_part;
---- CATCH
Expression 'id' (type: INT) would need to be cast to BOOLEAN for column 'bool_col'
====
---- QUERY
insert into alltypes_part
select id, bool_col, int_col, bigint_col, float_col, double_col,
CAST(date_string_col as date FORMAT 'MM/DD/YY'), timestamp_col, string_col
from functional.alltypestiny;
select count(*) from alltypes_part;
---- CATCH
Expression 'timestamp_col' (type: TIMESTAMP) would need to be cast to STRING for column 'string_col'
====
---- QUERY
select count(*) from alltypes_part
where bool_col = true;
---- RESULTS
4
---- TYPES
BIGINT
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 4
====
---- QUERY
select count(*) from alltypes_part
where float_col = 0;
---- RESULTS
4
---- TYPES
BIGINT
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 4
====
---- QUERY
select count(*) from alltypes_part
where double_col = 0;
---- RESULTS
4
---- TYPES
BIGINT
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 4
====
---- QUERY
select count(*) from alltypes_part
where date_col = '2009-01-01';
---- RESULTS
2
---- TYPES
BIGINT
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 2
====
---- QUERY
select count(*) from alltypes_part
where string_col = '0';
---- RESULTS
4
---- TYPES
BIGINT
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 4
====
---- QUERY
# 'timestamp_col' is not a partitioning column, so min/max stats will not be used to
# eliminate row groups
select count(*) from alltypes_part
where timestamp_col = now();
---- RESULTS
0
---- TYPES
BIGINT
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 8
====
---- QUERY
create table alltypes_part_2 like alltypes_part;
---- RESULTS
'Table has been created.'
====
---- QUERY
insert into
alltypes_part_2(
id,
int_col,
bigint_col,
float_col,
double_col,
date_col,
string_col,
timestamp_col
)
select
id,
int_col,
bigint_col,
float_col,
double_col,
CAST(date_string_col as date FORMAT 'MM/DD/YY'),
string_col,
timestamp_col
from
functional.alltypestiny;
select count(*) from alltypes_part_2;
---- RESULTS
8
---- TYPES
BIGINT
====
---- QUERY
show files in alltypes_part_2;
---- LABELS
Path,Size,Partition
---- RESULTS
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/alltypes_part_2/data/id=0/bool_col=__HIVE_DEFAULT_PARTITION__/int_col=0/bigint_col=0/float_col=0/double_col=0/date_col=2009-01-01/string_col=0/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/alltypes_part_2/data/id=1/bool_col=__HIVE_DEFAULT_PARTITION__/int_col=1/bigint_col=10/float_col=1.100000023841858/double_col=10.1/date_col=2009-01-01/string_col=1/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/alltypes_part_2/data/id=2/bool_col=__HIVE_DEFAULT_PARTITION__/int_col=0/bigint_col=0/float_col=0/double_col=0/date_col=2009-02-01/string_col=0/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/alltypes_part_2/data/id=3/bool_col=__HIVE_DEFAULT_PARTITION__/int_col=1/bigint_col=10/float_col=1.100000023841858/double_col=10.1/date_col=2009-02-01/string_col=1/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/alltypes_part_2/data/id=4/bool_col=__HIVE_DEFAULT_PARTITION__/int_col=0/bigint_col=0/float_col=0/double_col=0/date_col=2009-03-01/string_col=0/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/alltypes_part_2/data/id=5/bool_col=__HIVE_DEFAULT_PARTITION__/int_col=1/bigint_col=10/float_col=1.100000023841858/double_col=10.1/date_col=2009-03-01/string_col=1/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/alltypes_part_2/data/id=6/bool_col=__HIVE_DEFAULT_PARTITION__/int_col=0/bigint_col=0/float_col=0/double_col=0/date_col=2009-04-01/string_col=0/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/alltypes_part_2/data/id=7/bool_col=__HIVE_DEFAULT_PARTITION__/int_col=1/bigint_col=10/float_col=1.100000023841858/double_col=10.1/date_col=2009-04-01/string_col=1/.*.0.parq','.*',''
---- TYPES
STRING, STRING, STRING
====
---- QUERY
# Iceberg partitioning is independent of column order.
# Test inserts with multiple partition columns.
create table ice_part_non_order (i int, d date, s string)
partitioned by spec(s, d)
stored as iceberg;
---- RESULTS
'Table has been created.'
====
---- QUERY
insert into ice_part_non_order
values (1, '2020-12-07', 'first'), (2, '2020-12-08', 'second');
select * from ice_part_non_order where s='second';
---- RESULTS
2,2020-12-08,'second'
---- TYPES
INT, DATE, STRING
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 1
====
---- QUERY
show files in ice_part_non_order;
---- LABELS
Path,Size,Partition
---- RESULTS
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_part_non_order/data/s=first/d=2020-12-07/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_part_non_order/data/s=second/d=2020-12-08/.*.0.parq','.*',''
---- TYPES
STRING, STRING, STRING
====
---- QUERY
SHOW PARTITIONS ice_part_non_order;
---- RESULTS
'{"s":"first","d":"18603"}',1,1
'{"s":"second","d":"18604"}',1,1
---- TYPES
STRING, BIGINT, BIGINT
====
---- QUERY
# Test inserts with different partition specs.
create table ice_alter_part (i int, d date, s string)
stored as iceberg;
---- RESULTS
'Table has been created.'
====
---- QUERY
insert into ice_alter_part
values (1, '2020-12-07', 'first'), (2, '2020-12-08', 'second');
select * from ice_alter_part;
---- RESULTS
1,2020-12-07,'first'
2,2020-12-08,'second'
---- TYPES
INT, DATE, STRING
====
---- QUERY
alter table ice_alter_part set partition spec(i, d);
---- RESULTS
'Updated partition spec.'
====
---- QUERY
insert into ice_alter_part
values (3, '2020-12-09', 'third'), (4, '2020-12-10', 'fourth');
select * from ice_alter_part;
---- RESULTS
1,2020-12-07,'first'
2,2020-12-08,'second'
3,2020-12-09,'third'
4,2020-12-10,'fourth'
---- TYPES
INT, DATE, STRING
====
---- QUERY
alter table ice_alter_part set partition spec(year(d), i, bucket(5, s));
---- RESULTS
'Updated partition spec.'
====
---- QUERY
insert into ice_alter_part
values (5, '2020-12-11', 'fifth'), (6, '2020-12-12', 'sixth');
select * from ice_alter_part;
---- RESULTS
1,2020-12-07,'first'
2,2020-12-08,'second'
3,2020-12-09,'third'
4,2020-12-10,'fourth'
5,2020-12-11,'fifth'
6,2020-12-12,'sixth'
---- TYPES
INT, DATE, STRING
====
---- QUERY
show files in ice_alter_part;
---- LABELS
Path,Size,Partition
---- RESULTS: VERIFY_IS_SUBSET
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_alter_part/data/[^=]*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_alter_part/data/i=3/d=2020-12-09/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_alter_part/data/i=4/d=2020-12-10/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_alter_part/data/d_year=2020/i=5/s_bucket=2/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_alter_part/data/d_year=2020/i=6/s_bucket=0/.*.0.parq','.*',''
---- TYPES
STRING, STRING, STRING
====
---- QUERY
SHOW PARTITIONS ice_alter_part;
---- RESULTS
'{"d_year":"50","i":"5","s_bucket":"2"}',1,1
'{"d_year":"50","i":"6","s_bucket":"0"}',1,1
'{"i":"3","d":"18605"}',1,1
'{"i":"4","d":"18606"}',1,1
'{}',2,1
---- TYPES
STRING, BIGINT, BIGINT
====
---- QUERY
create table ice_void (i int, s string, d date)
partitioned by spec (void(i), truncate(1, s), year(d))
stored as iceberg;
insert into ice_void values (1, 'one', '2001-01-01'),
(11,'other','2001-01-11'),
(2, 'two', '2002-02-02'),
(3, 'three', '2003-03-03');
select * from ice_void;
---- RESULTS
1,'one',2001-01-01
11,'other',2001-01-11
2,'two',2002-02-02
3,'three',2003-03-03
---- TYPES
INT, STRING, DATE
====
---- QUERY
show files in ice_void;
---- LABELS
Path,Size,Partition
---- RESULTS
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_void/data/s_trunc=o/d_year=2001/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_void/data/s_trunc=t/d_year=2002/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_void/data/s_trunc=t/d_year=2003/.*.0.parq','.*',''
---- TYPES
STRING, STRING, STRING
====
---- QUERY
SHOW PARTITIONS ice_void;
---- RESULTS
'{"i_null":null,"s_trunc":"o","d_year":"31"}',2,1
'{"i_null":null,"s_trunc":"t","d_year":"32"}',1,1
'{"i_null":null,"s_trunc":"t","d_year":"33"}',1,1
---- TYPES
STRING, BIGINT, BIGINT
====
---- QUERY
alter table ice_void set partition spec (i, void(s), year(d));
insert into ice_void values (4, 'four', '2004-04-04');
select * from ice_void;
---- RESULTS
1,'one',2001-01-01
11,'other',2001-01-11
2,'two',2002-02-02
3,'three',2003-03-03
4,'four',2004-04-04
---- TYPES
INT, STRING, DATE
====
---- QUERY
show files in ice_void;
---- LABELS
Path,Size,Partition
---- RESULTS: VERIFY_IS_SUBSET
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_void/data/s_trunc=o/d_year=2001/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_void/data/s_trunc=t/d_year=2002/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_void/data/s_trunc=t/d_year=2003/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_void/data/i=4/d_year=2004/.*.0.parq','.*',''
---- TYPES
STRING, STRING, STRING
====
---- QUERY
SHOW PARTITIONS ice_void;
---- RESULTS
'{"i":"4","s_null":null,"d_year":"34"}',1,1
'{"i_null":null,"s_trunc":"o","d_year":"31"}',2,1
'{"i_null":null,"s_trunc":"t","d_year":"32"}',1,1
'{"i_null":null,"s_trunc":"t","d_year":"33"}',1,1
---- TYPES
STRING, BIGINT, BIGINT
====
---- QUERY
alter table ice_void set partition spec (i, void(s), void(d));
insert into ice_void values (5, 'five', '2005-05-05'), (5, 'other five', '1995-05-05');
select * from ice_void;
---- RESULTS
1,'one',2001-01-01
11,'other',2001-01-11
2,'two',2002-02-02
3,'three',2003-03-03
4,'four',2004-04-04
5,'five',2005-05-05
5,'other five',1995-05-05
---- TYPES
INT, STRING, DATE
====
---- QUERY
show files in ice_void;
---- LABELS
Path,Size,Partition
---- RESULTS: VERIFY_IS_SUBSET
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_void/data/s_trunc=o/d_year=2001/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_void/data/s_trunc=t/d_year=2002/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_void/data/s_trunc=t/d_year=2003/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_void/data/i=4/d_year=2004/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_void/data/i=5/[^=/]*.0.parq','.*',''
---- TYPES
STRING, STRING, STRING
====
---- QUERY
SHOW PARTITIONS ice_void;
---- RESULTS
'{"i":"4","s_null":null,"d_year":"34"}',1,1
'{"i":"5","s_null":null,"d_null":null}',2,1
'{"i_null":null,"s_trunc":"o","d_year":"31"}',2,1
'{"i_null":null,"s_trunc":"t","d_year":"32"}',1,1
'{"i_null":null,"s_trunc":"t","d_year":"33"}',1,1
---- TYPES
STRING, BIGINT, BIGINT
====
---- QUERY
alter table ice_void set partition spec (void(i), void(s), void(d));
insert into ice_void values (6, 'six', '2006-06-06'), (7, 'seven', '2007-07-07');
select * from ice_void;
---- RESULTS
1,'one',2001-01-01
11,'other',2001-01-11
2,'two',2002-02-02
3,'three',2003-03-03
4,'four',2004-04-04
5,'five',2005-05-05
5,'other five',1995-05-05
6,'six',2006-06-06
7,'seven',2007-07-07
---- TYPES
INT, STRING, DATE
====
---- QUERY
show files in ice_void;
---- LABELS
Path,Size,Partition
---- RESULTS: VERIFY_IS_SUBSET
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_void/data/s_trunc=o/d_year=2001/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_void/data/s_trunc=t/d_year=2002/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_void/data/s_trunc=t/d_year=2003/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_void/data/i=4/d_year=2004/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_void/data/i=5/[^=/]*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_void/data/[^=/]*.0.parq','.*',''
---- TYPES
STRING, STRING, STRING
====
---- QUERY
SHOW PARTITIONS ice_void;
---- RESULTS
'{"i":"4","s_null":null,"d_year":"34"}',1,1
'{"i":"5","s_null":null,"d_null":null}',2,1
'{"i_null":null,"s_null":null,"d_null":null}',2,1
'{"i_null":null,"s_trunc":"o","d_year":"31"}',2,1
'{"i_null":null,"s_trunc":"t","d_year":"32"}',1,1
'{"i_null":null,"s_trunc":"t","d_year":"33"}',1,1
---- TYPES
STRING, BIGINT, BIGINT
====
---- QUERY
create table store_sales partitioned by spec (ss_sold_date_sk) stored as iceberg
as select * from tpcds_parquet.store_sales;
select count(*) from store_sales;
---- RESULTS
2880404
---- TYPES
BIGINT
====
---- QUERY
select count(*) from store_sales where ss_sold_date_sk is null;
---- RESULTS
130093
---- TYPES
BIGINT
====