mirror of
https://github.com/apache/impala.git
synced 2026-01-31 09:00:19 -05:00
When Impala created the metadata objects for the Iceberg data files, it tried to convert the partition values to strings. However, partition values can also be NULL. The code did not expect this, so we got a NullPointerException. With this patch we pass the table's null partition key value when a partition value is NULL. Testing: * added e2e tests Change-Id: I88c4f7a2c2db4f6390c8ee5c08baddc96b04602e Reviewed-on: http://gerrit.cloudera.org:8080/18307 Reviewed-by: Tamas Mate <tmater@apache.org> Reviewed-by: Gabor Kaszab <gaborkaszab@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
921 lines
28 KiB
Plaintext
921 lines
28 KiB
Plaintext
====
|
|
---- QUERY
|
|
# Test partitioned INSERTs into a table whose single column is also
|
|
# the partition column. Partition transform is BUCKET.
|
|
create table single_col_bucket (s string)
|
|
partitioned by spec (bucket(7, s))
|
|
stored as iceberg;
|
|
====
|
|
---- QUERY
|
|
insert into single_col_bucket values ('a'), ('b'), ('c'), ('d'), ('e'), ('f'), (NULL);
|
|
select * from single_col_bucket;
|
|
---- RESULTS
|
|
'a'
|
|
'b'
|
|
'c'
|
|
'd'
|
|
'e'
|
|
'f'
|
|
'NULL'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show files in single_col_bucket;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/single_col_bucket/data/s_bucket=0/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/single_col_bucket/data/s_bucket=2/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/single_col_bucket/data/s_bucket=4/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/single_col_bucket/data/s_bucket=6/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/single_col_bucket/data/s_bucket=__HIVE_DEFAULT_PARTITION__/.*.parq','.*',''
|
|
---- TYPES
|
|
STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from single_col_bucket
|
|
where s = 'a';
|
|
---- RESULTS
|
|
'a'
|
|
---- TYPES
|
|
STRING
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 2
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from single_col_bucket
|
|
where s = 'c';
|
|
---- RESULTS
|
|
'c'
|
|
---- TYPES
|
|
STRING
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 1
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Bucket transform for multiple columns.
|
|
create table multi_col_bucket (i int, s string, d date, t timestamp)
|
|
partitioned by spec (bucket(3, i), bucket(5, s), bucket(5, d), bucket(5, t))
|
|
stored as iceberg;
|
|
====
|
|
---- QUERY
|
|
insert into multi_col_bucket
|
|
values (1, 'a', '2021-01-01', '2021-01-01 01:01:01.01'),
|
|
(2, 'b', '2021-01-02', '2021-01-02 02:02:02.02'),
|
|
(3, 'c', '2021-01-03', '2021-01-03 03:03:03.03'),
|
|
(4, 'd', '2021-01-04', '2021-01-04 04:04:04.04');
|
|
select * from multi_col_bucket;
|
|
---- RESULTS
|
|
1,'a',2021-01-01,2021-01-01 01:01:01.010000000
|
|
2,'b',2021-01-02,2021-01-02 02:02:02.020000000
|
|
3,'c',2021-01-03,2021-01-03 03:03:03.030000000
|
|
4,'d',2021-01-04,2021-01-04 04:04:04.040000000
|
|
---- TYPES
|
|
INT,STRING,DATE,TIMESTAMP
|
|
====
|
|
---- QUERY
|
|
show files in multi_col_bucket;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/multi_col_bucket/data/i_bucket=0/s_bucket=1/d_bucket=2/t_bucket=4/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/multi_col_bucket/data/i_bucket=0/s_bucket=2/d_bucket=3/t_bucket=0/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/multi_col_bucket/data/i_bucket=0/s_bucket=4/d_bucket=1/t_bucket=3/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/multi_col_bucket/data/i_bucket=2/s_bucket=0/d_bucket=0/t_bucket=4/.*.parq','.*',''
|
|
---- TYPES
|
|
STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from multi_col_bucket
|
|
where i = 1;
|
|
---- RESULTS
|
|
1,'a',2021-01-01,2021-01-01 01:01:01.010000000
|
|
---- TYPES
|
|
INT,STRING,DATE,TIMESTAMP
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 1
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from multi_col_bucket
|
|
where i = 2;
|
|
---- RESULTS
|
|
2,'b',2021-01-02,2021-01-02 02:02:02.020000000
|
|
---- TYPES
|
|
INT,STRING,DATE,TIMESTAMP
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 1
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from multi_col_bucket
|
|
where s = 'd';
|
|
---- RESULTS
|
|
4,'d',2021-01-04,2021-01-04 04:04:04.040000000
|
|
---- TYPES
|
|
INT,STRING,DATE,TIMESTAMP
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 1
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from multi_col_bucket
|
|
where d = '2021-01-03';
|
|
---- RESULTS
|
|
3,'c',2021-01-03,2021-01-03 03:03:03.030000000
|
|
---- TYPES
|
|
INT,STRING,DATE,TIMESTAMP
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 1
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from multi_col_bucket
|
|
where t = '2021-01-03 03:03:03.030000000';
|
|
---- RESULTS
|
|
3,'c',2021-01-03,2021-01-03 03:03:03.030000000
|
|
---- TYPES
|
|
INT,STRING,DATE,TIMESTAMP
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 1
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test single col TRUNCATE
|
|
create table single_col_truncate (d decimal(10, 4))
|
|
partitioned by spec (truncate(100, d))
|
|
stored as iceberg;
|
|
====
|
|
---- QUERY
|
|
insert into single_col_truncate
|
|
values (1.1234), (10.5432), (200.786), (314.1592), (98765.4321),
|
|
(1.1235), (10.5433), (200.789), (314.1598), (98765.43), (NULL);
|
|
select * from single_col_truncate;
|
|
---- RESULTS
|
|
1.1234
|
|
1.1235
|
|
10.5432
|
|
10.5433
|
|
200.7860
|
|
200.7890
|
|
314.1592
|
|
314.1598
|
|
98765.4300
|
|
98765.4321
|
|
NULL
|
|
---- TYPES
|
|
DECIMAL
|
|
====
|
|
---- QUERY
|
|
show files in single_col_truncate;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/single_col_truncate/data/d_trunc=1.1200/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/single_col_truncate/data/d_trunc=10.5400/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/single_col_truncate/data/d_trunc=200.7800/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/single_col_truncate/data/d_trunc=314.1500/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/single_col_truncate/data/d_trunc=98765.4300/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/single_col_truncate/data/d_trunc=__HIVE_DEFAULT_PARTITION__/.*.parq','.*',''
|
|
---- TYPES
|
|
STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from single_col_truncate
|
|
where d = 1.1234;
|
|
---- RESULTS
|
|
1.1234
|
|
---- TYPES
|
|
DECIMAL
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 2
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from single_col_truncate
|
|
where 200 <= d and d <= 400;
|
|
---- RESULTS
|
|
200.7860
|
|
200.7890
|
|
314.1592
|
|
314.1598
|
|
---- TYPES
|
|
DECIMAL
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 4
|
|
aggregation(SUM, NumRowGroups): 2
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from single_col_truncate
|
|
where d > 200.78;
|
|
---- RESULTS
|
|
200.7860
|
|
200.7890
|
|
314.1592
|
|
314.1598
|
|
98765.4300
|
|
98765.4321
|
|
---- TYPES
|
|
DECIMAL
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 6
|
|
aggregation(SUM, NumRowGroups): 3
|
|
====
|
|
---- QUERY
|
|
# Test multi col TRUNCATE
|
|
create table multi_col_truncate (i int, b bigint, d decimal(16, 6), s string)
|
|
partitioned by spec (truncate(15, s), truncate(5, i), truncate(11, b), truncate(100000, d))
|
|
stored as iceberg;
|
|
====
|
|
---- QUERY
|
|
insert into multi_col_truncate values
|
|
(1, 11, 11111.111111, 'the quick brown fox jumps over the lazy dog'),
|
|
(2, 222, 421, 'the quick brown fox jumps over the lazy dog'),
|
|
(6, 333, 113211.2, 'the quick brown fox over the lazy dog the quick'),
|
|
(7, 444, 1111154.11432, 'the quick fox brown jump impala over the quick fox brown the over jump'),
|
|
(18, 555, 9999913.232432,'the quick impala fox dog parrot lion'),
|
|
(NULL, NULL, NULL, NULL);
|
|
select * from multi_col_truncate;
|
|
---- RESULTS
|
|
1,11,11111.111111,'the quick brown fox jumps over the lazy dog'
|
|
2,222,421.000000,'the quick brown fox jumps over the lazy dog'
|
|
6,333,113211.200000,'the quick brown fox over the lazy dog the quick'
|
|
7,444,1111154.114320,'the quick fox brown jump impala over the quick fox brown the over jump'
|
|
18,555,9999913.232432,'the quick impala fox dog parrot lion'
|
|
NULL,NULL,NULL,'NULL'
|
|
---- TYPES
|
|
INT,BIGINT,DECIMAL,STRING
|
|
====
|
|
---- QUERY
|
|
show files in multi_col_truncate;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/multi_col_truncate/data/s_trunc=the quick brown/i_trunc=0/b_trunc=11/d_trunc=11111.100000/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/multi_col_truncate/data/s_trunc=the quick brown/i_trunc=0/b_trunc=220/d_trunc=421.000000/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/multi_col_truncate/data/s_trunc=the quick brown/i_trunc=5/b_trunc=330/d_trunc=113211.200000/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/multi_col_truncate/data/s_trunc=the quick fox b/i_trunc=5/b_trunc=440/d_trunc=1111154.100000/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/multi_col_truncate/data/s_trunc=the quick impal/i_trunc=15/b_trunc=550/d_trunc=9999913.200000/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/multi_col_truncate/data/s_trunc=__HIVE_DEFAULT_PARTITION__/i_trunc=__HIVE_DEFAULT_PARTITION__/b_trunc=__HIVE_DEFAULT_PARTITION__/d_trunc=__HIVE_DEFAULT_PARTITION__/.*.parq','.*',''
|
|
---- TYPES
|
|
STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from multi_col_truncate
|
|
where s = 'the quick brown fox jumps over the lazy dog';
|
|
---- RESULTS
|
|
1,11,11111.111111,'the quick brown fox jumps over the lazy dog'
|
|
2,222,421.000000,'the quick brown fox jumps over the lazy dog'
|
|
---- TYPES
|
|
INT,BIGINT,DECIMAL,STRING
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 2
|
|
aggregation(SUM, NumRowGroups): 2
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from multi_col_truncate
|
|
where s = 'the quick impala';
|
|
---- RESULTS
|
|
---- TYPES
|
|
INT,BIGINT,DECIMAL,STRING
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 0
|
|
aggregation(SUM, NumRowGroups): 0
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from multi_col_truncate
|
|
where i < 5;
|
|
---- RESULTS
|
|
1,11,11111.111111,'the quick brown fox jumps over the lazy dog'
|
|
2,222,421.000000,'the quick brown fox jumps over the lazy dog'
|
|
---- TYPES
|
|
INT,BIGINT,DECIMAL,STRING
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 2
|
|
aggregation(SUM, NumRowGroups): 3
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from multi_col_truncate
|
|
where b > 330;
|
|
---- RESULTS
|
|
6,333,113211.200000,'the quick brown fox over the lazy dog the quick'
|
|
7,444,1111154.114320,'the quick fox brown jump impala over the quick fox brown the over jump'
|
|
18,555,9999913.232432,'the quick impala fox dog parrot lion'
|
|
---- TYPES
|
|
INT,BIGINT,DECIMAL,STRING
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 3
|
|
aggregation(SUM, NumRowGroups): 3
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from multi_col_truncate
|
|
where 400 < d and d < 100000;
|
|
---- RESULTS
|
|
1,11,11111.111111,'the quick brown fox jumps over the lazy dog'
|
|
2,222,421.000000,'the quick brown fox jumps over the lazy dog'
|
|
---- TYPES
|
|
INT,BIGINT,DECIMAL,STRING
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 2
|
|
aggregation(SUM, NumRowGroups): 2
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from multi_col_truncate
|
|
where b > 100 and d < 100000;
|
|
---- RESULTS
|
|
2,222,421.000000,'the quick brown fox jumps over the lazy dog'
|
|
---- TYPES
|
|
INT,BIGINT,DECIMAL,STRING
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 1
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test an IS NULL predicate on a partition source column (no RUNTIME_PROFILE check).
|
|
select * from multi_col_truncate
|
|
where b is NULL;
|
|
---- RESULTS
|
|
NULL,NULL,NULL,'NULL'
|
|
---- TYPES
|
|
INT,BIGINT,DECIMAL,STRING
|
|
====
|
|
---- QUERY
|
|
# Create table with YEAR partition transform
|
|
create table year_transform(t timestamp, d date)
|
|
partitioned by spec (year(t), year(d))
|
|
stored as iceberg;
|
|
====
|
|
---- QUERY
|
|
insert into year_transform values
|
|
('2021-01-08 14:04:03', '2021-01-08'),
|
|
('2021-01-07 14:04:03', '2021-01-07'),
|
|
('1970-01-01 00:00:00', '1970-01-01'),
|
|
('1970-11-01 00:00:00', '1970-11-01'),
|
|
('1969-12-15 13:55:03', '1969-12-15'),
|
|
('1969-02-15 13:55:03', '1969-02-15'),
|
|
(NULL, NULL);
|
|
select * from year_transform;
|
|
---- RESULTS
|
|
2021-01-08 14:04:03,2021-01-08
|
|
2021-01-07 14:04:03,2021-01-07
|
|
1970-01-01 00:00:00,1970-01-01
|
|
1970-11-01 00:00:00,1970-11-01
|
|
1969-12-15 13:55:03,1969-12-15
|
|
1969-02-15 13:55:03,1969-02-15
|
|
NULL,NULL
|
|
---- TYPES
|
|
TIMESTAMP,DATE
|
|
====
|
|
---- QUERY
|
|
show files in year_transform;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/year_transform/data/t_year=1969/d_year=1969/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/year_transform/data/t_year=1970/d_year=1970/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/year_transform/data/t_year=2021/d_year=2021/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/year_transform/data/t_year=__HIVE_DEFAULT_PARTITION__/d_year=__HIVE_DEFAULT_PARTITION__/.*.parq','.*',''
|
|
---- TYPES
|
|
STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from year_transform
|
|
where t = '2021-01-08 14:04:03';
|
|
---- RESULTS
|
|
2021-01-08 14:04:03,2021-01-08
|
|
---- TYPES
|
|
TIMESTAMP,DATE
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 2
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from year_transform
|
|
where t = '1970-01-01 00:00:00';
|
|
---- RESULTS
|
|
1970-01-01 00:00:00,1970-01-01
|
|
---- TYPES
|
|
TIMESTAMP,DATE
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 2
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from year_transform
|
|
where t = '1969-02-15 13:55:03';
|
|
---- RESULTS
|
|
1969-02-15 13:55:03,1969-02-15
|
|
---- TYPES
|
|
TIMESTAMP,DATE
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 2
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from year_transform
|
|
where t <= '1970-01-01 00:00:00';
|
|
---- RESULTS
|
|
1970-01-01 00:00:00,1970-01-01
|
|
1969-12-15 13:55:03,1969-12-15
|
|
1969-02-15 13:55:03,1969-02-15
|
|
---- TYPES
|
|
TIMESTAMP,DATE
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 4
|
|
aggregation(SUM, NumRowGroups): 3
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from year_transform
|
|
where d = '2021-01-07';
|
|
---- RESULTS
|
|
2021-01-07 14:04:03,2021-01-07
|
|
---- TYPES
|
|
TIMESTAMP,DATE
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 2
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from year_transform
|
|
where d = '1970-01-01';
|
|
---- RESULTS
|
|
1970-01-01 00:00:00,1970-01-01
|
|
---- TYPES
|
|
TIMESTAMP,DATE
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 2
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from year_transform
|
|
where d = '1969-12-15';
|
|
---- RESULTS
|
|
1969-12-15 13:55:03,1969-12-15
|
|
---- TYPES
|
|
TIMESTAMP,DATE
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 2
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Create table with MONTH partition transform
|
|
create table month_transform(t timestamp, d date)
|
|
partitioned by spec (month(t), month(d))
|
|
stored as iceberg;
|
|
====
|
|
---- QUERY
|
|
insert into month_transform values
|
|
('2021-01-08 14:04:03', '2021-01-08'),
|
|
('2021-01-07 14:04:03', '2021-01-07'),
|
|
('1970-01-01 00:00:00', '1970-01-01'),
|
|
('1970-11-01 00:00:00', '1970-11-01'),
|
|
('1969-12-15 13:55:03', '1969-12-15'),
|
|
('1969-02-15 13:55:03', '1969-02-15'),
|
|
(NULL, NULL);
|
|
select * from month_transform;
|
|
---- RESULTS
|
|
2021-01-08 14:04:03,2021-01-08
|
|
2021-01-07 14:04:03,2021-01-07
|
|
1970-01-01 00:00:00,1970-01-01
|
|
1970-11-01 00:00:00,1970-11-01
|
|
1969-12-15 13:55:03,1969-12-15
|
|
1969-02-15 13:55:03,1969-02-15
|
|
NULL,NULL
|
|
---- TYPES
|
|
TIMESTAMP,DATE
|
|
====
|
|
---- QUERY
|
|
show files in month_transform;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/month_transform/data/t_month=1969-02/d_month=1969-02/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/month_transform/data/t_month=1969-12/d_month=1969-12/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/month_transform/data/t_month=1970-01/d_month=1970-01/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/month_transform/data/t_month=1970-11/d_month=1970-11/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/month_transform/data/t_month=2021-01/d_month=2021-01/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/month_transform/data/t_month=__HIVE_DEFAULT_PARTITION__/d_month=__HIVE_DEFAULT_PARTITION__/.*.parq','.*',''
|
|
---- TYPES
|
|
STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from month_transform
|
|
where t = '2021-01-08 14:04:03';
|
|
---- RESULTS
|
|
2021-01-08 14:04:03,2021-01-08
|
|
---- TYPES
|
|
TIMESTAMP,DATE
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 2
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from month_transform
|
|
where t = '1970-01-01 00:00:00';
|
|
---- RESULTS
|
|
1970-01-01 00:00:00,1970-01-01
|
|
---- TYPES
|
|
TIMESTAMP,DATE
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 1
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from month_transform
|
|
where t = '1969-02-15 13:55:03';
|
|
---- RESULTS
|
|
1969-02-15 13:55:03,1969-02-15
|
|
---- TYPES
|
|
TIMESTAMP,DATE
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 1
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from month_transform
|
|
where d = '2021-01-07';
|
|
---- RESULTS
|
|
2021-01-07 14:04:03,2021-01-07
|
|
---- TYPES
|
|
TIMESTAMP,DATE
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 2
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from month_transform
|
|
where d = '1970-01-01';
|
|
---- RESULTS
|
|
1970-01-01 00:00:00,1970-01-01
|
|
---- TYPES
|
|
TIMESTAMP,DATE
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 1
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from month_transform
|
|
where d = '1969-12-15';
|
|
---- RESULTS
|
|
1969-12-15 13:55:03,1969-12-15
|
|
---- TYPES
|
|
TIMESTAMP,DATE
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 1
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Create table with DAY partition transform
|
|
create table day_transform(t timestamp, d date)
|
|
partitioned by spec (day(t), day(d))
|
|
stored as iceberg;
|
|
====
|
|
---- QUERY
|
|
insert into day_transform values
|
|
('2021-01-08 14:04:03', '2021-01-08'),
|
|
('2021-01-08 15:04:03', '2021-01-08'),
|
|
('2021-01-07 14:04:03', '2021-01-07'),
|
|
('1970-01-01 00:00:00', '1970-01-01'),
|
|
('1970-11-01 00:00:00', '1970-11-01'),
|
|
('1969-12-15 13:55:03', '1969-12-15'),
|
|
('1969-02-15 13:55:03', '1969-02-15'),
|
|
(NULL, NULL);
|
|
select * from day_transform;
|
|
---- RESULTS
|
|
2021-01-08 14:04:03,2021-01-08
|
|
2021-01-08 15:04:03,2021-01-08
|
|
2021-01-07 14:04:03,2021-01-07
|
|
1970-01-01 00:00:00,1970-01-01
|
|
1970-11-01 00:00:00,1970-11-01
|
|
1969-12-15 13:55:03,1969-12-15
|
|
1969-02-15 13:55:03,1969-02-15
|
|
NULL,NULL
|
|
---- TYPES
|
|
TIMESTAMP,DATE
|
|
====
|
|
---- QUERY
|
|
show files in day_transform;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/day_transform/data/t_day=1969-02-15/d_day=1969-02-15/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/day_transform/data/t_day=1969-12-15/d_day=1969-12-15/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/day_transform/data/t_day=1970-01-01/d_day=1970-01-01/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/day_transform/data/t_day=1970-11-01/d_day=1970-11-01/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/day_transform/data/t_day=2021-01-07/d_day=2021-01-07/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/day_transform/data/t_day=2021-01-08/d_day=2021-01-08/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/day_transform/data/t_day=2021-01-08/d_day=2021-01-08/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/day_transform/data/t_day=__HIVE_DEFAULT_PARTITION__/d_day=__HIVE_DEFAULT_PARTITION__/.*.parq','.*',''
|
|
---- TYPES
|
|
STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from day_transform
|
|
where t = '2021-01-08 14:04:03';
|
|
---- RESULTS
|
|
2021-01-08 14:04:03,2021-01-08
|
|
---- TYPES
|
|
TIMESTAMP,DATE
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 2
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from day_transform
|
|
where t = '2021-01-07 14:04:03';
|
|
---- RESULTS
|
|
2021-01-07 14:04:03,2021-01-07
|
|
---- TYPES
|
|
TIMESTAMP,DATE
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 1
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from day_transform
|
|
where t = '1970-01-01 00:00:00';
|
|
---- RESULTS
|
|
1970-01-01 00:00:00,1970-01-01
|
|
---- TYPES
|
|
TIMESTAMP,DATE
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 1
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from day_transform
|
|
where t = '1969-02-15 13:55:03';
|
|
---- RESULTS
|
|
1969-02-15 13:55:03,1969-02-15
|
|
---- TYPES
|
|
TIMESTAMP,DATE
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 1
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from day_transform
|
|
where d = '2021-01-07';
|
|
---- RESULTS
|
|
2021-01-07 14:04:03,2021-01-07
|
|
---- TYPES
|
|
TIMESTAMP,DATE
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 1
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from day_transform
|
|
where d = '1970-01-01';
|
|
---- RESULTS
|
|
1970-01-01 00:00:00,1970-01-01
|
|
---- TYPES
|
|
TIMESTAMP,DATE
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 1
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from day_transform
|
|
where d = '1969-12-15';
|
|
---- RESULTS
|
|
1969-12-15 13:55:03,1969-12-15
|
|
---- TYPES
|
|
TIMESTAMP,DATE
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 1
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Create table with HOUR partition transform
|
|
create table hour_transform(t timestamp)
|
|
partitioned by spec (hour(t))
|
|
stored as iceberg;
|
|
====
|
|
---- QUERY
|
|
insert into hour_transform values
|
|
('1969-12-31 22:55:03'),
|
|
('1969-12-31 23:55:03'),
|
|
('1969-12-31 23:56:03'),
|
|
('1970-01-01 00:00:00'),
|
|
('1970-01-01 00:00:01'),
|
|
('1970-01-01 01:00:00'),
|
|
('2021-01-08 14:04:03'),
|
|
('2021-01-08 15:04:03'),
|
|
('2021-01-08 15:04:33'),
|
|
(NULL);
|
|
select * from hour_transform;
|
|
---- RESULTS
|
|
1969-12-31 22:55:03
|
|
1969-12-31 23:55:03
|
|
1969-12-31 23:56:03
|
|
1970-01-01 00:00:00
|
|
1970-01-01 00:00:01
|
|
1970-01-01 01:00:00
|
|
2021-01-08 14:04:03
|
|
2021-01-08 15:04:03
|
|
2021-01-08 15:04:33
|
|
NULL
|
|
---- TYPES
|
|
TIMESTAMP
|
|
====
|
|
---- QUERY
|
|
show files in hour_transform;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/hour_transform/data/t_hour=1969-12-31-22/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/hour_transform/data/t_hour=1969-12-31-23/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/hour_transform/data/t_hour=1970-01-01-00/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/hour_transform/data/t_hour=1970-01-01-01/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/hour_transform/data/t_hour=2021-01-08-14/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/hour_transform/data/t_hour=2021-01-08-15/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/hour_transform/data/t_hour=__HIVE_DEFAULT_PARTITION__/.*.parq','.*',''
|
|
---- TYPES
|
|
STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from hour_transform
|
|
where t = '1969-12-31 22:55:03';
|
|
---- RESULTS
|
|
1969-12-31 22:55:03
|
|
---- TYPES
|
|
TIMESTAMP
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 1
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from hour_transform
|
|
where t = '1969-12-31 23:55:03';
|
|
---- RESULTS
|
|
1969-12-31 23:55:03
|
|
---- TYPES
|
|
TIMESTAMP
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 2
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from hour_transform
|
|
where t < '1970-01-01 01:00:00';
|
|
---- RESULTS
|
|
1969-12-31 22:55:03
|
|
1969-12-31 23:55:03
|
|
1969-12-31 23:56:03
|
|
1970-01-01 00:00:00
|
|
1970-01-01 00:00:01
|
|
---- TYPES
|
|
TIMESTAMP
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 5
|
|
aggregation(SUM, NumRowGroups): 4
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from hour_transform
|
|
where t >= '1970-01-01 01:00:00';
|
|
---- RESULTS
|
|
1970-01-01 01:00:00
|
|
2021-01-08 14:04:03
|
|
2021-01-08 15:04:03
|
|
2021-01-08 15:04:33
|
|
---- TYPES
|
|
TIMESTAMP
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 4
|
|
aggregation(SUM, NumRowGroups): 3
|
|
====
|
|
---- QUERY
|
|
# Test a partition spec that mixes DAY, YEAR, TRUNCATE and BUCKET transforms,
# listed in a different order than the table's columns.
create table mixed_and_shuffled (s string, b bigint, de decimal(6, 2), t timestamp, da date)
|
|
partitioned by spec (day(t), year(da), truncate(5, s), bucket(3, b), truncate(100, de))
|
|
stored as iceberg;
|
|
====
|
|
---- QUERY
|
|
insert into mixed_and_shuffled values
|
|
('quick brown fox', 12345, 123.45, '2021-01-01 01:02:03', '2021-01-01'),
|
|
('quick brown impala', 54321, 543.21, '2021-01-01 02:03:04', '2021-01-01'),
|
|
('quick brown dog', 31, 3333.45, '2020-01-06 05:06:07', '2020-01-06'),
|
|
('pink parrot', 9999777999, 9.9, '1999-09-09 09:09:09', '1999-09-09'),
|
|
('green python', 717171, 71, '1971-07-01 07:07:07', '1971-07-01'),
|
|
(NULL, NULL, NULL, NULL, NULL);
|
|
select * from mixed_and_shuffled;
|
|
---- RESULTS
|
|
'quick brown fox',12345,123.45,2021-01-01 01:02:03,2021-01-01
|
|
'quick brown impala',54321,543.21,2021-01-01 02:03:04,2021-01-01
|
|
'quick brown dog',31,3333.45,2020-01-06 05:06:07,2020-01-06
|
|
'pink parrot',9999777999,9.90,1999-09-09 09:09:09,1999-09-09
|
|
'green python',717171,71.00,1971-07-01 07:07:07,1971-07-01
|
|
'NULL',NULL,NULL,NULL,NULL
|
|
---- TYPES
|
|
STRING,BIGINT,DECIMAL,TIMESTAMP,DATE
|
|
====
|
|
---- QUERY
|
|
show files in mixed_and_shuffled;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/mixed_and_shuffled/data/t_day=1971-07-01/da_year=1971/s_trunc=green/b_bucket=1/de_trunc=71.00/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/mixed_and_shuffled/data/t_day=1999-09-09/da_year=1999/s_trunc=pink /b_bucket=1/de_trunc=9.00/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/mixed_and_shuffled/data/t_day=2020-01-06/da_year=2020/s_trunc=quick/b_bucket=1/de_trunc=3333.00/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/mixed_and_shuffled/data/t_day=2021-01-01/da_year=2021/s_trunc=quick/b_bucket=1/de_trunc=543.00/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/mixed_and_shuffled/data/t_day=2021-01-01/da_year=2021/s_trunc=quick/b_bucket=2/de_trunc=123.00/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/mixed_and_shuffled/data/t_day=__HIVE_DEFAULT_PARTITION__/da_year=__HIVE_DEFAULT_PARTITION__/s_trunc=__HIVE_DEFAULT_PARTITION__/b_bucket=__HIVE_DEFAULT_PARTITION__/de_trunc=__HIVE_DEFAULT_PARTITION__/.*.parq','.*',''
|
|
---- TYPES
|
|
STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from mixed_and_shuffled
|
|
where s = 'quick brown dog';
|
|
---- RESULTS
|
|
'quick brown dog',31,3333.45,2020-01-06 05:06:07,2020-01-06
|
|
---- TYPES
|
|
STRING,BIGINT,DECIMAL,TIMESTAMP,DATE
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 1
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from mixed_and_shuffled
|
|
where s = 'quick brown dog' and de > 1000;
|
|
---- RESULTS
|
|
'quick brown dog',31,3333.45,2020-01-06 05:06:07,2020-01-06
|
|
---- TYPES
|
|
STRING,BIGINT,DECIMAL,TIMESTAMP,DATE
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 1
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from mixed_and_shuffled
|
|
where da='1999-09-09';
|
|
---- RESULTS
|
|
'pink parrot',9999777999,9.90,1999-09-09 09:09:09,1999-09-09
|
|
---- TYPES
|
|
STRING,BIGINT,DECIMAL,TIMESTAMP,DATE
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 1
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from mixed_and_shuffled
|
|
where de > 100;
|
|
---- RESULTS
|
|
'quick brown fox',12345,123.45,2021-01-01 01:02:03,2021-01-01
|
|
'quick brown impala',54321,543.21,2021-01-01 02:03:04,2021-01-01
|
|
'quick brown dog',31,3333.45,2020-01-06 05:06:07,2020-01-06
|
|
---- TYPES
|
|
STRING,BIGINT,DECIMAL,TIMESTAMP,DATE
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 3
|
|
aggregation(SUM, NumRowGroups): 3
|
|
====
|
|
---- QUERY
|
|
# Test that an IS NULL predicate on a partition source column returns the all-NULL row.
select * from mixed_and_shuffled
|
|
where s is null;
|
|
---- RESULTS
|
|
'NULL',NULL,NULL,NULL,NULL
|
|
---- TYPES
|
|
STRING,BIGINT,DECIMAL,TIMESTAMP,DATE
|
|
====
|
|
---- QUERY
|
|
# Test that an IS NOT NULL predicate on a partition source column excludes the all-NULL row.
select * from mixed_and_shuffled
|
|
where da is not null;
|
|
---- RESULTS
|
|
'quick brown fox',12345,123.45,2021-01-01 01:02:03,2021-01-01
|
|
'quick brown impala',54321,543.21,2021-01-01 02:03:04,2021-01-01
|
|
'quick brown dog',31,3333.45,2020-01-06 05:06:07,2020-01-06
|
|
'pink parrot',9999777999,9.90,1999-09-09 09:09:09,1999-09-09
|
|
'green python',717171,71.00,1971-07-01 07:07:07,1971-07-01
|
|
---- TYPES
|
|
STRING,BIGINT,DECIMAL,TIMESTAMP,DATE
|
|
====
|