mirror of
https://github.com/apache/impala.git
synced 2026-01-26 21:02:23 -05:00
This patch adds parquet stats to iceberg manifest as per-datafile metrics. The following metrics are supported: - column_sizes : Map from column id to the total size on disk of all regions that store the column. Does not include bytes necessary to read other columns, like footers. - null_value_counts : Map from column id to number of null values in the column. - lower_bounds : Map from column id to lower bound in the column serialized as binary. Each value must be less than or equal to all non-null, non-NaN values in the column for the file. - upper_bounds : Map from column id to upper bound in the column serialized as binary. Each value must be greater than or equal to all non-null, non-Nan values in the column for the file. The corresponding parquet stats are collected by 'ColumnStats' (in 'min_value_', 'max_value_', 'null_count_' members) and 'HdfsParquetTableWriter::BaseColumnWriter' (in 'total_compressed_byte_size_' member). Testing: - New e2e test was added to verify that the metrics are written to the Iceberg manifest upon inserting data. - New e2e test was added to verify that lower_bounds/upper_bounds metrics are used to prune data files on querying iceberg tables. - Existing e2e tests were updated to work with the new behavior. - BE test for single-value serialization. Relevant Iceberg documentation: - Manifest: https://iceberg.apache.org/spec/#manifests - Values in lower_bounds and upper_bounds maps should be Single-value serialized to binary: https://iceberg.apache.org/spec/#appendix-d-single-value-serialization Change-Id: Ic31f2260bc6f6a7f307ac955ff05eb154917675b Reviewed-on: http://gerrit.cloudera.org:8080/17806 Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Reviewed-by: Attila Jeges <attilaj@cloudera.com>
870 lines
26 KiB
Plaintext
870 lines
26 KiB
Plaintext
====
|
|
---- QUERY
|
|
# Test partitioned INSERTs with single column that is also
|
|
# the partitioned column. Partition transform is BUCKET.
|
|
create table single_col_bucket (s string)
|
|
partitioned by spec (bucket(7, s))
|
|
stored as iceberg;
|
|
====
|
|
---- QUERY
|
|
insert into single_col_bucket values ('a'), ('b'), ('c'), ('d'), ('e'), ('f');
|
|
select * from single_col_bucket;
|
|
---- RESULTS
|
|
'a'
|
|
'b'
|
|
'c'
|
|
'd'
|
|
'e'
|
|
'f'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
show files in single_col_bucket;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/single_col_bucket/data/s_bucket=0/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/single_col_bucket/data/s_bucket=2/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/single_col_bucket/data/s_bucket=4/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/single_col_bucket/data/s_bucket=6/.*.parq','.*',''
|
|
---- TYPES
|
|
STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from single_col_bucket
|
|
where s = 'a';
|
|
---- RESULTS
|
|
'a'
|
|
---- TYPES
|
|
STRING
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 2
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from single_col_bucket
|
|
where s = 'c';
|
|
---- RESULTS
|
|
'c'
|
|
---- TYPES
|
|
STRING
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 1
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Bucket transform for multiple columns.
|
|
create table multi_col_bucket (i int, s string, d date, t timestamp)
|
|
partitioned by spec (bucket(3, i), bucket(5, s), bucket(5, d), bucket(5, t))
|
|
stored as iceberg;
|
|
====
|
|
---- QUERY
|
|
insert into multi_col_bucket
|
|
values (1, 'a', '2021-01-01', '2021-01-01 01:01:01.01'),
|
|
(2, 'b', '2021-01-02', '2021-01-02 02:02:02.02'),
|
|
(3, 'c', '2021-01-03', '2021-01-03 03:03:03.03'),
|
|
(4, 'd', '2021-01-04', '2021-01-04 04:04:04.04');
|
|
select * from multi_col_bucket;
|
|
---- RESULTS
|
|
1,'a',2021-01-01,2021-01-01 01:01:01.010000000
|
|
2,'b',2021-01-02,2021-01-02 02:02:02.020000000
|
|
3,'c',2021-01-03,2021-01-03 03:03:03.030000000
|
|
4,'d',2021-01-04,2021-01-04 04:04:04.040000000
|
|
---- TYPES
|
|
INT,STRING,DATE,TIMESTAMP
|
|
====
|
|
---- QUERY
|
|
show files in multi_col_bucket;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/multi_col_bucket/data/i_bucket=0/s_bucket=1/d_bucket=2/t_bucket=4/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/multi_col_bucket/data/i_bucket=0/s_bucket=2/d_bucket=3/t_bucket=0/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/multi_col_bucket/data/i_bucket=0/s_bucket=4/d_bucket=1/t_bucket=3/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/multi_col_bucket/data/i_bucket=2/s_bucket=0/d_bucket=0/t_bucket=4/.*.parq','.*',''
|
|
---- TYPES
|
|
STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from multi_col_bucket
|
|
where i = 1;
|
|
---- RESULTS
|
|
1,'a',2021-01-01,2021-01-01 01:01:01.010000000
|
|
---- TYPES
|
|
INT,STRING,DATE,TIMESTAMP
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 1
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from multi_col_bucket
|
|
where i = 2;
|
|
---- RESULTS
|
|
2,'b',2021-01-02,2021-01-02 02:02:02.020000000
|
|
---- TYPES
|
|
INT,STRING,DATE,TIMESTAMP
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 1
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from multi_col_bucket
|
|
where s = 'd';
|
|
---- RESULTS
|
|
4,'d',2021-01-04,2021-01-04 04:04:04.040000000
|
|
---- TYPES
|
|
INT,STRING,DATE,TIMESTAMP
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 1
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from multi_col_bucket
|
|
where d = '2021-01-03';
|
|
---- RESULTS
|
|
3,'c',2021-01-03,2021-01-03 03:03:03.030000000
|
|
---- TYPES
|
|
INT,STRING,DATE,TIMESTAMP
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 1
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from multi_col_bucket
|
|
where t = '2021-01-03 03:03:03.030000000';
|
|
---- RESULTS
|
|
3,'c',2021-01-03,2021-01-03 03:03:03.030000000
|
|
---- TYPES
|
|
INT,STRING,DATE,TIMESTAMP
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 1
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test single col TRUNCATE
|
|
create table single_col_truncate (d decimal(10, 4))
|
|
partitioned by spec (truncate(100, d))
|
|
stored as iceberg;
|
|
====
|
|
---- QUERY
|
|
insert into single_col_truncate
|
|
values (1.1234), (10.5432), (200.786), (314.1592), (98765.4321),
|
|
(1.1235), (10.5433), (200.789), (314.1598), (98765.43);
|
|
select * from single_col_truncate;
|
|
---- RESULTS
|
|
1.1234
|
|
1.1235
|
|
10.5432
|
|
10.5433
|
|
200.7860
|
|
200.7890
|
|
314.1592
|
|
314.1598
|
|
98765.4300
|
|
98765.4321
|
|
---- TYPES
|
|
DECIMAL
|
|
====
|
|
---- QUERY
|
|
show files in single_col_truncate;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/single_col_truncate/data/d_trunc=1.1200/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/single_col_truncate/data/d_trunc=10.5400/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/single_col_truncate/data/d_trunc=200.7800/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/single_col_truncate/data/d_trunc=314.1500/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/single_col_truncate/data/d_trunc=98765.4300/.*.parq','.*',''
|
|
---- TYPES
|
|
STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from single_col_truncate
|
|
where d = 1.1234;
|
|
---- RESULTS
|
|
1.1234
|
|
---- TYPES
|
|
DECIMAL
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 2
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from single_col_truncate
|
|
where 200 <= d and d <= 400;
|
|
---- RESULTS
|
|
200.7860
|
|
200.7890
|
|
314.1592
|
|
314.1598
|
|
---- TYPES
|
|
DECIMAL
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 4
|
|
aggregation(SUM, NumRowGroups): 2
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from single_col_truncate
|
|
where d > 200.78;
|
|
---- RESULTS
|
|
200.7860
|
|
200.7890
|
|
314.1592
|
|
314.1598
|
|
98765.4300
|
|
98765.4321
|
|
---- TYPES
|
|
DECIMAL
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 6
|
|
aggregation(SUM, NumRowGroups): 3
|
|
====
|
|
---- QUERY
|
|
# Test single col TRUNCATE
|
|
create table multi_col_truncate (i int, b bigint, d decimal(16, 6), s string)
|
|
partitioned by spec (truncate(15, s), truncate(5, i), truncate(11, b), truncate(100000, d))
|
|
stored as iceberg;
|
|
====
|
|
---- QUERY
|
|
insert into multi_col_truncate values
|
|
(1, 11, 11111.111111, 'the quick brown fox jumps over the lazy dog'),
|
|
(2, 222, 421, 'the quick brown fox jumps over the lazy dog'),
|
|
(6, 333, 113211.2, 'the quick brown fox over the lazy dog the quick'),
|
|
(7, 444, 1111154.11432, 'the quick fox brown jump impala over the quick fox brown the over jump'),
|
|
(18, 555, 9999913.232432,'the quick impala fox dog parrot lion');
|
|
select * from multi_col_truncate;
|
|
---- RESULTS
|
|
1,11,11111.111111,'the quick brown fox jumps over the lazy dog'
|
|
2,222,421.000000,'the quick brown fox jumps over the lazy dog'
|
|
6,333,113211.200000,'the quick brown fox over the lazy dog the quick'
|
|
7,444,1111154.114320,'the quick fox brown jump impala over the quick fox brown the over jump'
|
|
18,555,9999913.232432,'the quick impala fox dog parrot lion'
|
|
---- TYPES
|
|
INT,BIGINT,DECIMAL,STRING
|
|
====
|
|
---- QUERY
|
|
show files in multi_col_truncate;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/multi_col_truncate/data/s_trunc=the quick brown/i_trunc=0/b_trunc=11/d_trunc=11111.100000/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/multi_col_truncate/data/s_trunc=the quick brown/i_trunc=0/b_trunc=220/d_trunc=421.000000/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/multi_col_truncate/data/s_trunc=the quick brown/i_trunc=5/b_trunc=330/d_trunc=113211.200000/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/multi_col_truncate/data/s_trunc=the quick fox b/i_trunc=5/b_trunc=440/d_trunc=1111154.100000/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/multi_col_truncate/data/s_trunc=the quick impal/i_trunc=15/b_trunc=550/d_trunc=9999913.200000/.*.parq','.*',''
|
|
---- TYPES
|
|
STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from multi_col_truncate
|
|
where s = 'the quick brown fox jumps over the lazy dog';
|
|
---- RESULTS
|
|
1,11,11111.111111,'the quick brown fox jumps over the lazy dog'
|
|
2,222,421.000000,'the quick brown fox jumps over the lazy dog'
|
|
---- TYPES
|
|
INT,BIGINT,DECIMAL,STRING
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 2
|
|
aggregation(SUM, NumRowGroups): 2
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from multi_col_truncate
|
|
where s = 'the quick impala';
|
|
---- RESULTS
|
|
---- TYPES
|
|
INT,BIGINT,DECIMAL,STRING
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 0
|
|
aggregation(SUM, NumRowGroups): 0
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from multi_col_truncate
|
|
where i < 5;
|
|
---- RESULTS
|
|
1,11,11111.111111,'the quick brown fox jumps over the lazy dog'
|
|
2,222,421.000000,'the quick brown fox jumps over the lazy dog'
|
|
---- TYPES
|
|
INT,BIGINT,DECIMAL,STRING
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 2
|
|
aggregation(SUM, NumRowGroups): 2
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from multi_col_truncate
|
|
where b > 330;
|
|
---- RESULTS
|
|
6,333,113211.200000,'the quick brown fox over the lazy dog the quick'
|
|
7,444,1111154.114320,'the quick fox brown jump impala over the quick fox brown the over jump'
|
|
18,555,9999913.232432,'the quick impala fox dog parrot lion'
|
|
---- TYPES
|
|
INT,BIGINT,DECIMAL,STRING
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 3
|
|
aggregation(SUM, NumRowGroups): 3
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from multi_col_truncate
|
|
where 400 < d and d < 100000;
|
|
---- RESULTS
|
|
1,11,11111.111111,'the quick brown fox jumps over the lazy dog'
|
|
2,222,421.000000,'the quick brown fox jumps over the lazy dog'
|
|
---- TYPES
|
|
INT,BIGINT,DECIMAL,STRING
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 2
|
|
aggregation(SUM, NumRowGroups): 2
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from multi_col_truncate
|
|
where b > 100 and d < 100000;
|
|
---- RESULTS
|
|
2,222,421.000000,'the quick brown fox jumps over the lazy dog'
|
|
---- TYPES
|
|
INT,BIGINT,DECIMAL,STRING
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 1
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Create table with YEAR partition transform
|
|
create table year_transform(t timestamp, d date)
|
|
partitioned by spec (year(t), year(d))
|
|
stored as iceberg;
|
|
====
|
|
---- QUERY
|
|
insert into year_transform values
|
|
('2021-01-08 14:04:03', '2021-01-08'),
|
|
('2021-01-07 14:04:03', '2021-01-07'),
|
|
('1970-01-01 00:00:00', '1970-01-01'),
|
|
('1970-11-01 00:00:00', '1970-11-01'),
|
|
('1969-12-15 13:55:03', '1969-12-15'),
|
|
('1969-02-15 13:55:03', '1969-02-15');
|
|
select * from year_transform;
|
|
---- RESULTS
|
|
2021-01-08 14:04:03,2021-01-08
|
|
2021-01-07 14:04:03,2021-01-07
|
|
1970-01-01 00:00:00,1970-01-01
|
|
1970-11-01 00:00:00,1970-11-01
|
|
1969-12-15 13:55:03,1969-12-15
|
|
1969-02-15 13:55:03,1969-02-15
|
|
---- TYPES
|
|
TIMESTAMP,DATE
|
|
====
|
|
---- QUERY
|
|
show files in year_transform;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/year_transform/data/t_year=1969/d_year=1969/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/year_transform/data/t_year=1970/d_year=1970/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/year_transform/data/t_year=2021/d_year=2021/.*.parq','.*',''
|
|
---- TYPES
|
|
STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from year_transform
|
|
where t = '2021-01-08 14:04:03';
|
|
---- RESULTS
|
|
2021-01-08 14:04:03,2021-01-08
|
|
---- TYPES
|
|
TIMESTAMP,DATE
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 2
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from year_transform
|
|
where t = '1970-01-01 00:00:00';
|
|
---- RESULTS
|
|
1970-01-01 00:00:00,1970-01-01
|
|
---- TYPES
|
|
TIMESTAMP,DATE
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 2
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from year_transform
|
|
where t = '1969-02-15 13:55:03';
|
|
---- RESULTS
|
|
1969-02-15 13:55:03,1969-02-15
|
|
---- TYPES
|
|
TIMESTAMP,DATE
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 2
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from year_transform
|
|
where t <= '1970-01-01 00:00:00';
|
|
---- RESULTS
|
|
1970-01-01 00:00:00,1970-01-01
|
|
1969-12-15 13:55:03,1969-12-15
|
|
1969-02-15 13:55:03,1969-02-15
|
|
---- TYPES
|
|
TIMESTAMP,DATE
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 4
|
|
aggregation(SUM, NumRowGroups): 2
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from year_transform
|
|
where d = '2021-01-07';
|
|
---- RESULTS
|
|
2021-01-07 14:04:03,2021-01-07
|
|
---- TYPES
|
|
TIMESTAMP,DATE
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 2
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from year_transform
|
|
where d = '1970-01-01';
|
|
---- RESULTS
|
|
1970-01-01 00:00:00,1970-01-01
|
|
---- TYPES
|
|
TIMESTAMP,DATE
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 2
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from year_transform
|
|
where d = '1969-12-15';
|
|
---- RESULTS
|
|
1969-12-15 13:55:03,1969-12-15
|
|
---- TYPES
|
|
TIMESTAMP,DATE
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 2
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Create table with MONTH partition transform
|
|
create table month_transform(t timestamp, d date)
|
|
partitioned by spec (month(t), month(d))
|
|
stored as iceberg;
|
|
====
|
|
---- QUERY
|
|
insert into month_transform values
|
|
('2021-01-08 14:04:03', '2021-01-08'),
|
|
('2021-01-07 14:04:03', '2021-01-07'),
|
|
('1970-01-01 00:00:00', '1970-01-01'),
|
|
('1970-11-01 00:00:00', '1970-11-01'),
|
|
('1969-12-15 13:55:03', '1969-12-15'),
|
|
('1969-02-15 13:55:03', '1969-02-15');
|
|
select * from month_transform;
|
|
---- RESULTS
|
|
2021-01-08 14:04:03,2021-01-08
|
|
2021-01-07 14:04:03,2021-01-07
|
|
1970-01-01 00:00:00,1970-01-01
|
|
1970-11-01 00:00:00,1970-11-01
|
|
1969-12-15 13:55:03,1969-12-15
|
|
1969-02-15 13:55:03,1969-02-15
|
|
---- TYPES
|
|
TIMESTAMP,DATE
|
|
====
|
|
---- QUERY
|
|
show files in month_transform;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/month_transform/data/t_month=1969-02/d_month=1969-02/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/month_transform/data/t_month=1969-12/d_month=1969-12/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/month_transform/data/t_month=1970-01/d_month=1970-01/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/month_transform/data/t_month=1970-11/d_month=1970-11/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/month_transform/data/t_month=2021-01/d_month=2021-01/.*.parq','.*',''
|
|
---- TYPES
|
|
STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from month_transform
|
|
where t = '2021-01-08 14:04:03';
|
|
---- RESULTS
|
|
2021-01-08 14:04:03,2021-01-08
|
|
---- TYPES
|
|
TIMESTAMP,DATE
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 2
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from month_transform
|
|
where t = '1970-01-01 00:00:00';
|
|
---- RESULTS
|
|
1970-01-01 00:00:00,1970-01-01
|
|
---- TYPES
|
|
TIMESTAMP,DATE
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 1
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from month_transform
|
|
where t = '1969-02-15 13:55:03';
|
|
---- RESULTS
|
|
1969-02-15 13:55:03,1969-02-15
|
|
---- TYPES
|
|
TIMESTAMP,DATE
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 1
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from month_transform
|
|
where d = '2021-01-07';
|
|
---- RESULTS
|
|
2021-01-07 14:04:03,2021-01-07
|
|
---- TYPES
|
|
TIMESTAMP,DATE
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 2
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from month_transform
|
|
where d = '1970-01-01';
|
|
---- RESULTS
|
|
1970-01-01 00:00:00,1970-01-01
|
|
---- TYPES
|
|
TIMESTAMP,DATE
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 1
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from month_transform
|
|
where d = '1969-12-15';
|
|
---- RESULTS
|
|
1969-12-15 13:55:03,1969-12-15
|
|
---- TYPES
|
|
TIMESTAMP,DATE
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 1
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Create table with DAY partition transform
|
|
create table day_transform(t timestamp, d date)
|
|
partitioned by spec (day(t), day(d))
|
|
stored as iceberg;
|
|
====
|
|
---- QUERY
|
|
insert into day_transform values
|
|
('2021-01-08 14:04:03', '2021-01-08'),
|
|
('2021-01-08 15:04:03', '2021-01-08'),
|
|
('2021-01-07 14:04:03', '2021-01-07'),
|
|
('1970-01-01 00:00:00', '1970-01-01'),
|
|
('1970-11-01 00:00:00', '1970-11-01'),
|
|
('1969-12-15 13:55:03', '1969-12-15'),
|
|
('1969-02-15 13:55:03', '1969-02-15');
|
|
select * from day_transform;
|
|
---- RESULTS
|
|
2021-01-08 14:04:03,2021-01-08
|
|
2021-01-08 15:04:03,2021-01-08
|
|
2021-01-07 14:04:03,2021-01-07
|
|
1970-01-01 00:00:00,1970-01-01
|
|
1970-11-01 00:00:00,1970-11-01
|
|
1969-12-15 13:55:03,1969-12-15
|
|
1969-02-15 13:55:03,1969-02-15
|
|
---- TYPES
|
|
TIMESTAMP,DATE
|
|
====
|
|
---- QUERY
|
|
show files in day_transform;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/day_transform/data/t_day=1969-02-15/d_day=1969-02-15/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/day_transform/data/t_day=1969-12-15/d_day=1969-12-15/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/day_transform/data/t_day=1970-01-01/d_day=1970-01-01/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/day_transform/data/t_day=1970-11-01/d_day=1970-11-01/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/day_transform/data/t_day=2021-01-07/d_day=2021-01-07/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/day_transform/data/t_day=2021-01-08/d_day=2021-01-08/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/day_transform/data/t_day=2021-01-08/d_day=2021-01-08/.*.parq','.*',''
|
|
---- TYPES
|
|
STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from day_transform
|
|
where t = '2021-01-08 14:04:03';
|
|
---- RESULTS
|
|
2021-01-08 14:04:03,2021-01-08
|
|
---- TYPES
|
|
TIMESTAMP,DATE
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 2
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from day_transform
|
|
where t = '2021-01-07 14:04:03';
|
|
---- RESULTS
|
|
2021-01-07 14:04:03,2021-01-07
|
|
---- TYPES
|
|
TIMESTAMP,DATE
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 1
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from day_transform
|
|
where t = '1970-01-01 00:00:00';
|
|
---- RESULTS
|
|
1970-01-01 00:00:00,1970-01-01
|
|
---- TYPES
|
|
TIMESTAMP,DATE
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 1
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from day_transform
|
|
where t = '1969-02-15 13:55:03';
|
|
---- RESULTS
|
|
1969-02-15 13:55:03,1969-02-15
|
|
---- TYPES
|
|
TIMESTAMP,DATE
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 1
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from day_transform
|
|
where d = '2021-01-07';
|
|
---- RESULTS
|
|
2021-01-07 14:04:03,2021-01-07
|
|
---- TYPES
|
|
TIMESTAMP,DATE
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 1
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from day_transform
|
|
where d = '1970-01-01';
|
|
---- RESULTS
|
|
1970-01-01 00:00:00,1970-01-01
|
|
---- TYPES
|
|
TIMESTAMP,DATE
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 1
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from day_transform
|
|
where d = '1969-12-15';
|
|
---- RESULTS
|
|
1969-12-15 13:55:03,1969-12-15
|
|
---- TYPES
|
|
TIMESTAMP,DATE
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 1
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Create table with HOUR partition transform
|
|
create table hour_transform(t timestamp)
|
|
partitioned by spec (hour(t))
|
|
stored as iceberg;
|
|
====
|
|
---- QUERY
|
|
insert into hour_transform values
|
|
('1969-12-31 22:55:03'),
|
|
('1969-12-31 23:55:03'),
|
|
('1969-12-31 23:56:03'),
|
|
('1970-01-01 00:00:00'),
|
|
('1970-01-01 00:00:01'),
|
|
('1970-01-01 01:00:00'),
|
|
('2021-01-08 14:04:03'),
|
|
('2021-01-08 15:04:03'),
|
|
('2021-01-08 15:04:33');
|
|
select * from hour_transform;
|
|
---- RESULTS
|
|
1969-12-31 22:55:03
|
|
1969-12-31 23:55:03
|
|
1969-12-31 23:56:03
|
|
1970-01-01 00:00:00
|
|
1970-01-01 00:00:01
|
|
1970-01-01 01:00:00
|
|
2021-01-08 14:04:03
|
|
2021-01-08 15:04:03
|
|
2021-01-08 15:04:33
|
|
---- TYPES
|
|
TIMESTAMP
|
|
====
|
|
---- QUERY
|
|
show files in hour_transform;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/hour_transform/data/t_hour=1969-12-31-22/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/hour_transform/data/t_hour=1969-12-31-23/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/hour_transform/data/t_hour=1970-01-01-00/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/hour_transform/data/t_hour=1970-01-01-01/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/hour_transform/data/t_hour=2021-01-08-14/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/hour_transform/data/t_hour=2021-01-08-15/.*.parq','.*',''
|
|
---- TYPES
|
|
STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from hour_transform
|
|
where t = '1969-12-31 22:55:03';
|
|
---- RESULTS
|
|
1969-12-31 22:55:03
|
|
---- TYPES
|
|
TIMESTAMP
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 1
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from hour_transform
|
|
where t = '1969-12-31 23:55:03';
|
|
---- RESULTS
|
|
1969-12-31 23:55:03
|
|
---- TYPES
|
|
TIMESTAMP
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 2
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from hour_transform
|
|
where t < '1970-01-01 01:00:00';
|
|
---- RESULTS
|
|
1969-12-31 22:55:03
|
|
1969-12-31 23:55:03
|
|
1969-12-31 23:56:03
|
|
1970-01-01 00:00:00
|
|
1970-01-01 00:00:01
|
|
---- TYPES
|
|
TIMESTAMP
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 5
|
|
aggregation(SUM, NumRowGroups): 3
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from hour_transform
|
|
where t >= '1970-01-01 01:00:00';
|
|
---- RESULTS
|
|
1970-01-01 01:00:00
|
|
2021-01-08 14:04:03
|
|
2021-01-08 15:04:03
|
|
2021-01-08 15:04:33
|
|
---- TYPES
|
|
TIMESTAMP
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 4
|
|
aggregation(SUM, NumRowGroups): 3
|
|
====
|
|
---- QUERY
|
|
create table mixed_and_shuffled (s string, b bigint, de decimal(6, 2), t timestamp, da date)
|
|
partitioned by spec (day(t), year(da), truncate(5, s), bucket(3, b), truncate(100, de))
|
|
stored as iceberg;
|
|
====
|
|
---- QUERY
|
|
insert into mixed_and_shuffled values
|
|
('quick brown fox', 12345, 123.45, '2021-01-01 01:02:03', '2021-01-01'),
|
|
('quick brown impala', 54321, 543.21, '2021-01-01 02:03:04', '2021-01-01'),
|
|
('quick brown dog', 31, 3333.45, '2020-01-06 05:06:07', '2020-01-06'),
|
|
('pink parrot', 9999777999, 9.9, '1999-09-09 09:09:09', '1999-09-09'),
|
|
('green python', 717171, 71, '1971-07-01 07:07:07', '1971-07-01');
|
|
select * from mixed_and_shuffled;
|
|
---- RESULTS
|
|
'quick brown fox',12345,123.45,2021-01-01 01:02:03,2021-01-01
|
|
'quick brown impala',54321,543.21,2021-01-01 02:03:04,2021-01-01
|
|
'quick brown dog',31,3333.45,2020-01-06 05:06:07,2020-01-06
|
|
'pink parrot',9999777999,9.90,1999-09-09 09:09:09,1999-09-09
|
|
'green python',717171,71.00,1971-07-01 07:07:07,1971-07-01
|
|
---- TYPES
|
|
STRING,BIGINT,DECIMAL,TIMESTAMP,DATE
|
|
====
|
|
---- QUERY
|
|
show files in mixed_and_shuffled;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/mixed_and_shuffled/data/t_day=1971-07-01/da_year=1971/s_trunc=green/b_bucket=1/de_trunc=71.00/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/mixed_and_shuffled/data/t_day=1999-09-09/da_year=1999/s_trunc=pink /b_bucket=1/de_trunc=9.00/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/mixed_and_shuffled/data/t_day=2020-01-06/da_year=2020/s_trunc=quick/b_bucket=1/de_trunc=3333.00/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/mixed_and_shuffled/data/t_day=2021-01-01/da_year=2021/s_trunc=quick/b_bucket=1/de_trunc=543.00/.*.parq','.*',''
|
|
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/mixed_and_shuffled/data/t_day=2021-01-01/da_year=2021/s_trunc=quick/b_bucket=2/de_trunc=123.00/.*.parq','.*',''
|
|
---- TYPES
|
|
STRING, STRING, STRING
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from mixed_and_shuffled
|
|
where s = 'quick brown dog';
|
|
---- RESULTS
|
|
'quick brown dog',31,3333.45,2020-01-06 05:06:07,2020-01-06
|
|
---- TYPES
|
|
STRING,BIGINT,DECIMAL,TIMESTAMP,DATE
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 1
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from mixed_and_shuffled
|
|
where s = 'quick brown dog' and de > 1000;
|
|
---- RESULTS
|
|
'quick brown dog',31,3333.45,2020-01-06 05:06:07,2020-01-06
|
|
---- TYPES
|
|
STRING,BIGINT,DECIMAL,TIMESTAMP,DATE
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 1
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from mixed_and_shuffled
|
|
where da='1999-09-09';
|
|
---- RESULTS
|
|
'pink parrot',9999777999,9.90,1999-09-09 09:09:09,1999-09-09
|
|
---- TYPES
|
|
STRING,BIGINT,DECIMAL,TIMESTAMP,DATE
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 1
|
|
aggregation(SUM, NumRowGroups): 1
|
|
====
|
|
---- QUERY
|
|
# Test partition pruning with RUNTIME_PROFILE.
|
|
select * from mixed_and_shuffled
|
|
where de > 100;
|
|
---- RESULTS
|
|
'quick brown fox',12345,123.45,2021-01-01 01:02:03,2021-01-01
|
|
'quick brown impala',54321,543.21,2021-01-01 02:03:04,2021-01-01
|
|
'quick brown dog',31,3333.45,2020-01-06 05:06:07,2020-01-06
|
|
---- TYPES
|
|
STRING,BIGINT,DECIMAL,TIMESTAMP,DATE
|
|
---- RUNTIME_PROFILE
|
|
aggregation(SUM, RowsRead): 3
|
|
aggregation(SUM, NumRowGroups): 3
|
|
====
|