mirror of
https://github.com/apache/impala.git
synced 2026-02-01 21:00:29 -05:00
TestIcebergTable.Test_partitioned_insert test is not stable because SHOW FILES on Iceberg table will sort the list of FILES. So restore the original VERIFY_IS_SUBSET for some flaky cases. Change-Id: Ic38b399ab51903edb59b3f2d1066cd5f5cbff4d4 Reviewed-on: http://gerrit.cloudera.org:8080/18465 Reviewed-by: Zoltan Borok-Nagy <boroknagyz@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
465 lines
14 KiB
Plaintext
465 lines
14 KiB
Plaintext
====
---- QUERY
# Test partitioned INSERTs with a single column that is also
# the partition column.
create table ice_only_part (i int)
partitioned by spec (i)
stored as iceberg;
---- RESULTS
'Table has been created.'
====
---- QUERY
# Insert plain INT literals, a TINYINT value into the INT column, and a NULL
# partition value, then read every row back.
insert into ice_only_part values (1), (2), (3);
insert into ice_only_part values (cast(4 as tinyint));
insert into ice_only_part values (NULL);
select * from ice_only_part;
---- RESULTS
1
2
3
4
NULL
---- TYPES
INT
====
---- QUERY
# One data file is expected per partition value. The row with the NULL partition
# value is expected under the __HIVE_DEFAULT_PARTITION__ directory.
show files in ice_only_part;
---- LABELS
Path,Size,Partition
---- RESULTS
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_only_part/data/i=1/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_only_part/data/i=2/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_only_part/data/i=3/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_only_part/data/i=4/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_only_part/data/i=__HIVE_DEFAULT_PARTITION__/.*.0.parq','.*',''
---- TYPES
STRING, STRING, STRING
====
---- QUERY
# Test partition pruning with RUNTIME_PROFILE: only the single row of
# partition i=1 is expected to be read.
select * from ice_only_part
where i = 1;
---- RESULTS
1
---- TYPES
INT
---- RUNTIME_PROFILE
aggregation(SUM, RowsRead): 1
====
---- QUERY
# Test inserts with multiple partition columns.
create table ice_multi_part (i int, d date, s string)
partitioned by spec(i, d)
stored as iceberg;
---- RESULTS
'Table has been created.'
====
---- QUERY
# Each inserted row has a distinct (i, d) partition value.
insert into ice_multi_part
values (1, '2020-12-07', 'first'), (2, '2020-12-08', 'second');
select * from ice_multi_part;
---- RESULTS
1,2020-12-07,'first'
2,2020-12-08,'second'
---- TYPES
INT, DATE, STRING
====
---- QUERY
# Filter on the second partition column only; the profile is expected to show
# that just the matching row was read.
select * from ice_multi_part
where d = '2020-12-08';
---- RESULTS
2,2020-12-08,'second'
---- TYPES
INT, DATE, STRING
---- RUNTIME_PROFILE
aggregation(SUM, RowsRead): 1
====
---- QUERY
# Test that Impala only writes one file per partition.
create table ice_bigints (i BIGINT, j BIGINT, k BIGINT)
partitioned by spec (i, j)
stored as iceberg;
---- RESULTS
'Table has been created.'
====
---- QUERY
# 'id % 2' and 'id % 3' yield 2 * 3 = 6 distinct (i, j) partition values.
# The profile is expected to show no row groups scanned and six file metadata
# reads for the count(*), i.e. one file per partition.
insert into ice_bigints select id % 2, id % 3, id from functional.alltypes;
select count(*) from ice_bigints;
---- RESULTS
7300
---- TYPES
BIGINT
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 0
aggregation(SUM, NumFileMetadataRead): 6
====
---- QUERY
# Filtering on both partition columns is expected to scan a single row group
# containing exactly the matching rows.
select count(*) from ice_bigints
where i = 0 and j = 0;
---- RESULTS
1217
---- TYPES
BIGINT
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 1
aggregation(SUM, RowsRead): 1217
====
---- QUERY
# Test partitioning with all supported types.
# Timestamp-based partitions are the only ones not supported, so 'timestamp_col' is
# left out of the partition spec.
# Also test partition pruning with all of the supported types.
create table alltypes_part (
  id INT,
  bool_col BOOLEAN,
  int_col INT,
  bigint_col BIGINT,
  float_col FLOAT,
  double_col DOUBLE,
  date_col DATE,
  string_col STRING,
  timestamp_col TIMESTAMP)
partitioned by spec (
  id,
  bool_col,
  int_col,
  bigint_col,
  float_col,
  double_col,
  date_col,
  string_col)
stored as iceberg;
---- RESULTS
'Table has been created.'
====
---- QUERY
# Populate the table from functional.alltypestiny; 'date_string_col' is converted
# to DATE with an explicit format so it can feed the DATE partition column.
insert into alltypes_part
select id, bool_col, int_col, bigint_col, float_col, double_col,
  CAST(date_string_col as date FORMAT 'MM/DD/YY'), string_col, timestamp_col
from functional.alltypestiny;
select count(*) from alltypes_part;
---- RESULTS
8
---- TYPES
BIGINT
====
---- QUERY
# Every row has a distinct 'id', so one file per row is expected. The directory
# hierarchy is expected to follow the order of the partition spec.
show files in alltypes_part;
---- LABELS
Path,Size,Partition
---- RESULTS
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/alltypes_part/data/id=0/bool_col=true/int_col=0/bigint_col=0/float_col=0/double_col=0/date_col=2009-01-01/string_col=0/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/alltypes_part/data/id=1/bool_col=false/int_col=1/bigint_col=10/float_col=1.100000023841858/double_col=10.1/date_col=2009-01-01/string_col=1/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/alltypes_part/data/id=2/bool_col=true/int_col=0/bigint_col=0/float_col=0/double_col=0/date_col=2009-02-01/string_col=0/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/alltypes_part/data/id=3/bool_col=false/int_col=1/bigint_col=10/float_col=1.100000023841858/double_col=10.1/date_col=2009-02-01/string_col=1/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/alltypes_part/data/id=4/bool_col=true/int_col=0/bigint_col=0/float_col=0/double_col=0/date_col=2009-03-01/string_col=0/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/alltypes_part/data/id=5/bool_col=false/int_col=1/bigint_col=10/float_col=1.100000023841858/double_col=10.1/date_col=2009-03-01/string_col=1/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/alltypes_part/data/id=6/bool_col=true/int_col=0/bigint_col=0/float_col=0/double_col=0/date_col=2009-04-01/string_col=0/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/alltypes_part/data/id=7/bool_col=false/int_col=1/bigint_col=10/float_col=1.100000023841858/double_col=10.1/date_col=2009-04-01/string_col=1/.*.0.parq','.*',''
---- TYPES
STRING, STRING, STRING
====
---- QUERY
# INSERTs with wrong value orderings are rejected.
# Here 'bool_col' and 'id' are swapped in the select list, so the INT 'id'
# lines up with the BOOLEAN column.
insert into alltypes_part
select bool_col, id, int_col, bigint_col, float_col, double_col,
  CAST(date_string_col as date FORMAT 'MM/DD/YY'), string_col, timestamp_col
from functional.alltypestiny;
select count(*) from alltypes_part;
---- CATCH
Expression 'id' (type: INT) would need to be cast to BOOLEAN for column 'bool_col'
====
---- QUERY
# Same check with the last two expressions swapped: 'timestamp_col' lines up with
# the STRING column 'string_col'.
insert into alltypes_part
select id, bool_col, int_col, bigint_col, float_col, double_col,
  CAST(date_string_col as date FORMAT 'MM/DD/YY'), timestamp_col, string_col
from functional.alltypestiny;
select count(*) from alltypes_part;
---- CATCH
Expression 'timestamp_col' (type: TIMESTAMP) would need to be cast to STRING for column 'string_col'
====
---- QUERY
# Partition pruning on the BOOLEAN partition column: the number of row groups
# scanned is expected to equal the number of matching rows.
select count(*) from alltypes_part
where bool_col = true;
---- RESULTS
4
---- TYPES
BIGINT
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 4
====
---- QUERY
# Partition pruning on the FLOAT partition column.
select count(*) from alltypes_part
where float_col = 0;
---- RESULTS
4
---- TYPES
BIGINT
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 4
====
---- QUERY
# Partition pruning on the DOUBLE partition column.
select count(*) from alltypes_part
where double_col = 0;
---- RESULTS
4
---- TYPES
BIGINT
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 4
====
---- QUERY
# Partition pruning on the DATE partition column.
select count(*) from alltypes_part
where date_col = '2009-01-01';
---- RESULTS
2
---- TYPES
BIGINT
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 2
====
---- QUERY
# Partition pruning on the STRING partition column.
select count(*) from alltypes_part
where string_col = '0';
---- RESULTS
4
---- TYPES
BIGINT
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 4
====
---- QUERY
# 'timestamp_col' is not a partitioning column, but min/max stats will be used to
# eliminate row groups
select count(*) from alltypes_part
where timestamp_col = now();
---- RESULTS
0
---- TYPES
BIGINT
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 0
====
---- QUERY
# Iceberg partitions are independent of column order: the partition spec (s, d)
# does not follow the table's column order (i, d, s).
create table ice_part_non_order (i int, d date, s string)
partitioned by spec(s, d)
stored as iceberg;
---- RESULTS
'Table has been created.'
====
---- QUERY
# Filter on the first partition field 's'; a single row group is expected to be
# scanned.
insert into ice_part_non_order
values (1, '2020-12-07', 'first'), (2, '2020-12-08', 'second');
select * from ice_part_non_order where s='second';
---- RESULTS
2,2020-12-08,'second'
---- TYPES
INT, DATE, STRING
---- RUNTIME_PROFILE
aggregation(SUM, NumRowGroups): 1
====
---- QUERY
# The directory hierarchy is expected to follow the partition spec order (s, d),
# not the column order of the table.
show files in ice_part_non_order;
---- LABELS
Path,Size,Partition
---- RESULTS
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_part_non_order/data/s=first/d=2020-12-07/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_part_non_order/data/s=second/d=2020-12-08/.*.0.parq','.*',''
---- TYPES
STRING, STRING, STRING
====
---- QUERY
# Test inserts with different partition specs.
# The table starts out without any partition spec.
create table ice_alter_part (i int, d date, s string)
stored as iceberg;
---- RESULTS
'Table has been created.'
====
---- QUERY
# Rows written while the table has no partition spec.
insert into ice_alter_part
values (1, '2020-12-07', 'first'), (2, '2020-12-08', 'second');
select * from ice_alter_part;
---- RESULTS
1,2020-12-07,'first'
2,2020-12-08,'second'
---- TYPES
INT, DATE, STRING
====
---- QUERY
# First spec change: identity partitioning on (i, d).
alter table ice_alter_part set partition spec(i, d);
---- RESULTS
'Updated partition spec.'
====
---- QUERY
# Rows written under spec (i, d); the earlier rows must remain readable.
insert into ice_alter_part
values (3, '2020-12-09', 'third'), (4, '2020-12-10', 'fourth');
select * from ice_alter_part;
---- RESULTS
1,2020-12-07,'first'
2,2020-12-08,'second'
3,2020-12-09,'third'
4,2020-12-10,'fourth'
---- TYPES
INT, DATE, STRING
====
---- QUERY
# Second spec change: partition transforms (year, bucket) mixed with an identity
# field.
alter table ice_alter_part set partition spec(year(d), i, bucket(5, s));
---- RESULTS
'Updated partition spec.'
====
---- QUERY
# Rows written under spec (year(d), i, bucket(5, s)); rows written under the
# previous specs must remain readable.
insert into ice_alter_part
values (5, '2020-12-11', 'fifth'), (6, '2020-12-12', 'sixth');
select * from ice_alter_part;
---- RESULTS
1,2020-12-07,'first'
2,2020-12-08,'second'
3,2020-12-09,'third'
4,2020-12-10,'fourth'
5,2020-12-11,'fifth'
6,2020-12-12,'sixth'
---- TYPES
INT, DATE, STRING
====
---- QUERY
# Files are expected to keep the layout of the spec that was active when they
# were written: no partition directory for the first insert (no '=' in the path),
# i=/d= for the second and d_year=/i=/s_bucket= for the third.
# The expected rows are only required to be a subset of the actual output.
show files in ice_alter_part;
---- LABELS
Path,Size,Partition
---- RESULTS: VERIFY_IS_SUBSET
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_alter_part/data/[^=]*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_alter_part/data/i=3/d=2020-12-09/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_alter_part/data/i=4/d=2020-12-10/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_alter_part/data/d_year=2020/i=5/s_bucket=2/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_alter_part/data/d_year=2020/i=6/s_bucket=0/.*.0.parq','.*',''
---- TYPES
STRING, STRING, STRING
====
---- QUERY
# Test the void() partition transform together with truncate() and year().
# Initial spec: void(i), truncate(1, s), year(d).
create table ice_void (i int, s string, d date)
partitioned by spec (void(i), truncate(1, s), year(d))
stored as iceberg;
insert into ice_void values (1, 'one', '2001-01-01'),
                            (11,'other','2001-01-11'),
                            (2, 'two', '2002-02-02'),
                            (3, 'three', '2003-03-03');
select * from ice_void;
---- RESULTS
1,'one',2001-01-01
11,'other',2001-01-11
2,'two',2002-02-02
3,'three',2003-03-03
---- TYPES
INT, STRING, DATE
====
---- QUERY
# The expected paths contain no component for the void-transformed column 'i'.
# 'one' and 'other' both truncate to 'o' and share year 2001, so the four rows
# are expected to produce three files.
show files in ice_void;
---- LABELS
Path,Size,Partition
---- RESULTS
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_void/data/s_trunc=o/d_year=2001/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_void/data/s_trunc=t/d_year=2002/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_void/data/s_trunc=t/d_year=2003/.*.0.parq','.*',''
---- TYPES
STRING, STRING, STRING
====
---- QUERY
# Switch the spec to (i, void(s), year(d)): 'i' becomes an identity partition
# field and 's' is voided. Rows written earlier must remain readable.
alter table ice_void set partition spec (i, void(s), year(d));
insert into ice_void values (4, 'four', '2004-04-04');
select * from ice_void;
---- RESULTS
1,'one',2001-01-01
11,'other',2001-01-11
2,'two',2002-02-02
3,'three',2003-03-03
4,'four',2004-04-04
---- TYPES
INT, STRING, DATE
====
---- QUERY
# The new file is expected under i=4/d_year=2004 (no component for the voided
# 's'); files written under the previous spec keep their paths.
# The expected rows are only required to be a subset of the actual output.
show files in ice_void;
---- LABELS
Path,Size,Partition
---- RESULTS: VERIFY_IS_SUBSET
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_void/data/s_trunc=o/d_year=2001/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_void/data/s_trunc=t/d_year=2002/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_void/data/s_trunc=t/d_year=2003/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_void/data/i=4/d_year=2004/.*.0.parq','.*',''
---- TYPES
STRING, STRING, STRING
====
---- QUERY
# Switch the spec to (i, void(s), void(d)): only 'i' remains an effective
# partition field. Both new rows have i = 5.
alter table ice_void set partition spec (i, void(s), void(d));
insert into ice_void values (5, 'five', '2005-05-05'), (5, 'other five', '1995-05-05');
select * from ice_void;
---- RESULTS
1,'one',2001-01-01
11,'other',2001-01-11
2,'two',2002-02-02
3,'three',2003-03-03
4,'four',2004-04-04
5,'five',2005-05-05
5,'other five',1995-05-05
---- TYPES
INT, STRING, DATE
====
---- QUERY
# The new file is expected directly under i=5/: the '[^=/]*' part of the regex
# rejects any further partition directory below it.
# The expected rows are only required to be a subset of the actual output.
show files in ice_void;
---- LABELS
Path,Size,Partition
---- RESULTS: VERIFY_IS_SUBSET
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_void/data/s_trunc=o/d_year=2001/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_void/data/s_trunc=t/d_year=2002/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_void/data/s_trunc=t/d_year=2003/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_void/data/i=4/d_year=2004/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_void/data/i=5/[^=/]*.0.parq','.*',''
---- TYPES
STRING, STRING, STRING
====
---- QUERY
# Switch the spec to void() on every field, leaving no effective partition field.
# Rows written under all earlier specs must remain readable.
alter table ice_void set partition spec (void(i), void(s), void(d));
insert into ice_void values (6, 'six', '2006-06-06'), (7, 'seven', '2007-07-07');
select * from ice_void;
---- RESULTS
1,'one',2001-01-01
11,'other',2001-01-11
2,'two',2002-02-02
3,'three',2003-03-03
4,'four',2004-04-04
5,'five',2005-05-05
5,'other five',1995-05-05
6,'six',2006-06-06
7,'seven',2007-07-07
---- TYPES
INT, STRING, DATE
====
---- QUERY
# The new data is expected directly under data/ without any partition directory
# (last regex); files from earlier specs keep their paths.
# The expected rows are only required to be a subset of the actual output.
show files in ice_void;
---- LABELS
Path,Size,Partition
---- RESULTS: VERIFY_IS_SUBSET
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_void/data/s_trunc=o/d_year=2001/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_void/data/s_trunc=t/d_year=2002/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_void/data/s_trunc=t/d_year=2003/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_void/data/i=4/d_year=2004/.*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_void/data/i=5/[^=/]*.0.parq','.*',''
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_void/data/[^=/]*.0.parq','.*',''
---- TYPES
STRING, STRING, STRING
====
---- QUERY
# Test CREATE TABLE AS SELECT into a partitioned Iceberg table, using
# tpcds_parquet.store_sales as the source.
create table store_sales partitioned by spec (ss_sold_date_sk) stored as iceberg
as select * from tpcds_parquet.store_sales;
select count(*) from store_sales;
---- RESULTS
2880404
---- TYPES
BIGINT
====
---- QUERY
# Rows whose partition column is NULL must be written and remain queryable.
select count(*) from store_sales where ss_sold_date_sk is null;
---- RESULTS
130093
---- TYPES
BIGINT
====