# insert into an unpartitioned table
insert into table functional.alltypesnopart
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
float_col, double_col, date_string_col, string_col, timestamp_col
from functional.alltypes
where year=2009 and month=05
---- PLAN
WRITE TO HDFS [functional.alltypesnopart, OVERWRITE=false]
|  partitions=1
|
00:SCAN HDFS [functional.alltypes]
   partition predicates: `year` = 2009, `month` = 5
   HDFS partitions=1/24 files=1 size=20.36KB
   row-size=81B cardinality=310
---- SCANRANGELOCATIONS
NODE 0:
  HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypes/year=2009/month=5/090501.txt 0:20853
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypesnopart, OVERWRITE=false]
|  partitions=1
|
00:SCAN HDFS [functional.alltypes]
   partition predicates: `year` = 2009, `month` = 5
   HDFS partitions=1/24 files=1 size=20.36KB
   row-size=81B cardinality=310
====
# insert into a static partition
insert into table functional.alltypessmall
partition (year=2009, month=04)
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
float_col, double_col, date_string_col, string_col, timestamp_col
from functional.alltypes
where year=2009 and month=05
---- PLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(2009,4)]
|  partitions=1
|
00:SCAN HDFS [functional.alltypes]
   partition predicates: `year` = 2009, `month` = 5
   HDFS partitions=1/24 files=1 size=20.36KB
   row-size=81B cardinality=310
---- SCANRANGELOCATIONS
NODE 0:
  HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypes/year=2009/month=5/090501.txt 0:20853
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(2009,4)]
|  partitions=1
|
00:SCAN HDFS [functional.alltypes]
   partition predicates: `year` = 2009, `month` = 5
   HDFS partitions=1/24 files=1 size=20.36KB
   row-size=81B cardinality=310
====
# overwrite a static partition
insert overwrite table functional.alltypessmall
partition (year=2009, month=04)
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
float_col, double_col, date_string_col, string_col, timestamp_col
from functional.alltypes
where year=2009 and month=05
---- PLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=true, PARTITION-KEYS=(2009,4)]
|  partitions=1
|
00:SCAN HDFS [functional.alltypes]
   partition predicates: `year` = 2009, `month` = 5
   HDFS partitions=1/24 files=1 size=20.36KB
   row-size=81B cardinality=310
---- SCANRANGELOCATIONS
NODE 0:
  HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypes/year=2009/month=5/090501.txt 0:20853
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=true, PARTITION-KEYS=(2009,4)]
|  partitions=1
|
00:SCAN HDFS [functional.alltypes]
   partition predicates: `year` = 2009, `month` = 5
   HDFS partitions=1/24 files=1 size=20.36KB
   row-size=81B cardinality=310
====
# insert into fully dynamic partitions
insert into table functional.alltypessmall
partition (year, month)
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
float_col, double_col, date_string_col, string_col, timestamp_col, year, month
from functional.alltypes
where year=2009 and month>10
---- PLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(year,month)]
|  partitions=24
|
01:SORT
|  order by: year ASC NULLS LAST, month ASC NULLS LAST
|  row-size=89B cardinality=610
|
00:SCAN HDFS [functional.alltypes]
   partition predicates: `year` = 2009, `month` > 10
   HDFS partitions=2/24 files=2 size=40.07KB
   row-size=89B cardinality=610
---- SCANRANGELOCATIONS
NODE 0:
  HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypes/year=2009/month=11/091101.txt 0:20179
  HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypes/year=2009/month=12/091201.txt 0:20853
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(year,month)]
|  partitions=24
|
02:SORT
|  order by: year ASC NULLS LAST, month ASC NULLS LAST
|  row-size=89B cardinality=610
|
01:EXCHANGE [HASH(`year`,`month`)]
|
00:SCAN HDFS [functional.alltypes]
   partition predicates: `year` = 2009, `month` > 10
   HDFS partitions=2/24 files=2 size=40.07KB
   row-size=89B cardinality=610
====
# IMPALA-5293: noclustered hint prevents adding sort node
insert into table functional.alltypessmall
partition (year, month) /* +noclustered */
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
float_col, double_col, date_string_col, string_col, timestamp_col, year, month
from functional.alltypes
where year=2009 and month>10
---- PLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(`year`,`month`)]
|  partitions=24
|
00:SCAN HDFS [functional.alltypes]
   partition predicates: `year` = 2009, `month` > 10
   HDFS partitions=2/24 files=2 size=40.07KB
   row-size=89B cardinality=610
---- SCANRANGELOCATIONS
NODE 0:
  HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypes/year=2009/month=11/091101.txt 0:20179
  HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypes/year=2009/month=12/091201.txt 0:20853
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(`year`,`month`)]
|  partitions=24
|
01:EXCHANGE [HASH(`year`,`month`)]
|
00:SCAN HDFS [functional.alltypes]
   partition predicates: `year` = 2009, `month` > 10
   HDFS partitions=2/24 files=2 size=40.07KB
   row-size=89B cardinality=610
====
# insert into fully dynamic partitions. The source table has no stats and the insert
# statement has a partition clause, so hash partition before the sink.
insert into table functional.alltypessmall
partition (year, month)
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
float_col, double_col, date_string_col, string_col, timestamp_col, int_col, int_col
from functional_seq_snap.alltypes
where year=2009 and month>10
---- PLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(int_col,int_col)]
|  partitions=unavailable
|
01:SORT
|  order by: int_col ASC NULLS LAST, int_col ASC NULLS LAST
|  row-size=72B cardinality=520
|
00:SCAN HDFS [functional_seq_snap.alltypes]
   partition predicates: `year` = 2009, `month` > 10
   HDFS partitions=2/24 files=2 size=11.34KB
   row-size=72B cardinality=520
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(int_col,int_col)]
|  partitions=unavailable
|
02:SORT
|  order by: int_col ASC NULLS LAST, int_col ASC NULLS LAST
|  row-size=72B cardinality=520
|
01:EXCHANGE [HASH(int_col,int_col)]
|
00:SCAN HDFS [functional_seq_snap.alltypes]
   partition predicates: `year` = 2009, `month` > 10
   HDFS partitions=2/24 files=2 size=11.34KB
   row-size=72B cardinality=520
====
# insert into fully dynamic partitions;
# partitioned output doesn't require repartitioning
insert into table functional.alltypessmall
partition (year, month)
select min(id), min(bool_col), min(tinyint_col), min(smallint_col), min(int_col),
min(bigint_col), min(float_col), min(double_col), min(date_string_col), min(string_col),
min(timestamp_col), year, month
from functional.alltypes
where year=2009 and month>10
group by year, month
---- PLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(`year`,`month`)]
|  partitions=24
|
02:SORT
|  order by: `year` ASC NULLS LAST, `month` ASC NULLS LAST
|  row-size=80B cardinality=24
|
01:AGGREGATE [FINALIZE]
|  output: min(id), min(bool_col), min(tinyint_col), min(smallint_col), min(int_col), min(bigint_col), min(float_col), min(double_col), min(date_string_col), min(string_col), min(timestamp_col)
|  group by: `year`, `month`
|  row-size=80B cardinality=24
|
00:SCAN HDFS [functional.alltypes]
   partition predicates: `year` = 2009, `month` > 10
   HDFS partitions=2/24 files=2 size=40.07KB
   row-size=89B cardinality=610
---- SCANRANGELOCATIONS
NODE 0:
  HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypes/year=2009/month=11/091101.txt 0:20179
  HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypes/year=2009/month=12/091201.txt 0:20853
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(`year`,`month`)]
|  partitions=24
|
04:SORT
|  order by: `year` ASC NULLS LAST, `month` ASC NULLS LAST
|  row-size=80B cardinality=24
|
03:AGGREGATE [FINALIZE]
|  output: min:merge(id), min:merge(bool_col), min:merge(tinyint_col), min:merge(smallint_col), min:merge(int_col), min:merge(bigint_col), min:merge(float_col), min:merge(double_col), min:merge(date_string_col), min:merge(string_col), min:merge(timestamp_col)
|  group by: `year`, `month`
|  row-size=80B cardinality=24
|
02:EXCHANGE [HASH(`year`,`month`)]
|
01:AGGREGATE [STREAMING]
|  output: min(id), min(bool_col), min(tinyint_col), min(smallint_col), min(int_col), min(bigint_col), min(float_col), min(double_col), min(date_string_col), min(string_col), min(timestamp_col)
|  group by: `year`, `month`
|  row-size=80B cardinality=24
|
00:SCAN HDFS [functional.alltypes]
   partition predicates: `year` = 2009, `month` > 10
   HDFS partitions=2/24 files=2 size=40.07KB
   row-size=89B cardinality=610
====
# insert into a partially dynamic partition
insert into table functional.alltypessmall
partition (year=2009, month)
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
float_col, double_col, date_string_col, string_col, timestamp_col, month
from functional.alltypes
where year=2009 and month>10
---- PLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(2009,month)]
|  partitions=12
|
01:SORT
|  order by: month ASC NULLS LAST
|  row-size=85B cardinality=610
|
00:SCAN HDFS [functional.alltypes]
   partition predicates: `year` = 2009, `month` > 10
   HDFS partitions=2/24 files=2 size=40.07KB
   row-size=85B cardinality=610
---- SCANRANGELOCATIONS
NODE 0:
  HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypes/year=2009/month=11/091101.txt 0:20179
  HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypes/year=2009/month=12/091201.txt 0:20853
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(2009,month)]
|  partitions=12
|
02:SORT
|  order by: month ASC NULLS LAST
|  row-size=85B cardinality=610
|
01:EXCHANGE [HASH(`month`)]
|
00:SCAN HDFS [functional.alltypes]
   partition predicates: `year` = 2009, `month` > 10
   HDFS partitions=2/24 files=2 size=40.07KB
   row-size=85B cardinality=610
====
# insert into a partially dynamic partition
# partitioned output doesn't require repartitioning
insert into table functional.alltypessmall
partition (year=2009, month)
select min(id), min(bool_col), min(tinyint_col), min(smallint_col), min(int_col),
min(bigint_col), min(float_col), min(double_col), min(date_string_col), min(string_col),
min(timestamp_col), month
from functional.alltypes
where year=2009 and month>10
group by month
---- PLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(2009,`month`)]
|  partitions=12
|
02:SORT
|  order by: `month` ASC NULLS LAST
|  row-size=76B cardinality=12
|
01:AGGREGATE [FINALIZE]
|  output: min(id), min(bool_col), min(tinyint_col), min(smallint_col), min(int_col), min(bigint_col), min(float_col), min(double_col), min(date_string_col), min(string_col), min(timestamp_col)
|  group by: `month`
|  row-size=76B cardinality=12
|
00:SCAN HDFS [functional.alltypes]
   partition predicates: `year` = 2009, `month` > 10
   HDFS partitions=2/24 files=2 size=40.07KB
   row-size=85B cardinality=610
---- SCANRANGELOCATIONS
NODE 0:
  HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypes/year=2009/month=11/091101.txt 0:20179
  HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypes/year=2009/month=12/091201.txt 0:20853
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(2009,`month`)]
|  partitions=12
|
04:SORT
|  order by: `month` ASC NULLS LAST
|  row-size=76B cardinality=12
|
03:AGGREGATE [FINALIZE]
|  output: min:merge(id), min:merge(bool_col), min:merge(tinyint_col), min:merge(smallint_col), min:merge(int_col), min:merge(bigint_col), min:merge(float_col), min:merge(double_col), min:merge(date_string_col), min:merge(string_col), min:merge(timestamp_col)
|  group by: `month`
|  row-size=76B cardinality=12
|
02:EXCHANGE [HASH(`month`)]
|
01:AGGREGATE [STREAMING]
|  output: min(id), min(bool_col), min(tinyint_col), min(smallint_col), min(int_col), min(bigint_col), min(float_col), min(double_col), min(date_string_col), min(string_col), min(timestamp_col)
|  group by: `month`
|  row-size=76B cardinality=12
|
00:SCAN HDFS [functional.alltypes]
   partition predicates: `year` = 2009, `month` > 10
   HDFS partitions=2/24 files=2 size=40.07KB
   row-size=85B cardinality=610
====
# insert into a partially dynamic partition
insert into table functional.alltypessmall
partition (year, month=4)
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
float_col, double_col, date_string_col, string_col, timestamp_col, year
from functional.alltypes
where year>2009 and month=4
---- PLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(year,4)]
|  partitions=2
|
01:SORT
|  order by: year ASC NULLS LAST
|  row-size=85B cardinality=300
|
00:SCAN HDFS [functional.alltypes]
   partition predicates: `year` > 2009, `month` = 4
   HDFS partitions=1/24 files=1 size=19.71KB
   row-size=85B cardinality=300
---- SCANRANGELOCATIONS
NODE 0:
  HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypes/year=2010/month=4/100401.txt 0:20179
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(year,4)]
|  partitions=2
|
01:SORT
|  order by: year ASC NULLS LAST
|  row-size=85B cardinality=300
|
00:SCAN HDFS [functional.alltypes]
   partition predicates: `year` > 2009, `month` = 4
   HDFS partitions=1/24 files=1 size=19.71KB
   row-size=85B cardinality=300
====
# insert with limit from partitioned table.
insert into table functional.alltypesnopart
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
float_col, double_col, date_string_col, string_col, timestamp_col
from functional.alltypes where year=2009 and month=1 limit 10
---- PLAN
WRITE TO HDFS [functional.alltypesnopart, OVERWRITE=false]
|  partitions=1
|
00:SCAN HDFS [functional.alltypes]
   partition predicates: `year` = 2009, `month` = 1
   HDFS partitions=1/24 files=1 size=19.95KB
   limit: 10
   row-size=81B cardinality=10
---- SCANRANGELOCATIONS
NODE 0:
  HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypes/year=2009/month=1/090101.txt 0:20433
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypesnopart, OVERWRITE=false]
|  partitions=1
|
01:EXCHANGE [UNPARTITIONED]
|  limit: 10
|
00:SCAN HDFS [functional.alltypes]
   partition predicates: `year` = 2009, `month` = 1
   HDFS partitions=1/24 files=1 size=19.95KB
   limit: 10
   row-size=81B cardinality=10
====
# static partition insert from a constant select
insert into table functional.alltypessmall
partition (year=2010, month=4)
select 100, false, 1, 1, 1, 10,
10.0, 10.0, "02/01/09", "1", cast("2009-02-01 00:01:00" as timestamp)
---- PLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(2010,4)]
|  partitions=1
|
00:UNION
   constant-operands=1
   row-size=54B cardinality=1
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(2010,4)]
|  partitions=1
|
00:UNION
   constant-operands=1
   row-size=54B cardinality=1
====
# dynamic partition insert from a constant select
insert into table functional.alltypessmall
partition (year, month)
select 100, false, 1, 1, 1, 10,
10.0, 10.0, "02/01/09", "1", cast("2009-02-01 00:01:00" as timestamp), 2010, 4
---- PLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(2010,4)]
|  partitions=1
|
00:UNION
   constant-operands=1
   row-size=57B cardinality=1
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(2010,4)]
|  partitions=1
|
00:UNION
   constant-operands=1
   row-size=57B cardinality=1
====
# static partition insert from values statement
insert into table functional.alltypessmall
partition (year=2010, month=4) values
(100, false, 1, 1, 1, 10, 10.0, 10.0, "02/01/09", "1", cast("2009-02-01 00:01:00" as timestamp)),
(200, true, 2, 2, 2, 20, 20.0, 20.0, "02/02/09", "2", cast("2009-02-02 00:01:00" as timestamp)),
(300, false, 3, 3, 3, 30, 30.0, 30.0, "02/03/09", "3", cast("2009-02-03 00:01:00" as timestamp))
---- PLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(2010,4)]
|  partitions=1
|
00:UNION
   constant-operands=3
   row-size=55B cardinality=3
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(2010,4)]
|  partitions=1
|
00:UNION
   constant-operands=3
   row-size=55B cardinality=3
====
# dynamic partition insert from values statement
insert into table functional.alltypessmall
partition (year, month) values
(100, false, 1, 1, 1, 10, 10.0, 10.0, "02/01/09", "1", cast("2009-02-01 00:01:00" as timestamp), 2010, 4),
(200, true, 2, 2, 2, 20, 20.0, 20.0, "02/02/09", "2", cast("2009-02-02 00:01:00" as timestamp), 2010, 5),
(300, false, 3, 3, 3, 30, 30.0, 30.0, "02/03/09", "3", cast("2009-02-03 00:01:00" as timestamp), 2010, 6)
---- PLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(2010,4)]
|  partitions=9
|
01:SORT
|  order by: 2010 ASC NULLS LAST, 4 ASC NULLS LAST
|  row-size=58B cardinality=3
|
00:UNION
   constant-operands=3
   row-size=58B cardinality=3
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(2010,4)]
|  partitions=9
|
01:SORT
|  order by: 2010 ASC NULLS LAST, 4 ASC NULLS LAST
|  row-size=58B cardinality=3
|
00:UNION
   constant-operands=3
   row-size=58B cardinality=3
====
# test static partition insert from a query with grouped aggregation
# we expect the insert fragment to be partitioned by the grouping exprs of the query stmt
# and not by the partition exprs of the insert stmt
insert into functional.alltypes(bigint_col, string_col) partition (year=2010, month=10)
select count(int_col), string_col from functional.alltypes
group by string_col
---- PLAN
WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(2010,10)]
|  partitions=1
|
01:AGGREGATE [FINALIZE]
|  output: count(int_col)
|  group by: string_col
|  row-size=21B cardinality=10
|
00:SCAN HDFS [functional.alltypes]
   HDFS partitions=24/24 files=24 size=478.45KB
   row-size=17B cardinality=7.30K
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(2010,10)]
|  partitions=1
|
03:AGGREGATE [FINALIZE]
|  output: count:merge(int_col)
|  group by: string_col
|  row-size=21B cardinality=10
|
02:EXCHANGE [HASH(string_col)]
|
01:AGGREGATE [STREAMING]
|  output: count(int_col)
|  group by: string_col
|  row-size=21B cardinality=10
|
00:SCAN HDFS [functional.alltypes]
   HDFS partitions=24/24 files=24 size=478.45KB
   row-size=17B cardinality=7.30K
====
# test static partition insert from a query with distinct grouped aggregation
# we expect the insert fragment to be partitioned by the grouping exprs of the query stmt
# and not by the partition exprs of the insert stmt
insert into functional.alltypes(bigint_col, string_col) partition (year=2010, month=10)
select count(distinct int_col), string_col from functional.alltypes
group by string_col
---- PLAN
WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(2010,10)]
|  partitions=1
|
02:AGGREGATE [FINALIZE]
|  output: count(int_col)
|  group by: string_col
|  row-size=21B cardinality=10
|
01:AGGREGATE
|  group by: string_col, int_col
|  row-size=17B cardinality=100
|
00:SCAN HDFS [functional.alltypes]
   HDFS partitions=24/24 files=24 size=478.45KB
   row-size=17B cardinality=7.30K
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(2010,10)]
|  partitions=1
|
06:AGGREGATE [FINALIZE]
|  output: count:merge(int_col)
|  group by: string_col
|  row-size=21B cardinality=10
|
05:EXCHANGE [HASH(string_col)]
|
02:AGGREGATE [STREAMING]
|  output: count(int_col)
|  group by: string_col
|  row-size=21B cardinality=10
|
04:AGGREGATE
|  group by: string_col, int_col
|  row-size=17B cardinality=100
|
03:EXCHANGE [HASH(string_col,int_col)]
|
01:AGGREGATE [STREAMING]
|  group by: string_col, int_col
|  row-size=17B cardinality=100
|
00:SCAN HDFS [functional.alltypes]
   HDFS partitions=24/24 files=24 size=478.45KB
   row-size=17B cardinality=7.30K
====
# test that the planner chooses to repartition before the table sink
# alltypes has column stats and based on the product of the NDVs of year and month
# the planner should choose to repartition before the table sink
insert into table functional.alltypes partition(year, month)
select * from functional.alltypes
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(year,month)]
|  partitions=24
|
02:SORT
|  order by: year ASC NULLS LAST, month ASC NULLS LAST
|  row-size=89B cardinality=7.30K
|
01:EXCHANGE [HASH(functional.alltypes.year,functional.alltypes.month)]
|
00:SCAN HDFS [functional.alltypes]
   HDFS partitions=24/24 files=24 size=478.45KB
   row-size=89B cardinality=7.30K
====
# test noshuffle hint to prevent repartitioning (same query as above with hint)
insert into table functional.alltypes partition(year, month) [noshuffle]
select * from functional.alltypes
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(year,month)]
|  partitions=24
|
01:SORT
|  order by: year ASC NULLS LAST, month ASC NULLS LAST
|  row-size=89B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
   HDFS partitions=24/24 files=24 size=478.45KB
   row-size=89B cardinality=7.30K
====
# same as above but with traditional commented hint at default hint location
insert into table functional.alltypes partition(year, month) /* +noshuffle */
select * from functional.alltypes
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(year,month)]
|  partitions=24
|
01:SORT
|  order by: year ASC NULLS LAST, month ASC NULLS LAST
|  row-size=89B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
   HDFS partitions=24/24 files=24 size=478.45KB
   row-size=89B cardinality=7.30K
====
# same as above but with traditional commented hint at Oracle hint location
insert /* +noshuffle */ into table functional.alltypes partition(year, month)
select * from functional.alltypes
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(year,month)]
|  partitions=24
|
01:SORT
|  order by: year ASC NULLS LAST, month ASC NULLS LAST
|  row-size=89B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
   HDFS partitions=24/24 files=24 size=478.45KB
   row-size=89B cardinality=7.30K
====
# same as above but with end-of-line commented hint
insert into table functional.alltypes partition(year, month)
-- +noshuffle
select * from functional.alltypes
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(year,month)]
|  partitions=24
|
01:SORT
|  order by: year ASC NULLS LAST, month ASC NULLS LAST
|  row-size=89B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
   HDFS partitions=24/24 files=24 size=478.45KB
   row-size=89B cardinality=7.30K
====
# test that the planner does not repartition before the table sink
# alltypes has column stats and since year only has 2 distinct values the planner
# should choose not to repartition before the table sink
insert into table functional.alltypes partition(year, month=1)
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
float_col, double_col, date_string_col, string_col, timestamp_col, year
from functional.alltypes
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(year,1)]
|  partitions=2
|
01:SORT
|  order by: year ASC NULLS LAST
|  row-size=85B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
   HDFS partitions=24/24 files=24 size=478.45KB
   row-size=85B cardinality=7.30K
====
# test shuffle hint to force repartitioning (same query as above with hint)
insert into table functional.alltypes partition(year, month=1) [shuffle]
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
float_col, double_col, date_string_col, string_col, timestamp_col, year
from functional.alltypes
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(year,1)]
|  partitions=2
|
02:SORT
|  order by: year ASC NULLS LAST
|  row-size=85B cardinality=7.30K
|
01:EXCHANGE [HASH(`year`)]
|
00:SCAN HDFS [functional.alltypes]
   HDFS partitions=24/24 files=24 size=478.45KB
   row-size=85B cardinality=7.30K
====
# test insert/select stmt that contains an analytic function (IMPALA-1400)
insert into table functional.alltypestiny partition(year=2009, month=1)
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
float_col, double_col, date_string_col, string_col,
lag(timestamp_col, 1) over (partition by id order by id) as timestamp_col
from functional.alltypestiny
---- PLAN
WRITE TO HDFS [functional.alltypestiny, OVERWRITE=false, PARTITION-KEYS=(2009,1)]
|  partitions=1
|
02:ANALYTIC
|  functions: lag(timestamp_col, 1, NULL)
|  partition by: id
|  order by: id ASC
|  window: ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
|  row-size=97B cardinality=8
|
01:SORT
|  order by: id ASC NULLS LAST
|  row-size=81B cardinality=8
|
00:SCAN HDFS [functional.alltypestiny]
   HDFS partitions=4/4 files=4 size=460B
   row-size=81B cardinality=8
====
# IMPALA-3930: Test insert with shuffle hint on constant partition exprs. The table sink
# is executed at the coordinator.
insert into table functional.alltypes partition(year=2009, month=1) /* +shuffle */
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
float_col, double_col, date_string_col, string_col, timestamp_col
from functional.alltypes
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(2009,1)]
|  partitions=1
|
01:EXCHANGE [UNPARTITIONED]
|
00:SCAN HDFS [functional.alltypes]
   HDFS partitions=24/24 files=24 size=478.45KB
   row-size=81B cardinality=7.30K
====
# IMPALA-3930: Same as above but with a dynamic partition insert.
insert into table functional.alltypes partition(year, month) /* +shuffle */
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
float_col, double_col, date_string_col, string_col, timestamp_col, 2009, 1
from functional.alltypes
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(2009,1)]
|  partitions=1
|
01:EXCHANGE [UNPARTITIONED]
|
00:SCAN HDFS [functional.alltypes]
   HDFS partitions=24/24 files=24 size=478.45KB
   row-size=81B cardinality=7.30K
====
# IMPALA-3930: Same as above but with a mix of static/dynamic partition exprs, and
# with more complex constant exprs.
insert into table functional.alltypes partition(year, month=cast(10/2 as int)) /* +shuffle */
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
float_col, double_col, date_string_col, string_col, timestamp_col, cast(concat("2", "010") as smallint) - 1
from functional.alltypes
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(2009,5)]
|  partitions=1
|
01:EXCHANGE [UNPARTITIONED]
|
00:SCAN HDFS [functional.alltypes]
   HDFS partitions=24/24 files=24 size=478.45KB
   row-size=81B cardinality=7.30K
====
# Test insert into an unpartitioned table with shuffle hint.
insert into table functional.alltypesnopart /* +shuffle */
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
float_col, double_col, date_string_col, string_col, timestamp_col
from functional.alltypes
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypesnopart, OVERWRITE=false]
|  partitions=1
|
01:EXCHANGE [UNPARTITIONED]
|
00:SCAN HDFS [functional.alltypes]
   HDFS partitions=24/24 files=24 size=478.45KB
   row-size=81B cardinality=7.30K
====
# IMPALA-5293: ensure insert into partitioned table adds sort node without clustered hint.
insert into table functional.alltypes partition(year, month)
select * from functional.alltypes
---- PLAN
WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(year,month)]
|  partitions=24
|
01:SORT
|  order by: year ASC NULLS LAST, month ASC NULLS LAST
|  row-size=89B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
   HDFS partitions=24/24 files=24 size=478.45KB
   row-size=89B cardinality=7.30K
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(year,month)]
|  partitions=24
|
02:SORT
|  order by: year ASC NULLS LAST, month ASC NULLS LAST
|  row-size=89B cardinality=7.30K
|
01:EXCHANGE [HASH(functional.alltypes.year,functional.alltypes.month)]
|
00:SCAN HDFS [functional.alltypes]
   HDFS partitions=24/24 files=24 size=478.45KB
   row-size=89B cardinality=7.30K
====
# IMPALA-5293: ensure insert into partitioned table adds sort node without clustered hint.
insert into table functional.alltypes partition(year, month) /*+ noshuffle */
select * from functional.alltypes
---- PLAN
WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(year,month)]
|  partitions=24
|
01:SORT
|  order by: year ASC NULLS LAST, month ASC NULLS LAST
|  row-size=89B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
   HDFS partitions=24/24 files=24 size=478.45KB
   row-size=89B cardinality=7.30K
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(year,month)]
|  partitions=24
|
01:SORT
|  order by: year ASC NULLS LAST, month ASC NULLS LAST
|  row-size=89B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
   HDFS partitions=24/24 files=24 size=478.45KB
   row-size=89B cardinality=7.30K
====
# IMPALA-5293: ensure insert into partitioned table adds sort node without clustered hint.
# Subquery in WHERE-clause exercises the reset() + analyze() path during rewrite.
insert into table functional.alltypes partition(year, month)
select * from functional.alltypes
where int_col = (select max(int_col) from functional.alltypes)
---- PLAN
WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(year,month)]
|  partitions=24
|
04:SORT
|  order by: year ASC NULLS LAST, month ASC NULLS LAST
|  row-size=89B cardinality=730
|
03:HASH JOIN [LEFT SEMI JOIN]
|  hash predicates: int_col = max(int_col)
|  runtime filters: RF000 <- max(int_col)
|  row-size=89B cardinality=730
|
|--02:AGGREGATE [FINALIZE]
|  |  output: max(int_col)
|  |  row-size=4B cardinality=1
|  |
|  01:SCAN HDFS [functional.alltypes]
|     HDFS partitions=24/24 files=24 size=478.45KB
|     row-size=4B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
   HDFS partitions=24/24 files=24 size=478.45KB
   runtime filters: RF000 -> int_col
   row-size=89B cardinality=7.30K
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(year,month)]
|  partitions=24
|
08:SORT
|  order by: year ASC NULLS LAST, month ASC NULLS LAST
|  row-size=89B cardinality=730
|
07:EXCHANGE [HASH(functional.alltypes.year,functional.alltypes.month)]
|
03:HASH JOIN [LEFT SEMI JOIN, BROADCAST]
|  hash predicates: int_col = max(int_col)
|  runtime filters: RF000 <- max(int_col)
|  row-size=89B cardinality=730
|
|--06:EXCHANGE [BROADCAST]
|  |
|  05:AGGREGATE [FINALIZE]
|  |  output: max:merge(int_col)
|  |  row-size=4B cardinality=1
|  |
|  04:EXCHANGE [UNPARTITIONED]
|  |
|  02:AGGREGATE
|  |  output: max(int_col)
|  |  row-size=4B cardinality=1
|  |
|  01:SCAN HDFS [functional.alltypes]
|     HDFS partitions=24/24 files=24 size=478.45KB
|     row-size=4B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
   HDFS partitions=24/24 files=24 size=478.45KB
   runtime filters: RF000 -> int_col
   row-size=89B cardinality=7.30K
====
# IMPALA-5293: ensure insert into non-partitioned table does not add sort node.
insert into table functional.alltypesnopart
select * from functional.alltypesnopart
---- PLAN
WRITE TO HDFS [functional.alltypesnopart, OVERWRITE=false]
|  partitions=1
|
00:SCAN HDFS [functional.alltypesnopart]
   HDFS partitions=1/1 files=0 size=0B
   row-size=72B cardinality=0
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypesnopart, OVERWRITE=false]
|  partitions=1
|
00:SCAN HDFS [functional.alltypesnopart]
   HDFS partitions=1/1 files=0 size=0B
   row-size=72B cardinality=0
====
# IMPALA-5293: ensure insert into non-partitioned table does not add sort node.
insert into table functional.alltypesnopart /*+ shuffle */
select * from functional.alltypesnopart
---- PLAN
WRITE TO HDFS [functional.alltypesnopart, OVERWRITE=false]
|  partitions=1
|
00:SCAN HDFS [functional.alltypesnopart]
   HDFS partitions=1/1 files=0 size=0B
   row-size=72B cardinality=0
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypesnopart, OVERWRITE=false]
|  partitions=1
|
01:EXCHANGE [UNPARTITIONED]
|
00:SCAN HDFS [functional.alltypesnopart]
   HDFS partitions=1/1 files=0 size=0B
   row-size=72B cardinality=0
====
# Test that Iceberg partitioned inserts shuffle and sort data based on the
# partitioning columns.
insert into functional_parquet.iceberg_int_partitioned
select id % 3, id % 2, id from functional.alltypes
---- PLAN
WRITE TO HDFS [functional_parquet.iceberg_int_partitioned, OVERWRITE=false, PARTITION-KEYS=(id % 3,id % 2)]
|  partitions=53290000
|
01:SORT
|  order by: id % 3 ASC NULLS LAST, id % 2 ASC NULLS LAST
|  row-size=4B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
   HDFS partitions=24/24 files=24 size=478.45KB
   row-size=4B cardinality=7.30K
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional_parquet.iceberg_int_partitioned, OVERWRITE=false, PARTITION-KEYS=(id % 3,id % 2)]
|  partitions=53290000
|
02:SORT
|  order by: id % 3 ASC NULLS LAST, id % 2 ASC NULLS LAST
|  row-size=4B cardinality=7.30K
|
01:EXCHANGE [HASH(id % 3,id % 2)]
|
00:SCAN HDFS [functional.alltypes]
   HDFS partitions=24/24 files=24 size=478.45KB
   row-size=4B cardinality=7.30K
====
# Test that Iceberg partition transform inserts shuffle and sort data based on the
# transforms. Also the table is ZORDERed, which should also be present in the SORT node.
insert into functional_parquet.iceberg_partition_transforms_zorder
select years_add(timestamp_col, id % 3),
concat(string_col, date_string_col),
cast(id * 3 as int),
cast(10000 - id as int)
from functional.alltypes
---- PLAN
WRITE TO HDFS [functional_parquet.iceberg_partition_transforms_zorder, OVERWRITE=false, PARTITION-KEYS=(year(years_add(timestamp_col, id % 3)),iceberg_bucket_transform(concat(string_col, date_string_col), 5))]
|  partitions=5372800
|
01:SORT
|  order by: LEXICAL: year(years_add(timestamp_col, id % 3)) ASC NULLS LAST, iceberg_bucket_transform(concat(string_col, date_string_col), 5) ASC NULLS LAST, ZORDER: CAST(id * 3 AS INT), CAST(10000 - id AS INT)
|  row-size=61B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
   HDFS partitions=24/24 files=24 size=478.45KB
   row-size=53B cardinality=7.30K
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional_parquet.iceberg_partition_transforms_zorder, OVERWRITE=false, PARTITION-KEYS=(year(years_add(timestamp_col, id % 3)),iceberg_bucket_transform(concat(string_col, date_string_col), 5))]
|  partitions=5372800
|
02:SORT
|  order by: LEXICAL: year(years_add(timestamp_col, id % 3)) ASC NULLS LAST, iceberg_bucket_transform(concat(string_col, date_string_col), 5) ASC NULLS LAST, ZORDER: CAST(id * 3 AS INT), CAST(10000 - id AS INT)
|  row-size=61B cardinality=7.30K
|
01:EXCHANGE [HASH(year(years_add(timestamp_col, id % 3)),iceberg_bucket_transform(concat(string_col, date_string_col), 5))]
|
00:SCAN HDFS [functional.alltypes]
   HDFS partitions=24/24 files=24 size=478.45KB
   row-size=53B cardinality=7.30K
====