impala/testdata/workloads/functional-planner/queries/PlannerTest/insert.test
Commit 90f3b2f491 by Zoltan Borok-Nagy
IMPALA-10432: INSERT INTO Iceberg tables with partition transforms
This change implements INSERT INTO for Iceberg tables that use
partition transforms. Partition transforms are functions that calculate
partition data from row data.

Iceberg defines the following partition transforms (see the sketch after this list):
https://iceberg.apache.org/spec/#partition-transforms

 * IDENTITY
 * BUCKET
 * TRUNCATE
 * YEAR
 * MONTH
 * DAY
 * HOUR
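
For illustration, a minimal sketch of a table that uses several of
these transforms (table and column names are hypothetical; the spec is
expressed with Impala's PARTITIONED BY SPEC clause):

create table ice_t (id int, s string, ts timestamp)
partitioned by spec (bucket(5, s), year(ts))
stored as iceberg;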

INSERT INTO identity-partitioned Iceberg tables is already supported.
This patch adds support for the rest of the transforms.
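
A hedged example of the kind of statement this patch enables, against
the hypothetical table sketched above (rows are routed to partitions
via bucket(5, s) and year(ts)):

insert into ice_t
values (1, "alpha", cast("2010-03-14 00:00:00" as timestamp));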

We create the partitioning expressions in InsertStmt. Based on these
expressions, data is automatically shuffled and sorted by the backend
executors before rows are handed to the table sink operators. The table
sink operator writes the partitions one by one and creates a
human-readable partition path for each of them.
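
As a schematic illustration (not a plan generated by Impala), a spec
like (year(ts), bucket(5, s)) yields partitioning expressions along the
lines of year(ts) and iceberg_bucket_transform(s, 5), which then appear
as the exchange and sort keys; compare the Iceberg test cases at the
end of this file:

02:SORT
| order by: year(ts) ASC NULLS LAST, iceberg_bucket_transform(s, 5) ASC NULLS LAST
|
01:EXCHANGE [HASH(year(ts),iceberg_bucket_transform(s, 5))]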

Finally, we convert the partition path back to partition data and
create Iceberg DataFiles that record information about the files
written.
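
For example (illustrative values only), a data file for the spec above
might land under a human-readable path such as

  .../data/ts_year=2010/s_bucket=3/<data-file>.parq

and on commit that path is parsed back into the partition data recorded
in the corresponding DataFile entry.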

Testing:
 * added planner test
 * added e2e tests

Change-Id: I3edf02048cea78703837b248c55219c22d512b78
Reviewed-on: http://gerrit.cloudera.org:8080/16939
Reviewed-by: wangsheng <skyyws@163.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
2021-01-18 18:46:42 +00:00

# insert into an unpartitioned table
insert into table functional.alltypesnopart
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
float_col, double_col, date_string_col, string_col, timestamp_col
from functional.alltypes
where year=2009 and month=05
---- PLAN
WRITE TO HDFS [functional.alltypesnopart, OVERWRITE=false]
| partitions=1
|
00:SCAN HDFS [functional.alltypes]
partition predicates: `year` = 2009, `month` = 5
HDFS partitions=1/24 files=1 size=20.36KB
row-size=81B cardinality=310
---- SCANRANGELOCATIONS
NODE 0:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypes/year=2009/month=5/090501.txt 0:20853
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypesnopart, OVERWRITE=false]
| partitions=1
|
00:SCAN HDFS [functional.alltypes]
partition predicates: `year` = 2009, `month` = 5
HDFS partitions=1/24 files=1 size=20.36KB
row-size=81B cardinality=310
====
# insert into a static partition
insert into table functional.alltypessmall
partition (year=2009, month=04)
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
float_col, double_col, date_string_col, string_col, timestamp_col
from functional.alltypes
where year=2009 and month=05
---- PLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(2009,4)]
| partitions=1
|
00:SCAN HDFS [functional.alltypes]
partition predicates: `year` = 2009, `month` = 5
HDFS partitions=1/24 files=1 size=20.36KB
row-size=81B cardinality=310
---- SCANRANGELOCATIONS
NODE 0:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypes/year=2009/month=5/090501.txt 0:20853
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(2009,4)]
| partitions=1
|
00:SCAN HDFS [functional.alltypes]
partition predicates: `year` = 2009, `month` = 5
HDFS partitions=1/24 files=1 size=20.36KB
row-size=81B cardinality=310
====
# overwrite a static partition
insert overwrite table functional.alltypessmall
partition (year=2009, month=04)
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
float_col, double_col, date_string_col, string_col, timestamp_col
from functional.alltypes
where year=2009 and month=05
---- PLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=true, PARTITION-KEYS=(2009,4)]
| partitions=1
|
00:SCAN HDFS [functional.alltypes]
partition predicates: `year` = 2009, `month` = 5
HDFS partitions=1/24 files=1 size=20.36KB
row-size=81B cardinality=310
---- SCANRANGELOCATIONS
NODE 0:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypes/year=2009/month=5/090501.txt 0:20853
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=true, PARTITION-KEYS=(2009,4)]
| partitions=1
|
00:SCAN HDFS [functional.alltypes]
partition predicates: `year` = 2009, `month` = 5
HDFS partitions=1/24 files=1 size=20.36KB
row-size=81B cardinality=310
====
# insert into fully dynamic partitions
insert into table functional.alltypessmall
partition (year, month)
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
float_col, double_col, date_string_col, string_col, timestamp_col, year, month
from functional.alltypes
where year=2009 and month>10
---- PLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(year,month)]
| partitions=24
|
01:SORT
| order by: year ASC NULLS LAST, month ASC NULLS LAST
| row-size=89B cardinality=610
|
00:SCAN HDFS [functional.alltypes]
partition predicates: `year` = 2009, `month` > 10
HDFS partitions=2/24 files=2 size=40.07KB
row-size=89B cardinality=610
---- SCANRANGELOCATIONS
NODE 0:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypes/year=2009/month=11/091101.txt 0:20179
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypes/year=2009/month=12/091201.txt 0:20853
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(year,month)]
| partitions=24
|
02:SORT
| order by: year ASC NULLS LAST, month ASC NULLS LAST
| row-size=89B cardinality=610
|
01:EXCHANGE [HASH(`year`,`month`)]
|
00:SCAN HDFS [functional.alltypes]
partition predicates: `year` = 2009, `month` > 10
HDFS partitions=2/24 files=2 size=40.07KB
row-size=89B cardinality=610
====
# IMPALA-5293: noclustered hint prevents adding sort node
insert into table functional.alltypessmall
partition (year, month) /* +noclustered */
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
float_col, double_col, date_string_col, string_col, timestamp_col, year, month
from functional.alltypes
where year=2009 and month>10
---- PLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(`year`,`month`)]
| partitions=24
|
00:SCAN HDFS [functional.alltypes]
partition predicates: `year` = 2009, `month` > 10
HDFS partitions=2/24 files=2 size=40.07KB
row-size=89B cardinality=610
---- SCANRANGELOCATIONS
NODE 0:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypes/year=2009/month=11/091101.txt 0:20179
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypes/year=2009/month=12/091201.txt 0:20853
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(`year`,`month`)]
| partitions=24
|
01:EXCHANGE [HASH(`year`,`month`)]
|
00:SCAN HDFS [functional.alltypes]
partition predicates: `year` = 2009, `month` > 10
HDFS partitions=2/24 files=2 size=40.07KB
row-size=89B cardinality=610
====
# insert into fully dynamic partitions. The source table has no stats and the insert
# statement has a partition clause, so hash partition before the sink.
insert into table functional.alltypessmall
partition (year, month)
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
float_col, double_col, date_string_col, string_col, timestamp_col, int_col, int_col
from functional_seq_snap.alltypes
where year=2009 and month>10
---- PLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(int_col,int_col)]
| partitions=unavailable
|
01:SORT
| order by: int_col ASC NULLS LAST, int_col ASC NULLS LAST
| row-size=72B cardinality=520
|
00:SCAN HDFS [functional_seq_snap.alltypes]
partition predicates: `year` = 2009, `month` > 10
HDFS partitions=2/24 files=2 size=11.34KB
row-size=72B cardinality=520
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(int_col,int_col)]
| partitions=unavailable
|
02:SORT
| order by: int_col ASC NULLS LAST, int_col ASC NULLS LAST
| row-size=72B cardinality=520
|
01:EXCHANGE [HASH(int_col,int_col)]
|
00:SCAN HDFS [functional_seq_snap.alltypes]
partition predicates: `year` = 2009, `month` > 10
HDFS partitions=2/24 files=2 size=11.34KB
row-size=72B cardinality=520
====
# insert into fully dynamic partitions;
# partitioned output doesn't require repartitioning
insert into table functional.alltypessmall
partition (year, month)
select min(id), min(bool_col), min(tinyint_col), min(smallint_col), min(int_col),
min(bigint_col), min(float_col), min(double_col), min(date_string_col), min(string_col),
min(timestamp_col), year, month
from functional.alltypes
where year=2009 and month>10
group by year, month
---- PLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(`year`,`month`)]
| partitions=24
|
02:SORT
| order by: `year` ASC NULLS LAST, `month` ASC NULLS LAST
| row-size=80B cardinality=24
|
01:AGGREGATE [FINALIZE]
| output: min(id), min(bool_col), min(tinyint_col), min(smallint_col), min(int_col), min(bigint_col), min(float_col), min(double_col), min(date_string_col), min(string_col), min(timestamp_col)
| group by: `year`, `month`
| row-size=80B cardinality=24
|
00:SCAN HDFS [functional.alltypes]
partition predicates: `year` = 2009, `month` > 10
HDFS partitions=2/24 files=2 size=40.07KB
row-size=89B cardinality=610
---- SCANRANGELOCATIONS
NODE 0:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypes/year=2009/month=11/091101.txt 0:20179
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypes/year=2009/month=12/091201.txt 0:20853
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(`year`,`month`)]
| partitions=24
|
04:SORT
| order by: `year` ASC NULLS LAST, `month` ASC NULLS LAST
| row-size=80B cardinality=24
|
03:AGGREGATE [FINALIZE]
| output: min:merge(id), min:merge(bool_col), min:merge(tinyint_col), min:merge(smallint_col), min:merge(int_col), min:merge(bigint_col), min:merge(float_col), min:merge(double_col), min:merge(date_string_col), min:merge(string_col), min:merge(timestamp_col)
| group by: `year`, `month`
| row-size=80B cardinality=24
|
02:EXCHANGE [HASH(`year`,`month`)]
|
01:AGGREGATE [STREAMING]
| output: min(id), min(bool_col), min(tinyint_col), min(smallint_col), min(int_col), min(bigint_col), min(float_col), min(double_col), min(date_string_col), min(string_col), min(timestamp_col)
| group by: `year`, `month`
| row-size=80B cardinality=24
|
00:SCAN HDFS [functional.alltypes]
partition predicates: `year` = 2009, `month` > 10
HDFS partitions=2/24 files=2 size=40.07KB
row-size=89B cardinality=610
====
# insert into a partially dynamic partition
insert into table functional.alltypessmall
partition (year=2009, month)
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
float_col, double_col, date_string_col, string_col, timestamp_col, month
from functional.alltypes
where year=2009 and month>10
---- PLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(2009,month)]
| partitions=12
|
01:SORT
| order by: month ASC NULLS LAST
| row-size=85B cardinality=610
|
00:SCAN HDFS [functional.alltypes]
partition predicates: `year` = 2009, `month` > 10
HDFS partitions=2/24 files=2 size=40.07KB
row-size=85B cardinality=610
---- SCANRANGELOCATIONS
NODE 0:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypes/year=2009/month=11/091101.txt 0:20179
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypes/year=2009/month=12/091201.txt 0:20853
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(2009,month)]
| partitions=12
|
02:SORT
| order by: month ASC NULLS LAST
| row-size=85B cardinality=610
|
01:EXCHANGE [HASH(`month`)]
|
00:SCAN HDFS [functional.alltypes]
partition predicates: `year` = 2009, `month` > 10
HDFS partitions=2/24 files=2 size=40.07KB
row-size=85B cardinality=610
====
# insert into a partially dynamic partition
# partitioned output doesn't require repartitioning
insert into table functional.alltypessmall
partition (year=2009, month)
select min(id), min(bool_col), min(tinyint_col), min(smallint_col), min(int_col),
min(bigint_col), min(float_col), min(double_col), min(date_string_col), min(string_col),
min(timestamp_col), month
from functional.alltypes
where year=2009 and month>10
group by month
---- PLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(2009,`month`)]
| partitions=12
|
02:SORT
| order by: `month` ASC NULLS LAST
| row-size=76B cardinality=12
|
01:AGGREGATE [FINALIZE]
| output: min(id), min(bool_col), min(tinyint_col), min(smallint_col), min(int_col), min(bigint_col), min(float_col), min(double_col), min(date_string_col), min(string_col), min(timestamp_col)
| group by: `month`
| row-size=76B cardinality=12
|
00:SCAN HDFS [functional.alltypes]
partition predicates: `year` = 2009, `month` > 10
HDFS partitions=2/24 files=2 size=40.07KB
row-size=85B cardinality=610
---- SCANRANGELOCATIONS
NODE 0:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypes/year=2009/month=11/091101.txt 0:20179
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypes/year=2009/month=12/091201.txt 0:20853
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(2009,`month`)]
| partitions=12
|
04:SORT
| order by: `month` ASC NULLS LAST
| row-size=76B cardinality=12
|
03:AGGREGATE [FINALIZE]
| output: min:merge(id), min:merge(bool_col), min:merge(tinyint_col), min:merge(smallint_col), min:merge(int_col), min:merge(bigint_col), min:merge(float_col), min:merge(double_col), min:merge(date_string_col), min:merge(string_col), min:merge(timestamp_col)
| group by: `month`
| row-size=76B cardinality=12
|
02:EXCHANGE [HASH(`month`)]
|
01:AGGREGATE [STREAMING]
| output: min(id), min(bool_col), min(tinyint_col), min(smallint_col), min(int_col), min(bigint_col), min(float_col), min(double_col), min(date_string_col), min(string_col), min(timestamp_col)
| group by: `month`
| row-size=76B cardinality=12
|
00:SCAN HDFS [functional.alltypes]
partition predicates: `year` = 2009, `month` > 10
HDFS partitions=2/24 files=2 size=40.07KB
row-size=85B cardinality=610
====
# insert into a partially dynamic partition
insert into table functional.alltypessmall
partition (year, month=4)
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
float_col, double_col, date_string_col, string_col, timestamp_col, year
from functional.alltypes
where year>2009 and month=4
---- PLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(year,4)]
| partitions=2
|
01:SORT
| order by: year ASC NULLS LAST
| row-size=85B cardinality=300
|
00:SCAN HDFS [functional.alltypes]
partition predicates: `year` > 2009, `month` = 4
HDFS partitions=1/24 files=1 size=19.71KB
row-size=85B cardinality=300
---- SCANRANGELOCATIONS
NODE 0:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypes/year=2010/month=4/100401.txt 0:20179
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(year,4)]
| partitions=2
|
01:SORT
| order by: year ASC NULLS LAST
| row-size=85B cardinality=300
|
00:SCAN HDFS [functional.alltypes]
partition predicates: `year` > 2009, `month` = 4
HDFS partitions=1/24 files=1 size=19.71KB
row-size=85B cardinality=300
====
# insert with limit from partitioned table.
insert into table functional.alltypesnopart
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
float_col, double_col, date_string_col, string_col, timestamp_col
from functional.alltypes where year=2009 and month=1 limit 10
---- PLAN
WRITE TO HDFS [functional.alltypesnopart, OVERWRITE=false]
| partitions=1
|
00:SCAN HDFS [functional.alltypes]
partition predicates: `year` = 2009, `month` = 1
HDFS partitions=1/24 files=1 size=19.95KB
limit: 10
row-size=81B cardinality=10
---- SCANRANGELOCATIONS
NODE 0:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypes/year=2009/month=1/090101.txt 0:20433
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypesnopart, OVERWRITE=false]
| partitions=1
|
01:EXCHANGE [UNPARTITIONED]
| limit: 10
|
00:SCAN HDFS [functional.alltypes]
partition predicates: `year` = 2009, `month` = 1
HDFS partitions=1/24 files=1 size=19.95KB
limit: 10
row-size=81B cardinality=10
====
# static partition insert from a constant select
insert into table functional.alltypessmall
partition (year=2010, month=4)
select 100, false, 1, 1, 1, 10,
10.0, 10.0, "02/01/09", "1", cast("2009-02-01 00:01:00" as timestamp)
---- PLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(2010,4)]
| partitions=1
|
00:UNION
constant-operands=1
row-size=54B cardinality=1
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(2010,4)]
| partitions=1
|
00:UNION
constant-operands=1
row-size=54B cardinality=1
====
# dynamic partition insert from a constant select
insert into table functional.alltypessmall
partition (year, month)
select 100, false, 1, 1, 1, 10,
10.0, 10.0, "02/01/09", "1", cast("2009-02-01 00:01:00" as timestamp), 2010, 4
---- PLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(2010,4)]
| partitions=1
|
00:UNION
constant-operands=1
row-size=57B cardinality=1
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(2010,4)]
| partitions=1
|
00:UNION
constant-operands=1
row-size=57B cardinality=1
====
# static partition insert from values statement
insert into table functional.alltypessmall
partition (year=2010, month=4) values
(100, false, 1, 1, 1, 10, 10.0, 10.0, "02/01/09", "1", cast("2009-02-01 00:01:00" as timestamp)),
(200, true, 2, 2, 2, 20, 20.0, 20.0, "02/02/09", "2", cast("2009-02-02 00:01:00" as timestamp)),
(300, false, 3, 3, 3, 30, 30.0, 30.0, "02/03/09", "3", cast("2009-02-03 00:01:00" as timestamp))
---- PLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(2010,4)]
| partitions=1
|
00:UNION
constant-operands=3
row-size=55B cardinality=3
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(2010,4)]
| partitions=1
|
00:UNION
constant-operands=3
row-size=55B cardinality=3
====
# dynamic partition insert from values statement
insert into table functional.alltypessmall
partition (year, month) values
(100, false, 1, 1, 1, 10, 10.0, 10.0, "02/01/09", "1", cast("2009-02-01 00:01:00" as timestamp), 2010, 4),
(200, true, 2, 2, 2, 20, 20.0, 20.0, "02/02/09", "2", cast("2009-02-02 00:01:00" as timestamp), 2010, 5),
(300, false, 3, 3, 3, 30, 30.0, 30.0, "02/03/09", "3", cast("2009-02-03 00:01:00" as timestamp), 2010, 6)
---- PLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(2010,4)]
| partitions=9
|
01:SORT
| order by: 2010 ASC NULLS LAST, 4 ASC NULLS LAST
| row-size=58B cardinality=3
|
00:UNION
constant-operands=3
row-size=58B cardinality=3
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypessmall, OVERWRITE=false, PARTITION-KEYS=(2010,4)]
| partitions=9
|
01:SORT
| order by: 2010 ASC NULLS LAST, 4 ASC NULLS LAST
| row-size=58B cardinality=3
|
00:UNION
constant-operands=3
row-size=58B cardinality=3
====
# test static partition insert from a query with grouped aggregation
# we expect the insert fragment to be partitioned by the grouping exprs of the query stmt
# and not by the partition exprs of the insert stmt
insert into functional.alltypes(bigint_col, string_col) partition (year=2010, month=10)
select count(int_col), string_col from functional.alltypes
group by string_col
---- PLAN
WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(2010,10)]
| partitions=1
|
01:AGGREGATE [FINALIZE]
| output: count(int_col)
| group by: string_col
| row-size=21B cardinality=10
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=17B cardinality=7.30K
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(2010,10)]
| partitions=1
|
03:AGGREGATE [FINALIZE]
| output: count:merge(int_col)
| group by: string_col
| row-size=21B cardinality=10
|
02:EXCHANGE [HASH(string_col)]
|
01:AGGREGATE [STREAMING]
| output: count(int_col)
| group by: string_col
| row-size=21B cardinality=10
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=17B cardinality=7.30K
====
# test static partition insert from a query with distinct grouped aggregation
# we expect the insert fragment to be partitioned by the grouping exprs of the query stmt
# and not by the partition exprs of the insert stmt
insert into functional.alltypes(bigint_col, string_col) partition (year=2010, month=10)
select count(distinct int_col), string_col from functional.alltypes
group by string_col
---- PLAN
WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(2010,10)]
| partitions=1
|
02:AGGREGATE [FINALIZE]
| output: count(int_col)
| group by: string_col
| row-size=21B cardinality=10
|
01:AGGREGATE
| group by: string_col, int_col
| row-size=17B cardinality=100
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=17B cardinality=7.30K
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(2010,10)]
| partitions=1
|
06:AGGREGATE [FINALIZE]
| output: count:merge(int_col)
| group by: string_col
| row-size=21B cardinality=10
|
05:EXCHANGE [HASH(string_col)]
|
02:AGGREGATE [STREAMING]
| output: count(int_col)
| group by: string_col
| row-size=21B cardinality=10
|
04:AGGREGATE
| group by: string_col, int_col
| row-size=17B cardinality=100
|
03:EXCHANGE [HASH(string_col,int_col)]
|
01:AGGREGATE [STREAMING]
| group by: string_col, int_col
| row-size=17B cardinality=100
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=17B cardinality=7.30K
====
# test that the planner chooses to repartition before the table sink
# alltypes has column stats and based on the product of the NDVs of year and month
# the planner should choose to repartition before the table sink
insert into table functional.alltypes partition(year, month)
select * from functional.alltypes
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(year,month)]
| partitions=24
|
02:SORT
| order by: year ASC NULLS LAST, month ASC NULLS LAST
| row-size=89B cardinality=7.30K
|
01:EXCHANGE [HASH(functional.alltypes.year,functional.alltypes.month)]
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=89B cardinality=7.30K
====
# test noshuffle hint to prevent repartitioning (same query as above with hint)
insert into table functional.alltypes partition(year, month) [noshuffle]
select * from functional.alltypes
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(year,month)]
| partitions=24
|
01:SORT
| order by: year ASC NULLS LAST, month ASC NULLS LAST
| row-size=89B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=89B cardinality=7.30K
====
# same as above but with traditional commented hint at default hint location
insert into table functional.alltypes partition(year, month) /* +noshuffle */
select * from functional.alltypes
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(year,month)]
| partitions=24
|
01:SORT
| order by: year ASC NULLS LAST, month ASC NULLS LAST
| row-size=89B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=89B cardinality=7.30K
====
# same as above but with traditional commented hint at Oracle hint location
insert /* +noshuffle */ into table functional.alltypes partition(year, month)
select * from functional.alltypes
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(year,month)]
| partitions=24
|
01:SORT
| order by: year ASC NULLS LAST, month ASC NULLS LAST
| row-size=89B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=89B cardinality=7.30K
====
# same as above but with end-of-line commented hint
insert into table functional.alltypes partition(year, month)
-- +noshuffle
select * from functional.alltypes
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(year,month)]
| partitions=24
|
01:SORT
| order by: year ASC NULLS LAST, month ASC NULLS LAST
| row-size=89B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=89B cardinality=7.30K
====
# test that the planner does not repartition before the table sink
# alltypes has column stats and since year only has 2 distinct values the planner
# should choose not to repartition before the table sink
insert into table functional.alltypes partition(year, month=1)
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
float_col, double_col, date_string_col, string_col, timestamp_col, year
from functional.alltypes
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(year,1)]
| partitions=2
|
01:SORT
| order by: year ASC NULLS LAST
| row-size=85B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=85B cardinality=7.30K
====
# test shuffle hint to force repartitioning (same query as above with hint)
insert into table functional.alltypes partition(year, month=1) [shuffle]
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
float_col, double_col, date_string_col, string_col, timestamp_col, year
from functional.alltypes
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(year,1)]
| partitions=2
|
02:SORT
| order by: year ASC NULLS LAST
| row-size=85B cardinality=7.30K
|
01:EXCHANGE [HASH(`year`)]
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=85B cardinality=7.30K
====
# test insert/select stmt that contains an analytic function (IMPALA-1400)
insert into table functional.alltypestiny partition(year=2009, month=1)
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
float_col, double_col, date_string_col, string_col,
lag(timestamp_col, 1) over (partition by id order by id) as timestamp_col
from functional.alltypestiny
---- PLAN
WRITE TO HDFS [functional.alltypestiny, OVERWRITE=false, PARTITION-KEYS=(2009,1)]
| partitions=1
|
02:ANALYTIC
| functions: lag(timestamp_col, 1, NULL)
| partition by: id
| order by: id ASC
| window: ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
| row-size=97B cardinality=8
|
01:SORT
| order by: id ASC NULLS LAST
| row-size=81B cardinality=8
|
00:SCAN HDFS [functional.alltypestiny]
HDFS partitions=4/4 files=4 size=460B
row-size=81B cardinality=8
====
# IMPALA-3930: Test insert with shuffle hint on constant partition exprs. The table sink
# is executed at the coordinator.
insert into table functional.alltypes partition(year=2009, month=1) /* +shuffle */
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
float_col, double_col, date_string_col, string_col, timestamp_col
from functional.alltypes
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(2009,1)]
| partitions=1
|
01:EXCHANGE [UNPARTITIONED]
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=81B cardinality=7.30K
====
# IMPALA-3930: Same as above but with a dynamic partition insert.
insert into table functional.alltypes partition(year, month) /* +shuffle */
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
float_col, double_col, date_string_col, string_col, timestamp_col, 2009, 1
from functional.alltypes
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(2009,1)]
| partitions=1
|
01:EXCHANGE [UNPARTITIONED]
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=81B cardinality=7.30K
====
# IMPALA-3930: Same as above but with a mix of static/dynamic partition exprs, and
# with more complex constant exprs.
insert into table functional.alltypes partition(year, month=cast(10/2 as int)) /* +shuffle */
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
float_col, double_col, date_string_col, string_col, timestamp_col, cast(concat("2", "010") as smallint) - 1
from functional.alltypes
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(2009,5)]
| partitions=1
|
01:EXCHANGE [UNPARTITIONED]
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=81B cardinality=7.30K
====
# Test insert into an unpartitioned table with shuffle hint.
insert into table functional.alltypesnopart /* +shuffle */
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
float_col, double_col, date_string_col, string_col, timestamp_col
from functional.alltypes
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypesnopart, OVERWRITE=false]
| partitions=1
|
01:EXCHANGE [UNPARTITIONED]
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=81B cardinality=7.30K
====
# IMPALA-5293: ensure insert into partitioned table adds sort node without clustered hint.
insert into table functional.alltypes partition(year, month)
select * from functional.alltypes
---- PLAN
WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(year,month)]
| partitions=24
|
01:SORT
| order by: year ASC NULLS LAST, month ASC NULLS LAST
| row-size=89B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=89B cardinality=7.30K
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(year,month)]
| partitions=24
|
02:SORT
| order by: year ASC NULLS LAST, month ASC NULLS LAST
| row-size=89B cardinality=7.30K
|
01:EXCHANGE [HASH(functional.alltypes.year,functional.alltypes.month)]
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=89B cardinality=7.30K
====
# IMPALA-5293: ensure insert into partitioned table adds sort node without clustered hint.
insert into table functional.alltypes partition(year, month) /*+ noshuffle */
select * from functional.alltypes
---- PLAN
WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(year,month)]
| partitions=24
|
01:SORT
| order by: year ASC NULLS LAST, month ASC NULLS LAST
| row-size=89B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=89B cardinality=7.30K
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(year,month)]
| partitions=24
|
01:SORT
| order by: year ASC NULLS LAST, month ASC NULLS LAST
| row-size=89B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=89B cardinality=7.30K
====
# IMPALA-5293: ensure insert into partitioned table adds sort node without clustered hint.
# Subquery in WHERE-clause exercises the reset() + analyze() path during rewrite.
insert into table functional.alltypes partition(year, month)
select * from functional.alltypes
where int_col = (select max(int_col) from functional.alltypes)
---- PLAN
WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(year,month)]
| partitions=24
|
04:SORT
| order by: year ASC NULLS LAST, month ASC NULLS LAST
| row-size=89B cardinality=730
|
03:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: int_col = max(int_col)
| runtime filters: RF000 <- max(int_col)
| row-size=89B cardinality=730
|
|--02:AGGREGATE [FINALIZE]
| | output: max(int_col)
| | row-size=4B cardinality=1
| |
| 01:SCAN HDFS [functional.alltypes]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=4B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> int_col
row-size=89B cardinality=7.30K
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypes, OVERWRITE=false, PARTITION-KEYS=(year,month)]
| partitions=24
|
08:SORT
| order by: year ASC NULLS LAST, month ASC NULLS LAST
| row-size=89B cardinality=730
|
07:EXCHANGE [HASH(functional.alltypes.year,functional.alltypes.month)]
|
03:HASH JOIN [LEFT SEMI JOIN, BROADCAST]
| hash predicates: int_col = max(int_col)
| runtime filters: RF000 <- max(int_col)
| row-size=89B cardinality=730
|
|--06:EXCHANGE [BROADCAST]
| |
| 05:AGGREGATE [FINALIZE]
| | output: max:merge(int_col)
| | row-size=4B cardinality=1
| |
| 04:EXCHANGE [UNPARTITIONED]
| |
| 02:AGGREGATE
| | output: max(int_col)
| | row-size=4B cardinality=1
| |
| 01:SCAN HDFS [functional.alltypes]
| HDFS partitions=24/24 files=24 size=478.45KB
| row-size=4B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> int_col
row-size=89B cardinality=7.30K
====
# IMPALA-5293: ensure insert into non-partitioned table does not add sort node.
insert into table functional.alltypesnopart
select * from functional.alltypesnopart
---- PLAN
WRITE TO HDFS [functional.alltypesnopart, OVERWRITE=false]
| partitions=1
|
00:SCAN HDFS [functional.alltypesnopart]
HDFS partitions=1/1 files=0 size=0B
row-size=72B cardinality=0
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypesnopart, OVERWRITE=false]
| partitions=1
|
00:SCAN HDFS [functional.alltypesnopart]
HDFS partitions=1/1 files=0 size=0B
row-size=72B cardinality=0
====
# IMPALA-5293: ensure insert into non-partitioned table does not add sort node.
insert into table functional.alltypesnopart /*+ shuffle */
select * from functional.alltypesnopart
---- PLAN
WRITE TO HDFS [functional.alltypesnopart, OVERWRITE=false]
| partitions=1
|
00:SCAN HDFS [functional.alltypesnopart]
HDFS partitions=1/1 files=0 size=0B
row-size=72B cardinality=0
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional.alltypesnopart, OVERWRITE=false]
| partitions=1
|
01:EXCHANGE [UNPARTITIONED]
|
00:SCAN HDFS [functional.alltypesnopart]
HDFS partitions=1/1 files=0 size=0B
row-size=72B cardinality=0
====
# Test that Iceberg partitioned inserts shuffle and sort data based on the
# partitioning columns.
insert into functional_parquet.iceberg_int_partitioned
select id % 3, id % 2, id from functional.alltypes
---- PLAN
WRITE TO HDFS [functional_parquet.iceberg_int_partitioned, OVERWRITE=false, PARTITION-KEYS=(id % 3,id % 2)]
| partitions=53290000
|
01:SORT
| order by: id % 3 ASC NULLS LAST, id % 2 ASC NULLS LAST
| row-size=4B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=4B cardinality=7.30K
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional_parquet.iceberg_int_partitioned, OVERWRITE=false, PARTITION-KEYS=(id % 3,id % 2)]
| partitions=53290000
|
02:SORT
| order by: id % 3 ASC NULLS LAST, id % 2 ASC NULLS LAST
| row-size=4B cardinality=7.30K
|
01:EXCHANGE [HASH(id % 3,id % 2)]
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=4B cardinality=7.30K
====
# Test that Iceberg partition transform inserts shuffle and sort data based on the
# transforms. The table also uses ZORDER, which should be reflected in the SORT node.
insert into functional_parquet.iceberg_partition_transforms_zorder
select years_add(timestamp_col, id % 3),
concat(string_col, date_string_col),
cast(id * 3 as int),
cast(10000 - id as int)
from functional.alltypes
---- PLAN
WRITE TO HDFS [functional_parquet.iceberg_partition_transforms_zorder, OVERWRITE=false, PARTITION-KEYS=(year(years_add(timestamp_col, id % 3)),iceberg_bucket_transform(concat(string_col, date_string_col), 5))]
| partitions=5372800
|
01:SORT
| order by: LEXICAL: year(years_add(timestamp_col, id % 3)) ASC NULLS LAST, iceberg_bucket_transform(concat(string_col, date_string_col), 5) ASC NULLS LAST, ZORDER: CAST(id * 3 AS INT), CAST(10000 - id AS INT)
| row-size=61B cardinality=7.30K
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=53B cardinality=7.30K
---- DISTRIBUTEDPLAN
WRITE TO HDFS [functional_parquet.iceberg_partition_transforms_zorder, OVERWRITE=false, PARTITION-KEYS=(year(years_add(timestamp_col, id % 3)),iceberg_bucket_transform(concat(string_col, date_string_col), 5))]
| partitions=5372800
|
02:SORT
| order by: LEXICAL: year(years_add(timestamp_col, id % 3)) ASC NULLS LAST, iceberg_bucket_transform(concat(string_col, date_string_col), 5) ASC NULLS LAST, ZORDER: CAST(id * 3 AS INT), CAST(10000 - id AS INT)
| row-size=61B cardinality=7.30K
|
01:EXCHANGE [HASH(year(years_add(timestamp_col, id % 3)),iceberg_bucket_transform(concat(string_col, date_string_col), 5))]
|
00:SCAN HDFS [functional.alltypes]
HDFS partitions=24/24 files=24 size=478.45KB
row-size=53B cardinality=7.30K
====