mirror of
https://github.com/apache/impala.git
synced 2026-01-01 09:00:42 -05:00
The following changes are included in this commit: 1. Modified the alltypesagg table to include an additional partition key that has nulls. 2. Added a number of tests in hdfs.test that exercise the partition pruning logic (see IMPALA-887). 3. Modified all the tests that are affected by the change in alltypesagg. Change-Id: I1a769375aaa71273341522eb94490ba5e4c6f00d Reviewed-on: http://gerrit.ent.cloudera.com:8080/2874 Reviewed-by: Dimitris Tsirogiannis <dtsirogiannis@cloudera.com> Tested-by: jenkins Reviewed-on: http://gerrit.ent.cloudera.com:8080/3236
1039 lines
30 KiB
Plaintext
1039 lines
30 KiB
Plaintext
# subquery with aggregation and order by/limit, as left-hand side of join;
|
|
# having clause in subquery is transferred to the merge agg step in the distributed plan
|
|
select *
|
|
from (
|
|
select int_col, count(*)
|
|
from functional.alltypessmall
|
|
where month = 1
|
|
group by int_col
|
|
having count(*) > 1
|
|
order by count(*) desc limit 5
|
|
) t1
|
|
join functional.alltypes t2 on (t1.int_col = t2.int_col)
|
|
where month = 1
|
|
---- PLAN
|
|
04:HASH JOIN [INNER JOIN]
|
|
| hash predicates: t2.int_col = int_col
|
|
|
|
|
|--02:TOP-N [LIMIT=5]
|
|
| | order by: count(*) DESC
|
|
| |
|
|
| 01:AGGREGATE [FINALIZE]
|
|
| | output: count(*)
|
|
| | group by: int_col
|
|
| | having: count(*) > 1
|
|
| |
|
|
| 00:SCAN HDFS [functional.alltypessmall]
|
|
| partitions=1/4 size=1.57KB
|
|
|
|
|
03:SCAN HDFS [functional.alltypes t2]
|
|
partitions=2/24 size=40.32KB
|
|
---- DISTRIBUTEDPLAN
|
|
09:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
04:HASH JOIN [INNER JOIN, BROADCAST]
|
|
| hash predicates: t2.int_col = int_col
|
|
|
|
|
|--08:EXCHANGE [BROADCAST]
|
|
| |
|
|
| 07:MERGING-EXCHANGE [UNPARTITIONED]
|
|
| | order by: count(*) DESC
|
|
| | limit: 5
|
|
| |
|
|
| 02:TOP-N [LIMIT=5]
|
|
| | order by: count(*) DESC
|
|
| |
|
|
| 06:AGGREGATE [MERGE FINALIZE]
|
|
| | output: sum(count(*))
|
|
| | group by: int_col
|
|
| | having: count(*) > 1
|
|
| |
|
|
| 05:EXCHANGE [HASH(int_col)]
|
|
| |
|
|
| 01:AGGREGATE
|
|
| | output: count(*)
|
|
| | group by: int_col
|
|
| |
|
|
| 00:SCAN HDFS [functional.alltypessmall]
|
|
| partitions=1/4 size=1.57KB
|
|
|
|
|
03:SCAN HDFS [functional.alltypes t2]
|
|
partitions=2/24 size=40.32KB
|
|
====
|
|
# simple full scan subquery
|
|
select * from (select y x from (select id y from functional_hbase.alltypessmall) a) b
|
|
---- PLAN
|
|
00:SCAN HBASE [functional_hbase.alltypessmall]
|
|
---- DISTRIBUTEDPLAN
|
|
01:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
00:SCAN HBASE [functional_hbase.alltypessmall]
|
|
====
|
|
# subquery doing join
|
|
select * from (select t2.*
|
|
from functional.testtbl t1 join functional.testtbl t2 using(id)
|
|
where t1.zip = 94611) x
|
|
---- PLAN
|
|
02:HASH JOIN [INNER JOIN]
|
|
| hash predicates: t1.id = t2.id
|
|
|
|
|
|--01:SCAN HDFS [functional.testtbl t2]
|
|
| partitions=1/1 size=0B compact
|
|
|
|
|
00:SCAN HDFS [functional.testtbl t1]
|
|
partitions=1/1 size=0B
|
|
predicates: t1.zip = 94611
|
|
---- DISTRIBUTEDPLAN
|
|
04:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
02:HASH JOIN [INNER JOIN, BROADCAST]
|
|
| hash predicates: t1.id = t2.id
|
|
|
|
|
|--03:EXCHANGE [BROADCAST]
|
|
| |
|
|
| 01:SCAN HDFS [functional.testtbl t2]
|
|
| partitions=1/1 size=0B
|
|
|
|
|
00:SCAN HDFS [functional.testtbl t1]
|
|
partitions=1/1 size=0B
|
|
predicates: t1.zip = 94611
|
|
====
|
|
# subquery doing join
|
|
# multiple join predicates;
|
|
# scan predicates get propagated correctly;
|
|
# non-eq join predicates are evaluated as extra conjuncts by the join node
|
|
select *
|
|
from
|
|
(select a.*
|
|
from functional.alltypesagg a
|
|
right outer join functional.alltypessmall b using (id, int_col)
|
|
where a.day >= 6
|
|
and b.month > 2
|
|
and a.tinyint_col = 15
|
|
and b.string_col = '15'
|
|
and a.tinyint_col + b.tinyint_col < 15) x
|
|
---- PLAN
|
|
02:HASH JOIN [RIGHT OUTER JOIN]
|
|
| hash predicates: a.id = b.id, a.int_col = b.int_col
|
|
| other predicates: a.day >= 6, a.tinyint_col = 15, a.tinyint_col + b.tinyint_col < 15
|
|
|
|
|
|--01:SCAN HDFS [functional.alltypessmall b]
|
|
| partitions=2/4 size=3.17KB compact
|
|
| predicates: b.string_col = '15'
|
|
|
|
|
00:SCAN HDFS [functional.alltypesagg a]
|
|
partitions=5/11 size=372.38KB
|
|
predicates: a.tinyint_col = 15
|
|
---- SCANRANGELOCATIONS
|
|
NODE 0:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=10/100110.txt 0:76263
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=6/100106.txt 0:76263
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=7/100107.txt 0:76263
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=8/100108.txt 0:76263
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=9/100109.txt 0:76263
|
|
NODE 1:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=4/090401.txt 0:1621
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=3/090301.txt 0:1620
|
|
---- DISTRIBUTEDPLAN
|
|
05:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
02:HASH JOIN [RIGHT OUTER JOIN, PARTITIONED]
|
|
| hash predicates: a.id = b.id, a.int_col = b.int_col
|
|
| other predicates: a.day >= 6, a.tinyint_col = 15, a.tinyint_col + b.tinyint_col < 15
|
|
|
|
|
|--04:EXCHANGE [HASH(b.id,b.int_col)]
|
|
| |
|
|
| 01:SCAN HDFS [functional.alltypessmall b]
|
|
| partitions=2/4 size=3.17KB
|
|
| predicates: b.string_col = '15'
|
|
|
|
|
03:EXCHANGE [HASH(a.id,a.int_col)]
|
|
|
|
|
00:SCAN HDFS [functional.alltypesagg a]
|
|
partitions=5/11 size=372.38KB
|
|
predicates: a.tinyint_col = 15
|
|
====
|
|
# predicate pushdown
|
|
select * from (select * from functional_hbase.alltypessmall) a where id < 5
|
|
---- PLAN
|
|
00:SCAN HBASE [functional_hbase.alltypessmall]
|
|
predicates: functional_hbase.alltypessmall.id < 5
|
|
====
|
|
# subquery join
|
|
# multiple join predicates;
|
|
# scan predicates get propagated correctly;
|
|
# non-eq join predicates are evaluated as extra conjuncts by the join node
|
|
select *
|
|
from
|
|
(select id, int_col, day, tinyint_col from functional.alltypesagg) a
|
|
right outer join
|
|
(select id, int_col, month, string_col, tinyint_col
|
|
from functional.alltypessmall) b using (id, int_col)
|
|
where a.day >= 6
|
|
and b.month > 2
|
|
and a.tinyint_col = 15
|
|
and b.string_col = '15'
|
|
and a.tinyint_col + b.tinyint_col < 15
|
|
and b.id + 15 = 27
|
|
---- PLAN
|
|
02:HASH JOIN [RIGHT OUTER JOIN]
|
|
| hash predicates: id = id, int_col = int_col
|
|
| other predicates: day >= 6, tinyint_col = 15, tinyint_col + tinyint_col < 15
|
|
|
|
|
|--01:SCAN HDFS [functional.alltypessmall]
|
|
| partitions=2/4 size=3.17KB compact
|
|
| predicates: functional.alltypessmall.string_col = '15', functional.alltypessmall.id + 15 = 27
|
|
|
|
|
00:SCAN HDFS [functional.alltypesagg]
|
|
partitions=5/11 size=372.38KB
|
|
predicates: functional.alltypesagg.tinyint_col = 15, functional.alltypesagg.id + 15 = 27
|
|
---- DISTRIBUTEDPLAN
|
|
05:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
02:HASH JOIN [RIGHT OUTER JOIN, PARTITIONED]
|
|
| hash predicates: id = id, int_col = int_col
|
|
| other predicates: day >= 6, tinyint_col = 15, tinyint_col + tinyint_col < 15
|
|
|
|
|
|--04:EXCHANGE [HASH(id,int_col)]
|
|
| |
|
|
| 01:SCAN HDFS [functional.alltypessmall]
|
|
| partitions=2/4 size=3.17KB
|
|
| predicates: functional.alltypessmall.string_col = '15', functional.alltypessmall.id + 15 = 27
|
|
|
|
|
03:EXCHANGE [HASH(id,int_col)]
|
|
|
|
|
00:SCAN HDFS [functional.alltypesagg]
|
|
partitions=5/11 size=372.38KB
|
|
predicates: functional.alltypesagg.tinyint_col = 15, functional.alltypesagg.id + 15 = 27
|
|
====
|
|
# subquery join
|
|
# multiple join predicates;
|
|
# scan predicates get propagated correctly;
|
|
# non-eq join predicates are evaluated as extra conjuncts by the join node
|
|
select *
|
|
from
|
|
(select id, int_col, day, tinyint_col
|
|
from
|
|
(select id, int_col, day, tinyint_col from functional.alltypesagg) a0
|
|
where a0.day >= 6) a
|
|
right outer join
|
|
(select id, int_col, month, string_col, tinyint_col from functional.alltypessmall) b
|
|
using (id, int_col)
|
|
where b.month > 2
|
|
and a.tinyint_col = 15
|
|
and b.string_col = '15'
|
|
and a.tinyint_col + b.tinyint_col < 15
|
|
---- PLAN
|
|
02:HASH JOIN [RIGHT OUTER JOIN]
|
|
| hash predicates: id = id, int_col = int_col
|
|
| other predicates: tinyint_col = 15, tinyint_col + tinyint_col < 15
|
|
|
|
|
|--01:SCAN HDFS [functional.alltypessmall]
|
|
| partitions=2/4 size=3.17KB compact
|
|
| predicates: functional.alltypessmall.string_col = '15'
|
|
|
|
|
00:SCAN HDFS [functional.alltypesagg]
|
|
partitions=5/11 size=372.38KB
|
|
predicates: functional.alltypesagg.tinyint_col = 15
|
|
---- SCANRANGELOCATIONS
|
|
NODE 0:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=10/100110.txt 0:76263
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=6/100106.txt 0:76263
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=7/100107.txt 0:76263
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=8/100108.txt 0:76263
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=9/100109.txt 0:76263
|
|
NODE 1:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=4/090401.txt 0:1621
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=3/090301.txt 0:1620
|
|
---- DISTRIBUTEDPLAN
|
|
05:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
02:HASH JOIN [RIGHT OUTER JOIN, PARTITIONED]
|
|
| hash predicates: id = id, int_col = int_col
|
|
| other predicates: tinyint_col = 15, tinyint_col + tinyint_col < 15
|
|
|
|
|
|--04:EXCHANGE [HASH(id,int_col)]
|
|
| |
|
|
| 01:SCAN HDFS [functional.alltypessmall]
|
|
| partitions=2/4 size=3.17KB
|
|
| predicates: functional.alltypessmall.string_col = '15'
|
|
|
|
|
03:EXCHANGE [HASH(id,int_col)]
|
|
|
|
|
00:SCAN HDFS [functional.alltypesagg]
|
|
partitions=5/11 size=372.38KB
|
|
predicates: functional.alltypesagg.tinyint_col = 15
|
|
====
|
|
# complex join, having joined subquery on the rhs, and predicate
|
|
# at multiple subquery levels. This tests that both sides of a join
|
|
# that is itself on the build side of another join get compacted.
|
|
select x.smallint_col, x.id, x.tinyint_col, c.id, x.int_col, x.float_col, c.string_col
|
|
from functional.alltypessmall c
|
|
join (
|
|
select a.smallint_col smallint_col, a.tinyint_col tinyint_col, a.day day,
|
|
a.int_col int_col, a.month month, b.float_col float_col, b.id id
|
|
from ( select * from functional.alltypesagg a where month=1 ) a
|
|
join functional.alltypessmall b on (a.smallint_col = b.id)
|
|
) x on (x.tinyint_col = c.id)
|
|
where x.day=1
|
|
and x.int_col > 899
|
|
and x.float_col > 4.5
|
|
and c.string_col < '7'
|
|
and x.int_col + x.float_col + cast(c.string_col as float) < 1000
|
|
---- PLAN
|
|
04:HASH JOIN [INNER JOIN]
|
|
| hash predicates: a.tinyint_col = c.id
|
|
| other predicates: a.int_col + b.float_col + CAST(c.string_col AS FLOAT) < 1000
|
|
|
|
|
|--00:SCAN HDFS [functional.alltypessmall c]
|
|
| partitions=4/4 size=6.32KB compact
|
|
| predicates: c.string_col < '7'
|
|
|
|
|
03:HASH JOIN [INNER JOIN]
|
|
| hash predicates: a.smallint_col = b.id
|
|
|
|
|
|--02:SCAN HDFS [functional.alltypessmall b]
|
|
| partitions=4/4 size=6.32KB compact
|
|
| predicates: b.float_col > 4.5
|
|
|
|
|
01:SCAN HDFS [functional.alltypesagg a]
|
|
partitions=1/11 size=73.39KB
|
|
predicates: a.int_col > 899
|
|
---- SCANRANGELOCATIONS
|
|
NODE 0:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=2/090201.txt 0:1621
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=1/090101.txt 0:1610
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=4/090401.txt 0:1621
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=3/090301.txt 0:1620
|
|
NODE 1:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=1/100101.txt 0:75153
|
|
NODE 2:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=2/090201.txt 0:1621
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=1/090101.txt 0:1610
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=4/090401.txt 0:1621
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=3/090301.txt 0:1620
|
|
---- DISTRIBUTEDPLAN
|
|
07:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
04:HASH JOIN [INNER JOIN, BROADCAST]
|
|
| hash predicates: a.tinyint_col = c.id
|
|
| other predicates: a.int_col + b.float_col + CAST(c.string_col AS FLOAT) < 1000
|
|
|
|
|
|--06:EXCHANGE [BROADCAST]
|
|
| |
|
|
| 00:SCAN HDFS [functional.alltypessmall c]
|
|
| partitions=4/4 size=6.32KB
|
|
| predicates: c.string_col < '7'
|
|
|
|
|
03:HASH JOIN [INNER JOIN, BROADCAST]
|
|
| hash predicates: a.smallint_col = b.id
|
|
|
|
|
|--05:EXCHANGE [BROADCAST]
|
|
| |
|
|
| 02:SCAN HDFS [functional.alltypessmall b]
|
|
| partitions=4/4 size=6.32KB
|
|
| predicates: b.float_col > 4.5
|
|
|
|
|
01:SCAN HDFS [functional.alltypesagg a]
|
|
partitions=1/11 size=73.39KB
|
|
predicates: a.int_col > 899
|
|
====
|
|
# with grouping on top of an inline view
|
|
select tinyint_col, count(*), min(tinyint_col), max(tinyint_col), sum(tinyint_col),
|
|
avg(tinyint_col)
|
|
from (select * from functional.alltypesagg) a
|
|
group by 1
|
|
---- PLAN
|
|
01:AGGREGATE [FINALIZE]
|
|
| output: count(*), min(functional.alltypesagg.tinyint_col), max(functional.alltypesagg.tinyint_col), sum(functional.alltypesagg.tinyint_col), count(functional.alltypesagg.tinyint_col)
|
|
| group by: functional.alltypesagg.tinyint_col
|
|
|
|
|
00:SCAN HDFS [functional.alltypesagg]
|
|
partitions=11/11 size=814.73KB
|
|
---- SCANRANGELOCATIONS
|
|
NODE 0:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=2/100102.txt 0:76263
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=3/100103.txt 0:76263
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=1/100101.txt 0:75153
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=10/100110.txt 0:76263
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=6/100106.txt 0:76263
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=7/100107.txt 0:76263
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=4/100104.txt 0:76263
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=5/100105.txt 0:76263
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=__HIVE_DEFAULT_PARTITION__/000000_0 0:72759
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=8/100108.txt 0:76263
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=9/100109.txt 0:76263
|
|
---- DISTRIBUTEDPLAN
|
|
04:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
03:AGGREGATE [MERGE FINALIZE]
|
|
| output: sum(count(*)), min(min(tinyint_col)), max(max(tinyint_col)), sum(sum(tinyint_col)), sum(count(tinyint_col))
|
|
| group by: tinyint_col
|
|
|
|
|
02:EXCHANGE [HASH(tinyint_col)]
|
|
|
|
|
01:AGGREGATE
|
|
| output: count(*), min(functional.alltypesagg.tinyint_col), max(functional.alltypesagg.tinyint_col), sum(functional.alltypesagg.tinyint_col), count(functional.alltypesagg.tinyint_col)
|
|
| group by: functional.alltypesagg.tinyint_col
|
|
|
|
|
00:SCAN HDFS [functional.alltypesagg]
|
|
partitions=11/11 size=814.73KB
|
|
====
|
|
# with grouping inside the inline view
|
|
select * from (
|
|
select tinyint_col, count(*), min(tinyint_col), max(tinyint_col), sum(tinyint_col),
|
|
avg(tinyint_col)
|
|
from functional.alltypesagg
|
|
group by 1
|
|
) a
|
|
---- PLAN
|
|
01:AGGREGATE [FINALIZE]
|
|
| output: count(*), min(tinyint_col), max(tinyint_col), sum(tinyint_col), count(tinyint_col)
|
|
| group by: tinyint_col
|
|
|
|
|
00:SCAN HDFS [functional.alltypesagg]
|
|
partitions=11/11 size=814.73KB
|
|
---- DISTRIBUTEDPLAN
|
|
04:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
03:AGGREGATE [MERGE FINALIZE]
|
|
| output: sum(count(*)), min(min(tinyint_col)), max(max(tinyint_col)), sum(sum(tinyint_col)), sum(count(tinyint_col))
|
|
| group by: tinyint_col
|
|
|
|
|
02:EXCHANGE [HASH(tinyint_col)]
|
|
|
|
|
01:AGGREGATE
|
|
| output: count(*), min(tinyint_col), max(tinyint_col), sum(tinyint_col), count(tinyint_col)
|
|
| group by: tinyint_col
|
|
|
|
|
00:SCAN HDFS [functional.alltypesagg]
|
|
partitions=11/11 size=814.73KB
|
|
====
|
|
select c1, c2, c3
|
|
from
|
|
(select c1, c2, c3
|
|
from
|
|
(select int_col c1, sum(float_col) c2, min(float_col) c3
|
|
from functional_hbase.alltypessmall
|
|
group by 1) x
|
|
order by 2,3 desc
|
|
limit 5
|
|
) y
|
|
---- PLAN
|
|
02:TOP-N [LIMIT=5]
|
|
| order by: c2 ASC, c3 DESC
|
|
|
|
|
01:AGGREGATE [FINALIZE]
|
|
| output: sum(float_col), min(float_col)
|
|
| group by: int_col
|
|
|
|
|
00:SCAN HBASE [functional_hbase.alltypessmall]
|
|
---- DISTRIBUTEDPLAN
|
|
05:MERGING-EXCHANGE [UNPARTITIONED]
|
|
| order by: c2 ASC, c3 DESC
|
|
| limit: 5
|
|
|
|
|
02:TOP-N [LIMIT=5]
|
|
| order by: c2 ASC, c3 DESC
|
|
|
|
|
04:AGGREGATE [MERGE FINALIZE]
|
|
| output: sum(sum(float_col)), min(min(float_col))
|
|
| group by: int_col
|
|
|
|
|
03:EXCHANGE [HASH(int_col)]
|
|
|
|
|
01:AGGREGATE
|
|
| output: sum(float_col), min(float_col)
|
|
| group by: int_col
|
|
|
|
|
00:SCAN HBASE [functional_hbase.alltypessmall]
|
|
====
|
|
select c1, x2
|
|
from (
|
|
select c1, min(c2) x2
|
|
from (
|
|
select c1, c2, c3
|
|
from (
|
|
select int_col c1, tinyint_col c2, min(float_col) c3
|
|
from functional_hbase.alltypessmall
|
|
group by 1, 2
|
|
order by 1,2
|
|
limit 1
|
|
) x
|
|
) x2
|
|
group by c1
|
|
) y
|
|
order by 2,1 desc
|
|
limit 0
|
|
---- PLAN
|
|
04:TOP-N [LIMIT=0]
|
|
| order by: x2 ASC, c1 DESC
|
|
|
|
|
03:AGGREGATE [FINALIZE]
|
|
| output: min(tinyint_col)
|
|
| group by: int_col
|
|
|
|
|
02:TOP-N [LIMIT=1]
|
|
| order by: int_col ASC, tinyint_col ASC
|
|
|
|
|
01:AGGREGATE [FINALIZE]
|
|
| output: min(float_col)
|
|
| group by: int_col, tinyint_col
|
|
|
|
|
00:SCAN HBASE [functional_hbase.alltypessmall]
|
|
---- DISTRIBUTEDPLAN
|
|
04:TOP-N [LIMIT=0]
|
|
| order by: x2 ASC, c1 DESC
|
|
|
|
|
03:AGGREGATE [FINALIZE]
|
|
| output: min(tinyint_col)
|
|
| group by: int_col
|
|
|
|
|
07:MERGING-EXCHANGE [UNPARTITIONED]
|
|
| order by: int_col ASC, tinyint_col ASC
|
|
| limit: 1
|
|
|
|
|
02:TOP-N [LIMIT=1]
|
|
| order by: int_col ASC, tinyint_col ASC
|
|
|
|
|
06:AGGREGATE [MERGE FINALIZE]
|
|
| output: min(min(float_col))
|
|
| group by: int_col, tinyint_col
|
|
|
|
|
05:EXCHANGE [HASH(int_col,tinyint_col)]
|
|
|
|
|
01:AGGREGATE
|
|
| output: min(float_col)
|
|
| group by: int_col, tinyint_col
|
|
|
|
|
00:SCAN HBASE [functional_hbase.alltypessmall]
|
|
====
|
|
# distinct *
|
|
select distinct *
|
|
from (select distinct * from functional.testtbl) x
|
|
---- PLAN
|
|
02:AGGREGATE [FINALIZE]
|
|
| group by: functional.testtbl.id, functional.testtbl.name, functional.testtbl.zip
|
|
|
|
|
01:AGGREGATE [FINALIZE]
|
|
| group by: functional.testtbl.id, functional.testtbl.name, functional.testtbl.zip
|
|
|
|
|
00:SCAN HDFS [functional.testtbl]
|
|
partitions=1/1 size=0B
|
|
---- DISTRIBUTEDPLAN
|
|
07:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
06:AGGREGATE [MERGE FINALIZE]
|
|
| group by: x.id, x.name, x.zip
|
|
|
|
|
05:EXCHANGE [HASH(x.id,x.name,x.zip)]
|
|
|
|
|
02:AGGREGATE
|
|
| group by: functional.testtbl.id, functional.testtbl.name, functional.testtbl.zip
|
|
|
|
|
04:AGGREGATE [MERGE FINALIZE]
|
|
| group by: functional.testtbl.id, functional.testtbl.name, functional.testtbl.zip
|
|
|
|
|
03:EXCHANGE [HASH(functional.testtbl.id,functional.testtbl.name,functional.testtbl.zip)]
|
|
|
|
|
01:AGGREGATE
|
|
| group by: functional.testtbl.id, functional.testtbl.name, functional.testtbl.zip
|
|
|
|
|
00:SCAN HDFS [functional.testtbl]
|
|
partitions=1/1 size=0B
|
|
====
|
|
# distinct w/ explicit select list
|
|
select distinct id, zip
|
|
from (select distinct * from functional.testtbl) x
|
|
---- PLAN
|
|
02:AGGREGATE [FINALIZE]
|
|
| group by: functional.testtbl.id, functional.testtbl.zip
|
|
|
|
|
01:AGGREGATE [FINALIZE]
|
|
| group by: functional.testtbl.id, functional.testtbl.name, functional.testtbl.zip
|
|
|
|
|
00:SCAN HDFS [functional.testtbl]
|
|
partitions=1/1 size=0B
|
|
---- DISTRIBUTEDPLAN
|
|
07:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
06:AGGREGATE [MERGE FINALIZE]
|
|
| group by: id, zip
|
|
|
|
|
05:EXCHANGE [HASH(id,zip)]
|
|
|
|
|
02:AGGREGATE
|
|
| group by: functional.testtbl.id, functional.testtbl.zip
|
|
|
|
|
04:AGGREGATE [MERGE FINALIZE]
|
|
| group by: functional.testtbl.id, functional.testtbl.name, functional.testtbl.zip
|
|
|
|
|
03:EXCHANGE [HASH(functional.testtbl.id,functional.testtbl.name,functional.testtbl.zip)]
|
|
|
|
|
01:AGGREGATE
|
|
| group by: functional.testtbl.id, functional.testtbl.name, functional.testtbl.zip
|
|
|
|
|
00:SCAN HDFS [functional.testtbl]
|
|
partitions=1/1 size=0B
|
|
====
|
|
# aggregate with group-by, having
|
|
select *
|
|
from (
|
|
select int_col % 7 c1, count(*) c2, avg(int_col) c3
|
|
from (
|
|
select * from functional.alltypesagg
|
|
) a
|
|
group by 1
|
|
having avg(int_col) > 500 or count(*) = 10
|
|
) b
|
|
where c1 is not null
|
|
and c2 > 10
|
|
---- PLAN
|
|
01:AGGREGATE [FINALIZE]
|
|
| output: count(*), sum(functional.alltypesagg.int_col), count(functional.alltypesagg.int_col)
|
|
| group by: functional.alltypesagg.int_col % 7
|
|
| having: sum(int_col) / count(int_col) > 500 OR count(*) = 10, count(*) > 10, int_col % 7 IS NOT NULL
|
|
|
|
|
00:SCAN HDFS [functional.alltypesagg]
|
|
partitions=11/11 size=814.73KB
|
|
---- DISTRIBUTEDPLAN
|
|
04:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
03:AGGREGATE [MERGE FINALIZE]
|
|
| output: sum(count(*)), sum(sum(int_col)), sum(count(int_col))
|
|
| group by: int_col % 7
|
|
| having: sum(int_col) / count(int_col) > 500 OR count(*) = 10, count(*) > 10, int_col % 7 IS NOT NULL
|
|
|
|
|
02:EXCHANGE [HASH(int_col % 7)]
|
|
|
|
|
01:AGGREGATE
|
|
| output: count(*), sum(functional.alltypesagg.int_col), count(functional.alltypesagg.int_col)
|
|
| group by: functional.alltypesagg.int_col % 7
|
|
|
|
|
00:SCAN HDFS [functional.alltypesagg]
|
|
partitions=11/11 size=814.73KB
|
|
====
|
|
# subquery with left outer join
|
|
select j.*, d.*
|
|
from (
|
|
select *
|
|
from functional.JoinTbl a
|
|
) j
|
|
left outer join
|
|
(
|
|
select *
|
|
from functional.DimTbl b
|
|
) d
|
|
on (j.test_name = d.name)
|
|
where j.test_id <= 1006
|
|
---- PLAN
|
|
02:HASH JOIN [LEFT OUTER JOIN]
|
|
| hash predicates: a.test_name = b.name
|
|
|
|
|
|--01:SCAN HDFS [functional.dimtbl b]
|
|
| partitions=1/1 size=171B compact
|
|
|
|
|
00:SCAN HDFS [functional.jointbl a]
|
|
partitions=1/1 size=433B
|
|
predicates: a.test_id <= 1006
|
|
---- DISTRIBUTEDPLAN
|
|
05:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
02:HASH JOIN [LEFT OUTER JOIN, PARTITIONED]
|
|
| hash predicates: a.test_name = b.name
|
|
|
|
|
|--04:EXCHANGE [HASH(b.name)]
|
|
| |
|
|
| 01:SCAN HDFS [functional.dimtbl b]
|
|
| partitions=1/1 size=171B
|
|
|
|
|
03:EXCHANGE [HASH(a.test_name)]
|
|
|
|
|
00:SCAN HDFS [functional.jointbl a]
|
|
partitions=1/1 size=433B
|
|
predicates: a.test_id <= 1006
|
|
====
|
|
# complex join, having joined subquery on the rhs, and predicate
|
|
# at multiple subquery levels
|
|
select x.smallint_col, count(x.id)
|
|
from functional.alltypessmall c
|
|
left outer join
|
|
(
|
|
select a.smallint_col smallint_col, a.tinyint_col tinyint_col, a.day day,
|
|
a.int_col int_col, a.month month, b.float_col float_col, b.id id
|
|
from (
|
|
select *
|
|
from functional.alltypesagg a
|
|
) a
|
|
join
|
|
functional.alltypessmall b
|
|
on (a.smallint_col = b.id)
|
|
) x
|
|
on (x.tinyint_col = c.id)
|
|
group by x.smallint_col
|
|
---- PLAN
|
|
05:AGGREGATE [FINALIZE]
|
|
| output: count(b.id)
|
|
| group by: a.smallint_col
|
|
|
|
|
04:HASH JOIN [LEFT OUTER JOIN]
|
|
| hash predicates: c.id = a.tinyint_col
|
|
|
|
|
|--03:HASH JOIN [INNER JOIN]
|
|
| | hash predicates: a.smallint_col = b.id
|
|
| |
|
|
| |--02:SCAN HDFS [functional.alltypessmall b]
|
|
| | partitions=4/4 size=6.32KB compact
|
|
| |
|
|
| 01:SCAN HDFS [functional.alltypesagg a]
|
|
| partitions=11/11 size=814.73KB compact
|
|
|
|
|
00:SCAN HDFS [functional.alltypessmall c]
|
|
partitions=4/4 size=6.32KB
|
|
---- DISTRIBUTEDPLAN
|
|
11:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
10:AGGREGATE [MERGE FINALIZE]
|
|
| output: sum(count(x.id))
|
|
| group by: x.smallint_col
|
|
|
|
|
09:EXCHANGE [HASH(x.smallint_col)]
|
|
|
|
|
05:AGGREGATE
|
|
| output: count(b.id)
|
|
| group by: a.smallint_col
|
|
|
|
|
04:HASH JOIN [LEFT OUTER JOIN, PARTITIONED]
|
|
| hash predicates: c.id = a.tinyint_col
|
|
|
|
|
|--08:EXCHANGE [HASH(a.tinyint_col)]
|
|
| |
|
|
| 03:HASH JOIN [INNER JOIN, BROADCAST]
|
|
| | hash predicates: a.smallint_col = b.id
|
|
| |
|
|
| |--06:EXCHANGE [BROADCAST]
|
|
| | |
|
|
| | 02:SCAN HDFS [functional.alltypessmall b]
|
|
| | partitions=4/4 size=6.32KB
|
|
| |
|
|
| 01:SCAN HDFS [functional.alltypesagg a]
|
|
| partitions=11/11 size=814.73KB
|
|
|
|
|
07:EXCHANGE [HASH(c.id)]
|
|
|
|
|
00:SCAN HDFS [functional.alltypessmall c]
|
|
partitions=4/4 size=6.32KB
|
|
====
|
|
# complex join, having joined subquery on the lhs, and predicate
|
|
# at multiple subquery levels
|
|
select x.smallint_col, x.id, x.tinyint_col, c.id, x.int_col, x.float_col, c.string_col
|
|
from
|
|
(
|
|
select a.smallint_col smallint_col, a.tinyint_col tinyint_col, a.day day,
|
|
a.int_col int_col, a.month month, b.float_col float_col, b.id id
|
|
from (
|
|
select *
|
|
from functional.alltypesagg a
|
|
where month=1
|
|
) a
|
|
join
|
|
functional.alltypessmall b
|
|
on (a.smallint_col = b.id)
|
|
) x
|
|
join
|
|
functional.alltypessmall c
|
|
on (x.tinyint_col = c.id)
|
|
where x.day=1
|
|
and x.int_col > 899
|
|
and x.float_col > 4.5
|
|
and c.string_col < '7'
|
|
and x.int_col + x.float_col + CAST(c.string_col AS FLOAT) < 1000
|
|
---- PLAN
|
|
04:HASH JOIN [INNER JOIN]
|
|
| hash predicates: a.tinyint_col = c.id
|
|
| other predicates: a.int_col + b.float_col + CAST(c.string_col AS FLOAT) < 1000
|
|
|
|
|
|--03:SCAN HDFS [functional.alltypessmall c]
|
|
| partitions=4/4 size=6.32KB compact
|
|
| predicates: c.string_col < '7'
|
|
|
|
|
02:HASH JOIN [INNER JOIN]
|
|
| hash predicates: a.smallint_col = b.id
|
|
|
|
|
|--01:SCAN HDFS [functional.alltypessmall b]
|
|
| partitions=4/4 size=6.32KB compact
|
|
| predicates: b.float_col > 4.5
|
|
|
|
|
00:SCAN HDFS [functional.alltypesagg a]
|
|
partitions=1/11 size=73.39KB
|
|
predicates: a.int_col > 899
|
|
---- DISTRIBUTEDPLAN
|
|
07:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
04:HASH JOIN [INNER JOIN, BROADCAST]
|
|
| hash predicates: a.tinyint_col = c.id
|
|
| other predicates: a.int_col + b.float_col + CAST(c.string_col AS FLOAT) < 1000
|
|
|
|
|
|--06:EXCHANGE [BROADCAST]
|
|
| |
|
|
| 03:SCAN HDFS [functional.alltypessmall c]
|
|
| partitions=4/4 size=6.32KB
|
|
| predicates: c.string_col < '7'
|
|
|
|
|
02:HASH JOIN [INNER JOIN, BROADCAST]
|
|
| hash predicates: a.smallint_col = b.id
|
|
|
|
|
|--05:EXCHANGE [BROADCAST]
|
|
| |
|
|
| 01:SCAN HDFS [functional.alltypessmall b]
|
|
| partitions=4/4 size=6.32KB
|
|
| predicates: b.float_col > 4.5
|
|
|
|
|
00:SCAN HDFS [functional.alltypesagg a]
|
|
partitions=1/11 size=73.39KB
|
|
predicates: a.int_col > 899
|
|
====
|
|
# complex join, having joined aggregate subquery on the rhs, and predicate
|
|
# at multiple subquery levels
|
|
select x.smallint_col, sum(x.cnt)
|
|
from functional.alltypessmall c
|
|
join (
|
|
select count(a.id) cnt, b.smallint_col smallint_col
|
|
from ( select * from functional.alltypesagg a ) a
|
|
join functional.alltypessmall b on (a.smallint_col = b.id)
|
|
group by b.smallint_col
|
|
) x on (x.smallint_col = c.id)
|
|
group by x.smallint_col
|
|
---- PLAN
|
|
06:AGGREGATE [FINALIZE]
|
|
| output: sum(count(a.id))
|
|
| group by: b.smallint_col
|
|
|
|
|
05:HASH JOIN [INNER JOIN]
|
|
| hash predicates: c.id = b.smallint_col
|
|
|
|
|
|--04:AGGREGATE [FINALIZE]
|
|
| | output: count(a.id)
|
|
| | group by: b.smallint_col
|
|
| |
|
|
| 03:HASH JOIN [INNER JOIN]
|
|
| | hash predicates: a.smallint_col = b.id
|
|
| |
|
|
| |--02:SCAN HDFS [functional.alltypessmall b]
|
|
| | partitions=4/4 size=6.32KB compact
|
|
| |
|
|
| 01:SCAN HDFS [functional.alltypesagg a]
|
|
| partitions=11/11 size=814.73KB
|
|
|
|
|
00:SCAN HDFS [functional.alltypessmall c]
|
|
partitions=4/4 size=6.32KB
|
|
---- DISTRIBUTEDPLAN
|
|
13:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
12:AGGREGATE [MERGE FINALIZE]
|
|
| output: sum(sum(x.cnt))
|
|
| group by: x.smallint_col
|
|
|
|
|
11:EXCHANGE [HASH(x.smallint_col)]
|
|
|
|
|
06:AGGREGATE
|
|
| output: sum(count(a.id))
|
|
| group by: b.smallint_col
|
|
|
|
|
05:HASH JOIN [INNER JOIN, BROADCAST]
|
|
| hash predicates: c.id = b.smallint_col
|
|
|
|
|
|--10:EXCHANGE [BROADCAST]
|
|
| |
|
|
| 09:AGGREGATE [MERGE FINALIZE]
|
|
| | output: sum(count(a.id))
|
|
| | group by: b.smallint_col
|
|
| |
|
|
| 08:EXCHANGE [HASH(b.smallint_col)]
|
|
| |
|
|
| 04:AGGREGATE
|
|
| | output: count(a.id)
|
|
| | group by: b.smallint_col
|
|
| |
|
|
| 03:HASH JOIN [INNER JOIN, BROADCAST]
|
|
| | hash predicates: a.smallint_col = b.id
|
|
| |
|
|
| |--07:EXCHANGE [BROADCAST]
|
|
| | |
|
|
| | 02:SCAN HDFS [functional.alltypessmall b]
|
|
| | partitions=4/4 size=6.32KB
|
|
| |
|
|
| 01:SCAN HDFS [functional.alltypesagg a]
|
|
| partitions=11/11 size=814.73KB
|
|
|
|
|
00:SCAN HDFS [functional.alltypessmall c]
|
|
partitions=4/4 size=6.32KB
|
|
====
|
|
# Values statement in subqueries with predicate
|
|
select * from (select y from (values((1 as y),(11))) a where y < 10) b
|
|
---- PLAN
|
|
00:UNION
|
|
constant-operands=1
|
|
---- DISTRIBUTEDPLAN
|
|
00:UNION
|
|
constant-operands=1
|
|
====
|
|
# Mixed constant and non-constant select; the predicate is evaluated directly
|
|
# by the non-const select
|
|
select * from
|
|
(select y from
|
|
((select 1 as y)
|
|
union all
|
|
(select tinyint_col from functional.alltypes)) a
|
|
where y < 10) b
|
|
---- PLAN
|
|
00:UNION
|
|
| constant-operands=1
|
|
|
|
|
01:SCAN HDFS [functional.alltypes]
|
|
partitions=24/24 size=478.45KB
|
|
predicates: functional.alltypes.tinyint_col < 10
|
|
---- DISTRIBUTEDPLAN
|
|
02:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
00:UNION
|
|
| constant-operands=1
|
|
|
|
|
01:SCAN HDFS [functional.alltypes]
|
|
partitions=24/24 size=478.45KB
|
|
predicates: functional.alltypes.tinyint_col < 10
|
|
====
|
|
# Union of constant selects in subquery
|
|
# TODO: We could combine the merge nodes below.
|
|
select * from (select 1 as y union all select 2 union all select * from (select 11) a) b
|
|
where y < 10
|
|
---- PLAN
|
|
00:UNION
|
|
| constant-operands=2
|
|
|
|
|
01:UNION
|
|
predicates: 11 < 10
|
|
constant-operands=1
|
|
---- DISTRIBUTEDPLAN
|
|
00:UNION
|
|
| constant-operands=2
|
|
|
|
|
01:UNION
|
|
predicates: 11 < 10
|
|
constant-operands=1
|
|
====
|
|
# Union of values statements in subquery
|
|
# TODO: We could combine the merge nodes below.
|
|
select * from (values(1 as y) union all values(2) union all select * from (values(11)) a) b
|
|
where y < 10
|
|
---- PLAN
|
|
00:UNION
|
|
| constant-operands=2
|
|
|
|
|
01:UNION
|
|
---- DISTRIBUTEDPLAN
|
|
00:UNION
|
|
| constant-operands=2
|
|
|
|
|
01:UNION
|
|
====
|
|
# Inner join on inline views made up of unions of constant selects
|
|
select * from
|
|
(select 1 a, 2 b union all select 1 a, 2 b) x
|
|
inner join
|
|
(select 1 a, 3 b union all select 1 a, 2 b) y on x.a = y.a
|
|
inner join
|
|
(select 1 a, 3 b union all select 1 a, 3 b) z on z.b = y.b
|
|
---- PLAN
|
|
04:HASH JOIN [INNER JOIN]
|
|
| hash predicates: b = b
|
|
|
|
|
|--02:UNION
|
|
| constant-operands=2
|
|
|
|
|
03:HASH JOIN [INNER JOIN]
|
|
| hash predicates: a = a
|
|
|
|
|
|--01:UNION
|
|
| constant-operands=2
|
|
|
|
|
00:UNION
|
|
constant-operands=2
|
|
---- DISTRIBUTEDPLAN
|
|
04:HASH JOIN [INNER JOIN, BROADCAST]
|
|
| hash predicates: b = b
|
|
|
|
|
|--06:EXCHANGE [UNPARTITIONED]
|
|
| |
|
|
| 02:UNION
|
|
| constant-operands=2
|
|
|
|
|
03:HASH JOIN [INNER JOIN, BROADCAST]
|
|
| hash predicates: a = a
|
|
|
|
|
|--05:EXCHANGE [UNPARTITIONED]
|
|
| |
|
|
| 01:UNION
|
|
| constant-operands=2
|
|
|
|
|
00:UNION
|
|
constant-operands=2
|
|
====
|
|
# Semi and inner join on a table and on inline views made up of constant selects
|
|
select * from functional.alltypessmall x
|
|
left semi join
|
|
(select 1 a, 3 b union all select 1 a, 3 b) y on y.a = x.id
|
|
inner join
|
|
(select 1 a, 3 b union all select 1 a, 3 b) z on z.b = y.b
|
|
---- PLAN
|
|
04:HASH JOIN [INNER JOIN]
|
|
| hash predicates: b = b
|
|
|
|
|
|--02:UNION
|
|
| constant-operands=2
|
|
|
|
|
03:HASH JOIN [LEFT SEMI JOIN]
|
|
| hash predicates: x.id = a
|
|
|
|
|
|--01:UNION
|
|
| constant-operands=2
|
|
|
|
|
00:SCAN HDFS [functional.alltypessmall x]
|
|
partitions=4/4 size=6.32KB
|
|
---- DISTRIBUTEDPLAN
|
|
07:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
04:HASH JOIN [INNER JOIN, BROADCAST]
|
|
| hash predicates: b = b
|
|
|
|
|
|--06:EXCHANGE [BROADCAST]
|
|
| |
|
|
| 02:UNION
|
|
| constant-operands=2
|
|
|
|
|
03:HASH JOIN [LEFT SEMI JOIN, BROADCAST]
|
|
| hash predicates: x.id = a
|
|
|
|
|
|--05:EXCHANGE [BROADCAST]
|
|
| |
|
|
| 01:UNION
|
|
| constant-operands=2
|
|
|
|
|
00:SCAN HDFS [functional.alltypessmall x]
|
|
partitions=4/4 size=6.32KB
|
|
====
|
|
# Tests that views correctly reanalyze cloned exprs. (IMPALA-984)
|
|
select b.* from functional.decimal_tbl a left outer join
|
|
(select d1, d1 + NULL IS NULL x from functional.decimal_tbl) b
|
|
on (a.d1 = b.d1)
|
|
---- PLAN
|
|
02:HASH JOIN [LEFT OUTER JOIN]
|
|
| hash predicates: a.d1 = d1
|
|
|
|
|
|--01:SCAN HDFS [functional.decimal_tbl]
|
|
| partitions=1/1 size=195B compact
|
|
|
|
|
00:SCAN HDFS [functional.decimal_tbl a]
|
|
partitions=1/1 size=195B
|
|
====
|