mirror of
https://github.com/apache/impala.git
synced 2026-01-09 15:00:11 -05:00
Fixing predicate assignment for outer joins: - On clause predicates for outer joins are now assigned to the join node - the exceptions are On clause predicates that can be directly evaluated by the outer-joined tables themselves; those are "pushed down" - Where clause predicates for outer-joined tables are assigned to the join node that materializes the outer join
1442 lines
41 KiB
Plaintext
1442 lines
41 KiB
Plaintext
# subquery with aggregation and order by/limit, as left-hand side of join;
|
|
# having clause in subquery is transferred to the merge agg step in the distributed plan
|
|
select *
|
|
from (
|
|
select int_col, count(*)
|
|
from alltypessmall
|
|
where month = 1
|
|
group by int_col
|
|
having count(*) > 1
|
|
order by count(*) desc limit 5
|
|
) t1
|
|
join alltypes t2 on (t1.int_col = t2.int_col)
|
|
where month = 1
|
|
---- PLAN
|
|
Plan Fragment 0
|
|
UNPARTITIONED
|
|
HASH JOIN
|
|
JOIN OP: INNER JOIN
|
|
HASH PREDICATES:
|
|
<slot 2> = t2.int_col
|
|
TUPLE IDS: 1 3
|
|
TOP-N
|
|
ORDER BY: <slot 3> DESC
|
|
LIMIT: 5
|
|
TUPLE IDS: 1
|
|
AGGREGATE
|
|
OUTPUT: COUNT(*)
|
|
GROUP BY: int_col
|
|
HAVING: <slot 3> > 1
|
|
TUPLE IDS: 1
|
|
SCAN HDFS table=default.alltypessmall (0)
|
|
TUPLE IDS: 0
|
|
SCAN HDFS table=default.alltypes (3) compact
|
|
TUPLE IDS: 3
|
|
---- DISTRIBUTEDPLAN
|
|
Plan Fragment 0
|
|
UNPARTITIONED
|
|
HASH JOIN
|
|
JOIN OP: INNER JOIN
|
|
HASH PREDICATES:
|
|
<slot 2> = t2.int_col
|
|
TUPLE IDS: 1 3
|
|
TOP-N
|
|
ORDER BY: <slot 3> DESC
|
|
LIMIT: 5
|
|
TUPLE IDS: 1
|
|
AGGREGATE
|
|
OUTPUT: SUM(<slot 3>)
|
|
GROUP BY: <slot 2>
|
|
HAVING: <slot 3> > 1
|
|
TUPLE IDS: 1
|
|
EXCHANGE (5)
|
|
TUPLE IDS: 1
|
|
EXCHANGE (7)
|
|
TUPLE IDS: 3
|
|
|
|
Plan Fragment 1
|
|
RANDOM
|
|
STREAM DATA SINK
|
|
EXCHANGE ID: 7
|
|
UNPARTITIONED
|
|
|
|
SCAN HDFS table=default.alltypes (3) compact
|
|
TUPLE IDS: 3
|
|
|
|
Plan Fragment 2
|
|
RANDOM
|
|
STREAM DATA SINK
|
|
EXCHANGE ID: 5
|
|
UNPARTITIONED
|
|
|
|
AGGREGATE
|
|
OUTPUT: COUNT(*)
|
|
GROUP BY: int_col
|
|
TUPLE IDS: 1
|
|
SCAN HDFS table=default.alltypessmall (0)
|
|
TUPLE IDS: 0
|
|
---- SCANRANGELOCATIONS
|
|
NODE 0:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=1/090101.txt 0:1610
|
|
LOCATIONS:
|
|
NODE 3:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypes/year=2009/month=1/090101.txt 0:20433
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypes/year=2010/month=1/100101.txt 0:20853
|
|
LOCATIONS:
|
|
====
|
|
# simple full scan subquery
|
|
select * from (select y x from (select id y from hbasealltypessmall) a) b
|
|
---- PLAN
|
|
Plan Fragment 0
|
|
UNPARTITIONED
|
|
SCAN HBASE table=hbasealltypessmall (0)
|
|
TUPLE IDS: 0
|
|
---- DISTRIBUTEDPLAN
|
|
Plan Fragment 0
|
|
UNPARTITIONED
|
|
EXCHANGE (1)
|
|
TUPLE IDS: 0
|
|
|
|
Plan Fragment 1
|
|
RANDOM
|
|
STREAM DATA SINK
|
|
EXCHANGE ID: 1
|
|
UNPARTITIONED
|
|
|
|
SCAN HBASE table=hbasealltypessmall (0)
|
|
TUPLE IDS: 0
|
|
---- SCANRANGELOCATIONS
|
|
NODE 0:
|
|
HBASE KEYRANGE <unbounded>:<unbounded>
|
|
LOCATIONS:
|
|
====
|
|
# subquery doing join
|
|
select * from (select t2.*
|
|
from testtbl t1 join testtbl t2 using(id)
|
|
where t1.zip = 94611) x
|
|
---- PLAN
|
|
Plan Fragment 0
|
|
UNPARTITIONED
|
|
HASH JOIN
|
|
JOIN OP: INNER JOIN
|
|
HASH PREDICATES:
|
|
t1.id = t2.id
|
|
TUPLE IDS: 0 1
|
|
SCAN HDFS table=default.testtbl (0)
|
|
PREDICATES: t1.zip = 94611
|
|
TUPLE IDS: 0
|
|
SCAN HDFS table=default.testtbl (1) compact
|
|
TUPLE IDS: 1
|
|
---- DISTRIBUTEDPLAN
|
|
Plan Fragment 0
|
|
UNPARTITIONED
|
|
EXCHANGE (4)
|
|
TUPLE IDS: 0 1
|
|
|
|
Plan Fragment 1
|
|
RANDOM
|
|
STREAM DATA SINK
|
|
EXCHANGE ID: 4
|
|
UNPARTITIONED
|
|
|
|
HASH JOIN
|
|
JOIN OP: INNER JOIN
|
|
HASH PREDICATES:
|
|
t1.id = t2.id
|
|
TUPLE IDS: 0 1
|
|
SCAN HDFS table=default.testtbl (0)
|
|
PREDICATES: t1.zip = 94611
|
|
TUPLE IDS: 0
|
|
EXCHANGE (3)
|
|
TUPLE IDS: 1
|
|
|
|
Plan Fragment 2
|
|
RANDOM
|
|
STREAM DATA SINK
|
|
EXCHANGE ID: 3
|
|
UNPARTITIONED
|
|
|
|
SCAN HDFS table=default.testtbl (1) compact
|
|
TUPLE IDS: 1
|
|
---- SCANRANGELOCATIONS
|
|
NODE 0:
|
|
NODE 1:
|
|
====
|
|
# subquery doing join
|
|
# multiple join predicates;
|
|
# scan predicates get propagated correctly;
|
|
# non-eq join predicates are evaluated as extra conjuncts by the join node
|
|
select * from (select a.*
|
|
from alltypesagg a right outer join alltypessmall b using (id, int_col)
|
|
where a.day >= 6
|
|
and b.month > 2
|
|
and a.tinyint_col = 15
|
|
and b.string_col = '15'
|
|
and a.tinyint_col + b.tinyint_col < 15) x
|
|
---- PLAN
|
|
Plan Fragment 0
|
|
UNPARTITIONED
|
|
HASH JOIN
|
|
JOIN OP: RIGHT OUTER JOIN
|
|
HASH PREDICATES:
|
|
a.id = b.id
|
|
a.int_col = b.int_col
|
|
OTHER PREDICATES: a.day >= 6, a.tinyint_col = 15, a.tinyint_col + b.tinyint_col < 15
|
|
TUPLE IDS: 0N 1
|
|
SCAN HDFS table=default.alltypesagg (0)
|
|
TUPLE IDS: 0
|
|
SCAN HDFS table=default.alltypessmall (1) compact
|
|
PREDICATES: b.string_col = '15'
|
|
TUPLE IDS: 1
|
|
---- DISTRIBUTEDPLAN
|
|
Plan Fragment 0
|
|
UNPARTITIONED
|
|
EXCHANGE (4)
|
|
TUPLE IDS: 0N 1
|
|
|
|
Plan Fragment 1
|
|
RANDOM
|
|
STREAM DATA SINK
|
|
EXCHANGE ID: 4
|
|
UNPARTITIONED
|
|
|
|
HASH JOIN
|
|
JOIN OP: RIGHT OUTER JOIN
|
|
HASH PREDICATES:
|
|
a.id = b.id
|
|
a.int_col = b.int_col
|
|
OTHER PREDICATES: a.day >= 6, a.tinyint_col = 15, a.tinyint_col + b.tinyint_col < 15
|
|
TUPLE IDS: 0N 1
|
|
SCAN HDFS table=default.alltypesagg (0)
|
|
TUPLE IDS: 0
|
|
EXCHANGE (3)
|
|
TUPLE IDS: 1
|
|
|
|
Plan Fragment 2
|
|
RANDOM
|
|
STREAM DATA SINK
|
|
EXCHANGE ID: 3
|
|
UNPARTITIONED
|
|
|
|
SCAN HDFS table=default.alltypessmall (1) compact
|
|
PREDICATES: b.string_col = '15'
|
|
TUPLE IDS: 1
|
|
---- SCANRANGELOCATIONS
|
|
NODE 0:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=1/100101.txt 0:75153
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=10/100110.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=2/100102.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=3/100103.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=4/100104.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=5/100105.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=6/100106.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=7/100107.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=8/100108.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=9/100109.txt 0:76263
|
|
LOCATIONS:
|
|
NODE 1:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=3/090301.txt 0:1620
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=4/090401.txt 0:1621
|
|
LOCATIONS:
|
|
====
|
|
# predicate pushdown
|
|
select * from (select * from hbasealltypessmall) a where id < 5
|
|
---- PLAN
|
|
Plan Fragment 0
|
|
UNPARTITIONED
|
|
SCAN HBASE table=hbasealltypessmall (0)
|
|
PREDICATES: hbasealltypessmall.id < 5
|
|
TUPLE IDS: 0
|
|
---- DISTRIBUTEDPLAN
|
|
Plan Fragment 0
|
|
UNPARTITIONED
|
|
EXCHANGE (1)
|
|
TUPLE IDS: 0
|
|
|
|
Plan Fragment 1
|
|
RANDOM
|
|
STREAM DATA SINK
|
|
EXCHANGE ID: 1
|
|
UNPARTITIONED
|
|
|
|
SCAN HBASE table=hbasealltypessmall (0)
|
|
PREDICATES: hbasealltypessmall.id < 5
|
|
TUPLE IDS: 0
|
|
---- SCANRANGELOCATIONS
|
|
NODE 0:
|
|
HBASE KEYRANGE <unbounded>:<unbounded>
|
|
LOCATIONS:
|
|
====
|
|
# subquery join
|
|
# multiple join predicates;
|
|
# scan predicates get propagated correctly;
|
|
# non-eq join predicates are evaluated as extra conjuncts by the join node
|
|
select *
|
|
from (select id, int_col, day, tinyint_col from alltypesagg) a right outer join
|
|
(select id, int_col, month, string_col, tinyint_col from alltypessmall) b using (id, int_col)
|
|
where a.day >= 6
|
|
and b.month > 2
|
|
and a.tinyint_col = 15
|
|
and b.string_col = '15'
|
|
and a.tinyint_col + b.tinyint_col < 15
|
|
---- PLAN
|
|
Plan Fragment 0
|
|
UNPARTITIONED
|
|
HASH JOIN
|
|
JOIN OP: RIGHT OUTER JOIN
|
|
HASH PREDICATES:
|
|
id = id
|
|
int_col = int_col
|
|
OTHER PREDICATES: day >= 6, tinyint_col = 15, tinyint_col + tinyint_col < 15
|
|
TUPLE IDS: 0N 2
|
|
SCAN HDFS table=default.alltypesagg (0)
|
|
TUPLE IDS: 0
|
|
SCAN HDFS table=default.alltypessmall (1) compact
|
|
PREDICATES: string_col = '15'
|
|
TUPLE IDS: 2
|
|
---- DISTRIBUTEDPLAN
|
|
Plan Fragment 0
|
|
UNPARTITIONED
|
|
EXCHANGE (4)
|
|
TUPLE IDS: 0N 2
|
|
|
|
Plan Fragment 1
|
|
RANDOM
|
|
STREAM DATA SINK
|
|
EXCHANGE ID: 4
|
|
UNPARTITIONED
|
|
|
|
HASH JOIN
|
|
JOIN OP: RIGHT OUTER JOIN
|
|
HASH PREDICATES:
|
|
id = id
|
|
int_col = int_col
|
|
OTHER PREDICATES: day >= 6, tinyint_col = 15, tinyint_col + tinyint_col < 15
|
|
TUPLE IDS: 0N 2
|
|
SCAN HDFS table=default.alltypesagg (0)
|
|
TUPLE IDS: 0
|
|
EXCHANGE (3)
|
|
TUPLE IDS: 2
|
|
|
|
Plan Fragment 2
|
|
RANDOM
|
|
STREAM DATA SINK
|
|
EXCHANGE ID: 3
|
|
UNPARTITIONED
|
|
|
|
SCAN HDFS table=default.alltypessmall (1) compact
|
|
PREDICATES: string_col = '15'
|
|
TUPLE IDS: 2
|
|
---- SCANRANGELOCATIONS
|
|
NODE 0:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=1/100101.txt 0:75153
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=10/100110.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=2/100102.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=3/100103.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=4/100104.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=5/100105.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=6/100106.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=7/100107.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=8/100108.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=9/100109.txt 0:76263
|
|
LOCATIONS:
|
|
NODE 1:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=3/090301.txt 0:1620
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=4/090401.txt 0:1621
|
|
LOCATIONS:
|
|
====
|
|
# subquery join
|
|
# multiple join predicates;
|
|
# scan predicates get propagated correctly;
|
|
# non-eq join predicates are evaluated as extra conjuncts by the join node
|
|
select *
|
|
from (select id, int_col, day, tinyint_col
|
|
from (select id, int_col, day, tinyint_col from alltypesagg) a0
|
|
where a0.day >= 6) a
|
|
right outer join
|
|
(select id, int_col, month, string_col, tinyint_col from alltypessmall) b using (id, int_col)
|
|
where b.month > 2
|
|
and a.tinyint_col = 15
|
|
and b.string_col = '15'
|
|
and a.tinyint_col + b.tinyint_col < 15
|
|
---- PLAN
|
|
Plan Fragment 0
|
|
UNPARTITIONED
|
|
HASH JOIN
|
|
JOIN OP: RIGHT OUTER JOIN
|
|
HASH PREDICATES:
|
|
id = id
|
|
int_col = int_col
|
|
OTHER PREDICATES: tinyint_col = 15, tinyint_col + tinyint_col < 15
|
|
TUPLE IDS: 0N 3
|
|
SCAN HDFS table=default.alltypesagg (0)
|
|
TUPLE IDS: 0
|
|
SCAN HDFS table=default.alltypessmall (1) compact
|
|
PREDICATES: string_col = '15'
|
|
TUPLE IDS: 3
|
|
---- DISTRIBUTEDPLAN
|
|
Plan Fragment 0
|
|
UNPARTITIONED
|
|
EXCHANGE (4)
|
|
TUPLE IDS: 0N 3
|
|
|
|
Plan Fragment 1
|
|
RANDOM
|
|
STREAM DATA SINK
|
|
EXCHANGE ID: 4
|
|
UNPARTITIONED
|
|
|
|
HASH JOIN
|
|
JOIN OP: RIGHT OUTER JOIN
|
|
HASH PREDICATES:
|
|
id = id
|
|
int_col = int_col
|
|
OTHER PREDICATES: tinyint_col = 15, tinyint_col + tinyint_col < 15
|
|
TUPLE IDS: 0N 3
|
|
SCAN HDFS table=default.alltypesagg (0)
|
|
TUPLE IDS: 0
|
|
EXCHANGE (3)
|
|
TUPLE IDS: 3
|
|
|
|
Plan Fragment 2
|
|
RANDOM
|
|
STREAM DATA SINK
|
|
EXCHANGE ID: 3
|
|
UNPARTITIONED
|
|
|
|
SCAN HDFS table=default.alltypessmall (1) compact
|
|
PREDICATES: string_col = '15'
|
|
TUPLE IDS: 3
|
|
---- SCANRANGELOCATIONS
|
|
NODE 0:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=10/100110.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=6/100106.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=7/100107.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=8/100108.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=9/100109.txt 0:76263
|
|
LOCATIONS:
|
|
NODE 1:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=3/090301.txt 0:1620
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=4/090401.txt 0:1621
|
|
LOCATIONS:
|
|
====
|
|
# complex join, having joined subquery on the rhs, and predicate
|
|
# at multiple subquery levels. This tests that both sides of a join
|
|
# that is itself on the build side of another join get compacted.
|
|
select x.smallint_col, x.id, x.tinyint_col, c.id, x.int_col, x.float_col, c.string_col
|
|
from alltypessmall c
|
|
join (
|
|
select a.smallint_col smallint_col, a.tinyint_col tinyint_col, a.day day,
|
|
a.int_col int_col, a.month month, b.float_col float_col, b.id id
|
|
from ( select * from alltypesagg a where month=1 ) a
|
|
join alltypessmall b on (a.smallint_col = b.id)
|
|
) x on (x.tinyint_col = c.id)
|
|
where x.day=1
|
|
and x.int_col > 899
|
|
and x.float_col > 4.5
|
|
and c.string_col < '7'
|
|
and x.int_col + x.float_col + cast(c.string_col as float) < 1000
|
|
---- PLAN
|
|
Plan Fragment 0
|
|
UNPARTITIONED
|
|
HASH JOIN
|
|
JOIN OP: INNER JOIN
|
|
HASH PREDICATES:
|
|
c.id = a.tinyint_col
|
|
OTHER PREDICATES: a.int_col + b.float_col + CAST(c.string_col AS FLOAT) < 1000.0
|
|
TUPLE IDS: 0 1 3
|
|
SCAN HDFS table=default.alltypessmall (0)
|
|
PREDICATES: c.string_col < '7'
|
|
TUPLE IDS: 0
|
|
HASH JOIN
|
|
JOIN OP: INNER JOIN
|
|
HASH PREDICATES:
|
|
a.smallint_col = b.id
|
|
TUPLE IDS: 1 3
|
|
SCAN HDFS table=default.alltypesagg (1) compact
|
|
PREDICATES: a.int_col > 899
|
|
TUPLE IDS: 1
|
|
SCAN HDFS table=default.alltypessmall (2) compact
|
|
PREDICATES: b.float_col > 4.5
|
|
TUPLE IDS: 3
|
|
---- DISTRIBUTEDPLAN
|
|
Plan Fragment 0
|
|
UNPARTITIONED
|
|
EXCHANGE (7)
|
|
TUPLE IDS: 0 1 3
|
|
|
|
Plan Fragment 1
|
|
RANDOM
|
|
STREAM DATA SINK
|
|
EXCHANGE ID: 7
|
|
UNPARTITIONED
|
|
|
|
HASH JOIN
|
|
JOIN OP: INNER JOIN
|
|
HASH PREDICATES:
|
|
c.id = a.tinyint_col
|
|
OTHER PREDICATES: a.int_col + b.float_col + CAST(c.string_col AS FLOAT) < 1000.0
|
|
TUPLE IDS: 0 1 3
|
|
SCAN HDFS table=default.alltypessmall (0)
|
|
PREDICATES: c.string_col < '7'
|
|
TUPLE IDS: 0
|
|
EXCHANGE (6)
|
|
TUPLE IDS: 1 3
|
|
|
|
Plan Fragment 2
|
|
RANDOM
|
|
STREAM DATA SINK
|
|
EXCHANGE ID: 6
|
|
UNPARTITIONED
|
|
|
|
HASH JOIN
|
|
JOIN OP: INNER JOIN
|
|
HASH PREDICATES:
|
|
a.smallint_col = b.id
|
|
TUPLE IDS: 1 3
|
|
SCAN HDFS table=default.alltypesagg (1) compact
|
|
PREDICATES: a.int_col > 899
|
|
TUPLE IDS: 1
|
|
EXCHANGE (5)
|
|
TUPLE IDS: 3
|
|
|
|
Plan Fragment 3
|
|
RANDOM
|
|
STREAM DATA SINK
|
|
EXCHANGE ID: 5
|
|
UNPARTITIONED
|
|
|
|
SCAN HDFS table=default.alltypessmall (2) compact
|
|
PREDICATES: b.float_col > 4.5
|
|
TUPLE IDS: 3
|
|
---- SCANRANGELOCATIONS
|
|
NODE 0:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=1/090101.txt 0:1610
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=2/090201.txt 0:1621
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=3/090301.txt 0:1620
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=4/090401.txt 0:1621
|
|
LOCATIONS:
|
|
NODE 1:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=1/100101.txt 0:75153
|
|
LOCATIONS:
|
|
NODE 2:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=1/090101.txt 0:1610
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=2/090201.txt 0:1621
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=3/090301.txt 0:1620
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=4/090401.txt 0:1621
|
|
LOCATIONS:
|
|
====
|
|
# with grouping
|
|
select tinyint_col, count(*), min(tinyint_col), max(tinyint_col), sum(tinyint_col),
|
|
avg(tinyint_col)
|
|
from (select * from alltypesagg) a
|
|
group by 1
|
|
---- PLAN
|
|
Plan Fragment 0
|
|
UNPARTITIONED
|
|
AGGREGATE
|
|
OUTPUT: COUNT(*), MIN(alltypesagg.tinyint_col), MAX(alltypesagg.tinyint_col), SUM(alltypesagg.tinyint_col), COUNT(alltypesagg.tinyint_col)
|
|
GROUP BY: alltypesagg.tinyint_col
|
|
TUPLE IDS: 2
|
|
SCAN HDFS table=default.alltypesagg (0)
|
|
TUPLE IDS: 0
|
|
---- DISTRIBUTEDPLAN
|
|
Plan Fragment 0
|
|
UNPARTITIONED
|
|
AGGREGATE
|
|
OUTPUT: SUM(<slot 29>), MIN(<slot 30>), MAX(<slot 31>), SUM(<slot 32>), SUM(<slot 33>)
|
|
GROUP BY: <slot 28>
|
|
TUPLE IDS: 2
|
|
EXCHANGE (2)
|
|
TUPLE IDS: 2
|
|
|
|
Plan Fragment 1
|
|
RANDOM
|
|
STREAM DATA SINK
|
|
EXCHANGE ID: 2
|
|
UNPARTITIONED
|
|
|
|
AGGREGATE
|
|
OUTPUT: COUNT(*), MIN(alltypesagg.tinyint_col), MAX(alltypesagg.tinyint_col), SUM(alltypesagg.tinyint_col), COUNT(alltypesagg.tinyint_col)
|
|
GROUP BY: alltypesagg.tinyint_col
|
|
TUPLE IDS: 2
|
|
SCAN HDFS table=default.alltypesagg (0)
|
|
TUPLE IDS: 0
|
|
---- SCANRANGELOCATIONS
|
|
NODE 0:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=1/100101.txt 0:75153
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=10/100110.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=2/100102.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=3/100103.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=4/100104.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=5/100105.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=6/100106.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=7/100107.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=8/100108.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=9/100109.txt 0:76263
|
|
LOCATIONS:
|
|
====
|
|
# with grouping
|
|
select * from (
|
|
select tinyint_col, count(*), min(tinyint_col), max(tinyint_col), sum(tinyint_col),
|
|
avg(tinyint_col)
|
|
from alltypesagg
|
|
group by 1
|
|
) a
|
|
---- PLAN
|
|
Plan Fragment 0
|
|
UNPARTITIONED
|
|
AGGREGATE
|
|
OUTPUT: COUNT(*), MIN(tinyint_col), MAX(tinyint_col), SUM(tinyint_col), COUNT(tinyint_col)
|
|
GROUP BY: tinyint_col
|
|
TUPLE IDS: 1
|
|
SCAN HDFS table=default.alltypesagg (0)
|
|
TUPLE IDS: 0
|
|
---- DISTRIBUTEDPLAN
|
|
Plan Fragment 0
|
|
UNPARTITIONED
|
|
AGGREGATE
|
|
OUTPUT: SUM(<slot 2>), MIN(<slot 3>), MAX(<slot 4>), SUM(<slot 5>), SUM(<slot 6>)
|
|
GROUP BY: <slot 1>
|
|
TUPLE IDS: 1
|
|
EXCHANGE (2)
|
|
TUPLE IDS: 1
|
|
|
|
Plan Fragment 1
|
|
RANDOM
|
|
STREAM DATA SINK
|
|
EXCHANGE ID: 2
|
|
UNPARTITIONED
|
|
|
|
AGGREGATE
|
|
OUTPUT: COUNT(*), MIN(tinyint_col), MAX(tinyint_col), SUM(tinyint_col), COUNT(tinyint_col)
|
|
GROUP BY: tinyint_col
|
|
TUPLE IDS: 1
|
|
SCAN HDFS table=default.alltypesagg (0)
|
|
TUPLE IDS: 0
|
|
---- SCANRANGELOCATIONS
|
|
NODE 0:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=1/100101.txt 0:75153
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=10/100110.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=2/100102.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=3/100103.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=4/100104.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=5/100105.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=6/100106.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=7/100107.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=8/100108.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=9/100109.txt 0:76263
|
|
LOCATIONS:
|
|
====
|
|
select c1, c2, c3
|
|
from (
|
|
select c1, c2, c3
|
|
from (
|
|
select int_col c1, sum(float_col) c2, min(float_col) c3
|
|
from hbasealltypessmall
|
|
group by 1
|
|
) x
|
|
order by 2,3 desc
|
|
limit 5
|
|
) y
|
|
---- PLAN
|
|
Plan Fragment 0
|
|
UNPARTITIONED
|
|
TOP-N
|
|
ORDER BY: <slot 3> ASC, <slot 4> DESC
|
|
LIMIT: 5
|
|
TUPLE IDS: 1
|
|
AGGREGATE
|
|
OUTPUT: SUM(float_col), MIN(float_col)
|
|
GROUP BY: int_col
|
|
TUPLE IDS: 1
|
|
SCAN HBASE table=hbasealltypessmall (0)
|
|
TUPLE IDS: 0
|
|
---- DISTRIBUTEDPLAN
|
|
Plan Fragment 0
|
|
UNPARTITIONED
|
|
TOP-N
|
|
ORDER BY: <slot 3> ASC, <slot 4> DESC
|
|
LIMIT: 5
|
|
TUPLE IDS: 1
|
|
AGGREGATE
|
|
OUTPUT: SUM(<slot 3>), MIN(<slot 4>)
|
|
GROUP BY: <slot 2>
|
|
TUPLE IDS: 1
|
|
EXCHANGE (3)
|
|
TUPLE IDS: 1
|
|
|
|
Plan Fragment 1
|
|
RANDOM
|
|
STREAM DATA SINK
|
|
EXCHANGE ID: 3
|
|
UNPARTITIONED
|
|
|
|
AGGREGATE
|
|
OUTPUT: SUM(float_col), MIN(float_col)
|
|
GROUP BY: int_col
|
|
TUPLE IDS: 1
|
|
SCAN HBASE table=hbasealltypessmall (0)
|
|
TUPLE IDS: 0
|
|
---- SCANRANGELOCATIONS
|
|
NODE 0:
|
|
HBASE KEYRANGE <unbounded>:<unbounded>
|
|
LOCATIONS:
|
|
====
|
|
select c1, x2
|
|
from (
|
|
select c1, min(c2) x2
|
|
from (
|
|
select c1, c2, c3
|
|
from (
|
|
select int_col c1, tinyint_col c2, min(float_col) c3
|
|
from hbasealltypessmall
|
|
group by 1, 2
|
|
order by 1,2
|
|
limit 1
|
|
) x
|
|
) x2
|
|
group by c1
|
|
) y
|
|
order by 2,1 desc
|
|
limit 0
|
|
---- PLAN
|
|
Plan Fragment 0
|
|
UNPARTITIONED
|
|
TOP-N
|
|
ORDER BY: <slot 13> ASC, <slot 12> DESC
|
|
LIMIT: 0
|
|
TUPLE IDS: 4
|
|
AGGREGATE
|
|
OUTPUT: MIN(<slot 4>)
|
|
GROUP BY: <slot 3>
|
|
TUPLE IDS: 4
|
|
TOP-N
|
|
ORDER BY: <slot 3> ASC, <slot 4> ASC
|
|
LIMIT: 1
|
|
TUPLE IDS: 1
|
|
AGGREGATE
|
|
OUTPUT: MIN(float_col)
|
|
GROUP BY: int_col, tinyint_col
|
|
TUPLE IDS: 1
|
|
SCAN HBASE table=hbasealltypessmall (0)
|
|
TUPLE IDS: 0
|
|
---- DISTRIBUTEDPLAN
|
|
Plan Fragment 0
|
|
UNPARTITIONED
|
|
TOP-N
|
|
ORDER BY: <slot 13> ASC, <slot 12> DESC
|
|
LIMIT: 0
|
|
TUPLE IDS: 4
|
|
AGGREGATE
|
|
OUTPUT: MIN(<slot 4>)
|
|
GROUP BY: <slot 3>
|
|
TUPLE IDS: 4
|
|
TOP-N
|
|
ORDER BY: <slot 3> ASC, <slot 4> ASC
|
|
LIMIT: 1
|
|
TUPLE IDS: 1
|
|
AGGREGATE
|
|
OUTPUT: MIN(<slot 5>)
|
|
GROUP BY: <slot 3>, <slot 4>
|
|
TUPLE IDS: 1
|
|
EXCHANGE (5)
|
|
TUPLE IDS: 1
|
|
|
|
Plan Fragment 1
|
|
RANDOM
|
|
STREAM DATA SINK
|
|
EXCHANGE ID: 5
|
|
UNPARTITIONED
|
|
|
|
AGGREGATE
|
|
OUTPUT: MIN(float_col)
|
|
GROUP BY: int_col, tinyint_col
|
|
TUPLE IDS: 1
|
|
SCAN HBASE table=hbasealltypessmall (0)
|
|
TUPLE IDS: 0
|
|
---- SCANRANGELOCATIONS
|
|
NODE 0:
|
|
HBASE KEYRANGE <unbounded>:<unbounded>
|
|
LOCATIONS:
|
|
====
|
|
# distinct *
|
|
select distinct *
|
|
from (select distinct * from testtbl) x
|
|
---- PLAN
|
|
Plan Fragment 0
|
|
UNPARTITIONED
|
|
AGGREGATE
|
|
OUTPUT:
|
|
GROUP BY: <slot 3>, <slot 4>, <slot 5>
|
|
TUPLE IDS: 3
|
|
AGGREGATE
|
|
OUTPUT:
|
|
GROUP BY: testtbl.id, testtbl.name, testtbl.zip
|
|
TUPLE IDS: 1
|
|
SCAN HDFS table=default.testtbl (0)
|
|
TUPLE IDS: 0
|
|
---- DISTRIBUTEDPLAN
|
|
Plan Fragment 0
|
|
UNPARTITIONED
|
|
AGGREGATE
|
|
OUTPUT:
|
|
GROUP BY: <slot 3>, <slot 4>, <slot 5>
|
|
TUPLE IDS: 3
|
|
AGGREGATE
|
|
OUTPUT:
|
|
GROUP BY: <slot 3>, <slot 4>, <slot 5>
|
|
TUPLE IDS: 1
|
|
EXCHANGE (3)
|
|
TUPLE IDS: 1
|
|
|
|
Plan Fragment 1
|
|
RANDOM
|
|
STREAM DATA SINK
|
|
EXCHANGE ID: 3
|
|
UNPARTITIONED
|
|
|
|
AGGREGATE
|
|
OUTPUT:
|
|
GROUP BY: testtbl.id, testtbl.name, testtbl.zip
|
|
TUPLE IDS: 1
|
|
SCAN HDFS table=default.testtbl (0)
|
|
TUPLE IDS: 0
|
|
---- SCANRANGELOCATIONS
|
|
NODE 0:
|
|
====
|
|
# distinct w/ explicit select list
|
|
select distinct id, zip
|
|
from (select distinct * from testtbl) x
|
|
---- PLAN
|
|
Plan Fragment 0
|
|
UNPARTITIONED
|
|
AGGREGATE
|
|
OUTPUT:
|
|
GROUP BY: <slot 3>, <slot 5>
|
|
TUPLE IDS: 3
|
|
AGGREGATE
|
|
OUTPUT:
|
|
GROUP BY: testtbl.id, testtbl.name, testtbl.zip
|
|
TUPLE IDS: 1
|
|
SCAN HDFS table=default.testtbl (0)
|
|
TUPLE IDS: 0
|
|
---- DISTRIBUTEDPLAN
|
|
Plan Fragment 0
|
|
UNPARTITIONED
|
|
AGGREGATE
|
|
OUTPUT:
|
|
GROUP BY: <slot 3>, <slot 5>
|
|
TUPLE IDS: 3
|
|
AGGREGATE
|
|
OUTPUT:
|
|
GROUP BY: <slot 3>, <slot 4>, <slot 5>
|
|
TUPLE IDS: 1
|
|
EXCHANGE (3)
|
|
TUPLE IDS: 1
|
|
|
|
Plan Fragment 1
|
|
RANDOM
|
|
STREAM DATA SINK
|
|
EXCHANGE ID: 3
|
|
UNPARTITIONED
|
|
|
|
AGGREGATE
|
|
OUTPUT:
|
|
GROUP BY: testtbl.id, testtbl.name, testtbl.zip
|
|
TUPLE IDS: 1
|
|
SCAN HDFS table=default.testtbl (0)
|
|
TUPLE IDS: 0
|
|
---- SCANRANGELOCATIONS
|
|
NODE 0:
|
|
====
|
|
# aggregate with group-by, having
|
|
select *
|
|
from (
|
|
select int_col % 7 c1, count(*) c2, avg(int_col) c3
|
|
from (
|
|
select * from alltypesagg
|
|
) a
|
|
group by 1
|
|
having avg(int_col) > 500 or count(*) = 10
|
|
) b
|
|
where c1 is not null
|
|
and c2 > 10
|
|
---- PLAN
|
|
Plan Fragment 0
|
|
UNPARTITIONED
|
|
AGGREGATE
|
|
OUTPUT: COUNT(*), SUM(alltypesagg.int_col), COUNT(alltypesagg.int_col)
|
|
GROUP BY: alltypesagg.int_col % 7
|
|
HAVING: <slot 30> / <slot 31> > 500.0 OR <slot 29> = 10, <slot 28> IS NOT NULL, <slot 29> > 10
|
|
TUPLE IDS: 2
|
|
SCAN HDFS table=default.alltypesagg (0)
|
|
TUPLE IDS: 0
|
|
---- DISTRIBUTEDPLAN
|
|
Plan Fragment 0
|
|
UNPARTITIONED
|
|
AGGREGATE
|
|
OUTPUT: SUM(<slot 29>), SUM(<slot 30>), SUM(<slot 31>)
|
|
GROUP BY: <slot 28>
|
|
HAVING: <slot 30> / <slot 31> > 500.0 OR <slot 29> = 10, <slot 28> IS NOT NULL, <slot 29> > 10
|
|
TUPLE IDS: 2
|
|
EXCHANGE (2)
|
|
TUPLE IDS: 2
|
|
|
|
Plan Fragment 1
|
|
RANDOM
|
|
STREAM DATA SINK
|
|
EXCHANGE ID: 2
|
|
UNPARTITIONED
|
|
|
|
AGGREGATE
|
|
OUTPUT: COUNT(*), SUM(alltypesagg.int_col), COUNT(alltypesagg.int_col)
|
|
GROUP BY: alltypesagg.int_col % 7
|
|
TUPLE IDS: 2
|
|
SCAN HDFS table=default.alltypesagg (0)
|
|
TUPLE IDS: 0
|
|
---- SCANRANGELOCATIONS
|
|
NODE 0:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=1/100101.txt 0:75153
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=10/100110.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=2/100102.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=3/100103.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=4/100104.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=5/100105.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=6/100106.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=7/100107.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=8/100108.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=9/100109.txt 0:76263
|
|
LOCATIONS:
|
|
====
|
|
# subquery with left outer join
|
|
select j.*, d.*
|
|
from (
|
|
select *
|
|
from JoinTbl a
|
|
) j
|
|
left outer join
|
|
(
|
|
select *
|
|
from DimTbl b
|
|
) d
|
|
on (j.test_name = d.name)
|
|
where j.test_id <= 1006
|
|
---- PLAN
|
|
Plan Fragment 0
|
|
UNPARTITIONED
|
|
HASH JOIN
|
|
JOIN OP: LEFT OUTER JOIN
|
|
HASH PREDICATES:
|
|
a.test_name = b.name
|
|
TUPLE IDS: 0 2N
|
|
SCAN HDFS table=default.jointbl (0)
|
|
PREDICATES: a.test_id <= 1006
|
|
TUPLE IDS: 0
|
|
SCAN HDFS table=default.dimtbl (1) compact
|
|
TUPLE IDS: 2
|
|
---- DISTRIBUTEDPLAN
|
|
Plan Fragment 0
|
|
UNPARTITIONED
|
|
EXCHANGE (4)
|
|
TUPLE IDS: 0 2N
|
|
|
|
Plan Fragment 1
|
|
RANDOM
|
|
STREAM DATA SINK
|
|
EXCHANGE ID: 4
|
|
UNPARTITIONED
|
|
|
|
HASH JOIN
|
|
JOIN OP: LEFT OUTER JOIN
|
|
HASH PREDICATES:
|
|
a.test_name = b.name
|
|
TUPLE IDS: 0 2N
|
|
SCAN HDFS table=default.jointbl (0)
|
|
PREDICATES: a.test_id <= 1006
|
|
TUPLE IDS: 0
|
|
EXCHANGE (3)
|
|
TUPLE IDS: 2
|
|
|
|
Plan Fragment 2
|
|
RANDOM
|
|
STREAM DATA SINK
|
|
EXCHANGE ID: 3
|
|
UNPARTITIONED
|
|
|
|
SCAN HDFS table=default.dimtbl (1) compact
|
|
TUPLE IDS: 2
|
|
---- SCANRANGELOCATIONS
|
|
NODE 0:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/jointbl/data.csv 0:433
|
|
LOCATIONS:
|
|
NODE 1:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/dimtbl/data.csv 0:171
|
|
LOCATIONS:
|
|
====
|
|
# complex join, having joined subquery on the rhs, and predicate
|
|
# at multiple subquery levels
|
|
select x.smallint_col, count(x.id)
|
|
from alltypessmall c
|
|
left outer join
|
|
(
|
|
select a.smallint_col smallint_col, a.tinyint_col tinyint_col, a.day day,
|
|
a.int_col int_col, a.month month, b.float_col float_col, b.id id
|
|
from (
|
|
select *
|
|
from alltypesagg a
|
|
) a
|
|
join
|
|
alltypessmall b
|
|
on (a.smallint_col = b.id)
|
|
) x
|
|
on (x.tinyint_col = c.id)
|
|
group by x.smallint_col
|
|
---- PLAN
|
|
Plan Fragment 0
|
|
UNPARTITIONED
|
|
AGGREGATE
|
|
OUTPUT: COUNT(b.id)
|
|
GROUP BY: a.smallint_col
|
|
TUPLE IDS: 5
|
|
HASH JOIN
|
|
JOIN OP: LEFT OUTER JOIN
|
|
HASH PREDICATES:
|
|
c.id = a.tinyint_col
|
|
TUPLE IDS: 0 1N 3N
|
|
SCAN HDFS table=default.alltypessmall (0)
|
|
TUPLE IDS: 0
|
|
HASH JOIN
|
|
JOIN OP: INNER JOIN
|
|
HASH PREDICATES:
|
|
a.smallint_col = b.id
|
|
TUPLE IDS: 1 3
|
|
SCAN HDFS table=default.alltypesagg (1) compact
|
|
TUPLE IDS: 1
|
|
SCAN HDFS table=default.alltypessmall (2) compact
|
|
TUPLE IDS: 3
|
|
---- DISTRIBUTEDPLAN
|
|
Plan Fragment 0
|
|
UNPARTITIONED
|
|
AGGREGATE
|
|
OUTPUT: SUM(<slot 39>)
|
|
GROUP BY: <slot 38>
|
|
TUPLE IDS: 5
|
|
EXCHANGE (8)
|
|
TUPLE IDS: 5
|
|
|
|
Plan Fragment 1
|
|
RANDOM
|
|
STREAM DATA SINK
|
|
EXCHANGE ID: 8
|
|
UNPARTITIONED
|
|
|
|
AGGREGATE
|
|
OUTPUT: COUNT(b.id)
|
|
GROUP BY: a.smallint_col
|
|
TUPLE IDS: 5
|
|
HASH JOIN
|
|
JOIN OP: LEFT OUTER JOIN
|
|
HASH PREDICATES:
|
|
c.id = a.tinyint_col
|
|
TUPLE IDS: 0 1N 3N
|
|
SCAN HDFS table=default.alltypessmall (0)
|
|
TUPLE IDS: 0
|
|
EXCHANGE (7)
|
|
TUPLE IDS: 1 3
|
|
|
|
Plan Fragment 2
|
|
RANDOM
|
|
STREAM DATA SINK
|
|
EXCHANGE ID: 7
|
|
UNPARTITIONED
|
|
|
|
HASH JOIN
|
|
JOIN OP: INNER JOIN
|
|
HASH PREDICATES:
|
|
a.smallint_col = b.id
|
|
TUPLE IDS: 1 3
|
|
SCAN HDFS table=default.alltypesagg (1) compact
|
|
TUPLE IDS: 1
|
|
EXCHANGE (6)
|
|
TUPLE IDS: 3
|
|
|
|
Plan Fragment 3
|
|
RANDOM
|
|
STREAM DATA SINK
|
|
EXCHANGE ID: 6
|
|
UNPARTITIONED
|
|
|
|
SCAN HDFS table=default.alltypessmall (2) compact
|
|
TUPLE IDS: 3
|
|
---- SCANRANGELOCATIONS
|
|
NODE 0:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=1/090101.txt 0:1610
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=2/090201.txt 0:1621
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=3/090301.txt 0:1620
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=4/090401.txt 0:1621
|
|
LOCATIONS:
|
|
NODE 1:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=1/100101.txt 0:75153
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=10/100110.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=2/100102.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=3/100103.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=4/100104.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=5/100105.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=6/100106.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=7/100107.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=8/100108.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=9/100109.txt 0:76263
|
|
LOCATIONS:
|
|
NODE 2:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=1/090101.txt 0:1610
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=2/090201.txt 0:1621
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=3/090301.txt 0:1620
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=4/090401.txt 0:1621
|
|
LOCATIONS:
|
|
====
|
|
# complex join with a joined subquery on the lhs, and predicates
|
|
# at multiple subquery levels
|
|
select x.smallint_col, x.id, x.tinyint_col, c.id, x.int_col, x.float_col, c.string_col
|
|
from
|
|
(
|
|
select a.smallint_col smallint_col, a.tinyint_col tinyint_col, a.day day,
|
|
a.int_col int_col, a.month month, b.float_col float_col, b.id id
|
|
from (
|
|
select *
|
|
from alltypesagg a
|
|
where month=1
|
|
) a
|
|
join
|
|
alltypessmall b
|
|
on (a.smallint_col = b.id)
|
|
) x
|
|
join
|
|
alltypessmall c
|
|
on (x.tinyint_col = c.id)
|
|
where x.day=1
|
|
and x.int_col > 899
|
|
and x.float_col > 4.5
|
|
and c.string_col < '7'
|
|
and x.int_col + x.float_col + CAST(c.string_col AS FLOAT) < 1000
|
|
---- PLAN
|
|
Plan Fragment 0
|
|
UNPARTITIONED
|
|
HASH JOIN
|
|
JOIN OP: INNER JOIN
|
|
HASH PREDICATES:
|
|
a.tinyint_col = c.id
|
|
OTHER PREDICATES: a.int_col + b.float_col + CAST(c.string_col AS FLOAT) < 1000.0
|
|
TUPLE IDS: 0 2 4
|
|
HASH JOIN
|
|
JOIN OP: INNER JOIN
|
|
HASH PREDICATES:
|
|
a.smallint_col = b.id
|
|
TUPLE IDS: 0 2
|
|
SCAN HDFS table=default.alltypesagg (0)
|
|
PREDICATES: a.int_col > 899
|
|
TUPLE IDS: 0
|
|
SCAN HDFS table=default.alltypessmall (1) compact
|
|
PREDICATES: b.float_col > 4.5
|
|
TUPLE IDS: 2
|
|
SCAN HDFS table=default.alltypessmall (3) compact
|
|
PREDICATES: c.string_col < '7'
|
|
TUPLE IDS: 4
|
|
---- DISTRIBUTEDPLAN
|
|
Plan Fragment 0
|
|
UNPARTITIONED
|
|
EXCHANGE (7)
|
|
TUPLE IDS: 0 2 4
|
|
|
|
Plan Fragment 1
|
|
RANDOM
|
|
STREAM DATA SINK
|
|
EXCHANGE ID: 7
|
|
UNPARTITIONED
|
|
|
|
HASH JOIN
|
|
JOIN OP: INNER JOIN
|
|
HASH PREDICATES:
|
|
a.tinyint_col = c.id
|
|
OTHER PREDICATES: a.int_col + b.float_col + CAST(c.string_col AS FLOAT) < 1000.0
|
|
TUPLE IDS: 0 2 4
|
|
HASH JOIN
|
|
JOIN OP: INNER JOIN
|
|
HASH PREDICATES:
|
|
a.smallint_col = b.id
|
|
TUPLE IDS: 0 2
|
|
SCAN HDFS table=default.alltypesagg (0)
|
|
PREDICATES: a.int_col > 899
|
|
TUPLE IDS: 0
|
|
EXCHANGE (5)
|
|
TUPLE IDS: 2
|
|
EXCHANGE (6)
|
|
TUPLE IDS: 4
|
|
|
|
Plan Fragment 2
|
|
RANDOM
|
|
STREAM DATA SINK
|
|
EXCHANGE ID: 6
|
|
UNPARTITIONED
|
|
|
|
SCAN HDFS table=default.alltypessmall (3) compact
|
|
PREDICATES: c.string_col < '7'
|
|
TUPLE IDS: 4
|
|
|
|
Plan Fragment 3
|
|
RANDOM
|
|
STREAM DATA SINK
|
|
EXCHANGE ID: 5
|
|
UNPARTITIONED
|
|
|
|
SCAN HDFS table=default.alltypessmall (1) compact
|
|
PREDICATES: b.float_col > 4.5
|
|
TUPLE IDS: 2
|
|
---- SCANRANGELOCATIONS
|
|
NODE 0:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=1/100101.txt 0:75153
|
|
LOCATIONS:
|
|
NODE 1:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=1/090101.txt 0:1610
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=2/090201.txt 0:1621
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=3/090301.txt 0:1620
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=4/090401.txt 0:1621
|
|
LOCATIONS:
|
|
NODE 3:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=1/090101.txt 0:1610
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=2/090201.txt 0:1621
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=3/090301.txt 0:1620
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=4/090401.txt 0:1621
|
|
LOCATIONS:
|
|
====
|
|
# complex join with a joined aggregate subquery on the rhs, and predicates
|
|
# at multiple subquery levels
|
|
select x.smallint_col, sum(x.cnt)
|
|
from alltypessmall c
|
|
join (
|
|
select count(a.id) cnt, b.smallint_col smallint_col
|
|
from ( select * from alltypesagg a ) a
|
|
join alltypessmall b on (a.smallint_col = b.id)
|
|
group by b.smallint_col
|
|
) x on (x.smallint_col = c.id)
|
|
group by x.smallint_col
|
|
---- PLAN
|
|
Plan Fragment 0
|
|
UNPARTITIONED
|
|
AGGREGATE
|
|
OUTPUT: SUM(<slot 31>)
|
|
GROUP BY: <slot 30>
|
|
TUPLE IDS: 6
|
|
HASH JOIN
|
|
JOIN OP: INNER JOIN
|
|
HASH PREDICATES:
|
|
c.id = <slot 30>
|
|
TUPLE IDS: 0 4
|
|
SCAN HDFS table=default.alltypessmall (0)
|
|
TUPLE IDS: 0
|
|
AGGREGATE
|
|
OUTPUT: COUNT(a.id)
|
|
GROUP BY: b.smallint_col
|
|
TUPLE IDS: 4
|
|
HASH JOIN
|
|
JOIN OP: INNER JOIN
|
|
HASH PREDICATES:
|
|
a.smallint_col = b.id
|
|
TUPLE IDS: 1 3
|
|
SCAN HDFS table=default.alltypesagg (1)
|
|
TUPLE IDS: 1
|
|
SCAN HDFS table=default.alltypessmall (2) compact
|
|
TUPLE IDS: 3
|
|
---- DISTRIBUTEDPLAN
|
|
Plan Fragment 0
|
|
UNPARTITIONED
|
|
AGGREGATE
|
|
OUTPUT: SUM(<slot 36>)
|
|
GROUP BY: <slot 35>
|
|
TUPLE IDS: 6
|
|
EXCHANGE (11)
|
|
TUPLE IDS: 6
|
|
|
|
Plan Fragment 1
|
|
RANDOM
|
|
STREAM DATA SINK
|
|
EXCHANGE ID: 11
|
|
UNPARTITIONED
|
|
|
|
AGGREGATE
|
|
OUTPUT: SUM(<slot 31>)
|
|
GROUP BY: <slot 30>
|
|
TUPLE IDS: 6
|
|
HASH JOIN
|
|
JOIN OP: INNER JOIN
|
|
HASH PREDICATES:
|
|
c.id = <slot 30>
|
|
TUPLE IDS: 0 4
|
|
SCAN HDFS table=default.alltypessmall (0)
|
|
TUPLE IDS: 0
|
|
EXCHANGE (10)
|
|
TUPLE IDS: 4
|
|
|
|
Plan Fragment 2
|
|
UNPARTITIONED
|
|
STREAM DATA SINK
|
|
EXCHANGE ID: 10
|
|
UNPARTITIONED
|
|
|
|
AGGREGATE
|
|
OUTPUT: SUM(<slot 31>)
|
|
GROUP BY: <slot 30>
|
|
TUPLE IDS: 4
|
|
EXCHANGE (8)
|
|
TUPLE IDS: 4
|
|
|
|
Plan Fragment 3
|
|
RANDOM
|
|
STREAM DATA SINK
|
|
EXCHANGE ID: 8
|
|
UNPARTITIONED
|
|
|
|
AGGREGATE
|
|
OUTPUT: COUNT(a.id)
|
|
GROUP BY: b.smallint_col
|
|
TUPLE IDS: 4
|
|
HASH JOIN
|
|
JOIN OP: INNER JOIN
|
|
HASH PREDICATES:
|
|
a.smallint_col = b.id
|
|
TUPLE IDS: 1 3
|
|
SCAN HDFS table=default.alltypesagg (1)
|
|
TUPLE IDS: 1
|
|
EXCHANGE (7)
|
|
TUPLE IDS: 3
|
|
|
|
Plan Fragment 4
|
|
RANDOM
|
|
STREAM DATA SINK
|
|
EXCHANGE ID: 7
|
|
UNPARTITIONED
|
|
|
|
SCAN HDFS table=default.alltypessmall (2) compact
|
|
TUPLE IDS: 3
|
|
---- SCANRANGELOCATIONS
|
|
NODE 0:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=1/090101.txt 0:1610
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=2/090201.txt 0:1621
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=3/090301.txt 0:1620
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=4/090401.txt 0:1621
|
|
LOCATIONS:
|
|
NODE 1:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=1/100101.txt 0:75153
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=10/100110.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=2/100102.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=3/100103.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=4/100104.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=5/100105.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=6/100106.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=7/100107.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=8/100108.txt 0:76263
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=9/100109.txt 0:76263
|
|
LOCATIONS:
|
|
NODE 2:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=1/090101.txt 0:1610
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=2/090201.txt 0:1621
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=3/090301.txt 0:1620
|
|
LOCATIONS:
|
|
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=4/090401.txt 0:1621
|
|
LOCATIONS:
|
|
====
|