Files
impala/testdata/workloads/functional-planner/queries/PlannerTest/subquery.test
Marcel Kornacker f6af9316d9 Fix for IMP-137: incorrect predicate placement for outer joins
Fixing predicate assignment for outer joins:
- On clause predicates for outer joins are now assigned to the join node
- the exceptions are On clause predicates that can be directly evaluated
  by the outer-joined tables themselves; those are "pushed down"
- Where clause predicates for outer-joined tables are assigned to the join node
  that materializes the outer join
2014-01-08 10:46:50 -08:00

1442 lines
41 KiB
Plaintext

# subquery with aggregation and order by/limit, as left-hand side of join;
# having clause in subquery is transferred to merge agg step in distrib plan
select *
from (
select int_col, count(*)
from alltypessmall
where month = 1
group by int_col
having count(*) > 1
order by count(*) desc limit 5
) t1
join alltypes t2 on (t1.int_col = t2.int_col)
where month = 1
---- PLAN
Plan Fragment 0
UNPARTITIONED
HASH JOIN
JOIN OP: INNER JOIN
HASH PREDICATES:
<slot 2> = t2.int_col
TUPLE IDS: 1 3
TOP-N
ORDER BY: <slot 3> DESC
LIMIT: 5
TUPLE IDS: 1
AGGREGATE
OUTPUT: COUNT(*)
GROUP BY: int_col
HAVING: <slot 3> > 1
TUPLE IDS: 1
SCAN HDFS table=default.alltypessmall (0)
TUPLE IDS: 0
SCAN HDFS table=default.alltypes (3) compact
TUPLE IDS: 3
---- DISTRIBUTEDPLAN
Plan Fragment 0
UNPARTITIONED
HASH JOIN
JOIN OP: INNER JOIN
HASH PREDICATES:
<slot 2> = t2.int_col
TUPLE IDS: 1 3
TOP-N
ORDER BY: <slot 3> DESC
LIMIT: 5
TUPLE IDS: 1
AGGREGATE
OUTPUT: SUM(<slot 3>)
GROUP BY: <slot 2>
HAVING: <slot 3> > 1
TUPLE IDS: 1
EXCHANGE (5)
TUPLE IDS: 1
EXCHANGE (7)
TUPLE IDS: 3
Plan Fragment 1
RANDOM
STREAM DATA SINK
EXCHANGE ID: 7
UNPARTITIONED
SCAN HDFS table=default.alltypes (3) compact
TUPLE IDS: 3
Plan Fragment 2
RANDOM
STREAM DATA SINK
EXCHANGE ID: 5
UNPARTITIONED
AGGREGATE
OUTPUT: COUNT(*)
GROUP BY: int_col
TUPLE IDS: 1
SCAN HDFS table=default.alltypessmall (0)
TUPLE IDS: 0
---- SCANRANGELOCATIONS
NODE 0:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=1/090101.txt 0:1610
LOCATIONS:
NODE 3:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypes/year=2009/month=1/090101.txt 0:20433
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypes/year=2010/month=1/100101.txt 0:20853
LOCATIONS:
====
# simple full scan subquery
select * from (select y x from (select id y from hbasealltypessmall) a) b
---- PLAN
Plan Fragment 0
UNPARTITIONED
SCAN HBASE table=hbasealltypessmall (0)
TUPLE IDS: 0
---- DISTRIBUTEDPLAN
Plan Fragment 0
UNPARTITIONED
EXCHANGE (1)
TUPLE IDS: 0
Plan Fragment 1
RANDOM
STREAM DATA SINK
EXCHANGE ID: 1
UNPARTITIONED
SCAN HBASE table=hbasealltypessmall (0)
TUPLE IDS: 0
---- SCANRANGELOCATIONS
NODE 0:
HBASE KEYRANGE <unbounded>:<unbounded>
LOCATIONS:
====
# subquery doing join
select * from (select t2.*
from testtbl t1 join testtbl t2 using(id)
where t1.zip = 94611) x
---- PLAN
Plan Fragment 0
UNPARTITIONED
HASH JOIN
JOIN OP: INNER JOIN
HASH PREDICATES:
t1.id = t2.id
TUPLE IDS: 0 1
SCAN HDFS table=default.testtbl (0)
PREDICATES: t1.zip = 94611
TUPLE IDS: 0
SCAN HDFS table=default.testtbl (1) compact
TUPLE IDS: 1
---- DISTRIBUTEDPLAN
Plan Fragment 0
UNPARTITIONED
EXCHANGE (4)
TUPLE IDS: 0 1
Plan Fragment 1
RANDOM
STREAM DATA SINK
EXCHANGE ID: 4
UNPARTITIONED
HASH JOIN
JOIN OP: INNER JOIN
HASH PREDICATES:
t1.id = t2.id
TUPLE IDS: 0 1
SCAN HDFS table=default.testtbl (0)
PREDICATES: t1.zip = 94611
TUPLE IDS: 0
EXCHANGE (3)
TUPLE IDS: 1
Plan Fragment 2
RANDOM
STREAM DATA SINK
EXCHANGE ID: 3
UNPARTITIONED
SCAN HDFS table=default.testtbl (1) compact
TUPLE IDS: 1
---- SCANRANGELOCATIONS
NODE 0:
NODE 1:
====
# subquery doing join
# multiple join predicates;
# scan predicates get propagated correctly;
# non-eq join predicates are evaluated as extra conjuncts by the join node
select * from (select a.*
from alltypesagg a right outer join alltypessmall b using (id, int_col)
where a.day >= 6
and b.month > 2
and a.tinyint_col = 15
and b.string_col = '15'
and a.tinyint_col + b.tinyint_col < 15) x
---- PLAN
Plan Fragment 0
UNPARTITIONED
HASH JOIN
JOIN OP: RIGHT OUTER JOIN
HASH PREDICATES:
a.id = b.id
a.int_col = b.int_col
OTHER PREDICATES: a.day >= 6, a.tinyint_col = 15, a.tinyint_col + b.tinyint_col < 15
TUPLE IDS: 0N 1
SCAN HDFS table=default.alltypesagg (0)
TUPLE IDS: 0
SCAN HDFS table=default.alltypessmall (1) compact
PREDICATES: b.string_col = '15'
TUPLE IDS: 1
---- DISTRIBUTEDPLAN
Plan Fragment 0
UNPARTITIONED
EXCHANGE (4)
TUPLE IDS: 0N 1
Plan Fragment 1
RANDOM
STREAM DATA SINK
EXCHANGE ID: 4
UNPARTITIONED
HASH JOIN
JOIN OP: RIGHT OUTER JOIN
HASH PREDICATES:
a.id = b.id
a.int_col = b.int_col
OTHER PREDICATES: a.day >= 6, a.tinyint_col = 15, a.tinyint_col + b.tinyint_col < 15
TUPLE IDS: 0N 1
SCAN HDFS table=default.alltypesagg (0)
TUPLE IDS: 0
EXCHANGE (3)
TUPLE IDS: 1
Plan Fragment 2
RANDOM
STREAM DATA SINK
EXCHANGE ID: 3
UNPARTITIONED
SCAN HDFS table=default.alltypessmall (1) compact
PREDICATES: b.string_col = '15'
TUPLE IDS: 1
---- SCANRANGELOCATIONS
NODE 0:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=1/100101.txt 0:75153
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=10/100110.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=2/100102.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=3/100103.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=4/100104.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=5/100105.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=6/100106.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=7/100107.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=8/100108.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=9/100109.txt 0:76263
LOCATIONS:
NODE 1:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=3/090301.txt 0:1620
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=4/090401.txt 0:1621
LOCATIONS:
====
# predicate pushdown
select * from (select * from hbasealltypessmall) a where id < 5
---- PLAN
Plan Fragment 0
UNPARTITIONED
SCAN HBASE table=hbasealltypessmall (0)
PREDICATES: hbasealltypessmall.id < 5
TUPLE IDS: 0
---- DISTRIBUTEDPLAN
Plan Fragment 0
UNPARTITIONED
EXCHANGE (1)
TUPLE IDS: 0
Plan Fragment 1
RANDOM
STREAM DATA SINK
EXCHANGE ID: 1
UNPARTITIONED
SCAN HBASE table=hbasealltypessmall (0)
PREDICATES: hbasealltypessmall.id < 5
TUPLE IDS: 0
---- SCANRANGELOCATIONS
NODE 0:
HBASE KEYRANGE <unbounded>:<unbounded>
LOCATIONS:
====
# subquery join
# multiple join predicates;
# scan predicates get propagated correctly;
# non-eq join predicates are evaluated as extra conjuncts by the join node
select *
from (select id, int_col, day, tinyint_col from alltypesagg) a right outer join
(select id, int_col, month, string_col, tinyint_col from alltypessmall) b using (id, int_col)
where a.day >= 6
and b.month > 2
and a.tinyint_col = 15
and b.string_col = '15'
and a.tinyint_col + b.tinyint_col < 15
---- PLAN
Plan Fragment 0
UNPARTITIONED
HASH JOIN
JOIN OP: RIGHT OUTER JOIN
HASH PREDICATES:
id = id
int_col = int_col
OTHER PREDICATES: day >= 6, tinyint_col = 15, tinyint_col + tinyint_col < 15
TUPLE IDS: 0N 2
SCAN HDFS table=default.alltypesagg (0)
TUPLE IDS: 0
SCAN HDFS table=default.alltypessmall (1) compact
PREDICATES: string_col = '15'
TUPLE IDS: 2
---- DISTRIBUTEDPLAN
Plan Fragment 0
UNPARTITIONED
EXCHANGE (4)
TUPLE IDS: 0N 2
Plan Fragment 1
RANDOM
STREAM DATA SINK
EXCHANGE ID: 4
UNPARTITIONED
HASH JOIN
JOIN OP: RIGHT OUTER JOIN
HASH PREDICATES:
id = id
int_col = int_col
OTHER PREDICATES: day >= 6, tinyint_col = 15, tinyint_col + tinyint_col < 15
TUPLE IDS: 0N 2
SCAN HDFS table=default.alltypesagg (0)
TUPLE IDS: 0
EXCHANGE (3)
TUPLE IDS: 2
Plan Fragment 2
RANDOM
STREAM DATA SINK
EXCHANGE ID: 3
UNPARTITIONED
SCAN HDFS table=default.alltypessmall (1) compact
PREDICATES: string_col = '15'
TUPLE IDS: 2
---- SCANRANGELOCATIONS
NODE 0:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=1/100101.txt 0:75153
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=10/100110.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=2/100102.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=3/100103.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=4/100104.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=5/100105.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=6/100106.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=7/100107.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=8/100108.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=9/100109.txt 0:76263
LOCATIONS:
NODE 1:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=3/090301.txt 0:1620
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=4/090401.txt 0:1621
LOCATIONS:
====
# subquery join; the lhs is a nested subquery that applies its own predicate (day >= 6)
# multiple join predicates;
# scan predicates get propagated correctly;
# non-eq join predicates are evaluated as extra conjuncts by the join node
select *
from (select id, int_col, day, tinyint_col
from (select id, int_col, day, tinyint_col from alltypesagg) a0
where a0.day >= 6) a
right outer join
(select id, int_col, month, string_col, tinyint_col from alltypessmall) b using (id, int_col)
where b.month > 2
and a.tinyint_col = 15
and b.string_col = '15'
and a.tinyint_col + b.tinyint_col < 15
---- PLAN
Plan Fragment 0
UNPARTITIONED
HASH JOIN
JOIN OP: RIGHT OUTER JOIN
HASH PREDICATES:
id = id
int_col = int_col
OTHER PREDICATES: tinyint_col = 15, tinyint_col + tinyint_col < 15
TUPLE IDS: 0N 3
SCAN HDFS table=default.alltypesagg (0)
TUPLE IDS: 0
SCAN HDFS table=default.alltypessmall (1) compact
PREDICATES: string_col = '15'
TUPLE IDS: 3
---- DISTRIBUTEDPLAN
Plan Fragment 0
UNPARTITIONED
EXCHANGE (4)
TUPLE IDS: 0N 3
Plan Fragment 1
RANDOM
STREAM DATA SINK
EXCHANGE ID: 4
UNPARTITIONED
HASH JOIN
JOIN OP: RIGHT OUTER JOIN
HASH PREDICATES:
id = id
int_col = int_col
OTHER PREDICATES: tinyint_col = 15, tinyint_col + tinyint_col < 15
TUPLE IDS: 0N 3
SCAN HDFS table=default.alltypesagg (0)
TUPLE IDS: 0
EXCHANGE (3)
TUPLE IDS: 3
Plan Fragment 2
RANDOM
STREAM DATA SINK
EXCHANGE ID: 3
UNPARTITIONED
SCAN HDFS table=default.alltypessmall (1) compact
PREDICATES: string_col = '15'
TUPLE IDS: 3
---- SCANRANGELOCATIONS
NODE 0:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=10/100110.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=6/100106.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=7/100107.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=8/100108.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=9/100109.txt 0:76263
LOCATIONS:
NODE 1:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=3/090301.txt 0:1620
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=4/090401.txt 0:1621
LOCATIONS:
====
# complex join, having joined subquery on the rhs, and predicates
# at multiple subquery levels. This tests that both sides of a join
# that is itself on the build side of another join get compacted.
select x.smallint_col, x.id, x.tinyint_col, c.id, x.int_col, x.float_col, c.string_col
from alltypessmall c
join (
select a.smallint_col smallint_col, a.tinyint_col tinyint_col, a.day day,
a.int_col int_col, a.month month, b.float_col float_col, b.id id
from ( select * from alltypesagg a where month=1 ) a
join alltypessmall b on (a.smallint_col = b.id)
) x on (x.tinyint_col = c.id)
where x.day=1
and x.int_col > 899
and x.float_col > 4.5
and c.string_col < '7'
and x.int_col + x.float_col + cast(c.string_col as float) < 1000
---- PLAN
Plan Fragment 0
UNPARTITIONED
HASH JOIN
JOIN OP: INNER JOIN
HASH PREDICATES:
c.id = a.tinyint_col
OTHER PREDICATES: a.int_col + b.float_col + CAST(c.string_col AS FLOAT) < 1000.0
TUPLE IDS: 0 1 3
SCAN HDFS table=default.alltypessmall (0)
PREDICATES: c.string_col < '7'
TUPLE IDS: 0
HASH JOIN
JOIN OP: INNER JOIN
HASH PREDICATES:
a.smallint_col = b.id
TUPLE IDS: 1 3
SCAN HDFS table=default.alltypesagg (1) compact
PREDICATES: a.int_col > 899
TUPLE IDS: 1
SCAN HDFS table=default.alltypessmall (2) compact
PREDICATES: b.float_col > 4.5
TUPLE IDS: 3
---- DISTRIBUTEDPLAN
Plan Fragment 0
UNPARTITIONED
EXCHANGE (7)
TUPLE IDS: 0 1 3
Plan Fragment 1
RANDOM
STREAM DATA SINK
EXCHANGE ID: 7
UNPARTITIONED
HASH JOIN
JOIN OP: INNER JOIN
HASH PREDICATES:
c.id = a.tinyint_col
OTHER PREDICATES: a.int_col + b.float_col + CAST(c.string_col AS FLOAT) < 1000.0
TUPLE IDS: 0 1 3
SCAN HDFS table=default.alltypessmall (0)
PREDICATES: c.string_col < '7'
TUPLE IDS: 0
EXCHANGE (6)
TUPLE IDS: 1 3
Plan Fragment 2
RANDOM
STREAM DATA SINK
EXCHANGE ID: 6
UNPARTITIONED
HASH JOIN
JOIN OP: INNER JOIN
HASH PREDICATES:
a.smallint_col = b.id
TUPLE IDS: 1 3
SCAN HDFS table=default.alltypesagg (1) compact
PREDICATES: a.int_col > 899
TUPLE IDS: 1
EXCHANGE (5)
TUPLE IDS: 3
Plan Fragment 3
RANDOM
STREAM DATA SINK
EXCHANGE ID: 5
UNPARTITIONED
SCAN HDFS table=default.alltypessmall (2) compact
PREDICATES: b.float_col > 4.5
TUPLE IDS: 3
---- SCANRANGELOCATIONS
NODE 0:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=1/090101.txt 0:1610
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=2/090201.txt 0:1621
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=3/090301.txt 0:1620
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=4/090401.txt 0:1621
LOCATIONS:
NODE 1:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=1/100101.txt 0:75153
LOCATIONS:
NODE 2:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=1/090101.txt 0:1610
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=2/090201.txt 0:1621
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=3/090301.txt 0:1620
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=4/090401.txt 0:1621
LOCATIONS:
====
# with grouping on top of a select-* subquery
select tinyint_col, count(*), min(tinyint_col), max(tinyint_col), sum(tinyint_col),
avg(tinyint_col)
from (select * from alltypesagg) a
group by 1
---- PLAN
Plan Fragment 0
UNPARTITIONED
AGGREGATE
OUTPUT: COUNT(*), MIN(alltypesagg.tinyint_col), MAX(alltypesagg.tinyint_col), SUM(alltypesagg.tinyint_col), COUNT(alltypesagg.tinyint_col)
GROUP BY: alltypesagg.tinyint_col
TUPLE IDS: 2
SCAN HDFS table=default.alltypesagg (0)
TUPLE IDS: 0
---- DISTRIBUTEDPLAN
Plan Fragment 0
UNPARTITIONED
AGGREGATE
OUTPUT: SUM(<slot 29>), MIN(<slot 30>), MAX(<slot 31>), SUM(<slot 32>), SUM(<slot 33>)
GROUP BY: <slot 28>
TUPLE IDS: 2
EXCHANGE (2)
TUPLE IDS: 2
Plan Fragment 1
RANDOM
STREAM DATA SINK
EXCHANGE ID: 2
UNPARTITIONED
AGGREGATE
OUTPUT: COUNT(*), MIN(alltypesagg.tinyint_col), MAX(alltypesagg.tinyint_col), SUM(alltypesagg.tinyint_col), COUNT(alltypesagg.tinyint_col)
GROUP BY: alltypesagg.tinyint_col
TUPLE IDS: 2
SCAN HDFS table=default.alltypesagg (0)
TUPLE IDS: 0
---- SCANRANGELOCATIONS
NODE 0:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=1/100101.txt 0:75153
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=10/100110.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=2/100102.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=3/100103.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=4/100104.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=5/100105.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=6/100106.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=7/100107.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=8/100108.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=9/100109.txt 0:76263
LOCATIONS:
====
# with grouping inside the subquery
select * from (
select tinyint_col, count(*), min(tinyint_col), max(tinyint_col), sum(tinyint_col),
avg(tinyint_col)
from alltypesagg
group by 1
) a
---- PLAN
Plan Fragment 0
UNPARTITIONED
AGGREGATE
OUTPUT: COUNT(*), MIN(tinyint_col), MAX(tinyint_col), SUM(tinyint_col), COUNT(tinyint_col)
GROUP BY: tinyint_col
TUPLE IDS: 1
SCAN HDFS table=default.alltypesagg (0)
TUPLE IDS: 0
---- DISTRIBUTEDPLAN
Plan Fragment 0
UNPARTITIONED
AGGREGATE
OUTPUT: SUM(<slot 2>), MIN(<slot 3>), MAX(<slot 4>), SUM(<slot 5>), SUM(<slot 6>)
GROUP BY: <slot 1>
TUPLE IDS: 1
EXCHANGE (2)
TUPLE IDS: 1
Plan Fragment 1
RANDOM
STREAM DATA SINK
EXCHANGE ID: 2
UNPARTITIONED
AGGREGATE
OUTPUT: COUNT(*), MIN(tinyint_col), MAX(tinyint_col), SUM(tinyint_col), COUNT(tinyint_col)
GROUP BY: tinyint_col
TUPLE IDS: 1
SCAN HDFS table=default.alltypesagg (0)
TUPLE IDS: 0
---- SCANRANGELOCATIONS
NODE 0:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=1/100101.txt 0:75153
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=10/100110.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=2/100102.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=3/100103.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=4/100104.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=5/100105.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=6/100106.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=7/100107.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=8/100108.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=9/100109.txt 0:76263
LOCATIONS:
====
select c1, c2, c3
from (
select c1, c2, c3
from (
select int_col c1, sum(float_col) c2, min(float_col) c3
from hbasealltypessmall
group by 1
) x
order by 2,3 desc
limit 5
) y
---- PLAN
Plan Fragment 0
UNPARTITIONED
TOP-N
ORDER BY: <slot 3> ASC, <slot 4> DESC
LIMIT: 5
TUPLE IDS: 1
AGGREGATE
OUTPUT: SUM(float_col), MIN(float_col)
GROUP BY: int_col
TUPLE IDS: 1
SCAN HBASE table=hbasealltypessmall (0)
TUPLE IDS: 0
---- DISTRIBUTEDPLAN
Plan Fragment 0
UNPARTITIONED
TOP-N
ORDER BY: <slot 3> ASC, <slot 4> DESC
LIMIT: 5
TUPLE IDS: 1
AGGREGATE
OUTPUT: SUM(<slot 3>), MIN(<slot 4>)
GROUP BY: <slot 2>
TUPLE IDS: 1
EXCHANGE (3)
TUPLE IDS: 1
Plan Fragment 1
RANDOM
STREAM DATA SINK
EXCHANGE ID: 3
UNPARTITIONED
AGGREGATE
OUTPUT: SUM(float_col), MIN(float_col)
GROUP BY: int_col
TUPLE IDS: 1
SCAN HBASE table=hbasealltypessmall (0)
TUPLE IDS: 0
---- SCANRANGELOCATIONS
NODE 0:
HBASE KEYRANGE <unbounded>:<unbounded>
LOCATIONS:
====
select c1, x2
from (
select c1, min(c2) x2
from (
select c1, c2, c3
from (
select int_col c1, tinyint_col c2, min(float_col) c3
from hbasealltypessmall
group by 1, 2
order by 1,2
limit 1
) x
) x2
group by c1
) y
order by 2,1 desc
limit 0
---- PLAN
Plan Fragment 0
UNPARTITIONED
TOP-N
ORDER BY: <slot 13> ASC, <slot 12> DESC
LIMIT: 0
TUPLE IDS: 4
AGGREGATE
OUTPUT: MIN(<slot 4>)
GROUP BY: <slot 3>
TUPLE IDS: 4
TOP-N
ORDER BY: <slot 3> ASC, <slot 4> ASC
LIMIT: 1
TUPLE IDS: 1
AGGREGATE
OUTPUT: MIN(float_col)
GROUP BY: int_col, tinyint_col
TUPLE IDS: 1
SCAN HBASE table=hbasealltypessmall (0)
TUPLE IDS: 0
---- DISTRIBUTEDPLAN
Plan Fragment 0
UNPARTITIONED
TOP-N
ORDER BY: <slot 13> ASC, <slot 12> DESC
LIMIT: 0
TUPLE IDS: 4
AGGREGATE
OUTPUT: MIN(<slot 4>)
GROUP BY: <slot 3>
TUPLE IDS: 4
TOP-N
ORDER BY: <slot 3> ASC, <slot 4> ASC
LIMIT: 1
TUPLE IDS: 1
AGGREGATE
OUTPUT: MIN(<slot 5>)
GROUP BY: <slot 3>, <slot 4>
TUPLE IDS: 1
EXCHANGE (5)
TUPLE IDS: 1
Plan Fragment 1
RANDOM
STREAM DATA SINK
EXCHANGE ID: 5
UNPARTITIONED
AGGREGATE
OUTPUT: MIN(float_col)
GROUP BY: int_col, tinyint_col
TUPLE IDS: 1
SCAN HBASE table=hbasealltypessmall (0)
TUPLE IDS: 0
---- SCANRANGELOCATIONS
NODE 0:
HBASE KEYRANGE <unbounded>:<unbounded>
LOCATIONS:
====
# distinct *
select distinct *
from (select distinct * from testtbl) x
---- PLAN
Plan Fragment 0
UNPARTITIONED
AGGREGATE
OUTPUT:
GROUP BY: <slot 3>, <slot 4>, <slot 5>
TUPLE IDS: 3
AGGREGATE
OUTPUT:
GROUP BY: testtbl.id, testtbl.name, testtbl.zip
TUPLE IDS: 1
SCAN HDFS table=default.testtbl (0)
TUPLE IDS: 0
---- DISTRIBUTEDPLAN
Plan Fragment 0
UNPARTITIONED
AGGREGATE
OUTPUT:
GROUP BY: <slot 3>, <slot 4>, <slot 5>
TUPLE IDS: 3
AGGREGATE
OUTPUT:
GROUP BY: <slot 3>, <slot 4>, <slot 5>
TUPLE IDS: 1
EXCHANGE (3)
TUPLE IDS: 1
Plan Fragment 1
RANDOM
STREAM DATA SINK
EXCHANGE ID: 3
UNPARTITIONED
AGGREGATE
OUTPUT:
GROUP BY: testtbl.id, testtbl.name, testtbl.zip
TUPLE IDS: 1
SCAN HDFS table=default.testtbl (0)
TUPLE IDS: 0
---- SCANRANGELOCATIONS
NODE 0:
====
# distinct w/ explicit select list
select distinct id, zip
from (select distinct * from testtbl) x
---- PLAN
Plan Fragment 0
UNPARTITIONED
AGGREGATE
OUTPUT:
GROUP BY: <slot 3>, <slot 5>
TUPLE IDS: 3
AGGREGATE
OUTPUT:
GROUP BY: testtbl.id, testtbl.name, testtbl.zip
TUPLE IDS: 1
SCAN HDFS table=default.testtbl (0)
TUPLE IDS: 0
---- DISTRIBUTEDPLAN
Plan Fragment 0
UNPARTITIONED
AGGREGATE
OUTPUT:
GROUP BY: <slot 3>, <slot 5>
TUPLE IDS: 3
AGGREGATE
OUTPUT:
GROUP BY: <slot 3>, <slot 4>, <slot 5>
TUPLE IDS: 1
EXCHANGE (3)
TUPLE IDS: 1
Plan Fragment 1
RANDOM
STREAM DATA SINK
EXCHANGE ID: 3
UNPARTITIONED
AGGREGATE
OUTPUT:
GROUP BY: testtbl.id, testtbl.name, testtbl.zip
TUPLE IDS: 1
SCAN HDFS table=default.testtbl (0)
TUPLE IDS: 0
---- SCANRANGELOCATIONS
NODE 0:
====
# aggregate with group-by, having
select *
from (
select int_col % 7 c1, count(*) c2, avg(int_col) c3
from (
select * from alltypesagg
) a
group by 1
having avg(int_col) > 500 or count(*) = 10
) b
where c1 is not null
and c2 > 10
---- PLAN
Plan Fragment 0
UNPARTITIONED
AGGREGATE
OUTPUT: COUNT(*), SUM(alltypesagg.int_col), COUNT(alltypesagg.int_col)
GROUP BY: alltypesagg.int_col % 7
HAVING: <slot 30> / <slot 31> > 500.0 OR <slot 29> = 10, <slot 28> IS NOT NULL, <slot 29> > 10
TUPLE IDS: 2
SCAN HDFS table=default.alltypesagg (0)
TUPLE IDS: 0
---- DISTRIBUTEDPLAN
Plan Fragment 0
UNPARTITIONED
AGGREGATE
OUTPUT: SUM(<slot 29>), SUM(<slot 30>), SUM(<slot 31>)
GROUP BY: <slot 28>
HAVING: <slot 30> / <slot 31> > 500.0 OR <slot 29> = 10, <slot 28> IS NOT NULL, <slot 29> > 10
TUPLE IDS: 2
EXCHANGE (2)
TUPLE IDS: 2
Plan Fragment 1
RANDOM
STREAM DATA SINK
EXCHANGE ID: 2
UNPARTITIONED
AGGREGATE
OUTPUT: COUNT(*), SUM(alltypesagg.int_col), COUNT(alltypesagg.int_col)
GROUP BY: alltypesagg.int_col % 7
TUPLE IDS: 2
SCAN HDFS table=default.alltypesagg (0)
TUPLE IDS: 0
---- SCANRANGELOCATIONS
NODE 0:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=1/100101.txt 0:75153
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=10/100110.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=2/100102.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=3/100103.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=4/100104.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=5/100105.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=6/100106.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=7/100107.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=8/100108.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=9/100109.txt 0:76263
LOCATIONS:
====
# subquery with left outer join
select j.*, d.*
from (
select *
from JoinTbl a
) j
left outer join
(
select *
from DimTbl b
) d
on (j.test_name = d.name)
where j.test_id <= 1006
---- PLAN
Plan Fragment 0
UNPARTITIONED
HASH JOIN
JOIN OP: LEFT OUTER JOIN
HASH PREDICATES:
a.test_name = b.name
TUPLE IDS: 0 2N
SCAN HDFS table=default.jointbl (0)
PREDICATES: a.test_id <= 1006
TUPLE IDS: 0
SCAN HDFS table=default.dimtbl (1) compact
TUPLE IDS: 2
---- DISTRIBUTEDPLAN
Plan Fragment 0
UNPARTITIONED
EXCHANGE (4)
TUPLE IDS: 0 2N
Plan Fragment 1
RANDOM
STREAM DATA SINK
EXCHANGE ID: 4
UNPARTITIONED
HASH JOIN
JOIN OP: LEFT OUTER JOIN
HASH PREDICATES:
a.test_name = b.name
TUPLE IDS: 0 2N
SCAN HDFS table=default.jointbl (0)
PREDICATES: a.test_id <= 1006
TUPLE IDS: 0
EXCHANGE (3)
TUPLE IDS: 2
Plan Fragment 2
RANDOM
STREAM DATA SINK
EXCHANGE ID: 3
UNPARTITIONED
SCAN HDFS table=default.dimtbl (1) compact
TUPLE IDS: 2
---- SCANRANGELOCATIONS
NODE 0:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/jointbl/data.csv 0:433
LOCATIONS:
NODE 1:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/dimtbl/data.csv 0:171
LOCATIONS:
====
# complex join, having joined subquery on the rhs, and predicates
# at multiple subquery levels
select x.smallint_col, count(x.id)
from alltypessmall c
left outer join
(
select a.smallint_col smallint_col, a.tinyint_col tinyint_col, a.day day,
a.int_col int_col, a.month month, b.float_col float_col, b.id id
from (
select *
from alltypesagg a
) a
join
alltypessmall b
on (a.smallint_col = b.id)
) x
on (x.tinyint_col = c.id)
group by x.smallint_col
---- PLAN
Plan Fragment 0
UNPARTITIONED
AGGREGATE
OUTPUT: COUNT(b.id)
GROUP BY: a.smallint_col
TUPLE IDS: 5
HASH JOIN
JOIN OP: LEFT OUTER JOIN
HASH PREDICATES:
c.id = a.tinyint_col
TUPLE IDS: 0 1N 3N
SCAN HDFS table=default.alltypessmall (0)
TUPLE IDS: 0
HASH JOIN
JOIN OP: INNER JOIN
HASH PREDICATES:
a.smallint_col = b.id
TUPLE IDS: 1 3
SCAN HDFS table=default.alltypesagg (1) compact
TUPLE IDS: 1
SCAN HDFS table=default.alltypessmall (2) compact
TUPLE IDS: 3
---- DISTRIBUTEDPLAN
Plan Fragment 0
UNPARTITIONED
AGGREGATE
OUTPUT: SUM(<slot 39>)
GROUP BY: <slot 38>
TUPLE IDS: 5
EXCHANGE (8)
TUPLE IDS: 5
Plan Fragment 1
RANDOM
STREAM DATA SINK
EXCHANGE ID: 8
UNPARTITIONED
AGGREGATE
OUTPUT: COUNT(b.id)
GROUP BY: a.smallint_col
TUPLE IDS: 5
HASH JOIN
JOIN OP: LEFT OUTER JOIN
HASH PREDICATES:
c.id = a.tinyint_col
TUPLE IDS: 0 1N 3N
SCAN HDFS table=default.alltypessmall (0)
TUPLE IDS: 0
EXCHANGE (7)
TUPLE IDS: 1 3
Plan Fragment 2
RANDOM
STREAM DATA SINK
EXCHANGE ID: 7
UNPARTITIONED
HASH JOIN
JOIN OP: INNER JOIN
HASH PREDICATES:
a.smallint_col = b.id
TUPLE IDS: 1 3
SCAN HDFS table=default.alltypesagg (1) compact
TUPLE IDS: 1
EXCHANGE (6)
TUPLE IDS: 3
Plan Fragment 3
RANDOM
STREAM DATA SINK
EXCHANGE ID: 6
UNPARTITIONED
SCAN HDFS table=default.alltypessmall (2) compact
TUPLE IDS: 3
---- SCANRANGELOCATIONS
NODE 0:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=1/090101.txt 0:1610
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=2/090201.txt 0:1621
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=3/090301.txt 0:1620
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=4/090401.txt 0:1621
LOCATIONS:
NODE 1:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=1/100101.txt 0:75153
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=10/100110.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=2/100102.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=3/100103.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=4/100104.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=5/100105.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=6/100106.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=7/100107.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=8/100108.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=9/100109.txt 0:76263
LOCATIONS:
NODE 2:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=1/090101.txt 0:1610
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=2/090201.txt 0:1621
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=3/090301.txt 0:1620
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=4/090401.txt 0:1621
LOCATIONS:
====
# complex join, having joined subquery on the lhs, and predicates
# at multiple subquery levels
select x.smallint_col, x.id, x.tinyint_col, c.id, x.int_col, x.float_col, c.string_col
from
(
select a.smallint_col smallint_col, a.tinyint_col tinyint_col, a.day day,
a.int_col int_col, a.month month, b.float_col float_col, b.id id
from (
select *
from alltypesagg a
where month=1
) a
join
alltypessmall b
on (a.smallint_col = b.id)
) x
join
alltypessmall c
on (x.tinyint_col = c.id)
where x.day=1
and x.int_col > 899
and x.float_col > 4.5
and c.string_col < '7'
and x.int_col + x.float_col + CAST(c.string_col AS FLOAT) < 1000
---- PLAN
Plan Fragment 0
UNPARTITIONED
HASH JOIN
JOIN OP: INNER JOIN
HASH PREDICATES:
a.tinyint_col = c.id
OTHER PREDICATES: a.int_col + b.float_col + CAST(c.string_col AS FLOAT) < 1000.0
TUPLE IDS: 0 2 4
HASH JOIN
JOIN OP: INNER JOIN
HASH PREDICATES:
a.smallint_col = b.id
TUPLE IDS: 0 2
SCAN HDFS table=default.alltypesagg (0)
PREDICATES: a.int_col > 899
TUPLE IDS: 0
SCAN HDFS table=default.alltypessmall (1) compact
PREDICATES: b.float_col > 4.5
TUPLE IDS: 2
SCAN HDFS table=default.alltypessmall (3) compact
PREDICATES: c.string_col < '7'
TUPLE IDS: 4
---- DISTRIBUTEDPLAN
Plan Fragment 0
UNPARTITIONED
EXCHANGE (7)
TUPLE IDS: 0 2 4
Plan Fragment 1
RANDOM
STREAM DATA SINK
EXCHANGE ID: 7
UNPARTITIONED
HASH JOIN
JOIN OP: INNER JOIN
HASH PREDICATES:
a.tinyint_col = c.id
OTHER PREDICATES: a.int_col + b.float_col + CAST(c.string_col AS FLOAT) < 1000.0
TUPLE IDS: 0 2 4
HASH JOIN
JOIN OP: INNER JOIN
HASH PREDICATES:
a.smallint_col = b.id
TUPLE IDS: 0 2
SCAN HDFS table=default.alltypesagg (0)
PREDICATES: a.int_col > 899
TUPLE IDS: 0
EXCHANGE (5)
TUPLE IDS: 2
EXCHANGE (6)
TUPLE IDS: 4
Plan Fragment 2
RANDOM
STREAM DATA SINK
EXCHANGE ID: 6
UNPARTITIONED
SCAN HDFS table=default.alltypessmall (3) compact
PREDICATES: c.string_col < '7'
TUPLE IDS: 4
Plan Fragment 3
RANDOM
STREAM DATA SINK
EXCHANGE ID: 5
UNPARTITIONED
SCAN HDFS table=default.alltypessmall (1) compact
PREDICATES: b.float_col > 4.5
TUPLE IDS: 2
---- SCANRANGELOCATIONS
NODE 0:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=1/100101.txt 0:75153
LOCATIONS:
NODE 1:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=1/090101.txt 0:1610
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=2/090201.txt 0:1621
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=3/090301.txt 0:1620
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=4/090401.txt 0:1621
LOCATIONS:
NODE 3:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=1/090101.txt 0:1610
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=2/090201.txt 0:1621
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=3/090301.txt 0:1620
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=4/090401.txt 0:1621
LOCATIONS:
====
# complex join, having joined aggregate subquery on the rhs, and predicates
# at multiple subquery levels
select x.smallint_col, sum(x.cnt)
from alltypessmall c
join (
select count(a.id) cnt, b.smallint_col smallint_col
from ( select * from alltypesagg a ) a
join alltypessmall b on (a.smallint_col = b.id)
group by b.smallint_col
) x on (x.smallint_col = c.id)
group by x.smallint_col
---- PLAN
Plan Fragment 0
UNPARTITIONED
AGGREGATE
OUTPUT: SUM(<slot 31>)
GROUP BY: <slot 30>
TUPLE IDS: 6
HASH JOIN
JOIN OP: INNER JOIN
HASH PREDICATES:
c.id = <slot 30>
TUPLE IDS: 0 4
SCAN HDFS table=default.alltypessmall (0)
TUPLE IDS: 0
AGGREGATE
OUTPUT: COUNT(a.id)
GROUP BY: b.smallint_col
TUPLE IDS: 4
HASH JOIN
JOIN OP: INNER JOIN
HASH PREDICATES:
a.smallint_col = b.id
TUPLE IDS: 1 3
SCAN HDFS table=default.alltypesagg (1)
TUPLE IDS: 1
SCAN HDFS table=default.alltypessmall (2) compact
TUPLE IDS: 3
---- DISTRIBUTEDPLAN
Plan Fragment 0
UNPARTITIONED
AGGREGATE
OUTPUT: SUM(<slot 36>)
GROUP BY: <slot 35>
TUPLE IDS: 6
EXCHANGE (11)
TUPLE IDS: 6
Plan Fragment 1
RANDOM
STREAM DATA SINK
EXCHANGE ID: 11
UNPARTITIONED
AGGREGATE
OUTPUT: SUM(<slot 31>)
GROUP BY: <slot 30>
TUPLE IDS: 6
HASH JOIN
JOIN OP: INNER JOIN
HASH PREDICATES:
c.id = <slot 30>
TUPLE IDS: 0 4
SCAN HDFS table=default.alltypessmall (0)
TUPLE IDS: 0
EXCHANGE (10)
TUPLE IDS: 4
Plan Fragment 2
UNPARTITIONED
STREAM DATA SINK
EXCHANGE ID: 10
UNPARTITIONED
AGGREGATE
OUTPUT: SUM(<slot 31>)
GROUP BY: <slot 30>
TUPLE IDS: 4
EXCHANGE (8)
TUPLE IDS: 4
Plan Fragment 3
RANDOM
STREAM DATA SINK
EXCHANGE ID: 8
UNPARTITIONED
AGGREGATE
OUTPUT: COUNT(a.id)
GROUP BY: b.smallint_col
TUPLE IDS: 4
HASH JOIN
JOIN OP: INNER JOIN
HASH PREDICATES:
a.smallint_col = b.id
TUPLE IDS: 1 3
SCAN HDFS table=default.alltypesagg (1)
TUPLE IDS: 1
EXCHANGE (7)
TUPLE IDS: 3
Plan Fragment 4
RANDOM
STREAM DATA SINK
EXCHANGE ID: 7
UNPARTITIONED
SCAN HDFS table=default.alltypessmall (2) compact
TUPLE IDS: 3
---- SCANRANGELOCATIONS
NODE 0:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=1/090101.txt 0:1610
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=2/090201.txt 0:1621
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=3/090301.txt 0:1620
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=4/090401.txt 0:1621
LOCATIONS:
NODE 1:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=1/100101.txt 0:75153
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=10/100110.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=2/100102.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=3/100103.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=4/100104.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=5/100105.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=6/100106.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=7/100107.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=8/100108.txt 0:76263
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=9/100109.txt 0:76263
LOCATIONS:
NODE 2:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=1/090101.txt 0:1610
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=2/090201.txt 0:1621
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=3/090301.txt 0:1620
LOCATIONS:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=4/090401.txt 0:1621
LOCATIONS:
====