impala/testdata/workloads/functional-planner/queries/PlannerTest/inline-view.test
IMPALA-3586: Implement union passthrough (Taras Bobrovytsky, commit a50c344077)
The union node acts as a pass-through operator and forwards row batches
from its children without materializing them. This is done when a
child's tuple layout is identical to the union node's tuple layout
and no functions need to be applied to the child's row batches.
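
Whether an operand is actually passed through is visible in the plan: the
planner tests below annotate the UNION node with "pass-through-operands"
for operands that can be forwarded this way. As a rough sketch (using the
functional schema these tests run against; the exact passthrough decision
is up to the planner), a union of identically shaped selects is a candidate
for passthrough, while applying an expression to an operand forces that
operand's rows to be materialized:

-- Candidate for passthrough: both operands forward their scan's rows unchanged.
explain
select id, int_col from functional.alltypestiny
union all
select id, int_col from functional.alltypestiny;

-- The second operand is not a passthrough candidate: id + 1 must be
-- evaluated, so its rows are materialized by the union node.
explain
select id, int_col from functional.alltypestiny
union all
select id + 1, int_col from functional.alltypestiny;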

Removed operand reordering in the FE because it's simpler and safer to
handle all passthrough children before non-passthrough children in the
BE. The recent improvements to memory management allowed us to remove
this requirement.

Testing:
- Added new planner and end-to-end tests that cover the new
  functionality.
- Updated existing tests to reflect the new behavior.

Perf:
Ran a benchmark on a local 10 GB TPC-DS dataset, using an unpartitioned
version of the store_sales table. The following query showed more than
a 2x performance improvement:

SELECT
  COUNT(ss_sold_time_sk),
  COUNT(ss_item_sk),
  COUNT(ss_customer_sk),
  COUNT(ss_cdemo_sk),
  COUNT(ss_hdemo_sk),
  COUNT(ss_addr_sk),
  COUNT(ss_store_sk),
  COUNT(ss_promo_sk),
  COUNT(ss_ticket_number),
  COUNT(ss_quantity),
  COUNT(ss_wholesale_cost),
  COUNT(ss_list_price),
  COUNT(ss_sales_price),
  COUNT(ss_ext_discount_amt),
  COUNT(ss_ext_sales_price),
  COUNT(ss_ext_wholesale_cost),
  COUNT(ss_ext_list_price),
  COUNT(ss_ext_tax),
  COUNT(ss_coupon_amt),
  COUNT(ss_net_paid),
  COUNT(ss_net_paid_inc_tax),
  COUNT(ss_net_profit),
  COUNT(ss_sold_date_sk)
FROM (
  select * from tpcds_10_parquet.store_sales_unpartitioned
  union all
  select * from tpcds_10_parquet.store_sales_unpartitioned
  union all
  select * from tpcds_10_parquet.store_sales_unpartitioned
  union all
  select * from tpcds_10_parquet.store_sales_unpartitioned
  union all
  select * from tpcds_10_parquet.store_sales_unpartitioned
  union all
  select * from tpcds_10_parquet.store_sales_unpartitioned
  union all
  select * from tpcds_10_parquet.store_sales_unpartitioned
  union all
  select * from tpcds_10_parquet.store_sales_unpartitioned
  union all
  select * from tpcds_10_parquet.store_sales_unpartitioned
  union all
  select * from tpcds_10_parquet.store_sales_unpartitioned
) t

Before:
Total Time: 43s164ms

Summary:
Operator          #Hosts   Avg Time   Max Time    #Rows  Est. #Rows   Peak Mem  Est. Peak Mem  Detail
------------------------------------------------------------------------------------------------------------------------------
13:AGGREGATE           1  224.721us  224.721us        1           1   28.00 KB        -1.00 B  FINALIZE
12:EXCHANGE            1   24.578us   24.578us        3           1          0        -1.00 B  UNPARTITIONED
11:AGGREGATE           3    2s402ms    3s060ms        3           1  119.00 KB       10.00 MB
00:UNION               3   35s380ms   37s846ms  288.01M     288.01M    3.08 MB              0
|--02:SCAN HDFS        3  184.197ms  219.931ms   28.80M      28.80M  535.03 MB        1.88 GB  store_sales_unpartitioned
|--03:SCAN HDFS        3  131.956ms  153.401ms   28.80M      28.80M  534.98 MB        1.88 GB  store_sales_unpartitioned
|--04:SCAN HDFS        3  178.456ms  247.721ms   28.80M      28.80M  534.98 MB        1.88 GB  store_sales_unpartitioned
|--05:SCAN HDFS        3  189.398ms  242.251ms   28.80M      28.80M  535.01 MB        1.88 GB  store_sales_unpartitioned
|--06:SCAN HDFS        3  122.786ms  156.528ms   28.80M      28.80M  534.98 MB        1.88 GB  store_sales_unpartitioned
|--07:SCAN HDFS        3  147.467ms  183.391ms   28.80M      28.80M  535.13 MB        1.88 GB  store_sales_unpartitioned
|--08:SCAN HDFS        3  147.502ms  186.273ms   28.80M      28.80M  535.01 MB        1.88 GB  store_sales_unpartitioned
|--09:SCAN HDFS        3  130.086ms  154.682ms   28.80M      28.80M  535.04 MB        1.88 GB  store_sales_unpartitioned
|--10:SCAN HDFS        3  122.701ms  161.056ms   28.80M      28.80M  534.89 MB        1.88 GB  store_sales_unpartitioned
01:SCAN HDFS           3  287.863ms  330.436ms   28.80M      28.80M  534.98 MB        1.88 GB  store_sales_unpartitioned

After:
Total Time: 19s139ms

Summary:
Operator          #Hosts   Avg Time   Max Time    #Rows  Est. #Rows   Peak Mem  Est. Peak Mem  Detail
------------------------------------------------------------------------------------------------------------------------------
13:AGGREGATE           1  166.241us  166.241us        1           1   28.00 KB        -1.00 B  FINALIZE
12:EXCHANGE            1   71.695us   71.695us        3           1          0        -1.00 B  UNPARTITIONED
11:AGGREGATE           3    2s971ms    3s809ms        3           1    3.08 MB       10.00 MB
00:UNION               3  207.956ms  222.846ms  288.01M     288.01M          0              0
|--02:SCAN HDFS        3    1s533ms    1s535ms   28.80M      28.80M  532.28 MB        1.88 GB  store_sales_unpartitioned
|--03:SCAN HDFS        3    1s554ms    1s669ms   28.80M      28.80M  525.73 MB        1.88 GB  store_sales_unpartitioned
|--04:SCAN HDFS        3    1s568ms    1s716ms   28.80M      28.80M  525.03 MB        1.88 GB  store_sales_unpartitioned
|--05:SCAN HDFS        3    1s503ms    1s617ms   28.80M      28.80M  527.43 MB        1.88 GB  store_sales_unpartitioned
|--06:SCAN HDFS        3    1s560ms    1s634ms   28.80M      28.80M  528.52 MB        1.88 GB  store_sales_unpartitioned
|--07:SCAN HDFS        3    1s489ms    1s643ms   28.80M      28.80M  534.81 MB        1.88 GB  store_sales_unpartitioned
|--08:SCAN HDFS        3    1s534ms    1s581ms   28.80M      28.80M  528.10 MB        1.88 GB  store_sales_unpartitioned
|--09:SCAN HDFS        3    1s558ms    1s674ms   28.80M      28.80M  526.77 MB        1.88 GB  store_sales_unpartitioned
|--10:SCAN HDFS        3    1s504ms    1s692ms   28.80M      28.80M  527.83 MB        1.88 GB  store_sales_unpartitioned
01:SCAN HDFS           3    1s682ms    1s911ms   28.80M      28.80M  526.14 MB        1.88 GB  store_sales_unpartitioned

Change-Id: Ia8f6d5062724ba5b78174c3227a7a796d10d8416
Reviewed-on: http://gerrit.cloudera.org:8080/5816
Reviewed-by: Dan Hecht <dhecht@cloudera.com>
Tested-by: Impala Public Jenkins

# subquery with aggregation and order by/limit, as left-hand side of join;
# having clause in subquery is transferred to merge agg step in distrib plan
select *
from (
select int_col, count(*)
from functional.alltypessmall
where month = 1
group by int_col
having count(*) > 1
order by count(*) desc limit 5
) t1
join functional.alltypes t2 on (t1.int_col = t2.int_col)
where month = 1
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [INNER JOIN]
| hash predicates: t2.int_col = int_col
| runtime filters: RF000 <- int_col
|
|--02:TOP-N [LIMIT=5]
| | order by: count(*) DESC
| |
| 01:AGGREGATE [FINALIZE]
| | output: count(*)
| | group by: int_col
| | having: count(*) > 1
| |
| 00:SCAN HDFS [functional.alltypessmall]
| partitions=1/4 files=1 size=1.57KB
|
03:SCAN HDFS [functional.alltypes t2]
partitions=2/24 files=2 size=40.32KB
runtime filters: RF000 -> t2.int_col
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
09:EXCHANGE [UNPARTITIONED]
|
04:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: t2.int_col = int_col
| runtime filters: RF000 <- int_col
|
|--08:EXCHANGE [BROADCAST]
| |
| 07:MERGING-EXCHANGE [UNPARTITIONED]
| | order by: count(*) DESC
| | limit: 5
| |
| 02:TOP-N [LIMIT=5]
| | order by: count(*) DESC
| |
| 06:AGGREGATE [FINALIZE]
| | output: count:merge(*)
| | group by: int_col
| | having: count(*) > 1
| |
| 05:EXCHANGE [HASH(int_col)]
| |
| 01:AGGREGATE [STREAMING]
| | output: count(*)
| | group by: int_col
| |
| 00:SCAN HDFS [functional.alltypessmall]
| partitions=1/4 files=1 size=1.57KB
|
03:SCAN HDFS [functional.alltypes t2]
partitions=2/24 files=2 size=40.32KB
runtime filters: RF000 -> t2.int_col
====
# simple full scan subquery
select * from (select y x from (select id y from functional_hbase.alltypessmall) a) b
---- PLAN
PLAN-ROOT SINK
|
00:SCAN HBASE [functional_hbase.alltypessmall]
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
01:EXCHANGE [UNPARTITIONED]
|
00:SCAN HBASE [functional_hbase.alltypessmall]
====
# subquery doing join
select * from (select t2.*
from functional.testtbl t1 join functional.testtbl t2 using(id)
where t1.zip = 94611) x
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [INNER JOIN]
| hash predicates: t1.id = t2.id
| runtime filters: RF000 <- t2.id
|
|--01:SCAN HDFS [functional.testtbl t2]
| partitions=1/1 files=0 size=0B
|
00:SCAN HDFS [functional.testtbl t1]
partitions=1/1 files=0 size=0B
predicates: t1.zip = 94611
runtime filters: RF000 -> t1.id
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
04:EXCHANGE [UNPARTITIONED]
|
02:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: t1.id = t2.id
| runtime filters: RF000 <- t2.id
|
|--03:EXCHANGE [BROADCAST]
| |
| 01:SCAN HDFS [functional.testtbl t2]
| partitions=1/1 files=0 size=0B
|
00:SCAN HDFS [functional.testtbl t1]
partitions=1/1 files=0 size=0B
predicates: t1.zip = 94611
runtime filters: RF000 -> t1.id
====
# subquery doing join
# multiple join predicates;
# scan predicates get propagated correctly;
# non-eq join predicates are evaluated as extra conjuncts by the join node
select *
from
(select a.*
from functional.alltypesagg a
right outer join functional.alltypessmall b using (id, int_col)
where a.day >= 6
and b.month > 2
and a.tinyint_col = 15
and b.string_col = '15'
and a.tinyint_col + b.tinyint_col < 15) x
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [RIGHT OUTER JOIN]
| hash predicates: a.id = b.id, a.int_col = b.int_col
| other predicates: a.tinyint_col = 15, a.day >= 6, a.tinyint_col + b.tinyint_col < 15
| runtime filters: RF000 <- b.id, RF001 <- b.int_col
|
|--01:SCAN HDFS [functional.alltypessmall b]
| partitions=2/4 files=2 size=3.17KB
| predicates: b.string_col = '15'
|
00:SCAN HDFS [functional.alltypesagg a]
partitions=5/11 files=5 size=372.38KB
predicates: a.tinyint_col = 15
runtime filters: RF000 -> a.id, RF001 -> a.int_col
---- SCANRANGELOCATIONS
NODE 0:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=10/100110.txt 0:76263
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=6/100106.txt 0:76263
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=7/100107.txt 0:76263
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=8/100108.txt 0:76263
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=9/100109.txt 0:76263
NODE 1:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=3/090301.txt 0:1620
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=4/090401.txt 0:1621
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
05:EXCHANGE [UNPARTITIONED]
|
02:HASH JOIN [RIGHT OUTER JOIN, PARTITIONED]
| hash predicates: a.id = b.id, a.int_col = b.int_col
| other predicates: a.tinyint_col = 15, a.day >= 6, a.tinyint_col + b.tinyint_col < 15
| runtime filters: RF000 <- b.id, RF001 <- b.int_col
|
|--04:EXCHANGE [HASH(b.id,b.int_col)]
| |
| 01:SCAN HDFS [functional.alltypessmall b]
| partitions=2/4 files=2 size=3.17KB
| predicates: b.string_col = '15'
|
03:EXCHANGE [HASH(a.id,a.int_col)]
|
00:SCAN HDFS [functional.alltypesagg a]
partitions=5/11 files=5 size=372.38KB
predicates: a.tinyint_col = 15
runtime filters: RF000 -> a.id, RF001 -> a.int_col
====
# predicate pushdown
select * from (select * from functional_hbase.alltypessmall) a where id < 5
---- PLAN
PLAN-ROOT SINK
|
00:SCAN HBASE [functional_hbase.alltypessmall]
predicates: functional_hbase.alltypessmall.id < 5
====
# subquery join
# multiple join predicates;
# scan predicates get propagated correctly;
# non-eq join predicates are evaluated as extra conjuncts by the join node
select *
from
(select id, int_col, day, tinyint_col from functional.alltypesagg) a
right outer join
(select id, int_col, month, string_col, tinyint_col
from functional.alltypessmall) b using (id, int_col)
where a.day >= 6
and b.month > 2
and a.tinyint_col = 15
and b.string_col = '15'
and a.tinyint_col + b.tinyint_col < 15
and b.id + 15 = 27
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [RIGHT OUTER JOIN]
| hash predicates: id = id, int_col = int_col
| other predicates: tinyint_col = 15, day >= 6, tinyint_col + tinyint_col < 15
| runtime filters: RF000 <- id, RF001 <- int_col
|
|--01:SCAN HDFS [functional.alltypessmall]
| partitions=2/4 files=2 size=3.17KB
| predicates: functional.alltypessmall.id + 15 = 27, functional.alltypessmall.string_col = '15'
|
00:SCAN HDFS [functional.alltypesagg]
partitions=5/11 files=5 size=372.38KB
predicates: functional.alltypesagg.tinyint_col = 15, functional.alltypesagg.id + 15 = 27
runtime filters: RF000 -> id, RF001 -> int_col
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
05:EXCHANGE [UNPARTITIONED]
|
02:HASH JOIN [RIGHT OUTER JOIN, PARTITIONED]
| hash predicates: id = id, int_col = int_col
| other predicates: tinyint_col = 15, day >= 6, tinyint_col + tinyint_col < 15
| runtime filters: RF000 <- id, RF001 <- int_col
|
|--04:EXCHANGE [HASH(id,int_col)]
| |
| 01:SCAN HDFS [functional.alltypessmall]
| partitions=2/4 files=2 size=3.17KB
| predicates: functional.alltypessmall.id + 15 = 27, functional.alltypessmall.string_col = '15'
|
03:EXCHANGE [HASH(id,int_col)]
|
00:SCAN HDFS [functional.alltypesagg]
partitions=5/11 files=5 size=372.38KB
predicates: functional.alltypesagg.tinyint_col = 15, functional.alltypesagg.id + 15 = 27
runtime filters: RF000 -> id, RF001 -> int_col
====
# subquery join
# multiple join predicates;
# scan predicates get propagated correctly;
# non-eq join predicates are evaluated as extra conjuncts by the join node
select *
from
(select id, int_col, day, tinyint_col
from
(select id, int_col, day, tinyint_col from functional.alltypesagg) a0
where a0.day >= 6) a
right outer join
(select id, int_col, month, string_col, tinyint_col from functional.alltypessmall) b
using (id, int_col)
where b.month > 2
and a.tinyint_col = 15
and b.string_col = '15'
and a.tinyint_col + b.tinyint_col < 15
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [RIGHT OUTER JOIN]
| hash predicates: id = id, int_col = int_col
| other predicates: tinyint_col = 15, tinyint_col + tinyint_col < 15
| runtime filters: RF000 <- id, RF001 <- int_col
|
|--01:SCAN HDFS [functional.alltypessmall]
| partitions=2/4 files=2 size=3.17KB
| predicates: functional.alltypessmall.string_col = '15'
|
00:SCAN HDFS [functional.alltypesagg]
partitions=5/11 files=5 size=372.38KB
predicates: functional.alltypesagg.tinyint_col = 15
runtime filters: RF000 -> id, RF001 -> int_col
---- SCANRANGELOCATIONS
NODE 0:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=10/100110.txt 0:76263
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=6/100106.txt 0:76263
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=7/100107.txt 0:76263
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=8/100108.txt 0:76263
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=9/100109.txt 0:76263
NODE 1:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=3/090301.txt 0:1620
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=4/090401.txt 0:1621
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
05:EXCHANGE [UNPARTITIONED]
|
02:HASH JOIN [RIGHT OUTER JOIN, PARTITIONED]
| hash predicates: id = id, int_col = int_col
| other predicates: tinyint_col = 15, tinyint_col + tinyint_col < 15
| runtime filters: RF000 <- id, RF001 <- int_col
|
|--04:EXCHANGE [HASH(id,int_col)]
| |
| 01:SCAN HDFS [functional.alltypessmall]
| partitions=2/4 files=2 size=3.17KB
| predicates: functional.alltypessmall.string_col = '15'
|
03:EXCHANGE [HASH(id,int_col)]
|
00:SCAN HDFS [functional.alltypesagg]
partitions=5/11 files=5 size=372.38KB
predicates: functional.alltypesagg.tinyint_col = 15
runtime filters: RF000 -> id, RF001 -> int_col
====
# complex join, having joined subquery on the rhs, and predicate
# at multiple subquery levels. This tests that both sides of a join
# that is itself on the build side of another join get compacted.
select x.smallint_col, x.id, x.tinyint_col, c.id, x.int_col, x.float_col, c.string_col
from functional.alltypessmall c
join (
select a.smallint_col smallint_col, a.tinyint_col tinyint_col, a.day day,
a.int_col int_col, a.month month, b.float_col float_col, b.id id
from ( select * from functional.alltypesagg a where month=1 ) a
join functional.alltypessmall b on (a.smallint_col = b.id)
) x on (x.tinyint_col = c.id)
where x.day=1
and x.int_col > 899
and x.float_col > 4.5
and c.string_col < '7'
and x.int_col + x.float_col + cast(c.string_col as float) < 1000
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [INNER JOIN]
| hash predicates: c.id = a.tinyint_col
| other predicates: a.int_col + b.float_col + CAST(c.string_col AS FLOAT) < 1000
| runtime filters: RF000 <- a.tinyint_col
|
|--03:HASH JOIN [INNER JOIN]
| | hash predicates: a.smallint_col = b.id
| | runtime filters: RF001 <- b.id
| |
| |--02:SCAN HDFS [functional.alltypessmall b]
| | partitions=4/4 files=4 size=6.32KB
| | predicates: b.float_col > 4.5
| |
| 01:SCAN HDFS [functional.alltypesagg a]
| partitions=1/11 files=1 size=73.39KB
| predicates: a.int_col > 899
| runtime filters: RF001 -> a.smallint_col
|
00:SCAN HDFS [functional.alltypessmall c]
partitions=4/4 files=4 size=6.32KB
predicates: c.string_col < '7'
runtime filters: RF000 -> c.id
---- SCANRANGELOCATIONS
NODE 0:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=1/090101.txt 0:1610
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=2/090201.txt 0:1621
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=3/090301.txt 0:1620
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=4/090401.txt 0:1621
NODE 1:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=1/100101.txt 0:75153
NODE 2:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=1/090101.txt 0:1610
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=2/090201.txt 0:1621
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=3/090301.txt 0:1620
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=4/090401.txt 0:1621
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
08:EXCHANGE [UNPARTITIONED]
|
04:HASH JOIN [INNER JOIN, PARTITIONED]
| hash predicates: c.id = a.tinyint_col
| other predicates: a.int_col + b.float_col + CAST(c.string_col AS FLOAT) < 1000
| runtime filters: RF000 <- a.tinyint_col
|
|--07:EXCHANGE [HASH(a.tinyint_col)]
| |
| 03:HASH JOIN [INNER JOIN, BROADCAST]
| | hash predicates: a.smallint_col = b.id
| | runtime filters: RF001 <- b.id
| |
| |--05:EXCHANGE [BROADCAST]
| | |
| | 02:SCAN HDFS [functional.alltypessmall b]
| | partitions=4/4 files=4 size=6.32KB
| | predicates: b.float_col > 4.5
| |
| 01:SCAN HDFS [functional.alltypesagg a]
| partitions=1/11 files=1 size=73.39KB
| predicates: a.int_col > 899
| runtime filters: RF001 -> a.smallint_col
|
06:EXCHANGE [HASH(c.id)]
|
00:SCAN HDFS [functional.alltypessmall c]
partitions=4/4 files=4 size=6.32KB
predicates: c.string_col < '7'
runtime filters: RF000 -> c.id
====
# with grouping
select tinyint_col, count(*), min(tinyint_col), max(tinyint_col), sum(tinyint_col),
avg(tinyint_col)
from (select * from functional.alltypesagg) a
group by 1
---- PLAN
PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| output: count(*), min(functional.alltypesagg.tinyint_col), max(functional.alltypesagg.tinyint_col), sum(functional.alltypesagg.tinyint_col), avg(functional.alltypesagg.tinyint_col)
| group by: functional.alltypesagg.tinyint_col
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 files=11 size=814.73KB
---- SCANRANGELOCATIONS
NODE 0:
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=1/100101.txt 0:75153
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=10/100110.txt 0:76263
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=2/100102.txt 0:76263
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=3/100103.txt 0:76263
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=4/100104.txt 0:76263
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=5/100105.txt 0:76263
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=6/100106.txt 0:76263
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=7/100107.txt 0:76263
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=8/100108.txt 0:76263
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=9/100109.txt 0:76263
HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=__HIVE_DEFAULT_PARTITION__/000000_0 0:72759
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
04:EXCHANGE [UNPARTITIONED]
|
03:AGGREGATE [FINALIZE]
| output: count:merge(*), min:merge(tinyint_col), max:merge(tinyint_col), sum:merge(tinyint_col), avg:merge(tinyint_col)
| group by: tinyint_col
|
02:EXCHANGE [HASH(tinyint_col)]
|
01:AGGREGATE [STREAMING]
| output: count(*), min(functional.alltypesagg.tinyint_col), max(functional.alltypesagg.tinyint_col), sum(functional.alltypesagg.tinyint_col), avg(functional.alltypesagg.tinyint_col)
| group by: functional.alltypesagg.tinyint_col
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 files=11 size=814.73KB
====
# with grouping
select * from (
select tinyint_col, count(*), min(tinyint_col), max(tinyint_col), sum(tinyint_col),
avg(tinyint_col)
from functional.alltypesagg
group by 1
) a
---- PLAN
PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| output: count(*), min(tinyint_col), max(tinyint_col), sum(tinyint_col), avg(tinyint_col)
| group by: tinyint_col
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 files=11 size=814.73KB
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
04:EXCHANGE [UNPARTITIONED]
|
03:AGGREGATE [FINALIZE]
| output: count:merge(*), min:merge(tinyint_col), max:merge(tinyint_col), sum:merge(tinyint_col), avg:merge(tinyint_col)
| group by: tinyint_col
|
02:EXCHANGE [HASH(tinyint_col)]
|
01:AGGREGATE [STREAMING]
| output: count(*), min(tinyint_col), max(tinyint_col), sum(tinyint_col), avg(tinyint_col)
| group by: tinyint_col
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 files=11 size=814.73KB
====
select c1, c2, c3
from
(select c1, c2, c3
from
(select int_col c1, sum(float_col) c2, min(float_col) c3
from functional_hbase.alltypessmall
group by 1) x
order by 2,3 desc
limit 5
) y
---- PLAN
PLAN-ROOT SINK
|
02:TOP-N [LIMIT=5]
| order by: c2 ASC, c3 DESC
|
01:AGGREGATE [FINALIZE]
| output: sum(float_col), min(float_col)
| group by: int_col
|
00:SCAN HBASE [functional_hbase.alltypessmall]
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
05:MERGING-EXCHANGE [UNPARTITIONED]
| order by: c2 ASC, c3 DESC
| limit: 5
|
02:TOP-N [LIMIT=5]
| order by: c2 ASC, c3 DESC
|
04:AGGREGATE [FINALIZE]
| output: sum:merge(float_col), min:merge(float_col)
| group by: int_col
|
03:EXCHANGE [HASH(int_col)]
|
01:AGGREGATE [STREAMING]
| output: sum(float_col), min(float_col)
| group by: int_col
|
00:SCAN HBASE [functional_hbase.alltypessmall]
====
select c1, x2
from (
select c1, min(c2) x2
from (
select c1, c2, c3
from (
select int_col c1, tinyint_col c2, min(float_col) c3
from functional_hbase.alltypessmall
group by 1, 2
order by 1,2
limit 1
) x
) x2
group by c1
) y
order by 2,1 desc
limit 0
---- PLAN
PLAN-ROOT SINK
|
00:EMPTYSET
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
00:EMPTYSET
====
# distinct *
select distinct *
from (select distinct * from functional.testtbl) x
---- PLAN
PLAN-ROOT SINK
|
02:AGGREGATE [FINALIZE]
| group by: functional.testtbl.id, functional.testtbl.name, functional.testtbl.zip
|
01:AGGREGATE [FINALIZE]
| group by: functional.testtbl.id, functional.testtbl.name, functional.testtbl.zip
|
00:SCAN HDFS [functional.testtbl]
partitions=1/1 files=0 size=0B
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
05:EXCHANGE [UNPARTITIONED]
|
02:AGGREGATE [FINALIZE]
| group by: functional.testtbl.id, functional.testtbl.name, functional.testtbl.zip
|
04:AGGREGATE [FINALIZE]
| group by: functional.testtbl.id, functional.testtbl.name, functional.testtbl.zip
|
03:EXCHANGE [HASH(functional.testtbl.id,functional.testtbl.name,functional.testtbl.zip)]
|
01:AGGREGATE [STREAMING]
| group by: functional.testtbl.id, functional.testtbl.name, functional.testtbl.zip
|
00:SCAN HDFS [functional.testtbl]
partitions=1/1 files=0 size=0B
====
# distinct w/ explicit select list
select distinct id, zip
from (select distinct * from functional.testtbl) x
---- PLAN
PLAN-ROOT SINK
|
02:AGGREGATE [FINALIZE]
| group by: functional.testtbl.id, functional.testtbl.zip
|
01:AGGREGATE [FINALIZE]
| group by: functional.testtbl.id, functional.testtbl.name, functional.testtbl.zip
|
00:SCAN HDFS [functional.testtbl]
partitions=1/1 files=0 size=0B
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
07:EXCHANGE [UNPARTITIONED]
|
06:AGGREGATE [FINALIZE]
| group by: id, zip
|
05:EXCHANGE [HASH(id,zip)]
|
02:AGGREGATE [STREAMING]
| group by: functional.testtbl.id, functional.testtbl.zip
|
04:AGGREGATE [FINALIZE]
| group by: functional.testtbl.id, functional.testtbl.name, functional.testtbl.zip
|
03:EXCHANGE [HASH(functional.testtbl.id,functional.testtbl.name,functional.testtbl.zip)]
|
01:AGGREGATE [STREAMING]
| group by: functional.testtbl.id, functional.testtbl.name, functional.testtbl.zip
|
00:SCAN HDFS [functional.testtbl]
partitions=1/1 files=0 size=0B
====
# aggregate with group-by, having
select *
from (
select int_col % 7 c1, count(*) c2, avg(int_col) c3
from (
select * from functional.alltypesagg
) a
group by 1
having avg(int_col) > 500 or count(*) = 10
) b
where c1 is not null
and c2 > 10
---- PLAN
PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| output: count(*), avg(functional.alltypesagg.int_col)
| group by: functional.alltypesagg.int_col % 7
| having: int_col % 7 IS NOT NULL, count(*) > 10, avg(int_col) > 500 OR count(*) = 10
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 files=11 size=814.73KB
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
04:EXCHANGE [UNPARTITIONED]
|
03:AGGREGATE [FINALIZE]
| output: count:merge(*), avg:merge(int_col)
| group by: int_col % 7
| having: int_col % 7 IS NOT NULL, count(*) > 10, avg(int_col) > 500 OR count(*) = 10
|
02:EXCHANGE [HASH(int_col % 7)]
|
01:AGGREGATE [STREAMING]
| output: count(*), avg(functional.alltypesagg.int_col)
| group by: functional.alltypesagg.int_col % 7
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 files=11 size=814.73KB
====
# subquery with left outer join
select j.*, d.*
from (
select *
from functional.JoinTbl a
) j
left outer join
(
select *
from functional.DimTbl b
) d
on (j.test_name = d.name)
where j.test_id <= 1006
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [RIGHT OUTER JOIN]
| hash predicates: b.name = a.test_name
| runtime filters: RF000 <- a.test_name
|
|--00:SCAN HDFS [functional.jointbl a]
| partitions=1/1 files=1 size=433B
| predicates: a.test_id <= 1006
|
01:SCAN HDFS [functional.dimtbl b]
partitions=1/1 files=1 size=171B
runtime filters: RF000 -> b.name
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
05:EXCHANGE [UNPARTITIONED]
|
02:HASH JOIN [RIGHT OUTER JOIN, PARTITIONED]
| hash predicates: b.name = a.test_name
| runtime filters: RF000 <- a.test_name
|
|--04:EXCHANGE [HASH(a.test_name)]
| |
| 00:SCAN HDFS [functional.jointbl a]
| partitions=1/1 files=1 size=433B
| predicates: a.test_id <= 1006
|
03:EXCHANGE [HASH(b.name)]
|
01:SCAN HDFS [functional.dimtbl b]
partitions=1/1 files=1 size=171B
runtime filters: RF000 -> b.name
====
# complex join, having joined subquery on the rhs, and predicate
# at multiple subquery levels
select x.smallint_col, count(x.id)
from functional.alltypessmall c
left outer join
(
select a.smallint_col smallint_col, a.tinyint_col tinyint_col, a.day day,
a.int_col int_col, a.month month, b.float_col float_col, b.id id
from (
select *
from functional.alltypesagg a
) a
join
functional.alltypessmall b
on (a.smallint_col = b.id)
) x
on (x.tinyint_col = c.id)
group by x.smallint_col
---- PLAN
PLAN-ROOT SINK
|
05:AGGREGATE [FINALIZE]
| output: count(b.id)
| group by: a.smallint_col
|
04:HASH JOIN [RIGHT OUTER JOIN]
| hash predicates: a.tinyint_col = c.id
| runtime filters: RF000 <- c.id
|
|--00:SCAN HDFS [functional.alltypessmall c]
| partitions=4/4 files=4 size=6.32KB
|
03:HASH JOIN [INNER JOIN]
| hash predicates: a.smallint_col = b.id
| runtime filters: RF001 <- b.id
|
|--02:SCAN HDFS [functional.alltypessmall b]
| partitions=4/4 files=4 size=6.32KB
|
01:SCAN HDFS [functional.alltypesagg a]
partitions=11/11 files=11 size=814.73KB
runtime filters: RF000 -> a.tinyint_col, RF001 -> a.smallint_col
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
11:EXCHANGE [UNPARTITIONED]
|
10:AGGREGATE [FINALIZE]
| output: count:merge(x.id)
| group by: x.smallint_col
|
09:EXCHANGE [HASH(x.smallint_col)]
|
05:AGGREGATE [STREAMING]
| output: count(b.id)
| group by: a.smallint_col
|
04:HASH JOIN [RIGHT OUTER JOIN, PARTITIONED]
| hash predicates: a.tinyint_col = c.id
| runtime filters: RF000 <- c.id
|
|--08:EXCHANGE [HASH(c.id)]
| |
| 00:SCAN HDFS [functional.alltypessmall c]
| partitions=4/4 files=4 size=6.32KB
|
07:EXCHANGE [HASH(a.tinyint_col)]
|
03:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: a.smallint_col = b.id
| runtime filters: RF001 <- b.id
|
|--06:EXCHANGE [BROADCAST]
| |
| 02:SCAN HDFS [functional.alltypessmall b]
| partitions=4/4 files=4 size=6.32KB
|
01:SCAN HDFS [functional.alltypesagg a]
partitions=11/11 files=11 size=814.73KB
runtime filters: RF000 -> a.tinyint_col, RF001 -> a.smallint_col
====
# complex join, having joined subquery on the lhs, and predicate
# at multiple subquery levels
select x.smallint_col, x.id, x.tinyint_col, c.id, x.int_col, x.float_col, c.string_col
from
(
select a.smallint_col smallint_col, a.tinyint_col tinyint_col, a.day day,
a.int_col int_col, a.month month, b.float_col float_col, b.id id
from (
select *
from functional.alltypesagg a
where month=1
) a
join
functional.alltypessmall b
on (a.smallint_col = b.id)
) x
join
functional.alltypessmall c
on (x.tinyint_col = c.id)
where x.day=1
and x.int_col > 899
and x.float_col > 4.5
and c.string_col < '7'
and x.int_col + x.float_col + CAST(c.string_col AS FLOAT) < 1000
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [INNER JOIN]
| hash predicates: c.id = a.tinyint_col
| other predicates: a.int_col + b.float_col + CAST(c.string_col AS FLOAT) < 1000
| runtime filters: RF000 <- a.tinyint_col
|
|--02:HASH JOIN [INNER JOIN]
| | hash predicates: a.smallint_col = b.id
| | runtime filters: RF001 <- b.id
| |
| |--01:SCAN HDFS [functional.alltypessmall b]
| | partitions=4/4 files=4 size=6.32KB
| | predicates: b.float_col > 4.5
| |
| 00:SCAN HDFS [functional.alltypesagg a]
| partitions=1/11 files=1 size=73.39KB
| predicates: a.int_col > 899
| runtime filters: RF001 -> a.smallint_col
|
03:SCAN HDFS [functional.alltypessmall c]
partitions=4/4 files=4 size=6.32KB
predicates: c.string_col < '7'
runtime filters: RF000 -> c.id
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
08:EXCHANGE [UNPARTITIONED]
|
04:HASH JOIN [INNER JOIN, PARTITIONED]
| hash predicates: c.id = a.tinyint_col
| other predicates: a.int_col + b.float_col + CAST(c.string_col AS FLOAT) < 1000
| runtime filters: RF000 <- a.tinyint_col
|
|--07:EXCHANGE [HASH(a.tinyint_col)]
| |
| 02:HASH JOIN [INNER JOIN, BROADCAST]
| | hash predicates: a.smallint_col = b.id
| | runtime filters: RF001 <- b.id
| |
| |--05:EXCHANGE [BROADCAST]
| | |
| | 01:SCAN HDFS [functional.alltypessmall b]
| | partitions=4/4 files=4 size=6.32KB
| | predicates: b.float_col > 4.5
| |
| 00:SCAN HDFS [functional.alltypesagg a]
| partitions=1/11 files=1 size=73.39KB
| predicates: a.int_col > 899
| runtime filters: RF001 -> a.smallint_col
|
06:EXCHANGE [HASH(c.id)]
|
03:SCAN HDFS [functional.alltypessmall c]
partitions=4/4 files=4 size=6.32KB
predicates: c.string_col < '7'
runtime filters: RF000 -> c.id
====
# complex join, having joined aggregate subquery on the rhs, and predicate
# at multiple subquery levels
select x.smallint_col, sum(x.cnt)
from functional.alltypessmall c
join (
select count(a.id) cnt, b.smallint_col smallint_col
from ( select * from functional.alltypesagg a ) a
join functional.alltypessmall b on (a.smallint_col = b.id)
group by b.smallint_col
) x on (x.smallint_col = c.id)
group by x.smallint_col
---- PLAN
PLAN-ROOT SINK
|
06:AGGREGATE [FINALIZE]
| output: sum(count(a.id))
| group by: b.smallint_col
|
05:HASH JOIN [INNER JOIN]
| hash predicates: c.id = b.smallint_col
| runtime filters: RF000 <- b.smallint_col
|
|--04:AGGREGATE [FINALIZE]
| | output: count(a.id)
| | group by: b.smallint_col
| |
| 03:HASH JOIN [INNER JOIN]
| | hash predicates: a.smallint_col = b.id
| | runtime filters: RF001 <- b.id
| |
| |--02:SCAN HDFS [functional.alltypessmall b]
| | partitions=4/4 files=4 size=6.32KB
| |
| 01:SCAN HDFS [functional.alltypesagg a]
| partitions=11/11 files=11 size=814.73KB
| runtime filters: RF001 -> a.smallint_col
|
00:SCAN HDFS [functional.alltypessmall c]
partitions=4/4 files=4 size=6.32KB
runtime filters: RF000 -> c.id
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
13:EXCHANGE [UNPARTITIONED]
|
12:AGGREGATE [FINALIZE]
| output: sum:merge(x.cnt)
| group by: x.smallint_col
|
11:EXCHANGE [HASH(x.smallint_col)]
|
06:AGGREGATE [STREAMING]
| output: sum(count(a.id))
| group by: b.smallint_col
|
05:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: c.id = b.smallint_col
| runtime filters: RF000 <- b.smallint_col
|
|--10:EXCHANGE [BROADCAST]
| |
| 09:AGGREGATE [FINALIZE]
| | output: count:merge(a.id)
| | group by: b.smallint_col
| |
| 08:EXCHANGE [HASH(b.smallint_col)]
| |
| 04:AGGREGATE [STREAMING]
| | output: count(a.id)
| | group by: b.smallint_col
| |
| 03:HASH JOIN [INNER JOIN, BROADCAST]
| | hash predicates: a.smallint_col = b.id
| | runtime filters: RF001 <- b.id
| |
| |--07:EXCHANGE [BROADCAST]
| | |
| | 02:SCAN HDFS [functional.alltypessmall b]
| | partitions=4/4 files=4 size=6.32KB
| |
| 01:SCAN HDFS [functional.alltypesagg a]
| partitions=11/11 files=11 size=814.73KB
| runtime filters: RF001 -> a.smallint_col
|
00:SCAN HDFS [functional.alltypessmall c]
partitions=4/4 files=4 size=6.32KB
runtime filters: RF000 -> c.id
====
# Values statement in subqueries with predicate
select * from (select y from (values((1 as y),(11))) a where y < 10) b
---- PLAN
PLAN-ROOT SINK
|
00:UNION
constant-operands=1
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
00:UNION
constant-operands=1
====
# Mixed constant and non-constant select; the predicate is evaluated directly
# by the non-const select
select * from
(select y from
((select 1 as y)
union all
(select tinyint_col from functional.alltypes)) a
where y < 10) b
---- PLAN
PLAN-ROOT SINK
|
00:UNION
| constant-operands=1
| pass-through-operands: all
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
predicates: functional.alltypes.tinyint_col < 10
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
02:EXCHANGE [UNPARTITIONED]
|
00:UNION
| constant-operands=1
| pass-through-operands: all
|
01:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
predicates: functional.alltypes.tinyint_col < 10
====
# Union of constant selects in subquery
select * from (select 1 as y union all select 2 union all select * from (select 11) a) b
where y < 10
---- PLAN
PLAN-ROOT SINK
|
00:UNION
constant-operands=2
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
00:UNION
constant-operands=2
====
# Union of values statements in subquery
# TODO: We could combine the merge nodes below.
select * from (values(1 as y) union all values(2) union all select * from (values(11)) a) b
where y < 10
---- PLAN
PLAN-ROOT SINK
|
00:UNION
| constant-operands=2
|
01:UNION
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
00:UNION
| constant-operands=2
|
01:UNION
====
# Inner join on inline views made up of unions of constant selects
select * from
(select 1 a, 2 b union all select 1 a, 2 b) x
inner join
(select 1 a, 3 b union all select 1 a, 2 b) y on x.a = y.a
inner join
(select 1 a, 3 b union all select 1 a, 3 b) z on z.b = y.b
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [INNER JOIN]
| hash predicates: b = b
|
|--02:UNION
| constant-operands=2
|
03:HASH JOIN [INNER JOIN]
| hash predicates: a = a
|
|--01:UNION
| constant-operands=2
|
00:UNION
constant-operands=2
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
04:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: b = b
|
|--06:EXCHANGE [UNPARTITIONED]
| |
| 02:UNION
| constant-operands=2
|
03:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: a = a
|
|--05:EXCHANGE [UNPARTITIONED]
| |
| 01:UNION
| constant-operands=2
|
00:UNION
constant-operands=2
====
# Semi and inner join on a table and on inline views made up of constant selects
select * from functional.alltypessmall x
left semi join
(select 1 a, 3 b union all select 1 a, 3 b) y on y.a = x.id
inner join
(select 1 a, 3 b union all select 1 a, 3 b) z on z.b = x.id + 2
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [INNER JOIN]
| hash predicates: x.id + 2 = b
| runtime filters: RF000 <- b
|
|--02:UNION
| constant-operands=2
|
03:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: x.id = a
| runtime filters: RF001 <- a
|
|--01:UNION
| constant-operands=2
|
00:SCAN HDFS [functional.alltypessmall x]
partitions=4/4 files=4 size=6.32KB
runtime filters: RF000 -> x.id + 2, RF001 -> x.id
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
07:EXCHANGE [UNPARTITIONED]
|
04:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: x.id + 2 = b
| runtime filters: RF000 <- b
|
|--06:EXCHANGE [BROADCAST]
| |
| 02:UNION
| constant-operands=2
|
03:HASH JOIN [LEFT SEMI JOIN, BROADCAST]
| hash predicates: x.id = a
| runtime filters: RF001 <- a
|
|--05:EXCHANGE [BROADCAST]
| |
| 01:UNION
| constant-operands=2
|
00:SCAN HDFS [functional.alltypessmall x]
partitions=4/4 files=4 size=6.32KB
runtime filters: RF000 -> x.id + 2, RF001 -> x.id
====
# Tests that views correctly reanalyze cloned exprs. (IMPALA-984)
select b.* from functional.decimal_tbl a left outer join
(select d1, d1 + NULL IS NULL x from functional.decimal_tbl) b
on (a.d1 = b.d1)
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: a.d1 = d1
|
|--01:SCAN HDFS [functional.decimal_tbl]
| partitions=1/1 files=1 size=195B
|
00:SCAN HDFS [functional.decimal_tbl a]
partitions=1/1 files=1 size=195B
====
# Test predicate assignment through inline view when the query contains
# group by and distinct (IMPALA-1165)
select foo, sum(distinct foo)
from (select int_col + int_col as foo from functional.alltypesagg) t
where foo = 10
group by foo
limit 10
---- PLAN
PLAN-ROOT SINK
|
02:AGGREGATE [FINALIZE]
| output: sum(foo)
| group by: foo
| limit: 10
|
01:AGGREGATE
| group by: int_col + int_col, int_col + int_col
|
00:SCAN HDFS [functional.alltypesagg]
partitions=11/11 files=11 size=814.73KB
predicates: int_col + int_col = 10
====
# Test enforcement of inline-view slot equivalences when the inline-view
# contains an outer join (IMPALA-1441)
select * from
(select t1.int_col, t1.tinyint_col, t2.int_col as int_col2, t2.tinyint_col as tinyint_col2
from functional.alltypestiny t1 left outer join functional.alltypes t2
on t1.int_col = t2.int_col and t1.tinyint_col = t2.tinyint_col) t
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [RIGHT OUTER JOIN]
| hash predicates: t2.int_col = t1.int_col, t2.tinyint_col = t1.tinyint_col
| runtime filters: RF000 <- t1.int_col, RF001 <- t1.tinyint_col
|
|--00:SCAN HDFS [functional.alltypestiny t1]
| partitions=4/4 files=4 size=460B
|
01:SCAN HDFS [functional.alltypes t2]
partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> t2.int_col, RF001 -> t2.tinyint_col
====
# IMPALA-1459: Test correct assignment of On-clause predicate from an enclosing block
# inside an inline view with an outer join.
select 1 from
(select a.id aid, b.id bid from
functional.alltypes a inner join functional.alltypes b
on a.id = b.id
full outer join functional.alltypessmall c on a.id = c.id) v
inner join functional.alltypestiny c
on (aid < bid and aid = c.id)
---- PLAN
PLAN-ROOT SINK
|
06:HASH JOIN [INNER JOIN]
| hash predicates: a.id = c.id
| runtime filters: RF000 <- c.id
|
|--05:SCAN HDFS [functional.alltypestiny c]
| partitions=4/4 files=4 size=460B
|
04:HASH JOIN [FULL OUTER JOIN]
| hash predicates: a.id = c.id
| other predicates: a.id < b.id, a.id = b.id
|
|--02:SCAN HDFS [functional.alltypessmall c]
| partitions=4/4 files=4 size=6.32KB
|
03:HASH JOIN [INNER JOIN]
| hash predicates: a.id = b.id
| runtime filters: RF001 <- b.id
|
|--01:SCAN HDFS [functional.alltypes b]
| partitions=24/24 files=24 size=478.45KB
| runtime filters: RF000 -> b.id
|
00:SCAN HDFS [functional.alltypes a]
partitions=24/24 files=24 size=478.45KB
runtime filters: RF000 -> a.id, RF001 -> a.id
====
# IMPALA-2665: Test correct assignment of On-clause predicate from an enclosing block
# inside an inline view with an outer join.
select 1 from functional.alltypes t1
inner join
(select a.id, b.int_col
from functional.alltypes a left outer join functional.alltypes b
on a.id = b.int_col) v
on (t1.id = v.id and v.int_col is null and v.int_col < 10 and v.id < 10)
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [INNER JOIN]
| hash predicates: a.id = t1.id
| runtime filters: RF000 <- t1.id
|
|--00:SCAN HDFS [functional.alltypes t1]
| partitions=24/24 files=24 size=478.45KB
| predicates: t1.id < 10
|
03:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: a.id = b.int_col
| other predicates: b.int_col IS NULL, b.int_col < 10
|
|--02:SCAN HDFS [functional.alltypes b]
| partitions=24/24 files=24 size=478.45KB
| predicates: b.int_col < 10
| runtime filters: RF000 -> b.int_col
|
01:SCAN HDFS [functional.alltypes a]
partitions=24/24 files=24 size=478.45KB
predicates: a.id < 10
runtime filters: RF000 -> a.id
====
# IMPALA-2643: Test inline views with duplicate exprs in their select list.
# Inferred predicate referencing the same expr gets filtered out.
select * from
(select * from
(select bigint_col, bigint_col as bigint_col2
from functional.alltypestiny) iv
) ivv
---- PLAN
PLAN-ROOT SINK
|
00:SCAN HDFS [functional.alltypestiny]
partitions=4/4 files=4 size=460B
====
# IMPALA-2643: Explicit predicates remain unaffected.
select * from
(select * from
(select bigint_col, bigint_col as bigint_col2
from functional.alltypestiny) iv
) ivv where bigint_col = bigint_col2
---- PLAN
PLAN-ROOT SINK
|
00:SCAN HDFS [functional.alltypestiny]
partitions=4/4 files=4 size=460B
predicates: bigint_col = bigint_col
====
# IMPALA-2643: Test aggregation.
# Inferred predicate referencing the same expr gets filtered out.
select * from
(select * from
(select sum(bigint_col) as s1, sum(bigint_col) as s2
from functional.alltypestiny) iv
) ivv
---- PLAN
PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| output: sum(bigint_col)
|
00:SCAN HDFS [functional.alltypestiny]
partitions=4/4 files=4 size=460B
====
# IMPALA-2643: Explicit predicates remain unaffected.
select * from
(select * from
(select sum(bigint_col) as s1, sum(bigint_col) as s2
from functional.alltypestiny) iv
) ivv where s1 = s2
---- PLAN
PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| output: sum(bigint_col)
| having: sum(bigint_col) = sum(bigint_col)
|
00:SCAN HDFS [functional.alltypestiny]
partitions=4/4 files=4 size=460B
====