Files
impala/testdata/workloads/functional-planner/queries/PlannerTest/small-query-opt.test
Taras Bobrovytsky a50c344077 IMPALA-3586: Implement union passthrough
The union node acts as a pass-through operator and forwards row batches
from its children without materializing them. This is done in the case
when the child's tuple layout is identical to union node tuple layout
and no functions need to be applied to the child row batches.

Removed operand reordering in the FE because it's simpler and safer to
handle all passthrough children before non-passthrough children in the
BE. The recent improvements to memory management allowed us to remove
this requirement.

Testing:
- Added new planner and end to end tests that cover the new
  functionality.
- Updated existing tests to reflect the new behavior.

Perf:
Ran a benchmark on a local 10 GB tpcds dataset. I used an unpartitioned
version of the store_sales table. There was over a 2x performance
improvement for the following query:

SELECT
  COUNT(ss_sold_time_sk),
  COUNT(ss_item_sk),
  COUNT(ss_customer_sk),
  COUNT(ss_cdemo_sk),
  COUNT(ss_hdemo_sk),
  COUNT(ss_addr_sk),
  COUNT(ss_store_sk),
  COUNT(ss_promo_sk),
  COUNT(ss_ticket_number),
  COUNT(ss_quantity),
  COUNT(ss_wholesale_cost),
  COUNT(ss_list_price),
  COUNT(ss_sales_price),
  COUNT(ss_ext_discount_amt),
  COUNT(ss_ext_sales_price),
  COUNT(ss_ext_wholesale_cost),
  COUNT(ss_ext_list_price),
  COUNT(ss_ext_tax),
  COUNT(ss_coupon_amt),
  COUNT(ss_net_paid),
  COUNT(ss_net_paid_inc_tax),
  COUNT(ss_net_profit),
  COUNT(ss_sold_date_sk)
FROM (
  select * from tpcds_10_parquet.store_sales_unpartitioned
  union all
  select * from tpcds_10_parquet.store_sales_unpartitioned
  union all
  select * from tpcds_10_parquet.store_sales_unpartitioned
  union all
  select * from tpcds_10_parquet.store_sales_unpartitioned
  union all
  select * from tpcds_10_parquet.store_sales_unpartitioned
  union all
  select * from tpcds_10_parquet.store_sales_unpartitioned
  union all
  select * from tpcds_10_parquet.store_sales_unpartitioned
  union all
  select * from tpcds_10_parquet.store_sales_unpartitioned
  union all
  select * from tpcds_10_parquet.store_sales_unpartitioned
  union all
  select * from tpcds_10_parquet.store_sales_unpartitioned
) t

Before:
Total Time: 43s164ms

Summary:
Operator          #Hosts   Avg Time   Max Time    #Rows  Est. #Rows   Peak Mem  Est. Peak Mem  Detail
------------------------------------------------------------------------------------------------------------------------------
13:AGGREGATE           1  224.721us  224.721us        1           1   28.00 KB        -1.00 B  FINALIZE
12:EXCHANGE            1   24.578us   24.578us        3           1          0        -1.00 B  UNPARTITIONED
11:AGGREGATE           3    2s402ms    3s060ms        3           1  119.00 KB       10.00 MB
00:UNION               3   35s380ms   37s846ms  288.01M     288.01M    3.08 MB              0
|--02:SCAN HDFS        3  184.197ms  219.931ms   28.80M      28.80M  535.03 MB        1.88 GB  store_sales_unpartitioned
|--03:SCAN HDFS        3  131.956ms  153.401ms   28.80M      28.80M  534.98 MB        1.88 GB  store_sales_unpartitioned
|--04:SCAN HDFS        3  178.456ms  247.721ms   28.80M      28.80M  534.98 MB        1.88 GB  store_sales_unpartitioned
|--05:SCAN HDFS        3  189.398ms  242.251ms   28.80M      28.80M  535.01 MB        1.88 GB  store_sales_unpartitioned
|--06:SCAN HDFS        3  122.786ms  156.528ms   28.80M      28.80M  534.98 MB        1.88 GB  store_sales_unpartitioned
|--07:SCAN HDFS        3  147.467ms  183.391ms   28.80M      28.80M  535.13 MB        1.88 GB  store_sales_unpartitioned
|--08:SCAN HDFS        3  147.502ms  186.273ms   28.80M      28.80M  535.01 MB        1.88 GB  store_sales_unpartitioned
|--09:SCAN HDFS        3  130.086ms  154.682ms   28.80M      28.80M  535.04 MB        1.88 GB  store_sales_unpartitioned
|--10:SCAN HDFS        3  122.701ms  161.056ms   28.80M      28.80M  534.89 MB        1.88 GB  store_sales_unpartitioned
01:SCAN HDFS           3  287.863ms  330.436ms   28.80M      28.80M  534.98 MB        1.88 GB  store_sales_unpartitioned

After:
Total Time: 19s139ms

Summary:
Operator          #Hosts   Avg Time   Max Time    #Rows  Est. #Rows   Peak Mem  Est. Peak Mem  Detail
------------------------------------------------------------------------------------------------------------------------------
13:AGGREGATE           1  166.241us  166.241us        1           1   28.00 KB        -1.00 B  FINALIZE
12:EXCHANGE            1   71.695us   71.695us        3           1          0        -1.00 B  UNPARTITIONED
11:AGGREGATE           3    2s971ms    3s809ms        3           1    3.08 MB       10.00 MB
00:UNION               3  207.956ms  222.846ms  288.01M     288.01M          0              0
|--02:SCAN HDFS        3    1s533ms    1s535ms   28.80M      28.80M  532.28 MB        1.88 GB  store_sales_unpartitioned
|--03:SCAN HDFS        3    1s554ms    1s669ms   28.80M      28.80M  525.73 MB        1.88 GB  store_sales_unpartitioned
|--04:SCAN HDFS        3    1s568ms    1s716ms   28.80M      28.80M  525.03 MB        1.88 GB  store_sales_unpartitioned
|--05:SCAN HDFS        3    1s503ms    1s617ms   28.80M      28.80M  527.43 MB        1.88 GB  store_sales_unpartitioned
|--06:SCAN HDFS        3    1s560ms    1s634ms   28.80M      28.80M  528.52 MB        1.88 GB  store_sales_unpartitioned
|--07:SCAN HDFS        3    1s489ms    1s643ms   28.80M      28.80M  534.81 MB        1.88 GB  store_sales_unpartitioned
|--08:SCAN HDFS        3    1s534ms    1s581ms   28.80M      28.80M  528.10 MB        1.88 GB  store_sales_unpartitioned
|--09:SCAN HDFS        3    1s558ms    1s674ms   28.80M      28.80M  526.77 MB        1.88 GB  store_sales_unpartitioned
|--10:SCAN HDFS        3    1s504ms    1s692ms   28.80M      28.80M  527.83 MB        1.88 GB  store_sales_unpartitioned
01:SCAN HDFS           3    1s682ms    1s911ms   28.80M      28.80M  526.14 MB        1.88 GB  store_sales_unpartitioned

Change-Id: Ia8f6d5062724ba5b78174c3227a7a796d10d8416
Reviewed-on: http://gerrit.cloudera.org:8080/5816
Reviewed-by: Dan Hecht <dhecht@cloudera.com>
Tested-by: Impala Public Jenkins
2017-03-21 22:24:01 +00:00

313 lines
6.8 KiB
Plaintext

select * from functional_seq.alltypes t1 limit 5
---- PLAN
PLAN-ROOT SINK
|
00:SCAN HDFS [functional_seq.alltypes t1]
partitions=24/24 files=24 size=562.59KB
limit: 5
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
00:SCAN HDFS [functional_seq.alltypes t1]
partitions=24/24 files=24 size=562.59KB
limit: 5
====
# Query exceeds the limit of 8 rows for the optimization, so the query is distributed
select * from functional.alltypes t1 limit 10
---- PLAN
PLAN-ROOT SINK
|
00:SCAN HDFS [functional.alltypes t1]
partitions=24/24 files=24 size=478.45KB
limit: 10
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
01:EXCHANGE [UNPARTITIONED]
| limit: 10
|
00:SCAN HDFS [functional.alltypes t1]
partitions=24/24 files=24 size=478.45KB
limit: 10
====
# Query is optimized, run on coordinator only
select * from functional.alltypes t1 limit 5
---- PLAN
PLAN-ROOT SINK
|
00:SCAN HDFS [functional.alltypes t1]
partitions=24/24 files=24 size=478.45KB
limit: 5
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
00:SCAN HDFS [functional.alltypes t1]
partitions=24/24 files=24 size=478.45KB
limit: 5
====
# If a predicate is applied the optimization is disabled
select * from functional.alltypes t1 where t1.id < 99 limit 5
---- PLAN
PLAN-ROOT SINK
|
00:SCAN HDFS [functional.alltypes t1]
partitions=24/24 files=24 size=478.45KB
predicates: t1.id < 99
limit: 5
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
01:EXCHANGE [UNPARTITIONED]
| limit: 5
|
00:SCAN HDFS [functional.alltypes t1]
partitions=24/24 files=24 size=478.45KB
predicates: t1.id < 99
limit: 5
====
# No optimization for hbase tables when a predicate is applied
select * from functional_hbase.alltypes t1 where t1.id < 99 limit 5
---- PLAN
PLAN-ROOT SINK
|
00:SCAN HBASE [functional_hbase.alltypes t1]
predicates: t1.id < 99
limit: 5
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
01:EXCHANGE [UNPARTITIONED]
| limit: 5
|
00:SCAN HBASE [functional_hbase.alltypes t1]
predicates: t1.id < 99
limit: 5
====
# Applies optimization for small queries in hbase
select * from functional_hbase.alltypes t1 limit 5
---- PLAN
PLAN-ROOT SINK
|
00:SCAN HBASE [functional_hbase.alltypes t1]
limit: 5
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
00:SCAN HBASE [functional_hbase.alltypes t1]
limit: 5
====
insert into
functional_hbase.alltypes
values (1, 1, true, "1999-12-01", 2.0, 1.0, 1, 12, 2, "abs",
cast(now() as timestamp), 1, 1999)
---- PLAN
WRITE TO HBASE table=functional_hbase.alltypes
|
00:UNION
constant-operands=1
---- DISTRIBUTEDPLAN
WRITE TO HBASE table=functional_hbase.alltypes
|
00:UNION
constant-operands=1
====
create table tm as select * from functional_hbase.alltypes limit 5
---- PLAN
WRITE TO HDFS [default.tm, OVERWRITE=false]
| partitions=1
|
00:SCAN HBASE [functional_hbase.alltypes]
limit: 5
---- DISTRIBUTEDPLAN
WRITE TO HDFS [default.tm, OVERWRITE=false]
| partitions=1
|
00:SCAN HBASE [functional_hbase.alltypes]
limit: 5
====
create table tm as select * from functional_hbase.alltypes limit 50
---- PLAN
WRITE TO HDFS [default.tm, OVERWRITE=false]
| partitions=1
|
00:SCAN HBASE [functional_hbase.alltypes]
limit: 50
---- DISTRIBUTEDPLAN
WRITE TO HDFS [default.tm, OVERWRITE=false]
| partitions=1
|
01:EXCHANGE [UNPARTITIONED]
| limit: 50
|
00:SCAN HBASE [functional_hbase.alltypes]
limit: 50
====
select * from functional_hbase.alltypes limit 5
union all
select * from functional_hbase.alltypes limit 2
---- PLAN
PLAN-ROOT SINK
|
00:UNION
| pass-through-operands: all
|
|--02:SCAN HBASE [functional_hbase.alltypes]
| limit: 2
|
01:SCAN HBASE [functional_hbase.alltypes]
limit: 5
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
00:UNION
| pass-through-operands: all
|
|--02:SCAN HBASE [functional_hbase.alltypes]
| limit: 2
|
01:SCAN HBASE [functional_hbase.alltypes]
limit: 5
====
select * from functional_hbase.alltypes limit 5
union all
select * from functional_hbase.alltypes limit 5
---- PLAN
PLAN-ROOT SINK
|
00:UNION
| pass-through-operands: all
|
|--02:SCAN HBASE [functional_hbase.alltypes]
| limit: 5
|
01:SCAN HBASE [functional_hbase.alltypes]
limit: 5
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
00:UNION
| pass-through-operands: all
|
|--04:EXCHANGE [UNPARTITIONED]
| | limit: 5
| |
| 02:SCAN HBASE [functional_hbase.alltypes]
| limit: 5
|
03:EXCHANGE [UNPARTITIONED]
| limit: 5
|
01:SCAN HBASE [functional_hbase.alltypes]
limit: 5
====
# Two scans cannot run in the same fragment. IMPALA-561
select * from
functional.testtbl a join functional.testtbl b on a.id = b.id
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
04:EXCHANGE [UNPARTITIONED]
|
02:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: a.id = b.id
| runtime filters: RF000 <- b.id
|
|--03:EXCHANGE [BROADCAST]
| |
| 01:SCAN HDFS [functional.testtbl b]
| partitions=1/1 files=0 size=0B
|
00:SCAN HDFS [functional.testtbl a]
partitions=1/1 files=0 size=0B
runtime filters: RF000 -> a.id
====
select * from
functional.testtbl a, functional.testtbl b
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
04:EXCHANGE [UNPARTITIONED]
|
02:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
|
|--03:EXCHANGE [BROADCAST]
| |
| 01:SCAN HDFS [functional.testtbl b]
| partitions=1/1 files=0 size=0B
|
00:SCAN HDFS [functional.testtbl a]
partitions=1/1 files=0 size=0B
====
select * from
functional.alltypestiny a
where a.id in (select id from functional.alltypestiny limit 5) limit 5
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
05:EXCHANGE [UNPARTITIONED]
| limit: 5
|
02:HASH JOIN [LEFT SEMI JOIN, BROADCAST]
| hash predicates: a.id = id
| runtime filters: RF000 <- id
| limit: 5
|
|--04:EXCHANGE [BROADCAST]
| |
| 03:EXCHANGE [UNPARTITIONED]
| | limit: 5
| |
| 01:SCAN HDFS [functional.alltypestiny]
| partitions=4/4 files=4 size=460B
| limit: 5
|
00:SCAN HDFS [functional.alltypestiny a]
partitions=4/4 files=4 size=460B
runtime filters: RF000 -> a.id
====
# Test correct single-node planning for mixed union distinct/all (IMPALA-1553).
select
id, bool_col
from functional.alltypestiny a
where year=2009 and month=1
union distinct
select id, bool_col
from functional.alltypestiny b
where year=2009 and month=1
union all
select id, bool_col
from functional.alltypestiny c
where year=2009 and month=2
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
04:UNION
| pass-through-operands: 03
|
|--05:SCAN HDFS [functional.alltypestiny c]
| partitions=1/4 files=1 size=115B
|
03:AGGREGATE [FINALIZE]
| group by: id, bool_col
|
00:UNION
|
|--02:SCAN HDFS [functional.alltypestiny b]
| partitions=1/4 files=1 size=115B
|
01:SCAN HDFS [functional.alltypestiny a]
partitions=1/4 files=1 size=115B
====
# IMPALA-2527: Tests that the small query optimization is disabled for collection types
select key from functional.allcomplextypes.map_map_col.value limit 5;
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
01:EXCHANGE [UNPARTITIONED]
| limit: 5
|
00:SCAN HDFS [functional.allcomplextypes.map_map_col.value]
partitions=0/0 files=0 size=0B
limit: 5
====