Files
impala/testdata/workloads/functional-planner/queries/PlannerTest/tpch-nested.test
Tim Armstrong 63f5e8ec00 IMPALA-1270: add distinct aggregation to semi joins
When generating plans with left semi/anti joins (typically
resulting from subquery rewrites), the planner now
considers inserting a distinct aggregation on the inner
side of the join. The decision is based on whether that
aggregation would reduce the number of rows by more than
75%. This is fairly conservative and the optimization
might be beneficial for smaller reductions, but the
conservative threshold is chosen to reduce the number
of potential plan regressions.

The aggregation can reduce both the number of rows and the
width of the rows, by projecting out unneeded slots.

ENABLE_DISTINCT_SEMI_JOIN_OPTIMIZATION query option is
added to allow toggling the optimization.

Tests:
* Add positive and negative planner tests for various
  cases - including semi/anti joins, missing stats,
  broadcast/shuffle, different numbers of join predicates.
* Add some end-to-end tests to verify plans execute correctly.

Change-Id: Icbb955e805d9e764edf11c57b98f341b88a37fcc
Reviewed-on: http://gerrit.cloudera.org:8080/16180
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
2020-07-15 17:10:50 +00:00

3264 lines
106 KiB
Plaintext

# TPCH-Q1
# Q1 - Pricing Summary Report Query
# Aggregates the line items shipped on or before 1998-09-02 per
# (l_returnflag, l_linestatus) pair. The FROM clause names the nested
# o_lineitems collection by its absolute path, so no join is written out.
SELECT
  l_returnflag,
  l_linestatus,
  sum(l_quantity) AS sum_qty,
  sum(l_extendedprice) AS sum_base_price,
  sum(l_extendedprice * (1 - l_discount)) AS sum_disc_price,
  sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) AS sum_charge,
  avg(l_quantity) AS avg_qty,
  avg(l_extendedprice) AS avg_price,
  avg(l_discount) AS avg_disc,
  count(*) AS count_order
FROM
  customer.c_orders.o_lineitems
WHERE
  l_shipdate <= '1998-09-02'
GROUP BY
  l_returnflag,
  l_linestatus
ORDER BY
  l_returnflag,
  l_linestatus
---- PLAN
Max Per-Host Resource Reservation: Memory=66.00MB Threads=2
Per-Host Resource Estimates: Memory=744MB
PLAN-ROOT SINK
|
02:SORT
| order by: l_returnflag ASC, l_linestatus ASC
| row-size=120B cardinality=1.50M
|
01:AGGREGATE [FINALIZE]
| output: sum(l_quantity), sum(l_extendedprice), sum(l_extendedprice * (1 - l_discount)), sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)), avg(l_quantity), avg(l_extendedprice), avg(l_discount), count(*)
| group by: l_returnflag, l_linestatus
| row-size=120B cardinality=1.50M
|
00:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems]
HDFS partitions=1/1 files=4 size=289.08MB
predicates: l_shipdate <= '1998-09-02'
row-size=68B cardinality=1.50M
---- DISTRIBUTEDPLAN
Max Per-Host Resource Reservation: Memory=112.00MB Threads=4
Per-Host Resource Estimates: Memory=916MB
PLAN-ROOT SINK
|
05:MERGING-EXCHANGE [UNPARTITIONED]
| order by: l_returnflag ASC, l_linestatus ASC
|
02:SORT
| order by: l_returnflag ASC, l_linestatus ASC
| row-size=120B cardinality=1.50M
|
04:AGGREGATE [FINALIZE]
| output: sum:merge(l_quantity), sum:merge(l_extendedprice), sum:merge(l_extendedprice * (1 - l_discount)), sum:merge(l_extendedprice * (1 - l_discount) * (1 + l_tax)), avg:merge(l_quantity), avg:merge(l_extendedprice), avg:merge(l_discount), count:merge(*)
| group by: l_returnflag, l_linestatus
| row-size=120B cardinality=1.50M
|
03:EXCHANGE [HASH(l_returnflag,l_linestatus)]
|
01:AGGREGATE [STREAMING]
| output: sum(l_quantity), sum(l_extendedprice), sum(l_extendedprice * (1 - l_discount)), sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)), avg(l_quantity), avg(l_extendedprice), avg(l_discount), count(*)
| group by: l_returnflag, l_linestatus
| row-size=120B cardinality=1.50M
|
00:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems]
HDFS partitions=1/1 files=4 size=289.08MB
predicates: l_shipdate <= '1998-09-02'
row-size=68B cardinality=1.50M
====
# TPCH-Q2
# Q2 - Minimum Cost Supplier Query
# For size-15 parts whose type ends in 'BRASS' and whose supplier belongs to a
# EUROPE nation, keeps only the partsupp rows whose ps_supplycost equals the
# minimum returned by the subquery. The subquery is correlated on the outer
# p_partkey and reuses the aliases s, ps, r and n for its own table refs.
# Only the first 100 rows of the ordered result are returned.
SELECT
  s_acctbal,
  s_name,
  n_name,
  p_partkey,
  p_mfgr,
  s_address,
  s_phone,
  s_comment
FROM
  supplier s,
  s.s_partsupps ps,
  part p,
  region r,
  r.r_nations n
WHERE
  p_partkey = ps_partkey
  AND p_size = 15
  AND p_type LIKE '%BRASS'
  AND s_nationkey = n_nationkey
  AND r_name = 'EUROPE'
  AND ps_supplycost = (
    SELECT
      min(ps_supplycost)
    FROM
      supplier s,
      s.s_partsupps ps,
      region r,
      r.r_nations n
    WHERE
      p_partkey = ps_partkey
      AND s_nationkey = n_nationkey
      AND r_name = 'EUROPE'
  )
ORDER BY
  s_acctbal DESC,
  n_name,
  s_name,
  p_partkey
LIMIT 100
---- PLAN
Max Per-Host Resource Reservation: Memory=76.84MB Threads=6
Per-Host Resource Estimates: Memory=865MB
PLAN-ROOT SINK
|
26:TOP-N [LIMIT=100]
| order by: s_acctbal DESC, n_name ASC, s_name ASC, p_partkey ASC
| row-size=223B cardinality=100
|
25:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: p_partkey = ps_partkey, ps_supplycost = min(ps_supplycost)
| runtime filters: RF000 <- ps_partkey
| row-size=322B cardinality=100.00K
|
|--22:AGGREGATE [FINALIZE]
| | output: min(ps_supplycost)
| | group by: ps_partkey
| | row-size=16B cardinality=100.00K
| |
| 21:HASH JOIN [INNER JOIN]
| | hash predicates: s_nationkey = n_nationkey
| | runtime filters: RF008 <- n_nationkey
| | row-size=63B cardinality=100.00K
| |
| |--17:SUBPLAN
| | | row-size=33B cardinality=10
| | |
| | |--20:NESTED LOOP JOIN [CROSS JOIN]
| | | | row-size=33B cardinality=10
| | | |
| | | |--18:SINGULAR ROW SRC
| | | | row-size=31B cardinality=1
| | | |
| | | 19:UNNEST [r.r_nations n]
| | | row-size=0B cardinality=10
| | |
| | 16:SCAN HDFS [tpch_nested_parquet.region r]
| | HDFS partitions=1/1 files=1 size=3.59KB
| | predicates: r_name = 'EUROPE', !empty(r.r_nations)
| | row-size=31B cardinality=1
| |
| 12:SUBPLAN
| | row-size=30B cardinality=100.00K
| |
| |--15:NESTED LOOP JOIN [CROSS JOIN]
| | | row-size=30B cardinality=10
| | |
| | |--13:SINGULAR ROW SRC
| | | row-size=14B cardinality=1
| | |
| | 14:UNNEST [s.s_partsupps ps]
| | row-size=0B cardinality=10
| |
| 11:SCAN HDFS [tpch_nested_parquet.supplier s]
| HDFS partitions=1/1 files=1 size=41.80MB
| predicates: !empty(s.s_partsupps)
| runtime filters: RF008 -> s_nationkey
| row-size=14B cardinality=10.00K
|
24:HASH JOIN [INNER JOIN]
| hash predicates: s_nationkey = n_nationkey
| runtime filters: RF004 <- n_nationkey
| row-size=322B cardinality=100.00K
|
|--07:SUBPLAN
| | row-size=45B cardinality=10
| |
| |--10:NESTED LOOP JOIN [CROSS JOIN]
| | | row-size=45B cardinality=10
| | |
| | |--08:SINGULAR ROW SRC
| | | row-size=31B cardinality=1
| | |
| | 09:UNNEST [r.r_nations n]
| | row-size=0B cardinality=10
| |
| 06:SCAN HDFS [tpch_nested_parquet.region r]
| HDFS partitions=1/1 files=1 size=3.59KB
| predicates: r_name = 'EUROPE', !empty(r.r_nations)
| row-size=31B cardinality=1
|
23:HASH JOIN [INNER JOIN]
| hash predicates: ps_partkey = p_partkey
| row-size=277B cardinality=100.00K
|
|--05:SCAN HDFS [tpch_nested_parquet.part p]
| HDFS partitions=1/1 files=1 size=6.24MB
| predicates: p_size = 15, p_type LIKE '%BRASS'
| runtime filters: RF000 -> p_partkey
| row-size=71B cardinality=1.26K
|
01:SUBPLAN
| row-size=207B cardinality=100.00K
|
|--04:NESTED LOOP JOIN [CROSS JOIN]
| | row-size=207B cardinality=10
| |
| |--02:SINGULAR ROW SRC
| | row-size=191B cardinality=1
| |
| 03:UNNEST [s.s_partsupps ps]
| row-size=0B cardinality=10
|
00:SCAN HDFS [tpch_nested_parquet.supplier s]
HDFS partitions=1/1 files=1 size=41.80MB
predicates: !empty(s.s_partsupps)
runtime filters: RF004 -> s_nationkey
row-size=191B cardinality=10.00K
---- DISTRIBUTEDPLAN
Max Per-Host Resource Reservation: Memory=114.73MB Threads=12
Per-Host Resource Estimates: Memory=1.01GB
PLAN-ROOT SINK
|
33:MERGING-EXCHANGE [UNPARTITIONED]
| order by: s_acctbal DESC, n_name ASC, s_name ASC, p_partkey ASC
| limit: 100
|
26:TOP-N [LIMIT=100]
| order by: s_acctbal DESC, n_name ASC, s_name ASC, p_partkey ASC
| row-size=223B cardinality=100
|
25:HASH JOIN [LEFT SEMI JOIN, BROADCAST]
| hash predicates: p_partkey = ps_partkey, ps_supplycost = min(ps_supplycost)
| runtime filters: RF000 <- ps_partkey
| row-size=322B cardinality=100.00K
|
|--32:EXCHANGE [BROADCAST]
| |
| 31:AGGREGATE [FINALIZE]
| | output: min:merge(ps_supplycost)
| | group by: ps_partkey
| | row-size=16B cardinality=100.00K
| |
| 30:EXCHANGE [HASH(ps_partkey)]
| |
| 22:AGGREGATE [STREAMING]
| | output: min(ps_supplycost)
| | group by: ps_partkey
| | row-size=16B cardinality=100.00K
| |
| 21:HASH JOIN [INNER JOIN, BROADCAST]
| | hash predicates: s_nationkey = n_nationkey
| | runtime filters: RF008 <- n_nationkey
| | row-size=63B cardinality=100.00K
| |
| |--29:EXCHANGE [BROADCAST]
| | |
| | 17:SUBPLAN
| | | row-size=33B cardinality=10
| | |
| | |--20:NESTED LOOP JOIN [CROSS JOIN]
| | | | row-size=33B cardinality=10
| | | |
| | | |--18:SINGULAR ROW SRC
| | | | row-size=31B cardinality=1
| | | |
| | | 19:UNNEST [r.r_nations n]
| | | row-size=0B cardinality=10
| | |
| | 16:SCAN HDFS [tpch_nested_parquet.region r]
| | HDFS partitions=1/1 files=1 size=3.59KB
| | predicates: r_name = 'EUROPE', !empty(r.r_nations)
| | row-size=31B cardinality=1
| |
| 12:SUBPLAN
| | row-size=30B cardinality=100.00K
| |
| |--15:NESTED LOOP JOIN [CROSS JOIN]
| | | row-size=30B cardinality=10
| | |
| | |--13:SINGULAR ROW SRC
| | | row-size=14B cardinality=1
| | |
| | 14:UNNEST [s.s_partsupps ps]
| | row-size=0B cardinality=10
| |
| 11:SCAN HDFS [tpch_nested_parquet.supplier s]
| HDFS partitions=1/1 files=1 size=41.80MB
| predicates: !empty(s.s_partsupps)
| runtime filters: RF008 -> s_nationkey
| row-size=14B cardinality=10.00K
|
24:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: s_nationkey = n_nationkey
| runtime filters: RF004 <- n_nationkey
| row-size=322B cardinality=100.00K
|
|--28:EXCHANGE [BROADCAST]
| |
| 07:SUBPLAN
| | row-size=45B cardinality=10
| |
| |--10:NESTED LOOP JOIN [CROSS JOIN]
| | | row-size=45B cardinality=10
| | |
| | |--08:SINGULAR ROW SRC
| | | row-size=31B cardinality=1
| | |
| | 09:UNNEST [r.r_nations n]
| | row-size=0B cardinality=10
| |
| 06:SCAN HDFS [tpch_nested_parquet.region r]
| HDFS partitions=1/1 files=1 size=3.59KB
| predicates: r_name = 'EUROPE', !empty(r.r_nations)
| row-size=31B cardinality=1
|
23:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: ps_partkey = p_partkey
| row-size=277B cardinality=100.00K
|
|--27:EXCHANGE [BROADCAST]
| |
| 05:SCAN HDFS [tpch_nested_parquet.part p]
| HDFS partitions=1/1 files=1 size=6.24MB
| predicates: p_size = 15, p_type LIKE '%BRASS'
| runtime filters: RF000 -> p_partkey
| row-size=71B cardinality=1.26K
|
01:SUBPLAN
| row-size=207B cardinality=100.00K
|
|--04:NESTED LOOP JOIN [CROSS JOIN]
| | row-size=207B cardinality=10
| |
| |--02:SINGULAR ROW SRC
| | row-size=191B cardinality=1
| |
| 03:UNNEST [s.s_partsupps ps]
| row-size=0B cardinality=10
|
00:SCAN HDFS [tpch_nested_parquet.supplier s]
HDFS partitions=1/1 files=1 size=41.80MB
predicates: !empty(s.s_partsupps)
runtime filters: RF004 -> s_nationkey
row-size=191B cardinality=10.00K
====
# TPCH-Q3
# Q3 - Shipping Priority Query
# Walks customer -> c_orders -> o_lineitems through relative collection refs
# and sums discounted revenue per order for 'BUILDING' customers, restricted
# to orders placed before 1995-03-15 with line items shipped after that date.
# Returns the 10 highest-revenue orders.
SELECT
  o_orderkey,
  sum(l_extendedprice * (1 - l_discount)) AS revenue,
  o_orderdate,
  o_shippriority
FROM
  customer c,
  c.c_orders o,
  o.o_lineitems l
WHERE
  c_mktsegment = 'BUILDING'
  AND o_orderdate < '1995-03-15'
  AND l_shipdate > '1995-03-15'
GROUP BY
  o_orderkey,
  o_orderdate,
  o_shippriority
ORDER BY
  revenue DESC,
  o_orderdate
LIMIT 10
---- PLAN
Max Per-Host Resource Reservation: Memory=66.00MB Threads=2
Per-Host Resource Estimates: Memory=744MB
PLAN-ROOT SINK
|
10:TOP-N [LIMIT=10]
| order by: sum(l_extendedprice * (1 - l_discount)) DESC, o_orderdate ASC
| row-size=40B cardinality=10
|
09:AGGREGATE [FINALIZE]
| output: sum(l_extendedprice * (1 - l_discount))
| group by: o_orderkey, o_orderdate, o_shippriority
| row-size=40B cardinality=3.00M
|
01:SUBPLAN
| row-size=97B cardinality=3.00M
|
|--08:NESTED LOOP JOIN [CROSS JOIN]
| | row-size=97B cardinality=100
| |
| |--02:SINGULAR ROW SRC
| | row-size=33B cardinality=1
| |
| 04:SUBPLAN
| | row-size=64B cardinality=100
| |
| |--07:NESTED LOOP JOIN [CROSS JOIN]
| | | row-size=64B cardinality=10
| | |
| | |--05:SINGULAR ROW SRC
| | | row-size=36B cardinality=1
| | |
| | 06:UNNEST [o.o_lineitems l]
| | row-size=0B cardinality=10
| |
| 03:UNNEST [c.c_orders o]
| row-size=0B cardinality=10
|
00:SCAN HDFS [tpch_nested_parquet.customer c]
HDFS partitions=1/1 files=4 size=289.08MB
predicates: c_mktsegment = 'BUILDING', !empty(c.c_orders)
predicates on o: !empty(o.o_lineitems), o_orderdate < '1995-03-15'
predicates on l: l_shipdate > '1995-03-15'
row-size=33B cardinality=30.00K
---- DISTRIBUTEDPLAN
Max Per-Host Resource Reservation: Memory=100.00MB Threads=4
Per-Host Resource Estimates: Memory=882MB
PLAN-ROOT SINK
|
13:MERGING-EXCHANGE [UNPARTITIONED]
| order by: sum(l_extendedprice * (1 - l_discount)) DESC, o_orderdate ASC
| limit: 10
|
10:TOP-N [LIMIT=10]
| order by: sum(l_extendedprice * (1 - l_discount)) DESC, o_orderdate ASC
| row-size=40B cardinality=10
|
12:AGGREGATE [FINALIZE]
| output: sum:merge(l_extendedprice * (1 - l_discount))
| group by: o_orderkey, o_orderdate, o_shippriority
| row-size=40B cardinality=3.00M
|
11:EXCHANGE [HASH(o_orderkey,o_orderdate,o_shippriority)]
|
09:AGGREGATE [STREAMING]
| output: sum(l_extendedprice * (1 - l_discount))
| group by: o_orderkey, o_orderdate, o_shippriority
| row-size=40B cardinality=3.00M
|
01:SUBPLAN
| row-size=97B cardinality=3.00M
|
|--08:NESTED LOOP JOIN [CROSS JOIN]
| | row-size=97B cardinality=100
| |
| |--02:SINGULAR ROW SRC
| | row-size=33B cardinality=1
| |
| 04:SUBPLAN
| | row-size=64B cardinality=100
| |
| |--07:NESTED LOOP JOIN [CROSS JOIN]
| | | row-size=64B cardinality=10
| | |
| | |--05:SINGULAR ROW SRC
| | | row-size=36B cardinality=1
| | |
| | 06:UNNEST [o.o_lineitems l]
| | row-size=0B cardinality=10
| |
| 03:UNNEST [c.c_orders o]
| row-size=0B cardinality=10
|
00:SCAN HDFS [tpch_nested_parquet.customer c]
HDFS partitions=1/1 files=4 size=289.08MB
predicates: c_mktsegment = 'BUILDING', !empty(c.c_orders)
predicates on o: !empty(o.o_lineitems), o_orderdate < '1995-03-15'
predicates on l: l_shipdate > '1995-03-15'
row-size=33B cardinality=30.00K
====
# TPCH-Q4
# Q4 - Order Priority Checking Query
# Counts orders per o_orderpriority for orders dated in
# [1993-07-01, 1993-10-01) that have at least one line item with
# l_commitdate < l_receiptdate. The EXISTS subquery reads the order's own
# o_lineitems collection, so it is correlated through the collection ref.
SELECT
  o_orderpriority,
  count(*) AS order_count
FROM
  customer c,
  c.c_orders o
WHERE
  o_orderdate >= '1993-07-01'
  AND o_orderdate < '1993-10-01'
  AND EXISTS (
    SELECT
      *
    FROM
      o.o_lineitems
    WHERE
      l_commitdate < l_receiptdate
  )
GROUP BY
  o_orderpriority
ORDER BY
  o_orderpriority
---- PLAN
Max Per-Host Resource Reservation: Memory=50.00MB Threads=2
Per-Host Resource Estimates: Memory=480MB
PLAN-ROOT SINK
|
10:SORT
| order by: o_orderpriority ASC
| row-size=20B cardinality=1.50M
|
09:AGGREGATE [FINALIZE]
| output: count(*)
| group by: o_orderpriority
| row-size=20B cardinality=1.50M
|
01:SUBPLAN
| row-size=48B cardinality=1.50M
|
|--08:SUBPLAN
| | row-size=48B cardinality=10
| |
| |--06:NESTED LOOP JOIN [RIGHT SEMI JOIN]
| | | row-size=48B cardinality=1
| | |
| | |--04:SINGULAR ROW SRC
| | | row-size=48B cardinality=1
| | |
| | 05:UNNEST [o.o_lineitems]
| | limit: 1
| | row-size=24B cardinality=10
| |
| 07:NESTED LOOP JOIN [CROSS JOIN]
| | row-size=48B cardinality=10
| |
| |--02:SINGULAR ROW SRC
| | row-size=12B cardinality=1
| |
| 03:UNNEST [c.c_orders o]
| row-size=0B cardinality=10
|
00:SCAN HDFS [tpch_nested_parquet.customer c]
HDFS partitions=1/1 files=4 size=289.08MB
predicates: !empty(c.c_orders)
predicates on o: o_orderdate >= '1993-07-01', o_orderdate < '1993-10-01'
predicates on o_lineitems: l_commitdate < l_receiptdate
row-size=12B cardinality=150.00K
---- DISTRIBUTEDPLAN
Max Per-Host Resource Reservation: Memory=96.00MB Threads=4
Per-Host Resource Estimates: Memory=630MB
PLAN-ROOT SINK
|
13:MERGING-EXCHANGE [UNPARTITIONED]
| order by: o_orderpriority ASC
|
10:SORT
| order by: o_orderpriority ASC
| row-size=20B cardinality=1.50M
|
12:AGGREGATE [FINALIZE]
| output: count:merge(*)
| group by: o_orderpriority
| row-size=20B cardinality=1.50M
|
11:EXCHANGE [HASH(o_orderpriority)]
|
09:AGGREGATE [STREAMING]
| output: count(*)
| group by: o_orderpriority
| row-size=20B cardinality=1.50M
|
01:SUBPLAN
| row-size=48B cardinality=1.50M
|
|--08:SUBPLAN
| | row-size=48B cardinality=10
| |
| |--06:NESTED LOOP JOIN [RIGHT SEMI JOIN]
| | | row-size=48B cardinality=1
| | |
| | |--04:SINGULAR ROW SRC
| | | row-size=48B cardinality=1
| | |
| | 05:UNNEST [o.o_lineitems]
| | limit: 1
| | row-size=24B cardinality=10
| |
| 07:NESTED LOOP JOIN [CROSS JOIN]
| | row-size=48B cardinality=10
| |
| |--02:SINGULAR ROW SRC
| | row-size=12B cardinality=1
| |
| 03:UNNEST [c.c_orders o]
| row-size=0B cardinality=10
|
00:SCAN HDFS [tpch_nested_parquet.customer c]
HDFS partitions=1/1 files=4 size=289.08MB
predicates: !empty(c.c_orders)
predicates on o: o_orderdate >= '1993-07-01', o_orderdate < '1993-10-01'
predicates on o_lineitems: l_commitdate < l_receiptdate
row-size=12B cardinality=150.00K
====
# TPCH-Q5
# Q5 - Local Supplier Volume Query
# Sums discounted revenue per nation name for nations of the 'ASIA' region,
# considering orders dated in [1994-01-01, 1995-01-01). A line item only
# contributes when its supplier and the ordering customer share a nation key.
SELECT
  n_name,
  sum(l_extendedprice * (1 - l_discount)) AS revenue
FROM
  customer c,
  c.c_orders o,
  o.o_lineitems l,
  supplier s,
  region r,
  r.r_nations n
WHERE
  l_suppkey = s_suppkey
  AND c_nationkey = s_nationkey
  AND s_nationkey = n_nationkey
  AND r_name = 'ASIA'
  AND o_orderdate >= '1994-01-01'
  AND o_orderdate < '1995-01-01'
GROUP BY
  n_name
ORDER BY
  revenue DESC
---- PLAN
Max Per-Host Resource Reservation: Memory=63.88MB Threads=4
Per-Host Resource Estimates: Memory=606MB
PLAN-ROOT SINK
|
18:SORT
| order by: sum(l_extendedprice * (1 - l_discount)) DESC
| row-size=28B cardinality=6.00G
|
17:AGGREGATE [FINALIZE]
| output: sum(l_extendedprice * (1 - l_discount))
| group by: n_name
| row-size=28B cardinality=6.00G
|
16:HASH JOIN [INNER JOIN]
| hash predicates: c_nationkey = s_nationkey, l_suppkey = s_suppkey
| runtime filters: RF000 <- s_nationkey
| row-size=117B cardinality=6.00G
|
|--09:SCAN HDFS [tpch_nested_parquet.supplier s]
| HDFS partitions=1/1 files=1 size=41.80MB
| row-size=10B cardinality=10.00K
|
15:HASH JOIN [INNER JOIN]
| hash predicates: c.c_nationkey = n.n_nationkey
| runtime filters: RF004 <- n.n_nationkey
| row-size=107B cardinality=15.00M
|
|--11:SUBPLAN
| | row-size=45B cardinality=10
| |
| |--14:NESTED LOOP JOIN [CROSS JOIN]
| | | row-size=45B cardinality=10
| | |
| | |--12:SINGULAR ROW SRC
| | | row-size=31B cardinality=1
| | |
| | 13:UNNEST [r.r_nations n]
| | row-size=0B cardinality=10
| |
| 10:SCAN HDFS [tpch_nested_parquet.region r]
| HDFS partitions=1/1 files=1 size=3.59KB
| predicates: r_name = 'ASIA', !empty(r.r_nations)
| row-size=31B cardinality=1
|
01:SUBPLAN
| row-size=62B cardinality=15.00M
|
|--08:NESTED LOOP JOIN [CROSS JOIN]
| | row-size=62B cardinality=100
| |
| |--02:SINGULAR ROW SRC
| | row-size=14B cardinality=1
| |
| 04:SUBPLAN
| | row-size=48B cardinality=100
| |
| |--07:NESTED LOOP JOIN [CROSS JOIN]
| | | row-size=48B cardinality=10
| | |
| | |--05:SINGULAR ROW SRC
| | | row-size=24B cardinality=1
| | |
| | 06:UNNEST [o.o_lineitems l]
| | row-size=0B cardinality=10
| |
| 03:UNNEST [c.c_orders o]
| row-size=0B cardinality=10
|
00:SCAN HDFS [tpch_nested_parquet.customer c]
HDFS partitions=1/1 files=4 size=289.08MB
predicates: !empty(c.c_orders)
predicates on o: !empty(o.o_lineitems), o_orderdate >= '1994-01-01', o_orderdate < '1995-01-01'
runtime filters: RF000 -> c_nationkey, RF004 -> c.c_nationkey
row-size=14B cardinality=150.00K
---- DISTRIBUTEDPLAN
Max Per-Host Resource Reservation: Memory=110.02MB Threads=8
Per-Host Resource Estimates: Memory=1.26GB
PLAN-ROOT SINK
|
23:MERGING-EXCHANGE [UNPARTITIONED]
| order by: sum(l_extendedprice * (1 - l_discount)) DESC
|
18:SORT
| order by: sum(l_extendedprice * (1 - l_discount)) DESC
| row-size=28B cardinality=6.00G
|
22:AGGREGATE [FINALIZE]
| output: sum:merge(l_extendedprice * (1 - l_discount))
| group by: n_name
| row-size=28B cardinality=6.00G
|
21:EXCHANGE [HASH(n_name)]
|
17:AGGREGATE [STREAMING]
| output: sum(l_extendedprice * (1 - l_discount))
| group by: n_name
| row-size=28B cardinality=6.00G
|
16:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: c_nationkey = s_nationkey, l_suppkey = s_suppkey
| runtime filters: RF000 <- s_nationkey
| row-size=117B cardinality=6.00G
|
|--20:EXCHANGE [BROADCAST]
| |
| 09:SCAN HDFS [tpch_nested_parquet.supplier s]
| HDFS partitions=1/1 files=1 size=41.80MB
| row-size=10B cardinality=10.00K
|
15:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: c.c_nationkey = n.n_nationkey
| runtime filters: RF004 <- n.n_nationkey
| row-size=107B cardinality=15.00M
|
|--19:EXCHANGE [BROADCAST]
| |
| 11:SUBPLAN
| | row-size=45B cardinality=10
| |
| |--14:NESTED LOOP JOIN [CROSS JOIN]
| | | row-size=45B cardinality=10
| | |
| | |--12:SINGULAR ROW SRC
| | | row-size=31B cardinality=1
| | |
| | 13:UNNEST [r.r_nations n]
| | row-size=0B cardinality=10
| |
| 10:SCAN HDFS [tpch_nested_parquet.region r]
| HDFS partitions=1/1 files=1 size=3.59KB
| predicates: r_name = 'ASIA', !empty(r.r_nations)
| row-size=31B cardinality=1
|
01:SUBPLAN
| row-size=62B cardinality=15.00M
|
|--08:NESTED LOOP JOIN [CROSS JOIN]
| | row-size=62B cardinality=100
| |
| |--02:SINGULAR ROW SRC
| | row-size=14B cardinality=1
| |
| 04:SUBPLAN
| | row-size=48B cardinality=100
| |
| |--07:NESTED LOOP JOIN [CROSS JOIN]
| | | row-size=48B cardinality=10
| | |
| | |--05:SINGULAR ROW SRC
| | | row-size=24B cardinality=1
| | |
| | 06:UNNEST [o.o_lineitems l]
| | row-size=0B cardinality=10
| |
| 03:UNNEST [c.c_orders o]
| row-size=0B cardinality=10
|
00:SCAN HDFS [tpch_nested_parquet.customer c]
HDFS partitions=1/1 files=4 size=289.08MB
predicates: !empty(c.c_orders)
predicates on o: !empty(o.o_lineitems), o_orderdate >= '1994-01-01', o_orderdate < '1995-01-01'
runtime filters: RF000 -> c_nationkey, RF004 -> c.c_nationkey
row-size=14B cardinality=150.00K
====
# TPCH-Q6
# Q6 - Forecasting Revenue Change Query
# Single ungrouped aggregate over the nested o_lineitems collection, addressed
# by absolute path: sums l_extendedprice * l_discount for line items shipped
# in [1994-01-01, 1995-01-01) with a discount between 0.05 and 0.07
# (inclusive) and a quantity below 24.
SELECT
  sum(l_extendedprice * l_discount) AS revenue
FROM
  customer.c_orders.o_lineitems
WHERE
  l_shipdate >= '1994-01-01'
  AND l_shipdate < '1995-01-01'
  AND l_discount BETWEEN 0.05 AND 0.07
  AND l_quantity < 24
---- PLAN
Max Per-Host Resource Reservation: Memory=16.00MB Threads=2
Per-Host Resource Estimates: Memory=362MB
PLAN-ROOT SINK
|
01:AGGREGATE [FINALIZE]
| output: sum(l_extendedprice * l_discount)
| row-size=16B cardinality=1
|
00:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems]
HDFS partitions=1/1 files=4 size=289.08MB
predicates: l_discount <= 0.07, l_discount >= 0.05, l_quantity < 24, l_shipdate < '1995-01-01', l_shipdate >= '1994-01-01'
row-size=36B cardinality=1.50M
---- DISTRIBUTEDPLAN
Max Per-Host Resource Reservation: Memory=16.00MB Threads=3
Per-Host Resource Estimates: Memory=372MB
PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
| output: sum:merge(l_extendedprice * l_discount)
| row-size=16B cardinality=1
|
02:EXCHANGE [UNPARTITIONED]
|
01:AGGREGATE
| output: sum(l_extendedprice * l_discount)
| row-size=16B cardinality=1
|
00:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems]
HDFS partitions=1/1 files=4 size=289.08MB
predicates: l_discount <= 0.07, l_discount >= 0.05, l_quantity < 24, l_shipdate < '1995-01-01', l_shipdate >= '1994-01-01'
row-size=36B cardinality=1.50M
====
# TPCH-Q7
# Q7 - Volume Shipping Query
# The inline view 'shipping' pairs each line item's supplier nation (n1) with
# the customer's nation (n2) and keeps only the FRANCE/GERMANY and
# GERMANY/FRANCE combinations for items shipped between 1995-01-01 and
# 1996-12-31 (inclusive). Both n1 and n2 read region.r_nations by absolute
# path. The outer query sums the volume per (supp_nation, cust_nation, l_year).
SELECT
  supp_nation,
  cust_nation,
  l_year,
  sum(volume) AS revenue
FROM (
  SELECT
    n1.n_name AS supp_nation,
    n2.n_name AS cust_nation,
    year(l_shipdate) AS l_year,
    l_extendedprice * (1 - l_discount) AS volume
  FROM
    customer c,
    c.c_orders o,
    o.o_lineitems l,
    supplier s,
    region.r_nations n1,
    region.r_nations n2
  WHERE
    s_suppkey = l_suppkey
    AND s_nationkey = n1.n_nationkey
    AND c_nationkey = n2.n_nationkey
    AND (
      (n1.n_name = 'FRANCE' AND n2.n_name = 'GERMANY')
      OR (n1.n_name = 'GERMANY' AND n2.n_name = 'FRANCE')
    )
    AND l_shipdate BETWEEN '1995-01-01' AND '1996-12-31'
) AS shipping
GROUP BY
  supp_nation,
  cust_nation,
  l_year
ORDER BY
  supp_nation,
  cust_nation,
  l_year
---- PLAN
Max Per-Host Resource Reservation: Memory=65.81MB Threads=5
Per-Host Resource Estimates: Memory=712MB
PLAN-ROOT SINK
|
16:SORT
| order by: supp_nation ASC, cust_nation ASC, l_year ASC
| row-size=44B cardinality=15.00M
|
15:AGGREGATE [FINALIZE]
| output: sum(l_extendedprice * (1 - l_discount))
| group by: n1.n_name, n2.n_name, year(l_shipdate)
| row-size=44B cardinality=15.00M
|
14:HASH JOIN [INNER JOIN]
| hash predicates: c_nationkey = n2.n_nationkey
| other predicates: n1.n_name = 'FRANCE' OR n2.n_name = 'FRANCE', n2.n_name = 'GERMANY' OR n1.n_name = 'GERMANY'
| runtime filters: RF000 <- n2.n_nationkey
| row-size=100B cardinality=15.00M
|
|--11:SCAN HDFS [tpch_nested_parquet.region.r_nations n2]
| HDFS partitions=1/1 files=1 size=3.59KB
| predicates: n2.n_name IN ('GERMANY', 'FRANCE')
| row-size=14B cardinality=5
|
13:HASH JOIN [INNER JOIN]
| hash predicates: s_nationkey = n1.n_nationkey
| runtime filters: RF002 <- n1.n_nationkey
| row-size=86B cardinality=15.00M
|
|--10:SCAN HDFS [tpch_nested_parquet.region.r_nations n1]
| HDFS partitions=1/1 files=1 size=3.59KB
| predicates: n1.n_name IN ('FRANCE', 'GERMANY')
| row-size=14B cardinality=5
|
12:HASH JOIN [INNER JOIN]
| hash predicates: l_suppkey = s_suppkey
| row-size=72B cardinality=15.00M
|
|--09:SCAN HDFS [tpch_nested_parquet.supplier s]
| HDFS partitions=1/1 files=1 size=41.80MB
| runtime filters: RF002 -> s_nationkey
| row-size=10B cardinality=10.00K
|
01:SUBPLAN
| row-size=62B cardinality=15.00M
|
|--08:NESTED LOOP JOIN [CROSS JOIN]
| | row-size=62B cardinality=100
| |
| |--02:SINGULAR ROW SRC
| | row-size=14B cardinality=1
| |
| 04:SUBPLAN
| | row-size=48B cardinality=100
| |
| |--07:NESTED LOOP JOIN [CROSS JOIN]
| | | row-size=48B cardinality=10
| | |
| | |--05:SINGULAR ROW SRC
| | | row-size=12B cardinality=1
| | |
| | 06:UNNEST [o.o_lineitems l]
| | row-size=0B cardinality=10
| |
| 03:UNNEST [c.c_orders o]
| row-size=0B cardinality=10
|
00:SCAN HDFS [tpch_nested_parquet.customer c]
HDFS partitions=1/1 files=4 size=289.08MB
predicates: !empty(c.c_orders)
predicates on o: !empty(o.o_lineitems)
predicates on l: l_shipdate >= '1995-01-01', l_shipdate <= '1996-12-31'
runtime filters: RF000 -> c_nationkey
row-size=14B cardinality=150.00K
---- DISTRIBUTEDPLAN
Max Per-Host Resource Reservation: Memory=112.97MB Threads=10
Per-Host Resource Estimates: Memory=1.00GB
PLAN-ROOT SINK
|
22:MERGING-EXCHANGE [UNPARTITIONED]
| order by: supp_nation ASC, cust_nation ASC, l_year ASC
|
16:SORT
| order by: supp_nation ASC, cust_nation ASC, l_year ASC
| row-size=44B cardinality=15.00M
|
21:AGGREGATE [FINALIZE]
| output: sum:merge(volume)
| group by: supp_nation, cust_nation, l_year
| row-size=44B cardinality=15.00M
|
20:EXCHANGE [HASH(supp_nation,cust_nation,l_year)]
|
15:AGGREGATE [STREAMING]
| output: sum(l_extendedprice * (1 - l_discount))
| group by: n1.n_name, n2.n_name, year(l_shipdate)
| row-size=44B cardinality=15.00M
|
14:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: c_nationkey = n2.n_nationkey
| other predicates: n1.n_name = 'FRANCE' OR n2.n_name = 'FRANCE', n2.n_name = 'GERMANY' OR n1.n_name = 'GERMANY'
| runtime filters: RF000 <- n2.n_nationkey
| row-size=100B cardinality=15.00M
|
|--19:EXCHANGE [BROADCAST]
| |
| 11:SCAN HDFS [tpch_nested_parquet.region.r_nations n2]
| HDFS partitions=1/1 files=1 size=3.59KB
| predicates: n2.n_name IN ('GERMANY', 'FRANCE')
| row-size=14B cardinality=5
|
13:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: s_nationkey = n1.n_nationkey
| runtime filters: RF002 <- n1.n_nationkey
| row-size=86B cardinality=15.00M
|
|--18:EXCHANGE [BROADCAST]
| |
| 10:SCAN HDFS [tpch_nested_parquet.region.r_nations n1]
| HDFS partitions=1/1 files=1 size=3.59KB
| predicates: n1.n_name IN ('FRANCE', 'GERMANY')
| row-size=14B cardinality=5
|
12:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: l_suppkey = s_suppkey
| row-size=72B cardinality=15.00M
|
|--17:EXCHANGE [BROADCAST]
| |
| 09:SCAN HDFS [tpch_nested_parquet.supplier s]
| HDFS partitions=1/1 files=1 size=41.80MB
| runtime filters: RF002 -> s_nationkey
| row-size=10B cardinality=10.00K
|
01:SUBPLAN
| row-size=62B cardinality=15.00M
|
|--08:NESTED LOOP JOIN [CROSS JOIN]
| | row-size=62B cardinality=100
| |
| |--02:SINGULAR ROW SRC
| | row-size=14B cardinality=1
| |
| 04:SUBPLAN
| | row-size=48B cardinality=100
| |
| |--07:NESTED LOOP JOIN [CROSS JOIN]
| | | row-size=48B cardinality=10
| | |
| | |--05:SINGULAR ROW SRC
| | | row-size=12B cardinality=1
| | |
| | 06:UNNEST [o.o_lineitems l]
| | row-size=0B cardinality=10
| |
| 03:UNNEST [c.c_orders o]
| row-size=0B cardinality=10
|
00:SCAN HDFS [tpch_nested_parquet.customer c]
HDFS partitions=1/1 files=4 size=289.08MB
predicates: !empty(c.c_orders)
predicates on o: !empty(o.o_lineitems)
predicates on l: l_shipdate >= '1995-01-01', l_shipdate <= '1996-12-31'
runtime filters: RF000 -> c_nationkey
row-size=14B cardinality=150.00K
====
# TPCH-Q8
# Q8 - National Market Share Query
# Per order year, divides the volume attributed to supplier nation 'BRAZIL' by
# the total volume. The inline view 'all_nations' restricts the input to
# 'ECONOMY ANODIZED STEEL' parts, orders dated between 1995-01-01 and
# 1996-12-31 (inclusive), and customers whose nation (n1) belongs to the
# 'AMERICA' region. n1 is a relative ref (r.r_nations), while the supplier
# nation n2 reads region.r_nations by absolute path.
SELECT
  o_year,
  sum(CASE
    WHEN nation = 'BRAZIL'
    THEN volume
    ELSE 0
  END) / sum(volume) AS mkt_share
FROM (
  SELECT
    year(o_orderdate) AS o_year,
    l_extendedprice * (1 - l_discount) AS volume,
    n2.n_name AS nation
  FROM
    customer c,
    c.c_orders o,
    o.o_lineitems l,
    supplier s,
    part p,
    region r,
    r.r_nations n1,
    region.r_nations n2
  WHERE
    p_partkey = l_partkey
    AND s_suppkey = l_suppkey
    AND c_nationkey = n1.n_nationkey
    AND r_name = 'AMERICA'
    AND s_nationkey = n2.n_nationkey
    AND o_orderdate BETWEEN '1995-01-01' AND '1996-12-31'
    AND p_type = 'ECONOMY ANODIZED STEEL'
) AS all_nations
GROUP BY
  o_year
ORDER BY
  o_year
---- PLAN
Max Per-Host Resource Reservation: Memory=67.75MB Threads=6
Per-Host Resource Estimates: Memory=746MB
PLAN-ROOT SINK
|
22:SORT
| order by: o_year ASC
| row-size=36B cardinality=15.00M
|
21:AGGREGATE [FINALIZE]
| output: sum(CASE WHEN n2.n_name = 'BRAZIL' THEN l_extendedprice * (1 - l_discount) ELSE 0 END), sum(l_extendedprice * (1 - l_discount))
| group by: year(o_orderdate)
| row-size=36B cardinality=15.00M
|
20:HASH JOIN [INNER JOIN]
| hash predicates: s_nationkey = n2.n_nationkey
| runtime filters: RF000 <- n2.n_nationkey
| row-size=167B cardinality=15.00M
|
|--16:SCAN HDFS [tpch_nested_parquet.region.r_nations n2]
| HDFS partitions=1/1 files=1 size=3.59KB
| row-size=14B cardinality=50
|
19:HASH JOIN [INNER JOIN]
| hash predicates: c_nationkey = n1.n_nationkey
| runtime filters: RF002 <- n1.n_nationkey
| row-size=153B cardinality=15.00M
|
|--12:SUBPLAN
| | row-size=33B cardinality=10
| |
| |--15:NESTED LOOP JOIN [CROSS JOIN]
| | | row-size=33B cardinality=10
| | |
| | |--13:SINGULAR ROW SRC
| | | row-size=31B cardinality=1
| | |
| | 14:UNNEST [r.r_nations n1]
| | row-size=0B cardinality=10
| |
| 11:SCAN HDFS [tpch_nested_parquet.region r]
| HDFS partitions=1/1 files=1 size=3.59KB
| predicates: r_name = 'AMERICA', !empty(r.r_nations)
| row-size=31B cardinality=1
|
18:HASH JOIN [INNER JOIN]
| hash predicates: l_partkey = p_partkey
| row-size=121B cardinality=15.00M
|
|--10:SCAN HDFS [tpch_nested_parquet.part p]
| HDFS partitions=1/1 files=1 size=6.24MB
| predicates: p_type = 'ECONOMY ANODIZED STEEL'
| row-size=41B cardinality=1.32K
|
17:HASH JOIN [INNER JOIN]
| hash predicates: l_suppkey = s_suppkey
| row-size=80B cardinality=15.00M
|
|--09:SCAN HDFS [tpch_nested_parquet.supplier s]
| HDFS partitions=1/1 files=1 size=41.80MB
| runtime filters: RF000 -> s_nationkey
| row-size=10B cardinality=10.00K
|
01:SUBPLAN
| row-size=70B cardinality=15.00M
|
|--08:NESTED LOOP JOIN [CROSS JOIN]
| | row-size=70B cardinality=100
| |
| |--02:SINGULAR ROW SRC
| | row-size=14B cardinality=1
| |
| 04:SUBPLAN
| | row-size=56B cardinality=100
| |
| |--07:NESTED LOOP JOIN [CROSS JOIN]
| | | row-size=56B cardinality=10
| | |
| | |--05:SINGULAR ROW SRC
| | | row-size=24B cardinality=1
| | |
| | 06:UNNEST [o.o_lineitems l]
| | row-size=0B cardinality=10
| |
| 03:UNNEST [c.c_orders o]
| row-size=0B cardinality=10
|
00:SCAN HDFS [tpch_nested_parquet.customer c]
HDFS partitions=1/1 files=4 size=289.08MB
predicates: !empty(c.c_orders)
predicates on o: !empty(o.o_lineitems), o_orderdate >= '1995-01-01', o_orderdate <= '1996-12-31'
runtime filters: RF002 -> c_nationkey
row-size=14B cardinality=150.00K
---- DISTRIBUTEDPLAN
Max Per-Host Resource Reservation: Memory=110.91MB Threads=12
Per-Host Resource Estimates: Memory=1.04GB
PLAN-ROOT SINK
|
29:MERGING-EXCHANGE [UNPARTITIONED]
| order by: o_year ASC
|
22:SORT
| order by: o_year ASC
| row-size=36B cardinality=15.00M
|
28:AGGREGATE [FINALIZE]
| output: sum:merge(CASE WHEN nation = 'BRAZIL' THEN volume ELSE 0 END), sum:merge(volume)
| group by: o_year
| row-size=36B cardinality=15.00M
|
27:EXCHANGE [HASH(o_year)]
|
21:AGGREGATE [STREAMING]
| output: sum(CASE WHEN n2.n_name = 'BRAZIL' THEN l_extendedprice * (1 - l_discount) ELSE 0 END), sum(l_extendedprice * (1 - l_discount))
| group by: year(o_orderdate)
| row-size=36B cardinality=15.00M
|
20:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: s_nationkey = n2.n_nationkey
| runtime filters: RF000 <- n2.n_nationkey
| row-size=167B cardinality=15.00M
|
|--26:EXCHANGE [BROADCAST]
| |
| 16:SCAN HDFS [tpch_nested_parquet.region.r_nations n2]
| HDFS partitions=1/1 files=1 size=3.59KB
| row-size=14B cardinality=50
|
19:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: c_nationkey = n1.n_nationkey
| runtime filters: RF002 <- n1.n_nationkey
| row-size=153B cardinality=15.00M
|
|--25:EXCHANGE [BROADCAST]
| |
| 12:SUBPLAN
| | row-size=33B cardinality=10
| |
| |--15:NESTED LOOP JOIN [CROSS JOIN]
| | | row-size=33B cardinality=10
| | |
| | |--13:SINGULAR ROW SRC
| | | row-size=31B cardinality=1
| | |
| | 14:UNNEST [r.r_nations n1]
| | row-size=0B cardinality=10
| |
| 11:SCAN HDFS [tpch_nested_parquet.region r]
| HDFS partitions=1/1 files=1 size=3.59KB
| predicates: r_name = 'AMERICA', !empty(r.r_nations)
| row-size=31B cardinality=1
|
18:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: l_partkey = p_partkey
| row-size=121B cardinality=15.00M
|
|--24:EXCHANGE [BROADCAST]
| |
| 10:SCAN HDFS [tpch_nested_parquet.part p]
| HDFS partitions=1/1 files=1 size=6.24MB
| predicates: p_type = 'ECONOMY ANODIZED STEEL'
| row-size=41B cardinality=1.32K
|
17:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: l_suppkey = s_suppkey
| row-size=80B cardinality=15.00M
|
|--23:EXCHANGE [BROADCAST]
| |
| 09:SCAN HDFS [tpch_nested_parquet.supplier s]
| HDFS partitions=1/1 files=1 size=41.80MB
| runtime filters: RF000 -> s_nationkey
| row-size=10B cardinality=10.00K
|
01:SUBPLAN
| row-size=70B cardinality=15.00M
|
|--08:NESTED LOOP JOIN [CROSS JOIN]
| | row-size=70B cardinality=100
| |
| |--02:SINGULAR ROW SRC
| | row-size=14B cardinality=1
| |
| 04:SUBPLAN
| | row-size=56B cardinality=100
| |
| |--07:NESTED LOOP JOIN [CROSS JOIN]
| | | row-size=56B cardinality=10
| | |
| | |--05:SINGULAR ROW SRC
| | | row-size=24B cardinality=1
| | |
| | 06:UNNEST [o.o_lineitems l]
| | row-size=0B cardinality=10
| |
| 03:UNNEST [c.c_orders o]
| row-size=0B cardinality=10
|
00:SCAN HDFS [tpch_nested_parquet.customer c]
HDFS partitions=1/1 files=4 size=289.08MB
predicates: !empty(c.c_orders)
predicates on o: !empty(o.o_lineitems), o_orderdate >= '1995-01-01', o_orderdate <= '1996-12-31'
runtime filters: RF002 -> c_nationkey
row-size=14B cardinality=150.00K
====
# TPCH-Q9
# Q9 - Product Type Measure Query
# Nested-schema form: orders are read through the absolute path customer.c_orders
# and lineitems are unnested per order (o.o_lineitems l); partsupp rows are
# unnested per supplier (s.s_partsupps ps), so there is no explicit
# supplier/partsupp join predicate.
# The conjuncts s_suppkey = l_suppkey and ps_partkey = l_partkey both appear as
# hash predicates of a single join in the expected plans below.
select
nation,
o_year,
sum(amount) as sum_profit
from(
select
n_name as nation,
year(o_orderdate) as o_year,
l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount
from
customer.c_orders o,
o.o_lineitems l,
supplier s,
s.s_partsupps ps,
part p,
region.r_nations n
where
s_suppkey = l_suppkey
and ps_partkey = l_partkey
and p_partkey = l_partkey
and s_nationkey = n_nationkey
and p_name like '%green%'
) as profit
group by
nation,
o_year
order by
nation,
o_year desc
---- PLAN
Max Per-Host Resource Reservation: Memory=68.56MB Threads=5
Per-Host Resource Estimates: Memory=827MB
PLAN-ROOT SINK
|
16:SORT
| order by: nation ASC, o_year DESC
| row-size=32B cardinality=15.00M
|
15:AGGREGATE [FINALIZE]
| output: sum(l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity)
| group by: n_name, year(o_orderdate)
| row-size=32B cardinality=15.00M
|
14:HASH JOIN [INNER JOIN]
| hash predicates: s_nationkey = n_nationkey
| runtime filters: RF000 <- n_nationkey
| row-size=169B cardinality=15.00M
|
|--11:SCAN HDFS [tpch_nested_parquet.region.r_nations n]
| HDFS partitions=1/1 files=1 size=3.59KB
| row-size=14B cardinality=50
|
13:HASH JOIN [INNER JOIN]
| hash predicates: l_partkey = p_partkey
| row-size=155B cardinality=15.00M
|
|--10:SCAN HDFS [tpch_nested_parquet.part p]
| HDFS partitions=1/1 files=1 size=6.24MB
| predicates: p_name LIKE '%green%'
| row-size=53B cardinality=20.00K
|
12:HASH JOIN [INNER JOIN]
| hash predicates: l_partkey = ps_partkey, l_suppkey = s_suppkey
| row-size=102B cardinality=15.00M
|
|--06:SUBPLAN
| | row-size=38B cardinality=100.00K
| |
| |--09:NESTED LOOP JOIN [CROSS JOIN]
| | | row-size=38B cardinality=10
| | |
| | |--07:SINGULAR ROW SRC
| | | row-size=22B cardinality=1
| | |
| | 08:UNNEST [s.s_partsupps ps]
| | row-size=0B cardinality=10
| |
| 05:SCAN HDFS [tpch_nested_parquet.supplier s]
| HDFS partitions=1/1 files=1 size=41.80MB
| predicates: !empty(s.s_partsupps)
| runtime filters: RF000 -> s_nationkey
| row-size=22B cardinality=10.00K
|
01:SUBPLAN
| row-size=64B cardinality=15.00M
|
|--04:NESTED LOOP JOIN [CROSS JOIN]
| | row-size=64B cardinality=10
| |
| |--02:SINGULAR ROW SRC
| | row-size=24B cardinality=1
| |
| 03:UNNEST [o.o_lineitems l]
| row-size=0B cardinality=10
|
00:SCAN HDFS [tpch_nested_parquet.customer.c_orders o]
HDFS partitions=1/1 files=4 size=289.08MB
predicates: !empty(o.o_lineitems)
row-size=24B cardinality=1.50M
---- DISTRIBUTEDPLAN
Max Per-Host Resource Reservation: Memory=139.58MB Threads=10
Per-Host Resource Estimates: Memory=1.11GB
PLAN-ROOT SINK
|
22:MERGING-EXCHANGE [UNPARTITIONED]
| order by: nation ASC, o_year DESC
|
16:SORT
| order by: nation ASC, o_year DESC
| row-size=32B cardinality=15.00M
|
21:AGGREGATE [FINALIZE]
| output: sum:merge(amount)
| group by: nation, o_year
| row-size=32B cardinality=15.00M
|
20:EXCHANGE [HASH(nation,o_year)]
|
15:AGGREGATE [STREAMING]
| output: sum(l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity)
| group by: n_name, year(o_orderdate)
| row-size=32B cardinality=15.00M
|
14:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: s_nationkey = n_nationkey
| runtime filters: RF000 <- n_nationkey
| row-size=169B cardinality=15.00M
|
|--19:EXCHANGE [BROADCAST]
| |
| 11:SCAN HDFS [tpch_nested_parquet.region.r_nations n]
| HDFS partitions=1/1 files=1 size=3.59KB
| row-size=14B cardinality=50
|
13:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: l_partkey = p_partkey
| row-size=155B cardinality=15.00M
|
|--18:EXCHANGE [BROADCAST]
| |
| 10:SCAN HDFS [tpch_nested_parquet.part p]
| HDFS partitions=1/1 files=1 size=6.24MB
| predicates: p_name LIKE '%green%'
| row-size=53B cardinality=20.00K
|
12:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: l_partkey = ps_partkey, l_suppkey = s_suppkey
| row-size=102B cardinality=15.00M
|
|--17:EXCHANGE [BROADCAST]
| |
| 06:SUBPLAN
| | row-size=38B cardinality=100.00K
| |
| |--09:NESTED LOOP JOIN [CROSS JOIN]
| | | row-size=38B cardinality=10
| | |
| | |--07:SINGULAR ROW SRC
| | | row-size=22B cardinality=1
| | |
| | 08:UNNEST [s.s_partsupps ps]
| | row-size=0B cardinality=10
| |
| 05:SCAN HDFS [tpch_nested_parquet.supplier s]
| HDFS partitions=1/1 files=1 size=41.80MB
| predicates: !empty(s.s_partsupps)
| runtime filters: RF000 -> s_nationkey
| row-size=22B cardinality=10.00K
|
01:SUBPLAN
| row-size=64B cardinality=15.00M
|
|--04:NESTED LOOP JOIN [CROSS JOIN]
| | row-size=64B cardinality=10
| |
| |--02:SINGULAR ROW SRC
| | row-size=24B cardinality=1
| |
| 03:UNNEST [o.o_lineitems l]
| row-size=0B cardinality=10
|
00:SCAN HDFS [tpch_nested_parquet.customer.c_orders o]
HDFS partitions=1/1 files=4 size=289.08MB
predicates: !empty(o.o_lineitems)
row-size=24B cardinality=1.50M
====
# TPCH-Q10
# Q10 - Returned Item Reporting Query
# Nested-schema form: the FROM clause is a comma-separated list in which orders
# and lineitems are unnested from each customer row (c.c_orders o,
# o.o_lineitems l), so the only join predicate in the WHERE clause is
# c_nationkey = n_nationkey against the absolute reference region.r_nations.
select
c_custkey,
c_name,
sum(l_extendedprice * (1 - l_discount)) as revenue,
c_acctbal,
n_name,
c_address,
c_phone,
c_comment
from
customer c,
c.c_orders o,
o.o_lineitems l,
region.r_nations n
where
o_orderdate >= '1993-10-01'
and o_orderdate < '1994-01-01'
and l_returnflag = 'R'
and c_nationkey = n_nationkey
group by
c_custkey,
c_name,
c_acctbal,
c_phone,
n_name,
c_address,
c_comment
order by
revenue desc
limit 20
---- PLAN
Max Per-Host Resource Reservation: Memory=60.94MB Threads=3
Per-Host Resource Estimates: Memory=835MB
PLAN-ROOT SINK
|
12:TOP-N [LIMIT=20]
| order by: sum(l_extendedprice * (1 - l_discount)) DESC
| row-size=223B cardinality=20
|
11:AGGREGATE [FINALIZE]
| output: sum(l_extendedprice * (1 - l_discount))
| group by: c_custkey, c_name, c_acctbal, c_phone, n_name, c_address, c_comment
| row-size=223B cardinality=15.00M
|
10:HASH JOIN [INNER JOIN]
| hash predicates: c_nationkey = n_nationkey
| runtime filters: RF000 <- n_nationkey
| row-size=275B cardinality=15.00M
|
|--09:SCAN HDFS [tpch_nested_parquet.region.r_nations n]
| HDFS partitions=1/1 files=1 size=3.59KB
| row-size=14B cardinality=50
|
01:SUBPLAN
| row-size=261B cardinality=15.00M
|
|--08:NESTED LOOP JOIN [CROSS JOIN]
| | row-size=261B cardinality=100
| |
| |--02:SINGULAR ROW SRC
| | row-size=209B cardinality=1
| |
| 04:SUBPLAN
| | row-size=52B cardinality=100
| |
| |--07:NESTED LOOP JOIN [CROSS JOIN]
| | | row-size=52B cardinality=10
| | |
| | |--05:SINGULAR ROW SRC
| | | row-size=24B cardinality=1
| | |
| | 06:UNNEST [o.o_lineitems l]
| | row-size=0B cardinality=10
| |
| 03:UNNEST [c.c_orders o]
| row-size=0B cardinality=10
|
00:SCAN HDFS [tpch_nested_parquet.customer c]
HDFS partitions=1/1 files=4 size=289.08MB
predicates: !empty(c.c_orders)
predicates on o: !empty(o.o_lineitems), o_orderdate >= '1993-10-01', o_orderdate < '1994-01-01'
predicates on l: l_returnflag = 'R'
runtime filters: RF000 -> c_nationkey
row-size=209B cardinality=150.00K
---- DISTRIBUTEDPLAN
Max Per-Host Resource Reservation: Memory=94.95MB Threads=6
Per-Host Resource Estimates: Memory=1006MB
PLAN-ROOT SINK
|
16:MERGING-EXCHANGE [UNPARTITIONED]
| order by: sum(l_extendedprice * (1 - l_discount)) DESC
| limit: 20
|
12:TOP-N [LIMIT=20]
| order by: sum(l_extendedprice * (1 - l_discount)) DESC
| row-size=223B cardinality=20
|
15:AGGREGATE [FINALIZE]
| output: sum:merge(l_extendedprice * (1 - l_discount))
| group by: c_custkey, c_name, c_acctbal, c_phone, n_name, c_address, c_comment
| row-size=223B cardinality=15.00M
|
14:EXCHANGE [HASH(c_custkey,c_name,c_acctbal,c_phone,n_name,c_address,c_comment)]
|
11:AGGREGATE [STREAMING]
| output: sum(l_extendedprice * (1 - l_discount))
| group by: c_custkey, c_name, c_acctbal, c_phone, n_name, c_address, c_comment
| row-size=223B cardinality=15.00M
|
10:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: c_nationkey = n_nationkey
| runtime filters: RF000 <- n_nationkey
| row-size=275B cardinality=15.00M
|
|--13:EXCHANGE [BROADCAST]
| |
| 09:SCAN HDFS [tpch_nested_parquet.region.r_nations n]
| HDFS partitions=1/1 files=1 size=3.59KB
| row-size=14B cardinality=50
|
01:SUBPLAN
| row-size=261B cardinality=15.00M
|
|--08:NESTED LOOP JOIN [CROSS JOIN]
| | row-size=261B cardinality=100
| |
| |--02:SINGULAR ROW SRC
| | row-size=209B cardinality=1
| |
| 04:SUBPLAN
| | row-size=52B cardinality=100
| |
| |--07:NESTED LOOP JOIN [CROSS JOIN]
| | | row-size=52B cardinality=10
| | |
| | |--05:SINGULAR ROW SRC
| | | row-size=24B cardinality=1
| | |
| | 06:UNNEST [o.o_lineitems l]
| | row-size=0B cardinality=10
| |
| 03:UNNEST [c.c_orders o]
| row-size=0B cardinality=10
|
00:SCAN HDFS [tpch_nested_parquet.customer c]
HDFS partitions=1/1 files=4 size=289.08MB
predicates: !empty(c.c_orders)
predicates on o: !empty(o.o_lineitems), o_orderdate >= '1993-10-01', o_orderdate < '1994-01-01'
predicates on l: l_returnflag = 'R'
runtime filters: RF000 -> c_nationkey
row-size=209B cardinality=150.00K
====
# TPCH-Q11
# Q11 - Important Stock Identification
# Modifications: query was rewritten to not have a subquery in the having clause
# The per-part aggregate is computed in the inline view 'inner_query'; the outer
# WHERE then compares 'value' against an uncorrelated scalar subquery that
# repeats the same supplier/partsupp/nation join over all parts.
# Nested-schema form: partsupp rows are unnested per supplier (s.s_partsupps ps)
# and nations are read through the absolute path region.r_nations.
select
*
from (
select
ps_partkey,
sum(ps_supplycost * ps_availqty) as value
from
supplier s,
s.s_partsupps ps,
region.r_nations n
where
s_nationkey = n_nationkey
and n_name = 'GERMANY'
group by
ps_partkey
) as inner_query
where
value > (
select
sum(ps_supplycost * ps_availqty) * 0.0001
from
supplier s,
s.s_partsupps ps,
region.r_nations n
where
s_nationkey = n_nationkey
and n_name = 'GERMANY'
)
order by
value desc
---- PLAN
Max Per-Host Resource Reservation: Memory=71.89MB Threads=5
Per-Host Resource Estimates: Memory=558MB
PLAN-ROOT SINK
|
17:SORT
| order by: value DESC
| row-size=24B cardinality=100.00K
|
16:NESTED LOOP JOIN [INNER JOIN]
| predicates: sum(ps_supplycost * ps_availqty) > sum(ps_supplycost * ps_availqty) * 0.0001
| row-size=40B cardinality=100.00K
|
|--15:AGGREGATE [FINALIZE]
| | output: sum(ps_supplycost * ps_availqty)
| | row-size=16B cardinality=1
| |
| 14:HASH JOIN [INNER JOIN]
| | hash predicates: s_nationkey = n_nationkey
| | runtime filters: RF002 <- n_nationkey
| | row-size=40B cardinality=100.00K
| |
| |--13:SCAN HDFS [tpch_nested_parquet.region.r_nations n]
| | HDFS partitions=1/1 files=1 size=3.59KB
| | predicates: n_name = 'GERMANY'
| | row-size=14B cardinality=5
| |
| 09:SUBPLAN
| | row-size=26B cardinality=100.00K
| |
| |--12:NESTED LOOP JOIN [CROSS JOIN]
| | | row-size=26B cardinality=10
| | |
| | |--10:SINGULAR ROW SRC
| | | row-size=14B cardinality=1
| | |
| | 11:UNNEST [s.s_partsupps ps]
| | row-size=0B cardinality=10
| |
| 08:SCAN HDFS [tpch_nested_parquet.supplier s]
| HDFS partitions=1/1 files=1 size=41.80MB
| predicates: !empty(s.s_partsupps)
| runtime filters: RF002 -> s_nationkey
| row-size=14B cardinality=10.00K
|
07:AGGREGATE [FINALIZE]
| output: sum(ps_supplycost * ps_availqty)
| group by: ps_partkey
| row-size=24B cardinality=100.00K
|
06:HASH JOIN [INNER JOIN]
| hash predicates: s_nationkey = n_nationkey
| runtime filters: RF000 <- n_nationkey
| row-size=48B cardinality=100.00K
|
|--05:SCAN HDFS [tpch_nested_parquet.region.r_nations n]
| HDFS partitions=1/1 files=1 size=3.59KB
| predicates: n_name = 'GERMANY'
| row-size=14B cardinality=5
|
01:SUBPLAN
| row-size=34B cardinality=100.00K
|
|--04:NESTED LOOP JOIN [CROSS JOIN]
| | row-size=34B cardinality=10
| |
| |--02:SINGULAR ROW SRC
| | row-size=14B cardinality=1
| |
| 03:UNNEST [s.s_partsupps ps]
| row-size=0B cardinality=10
|
00:SCAN HDFS [tpch_nested_parquet.supplier s]
HDFS partitions=1/1 files=1 size=41.80MB
predicates: !empty(s.s_partsupps)
runtime filters: RF000 -> s_nationkey
row-size=14B cardinality=10.00K
---- DISTRIBUTEDPLAN
Max Per-Host Resource Reservation: Memory=111.91MB Threads=11
Per-Host Resource Estimates: Memory=746MB
PLAN-ROOT SINK
|
25:MERGING-EXCHANGE [UNPARTITIONED]
| order by: value DESC
|
17:SORT
| order by: value DESC
| row-size=24B cardinality=100.00K
|
16:NESTED LOOP JOIN [INNER JOIN, BROADCAST]
| predicates: sum(ps_supplycost * ps_availqty) > sum(ps_supplycost * ps_availqty) * 0.0001
| row-size=40B cardinality=100.00K
|
|--24:EXCHANGE [BROADCAST]
| |
| 23:AGGREGATE [FINALIZE]
| | output: sum:merge(ps_supplycost * ps_availqty)
| | row-size=16B cardinality=1
| |
| 22:EXCHANGE [UNPARTITIONED]
| |
| 15:AGGREGATE
| | output: sum(ps_supplycost * ps_availqty)
| | row-size=16B cardinality=1
| |
| 14:HASH JOIN [INNER JOIN, BROADCAST]
| | hash predicates: s_nationkey = n_nationkey
| | runtime filters: RF002 <- n_nationkey
| | row-size=40B cardinality=100.00K
| |
| |--21:EXCHANGE [BROADCAST]
| | |
| | 13:SCAN HDFS [tpch_nested_parquet.region.r_nations n]
| | HDFS partitions=1/1 files=1 size=3.59KB
| | predicates: n_name = 'GERMANY'
| | row-size=14B cardinality=5
| |
| 09:SUBPLAN
| | row-size=26B cardinality=100.00K
| |
| |--12:NESTED LOOP JOIN [CROSS JOIN]
| | | row-size=26B cardinality=10
| | |
| | |--10:SINGULAR ROW SRC
| | | row-size=14B cardinality=1
| | |
| | 11:UNNEST [s.s_partsupps ps]
| | row-size=0B cardinality=10
| |
| 08:SCAN HDFS [tpch_nested_parquet.supplier s]
| HDFS partitions=1/1 files=1 size=41.80MB
| predicates: !empty(s.s_partsupps)
| runtime filters: RF002 -> s_nationkey
| row-size=14B cardinality=10.00K
|
20:AGGREGATE [FINALIZE]
| output: sum:merge(ps_supplycost * ps_availqty)
| group by: ps_partkey
| row-size=24B cardinality=100.00K
|
19:EXCHANGE [HASH(ps_partkey)]
|
07:AGGREGATE [STREAMING]
| output: sum(ps_supplycost * ps_availqty)
| group by: ps_partkey
| row-size=24B cardinality=100.00K
|
06:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: s_nationkey = n_nationkey
| runtime filters: RF000 <- n_nationkey
| row-size=48B cardinality=100.00K
|
|--18:EXCHANGE [BROADCAST]
| |
| 05:SCAN HDFS [tpch_nested_parquet.region.r_nations n]
| HDFS partitions=1/1 files=1 size=3.59KB
| predicates: n_name = 'GERMANY'
| row-size=14B cardinality=5
|
01:SUBPLAN
| row-size=34B cardinality=100.00K
|
|--04:NESTED LOOP JOIN [CROSS JOIN]
| | row-size=34B cardinality=10
| |
| |--02:SINGULAR ROW SRC
| | row-size=14B cardinality=1
| |
| 03:UNNEST [s.s_partsupps ps]
| row-size=0B cardinality=10
|
00:SCAN HDFS [tpch_nested_parquet.supplier s]
HDFS partitions=1/1 files=1 size=41.80MB
predicates: !empty(s.s_partsupps)
runtime filters: RF000 -> s_nationkey
row-size=14B cardinality=10.00K
====
# TPCH-Q12
# Q12 - Shipping Mode and Order Priority Query
# Nested-schema form: orders are read through the absolute path customer.c_orders
# and lineitems are unnested per order (o.o_lineitems l), so there is no
# orders/lineitem join predicate. Every WHERE conjunct references only lineitem
# (l_*) columns; o_orderpriority is used only inside the CASE expressions.
select
l_shipmode,
sum(case
when o_orderpriority = '1-URGENT'
or o_orderpriority = '2-HIGH'
then 1
else 0
end) as high_line_count,
sum(case
when o_orderpriority <> '1-URGENT'
and o_orderpriority <> '2-HIGH'
then 1
else 0
end) as low_line_count
from
customer.c_orders o,
o.o_lineitems l
where
l_shipmode in ('MAIL', 'SHIP')
and l_commitdate < l_receiptdate
and l_shipdate < l_commitdate
and l_receiptdate >= '1994-01-01'
and l_receiptdate < '1995-01-01'
group by
l_shipmode
order by
l_shipmode
---- PLAN
Max Per-Host Resource Reservation: Memory=58.00MB Threads=2
Per-Host Resource Estimates: Memory=568MB
PLAN-ROOT SINK
|
06:SORT
| order by: l_shipmode ASC
| row-size=28B cardinality=15.00M
|
05:AGGREGATE [FINALIZE]
| output: sum(CASE WHEN o_orderpriority IN ('1-URGENT', '2-HIGH') THEN 1 ELSE 0 END), sum(CASE WHEN o_orderpriority != '1-URGENT' AND o_orderpriority != '2-HIGH' THEN 1 ELSE 0 END)
| group by: l_shipmode
| row-size=28B cardinality=15.00M
|
01:SUBPLAN
| row-size=72B cardinality=15.00M
|
|--04:NESTED LOOP JOIN [CROSS JOIN]
| | row-size=72B cardinality=10
| |
| |--02:SINGULAR ROW SRC
| | row-size=24B cardinality=1
| |
| 03:UNNEST [o.o_lineitems l]
| row-size=0B cardinality=10
|
00:SCAN HDFS [tpch_nested_parquet.customer.c_orders o]
HDFS partitions=1/1 files=4 size=289.08MB
predicates: !empty(o.o_lineitems)
predicates on l: l_shipmode IN ('MAIL', 'SHIP'), l_commitdate < l_receiptdate, l_shipdate < l_commitdate, l_receiptdate >= '1994-01-01', l_receiptdate < '1995-01-01'
row-size=24B cardinality=1.50M
---- DISTRIBUTEDPLAN
Max Per-Host Resource Reservation: Memory=104.00MB Threads=4
Per-Host Resource Estimates: Memory=748MB
PLAN-ROOT SINK
|
09:MERGING-EXCHANGE [UNPARTITIONED]
| order by: l_shipmode ASC
|
06:SORT
| order by: l_shipmode ASC
| row-size=28B cardinality=15.00M
|
08:AGGREGATE [FINALIZE]
| output: sum:merge(CASE WHEN o_orderpriority IN ('1-URGENT', '2-HIGH') THEN 1 ELSE 0 END), sum:merge(CASE WHEN o_orderpriority != '1-URGENT' AND o_orderpriority != '2-HIGH' THEN 1 ELSE 0 END)
| group by: l_shipmode
| row-size=28B cardinality=15.00M
|
07:EXCHANGE [HASH(l_shipmode)]
|
05:AGGREGATE [STREAMING]
| output: sum(CASE WHEN o_orderpriority IN ('1-URGENT', '2-HIGH') THEN 1 ELSE 0 END), sum(CASE WHEN o_orderpriority != '1-URGENT' AND o_orderpriority != '2-HIGH' THEN 1 ELSE 0 END)
| group by: l_shipmode
| row-size=28B cardinality=15.00M
|
01:SUBPLAN
| row-size=72B cardinality=15.00M
|
|--04:NESTED LOOP JOIN [CROSS JOIN]
| | row-size=72B cardinality=10
| |
| |--02:SINGULAR ROW SRC
| | row-size=24B cardinality=1
| |
| 03:UNNEST [o.o_lineitems l]
| row-size=0B cardinality=10
|
00:SCAN HDFS [tpch_nested_parquet.customer.c_orders o]
HDFS partitions=1/1 files=4 size=289.08MB
predicates: !empty(o.o_lineitems)
predicates on l: l_shipmode IN ('MAIL', 'SHIP'), l_commitdate < l_receiptdate, l_shipdate < l_commitdate, l_receiptdate >= '1994-01-01', l_receiptdate < '1995-01-01'
row-size=24B cardinality=1.50M
====
# TPCH-Q13
# Q13 - Customer Distribution Query
# Nested-schema form: the left outer join is against the relative collection
# c.c_orders (referenced without an alias), so the ON clause carries only the
# o_comment filter and no customer/orders key equality.
# Customers without a qualifying order are preserved by the outer join;
# count(o_orderkey) counts only non-NULL values, so those customers get
# c_count = 0.
select
c_count,
count(*) as custdist
from (
select
c_custkey,
count(o_orderkey) as c_count
from
customer c left outer join c.c_orders on (
o_comment not like '%special%requests%'
)
group by
c_custkey
) as c_orders
group by
c_count
order by
custdist desc,
c_count desc
---- PLAN
Max Per-Host Resource Reservation: Memory=40.00MB Threads=2
Per-Host Resource Estimates: Memory=274MB
PLAN-ROOT SINK
|
07:SORT
| order by: count(*) DESC, c_count DESC
| row-size=16B cardinality=150.00K
|
06:AGGREGATE [FINALIZE]
| output: count(*)
| group by: count(o_orderkey)
| row-size=16B cardinality=150.00K
|
05:AGGREGATE [FINALIZE]
| output: count(o_orderkey)
| group by: c_custkey
| row-size=16B cardinality=150.00K
|
01:SUBPLAN
| row-size=40B cardinality=150.00K
|
|--04:NESTED LOOP JOIN [RIGHT OUTER JOIN]
| | row-size=40B cardinality=1
| |
| |--02:SINGULAR ROW SRC
| | row-size=20B cardinality=1
| |
| 03:UNNEST [c.c_orders]
| row-size=0B cardinality=10
|
00:SCAN HDFS [tpch_nested_parquet.customer c]
HDFS partitions=1/1 files=4 size=289.08MB
predicates on c_orders: (NOT o_comment LIKE '%special%requests%')
row-size=20B cardinality=150.00K
---- DISTRIBUTEDPLAN
Max Per-Host Resource Reservation: Memory=94.94MB Threads=5
Per-Host Resource Estimates: Memory=548MB
PLAN-ROOT SINK
|
12:MERGING-EXCHANGE [UNPARTITIONED]
| order by: count(*) DESC, c_count DESC
|
07:SORT
| order by: count(*) DESC, c_count DESC
| row-size=16B cardinality=150.00K
|
11:AGGREGATE [FINALIZE]
| output: count:merge(*)
| group by: c_count
| row-size=16B cardinality=150.00K
|
10:EXCHANGE [HASH(c_count)]
|
06:AGGREGATE [STREAMING]
| output: count(*)
| group by: count(o_orderkey)
| row-size=16B cardinality=150.00K
|
09:AGGREGATE [FINALIZE]
| output: count:merge(o_orderkey)
| group by: c_custkey
| row-size=16B cardinality=150.00K
|
08:EXCHANGE [HASH(c_custkey)]
|
05:AGGREGATE [STREAMING]
| output: count(o_orderkey)
| group by: c_custkey
| row-size=16B cardinality=150.00K
|
01:SUBPLAN
| row-size=40B cardinality=150.00K
|
|--04:NESTED LOOP JOIN [RIGHT OUTER JOIN]
| | row-size=40B cardinality=1
| |
| |--02:SINGULAR ROW SRC
| | row-size=20B cardinality=1
| |
| 03:UNNEST [c.c_orders]
| row-size=0B cardinality=10
|
00:SCAN HDFS [tpch_nested_parquet.customer c]
HDFS partitions=1/1 files=4 size=289.08MB
predicates on c_orders: (NOT o_comment LIKE '%special%requests%')
row-size=20B cardinality=150.00K
====
# TPCH-Q14
# Q14 - Promotion Effect
# Nested-schema form: lineitems are read directly through the absolute path
# customer.c_orders.o_lineitems; no customer or order columns are referenced,
# and the only join is to the top-level part table on l_partkey = p_partkey.
select
100.00 * sum(case
when p_type like 'PROMO%'
then l_extendedprice * (1 - l_discount)
else 0.0
end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue
from
customer.c_orders.o_lineitems l,
part p
where
l_partkey = p_partkey
and l_shipdate >= '1995-09-01'
and l_shipdate < '1995-10-01'
---- PLAN
Max Per-Host Resource Reservation: Memory=27.50MB Threads=3
Per-Host Resource Estimates: Memory=394MB
PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
| output: sum(CASE WHEN p_type LIKE 'PROMO%' THEN l_extendedprice * (1 - l_discount) ELSE 0 END), sum(l_extendedprice * (1 - l_discount))
| row-size=32B cardinality=1
|
02:HASH JOIN [INNER JOIN]
| hash predicates: l_partkey = p_partkey
| runtime filters: RF000 <- p_partkey
| row-size=77B cardinality=1.50M
|
|--01:SCAN HDFS [tpch_nested_parquet.part p]
| HDFS partitions=1/1 files=1 size=6.24MB
| row-size=41B cardinality=200.00K
|
00:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems l]
HDFS partitions=1/1 files=4 size=289.08MB
predicates: l_shipdate < '1995-10-01', l_shipdate >= '1995-09-01'
runtime filters: RF000 -> l_partkey
row-size=36B cardinality=1.50M
---- DISTRIBUTEDPLAN
Max Per-Host Resource Reservation: Memory=27.50MB Threads=5
Per-Host Resource Estimates: Memory=414MB
PLAN-ROOT SINK
|
06:AGGREGATE [FINALIZE]
| output: sum:merge(CASE WHEN p_type LIKE 'PROMO%' THEN l_extendedprice * (1 - l_discount) ELSE 0 END), sum:merge(l_extendedprice * (1 - l_discount))
| row-size=32B cardinality=1
|
05:EXCHANGE [UNPARTITIONED]
|
03:AGGREGATE
| output: sum(CASE WHEN p_type LIKE 'PROMO%' THEN l_extendedprice * (1 - l_discount) ELSE 0 END), sum(l_extendedprice * (1 - l_discount))
| row-size=32B cardinality=1
|
02:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: l_partkey = p_partkey
| runtime filters: RF000 <- p_partkey
| row-size=77B cardinality=1.50M
|
|--04:EXCHANGE [BROADCAST]
| |
| 01:SCAN HDFS [tpch_nested_parquet.part p]
| HDFS partitions=1/1 files=1 size=6.24MB
| row-size=41B cardinality=200.00K
|
00:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems l]
HDFS partitions=1/1 files=4 size=289.08MB
predicates: l_shipdate < '1995-10-01', l_shipdate >= '1995-09-01'
runtime filters: RF000 -> l_partkey
row-size=36B cardinality=1.50M
====
# TPCH-Q15
# Q15 - Top Supplier Query
# revenue_view aggregates revenue per supplier over lineitems read through the
# absolute path customer.c_orders.o_lineitems. The view is referenced twice:
# once joined to supplier on s_suppkey = supplier_no, and once inside the
# uncorrelated scalar subquery that computes max(total_revenue).
with revenue_view as (
select
l_suppkey as supplier_no,
sum(l_extendedprice * (1 - l_discount)) as total_revenue
from
customer.c_orders.o_lineitems l
where
l_shipdate >= '1996-01-01'
and l_shipdate < '1996-04-01'
group by
l_suppkey)
select
s_suppkey,
s_name,
s_address,
s_phone,
total_revenue
from
supplier,
revenue_view
where
s_suppkey = supplier_no
and total_revenue = (
select
max(total_revenue)
from
revenue_view
)
order by
s_suppkey
---- PLAN
Max Per-Host Resource Reservation: Memory=103.94MB Threads=4
Per-Host Resource Estimates: Memory=1.16GB
PLAN-ROOT SINK
|
08:SORT
| order by: s_suppkey ASC
| row-size=118B cardinality=1.50M
|
07:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: sum(l_extendedprice * (1 - l_discount)) = max(total_revenue)
| row-size=126B cardinality=1.50M
|
|--05:AGGREGATE [FINALIZE]
| | output: max(sum(l_extendedprice * (1 - l_discount)))
| | row-size=16B cardinality=1
| |
| 04:AGGREGATE [FINALIZE]
| | output: sum(l_extendedprice * (1 - l_discount))
| | group by: l_suppkey
| | row-size=24B cardinality=1.50M
| |
| 03:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems l]
| HDFS partitions=1/1 files=4 size=289.08MB
| predicates: l_shipdate < '1996-04-01', l_shipdate >= '1996-01-01'
| row-size=36B cardinality=1.50M
|
06:HASH JOIN [INNER JOIN]
| hash predicates: l_suppkey = s_suppkey
| runtime filters: RF000 <- s_suppkey
| row-size=126B cardinality=1.50M
|
|--00:SCAN HDFS [tpch_nested_parquet.supplier]
| HDFS partitions=1/1 files=1 size=41.80MB
| row-size=102B cardinality=10.00K
|
02:AGGREGATE [FINALIZE]
| output: sum(l_extendedprice * (1 - l_discount))
| group by: l_suppkey
| row-size=24B cardinality=1.50M
|
01:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems l]
HDFS partitions=1/1 files=4 size=289.08MB
predicates: l_shipdate < '1996-04-01', l_shipdate >= '1996-01-01'
runtime filters: RF000 -> l.l_suppkey
row-size=36B cardinality=1.50M
---- DISTRIBUTEDPLAN
Max Per-Host Resource Reservation: Memory=186.88MB Threads=10
Per-Host Resource Estimates: Memory=1.47GB
PLAN-ROOT SINK
|
17:MERGING-EXCHANGE [UNPARTITIONED]
| order by: s_suppkey ASC
|
08:SORT
| order by: s_suppkey ASC
| row-size=118B cardinality=1.50M
|
07:HASH JOIN [LEFT SEMI JOIN, BROADCAST]
| hash predicates: sum(l_extendedprice * (1 - l_discount)) = max(total_revenue)
| row-size=126B cardinality=1.50M
|
|--16:EXCHANGE [BROADCAST]
| |
| 15:AGGREGATE [FINALIZE]
| | output: max:merge(total_revenue)
| | row-size=16B cardinality=1
| |
| 14:EXCHANGE [UNPARTITIONED]
| |
| 05:AGGREGATE
| | output: max(sum(l_extendedprice * (1 - l_discount)))
| | row-size=16B cardinality=1
| |
| 13:AGGREGATE [FINALIZE]
| | output: sum:merge(l_extendedprice * (1 - l_discount))
| | group by: l_suppkey
| | row-size=24B cardinality=1.50M
| |
| 12:EXCHANGE [HASH(l_suppkey)]
| |
| 04:AGGREGATE [STREAMING]
| | output: sum(l_extendedprice * (1 - l_discount))
| | group by: l_suppkey
| | row-size=24B cardinality=1.50M
| |
| 03:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems l]
| HDFS partitions=1/1 files=4 size=289.08MB
| predicates: l_shipdate < '1996-04-01', l_shipdate >= '1996-01-01'
| row-size=36B cardinality=1.50M
|
06:HASH JOIN [INNER JOIN, PARTITIONED]
| hash predicates: l_suppkey = s_suppkey
| runtime filters: RF000 <- s_suppkey
| row-size=126B cardinality=1.50M
|
|--11:EXCHANGE [HASH(s_suppkey)]
| |
| 00:SCAN HDFS [tpch_nested_parquet.supplier]
| HDFS partitions=1/1 files=1 size=41.80MB
| row-size=102B cardinality=10.00K
|
10:AGGREGATE [FINALIZE]
| output: sum:merge(l_extendedprice * (1 - l_discount))
| group by: l_suppkey
| row-size=24B cardinality=1.50M
|
09:EXCHANGE [HASH(l_suppkey)]
|
02:AGGREGATE [STREAMING]
| output: sum(l_extendedprice * (1 - l_discount))
| group by: l_suppkey
| row-size=24B cardinality=1.50M
|
01:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems l]
HDFS partitions=1/1 files=4 size=289.08MB
predicates: l_shipdate < '1996-04-01', l_shipdate >= '1996-01-01'
runtime filters: RF000 -> l.l_suppkey
row-size=36B cardinality=1.50M
====
# TPCH-Q16
# Q16 - Parts/Supplier Relation Query
# Counts the distinct suppliers per (p_brand, p_type, p_size) for parts that pass the
# brand/type/size filters, skipping suppliers whose s_comment matches
# '%Customer%Complaints%'.
# Part-supplier rows come from the nested collection s.s_partsupps of each supplier
# row, so the query has no supplier/partsupp join predicate; only part is joined by
# key (p_partkey = ps_partkey).
# The expected plans evaluate count(distinct s_suppkey) in two aggregation steps:
# first grouping by (p_brand, p_type, p_size, s_suppkey), then counting per
# (p_brand, p_type, p_size).
select
p_brand,
p_type,
p_size,
count(distinct s_suppkey) as supplier_cnt
from
supplier s,
s.s_partsupps ps,
part p
where
p_partkey = ps_partkey
and p_brand <> 'Brand#45'
and p_type not like 'MEDIUM POLISHED%'
and p_size in (49, 14, 23, 45, 19, 3, 36, 9)
and s_comment not like '%Customer%Complaints%'
group by
p_brand,
p_type,
p_size
order by
supplier_cnt desc,
p_brand,
p_type,
p_size
---- PLAN
Max Per-Host Resource Reservation: Memory=13.94MB Threads=3
Per-Host Resource Estimates: Memory=234MB
PLAN-ROOT SINK
|
09:SORT
| order by: count(s_suppkey) DESC, p_brand ASC, p_type ASC, p_size ASC
| row-size=65B cardinality=10.00K
|
08:AGGREGATE [FINALIZE]
| output: count(s_suppkey)
| group by: p_brand, p_type, p_size
| row-size=65B cardinality=10.00K
|
07:AGGREGATE
| group by: p_brand, p_type, p_size, s_suppkey
| row-size=65B cardinality=10.00K
|
06:HASH JOIN [INNER JOIN]
| hash predicates: ps_partkey = p_partkey
| row-size=167B cardinality=10.00K
|
|--05:SCAN HDFS [tpch_nested_parquet.part p]
| HDFS partitions=1/1 files=1 size=6.24MB
| predicates: p_size IN (49, 14, 23, 45, 19, 3, 36, 9), p_brand != 'Brand#45', NOT p_type LIKE 'MEDIUM POLISHED%'
| row-size=65B cardinality=8.00K
|
01:SUBPLAN
| row-size=103B cardinality=10.00K
|
|--04:NESTED LOOP JOIN [CROSS JOIN]
| | row-size=103B cardinality=10
| |
| |--02:SINGULAR ROW SRC
| | row-size=95B cardinality=1
| |
| 03:UNNEST [s.s_partsupps ps]
| row-size=0B cardinality=10
|
00:SCAN HDFS [tpch_nested_parquet.supplier s]
HDFS partitions=1/1 files=1 size=41.80MB
predicates: NOT s_comment LIKE '%Customer%Complaints%', !empty(s.s_partsupps)
row-size=95B cardinality=1.00K
---- DISTRIBUTEDPLAN
Max Per-Host Resource Reservation: Memory=31.81MB Threads=7
Per-Host Resource Estimates: Memory=288MB
PLAN-ROOT SINK
|
15:MERGING-EXCHANGE [UNPARTITIONED]
| order by: count(s_suppkey) DESC, p_brand ASC, p_type ASC, p_size ASC
|
09:SORT
| order by: count(s_suppkey) DESC, p_brand ASC, p_type ASC, p_size ASC
| row-size=65B cardinality=10.00K
|
14:AGGREGATE [FINALIZE]
| output: count:merge(s_suppkey)
| group by: p_brand, p_type, p_size
| row-size=65B cardinality=10.00K
|
13:EXCHANGE [HASH(p_brand,p_type,p_size)]
|
08:AGGREGATE [STREAMING]
| output: count(s_suppkey)
| group by: p_brand, p_type, p_size
| row-size=65B cardinality=10.00K
|
12:AGGREGATE
| group by: p_brand, p_type, p_size, s_suppkey
| row-size=65B cardinality=10.00K
|
11:EXCHANGE [HASH(p_brand,p_type,p_size,s_suppkey)]
|
07:AGGREGATE [STREAMING]
| group by: p_brand, p_type, p_size, s_suppkey
| row-size=65B cardinality=10.00K
|
06:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: ps_partkey = p_partkey
| row-size=167B cardinality=10.00K
|
|--10:EXCHANGE [BROADCAST]
| |
| 05:SCAN HDFS [tpch_nested_parquet.part p]
| HDFS partitions=1/1 files=1 size=6.24MB
| predicates: p_size IN (49, 14, 23, 45, 19, 3, 36, 9), p_brand != 'Brand#45', NOT p_type LIKE 'MEDIUM POLISHED%'
| row-size=65B cardinality=8.00K
|
01:SUBPLAN
| row-size=103B cardinality=10.00K
|
|--04:NESTED LOOP JOIN [CROSS JOIN]
| | row-size=103B cardinality=10
| |
| |--02:SINGULAR ROW SRC
| | row-size=95B cardinality=1
| |
| 03:UNNEST [s.s_partsupps ps]
| row-size=0B cardinality=10
|
00:SCAN HDFS [tpch_nested_parquet.supplier s]
HDFS partitions=1/1 files=1 size=41.80MB
predicates: NOT s_comment LIKE '%Customer%Complaints%', !empty(s.s_partsupps)
row-size=95B cardinality=1.00K
====
# TPCH-Q17
# Q17 - Small-Quantity-Order Revenue Query
# Sums l_extendedprice (divided by 7.0) over line items of 'Brand#23' / 'MED BOX'
# parts whose l_quantity is below 0.2 * the average l_quantity of the line items
# with the same part key.
# Line items are read through the path customer.c_orders.o_lineitems, both in the
# outer query and in the correlated subquery; both references use the alias l.
# The expected plans turn the subquery into a LEFT SEMI JOIN against
# avg(l_quantity) grouped by l_partkey, with the quantity comparison listed under
# "other join predicates".
select
sum(l_extendedprice) / 7.0 as avg_yearly
from
customer.c_orders.o_lineitems l,
part p
where
p_partkey = l_partkey
and p_brand = 'Brand#23'
and p_container = 'MED BOX'
and l_quantity < (
select
0.2 * avg(l_quantity)
from
customer.c_orders.o_lineitems l
where
l_partkey = p_partkey
)
---- PLAN
Max Per-Host Resource Reservation: Memory=96.94MB Threads=4
Per-Host Resource Estimates: Memory=703MB
PLAN-ROOT SINK
|
06:AGGREGATE [FINALIZE]
| output: sum(l_extendedprice)
| row-size=16B cardinality=1
|
05:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: p_partkey = l_partkey
| other join predicates: l_quantity < 0.2 * avg(l_quantity)
| runtime filters: RF000 <- l_partkey
| row-size=72B cardinality=15.00M
|
|--03:AGGREGATE [FINALIZE]
| | output: avg(l_quantity)
| | group by: l_partkey
| | row-size=16B cardinality=15.00M
| |
| 02:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems l]
| HDFS partitions=1/1 files=4 size=289.08MB
| row-size=16B cardinality=15.00M
|
04:HASH JOIN [INNER JOIN]
| hash predicates: l_partkey = p_partkey
| runtime filters: RF002 <- p_partkey
| row-size=72B cardinality=15.00M
|
|--01:SCAN HDFS [tpch_nested_parquet.part p]
| HDFS partitions=1/1 files=1 size=6.24MB
| predicates: p_container = 'MED BOX', p_brand = 'Brand#23'
| runtime filters: RF000 -> p_partkey
| row-size=48B cardinality=1.00K
|
00:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems l]
HDFS partitions=1/1 files=4 size=289.08MB
runtime filters: RF000 -> l.l_partkey, RF002 -> l_partkey
row-size=24B cardinality=15.00M
---- DISTRIBUTEDPLAN
Max Per-Host Resource Reservation: Memory=154.94MB Threads=8
Per-Host Resource Estimates: Memory=875MB
PLAN-ROOT SINK
|
12:AGGREGATE [FINALIZE]
| output: sum:merge(l_extendedprice)
| row-size=16B cardinality=1
|
11:EXCHANGE [UNPARTITIONED]
|
06:AGGREGATE
| output: sum(l_extendedprice)
| row-size=16B cardinality=1
|
05:HASH JOIN [LEFT SEMI JOIN, PARTITIONED]
| hash predicates: p_partkey = l_partkey
| other join predicates: l_quantity < 0.2 * avg(l_quantity)
| runtime filters: RF000 <- l_partkey
| row-size=72B cardinality=15.00M
|
|--09:AGGREGATE [FINALIZE]
| | output: avg:merge(l_quantity)
| | group by: l_partkey
| | row-size=16B cardinality=15.00M
| |
| 08:EXCHANGE [HASH(l_partkey)]
| |
| 03:AGGREGATE [STREAMING]
| | output: avg(l_quantity)
| | group by: l_partkey
| | row-size=16B cardinality=15.00M
| |
| 02:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems l]
| HDFS partitions=1/1 files=4 size=289.08MB
| row-size=16B cardinality=15.00M
|
10:EXCHANGE [HASH(p_partkey)]
|
04:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: l_partkey = p_partkey
| runtime filters: RF002 <- p_partkey
| row-size=72B cardinality=15.00M
|
|--07:EXCHANGE [BROADCAST]
| |
| 01:SCAN HDFS [tpch_nested_parquet.part p]
| HDFS partitions=1/1 files=1 size=6.24MB
| predicates: p_container = 'MED BOX', p_brand = 'Brand#23'
| runtime filters: RF000 -> p_partkey
| row-size=48B cardinality=1.00K
|
00:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems l]
HDFS partitions=1/1 files=4 size=289.08MB
runtime filters: RF000 -> l.l_partkey, RF002 -> l_partkey
row-size=24B cardinality=15.00M
====
# TPCH-Q18
# Q18 - Large Value Customer Query
# Returns up to 100 orders, ordered by o_totalprice desc and then o_orderdate, whose
# line items have a total l_quantity greater than 300, together with the customer
# columns c_name and c_custkey.
# The per-order total comes from an inline view that aggregates the correlated
# collection o.o_lineitems, so the query has no join predicate or group by.
# In the expected plans this becomes nested SUBPLANs (c.c_orders unnested per
# customer, o.o_lineitems unnested per order) and the sum_quantity > 300 filter
# appears as a "having" predicate on the inner aggregation.
select
c_name,
c_custkey,
o_orderkey,
o_orderdate,
o_totalprice,
sum_quantity
from
customer c,
c.c_orders o,
(select sum(l_quantity) sum_quantity from o.o_lineitems) l
where
sum_quantity > 300
order by
o_totalprice desc,
o_orderdate
limit 100
---- PLAN
Max Per-Host Resource Reservation: Memory=24.00MB Threads=2
Per-Host Resource Estimates: Memory=538MB
PLAN-ROOT SINK
|
10:TOP-N [LIMIT=100]
| order by: o_totalprice DESC, o_orderdate ASC
| row-size=82B cardinality=100
|
01:SUBPLAN
| row-size=106B cardinality=1.50M
|
|--09:NESTED LOOP JOIN [CROSS JOIN]
| | row-size=106B cardinality=10
| |
| |--02:SINGULAR ROW SRC
| | row-size=50B cardinality=1
| |
| 04:SUBPLAN
| | row-size=56B cardinality=10
| |
| |--08:NESTED LOOP JOIN [CROSS JOIN]
| | | row-size=56B cardinality=1
| | |
| | |--05:SINGULAR ROW SRC
| | | row-size=40B cardinality=1
| | |
| | 07:AGGREGATE [FINALIZE]
| | | output: sum(l_quantity)
| | | having: sum(l_quantity) > 300
| | | row-size=16B cardinality=1
| | |
| | 06:UNNEST [o.o_lineitems]
| | row-size=0B cardinality=10
| |
| 03:UNNEST [c.c_orders o]
| row-size=0B cardinality=10
|
00:SCAN HDFS [tpch_nested_parquet.customer c]
HDFS partitions=1/1 files=4 size=289.08MB
predicates: !empty(c.c_orders)
row-size=50B cardinality=150.00K
---- DISTRIBUTEDPLAN
Max Per-Host Resource Reservation: Memory=24.00MB Threads=3
Per-Host Resource Estimates: Memory=538MB
PLAN-ROOT SINK
|
11:MERGING-EXCHANGE [UNPARTITIONED]
| order by: o_totalprice DESC, o_orderdate ASC
| limit: 100
|
10:TOP-N [LIMIT=100]
| order by: o_totalprice DESC, o_orderdate ASC
| row-size=82B cardinality=100
|
01:SUBPLAN
| row-size=106B cardinality=1.50M
|
|--09:NESTED LOOP JOIN [CROSS JOIN]
| | row-size=106B cardinality=10
| |
| |--02:SINGULAR ROW SRC
| | row-size=50B cardinality=1
| |
| 04:SUBPLAN
| | row-size=56B cardinality=10
| |
| |--08:NESTED LOOP JOIN [CROSS JOIN]
| | | row-size=56B cardinality=1
| | |
| | |--05:SINGULAR ROW SRC
| | | row-size=40B cardinality=1
| | |
| | 07:AGGREGATE [FINALIZE]
| | | output: sum(l_quantity)
| | | having: sum(l_quantity) > 300
| | | row-size=16B cardinality=1
| | |
| | 06:UNNEST [o.o_lineitems]
| | row-size=0B cardinality=10
| |
| 03:UNNEST [c.c_orders o]
| row-size=0B cardinality=10
|
00:SCAN HDFS [tpch_nested_parquet.customer c]
HDFS partitions=1/1 files=4 size=289.08MB
predicates: !empty(c.c_orders)
row-size=50B cardinality=150.00K
====
# TPCH-Q19
# Q19 - Discounted Revenue Query
# Sums l_extendedprice * (1 - l_discount) over line items joined to part on
# p_partkey = l_partkey, where the row matches any one of three alternatives
# (for Brand#12, Brand#23 and Brand#34), each with its own container list,
# quantity range and size range.
# l_shipmode and l_shipinstruct are tested identically in all three alternatives.
# In the expected plans the disjunction shows up as long lists of OR-ed predicates
# on the hash join and on both scans, while the l_shipmode / l_shipinstruct
# conditions and p_size >= 1 appear as standalone scan predicates.
select
sum(l_extendedprice * (1 - l_discount)) as revenue
from
customer.c_orders.o_lineitems l,
part p
where
p_partkey = l_partkey
and (
(
p_brand = 'Brand#12'
and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG')
and l_quantity >= 1 and l_quantity <= 11
and p_size between 1 and 5
and l_shipmode in ('AIR', 'AIR REG')
and l_shipinstruct = 'DELIVER IN PERSON'
)
or
(
p_brand = 'Brand#23'
and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK')
and l_quantity >= 10 and l_quantity <= 20
and p_size between 1 and 10
and l_shipmode in ('AIR', 'AIR REG')
and l_shipinstruct = 'DELIVER IN PERSON'
)
or
(
p_brand = 'Brand#34'
and p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG')
and l_quantity >= 20 and l_quantity <= 30
and p_size between 1 and 15
and l_shipmode in ('AIR', 'AIR REG')
and l_shipinstruct = 'DELIVER IN PERSON'
)
)
---- PLAN
Max Per-Host Resource Reservation: Memory=28.94MB Threads=3
Per-Host Resource Estimates: Memory=595MB
PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
| output: sum(l_extendedprice * (1 - l_discount))
| row-size=16B cardinality=1
|
02:HASH JOIN [INNER JOIN]
| hash predicates: l_partkey = p_partkey
| other predicates: l_quantity <= 11 OR l_quantity <= 20 OR p_size <= 15, l_quantity <= 11 OR l_quantity >= 10 OR p_size <= 15, l_quantity <= 11 OR p_size <= 10 OR l_quantity <= 30, l_quantity <= 11 OR p_size <= 10 OR l_quantity >= 20, l_quantity <= 11 OR p_size <= 10 OR p_size <= 15, l_quantity >= 1 OR l_quantity <= 20 OR p_size <= 15, l_quantity >= 1 OR l_quantity >= 10 OR p_size <= 15, l_quantity >= 1 OR p_size <= 10 OR l_quantity <= 30, l_quantity >= 1 OR p_size <= 10 OR l_quantity >= 20, l_quantity >= 1 OR p_size <= 10 OR p_size <= 15, p_size <= 5 OR l_quantity <= 20 OR l_quantity <= 30, p_size <= 5 OR l_quantity <= 20 OR l_quantity >= 20, p_size <= 5 OR l_quantity <= 20 OR p_size <= 15, p_size <= 5 OR l_quantity >= 10 OR l_quantity <= 30, p_size <= 5 OR l_quantity >= 10 OR l_quantity >= 20, p_size <= 5 OR l_quantity >= 10 OR p_size <= 15, p_size <= 5 OR p_size <= 10 OR l_quantity <= 30, p_size <= 5 OR p_size <= 10 OR l_quantity >= 20, l_quantity <= 11 OR l_quantity <= 20 OR p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG'), l_quantity <= 11 OR l_quantity >= 10 OR p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG'), l_quantity <= 11 OR p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') OR l_quantity <= 30, l_quantity <= 11 OR p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') OR l_quantity >= 20, l_quantity <= 11 OR p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') OR p_size <= 15, l_quantity <= 11 OR p_size <= 10 OR p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG'), l_quantity >= 1 OR l_quantity <= 20 OR p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG'), l_quantity >= 1 OR l_quantity >= 10 OR p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG'), l_quantity >= 1 OR p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') OR l_quantity <= 30, l_quantity >= 1 OR p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') OR l_quantity >= 20, l_quantity >= 1 OR p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') OR p_size <= 15, l_quantity >= 1 OR p_size <= 10 OR p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG'), p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') OR l_quantity <= 20 OR l_quantity <= 30, p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') OR l_quantity <= 20 OR l_quantity >= 20, p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') OR l_quantity <= 20 OR p_size <= 15, p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') OR l_quantity >= 10 OR l_quantity <= 30, p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') OR l_quantity >= 10 OR l_quantity >= 20, p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') OR l_quantity >= 10 OR p_size <= 15, p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') OR p_size <= 10 OR l_quantity <= 30, p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') OR p_size <= 10 OR l_quantity >= 20, p_size <= 5 OR l_quantity <= 20 OR p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG'), p_size <= 5 OR l_quantity >= 10 OR p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG'), l_quantity <= 11 OR p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') OR p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG'), l_quantity >= 1 OR p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') OR p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG'), p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') OR l_quantity <= 20 OR p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG'), p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') OR l_quantity >= 10 OR p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG'), l_quantity <= 11 OR l_quantity <= 20 OR p_brand = 'Brand#34', l_quantity <= 11 OR l_quantity >= 10 OR p_brand = 'Brand#34', l_quantity <= 11 OR p_brand = 'Brand#23' OR l_quantity <= 30, l_quantity <= 11 OR p_brand = 'Brand#23' OR l_quantity >= 20, l_quantity <= 11 OR p_brand = 'Brand#23' OR p_size <= 15, l_quantity <= 11 OR p_size <= 10 OR p_brand = 'Brand#34', l_quantity >= 1 OR l_quantity <= 20 OR p_brand = 'Brand#34', l_quantity >= 1 OR l_quantity >= 10 OR p_brand = 'Brand#34', l_quantity >= 1 OR p_brand = 'Brand#23' OR l_quantity <= 30, l_quantity >= 1 OR p_brand = 'Brand#23' OR l_quantity >= 20, l_quantity >= 1 OR p_brand = 'Brand#23' OR p_size <= 15, l_quantity >= 1 OR p_size <= 10 OR p_brand = 'Brand#34', p_brand = 'Brand#12' OR l_quantity <= 20 OR l_quantity <= 30, p_brand = 'Brand#12' OR l_quantity <= 20 OR l_quantity >= 20, p_brand = 'Brand#12' OR l_quantity <= 20 OR p_size <= 15, p_brand = 'Brand#12' OR l_quantity >= 10 OR l_quantity <= 30, p_brand = 'Brand#12' OR l_quantity >= 10 OR l_quantity >= 20, p_brand = 'Brand#12' OR l_quantity >= 10 OR p_size <= 15, p_brand = 'Brand#12' OR p_size <= 10 OR l_quantity <= 30, p_brand = 'Brand#12' OR p_size <= 10 OR l_quantity >= 20, p_size <= 5 OR l_quantity <= 20 OR p_brand = 'Brand#34', p_size <= 5 OR l_quantity >= 10 OR p_brand = 'Brand#34', l_quantity <= 11 OR p_brand = 'Brand#23' OR p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG'), l_quantity <= 11 OR p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') OR p_brand = 'Brand#34', l_quantity >= 1 OR p_brand = 'Brand#23' OR p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG'), l_quantity >= 1 OR p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') OR p_brand = 'Brand#34', p_brand = 'Brand#12' OR l_quantity <= 20 OR p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG'), p_brand = 'Brand#12' OR l_quantity >= 10 OR p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG'), p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') OR l_quantity <= 20 OR p_brand = 'Brand#34', p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') OR l_quantity >= 10 OR p_brand = 'Brand#34', p_size <= 5 OR p_brand = 'Brand#23' AND p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') OR l_quantity <= 30, p_size <= 5 OR p_brand = 'Brand#23' AND p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') OR l_quantity >= 20, l_quantity <= 11 OR p_brand = 'Brand#23' OR p_brand = 'Brand#34', l_quantity >= 1 OR p_brand = 'Brand#23' OR p_brand = 'Brand#34', p_brand = 'Brand#12' OR l_quantity <= 20 OR p_brand = 'Brand#34', p_brand = 'Brand#12' OR l_quantity >= 10 OR p_brand = 'Brand#34', p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') OR p_brand = 'Brand#23' AND p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') OR l_quantity <= 30, p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') OR p_brand = 'Brand#23' AND p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') OR l_quantity >= 20, p_brand = 'Brand#12' OR p_brand = 'Brand#23' AND p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') OR l_quantity <= 30, p_brand = 'Brand#12' OR p_brand = 'Brand#23' AND p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') OR l_quantity >= 20
| runtime filters: RF000 <- p_partkey
| row-size=108B cardinality=1.50M
|
|--01:SCAN HDFS [tpch_nested_parquet.part p]
| HDFS partitions=1/1 files=1 size=6.24MB
| predicates: p_brand = 'Brand#12' OR p_brand = 'Brand#23' AND p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') OR p_brand = 'Brand#34' AND p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG'), p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') OR p_brand = 'Brand#23' AND p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') OR p_brand = 'Brand#34' AND p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG'), p_size >= 1, p_size <= 5 OR p_size <= 10 OR p_size <= 15, p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') OR p_size <= 10 OR p_size <= 15, p_brand = 'Brand#12' OR p_size <= 10 OR p_size <= 15, p_size <= 5 OR p_brand = 'Brand#23' AND p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') OR p_size <= 15, p_size <= 5 OR p_size <= 10 OR p_brand = 'Brand#34' AND p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG'), p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') OR p_brand = 'Brand#23' AND p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') OR p_size <= 15, p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') OR p_size <= 10 OR p_brand = 'Brand#34' AND p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG'), p_brand = 'Brand#12' OR p_brand = 'Brand#23' AND p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') OR p_size <= 15, p_brand = 'Brand#12' OR p_size <= 10 OR p_brand = 'Brand#34' AND p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG'), p_size <= 5 OR p_brand = 'Brand#23' AND p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') OR p_brand = 'Brand#34' AND p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG')
| row-size=52B cardinality=1.43K
|
00:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems l]
HDFS partitions=1/1 files=4 size=289.08MB
predicates: l_shipmode IN ('AIR', 'AIR REG'), l_quantity <= 11 OR l_quantity <= 20 OR l_quantity <= 30, l_quantity <= 11 OR l_quantity <= 20 OR l_quantity >= 20, l_quantity <= 11 OR l_quantity >= 10 OR l_quantity <= 30, l_quantity <= 11 OR l_quantity >= 10 OR l_quantity >= 20, l_quantity >= 1 OR l_quantity <= 20 OR l_quantity <= 30, l_quantity >= 1 OR l_quantity <= 20 OR l_quantity >= 20, l_quantity >= 1 OR l_quantity >= 10 OR l_quantity <= 30, l_quantity >= 1 OR l_quantity >= 10 OR l_quantity >= 20, l_shipinstruct = 'DELIVER IN PERSON'
runtime filters: RF000 -> l_partkey
row-size=56B cardinality=1.50M
---- DISTRIBUTEDPLAN
Max Per-Host Resource Reservation: Memory=28.94MB Threads=5
Per-Host Resource Estimates: Memory=615MB
PLAN-ROOT SINK
|
06:AGGREGATE [FINALIZE]
| output: sum:merge(l_extendedprice * (1 - l_discount))
| row-size=16B cardinality=1
|
05:EXCHANGE [UNPARTITIONED]
|
03:AGGREGATE
| output: sum(l_extendedprice * (1 - l_discount))
| row-size=16B cardinality=1
|
02:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: l_partkey = p_partkey
| other predicates: l_quantity <= 11 OR l_quantity <= 20 OR p_size <= 15, l_quantity <= 11 OR l_quantity >= 10 OR p_size <= 15, l_quantity <= 11 OR p_size <= 10 OR l_quantity <= 30, l_quantity <= 11 OR p_size <= 10 OR l_quantity >= 20, l_quantity <= 11 OR p_size <= 10 OR p_size <= 15, l_quantity >= 1 OR l_quantity <= 20 OR p_size <= 15, l_quantity >= 1 OR l_quantity >= 10 OR p_size <= 15, l_quantity >= 1 OR p_size <= 10 OR l_quantity <= 30, l_quantity >= 1 OR p_size <= 10 OR l_quantity >= 20, l_quantity >= 1 OR p_size <= 10 OR p_size <= 15, p_size <= 5 OR l_quantity <= 20 OR l_quantity <= 30, p_size <= 5 OR l_quantity <= 20 OR l_quantity >= 20, p_size <= 5 OR l_quantity <= 20 OR p_size <= 15, p_size <= 5 OR l_quantity >= 10 OR l_quantity <= 30, p_size <= 5 OR l_quantity >= 10 OR l_quantity >= 20, p_size <= 5 OR l_quantity >= 10 OR p_size <= 15, p_size <= 5 OR p_size <= 10 OR l_quantity <= 30, p_size <= 5 OR p_size <= 10 OR l_quantity >= 20, l_quantity <= 11 OR l_quantity <= 20 OR p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG'), l_quantity <= 11 OR l_quantity >= 10 OR p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG'), l_quantity <= 11 OR p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') OR l_quantity <= 30, l_quantity <= 11 OR p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') OR l_quantity >= 20, l_quantity <= 11 OR p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') OR p_size <= 15, l_quantity <= 11 OR p_size <= 10 OR p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG'), l_quantity >= 1 OR l_quantity <= 20 OR p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG'), l_quantity >= 1 OR l_quantity >= 10 OR p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG'), l_quantity >= 1 OR p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') OR l_quantity <= 30, l_quantity >= 1 OR p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') OR l_quantity >= 20, l_quantity >= 1 OR p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') OR p_size <= 15, l_quantity >= 1 OR p_size <= 10 OR p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG'), p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') OR l_quantity <= 20 OR l_quantity <= 30, p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') OR l_quantity <= 20 OR l_quantity >= 20, p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') OR l_quantity <= 20 OR p_size <= 15, p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') OR l_quantity >= 10 OR l_quantity <= 30, p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') OR l_quantity >= 10 OR l_quantity >= 20, p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') OR l_quantity >= 10 OR p_size <= 15, p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') OR p_size <= 10 OR l_quantity <= 30, p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') OR p_size <= 10 OR l_quantity >= 20, p_size <= 5 OR l_quantity <= 20 OR p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG'), p_size <= 5 OR l_quantity >= 10 OR p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG'), l_quantity <= 11 OR p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') OR p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG'), l_quantity >= 1 OR p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') OR p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG'), p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') OR l_quantity <= 20 OR p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG'), p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') OR l_quantity >= 10 OR p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG'), l_quantity <= 11 OR l_quantity <= 20 OR p_brand = 'Brand#34', l_quantity <= 11 OR l_quantity >= 10 OR p_brand = 'Brand#34', l_quantity <= 11 OR p_brand = 'Brand#23' OR l_quantity <= 30, l_quantity <= 11 OR p_brand = 'Brand#23' OR l_quantity >= 20, l_quantity <= 11 OR p_brand = 'Brand#23' OR p_size <= 15, l_quantity <= 11 OR p_size <= 10 OR p_brand = 'Brand#34', l_quantity >= 1 OR l_quantity <= 20 OR p_brand = 'Brand#34', l_quantity >= 1 OR l_quantity >= 10 OR p_brand = 'Brand#34', l_quantity >= 1 OR p_brand = 'Brand#23' OR l_quantity <= 30, l_quantity >= 1 OR p_brand = 'Brand#23' OR l_quantity >= 20, l_quantity >= 1 OR p_brand = 'Brand#23' OR p_size <= 15, l_quantity >= 1 OR p_size <= 10 OR p_brand = 'Brand#34', p_brand = 'Brand#12' OR l_quantity <= 20 OR l_quantity <= 30, p_brand = 'Brand#12' OR l_quantity <= 20 OR l_quantity >= 20, p_brand = 'Brand#12' OR l_quantity <= 20 OR p_size <= 15, p_brand = 'Brand#12' OR l_quantity >= 10 OR l_quantity <= 30, p_brand = 'Brand#12' OR l_quantity >= 10 OR l_quantity >= 20, p_brand = 'Brand#12' OR l_quantity >= 10 OR p_size <= 15, p_brand = 'Brand#12' OR p_size <= 10 OR l_quantity <= 30, p_brand = 'Brand#12' OR p_size <= 10 OR l_quantity >= 20, p_size <= 5 OR l_quantity <= 20 OR p_brand = 'Brand#34', p_size <= 5 OR l_quantity >= 10 OR p_brand = 'Brand#34', l_quantity <= 11 OR p_brand = 'Brand#23' OR p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG'), l_quantity <= 11 OR p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') OR p_brand = 'Brand#34', l_quantity >= 1 OR p_brand = 'Brand#23' OR p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG'), l_quantity >= 1 OR p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') OR p_brand = 'Brand#34', p_brand = 'Brand#12' OR l_quantity <= 20 OR p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG'), p_brand = 'Brand#12' OR l_quantity >= 10 OR p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG'), p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') OR l_quantity <= 20 OR p_brand = 'Brand#34', p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') OR l_quantity >= 10 OR p_brand = 'Brand#34', p_size <= 5 OR p_brand = 'Brand#23' AND p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') OR l_quantity <= 30, p_size <= 5 OR p_brand = 'Brand#23' AND p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') OR l_quantity >= 20, l_quantity <= 11 OR p_brand = 'Brand#23' OR p_brand = 'Brand#34', l_quantity >= 1 OR p_brand = 'Brand#23' OR p_brand = 'Brand#34', p_brand = 'Brand#12' OR l_quantity <= 20 OR p_brand = 'Brand#34', p_brand = 'Brand#12' OR l_quantity >= 10 OR p_brand = 'Brand#34', p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') OR p_brand = 'Brand#23' AND p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') OR l_quantity <= 30, p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') OR p_brand = 'Brand#23' AND p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') OR l_quantity >= 20, p_brand = 'Brand#12' OR p_brand = 'Brand#23' AND p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') OR l_quantity <= 30, p_brand = 'Brand#12' OR p_brand = 'Brand#23' AND p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') OR l_quantity >= 20
| runtime filters: RF000 <- p_partkey
| row-size=108B cardinality=1.50M
|
|--04:EXCHANGE [BROADCAST]
| |
| 01:SCAN HDFS [tpch_nested_parquet.part p]
| HDFS partitions=1/1 files=1 size=6.24MB
| predicates: p_brand = 'Brand#12' OR p_brand = 'Brand#23' AND p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') OR p_brand = 'Brand#34' AND p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG'), p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') OR p_brand = 'Brand#23' AND p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') OR p_brand = 'Brand#34' AND p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG'), p_size >= 1, p_size <= 5 OR p_size <= 10 OR p_size <= 15, p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') OR p_size <= 10 OR p_size <= 15, p_brand = 'Brand#12' OR p_size <= 10 OR p_size <= 15, p_size <= 5 OR p_brand = 'Brand#23' AND p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') OR p_size <= 15, p_size <= 5 OR p_size <= 10 OR p_brand = 'Brand#34' AND p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG'), p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') OR p_brand = 'Brand#23' AND p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') OR p_size <= 15, p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') OR p_size <= 10 OR p_brand = 'Brand#34' AND p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG'), p_brand = 'Brand#12' OR p_brand = 'Brand#23' AND p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') OR p_size <= 15, p_brand = 'Brand#12' OR p_size <= 10 OR p_brand = 'Brand#34' AND p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG'), p_size <= 5 OR p_brand = 'Brand#23' AND p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') OR p_brand = 'Brand#34' AND p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG')
| row-size=52B cardinality=1.43K
|
00:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems l]
HDFS partitions=1/1 files=4 size=289.08MB
predicates: l_shipmode IN ('AIR', 'AIR REG'), l_quantity <= 11 OR l_quantity <= 20 OR l_quantity <= 30, l_quantity <= 11 OR l_quantity <= 20 OR l_quantity >= 20, l_quantity <= 11 OR l_quantity >= 10 OR l_quantity <= 30, l_quantity <= 11 OR l_quantity >= 10 OR l_quantity >= 20, l_quantity >= 1 OR l_quantity <= 20 OR l_quantity <= 30, l_quantity >= 1 OR l_quantity <= 20 OR l_quantity >= 20, l_quantity >= 1 OR l_quantity >= 10 OR l_quantity <= 30, l_quantity >= 1 OR l_quantity >= 10 OR l_quantity >= 20, l_shipinstruct = 'DELIVER IN PERSON'
runtime filters: RF000 -> l_partkey
row-size=56B cardinality=1.50M
====
# TPCH-Q20
# Q20 - Potential Part Promotion Query
# Note: Tricky rewrite from the original to avoid mixing
# correlated and uncorrelated table refs in a subquery.
# Lists the distinct (s_name, s_address) of suppliers whose nation is 'CANADA' and
# that have an s_partsupps entry for a part named like 'forest%' whose ps_availqty
# exceeds 0.5 * the summed l_quantity of line items for the same part and supplier
# with l_shipdate in ['1994-01-01', '1995-01-01').
# In the expected plans the IN subquery becomes a LEFT SEMI JOIN on
# ps_partkey = p_partkey, and the correlated sum subquery becomes a RIGHT SEMI JOIN
# on (l_partkey, l_suppkey) with the ps_availqty comparison listed under
# "other join predicates".
select distinct
s_name,
s_address
from
supplier s,
s.s_partsupps ps,
region.r_nations n
where
ps_partkey in (
select
p_partkey
from
part p
where
p_name like 'forest%'
)
and ps_availqty > (
select
0.5 * sum(l_quantity)
from
customer.c_orders.o_lineitems l
where
l_partkey = ps_partkey
and l_suppkey = s_suppkey
and l_shipdate >= '1994-01-01'
and l_shipdate < '1995-01-01'
)
and s_nationkey = n_nationkey
and n_name = 'CANADA'
order by
s_name
---- PLAN
Max Per-Host Resource Reservation: Memory=90.81MB Threads=5
Per-Host Resource Estimates: Memory=888MB
PLAN-ROOT SINK
|
13:SORT
| order by: s_name ASC
| row-size=67B cardinality=100.00K
|
12:AGGREGATE [FINALIZE]
| group by: s_name, s_address
| row-size=67B cardinality=100.00K
|
11:HASH JOIN [RIGHT SEMI JOIN]
| hash predicates: l_partkey = ps_partkey, l_suppkey = s_suppkey
| other join predicates: ps_availqty > 0.5 * sum(l_quantity)
| runtime filters: RF000 <- ps_partkey, RF001 <- s_suppkey
| row-size=115B cardinality=100.00K
|
|--10:HASH JOIN [LEFT SEMI JOIN]
| | hash predicates: ps_partkey = p_partkey
| | row-size=115B cardinality=100.00K
| |
| |--06:SCAN HDFS [tpch_nested_parquet.part p]
| | HDFS partitions=1/1 files=1 size=6.24MB
| | predicates: p_name LIKE 'forest%'
| | row-size=53B cardinality=20.00K
| |
| 09:HASH JOIN [INNER JOIN]
| | hash predicates: s_nationkey = n_nationkey
| | runtime filters: RF006 <- n_nationkey
| | row-size=115B cardinality=100.00K
| |
| |--05:SCAN HDFS [tpch_nested_parquet.region.r_nations n]
| | HDFS partitions=1/1 files=1 size=3.59KB
| | predicates: n_name = 'CANADA'
| | row-size=14B cardinality=5
| |
| 01:SUBPLAN
| | row-size=101B cardinality=100.00K
| |
| |--04:NESTED LOOP JOIN [CROSS JOIN]
| | | row-size=101B cardinality=10
| | |
| | |--02:SINGULAR ROW SRC
| | | row-size=89B cardinality=1
| | |
| | 03:UNNEST [s.s_partsupps ps]
| | row-size=0B cardinality=10
| |
| 00:SCAN HDFS [tpch_nested_parquet.supplier s]
| HDFS partitions=1/1 files=1 size=41.80MB
| predicates: !empty(s.s_partsupps)
| runtime filters: RF006 -> s_nationkey
| row-size=89B cardinality=10.00K
|
08:AGGREGATE [FINALIZE]
| output: sum(l_quantity)
| group by: l_partkey, l_suppkey
| row-size=32B cardinality=1.50M
|
07:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems l]
HDFS partitions=1/1 files=4 size=289.08MB
predicates: l_shipdate < '1995-01-01', l_shipdate >= '1994-01-01'
runtime filters: RF000 -> l.l_partkey, RF001 -> l.l_suppkey
row-size=36B cardinality=1.50M
---- DISTRIBUTEDPLAN
Max Per-Host Resource Reservation: Memory=164.33MB Threads=11
Per-Host Resource Estimates: Memory=1.07GB
PLAN-ROOT SINK
|
21:MERGING-EXCHANGE [UNPARTITIONED]
| order by: s_name ASC
|
13:SORT
| order by: s_name ASC
| row-size=67B cardinality=100.00K
|
20:AGGREGATE [FINALIZE]
| group by: s_name, s_address
| row-size=67B cardinality=100.00K
|
19:EXCHANGE [HASH(s_name,s_address)]
|
12:AGGREGATE [STREAMING]
| group by: s_name, s_address
| row-size=67B cardinality=100.00K
|
11:HASH JOIN [RIGHT SEMI JOIN, PARTITIONED]
| hash predicates: l_partkey = ps_partkey, l_suppkey = s_suppkey
| other join predicates: ps_availqty > 0.5 * sum(l_quantity)
| runtime filters: RF000 <- ps_partkey, RF001 <- s_suppkey
| row-size=115B cardinality=100.00K
|
|--18:EXCHANGE [HASH(ps_partkey,s_suppkey)]
| |
| 10:HASH JOIN [LEFT SEMI JOIN, BROADCAST]
| | hash predicates: ps_partkey = p_partkey
| | row-size=115B cardinality=100.00K
| |
| |--17:EXCHANGE [BROADCAST]
| | |
| | 06:SCAN HDFS [tpch_nested_parquet.part p]
| | HDFS partitions=1/1 files=1 size=6.24MB
| | predicates: p_name LIKE 'forest%'
| | row-size=53B cardinality=20.00K
| |
| 09:HASH JOIN [INNER JOIN, BROADCAST]
| | hash predicates: s_nationkey = n_nationkey
| | runtime filters: RF006 <- n_nationkey
| | row-size=115B cardinality=100.00K
| |
| |--16:EXCHANGE [BROADCAST]
| | |
| | 05:SCAN HDFS [tpch_nested_parquet.region.r_nations n]
| | HDFS partitions=1/1 files=1 size=3.59KB
| | predicates: n_name = 'CANADA'
| | row-size=14B cardinality=5
| |
| 01:SUBPLAN
| | row-size=101B cardinality=100.00K
| |
| |--04:NESTED LOOP JOIN [CROSS JOIN]
| | | row-size=101B cardinality=10
| | |
| | |--02:SINGULAR ROW SRC
| | | row-size=89B cardinality=1
| | |
| | 03:UNNEST [s.s_partsupps ps]
| | row-size=0B cardinality=10
| |
| 00:SCAN HDFS [tpch_nested_parquet.supplier s]
| HDFS partitions=1/1 files=1 size=41.80MB
| predicates: !empty(s.s_partsupps)
| runtime filters: RF006 -> s_nationkey
| row-size=89B cardinality=10.00K
|
15:AGGREGATE [FINALIZE]
| output: sum:merge(l_quantity)
| group by: l_partkey, l_suppkey
| row-size=32B cardinality=1.50M
|
14:EXCHANGE [HASH(l_partkey,l_suppkey)]
|
08:AGGREGATE [STREAMING]
| output: sum(l_quantity)
| group by: l_partkey, l_suppkey
| row-size=32B cardinality=1.50M
|
07:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems l]
HDFS partitions=1/1 files=4 size=289.08MB
predicates: l_shipdate < '1995-01-01', l_shipdate >= '1994-01-01'
runtime filters: RF000 -> l.l_partkey, RF001 -> l.l_suppkey
row-size=36B cardinality=1.50M
====
# TPCH-Q21
# Q21 - Suppliers Who Kept Orders Waiting Query
# Nested-schema form: orders and line items are read through the nested collections
# c.c_orders and o.o_lineitems rather than as separate top-level tables.
# The EXISTS / NOT EXISTS subqueries read o.o_lineitems of the current order 'o', so
# they carry no explicit order-key predicate; only the supplier inequality (and, for
# l3, the late-receipt filter) is written out.
# The expected plans below evaluate both subqueries inside a SUBPLAN as nested-loop
# RIGHT SEMI / RIGHT ANTI joins over UNNESTs of o.o_lineitems, and list the
# receipt-vs-commit filters on the customer scan as "predicates on l1" / "on l3".
select
s_name,
count(*) as numwait
from
supplier s,
customer c,
c.c_orders o,
o.o_lineitems l1,
region.r_nations n
where
s_suppkey = l1.l_suppkey
and o_orderstatus = 'F'
and l1.l_receiptdate > l1.l_commitdate
and exists (
select
*
from
o.o_lineitems l2
where
l2.l_suppkey <> l1.l_suppkey
)
and not exists (
select
*
from
o.o_lineitems l3
where
l3.l_suppkey <> l1.l_suppkey
and l3.l_receiptdate > l3.l_commitdate
)
and s_nationkey = n_nationkey
and n_name = 'SAUDI ARABIA'
group by
s_name
order by
numwait desc,
s_name
limit 100
---- PLAN
Max Per-Host Resource Reservation: Memory=39.33MB Threads=4
Per-Host Resource Estimates: Memory=919MB
PLAN-ROOT SINK
|
20:TOP-N [LIMIT=100]
| order by: count(*) DESC, s_name ASC
| row-size=38B cardinality=100
|
19:AGGREGATE [FINALIZE]
| output: count(*)
| group by: s_name
| row-size=38B cardinality=9.96K
|
18:SUBPLAN
| row-size=146B cardinality=15.00M
|
|--16:NESTED LOOP JOIN [RIGHT ANTI JOIN]
| | join predicates: l3.l_suppkey != l1.l_suppkey
| | row-size=146B cardinality=1
| |
| |--15:NESTED LOOP JOIN [RIGHT SEMI JOIN]
| | | join predicates: l2.l_suppkey != l1.l_suppkey
| | | row-size=146B cardinality=1
| | |
| | |--12:SINGULAR ROW SRC
| | | row-size=146B cardinality=1
| | |
| | 13:UNNEST [o.o_lineitems l2]
| | row-size=8B cardinality=10
| |
| 14:UNNEST [o.o_lineitems l3]
| row-size=32B cardinality=10
|
17:HASH JOIN [INNER JOIN]
| hash predicates: s_nationkey = n_nationkey
| runtime filters: RF000 <- n_nationkey
| row-size=146B cardinality=15.00M
|
|--10:SCAN HDFS [tpch_nested_parquet.region.r_nations n]
| HDFS partitions=1/1 files=1 size=3.59KB
| predicates: n_name = 'SAUDI ARABIA'
| row-size=14B cardinality=5
|
11:HASH JOIN [INNER JOIN]
| hash predicates: l1.l_suppkey = s_suppkey
| row-size=132B cardinality=15.00M
|
|--00:SCAN HDFS [tpch_nested_parquet.supplier s]
| HDFS partitions=1/1 files=1 size=41.80MB
| runtime filters: RF000 -> s_nationkey
| row-size=40B cardinality=10.00K
|
02:SUBPLAN
| row-size=92B cardinality=15.00M
|
|--09:NESTED LOOP JOIN [CROSS JOIN]
| | row-size=92B cardinality=100
| |
| |--03:SINGULAR ROW SRC
| | row-size=12B cardinality=1
| |
| 05:SUBPLAN
| | row-size=80B cardinality=100
| |
| |--08:NESTED LOOP JOIN [CROSS JOIN]
| | | row-size=80B cardinality=10
| | |
| | |--06:SINGULAR ROW SRC
| | | row-size=48B cardinality=1
| | |
| | 07:UNNEST [o.o_lineitems l1]
| | row-size=0B cardinality=10
| |
| 04:UNNEST [c.c_orders o]
| row-size=0B cardinality=10
|
01:SCAN HDFS [tpch_nested_parquet.customer c]
HDFS partitions=1/1 files=4 size=289.08MB
predicates: !empty(c.c_orders)
predicates on o: !empty(o.o_lineitems), o_orderstatus = 'F'
predicates on l1: l1.l_receiptdate > l1.l_commitdate
predicates on l3: l3.l_receiptdate > l3.l_commitdate
row-size=12B cardinality=150.00K
---- DISTRIBUTEDPLAN
Max Per-Host Resource Reservation: Memory=42.84MB Threads=10
Per-Host Resource Estimates: Memory=1.10GB
PLAN-ROOT SINK
|
25:MERGING-EXCHANGE [UNPARTITIONED]
| order by: count(*) DESC, s_name ASC
| limit: 100
|
20:TOP-N [LIMIT=100]
| order by: count(*) DESC, s_name ASC
| row-size=38B cardinality=100
|
24:AGGREGATE [FINALIZE]
| output: count:merge(*)
| group by: s_name
| row-size=38B cardinality=9.96K
|
23:EXCHANGE [HASH(s_name)]
|
19:AGGREGATE [STREAMING]
| output: count(*)
| group by: s_name
| row-size=38B cardinality=9.96K
|
18:SUBPLAN
| row-size=146B cardinality=15.00M
|
|--16:NESTED LOOP JOIN [RIGHT ANTI JOIN]
| | join predicates: l3.l_suppkey != l1.l_suppkey
| | row-size=146B cardinality=1
| |
| |--15:NESTED LOOP JOIN [RIGHT SEMI JOIN]
| | | join predicates: l2.l_suppkey != l1.l_suppkey
| | | row-size=146B cardinality=1
| | |
| | |--12:SINGULAR ROW SRC
| | | row-size=146B cardinality=1
| | |
| | 13:UNNEST [o.o_lineitems l2]
| | row-size=8B cardinality=10
| |
| 14:UNNEST [o.o_lineitems l3]
| row-size=32B cardinality=10
|
17:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: s_nationkey = n_nationkey
| runtime filters: RF000 <- n_nationkey
| row-size=146B cardinality=15.00M
|
|--22:EXCHANGE [BROADCAST]
| |
| 10:SCAN HDFS [tpch_nested_parquet.region.r_nations n]
| HDFS partitions=1/1 files=1 size=3.59KB
| predicates: n_name = 'SAUDI ARABIA'
| row-size=14B cardinality=5
|
11:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: l1.l_suppkey = s_suppkey
| row-size=132B cardinality=15.00M
|
|--21:EXCHANGE [BROADCAST]
| |
| 00:SCAN HDFS [tpch_nested_parquet.supplier s]
| HDFS partitions=1/1 files=1 size=41.80MB
| runtime filters: RF000 -> s_nationkey
| row-size=40B cardinality=10.00K
|
02:SUBPLAN
| row-size=92B cardinality=15.00M
|
|--09:NESTED LOOP JOIN [CROSS JOIN]
| | row-size=92B cardinality=100
| |
| |--03:SINGULAR ROW SRC
| | row-size=12B cardinality=1
| |
| 05:SUBPLAN
| | row-size=80B cardinality=100
| |
| |--08:NESTED LOOP JOIN [CROSS JOIN]
| | | row-size=80B cardinality=10
| | |
| | |--06:SINGULAR ROW SRC
| | | row-size=48B cardinality=1
| | |
| | 07:UNNEST [o.o_lineitems l1]
| | row-size=0B cardinality=10
| |
| 04:UNNEST [c.c_orders o]
| row-size=0B cardinality=10
|
01:SCAN HDFS [tpch_nested_parquet.customer c]
HDFS partitions=1/1 files=4 size=289.08MB
predicates: !empty(c.c_orders)
predicates on o: !empty(o.o_lineitems), o_orderstatus = 'F'
predicates on l1: l1.l_receiptdate > l1.l_commitdate
predicates on l3: l3.l_receiptdate > l3.l_commitdate
row-size=12B cardinality=150.00K
====
# TPCH-Q22
# Q22 - Global Sales Opportunity Query
# Nested-schema form: "customer has no orders" is written as NOT EXISTS over the
# customer's own nested collection c.c_orders, with no join predicate.
# The avg(c_acctbal) subquery reuses the alias 'c' for its own, separate scan of
# customer. The c.c_orders reference sits outside that subquery, in the WHERE clause
# of custsale, so it refers to the outer 'customer c'.
# The expected plans below turn the NOT EXISTS into a nested-loop RIGHT ANTI JOIN
# over UNNEST [c.c_orders] with "limit: 1", and join the avg() result in through a
# NESTED LOOP JOIN [INNER JOIN] with predicate c_acctbal > avg(c_acctbal).
select
cntrycode,
count(*) as numcust,
sum(c_acctbal) as totacctbal
from (
select
substr(c_phone, 1, 2) as cntrycode,
c_acctbal
from
customer c
where
substr(c_phone, 1, 2) in ('13', '31', '23', '29', '30', '18', '17')
and c_acctbal > (
select
avg(c_acctbal)
from
customer c
where
c_acctbal > 0.00
and substr(c_phone, 1, 2) in ('13', '31', '23', '29', '30', '18', '17')
)
and not exists (
select
o_orderkey
from
c.c_orders
)
) as custsale
group by
cntrycode
order by
cntrycode
---- PLAN
Max Per-Host Resource Reservation: Memory=13.94MB Threads=3
Per-Host Resource Estimates: Memory=460MB
PLAN-ROOT SINK
|
09:SORT
| order by: cntrycode ASC
| row-size=36B cardinality=15.00K
|
08:AGGREGATE [FINALIZE]
| output: count(*), sum(c_acctbal)
| group by: substr(c_phone, 1, 2)
| row-size=36B cardinality=15.00K
|
07:SUBPLAN
| row-size=55B cardinality=15.00K
|
|--05:NESTED LOOP JOIN [RIGHT ANTI JOIN]
| | row-size=55B cardinality=1
| |
| |--03:SINGULAR ROW SRC
| | row-size=55B cardinality=1
| |
| 04:UNNEST [c.c_orders]
| limit: 1
| row-size=0B cardinality=10
|
06:NESTED LOOP JOIN [INNER JOIN]
| predicates: c_acctbal > avg(c_acctbal)
| row-size=55B cardinality=15.00K
|
|--02:AGGREGATE [FINALIZE]
| | output: avg(c_acctbal)
| | row-size=8B cardinality=1
| |
| 01:SCAN HDFS [tpch_nested_parquet.customer c]
| HDFS partitions=1/1 files=4 size=289.08MB
| predicates: c_acctbal > 0, substr(c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17')
| row-size=35B cardinality=15.00K
|
00:SCAN HDFS [tpch_nested_parquet.customer c]
HDFS partitions=1/1 files=4 size=289.08MB
predicates: substr(c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17')
row-size=47B cardinality=15.00K
---- DISTRIBUTEDPLAN
Max Per-Host Resource Reservation: Memory=27.94MB Threads=8
Per-Host Resource Estimates: Memory=688MB
PLAN-ROOT SINK
|
15:MERGING-EXCHANGE [UNPARTITIONED]
| order by: cntrycode ASC
|
09:SORT
| order by: cntrycode ASC
| row-size=36B cardinality=15.00K
|
14:AGGREGATE [FINALIZE]
| output: count:merge(*), sum:merge(c_acctbal)
| group by: cntrycode
| row-size=36B cardinality=15.00K
|
13:EXCHANGE [HASH(cntrycode)]
|
08:AGGREGATE [STREAMING]
| output: count(*), sum(c_acctbal)
| group by: substr(c_phone, 1, 2)
| row-size=36B cardinality=15.00K
|
07:SUBPLAN
| row-size=55B cardinality=15.00K
|
|--05:NESTED LOOP JOIN [RIGHT ANTI JOIN]
| | row-size=55B cardinality=1
| |
| |--03:SINGULAR ROW SRC
| | row-size=55B cardinality=1
| |
| 04:UNNEST [c.c_orders]
| limit: 1
| row-size=0B cardinality=10
|
06:NESTED LOOP JOIN [INNER JOIN, BROADCAST]
| predicates: c_acctbal > avg(c_acctbal)
| row-size=55B cardinality=15.00K
|
|--12:EXCHANGE [BROADCAST]
| |
| 11:AGGREGATE [FINALIZE]
| | output: avg:merge(c_acctbal)
| | row-size=8B cardinality=1
| |
| 10:EXCHANGE [UNPARTITIONED]
| |
| 02:AGGREGATE
| | output: avg(c_acctbal)
| | row-size=8B cardinality=1
| |
| 01:SCAN HDFS [tpch_nested_parquet.customer c]
| HDFS partitions=1/1 files=4 size=289.08MB
| predicates: c_acctbal > 0, substr(c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17')
| row-size=35B cardinality=15.00K
|
00:SCAN HDFS [tpch_nested_parquet.customer c]
HDFS partitions=1/1 files=4 size=289.08MB
predicates: substr(c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17')
row-size=47B cardinality=15.00K
====