mirror of
https://github.com/apache/impala.git
synced 2026-01-04 09:00:56 -05:00
Re-order union operands descending by their estimated per-host memory, s.t. parent nodes can gauge the peak memory consumption of a MergeNode after opening it during execution (a MergeNode opens its first operand in Open()). Scan nodes are always ordered last because they can dynamically scale down their memory usage, whereas many other nodes cannot (e.g., joins, aggregations). One goal is to decrease the likelihood of a SortNode parent claiming too much memory in its Open(), possibly causing the mem limit to be hit when subsequent union operands are executed. Change-Id: Ia51caaffd55305ea3dbd2146cd55acc7da67f382 Reviewed-on: http://gerrit.ent.cloudera.com:8080/3146 Reviewed-by: Alex Behm <alex.behm@cloudera.com> Tested-by: Alex Behm <alex.behm@cloudera.com> Reviewed-on: http://gerrit.ent.cloudera.com:8080/3213 Tested-by: jenkins
457 lines
12 KiB
Plaintext
# Basic test with a single with-clause view.
|
|
with t as (select int_col x, bigint_col y from functional.alltypes) select x, y from t
|
|
---- PLAN
|
|
00:SCAN HDFS [functional.alltypes]
|
|
partitions=24/24 size=478.45KB
|
|
---- DISTRIBUTEDPLAN
|
|
01:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
00:SCAN HDFS [functional.alltypes]
|
|
partitions=24/24 size=478.45KB
|
|
====
|
|
# Basic test with a single with-clause view that references a catalog view.
|
|
with t as (select int_col x, bigint_col y from functional.alltypes_view)
|
|
select x, y from t
|
|
---- PLAN
|
|
00:SCAN HDFS [functional.alltypes]
|
|
partitions=24/24 size=478.45KB
|
|
---- DISTRIBUTEDPLAN
|
|
01:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
00:SCAN HDFS [functional.alltypes]
|
|
partitions=24/24 size=478.45KB
|
|
====
|
|
# Multiple views in with-clause. Only one view is used.
|
|
with t1 as (select int_col x, bigint_col y from functional.alltypes),
|
|
t2 as (select 1 x , 10 y), t3 as (values(2 x , 20 y), (3, 30))
|
|
select x, y from t2
|
|
---- PLAN
|
|
00:UNION
|
|
constant-operands=1
|
|
---- DISTRIBUTEDPLAN
|
|
00:UNION
|
|
constant-operands=1
|
|
====
|
|
# Multiple views in with-clause. All views are used in a union.
|
|
with t1 as (select int_col x, bigint_col y from functional.alltypes),
|
|
t2 as (select 1 x , 10 y), t3 as (values(2 x , 20 y), (3, 30))
|
|
select * from t1 union all select * from t2 union all select * from t3
|
|
---- PLAN
|
|
00:UNION
|
|
|
|
|
|--03:UNION
|
|
| constant-operands=2
|
|
|
|
|
|--02:UNION
|
|
| constant-operands=1
|
|
|
|
|
01:SCAN HDFS [functional.alltypes]
|
|
partitions=24/24 size=478.45KB
|
|
---- DISTRIBUTEDPLAN
|
|
06:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
00:UNION
|
|
|
|
|
|--01:SCAN HDFS [functional.alltypes]
|
|
| partitions=24/24 size=478.45KB
|
|
|
|
|
|--05:EXCHANGE [RANDOM]
|
|
| |
|
|
| 03:UNION
|
|
| constant-operands=2
|
|
|
|
|
04:EXCHANGE [RANDOM]
|
|
|
|
|
02:UNION
|
|
constant-operands=1
|
|
====
|
|
# Multiple views in with-clause. All views are used in a join.
|
|
with t1 as (select int_col x, bigint_col y from functional.alltypes),
|
|
t2 as (select int_col x, bigint_col y from functional.alltypestiny),
|
|
t3 as (select int_col x, bigint_col y from functional.alltypessmall)
|
|
select * from t1, t2, t3 where t1.x = t2.x and t2.x = t3.x
|
|
---- PLAN
|
|
04:HASH JOIN [INNER JOIN]
|
|
| hash predicates: int_col = int_col
|
|
|
|
|
|--02:SCAN HDFS [functional.alltypessmall]
|
|
| partitions=4/4 size=6.32KB compact
|
|
|
|
|
03:HASH JOIN [INNER JOIN]
|
|
| hash predicates: int_col = int_col
|
|
|
|
|
|--01:SCAN HDFS [functional.alltypestiny]
|
|
| partitions=4/4 size=460B compact
|
|
|
|
|
00:SCAN HDFS [functional.alltypes]
|
|
partitions=24/24 size=478.45KB
|
|
---- DISTRIBUTEDPLAN
|
|
07:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
04:HASH JOIN [INNER JOIN, BROADCAST]
|
|
| hash predicates: int_col = int_col
|
|
|
|
|
|--06:EXCHANGE [BROADCAST]
|
|
| |
|
|
| 02:SCAN HDFS [functional.alltypessmall]
|
|
| partitions=4/4 size=6.32KB
|
|
|
|
|
03:HASH JOIN [INNER JOIN, BROADCAST]
|
|
| hash predicates: int_col = int_col
|
|
|
|
|
|--05:EXCHANGE [BROADCAST]
|
|
| |
|
|
| 01:SCAN HDFS [functional.alltypestiny]
|
|
| partitions=4/4 size=460B
|
|
|
|
|
00:SCAN HDFS [functional.alltypes]
|
|
partitions=24/24 size=478.45KB
|
|
====
|
|
# Multiple dependent views in with-clause
|
|
with t1 as (
|
|
select int_col c1, tinyint_col c2, max(id) c3
|
|
from functional.alltypessmall
|
|
group by 1, 2
|
|
order by 1,2
|
|
limit 5),
|
|
t2 as (select c1, c2, c3 from t1),
|
|
t3 as (
|
|
select c1, c3, max(c2) m2
|
|
from t2
|
|
group by c1, c3
|
|
limit 10),
|
|
t4 as (select c1, c3, m2 from t3)
|
|
select * from t4
|
|
where c1 > 0
|
|
order by c3, c1 desc
|
|
limit 3
|
|
---- PLAN
|
|
05:TOP-N [LIMIT=3]
|
|
| order by: c3 ASC, c1 DESC
|
|
|
|
|
04:SELECT
|
|
| predicates: c1 > 0
|
|
|
|
|
03:AGGREGATE [FINALIZE]
|
|
| output: max(tinyint_col)
|
|
| group by: int_col, max(id)
|
|
| limit: 10
|
|
|
|
|
02:TOP-N [LIMIT=5]
|
|
| order by: int_col ASC, tinyint_col ASC
|
|
|
|
|
01:AGGREGATE [FINALIZE]
|
|
| output: max(id)
|
|
| group by: int_col, tinyint_col
|
|
|
|
|
00:SCAN HDFS [functional.alltypessmall]
|
|
partitions=4/4 size=6.32KB
|
|
---- DISTRIBUTEDPLAN
|
|
05:TOP-N [LIMIT=3]
|
|
| order by: c3 ASC, c1 DESC
|
|
|
|
|
04:SELECT
|
|
| predicates: c1 > 0
|
|
|
|
|
03:AGGREGATE [FINALIZE]
|
|
| output: max(tinyint_col)
|
|
| group by: int_col, max(id)
|
|
| limit: 10
|
|
|
|
|
08:MERGING-EXCHANGE [UNPARTITIONED]
|
|
| order by: int_col ASC, tinyint_col ASC
|
|
| limit: 5
|
|
|
|
|
02:TOP-N [LIMIT=5]
|
|
| order by: int_col ASC, tinyint_col ASC
|
|
|
|
|
07:AGGREGATE [MERGE FINALIZE]
|
|
| output: max(max(id))
|
|
| group by: int_col, tinyint_col
|
|
|
|
|
06:EXCHANGE [HASH(int_col,tinyint_col)]
|
|
|
|
|
01:AGGREGATE
|
|
| output: max(id)
|
|
| group by: int_col, tinyint_col
|
|
|
|
|
00:SCAN HDFS [functional.alltypessmall]
|
|
partitions=4/4 size=6.32KB
|
|
====
|
|
# Self-join of with-clause table to make sure the on clause is properly set
|
|
# in the cloned inline-view instances.
|
|
with t as (select int_col x, bigint_col y from functional.alltypestiny)
|
|
select * from t t1 inner join t t2 on (t1.x = t2.x) inner join t t3 on (t2.x = t3.x)
|
|
---- PLAN
|
|
04:HASH JOIN [INNER JOIN]
|
|
| hash predicates: int_col = int_col
|
|
|
|
|
|--02:SCAN HDFS [functional.alltypestiny]
|
|
| partitions=4/4 size=460B compact
|
|
|
|
|
03:HASH JOIN [INNER JOIN]
|
|
| hash predicates: int_col = int_col
|
|
|
|
|
|--01:SCAN HDFS [functional.alltypestiny]
|
|
| partitions=4/4 size=460B compact
|
|
|
|
|
00:SCAN HDFS [functional.alltypestiny]
|
|
partitions=4/4 size=460B
|
|
---- DISTRIBUTEDPLAN
|
|
08:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
04:HASH JOIN [INNER JOIN, PARTITIONED]
|
|
| hash predicates: int_col = int_col
|
|
|
|
|
|--07:EXCHANGE [HASH(int_col)]
|
|
| |
|
|
| 02:SCAN HDFS [functional.alltypestiny]
|
|
| partitions=4/4 size=460B
|
|
|
|
|
03:HASH JOIN [INNER JOIN, PARTITIONED]
|
|
| hash predicates: int_col = int_col
|
|
|
|
|
|--06:EXCHANGE [HASH(int_col)]
|
|
| |
|
|
| 01:SCAN HDFS [functional.alltypestiny]
|
|
| partitions=4/4 size=460B
|
|
|
|
|
05:EXCHANGE [HASH(int_col)]
|
|
|
|
|
00:SCAN HDFS [functional.alltypestiny]
|
|
partitions=4/4 size=460B
|
|
====
|
|
# Self-join of with-clause table to make sure the using clause is properly set
|
|
# in the cloned inline-view instances.
|
|
with t as (select int_col x, bigint_col y from functional.alltypestiny)
|
|
select * from t t1 inner join t t2 using(x) inner join t t3 using(x)
|
|
---- PLAN
|
|
04:HASH JOIN [INNER JOIN]
|
|
| hash predicates: int_col = int_col
|
|
|
|
|
|--02:SCAN HDFS [functional.alltypestiny]
|
|
| partitions=4/4 size=460B compact
|
|
|
|
|
03:HASH JOIN [INNER JOIN]
|
|
| hash predicates: int_col = int_col
|
|
|
|
|
|--01:SCAN HDFS [functional.alltypestiny]
|
|
| partitions=4/4 size=460B compact
|
|
|
|
|
00:SCAN HDFS [functional.alltypestiny]
|
|
partitions=4/4 size=460B
|
|
---- DISTRIBUTEDPLAN
|
|
08:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
04:HASH JOIN [INNER JOIN, PARTITIONED]
|
|
| hash predicates: int_col = int_col
|
|
|
|
|
|--07:EXCHANGE [HASH(int_col)]
|
|
| |
|
|
| 02:SCAN HDFS [functional.alltypestiny]
|
|
| partitions=4/4 size=460B
|
|
|
|
|
03:HASH JOIN [INNER JOIN, PARTITIONED]
|
|
| hash predicates: int_col = int_col
|
|
|
|
|
|--06:EXCHANGE [HASH(int_col)]
|
|
| |
|
|
| 01:SCAN HDFS [functional.alltypestiny]
|
|
| partitions=4/4 size=460B
|
|
|
|
|
05:EXCHANGE [HASH(int_col)]
|
|
|
|
|
00:SCAN HDFS [functional.alltypestiny]
|
|
partitions=4/4 size=460B
|
|
====
|
|
# Self-join of with-clause table to make sure the join op is properly set
|
|
# in the cloned inline-view instances.
|
|
with t as (select int_col x, bigint_col y from functional.alltypestiny)
|
|
select * from t t1 left outer join t t2 using(x) full outer join t t3 using(x)
|
|
---- PLAN
|
|
04:HASH JOIN [FULL OUTER JOIN]
|
|
| hash predicates: int_col = int_col
|
|
|
|
|
|--02:SCAN HDFS [functional.alltypestiny]
|
|
| partitions=4/4 size=460B compact
|
|
|
|
|
03:HASH JOIN [LEFT OUTER JOIN]
|
|
| hash predicates: int_col = int_col
|
|
|
|
|
|--01:SCAN HDFS [functional.alltypestiny]
|
|
| partitions=4/4 size=460B compact
|
|
|
|
|
00:SCAN HDFS [functional.alltypestiny]
|
|
partitions=4/4 size=460B
|
|
---- DISTRIBUTEDPLAN
|
|
08:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
04:HASH JOIN [FULL OUTER JOIN, PARTITIONED]
|
|
| hash predicates: int_col = int_col
|
|
|
|
|
|--07:EXCHANGE [HASH(int_col)]
|
|
| |
|
|
| 02:SCAN HDFS [functional.alltypestiny]
|
|
| partitions=4/4 size=460B
|
|
|
|
|
03:HASH JOIN [LEFT OUTER JOIN, PARTITIONED]
|
|
| hash predicates: int_col = int_col
|
|
|
|
|
|--06:EXCHANGE [HASH(int_col)]
|
|
| |
|
|
| 01:SCAN HDFS [functional.alltypestiny]
|
|
| partitions=4/4 size=460B
|
|
|
|
|
05:EXCHANGE [HASH(int_col)]
|
|
|
|
|
00:SCAN HDFS [functional.alltypestiny]
|
|
partitions=4/4 size=460B
|
|
====
|
|
# Self-join of with-clause table to make sure join hints are properly set
|
|
# in the cloned inline-view instances.
|
|
# Note that in the plan above without hints the first join uses shuffle
|
|
# and the second broadcast.
|
|
with t as (select int_col x, bigint_col y from functional.alltypestiny)
|
|
select * from t t1 inner join [broadcast] t t2 using(x) inner join [shuffle] t t3 using(x)
|
|
---- PLAN
|
|
04:HASH JOIN [INNER JOIN]
|
|
| hash predicates: int_col = int_col
|
|
|
|
|
|--02:SCAN HDFS [functional.alltypestiny]
|
|
| partitions=4/4 size=460B compact
|
|
|
|
|
03:HASH JOIN [INNER JOIN]
|
|
| hash predicates: int_col = int_col
|
|
|
|
|
|--01:SCAN HDFS [functional.alltypestiny]
|
|
| partitions=4/4 size=460B compact
|
|
|
|
|
00:SCAN HDFS [functional.alltypestiny]
|
|
partitions=4/4 size=460B
|
|
---- DISTRIBUTEDPLAN
|
|
08:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
04:HASH JOIN [INNER JOIN, PARTITIONED]
|
|
| hash predicates: int_col = int_col
|
|
|
|
|
|--07:EXCHANGE [HASH(int_col)]
|
|
| |
|
|
| 02:SCAN HDFS [functional.alltypestiny]
|
|
| partitions=4/4 size=460B
|
|
|
|
|
06:EXCHANGE [HASH(int_col)]
|
|
|
|
|
03:HASH JOIN [INNER JOIN, BROADCAST]
|
|
| hash predicates: int_col = int_col
|
|
|
|
|
|--05:EXCHANGE [BROADCAST]
|
|
| |
|
|
| 01:SCAN HDFS [functional.alltypestiny]
|
|
| partitions=4/4 size=460B
|
|
|
|
|
00:SCAN HDFS [functional.alltypestiny]
|
|
partitions=4/4 size=460B
|
|
====
|
|
# Multiple with clauses. One for the UnionStmt and one for each union operand.
|
|
with t1 as (values('a', 'b'))
|
|
(with t2 as (values('c', 'd')) select * from t2) union all
|
|
(with t3 as (values('e', 'f')) select * from t3) order by 1 limit 1
|
|
---- PLAN
|
|
03:TOP-N [LIMIT=1]
|
|
| order by: 'c' ASC
|
|
|
|
|
00:UNION
|
|
|
|
|
|--02:UNION
|
|
| constant-operands=1
|
|
|
|
|
01:UNION
|
|
constant-operands=1
|
|
---- DISTRIBUTEDPLAN
|
|
03:TOP-N [LIMIT=1]
|
|
| order by: 'c' ASC
|
|
|
|
|
00:UNION
|
|
|
|
|
|--02:UNION
|
|
| constant-operands=1
|
|
|
|
|
01:UNION
|
|
constant-operands=1
|
|
====
|
|
# Multiple with clauses. One for the UnionStmt and one for each union operand.
|
|
with t1 as (values('a', 'b'))
|
|
(with t2 as (values('c', 'd')) select * from t2) union all
|
|
(with t3 as (values('e', 'f')) select * from t3) order by 1 limit 1
|
|
---- PLAN
|
|
03:TOP-N [LIMIT=1]
|
|
| order by: 'c' ASC
|
|
|
|
|
00:UNION
|
|
|
|
|
|--02:UNION
|
|
| constant-operands=1
|
|
|
|
|
01:UNION
|
|
constant-operands=1
|
|
---- DISTRIBUTEDPLAN
|
|
03:TOP-N [LIMIT=1]
|
|
| order by: 'c' ASC
|
|
|
|
|
00:UNION
|
|
|
|
|
|--02:UNION
|
|
| constant-operands=1
|
|
|
|
|
01:UNION
|
|
constant-operands=1
|
|
====
|
|
# Test with clause in an insert statement.
|
|
with t1 as (select * from functional.alltypestiny)
|
|
insert into functional.alltypesinsert partition(year, month) select * from t1
|
|
---- PLAN
|
|
WRITE TO HDFS [functional.alltypesinsert, OVERWRITE=false, PARTITION-KEYS=(functional.alltypestiny.year,functional.alltypestiny.month)]
|
|
| partitions=4
|
|
|
|
|
00:SCAN HDFS [functional.alltypestiny]
|
|
partitions=4/4 size=460B
|
|
---- DISTRIBUTEDPLAN
|
|
WRITE TO HDFS [functional.alltypesinsert, OVERWRITE=false, PARTITION-KEYS=(functional.alltypestiny.year,functional.alltypestiny.month)]
|
|
| partitions=4
|
|
|
|
|
01:EXCHANGE [HASH(functional.alltypestiny.year,functional.alltypestiny.month)]
|
|
|
|
|
00:SCAN HDFS [functional.alltypestiny]
|
|
partitions=4/4 size=460B
|
|
====
|
|
# Test with clause in an insert statement and in its query statement.
|
|
with t1 as (select * from functional.alltypestiny)
|
|
insert into functional.alltypesinsert partition(year, month)
|
|
with t2 as (select * from functional.alltypestiny)
|
|
select * from t1 union all select * from t2
|
|
---- PLAN
|
|
WRITE TO HDFS [functional.alltypesinsert, OVERWRITE=false, PARTITION-KEYS=(year,month)]
|
|
| partitions=16
|
|
|
|
|
00:UNION
|
|
|
|
|
|--02:SCAN HDFS [functional.alltypestiny]
|
|
| partitions=4/4 size=460B
|
|
|
|
|
01:SCAN HDFS [functional.alltypestiny]
|
|
partitions=4/4 size=460B
|
|
---- DISTRIBUTEDPLAN
|
|
WRITE TO HDFS [functional.alltypesinsert, OVERWRITE=false, PARTITION-KEYS=(year,month)]
|
|
| partitions=16
|
|
|
|
|
03:EXCHANGE [HASH(year,month)]
|
|
|
|
|
00:UNION
|
|
|
|
|
|--02:SCAN HDFS [functional.alltypestiny]
|
|
| partitions=4/4 size=460B
|
|
|
|
|
01:SCAN HDFS [functional.alltypestiny]
|
|
partitions=4/4 size=460B
|
|
====
|