mirror of
https://github.com/apache/impala.git
synced 2026-01-05 12:01:11 -05:00
The reported issue is that we can have redundant hash expressions in exchanges. The underlying cause is that we fail to remove redundant join predicates. This patch enforces slot equivalences based on our computed equivalence classes at the lowest possible plan node by generating new equality predicates. Each plan subtree now has a minimal set of equality predicates that express all known equivalences between slots belonging to tuples materialized at that plan node. As a result, eliminating redundant join predicates becomes trivial: It is sufficient to pick a single representative predicate of each relevant equivalence class. All predicates beyond that are redundant. Change-Id: I7998fe8d7bdf84cc8eb129d32c86269bedeab68e Reviewed-on: http://gerrit.ent.cloudera.com:8080/2177 Reviewed-by: Alex Behm <alex.behm@cloudera.com> Tested-by: jenkins Reviewed-on: http://gerrit.ent.cloudera.com:8080/2278
395 lines
11 KiB
Plaintext
395 lines
11 KiB
Plaintext
# Basic test with a view.
|
|
select int_col, string_col from functional.alltypes_view
|
|
---- PLAN
|
|
00:SCAN HDFS [functional.alltypes]
|
|
partitions=24/24 size=478.45KB
|
|
---- DISTRIBUTEDPLAN
|
|
01:EXCHANGE [PARTITION=UNPARTITIONED]
|
|
|
|
|
00:SCAN HDFS [functional.alltypes]
|
|
partitions=24/24 size=478.45KB
|
|
====
|
|
# Basic test with a complex view.
|
|
select * from functional.complex_view
|
|
---- PLAN
|
|
04:TOP-N [LIMIT=100]
|
|
| order by: b.string_col ASC
|
|
|
|
|
03:AGGREGATE [FINALIZE]
|
|
| output: count(a.bigint_col)
|
|
| group by: b.string_col
|
|
| having: count(a.bigint_col) > 1
|
|
|
|
|
02:HASH JOIN [INNER JOIN]
|
|
| hash predicates: a.id = b.id
|
|
|
|
|
|--01:SCAN HDFS [functional.alltypestiny b]
|
|
| partitions=4/4 size=460B compact
|
|
|
|
|
00:SCAN HDFS [functional.alltypesagg a]
|
|
partitions=10/10 size=743.67KB
|
|
predicates: a.bigint_col < 50
|
|
---- DISTRIBUTEDPLAN
|
|
09:TOP-N [LIMIT=100]
|
|
| order by: b.string_col ASC
|
|
|
|
|
08:EXCHANGE [PARTITION=UNPARTITIONED]
|
|
|
|
|
04:TOP-N [LIMIT=100]
|
|
| order by: b.string_col ASC
|
|
|
|
|
07:AGGREGATE [MERGE FINALIZE]
|
|
| output: sum(count(a.bigint_col))
|
|
| group by: b.string_col
|
|
| having: count(a.bigint_col) > 1
|
|
|
|
|
06:EXCHANGE [PARTITION=HASH(b.string_col)]
|
|
|
|
|
03:AGGREGATE
|
|
| output: count(a.bigint_col)
|
|
| group by: b.string_col
|
|
|
|
|
02:HASH JOIN [INNER JOIN, BROADCAST]
|
|
| hash predicates: a.id = b.id
|
|
|
|
|
|--05:EXCHANGE [BROADCAST]
|
|
| |
|
|
| 01:SCAN HDFS [functional.alltypestiny b]
|
|
| partitions=4/4 size=460B
|
|
|
|
|
00:SCAN HDFS [functional.alltypesagg a]
|
|
partitions=10/10 size=743.67KB
|
|
predicates: a.bigint_col < 50
|
|
====
|
|
# Basic test with a view on a view.
|
|
select int_col, string_col from functional.view_view
|
|
---- PLAN
|
|
00:SCAN HDFS [functional.alltypes]
|
|
partitions=24/24 size=478.45KB
|
|
---- DISTRIBUTEDPLAN
|
|
01:EXCHANGE [PARTITION=UNPARTITIONED]
|
|
|
|
|
00:SCAN HDFS [functional.alltypes]
|
|
partitions=24/24 size=478.45KB
|
|
====
|
|
# View used in a union.
|
|
select * from functional.alltypes_view union all
|
|
select * from functional.alltypes_view where id < 10
|
|
---- PLAN
|
|
00:MERGE
|
|
|
|
|
|--02:SCAN HDFS [functional.alltypes]
|
|
| partitions=24/24 size=478.45KB
|
|
| predicates: functional.alltypes.id < 10
|
|
|
|
|
01:SCAN HDFS [functional.alltypes]
|
|
partitions=24/24 size=478.45KB
|
|
---- DISTRIBUTEDPLAN
|
|
03:EXCHANGE [PARTITION=UNPARTITIONED]
|
|
|
|
|
|--05:MERGE
|
|
| |
|
|
| 02:SCAN HDFS [functional.alltypes]
|
|
| partitions=24/24 size=478.45KB
|
|
| predicates: functional.alltypes.id < 10
|
|
|
|
|
04:MERGE
|
|
|
|
|
01:SCAN HDFS [functional.alltypes]
|
|
partitions=24/24 size=478.45KB
|
|
====
|
|
# View used in an inline view.
|
|
select t.id from (select id from functional.alltypes_view) t
|
|
where t.id < 10
|
|
---- PLAN
|
|
00:SCAN HDFS [functional.alltypes]
|
|
partitions=24/24 size=478.45KB
|
|
predicates: functional.alltypes.id < 10
|
|
---- DISTRIBUTEDPLAN
|
|
01:EXCHANGE [PARTITION=UNPARTITIONED]
|
|
|
|
|
00:SCAN HDFS [functional.alltypes]
|
|
partitions=24/24 size=478.45KB
|
|
predicates: functional.alltypes.id < 10
|
|
====
|
|
# Multiple views used in a join.
|
|
select * from functional.alltypes_view t1, functional.alltypes_view_sub t2,
|
|
functional.complex_view t3 where t1.id = t2.x and t2.x = t3.abc
|
|
---- PLAN
|
|
08:HASH JOIN [INNER JOIN]
|
|
| hash predicates: functional.alltypes.id = int_col
|
|
|
|
|
|--01:SCAN HDFS [functional.alltypes]
|
|
| partitions=24/24 size=478.45KB compact
|
|
| predicates: functional.alltypes.int_col > 1
|
|
|
|
|
07:HASH JOIN [INNER JOIN]
|
|
| hash predicates: functional.alltypes.id = count(a.bigint_col)
|
|
|
|
|
|--06:TOP-N [LIMIT=100]
|
|
| | order by: b.string_col ASC
|
|
| |
|
|
| 05:AGGREGATE [FINALIZE]
|
|
| | output: count(a.bigint_col)
|
|
| | group by: b.string_col
|
|
| | having: count(a.bigint_col) > 1
|
|
| |
|
|
| 04:HASH JOIN [INNER JOIN]
|
|
| | hash predicates: a.id = b.id
|
|
| |
|
|
| |--03:SCAN HDFS [functional.alltypestiny b]
|
|
| | partitions=4/4 size=460B compact
|
|
| |
|
|
| 02:SCAN HDFS [functional.alltypesagg a]
|
|
| partitions=10/10 size=743.67KB
|
|
| predicates: a.bigint_col < 50
|
|
|
|
|
00:SCAN HDFS [functional.alltypes]
|
|
partitions=24/24 size=478.45KB
|
|
predicates: functional.alltypes.id > 1
|
|
---- DISTRIBUTEDPLAN
|
|
16:EXCHANGE [PARTITION=UNPARTITIONED]
|
|
|
|
|
08:HASH JOIN [INNER JOIN, BROADCAST]
|
|
| hash predicates: functional.alltypes.id = int_col
|
|
|
|
|
|--15:EXCHANGE [BROADCAST]
|
|
| |
|
|
| 01:SCAN HDFS [functional.alltypes]
|
|
| partitions=24/24 size=478.45KB
|
|
| predicates: functional.alltypes.int_col > 1
|
|
|
|
|
07:HASH JOIN [INNER JOIN, BROADCAST]
|
|
| hash predicates: functional.alltypes.id = count(a.bigint_col)
|
|
|
|
|
|--14:EXCHANGE [BROADCAST]
|
|
| | limit: 100
|
|
| |
|
|
| 13:TOP-N [LIMIT=100]
|
|
| | order by: b.string_col ASC
|
|
| |
|
|
| 12:EXCHANGE [PARTITION=UNPARTITIONED]
|
|
| |
|
|
| 06:TOP-N [LIMIT=100]
|
|
| | order by: b.string_col ASC
|
|
| |
|
|
| 11:AGGREGATE [MERGE FINALIZE]
|
|
| | output: sum(count(a.bigint_col))
|
|
| | group by: b.string_col
|
|
| | having: count(a.bigint_col) > 1
|
|
| |
|
|
| 10:EXCHANGE [PARTITION=HASH(b.string_col)]
|
|
| |
|
|
| 05:AGGREGATE
|
|
| | output: count(a.bigint_col)
|
|
| | group by: b.string_col
|
|
| |
|
|
| 04:HASH JOIN [INNER JOIN, BROADCAST]
|
|
| | hash predicates: a.id = b.id
|
|
| |
|
|
| |--09:EXCHANGE [BROADCAST]
|
|
| | |
|
|
| | 03:SCAN HDFS [functional.alltypestiny b]
|
|
| | partitions=4/4 size=460B
|
|
| |
|
|
| 02:SCAN HDFS [functional.alltypesagg a]
|
|
| partitions=10/10 size=743.67KB
|
|
| predicates: a.bigint_col < 50
|
|
|
|
|
00:SCAN HDFS [functional.alltypes]
|
|
partitions=24/24 size=478.45KB
|
|
predicates: functional.alltypes.id > 1
|
|
====
|
|
# Self-join of view to make sure the on clause is properly set
|
|
# in the cloned view instances.
|
|
select * from functional.alltypes_view t1
|
|
inner join functional.alltypes_view t2 on (t1.id = t2.id)
|
|
inner join functional.alltypes_view t3 on (t2.id = t3.id)
|
|
---- PLAN
|
|
04:HASH JOIN [INNER JOIN]
|
|
| hash predicates: functional.alltypes.id = functional.alltypes.id
|
|
|
|
|
|--02:SCAN HDFS [functional.alltypes]
|
|
| partitions=24/24 size=478.45KB compact
|
|
|
|
|
03:HASH JOIN [INNER JOIN]
|
|
| hash predicates: functional.alltypes.id = functional.alltypes.id
|
|
|
|
|
|--01:SCAN HDFS [functional.alltypes]
|
|
| partitions=24/24 size=478.45KB compact
|
|
|
|
|
00:SCAN HDFS [functional.alltypes]
|
|
partitions=24/24 size=478.45KB
|
|
---- DISTRIBUTEDPLAN
|
|
08:EXCHANGE [PARTITION=UNPARTITIONED]
|
|
|
|
|
04:HASH JOIN [INNER JOIN, PARTITIONED]
|
|
| hash predicates: functional.alltypes.id = functional.alltypes.id
|
|
|
|
|
|--07:EXCHANGE [PARTITION=HASH(functional.alltypes.id)]
|
|
| |
|
|
| 02:SCAN HDFS [functional.alltypes]
|
|
| partitions=24/24 size=478.45KB
|
|
|
|
|
03:HASH JOIN [INNER JOIN, PARTITIONED]
|
|
| hash predicates: functional.alltypes.id = functional.alltypes.id
|
|
|
|
|
|--06:EXCHANGE [PARTITION=HASH(functional.alltypes.id)]
|
|
| |
|
|
| 01:SCAN HDFS [functional.alltypes]
|
|
| partitions=24/24 size=478.45KB
|
|
|
|
|
05:EXCHANGE [PARTITION=HASH(functional.alltypes.id)]
|
|
|
|
|
00:SCAN HDFS [functional.alltypes]
|
|
partitions=24/24 size=478.45KB
|
|
====
|
|
# Self-join of view to make sure the using clause is properly set
|
|
# in the cloned view instances.
|
|
select * from functional.alltypes_view t1
|
|
inner join functional.alltypes_view t2 using(id)
|
|
inner join functional.alltypes_view t3 using(id)
|
|
---- PLAN
|
|
04:HASH JOIN [INNER JOIN]
|
|
| hash predicates: functional.alltypes.id = functional.alltypes.id
|
|
|
|
|
|--02:SCAN HDFS [functional.alltypes]
|
|
| partitions=24/24 size=478.45KB compact
|
|
|
|
|
03:HASH JOIN [INNER JOIN]
|
|
| hash predicates: functional.alltypes.id = functional.alltypes.id
|
|
|
|
|
|--01:SCAN HDFS [functional.alltypes]
|
|
| partitions=24/24 size=478.45KB compact
|
|
|
|
|
00:SCAN HDFS [functional.alltypes]
|
|
partitions=24/24 size=478.45KB
|
|
---- DISTRIBUTEDPLAN
|
|
08:EXCHANGE [PARTITION=UNPARTITIONED]
|
|
|
|
|
04:HASH JOIN [INNER JOIN, PARTITIONED]
|
|
| hash predicates: functional.alltypes.id = functional.alltypes.id
|
|
|
|
|
|--07:EXCHANGE [PARTITION=HASH(functional.alltypes.id)]
|
|
| |
|
|
| 02:SCAN HDFS [functional.alltypes]
|
|
| partitions=24/24 size=478.45KB
|
|
|
|
|
03:HASH JOIN [INNER JOIN, PARTITIONED]
|
|
| hash predicates: functional.alltypes.id = functional.alltypes.id
|
|
|
|
|
|--06:EXCHANGE [PARTITION=HASH(functional.alltypes.id)]
|
|
| |
|
|
| 01:SCAN HDFS [functional.alltypes]
|
|
| partitions=24/24 size=478.45KB
|
|
|
|
|
05:EXCHANGE [PARTITION=HASH(functional.alltypes.id)]
|
|
|
|
|
00:SCAN HDFS [functional.alltypes]
|
|
partitions=24/24 size=478.45KB
|
|
====
|
|
# Self-join of view to make sure the join op is properly set
|
|
# in the cloned view instances.
|
|
select * from functional.alltypes_view t1
|
|
left outer join functional.alltypes_view t2 using(id)
|
|
full outer join functional.alltypes_view t3 using(id)
|
|
---- PLAN
|
|
04:HASH JOIN [FULL OUTER JOIN]
|
|
| hash predicates: functional.alltypes.id = functional.alltypes.id
|
|
|
|
|
|--02:SCAN HDFS [functional.alltypes]
|
|
| partitions=24/24 size=478.45KB compact
|
|
|
|
|
03:HASH JOIN [LEFT OUTER JOIN]
|
|
| hash predicates: functional.alltypes.id = functional.alltypes.id
|
|
|
|
|
|--01:SCAN HDFS [functional.alltypes]
|
|
| partitions=24/24 size=478.45KB compact
|
|
|
|
|
00:SCAN HDFS [functional.alltypes]
|
|
partitions=24/24 size=478.45KB
|
|
---- DISTRIBUTEDPLAN
|
|
08:EXCHANGE [PARTITION=UNPARTITIONED]
|
|
|
|
|
04:HASH JOIN [FULL OUTER JOIN, PARTITIONED]
|
|
| hash predicates: functional.alltypes.id = functional.alltypes.id
|
|
|
|
|
|--07:EXCHANGE [PARTITION=HASH(functional.alltypes.id)]
|
|
| |
|
|
| 02:SCAN HDFS [functional.alltypes]
|
|
| partitions=24/24 size=478.45KB
|
|
|
|
|
03:HASH JOIN [LEFT OUTER JOIN, PARTITIONED]
|
|
| hash predicates: functional.alltypes.id = functional.alltypes.id
|
|
|
|
|
|--06:EXCHANGE [PARTITION=HASH(functional.alltypes.id)]
|
|
| |
|
|
| 01:SCAN HDFS [functional.alltypes]
|
|
| partitions=24/24 size=478.45KB
|
|
|
|
|
05:EXCHANGE [PARTITION=HASH(functional.alltypes.id)]
|
|
|
|
|
00:SCAN HDFS [functional.alltypes]
|
|
partitions=24/24 size=478.45KB
|
|
====
|
|
# Self-join of view to make sure join hints are properly set
|
|
# in the cloned view instances.
|
|
# Note that in the plans above without hints both joins use shuffle
|
|
# (PARTITIONED); with the hints below the first join becomes broadcast.
|
|
select * from functional.alltypes_view t1
|
|
inner join [broadcast] functional.alltypes_view t2 using(id)
|
|
inner join [shuffle] functional.alltypes_view t3 using(id)
|
|
---- PLAN
|
|
04:HASH JOIN [INNER JOIN]
|
|
| hash predicates: functional.alltypes.id = functional.alltypes.id
|
|
|
|
|
|--02:SCAN HDFS [functional.alltypes]
|
|
| partitions=24/24 size=478.45KB compact
|
|
|
|
|
03:HASH JOIN [INNER JOIN]
|
|
| hash predicates: functional.alltypes.id = functional.alltypes.id
|
|
|
|
|
|--01:SCAN HDFS [functional.alltypes]
|
|
| partitions=24/24 size=478.45KB compact
|
|
|
|
|
00:SCAN HDFS [functional.alltypes]
|
|
partitions=24/24 size=478.45KB
|
|
---- DISTRIBUTEDPLAN
|
|
08:EXCHANGE [PARTITION=UNPARTITIONED]
|
|
|
|
|
04:HASH JOIN [INNER JOIN, PARTITIONED]
|
|
| hash predicates: functional.alltypes.id = functional.alltypes.id
|
|
|
|
|
|--07:EXCHANGE [PARTITION=HASH(functional.alltypes.id)]
|
|
| |
|
|
| 02:SCAN HDFS [functional.alltypes]
|
|
| partitions=24/24 size=478.45KB
|
|
|
|
|
06:EXCHANGE [PARTITION=HASH(functional.alltypes.id)]
|
|
|
|
|
03:HASH JOIN [INNER JOIN, BROADCAST]
|
|
| hash predicates: functional.alltypes.id = functional.alltypes.id
|
|
|
|
|
|--05:EXCHANGE [BROADCAST]
|
|
| |
|
|
| 01:SCAN HDFS [functional.alltypes]
|
|
| partitions=24/24 size=478.45KB
|
|
|
|
|
00:SCAN HDFS [functional.alltypes]
|
|
partitions=24/24 size=478.45KB
|
|
====
|
|
# Tests that parentheses are preserved when creating a view,
|
|
# enabling proper partition pruning for this particular view.
|
|
select * from functional.alltypes_parens
|
|
---- PLAN
|
|
00:SCAN HDFS [functional.alltypes]
|
|
partitions=1/24 size=19.95KB
|
|
predicates: (int_col < 100 OR bool_col = FALSE)
|
|
---- DISTRIBUTEDPLAN
|
|
01:EXCHANGE [PARTITION=UNPARTITIONED]
|
|
|
|
|
00:SCAN HDFS [functional.alltypes]
|
|
partitions=1/24 size=19.95KB
|
|
predicates: (int_col < 100 OR bool_col = FALSE)
|
|
====
|