Files
impala/testdata/workloads/functional-query/queries/QueryTest/outer-to-inner-joins.test
xqhe 4ae847bf94 IMPALA-10382: fix invalid outer join simplification
When set ENABLE_OUTER_JOIN_TO_INNER_TRANSFORMATION = true, the planner
will simplify outer joins if the predicate with case expr or conditional
function on both sides of outer join.
However, the predicate maybe not null-rejecting, if simplify the outer
join, the result is incorrect. E.g. t1.b > coalesce(t1.c, t2.c) can
return true if t2.c is null, so it is not null-rejecting predicate
for t2.

The fix is simply to support the case that the predicate with two
operands and the operator is one of (=, !=, >, <, >=, <=),
1. one of the operands or
2. if the operand is arithmetic expression and one of the children
does not contain conditional builtin function or case expr and has
tuple id in outer joined tuples.
E.g. t1.b > coalesce(t2.c, t1.c) or t1.b + coalesce(t2.c, t1.c) >
coalesce(t2.c, t1.c) is null-rejecting predicate for t1.

Testing:
* Add new plan tests in outer-to-inner-joins.test
* Add new query tests to verify the correctness on transformation

Change-Id: I84a3812f4212fa823f3d1ced6e12f2df05aedb2b
Reviewed-on: http://gerrit.cloudera.org:8080/16845
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Reviewed-by: Tim Armstrong <tarmstrong@cloudera.com>
2021-01-27 17:30:37 +00:00

285 lines
6.7 KiB
Plaintext

====
---- QUERY
# Tests disjunctive conjuncts in where clause.
# t1.int_col < 10 or t2.int_col < 10 can't prove that t2 is nonnullable,
# so we can't convert a left join to an inner join
select count(*)
from functional.alltypes t1
left outer join functional.alltypessmall t2
on t1.id = t2.id
where t1.int_col < 10 or t2.int_col < 10 or t2.tinyint_col < 5
---- RESULTS
7300
---- TYPES
bigint
====
---- QUERY
# Tests where clause containing disjunctive conjuncts
# When ti.int_col < 10, t2.tinyint_col can be null value, so this where conjunct is not
# null-rejecting for t2 and we can't convert a left join to an inner join.
select count(*)
from functional.alltypes t1
left outer join functional.alltypessmall t2
on t1.id = t2.id
where case when t1.int_col < 10 or t2.tinyint_col < 5 then 10 else 20 end = 10
---- RESULTS
7300
---- TYPES
bigint
====
---- QUERY
# Tests disjunctive conjuncts in where clause.
# All disjunctive conjuncts children have t2's slot, we can simplify outer join.
# For t1.int_col + t2.int_col < 10, it maybe true only t2.int_col is not null. At least
# one of the t2's columns is not null the disjunctive conjunct is true, so this
# is null-rejecting conjunct
select count(*)
from functional.alltypes t1
left outer join functional.alltypessmall t2
on t1.id = t2.id
where t1.int_col + t2.int_col < 10 or t2.tinyint_col < 5 or t2.smallint_col > 2
---- RESULTS
100
---- TYPES
bigint
====
---- QUERY
# Simplify outer join by inner join on clause.
# Inner join on conjunct t2.id = t3.test_id proves that t2.id can't be null.
SELECT count(*)
FROM functional.dimtbl t1
LEFT JOIN functional.dimtbl t2 ON t1.id = t2.id
JOIN functional.jointbl t3 ON t2.id = t3.test_id
---- RESULTS
11
---- TYPES
bigint
====
---- QUERY
# Simplify outer join by inner join on clause.
# Inner join on conjunct t1.id = t3.test_id proves that t1.id can't be null.
SELECT count(*)
FROM functional.dimtbl t1
RIGHT JOIN functional.dimtbl t2 ON t1.id = t2.id
JOIN functional.jointbl t3 ON t1.id = t3.test_id
---- RESULTS
11
---- TYPES
bigint
====
---- QUERY
SELECT count(*)
FROM functional.dimtbl t1
FULL JOIN functional.jointbl t2 ON t1.id = t2.test_id
JOIN functional.jointbl t3 ON t2.test_name = t3.test_name
---- RESULTS
105
---- TYPES
bigint
====
---- QUERY
# Simplify outer join by inner join on clause.
# Same as above, t1.id can't be null, we can convert t1 full join t2 to t1 left join t2.
SELECT count(*)
FROM functional.dimtbl t1
FULL JOIN functional.jointbl t2 ON t1.id = t2.test_id
JOIN functional.jointbl t3 ON t1.id = t3.test_id
---- RESULTS
41
---- TYPES
bigint
====
---- QUERY
# Simplify outer join by inner join on clause.
# t1.id + t2.id = t3.test_id + 1001 is null-rejecting conjunct for t1 and t2
SELECT count(*)
FROM functional.dimtbl t1
FULL JOIN functional.jointbl t2 ON t1.id = t2.test_id
JOIN functional.jointbl t3 ON t1.id + t2.test_id = t3.test_id + 1001
---- RESULTS
3
---- TYPES
bigint
====
---- QUERY
# 'is distinct from'/'is not distinct from' is not null-rejecting conjunct
SELECT count(*)
FROM functional.dimtbl t1
LEFT JOIN functional.jointbl t2 ON t1.id = t2.test_id
WHERE t2.test_name is distinct from 'Name1' OR t2.test_name is not distinct from 'Name2'
---- RESULTS
14
---- TYPES
bigint
====
---- QUERY
# Tests the conjunct containing nondeterministic function
SELECT count(*)
FROM functional.dimtbl t1
LEFT JOIN functional.jointbl t2 ON t1.id = t2.test_id
WHERE CASE WHEN t2.test_zip = 2 THEN 0.2 ELSE 2 END > rand()
---- RESULTS
15
---- TYPES
bigint
====
---- QUERY
# Expect no conversion
select count(*) from functional.alltypes t1 left outer join functional.alltypessmall t2
on t1.id = t2.id where t1.int_col < 10;
---- RESULTS
7300
---- TYPES
bigint
====
---- QUERY
# Expect no conversion
select t2.tinyint_col, count(t2.id)
from functional.alltypes t1
left outer join functional.alltypessmall t2
on t1.id = t2.id
group by t2.tinyint_col
having count(t2.id) > 0
---- RESULTS
0,12
2,12
9,8
7,8
5,8
8,8
6,8
3,12
1,12
4,12
---- TYPES
tinyint,bigint
====
---- QUERY
# Expect no conversion
select count(*) from functional.alltypes t1 left outer join functional.alltypessmall t2
on t1.id = t2.id where zeroifnull(t2.int_col) < 10;
---- RESULTS
7300
---- TYPES
bigint
====
---- QUERY
# Should be converted
select count(*) from (values(1))x where 1 in
(
select count(t2.id)
from functional.alltypes t1
left outer join functional.alltypessmall t2
on t1.id = t2.id
where t2.int_col > 10
)
---- RESULTS
0
---- TYPES
bigint
====
---- QUERY
# TODO: Should be converted
select count(*)
from functional.alltypes t0,
(
select t2.id ct
from functional.alltypes t1
left outer join functional.alltypessmall t2
on t1.id = t2.id
) s
where t0.int_col = s.ct
---- RESULTS
7300
---- TYPES
bigint
====
---- QUERY
SELECT count(*)
FROM functional.dimtbl t1
LEFT JOIN functional.jointbl t2 ON t1.id = t2.test_id
LEFT JOIN functional.jointbl t3 ON t2.test_id = t3.test_id
WHERE ZEROIFNULL(t2.test_zip) < t3.test_zip
---- RESULTS
8
---- TYPES
bigint
====
---- QUERY
SELECT count(*)
FROM functional.dimtbl t1
LEFT JOIN functional.jointbl t2 ON t1.id = t2.test_id
LEFT JOIN functional.jointbl t3 ON t2.test_id = t3.test_id + 1
WHERE ZEROIFNULL(t3.test_zip) < t2.test_zip
---- RESULTS
7
---- TYPES
bigint
====
---- QUERY
# We can't simplify outer join executing after inner join by inner join on clause
SELECT count(*)
FROM functional.dimtbl t1
JOIN functional.jointbl t2 ON t1.id = t2.test_id
LEFT JOIN functional.jointbl t3 ON t2.test_id + 1 = t3.test_id
---- RESULTS
16
---- TYPES
bigint
====
---- QUERY
# Tests complex types
select a.id, b.item from functional_parquet.complextypestbl a full outer join a.int_array b
where b.item % 2 = 0 and a.id < 10
---- RESULTS
1,2
2,2
---- TYPES
bigint, int
====
---- QUERY
select a.id, b.item from functional_parquet.complextypestbl a
full outer join a.int_array b on (a.id < b.item and a.id < 10)
where b.item % 2 = 0
---- RESULTS
1,2
NULL,2
---- TYPES
bigint, int
====
---- QUERY
select straight_join id from functional_parquet.complextypestbl t1 left outer join t1.int_array t2
where t1.id = t2.pos and t2.item = 2
---- RESULTS
1
2
---- TYPES
bigint
====
---- QUERY
SELECT count(*)
FROM functional.alltypestiny t1
LEFT JOIN functional.alltypesagg t2 ON t1.tinyint_col = t2.tinyint_col
LEFT JOIN functional.alltypes t3 ON t1.int_col = t3.int_col
WHERE t1.tinyint_col >= coalesce(t1.int_col, t2.int_col)
---- RESULTS
2922920
---- TYPES
bigint
====
---- QUERY
SELECT count(*)
FROM functional.alltypestiny t1
FULL JOIN functional.alltypesagg t2 ON t1.tinyint_col = t2.tinyint_col
WHERE t2.tinyint_col + CASE
WHEN t1.int_col IS NOT NULL THEN t1.int_col
ELSE t2.int_col
END >= CASE
WHEN t1.int_col IS NOT NULL THEN t1.int_col
ELSE t2.int_col
END
---- RESULTS
12000
---- TYPES
bigint
====