IMPALA-2147: Support IS [NOT] DISTINCT FROM and "<=>" predicates

Enforces that the planner treats IS NOT DISTINCT FROM as eligible for
hash joins, but does not find the minimum spanning tree of
equivalences for use in optimizing query plans; this is left as future
work.

Change-Id: I62c5300b1fbd764796116f95efe36573eed4c8d0
Reviewed-on: http://gerrit.cloudera.org:8080/710
Reviewed-by: Jim Apple <jbapple@cloudera.com>
Tested-by: Internal Jenkins
This commit is contained in:
Jim Apple
2015-08-26 20:18:26 -07:00
committed by Internal Jenkins
parent 022403bffc
commit 1a3d7ffd4f
40 changed files with 1365 additions and 145 deletions

View File

@@ -63,4 +63,68 @@ where string_col = 'VALIDATE_PREDICATES##id LT 1 && id GT 1 && id LE 1 && id GE
'SUCCESS'
---- TYPES
STRING
====
====
---- QUERY
# Test that <=>, IS DISTINCT FROM, and IS NOT DISTINCT FROM all can be validated
# The string_col literal names one predicate per conjunct below, in the same order:
# "1 <=> id" and "1 IS NOT DISTINCT FROM id" both appear as "id NOT_DISTINCT 1", and
# "1 IS DISTINCT FROM id" appears as "id DISTINCT_FROM 1" (id on the left-hand side).
# NOTE(review): presumably the test data source compares this string against the
# predicates it is offered and returns 'SUCCESS' on a match -- confirm in the data source.
select string_col from alltypes_datasource
where string_col = 'VALIDATE_PREDICATES##id NOT_DISTINCT 1 && id DISTINCT_FROM 1 && id NOT_DISTINCT 1'
and 1 <=> id and 1 IS DISTINCT FROM id and 1 IS NOT DISTINCT FROM id
---- RESULTS
'SUCCESS'
---- TYPES
STRING
====
---- QUERY
# Test that <=>, IS DISTINCT FROM, and IS NOT DISTINCT FROM are evaluated just like their
# equality counterparts
# The first UNION ALL branch is the '=' baseline; the second repeats the same conjuncts
# (tinyint_col first, then smallint_col) with '<=>'. Both counts are expected to match.
select * from
(select count(*) from alltypes_datasource
where tinyint_col = 1 and smallint_col = 11) a
union all
(select count(*) from alltypes_datasource
where tinyint_col <=> 1 and smallint_col <=> 11)
---- RESULTS
50
50
---- TYPES
BIGINT
====
---- QUERY
# Compares '=' (first branch) with '<=>' (second branch) again, this time with the
# smallint_col conjunct written before the tinyint_col conjunct in both branches.
# NOTE(review): the expected count here (500) differs from the tinyint_col-first form of
# the same conjuncts (50); presumably the test data source's output depends on conjunct
# order -- confirm against the data source implementation.
select * from
(select count(*) from alltypes_datasource
where smallint_col = 11 and tinyint_col = 1) a
union all
(select count(*) from alltypes_datasource
where smallint_col <=> 11 and tinyint_col <=> 1)
---- RESULTS
500
500
---- TYPES
BIGINT
====
---- QUERY
# The first UNION ALL branch is the '!=' baseline; the second repeats the same conjuncts
# (tinyint_col first, then smallint_col) with IS DISTINCT FROM. Both counts are expected
# to match.
select * from
(select count(*) from alltypes_datasource
where tinyint_col != 1 and smallint_col != 11) a
union all
(select count(*) from alltypes_datasource
where tinyint_col IS DISTINCT FROM 1 and smallint_col IS DISTINCT FROM 11)
---- RESULTS
4950
4950
---- TYPES
BIGINT
====
---- QUERY
# Compares '!=' (first branch) with IS DISTINCT FROM (second branch) again, this time
# with the smallint_col conjunct written before the tinyint_col conjunct.
# NOTE(review): the expected count here (4096) differs from the tinyint_col-first form of
# the same conjuncts (4950); presumably the test data source's output depends on conjunct
# order -- confirm against the data source implementation.
select * from
(select count(*) from alltypes_datasource
where smallint_col != 11 and tinyint_col != 1) a
union all
(select count(*) from alltypes_datasource
where smallint_col IS DISTINCT FROM 11 and tinyint_col IS DISTINCT FROM 1)
---- RESULTS
4096
4096
---- TYPES
BIGINT
====

View File

@@ -2338,3 +2338,61 @@ select regexp_match_count(tmp.str, tmp.pattern, tmp.start_pos, tmp.params) from
('iPhone\niPad\niPod', '^I.*$', 1, 'imn')) as tmp
---- CATCH
Illegal match parameter x
====
---- QUERY
# IMPALA-2147: IS [NOT] DISTINCT FROM and "<=>"
# Two NULL operands: <=> is expected to return true rather than NULL.
select NULL <=> NULL
---- RESULTS
true
---- TYPES
BOOLEAN
====
---- QUERY
# NULL against a non-NULL integer literal: <=> is expected to return false, not NULL.
select NULL <=> 1
---- RESULTS
false
---- TYPES
BOOLEAN
====
---- QUERY
# NULL against a non-NULL string literal: <=> is expected to return false, not NULL.
select NULL <=> "foo"
---- RESULTS
false
---- TYPES
BOOLEAN
====
---- QUERY
# Two NULL operands are not distinct from each other, so the expected result is false.
select NULL IS DISTINCT FROM NULL
---- RESULTS
false
---- TYPES
BOOLEAN
====
---- QUERY
# NULL on the left, a non-NULL numeric literal on the right: expected to be distinct.
select NULL IS DISTINCT FROM 3.14
---- RESULTS
true
---- TYPES
BOOLEAN
====
---- QUERY
# Operand order swapped: a non-NULL value cast to BIGINT on the left and NULL on the
# right is also expected to be distinct.
select cast(0 as bigint) IS DISTINCT FROM NULL
---- RESULTS
true
---- TYPES
BOOLEAN
====
---- QUERY
# No NULLs involved: two different non-NULL literals are expected to be distinct.
select 2.78 IS DISTINCT FROM 3.14
---- RESULTS
true
---- TYPES
BOOLEAN
====
---- QUERY
# Negated form of the 2.78 / 3.14 comparison: IS NOT DISTINCT FROM is expected to be
# false for two different non-NULL literals.
select 2.78 IS NOT DISTINCT FROM 3.14
---- RESULTS
false
---- TYPES
BOOLEAN
====

View File

@@ -562,3 +562,151 @@ cross join functional.alltypes t2 where t1.id = 0 limit 1;
---- TYPES
INT,BOOLEAN
====
---- QUERY
# IMPALA-2147: IS [NOT] DISTINCT FROM and "<=>"
# a.tinyint_col is restricted to NULL, so the join predicate compares NULL against each
# b.tinyint_col. The test only checks that at least one (a, b) pair qualifies.
select count(*) > 0
from alltypesagg as a, alltypesagg as b
where (a.tinyint_col IS DISTINCT FROM b.tinyint_col)
and a.tinyint_col is null
---- RESULTS
true
---- TYPES
BOOLEAN
====
---- QUERY
# a.tinyint_col is restricted to NULL, so IS NOT DISTINCT FROM compares NULL against each
# b.tinyint_col. The test only checks that at least one (a, b) pair qualifies.
select count(*) > 0
from alltypesagg as a, alltypesagg as b
where (a.tinyint_col IS NOT DISTINCT FROM b.tinyint_col)
and a.tinyint_col is null
---- RESULTS
true
---- TYPES
BOOLEAN
====
---- QUERY
# Same shape as the IS NOT DISTINCT FROM self-join, written with the <=> operator: with
# a.tinyint_col restricted to NULL, at least one (a, b) pair is expected to qualify.
select count(*) > 0
from alltypesagg as a, alltypesagg as b
where (a.tinyint_col <=> b.tinyint_col)
and a.tinyint_col is null
---- RESULTS
true
---- TYPES
BOOLEAN
====
---- QUERY
# left joins with <=> are different from left joins with =
# '=' baseline: the expected row has a NULL P.d and every Q column NULL (Q.b is rendered
# as 'NULL' and "Q.b is null" is true), i.e. the row from P is kept but unmatched.
select P.d, Q.d, Q.b, Q.b is null
from nulltable P left join nulltable Q
on P.d = Q.d
---- RESULTS
NULL,NULL,'NULL',true
---- TYPES
INT,INT,STRING,BOOLEAN
====
---- QUERY
# Left join on <=>: both d values are NULL in the expected row, and Q.b comes back as ''
# (an empty string) rather than NULL.
select P.d, Q.d, Q.b
from nulltable P left join nulltable Q
on P.d <=> Q.d
---- RESULTS
NULL,NULL,''
---- TYPES
INT,INT,STRING
====
---- QUERY
# '=' baseline for the anti join: one row of P is expected to have no match in Q on
# P.d = Q.d and therefore to survive the anti join.
select count(*)
from nulltable P left anti join nulltable Q
on P.d = Q.d
---- RESULTS
1
---- TYPES
BIGINT
====
---- QUERY
# Anti join on <=>: every row of P is expected to find a match in Q, so no row survives.
select count(*)
from nulltable P left anti join nulltable Q
on P.d <=> Q.d
---- RESULTS
0
---- TYPES
BIGINT
====
---- QUERY
# Test that 'IS DISTINCT FROM' works in nested loop joins
# Baseline using '!=': not(t1.d != t2.d) is expected to qualify no pair of rows.
select count(*) from nulltable t1, nulltable t2 where not(t1.d != t2.d)
---- RESULTS
0
---- TYPES
BIGINT
====
---- QUERY
# Negated IS DISTINCT FROM as the join predicate: the pair whose d values are both NULL
# is expected to be returned.
select t1.d, t2.d from nulltable t1, nulltable t2 where not(t1.d IS DISTINCT FROM t2.d)
---- RESULTS
NULL,NULL
---- TYPES
INT,INT
====
---- QUERY
# Baseline using '!=' with an expression on one side: comparing t1.d with length(t2.a)
# is expected to qualify no pair of rows.
select count(*) from nulltable t1, nulltable t2
where t1.d != length(t2.a)
---- RESULTS
0
---- TYPES
BIGINT
====
---- QUERY
# IS DISTINCT FROM with an expression on one side: the expected row pairs a NULL t1.d
# with t2.a = 'a', so NULL counts as distinct from length(t2.a).
select t1.d, t2.a from nulltable t1, nulltable t2
where t1.d IS DISTINCT FROM length(t2.a)
---- RESULTS
NULL,'a'
---- TYPES
INT,STRING
====
---- QUERY
# t1 and t2 each enumerate all nine (a, b) combinations over {NULL, 0, 1}. With <=> on
# both columns each row is expected to match exactly its identical counterpart,
# including the rows containing NULLs, giving nine result rows.
# The ORDER BY fixes the output order; the expected rows list NULL after 0 and 1.
select t1.a, t1.b, t2.a, t2.b from
(values
(NULL a, NULL b), (NULL, 0), (NULL, 1),
(0, NULL), (0, 0), (0, 1),
(1, NULL), (1, 0), (1, 1)) as t1,
(values
(NULL a, NULL b), (NULL, 0), (NULL, 1),
(0, NULL), (0, 0), (0, 1),
(1, NULL), (1, 0), (1, 1)) as t2
where t1.a <=> t2.a
and t1.b <=> t2.b
order by t1.a, t1.b, t2.a, t2.b
---- RESULTS
0,0,0,0
0,1,0,1
0,NULL,0,NULL
1,0,1,0
1,1,1,1
1,NULL,1,NULL
NULL,0,NULL,0
NULL,1,NULL,1
NULL,NULL,NULL,NULL
---- TYPES
TINYINT,TINYINT,TINYINT,TINYINT
====
---- QUERY
# Mixes the two operators over the same nine (a, b) combinations of {NULL, 0, 1}:
# column a is joined with <=> and column b with '='. Rows whose b is NULL are expected
# to drop out while rows whose a is NULL still match, leaving six result rows.
# The ORDER BY fixes the output order; the expected rows list NULL after 0 and 1.
select t1.a, t1.b, t2.a, t2.b from
(values
(NULL a, NULL b), (NULL, 0), (NULL, 1),
(0, NULL), (0, 0), (0, 1),
(1, NULL), (1, 0), (1, 1)) as t1,
(values
(NULL a, NULL b), (NULL, 0), (NULL, 1),
(0, NULL), (0, 0), (0, 1),
(1, NULL), (1, 0), (1, 1)) as t2
where t1.a <=> t2.a
and t1.b = t2.b
order by t1.a, t1.b, t2.a, t2.b
---- RESULTS
0,0,0,0
0,1,0,1
1,0,1,0
1,1,1,1
NULL,0,NULL,0
NULL,1,NULL,1
---- TYPES
TINYINT,TINYINT,TINYINT,TINYINT
====