mirror of
https://github.com/apache/impala.git
synced 2026-02-02 15:00:38 -05:00
The constant propagation introduced in IMPALA-10064 handled conversion
of < and > predicates from timestamps to dates incorrectly.
Example:
select * from functional.alltypes_date_partition
where date_col = cast(timestamp_col as date)
and timestamp_col > '2009-01-01 01:00:00'
and timestamp_col < '2009-02-01 01:00:00';
Before this change query rewrites added the following predicates:
date_col > DATE '2009-01-01' AND date_col < DATE '2009-02-01'
These strict date comparisons incorrectly rejected every timestamp that
falls on the day of the lower or the upper bound.
The fix is to rewrite < and > to <= and >= in the date predicates.
< could be kept when the upper bound is a constant with no time-of-day
part, e.g. timestamp_col < '2009-01-01' could be rewritten to
date_col < '2009-01-01', but this optimization is left out of this
patch to keep the change simple.
Testing:
- added planner + EE regression tests
Change-Id: I1938bf5e91057b220daf8a1892940f674aac3d68
Reviewed-on: http://gerrit.cloudera.org:8080/19572
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
86 lines
2.3 KiB
Plaintext
86 lines
2.3 KiB
Plaintext
====
---- QUERY
# Constant propagation for range predicates on timestamp.
# date_col is tied to timestamp_col by the cast equality, so the BETWEEN bounds on
# timestamp_col are candidates for propagation to date_col. Both bounds here are
# date-only literals (no time-of-day part). BETWEEN is kept on purpose: it is the
# predicate form under test.
select count(*), sum(int_col) from alltypes_date_partition
where date_col = cast(timestamp_col as date)
and timestamp_col between '2009-01-01' and '2009-02-01';
---- RESULTS
156,620
---- TYPES
BIGINT, BIGINT
====
---- QUERY
# Same query as above with time-of-day parts. Regression test for IMPALA-11960.
# The strict > and < bounds on timestamp_col must reach date_col as >= and <=.
# Propagating them as strict date comparisons would reject every row on the two
# boundary days (2009-01-01 and 2009-02-01).
select count(*), sum(int_col) from alltypes_date_partition
where date_col = cast(timestamp_col as date)
and timestamp_col > '2009-01-01 01:00:00' and timestamp_col < '2009-02-01 01:00:00';
---- RESULTS
155,620
---- TYPES
BIGINT, BIGINT
====
---- QUERY
# Same as above but using >= instead of > and using a lower bound that matches with
# a row. Regression test for IMPALA-11960.
# The lower bound is inclusive, so the row whose timestamp_col equals
# '2009-01-01 00:08:00.280' has to be part of the result.
select count(*), sum(int_col) from alltypes_date_partition
where date_col = cast(timestamp_col as date)
and timestamp_col >= '2009-01-01 00:08:00.280' and timestamp_col < '2009-02-01 01:00:00';
---- RESULTS
156,628
---- TYPES
BIGINT, BIGINT
====
---- QUERY
# Mix of various predicates some of which are eligible for propagation
# The cast equality lives inside the WITH-clause view, while the two timestamp_col
# range predicates sit in the outer query, interleaved with predicates on int_col
# and bigint_col.
with dp_view as
(select * from alltypes_date_partition
where date_col = cast(timestamp_col as date))
select count(*), sum(int_col) from dp_view
where int_col < 100 and timestamp_col >= '2009-01-01'
and bigint_col in (20, 40)
and timestamp_col <= '2009-02-01';
---- RESULTS
62,186
---- TYPES
BIGINT, BIGINT
====
---- QUERY
# IMPALA-10314
# Simple limit in outer query referencing a with clause subquery.
# WHERE clause has an always_true hint.
# The count is taken over the 'limit 10' subquery, so exactly 10 is expected.
set optimize_simple_limit=true;
with dp_view as
(select * from alltypes_date_partition_2
where /* +always_true */ date_col = cast(timestamp_col as date))
select count(*) from (select * from dp_view limit 10) t;
---- RESULTS
10
---- TYPES
BIGINT
====
---- QUERY
# IMPALA-10360
# Query against a view that has hints for table sampling
# and WHERE clause.
# The view alltypes_dp_2_view_2 is defined outside this file. The count is taken
# over the 'limit 10' subquery, so exactly 10 is expected.
set optimize_simple_limit=true;
select count(*) from (select * from alltypes_dp_2_view_2 limit 10) t;
---- RESULTS
10
---- TYPES
BIGINT
====
---- QUERY
# IMPALA-9745
# Test correctness of constant propagation in the presence of
# implicit casts
# o_orderdate (STRING, see TYPES) is compared with ts (TIMESTAMP), and ts is compared
# with a string literal, so both predicates rely on implicit casts.
# Both expected rows carry identical values, so 'limit 2' without an ORDER BY
# still yields a deterministic result.
select * from
(select o_orderdate, to_timestamp(o_orderdate, 'yyyy-MM-dd') ts
from tpch.orders) dt where ts = '1996-12-01' and o_orderdate = ts
limit 2;
---- RESULTS
'1996-12-01',1996-12-01 00:00:00
'1996-12-01',1996-12-01 00:00:00
---- TYPES
STRING,TIMESTAMP
====