mirror of
https://github.com/apache/impala.git
synced 2026-01-01 18:00:30 -05:00
Previously, scanners assumed that there are no conjuncts associated with a scan node for queries with no materialized slots (e.g. count(*)). This is not necessarily the case, as one can write queries such as select count(*) from tpch.lineitem where rand() * 10 < 0; or select count(*) from tpch.lineitem where rand() > <a partition column>. In such cases, the conjuncts should still be evaluated once per row. This change fixes the short-circuit handling logic for count(*) so that it evaluates the conjuncts once per row and only commits a row to the output row batch if the conjuncts evaluate to true. Testing done: Added the example above to the scanner test Change-Id: Ib530f1fdcd2c6de699977db163b3f6eb38481517 Reviewed-on: http://gerrit.cloudera.org:8080/8623 Reviewed-by: Tim Armstrong <tarmstrong@cloudera.com> Reviewed-by: Alex Behm <alex.behm@cloudera.com> Reviewed-by: Dan Hecht <dhecht@cloudera.com> Tested-by: Impala Public Jenkins
151 lines
4.3 KiB
Plaintext
151 lines
4.3 KiB
Plaintext
====
|
|
---- QUERY
|
|
# Make sure LIMIT is enforced.
|
|
select * from functional_kudu.dimtbl order by id limit 1;
|
|
---- RESULTS
|
|
1001,'Name1',94611
|
|
---- TYPES
|
|
BIGINT, STRING, INT
|
|
====
|
|
---- QUERY
|
|
# Make sure that we can list the columns to be scanned in any order, that predicates
|
|
# work and that we can have predicates on columns not referenced elsewhere.
|
|
select zip, id from functional_kudu.dimtbl where id >= 1000 and 1002 >= id and
|
|
94611 = zip and 'Name1' = name order by id;
|
|
---- RESULTS
|
|
94611,1001
|
|
---- TYPES
|
|
INT, BIGINT
|
|
====
|
|
---- QUERY
|
|
# Regression test for IMPALA-2740, a NULL value from a previously filtered row would
|
|
# carry over into the next unfiltered row (the result below would incorrectly be 2,NULL).
|
|
CREATE TABLE impala_2740 (key INT PRIMARY KEY, value INT)
|
|
PARTITION BY HASH (key) PARTITIONS 3 STORED AS KUDU;
|
|
INSERT INTO impala_2740 VALUES (1, NULL), (2, -2);
|
|
SELECT * FROM impala_2740 WHERE key != 1;
|
|
---- RESULTS
|
|
2,-2
|
|
---- TYPES
|
|
INT, INT
|
|
====
|
|
---- QUERY
|
|
# Regression test for IMPALA-2635, the Kudu scanner hangs waiting for data from scanner
|
|
# threads that are never started. The union and both scans land in the same fragment which
|
|
# is run on all impalads. However, for the t1 table there is only a single scan range,
|
|
# so two of the scan instances get empty scan ranges.
|
|
CREATE TABLE impala_2635_t1 (id BIGINT PRIMARY KEY, name STRING)
|
|
PARTITION BY HASH (id) PARTITIONS 3 STORED AS KUDU;
|
|
CREATE TABLE impala_2635_t2 (id BIGINT PRIMARY KEY, name STRING)
|
|
PARTITION BY HASH(id) PARTITIONS 16 STORED AS KUDU;
|
|
INSERT INTO impala_2635_t1 VALUES (0, 'Foo');
|
|
INSERT INTO impala_2635_t2 VALUES (1, 'Blah');
|
|
SELECT * FROM impala_2635_t1 UNION ALL SELECT * FROM impala_2635_t2;
|
|
---- RESULTS
|
|
0,'Foo'
|
|
1,'Blah'
|
|
---- TYPES
|
|
BIGINT, STRING
|
|
====
|
|
---- QUERY
|
|
# IMPALA-4408: Test Kudu scans where all materialized slots are non-nullable.
|
|
select count(int_col) from functional_kudu.tinyinttable
|
|
---- RESULTS
|
|
10
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
# IMPALA-4859: Test Kudu IS NULL/IS NOT NULL pushdown
|
|
select count(*) from functional_kudu.alltypesagg where id < 10 and float_col is null;
|
|
---- RESULTS
|
|
2
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
# IMPALA-4859: IS NOT NULL predicate on float_col combined with a range predicate on id.
select count(*) from functional_kudu.alltypesagg where id < 10 and float_col is not null;
|
|
---- RESULTS
|
|
9
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
# alltypes.id is primary key/not nullable, verify IS NOT NULL/IS NULL pushdown works
|
|
select count(*) from functional_kudu.alltypes where id is not null;
|
|
---- RESULTS
|
|
7300
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
# alltypes.id is the primary key and therefore never NULL, so IS NULL must match no rows.
select count(*) from functional_kudu.alltypes where id is null;
|
|
---- RESULTS
|
|
0
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
# Push down TIMESTAMP binary predicates
|
|
select id, timestamp_col from functional_kudu.alltypes where
|
|
timestamp_col <= cast('2009-01-01 00:08:00.28' as timestamp) and
|
|
timestamp_col >= cast('2009-01-01 00:04:00.6' as timestamp)
|
|
order by id;
|
|
---- RESULTS
|
|
4,2009-01-01 00:04:00.600000000
|
|
5,2009-01-01 00:05:00.100000000
|
|
6,2009-01-01 00:06:00.150000000
|
|
7,2009-01-01 00:07:00.210000000
|
|
8,2009-01-01 00:08:00.280000000
|
|
---- TYPES
|
|
INT, TIMESTAMP
|
|
====
|
|
---- QUERY
|
|
# Out-of-range TIMESTAMP predicate (evaluates to NULL)
|
|
select id, timestamp_col from functional_kudu.alltypes where
|
|
timestamp_col > cast('1000-01-01 00:00:00.00' as timestamp)
|
|
---- RESULTS
|
|
---- TYPES
|
|
INT, TIMESTAMP
|
|
====
|
|
---- QUERY
|
|
# TIMESTAMP range with exclusive bounds (< and >): rows equal to either bound are excluded.
select id, timestamp_col from functional_kudu.alltypes where
|
|
timestamp_col < cast('2009-01-01 00:08:00.28' as timestamp) and
|
|
timestamp_col > cast('2009-01-01 00:04:00.6' as timestamp)
|
|
order by id;
|
|
---- RESULTS
|
|
5,2009-01-01 00:05:00.100000000
|
|
6,2009-01-01 00:06:00.150000000
|
|
7,2009-01-01 00:07:00.210000000
|
|
---- TYPES
|
|
INT, TIMESTAMP
|
|
====
|
|
---- QUERY
|
|
# TIMESTAMP equality predicate: expects exactly the one row with this timestamp value.
select id, timestamp_col from functional_kudu.alltypes where
|
|
timestamp_col = cast('2009-01-01 00:08:00.28' as timestamp);
|
|
---- RESULTS
|
|
8,2009-01-01 00:08:00.280000000
|
|
---- TYPES
|
|
INT, TIMESTAMP
|
|
====
|
|
---- QUERY
|
|
# Push down TIMESTAMP IN list predicates
|
|
select id, timestamp_col from functional_kudu.alltypes where
|
|
timestamp_col in (cast('2010-03-01 00:00:00' as timestamp),
|
|
cast('2010-03-01 00:01:00' as timestamp))
|
|
order by id;
|
|
---- RESULTS
|
|
4240,2010-03-01 00:00:00
|
|
4241,2010-03-01 00:01:00
|
|
---- TYPES
|
|
INT, TIMESTAMP
|
|
====
|
|
---- QUERY
|
|
# Regression test for IMPALA-6187. Make sure count(*) queries with partition columns only
|
|
# won't skip conjunct evaluation. 'id' is the partition column here.
|
|
select count(*) from functional_kudu.alltypes where rand() + id < 0.0;
|
|
---- RESULTS
|
|
0
|
|
---- TYPES
|
|
BIGINT
|
|
==== |