mirror of
https://github.com/apache/impala.git
synced 2026-01-25 09:01:08 -05:00
DataSourceScanNode does not handle eos properly in function DataSourceScanNode::GetNext(). Rows, which are returned from external data source, could be dropped if data_source_batch_size is set with value which is greater than default value 1024. Testing: - Added end-to-end test with data_source_batch_size as 2048. The test failed without fixing, passed with fixing. Also added test with data_source_batch_size as 512. - Passed core tests. Change-Id: I978d0a65faa63a47ec86a0127c0bee8dfb79530b Reviewed-on: http://gerrit.cloudera.org:8080/20636 Reviewed-by: Abhishek Rawat <arawat@cloudera.com> Tested-by: Wenzhe Zhou <wzhou@cloudera.com>
188 lines
5.6 KiB
Plaintext
188 lines
5.6 KiB
Plaintext
====
|
|
---- QUERY
|
|
# Create DataSource
|
|
DROP DATA SOURCE IF EXISTS TestGenericDataSource;
|
|
CREATE DATA SOURCE TestGenericDataSource
|
|
LOCATION '$FILESYSTEM_PREFIX/test-warehouse/data-sources/test-data-source.jar'
|
|
CLASS 'org.apache.impala.extdatasource.AllTypesDataSource'
|
|
API_VERSION 'V1';
|
|
---- RESULTS
|
|
'Data source has been created.'
|
|
====
|
|
---- QUERY
|
|
# Show created DataSource
|
|
SHOW DATA SOURCES LIKE 'testgenericdatasource';
|
|
---- LABELS
|
|
NAME,LOCATION,CLASS NAME,API VERSION
|
|
---- RESULTS
|
|
'testgenericdatasource',regex:'.*/test-warehouse/data-sources/test-data-source.jar','org.apache.impala.extdatasource.AllTypesDataSource','V1'
|
|
---- TYPES
|
|
STRING,STRING,STRING,STRING
|
|
====
|
|
---- QUERY
|
|
# Create DataSource table
|
|
DROP TABLE IF EXISTS alltypes_datasource;
|
|
CREATE TABLE alltypes_datasource (
|
|
id INT,
|
|
bool_col BOOLEAN,
|
|
tinyint_col TINYINT,
|
|
smallint_col SMALLINT,
|
|
int_col INT,
|
|
bigint_col BIGINT,
|
|
float_col FLOAT,
|
|
double_col DOUBLE,
|
|
timestamp_col TIMESTAMP,
|
|
string_col STRING,
|
|
dec_col1 DECIMAL(9,0),
|
|
dec_col2 DECIMAL(10,0),
|
|
dec_col3 DECIMAL(20,10),
|
|
dec_col4 DECIMAL(38,37),
|
|
dec_col5 DECIMAL(10,5),
|
|
date_col DATE)
|
|
PRODUCED BY DATA SOURCE TestGenericDataSource("TestInitString");
|
|
---- RESULTS
|
|
'Table has been created.'
|
|
====
|
|
---- QUERY
|
|
# Gets all types including a row with a NULL value. The predicate pushed to
|
|
# the data source is not actually used, but the second predicate is
|
|
# evaluated by Impala.
|
|
select *
|
|
from alltypes_datasource
|
|
where float_col != 0 and
|
|
int_col >= 1990 limit 5
|
|
---- RESULTS
|
|
1990,true,0,90,1990,19900,2189,1990,1970-01-01 00:00:01.990000000,'NULL',-999998009,-9999998009,-9999999999.9999998009,-9.9999999999999999999999999999999998009,-99999.98009,1975-06-14
|
|
1991,false,1,91,1991,19910,2190.10009765625,1991,1970-01-01 00:00:01.991000000,'1991',999998008,9999998008,9999999999.9999998008,9.9999999999999999999999999999999998008,99999.98008,1975-06-15
|
|
1992,true,2,92,1992,19920,2191.199951171875,1992,1970-01-01 00:00:01.992000000,'1992',-999998007,-9999998007,-9999999999.9999998007,-9.9999999999999999999999999999999998007,-99999.98007,1975-06-16
|
|
1993,false,3,93,1993,19930,2192.300048828125,1993,1970-01-01 00:00:01.993000000,'1993',999998006,9999998006,9999999999.9999998006,9.9999999999999999999999999999999998006,99999.98006,1975-06-17
|
|
1994,true,4,94,1994,19940,2193.39990234375,1994,1970-01-01 00:00:01.994000000,'1994',-999998005,-9999998005,-9999999999.9999998005,-9.9999999999999999999999999999999998005,-99999.98005,1975-06-18
|
|
---- TYPES
|
|
INT, BOOLEAN, TINYINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, TIMESTAMP, STRING, DECIMAL, DECIMAL, DECIMAL, DECIMAL, DECIMAL, DATE
|
|
====
|
|
---- QUERY
|
|
# Project a subset of the columns
|
|
select bigint_col, timestamp_col, double_col
|
|
from alltypes_datasource
|
|
where double_col != 0 and int_col >= 1990 limit 3
|
|
---- RESULTS
|
|
19900,1970-01-01 00:00:01.990000000,1990
|
|
19910,1970-01-01 00:00:01.991000000,1991
|
|
19920,1970-01-01 00:00:01.992000000,1992
|
|
---- TYPES
|
|
BIGINT, TIMESTAMP, DOUBLE
|
|
====
|
|
---- QUERY
|
|
# count(*) with a predicate evaluated by Impala
|
|
select count(*) from alltypes_datasource
|
|
where float_col = 0 and
|
|
string_col is not NULL
|
|
---- RESULTS
|
|
4000
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
# count(*) with no predicates has no materialized slots
|
|
select count(*) from alltypes_datasource
|
|
---- RESULTS
|
|
5000
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
select string_col from alltypes_datasource
|
|
where string_col = 'VALIDATE_PREDICATES##id LT 1 && id GT 1 && id LE 1 && id GE 1 && int_col EQ 1 && id NE 1'
|
|
and id < 1 and id > 1 and id <= 1 and id >= 1 and int_col = 1 and id != 1
|
|
---- RESULTS
|
|
'SUCCESS'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
select string_col from alltypes_datasource
|
|
where string_col = 'VALIDATE_PREDICATES##id LT 1 && id GT 1 && id LE 1 && id GE 1 && int_col EQ 1 && id NE 1'
|
|
and 1 > id and 1 < id and 1 >= id and 1 <= id and 1 = int_col and 1 != id
|
|
---- RESULTS
|
|
'SUCCESS'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
# Test that <=>, IS DISTINCT FROM, and IS NOT DISTINCT FROM all can be validated
|
|
# Note the duplicate predicate 1 IS NOT DISTINCT FROM id is removed.
|
|
select string_col from alltypes_datasource
|
|
where string_col = 'VALIDATE_PREDICATES##id NOT_DISTINCT 1 && id DISTINCT_FROM 1'
|
|
and 1 <=> id and 1 IS DISTINCT FROM id and 1 IS NOT DISTINCT FROM id
|
|
---- RESULTS
|
|
'SUCCESS'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
# Test that <=>, IS DISTINCT FROM, and IS NOT DISTINCT FROM are evaluated just like their
|
|
# equality counterparts
|
|
select * from
|
|
(select count(*) from alltypes_datasource
|
|
where tinyint_col = 1 and smallint_col = 11) a
|
|
union all
|
|
(select count(*) from alltypes_datasource
|
|
where tinyint_col <=> 1 and smallint_col <=> 11)
|
|
---- RESULTS
|
|
50
|
|
50
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
select * from
|
|
(select count(*) from alltypes_datasource
|
|
where smallint_col = 11 and tinyint_col = 1) a
|
|
union all
|
|
(select count(*) from alltypes_datasource
|
|
where smallint_col <=> 11 and tinyint_col <=> 1)
|
|
---- RESULTS
|
|
500
|
|
500
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
select * from
|
|
(select count(*) from alltypes_datasource
|
|
where tinyint_col != 1 and smallint_col != 11) a
|
|
union all
|
|
(select count(*) from alltypes_datasource
|
|
where tinyint_col IS DISTINCT FROM 1 and smallint_col IS DISTINCT FROM 11)
|
|
---- RESULTS
|
|
4950
|
|
4950
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
select * from
|
|
(select count(*) from alltypes_datasource
|
|
where smallint_col != 11 and tinyint_col != 1) a
|
|
union all
|
|
(select count(*) from alltypes_datasource
|
|
where smallint_col IS DISTINCT FROM 11 and tinyint_col IS DISTINCT FROM 1)
|
|
---- RESULTS
|
|
4500
|
|
4500
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
# Drop table
|
|
DROP TABLE alltypes_datasource;
|
|
---- RESULTS
|
|
'Table has been dropped.'
|
|
====
|
|
---- QUERY
|
|
# Drop DataSource
|
|
DROP DATA SOURCE TestGenericDataSource;
|
|
---- RESULTS
|
|
'Data source has been dropped.'
|
|
====
|