mirror of
https://github.com/apache/impala.git
synced 2025-12-23 21:08:39 -05:00
Decimal is a primitive data type in Impala. The current code returns wrong values for columns with a decimal data type in external JDBC tables. This patch fixes the wrong values returned from the JDBC data source, and supports pushing down predicates on decimal columns to the remote database and to remote Impala. The decimal precision and scale of each column in an external JDBC table must be no less than the decimal precision and scale of the corresponding column in the table of the remote database. Otherwise, Impala fails with an error, since the mismatch may cause truncation of decimal data. Testing: - Added a Planner test for pushing down decimal predicates. - Added end-to-end unit tests for tables with decimal columns for Postgres, MySQL, and Impala-to-Impala. - Passed core tests. Change-Id: I8c9d2e0667c42c0e52436b158e3dfe3ec14b9e3b Reviewed-on: http://gerrit.cloudera.org:8080/21218 Reviewed-by: Michael Smith <michael.smith@cloudera.com> Reviewed-by: Abhishek Rawat <arawat@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
374 lines
11 KiB
Plaintext
374 lines
11 KiB
Plaintext
====
|
|
---- QUERY
# Set up alltypes_jdbc_datasource, an external JDBC table mapped to the remote table
# "alltypes". Any earlier copy is dropped first so this section can be re-run.
# The jdbc.properties string carries three options, the last of which has an escaped,
# quoted value that itself contains a comma.
DROP TABLE IF EXISTS alltypes_jdbc_datasource;
CREATE EXTERNAL TABLE alltypes_jdbc_datasource (
  id              INT,
  bool_col        BOOLEAN,
  tinyint_col     TINYINT,
  smallint_col    SMALLINT,
  int_col         INT,
  bigint_col      BIGINT,
  float_col       FLOAT,
  double_col      DOUBLE,
  date_string_col STRING,
  string_col      STRING,
  timestamp_col   TIMESTAMP
)
STORED BY JDBC
TBLPROPERTIES (
  "database.type"   = "IMPALA",
  "jdbc.url"        = "jdbc:impala://$INTERNAL_LISTEN_HOST:21050/functional",
  "jdbc.auth"       = "AuthMech=0",
  "jdbc.properties" = "MEM_LIMIT=1000000000, MAX_ERRORS = 10000, ENABLED_RUNTIME_FILTER_TYPES=\"BLOOM,MIN_MAX\"",
  "jdbc.driver"     = "com.cloudera.impala.jdbc.Driver",
  "driver.url"      = "$FILESYSTEM_PREFIX/test-warehouse/data-sources/jdbc-drivers/ImpalaJDBC42.jar",
  "dbcp.username"   = "impala",
  "dbcp.password"   = "cloudera",
  "table"           = "alltypes"
);
---- RESULTS
'Table has been created.'
|
|
====
|
|
---- QUERY
# Set up alltypes_jdbc_datasource_2 with the same columns, URL, driver and remote
# table ("alltypes") as alltypes_jdbc_datasource. Only the jdbc.properties string
# differs: it contains a quoted value preceded by a space, and an option name
# (DEBUG_ACTION) that is given without a value.
DROP TABLE IF EXISTS alltypes_jdbc_datasource_2;
CREATE EXTERNAL TABLE alltypes_jdbc_datasource_2 (
  id              INT,
  bool_col        BOOLEAN,
  tinyint_col     TINYINT,
  smallint_col    SMALLINT,
  int_col         INT,
  bigint_col      BIGINT,
  float_col       FLOAT,
  double_col      DOUBLE,
  date_string_col STRING,
  string_col      STRING,
  timestamp_col   TIMESTAMP
)
STORED BY JDBC
TBLPROPERTIES (
  "database.type"   = "IMPALA",
  "jdbc.url"        = "jdbc:impala://$INTERNAL_LISTEN_HOST:21050/functional",
  "jdbc.auth"       = "AuthMech=0",
  "jdbc.properties" = "QUERY_TIMEOUT_S=600, REQUEST_POOL= \"default-pool\", DEBUG_ACTION",
  "jdbc.driver"     = "com.cloudera.impala.jdbc.Driver",
  "driver.url"      = "$FILESYSTEM_PREFIX/test-warehouse/data-sources/jdbc-drivers/ImpalaJDBC42.jar",
  "dbcp.username"   = "impala",
  "dbcp.password"   = "cloudera",
  "table"           = "alltypes"
);
---- RESULTS
'Table has been created.'
|
|
====
|
|
---- QUERY
# Set up jdbc_decimal_tbl, an external JDBC table made only of DECIMAL columns and
# mapped to the remote table "decimal_tbl".
# Each declared precision and scale equals the precision and scale of the
# corresponding decimal column in the remote table.
DROP TABLE IF EXISTS jdbc_decimal_tbl;
CREATE EXTERNAL TABLE jdbc_decimal_tbl (
  d1 DECIMAL(9,0),
  d2 DECIMAL(10,0),
  d3 DECIMAL(20,10),
  d4 DECIMAL(38,38),
  d5 DECIMAL(10,5),
  d6 DECIMAL(9,0)
)
STORED BY JDBC
TBLPROPERTIES (
  "database.type" = "IMPALA",
  "jdbc.url"      = "jdbc:impala://$INTERNAL_LISTEN_HOST:21050/functional",
  "jdbc.auth"     = "AuthMech=0",
  "jdbc.driver"   = "com.cloudera.impala.jdbc.Driver",
  "driver.url"    = "$FILESYSTEM_PREFIX/test-warehouse/data-sources/jdbc-drivers/ImpalaJDBC42.jar",
  "dbcp.username" = "impala",
  "dbcp.password" = "cloudera",
  "table"         = "decimal_tbl"
);
---- RESULTS
'Table has been created.'
|
|
====
|
|
---- QUERY
# Set up jdbc_decimal_tbl_2 over the same remote table "decimal_tbl".
# Every declared precision and scale is greater than or equal to the precision and
# scale of the corresponding decimal column in the remote table: compared with
# jdbc_decimal_tbl, d4 stays at DECIMAL(38,38) and every other column gains one
# digit of precision and one digit of scale.
DROP TABLE IF EXISTS jdbc_decimal_tbl_2;
CREATE EXTERNAL TABLE jdbc_decimal_tbl_2 (
  d1 DECIMAL(10,1),
  d2 DECIMAL(11,1),
  d3 DECIMAL(21,11),
  d4 DECIMAL(38,38),
  d5 DECIMAL(11,6),
  d6 DECIMAL(10,1)
)
STORED BY JDBC
TBLPROPERTIES (
  "database.type" = "IMPALA",
  "jdbc.url"      = "jdbc:impala://$INTERNAL_LISTEN_HOST:21050/functional",
  "jdbc.auth"     = "AuthMech=0",
  "jdbc.driver"   = "com.cloudera.impala.jdbc.Driver",
  "driver.url"    = "$FILESYSTEM_PREFIX/test-warehouse/data-sources/jdbc-drivers/ImpalaJDBC42.jar",
  "dbcp.username" = "impala",
  "dbcp.password" = "cloudera",
  "table"         = "decimal_tbl"
);
---- RESULTS
'Table has been created.'
|
|
====
|
|
---- QUERY
# Set up jdbc_decimal_tbl_3 over the same remote table "decimal_tbl".
# The scales declared for d3 (9) and d5 (4) are smaller than the scales of the
# corresponding decimal columns in the remote table, which would cause truncation.
# The CREATE itself is expected to succeed. The exception is expected when the
# table is queried.
DROP TABLE IF EXISTS jdbc_decimal_tbl_3;
CREATE EXTERNAL TABLE jdbc_decimal_tbl_3 (
  d1 DECIMAL(9,0),
  d2 DECIMAL(10,0),
  d3 DECIMAL(20,9),
  d4 DECIMAL(38,38),
  d5 DECIMAL(10,4),
  d6 DECIMAL(9,0)
)
STORED BY JDBC
TBLPROPERTIES (
  "database.type" = "IMPALA",
  "jdbc.url"      = "jdbc:impala://$INTERNAL_LISTEN_HOST:21050/functional",
  "jdbc.auth"     = "AuthMech=0",
  "jdbc.driver"   = "com.cloudera.impala.jdbc.Driver",
  "driver.url"    = "$FILESYSTEM_PREFIX/test-warehouse/data-sources/jdbc-drivers/ImpalaJDBC42.jar",
  "dbcp.username" = "impala",
  "dbcp.password" = "cloudera",
  "table"         = "decimal_tbl"
);
---- RESULTS
'Table has been created.'
|
|
====
|
|
---- QUERY
# Set up jdbc_decimal_tbl_4 over the same remote table "decimal_tbl".
# d5 is declared with a larger scale (6) than the corresponding remote column but
# with the same precision (10), so it has room for only 4 integer digits instead
# of 5. That would truncate values, hence an exception is expected when the table
# is queried. The CREATE itself is expected to succeed.
DROP TABLE IF EXISTS jdbc_decimal_tbl_4;
CREATE EXTERNAL TABLE jdbc_decimal_tbl_4 (
  d1 DECIMAL(9,0),
  d2 DECIMAL(10,0),
  d3 DECIMAL(20,10),
  d4 DECIMAL(38,38),
  d5 DECIMAL(10,6),
  d6 DECIMAL(9,0)
)
STORED BY JDBC
TBLPROPERTIES (
  "database.type" = "IMPALA",
  "jdbc.url"      = "jdbc:impala://$INTERNAL_LISTEN_HOST:21050/functional",
  "jdbc.auth"     = "AuthMech=0",
  "jdbc.driver"   = "com.cloudera.impala.jdbc.Driver",
  "driver.url"    = "$FILESYSTEM_PREFIX/test-warehouse/data-sources/jdbc-drivers/ImpalaJDBC42.jar",
  "dbcp.username" = "impala",
  "dbcp.password" = "cloudera",
  "table"         = "decimal_tbl"
);
---- RESULTS
'Table has been created.'
|
|
====
|
|
---- QUERY
# Test the jdbc DataSource.
# count(*) with a predicate evaluated by Impala (presumably the IS NOT NULL
# test - confirm). Binary predicates are pushed to the external jdbc DataSource.
# The profile checks expect a single GetNext call and 730 rows read.
SELECT COUNT(*)
FROM alltypes_jdbc_datasource
WHERE float_col = 0
  AND string_col IS NOT NULL
---- RESULTS
730
---- TYPES
BIGINT
---- RUNTIME_PROFILE
row_regex: .*NumExternalDataSourceGetNext: 1 .*
row_regex: .*RowsRead: 730 .*
aggregation(SUM, RowsRead): 730
|
|
====
|
|
---- QUERY
# Bare count(*): with no predicates there are no materialized slots.
# The profile checks still expect all 7300 rows to be reported as read.
SELECT COUNT(*)
FROM alltypes_jdbc_datasource
---- RESULTS
7300
---- TYPES
BIGINT
---- RUNTIME_PROFILE
row_regex: .*NumExternalDataSourceGetNext: 1 .*
row_regex: .*RowsRead: 7.30K .*
aggregation(SUM, RowsRead): 7300
|
|
====
|
|
---- QUERY
# Selects every column, so each declared column type is fetched.
# The binary predicates are pushed to the DataSource, while "order by" and "limit"
# are evaluated locally.
# NOTE(review): the earlier comment said the result includes a row with a NULL
# value, but none of the five expected rows below contains NULL - confirm.
SELECT *
FROM alltypes_jdbc_datasource
WHERE id > 10
  AND int_col < 5
ORDER BY id
LIMIT 5 OFFSET 0
---- RESULTS
11,false,1,1,1,10,1.100000023841858,10.1,'01/02/09','1',2009-01-02 00:11:00.450000000
12,true,2,2,2,20,2.200000047683716,20.2,'01/02/09','2',2009-01-02 00:12:00.460000000
13,false,3,3,3,30,3.299999952316284,30.3,'01/02/09','3',2009-01-02 00:13:00.480000000
14,true,4,4,4,40,4.400000095367432,40.4,'01/02/09','4',2009-01-02 00:14:00.510000000
20,true,0,0,0,0,0,0,'01/03/09','0',2009-01-03 00:20:00.900000000
---- TYPES
INT, BOOLEAN, TINYINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, STRING, STRING, TIMESTAMP
---- RUNTIME_PROFILE
row_regex: .*NumExternalDataSourceGetNext: 4 .*
row_regex: .*RowsRead: 3.64K .*
aggregation(SUM, RowsRead): 3644
|
|
====
|
|
---- QUERY
# Projects a subset of six columns instead of every column.
# The binary predicates are pushed to the DataSource, while "order by" and "limit"
# are evaluated locally.
SELECT
  id,
  bool_col,
  smallint_col,
  float_col,
  double_col,
  date_string_col
FROM alltypes_jdbc_datasource
WHERE id > 10
  AND int_col < 5
ORDER BY id
LIMIT 5 OFFSET 0
---- RESULTS
11,false,1,1.100000023841858,10.1,'01/02/09'
12,true,2,2.200000047683716,20.2,'01/02/09'
13,false,3,3.299999952316284,30.3,'01/02/09'
14,true,4,4.400000095367432,40.4,'01/02/09'
20,true,0,0,0,'01/03/09'
---- TYPES
INT, BOOLEAN, SMALLINT, FLOAT, DOUBLE, STRING
---- RUNTIME_PROFILE
row_regex: .*NumExternalDataSourceGetNext: 4 .*
row_regex: .*RowsRead: 3.64K .*
aggregation(SUM, RowsRead): 3644
|
|
====
|
|
---- QUERY
# Projects six columns from alltypes_jdbc_datasource_2, the table whose definition
# differs from alltypes_jdbc_datasource only in its jdbc.properties string.
# The binary predicates are pushed to the DataSource, while "order by" and "limit"
# are evaluated locally.
# NOTE(review): the earlier comment described this table as having case sensitive
# column names and table name, but its definition in this file uses the same
# lowercase names as alltypes_jdbc_datasource - confirm.
SELECT
  id,
  bool_col,
  smallint_col,
  float_col,
  double_col,
  date_string_col
FROM alltypes_jdbc_datasource_2
WHERE id > 10
  AND int_col < 5
ORDER BY id
LIMIT 5 OFFSET 0
---- RESULTS
11,false,1,1.100000023841858,10.1,'01/02/09'
12,true,2,2.200000047683716,20.2,'01/02/09'
13,false,3,3.299999952316284,30.3,'01/02/09'
14,true,4,4.400000095367432,40.4,'01/02/09'
20,true,0,0,0,'01/03/09'
---- TYPES
INT, BOOLEAN, SMALLINT, FLOAT, DOUBLE, STRING
---- RUNTIME_PROFILE
row_regex: .*NumExternalDataSourceGetNext: 4 .*
row_regex: .*RowsRead: 3.64K .*
aggregation(SUM, RowsRead): 3644
|
|
====
|
|
---- QUERY
# Inner join between the JDBC table and the non-JDBC table functional.alltypes.
# The binary predicate is pushed to the DataSource, and no predicate is defined
# for the local table.
# NOTE(review): the summed RowsRead of 7301 presumably adds a full 7300-row scan of
# functional.alltypes to the single row read from the JDBC table - confirm.
SELECT
  a.id,
  b.int_col
FROM alltypes_jdbc_datasource AS a
INNER JOIN functional.alltypes AS b
  ON a.id = b.id
WHERE a.id = 1
---- RESULTS
1,1
---- TYPES
INT, INT
---- RUNTIME_PROFILE
row_regex: .*NumExternalDataSourceGetNext: 1 .*
row_regex: .*RowsRead: 1 .*
aggregation(SUM, RowsRead): 7301
|
|
====
|
|
---- QUERY
# Inner join between two JDBC tables.
# The binary predicates are pushed to the two DataSource nodes. The profile checks
# expect 3 rows read by a scan and 6 rows read in total.
SELECT
  a.id,
  b.int_col
FROM alltypes_jdbc_datasource AS a
INNER JOIN alltypes_jdbc_datasource_2 AS b
  ON a.id = b.id
WHERE a.id < 3
GROUP BY
  a.id,
  b.int_col
---- RESULTS
0,0
1,1
2,2
---- TYPES
INT, INT
---- RUNTIME_PROFILE
row_regex: .*NumExternalDataSourceGetNext: 1 .*
row_regex: .*RowsRead: 3 .*
aggregation(SUM, RowsRead): 6
|
|
====
|
|
---- QUERY
# Cross join of the JDBC table with itself.
# The binary predicates are pushed to the two DataSource nodes. Each side is
# limited to ids below 3, so the 9 expected rows fit within the limit of 10.
SELECT
  a.id,
  b.id
FROM alltypes_jdbc_datasource AS a
CROSS JOIN alltypes_jdbc_datasource AS b
WHERE a.id < 3
  AND b.id < 3
ORDER BY
  a.id,
  b.id
LIMIT 10
---- RESULTS
0,0
0,1
0,2
1,0
1,1
1,2
2,0
2,1
2,2
---- TYPES
INT, INT
---- RUNTIME_PROFILE
row_regex: .*NumExternalDataSourceGetNext: 1 .*
row_regex: .*RowsRead: 3 .*
aggregation(SUM, RowsRead): 6
|
|
====
|
|
---- QUERY
# Reads every column of jdbc_decimal_tbl. Expected values are printed with the
# scale declared for each column in that table.
SELECT *
FROM jdbc_decimal_tbl;
---- RESULTS
1234,2222,1.2345678900,0.12345678900000000000000000000000000000,12345.78900,1
2345,111,12.3456789000,0.12345678900000000000000000000000000000,3.14100,1
12345,333,123.4567890000,0.12345678900000000000000000000000000000,11.22000,1
12345,333,1234.5678900000,0.12345678900000000000000000000000000000,0.10000,1
132842,333,12345.6789000000,0.12345678900000000000000000000000000000,0.77889,1
---- TYPES
DECIMAL,DECIMAL,DECIMAL,DECIMAL,DECIMAL,DECIMAL
|
|
====
|
|
---- QUERY
# Reads every column of jdbc_decimal_tbl_2. The table declares wider decimals for
# all columns except d4, so those expected values carry one more fractional digit
# than the ones expected from jdbc_decimal_tbl.
SELECT *
FROM jdbc_decimal_tbl_2;
---- RESULTS
1234.0,2222.0,1.23456789000,0.12345678900000000000000000000000000000,12345.789000,1.0
2345.0,111.0,12.34567890000,0.12345678900000000000000000000000000000,3.141000,1.0
12345.0,333.0,123.45678900000,0.12345678900000000000000000000000000000,11.220000,1.0
12345.0,333.0,1234.56789000000,0.12345678900000000000000000000000000000,0.100000,1.0
132842.0,333.0,12345.67890000000,0.12345678900000000000000000000000000000,0.778890,1.0
---- TYPES
DECIMAL,DECIMAL,DECIMAL,DECIMAL,DECIMAL,DECIMAL
|
|
====
|
|
---- QUERY
# Reading jdbc_decimal_tbl_3 is expected to fail on d3, which that table declares
# as DECIMAL(20,9).
# NOTE(review): the source type in the expected message, DECIMAL(11, 10), matches
# the digit count of the d3 value 1.2345678900 expected from jdbc_decimal_tbl
# rather than a declared column type. If it is derived from the fetched value, this
# regex depends on which row is read first - confirm.
SELECT *
FROM jdbc_decimal_tbl_3;
---- CATCH
row_regex: .*Invalid DECIMAL\(20, 9\) for column d3 since there is possible loss of precision when casting from DECIMAL\(11, 10\).*
|
|
====
|
|
---- QUERY
# Reading jdbc_decimal_tbl_4 is expected to fail on d5, which that table declares
# as DECIMAL(10,6) and which therefore has room for only 4 integer digits.
# NOTE(review): the d5 value 12345.78900 expected from jdbc_decimal_tbl has 5
# integer digits, which is presumably what triggers the error - confirm.
SELECT *
FROM jdbc_decimal_tbl_4;
---- CATCH
row_regex: .*Invalid DECIMAL\(10, 6\) for column d5 since there is possible loss of precision when casting from DECIMAL\(10, 5\).*
|
|
====
|
|
---- QUERY
# Teardown: remove alltypes_jdbc_datasource.
DROP TABLE alltypes_jdbc_datasource;
---- RESULTS
'Table has been dropped.'
====
---- QUERY
# Teardown: remove alltypes_jdbc_datasource_2.
DROP TABLE alltypes_jdbc_datasource_2;
---- RESULTS
'Table has been dropped.'
====
---- QUERY
# Teardown: remove jdbc_decimal_tbl.
DROP TABLE jdbc_decimal_tbl;
---- RESULTS
'Table has been dropped.'
====
---- QUERY
# Teardown: remove jdbc_decimal_tbl_2.
DROP TABLE jdbc_decimal_tbl_2;
---- RESULTS
'Table has been dropped.'
====
---- QUERY
# Teardown: remove jdbc_decimal_tbl_3.
DROP TABLE jdbc_decimal_tbl_3;
---- RESULTS
'Table has been dropped.'
====
---- QUERY
# Teardown: remove jdbc_decimal_tbl_4.
DROP TABLE jdbc_decimal_tbl_4;
---- RESULTS
'Table has been dropped.'
|
|
====
|