Files
impala/testdata/workloads/functional-query/queries/QueryTest/impala-ext-jdbc-tables.test
wzhou-code e50bfa8376 IMPALA-12925: Fix decimal data type for external JDBC table
Decimal type is a primitive data type for Impala. Current code returns
wrong values for columns with decimal data type in external JDBC tables.

This patch fixes wrong values returned from JDBC data source, and
supports pushing down decimal type of predicates to remote database
and remote Impala.
The decimal precision and scale of the columns in external JDBC table
must be no less than the decimal precision and scale of the
corresponding columns in the table of remote database. Otherwise,
Impala fails with an error since it may cause truncation of decimal
data.

Testing:
 - Added Planner test for pushing down decimal type of predicates.
 - Added end-to-end unit-tests for tables with decimal type of columns
   for Postgres, MySQL, and Impala-to-Impala.
 - Passed core-tests.

Change-Id: I8c9d2e0667c42c0e52436b158e3dfe3ec14b9e3b
Reviewed-on: http://gerrit.cloudera.org:8080/21218
Reviewed-by: Michael Smith <michael.smith@cloudera.com>
Reviewed-by: Abhishek Rawat <arawat@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
2024-04-05 09:16:53 +00:00

374 lines
11 KiB
Plaintext

====
---- QUERY
# Create external JDBC table
DROP TABLE IF EXISTS alltypes_jdbc_datasource;
CREATE EXTERNAL TABLE alltypes_jdbc_datasource (
id INT,
bool_col BOOLEAN,
tinyint_col TINYINT,
smallint_col SMALLINT,
int_col INT,
bigint_col BIGINT,
float_col FLOAT,
double_col DOUBLE,
date_string_col STRING,
string_col STRING,
timestamp_col TIMESTAMP)
STORED BY JDBC
TBLPROPERTIES (
"database.type"="IMPALA",
"jdbc.url"="jdbc:impala://$INTERNAL_LISTEN_HOST:21050/functional",
"jdbc.auth"="AuthMech=0",
"jdbc.properties"="MEM_LIMIT=1000000000, MAX_ERRORS = 10000, ENABLED_RUNTIME_FILTER_TYPES=\"BLOOM,MIN_MAX\"",
"jdbc.driver"="com.cloudera.impala.jdbc.Driver",
"driver.url"="$FILESYSTEM_PREFIX/test-warehouse/data-sources/jdbc-drivers/ImpalaJDBC42.jar",
"dbcp.username"="impala",
"dbcp.password"="cloudera",
"table"="alltypes");
---- RESULTS
'Table has been created.'
====
---- QUERY
# Create external JDBC table
DROP TABLE IF EXISTS alltypes_jdbc_datasource_2;
CREATE EXTERNAL TABLE alltypes_jdbc_datasource_2 (
id INT,
bool_col BOOLEAN,
tinyint_col TINYINT,
smallint_col SMALLINT,
int_col INT,
bigint_col BIGINT,
float_col FLOAT,
double_col DOUBLE,
date_string_col STRING,
string_col STRING,
timestamp_col TIMESTAMP)
STORED BY JDBC
TBLPROPERTIES (
"database.type"="IMPALA",
"jdbc.url"="jdbc:impala://$INTERNAL_LISTEN_HOST:21050/functional",
"jdbc.auth"="AuthMech=0",
"jdbc.properties"="QUERY_TIMEOUT_S=600, REQUEST_POOL= \"default-pool\", DEBUG_ACTION",
"jdbc.driver"="com.cloudera.impala.jdbc.Driver",
"driver.url"="$FILESYSTEM_PREFIX/test-warehouse/data-sources/jdbc-drivers/ImpalaJDBC42.jar",
"dbcp.username"="impala",
"dbcp.password"="cloudera",
"table"="alltypes");
---- RESULTS
'Table has been created.'
====
---- QUERY
# Create external JDBC table with decimal type of columns.
# The decimal precisions and scales of decimal columns equal to the decimal precisions
# and scales of the corresponding decimal columns in the remote table.
DROP TABLE IF EXISTS jdbc_decimal_tbl;
CREATE EXTERNAL TABLE jdbc_decimal_tbl (
d1 DECIMAL(9,0),
d2 DECIMAL(10,0),
d3 DECIMAL(20,10),
d4 DECIMAL(38,38),
d5 DECIMAL(10,5),
d6 DECIMAL(9,0))
STORED BY JDBC
TBLPROPERTIES (
"database.type"="IMPALA",
"jdbc.url"="jdbc:impala://$INTERNAL_LISTEN_HOST:21050/functional",
"jdbc.auth"="AuthMech=0",
"jdbc.driver"="com.cloudera.impala.jdbc.Driver",
"driver.url"="$FILESYSTEM_PREFIX/test-warehouse/data-sources/jdbc-drivers/ImpalaJDBC42.jar",
"dbcp.username"="impala",
"dbcp.password"="cloudera",
"table"="decimal_tbl");
---- RESULTS
'Table has been created.'
====
---- QUERY
# Create external JDBC table with decimal type.
# The decimal precisions and scales of decimal columns are greater than or equal to the
# decimal precision and scales of the corresponding decimal columns in the remote table.
DROP TABLE IF EXISTS jdbc_decimal_tbl_2;
CREATE EXTERNAL TABLE jdbc_decimal_tbl_2 (
d1 DECIMAL(10,1),
d2 DECIMAL(11,1),
d3 DECIMAL(21,11),
d4 DECIMAL(38,38),
d5 DECIMAL(11,6),
d6 DECIMAL(10,1))
STORED BY JDBC
TBLPROPERTIES (
"database.type"="IMPALA",
"jdbc.url"="jdbc:impala://$INTERNAL_LISTEN_HOST:21050/functional",
"jdbc.auth"="AuthMech=0",
"jdbc.driver"="com.cloudera.impala.jdbc.Driver",
"driver.url"="$FILESYSTEM_PREFIX/test-warehouse/data-sources/jdbc-drivers/ImpalaJDBC42.jar",
"dbcp.username"="impala",
"dbcp.password"="cloudera",
"table"="decimal_tbl");
---- RESULTS
'Table has been created.'
====
---- QUERY
# Create external JDBC table with decimal type.
# The decimal scales of two columns d3 and d5 are less than the decimal scales of the
# corresponding decimal columns in the remote table, which cause truncation, hence
# exception will be thrown.
DROP TABLE IF EXISTS jdbc_decimal_tbl_3;
CREATE EXTERNAL TABLE jdbc_decimal_tbl_3 (
d1 DECIMAL(9,0),
d2 DECIMAL(10,0),
d3 DECIMAL(20,9),
d4 DECIMAL(38,38),
d5 DECIMAL(10,4),
d6 DECIMAL(9,0))
STORED BY JDBC
TBLPROPERTIES (
"database.type"="IMPALA",
"jdbc.url"="jdbc:impala://$INTERNAL_LISTEN_HOST:21050/functional",
"jdbc.auth"="AuthMech=0",
"jdbc.driver"="com.cloudera.impala.jdbc.Driver",
"driver.url"="$FILESYSTEM_PREFIX/test-warehouse/data-sources/jdbc-drivers/ImpalaJDBC42.jar",
"dbcp.username"="impala",
"dbcp.password"="cloudera",
"table"="decimal_tbl");
---- RESULTS
'Table has been created.'
====
---- QUERY
# Create external JDBC table with decimal type.
# The decimal scale of column d5 is larger than the decimal scale of the corresponding
# decimal columns in the remote table, but their decimal precisions are same, which cause
# truncation, hence exception will be thrown.
DROP TABLE IF EXISTS jdbc_decimal_tbl_4;
CREATE EXTERNAL TABLE jdbc_decimal_tbl_4 (
d1 DECIMAL(9,0),
d2 DECIMAL(10,0),
d3 DECIMAL(20,10),
d4 DECIMAL(38,38),
d5 DECIMAL(10,6),
d6 DECIMAL(9,0))
STORED BY JDBC
TBLPROPERTIES (
"database.type"="IMPALA",
"jdbc.url"="jdbc:impala://$INTERNAL_LISTEN_HOST:21050/functional",
"jdbc.auth"="AuthMech=0",
"jdbc.driver"="com.cloudera.impala.jdbc.Driver",
"driver.url"="$FILESYSTEM_PREFIX/test-warehouse/data-sources/jdbc-drivers/ImpalaJDBC42.jar",
"dbcp.username"="impala",
"dbcp.password"="cloudera",
"table"="decimal_tbl");
---- RESULTS
'Table has been created.'
====
---- QUERY
# Test the jdbc DataSource
# count(*) with a predicate evaluated by Impala
# Binary predicates are pushed to the external jdbc DataSource.
select count(*) from alltypes_jdbc_datasource
where float_col = 0 and string_col is not NULL
---- RESULTS
730
---- TYPES
BIGINT
---- RUNTIME_PROFILE
row_regex: .*NumExternalDataSourceGetNext: 1 .*
row_regex: .*RowsRead: 730 .*
aggregation(SUM, RowsRead): 730
====
---- QUERY
# count(*) with no predicates has no materialized slots
select count(*) from alltypes_jdbc_datasource
---- RESULTS
7300
---- TYPES
BIGINT
---- RUNTIME_PROFILE
row_regex: .*NumExternalDataSourceGetNext: 1 .*
row_regex: .*RowsRead: 7.30K .*
aggregation(SUM, RowsRead): 7300
====
---- QUERY
# Gets all types including a row with a NULL value. The binary predicates are pushed to
# the DataSource, "order by" and "limit" are evaluated locally.
select *
from alltypes_jdbc_datasource
where id > 10 and int_col< 5 order by id limit 5 offset 0
---- RESULTS
11,false,1,1,1,10,1.100000023841858,10.1,'01/02/09','1',2009-01-02 00:11:00.450000000
12,true,2,2,2,20,2.200000047683716,20.2,'01/02/09','2',2009-01-02 00:12:00.460000000
13,false,3,3,3,30,3.299999952316284,30.3,'01/02/09','3',2009-01-02 00:13:00.480000000
14,true,4,4,4,40,4.400000095367432,40.4,'01/02/09','4',2009-01-02 00:14:00.510000000
20,true,0,0,0,0,0,0,'01/03/09','0',2009-01-03 00:20:00.900000000
---- TYPES
INT, BOOLEAN, TINYINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, STRING, STRING, TIMESTAMP
---- RUNTIME_PROFILE
row_regex: .*NumExternalDataSourceGetNext: 4 .*
row_regex: .*RowsRead: 3.64K .*
aggregation(SUM, RowsRead): 3644
====
---- QUERY
# Gets specified columns.
# The binary predicates are pushed to the DataSource, "order by" and "limit" are
# evaluated locally.
select id, bool_col, smallint_col, float_col, double_col, date_string_col
from alltypes_jdbc_datasource
where id > 10 and int_col< 5 order by id limit 5 offset 0
---- RESULTS
11,false,1,1.100000023841858,10.1,'01/02/09'
12,true,2,2.200000047683716,20.2,'01/02/09'
13,false,3,3.299999952316284,30.3,'01/02/09'
14,true,4,4.400000095367432,40.4,'01/02/09'
20,true,0,0,0,'01/03/09'
---- TYPES
INT, BOOLEAN, SMALLINT, FLOAT, DOUBLE, STRING
---- RUNTIME_PROFILE
row_regex: .*NumExternalDataSourceGetNext: 4 .*
row_regex: .*RowsRead: 3.64K .*
aggregation(SUM, RowsRead): 3644
====
---- QUERY
# Gets specified columns from external jdbc table with case sensitive column names
# and table name.
# The binary predicates are pushed to the DataSource, "order by" and "limit" are
# evaluated locally.
select id, bool_col, smallint_col, float_col, double_col, date_string_col
from alltypes_jdbc_datasource_2
where id > 10 and int_col< 5 order by id limit 5 offset 0
---- RESULTS
11,false,1,1.100000023841858,10.1,'01/02/09'
12,true,2,2.200000047683716,20.2,'01/02/09'
13,false,3,3.299999952316284,30.3,'01/02/09'
14,true,4,4.400000095367432,40.4,'01/02/09'
20,true,0,0,0,'01/03/09'
---- TYPES
INT, BOOLEAN, SMALLINT, FLOAT, DOUBLE, STRING
---- RUNTIME_PROFILE
row_regex: .*NumExternalDataSourceGetNext: 4 .*
row_regex: .*RowsRead: 3.64K .*
aggregation(SUM, RowsRead): 3644
====
---- QUERY
# Inner join with a non jdbc table
# The binary predicates are pushed to the DataSource, but no predicate defined for
# local table.
select a.id, b.int_col
from alltypes_jdbc_datasource a inner join functional.alltypes b on (a.id = b.id)
where a.id = 1
---- RESULTS
1,1
---- TYPES
INT, INT
---- RUNTIME_PROFILE
row_regex: .*NumExternalDataSourceGetNext: 1 .*
row_regex: .*RowsRead: 1 .*
aggregation(SUM, RowsRead): 7301
====
---- QUERY
# Inner join with another jdbc table
# The binary predicates are pushed to the two DataSource Nodes.
select a.id, b.int_col
from alltypes_jdbc_datasource a inner join alltypes_jdbc_datasource_2 b on (a.id = b.id)
where a.id < 3 group by a.id, b.int_col
---- RESULTS
0,0
1,1
2,2
---- TYPES
INT, INT
---- RUNTIME_PROFILE
row_regex: .*NumExternalDataSourceGetNext: 1 .*
row_regex: .*RowsRead: 3 .*
aggregation(SUM, RowsRead): 6
====
---- QUERY
# Cross join
# The binary predicates are pushed to the two DataSource Nodes.
select a.id, b.id
from alltypes_jdbc_datasource a cross join alltypes_jdbc_datasource b
where (a.id < 3 and b.id < 3)
order by a.id, b.id limit 10
---- RESULTS
0,0
0,1
0,2
1,0
1,1
1,2
2,0
2,1
2,2
---- TYPES
INT, INT
---- RUNTIME_PROFILE
row_regex: .*NumExternalDataSourceGetNext: 1 .*
row_regex: .*RowsRead: 3 .*
aggregation(SUM, RowsRead): 6
====
---- QUERY
select * from jdbc_decimal_tbl;
---- RESULTS
1234,2222,1.2345678900,0.12345678900000000000000000000000000000,12345.78900,1
2345,111,12.3456789000,0.12345678900000000000000000000000000000,3.14100,1
12345,333,123.4567890000,0.12345678900000000000000000000000000000,11.22000,1
12345,333,1234.5678900000,0.12345678900000000000000000000000000000,0.10000,1
132842,333,12345.6789000000,0.12345678900000000000000000000000000000,0.77889,1
---- TYPES
DECIMAL,DECIMAL,DECIMAL,DECIMAL,DECIMAL,DECIMAL
====
---- QUERY
select * from jdbc_decimal_tbl_2;
---- RESULTS
1234.0,2222.0,1.23456789000,0.12345678900000000000000000000000000000,12345.789000,1.0
2345.0,111.0,12.34567890000,0.12345678900000000000000000000000000000,3.141000,1.0
12345.0,333.0,123.45678900000,0.12345678900000000000000000000000000000,11.220000,1.0
12345.0,333.0,1234.56789000000,0.12345678900000000000000000000000000000,0.100000,1.0
132842.0,333.0,12345.67890000000,0.12345678900000000000000000000000000000,0.778890,1.0
---- TYPES
DECIMAL,DECIMAL,DECIMAL,DECIMAL,DECIMAL,DECIMAL
====
---- QUERY
select * from jdbc_decimal_tbl_3;
---- CATCH
row_regex: .*Invalid DECIMAL\(20, 9\) for column d3 since there is possible loss of precision when casting from DECIMAL\(11, 10\).*
====
---- QUERY
select * from jdbc_decimal_tbl_4;
---- CATCH
row_regex: .*Invalid DECIMAL\(10, 6\) for column d5 since there is possible loss of precision when casting from DECIMAL\(10, 5\).*
====
---- QUERY
# Drop table
DROP TABLE alltypes_jdbc_datasource;
---- RESULTS
'Table has been dropped.'
====
---- QUERY
# Drop table
DROP TABLE alltypes_jdbc_datasource_2;
---- RESULTS
'Table has been dropped.'
====
---- QUERY
# Drop table
DROP TABLE jdbc_decimal_tbl;
---- RESULTS
'Table has been dropped.'
====
---- QUERY
# Drop table
DROP TABLE jdbc_decimal_tbl_2;
---- RESULTS
'Table has been dropped.'
====
---- QUERY
# Drop table
DROP TABLE jdbc_decimal_tbl_3;
---- RESULTS
'Table has been dropped.'
====
---- QUERY
# Drop table
DROP TABLE jdbc_decimal_tbl_4;
---- RESULTS
'Table has been dropped.'
====