IMPALA-13869: Support for 'hive.sql.query' property for Hive JDBC tables

This patch adds support for the hive.sql.query table property in Hive
JDBC tables accessed through Impala. Impala has support for Hive
JDBC tables using the hive.sql.table property, which limits users
to simple table access. However, many use cases demand the ability
to expose complex joins, filters, aggregations, or derived columns
as external views. The hive.sql.query property lets a table be
backed by a custom SQL query that returns a virtual table
(subquery) instead of pointing to a physical table. These use
cases cannot be achieved with the hive.sql.table property alone.
This change allows Impala to:
 • Interact with views or complex queries defined on external
 systems without needing schema-level access to base tables.
 • Expose materialized logic (such as filters, joins, or
 transformations) via Hive to Impala consumers in a secure,
 abstracted way.
 • Better align with data virtualization use cases where
 physical data location and structure should be hidden from
 the querying engine.
This patch also lays the groundwork for future enhancements such
as predicate pushdown and performance optimizations for Hive
JDBC tables backed by queries.

Testing: End-to-end tests are included in
test_ext_data_sources.py.

Change-Id: I039fcc1e008233a3eeed8d09554195fdb8c8706b
Reviewed-on: http://gerrit.cloudera.org:8080/22865
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
This commit is contained in:
pranav.lodha
2025-05-07 17:48:01 +05:30
committed by Impala Public Jenkins
parent a12e49e38d
commit 7f77176970
10 changed files with 1300 additions and 76 deletions

View File

@@ -18,6 +18,7 @@
package org.apache.impala.catalog;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import java.util.List;
import java.util.concurrent.TimeUnit;
import java.util.HashMap;
@@ -227,12 +228,20 @@ public class TableLoader {
} else {
impala_tbl_props.put("dbcp.username", val);
}
val = msTbl.getParameters().get("hive.sql.table");
if (val == null) {
throw new TableLoadingException("Required parameter: hive.sql.table" +
"is missing.");
// Ensure either 'hive.sql.table' or 'hive.sql.query' is set
String table = msTbl.getParameters().get("hive.sql.table");
String query = msTbl.getParameters().get("hive.sql.query");
if (Strings.isNullOrEmpty(table) && Strings.isNullOrEmpty(query)) {
throw new TableLoadingException("Either 'hive.sql.table' or" +
" 'hive.sql.query' must be set.");
} else if (!Strings.isNullOrEmpty(table) && !Strings.isNullOrEmpty(query)) {
throw new TableLoadingException("Only one of 'hive.sql.table' or" +
" 'hive.sql.query' should be set.");
} else if (!Strings.isNullOrEmpty(table)) {
impala_tbl_props.put("table", table);
} else {
impala_tbl_props.put("table", val);
impala_tbl_props.put("query", query);
}
val = msTbl.getParameters().get("hive.sql.jdbc.driver");
if (val == null) {

View File

@@ -57,6 +57,7 @@ import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
@@ -320,46 +321,62 @@ public class JdbcDataSource implements ExternalDataSource {
Map<String, String> columnMapping = getColumnMapping(tableConfig_
.get(JdbcStorageConfig.COLUMN_MAPPING.getPropertyName()));
// Build query statement
StringBuilder sb = new StringBuilder("SELECT ");
String project;
// If cols size equals to 0, it is 'select count(*) from tbl' statement.
if (schema_.getColsSize() == 0) {
project = "*";
} else {
String driverClass = JdbcStorageConfigManager.getConfigValue(
JdbcStorageConfig.JDBC_DRIVER_CLASS, tableConfig_);
final String quoteChar;
if (driverClass != null && (driverClass.toLowerCase().contains("impala") ||
driverClass.toLowerCase().contains("hive") ||
driverClass.toLowerCase().contains("mysql"))) {
quoteChar = "`";
String query;
String tableName = tableConfig_.get(JdbcStorageConfig.TABLE.getPropertyName());
// Check if 'table' property is not null or empty
if (!Strings.isNullOrEmpty(tableName)) {
StringBuilder sb = new StringBuilder("SELECT ");
String project;
// If cols size equals to 0, it is 'select count(*) from tbl' statement.
if (schema_.getColsSize() == 0) {
project = "*";
} else {
quoteChar = "\"";
String driverClass = JdbcStorageConfigManager.getConfigValue(
JdbcStorageConfig.JDBC_DRIVER_CLASS, tableConfig_);
final String quoteChar;
if (driverClass != null && (driverClass.toLowerCase().contains("impala") ||
driverClass.toLowerCase().contains("hive") ||
driverClass.toLowerCase().contains("mysql"))) {
quoteChar = "`";
} else {
quoteChar = "\"";
}
project = schema_.getCols().stream()
.map(TColumnDesc::getName)
.map(name -> columnMapping.containsKey(name)
? columnMapping.get(name)
: quoteChar + name + quoteChar)
.collect(Collectors.joining(", "));
}
sb.append(project);
sb.append(" FROM ");
// Make jdbc table name to be quoted with double quotes if
// columnMapping is not empty
if (!columnMapping.isEmpty()) {
tableName = dbAccessor_.getCaseSensitiveName(tableName);
}
sb.append(tableName);
String condition = QueryConditionUtil
.buildCondition(params.getPredicates(), columnMapping, dbAccessor_);
if (StringUtils.isNotBlank(condition)) {
sb.append(" WHERE ").append(condition);
}
project = schema_.getCols().stream()
.map(TColumnDesc::getName)
.map(name -> columnMapping.containsKey(name)
? columnMapping.get(name)
: quoteChar + name + quoteChar)
.collect(Collectors.joining(", "));
query = sb.toString();
} else {
// Use 'query' property if 'table' is null
query = tableConfig_.get(JdbcStorageConfig.QUERY.getPropertyName());
Preconditions.checkState(!Strings.isNullOrEmpty(query));
if (Strings.isNullOrEmpty(query)) {
throw new IllegalStateException("Generated query is null or empty");
}
}
sb.append(project);
sb.append(" FROM ");
// Make jdbc table name to be quoted with double quotes if columnMapping is not empty
String jdbcTableName = tableConfig_.get(JdbcStorageConfig.TABLE.getPropertyName());
if (!columnMapping.isEmpty()) {
jdbcTableName = dbAccessor_.getCaseSensitiveName(jdbcTableName);
}
sb.append(jdbcTableName);
String condition = QueryConditionUtil
.buildCondition(params.getPredicates(), columnMapping, dbAccessor_);
if (StringUtils.isNotBlank(condition)) {
sb.append(" WHERE ").append(condition);
}
// Execute query and get iterator
tableConfig_.set(JdbcStorageConfig.QUERY.getPropertyName(), sb.toString());
LOG.trace("JDBC Query: " + sb.toString());
// Store the generated query
tableConfig_.set(JdbcStorageConfig.QUERY.getPropertyName(), query);
LOG.trace("JDBC Query: " + query);
if (schema_.getColsSize() != 0) {
int limit = -1;

View File

@@ -25,5 +25,6 @@ public enum DatabaseType {
POSTGRES,
MSSQL,
JETHRO_DATA,
IMPALA
IMPALA,
HIVE
}

View File

@@ -55,7 +55,7 @@ public enum JdbcStorageConfig {
// User need to specify either “table” or “query” in the create table statement.
QUERY("query", false),
// Name of the external table to be mapped in Impala.
TABLE("table", true),
TABLE("table", false),
// Mapping of column names between external table and Impala.
COLUMN_MAPPING("column.mapping", false);

View File

@@ -17,6 +17,7 @@
package org.apache.impala.extdatasource.jdbc.conf;
import com.google.common.base.Strings;
import java.util.Map;
import java.util.Map.Entry;
@@ -96,10 +97,23 @@ public class JdbcStorageConfigManager {
}
private static void checkRequiredPropertiesAreDefined(Map<String, String> props) {
DatabaseType dbType = null;
try {
String dbTypeName = props.get(JdbcStorageConfig.DATABASE_TYPE.getPropertyName());
DatabaseType.valueOf(dbTypeName.toUpperCase());
dbType = DatabaseType.valueOf(dbTypeName.toUpperCase());
if (dbType != DatabaseType.HIVE) {
String table = props.get(JdbcStorageConfig.TABLE.getPropertyName());
String query = props.get(JdbcStorageConfig.QUERY.getPropertyName());
if (Strings.isNullOrEmpty(table) && Strings.isNullOrEmpty(query)) {
throw new IllegalArgumentException(
"For JDBC tables, either 'table' or 'query' property must be set.");
}
if (!Strings.isNullOrEmpty(table) && !Strings.isNullOrEmpty(query)) {
throw new IllegalArgumentException("Only one of 'hive.sql.table' or" +
" 'hive.sql.query' should be set.");
}
}
} catch (Exception e) {
throw new IllegalArgumentException("Unknown database type.", e);
}

View File

@@ -107,51 +107,52 @@ public class JdbcRecordIterator {
}
Preconditions.checkState(type.getTypesSize() == 1);
TScalarType scalarType = type.types.get(0).scalar_type;
String columnName = colDescs.get(i).getName();
try {
Object value = rs.getObject(i + 1);
Object value = rs.getObject(columnName);
if (value == null) {
colData.addToIs_null(true);
continue;
}
switch (scalarType.type) {
case TINYINT:
colData.addToByte_vals(rs.getByte(i + 1));
colData.addToByte_vals(rs.getByte(columnName));
break;
case SMALLINT:
colData.addToShort_vals(rs.getShort(i + 1));
colData.addToShort_vals(rs.getShort(columnName));
break;
case INT:
colData.addToInt_vals(rs.getInt(i + 1));
colData.addToInt_vals(rs.getInt(columnName));
break;
case DATE:
LocalDate localDate = Instant.ofEpochMilli(rs.getDate(i + 1).getTime())
LocalDate localDate = Instant.ofEpochMilli(rs.getDate(columnName).getTime())
.atZone(ZoneId.systemDefault())
.toLocalDate();
colData.addToInt_vals((int) localDate.toEpochDay());
break;
case BIGINT:
colData.addToLong_vals(rs.getLong(i + 1));
colData.addToLong_vals(rs.getLong(columnName));
break;
case DOUBLE:
colData.addToDouble_vals(rs.getDouble(i + 1));
colData.addToDouble_vals(rs.getDouble(columnName));
break;
case FLOAT:
colData.addToDouble_vals(rs.getFloat(i + 1));
colData.addToDouble_vals(rs.getFloat(columnName));
break;
case STRING:
colData.addToString_vals(rs.getString(i + 1));
colData.addToString_vals(rs.getString(columnName));
break;
case BOOLEAN:
colData.addToBool_vals(rs.getBoolean(i + 1));
colData.addToBool_vals(rs.getBoolean(columnName));
break;
case TIMESTAMP:
// Use UTC time zone instead of system default time zone
colData.addToBinary_vals(
SerializationUtils.encodeTimestamp(rs.getTimestamp(i + 1,
SerializationUtils.encodeTimestamp(rs.getTimestamp(columnName,
Calendar.getInstance(TimeZone.getTimeZone(ZoneOffset.UTC)))));
break;
case DECIMAL:
BigDecimal val = rs.getBigDecimal(i + 1);
BigDecimal val = rs.getBigDecimal(columnName);
int valPrecision = val.precision();
int valScale = val.scale();
// Check if there is enough precision and scale in the destination decimal.

View File

@@ -233,6 +233,376 @@ select * from country_keystore_mysql where name IN ('India', 'USA');
INT, STRING, BOOLEAN, SMALLINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, DATE, STRING, TIMESTAMP
====
---- QUERY
select * from country_mysql_query;
---- RESULTS
1,'India',true,10,100,1000,10000,1.100000023841858,1.11,2024-01-01,'IN',2024-01-01 10:00:00
2,'Russia',false,20,200,2000,20000,2.200000047683716,2.22,2024-02-01,'RU',2024-02-01 11:00:00
3,'USA',true,30,300,3000,30000,3.299999952316284,3.33,2024-03-01,'US',2024-03-01 12:00:00
---- TYPES
INT, STRING, BOOLEAN, SMALLINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, DATE, STRING, TIMESTAMP
====
---- QUERY
select * from country_keystore_mysql_query;
---- RESULTS
1,'India',true,10,100,1000,10000,1.100000023841858,1.11,2024-01-01,'IN',2024-01-01 10:00:00
2,'Russia',false,20,200,2000,20000,2.200000047683716,2.22,2024-02-01,'RU',2024-02-01 11:00:00
3,'USA',true,30,300,3000,30000,3.299999952316284,3.33,2024-03-01,'US',2024-03-01 12:00:00
---- TYPES
INT, STRING, BOOLEAN, SMALLINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, DATE, STRING, TIMESTAMP
====
---- QUERY
select id, name, date_col from country_mysql_query;
---- RESULTS
1,'India',2024-01-01
2,'Russia',2024-02-01
3,'USA',2024-03-01
---- TYPES
INT, STRING, DATE
====
---- QUERY
select id, name, date_col from country_keystore_mysql_query;
---- RESULTS
1,'India',2024-01-01
2,'Russia',2024-02-01
3,'USA',2024-03-01
---- TYPES
INT, STRING, DATE
====
---- QUERY
select count(*) from country_mysql_query;
---- RESULTS
3
---- TYPES
BIGINT
====
---- QUERY
select count(*) from country_keystore_mysql_query;
---- RESULTS
3
---- TYPES
BIGINT
====
---- QUERY
select distinct name from country_mysql_query;
---- RESULTS
'India'
'Russia'
'USA'
---- TYPES
STRING
====
---- QUERY
select distinct name from country_keystore_mysql_query;
---- RESULTS
'India'
'Russia'
'USA'
---- TYPES
STRING
====
---- QUERY
# Check for null values
select * from country_mysql_query where string_col IS NULL;
---- RESULTS
---- TYPES
INT, STRING, BOOLEAN, SMALLINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, DATE, STRING, TIMESTAMP
====
---- QUERY
select * from country_keystore_mysql_query where string_col IS NULL;
---- RESULTS
---- TYPES
INT, STRING, BOOLEAN, SMALLINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, DATE, STRING, TIMESTAMP
====
---- QUERY
# Order by integer column
select id, name, int_col from country_mysql_query order by int_col DESC;
---- RESULTS
3,'USA',3000
2,'Russia',2000
1,'India',1000
---- TYPES
INT, STRING, INT
====
---- QUERY
select id, name, int_col from country_keystore_mysql_query order by int_col DESC;
---- RESULTS
3,'USA',3000
2,'Russia',2000
1,'India',1000
---- TYPES
INT, STRING, INT
====
---- QUERY
# Select using case statement
select id, name, case when bool_col then 'Active' else 'Inactive' end as status from country_mysql_query;
---- RESULTS
1,'India','Active'
2,'Russia','Inactive'
3,'USA','Active'
---- TYPES
INT, STRING, STRING
====
---- QUERY
select id, name, case when bool_col then 'Active' else 'Inactive' end as status from country_keystore_mysql_query;
---- RESULTS
1,'India','Active'
2,'Russia','Inactive'
3,'USA','Active'
---- TYPES
INT, STRING, STRING
====
---- QUERY
# Check for minimum date
select min(date_col) from country_mysql_query;
---- RESULTS
2024-01-01
---- TYPES
DATE
====
---- QUERY
select min(date_col) from country_keystore_mysql_query;
---- RESULTS
2024-01-01
---- TYPES
DATE
====
---- QUERY
# Join with a self table alias
select a.id, a.name, b.name from country_mysql_query a join country_mysql_query b on a.id <> b.id;
---- RESULTS
1,'India','Russia'
1,'India','USA'
2,'Russia','India'
2,'Russia','USA'
3,'USA','India'
3,'USA','Russia'
---- TYPES
INT, STRING, STRING
====
---- QUERY
select a.id, a.name, b.name from country_keystore_mysql_query a join country_keystore_mysql_query b on
a.id <> b.id;
---- RESULTS
1,'India','Russia'
1,'India','USA'
2,'Russia','India'
2,'Russia','USA'
3,'USA','India'
3,'USA','Russia'
---- TYPES
INT, STRING, STRING
====
---- QUERY
# Left join with another table
select a.id, a.name, b.string_col from country_mysql_query a left join country_keystore_mysql_query b on
a.id = b.id;
---- RESULTS
1,'India','IN'
2,'Russia','RU'
3,'USA','US'
---- TYPES
INT, STRING, STRING
====
---- QUERY
select a.id, a.name, b.string_col from country_mysql a left join country_mysql_query b on
a.id = b.id;
---- RESULTS
1,'India','IN'
2,'Russia','RU'
3,'USA','US'
---- TYPES
INT, STRING, STRING
====
---- QUERY
select a.id, a.name, b.string_col from country_keystore_mysql a left join country_mysql_query b on
a.id = b.id;
---- RESULTS
1,'India','IN'
2,'Russia','RU'
3,'USA','US'
---- TYPES
INT, STRING, STRING
====
---- QUERY
select a.id, a.name, b.string_col from country_keystore_mysql_query a left join country_mysql b on
a.id = b.id;
---- RESULTS
1,'India','IN'
2,'Russia','RU'
3,'USA','US'
---- TYPES
INT, STRING, STRING
====
---- QUERY
select a.id, a.name, b.string_col from country_keystore_mysql_query a left join country_mysql_query b on
a.id = b.id;
---- RESULTS
1,'India','IN'
2,'Russia','RU'
3,'USA','US'
---- TYPES
INT, STRING, STRING
====
---- QUERY
select a.id, a.name, b.string_col from country_mysql_query a left join country_mysql b on
a.id = b.id;
---- RESULTS
1,'India','IN'
2,'Russia','RU'
3,'USA','US'
---- TYPES
INT, STRING, STRING
====
---- QUERY
select a.id, a.name, b.string_col from country_mysql_query a left join country_mysql_query b on
a.id = b.id;
---- RESULTS
1,'India','IN'
2,'Russia','RU'
3,'USA','US'
---- TYPES
INT, STRING, STRING
====
---- QUERY
# Right join with another table
select a.id, a.name, b.string_col from country_mysql_query a right join country_keystore_mysql_query b on
a.id = b.id;
---- RESULTS
1,'India','IN'
2,'Russia','RU'
3,'USA','US'
---- TYPES
INT, STRING, STRING
====
---- QUERY
select a.id, a.name, b.string_col from country_keystore_mysql_query a right join country_mysql_query b on
a.id = b.id;
---- RESULTS
1,'India','IN'
2,'Russia','RU'
3,'USA','US'
---- TYPES
INT, STRING, STRING
====
---- QUERY
select a.id, a.name, b.string_col from country_mysql a right join country_keystore_mysql_query b on
a.id = b.id;
---- RESULTS
1,'India','IN'
2,'Russia','RU'
3,'USA','US'
---- TYPES
INT, STRING, STRING
====
---- QUERY
select a.id, a.name, b.string_col from country_mysql a right join country_mysql_query b on
a.id = b.id;
---- RESULTS
1,'India','IN'
2,'Russia','RU'
3,'USA','US'
---- TYPES
INT, STRING, STRING
====
---- QUERY
select a.id, a.name, b.string_col from country_keystore_mysql a right join country_mysql_query b on
a.id = b.id;
---- RESULTS
1,'India','IN'
2,'Russia','RU'
3,'USA','US'
---- TYPES
INT, STRING, STRING
====
---- QUERY
select a.id, a.name, b.string_col from country_keystore_mysql a right join country_keystore_mysql_query b on
a.id = b.id;
---- RESULTS
1,'India','IN'
2,'Russia','RU'
3,'USA','US'
---- TYPES
INT, STRING, STRING
====
---- QUERY
# Full outer join
select a.id, a.name, b.string_col from country_mysql_query a full outer join country_keystore_mysql_query b on
a.id = b.id;
---- RESULTS
1,'India','IN'
2,'Russia','RU'
3,'USA','US'
---- TYPES
INT, STRING, STRING
====
---- QUERY
select a.id, a.name, b.string_col from country_keystore_mysql_query a full outer join country_mysql_query b on
a.id = b.id;
---- RESULTS
1,'India','IN'
2,'Russia','RU'
3,'USA','US'
---- TYPES
INT, STRING, STRING
====
---- QUERY
select a.id, a.name, b.string_col from country_mysql a full outer join country_keystore_mysql_query b on
a.id = b.id;
---- RESULTS
1,'India','IN'
2,'Russia','RU'
3,'USA','US'
---- TYPES
INT, STRING, STRING
====
---- QUERY
select a.id, a.name, b.string_col from country_mysql a full outer join country_mysql_query b on
a.id = b.id;
---- RESULTS
1,'India','IN'
2,'Russia','RU'
3,'USA','US'
---- TYPES
INT, STRING, STRING
====
---- QUERY
select a.id, a.name, b.string_col from country_keystore_mysql a full outer join country_mysql_query b on
a.id = b.id;
---- RESULTS
1,'India','IN'
2,'Russia','RU'
3,'USA','US'
---- TYPES
INT, STRING, STRING
====
---- QUERY
select a.id, a.name, b.string_col from country_keystore_mysql a full outer join country_keystore_mysql_query b on
a.id = b.id;
---- RESULTS
1,'India','IN'
2,'Russia','RU'
3,'USA','US'
---- TYPES
INT, STRING, STRING
====
---- QUERY
# Using IN clause
select * from country_mysql_query where name IN ('India', 'USA');
---- RESULTS
1,'India',true,10,100,1000,10000,1.100000023841858,1.11,2024-01-01,'IN',2024-01-01 10:00:00
3,'USA',true,30,300,3000,30000,3.299999952316284,3.33,2024-03-01,'US',2024-03-01 12:00:00
---- TYPES
INT, STRING, BOOLEAN, SMALLINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, DATE, STRING, TIMESTAMP
====
---- QUERY
select * from country_keystore_mysql_query where name IN ('India', 'USA');
---- RESULTS
1,'India',true,10,100,1000,10000,1.100000023841858,1.11,2024-01-01,'IN',2024-01-01 10:00:00
3,'USA',true,30,300,3000,30000,3.299999952316284,3.33,2024-03-01,'US',2024-03-01 12:00:00
---- TYPES
INT, STRING, BOOLEAN, SMALLINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, DATE, STRING, TIMESTAMP
====
---- QUERY
select * from quoted_col;
---- RESULTS
1,'India',true,10,100,1000,10000,1.100000023841858,1.11,2024-01-01,'IN',2024-01-01 10:00:00

View File

@@ -233,6 +233,376 @@ select * from country_keystore_postgres where name IN ('India', 'USA');
INT, STRING, BOOLEAN, SMALLINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, DATE, STRING, TIMESTAMP
====
---- QUERY
select * from country_postgres_query;
---- RESULTS
1,'India',true,10,100,1000,10000,1.100000023841858,1.11,2024-01-01,'IN',2024-01-01 10:00:00
2,'Russia',false,20,200,2000,20000,2.200000047683716,2.22,2024-02-01,'RU',2024-02-01 11:00:00
3,'USA',true,30,300,3000,30000,3.299999952316284,3.33,2024-03-01,'US',2024-03-01 12:00:00
---- TYPES
INT, STRING, BOOLEAN, SMALLINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, DATE, STRING, TIMESTAMP
====
---- QUERY
select * from country_keystore_postgres_query;
---- RESULTS
1,'India',true,10,100,1000,10000,1.100000023841858,1.11,2024-01-01,'IN',2024-01-01 10:00:00
2,'Russia',false,20,200,2000,20000,2.200000047683716,2.22,2024-02-01,'RU',2024-02-01 11:00:00
3,'USA',true,30,300,3000,30000,3.299999952316284,3.33,2024-03-01,'US',2024-03-01 12:00:00
---- TYPES
INT, STRING, BOOLEAN, SMALLINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, DATE, STRING, TIMESTAMP
====
---- QUERY
select id, name, date_col from country_postgres_query;
---- RESULTS
1,'India',2024-01-01
2,'Russia',2024-02-01
3,'USA',2024-03-01
---- TYPES
INT, STRING, DATE
====
---- QUERY
select id, name, date_col from country_keystore_postgres_query;
---- RESULTS
1,'India',2024-01-01
2,'Russia',2024-02-01
3,'USA',2024-03-01
---- TYPES
INT, STRING, DATE
====
---- QUERY
select count(*) from country_postgres_query;
---- RESULTS
3
---- TYPES
BIGINT
====
---- QUERY
select count(*) from country_keystore_postgres_query;
---- RESULTS
3
---- TYPES
BIGINT
====
---- QUERY
select distinct name from country_postgres_query;
---- RESULTS
'India'
'Russia'
'USA'
---- TYPES
STRING
====
---- QUERY
select distinct name from country_keystore_postgres_query;
---- RESULTS
'India'
'Russia'
'USA'
---- TYPES
STRING
====
---- QUERY
# Check for null values
select * from country_postgres_query where string_col IS NULL;
---- RESULTS
---- TYPES
INT, STRING, BOOLEAN, SMALLINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, DATE, STRING, TIMESTAMP
====
---- QUERY
select * from country_keystore_postgres_query where string_col IS NULL;
---- RESULTS
---- TYPES
INT, STRING, BOOLEAN, SMALLINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, DATE, STRING, TIMESTAMP
====
---- QUERY
# Order by integer column
select id, name, int_col from country_postgres_query order by int_col DESC;
---- RESULTS
3,'USA',3000
2,'Russia',2000
1,'India',1000
---- TYPES
INT, STRING, INT
====
---- QUERY
select id, name, int_col from country_keystore_postgres_query order by int_col DESC;
---- RESULTS
3,'USA',3000
2,'Russia',2000
1,'India',1000
---- TYPES
INT, STRING, INT
====
---- QUERY
# Select using case statement
select id, name, case when bool_col then 'Active' else 'Inactive' end as status from country_postgres_query;
---- RESULTS
1,'India','Active'
2,'Russia','Inactive'
3,'USA','Active'
---- TYPES
INT, STRING, STRING
====
---- QUERY
select id, name, case when bool_col then 'Active' else 'Inactive' end as status from country_keystore_postgres_query;
---- RESULTS
1,'India','Active'
2,'Russia','Inactive'
3,'USA','Active'
---- TYPES
INT, STRING, STRING
====
---- QUERY
# Check for minimum date
select min(date_col) from country_postgres_query;
---- RESULTS
2024-01-01
---- TYPES
DATE
====
---- QUERY
select min(date_col) from country_keystore_postgres_query;
---- RESULTS
2024-01-01
---- TYPES
DATE
====
---- QUERY
# Join with a self table alias
select a.id, a.name, b.name from country_postgres_query a join country_postgres_query b on a.id <> b.id;
---- RESULTS
1,'India','Russia'
1,'India','USA'
2,'Russia','India'
2,'Russia','USA'
3,'USA','India'
3,'USA','Russia'
---- TYPES
INT, STRING, STRING
====
---- QUERY
select a.id, a.name, b.name from country_keystore_postgres_query a join country_keystore_postgres_query b on
a.id <> b.id;
---- RESULTS
1,'India','Russia'
1,'India','USA'
2,'Russia','India'
2,'Russia','USA'
3,'USA','India'
3,'USA','Russia'
---- TYPES
INT, STRING, STRING
====
---- QUERY
# Left join with another table
select a.id, a.name, b.string_col from country_postgres_query a left join country_keystore_postgres_query b on
a.id = b.id;
---- RESULTS
1,'India','IN'
2,'Russia','RU'
3,'USA','US'
---- TYPES
INT, STRING, STRING
====
---- QUERY
select a.id, a.name, b.string_col from country_postgres a left join country_postgres_query b on
a.id = b.id;
---- RESULTS
1,'India','IN'
2,'Russia','RU'
3,'USA','US'
---- TYPES
INT, STRING, STRING
====
---- QUERY
select a.id, a.name, b.string_col from country_keystore_postgres a left join country_postgres_query b on
a.id = b.id;
---- RESULTS
1,'India','IN'
2,'Russia','RU'
3,'USA','US'
---- TYPES
INT, STRING, STRING
====
---- QUERY
select a.id, a.name, b.string_col from country_keystore_postgres_query a left join country_postgres b on
a.id = b.id;
---- RESULTS
1,'India','IN'
2,'Russia','RU'
3,'USA','US'
---- TYPES
INT, STRING, STRING
====
---- QUERY
select a.id, a.name, b.string_col from country_keystore_postgres_query a left join country_postgres_query b on
a.id = b.id;
---- RESULTS
1,'India','IN'
2,'Russia','RU'
3,'USA','US'
---- TYPES
INT, STRING, STRING
====
---- QUERY
select a.id, a.name, b.string_col from country_postgres_query a left join country_postgres b on
a.id = b.id;
---- RESULTS
1,'India','IN'
2,'Russia','RU'
3,'USA','US'
---- TYPES
INT, STRING, STRING
====
---- QUERY
select a.id, a.name, b.string_col from country_postgres_query a left join country_postgres_query b on
a.id = b.id;
---- RESULTS
1,'India','IN'
2,'Russia','RU'
3,'USA','US'
---- TYPES
INT, STRING, STRING
====
---- QUERY
# Right join with another table
select a.id, a.name, b.string_col from country_postgres_query a right join country_keystore_postgres_query b on
a.id = b.id;
---- RESULTS
1,'India','IN'
2,'Russia','RU'
3,'USA','US'
---- TYPES
INT, STRING, STRING
====
---- QUERY
select a.id, a.name, b.string_col from country_keystore_postgres_query a right join country_postgres_query b on
a.id = b.id;
---- RESULTS
1,'India','IN'
2,'Russia','RU'
3,'USA','US'
---- TYPES
INT, STRING, STRING
====
---- QUERY
select a.id, a.name, b.string_col from country_postgres a right join country_keystore_postgres_query b on
a.id = b.id;
---- RESULTS
1,'India','IN'
2,'Russia','RU'
3,'USA','US'
---- TYPES
INT, STRING, STRING
====
---- QUERY
select a.id, a.name, b.string_col from country_postgres a right join country_postgres_query b on
a.id = b.id;
---- RESULTS
1,'India','IN'
2,'Russia','RU'
3,'USA','US'
---- TYPES
INT, STRING, STRING
====
---- QUERY
select a.id, a.name, b.string_col from country_keystore_postgres a right join country_postgres_query b on
a.id = b.id;
---- RESULTS
1,'India','IN'
2,'Russia','RU'
3,'USA','US'
---- TYPES
INT, STRING, STRING
====
---- QUERY
select a.id, a.name, b.string_col from country_keystore_postgres a right join country_keystore_postgres_query b on
a.id = b.id;
---- RESULTS
1,'India','IN'
2,'Russia','RU'
3,'USA','US'
---- TYPES
INT, STRING, STRING
====
---- QUERY
# Full outer join
select a.id, a.name, b.string_col from country_postgres_query a full outer join country_keystore_postgres_query b on
a.id = b.id;
---- RESULTS
1,'India','IN'
2,'Russia','RU'
3,'USA','US'
---- TYPES
INT, STRING, STRING
====
---- QUERY
select a.id, a.name, b.string_col from country_keystore_postgres_query a full outer join country_postgres_query b on
a.id = b.id;
---- RESULTS
1,'India','IN'
2,'Russia','RU'
3,'USA','US'
---- TYPES
INT, STRING, STRING
====
---- QUERY
select a.id, a.name, b.string_col from country_postgres a full outer join country_keystore_postgres_query b on
a.id = b.id;
---- RESULTS
1,'India','IN'
2,'Russia','RU'
3,'USA','US'
---- TYPES
INT, STRING, STRING
====
---- QUERY
select a.id, a.name, b.string_col from country_postgres a full outer join country_postgres_query b on
a.id = b.id;
---- RESULTS
1,'India','IN'
2,'Russia','RU'
3,'USA','US'
---- TYPES
INT, STRING, STRING
====
---- QUERY
select a.id, a.name, b.string_col from country_keystore_postgres a full outer join country_postgres_query b on
a.id = b.id;
---- RESULTS
1,'India','IN'
2,'Russia','RU'
3,'USA','US'
---- TYPES
INT, STRING, STRING
====
---- QUERY
select a.id, a.name, b.string_col from country_keystore_postgres a full outer join country_keystore_postgres_query b on
a.id = b.id;
---- RESULTS
1,'India','IN'
2,'Russia','RU'
3,'USA','US'
---- TYPES
INT, STRING, STRING
====
---- QUERY
# Using IN clause
select * from country_postgres_query where name IN ('India', 'USA');
---- RESULTS
1,'India',true,10,100,1000,10000,1.100000023841858,1.11,2024-01-01,'IN',2024-01-01 10:00:00
3,'USA',true,30,300,3000,30000,3.299999952316284,3.33,2024-03-01,'US',2024-03-01 12:00:00
---- TYPES
INT, STRING, BOOLEAN, SMALLINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, DATE, STRING, TIMESTAMP
====
---- QUERY
select * from country_keystore_postgres_query where name IN ('India', 'USA');
---- RESULTS
1,'India',true,10,100,1000,10000,1.100000023841858,1.11,2024-01-01,'IN',2024-01-01 10:00:00
3,'USA',true,30,300,3000,30000,3.299999952316284,3.33,2024-03-01,'US',2024-03-01 12:00:00
---- TYPES
INT, STRING, BOOLEAN, SMALLINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, DATE, STRING, TIMESTAMP
====
---- QUERY
select * from quoted_col;
---- RESULTS
1,'India',true,10,100,1000,10000,1.100000023841858,1.11,2024-01-01,'IN',2024-01-01 10:00:00

View File

@@ -598,12 +598,6 @@ select * from jdbc_test order by id limit 5;
INT, BOOLEAN, INT
====
---- QUERY
# Alter JDBC table to unset the required JDBC configuration "table".
ALTER TABLE jdbc_test UNSET TBLPROPERTIES ("table");
---- CATCH
row_regex:.*AnalysisException: Unsetting the 'table' table property is not supported for JDBC DataSource table
====
---- QUERY
# Alter table to drop a column.
ALTER TABLE jdbc_test DROP COLUMN int_col;
---- RESULTS

View File

@@ -35,28 +35,28 @@ class TestExtDataSources(CustomClusterTestSuite):
def add_test_dimensions(cls):
super(TestExtDataSources, cls).add_test_dimensions()
cls.ImpalaTestMatrix.add_dimension(create_exec_option_dimension(
exec_single_node_option=[100]))
exec_single_node_option=[100]))
@pytest.mark.execute_serially
@CustomClusterTestSuite.with_args(
impalad_args="--use_local_catalog=true",
catalogd_args="--catalog_topic_mode=minimal")
impalad_args="--use_local_catalog=true",
catalogd_args="--catalog_topic_mode=minimal")
def test_data_source_tables(self, vector, unique_database, unique_name):
"""Start Impala cluster in LocalCatalog Mode"""
self.run_test_case('QueryTest/data-source-tables', vector, use_db=unique_database,
test_file_vars={'$UNIQUE_DATASOURCE': unique_name})
test_file_vars={'$UNIQUE_DATASOURCE': unique_name})
@pytest.mark.execute_serially
@CustomClusterTestSuite.with_args(
impalad_args="--use_local_catalog=true",
catalogd_args="--catalog_topic_mode=minimal")
impalad_args="--use_local_catalog=true",
catalogd_args="--catalog_topic_mode=minimal")
def test_jdbc_data_source(self, vector, unique_database):
"""Start Impala cluster in LocalCatalog Mode"""
self.run_test_case('QueryTest/jdbc-data-source', vector, use_db=unique_database)
@pytest.mark.execute_serially
@CustomClusterTestSuite.with_args(
impalad_args='--data_source_batch_size=2048')
impalad_args='--data_source_batch_size=2048')
def test_data_source_big_batch_size(self, vector, unique_database, unique_name):
"""Run test with batch size greater than default size 1024"""
self.run_test_case('QueryTest/data-source-tables', vector, use_db=unique_database,
@@ -64,7 +64,7 @@ class TestExtDataSources(CustomClusterTestSuite):
@pytest.mark.execute_serially
@CustomClusterTestSuite.with_args(
impalad_args='--data_source_batch_size=512')
impalad_args='--data_source_batch_size=512')
def test_data_source_small_batch_size(self, vector, unique_database, unique_name):
"""Run test with batch size less than default size 1024"""
self.run_test_case('QueryTest/data-source-tables', vector, use_db=unique_database,
@@ -175,6 +175,242 @@ class TestExtDataSources(CustomClusterTestSuite):
assert "FailoverInSyncJdbcDataSource" not in result.get_data()
class TestPostgresJdbcTables(CustomClusterTestSuite):
  """Impala query tests for external JDBC tables on a Postgres server.

  Covers tables defined with the 'table' property as well as tables backed by
  a custom SQL statement via the 'query' property (IMPALA-13869), plus
  negative tests for invalid combinations of the two properties.
  """

  @classmethod
  def get_workload(cls):
    return 'functional-query'

  @classmethod
  def setup_class(cls):
    super(TestPostgresJdbcTables, cls).setup_class()

  @pytest.mark.execute_serially
  def test_postgres_jdbc_tables(self, vector, unique_database):
    """Creates the Postgres-backed JDBC test tables (plain, quoted-column,
    keystore-named and query-based variants) in Impala and runs the shared
    query test file against them."""
    # NOTE(review): the *_keystore_* tables below use the same plain
    # dbcp.username/dbcp.password properties as the non-keystore tables;
    # confirm whether they were meant to use a jceks keystore instead.
    sql = """
      DROP TABLE IF EXISTS {0}.country_postgres;
      CREATE EXTERNAL TABLE {0}.country_postgres (
        id INT,
        name STRING,
        bool_col BOOLEAN,
        tinyint_col SMALLINT,
        smallint_col SMALLINT,
        int_col INT,
        bigint_col BIGINT,
        float_col FLOAT,
        double_col DOUBLE,
        date_col DATE,
        string_col STRING,
        timestamp_col TIMESTAMP)
      STORED BY JDBC
      TBLPROPERTIES (
        "database.type"="POSTGRES",
        "jdbc.url"="jdbc:postgresql://localhost:5432/functional",
        "jdbc.auth"="AuthMech=0",
        "jdbc.driver"="org.postgresql.Driver",
        "driver.url"="/test-warehouse/data-sources/jdbc-drivers/postgresql-jdbc.jar",
        "dbcp.username"="hiveuser",
        "dbcp.password"="password",
        "table"="country");
      DROP TABLE IF EXISTS {0}.quoted_col;
      CREATE EXTERNAL TABLE {0}.quoted_col
      (
        id INT,
        name STRING,
        bool_col BOOLEAN,
        tinyint_col SMALLINT,
        smallint_col SMALLINT,
        int_col INT,
        bigint_col BIGINT,
        float_col FLOAT,
        double_col DOUBLE,
        date_col DATE,
        `freeze` STRING,
        timestamp_col TIMESTAMP
      )
      STORED BY JDBC
      TBLPROPERTIES (
        "database.type"="POSTGRES",
        "jdbc.url"="jdbc:postgresql://localhost:5432/functional",
        "jdbc.auth"="AuthMech=0",
        "jdbc.driver"="org.postgresql.Driver",
        "driver.url"="/test-warehouse/data-sources/jdbc-drivers/postgresql-jdbc.jar",
        "dbcp.username"="hiveuser",
        "dbcp.password"="password",
        "table"="quoted_col"
      );
      DROP TABLE IF EXISTS {0}.country_keystore_postgres;
      CREATE EXTERNAL TABLE {0}.country_keystore_postgres (
        id INT,
        name STRING,
        bool_col BOOLEAN,
        tinyint_col SMALLINT,
        smallint_col SMALLINT,
        int_col INT,
        bigint_col BIGINT,
        float_col FLOAT,
        double_col DOUBLE,
        date_col DATE,
        string_col STRING,
        timestamp_col TIMESTAMP)
      STORED BY JDBC
      TBLPROPERTIES (
        "database.type"="POSTGRES",
        "jdbc.url"="jdbc:postgresql://localhost:5432/functional",
        "jdbc.auth"="AuthMech=0",
        "jdbc.driver"="org.postgresql.Driver",
        "driver.url"="/test-warehouse/data-sources/jdbc-drivers/postgresql-jdbc.jar",
        "dbcp.username"="hiveuser",
        "dbcp.password"="password",
        "table"="country");
      DROP TABLE IF EXISTS {0}.country_postgres_query;
      CREATE EXTERNAL TABLE {0}.country_postgres_query (
        id INT,
        name STRING,
        bool_col BOOLEAN,
        tinyint_col SMALLINT,
        smallint_col SMALLINT,
        int_col INT,
        bigint_col BIGINT,
        float_col FLOAT,
        double_col DOUBLE,
        date_col DATE,
        string_col STRING,
        timestamp_col TIMESTAMP)
      STORED BY JDBC
      TBLPROPERTIES (
        "database.type"="POSTGRES",
        "jdbc.url"="jdbc:postgresql://localhost:5432/functional",
        "jdbc.auth"="AuthMech=0",
        "jdbc.driver"="org.postgresql.Driver",
        "driver.url"="/test-warehouse/data-sources/jdbc-drivers/postgresql-jdbc.jar",
        "dbcp.username"="hiveuser",
        "dbcp.password"="password",
        "query"="select id,name,bool_col,tinyint_col,smallint_col,
        int_col,bigint_col,float_col,double_col,date_col,string_col,
        timestamp_col from country");
      DROP TABLE IF EXISTS {0}.country_keystore_postgres_query;
      CREATE EXTERNAL TABLE {0}.country_keystore_postgres_query (
        id INT,
        name STRING,
        bool_col BOOLEAN,
        tinyint_col SMALLINT,
        smallint_col SMALLINT,
        int_col INT,
        bigint_col BIGINT,
        float_col FLOAT,
        double_col DOUBLE,
        date_col DATE,
        string_col STRING,
        timestamp_col TIMESTAMP)
      STORED BY JDBC
      TBLPROPERTIES (
        "database.type"="POSTGRES",
        "jdbc.url"="jdbc:postgresql://localhost:5432/functional",
        "jdbc.auth"="AuthMech=0",
        "jdbc.driver"="org.postgresql.Driver",
        "driver.url"="/test-warehouse/data-sources/jdbc-drivers/postgresql-jdbc.jar",
        "dbcp.username"="hiveuser",
        "dbcp.password"="password",
        "query"="select id,name,bool_col,tinyint_col,smallint_col,
        int_col,bigint_col,float_col,double_col,date_col,string_col,
        timestamp_col from country");
    """.format(unique_database)
    # Impala's execute() accepts a single statement, so split the script on
    # ';' and run the statements one at a time. None of the embedded property
    # values contain a semicolon, so a plain split is safe here. (Leftover
    # commented-out debug scaffolding and per-statement [DEBUG] prints were
    # removed; a failing statement still propagates its exception.)
    for stmt in (s.strip() for s in sql.split(';')):
      if not stmt:
        continue
      # Skip comment-only fragments, if any appear in the script.
      if stmt.startswith('--') or stmt.startswith('/*'):
        continue
      self.client.execute(stmt + ';')
    # Refresh catalog metadata for the newly created tables, then sanity-check
    # that the query-backed tables resolve a schema before running the shared
    # test file.
    self.client.execute("INVALIDATE METADATA {0}.country_postgres"
                        .format(unique_database))
    self.client.execute("INVALIDATE METADATA {0}.country_keystore_postgres"
                        .format(unique_database))
    self.client.execute("INVALIDATE METADATA {0}.country_postgres_query"
                        .format(unique_database))
    self.client.execute("INVALIDATE METADATA {0}"
                        ".country_keystore_postgres_query"
                        .format(unique_database))
    self.client.execute("DESCRIBE {0}.country_postgres_query"
                        .format(unique_database))
    self.client.execute("DESCRIBE {0}"
                        ".country_keystore_postgres_query"
                        .format(unique_database))
    self.run_test_case('QueryTest/hive-jdbc-postgres-tables',
                       vector, use_db=unique_database)

  def test_invalid_postgres_jdbc_table(self, unique_database):
    """Negative tests: 'table' and 'query' are mutually exclusive and one of
    them must be present."""
    # NOTE(review): these DDL statements use Impala's STORED BY JDBC syntax
    # but are submitted through run_stmt_in_hive(); confirm the intended
    # engine (self.client.execute would target Impala directly).
    # Both 'table' and 'query' are set.
    sql_both_set = """
      CREATE EXTERNAL TABLE {0}.invalid_both_props (
        id INT,
        name STRING
      )
      STORED BY JDBC
      TBLPROPERTIES (
        "database.type"="POSTGRES",
        "jdbc.url"="jdbc:postgresql://localhost:5432/functional",
        "jdbc.auth"="AuthMech=0",
        "jdbc.driver"="org.postgresql.Driver",
        "driver.url"="/test-warehouse/data-sources/jdbc-drivers/postgresql-jdbc.jar",
        "dbcp.username"="hiveuser",
        "dbcp.password"="password",
        "table"="country",
        "query"="SELECT * FROM country");
    """.format(unique_database)
    with pytest.raises(Exception, match="Only one of 'table' or 'query' should be set"):
      self.run_stmt_in_hive(sql_both_set)
    # Neither 'table' nor 'query' is set.
    sql_none_set = """
      CREATE EXTERNAL TABLE {0}.invalid_no_props (
        id INT,
        name STRING
      )
      STORED BY JDBC
      TBLPROPERTIES (
        "database.type"="POSTGRES",
        "jdbc.url"="jdbc:postgresql://localhost:5432/functional",
        "jdbc.auth"="AuthMech=0",
        "jdbc.driver"="org.postgresql.Driver",
        "driver.url"="/test-warehouse/data-sources/jdbc-drivers/postgresql-jdbc.jar",
        "dbcp.username"="hiveuser",
        "dbcp.password"="password");
    """.format(unique_database)
    with pytest.raises(Exception, match="Either 'table' or 'query' must be set"):
      self.run_stmt_in_hive(sql_none_set)
class TestHivePostgresJdbcTables(CustomClusterTestSuite):
"""Tests for hive jdbc postgres tables. """
@@ -265,6 +501,64 @@ class TestHivePostgresJdbcTables(CustomClusterTestSuite):
"hive.sql.dbcp.password.key" = "hiveuser",
"hive.sql.table" = "country"
);
DROP TABLE IF EXISTS {0}.country_postgres_query;
CREATE EXTERNAL TABLE {0}.country_postgres_query
(
id INT,
name STRING,
bool_col BOOLEAN,
tinyint_col SMALLINT,
smallint_col SMALLINT,
int_col INT,
bigint_col BIGINT,
float_col FLOAT,
double_col DOUBLE,
date_col DATE,
string_col STRING,
timestamp_col TIMESTAMP
)
STORED BY 'org.apache.hive.storage.jdbc.JdbcStorageHandler'
TBLPROPERTIES (
"hive.sql.database.type" = "POSTGRES",
"hive.sql.jdbc.driver" = "org.postgresql.Driver",
"hive.sql.jdbc.url" = "jdbc:postgresql://localhost:5432/functional",
"hive.sql.dbcp.username" = "hiveuser",
"hive.sql.dbcp.password" = "password",
"hive.sql.query" = "select id,name,bool_col,tinyint_col,smallint_col,
int_col,bigint_col,float_col,double_col,date_col,string_col,
timestamp_col from country"
);
DROP TABLE IF EXISTS {0}.country_keystore_postgres_query;
CREATE EXTERNAL TABLE {0}.country_keystore_postgres_query
(
id INT,
name STRING,
bool_col BOOLEAN,
tinyint_col SMALLINT,
smallint_col SMALLINT,
int_col INT,
bigint_col BIGINT,
float_col FLOAT,
double_col DOUBLE,
date_col DATE,
string_col STRING,
timestamp_col TIMESTAMP
)
STORED BY 'org.apache.hive.storage.jdbc.JdbcStorageHandler'
TBLPROPERTIES (
"hive.sql.database.type" = "POSTGRES",
"hive.sql.jdbc.driver" = "org.postgresql.Driver",
"hive.sql.jdbc.url" = "jdbc:postgresql://localhost:5432/functional",
"hive.sql.dbcp.username" = "hiveuser",
"hive.sql.dbcp.password.keystore" =
"jceks://hdfs/test-warehouse/data-sources/test.jceks",
"hive.sql.dbcp.password.key" = "hiveuser",
"hive.sql.query" = "select id,name,bool_col,tinyint_col,smallint_col,
int_col,bigint_col,float_col,double_col,date_col,string_col,
timestamp_col from country"
);
""".format(unique_database)
try:
self.run_stmt_in_hive(hive_sql)
@@ -274,14 +568,61 @@ class TestHivePostgresJdbcTables(CustomClusterTestSuite):
format(unique_database))
self.client.execute("INVALIDATE METADATA {0}.country_keystore_postgres".
format(unique_database))
self.client.execute("INVALIDATE METADATA {0}.country_postgres_query".
format(unique_database))
self.client.execute("INVALIDATE METADATA {0}.country_keystore_postgres_query".
format(unique_database))
# Describing postgres hive jdbc table in Impala.
self.client.execute("DESCRIBE {0}.country_postgres".format(unique_database))
self.client.execute("DESCRIBE {0}.country_keystore_postgres".format(unique_database))
self.client.execute("DESCRIBE {0}.country_postgres_query".format(unique_database))
self.client.execute("DESCRIBE {0}.country_keystore_postgres_query"
.format(unique_database))
# Select statements are verified in hive-jdbc-postgres-tables.test.
self.run_test_case('QueryTest/hive-jdbc-postgres-tables', vector,
use_db=unique_database)
def test_invalid_postgres_hive_jdbc_table(self, unique_database):
"""Negative tests for hive jdbc tables with postgres"""
# Both hive.sql.table and hive.sql.query are set
sql_both_set = """
CREATE EXTERNAL TABLE {0}.invalid_both_props (
id INT,
name STRING
)
STORED BY 'org.apache.hive.storage.jdbc.JdbcStorageHandler'
TBLPROPERTIES (
"hive.sql.database.type" = "POSTGRES",
"hive.sql.jdbc.driver" = "org.postgresql.Driver",
"hive.sql.jdbc.url" = "jdbc:postgresql://localhost:5432/functional",
"hive.sql.dbcp.username" = "hiveuser",
"hive.sql.dbcp.password" = "password",
"hive.sql.table" = "country",
"hive.sql.query" = "SELECT * FROM country"
)
""".format(unique_database)
with pytest.raises(Exception, match="Only one of 'hive.sql.table' or"
" 'hive.sql.query' should be set"): self.run_stmt_in_hive(sql_both_set)
# Neither hive.sql.table nor hive.sql.query is set
sql_none_set = """
CREATE EXTERNAL TABLE {0}.invalid_no_props (
id INT,
name STRING
)
STORED BY 'org.apache.hive.storage.jdbc.JdbcStorageHandler'
TBLPROPERTIES (
"hive.sql.database.type" = "POSTGRES",
"hive.sql.jdbc.driver" = "org.postgresql.Driver",
"hive.sql.jdbc.url" = "jdbc:postgresql://localhost:5432/functional",
"hive.sql.dbcp.username" = "hiveuser",
"hive.sql.dbcp.password" = "password"
)
""".format(unique_database)
with pytest.raises(Exception, match="Either 'hive.sql.table' or"
" 'hive.sql.query' must be set"): self.run_stmt_in_hive(sql_none_set)
class TestMySqlExtJdbcTables(CustomClusterTestSuite):
"""Impala query tests for external jdbc tables on MySQL server.
@@ -416,6 +757,60 @@ class TestMySqlExtJdbcTables(CustomClusterTestSuite):
"hive.sql.dbcp.password.key" = "hiveuser",
"hive.sql.table" = "country"
);
DROP TABLE IF EXISTS {0}.country_mysql_query;
CREATE EXTERNAL TABLE {0}.country_mysql_query
(
id INT,
name STRING,
bool_col BOOLEAN,
tinyint_col SMALLINT,
smallint_col SMALLINT,
int_col INT,
bigint_col BIGINT,
float_col FLOAT,
double_col DOUBLE,
date_col DATE,
string_col STRING,
timestamp_col TIMESTAMP
)
STORED BY 'org.apache.hive.storage.jdbc.JdbcStorageHandler'
TBLPROPERTIES (
"hive.sql.database.type" = "MYSQL",
"hive.sql.jdbc.driver" = "com.mysql.cj.jdbc.Driver",
"hive.sql.jdbc.url" = "jdbc:mysql://localhost:3306/functional",
"hive.sql.dbcp.username" = "hiveuser",
"hive.sql.dbcp.password" = "password",
"hive.sql.query" = "select * from country"
);
DROP TABLE IF EXISTS {0}.country_keystore_mysql_query;
CREATE EXTERNAL TABLE {0}.country_keystore_mysql_query
(
id INT,
name STRING,
bool_col BOOLEAN,
tinyint_col SMALLINT,
smallint_col SMALLINT,
int_col INT,
bigint_col BIGINT,
float_col FLOAT,
double_col DOUBLE,
date_col DATE,
string_col STRING,
timestamp_col TIMESTAMP
)
STORED BY 'org.apache.hive.storage.jdbc.JdbcStorageHandler'
TBLPROPERTIES (
"hive.sql.database.type" = "MYSQL",
"hive.sql.jdbc.driver" = "com.mysql.cj.jdbc.Driver",
"hive.sql.jdbc.url" = "jdbc:mysql://localhost:3306/functional",
"hive.sql.dbcp.username" = "hiveuser",
"hive.sql.dbcp.password.keystore" =
"jceks://hdfs/test-warehouse/data-sources/test.jceks",
"hive.sql.dbcp.password.key" = "hiveuser",
"hive.sql.query" = "select * from country"
);
""".format(unique_database)
try:
self.run_stmt_in_hive(hive_sql)
@@ -425,14 +820,67 @@ class TestMySqlExtJdbcTables(CustomClusterTestSuite):
.format(unique_database))
self.client.execute("INVALIDATE METADATA {0}.country_keystore_mysql"
.format(unique_database))
self.client.execute("INVALIDATE METADATA {0}.country_mysql_query"
.format(unique_database))
self.client.execute("INVALIDATE METADATA {0}.country_keystore_mysql_query"
.format(unique_database))
# Describing mysql hive jdbc table in Impala.
self.client.execute("DESCRIBE {0}.country_mysql".format(unique_database))
self.client.execute("DESCRIBE {0}.country_keystore_mysql".format(unique_database))
self.client.execute("DESCRIBE {0}.country_mysql_query".format(unique_database))
self.client.execute("DESCRIBE {0}.country_keystore_mysql_query"
.format(unique_database))
# Select statements are verified in hive-jdbc-mysql-tables.test.
self.run_test_case('QueryTest/hive-jdbc-mysql-tables', vector,
use_db=unique_database)
@pytest.mark.execute_serially
def test_invalid_mysql_hive_jdbc_table_properties(self, unique_database):
"""Negative tests for hive jdbc tables with hive"""
add_jar_stmt =\
"ADD JAR hdfs:///test-warehouse/data-sources/jdbc-drivers/mysql-jdbc.jar;"
self.run_stmt_in_hive(add_jar_stmt)
# Both hive.sql.table and hive.sql.query are set
sql_both_set = """
CREATE EXTERNAL TABLE {0}.invalid_both_props_mysql (
id INT,
name STRING
)
STORED BY 'org.apache.hive.storage.jdbc.JdbcStorageHandler'
TBLPROPERTIES (
"hive.sql.database.type" = "MYSQL",
"hive.sql.jdbc.driver" = "com.mysql.cj.jdbc.Driver",
"hive.sql.jdbc.url" = "jdbc:mysql://localhost:3306/functional",
"hive.sql.dbcp.username" = "hiveuser",
"hive.sql.dbcp.password" = "password",
"hive.sql.table" = "country",
"hive.sql.query" = "SELECT id, name FROM country"
)
""".format(unique_database)
with pytest.raises(Exception, match="Only one of 'hive.sql.table' or"
" 'hive.sql.query' should be set"): self.run_stmt_in_hive(sql_both_set)
# Neither hive.sql.table nor hive.sql.query is set
sql_none_set = """
CREATE EXTERNAL TABLE {0}.invalid_no_props_mysql (
id INT,
name STRING
)
STORED BY 'org.apache.hive.storage.jdbc.JdbcStorageHandler'
TBLPROPERTIES (
"hive.sql.database.type" = "MYSQL",
"hive.sql.jdbc.driver" = "com.mysql.cj.jdbc.Driver",
"hive.sql.jdbc.url" = "jdbc:mysql://localhost:3306/functional",
"hive.sql.dbcp.username" = "hiveuser",
"hive.sql.dbcp.password" = "password"
)
""".format(unique_database)
with pytest.raises(Exception, match="Either 'hive.sql.table' or"
" 'hive.sql.query' must be set"): self.run_stmt_in_hive(sql_none_set)
class TestImpalaExtJdbcTables(CustomClusterTestSuite):
"""Impala query tests for external jdbc tables in Impala cluster."""
@@ -441,13 +889,13 @@ class TestImpalaExtJdbcTables(CustomClusterTestSuite):
def add_test_dimensions(cls):
super(TestImpalaExtJdbcTables, cls).add_test_dimensions()
cls.ImpalaTestMatrix.add_dimension(create_exec_option_dimension(
exec_single_node_option=[100]))
exec_single_node_option=[100]))
@classmethod
def _download_impala_jdbc_driver(cls):
# Download Impala jdbc driver and copy jdbc driver to HDFS.
script = os.path.join(
os.environ['IMPALA_HOME'], 'testdata/bin/download-impala-jdbc-driver.sh')
os.environ['IMPALA_HOME'], 'testdata/bin/download-impala-jdbc-driver.sh')
run_cmd = [script]
try:
subprocess.check_call(run_cmd, close_fds=True)