IMPALA-14005: Support for quoted reserved words column names

This change updates the way column names are
projected in the SQL query generated for JDBC
external tables. Instead of relying on optional
mapping or default behavior, all column names are now
explicitly quoted using appropriate quote characters.

Column names are now wrapped with quote characters
based on the JDBC driver being used:
1. Backticks (`) for Hive, Impala and MySQL
2. Double quotes (") for all other databases

This helps in the support for case-sensitive or
reserved column names.

Change-Id: I5da5bc7ea5df8f094b7e2877a0ebf35662f93805
Reviewed-on: http://gerrit.cloudera.org:8080/23066
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Reviewed-by: Wenzhe Zhou <wzhou@cloudera.com>
This commit is contained in:
pranav.lodha
2025-06-17 15:55:47 +05:30
committed by Wenzhe Zhou
parent 6a97109551
commit acca24fe02
7 changed files with 473 additions and 6 deletions

View File

@@ -307,10 +307,22 @@ public class JdbcDataSource implements ExternalDataSource {
if (schema_.getColsSize() == 0) {
project = "*";
} else {
project =
schema_.getCols().stream().map(
TColumnDesc::getName).map(
name -> columnMapping.getOrDefault(name, name))
String driverClass = JdbcStorageConfigManager.getConfigValue(
JdbcStorageConfig.JDBC_DRIVER_CLASS, tableConfig_);
final String quoteChar;
if (driverClass != null && (driverClass.toLowerCase().contains("impala") ||
driverClass.toLowerCase().contains("hive") ||
driverClass.toLowerCase().contains("mysql"))) {
quoteChar = "`";
} else {
quoteChar = "\"";
}
project = schema_.getCols().stream()
.map(TColumnDesc::getName)
.map(name -> columnMapping.containsKey(name)
? columnMapping.get(name)
: quoteChar + name + quoteChar)
.collect(Collectors.joining(", "));
}
sb.append(project);

View File

@@ -128,4 +128,24 @@ TBLPROPERTIES (
"driver.url"="/test-warehouse/data-sources/jdbc-drivers/postgresql-jdbc.jar",
"dbcp.username"="hiveuser",
"dbcp.password"="password",
"table"="test_strategy");
DROP TABLE IF EXISTS quoted_impala;
CREATE EXTERNAL TABLE IF NOT EXISTS quoted_impala (
strategy_id INT,
name STRING,
referrer STRING,
landing STRING,
priority INT,
`freeze` STRING,
last_modified timestamp,
PRIMARY KEY (strategy_id) )
STORED BY JDBC
TBLPROPERTIES (
"database.type"="POSTGRES",
"jdbc.url"="jdbc:postgresql://localhost:5432/functional",
"jdbc.driver"="org.postgresql.Driver",
"driver.url"="/test-warehouse/data-sources/jdbc-drivers/postgresql-jdbc.jar",
"dbcp.username"="hiveuser",
"dbcp.password"="password",
"table"="test_strategy");

View File

@@ -83,7 +83,7 @@ CREATE TABLE decimal_tbl
__EOT__
sudo -u postgres psql -U hiveuser -d functional -f /tmp/jdbc_decimal_tbl.sql
# Create test_strategy1 table for unit test
# Create test_strategy table for unit test
cat > /tmp/jdbc_test_strategy.sql << __EOT__
DROP TABLE IF EXISTS test_strategy;
CREATE TABLE test_strategy
@@ -108,6 +108,61 @@ INSERT INTO test_strategy (strategy_id, name, referrer, landing, priority,
__EOT__
sudo -u postgres psql -U hiveuser -d functional -f /tmp/jdbc_test_strategy.sql
# Create quoted_impala
cat > /tmp/jdbc_quoted_impala.sql << __EOT__
DROP TABLE IF EXISTS quoted_impala;
CREATE TABLE quoted_impala
(
strategy_id INT,
name VARCHAR(50),
referrer VARCHAR(1024),
landing VARCHAR(1024),
priority INT,
"freeze" VARCHAR(512),
last_modified timestamp,
PRIMARY KEY (strategy_id)
);
INSERT INTO quoted_impala (strategy_id, name, referrer, landing, priority,
"freeze", last_modified) VALUES
(1, 'S1', 'aaa', 'abc', 1000, NULL, '2012-05-08 15:01:15'),
(2, 'S2', 'bbb', 'def', 990, NULL, '2012-05-08 15:01:15'),
(3, 'S3', 'ccc', 'ghi', 1000, NULL, '2012-05-08 15:01:15'),
(4, 'S4', 'ddd', 'jkl', 980, NULL, '2012-05-08 15:01:15'),
(5, 'S5', 'eee', NULL, NULL, NULL, '2012-05-08 15:01:15');
__EOT__
sudo -u postgres psql -U hiveuser -d functional -f /tmp/jdbc_quoted_impala.sql
# Create quoted_col table
cat > /tmp/quoted_col.sql << __EOT__
DROP TABLE IF EXISTS quoted_col;
CREATE TABLE quoted_col
(
id int,
name varchar(20),
bool_col BOOLEAN,
tinyint_col SMALLINT,
smallint_col SMALLINT,
int_col INT,
bigint_col BIGINT,
float_col FLOAT,
double_col DOUBLE PRECISION,
date_col DATE,
"freeze" VARCHAR(10),
timestamp_col TIMESTAMP
);
INSERT INTO quoted_col (id, name, bool_col, tinyint_col, smallint_col, int_col,
bigint_col, float_col, double_col, date_col, "freeze", timestamp_col)
VALUES
(1, 'India', TRUE, 10, 100, 1000, 10000, 1.1, 1.11, '2024-01-01',
'IN', '2024-01-01 10:00:00'),
(2, 'Russia', FALSE, 20, 200, 2000, 20000, 2.2, 2.22, '2024-02-01',
'RU', '2024-02-01 11:00:00'),
(3, 'USA', TRUE, 30, 300, 3000, 30000, 3.3, 3.33, '2024-03-01',
'US', '2024-03-01 12:00:00');
__EOT__
sudo -u postgres psql -U hiveuser -d functional -f /tmp/quoted_col.sql
# Create country table
cat > /tmp/jdbc_country.sql << __EOT__
DROP TABLE IF EXISTS country;

View File

@@ -173,6 +173,38 @@ __EOT__
docker exec -i mysql mysql -uroot -psecret functional < \
/tmp/jdbc_country.sql
# Create quoted_col table
cat > /tmp/quoted_col.sql << '__EOT__'
DROP TABLE IF EXISTS quoted_col;
CREATE TABLE quoted_col
(
id int,
name varchar(20),
bool_col BOOLEAN,
tinyint_col SMALLINT,
smallint_col SMALLINT,
int_col INT,
bigint_col BIGINT,
float_col FLOAT,
double_col DOUBLE,
date_col DATE,
`freeze` VARCHAR(10),
timestamp_col TIMESTAMP
);
INSERT INTO quoted_col (id, name, bool_col, tinyint_col, smallint_col, int_col,
bigint_col, float_col, double_col, date_col, `freeze`, timestamp_col)
VALUES
(1, 'India', TRUE, 10, 100, 1000, 10000, 1.1, 1.11, '2024-01-01',
'IN', '2024-01-01 10:00:00'),
(2, 'Russia', FALSE, 20, 200, 2000, 20000, 2.2, 2.22, '2024-02-01',
'RU', '2024-02-01 11:00:00'),
(3, 'USA', TRUE, 30, 300, 3000, 30000, 3.3, 3.33, '2024-03-01',
'US', '2024-03-01 12:00:00');
__EOT__
docker exec -i mysql mysql -uroot -psecret functional < \
/tmp/quoted_col.sql
# Load data to jdbc table
cat ${IMPALA_HOME}/testdata/target/AllTypes/* > /tmp/mysql_jdbc_alltypes.csv
docker cp /tmp/mysql_jdbc_alltypes.csv mysql:/tmp

View File

@@ -231,4 +231,152 @@ select * from country_keystore_mysql where name IN ('India', 'USA');
3,'USA',true,30,300,3000,30000,3.299999952316284,3.33,2024-03-01,'US',2024-03-01 12:00:00
---- TYPES
INT, STRING, BOOLEAN, SMALLINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, DATE, STRING, TIMESTAMP
====
---- QUERY
select * from quoted_col;
---- RESULTS
1,'India',true,10,100,1000,10000,1.100000023841858,1.11,2024-01-01,'IN',2024-01-01 10:00:00
2,'Russia',false,20,200,2000,20000,2.200000047683716,2.22,2024-02-01,'RU',2024-02-01 11:00:00
3,'USA',true,30,300,3000,30000,3.299999952316284,3.33,2024-03-01,'US',2024-03-01 12:00:00
---- TYPES
INT, STRING, BOOLEAN, SMALLINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, DATE, STRING, TIMESTAMP
====
---- QUERY
select id, name, date_col from quoted_col;
---- RESULTS
1,'India',2024-01-01
2,'Russia',2024-02-01
3,'USA',2024-03-01
---- TYPES
INT, STRING, DATE
====
---- QUERY
select count(*) from quoted_col;
---- RESULTS
3
---- TYPES
BIGINT
====
---- QUERY
select distinct name from quoted_col;
---- RESULTS
'India'
'Russia'
'USA'
---- TYPES
STRING
====
---- QUERY
# Check for null values
select * from quoted_col where `freeze` IS NULL;
---- RESULTS
---- TYPES
INT, STRING, BOOLEAN, SMALLINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, DATE, STRING, TIMESTAMP
====
---- QUERY
# Order by integer column
select id, name, int_col from quoted_col order by int_col DESC;
---- RESULTS
3,'USA',3000
2,'Russia',2000
1,'India',1000
---- TYPES
INT, STRING, INT
====
---- QUERY
# Select using case statement
select id, name, case when bool_col then 'Active' else 'Inactive' end as status from quoted_col;
---- RESULTS
1,'India','Active'
2,'Russia','Inactive'
3,'USA','Active'
---- TYPES
INT, STRING, STRING
====
---- QUERY
# Check for minimum date
select min(date_col) from quoted_col;
---- RESULTS
2024-01-01
---- TYPES
DATE
====
---- QUERY
# Join with a self table alias
select a.id, a.name, b.name from quoted_col a join quoted_col b on a.id <> b.id;
---- RESULTS
1,'India','Russia'
1,'India','USA'
2,'Russia','India'
2,'Russia','USA'
3,'USA','India'
3,'USA','Russia'
---- TYPES
INT, STRING, STRING
====
---- QUERY
# Left join with another table
select a.id, a.name, b.`freeze` from quoted_col a left join quoted_col b on a.id = b.id;
---- RESULTS
1,'India','IN'
2,'Russia','RU'
3,'USA','US'
---- TYPES
INT, STRING, STRING
====
---- QUERY
select a.id, a.name, b.`freeze` from quoted_col a left join quoted_col b on a.id = b.id;
---- RESULTS
1,'India','IN'
2,'Russia','RU'
3,'USA','US'
---- TYPES
INT, STRING, STRING
====
---- QUERY
# Right join with another table
select a.id, a.name, b.`freeze` from quoted_col a right join quoted_col b on a.id = b.id;
---- RESULTS
1,'India','IN'
2,'Russia','RU'
3,'USA','US'
---- TYPES
INT, STRING, STRING
====
---- QUERY
select a.id, a.name, b.`freeze` from quoted_col a right join quoted_col b on a.id = b.id;
---- RESULTS
1,'India','IN'
2,'Russia','RU'
3,'USA','US'
---- TYPES
INT, STRING, STRING
====
---- QUERY
# Full outer join
select a.id, a.name, b.`freeze` from quoted_col a full outer join quoted_col b on a.id = b.id;
---- RESULTS
1,'India','IN'
2,'Russia','RU'
3,'USA','US'
---- TYPES
INT, STRING, STRING
====
---- QUERY
select a.id, a.name, b.`freeze` from quoted_col a full outer join quoted_col b on a.id = b.id;
---- RESULTS
1,'India','IN'
2,'Russia','RU'
3,'USA','US'
---- TYPES
INT, STRING, STRING
====
---- QUERY
# Using IN clause
select * from quoted_col where name IN ('India', 'USA');
---- RESULTS
1,'India',true,10,100,1000,10000,1.100000023841858,1.11,2024-01-01,'IN',2024-01-01 10:00:00
3,'USA',true,30,300,3000,30000,3.299999952316284,3.33,2024-03-01,'US',2024-03-01 12:00:00
---- TYPES
INT, STRING, BOOLEAN, SMALLINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, DATE, STRING, TIMESTAMP
====

View File

@@ -231,4 +231,152 @@ select * from country_keystore_postgres where name IN ('India', 'USA');
3,'USA',true,30,300,3000,30000,3.299999952316284,3.33,2024-03-01,'US',2024-03-01 12:00:00
---- TYPES
INT, STRING, BOOLEAN, SMALLINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, DATE, STRING, TIMESTAMP
====
---- QUERY
select * from quoted_col;
---- RESULTS
1,'India',true,10,100,1000,10000,1.100000023841858,1.11,2024-01-01,'IN',2024-01-01 10:00:00
2,'Russia',false,20,200,2000,20000,2.200000047683716,2.22,2024-02-01,'RU',2024-02-01 11:00:00
3,'USA',true,30,300,3000,30000,3.299999952316284,3.33,2024-03-01,'US',2024-03-01 12:00:00
---- TYPES
INT, STRING, BOOLEAN, SMALLINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, DATE, STRING, TIMESTAMP
====
---- QUERY
select id, name, date_col from quoted_col;
---- RESULTS
1,'India',2024-01-01
2,'Russia',2024-02-01
3,'USA',2024-03-01
---- TYPES
INT, STRING, DATE
====
---- QUERY
select count(*) from quoted_col;
---- RESULTS
3
---- TYPES
BIGINT
====
---- QUERY
select distinct name from quoted_col;
---- RESULTS
'India'
'Russia'
'USA'
---- TYPES
STRING
====
---- QUERY
# Check for null values
select * from quoted_col where `freeze` IS NULL;
---- RESULTS
---- TYPES
INT, STRING, BOOLEAN, SMALLINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, DATE, STRING, TIMESTAMP
====
---- QUERY
# Order by integer column
select id, name, int_col from quoted_col order by int_col DESC;
---- RESULTS
3,'USA',3000
2,'Russia',2000
1,'India',1000
---- TYPES
INT, STRING, INT
====
---- QUERY
# Select using case statement
select id, name, case when bool_col then 'Active' else 'Inactive' end as status from quoted_col;
---- RESULTS
1,'India','Active'
2,'Russia','Inactive'
3,'USA','Active'
---- TYPES
INT, STRING, STRING
====
---- QUERY
# Check for minimum date
select min(date_col) from quoted_col;
---- RESULTS
2024-01-01
---- TYPES
DATE
====
---- QUERY
# Join with a self table alias
select a.id, a.name, b.name from quoted_col a join quoted_col b on a.id <> b.id;
---- RESULTS
1,'India','Russia'
1,'India','USA'
2,'Russia','India'
2,'Russia','USA'
3,'USA','India'
3,'USA','Russia'
---- TYPES
INT, STRING, STRING
====
---- QUERY
# Left join with another table
select a.id, a.name, b.`freeze` from quoted_col a left join quoted_col b on a.id = b.id;
---- RESULTS
1,'India','IN'
2,'Russia','RU'
3,'USA','US'
---- TYPES
INT, STRING, STRING
====
---- QUERY
select a.id, a.name, b.`freeze` from quoted_col a left join quoted_col b on a.id = b.id;
---- RESULTS
1,'India','IN'
2,'Russia','RU'
3,'USA','US'
---- TYPES
INT, STRING, STRING
====
---- QUERY
# Right join with another table
select a.id, a.name, b.`freeze` from quoted_col a right join quoted_col b on a.id = b.id;
---- RESULTS
1,'India','IN'
2,'Russia','RU'
3,'USA','US'
---- TYPES
INT, STRING, STRING
====
---- QUERY
select a.id, a.name, b.`freeze` from quoted_col a right join quoted_col b on a.id = b.id;
---- RESULTS
1,'India','IN'
2,'Russia','RU'
3,'USA','US'
---- TYPES
INT, STRING, STRING
====
---- QUERY
# Full outer join
select a.id, a.name, b.`freeze` from quoted_col a full outer join quoted_col b on a.id = b.id;
---- RESULTS
1,'India','IN'
2,'Russia','RU'
3,'USA','US'
---- TYPES
INT, STRING, STRING
====
---- QUERY
select a.id, a.name, b.`freeze` from quoted_col a full outer join quoted_col b on a.id = b.id;
---- RESULTS
1,'India','IN'
2,'Russia','RU'
3,'USA','US'
---- TYPES
INT, STRING, STRING
====
---- QUERY
# Using IN clause
select * from quoted_col where name IN ('India', 'USA');
---- RESULTS
1,'India',true,10,100,1000,10000,1.100000023841858,1.11,2024-01-01,'IN',2024-01-01 10:00:00
3,'USA',true,30,300,3000,30000,3.299999952316284,3.33,2024-03-01,'US',2024-03-01 12:00:00
---- TYPES
INT, STRING, BOOLEAN, SMALLINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, DATE, STRING, TIMESTAMP
====

View File

@@ -212,6 +212,32 @@ class TestHivePostgresJdbcTables(CustomClusterTestSuite):
"hive.sql.table" = "country"
);
DROP TABLE IF EXISTS {0}.quoted_col;
CREATE EXTERNAL TABLE {0}.quoted_col
(
id INT,
name STRING,
bool_col BOOLEAN,
tinyint_col SMALLINT,
smallint_col SMALLINT,
int_col INT,
bigint_col BIGINT,
float_col FLOAT,
double_col DOUBLE,
date_col DATE,
`freeze` STRING,
timestamp_col TIMESTAMP
)
STORED BY 'org.apache.hive.storage.jdbc.JdbcStorageHandler'
TBLPROPERTIES (
"hive.sql.database.type" = "POSTGRES",
"hive.sql.jdbc.driver" = "org.postgresql.Driver",
"hive.sql.jdbc.url" = "jdbc:postgresql://localhost:5432/functional",
"hive.sql.dbcp.username" = "hiveuser",
"hive.sql.dbcp.password" = "password",
"hive.sql.table" = "quoted_col"
);
DROP TABLE IF EXISTS {0}.country_keystore_postgres;
CREATE EXTERNAL TABLE {0}.country_keystore_postgres
(
@@ -252,7 +278,7 @@ class TestHivePostgresJdbcTables(CustomClusterTestSuite):
self.client.execute("DESCRIBE {0}.country_postgres".format(unique_database))
self.client.execute("DESCRIBE {0}.country_keystore_postgres".format(unique_database))
# Select statements are verified in hive-jdbc-postgres-tables.test.
# Select statements are verified in hive-jdbc-postgres-tables.test.
self.run_test_case('QueryTest/hive-jdbc-postgres-tables', vector,
use_db=unique_database)
@@ -337,6 +363,32 @@ class TestMySqlExtJdbcTables(CustomClusterTestSuite):
"hive.sql.table" = "country"
);
DROP TABLE IF EXISTS {0}.quoted_col;
CREATE EXTERNAL TABLE {0}.quoted_col
(
id INT,
name STRING,
bool_col BOOLEAN,
tinyint_col SMALLINT,
smallint_col SMALLINT,
int_col INT,
bigint_col BIGINT,
float_col FLOAT,
double_col DOUBLE,
date_col DATE,
`freeze` STRING,
timestamp_col TIMESTAMP
)
STORED BY 'org.apache.hive.storage.jdbc.JdbcStorageHandler'
TBLPROPERTIES (
"hive.sql.database.type" = "MYSQL",
"hive.sql.jdbc.driver" = "com.mysql.cj.jdbc.Driver",
"hive.sql.jdbc.url" = "jdbc:mysql://localhost:3306/functional",
"hive.sql.dbcp.username" = "hiveuser",
"hive.sql.dbcp.password" = "password",
"hive.sql.table" = "quoted_col"
);
DROP TABLE IF EXISTS {0}.country_keystore_mysql;
CREATE EXTERNAL TABLE {0}.country_keystore_mysql
(