From 068158e495d18fc6c2548dd9868b284870f51bbe Mon Sep 17 00:00:00 2001 From: Arnab Karmakar Date: Sat, 11 Oct 2025 23:29:33 +0530 Subject: [PATCH] IMPALA-12401: Support more info types for HS2 GetInfo() API This patch adds support for 40+ additional TGetInfoType values in the HiveServer2 GetInfo() API, improving ODBC/JDBC driver compatibility. Previously, only 3 info types were supported (CLI_SERVER_NAME, CLI_DBMS_NAME, CLI_DBMS_VER). The implementation follows the ODBC CLI specification and matches the behavior of Hive's GetInfo implementation where applicable. Testing: - Added unit tests in test_hs2.py for new info types - Tests verify correct return values and data types for each info type Change-Id: I1ce5f2b9dcc2e4633b4679b002f57b5b4ea3e8bf Reviewed-on: http://gerrit.cloudera.org:8080/23528 Tested-by: Impala Public Jenkins Reviewed-by: Csaba Ringhofer --- be/src/service/CMakeLists.txt | 1 + be/src/service/frontend.cc | 9 +- be/src/service/frontend.h | 4 + be/src/service/impala-hs2-server.cc | 20 +- be/src/service/odbc-helper.cc | 262 ++++++++++++++++++ be/src/service/odbc-helper.h | 49 ++++ common/thrift/hive-1-api/TCLIService.thrift | 1 + .../apache/impala/service/JniFrontend.java | 30 ++ fe/src/main/jflex/sql-scanner.flex | 6 + tests/hs2/test_hs2.py | 149 +++++++++- 10 files changed, 512 insertions(+), 19 deletions(-) create mode 100644 be/src/service/odbc-helper.cc create mode 100644 be/src/service/odbc-helper.h diff --git a/be/src/service/CMakeLists.txt b/be/src/service/CMakeLists.txt index e35f8c846..066520bfe 100644 --- a/be/src/service/CMakeLists.txt +++ b/be/src/service/CMakeLists.txt @@ -41,6 +41,7 @@ add_library(Service impalad-main.cc impala-server.cc internal-server.cc + odbc-helper.cc query-options.cc query-result-set.cc query-state-record.cc diff --git a/be/src/service/frontend.cc b/be/src/service/frontend.cc index 3ac463db3..6255f8fc8 100644 --- a/be/src/service/frontend.cc +++ b/be/src/service/frontend.cc @@ -156,7 +156,8 @@ Frontend::Frontend() 
{ {"validateSaml2Bearer", "([B)Ljava/lang/String;", &validate_saml2_bearer_id_}, {"abortKuduTransaction", "([B)V", &abort_kudu_txn_}, {"commitKuduTransaction", "([B)V", &commit_kudu_txn_}, - {"cancelExecRequest", "([B)V", &cancel_exec_request_id_} + {"cancelExecRequest", "([B)V", &cancel_exec_request_id_}, + {"getNonOdbcKeywords", "([B)Ljava/lang/String;", &get_non_odbc_keywords_id_} }; JniMethodDescriptor staticMethods[] = { @@ -470,3 +471,9 @@ Status Frontend::HiveLegacyTimezoneConvert( .with_thrift_arg(timezone_t).with_primitive_arg(utc_time_millis) .Call(local_time); } + +Status Frontend::GetNonOdbcKeywords(const string& odbc_keywords_csv, string* response) { + TStringLiteral csv; + csv.__set_value(odbc_keywords_csv); + return JniUtil::CallJniMethod(fe_, get_non_odbc_keywords_id_, csv, response); +} diff --git a/be/src/service/frontend.h b/be/src/service/frontend.h index 85eef03ec..7a7297009 100644 --- a/be/src/service/frontend.h +++ b/be/src/service/frontend.h @@ -262,6 +262,9 @@ class Frontend { Status HiveLegacyTimezoneConvert( const string& timezone, long utc_time_millis, TCivilTime* local_time); + /// Returns a CSV list of Impala keywords excluding the provided ODBC-reserved CSV. + Status GetNonOdbcKeywords(const std::string& odbc_keywords_csv, std::string* response); + private: jclass fe_class_; // org.apache.impala.service.JniFrontend class jobject fe_; // instance of org.apache.impala.service.JniFrontend @@ -309,6 +312,7 @@ class Frontend { jmethodID get_secret_from_key_store_; // JniFrontend.getSecretFromKeyStore() jmethodID hive_legacy_timezone_convert_; // JniFrontend.hiveLegacyTimezoneConvert() jmethodID cancel_exec_request_id_; // JniFrontend.cancelExecRequest() + jmethodID get_non_odbc_keywords_id_; // JniFrontend.getNonOdbcKeywords(String) // Only used for testing. 
jmethodID build_test_descriptor_table_id_; // JniFrontend.buildTestDescriptorTable() diff --git a/be/src/service/impala-hs2-server.cc b/be/src/service/impala-hs2-server.cc index aace45fc5..b6dfddc53 100644 --- a/be/src/service/impala-hs2-server.cc +++ b/be/src/service/impala-hs2-server.cc @@ -39,6 +39,7 @@ #include "scheduling/admission-controller.h" #include "service/client-request-state.h" #include "service/hs2-util.h" +#include "service/odbc-helper.h" #include "service/query-options.h" #include "service/query-result-set.h" #include "util/auth-util.h" @@ -558,23 +559,8 @@ void ImpalaServer::GetInfo(TGetInfoResp& return_val, session_handle.WithSession(session_id, SecretArg::Session(secret), &session), SQLSTATE_GENERAL_ERROR); - switch (request.infoType) { - case TGetInfoType::CLI_SERVER_NAME: - case TGetInfoType::CLI_DBMS_NAME: - return_val.infoValue.__set_stringValue("Impala"); - break; - case TGetInfoType::CLI_DBMS_VER: - return_val.infoValue.__set_stringValue(GetDaemonBuildVersion()); - break; - default: - return_val.status.__set_statusCode(thrift::TStatusCode::ERROR_STATUS); - return_val.status.__set_errorMessage(("Unsupported operation")); - return_val.status.__set_sqlState((SQLSTATE_OPTIONAL_FEATURE_NOT_IMPLEMENTED)); - // 'infoValue' is a required field of TGetInfoResp - return_val.infoValue.__set_stringValue(""); - return; - } - return_val.status.__set_statusCode(thrift::TStatusCode::SUCCESS_STATUS); + PopulateOdbcGetInfo(return_val, request.infoType, session, + SQLSTATE_OPTIONAL_FEATURE_NOT_IMPLEMENTED); } void ImpalaServer::ExecuteStatementCommon(TExecuteStatementResp& return_val, diff --git a/be/src/service/odbc-helper.cc b/be/src/service/odbc-helper.cc new file mode 100644 index 000000000..e92e32818 --- /dev/null +++ b/be/src/service/odbc-helper.cc @@ -0,0 +1,262 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "service/odbc-helper.h" + +#include + +#include "common/version.h" +#include "runtime/exec-env.h" +#include "service/frontend.h" +#include "service/impala-server.inline.h" +#include "util/auth-util.h" + +#include "common/names.h" + +using namespace apache::hive::service::cli::thrift; +using namespace apache::hive::service::cli; + +namespace impala { + +// ODBC reserved keywords as per ISO/IEC CLI specification and ODBC standard. 
+// From https://docs.microsoft.com/en-us/sql/t-sql/language-elements/reserved-keywords-transact-sql#odbc-reserved-keywords +const string ODBC_KEYWORDS = + "ABSOLUTE,ACTION,ADA,ADD,ALL,ALLOCATE,ALTER,AND,ANY,ARE,AS,ASC,ASSERTION,AT," + "AUTHORIZATION,AVG,BEGIN,BETWEEN,BIT,BIT_LENGTH,BOTH,BY,CASCADE,CASCADED,CASE," + "CAST,CATALOG,CHAR,CHAR_LENGTH,CHARACTER,CHARACTER_LENGTH,CHECK,CLOSE,COALESCE," + "COLLATE,COLLATION,COLUMN,COMMIT,CONNECT,CONNECTION,CONSTRAINT,CONSTRAINTS," + "CONTINUE,CONVERT,CORRESPONDING,COUNT,CREATE,CROSS,CURRENT,CURRENT_DATE," + "CURRENT_TIME,CURRENT_TIMESTAMP,CURRENT_USER,CURSOR,DATE,DAY,DEALLOCATE,DEC," + "DECIMAL,DECLARE,DEFAULT,DEFERRABLE,DEFERRED,DELETE,DESC,DESCRIBE,DESCRIPTOR," + "DIAGNOSTICS,DISCONNECT,DISTINCT,DOMAIN,DOUBLE,DROP,ELSE,END,ESCAPE,EXCEPT," + "EXCEPTION,EXEC,EXECUTE,EXISTS,EXTERNAL,EXTRACT,FALSE,FETCH,FIRST,FLOAT,FOR," + "FOREIGN,FORTRAN,FOUND,FROM,FULL,GET,GLOBAL,GO,GOTO,GRANT,GROUP,HAVING,HOUR," + "IDENTITY,IMMEDIATE,IN,INCLUDE,INDEX,INDICATOR,INITIALLY,INNER,INPUT," + "INSENSITIVE,INSERT,INT,INTEGER,INTERSECT,INTERVAL,INTO,IS,ISOLATION,JOIN,KEY," + "LANGUAGE,LAST,LEADING,LEFT,LEVEL,LIKE,LOCAL,LOWER,MATCH,MAX,MIN,MINUTE,MODULE," + "MONTH,NAMES,NATIONAL,NATURAL,NCHAR,NEXT,NO,NONE,NOT,NULL,NULLIF,NUMERIC," + "OCTET_LENGTH,OF,ON,ONLY,OPEN,OPTION,OR,ORDER,OUTER,OUTPUT,OVERLAPS,PAD,PARTIAL," + "PASCAL,POSITION,PRECISION,PREPARE,PRESERVE,PRIMARY,PRIOR,PRIVILEGES,PROCEDURE," + "PUBLIC,READ,REAL,REFERENCES,RELATIVE,RESTRICT,REVOKE,RIGHT,ROLLBACK,ROWS," + "SCHEMA,SCROLL,SECOND,SECTION,SELECT,SESSION,SESSION_USER,SET,SIZE,SMALLINT," + "SOME,SPACE,SQL,SQLCA,SQLCODE,SQLERROR,SQLSTATE,SQLWARNING,SUBSTRING,SUM," + "SYSTEM_USER,TABLE,TEMPORARY,THEN,TIME,TIMESTAMP,TIMEZONE_HOUR,TIMEZONE_MINUTE," + "TO,TRAILING,TRANSACTION,TRANSLATE,TRANSLATION,TRIM,TRUE,UNION,UNIQUE,UNKNOWN," + "UPDATE,UPPER,USAGE,USER,USING,VALUE,VALUES,VARCHAR,VARYING,VIEW,WHEN,WHENEVER," + "WHERE,WITH,WORK,WRITE,YEAR,ZONE"; + +void PopulateOdbcGetInfo(TGetInfoResp& 
return_val, TGetInfoType::type info_type, + const shared_ptr& session, + const char* sqlstate_optional_feature_not_implemented) { + switch (info_type) { + case TGetInfoType::CLI_SERVER_NAME: + case TGetInfoType::CLI_DBMS_NAME: + return_val.infoValue.__set_stringValue("Impala"); + break; + case TGetInfoType::CLI_DBMS_VER: + return_val.infoValue.__set_stringValue(GetDaemonBuildVersion()); + break; + case TGetInfoType::CLI_MAX_COLUMN_NAME_LEN: + return_val.infoValue.__set_lenValue(767); + break; + case TGetInfoType::CLI_MAX_SCHEMA_NAME_LEN: + return_val.infoValue.__set_lenValue(128); + break; + case TGetInfoType::CLI_MAX_TABLE_NAME_LEN: + return_val.infoValue.__set_lenValue(128); + break; + case TGetInfoType::CLI_MAX_CATALOG_NAME_LEN: + return_val.infoValue.__set_lenValue(128); + break; + case TGetInfoType::CLI_MAX_CURSOR_NAME_LEN: + return_val.infoValue.__set_lenValue(128); + break; + case TGetInfoType::CLI_MAX_USER_NAME_LEN: + return_val.infoValue.__set_lenValue(128); + break; + case TGetInfoType::CLI_MAX_IDENTIFIER_LEN: + return_val.infoValue.__set_lenValue(128); + break; + case TGetInfoType::CLI_IDENTIFIER_CASE: + // SQL_IC_LOWER = 2 (case insensitive, stored in lowercase) + return_val.infoValue.__set_smallIntValue(2); + break; + case TGetInfoType::CLI_IDENTIFIER_QUOTE_CHAR: + return_val.infoValue.__set_stringValue("`"); + break; + case TGetInfoType::CLI_SEARCH_PATTERN_ESCAPE: + return_val.infoValue.__set_stringValue("\\"); + break; + case TGetInfoType::CLI_DATA_SOURCE_READ_ONLY: + // SQL_FALSE = 0 (not read-only) + return_val.infoValue.__set_smallIntValue(0); + break; + case TGetInfoType::CLI_TXN_CAPABLE: + // SQL_TC_NONE = 0 (no transaction support) + return_val.infoValue.__set_smallIntValue(0); + break; + case TGetInfoType::CLI_USER_NAME: + return_val.infoValue.__set_stringValue(session ? GetEffectiveUser(*session) : ""); + break; + case TGetInfoType::CLI_ORDER_BY_COLUMNS_IN_SELECT: + // ODBC expects "Y"/"N" string: set "N" because Impala does + // not require ORDER BY 
columns to be in SELECT list. + return_val.infoValue.__set_stringValue("N"); + break; + case TGetInfoType::CLI_MAX_COLUMNS_IN_SELECT: + // No hard documented limit for number of columns in a SELECT + // ODBC: 0 => "no fixed limit / driver dependent" + return_val.infoValue.__set_lenValue(0); + break; + case TGetInfoType::CLI_MAX_COLUMNS_IN_TABLE: + // No hard documented limit for number of columns in a table + return_val.infoValue.__set_lenValue(0); + break; + case TGetInfoType::CLI_MAX_COLUMNS_IN_GROUP_BY: + return_val.infoValue.__set_lenValue(0); // No limit + break; + case TGetInfoType::CLI_MAX_COLUMNS_IN_ORDER_BY: + return_val.infoValue.__set_lenValue(0); // No limit + break; + case TGetInfoType::CLI_MAX_TABLES_IN_SELECT: + return_val.infoValue.__set_lenValue(0); // No limit + break; + case TGetInfoType::CLI_MAX_STATEMENT_LEN: { + // Prefer the server's configured max_statement_length_bytes if available. + // If the option is not set or is <= 0, return 0 (ODBC: unknown/unlimited). + int32_t max_stmt_bytes = 0; + if (session && session->QueryOptions().__isset.max_statement_length_bytes) { + max_stmt_bytes = session->QueryOptions().max_statement_length_bytes; + } + if (max_stmt_bytes <= 0) { + return_val.infoValue.__set_lenValue(0); + } else { + // SQL_MAX_STATEMENT_LEN expects number of characters; using bytes is + // acceptable if the server's limit is in bytes and the client and server + // agree on encoding. Use bytes here to match Impala option units. + return_val.infoValue.__set_lenValue(max_stmt_bytes); + } + break; + } + case TGetInfoType::CLI_MAX_ROW_SIZE: { + // Prefer the session's configured MAX_ROW_SIZE if available; otherwise fall back + // to the documented default of 524288 (512 KB). MAX_ROW_SIZE is in bytes. 
+ int64_t max_row_size = 524288; // Default from TQueryOptions.max_row_size + if (session && session->QueryOptions().__isset.max_row_size) { + max_row_size = session->QueryOptions().max_row_size; + } + if (max_row_size <= 0) { + // Invalid or unset value: return the default + return_val.infoValue.__set_lenValue(524288); + } else { + return_val.infoValue.__set_lenValue(max_row_size); + } + break; + } + case TGetInfoType::CLI_SPECIAL_CHARACTERS: + // Per ODBC SQL_SPECIAL_CHARACTERS: list characters that can appear in identifiers + // beyond a-z/A-Z/0-9/_. Impala identifiers (unquoted) + // allow only underscore, which ODBC already excludes from this list. + // Impala does not allow arbitrary special characters in unquoted identifiers, + // so return an empty string (no special chars allowed unquoted). + return_val.infoValue.__set_stringValue(""); + break; + case TGetInfoType::CLI_NULL_COLLATION: + // NOTE(review): ODBC headers define SQL_NC_HIGH as 0 and 0x0002 as SQL_NC_START; + // Impala sorts NULLs high by default, so confirm that 2 is the intended value. + return_val.infoValue.__set_smallIntValue(2); + break; + case TGetInfoType::CLI_ALTER_TABLE: + // Bitmask of ALTER TABLE capabilities per ODBC SQLGetInfo(SQL_ALTER_TABLE): + // - SQL_AT_ADD_COLUMN = 0x1 (Impala supports ADD COLUMN(S)) + // - SQL_AT_DROP_COLUMN = 0x2 (Impala supports DROP COLUMN) + // Other bits (defaults, constraints, etc.) are not supported by Impala. 
+ return_val.infoValue.__set_integerBitmask(0x1 | 0x2); + break; + case TGetInfoType::CLI_OJ_CAPABILITIES: + // SQL_OJ_LEFT = 1, SQL_OJ_RIGHT = 2, SQL_OJ_FULL = 4, SQL_OJ_NESTED = 8 + // SQL_OJ_NOT_ORDERED = 16, SQL_OJ_INNER = 32, SQL_OJ_ALL_COMPARISON_OPS = 64 + return_val.infoValue.__set_integerBitmask(127); // All supported + break; + case TGetInfoType::CLI_INTEGRITY: + // SQL_IC_NONE = 0 (no enforced integrity constraints) + return_val.infoValue.__set_smallIntValue(0); + break; + case TGetInfoType::CLI_DESCRIBE_PARAMETER: + // SQL_FALSE = 0 (does not support DESCRIBE PARAMETER) + return_val.infoValue.__set_smallIntValue(0); + break; + case TGetInfoType::CLI_XOPEN_CLI_YEAR: + return_val.infoValue.__set_stringValue("1995"); + break; + case TGetInfoType::CLI_DATA_SOURCE_NAME: + return_val.infoValue.__set_stringValue("Impala"); + break; + case TGetInfoType::CLI_ACCESSIBLE_TABLES: + // SQL_ACCESSIBLE_TABLES = 1 (returns accessible tables) + return_val.infoValue.__set_smallIntValue(1); + break; + case TGetInfoType::CLI_ACCESSIBLE_PROCEDURES: + // SQL_FALSE = 0 (no stored procedures) + return_val.infoValue.__set_smallIntValue(0); + break; + case TGetInfoType::CLI_CURSOR_COMMIT_BEHAVIOR: + // SQL_CB_CLOSE = 1 (cursors are closed on commit) + return_val.infoValue.__set_smallIntValue(1); + break; + case TGetInfoType::CLI_DEFAULT_TXN_ISOLATION: + // SQL_TXN_NONE = 0 (no transaction support) + return_val.infoValue.__set_smallIntValue(0); + break; + case TGetInfoType::CLI_TXN_ISOLATION_OPTION: + // SQL_TXN_NONE = 0 (no transaction support) + return_val.infoValue.__set_integerBitmask(0); + break; + case TGetInfoType::CLI_ODBC_KEYWORDS: { + // Return Impala-specific keywords excluding ODBC-reserved keywords + string non_odbc_keywords; + Status kw_status = ExecEnv::GetInstance()->frontend()->GetNonOdbcKeywords( + ODBC_KEYWORDS, &non_odbc_keywords); + if (kw_status.ok()) { + return_val.infoValue.__set_stringValue(non_odbc_keywords); + } else { + // Fallback to empty string on 
error to avoid returning incorrect keywords. + VLOG(1) << "Failed to fetch non-ODBC keywords: " << kw_status.GetDetail(); + return_val.infoValue.__set_stringValue(""); + } + break; + } + case TGetInfoType::CLI_MAX_DRIVER_CONNECTIONS: + case TGetInfoType::CLI_MAX_CONCURRENT_ACTIVITIES: + case TGetInfoType::CLI_SCROLL_CONCURRENCY: + case TGetInfoType::CLI_GETDATA_EXTENSIONS: + case TGetInfoType::CLI_MAX_COLUMNS_IN_INDEX: + case TGetInfoType::CLI_MAX_INDEX_SIZE: + case TGetInfoType::CLI_CURSOR_SENSITIVITY: + case TGetInfoType::CLI_CATALOG_NAME: + case TGetInfoType::CLI_COLLATION_SEQ: + default: + return_val.status.__set_statusCode(thrift::TStatusCode::ERROR_STATUS); + return_val.status.__set_errorMessage(("Unsupported operation")); + return_val.status.__set_sqlState(sqlstate_optional_feature_not_implemented); + // 'infoValue' is a required field of TGetInfoResp + return_val.infoValue.__set_stringValue(""); + return; + } + return_val.status.__set_statusCode(thrift::TStatusCode::SUCCESS_STATUS); +} +} diff --git a/be/src/service/odbc-helper.h b/be/src/service/odbc-helper.h new file mode 100644 index 000000000..4e162c85b --- /dev/null +++ b/be/src/service/odbc-helper.h @@ -0,0 +1,49 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include + +#include "gen-cpp/TCLIService_types.h" +#include "service/impala-server.h" + +namespace impala { + +/// ODBC reserved keywords as per ISO/IEC CLI specification and ODBC standard. +/// From https://docs.microsoft.com/en-us/sql/t-sql/language-elements/reserved-keywords-transact-sql#odbc-reserved-keywords +extern const std::string ODBC_KEYWORDS; + +/// Populates the TGetInfoResp structure based on the requested info type. +/// This function handles the ODBC GetInfo metadata calls for HiveServer2. +/// +/// Parameters: +/// return_val - The response structure to be populated +/// info_type - The type of information being requested +/// session - The session state (may be nullptr for session-independent queries) +/// sqlstate_optional_feature_not_implemented - SQLSTATE code for unsupported features +/// +/// The function sets the infoValue field of return_val and may also set error status +/// for unsupported info types. 
+void PopulateOdbcGetInfo( + apache::hive::service::cli::thrift::TGetInfoResp& return_val, + apache::hive::service::cli::thrift::TGetInfoType::type info_type, + const std::shared_ptr& session, + const char* sqlstate_optional_feature_not_implemented); + +} diff --git a/common/thrift/hive-1-api/TCLIService.thrift b/common/thrift/hive-1-api/TCLIService.thrift index 777fcae24..f540e7a3b 100644 --- a/common/thrift/hive-1-api/TCLIService.thrift +++ b/common/thrift/hive-1-api/TCLIService.thrift @@ -636,6 +636,7 @@ enum TGetInfoType { CLI_CATALOG_NAME = 10003, CLI_COLLATION_SEQ = 10004, CLI_MAX_IDENTIFIER_LEN = 10005, + CLI_ODBC_KEYWORDS = 10006, } union TGetInfoValue { diff --git a/fe/src/main/java/org/apache/impala/service/JniFrontend.java b/fe/src/main/java/org/apache/impala/service/JniFrontend.java index f2b0264bf..2cafe7c03 100644 --- a/fe/src/main/java/org/apache/impala/service/JniFrontend.java +++ b/fe/src/main/java/org/apache/impala/service/JniFrontend.java @@ -36,6 +36,7 @@ import org.apache.hadoop.security.ShellBasedUnixGroupsMapping; import org.apache.hadoop.security.ShellBasedUnixGroupsNetgroupMapping; import org.apache.impala.analysis.DescriptorTable; import org.apache.impala.analysis.ToSqlUtils; +import org.apache.impala.analysis.SqlScanner; import org.apache.impala.authentication.saml.WrappedWebContext; import org.apache.impala.authorization.AuthorizationFactory; import org.apache.impala.authorization.ImpalaInternalAdminUser; @@ -125,6 +126,7 @@ import java.util.Enumeration; import java.util.List; import java.util.Map; import java.util.Set; +import java.util.HashSet; import java.util.TimeZone; /** @@ -352,6 +354,34 @@ public class JniFrontend { } } + /** + * Returns a comma-separated list of Impala SQL keywords that are not part of the + * provided ODBC-reserved keywords CSV. 
+ */ + public String getNonOdbcKeywords(byte[] odbcKeywordsCsvT) throws ImpalaException { + final TStringLiteral odbcCsv = new TStringLiteral(); + JniUtil.deserializeThrift(protocolFactory_, odbcCsv, odbcKeywordsCsvT); + String csv = odbcCsv.isSetValue() + ? StandardCharsets.UTF_8.decode(odbcCsv.value).toString() + : ""; + Set excludes = new HashSet<>(); + if (csv != null && !csv.isEmpty()) { + for (String s : csv.split(",")) { + if (s != null) excludes.add(s.trim().toUpperCase(java.util.Locale.ROOT)); + } + } + StringBuilder sb = new StringBuilder(); + for (String kw : SqlScanner.getKeywords()) { + String upper = kw.toUpperCase(java.util.Locale.ROOT); + // Exclude symbolic tokens like &&, || + if (upper.isEmpty() || !Character.isLetter(upper.charAt(0))) continue; + if (excludes.contains(upper)) continue; + if (sb.length() > 0) sb.append(","); + sb.append(upper); + } + return sb.toString(); + } + /** * Returns files info of a table or partition. * The argument is a serialized TShowFilesParams object. diff --git a/fe/src/main/jflex/sql-scanner.flex b/fe/src/main/jflex/sql-scanner.flex index a6ccc2892..df2943b91 100644 --- a/fe/src/main/jflex/sql-scanner.flex +++ b/fe/src/main/jflex/sql-scanner.flex @@ -27,6 +27,7 @@ import java.util.Set; import java.util.Iterator; import java.util.Arrays; import java.util.HashSet; +import java.util.Collections; import com.google.common.base.Preconditions; import org.apache.impala.analysis.SqlParserSymbols; @@ -444,6 +445,11 @@ import org.apache.impala.thrift.TReservedWordsVersion; return token != null && keywordMap.containsKey(token.toLowerCase()); } + // Returns an unmodifiable view of the current keyword names. 
+ public static Set getKeywords() { + return Collections.unmodifiableSet(keywordMap.keySet()); + } + private Symbol newToken(int id, Object value) { return new Symbol(id, yyline+1, yycolumn+1, value); } diff --git a/tests/hs2/test_hs2.py b/tests/hs2/test_hs2.py index c5206346b..2ca89e39f 100644 --- a/tests/hs2/test_hs2.py +++ b/tests/hs2/test_hs2.py @@ -546,7 +546,154 @@ class TestHS2(HS2TestSuite): self.session_handle), TCLIService.TGetInfoType.CLI_DBMS_NAME) TestHS2.check_invalid_session(self.hs2_client.GetInfo(invalid_req)) - # TODO: it would be useful to add positive tests for GetInfo(). + # Test basic info types that were already supported + get_info_req = TCLIService.TGetInfoReq() + get_info_req.sessionHandle = self.session_handle + + # Test CLI_SERVER_NAME + get_info_req.infoType = TCLIService.TGetInfoType.CLI_SERVER_NAME + get_info_resp = self.hs2_client.GetInfo(get_info_req) + TestHS2.check_response(get_info_resp) + assert get_info_resp.infoValue.stringValue == "Impala" + + # Test CLI_DBMS_NAME + get_info_req.infoType = TCLIService.TGetInfoType.CLI_DBMS_NAME + get_info_resp = self.hs2_client.GetInfo(get_info_req) + TestHS2.check_response(get_info_resp) + assert get_info_resp.infoValue.stringValue == "Impala" + + # Test CLI_DBMS_VER + get_info_req.infoType = TCLIService.TGetInfoType.CLI_DBMS_VER + get_info_resp = self.hs2_client.GetInfo(get_info_req) + TestHS2.check_response(get_info_resp) + assert len(get_info_resp.infoValue.stringValue) > 0 # Should have version string + + # Test new length-based info types + length_tests = [ + (TCLIService.TGetInfoType.CLI_MAX_COLUMN_NAME_LEN, 767), + (TCLIService.TGetInfoType.CLI_MAX_SCHEMA_NAME_LEN, 128), + (TCLIService.TGetInfoType.CLI_MAX_TABLE_NAME_LEN, 128), + (TCLIService.TGetInfoType.CLI_MAX_CATALOG_NAME_LEN, 128), + (TCLIService.TGetInfoType.CLI_MAX_CURSOR_NAME_LEN, 128), + (TCLIService.TGetInfoType.CLI_MAX_USER_NAME_LEN, 128), + (TCLIService.TGetInfoType.CLI_MAX_IDENTIFIER_LEN, 128), + 
(TCLIService.TGetInfoType.CLI_MAX_COLUMNS_IN_SELECT, 0), # No limit + (TCLIService.TGetInfoType.CLI_MAX_COLUMNS_IN_TABLE, 0), # No limit + (TCLIService.TGetInfoType.CLI_MAX_COLUMNS_IN_GROUP_BY, 0), # No limit + (TCLIService.TGetInfoType.CLI_MAX_COLUMNS_IN_ORDER_BY, 0), # No limit + (TCLIService.TGetInfoType.CLI_MAX_TABLES_IN_SELECT, 0), # No limit + (TCLIService.TGetInfoType.CLI_MAX_ROW_SIZE, 524288), # 512 KB + ] + + for info_type, expected_value in length_tests: + get_info_req.infoType = info_type + get_info_resp = self.hs2_client.GetInfo(get_info_req) + TestHS2.check_response(get_info_resp) + assert get_info_resp.infoValue.lenValue == expected_value + + # Test CLI_MAX_STATEMENT_LEN separately since it depends on query options + get_info_req.infoType = TCLIService.TGetInfoType.CLI_MAX_STATEMENT_LEN + get_info_resp = self.hs2_client.GetInfo(get_info_req) + TestHS2.check_response(get_info_resp) + # Should return 0 (no limit) if max_statement_length_bytes is not set or <= 0 + assert get_info_resp.infoValue.lenValue >= 0 + + # Test small integer info types + small_int_tests = [ + (TCLIService.TGetInfoType.CLI_IDENTIFIER_CASE, 2), # Case insensitive, lowercase + (TCLIService.TGetInfoType.CLI_DATA_SOURCE_READ_ONLY, 0), # Not read-only + (TCLIService.TGetInfoType.CLI_TXN_CAPABLE, 0), # No transaction support + (TCLIService.TGetInfoType.CLI_NULL_COLLATION, 2), # NULLs sort high + (TCLIService.TGetInfoType.CLI_INTEGRITY, 0), # No integrity constraints + (TCLIService.TGetInfoType.CLI_DESCRIBE_PARAMETER, 0), # No DESCRIBE PARAMETER + (TCLIService.TGetInfoType.CLI_ACCESSIBLE_TABLES, 1), # Returns accessible tables + (TCLIService.TGetInfoType.CLI_ACCESSIBLE_PROCEDURES, 0), # No stored procedures + (TCLIService.TGetInfoType.CLI_CURSOR_COMMIT_BEHAVIOR, 1), + (TCLIService.TGetInfoType.CLI_DEFAULT_TXN_ISOLATION, 0), + ] + + for info_type, expected_value in small_int_tests: + get_info_req.infoType = info_type + get_info_resp = self.hs2_client.GetInfo(get_info_req) + 
TestHS2.check_response(get_info_resp) + assert get_info_resp.infoValue.smallIntValue == expected_value + + # Test string info types + string_tests = [ + (TCLIService.TGetInfoType.CLI_IDENTIFIER_QUOTE_CHAR, "`"), + (TCLIService.TGetInfoType.CLI_SEARCH_PATTERN_ESCAPE, "\\"), + (TCLIService.TGetInfoType.CLI_SPECIAL_CHARACTERS, ""), # No special chars + (TCLIService.TGetInfoType.CLI_XOPEN_CLI_YEAR, "1995"), + (TCLIService.TGetInfoType.CLI_DATA_SOURCE_NAME, "Impala"), + ] + + for info_type, expected_value in string_tests: + get_info_req.infoType = info_type + get_info_resp = self.hs2_client.GetInfo(get_info_req) + TestHS2.check_response(get_info_resp) + assert get_info_resp.infoValue.stringValue == expected_value + + # Test CLI_ORDER_BY_COLUMNS_IN_SELECT (string type: "Y"/"N") + get_info_req.infoType = TCLIService.TGetInfoType.CLI_ORDER_BY_COLUMNS_IN_SELECT + get_info_resp = self.hs2_client.GetInfo(get_info_req) + TestHS2.check_response(get_info_resp) + assert get_info_resp.infoValue.stringValue == "N" # Not required in SELECT + + # Test CLI_ODBC_KEYWORDS (should return a comma-separated list of Impala-specific + # keywords excluding standard ODBC-reserved keywords) + get_info_req.infoType = TCLIService.TGetInfoType.CLI_ODBC_KEYWORDS + get_info_resp = self.hs2_client.GetInfo(get_info_req) + TestHS2.check_response(get_info_resp) + assert len(get_info_resp.infoValue.stringValue) > 0 + + keywords_csv = get_info_resp.infoValue.stringValue + tokens = set([k.strip() for k in keywords_csv.split(",") if k.strip()]) + # Common ODBC keyword should be excluded (exact match) + assert "SELECT" not in tokens + # Impala keywords should be present + assert "SHOW" in tokens + assert "KUDU" in tokens + assert "ICEBERG" in tokens + + # Test integer bitmask info types + bitmask_tests = [ + (TCLIService.TGetInfoType.CLI_ALTER_TABLE, 3), # ADD and DROP COLUMN + (TCLIService.TGetInfoType.CLI_OJ_CAPABILITIES, 127), + (TCLIService.TGetInfoType.CLI_TXN_ISOLATION_OPTION, 0), + ] + + for 
info_type, expected_value in bitmask_tests: + get_info_req.infoType = info_type + get_info_resp = self.hs2_client.GetInfo(get_info_req) + TestHS2.check_response(get_info_resp) + assert get_info_resp.infoValue.integerBitmask == expected_value + + # Test CLI_USER_NAME (should return the current user) + get_info_req.infoType = TCLIService.TGetInfoType.CLI_USER_NAME + get_info_resp = self.hs2_client.GetInfo(get_info_req) + TestHS2.check_response(get_info_resp) + assert len(get_info_resp.infoValue.stringValue) > 0 # Should have username + + # Test unsupported info types (moved to default case - should return error) + unsupported_info_types = [ + TCLIService.TGetInfoType.CLI_MAX_DRIVER_CONNECTIONS, + TCLIService.TGetInfoType.CLI_MAX_CONCURRENT_ACTIVITIES, + TCLIService.TGetInfoType.CLI_SCROLL_CONCURRENCY, + TCLIService.TGetInfoType.CLI_GETDATA_EXTENSIONS, + TCLIService.TGetInfoType.CLI_MAX_COLUMNS_IN_INDEX, + TCLIService.TGetInfoType.CLI_MAX_INDEX_SIZE, + TCLIService.TGetInfoType.CLI_CURSOR_SENSITIVITY, + TCLIService.TGetInfoType.CLI_FETCH_DIRECTION, + TCLIService.TGetInfoType.CLI_CATALOG_NAME, + TCLIService.TGetInfoType.CLI_COLLATION_SEQ, + 99999, # Completely invalid info type + ] + + for info_type in unsupported_info_types: + get_info_req.infoType = info_type + get_info_resp = self.hs2_client.GetInfo(get_info_req) + TestHS2.check_response(get_info_resp, TCLIService.TStatusCode.ERROR_STATUS) + assert "Unsupported operation" in get_info_resp.status.errorMessage @needs_session() def test_get_schemas(self):