IMPALA-14027: Implement HS2 NULL_TYPE using TStringValue

HS2 NULL_TYPE should be implemented using TStringValue.

However, due to an incompatibility with the Hive JDBC driver implementation
at the time, Impala chose to implement the NULL type using TBoolValue (see
IMPALA-914, IMPALA-1370).

HIVE-4172 might be the root cause of that decision. Today, the Hive
JDBC driver (org.apache.hive.jdbc.HiveDriver) no longer has that issue,
as shown in this reproduction after applying this patch:

./bin/run-jdbc-client.sh -q "select null" -t NOSASL
Using JDBC Driver Name: org.apache.hive.jdbc.HiveDriver
Connecting to: jdbc:hive2://localhost:21050/;auth=noSasl
Executing: select null
----[START]----
NULL
----[END]----
Returned 1 row(s) in 0.343s

Thus, we can reimplement NULL_TYPE using TStringValue to match
HiveServer2 behavior.

Testing:
- Pass core tests.

Change-Id: I354110164b360013d9893f1eb4398c3418f80472
Reviewed-on: http://gerrit.cloudera.org:8080/22852
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
This commit is contained in:
Riza Suminto
2025-05-03 16:45:26 -07:00
committed by Impala Public Jenkins
parent 2f9abc4e80
commit cb496104d9
9 changed files with 46 additions and 26 deletions

View File

@@ -85,7 +85,6 @@ void impala::TColumnValueToHS2TColumn(const TColumnValue& col_val,
string* nulls;
bool is_null;
switch (type.types[0].scalar_type.type) {
case TPrimitiveType::NULL_TYPE:
case TPrimitiveType::BOOLEAN:
is_null = !col_val.__isset.bool_val;
column->boolVal.values.push_back(col_val.bool_val);
@@ -117,6 +116,7 @@ void impala::TColumnValueToHS2TColumn(const TColumnValue& col_val,
column->doubleVal.values.push_back(col_val.double_val);
nulls = &column->doubleVal.nulls;
break;
case TPrimitiveType::NULL_TYPE:
case TPrimitiveType::TIMESTAMP:
case TPrimitiveType::DATE:
case TPrimitiveType::STRING:
@@ -152,6 +152,25 @@ void ReserveSpace(int reserve_count, T* hs2Vals) {
hs2Vals->nulls.reserve(BitUtil::RoundUpToPowerOfTwo(num_null_bytes));
}
// Implementation for NULL.
// Impala evaluates a NULL expression internally as a nullable BooleanVal (IMPALA-914).
// Since IMPALA-14027 the result is emitted through TColumn.stringVal instead of
// TColumn.boolVal, to match HiveServer2 behavior.
// Rows of 'batch' selected by 'start_idx' / 'num_rows' are appended to 'column'; their
// null bits are written starting at position 'output_row_idx'.
static void NullExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, RowBatch* batch,
    int start_idx, int num_rows, uint32_t output_row_idx,
    apache::hive::service::cli::thrift::TColumn* column) {
  auto& string_col = column->stringVal;
  FOREACH_ROW_LIMIT(batch, start_idx, num_rows, it) {
    // Evaluating 'expr_eval' is not strictly required here. It is done anyway, and the
    // result is DCHECK-ed, to stay consistent with the other conversion functions.
    const BooleanVal null_val = expr_eval->GetBooleanVal(it.Get());
    DCHECK(null_val.is_null);
    // Every row contributes an empty placeholder string plus its null bit.
    string_col.values.emplace_back();
    SetNullBit(output_row_idx, null_val.is_null, &string_col.nulls);
    ++output_row_idx;
  }
}
// Implementation for BOOL.
static void BoolExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, RowBatch* batch,
int start_idx, int num_rows, uint32_t output_row_idx,
@@ -454,6 +473,9 @@ void impala::ExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval,
switch (type.types[0].scalar_type.type) {
case TPrimitiveType::NULL_TYPE:
ReserveSpace(expected_result_count, &column->stringVal);
NullExprValuesToHS2TColumn(
expr_eval, batch, start_idx, num_rows, output_row_idx, column);
case TPrimitiveType::BOOLEAN:
ReserveSpace(expected_result_count, &column->boolVal);
BoolExprValuesToHS2TColumn(
@@ -598,9 +620,9 @@ void impala::ExprValueToHS2TColumnValue(const void* value, const TColumnType& ty
DCHECK_EQ(1, type.types[0].__isset.scalar_type);
switch (type.types[0].scalar_type.type) {
case TPrimitiveType::NULL_TYPE:
// Set NULLs in the bool_val.
hs2_col_val->__isset.boolVal = true;
hs2_col_val->boolVal.__isset.value = false;
// Set NULLs in the stringVal, but don't set the value itself.
hs2_col_val->__isset.stringVal = true;
hs2_col_val->stringVal.__isset.value = false;
break;
case TPrimitiveType::BOOLEAN:
hs2_col_val->__isset.boolVal = true;
@@ -868,10 +890,8 @@ thrift::TTypeEntry impala::ColumnToHs2Type(
const ColumnType& type = ColumnType::FromThrift(columnType);
thrift::TPrimitiveTypeEntry type_entry;
switch (type.type) {
// Map NULL_TYPE to BOOLEAN, otherwise Hive's JDBC driver won't
// work for queries like "SELECT NULL" (IMPALA-914).
case TYPE_NULL:
type_entry.__set_type(thrift::TTypeId::BOOLEAN_TYPE);
type_entry.__set_type(thrift::TTypeId::NULL_TYPE);
break;
case TYPE_BOOLEAN:
type_entry.__set_type(thrift::TTypeId::BOOLEAN_TYPE);

View File

@@ -410,7 +410,6 @@ int HS2ColumnarResultSet::AddRows(
primitiveType = TPrimitiveType::STRING;
}
switch (primitiveType) {
case TPrimitiveType::NULL_TYPE:
case TPrimitiveType::BOOLEAN:
StitchNulls(
num_rows_, rows_added, start_idx, from->boolVal.nulls, &(to->boolVal.nulls));
@@ -454,6 +453,7 @@ int HS2ColumnarResultSet::AddRows(
from->doubleVal.values.begin() + start_idx,
from->doubleVal.values.begin() + start_idx + rows_added);
break;
case TPrimitiveType::NULL_TYPE:
case TPrimitiveType::TIMESTAMP:
case TPrimitiveType::DATE:
case TPrimitiveType::DECIMAL:
@@ -509,7 +509,6 @@ void HS2ColumnarResultSet::InitColumns() {
DCHECK(type_nodes[0].__isset.scalar_type);
TPrimitiveType::type input_type = type_nodes[0].scalar_type.type;
switch (input_type) {
case TPrimitiveType::NULL_TYPE:
case TPrimitiveType::BOOLEAN:
col_output.__isset.boolVal = true;
break;
@@ -529,6 +528,7 @@ void HS2ColumnarResultSet::InitColumns() {
case TPrimitiveType::DOUBLE:
col_output.__isset.doubleVal = true;
break;
case TPrimitiveType::NULL_TYPE:
case TPrimitiveType::TIMESTAMP:
case TPrimitiveType::DATE:
case TPrimitiveType::DECIMAL:

View File

@@ -599,10 +599,10 @@ public class JdbcTest extends JdbcTestBase {
@Test
public void testSelectNull() throws SQLException {
// Regression test for IMPALA-914.
// Regression test for IMPALA-914 / IMPALA-1370 / IMPALA-14027.
ResultSet rs = con_.createStatement().executeQuery("select NULL");
// Expect the column to be of type BOOLEAN to be compatible with Hive.
assertEquals(rs.getMetaData().getColumnType(1), Types.BOOLEAN);
// Expect the column to be of type NULL to be compatible with HiveServer2.
assertEquals(rs.getMetaData().getColumnType(1), Types.NULL);
try {
// We expect exactly one result row with a NULL inside the first column.
assertTrue(rs.next());

View File

@@ -519,7 +519,7 @@ select 3, 'c', NULL, 30.0
---- TYPES
tinyint, string, null, decimal
---- HS2_TYPES
tinyint, string, boolean, decimal
tinyint, string, null, decimal
---- RESULTS: VERIFY_IS_EQUAL_SORTED
1,'a',NULL,10.0
====
@@ -533,7 +533,7 @@ values(3, 'c', NULL, 30.0)
---- TYPES
tinyint, string, null, decimal
---- HS2_TYPES
tinyint, string, boolean, decimal
tinyint, string, null, decimal
---- RESULTS: VERIFY_IS_EQUAL_SORTED
1,'a',NULL,10.0
====

View File

@@ -463,7 +463,7 @@ values(1, 'a', NULL, 10.0)
---- TYPES
tinyint, string, null, decimal
---- HS2_TYPES
tinyint, string, boolean, decimal
tinyint, string, null, decimal
---- RESULTS: VERIFY_IS_EQUAL_SORTED
1,'a',NULL,10.0
====

View File

@@ -195,7 +195,7 @@ limit 6
---- TYPES
int, tinyint, null
---- HS2_TYPES
int, tinyint, boolean
int, tinyint, null
====
---- QUERY
# check cross joins within a subquery

View File

@@ -903,7 +903,7 @@ limit 6
int, tinyint, null
---- HS2_TYPES
# HS2 reports NULL columns as NULL_TYPE (IMPALA-14027)
int, tinyint, boolean
int, tinyint, null
====
---- QUERY
# check cross joins within a subquery

View File

@@ -741,7 +741,7 @@ select 3, 'c', NULL, 30.0
---- TYPES
tinyint, string, null, decimal
---- HS2_TYPES
tinyint, string, boolean, decimal
tinyint, string, null, decimal
---- RESULTS: VERIFY_IS_EQUAL_SORTED
1,'a',NULL,10.0
2,'b',NULL,20.0
@@ -757,7 +757,7 @@ select 1, 'a', NULL, 10.0
---- TYPES
tinyint, string, null, decimal
---- HS2_TYPES
tinyint, string, boolean, decimal
tinyint, string, null, decimal
---- RESULTS: VERIFY_IS_EQUAL_SORTED
1,'a',NULL,10.0
2,'b',NULL,20.0
@@ -772,7 +772,7 @@ values(3, 'c', NULL, 30.0)
---- TYPES
tinyint, string, null, decimal
---- HS2_TYPES
tinyint, string, boolean, decimal
tinyint, string, null, decimal
---- RESULTS: VERIFY_IS_EQUAL_SORTED
1,'a',NULL,10.0
2,'b',NULL,20.0
@@ -788,7 +788,7 @@ values(1, 'a', NULL, 10.0)
---- TYPES
tinyint, string, null, decimal
---- HS2_TYPES
tinyint, string, boolean, decimal
tinyint, string, null, decimal
---- RESULTS: VERIFY_IS_EQUAL_SORTED
1,'a',NULL,10.0
2,'b',NULL,20.0

View File

@@ -242,21 +242,21 @@ class TestFetch(HS2TestSuite):
execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
HS2TestSuite.check_response(execute_statement_resp)
# Check that the expected type is boolean (for compatibility with Hive, see also
# IMPALA-914)
# Check that the expected type is NULL_TYPE (for compatibility with HiveServer2,
# see also IMPALA-914, IMPALA-1370, and IMPALA-14027 for history).
get_result_metadata_req = TCLIService.TGetResultSetMetadataReq()
get_result_metadata_req.operationHandle = execute_statement_resp.operationHandle
get_result_metadata_resp = \
self.hs2_client.GetResultSetMetadata(get_result_metadata_req)
col = get_result_metadata_resp.schema.columns[0]
assert col.typeDesc.types[0].primitiveEntry.type == TTypeId.BOOLEAN_TYPE
assert col.typeDesc.types[0].primitiveEntry.type == TTypeId.NULL_TYPE
# Check that the actual type is boolean
# Check that the actual type is string
fetch_results_req = TCLIService.TFetchResultsReq()
fetch_results_req.operationHandle = execute_statement_resp.operationHandle
fetch_results_req.maxRows = 1
fetch_results_resp = self.fetch(fetch_results_req)
assert fetch_results_resp.results.columns[0].boolVal is not None
assert fetch_results_resp.results.columns[0].stringVal is not None
assert self.column_results_to_string(
fetch_results_resp.results.columns) == (1, "NULL\n")