mirror of
https://github.com/apache/impala.git
synced 2025-12-19 18:12:08 -05:00
IMPALA-14027: Implement HS2 NULL_TYPE using TStringValue
HS2 NULL_TYPE should be implemented using TStringValue. However, due to incompatibility with Hive JDBC driver implementation then, Impala choose to implement NULL type using TBoolValue (see IMPALA-914, IMPALA-1370). HIVE-4172 might be the root cause for such decision. Today, the Hive JDBC (org.apache.hive.jdbc.HiveDriver) does not have that issue anymore, as shown in this reproduction after applying this patch: ./bin/run-jdbc-client.sh -q "select null" -t NOSASL Using JDBC Driver Name: org.apache.hive.jdbc.HiveDriver Connecting to: jdbc:hive2://localhost:21050/;auth=noSasl Executing: select null ----[START]---- NULL ----[END]---- Returned 1 row(s) in 0.343s Thus, we can reimplement NULL_TYPE using TStringValue to match HiveServer2 behavior. Testing: - Pass core tests. Change-Id: I354110164b360013d9893f1eb4398c3418f80472 Reviewed-on: http://gerrit.cloudera.org:8080/22852 Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
This commit is contained in:
committed by
Impala Public Jenkins
parent
2f9abc4e80
commit
cb496104d9
@@ -85,7 +85,6 @@ void impala::TColumnValueToHS2TColumn(const TColumnValue& col_val,
|
||||
string* nulls;
|
||||
bool is_null;
|
||||
switch (type.types[0].scalar_type.type) {
|
||||
case TPrimitiveType::NULL_TYPE:
|
||||
case TPrimitiveType::BOOLEAN:
|
||||
is_null = !col_val.__isset.bool_val;
|
||||
column->boolVal.values.push_back(col_val.bool_val);
|
||||
@@ -117,6 +116,7 @@ void impala::TColumnValueToHS2TColumn(const TColumnValue& col_val,
|
||||
column->doubleVal.values.push_back(col_val.double_val);
|
||||
nulls = &column->doubleVal.nulls;
|
||||
break;
|
||||
case TPrimitiveType::NULL_TYPE:
|
||||
case TPrimitiveType::TIMESTAMP:
|
||||
case TPrimitiveType::DATE:
|
||||
case TPrimitiveType::STRING:
|
||||
@@ -152,6 +152,25 @@ void ReserveSpace(int reserve_count, T* hs2Vals) {
|
||||
hs2Vals->nulls.reserve(BitUtil::RoundUpToPowerOfTwo(num_null_bytes));
|
||||
}
|
||||
|
||||
// Implementation for NULL.
|
||||
// Internally, Impala implement NULL expession using nullable-BooleanVal (IMPALA-914).
|
||||
// To match with HiveServer2 behavior, IMPALA-14027 change the result mapping to use
|
||||
// TColumn.stringVal rather than TColumn.boolVal.
|
||||
static void NullExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, RowBatch* batch,
|
||||
int start_idx, int num_rows, uint32_t output_row_idx,
|
||||
apache::hive::service::cli::thrift::TColumn* column) {
|
||||
FOREACH_ROW_LIMIT(batch, start_idx, num_rows, it) {
|
||||
// It is actually not necessary to evaluate expr_eval here. But we choose to do it
|
||||
// and DCHECK the result to be consistent with other functions.
|
||||
BooleanVal val = expr_eval->GetBooleanVal(it.Get());
|
||||
DCHECK(val.is_null);
|
||||
// emplace empty string and set null bit.
|
||||
column->stringVal.values.emplace_back();
|
||||
SetNullBit(output_row_idx, val.is_null, &column->stringVal.nulls);
|
||||
++output_row_idx;
|
||||
}
|
||||
}
|
||||
|
||||
// Implementation for BOOL.
|
||||
static void BoolExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, RowBatch* batch,
|
||||
int start_idx, int num_rows, uint32_t output_row_idx,
|
||||
@@ -454,6 +473,9 @@ void impala::ExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval,
|
||||
|
||||
switch (type.types[0].scalar_type.type) {
|
||||
case TPrimitiveType::NULL_TYPE:
|
||||
ReserveSpace(expected_result_count, &column->stringVal);
|
||||
NullExprValuesToHS2TColumn(
|
||||
expr_eval, batch, start_idx, num_rows, output_row_idx, column);
|
||||
case TPrimitiveType::BOOLEAN:
|
||||
ReserveSpace(expected_result_count, &column->boolVal);
|
||||
BoolExprValuesToHS2TColumn(
|
||||
@@ -598,9 +620,9 @@ void impala::ExprValueToHS2TColumnValue(const void* value, const TColumnType& ty
|
||||
DCHECK_EQ(1, type.types[0].__isset.scalar_type);
|
||||
switch (type.types[0].scalar_type.type) {
|
||||
case TPrimitiveType::NULL_TYPE:
|
||||
// Set NULLs in the bool_val.
|
||||
hs2_col_val->__isset.boolVal = true;
|
||||
hs2_col_val->boolVal.__isset.value = false;
|
||||
// Set NULLs in the stringVal, but don't set the value itself.
|
||||
hs2_col_val->__isset.stringVal = true;
|
||||
hs2_col_val->stringVal.__isset.value = false;
|
||||
break;
|
||||
case TPrimitiveType::BOOLEAN:
|
||||
hs2_col_val->__isset.boolVal = true;
|
||||
@@ -868,10 +890,8 @@ thrift::TTypeEntry impala::ColumnToHs2Type(
|
||||
const ColumnType& type = ColumnType::FromThrift(columnType);
|
||||
thrift::TPrimitiveTypeEntry type_entry;
|
||||
switch (type.type) {
|
||||
// Map NULL_TYPE to BOOLEAN, otherwise Hive's JDBC driver won't
|
||||
// work for queries like "SELECT NULL" (IMPALA-914).
|
||||
case TYPE_NULL:
|
||||
type_entry.__set_type(thrift::TTypeId::BOOLEAN_TYPE);
|
||||
type_entry.__set_type(thrift::TTypeId::NULL_TYPE);
|
||||
break;
|
||||
case TYPE_BOOLEAN:
|
||||
type_entry.__set_type(thrift::TTypeId::BOOLEAN_TYPE);
|
||||
|
||||
@@ -410,7 +410,6 @@ int HS2ColumnarResultSet::AddRows(
|
||||
primitiveType = TPrimitiveType::STRING;
|
||||
}
|
||||
switch (primitiveType) {
|
||||
case TPrimitiveType::NULL_TYPE:
|
||||
case TPrimitiveType::BOOLEAN:
|
||||
StitchNulls(
|
||||
num_rows_, rows_added, start_idx, from->boolVal.nulls, &(to->boolVal.nulls));
|
||||
@@ -454,6 +453,7 @@ int HS2ColumnarResultSet::AddRows(
|
||||
from->doubleVal.values.begin() + start_idx,
|
||||
from->doubleVal.values.begin() + start_idx + rows_added);
|
||||
break;
|
||||
case TPrimitiveType::NULL_TYPE:
|
||||
case TPrimitiveType::TIMESTAMP:
|
||||
case TPrimitiveType::DATE:
|
||||
case TPrimitiveType::DECIMAL:
|
||||
@@ -509,7 +509,6 @@ void HS2ColumnarResultSet::InitColumns() {
|
||||
DCHECK(type_nodes[0].__isset.scalar_type);
|
||||
TPrimitiveType::type input_type = type_nodes[0].scalar_type.type;
|
||||
switch (input_type) {
|
||||
case TPrimitiveType::NULL_TYPE:
|
||||
case TPrimitiveType::BOOLEAN:
|
||||
col_output.__isset.boolVal = true;
|
||||
break;
|
||||
@@ -529,6 +528,7 @@ void HS2ColumnarResultSet::InitColumns() {
|
||||
case TPrimitiveType::DOUBLE:
|
||||
col_output.__isset.doubleVal = true;
|
||||
break;
|
||||
case TPrimitiveType::NULL_TYPE:
|
||||
case TPrimitiveType::TIMESTAMP:
|
||||
case TPrimitiveType::DATE:
|
||||
case TPrimitiveType::DECIMAL:
|
||||
|
||||
@@ -599,10 +599,10 @@ public class JdbcTest extends JdbcTestBase {
|
||||
|
||||
@Test
|
||||
public void testSelectNull() throws SQLException {
|
||||
// Regression test for IMPALA-914.
|
||||
// Regression test for IMPALA-914 / IMPALA-1370 / IMPALA-14027.
|
||||
ResultSet rs = con_.createStatement().executeQuery("select NULL");
|
||||
// Expect the column to be of type BOOLEAN to be compatible with Hive.
|
||||
assertEquals(rs.getMetaData().getColumnType(1), Types.BOOLEAN);
|
||||
// Expect the column to be of type NULL to be compatible with HiveServer2.
|
||||
assertEquals(rs.getMetaData().getColumnType(1), Types.NULL);
|
||||
try {
|
||||
// We expect exactly one result row with a NULL inside the first column.
|
||||
assertTrue(rs.next());
|
||||
|
||||
@@ -519,7 +519,7 @@ select 3, 'c', NULL, 30.0
|
||||
---- TYPES
|
||||
tinyint, string, null, decimal
|
||||
---- HS2_TYPES
|
||||
tinyint, string, boolean, decimal
|
||||
tinyint, string, null, decimal
|
||||
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
||||
1,'a',NULL,10.0
|
||||
====
|
||||
@@ -533,7 +533,7 @@ values(3, 'c', NULL, 30.0)
|
||||
---- TYPES
|
||||
tinyint, string, null, decimal
|
||||
---- HS2_TYPES
|
||||
tinyint, string, boolean, decimal
|
||||
tinyint, string, null, decimal
|
||||
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
||||
1,'a',NULL,10.0
|
||||
====
|
||||
|
||||
@@ -463,7 +463,7 @@ values(1, 'a', NULL, 10.0)
|
||||
---- TYPES
|
||||
tinyint, string, null, decimal
|
||||
---- HS2_TYPES
|
||||
tinyint, string, boolean, decimal
|
||||
tinyint, string, null, decimal
|
||||
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
||||
1,'a',NULL,10.0
|
||||
====
|
||||
|
||||
@@ -195,7 +195,7 @@ limit 6
|
||||
---- TYPES
|
||||
int, tinyint, null
|
||||
---- HS2_TYPES
|
||||
int, tinyint, boolean
|
||||
int, tinyint, null
|
||||
====
|
||||
---- QUERY
|
||||
# check cross joins within a subquery
|
||||
|
||||
@@ -903,7 +903,7 @@ limit 6
|
||||
int, tinyint, null
|
||||
---- HS2_TYPES
|
||||
# HS2 maps NULL to BOOLEAN
|
||||
int, tinyint, boolean
|
||||
int, tinyint, null
|
||||
====
|
||||
---- QUERY
|
||||
# check cross joins within a subquery
|
||||
|
||||
@@ -741,7 +741,7 @@ select 3, 'c', NULL, 30.0
|
||||
---- TYPES
|
||||
tinyint, string, null, decimal
|
||||
---- HS2_TYPES
|
||||
tinyint, string, boolean, decimal
|
||||
tinyint, string, null, decimal
|
||||
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
||||
1,'a',NULL,10.0
|
||||
2,'b',NULL,20.0
|
||||
@@ -757,7 +757,7 @@ select 1, 'a', NULL, 10.0
|
||||
---- TYPES
|
||||
tinyint, string, null, decimal
|
||||
---- HS2_TYPES
|
||||
tinyint, string, boolean, decimal
|
||||
tinyint, string, null, decimal
|
||||
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
||||
1,'a',NULL,10.0
|
||||
2,'b',NULL,20.0
|
||||
@@ -772,7 +772,7 @@ values(3, 'c', NULL, 30.0)
|
||||
---- TYPES
|
||||
tinyint, string, null, decimal
|
||||
---- HS2_TYPES
|
||||
tinyint, string, boolean, decimal
|
||||
tinyint, string, null, decimal
|
||||
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
||||
1,'a',NULL,10.0
|
||||
2,'b',NULL,20.0
|
||||
@@ -788,7 +788,7 @@ values(1, 'a', NULL, 10.0)
|
||||
---- TYPES
|
||||
tinyint, string, null, decimal
|
||||
---- HS2_TYPES
|
||||
tinyint, string, boolean, decimal
|
||||
tinyint, string, null, decimal
|
||||
---- RESULTS: VERIFY_IS_EQUAL_SORTED
|
||||
1,'a',NULL,10.0
|
||||
2,'b',NULL,20.0
|
||||
|
||||
@@ -242,21 +242,21 @@ class TestFetch(HS2TestSuite):
|
||||
execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
|
||||
HS2TestSuite.check_response(execute_statement_resp)
|
||||
|
||||
# Check that the expected type is boolean (for compatibility with Hive, see also
|
||||
# IMPALA-914)
|
||||
# Check that the expected type is NULL_TYPE (for compatibility with HiveServer2,
|
||||
# see also IMPALA-914, IMPALA-1370, and IMPALA-14027 for history).
|
||||
get_result_metadata_req = TCLIService.TGetResultSetMetadataReq()
|
||||
get_result_metadata_req.operationHandle = execute_statement_resp.operationHandle
|
||||
get_result_metadata_resp = \
|
||||
self.hs2_client.GetResultSetMetadata(get_result_metadata_req)
|
||||
col = get_result_metadata_resp.schema.columns[0]
|
||||
assert col.typeDesc.types[0].primitiveEntry.type == TTypeId.BOOLEAN_TYPE
|
||||
assert col.typeDesc.types[0].primitiveEntry.type == TTypeId.NULL_TYPE
|
||||
|
||||
# Check that the actual type is boolean
|
||||
# Check that the actual type is string
|
||||
fetch_results_req = TCLIService.TFetchResultsReq()
|
||||
fetch_results_req.operationHandle = execute_statement_resp.operationHandle
|
||||
fetch_results_req.maxRows = 1
|
||||
fetch_results_resp = self.fetch(fetch_results_req)
|
||||
assert fetch_results_resp.results.columns[0].boolVal is not None
|
||||
assert fetch_results_resp.results.columns[0].stringVal is not None
|
||||
|
||||
assert self.column_results_to_string(
|
||||
fetch_results_resp.results.columns) == (1, "NULL\n")
|
||||
|
||||
Reference in New Issue
Block a user