mirror of
https://github.com/apache/impala.git
synced 2026-01-07 18:02:33 -05:00
Impala incorrectly returned NULLs in the "Max Size" column of the SHOW COLUMN STATS result when executed through the HS2 interface. The issue was that the column was declared as type INT in the result schema, but the actual type of the contents that we inserted into it was "long". This is not an issue in Impala shell because, for beeswax results, we stringify the contents without inspecting the metadata. The issue was fixed by changing the type from INT to BIGINT. Change-Id: I419657744635dfdc2e1562fe60a597617fff446e Reviewed-on: http://gerrit.cloudera.org:8080/6109 Reviewed-by: Alex Behm <alex.behm@cloudera.com> Tested-by: Impala Public Jenkins
238 lines
11 KiB
Python
238 lines
11 KiB
Python
# Licensed to the Apache Software Foundation (ASF) under one
|
|
# or more contributor license agreements. See the NOTICE file
|
|
# distributed with this work for additional information
|
|
# regarding copyright ownership. The ASF licenses this file
|
|
# to you under the Apache License, Version 2.0 (the
|
|
# "License"); you may not use this file except in compliance
|
|
# with the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing,
|
|
# software distributed under the License is distributed on an
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
# KIND, either express or implied. See the License for the
|
|
# specific language governing permissions and limitations
|
|
# under the License.
|
|
#
|
|
|
|
import pytest
|
|
import re
|
|
from tests.hs2.hs2_test_suite import HS2TestSuite, needs_session
|
|
from TCLIService import TCLIService, constants
|
|
from TCLIService.ttypes import TTypeId
|
|
|
|
# Simple test to make sure all the HS2 types are supported for both the row and
|
|
# column-oriented versions of the HS2 protocol.
|
|
class TestFetch(HS2TestSuite):
    """Verifies the types and values of results fetched through the HS2 protocol.

    Exercises both the row-oriented (protocol V1) and the column-oriented result
    formats.
    """

    def __verify_primitive_type(self, expected_type, hs2_type):
        """Assert that column descriptor 'hs2_type' has primitive type 'expected_type'."""
        primitive_entry = hs2_type.typeDesc.types[0].primitiveEntry
        assert primitive_entry.type == expected_type

    def __verify_char_max_len(self, t, max_len):
        """Assert that the CHARACTER_MAXIMUM_LENGTH qualifier of column 't' is 'max_len'."""
        qualifiers = t.typeDesc.types[0].primitiveEntry.typeQualifiers.qualifiers
        max_len_qualifier = qualifiers[constants.CHARACTER_MAXIMUM_LENGTH]
        assert max_len_qualifier.i32Value == max_len

    def __verify_decimal_precision_scale(self, hs2_type, precision, scale):
        """Assert the PRECISION and SCALE qualifiers of decimal column 'hs2_type'."""
        qualifiers = hs2_type.typeDesc.types[0].primitiveEntry.typeQualifiers.qualifiers
        precision_qualifier = qualifiers[constants.PRECISION]
        scale_qualifier = qualifiers[constants.SCALE]
        assert precision_qualifier.i32Value == precision
        assert scale_qualifier.i32Value == scale

    def __execute(self, statement):
        """Execute 'statement' in the current session and return the checked response."""
        execute_req = TCLIService.TExecuteStatementReq()
        execute_req.sessionHandle = self.session_handle
        execute_req.statement = statement
        execute_resp = self.hs2_client.ExecuteStatement(execute_req)
        HS2TestSuite.check_response(execute_resp)
        return execute_resp

    def __fetch(self, operation_handle, max_rows):
        """Fetch up to 'max_rows' rows of 'operation_handle'; return the checked response."""
        fetch_req = TCLIService.TFetchResultsReq()
        fetch_req.operationHandle = operation_handle
        fetch_req.maxRows = max_rows
        fetch_resp = self.hs2_client.FetchResults(fetch_req)
        HS2TestSuite.check_response(fetch_resp)
        return fetch_resp

    def __fetch_one_row_schema(self, statement):
        """Execute 'statement', fetch its single row and look up its result schema.

        Returns an (operation_handle, columns) pair. The operation is left open; the
        caller is expected to close the handle.
        """
        handle = self.__execute(statement).operationHandle
        fetched = self.fetch_at_most(
            handle, TCLIService.TFetchOrientation.FETCH_NEXT, 1, 1)
        assert len(fetched.results.rows) == 1
        metadata_resp = self.result_metadata(handle)
        return handle, metadata_resp.schema.columns

    @needs_session(TCLIService.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V1)
    def test_result_metadata_v1(self):
        """Check the result schema reported for primitive, DECIMAL and CHAR/VARCHAR
        columns"""
        # Verify all primitive types in the alltypes table. The expected type ids are
        # listed in the order of the result columns.
        expected_primitive_types = [
            TTypeId.INT_TYPE,
            TTypeId.BOOLEAN_TYPE,
            TTypeId.TINYINT_TYPE,
            TTypeId.SMALLINT_TYPE,
            TTypeId.INT_TYPE,
            TTypeId.BIGINT_TYPE,
            TTypeId.FLOAT_TYPE,
            TTypeId.DOUBLE_TYPE,
            TTypeId.STRING_TYPE,
            TTypeId.STRING_TYPE,
            TTypeId.TIMESTAMP_TYPE,
            TTypeId.INT_TYPE,
            TTypeId.INT_TYPE,
        ]
        handle, column_types = self.__fetch_one_row_schema(
            "SELECT * FROM functional.alltypessmall ORDER BY id LIMIT 1")
        assert len(column_types) == 13
        for expected_type, column_type in zip(expected_primitive_types, column_types):
            self.__verify_primitive_type(expected_type, column_type)
        self.close(handle)

        # Verify the result metadata for the DECIMAL type. The result has 2 columns,
        # the first is decimal(9,0) and the second is decimal(10,5).
        handle, column_types = self.__fetch_one_row_schema(
            "SELECT d1,d5 FROM functional.decimal_tbl ORDER BY d1 LIMIT 1")
        assert len(column_types) == 2
        self.__verify_primitive_type(TTypeId.DECIMAL_TYPE, column_types[0])
        self.__verify_decimal_precision_scale(column_types[0], 9, 0)
        self.__verify_primitive_type(TTypeId.DECIMAL_TYPE, column_types[1])
        self.__verify_decimal_precision_scale(column_types[1], 10, 5)
        self.close(handle)

        # Verify the result metadata for the CHAR/VARCHAR types.
        handle, column_types = self.__fetch_one_row_schema(
            "SELECT * FROM functional.chars_tiny ORDER BY cs LIMIT 1")
        assert len(column_types) == 3
        self.__verify_primitive_type(TTypeId.CHAR_TYPE, column_types[0])
        self.__verify_char_max_len(column_types[0], 5)
        self.__verify_primitive_type(TTypeId.CHAR_TYPE, column_types[1])
        self.__verify_char_max_len(column_types[1], 140)
        self.__verify_primitive_type(TTypeId.VARCHAR_TYPE, column_types[2])
        self.__verify_char_max_len(column_types[2], 32)
        self.close(handle)

    def __query_and_fetch(self, query):
        """Execute 'query' and return the response of fetching up to 1024 rows of it."""
        execute_resp = self.__execute(query)
        return self.__fetch(execute_resp.operationHandle, 1024)

    def __query_to_string(self, query):
        """Run 'query' and return column_results_to_string() of its fetched columns.

        As the callers in this class use it, the return value is a
        (num_rows, formatted_rows) pair.
        """
        fetch_resp = self.__query_and_fetch(query)
        return self.column_results_to_string(fetch_resp.results.columns)

    @needs_session()
    def test_alltypes_v6(self):
        """Test that a simple select statement works for all types"""
        num_rows, result = self.__query_to_string(
            "SELECT *, NULL from functional.alltypes ORDER BY id LIMIT 1")
        assert num_rows == 1
        assert result == \
            "0, True, 0, 0, 0, 0, 0.0, 0.0, 01/01/09, 0, 2009-01-01 00:00:00, 2009, 1, NULL\n"

        # Decimals
        num_rows, result = self.__query_to_string(
            "SELECT * from functional.decimal_tbl LIMIT 1")
        assert num_rows == 1
        assert result == ("1234, 2222, 1.2345678900, "
                          "0.12345678900000000000000000000000000000, 12345.78900, 1\n")

        # VARCHAR
        num_rows, result = self.__query_to_string("SELECT CAST('str' AS VARCHAR(3))")
        assert num_rows == 1
        assert result == "str\n"

        # CHAR not inlined
        num_rows, result = self.__query_to_string("SELECT CAST('car' AS CHAR(140))")
        assert num_rows == 1
        assert result == "car" + (" " * 137) + "\n"

        # CHAR inlined. The value is padded with spaces up to the declared length,
        # exactly like the CHAR(140) case above: 5 - len('car') == 2 spaces.
        num_rows, result = self.__query_to_string("SELECT CAST('car' AS CHAR(5))")
        assert num_rows == 1
        assert result == "car" + (" " * 2) + "\n"

    @needs_session()
    def test_show_partitions(self):
        """Regression test for IMPALA-1330"""
        # Match whether stats are computed or not
        first_row_pattern = \
            r"2009, 1, -?\d+, -?\d+, \d*\.?\d+KB, NOT CACHED, NOT CACHED, TEXT"
        for query in ["SHOW PARTITIONS functional.alltypes",
                      "SHOW TABLE STATS functional.alltypes"]:
            num_rows, result = self.__query_to_string(query)
            assert num_rows == 25
            assert re.match(first_row_pattern, result) is not None

    @needs_session()
    def test_show_column_stats(self):
        """Check that SHOW COLUMN STATS returns integer (non-NULL) stats over HS2"""
        num_rows, result = self.__query_to_string("SHOW COLUMN STATS functional.alltypes")
        assert num_rows == 13
        # The three fields after the type must be integers, so a NULL in any of them
        # fails the match. The dot is escaped so that only a literal "4.0" is accepted.
        assert re.match(r"id, INT, -?\d+, -?\d+, -?\d+, 4\.0", result) is not None

    @needs_session(TCLIService.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V1)
    def test_execute_select_v1(self):
        """Test that a simple select statement works in the row-oriented protocol"""
        execute_resp = self.__execute("SELECT COUNT(*) FROM functional.alltypes")
        fetch_resp = self.__fetch(execute_resp.operationHandle, 100)

        assert len(fetch_resp.results.rows) == 1
        assert fetch_resp.results.startRowOffset == 0

        # A set hasMoreRows flag is reported as an expected failure rather than a hard
        # one; it is tracked as IMPALA-558.
        if fetch_resp.hasMoreRows:
            pytest.xfail("IMPALA-558")

    @needs_session()
    def test_select_null(self):
        """Regression test for IMPALA-1370, where NULL literals would appear as strings
        where they should be booleans"""
        execute_resp = self.__execute("select null")

        # Check that the expected type is boolean (for compatibility with Hive, see also
        # IMPALA-914)
        metadata_req = TCLIService.TGetResultSetMetadataReq()
        metadata_req.operationHandle = execute_resp.operationHandle
        metadata_resp = self.hs2_client.GetResultSetMetadata(metadata_req)
        self.__verify_primitive_type(TTypeId.BOOLEAN_TYPE, metadata_resp.schema.columns[0])

        # Check that the actual type is boolean
        fetch_resp = self.__fetch(execute_resp.operationHandle, 1)
        assert fetch_resp.results.columns[0].boolVal is not None

        assert self.column_results_to_string(fetch_resp.results.columns) == (1, "NULL\n")

    @needs_session()
    def test_compute_stats(self):
        """Exercise the child query path"""
        self.__query_and_fetch("compute stats functional.alltypes")
|