Files
impala/tests/hs2/test_fetch.py
Joe McDonnell c5a0ec8bdf IMPALA-11980 (part 1): Put all thrift-generated python code into the impala_thrift_gen package
This puts all of the thrift-generated python code into the
impala_thrift_gen package. This is similar to what Impyla
does for its thrift-generated python code, except that it
uses the impala_thrift_gen package rather than impala._thrift_gen.
This is a preparatory patch for fixing the absolute import
issues.

This patches all of the thrift files to add the python namespace.
This has code to apply the patching to the thirdparty thrift
files (hive_metastore.thrift, fb303.thrift) to do the same.

Putting all the generated python into a package makes it easier
to understand where the imports are getting code. When the
subsequent change rearranges the shell code, the thrift generated
code can stay in a separate directory.

This uses isort to sort the imports for the affected Python files
with the provided .isort.cfg file. This also adds an impala-isort
shell script to make it easy to run.

Testing:
 - Ran a core job

Change-Id: Ie2927f22c7257aa38a78084efe5bd76d566493c0
Reviewed-on: http://gerrit.cloudera.org:8080/20169
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Reviewed-by: Riza Suminto <riza.suminto@cloudera.com>
2025-04-15 17:03:02 +00:00

300 lines
14 KiB
Python

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
from __future__ import absolute_import, division, print_function
import re
import pytest
from impala_thrift_gen.TCLIService import constants, TCLIService
from impala_thrift_gen.TCLIService.ttypes import TTypeId
from tests.hs2.hs2_test_suite import (
create_op_handle_without_secret,
HS2TestSuite,
needs_session,
)
# Simple test to make sure all the HS2 types are supported for both the row and
# column-oriented versions of the HS2 protocol.
class TestFetch(HS2TestSuite):
def __verify_primitive_type(self, expected_type, hs2_type):
assert hs2_type.typeDesc.types[0].primitiveEntry.type == expected_type
def __verify_char_max_len(self, t, max_len):
qualifiers = t.typeDesc.types[0].primitiveEntry.typeQualifiers.qualifiers
i32val = qualifiers[constants.CHARACTER_MAXIMUM_LENGTH].i32Value
assert i32val == max_len
def __verify_decimal_precision_scale(self, hs2_type, precision, scale):
qualifiers = hs2_type.typeDesc.types[0].primitiveEntry.typeQualifiers.qualifiers
p = qualifiers[constants.PRECISION]
s = qualifiers[constants.SCALE]
assert p.i32Value == precision
assert s.i32Value == scale
def __fetch_result_column_types(self, query, expected_row_count, execute_statement_req):
""" Fetches the results for 'query' and return the response and the
array of column types."""
execute_statement_req.statement = query
execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
HS2TestSuite.check_response(execute_statement_resp)
results = self.fetch_at_most(execute_statement_resp.operationHandle,
TCLIService.TFetchOrientation.FETCH_NEXT, 1, 1)
assert len(results.results.rows) == expected_row_count
metadata_resp = self.result_metadata(execute_statement_resp.operationHandle)
return execute_statement_resp, metadata_resp.schema.columns
@needs_session(TCLIService.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V1)
def test_result_metadata_v1(self):
execute_statement_req = TCLIService.TExecuteStatementReq()
execute_statement_req.sessionHandle = self.session_handle
# Verify all primitive types in the alltypes table.
execute_statement_resp, column_types = self.__fetch_result_column_types(
"SELECT * FROM functional.alltypessmall ORDER BY id LIMIT 1", 1,
execute_statement_req)
assert len(column_types) == 13
self.__verify_primitive_type(TTypeId.INT_TYPE, column_types[0])
self.__verify_primitive_type(TTypeId.BOOLEAN_TYPE, column_types[1])
self.__verify_primitive_type(TTypeId.TINYINT_TYPE, column_types[2])
self.__verify_primitive_type(TTypeId.SMALLINT_TYPE, column_types[3])
self.__verify_primitive_type(TTypeId.INT_TYPE, column_types[4])
self.__verify_primitive_type(TTypeId.BIGINT_TYPE, column_types[5])
self.__verify_primitive_type(TTypeId.FLOAT_TYPE, column_types[6])
self.__verify_primitive_type(TTypeId.DOUBLE_TYPE, column_types[7])
self.__verify_primitive_type(TTypeId.STRING_TYPE, column_types[8])
self.__verify_primitive_type(TTypeId.STRING_TYPE, column_types[9])
self.__verify_primitive_type(TTypeId.TIMESTAMP_TYPE, column_types[10])
self.__verify_primitive_type(TTypeId.INT_TYPE, column_types[11])
self.__verify_primitive_type(TTypeId.INT_TYPE, column_types[12])
self.close(execute_statement_resp.operationHandle)
# Verify the result metadata for the DECIMAL type.
execute_statement_resp, column_types = self.__fetch_result_column_types(
"SELECT d1,d5 FROM functional.decimal_tbl ORDER BY d1 LIMIT 1", 1,
execute_statement_req)
assert len(column_types) == 2
self.__verify_primitive_type(TTypeId.DECIMAL_TYPE, column_types[0])
self.__verify_decimal_precision_scale(column_types[0], 9, 0)
self.__verify_primitive_type(TTypeId.DECIMAL_TYPE, column_types[1])
self.__verify_decimal_precision_scale(column_types[1], 10, 5)
self.close(execute_statement_resp.operationHandle)
# Verify the result metadata for the CHAR/VARCHAR types.
execute_statement_resp, column_types = self.__fetch_result_column_types(
"SELECT * FROM functional.chars_tiny ORDER BY cs LIMIT 1", 1,
execute_statement_req)
assert len(column_types) == 3
self.__verify_primitive_type(TTypeId.CHAR_TYPE, column_types[0])
self.__verify_char_max_len(column_types[0], 5)
self.__verify_primitive_type(TTypeId.CHAR_TYPE, column_types[1])
self.__verify_char_max_len(column_types[1], 140)
self.__verify_primitive_type(TTypeId.VARCHAR_TYPE, column_types[2])
self.__verify_char_max_len(column_types[2], 32)
self.close(execute_statement_resp.operationHandle)
# Verify the result metadata for the DATE type.
execute_statement_resp, column_types = self.__fetch_result_column_types(
"SELECT * FROM functional.date_tbl ORDER BY date_col LIMIT 1", 1,
execute_statement_req)
assert len(column_types) == 3
self.__verify_primitive_type(TTypeId.INT_TYPE, column_types[0])
self.__verify_primitive_type(TTypeId.DATE_TYPE, column_types[1])
self.__verify_primitive_type(TTypeId.DATE_TYPE, column_types[2])
self.close(execute_statement_resp.operationHandle)
# Verify the result metadata for the BINARY type.
execute_statement_resp, column_types = self.__fetch_result_column_types(
"SELECT * from functional.binary_tbl ORDER BY binary_col LIMIT 1", 1,
execute_statement_req)
assert len(column_types) == 3
self.__verify_primitive_type(TTypeId.INT_TYPE, column_types[0])
self.__verify_primitive_type(TTypeId.STRING_TYPE, column_types[1])
self.__verify_primitive_type(TTypeId.BINARY_TYPE, column_types[2])
self.close(execute_statement_resp.operationHandle)
def __query_and_fetch(self, query):
execute_statement_req = TCLIService.TExecuteStatementReq()
execute_statement_req.sessionHandle = self.session_handle
execute_statement_req.statement = query
execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
HS2TestSuite.check_response(execute_statement_resp)
# Do the actual fetch with a valid request.
fetch_results_req = TCLIService.TFetchResultsReq()
fetch_results_req.operationHandle = execute_statement_resp.operationHandle
fetch_results_req.maxRows = 1024
fetch_results_resp = self.fetch(fetch_results_req)
return fetch_results_resp
@needs_session()
def test_alltypes_v6(self):
"""Test that a simple select statement works for all types"""
fetch_results_resp = self.__query_and_fetch(
"SELECT *, NULL from functional.alltypes ORDER BY id LIMIT 1")
num_rows, result = self.column_results_to_string(fetch_results_resp.results.columns)
assert num_rows == 1
assert result == \
"0, True, 0, 0, 0, 0, 0.0, 0.0, 01/01/09, 0, 2009-01-01 00:00:00, 2009, 1, NULL\n"
# Decimals
fetch_results_resp = self.__query_and_fetch(
"SELECT * from functional.decimal_tbl LIMIT 1")
num_rows, result = self.column_results_to_string(fetch_results_resp.results.columns)
assert result == ("1234, 2222, 1.2345678900, "
"0.12345678900000000000000000000000000000, 12345.78900, 1\n")
# VARCHAR
fetch_results_resp = self.__query_and_fetch("SELECT CAST('str' AS VARCHAR(3))")
num_rows, result = self.column_results_to_string(fetch_results_resp.results.columns)
assert result == "str\n"
# CHAR not inlined
fetch_results_resp = self.__query_and_fetch("SELECT CAST('car' AS CHAR(140))")
num_rows, result = self.column_results_to_string(fetch_results_resp.results.columns)
assert result == "car" + (" " * 137) + "\n"
# CHAR inlined
fetch_results_resp = self.__query_and_fetch("SELECT CAST('car' AS CHAR(5))")
num_rows, result = self.column_results_to_string(fetch_results_resp.results.columns)
assert result == "car \n"
# Date
fetch_results_resp = self.__query_and_fetch(
"SELECT * from functional.date_tbl ORDER BY date_col LIMIT 1")
num_rows, result = self.column_results_to_string(fetch_results_resp.results.columns)
assert result == ("0, 0001-01-01, 0001-01-01\n")
# Binary
fetch_results_resp = self.__query_and_fetch(
"SELECT * from functional.binary_tbl ORDER BY id LIMIT 1")
num_rows, result = self.column_results_to_string(fetch_results_resp.results.columns)
assert result == ("1, ascii, binary1\n")
@needs_session()
def test_show_partitions(self):
"""Regression test for IMPALA-1330"""
for query in ["SHOW PARTITIONS functional.alltypes",
"SHOW TABLE STATS functional.alltypes"]:
fetch_results_resp = self.__query_and_fetch(query)
num_rows, result = \
self.column_results_to_string(fetch_results_resp.results.columns)
assert num_rows == 25
# Match whether stats are computed or not
pattern = r"2009, 1, -?\d+, -?\d+, \d*\.?\d+KB, NOT CACHED, NOT CACHED, TEXT"
assert re.match(pattern, result) is not None, \
'Looking for "{}" but none found:\n{}'.format(pattern, result)
@needs_session()
def test_show_column_stats(self):
fetch_results_resp = self.__query_and_fetch("SHOW COLUMN STATS functional.alltypes")
num_rows, result = self.column_results_to_string(fetch_results_resp.results.columns)
assert num_rows == 13
assert re.match(r"id, INT, -?\d+, -?\d+, -?\d+, 4.0", result) is not None
@needs_session(TCLIService.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V1)
def test_execute_select_v1(self):
"""Test that a simple select statement works in the row-oriented protocol"""
execute_statement_req = TCLIService.TExecuteStatementReq()
execute_statement_req.sessionHandle = self.session_handle
execute_statement_req.statement = "SELECT COUNT(*) FROM functional.alltypes"
execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
HS2TestSuite.check_response(execute_statement_resp)
fetch_results_req = TCLIService.TFetchResultsReq()
fetch_results_req.operationHandle = execute_statement_resp.operationHandle
fetch_results_req.maxRows = 100
fetch_results_resp = self.fetch(fetch_results_req)
assert len(fetch_results_resp.results.rows) == 1
assert fetch_results_resp.results.startRowOffset == 0
try:
assert not fetch_results_resp.hasMoreRows
except AssertionError:
pytest.xfail("IMPALA-558")
@needs_session()
def test_select_null(self):
"""Regression test for IMPALA-1370, where NULL literals would appear as strings where
they should be booleans"""
execute_statement_req = TCLIService.TExecuteStatementReq()
execute_statement_req.sessionHandle = self.session_handle
execute_statement_req.statement = "select null"
execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
HS2TestSuite.check_response(execute_statement_resp)
# Check that the expected type is boolean (for compatibility with Hive, see also
# IMPALA-914)
get_result_metadata_req = TCLIService.TGetResultSetMetadataReq()
get_result_metadata_req.operationHandle = execute_statement_resp.operationHandle
get_result_metadata_resp = \
self.hs2_client.GetResultSetMetadata(get_result_metadata_req)
col = get_result_metadata_resp.schema.columns[0]
assert col.typeDesc.types[0].primitiveEntry.type == TTypeId.BOOLEAN_TYPE
# Check that the actual type is boolean
fetch_results_req = TCLIService.TFetchResultsReq()
fetch_results_req.operationHandle = execute_statement_resp.operationHandle
fetch_results_req.maxRows = 1
fetch_results_resp = self.fetch(fetch_results_req)
assert fetch_results_resp.results.columns[0].boolVal is not None
assert self.column_results_to_string(
fetch_results_resp.results.columns) == (1, "NULL\n")
@needs_session()
def test_compute_stats(self):
"""Exercise the child query path"""
self.__query_and_fetch("compute stats functional.alltypes")
@needs_session()
def test_invalid_secret(self):
"""Test that the FetchResults, GetResultSetMetadata and CloseOperation APIs validate
the session secret."""
execute_req = TCLIService.TExecuteStatementReq(
self.session_handle, "select 'something something'")
execute_resp = self.hs2_client.ExecuteStatement(execute_req)
HS2TestSuite.check_response(execute_resp)
good_handle = execute_resp.operationHandle
bad_handle = create_op_handle_without_secret(good_handle)
# Fetching and closing operations with an invalid handle should be a no-op, i.e.
# the later operations with the good handle should succeed.
HS2TestSuite.check_invalid_query(self.hs2_client.FetchResults(
TCLIService.TFetchResultsReq(operationHandle=bad_handle, maxRows=1024)),
expect_legacy_err=True)
HS2TestSuite.check_invalid_query(self.hs2_client.GetResultSetMetadata(
TCLIService.TGetResultSetMetadataReq(operationHandle=bad_handle)),
expect_legacy_err=True)
HS2TestSuite.check_invalid_query(self.hs2_client.CloseOperation(
TCLIService.TCloseOperationReq(operationHandle=bad_handle)),
expect_legacy_err=True)
# Ensure that the good handle remained valid.
HS2TestSuite.check_response(self.hs2_client.FetchResults(
TCLIService.TFetchResultsReq(operationHandle=good_handle, maxRows=1024)))
HS2TestSuite.check_response(self.hs2_client.GetResultSetMetadata(
TCLIService.TGetResultSetMetadataReq(operationHandle=good_handle)))
HS2TestSuite.check_response(self.hs2_client.CloseOperation(
TCLIService.TCloseOperationReq(operationHandle=good_handle)))