mirror of
https://github.com/apache/impala.git
synced 2025-12-19 18:12:08 -05:00
This puts all of the thrift-generated python code into the impala_thrift_gen package. This is similar to what Impyla does for its thrift-generated python code, except that it uses the impala_thrift_gen package rather than impala._thrift_gen. This is a preparatory patch for fixing the absolute import issues. This patches all of the thrift files to add the python namespace. This has code to apply the patching to the thirdparty thrift files (hive_metastore.thrift, fb303.thrift) to do the same. Putting all the generated python into a package makes it easier to understand where the imports are getting code. When the subsequent change rearranges the shell code, the thrift generated code can stay in a separate directory. This uses isort to sort the imports for the affected Python files with the provided .isort.cfg file. This also adds an impala-isort shell script to make it easy to run. Testing: - Ran a core job Change-Id: Ie2927f22c7257aa38a78084efe5bd76d566493c0 Reviewed-on: http://gerrit.cloudera.org:8080/20169 Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Reviewed-by: Riza Suminto <riza.suminto@cloudera.com>
300 lines
14 KiB
Python
300 lines
14 KiB
Python
# Licensed to the Apache Software Foundation (ASF) under one
|
|
# or more contributor license agreements. See the NOTICE file
|
|
# distributed with this work for additional information
|
|
# regarding copyright ownership. The ASF licenses this file
|
|
# to you under the Apache License, Version 2.0 (the
|
|
# "License"); you may not use this file except in compliance
|
|
# with the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing,
|
|
# software distributed under the License is distributed on an
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
# KIND, either express or implied. See the License for the
|
|
# specific language governing permissions and limitations
|
|
# under the License.
|
|
#
|
|
|
|
from __future__ import absolute_import, division, print_function
|
|
import re
|
|
|
|
import pytest
|
|
|
|
from impala_thrift_gen.TCLIService import constants, TCLIService
|
|
from impala_thrift_gen.TCLIService.ttypes import TTypeId
|
|
from tests.hs2.hs2_test_suite import (
|
|
create_op_handle_without_secret,
|
|
HS2TestSuite,
|
|
needs_session,
|
|
)
|
|
|
|
|
|
# Simple test to make sure all the HS2 types are supported for both the row and
|
|
# column-oriented versions of the HS2 protocol.
|
|
class TestFetch(HS2TestSuite):
|
|
def __verify_primitive_type(self, expected_type, hs2_type):
|
|
assert hs2_type.typeDesc.types[0].primitiveEntry.type == expected_type
|
|
|
|
def __verify_char_max_len(self, t, max_len):
|
|
qualifiers = t.typeDesc.types[0].primitiveEntry.typeQualifiers.qualifiers
|
|
i32val = qualifiers[constants.CHARACTER_MAXIMUM_LENGTH].i32Value
|
|
assert i32val == max_len
|
|
|
|
def __verify_decimal_precision_scale(self, hs2_type, precision, scale):
|
|
qualifiers = hs2_type.typeDesc.types[0].primitiveEntry.typeQualifiers.qualifiers
|
|
p = qualifiers[constants.PRECISION]
|
|
s = qualifiers[constants.SCALE]
|
|
assert p.i32Value == precision
|
|
assert s.i32Value == scale
|
|
|
|
def __fetch_result_column_types(self, query, expected_row_count, execute_statement_req):
|
|
""" Fetches the results for 'query' and return the response and the
|
|
array of column types."""
|
|
execute_statement_req.statement = query
|
|
execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
|
|
HS2TestSuite.check_response(execute_statement_resp)
|
|
results = self.fetch_at_most(execute_statement_resp.operationHandle,
|
|
TCLIService.TFetchOrientation.FETCH_NEXT, 1, 1)
|
|
assert len(results.results.rows) == expected_row_count
|
|
metadata_resp = self.result_metadata(execute_statement_resp.operationHandle)
|
|
return execute_statement_resp, metadata_resp.schema.columns
|
|
|
|
@needs_session(TCLIService.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V1)
|
|
def test_result_metadata_v1(self):
|
|
execute_statement_req = TCLIService.TExecuteStatementReq()
|
|
execute_statement_req.sessionHandle = self.session_handle
|
|
|
|
# Verify all primitive types in the alltypes table.
|
|
execute_statement_resp, column_types = self.__fetch_result_column_types(
|
|
"SELECT * FROM functional.alltypessmall ORDER BY id LIMIT 1", 1,
|
|
execute_statement_req)
|
|
assert len(column_types) == 13
|
|
self.__verify_primitive_type(TTypeId.INT_TYPE, column_types[0])
|
|
self.__verify_primitive_type(TTypeId.BOOLEAN_TYPE, column_types[1])
|
|
self.__verify_primitive_type(TTypeId.TINYINT_TYPE, column_types[2])
|
|
self.__verify_primitive_type(TTypeId.SMALLINT_TYPE, column_types[3])
|
|
self.__verify_primitive_type(TTypeId.INT_TYPE, column_types[4])
|
|
self.__verify_primitive_type(TTypeId.BIGINT_TYPE, column_types[5])
|
|
self.__verify_primitive_type(TTypeId.FLOAT_TYPE, column_types[6])
|
|
self.__verify_primitive_type(TTypeId.DOUBLE_TYPE, column_types[7])
|
|
self.__verify_primitive_type(TTypeId.STRING_TYPE, column_types[8])
|
|
self.__verify_primitive_type(TTypeId.STRING_TYPE, column_types[9])
|
|
self.__verify_primitive_type(TTypeId.TIMESTAMP_TYPE, column_types[10])
|
|
self.__verify_primitive_type(TTypeId.INT_TYPE, column_types[11])
|
|
self.__verify_primitive_type(TTypeId.INT_TYPE, column_types[12])
|
|
self.close(execute_statement_resp.operationHandle)
|
|
|
|
# Verify the result metadata for the DECIMAL type.
|
|
execute_statement_resp, column_types = self.__fetch_result_column_types(
|
|
"SELECT d1,d5 FROM functional.decimal_tbl ORDER BY d1 LIMIT 1", 1,
|
|
execute_statement_req)
|
|
assert len(column_types) == 2
|
|
self.__verify_primitive_type(TTypeId.DECIMAL_TYPE, column_types[0])
|
|
self.__verify_decimal_precision_scale(column_types[0], 9, 0)
|
|
self.__verify_primitive_type(TTypeId.DECIMAL_TYPE, column_types[1])
|
|
self.__verify_decimal_precision_scale(column_types[1], 10, 5)
|
|
self.close(execute_statement_resp.operationHandle)
|
|
|
|
# Verify the result metadata for the CHAR/VARCHAR types.
|
|
execute_statement_resp, column_types = self.__fetch_result_column_types(
|
|
"SELECT * FROM functional.chars_tiny ORDER BY cs LIMIT 1", 1,
|
|
execute_statement_req)
|
|
assert len(column_types) == 3
|
|
self.__verify_primitive_type(TTypeId.CHAR_TYPE, column_types[0])
|
|
self.__verify_char_max_len(column_types[0], 5)
|
|
self.__verify_primitive_type(TTypeId.CHAR_TYPE, column_types[1])
|
|
self.__verify_char_max_len(column_types[1], 140)
|
|
self.__verify_primitive_type(TTypeId.VARCHAR_TYPE, column_types[2])
|
|
self.__verify_char_max_len(column_types[2], 32)
|
|
self.close(execute_statement_resp.operationHandle)
|
|
|
|
# Verify the result metadata for the DATE type.
|
|
execute_statement_resp, column_types = self.__fetch_result_column_types(
|
|
"SELECT * FROM functional.date_tbl ORDER BY date_col LIMIT 1", 1,
|
|
execute_statement_req)
|
|
assert len(column_types) == 3
|
|
self.__verify_primitive_type(TTypeId.INT_TYPE, column_types[0])
|
|
self.__verify_primitive_type(TTypeId.DATE_TYPE, column_types[1])
|
|
self.__verify_primitive_type(TTypeId.DATE_TYPE, column_types[2])
|
|
self.close(execute_statement_resp.operationHandle)
|
|
|
|
# Verify the result metadata for the BINARY type.
|
|
execute_statement_resp, column_types = self.__fetch_result_column_types(
|
|
"SELECT * from functional.binary_tbl ORDER BY binary_col LIMIT 1", 1,
|
|
execute_statement_req)
|
|
assert len(column_types) == 3
|
|
self.__verify_primitive_type(TTypeId.INT_TYPE, column_types[0])
|
|
self.__verify_primitive_type(TTypeId.STRING_TYPE, column_types[1])
|
|
self.__verify_primitive_type(TTypeId.BINARY_TYPE, column_types[2])
|
|
self.close(execute_statement_resp.operationHandle)
|
|
|
|
def __query_and_fetch(self, query):
|
|
execute_statement_req = TCLIService.TExecuteStatementReq()
|
|
execute_statement_req.sessionHandle = self.session_handle
|
|
execute_statement_req.statement = query
|
|
execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
|
|
HS2TestSuite.check_response(execute_statement_resp)
|
|
|
|
# Do the actual fetch with a valid request.
|
|
fetch_results_req = TCLIService.TFetchResultsReq()
|
|
fetch_results_req.operationHandle = execute_statement_resp.operationHandle
|
|
fetch_results_req.maxRows = 1024
|
|
fetch_results_resp = self.fetch(fetch_results_req)
|
|
|
|
return fetch_results_resp
|
|
|
|
@needs_session()
|
|
def test_alltypes_v6(self):
|
|
"""Test that a simple select statement works for all types"""
|
|
fetch_results_resp = self.__query_and_fetch(
|
|
"SELECT *, NULL from functional.alltypes ORDER BY id LIMIT 1")
|
|
|
|
num_rows, result = self.column_results_to_string(fetch_results_resp.results.columns)
|
|
assert num_rows == 1
|
|
assert result == \
|
|
"0, True, 0, 0, 0, 0, 0.0, 0.0, 01/01/09, 0, 2009-01-01 00:00:00, 2009, 1, NULL\n"
|
|
|
|
# Decimals
|
|
fetch_results_resp = self.__query_and_fetch(
|
|
"SELECT * from functional.decimal_tbl LIMIT 1")
|
|
num_rows, result = self.column_results_to_string(fetch_results_resp.results.columns)
|
|
assert result == ("1234, 2222, 1.2345678900, "
|
|
"0.12345678900000000000000000000000000000, 12345.78900, 1\n")
|
|
|
|
# VARCHAR
|
|
fetch_results_resp = self.__query_and_fetch("SELECT CAST('str' AS VARCHAR(3))")
|
|
num_rows, result = self.column_results_to_string(fetch_results_resp.results.columns)
|
|
assert result == "str\n"
|
|
|
|
# CHAR not inlined
|
|
fetch_results_resp = self.__query_and_fetch("SELECT CAST('car' AS CHAR(140))")
|
|
num_rows, result = self.column_results_to_string(fetch_results_resp.results.columns)
|
|
assert result == "car" + (" " * 137) + "\n"
|
|
|
|
# CHAR inlined
|
|
fetch_results_resp = self.__query_and_fetch("SELECT CAST('car' AS CHAR(5))")
|
|
num_rows, result = self.column_results_to_string(fetch_results_resp.results.columns)
|
|
assert result == "car \n"
|
|
|
|
# Date
|
|
fetch_results_resp = self.__query_and_fetch(
|
|
"SELECT * from functional.date_tbl ORDER BY date_col LIMIT 1")
|
|
num_rows, result = self.column_results_to_string(fetch_results_resp.results.columns)
|
|
assert result == ("0, 0001-01-01, 0001-01-01\n")
|
|
|
|
# Binary
|
|
fetch_results_resp = self.__query_and_fetch(
|
|
"SELECT * from functional.binary_tbl ORDER BY id LIMIT 1")
|
|
num_rows, result = self.column_results_to_string(fetch_results_resp.results.columns)
|
|
assert result == ("1, ascii, binary1\n")
|
|
|
|
@needs_session()
|
|
def test_show_partitions(self):
|
|
"""Regression test for IMPALA-1330"""
|
|
for query in ["SHOW PARTITIONS functional.alltypes",
|
|
"SHOW TABLE STATS functional.alltypes"]:
|
|
fetch_results_resp = self.__query_and_fetch(query)
|
|
num_rows, result = \
|
|
self.column_results_to_string(fetch_results_resp.results.columns)
|
|
assert num_rows == 25
|
|
# Match whether stats are computed or not
|
|
pattern = r"2009, 1, -?\d+, -?\d+, \d*\.?\d+KB, NOT CACHED, NOT CACHED, TEXT"
|
|
assert re.match(pattern, result) is not None, \
|
|
'Looking for "{}" but none found:\n{}'.format(pattern, result)
|
|
|
|
@needs_session()
|
|
def test_show_column_stats(self):
|
|
fetch_results_resp = self.__query_and_fetch("SHOW COLUMN STATS functional.alltypes")
|
|
num_rows, result = self.column_results_to_string(fetch_results_resp.results.columns)
|
|
assert num_rows == 13
|
|
assert re.match(r"id, INT, -?\d+, -?\d+, -?\d+, 4.0", result) is not None
|
|
|
|
@needs_session(TCLIService.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V1)
|
|
def test_execute_select_v1(self):
|
|
"""Test that a simple select statement works in the row-oriented protocol"""
|
|
execute_statement_req = TCLIService.TExecuteStatementReq()
|
|
execute_statement_req.sessionHandle = self.session_handle
|
|
execute_statement_req.statement = "SELECT COUNT(*) FROM functional.alltypes"
|
|
execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
|
|
HS2TestSuite.check_response(execute_statement_resp)
|
|
|
|
fetch_results_req = TCLIService.TFetchResultsReq()
|
|
fetch_results_req.operationHandle = execute_statement_resp.operationHandle
|
|
fetch_results_req.maxRows = 100
|
|
fetch_results_resp = self.fetch(fetch_results_req)
|
|
|
|
assert len(fetch_results_resp.results.rows) == 1
|
|
assert fetch_results_resp.results.startRowOffset == 0
|
|
|
|
try:
|
|
assert not fetch_results_resp.hasMoreRows
|
|
except AssertionError:
|
|
pytest.xfail("IMPALA-558")
|
|
|
|
@needs_session()
|
|
def test_select_null(self):
|
|
"""Regression test for IMPALA-1370, where NULL literals would appear as strings where
|
|
they should be booleans"""
|
|
execute_statement_req = TCLIService.TExecuteStatementReq()
|
|
execute_statement_req.sessionHandle = self.session_handle
|
|
execute_statement_req.statement = "select null"
|
|
execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
|
|
HS2TestSuite.check_response(execute_statement_resp)
|
|
|
|
# Check that the expected type is boolean (for compatibility with Hive, see also
|
|
# IMPALA-914)
|
|
get_result_metadata_req = TCLIService.TGetResultSetMetadataReq()
|
|
get_result_metadata_req.operationHandle = execute_statement_resp.operationHandle
|
|
get_result_metadata_resp = \
|
|
self.hs2_client.GetResultSetMetadata(get_result_metadata_req)
|
|
col = get_result_metadata_resp.schema.columns[0]
|
|
assert col.typeDesc.types[0].primitiveEntry.type == TTypeId.BOOLEAN_TYPE
|
|
|
|
# Check that the actual type is boolean
|
|
fetch_results_req = TCLIService.TFetchResultsReq()
|
|
fetch_results_req.operationHandle = execute_statement_resp.operationHandle
|
|
fetch_results_req.maxRows = 1
|
|
fetch_results_resp = self.fetch(fetch_results_req)
|
|
assert fetch_results_resp.results.columns[0].boolVal is not None
|
|
|
|
assert self.column_results_to_string(
|
|
fetch_results_resp.results.columns) == (1, "NULL\n")
|
|
|
|
@needs_session()
|
|
def test_compute_stats(self):
|
|
"""Exercise the child query path"""
|
|
self.__query_and_fetch("compute stats functional.alltypes")
|
|
|
|
@needs_session()
|
|
def test_invalid_secret(self):
|
|
"""Test that the FetchResults, GetResultSetMetadata and CloseOperation APIs validate
|
|
the session secret."""
|
|
execute_req = TCLIService.TExecuteStatementReq(
|
|
self.session_handle, "select 'something something'")
|
|
execute_resp = self.hs2_client.ExecuteStatement(execute_req)
|
|
HS2TestSuite.check_response(execute_resp)
|
|
|
|
good_handle = execute_resp.operationHandle
|
|
bad_handle = create_op_handle_without_secret(good_handle)
|
|
|
|
# Fetching and closing operations with an invalid handle should be a no-op, i.e.
|
|
# the later operations with the good handle should succeed.
|
|
HS2TestSuite.check_invalid_query(self.hs2_client.FetchResults(
|
|
TCLIService.TFetchResultsReq(operationHandle=bad_handle, maxRows=1024)),
|
|
expect_legacy_err=True)
|
|
HS2TestSuite.check_invalid_query(self.hs2_client.GetResultSetMetadata(
|
|
TCLIService.TGetResultSetMetadataReq(operationHandle=bad_handle)),
|
|
expect_legacy_err=True)
|
|
HS2TestSuite.check_invalid_query(self.hs2_client.CloseOperation(
|
|
TCLIService.TCloseOperationReq(operationHandle=bad_handle)),
|
|
expect_legacy_err=True)
|
|
|
|
# Ensure that the good handle remained valid.
|
|
HS2TestSuite.check_response(self.hs2_client.FetchResults(
|
|
TCLIService.TFetchResultsReq(operationHandle=good_handle, maxRows=1024)))
|
|
HS2TestSuite.check_response(self.hs2_client.GetResultSetMetadata(
|
|
TCLIService.TGetResultSetMetadataReq(operationHandle=good_handle)))
|
|
HS2TestSuite.check_response(self.hs2_client.CloseOperation(
|
|
TCLIService.TCloseOperationReq(operationHandle=good_handle)))
|