mirror of
https://github.com/apache/impala.git
synced 2025-12-19 18:12:08 -05:00
This puts all of the thrift-generated python code into the impala_thrift_gen package. This is similar to what Impyla does for its thrift-generated python code, except that it uses the impala_thrift_gen package rather than impala._thrift_gen. This is a preparatory patch for fixing the absolute import issues. This patches all of the thrift files to add the python namespace. This has code to apply the patching to the thirdparty thrift files (hive_metastore.thrift, fb303.thrift) to do the same. Putting all the generated python into a package makes it easier to understand where the imports are getting code. When the subsequent change rearranges the shell code, the thrift generated code can stay in a separate directory. This uses isort to sort the imports for the affected Python files with the provided .isort.cfg file. This also adds an impala-isort shell script to make it easy to run. Testing: - Ran a core job Change-Id: Ie2927f22c7257aa38a78084efe5bd76d566493c0 Reviewed-on: http://gerrit.cloudera.org:8080/20169 Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Reviewed-by: Riza Suminto <riza.suminto@cloudera.com>
1128 lines
45 KiB
Python
1128 lines
45 KiB
Python
# Licensed to the Apache Software Foundation (ASF) under one
|
|
# or more contributor license agreements. See the NOTICE file
|
|
# distributed with this work for additional information
|
|
# regarding copyright ownership. The ASF licenses this file
|
|
# to you under the Apache License, Version 2.0 (the
|
|
# "License"); you may not use this file except in compliance
|
|
# with the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing,
|
|
# software distributed under the License is distributed on an
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
# KIND, either express or implied. See the License for the
|
|
# specific language governing permissions and limitations
|
|
# under the License.
|
|
#
|
|
# Common for connections to Impala. Currently supports Beeswax connections and
|
|
# in the future will support HS2 connections. Provides tracing around all
|
|
# operations.
|
|
|
|
from __future__ import absolute_import, division, print_function
|
|
import abc
|
|
import getpass
|
|
import logging
|
|
import re
|
|
import time
|
|
|
|
from future.utils import with_metaclass
|
|
import impala.dbapi as impyla
|
|
import impala.error as impyla_error
|
|
import impala.hiveserver2 as hs2
|
|
|
|
from impala_thrift_gen.beeswax.BeeswaxService import QueryState
|
|
from impala_thrift_gen.Query.ttypes import TQueryOptions
|
|
from impala_thrift_gen.RuntimeProfile.ttypes import TRuntimeProfileFormat
|
|
from tests.beeswax.impala_beeswax import (
|
|
DEFAULT_SLEEP_INTERVAL,
|
|
ImpalaBeeswaxClient,
|
|
ImpalaBeeswaxException,
|
|
)
|
|
import tests.common
|
|
from tests.common.patterns import LOG_FORMAT
|
|
from tests.common.test_vector import BEESWAX, HS2, HS2_HTTP
|
|
from tests.util.thrift_util import op_handle_to_query_id, session_handle_to_session_id
|
|
|
|
# Module logger. Propagation is switched off, so records are handled only by the
# console handler attached below and not by handlers of ancestor loggers.
LOG = logging.getLogger(__name__)
LOG.propagate = False

# All logging needs to be either executable SQL or a SQL comment (prefix with --);
# LOG_FORMAT is expected to take care of the prefix.
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter(LOG_FORMAT))
console_handler.setLevel(logging.INFO)
LOG.addHandler(console_handler)
|
|
|
|
# Pattern of a "progress" entry in the HS2 log, for example:
#   Query 1a2b:3c4d 50% Complete (1 out of 2)
PROGRESS_LOG_RE = re.compile(
  r'^Query [a-z0-9:]+ [0-9]+% Complete '
  r'\([0-9]+ out of [0-9]+\)$')

# Longest SQL statement (128 * 1024 characters) that format_sql_for_logging() logs
# verbatim.
MAX_SQL_LOGGING_LENGTH = 128 * 1024
|
|
|
|
# Root exception types of the supported client protocols, gathered in one tuple so
# that it can be used directly in an 'except' clause or an isinstance() check.
IMPALA_CONNECTION_EXCEPTION = (
  ImpalaBeeswaxException,
  impyla_error.Error,
)
|
|
|
|
# String names of the ClientRequestState::ExecState values.
INITIALIZED = 'INITIALIZED'
PENDING = 'PENDING'
RUNNING = 'RUNNING'
FINISHED = 'FINISHED'
ERROR = 'ERROR'

# ExecStates that are final.
EXEC_STATES_FINAL = {FINISHED, ERROR}

# ExecStates a query can be in once it has passed the admission controller.
EXEC_STATES_ADMITTED = {RUNNING, FINISHED, ERROR}

# For every ExecState, the set of ExecStates it may still transition to.
LEGAL_FUTURE_STATES = {
  INITIALIZED: {PENDING, RUNNING, FINISHED, ERROR},
  PENDING: {RUNNING, FINISHED, ERROR},
  RUNNING: {FINISHED, ERROR},
  FINISHED: {ERROR},
  ERROR: set(),
}
|
|
|
|
|
|
def has_legal_future_state(impala_state, future_states):
  """Tell whether 'impala_state' may still transition to any state in 'future_states'.

  'impala_state' must be a key of LEGAL_FUTURE_STATES (checked with an assert).
  'future_states' is any iterable of state names."""
  assert impala_state in LEGAL_FUTURE_STATES
  reachable = LEGAL_FUTURE_STATES[impala_state]
  wanted = set(future_states)
  return bool(reachable & wanted)
|
|
|
|
|
|
# test_exprs.py's TestExprLimits executes extremely large SQLs (multiple MBs). It is the
|
|
# only test that runs SQL larger than 128KB. Logging these SQLs in execute() increases
|
|
# the size of the JUnitXML files, causing problems for users of JUnitXML like Jenkins.
|
|
# This function limits the size of the returned string if it is larger than 128KB.
|
|
def format_sql_for_logging(sql_stmt):
  """Return 'sql_stmt' as a unicode string suitable for the SQL-only test log.

  A statement of at most MAX_SQL_LOGGING_LENGTH characters is wrapped in newlines and
  terminated with a semicolon. A longer statement is cut to MAX_SQL_LOGGING_LENGTH
  characters and emitted as SQL comments, preceded by a note with its full length."""
  # sql_stmt could contain Unicode characters, so unicode literals are used throughout
  # to keep Python 2 working.
  stmt_length = len(sql_stmt)
  if stmt_length > MAX_SQL_LOGGING_LENGTH:
    # The logging output should be valid SQL, so every line of the truncated statement
    # is commented out.
    head = sql_stmt[0:MAX_SQL_LOGGING_LENGTH]
    commented_out = u'\n--'.join(head.split("\n"))
    template = (u"\n-- Skip logging full SQL statement of length {0}"
                u"\n-- Logging a truncated version, commented out:"
                u"\n-- {1}"
                u"\n-- [...]\n")
    return template.format(stmt_length, commented_out)
  return u"\n{0};\n".format(sql_stmt)
|
|
|
|
|
|
def build_summary_table_from_thrift(thrift_exec_summary):
  """Convert 'thrift_exec_summary' into a list of dicts, one per exec summary row.

  The rows are produced by shell.exec_summary.build_exec_summary_table() and each one
  is keyed by the column names listed below. An AssertionError is raised if a row does
  not have exactly one value per column."""
  from shell.exec_summary import build_exec_summary_table
  column_names = [
    'prefix', 'operator', 'num_hosts', 'num_instances', 'avg_time', 'max_time',
    'num_rows', 'est_num_rows', 'peak_mem', 'est_peak_mem', 'detail']
  # build_exec_summary_table() appends its rows to the list passed in.
  raw_rows = []
  build_exec_summary_table(thrift_exec_summary, 0, 0, False, raw_rows,
                           is_prettyprint=False, separate_prefix_column=True)
  summary_rows = []
  for raw_row in raw_rows:
    assert len(column_names) == len(raw_row)
    summary_rows.append(dict(zip(column_names, raw_row)))
  return summary_rows
|
|
|
|
|
|
def collect_default_query_options(options, name, val):
  """Store the normalized default value 'val' of query option 'name' in 'options'.

  The key is the lower-cased 'name'. The value is always a string: surrounding double
  quotes are stripped, a value containing ',' or '/' is wrapped in double quotes, and
  a missing (None) or empty value is stored as '""'."""
  key = name.lower()
  if val is None:
    # Optional value with None as its default.
    normalized = ''
  else:
    normalized = str(val).strip('"')
    if any(sep in normalized for sep in (',', '/')):
      # Value is a list or a timezone name containing a slash. Wrap it with double
      # quotes.
      normalized = '"{}"'.format(normalized)
  options[key] = normalized if normalized else '""'
|
|
|
|
|
|
def parse_query_options_from_thrift():
  """Return a new dict of default query options read from a default-constructed
  TQueryOptions.

  Every public attribute of the struct, except its thrift helper members, is treated
  as a query option and normalized through collect_default_query_options()."""
  # Public attributes of the thrift struct that are not query options.
  non_option_attrs = ['read', 'write', 'validate', 'thrift_spec']
  defaults = dict()
  thrift_options = TQueryOptions()
  for attr in dir(thrift_options):
    if attr.startswith('_') or attr in non_option_attrs:
      continue
    default = getattr(thrift_options, attr)
    if isinstance(default, set):
      # The default value of some query options, e.g. enabled_runtime_filter_types,
      # can be a set of integers. Turn the set into comma separated values.
      default = ','.join(str(member) for member in default)
    # No need to supply 'kind' since TQueryOptions already excludes removed query
    # options.
    collect_default_query_options(defaults, attr, default)
  return defaults


# Default query options obtained from TQueryOptions.
# The keys are query option names in lower case, for consistency.
# The values are all strings and might be double-quoted, which makes them legal both
# in a 'SET' query and in ImpalaConnection.set_configuration_option().
DEFAULT_QUERY_OPTIONS = parse_query_options_from_thrift()
|
|
|
|
|
|
# Common wrapper around the internal types of HS2/Beeswax operation/query handles.
class OperationHandle(object):
  """Holds a protocol-specific handle together with a SQL statement."""

  def __init__(self, handle, sql_stmt):
    self.__handle, self.__sql_stmt = handle, sql_stmt

  def get_handle(self):
    """Return the wrapped protocol-specific handle."""
    return self.__handle

  def sql_stmt(self):
    """Return the SQL statement given at construction."""
    return self.__sql_stmt
|
|
|
|
|
|
# Represents an Impala connection.
class ImpalaConnection(with_metaclass(abc.ABCMeta, object)):
  """Abstract base class of the test framework's client connections to Impala.

  Subclasses implement the protocol-specific operations. This class adds the
  context-manager protocol, configuration helpers, ExecState predicates, logging
  helpers and the generic wait_for_*_impala_state() polling loop."""

  def __enter__(self):
    return self

  def __exit__(self, exc_type, exc_value, traceback):
    # Exceptions raised inside the 'with' body are not suppressed (returns None).
    self.close()

  @abc.abstractmethod
  def get_test_protocol(self):
    """Return client protocol name that is specific to Impala test framework.
    Possible return value are either of 'beeswax', 'hs2', or 'hs2-http'."""
    pass

  @abc.abstractmethod
  def get_host_port(self):
    """Return the 'host:port' string of impala server that this object connecting to."""
    pass

  @abc.abstractmethod
  def set_configuration_option(self, name, value, is_log_sql=True):
    """Sets a configuration option name to the given value.
    Return True if option is changing. Otherwise, return False (option already has the
    same value). If is_log_sql True, log the equivalent SET query to INFO. Do note though
    that the option change does not actually happen by issuing SET query."""
    pass

  def set_configuration(self, config_option_dict):
    """Replaces existing configuration with the given dictionary.
    If config_option_dict is an empty dictionary, simply clear current client
    configuration. Options that actually changed are logged together as one block of
    equivalent SET statements."""
    assert isinstance(config_option_dict, dict), \
        "config_option_dict must be a dictionary"
    self.clear_configuration()
    if not config_option_dict:
      return
    log_lines = list()
    for name, value in config_option_dict.items():
      # Per-option logging is disabled here; changed options are logged once below.
      if self.set_configuration_option(name, value, False):
        log_lines.append("set {0}={1};".format(name, value))
    if log_lines:
      self.log_client("set_configuration:\n\n{}\n".format('\n'.join(log_lines)))

  @abc.abstractmethod
  def clear_configuration(self):
    """Clears all existing configuration."""
    pass

  def get_default_configuration(self):
    """Return the default configuration for the connection, before any modifications are
    made to the session state. Returns a map with the config variable as the key and a
    string representation of the default value as the value. The returned map is a copy
    of DEFAULT_QUERY_OPTIONS, so callers may modify it."""
    return DEFAULT_QUERY_OPTIONS.copy()

  @abc.abstractmethod
  def connect(self):
    """Opens the connection"""
    pass

  @abc.abstractmethod
  def close(self):
    """Closes the connection. Can be called multiple times"""
    pass

  @abc.abstractmethod
  def close_query(self, handle, fetch_profile_after_close=False):
    """Closes the query."""
    pass

  @abc.abstractmethod
  def get_state(self, operation_handle):
    """Returns the state of a query.
    May raise an error, depending on connection type."""
    pass

  @abc.abstractmethod
  def get_impala_exec_state(self, operation_handle):
    """Returns a string translation from client specific state of operation_handle
    to Impala's ClientRequestState::ExecState."""
    pass

  def __is_at_exec_state(self, operation_handle, impala_state):
    """Log the check, then return whether the Impala exec state of 'operation_handle'
    equals 'impala_state'."""
    self.log_handle(
      operation_handle, 'checking ' + impala_state + ' state for operation')
    return self.get_impala_exec_state(operation_handle) == impala_state

  def state_is_finished(self, operation_handle):
    """Returns whether the Impala exec state of a operation_handle is FINISHED.
    DEPRECATED: use is_finished() instead."""
    return self.is_finished(operation_handle)

  def is_initialized(self, operation_handle):
    """Returns whether the Impala exec state of a operation_handle is INITIALIZED"""
    return self.__is_at_exec_state(operation_handle, INITIALIZED)

  def is_pending(self, operation_handle):
    """Returns whether the Impala exec state of a operation_handle is PENDING"""
    return self.__is_at_exec_state(operation_handle, PENDING)

  def is_running(self, operation_handle):
    """Returns whether the Impala exec state of a operation_handle is RUNNING"""
    return self.__is_at_exec_state(operation_handle, RUNNING)

  def is_finished(self, operation_handle):
    """Returns whether the Impala exec state of a operation_handle is FINISHED"""
    return self.__is_at_exec_state(operation_handle, FINISHED)

  def is_error(self, operation_handle):
    """Returns whether the Impala exec state of a operation_handle is ERROR.
    The state comes from get_impala_exec_state(); whether an exception raised while
    getting the state is reported as ERROR or propagated depends on the subclass."""
    return self.__is_at_exec_state(operation_handle, ERROR)

  def is_executing(self, operation_handle):
    """Returns whether the state of a operation_handle is executing or will be
    executing. Return False if operation_handle has ended, either successful or
    with error."""
    return self.get_impala_exec_state(operation_handle) not in EXEC_STATES_FINAL

  def is_admitted(self, operation_handle):
    """Returns whether the state of a operation_handle has passed Impala
    admission control. Return True if handle state is error."""
    return self.get_impala_exec_state(operation_handle) in EXEC_STATES_ADMITTED

  @abc.abstractmethod
  def get_log(self, operation_handle):
    """Returns the log of an operation as a string, with entries separated by newlines."""
    pass

  @abc.abstractmethod
  def cancel(self, operation_handle):
    """Cancels an in-flight operation"""
    pass

  # NOTE(review): unlike the other interface methods this one is not marked with
  # @abc.abstractmethod, so a subclass that does not override it silently returns None.
  def execute(self, sql_stmt, user=None, fetch_profile_after_close=False,  # noqa: U100
              fetch_exec_summary=False,  # noqa: U100
              profile_format=TRuntimeProfileFormat.STRING):  # noqa: U100
    """Executes a query and fetches the results"""
    pass

  @abc.abstractmethod
  def execute_async(self, sql_stmt):
    """Issues a query and returns the handle to the caller for processing. Only one
    async operation per connection at a time is supported, due to limitations of the
    Beeswax protocol and the Impyla client."""
    pass

  @abc.abstractmethod
  def fetch(self, sql_stmt, operation_handle, max_rows=-1, discard_results=False):
    """Fetches query results up to max_rows given a handle and sql statement.
    Caller must ensure that query has passed PENDING state before calling fetch.
    If max_rows < 0, all rows are fetched. If max_rows > 0 but the number of
    rows returned is less than max_rows, all the rows have been fetched.
    Return None if discard_results is True.
    TODO: 'sql_stmt' can be obtained from 'operation_handle'."""
    pass

  @abc.abstractmethod
  def get_runtime_profile(self, operation_handle,
                          profile_format=TRuntimeProfileFormat.STRING):
    """Get runtime profile of given 'operation_handle'.
    Handle must stay open."""
    pass

  @abc.abstractmethod
  def handle_id(self, operation_handle):
    """Return a string id for given operation_handle.
    Most implementations will return an Impala query id for given handle.
    Otherwise, return str(operation_handle)."""
    pass

  def log_handle(self, operation_handle, message):
    """Log 'message' at INFO level, along with id of 'operation_handle'."""
    handle_id = self.handle_id(operation_handle)
    LOG.info(u"{0}: {1}".format(handle_id, message))

  def log_client(self, message):
    """Log 'message' at INFO level, prefixed with the protocol name of this connection."""
    LOG.info(u"{0}: {1}".format(self.get_test_protocol(), message))

  def wait_for_impala_state(self, operation_handle, expected_impala_state, timeout):
    """Waits for the given 'operation_handle' to reach the 'expected_impala_state'.
    'expected_impala_state' must be a string of either 'INITIALIZED', 'PENDING',
    'RUNNING', 'FINISHED', or 'ERROR'. If it does not reach the given state within
    'timeout' seconds, or can no longer reach it, the method raises
    tests.common.errors.Timeout. Returns None.
    """
    self.wait_for_any_impala_state(operation_handle, [expected_impala_state], timeout)

  def wait_for_any_impala_state(self, operation_handle, expected_impala_states,
                                timeout_s):
    """Waits for the given 'operation_handle' to reach one of 'expected_impala_states'.
    Each string in 'expected_impala_states' must either be 'INITIALIZED', 'PENDING',
    'RUNNING', 'FINISHED', or 'ERROR'. The state is polled every DEFAULT_SLEEP_INTERVAL
    seconds. Raises tests.common.errors.Timeout if none of the expected states is
    reached within 'timeout_s' seconds, or as soon as the current state can no longer
    transition to any of them. has_legal_future_state() asserts that the current state
    is one of the five states above.
    Returns the state that was reached.
    """
    start_time = time.time()
    timeout_msg = None
    while True:
      impala_state = self.get_impala_exec_state(operation_handle)
      interval = time.time() - start_time
      if impala_state in expected_impala_states:
        # Reached one of expected_impala_states.
        break
      elif not has_legal_future_state(impala_state, expected_impala_states):
        # Give up early: waiting longer cannot lead to any of the expected states.
        timeout_msg = ("query '{0}' can not transition from last known state '{1}' to "
                       "any of the expected states {2}. Stop waiting after {3} "
                       "seconds.").format(
                         self.handle_id(operation_handle), impala_state,
                         expected_impala_states, interval)
        break
      elif interval >= timeout_s:
        timeout_msg = ("query '{0}' did not reach one of the expected states {1}, last "
                       "known state {2}").format(
                         self.handle_id(operation_handle), expected_impala_states,
                         impala_state)
        break
      time.sleep(DEFAULT_SLEEP_INTERVAL)

    if timeout_msg is not None:
      # Import the submodule explicitly. 'import tests.common' alone does not load
      # 'tests.common.errors', so attribute access on the package would only work if
      # another module happened to import it first.
      from tests.common.errors import Timeout
      raise Timeout(timeout_msg)
    return impala_state

  @abc.abstractmethod
  def wait_for_admission_control(self, operation_handle, timeout_s=60):
    """Given an 'operation_handle', polls the coordinator waiting for it to complete
    admission control processing of the query.
    Return True if query pass admission control after given 'timeout_s'."""
    pass

  @abc.abstractmethod
  def get_admission_result(self, operation_handle):
    """Given an 'operation_handle', returns the admission result from the query
    profile"""
    pass

  @abc.abstractmethod
  def get_exec_summary(self, operation_handle):  # noqa: U100
    """Return the exec summary of 'operation_handle'. get_exec_summary_table() passes
    a truthy result to build_summary_table_from_thrift()."""
    pass

  def get_exec_summary_table(self, operation_handle):
    """Return the exec summary of 'operation_handle' as a list of dicts, one per row.
    Returns an empty list if get_exec_summary() returns nothing."""
    summary_table = list()
    summary = self.get_exec_summary(operation_handle)
    if summary:
      summary_table = build_summary_table_from_thrift(summary)
    return summary_table
|
|
|
|
|
|
# Represents a connection to Impala using the Beeswax API.
class BeeswaxConnection(ImpalaConnection):
  """ImpalaConnection implementation that delegates to an ImpalaBeeswaxClient."""

  # This is based on ClientRequestState::BeeswaxQueryState().
  __QUERY_STATE_TO_EXEC_STATE = {
    QueryState.CREATED: INITIALIZED,
    QueryState.COMPILED: PENDING,
    QueryState.RUNNING: RUNNING,
    QueryState.FINISHED: FINISHED,
    QueryState.EXCEPTION: ERROR,
    # These are not official ExecState, but added to complete mapping.
    QueryState.INITIALIZED: 'UNIMPLEMENTED_INITIALIZED',
  }

  def __init__(self, host_port, use_kerberos=False, user=None, password=None,
               use_ssl=False):
    """Create the underlying ImpalaBeeswaxClient for 'host_port'. The connection is
    opened later by connect()."""
    self.__beeswax_client = ImpalaBeeswaxClient(host_port, use_kerberos, user=user,
                                                password=password, use_ssl=use_ssl)
    self.__host_port = host_port
    self.QUERY_STATES = self.__beeswax_client.query_states

  def get_test_protocol(self):
    return BEESWAX

  def get_host_port(self):
    return self.__host_port

  def set_configuration_option(self, name, value, is_log_sql=True):
    """Set query option 'name' (lower-cased) to str(value) on the beeswax client.
    Return True if the value changed, False if it was already set to that value."""
    # Only set the option if it's not already set to the same value.
    name = name.lower()
    value = str(value)
    if self.__beeswax_client.get_query_option(name) != value:
      self.__beeswax_client.set_query_option(name, value)
      if is_log_sql:
        self.log_client("\n\nset {0}={1};\n".format(name, value))
      return True
    return False

  def clear_configuration(self):
    """Clear all query options, then restore 'client_identifier' if the test node is
    known."""
    self.__beeswax_client.clear_query_options()
    # A hook in conftest sets tests.common.current_node.
    if hasattr(tests.common, "current_node"):
      self.set_configuration_option("client_identifier", tests.common.current_node)

  def connect(self):
    """Open the beeswax connection. A failure is logged and then re-raised."""
    try:
      self.__beeswax_client.connect()
      self.log_client("connected to %s with beeswax" % self.__host_port)
    except Exception:
      self.log_client("failed connecting to %s with beeswax" % self.__host_port)
      # Bare 'raise' keeps the original traceback. 'raise e' would replace it on
      # Python 2.
      raise

  # TODO: rename to close_connection
  def close(self):
    self.log_client("closing beeswax connection to: %s" % self.__host_port)
    self.__beeswax_client.close_connection()

  def close_query(self, operation_handle, fetch_profile_after_close=False):
    """Close the query of 'operation_handle' and return what the beeswax client's
    close_query() returns."""
    self.log_handle(operation_handle, 'closing query for operation')
    return self.__beeswax_client.close_query(operation_handle.get_handle(),
                                             fetch_profile_after_close)

  def close_dml(self, operation_handle):
    self.log_handle(operation_handle, 'closing DML query')
    self.__beeswax_client.close_dml(operation_handle.get_handle())

  def execute(self, sql_stmt, user=None, fetch_profile_after_close=False,
              fetch_exec_summary=False, profile_format=TRuntimeProfileFormat.STRING):
    """Log and execute 'sql_stmt' through the beeswax client and return its result.
    Only TRuntimeProfileFormat.STRING is accepted as 'profile_format'."""
    assert profile_format == TRuntimeProfileFormat.STRING, (
      "Beeswax client only supports getting runtime profile in STRING format.")
    self.log_client(u"executing against {0}\n{1}".format(
      self.__host_port, format_sql_for_logging(sql_stmt)))
    return self.__beeswax_client.execute(
      sql_stmt, user=user, fetch_profile_after_close=fetch_profile_after_close,
      fetch_exec_summary=fetch_exec_summary)

  def execute_async(self, sql_stmt, user=None):
    """Log and start 'sql_stmt' asynchronously. Returns an OperationHandle wrapping the
    beeswax query handle."""
    self.log_client(u"executing async {0}\n{1}".format(
      self.__host_port, format_sql_for_logging(sql_stmt)))
    beeswax_handle = self.__beeswax_client.execute_query_async(sql_stmt, user=user)
    return OperationHandle(beeswax_handle, sql_stmt)

  def cancel(self, operation_handle):
    self.log_handle(operation_handle, 'canceling operation')
    return self.__beeswax_client.cancel_query(operation_handle.get_handle())

  def get_state(self, operation_handle):
    self.log_handle(operation_handle, 'getting state')
    return self.__beeswax_client.get_state(operation_handle.get_handle())

  def get_impala_exec_state(self, operation_handle):
    """Translate the beeswax query state into an ExecState name. A state missing from
    the mapping raises KeyError."""
    return self.__QUERY_STATE_TO_EXEC_STATE[self.get_state(operation_handle)]

  def get_exec_summary(self, operation_handle):
    self.log_handle(operation_handle, 'getting exec summary operation')
    return self.__beeswax_client.get_exec_summary(operation_handle.get_handle())

  def get_runtime_profile(self, operation_handle,
                          profile_format=TRuntimeProfileFormat.STRING):
    assert profile_format == TRuntimeProfileFormat.STRING, (
      "Beeswax client only supports getting runtime profile in STRING format.")
    self.log_handle(operation_handle, 'getting runtime profile operation')
    return self.__beeswax_client.get_runtime_profile(operation_handle.get_handle())

  def wait_for_finished_timeout(self, operation_handle, timeout):
    self.log_handle(operation_handle, 'waiting for query to reach FINISHED state')
    return self.__beeswax_client.wait_for_finished_timeout(
      operation_handle.get_handle(), timeout)

  def wait_for_admission_control(self, operation_handle, timeout_s=60):
    self.log_handle(operation_handle, 'waiting for completion of the admission control')
    return self.__beeswax_client.wait_for_admission_control(
      operation_handle.get_handle(), timeout_s=timeout_s)

  def get_admission_result(self, operation_handle):
    self.log_handle(operation_handle, 'getting the admission result')
    return self.__beeswax_client.get_admission_result(operation_handle.get_handle())

  def get_log(self, operation_handle):
    self.log_handle(operation_handle, 'getting log for operation')
    return self.__beeswax_client.get_log(operation_handle.get_handle().log_context)

  def fetch(self, sql_stmt, operation_handle, max_rows=-1, discard_results=False):
    self.log_handle(operation_handle, 'fetching {} rows'.format(
      'all' if max_rows < 0 else max_rows))
    return self.__beeswax_client.fetch_results(
      sql_stmt, operation_handle.get_handle(), max_rows, discard_results)

  def handle_id(self, operation_handle):
    """Return the id of the beeswax handle, or str(operation_handle) if it is empty."""
    query_id = operation_handle.get_handle().id
    return query_id if query_id else str(operation_handle)

  def log_handle(self, operation_handle, message):
    handle_id = self.handle_id(operation_handle)
    LOG.info(u"{0}: {1}".format(handle_id, message))

  def get_query_id(self, operation_handle):
    return operation_handle.get_handle().id
|
|
|
|
|
|
class ImpylaHS2Connection(ImpalaConnection):
|
|
"""Connection to Impala using the impyla client connecting to HS2 endpoint.
|
|
impyla implements the standard Python dbapi: https://www.python.org/dev/peps/pep-0249/
|
|
plus Impala-specific extensions, e.g. for fetching runtime profiles.
|
|
TODO: implement support for kerberos, SSL, etc.
|
|
"""
|
|
|
|
# ClientRequestState::TOperationState()
|
|
__OPERATION_STATE_TO_EXEC_STATE = {
|
|
'INITIALIZED_STATE': INITIALIZED,
|
|
'PENDING_STATE': PENDING,
|
|
'RUNNING_STATE': RUNNING,
|
|
'FINISHED_STATE': FINISHED,
|
|
'ERROR_STATE': ERROR,
|
|
# These are not official ExecState, but added to complete mapping.
|
|
'CANCELED_STATE': 'UNIMPLEMENTED_CANCELLED',
|
|
'CLOSED_STATE': 'UNIMPLEMENTED_CLOSED',
|
|
'UKNOWN_STATE': 'UNIMPLEMENTED_UNKNOWN'
|
|
}
|
|
|
|
def __init__(self, host_port, use_kerberos=False, is_hive=False,
             use_http_transport=False, http_path="", use_ssl=False,
             collect_profile_and_log=True, user=None):
  """Record the connection settings. The impyla connection itself is opened by
  connect(). Raises NotImplementedError if 'use_kerberos' is set."""
  # Endpoint and transport settings used by connect().
  self.__host_port = host_port
  self.__use_http_transport = use_http_transport
  self.__http_path = http_path
  self.__use_ssl = use_ssl
  if use_kerberos:
    raise NotImplementedError("Kerberos support not yet implemented")

  self.__user = user
  self._is_hive = is_hive
  # Some Hive HS2 protocol, such as custom Calcite planner, may be able to collect
  # profile and log from Impala.
  self._collect_profile_and_log = collect_profile_and_log
  # Query options to send along with each query.
  self.__query_options = dict()

  # The impyla connection is initialised in connect() and the main cursor is opened
  # on first use. The same cursor is reused for different operations (instead of one
  # new cursor per operation) so that the session is preserved. This means only one
  # operation can run at a time per connection, a limitation the Beeswax API has too.
  # For ease of async query testing, additional cursors may still be opened through
  # a single ImpylaHS2Connection by execute_async(), or by execute() with a 'user'
  # different than self.__user. Those cursors do not share the session of
  # self.__cursor.
  self.__impyla_conn = None
  self.__cursor = None
  # All cursors that were created through execute_async.
  self.__async_cursors = []
|
|
|
|
def get_test_protocol(self):
  """Return HS2_HTTP if this connection uses the HTTP transport, otherwise HS2.

  The connection counts as HTTP when 'use_http_transport' was requested or an
  'http_path' was given. Checking only 'http_path' would report a connection created
  with use_http_transport=True and the default empty path as plain HS2."""
  if self.__use_http_transport or self.__http_path:
    return HS2_HTTP
  return HS2
|
|
|
|
def get_host_port(self):
  """Return the 'host_port' value this connection was created with."""
  host_port = self.__host_port
  return host_port
|
|
|
|
def set_configuration_option(self, name, value, is_log_sql=True):
  """Remember query option 'name' (lower-cased) with str(value), to be sent along with
  later queries. Return True if the stored value changed, False if it was already set
  to that value. The equivalent SET statement is logged when 'is_log_sql' is true."""
  option_name = name.lower()
  # The value must be stored as a string.
  option_value = str(value)
  if self.__query_options.get(option_name) == option_value:
    # Already set to the same value: nothing to do.
    return False
  self.__query_options[option_name] = option_value
  if is_log_sql:
    self.log_client("\n\nset {0}={1};\n".format(option_name, option_value))
  return True
|
|
|
|
def clear_configuration(self):
  """Drop all stored query options. For non-Hive connections 'client_identifier' is
  set again when tests.common.current_node is defined."""
  self.__query_options.clear()
  if self._is_hive or not hasattr(tests.common, "current_node"):
    return
  self.set_configuration_option("client_identifier", tests.common.current_node)
|
|
|
|
def __open_single_cursor(self, user=None):
  """Open and return a new impyla cursor for 'user' on the current connection."""
  cursor_kwargs = dict(user=user, convert_types=False, close_finished_queries=False)
  return self.__impyla_conn.cursor(**cursor_kwargs)
|
|
|
|
def __close_single_cursor(self, cursor):
  """Close 'cursor' on a best-effort basis. Any Exception raised by the close is
  ignored."""
  try:
    # Closing the cursor explicitly is what closes its session.
    cursor.close()
  except Exception:
    # Deliberately ignored: the session may no longer be valid if the impalad was
    # restarted during the test.
    pass
|
|
|
|
def default_cursor(self):
  """Return the main cursor of this connection, opening it for the connection's user
  on first use."""
  cursor = self.__cursor
  if cursor is None:
    cursor = self.__open_single_cursor(user=self.__user)
    self.__cursor = cursor
  return cursor
|
|
|
|
def connect(self):
  """Open the impyla connection to the 'host:port' given at construction.

  Hive connections use the 'PLAIN' auth mechanism. A failure is logged and the
  original exception is re-raised."""
  host, port = self.__host_port.split(":")
  conn_kwargs = {}
  if self._is_hive:
    conn_kwargs['auth_mechanism'] = 'PLAIN'
  try:
    self.__impyla_conn = impyla.connect(
      host=host, port=int(port), use_http_transport=self.__use_http_transport,
      http_path=self.__http_path, use_ssl=self.__use_ssl, **conn_kwargs)
    self.log_client("connected to {0} with impyla {1}".format(
      self.__host_port, self.get_test_protocol()))
  except Exception:
    self.log_client("failed connecting to {0} with impyla {1}".format(
      self.__host_port, self.get_test_protocol()
    ))
    # Bare 'raise' keeps the original traceback. 'raise e' would replace it on
    # Python 2.
    raise
|
|
|
|
def close(self):
  """Close the main cursor, all async cursors and the impyla connection.

  Safe to call when connect() was never called or did not succeed: in that case there
  is no impyla connection and only the cursor bookkeeping is reset."""
  self.log_client("closing 1 sync and {0} async {1} connections to: {2}".format(
    len(self.__async_cursors), self.get_test_protocol(), self.__host_port))
  if self.__cursor is not None:
    self.__close_single_cursor(self.__cursor)
  for async_cursor in self.__async_cursors:
    self.__close_single_cursor(async_cursor)
  # Remove all async cursors.
  self.__async_cursors = list()
  if self.__impyla_conn is None:
    # Nothing was opened. Without this guard, None.close() raises AttributeError,
    # which is re-raised below for non-HTTP connections.
    return
  try:
    self.__impyla_conn.close()
  except AttributeError as e:
    # When the HTTP endpoint restarts, Thrift HTTP will close the endpoint and calling
    # close() will result in an exception.
    if not (self.__use_http_transport and 'NoneType' in str(e)):
      raise
|
|
|
|
def get_tables(self, database=None):
  """Issue the GetTables() HS2 request for 'database' (None means all databases) on
  the main cursor.
  Returns a list of (catalogName, dbName, tableName, tableType, tableComment).
  """
  self.log_client("getting tables for database: {0}".format(database))
  cursor = self.default_cursor()
  cursor.get_tables(database_name=database)
  return cursor.fetchall()
|
|
|
|
def close_query(self, operation_handle, fetch_profile_after_close=False):
  """Close the operation of the cursor wrapped by 'operation_handle'.

  Returns the runtime profile in STRING format, fetched after the close, when
  'fetch_profile_after_close' is true. Otherwise returns None. Fetching the profile
  asserts that this connection is configured to collect profiles."""
  self.log_handle(operation_handle, 'closing query for operation')
  cursor = operation_handle.get_handle()
  # close_operation() will wipe out _last_operation, so keep a reference to it in
  # order to be able to pull the profile after the close.
  last_operation = cursor._last_operation
  cursor.close_operation()
  if not fetch_profile_after_close:
    return None
  assert self._collect_profile_and_log, (
    "This connection is not configured to collect profile.")
  return last_operation.get_profile(TRuntimeProfileFormat.STRING)
|
|
|
|
def __log_execute(self, cursor, user, sql_stmt):
  """Log the statement about to be executed on 'cursor', along with the target
  engine, host, session id, whether 'cursor' is the main cursor, and 'user'."""
  engine = 'Hive' if self._is_hive else 'Impala'
  session_id = self.__get_session_id(cursor)
  # Note that default_cursor() opens the main cursor if it is not open yet.
  is_main_cursor = (cursor == self.default_cursor())
  template = (u"executing against {0} at {1}. session: {2} main_cursor: {3} "
              u"user: {4}\n{5}")
  self.log_client(template.format(
    engine, self.__host_port, session_id, is_main_cursor, user,
    format_sql_for_logging(sql_stmt)))
|
|
|
|
def execute(self, sql_stmt, user=None, fetch_profile_after_close=False,
            fetch_exec_summary=False, profile_format=TRuntimeProfileFormat.STRING):
  """Execute 'sql_stmt' synchronously and return the fetched result.

  The default cursor is used when 'user' equals the user of this connection;
  otherwise a dedicated cursor is opened for 'user' and closed again before
  returning. The remaining arguments are forwarded to the result/profile fetching
  helper. The operation on the cursor is always closed, also when execution or
  fetching raises.
  """
  same_user = (user == self.__user)
  if same_user:
    cursor = self.default_cursor()
  else:
    # Must create a new cursor to supply 'user'.
    cursor = self.__open_single_cursor(user=user)
  try:
    self.__log_execute(cursor, user, sql_stmt)
    cursor.execute(sql_stmt, configuration=self.__query_options)
    handle = OperationHandle(cursor, sql_stmt)
    self.log_handle(handle, "query started")
    return self.__fetch_results_and_profile(
        handle, fetch_profile_after_close=fetch_profile_after_close,
        fetch_exec_summary=fetch_exec_summary, profile_format=profile_format)
  finally:
    try:
      cursor.close_operation()
    finally:
      # Release the dedicated cursor even if close_operation() raised; otherwise it
      # would be leaked.
      if not same_user:
        self.__close_single_cursor(cursor)
|
|
|
|
def __fetch_results_and_profile(
    self, operation_handle, fetch_profile_after_close=False,
    fetch_exec_summary=False, profile_format=TRuntimeProfileFormat.STRING):
  """Fetch the results of 'operation_handle' and close the query afterwards.

  Returns the object produced by __fetch_results(). When 'fetch_profile_after_close'
  is set, its 'runtime_profile' is replaced with the profile returned by
  close_query(). If no result was obtained (for example because fetching raised), the
  query is closed on a best-effort basis and the original exception propagates.
  """
  r = None
  try:
    r = self.__fetch_results(operation_handle, fetch_exec_summary=fetch_exec_summary,
                             profile_format=profile_format)
  finally:
    if r is None:
      # Try to close the query handle but ignore any exceptions not to replace the
      # original exception raised by '__fetch_results'.
      try:
        self.close_query(operation_handle)
      except Exception:
        pass
  # The success path lives outside of 'finally' so that no 'return' statement sits
  # inside the 'finally' block.
  if r is None:
    return None
  if fetch_profile_after_close:
    # Match ImpalaBeeswaxResult by placing the full profile including end time and
    # duration into the return object.
    r.runtime_profile = self.close_query(operation_handle, fetch_profile_after_close)
  else:
    self.close_query(operation_handle)
  return r
|
|
|
|
def execute_async(self, sql_stmt, user=None):
  """Start 'sql_stmt' asynchronously on a newly opened cursor and return its handle.

  The cursor is opened for 'user' and, once the statement has been submitted, is
  remembered in the list of async cursors of this connection. If anything fails, the
  cursor (when one was opened) is cleaned up and the original exception is re-raised.
  """
  async_cursor = None
  try:
    async_cursor = self.__open_single_cursor(user=user)
    handle = OperationHandle(async_cursor, sql_stmt)
    self.__log_execute(async_cursor, user, sql_stmt)
    async_cursor.execute_async(sql_stmt, configuration=self.__query_options)
    self.__async_cursors.append(async_cursor)
    return handle
  except Exception:
    if async_cursor:
      try:
        async_cursor.close_operation()
      finally:
        # Close the cursor even if closing its operation failed.
        self.__close_single_cursor(async_cursor)
    # Bare 'raise' keeps the original traceback intact.
    raise
|
|
|
|
def cancel(self, operation_handle):
  """Cancel the operation of 'operation_handle' and return the cursor's response.

  The cursor is asked to cancel with reset_state=False.
  """
  self.log_handle(operation_handle, 'canceling operation')
  return operation_handle.get_handle().cancel_operation(reset_state=False)
|
|
|
|
def get_query_id(self, operation_handle):
  """Return the string representation of the query id.

  Return empty string if handle is already canceled or closed, i.e. when the cursor
  has no last operation or no id could be derived from it."""
  last_op = operation_handle.get_handle()._last_operation
  if last_op is None:
    return ""
  query_id = op_handle_to_query_id(last_op.handle)
  if query_id is None:
    return ""
  return query_id
|
|
|
|
def __get_session_id(self, cursor):
  """Return the string representation of the session id of 'cursor'.

  Return empty string if the cursor has no session or no id could be derived from
  the session handle."""
  if cursor.session is None:
    return ""
  session_id = session_handle_to_session_id(cursor.session.handle)
  if session_id is None:
    return ""
  return session_id
|
|
|
|
def handle_id(self, operation_handle):
  """Return the query id of 'operation_handle', or str(operation_handle) when the
  query id is empty."""
  return self.get_query_id(operation_handle) or str(operation_handle)
|
|
|
|
def get_state(self, operation_handle):
  """Return the status reported by the cursor behind 'operation_handle'."""
  self.log_handle(operation_handle, 'getting state')
  # The cursor's status is a string representation of one of
  # TCLIService.TOperationState.
  return operation_handle.get_handle().status()
|
|
|
|
def get_impala_exec_state(self, operation_handle):
  """Translate the HS2 operation state of 'operation_handle' into an exec state.

  The state from get_state() is looked up in __OPERATION_STATE_TO_EXEC_STATE. If an
  impyla_error.Error is raised along the way, ERROR is returned. Any other exception
  propagates unchanged (the former catch-and-re-raise clause added nothing but a
  rewritten traceback).
  """
  try:
    hs2_state = self.get_state(operation_handle)
    return self.__OPERATION_STATE_TO_EXEC_STATE[hs2_state]
  except impyla_error.Error:
    return ERROR
|
|
|
|
def get_exec_summary(self, operation_handle):
  """Return the exec summary obtained from the cursor behind 'operation_handle'."""
  self.log_handle(operation_handle, 'getting exec summary operation')
  # summary returned is thrift, not string.
  summary = operation_handle.get_handle().get_summary()
  return summary
|
|
|
|
def get_runtime_profile(self, operation_handle,
                        profile_format=TRuntimeProfileFormat.STRING):
  """Return the runtime profile of 'operation_handle' in 'profile_format', as
  provided by the cursor's get_profile()."""
  self.log_handle(operation_handle, 'getting runtime profile operation')
  return operation_handle.get_handle().get_profile(profile_format=profile_format)
|
|
|
|
def wait_for_finished_timeout(self, operation_handle, timeout):
  """Poll the query until it is FINISHED or 'timeout' seconds have elapsed.

  Returns True once the exec state is FINISHED and False on timeout. If the exec
  state is ERROR, the cursor's log is raised as impyla_error.OperationalError and the
  query is closed.
  """
  self.log_handle(operation_handle, 'waiting for query to reach FINISHED state')
  began = time.time()
  while time.time() - began < timeout:
    poll_began = time.time()
    exec_state = self.get_impala_exec_state(operation_handle)
    poll_elapsed = time.time() - poll_began
    # if the rpc succeeded, the output is the query state
    if exec_state == FINISHED:
      return True
    if exec_state == ERROR:
      try:
        error_log = operation_handle.get_handle().get_log()
        raise impyla_error.OperationalError(error_log, None)
      finally:
        self.close_query(operation_handle)
    # Only sleep for what is left of the polling interval after the state RPC.
    if poll_elapsed < DEFAULT_SLEEP_INTERVAL:
      time.sleep(DEFAULT_SLEEP_INTERVAL - poll_elapsed)
  return False
|
|
|
|
def wait_for_admission_control(self, operation_handle, timeout_s=60):
  """Poll is_admitted() until it is true or 'timeout_s' seconds have elapsed.

  Returns True as soon as is_admitted() reports true, False on timeout.
  """
  self.log_handle(operation_handle, 'waiting for completion of the admission control')
  began = time.time()
  while time.time() - began < timeout_s:
    poll_began = time.time()
    admitted = self.is_admitted(operation_handle)
    if admitted:
      return True
    poll_elapsed = time.time() - poll_began
    # Only sleep for what is left of the polling interval after the check.
    if poll_elapsed < DEFAULT_SLEEP_INTERVAL:
      time.sleep(DEFAULT_SLEEP_INTERVAL - poll_elapsed)
  return False
|
|
|
|
def get_admission_result(self, operation_handle):
  """Return the text following "Admission result: " in the runtime profile.

  Returns an empty string if the query is not admitted or the profile has no such
  line.
  """
  self.log_handle(operation_handle, 'getting the admission result')
  if not self.is_admitted(operation_handle):
    return ""
  profile_text = self.get_runtime_profile(operation_handle)
  found = re.search(r"Admission result: (.*)", profile_text)
  return found.group(1) if found else ""
|
|
|
|
def get_log(self, operation_handle):
  """Return the cursor's log for 'operation_handle' without the lines that match
  PROGRESS_LOG_RE."""
  self.log_handle(operation_handle, 'getting log for operation')
  raw_log = operation_handle.get_handle().get_log()
  # HS2 includes non-error log messages that we need to filter out.
  return '\n'.join(
      line for line in raw_log.split('\n') if not PROGRESS_LOG_RE.match(line))
|
|
|
|
def fetch(self, sql_stmt, operation_handle, max_rows=-1, discard_results=False):
  """Fetch results for 'operation_handle' via __fetch_results().

  A negative 'max_rows' means all rows. 'sql_stmt' is not used by this
  implementation.
  """
  row_count_desc = 'all' if max_rows < 0 else max_rows
  self.log_handle(operation_handle, 'fetching {} rows'.format(row_count_desc))
  return self.__fetch_results(operation_handle, max_rows, discard_results)
|
|
|
|
def __fetch_results(self, handle, max_rows=-1,
                    discard_results=False,
                    fetch_exec_summary=False,
                    profile_format=TRuntimeProfileFormat.STRING):
  """Implementation of result fetching from handle.

  Rows are fetched from the cursor of 'handle' (all of them when 'max_rows' is
  negative, otherwise at most 'max_rows'). Returns None when 'discard_results' is
  set; the rows are still fetched in that case. Otherwise returns an
  ImpylaHS2ResultSet. For non-Hive connections the result additionally carries the
  exec summary (if 'fetch_exec_summary') and the log and profile (if this connection
  collects them).
  """
  cursor = handle.get_handle()
  assert cursor is not None

  # Don't fetch data for queries with no results.
  result_tuples = column_labels = column_types = None
  if cursor.has_result_set:
    desc = cursor.description
    column_labels = [entry[0].upper() for entry in desc]
    column_types = [entry[1].upper() for entry in desc]
    result_tuples = cursor.fetchall() if max_rows < 0 else cursor.fetchmany(max_rows)

  if discard_results:
    return None

  log = profile = exec_summary = None
  if not self._is_hive:
    if fetch_exec_summary:
      exec_summary = self.get_exec_summary_table(handle)
    if self._collect_profile_and_log:
      log = self.get_log(handle)
      profile = self.get_runtime_profile(handle, profile_format=profile_format)

  return ImpylaHS2ResultSet(success=True, result_tuples=result_tuples,
                            column_labels=column_labels, column_types=column_types,
                            query=handle.sql_stmt(), log=log, profile=profile,
                            query_id=self.get_query_id(handle),
                            exec_summary=exec_summary)
|
|
|
|
|
|
class ImpylaHS2ResultSet(object):
  """This emulates the interface of ImpalaBeeswaxResult so that it can be used in
  place of it. TODO: when we deprecate/remove Beeswax, clean this up."""

  def __init__(self, success, result_tuples, column_labels, column_types, query, log,
               profile, query_id, exec_summary):
    """Store the given fields and derive 'data' from 'result_tuples'.

    'result_tuples' may be None, in which case 'data' is None as well.
    """
    self.success = success
    self.column_labels = column_labels
    self.column_types = column_types
    self.query = query
    self.log = log
    # ImpalaBeeswaxResult store profile at runtime_profile field
    self.runtime_profile = profile
    self.query_id = query_id
    self.__result_tuples = result_tuples
    # self.data is the data in the ImpalaBeeswaxResult format: a list of rows with each
    # row represented as a tab-separated string.
    self.data = None
    if result_tuples is not None:
      self.data = [self.__convert_result_row(row) for row in result_tuples]
    self.exec_summary = exec_summary

  def tuples(self):
    """Return the raw HS2 result set, which is a list of tuples."""
    return self.__result_tuples

  def __convert_result_row(self, result_tuple):
    """Take primitive values from a result tuple and construct the tab-separated string
    that would have been returned via beeswax."""
    return '\t'.join(self.__convert_result_value(val) for val in result_tuple)

  def __convert_result_value(self, val):
    """Take a primitive value from a result tuple and construct the string that would
    have been returned via beeswax: 'NULL' for None, 16 significant digits for floats
    and str() for everything else."""
    if val is None:
      return 'NULL'
    if isinstance(val, float):
      # Same format as what Beeswax uses in the backend.
      return "{:.16g}".format(val)
    return str(val)
|
|
|
|
|
|
def create_connection(host_port, use_kerberos=False, protocol=BEESWAX,
                      is_hive=False, use_ssl=False, collect_profile_and_log=True):
  """Create a connection object for 'protocol' (BEESWAX, HS2 or HS2_HTTP).

  BEESWAX yields a BeeswaxConnection; HS2 and HS2_HTTP yield an ImpylaHS2Connection,
  the latter with HTTP transport on path 'cliservice'. Any other protocol trips an
  assertion. For non-Hive connections the 'client_identifier' option is set to
  tests.common.current_node when that attribute exists.
  """
  if protocol == BEESWAX:
    c = BeeswaxConnection(host_port=host_port, use_kerberos=use_kerberos,
                          use_ssl=use_ssl)
  else:
    hs2_kwargs = dict(host_port=host_port, use_kerberos=use_kerberos, is_hive=is_hive,
                      use_ssl=use_ssl, collect_profile_and_log=collect_profile_and_log)
    if protocol != HS2:
      assert protocol == HS2_HTTP
      hs2_kwargs.update(use_http_transport=True, http_path='cliservice')
    c = ImpylaHS2Connection(**hs2_kwargs)

  # A hook in conftest sets tests.common.current_node. Skip for Hive connections since
  # Hive cannot modify client_identifier at runtime.
  if hasattr(tests.common, "current_node") and not is_hive:
    c.set_configuration_option("client_identifier", tests.common.current_node)
  return c
|
|
|
|
|
|
def create_ldap_connection(host_port, user, password, use_ssl=False):
  """Create a BeeswaxConnection for 'host_port' with the given 'user', 'password' and
  'use_ssl' setting."""
  ldap_connection = BeeswaxConnection(
      host_port=host_port, user=user, password=password, use_ssl=use_ssl)
  return ldap_connection
|
|
|
|
|
|
class MinimalHS2OperationHandle(OperationHandle):
  """OperationHandle whose string form is the query id derived from the wrapped
  handle via op_handle_to_query_id()."""

  def __str__(self):
    wrapped_handle = self.get_handle()
    return op_handle_to_query_id(wrapped_handle)
|
|
|
|
|
|
class MinimalHS2Connection(ImpalaConnection):
|
|
"""
|
|
Connection to Impala using the HiveServer2 (HS2) protocol.
|
|
|
|
This class does not use Impyla's DB-API cursors. Instead, it is built directly on the
|
|
HS2 RPC layer to support manipulating one operation from multiple connections
|
|
concurrently.
|
|
|
|
This class is designed to be minimalistic to facilitate testing. Each method is mapped
|
|
to only one Thrift RPC.
|
|
"""
|
|
def __init__(self, host_port, user=None):
  """Connect to 'host_port' ("host:port") with NOSASL and open a session.

  The session is opened for 'user', or for the name returned by getpass.getuser()
  when 'user' is None.
  """
  self.__host_port = host_port
  host, port = host_port.split(":")
  self.__conn = hs2.connect(host, port, auth_mechanism='NOSASL')
  if user is None:
    user = getpass.getuser()
  self.__user = user
  self.__session = self.__conn.open_session(self.__user)
|
|
|
|
def connect(self):
  """Do nothing and return None."""
  return None
|
|
|
|
def close(self):
  """Close the session and then the connection.

  The connection is closed even when closing the session raises.
  """
  LOG.info("-- closing connection to: %s" % self.__host_port)
  try:
    self.__session.close()
  finally:
    # Runs regardless of the outcome of the session close above.
    self.__conn.close()
|
|
|
|
def execute(self, sql_stmt, user=None, fetch_profile_after_close=False,  # noqa: U100
            fetch_exec_summary=False,  # noqa: U100
            profile_format=TRuntimeProfileFormat.STRING):  # noqa: U100
  """Not implemented by this connection; always raises NotImplementedError."""
  raise NotImplementedError()
|
|
|
|
def execute_async(self, sql_stmt):
  """Submit 'sql_stmt' through the session and return a MinimalHS2OperationHandle
  wrapping the handle of the resulting operation."""
  started_operation = self.__session.execute(sql_stmt)
  operation_handle = MinimalHS2OperationHandle(started_operation.handle, sql_stmt)
  LOG.info("Started query {0}".format(operation_handle))
  return operation_handle
|
|
|
|
def __get_operation(self, operation_handle):
  """Build an hs2.Operation from this connection's session and the handle wrapped by
  'operation_handle'."""
  wrapped_handle = operation_handle.get_handle()
  return hs2.Operation(self.__session, wrapped_handle)
|
|
|
|
def fetch(self, sql_stmt, operation_handle, max_rows=-1):  # noqa: U100
  """
  Fetch the results of the query. It will block the current connection if the results
  are not available yet. 'sql_stmt' is not used; 'max_rows' is forwarded to the
  operation's fetch().
  """
  LOG.info("-- fetching results from: {0}".format(operation_handle))
  operation = self.__get_operation(operation_handle)
  return operation.fetch(max_rows=max_rows)
|
|
|
|
def fetch_error(self, operation_handle):
  """
  Fetch the error of the query.

  Calls fetch() and returns the exception it raises. If fetch() unexpectedly
  succeeds, an AssertionError is raised. The check lives outside of the 'try' block:
  otherwise the AssertionError itself would be caught by 'except Exception' and
  handed back to the caller as if it were the query error.
  """
  try:
    self.fetch(None, operation_handle)
  except Exception as exc:
    return exc
  raise AssertionError("Failed to catch the error of the query.")
|
|
|
|
def get_state(self, operation_handle):
  """Return the status reported by the operation behind 'operation_handle'."""
  operation = self.__get_operation(operation_handle)
  return operation.get_status()
|
|
|
|
def wait_for(self, operation_handle, timeout_s=60):
  """
  Wait until the query is in a terminal state, i.e. its state is no longer one of
  PENDING_STATE, INITIALIZED_STATE or RUNNING_STATE, and return that state. Raises
  an Exception once more than 'timeout_s' seconds have passed without that happening.
  """
  in_flight_states = ("PENDING_STATE", "INITIALIZED_STATE", "RUNNING_STATE")
  began = time.time()
  while True:
    current_state = self.get_state(operation_handle)
    if current_state not in in_flight_states:
      return current_state
    elapsed = time.time() - began
    if elapsed > timeout_s:
      raise Exception("Timed out waiting for the query")
    time.sleep(0.1)
|
|
|
|
def cancel(self, operation_handle):
  """Cancel the operation behind 'operation_handle' and return the result of the
  operation's cancel()."""
  LOG.info("-- canceling operation: {0}".format(operation_handle))
  operation = self.__get_operation(operation_handle)
  return operation.cancel()
|
|
|
|
def close_query(self, operation_handle):
  """Close the operation behind 'operation_handle' and return the result of the
  operation's close()."""
  LOG.info("-- closing query for operation handle: {0}".format(operation_handle))
  operation = self.__get_operation(operation_handle)
  return operation.close()
|
|
|
|
def state_is_finished(self, operation_handle):  # noqa: U100
  """Not implemented by this connection; always raises NotImplementedError."""
  raise NotImplementedError()
|
|
|
|
def get_log(self, operation_handle):
  """Return the log reported by the operation behind 'operation_handle'."""
  operation = self.__get_operation(operation_handle)
  return operation.get_log()
|
|
|
|
def set_configuration_option(self, name, value):  # noqa: U100
  """Not implemented by this connection; always raises NotImplementedError."""
  raise NotImplementedError()
|
|
|
|
def clear_configuration(self):
  """Not implemented by this connection; always raises NotImplementedError."""
  raise NotImplementedError()
|
|
|
|
def get_host_port(self):
  """Return the stored host:port value of this connection."""
  endpoint = self.__host_port
  return endpoint
|
|
|
|
def get_test_protocol(self):
  """Return the protocol constant of this connection, which is always HS2."""
  protocol = HS2
  return protocol
|
|
|
|
def handle_id(self, operation_handle):  # noqa: U100
  """Return str(operation_handle)."""
  handle_text = str(operation_handle)
  return handle_text
|
|
|
|
def get_admission_result(self, operation_handle):  # noqa: U100
  """Not implemented by this connection; always raises NotImplementedError."""
  raise NotImplementedError()
|
|
|
|
def get_impala_exec_state(self, operation_handle):  # noqa: U100
  """Not implemented by this connection; always raises NotImplementedError."""
  raise NotImplementedError()
|
|
|
|
def get_runtime_profile(self, operation_handle,  # noqa: U100
                        profile_format=TRuntimeProfileFormat.STRING):  # noqa: U100
  """Not implemented by this connection; always raises NotImplementedError."""
  raise NotImplementedError()
|
|
|
|
def wait_for_admission_control(self, operation_handle, timeout_s=60):  # noqa: U100
  """Not implemented by this connection; always raises NotImplementedError."""
  raise NotImplementedError()
|
|
|
|
def get_exec_summary(self, operation_handle):  # noqa: U100
  """Not implemented by this connection; always raises NotImplementedError."""
  raise NotImplementedError()
|