IMPALA-11314: Test PyPI package with system python

Sets up a virtualenv with system python to install the impala-shell PyPI
package into. Using system python provides better coverage for Python
versions likely to be used by customers. Runs impala-shell tests using
the PyPI package to provide better coverage for the artifact customers
will use.

Includes a PyPI install in notests_independent_targets because these
seem to be used for Python testing despite -notests.

Change-Id: I384ea6a7dab51945828cca629860400a23fa0c05
Reviewed-on: http://gerrit.cloudera.org:8080/18586
Reviewed-by: Joe McDonnell <joemcdonnell@cloudera.com>
Tested-by: Joe McDonnell <joemcdonnell@cloudera.com>
This commit is contained in:
Michael Smith
2022-06-02 10:02:23 -07:00
committed by Joe McDonnell
parent c7784bde55
commit 5263d13112
9 changed files with 140 additions and 110 deletions

View File

@@ -451,6 +451,7 @@ add_subdirectory(common/protobuf)
add_subdirectory(be)
add_subdirectory(docker)
add_subdirectory(java)
add_subdirectory(shell)
# Build target for all generated files which most backend code depends on
add_custom_target(gen-deps ALL DEPENDS thrift-deps proto-deps fb-deps
@@ -458,14 +459,6 @@ add_custom_target(gen-deps ALL DEPENDS thrift-deps proto-deps fb-deps
add_custom_target(tarballs ALL DEPENDS shell_tarball)
add_custom_target(shell_tarball DEPENDS gen-deps
COMMAND "${CMAKE_SOURCE_DIR}/shell/make_shell_tarball.sh"
)
add_custom_target(shell_pypi_package DEPENDS shell_tarball
COMMAND "${CMAKE_SOURCE_DIR}/shell/packaging/make_python_package.sh"
)
add_custom_target(cscope ALL DEPENDS gen-deps
COMMAND "${CMAKE_SOURCE_DIR}/bin/gen-cscope.sh"
)
@@ -475,7 +468,7 @@ add_custom_target(impala_python ALL
)
add_custom_target(notests_independent_targets DEPENDS
java cscope tarballs impala_python
java cscope tarballs impala_python shell_python2_install
)
add_custom_target(notests_regular_targets DEPENDS
impalad statestored catalogd admissiond fesupport loggingsupport ImpalaUdf udasample udfsample impala-profile-tool

View File

@@ -38,7 +38,8 @@ export LD_LIBRARY_PATH=":$(PYTHONPATH=${PYTHONPATH} \
IMPALA_PY_DIR="$(dirname "$0")/../infra/python"
IMPALA_PY_ENV_DIR="${IMPALA_PY_DIR}/env-gcc${IMPALA_GCC_VERSION}"
IMPALA_PYTHON_EXECUTABLE="${IMPALA_PY_ENV_DIR}/bin/python"
# Allow overriding the python executable
IMPALA_PYTHON_EXECUTABLE="${IMPALA_PYTHON_EXECUTABLE:-${IMPALA_PY_ENV_DIR}/bin/python}"
for PYTHON_LIB_DIR in ${THRIFT_PY_ROOT}/python/lib{64,}; do
[[ -d ${PYTHON_LIB_DIR} ]] || continue

21
bin/impala-virtualenv Executable file
View File

@@ -0,0 +1,21 @@
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Runs the virtualenv tool that lives inside Impala's Python environment,
# forwarding all command-line arguments to it unchanged.
source "$(dirname "$0")/impala-python-common.sh"

# NOTE(review): presumably impala-python-common.sh defines the environment
# directory; confirm which variable name it uses. Accept either PY_ENV_DIR or
# IMPALA_PY_ENV_DIR so an empty value can never turn the exec below into
# "/bin/python /bin/virtualenv".
VENV_DIR="${PY_ENV_DIR:-${IMPALA_PY_ENV_DIR:-}}"
if [[ -z "${VENV_DIR}" ]]; then
  echo "impala-virtualenv: Python environment directory is not set after sourcing" \
    "impala-python-common.sh" >&2
  exit 1
fi

# Replace this process with the environment's python running its virtualenv.
exec "${VENV_DIR}/bin/python" "${VENV_DIR}/bin/virtualenv" "$@"

45
shell/CMakeLists.txt Normal file
View File

@@ -0,0 +1,45 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Builds the impala-shell tarball by running make_shell_tarball.sh after the
# gen-deps target.
add_custom_target(shell_tarball
  DEPENDS gen-deps
  COMMAND "${CMAKE_SOURCE_DIR}/shell/make_shell_tarball.sh"
  VERBATIM
)

# Builds the impala-shell PyPI package by running make_python_package.sh after
# the shell_tarball target.
add_custom_target(shell_pypi_package
  DEPENDS shell_tarball
  COMMAND "${CMAKE_SOURCE_DIR}/shell/packaging/make_python_package.sh"
  VERBATIM
)
# A separate package target is needed because without OFFICIAL the file name is
# non-deterministic. Uses a custom target to synchronize for multiple dependents.
set(SHELL_TEST_PKG
  "${CMAKE_SOURCE_DIR}/shell/build/dist/impala_shell-install-test.tar.gz")
get_filename_component(SHELL_TEST_PKG_DIR "${SHELL_TEST_PKG}" DIRECTORY)

# Generates SHELL_TEST_PKG by running make_python_package.sh with a fixed
# BUILD_VERSION, OFFICIAL=true and DIST_DIR pointing at the package directory.
# Each VAR=value pair is a single quoted CMake argument so that VERBATIM can
# escape it correctly; no literal quote characters are embedded in the value.
add_custom_target(shell_pypi_test_package
  DEPENDS shell_tarball
  COMMAND env
    "BUILD_VERSION=install-test"
    "OFFICIAL=true"
    "DIST_DIR=${SHELL_TEST_PKG_DIR}"
    "${CMAKE_SOURCE_DIR}/shell/packaging/make_python_package.sh"
  VERBATIM
)
# Location of the python2 virtualenv that the test package is installed into.
set(PYTHON2_VENV "${CMAKE_SOURCE_DIR}/shell/build/py2_venv")

# Creates the python2 virtualenv. The helper script is referenced by its path
# in the source tree (like the other scripts invoked from these build files)
# rather than being looked up on PATH.
add_custom_command(
  OUTPUT "${PYTHON2_VENV}"
  DEPENDS impala_python
  COMMAND "${CMAKE_SOURCE_DIR}/bin/impala-virtualenv" --python python2 "${PYTHON2_VENV}"
  VERBATIM
)

# Installs the test PyPI package into the python2 virtualenv using that
# virtualenv's own pip. Part of the default (ALL) build.
add_custom_target(shell_python2_install ALL
  DEPENDS "${PYTHON2_VENV}" shell_pypi_test_package
  COMMAND "${PYTHON2_VENV}/bin/pip" install "${SHELL_TEST_PKG}"
  VERBATIM
)

View File

@@ -32,7 +32,7 @@ from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
from tests.common.impala_service import ImpaladService
from tests.common.test_dimensions import create_client_protocol_dimension
from tests.shell.util import run_impala_shell_cmd, run_impala_shell_cmd_no_expect, \
ImpalaShell
ImpalaShell, create_impala_shell_executable_dimension
REQUIRED_MIN_OPENSSL_VERSION = 0x10001000L
# Python supports TLSv1.2 from 2.7.9 officially but on Red Hat/CentOS Python2.7.5
@@ -142,6 +142,7 @@ class TestClientSsl(CustomClusterTestSuite):
def add_test_dimensions(cls):
super(TestClientSsl, cls).add_test_dimensions()
cls.ImpalaTestMatrix.add_dimension(create_client_protocol_dimension())
cls.ImpalaTestMatrix.add_dimension(create_impala_shell_executable_dimension())
@pytest.mark.execute_serially
@CustomClusterTestSuite.with_args(impalad_args=WEBSERVER_SSL_ARGS,

View File

@@ -24,7 +24,7 @@ from tests.common.skip import (SkipIfS3, SkipIfABFS, SkipIfADLS, SkipIfLocal, Sk
SkipIfCOS)
from tests.common.test_dimensions import (create_exec_option_dimension_from_dict,
create_client_protocol_dimension, hs2_parquet_constraint)
from tests.shell.util import ImpalaShell
from tests.shell.util import create_impala_shell_executable_dimension
class TestDateQueries(ImpalaTestSuite):
@@ -50,6 +50,7 @@ class TestDateQueries(ImpalaTestSuite):
# via both protocols.
cls.ImpalaTestMatrix.add_dimension(create_client_protocol_dimension())
cls.ImpalaTestMatrix.add_constraint(hs2_parquet_constraint)
cls.ImpalaTestMatrix.add_dimension(create_impala_shell_executable_dimension())
def test_queries(self, vector):
if vector.get_value('table_format').file_format == 'avro':

View File

@@ -39,8 +39,8 @@ from tests.common.test_dimensions import (
create_uncompressed_text_dimension, create_single_exec_option_dimension)
from time import sleep, time
from util import (get_impalad_host_port, assert_var_substitution, run_impala_shell_cmd,
ImpalaShell, IMPALA_SHELL_EXECUTABLE, SHELL_IS_PYTHON_2,
build_shell_env, wait_for_query_state)
ImpalaShell, build_shell_env, wait_for_query_state,
create_impala_shell_executable_dimension, get_impala_shell_executable)
from contextlib import closing
@@ -144,7 +144,9 @@ class TestImpalaShell(ImpalaTestSuite):
cls.ImpalaTestMatrix.add_dimension(create_client_protocol_dimension())
cls.ImpalaTestMatrix.add_dimension(create_client_protocol_strict_dimension())
cls.ImpalaTestMatrix.add_constraint(lambda v:
v.get_value('protocol') != 'beeswax' or not v.get_value('strict_hs2_protocol'))
v.get_value('protocol') != 'beeswax' or not v.get_value('strict_hs2_protocol'))
# Test with python2 and the raw tarball
cls.ImpalaTestMatrix.add_dimension(create_impala_shell_executable_dimension())
def test_no_args(self, vector):
args = ['-q', DEFAULT_QUERY]
@@ -867,7 +869,8 @@ class TestImpalaShell(ImpalaTestSuite):
# Building a one-off shell command instead of using Util::ImpalaShell since we need
# to customize the impala daemon socket.
protocol = vector.get_value("protocol")
shell_cmd = [IMPALA_SHELL_EXECUTABLE, "--protocol={0}".format(protocol)]
impala_shell_executable = get_impala_shell_executable(vector)
shell_cmd = [impala_shell_executable, "--protocol={0}".format(protocol)]
if protocol == 'beeswax':
expected_output = "get_default_configuration"
else:
@@ -1103,16 +1106,13 @@ class TestImpalaShell(ImpalaTestSuite):
assert "1\t1\t10.1" in result.stdout, result.stdout
assert "2\t2\t20.2" in result.stdout, result.stdout
if (vector.get_value("protocol") in ('hs2', 'hs2-http')) and not SHELL_IS_PYTHON_2:
# The HS2 client returns binary values for float/double types, and these must
# be converted to strings for display. However, due to differences between the
# way that python2 and python3 represent floating point values, the output
# from the shell will differ with regard to which version of python the
# shell is running under.
assert "3\t3\t30.299999999999997" in result.stdout, result.stdout
else:
# python 2, or python 3 with beeswax protocol
assert "3\t3\t30.3" in result.stdout, result.stdout
# The HS2 client returns binary values for float/double types, and these must
# be converted to strings for display. However, due to differences between the
# way that python2 and python3 represent floating point values, the output
# from the shell will differ with regard to which version of python the
# shell is running under.
assert("3\t3\t30.299999999999997" in result.stdout or
"3\t3\t30.3" in result.stdout), result.stdout
assert "4\t4\t40.4" in result.stdout, result.stdout

View File

@@ -43,9 +43,9 @@ from tests.common.skip import SkipIfLocal
from tests.common.test_dimensions import (
create_client_protocol_dimension, create_client_protocol_strict_dimension,
create_uncompressed_text_dimension, create_single_exec_option_dimension)
from tests.shell.util import get_unused_port
from util import (assert_var_substitution, ImpalaShell, get_impalad_port, get_shell_cmd,
get_open_sessions_metric, IMPALA_SHELL_EXECUTABLE, spawn_shell)
get_open_sessions_metric, spawn_shell, get_unused_port,
create_impala_shell_executable_dimension, get_impala_shell_executable)
import SimpleHTTPServer
import SocketServer
@@ -175,7 +175,9 @@ class TestImpalaShellInteractive(ImpalaTestSuite):
cls.ImpalaTestMatrix.add_dimension(create_client_protocol_dimension())
cls.ImpalaTestMatrix.add_dimension(create_client_protocol_strict_dimension())
cls.ImpalaTestMatrix.add_constraint(lambda v:
v.get_value('protocol') != 'beeswax' or not v.get_value('strict_hs2_protocol'))
v.get_value('protocol') != 'beeswax' or not v.get_value('strict_hs2_protocol'))
# Test with python2 and the raw tarball
cls.ImpalaTestMatrix.add_dimension(create_impala_shell_executable_dimension())
def _expect_with_cmd(self, proc, cmd, vector, expectations=(), db="default"):
"""Executes a command on the expect process instance and verifies a set of
@@ -899,7 +901,7 @@ class TestImpalaShellInteractive(ImpalaTestSuite):
def test_line_with_leading_comment(self, vector, unique_database):
# IMPALA-2195: A line with a comment produces incorrect command.
if vector.get_value('strict_hs2_protocol'):
pytest.skip("Leading omments not supported in strict hs2 mode.")
pytest.skip("Leading comments not supported in strict hs2 mode.")
table = "{0}.leading_comment".format(unique_database)
run_impala_shell_interactive(vector, 'create table {0} (i int);'.format(table))
result = run_impala_shell_interactive(vector, '-- comment\n'
@@ -984,30 +986,6 @@ class TestImpalaShellInteractive(ImpalaTestSuite):
def test_fix_infinite_loop(self, vector):
# IMPALA-6337: Fix infinite loop.
# In case of TL;DR:
# - see IMPALA-9362 for details
# - see tests/shell/util.py for explanation of IMPALA_SHELL_EXECUTABLE
if os.getenv("IMPALA_HOME") not in IMPALA_SHELL_EXECUTABLE:
# The fix for IMPALA-6337 involved patching our internal version of
# sqlparse 0.1.19 in ${IMPALA_HOME}/shell/ext-py. However, when we
# create the stand-alone python package of the impala-shell for PyPI,
# we don't include the bundled 3rd party libs -- we expect users to
# install upstream 3rd party libraries from PyPI.
#
# We could try to bundle sqlparse with the PyPI package, but there we
# run into the issue that our bundled version is not python 3
# compatible. The real fix for this would be to upgrade to sqlparse 0.3.0,
# but that's not without complications. See IMPALA-9362 for details.
#
# For the time being, what this means is that IMPALA-6337 is fixed for
# people who are running the shell locally from any host/node that's part
# of a cluster where Impala is installed, but if they are running a
# standalone version of the shell on a client outside of a cluster, then
# they will still be relying on the upstream version of sqlparse 0.1.19,
# and so they may still be affected by the IMPALA-6337.
#
pytest.skip("Test will fail if shell is not part of dev environment.")
result = run_impala_shell_interactive(vector, "select 1 + 1; \"\n;\";")
if vector.get_value('strict_hs2_protocol'):
assert '| 2 |' in result.stdout
@@ -1194,9 +1172,10 @@ class TestImpalaShellInteractive(ImpalaTestSuite):
pytest.skip()
# Check that we get a message about the 503 error when we try to connect.
impala_shell_executable = get_impala_shell_executable(vector)
shell_args = ["--protocol={0}".format(protocol),
"-i{0}:{1}".format(http_503_server.HOST, http_503_server.PORT)]
shell_proc = spawn_shell([IMPALA_SHELL_EXECUTABLE] + shell_args)
shell_proc = spawn_shell([impala_shell_executable] + shell_args)
shell_proc.expect("HTTP code 503", timeout=10)
def test_http_interactions_extra(self, vector, http_503_server_extra):
@@ -1208,10 +1187,11 @@ class TestImpalaShellInteractive(ImpalaTestSuite):
pytest.skip()
# Check that we get a message about the 503 error when we try to connect.
impala_shell_executable = get_impala_shell_executable(vector)
shell_args = ["--protocol={0}".format(protocol),
"-i{0}:{1}".format(http_503_server_extra.HOST,
http_503_server_extra.PORT)]
shell_proc = spawn_shell([IMPALA_SHELL_EXECUTABLE] + shell_args)
shell_proc = spawn_shell([impala_shell_executable] + shell_args)
shell_proc.expect("HTTP code 503: Service Unavailable \[EXTRA\]", timeout=10)

View File

@@ -37,6 +37,7 @@ from tests.common.impala_service import ImpaladService
from tests.common.impala_test_suite import (IMPALAD_BEESWAX_HOST_PORT,
IMPALAD_HS2_HOST_PORT, IMPALAD_HS2_HTTP_HOST_PORT,
STRICT_HS2_HOST_PORT, STRICT_HS2_HTTP_HOST_PORT)
from tests.common.test_vector import ImpalaTestDimension
LOG = logging.getLogger('tests/shell/util.py')
LOG.addHandler(logging.StreamHandler())
@@ -44,22 +45,6 @@ LOG.addHandler(logging.StreamHandler())
SHELL_HISTORY_FILE = os.path.expanduser("~/.impalahistory")
IMPALA_HOME = os.environ['IMPALA_HOME']
# Note that pytest.config.getoption is deprecated usage. We use this
# in a couple of other places. Ultimately, it needs to be addressed if
# we ever want to get off of pytest 2.9.2.
IMPALA_SHELL_EXECUTABLE = pytest.config.getoption('shell_executable')
if IMPALA_SHELL_EXECUTABLE is None:
if ImpalaTestClusterProperties.get_instance().is_remote_cluster():
# With remote cluster testing, we cannot assume that the shell was built locally.
IMPALA_SHELL_EXECUTABLE = os.path.join(IMPALA_HOME, "bin/impala-shell.sh")
else:
# Test the locally built shell distribution.
IMPALA_SHELL_EXECUTABLE = os.path.join(
IMPALA_HOME, "shell/build", "impala-shell-" + IMPALA_LOCAL_BUILD_VERSION,
"impala-shell")
def build_shell_env(env=None):
""" Construct the environment for the shell to run in based on 'env', or the current
process's environment if env is None."""
@@ -74,41 +59,6 @@ def build_shell_env(env=None):
return env
def get_python_version_for_shell_env():
"""
Return the version of python belonging to the tested IMPALA_SHELL_EXECUTABLE.
We need this because some tests behave differently based on the version of
python being used to execute the impala-shell. However, since the test
framework itself is still being run with python2.7.x, sys.version_info
alone can't help us to determine the python version for the environment of
the shell executable. Instead, we have to invoke the shell, and then parse
the python version from the output. This information is present even in the
case of a fatal shell exception, e.g., being unable to establish a
connection to an impalad.
"""
version_check = Popen([IMPALA_SHELL_EXECUTABLE, '-q', 'version()'],
stdout=PIPE, stderr=PIPE, env=build_shell_env())
stdout, stderr = version_check.communicate()
# e.g. Starting Impala with Kerberos authentication using Python 3.7.6
start_msg_line = stderr.split('\n')[0]
py_version = start_msg_line.split()[-1] # e.g. 3.7.6
try:
major_version, minor_version, micro_version = py_version.split('.')
ret_val = int(major_version)
except (ValueError, UnboundLocalError) as e:
LOG.error(stderr)
sys.exit("Could not determine python version in shell env: {}".format(str(e)))
return ret_val
# Since both test_shell_commandline and test_shell_interactive import from
# this file, this check will be forced before any tests are run.
SHELL_IS_PYTHON_2 = True if (get_python_version_for_shell_env() == 2) else False
def assert_var_substitution(result):
assert_pattern(r'\bfoo_number=.*$', 'foo_number= 123123', result.stdout, \
'Numeric values not replaced correctly')
@@ -222,15 +172,16 @@ def get_impalad_port(vector):
def get_shell_cmd(vector):
"""Get the basic shell command to start the shell, given the provided test vector.
Returns the command as a list of string arguments."""
impala_shell_executable = get_impala_shell_executable(vector)
if vector.get_value_with_default("strict_hs2_protocol", False):
protocol = vector.get_value("protocol")
return [IMPALA_SHELL_EXECUTABLE,
return [impala_shell_executable,
"--protocol={0}".format(protocol),
"--strict_hs2_protocol",
"--use_ldap_test_password",
"-i{0}".format(get_impalad_host_port(vector))]
else:
return [IMPALA_SHELL_EXECUTABLE,
return [impala_shell_executable,
"--protocol={0}".format(vector.get_value("protocol")),
"-i{0}".format(get_impalad_host_port(vector))]
@@ -257,7 +208,7 @@ class ImpalaShellResult(object):
class ImpalaShell(object):
"""A single instance of the Impala shell. The proces is started when this object is
"""A single instance of the Impala shell. The process is started when this object is
constructed, and then users should repeatedly call send_cmd(), followed eventually by
get_result() to retrieve the process output. This constructor will wait until
Impala shell is connected for the specified timeout unless wait_until_connected is
@@ -349,3 +300,40 @@ def wait_for_query_state(vector, stmt, state, max_retry=15):
retry_count += 1
time.sleep(1.0)
raise Exception("Query didn't reach desired state: " + state)
def get_dev_impala_shell_executable():
  """Returns shell executable, and whether to include pypi variants.

  The result is a (path, include_pypi) tuple. include_pypi is True only when
  the path points at the locally built shell distribution; it is False when
  the executable comes from the 'shell_executable' pytest option or when
  running against a remote cluster.
  """
  # Note that pytest.config.getoption is deprecated usage. We use this
  # in a couple of other places. Ultimately, it needs to be addressed if
  # we ever want to get off of pytest 2.9.2.
  override = pytest.config.getoption('shell_executable')
  if override is not None:
    return override, False

  if ImpalaTestClusterProperties.get_instance().is_remote_cluster():
    # With remote cluster testing, we cannot assume that the shell was built locally.
    return os.path.join(IMPALA_HOME, "bin/impala-shell.sh"), False

  # Test the locally built shell distribution.
  local_dist_dir = "impala-shell-" + IMPALA_LOCAL_BUILD_VERSION
  local_shell = os.path.join(IMPALA_HOME, "shell/build", local_dist_dir, "impala-shell")
  return local_shell, True
def create_impala_shell_executable_dimension():
  """Builds the 'impala_shell' test dimension.

  The dimension always contains 'dev'. 'python2' is added only when
  get_dev_impala_shell_executable() reports that pypi variants should be
  included.
  """
  _dev_path, include_pypi = get_dev_impala_shell_executable()
  shell_variants = ['dev']
  if include_pypi:
    shell_variants.append('python2')
  return ImpalaTestDimension('impala_shell', *shell_variants)
def get_impala_shell_executable(vector):
  """Returns the impala-shell executable path selected by the test vector.

  Reads the 'impala_shell' value from 'vector'. impala-shell is invoked some
  places where adding a test vector may not make sense, so 'dev' is used as the
  default when the vector has no such value. Any value other than 'dev' or
  'python2' raises KeyError.
  """
  dev_executable, _include_pypi = get_dev_impala_shell_executable()
  executables_by_variant = {}
  executables_by_variant['dev'] = dev_executable
  executables_by_variant['python2'] = os.path.join(
      IMPALA_HOME, 'shell/build/py2_venv/bin/impala-shell')
  variant = vector.get_value_with_default('impala_shell', 'dev')
  return executables_by_variant[variant]