From ba808f67ddb55639d468da1d06bcad0da332b9be Mon Sep 17 00:00:00 2001 From: David Knupp Date: Fri, 20 Sep 2019 17:09:28 -0700 Subject: [PATCH] IMPALA-1071: Distributable python package for impala-shell The patch adds a set of scripts for converting the impala-shell into a true distributable python package. The package can be installed using familiar python commands, e.g.: $ python setup.py (install|develop) or $ pip install -e /path/to/dist/dir The entry point script, make_python_package.sh, will run as a part of the standard sequence of steps that results from calling buildall.sh, and will produce a gzipped tarball inside of Impala/shell/dist as an artifact. Thereafter, make_python_package.sh can be run manually any time. The expectation is that an official maintainer would need to manually upload official releases to the Python Package Index as appropriate. Change-Id: Ib8c745bddddf6a16f0c039430152745a2f00e044 Reviewed-on: http://gerrit.cloudera.org:8080/14181 Reviewed-by: David Knupp Tested-by: Impala Public Jenkins --- CMakeLists.txt | 4 + bin/rat_exclude_files.txt | 3 + shell/impala_client.py | 37 ++++++ shell/packaging/MANIFEST.in | 3 + shell/packaging/README.md | 73 +++++++++++ shell/packaging/__init__.py | 40 ++++++ shell/packaging/make_python_package.sh | 87 +++++++++++++ shell/packaging/requirements.txt | 8 ++ shell/packaging/setup.py | 169 +++++++++++++++++++++++++ 9 files changed, 424 insertions(+) create mode 100644 shell/packaging/MANIFEST.in create mode 100644 shell/packaging/README.md create mode 100644 shell/packaging/__init__.py create mode 100755 shell/packaging/make_python_package.sh create mode 100644 shell/packaging/requirements.txt create mode 100644 shell/packaging/setup.py diff --git a/CMakeLists.txt b/CMakeLists.txt index 6d72430e3..c3552188d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -421,6 +421,10 @@ add_custom_target(shell_tarball DEPENDS gen-deps COMMAND "${CMAKE_SOURCE_DIR}/shell/make_shell_tarball.sh" ) 
+add_custom_target(shell_pypi_package DEPENDS shell_tarball + COMMAND ${CMAKE_COMMAND} -E env "DIST_DIR=${CMAKE_SOURCE_DIR}/shell/dist" "${CMAKE_SOURCE_DIR}/shell/packaging/make_python_package.sh" +) + add_custom_target(cscope ALL DEPENDS gen-deps COMMAND "${CMAKE_SOURCE_DIR}/bin/gen-cscope.sh" ) diff --git a/bin/rat_exclude_files.txt b/bin/rat_exclude_files.txt index 9e15b0a40..aefaeefea 100644 --- a/bin/rat_exclude_files.txt +++ b/bin/rat_exclude_files.txt @@ -24,6 +24,8 @@ bin/diagnostics/__init__.py www/index.html lib/python/impala_py_lib/__init__.py lib/python/impala_py_lib/jenkins/__init__.py +shell/packaging/MANIFEST.in +shell/packaging/requirements.txt # See $IMPALA_HOME/LICENSE.txt be/src/gutil/* @@ -91,6 +93,7 @@ docker/README.md be/src/thirdparty/pcg-cpp-0.98/README.md lib/python/README.md lib/python/impala_py_lib/gdb/README.md +shell/packaging/README.md # http://www.apache.org/legal/src-headers.html: "Test data for which the addition of a # source header would cause the tests to fail." diff --git a/shell/impala_client.py b/shell/impala_client.py index 6761e8e85..4dd22a824 100755 --- a/shell/impala_client.py +++ b/shell/impala_client.py @@ -939,6 +939,8 @@ class ImpalaBeeswaxClient(ImpalaClient): if t.type == TApplicationException.UNKNOWN_METHOD: raise MissingThriftMethodException(t.message) raise + except TTransportException as e: + raise DisconnectedException("Error communicating with impalad: %s" % e) return (resp.version, resp.webserver_address) def _create_query_req(self, query_str, set_query_options): @@ -1094,4 +1096,39 @@ class ImpalaBeeswaxClient(ImpalaClient): if t.type == TApplicationException.UNKNOWN_METHOD: raise MissingThriftMethodException(t.message) raise RPCException("Application Exception : %s" % t) + except Exception as e: + # This final except clause should ONLY be exercised in the case of Impala + # shell being installed as a standalone python package from public PyPI, + # rather than being included as part of a typical Impala deployment. 
+ # + # Essentially, it's a hack that is required due to issues stemming from + # IMPALA-6808. Because of the way the Impala python environment has been + # somewhat haphazardly constructed, we end up polluting the top level Impala + # python environment with modules that should really be sub-modules. One of + # the principal places this occurs is with the various modules required by + # the Impala shell. This isn't a concern when the shell is invoked via a + # specially installed version of python that belongs to Impala, but it does + # become an issue when the shell is being run using the system python. + # + # When we install the shell as a standalone package, we need to construct + # it in such a way that all of the internal modules are contained within + # a top-level impala_shell namespace. However, this then breaks various + # imports and, in this case, exception handling in the original code. + # As far as I can tell, there's no clean way to address this without fully + # resolving IMPALA-6808. + # + # Without taking some additional measure here to recognize certain common + # exceptions, especially Beeswax exceptions raised by RPC calls, when + # errors occur during a standalone shell session, we wind up falling + # entirely through this block and returning nothing to the caller (which + # happens to be the primary command loop in impala_shell.py). This in turn + # has the result of disconnecting the shell in the case of, say, even simple + # typos in database or table names. 
+ if suppress_error_on_cancel and self.is_query_cancelled: + raise QueryCancelledByShellException() + else: + if "BeeswaxException" in str(e): + raise RPCException("ERROR: %s" % e.message) + if "QueryNotFoundException" in str(e): + raise QueryStateException('Error: Stale query handle') diff --git a/shell/packaging/MANIFEST.in b/shell/packaging/MANIFEST.in new file mode 100644 index 000000000..ec0d80f39 --- /dev/null +++ b/shell/packaging/MANIFEST.in @@ -0,0 +1,3 @@ +include *.txt *.md *.py +recursive-include impala_shell *.py +recursive-exclude impala_shell *.pyc diff --git a/shell/packaging/README.md b/shell/packaging/README.md new file mode 100644 index 000000000..cd40b121b --- /dev/null +++ b/shell/packaging/README.md @@ -0,0 +1,73 @@ +# Impala Interactive Shell + +You can use the Impala shell tool (impala-shell) to connect to an Impala +service. The shell allows you to set up databases and tables, insert data, +and issue queries. For ad hoc queries and exploration, you can submit SQL +statements in an interactive session. The impala-shell interpreter accepts +all the same SQL statements listed in +[Impala SQL Statements](http://impala.apache.org/docs/build/html/topics/impala_langref_sql.html), +plus some shell-only commands that you can use for tuning performance and +diagnosing problems. + +To automate your work, you can specify command-line options to process a single +statement or a script file. (Other avenues for Impala automation via python +are provided by Impyla or ODBC.) 
+ +## Installing + +``` +$ pip install impala-shell +``` + +## Online documentation + +* [Impala Shell Documentation](http://impala.apache.org/docs/build/html/topics/impala_impala_shell.html) +* [Apache Impala Documentation](http://impala.apache.org/impala-docs.html) + +## Quickstart + +### Non-interactive mode + +Processing a single query, e.g., ```show tables```: + +``` +$ impala-shell -i impalad-host.domain.com -d some_database -q 'show tables' +``` + +Processing a text file with a series of queries: + +``` +$ impala-shell -i impalad-host.domain.com -d some_database -f /path/to/queries.sql +``` + +### Launching the interactive shell + +To connect to an impalad host at the default service port (21000): + +``` +$ impala-shell -i impalad-host.domain.com +Starting Impala Shell without Kerberos authentication +Connected to impalad-host.domain.com:21000 +Server version: impalad version 2.11.0-SNAPSHOT RELEASE (build d4596f9ca3ea32a8008cdc809a7ac9a3dea47962) +*********************************************************************************** +Welcome to the Impala shell. +(Impala Shell v3.0.0-SNAPSHOT (73e90d2) built on Thu Mar 8 00:59:00 PST 2018) + +The '-B' command line flag turns off pretty-printing for query results. Use this +flag to remove formatting from results you want to save for later, or to benchmark +Impala. +*********************************************************************************** +[impalad-host.domain.com:21000] > +``` + +### Launching the interactive shell (secure mode) + +To connect to a secure host using kerberos and SSL: + +``` +$ impala-shell -k --ssl -i impalad-secure-host.domain.com +``` + +### Disconnecting + +To exit the shell when running interactively, press ```Ctrl-D``` at the shell prompt. 
diff --git a/shell/packaging/__init__.py b/shell/packaging/__init__.py new file mode 100644 index 000000000..43e0baa1f --- /dev/null +++ b/shell/packaging/__init__.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from os.path import dirname, abspath +import sys + +# When installing the python shell as a standalone package, this __init__ is +# used to workaround the issues stemming from IMPALA-6808. Because of the way +# the Impala python environment has been somewhat haphazardly constructed in +# a deployed cluster, it ends up being "polluted" with top-level modules that +# should really be sub-modules. One of the principal places this occurs is with +# the various modules required by the Impala shell. This isn't a concern when +# the shell is invoked via a specially installed version of python that belongs +# to Impala, but it does become an issue when the shell is being run using the +# system python. +# +# If we want to install the shell as a standalone package, we need to construct +# it in such a way that all of the internal modules are contained within a +# top-level impala_shell namespace. However, this then breaks various imports +# throughout the Impala shell code. 
The way this file corrects that is to add +# the impala_shell directory to PYTHONPATH only when the shell is invoked. As +# far as I can tell, there's no cleaner way to address this without fully +# resolving IMPALA-6808. +impala_shell_dir = dirname(abspath(__file__)) +sys.path.append(impala_shell_dir) diff --git a/shell/packaging/make_python_package.sh b/shell/packaging/make_python_package.sh new file mode 100755 index 000000000..ba95148b6 --- /dev/null +++ b/shell/packaging/make_python_package.sh @@ -0,0 +1,87 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# ---------------------------------------------------------------------- +# This script is invoked during the Impala build process, and creates +# a distributable python package of the Impala shell. The resulting +# archive will be saved to: +# +# ${IMPALA_HOME}/shell/dist/impala_shell-.tar.gz +# +# Until the thrift-generated python files in ${IMPALA_HOME}/shell/gen-py +# have been created by the build process, this script will not work. +# It also relies upon the impala_build_version.py file created by the +# parent packaging script, ${IMPALA_HOME}/shell/make_shell_tarball.sh, +# which needs to be run before this script will work. 
+#
+# After those files exist, however, this script can be run again at will.
+#
+# Environment variables honored by this script:
+#
+#   IMPALA_HOME    (required) root of the Impala source tree
+#   THRIFT_HOME    (required) thrift installation that supplies the python
+#                  thrift library bundled into the package
+#   DIST_DIR       directory that receives the tarball
+#                  (default: the "dist" directory next to this script)
+#   NO_CLEAN_DIST  if "true", keep the existing contents of DIST_DIR and keep
+#                  the staging directory once the build has finished
+#   RELEASE_TYPE   forwarded to setup.py (aN, bN or rcN). PACKAGE_TYPE, the
+#                  name this script used to forward, is accepted as a fallback
+#   OFFICIAL       forwarded to setup.py; "true" omits the .dev<timestamp>
+#                  suffix from the package version
+
+set -eu -o pipefail
+
+# Fail with an explicit message rather than a bare "unbound variable" error.
+: "${IMPALA_HOME:?IMPALA_HOME must be set}"
+: "${THRIFT_HOME:?THRIFT_HOME must be set}"
+
+WORKING_DIR="$(cd "$(dirname "$0")" ; pwd -P )"
+SHELL_HOME="${IMPALA_HOME}"/shell
+STAGING_DIR="${WORKING_DIR}"/staging
+DIST_DIR="${DIST_DIR:-$WORKING_DIR/dist}"
+PACKAGE_DIR="${STAGING_DIR}"/impala_shell_package
+MODULE_LIB_DIR="${PACKAGE_DIR}"/impala_shell
+NO_CLEAN_DIST="${NO_CLEAN_DIST:-}"
+
+# Copy everything that belongs in the package into a fresh staging directory.
+assemble_package_files() {
+  # Always start from an empty staging area. A directory left behind by an
+  # earlier run (NO_CLEAN_DIST=true, or a run that failed part way) still
+  # contains version.txt, and setup.py reuses that file instead of deriving a
+  # new version whenever it exists.
+  rm -rf "${STAGING_DIR}"
+  mkdir -p "${MODULE_LIB_DIR}"
+
+  cp -r "${SHELL_HOME}/gen-py"/* "${MODULE_LIB_DIR}"
+  cp -r "${THRIFT_HOME}/python/lib/python2.7/site-packages/thrift" "${MODULE_LIB_DIR}"
+
+  cp "${WORKING_DIR}/__init__.py" "${MODULE_LIB_DIR}"
+  cp "${SHELL_HOME}/impala_shell.py" "${MODULE_LIB_DIR}"
+  cp "${SHELL_HOME}/impala_client.py" "${MODULE_LIB_DIR}"
+  cp "${SHELL_HOME}/option_parser.py" "${MODULE_LIB_DIR}"
+  cp "${SHELL_HOME}/shell_output.py" "${MODULE_LIB_DIR}"
+  cp "${SHELL_HOME}/impala_shell_config_defaults.py" "${MODULE_LIB_DIR}"
+  cp "${SHELL_HOME}/TSSLSocketWithWildcardSAN.py" "${MODULE_LIB_DIR}"
+
+  cp "${SHELL_HOME}/packaging/README.md" "${PACKAGE_DIR}"
+  cp "${SHELL_HOME}/packaging/MANIFEST.in" "${PACKAGE_DIR}"
+  cp "${SHELL_HOME}/packaging/requirements.txt" "${PACKAGE_DIR}"
+  cp "${SHELL_HOME}/packaging/setup.py" "${PACKAGE_DIR}"
+
+  cp "${IMPALA_HOME}/LICENSE.txt" "${PACKAGE_DIR}"
+}
+
+# Build the sdist tarball from the staged files and write it to ${DIST_DIR}.
+create_distributable_python_package() {
+  if [[ "${NO_CLEAN_DIST}" != "true" ]]; then
+    rm -rf "${DIST_DIR}"
+  fi
+
+  mkdir -p "${DIST_DIR}"
+
+  pushd "${PACKAGE_DIR}"
+  echo "Building package..."
+  # setup.py reads RELEASE_TYPE, not PACKAGE_TYPE; map the latter onto the
+  # former so that either spelling takes effect. BUILD_VERSION is deliberately
+  # not re-exported here: it reaches setup.py through the inherited
+  # environment, and exporting it as an empty string would override the
+  # default version.
+  RELEASE_TYPE="${RELEASE_TYPE:-${PACKAGE_TYPE:-}}" OFFICIAL="${OFFICIAL:-}" \
+    python setup.py sdist --dist-dir "${DIST_DIR}"
+  popd
+
+  if [[ "${NO_CLEAN_DIST}" != "true" ]]; then
+    rm -rf "${STAGING_DIR}"
+  fi
+}
+
+assemble_package_files
+create_distributable_python_package
diff --git a/shell/packaging/requirements.txt b/shell/packaging/requirements.txt
new file mode 100644
index 000000000..32aef56e5
--- /dev/null
+++ b/shell/packaging/requirements.txt
@@ -0,0 +1,8 @@
+bitarray==1.0.1
+prettytable==0.7.1
+sasl==0.2.1
+setuptools>=36.8.0
+six==1.11.0
+sqlparse==0.1.19
+thrift==0.9.3
+thrift_sasl==0.2.1
diff --git a/shell/packaging/setup.py b/shell/packaging/setup.py
new file mode 100644
index 000000000..173e0d807
--- /dev/null
+++ b/shell/packaging/setup.py
@@ -0,0 +1,169 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+"""Set up the Impala shell python package."""
+
+import datetime
+import os
+import re
+import sys
+import time
+
+from impala_shell import impala_build_version
+from setuptools import find_packages, setup
+from textwrap import dedent
+
+CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
+
+
+def _package_path(path):
+    """Resolve a path against the directory that contains setup.py.
+
+    Absolute paths are returned unchanged, because os.path.join discards
+    CURRENT_DIR when its second argument is absolute.
+    """
+    return os.path.join(CURRENT_DIR, path)
+
+
+def _read_file(path):
+    """Return the full contents of a file, closing the handle afterwards."""
+    with open(_package_path(path)) as source:
+        return source.read()
+
+
+def parse_requirements(requirements_file='requirements.txt'):
+    """
+    Parse requirements from the requirements file, stripping comments.
+
+    Args:
+      requirements_file: path to a requirements file; a relative path is
+        resolved against the directory that contains setup.py
+
+    Returns:
+      a list of requirement specifiers, one per entry, with comments, blank
+      lines and surrounding whitespace (including newlines) removed
+    """
+    requirements = []
+    for raw_line in _read_file(requirements_file).splitlines():
+        # Everything after a '#' is a comment.
+        requirement = raw_line.split('#')[0].strip()
+        if requirement:
+            requirements.append(requirement)
+    return requirements
+
+
+def get_version():
+    """Generate package version string when calling 'setup.py'.
+
+    When setup.py is being used to CREATE a distribution, e.g., via setup.py
+    sdist or setup.py bdist, then use the output from
+    impala_build_version.get_version(), and append modifiers as specified by
+    the RELEASE_TYPE and OFFICIAL environment variables. By default, the
+    package created will be a dev release, designated by timestamp. For
+    example, if get_version() returns the string 3.0.0-SNAPSHOT, the package
+    version may be something like 3.0.0.dev20180322154653.
+
+    It's also possible to set the BUILD_VERSION environment variable to
+    override the default build value returned from
+    impala_build_version.get_version(). An empty BUILD_VERSION is treated the
+    same as an unset one.
+
+    E.g., to specify an official 3.4 beta 2 release (3.4b2), one would call:
+
+      BUILD_VERSION=3.4 RELEASE_TYPE=b2 OFFICIAL=true python setup.py sdist
+
+    The generated version string is written to a version.txt file next to
+    setup.py, to be referenced when the distribution is installed.
+
+    When setup.py is invoked during installation, e.g., via pip install or
+    setup.py install, read the package version from the version.txt file,
+    which is presumed to contain a single line containing a valid PEP-440
+    version string. The file should have been generated when the distribution
+    being installed was created. (Although a version.txt file can also be
+    created manually.)
+
+    See https://www.python.org/dev/peps/pep-0440/ for more info on python
+    version strings.
+
+    Returns:
+      A package version string compliant with PEP-440
+
+    Raises:
+      SystemExit: if no version can be derived, if RELEASE_TYPE is malformed,
+        or if an existing version.txt is empty
+    """
+    version_path = os.path.join(CURRENT_DIR, 'version.txt')
+
+    if os.path.isfile(version_path):
+        # setup.py is being invoked during installation, e.g., via pip install
+        # or setup.py install: READ the version written at packaging time.
+        with open(version_path) as version_file:
+            # Drop the trailing newline that a hand-written file would carry.
+            package_version = version_file.readline().strip()
+        if not package_version:
+            sys.exit('{0} exists but is empty.'.format(version_path))
+        return package_version
+
+    # setup.py is being executed to create a distribution, e.g., via setup.py
+    # sdist or setup.py bdist: derive the version and WRITE the version.txt
+    # file that will later be used for installations.
+    package_version = os.getenv('BUILD_VERSION')
+    if not package_version:
+        version_match = re.search(r'\d+\.\d+\.\d+',
+                                  impala_build_version.get_version())
+        if version_match is None:
+            sys.exit('Unable to acquire Impala version.')
+        package_version = version_match.group(0)
+
+    # packages can be marked as alpha, beta, or rc RELEASE_TYPE
+    release_type = os.getenv('RELEASE_TYPE')
+    if release_type:
+        # Anchor the end of the pattern so that trailing junk such as 'b2x'
+        # is rejected instead of being appended to the version.
+        if not re.match(r'(a|b|rc)\d+\Z', release_type):
+            msg = """\
+            RELEASE_TYPE '{0}' does not conform to any PEP-440 release format:
+
+              aN (for alpha releases)
+              bN (for beta releases)
+              rcN (for release candidates)
+
+            where N is the number of the release"""
+            sys.exit(dedent(msg).format(release_type))
+        package_version += release_type
+
+    # packages that are not marked OFFICIAL have ".dev" + a timestamp appended
+    if os.getenv('OFFICIAL') != 'true':
+        epoch_t = time.time()
+        ts_fmt = '%Y%m%d%H%M%S'
+        timestamp = datetime.datetime.fromtimestamp(epoch_t).strftime(ts_fmt)
+        package_version = '{0}.dev{1}'.format(package_version, timestamp)
+
+    # Write to the same path that is checked above, not to whatever the
+    # current working directory happens to be.
+    with open(version_path, 'w') as version_file:
+        version_file.write(package_version)
+
+    return package_version
+
+
+setup(
+    name='impala_shell',
+    python_requires='>2.6, <3.0.0',
+    version=get_version(),
+    description='Impala Shell',
+    long_description_content_type='text/markdown',
+    long_description=_read_file('README.md'),
+    author="Impala Dev",
+    author_email='dev@impala.apache.org',
+    url='https://impala.apache.org/',
+    license='Apache Software License',
+    packages=find_packages(),
+    include_package_data=True,
+    install_requires=parse_requirements(),
+    entry_points={
+        'console_scripts': [
+            'impala-shell = impala_shell.impala_shell:impala_shell_main'
+        ]
+    },
+    classifiers=[
+        'Development Status :: 5 - Production/Stable',
+        'Environment :: Console',
+        'Intended Audience :: Developers',
+        'Intended Audience :: End Users/Desktop',
+        'Intended Audience :: Science/Research',
+        'License :: OSI Approved :: Apache Software License',
+        'Operating System :: MacOS :: MacOS X',
+        'Operating System :: POSIX :: Linux',
+        'Programming Language :: Python :: 2 :: Only',
+        'Programming Language :: Python :: 2.6',
+        'Programming Language :: Python :: 2.7',
+        'Topic :: Database :: Front-Ends'
+    ]
+)