mirror of
https://github.com/apache/impala.git
synced 2025-12-19 09:58:28 -05:00
Python 3 changed the behavior of imports with PEP328. Existing imports become absolute unless they use the new relative import syntax. This adapts the impala-shell code to use absolute imports, fixing issues where it is imported from our test code. There are several parts to this: 1. It moves impala shell code into shell/impala_shell. This matches the directory structure of the PyPi package. 2. It changes the imports in the shell code to be absolute paths (i.e. impala_shell.foo rather than foo). This fixes issues with Python 3 absolute imports. It also eliminates the need for ugly hacks in the PyPi package's __init__.py. 3. This changes Thrift generation to put it directly in $IMPALA_HOME/shell rather than $IMPALA_HOME/shell/gen-py. This means that the generated Thrift code is rooted in the same directory as the shell code. 4. This changes the PYTHONPATH to include $IMPALA_HOME/shell and not $IMPALA_HOME/shell/gen-py. This means that the test code is using the same import paths as the pypi package. With all of these changes, the source code is very close to the directory structure of the PyPi package. As long as CMake has generated the thrift files and the Python version file, only a few differences remain. This removes those differences by moving the setup.py / MANIFEST.in and other files from the packaging directory to the top-level shell/ directory. This means that one can pip install directly from the source code. i.e. pip install $IMPALA_HOME/shell This also moves the shell tarball generation script to the packaging directory and changes bin/impala-shell.sh to use Python 3. This sorts the imports using isort for the affected Python files. Testing: - Ran a regular core job with Python 2 - Ran a core job with Python 3 and verified that the absolute import issues are gone. Change-Id: Ica75a24fa6bcb78999b9b6f4f4356951b81c3124 Reviewed-on: http://gerrit.cloudera.org:8080/22330 Reviewed-by: Riza Suminto <riza.suminto@cloudera.com> Reviewed-by: Michael Smith <michael.smith@cloudera.com> Tested-by: Riza Suminto <riza.suminto@cloudera.com>
154 lines
6.2 KiB
Python
154 lines
6.2 KiB
Python
#
|
|
# Licensed to the Apache Software Foundation (ASF) under one
|
|
# or more contributor license agreements. See the NOTICE file
|
|
# distributed with this work for additional information
|
|
# regarding copyright ownership. The ASF licenses this file
|
|
# to you under the Apache License, Version 2.0 (the
|
|
# "License"); you may not use this file except in compliance
|
|
# with the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing,
|
|
# software distributed under the License is distributed on an
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
# KIND, either express or implied. See the License for the
|
|
# specific language governing permissions and limitations
|
|
# under the License.
|
|
#
|
|
|
|
import re
|
|
from sys import stdout
|
|
|
|
|
|
class ThriftPrettyPrinter(object):
|
|
"""Implements a pretty printer for Thrift objects.
|
|
Generates string representations. Does not output
|
|
to stdout, stderr, or any other file handles."""
|
|
|
|
# Inputs:
|
|
# redacted_fields - list of names of object attributes whose
|
|
# values will not be printed out
|
|
# indent - string containing only spaces, used as the
|
|
# base indentation where all other indentations
|
|
# will be multiples of this string
|
|
# objects_to_skip - list of names of objects attributes that
|
|
# will not be printed out, useful to ignore
|
|
# duplicate information such as query results
|
|
def __init__(self,
|
|
redacted_fields=("secret", "password"),
|
|
indent=" ",
|
|
objects_to_skip=("TRowSet", "TGetRuntimeProfileResp")):
|
|
if redacted_fields is not None:
|
|
assert type(redacted_fields) is list or \
|
|
type(redacted_fields) is tuple, \
|
|
"redacted_fields must be either a list or a tuple"
|
|
self.base_indent = indent
|
|
self.redacted_fields = redacted_fields
|
|
self.objects_to_skip = objects_to_skip
|
|
|
|
self._objname_re = re.compile("^.*?'(.*?)'.*?$")
|
|
|
|
def print_obj(self, thrift_obj, file_handle=stdout):
|
|
"""Prints the provided 'thrift_obj' to the provided
|
|
file handle. If no file handle is specified, then
|
|
stdout will be used. The 'file_handle' object must
|
|
have a write(string) method.
|
|
|
|
While this class is specifically targeted to printing
|
|
Thrift objects, there is no technical limitation preventing
|
|
any other type of object from being printed. However, the
|
|
output of non-Thrift objects may not be as nicely formatted."""
|
|
|
|
# Inputs:
|
|
# thrift_obj - the object to print out, its attributes will
|
|
# be walked recursively through the entire
|
|
# object structure and printed out
|
|
# file_handle - where the object will be written, defaults to stdout
|
|
# but can be any object with a write(str) method
|
|
self._internal_print(thrift_obj, self.base_indent, file_handle)
|
|
|
|
def _internal_print(self, thrift_obj, indent, file_handle):
|
|
"""Recursive function that does the work of walking and printing
|
|
an object."""
|
|
# parse out the type name of the thrift object
|
|
obj_name = self._objname_re.match(str(type(thrift_obj))) \
|
|
.group(1).split(".")[-1]
|
|
file_handle.write("<{0}>".format(obj_name))
|
|
|
|
if self.objects_to_skip.count(obj_name):
|
|
file_handle.write(" - <skipping>\n")
|
|
return
|
|
|
|
indent = "{0}{1}".format(indent, self.base_indent)
|
|
file_handle.write("\n")
|
|
|
|
if obj_name == "list" or obj_name == "tuple":
|
|
# lists and tuples have to be handled differently
|
|
# because the vars function does not operate on them
|
|
for attr_val in thrift_obj:
|
|
file_handle.write(indent)
|
|
self._internal_print(attr_val, indent, file_handle)
|
|
else:
|
|
# print out simple types first before printing out objects
|
|
# this ensures the simple types are easier to see
|
|
child_simple_attrs = {}
|
|
child_objs = {}
|
|
for attr_name in vars(thrift_obj):
|
|
attr_val = getattr(thrift_obj, attr_name)
|
|
if (hasattr(attr_val, '__dict__')
|
|
or attr_val is list
|
|
or attr_val is tuple):
|
|
child_objs[attr_name] = attr_val
|
|
else:
|
|
child_simple_attrs[attr_name] = attr_val
|
|
|
|
# print out child attributes in alphabetical order
|
|
for child_attr_name in sorted(child_simple_attrs):
|
|
self._print_attr(child_attr_name,
|
|
child_simple_attrs[child_attr_name],
|
|
indent,
|
|
file_handle)
|
|
|
|
# print out complex types objects, lists, or tuples
|
|
# in alphabetical order
|
|
for attr_name in sorted(child_objs):
|
|
self._print_attr(attr_name,
|
|
child_objs[attr_name],
|
|
indent,
|
|
file_handle)
|
|
|
|
def _print_attr(self, attr_name, attr_val, indent, file_handle):
|
|
"""Handles a single object attribute by either printing out
|
|
its name/value (for simple types) or recursing down into the
|
|
object (for objects/lists/tuples)."""
|
|
file_handle.write(indent)
|
|
|
|
if attr_val is not None and self.redacted_fields.count(attr_name) > 0:
|
|
file_handle.write("- {0}: *******\n".format(attr_name))
|
|
elif attr_val is None:
|
|
file_handle.write("- {0}: <None>\n".format(attr_name))
|
|
elif type(attr_val) is list or type(attr_val) is tuple:
|
|
file_handle.write("[")
|
|
self._internal_print(attr_val, indent, file_handle)
|
|
file_handle.write("{0}]\n".format(indent))
|
|
elif hasattr(attr_val, '__dict__'):
|
|
indent += "{0:{1}} {2}".format("", len(attr_name), self.base_indent)
|
|
file_handle.write("- {0}: ".format(attr_name))
|
|
self._internal_print(attr_val, indent, file_handle)
|
|
else:
|
|
file_handle.write("- {0}: ".format(attr_name))
|
|
try:
|
|
str(attr_val).decode("ascii")
|
|
file_handle.write("{0}".format(attr_val))
|
|
except UnicodeDecodeError:
|
|
# python2 - string contains binary data
|
|
file_handle.write("<binary data>")
|
|
except AttributeError:
|
|
# python3 - does not require decoding strings and thus falls into this code
|
|
if isinstance(attr_val, bytes):
|
|
file_handle.write("<binary data>")
|
|
else:
|
|
file_handle.write("{0}".format(attr_val))
|
|
file_handle.write("\n")
|