Files
impala/common/thrift/CMakeLists.txt
Joe McDonnell ea0969a772 IMPALA-11980 (part 2): Fix absolute import issues for impala_shell
Python 3 changed the behavior of imports with PEP 328. Existing
imports become absolute unless they use the new relative import
syntax. This adapts the impala-shell code to use absolute
imports, fixing issues where it is imported from our test code.

There are several parts to this:
1. It moves impala shell code into shell/impala_shell.
   This matches the directory structure of the PyPi package.
2. It changes the imports in the shell code to be
   absolute paths (i.e. impala_shell.foo rather than foo).
   This fixes issues with Python 3 absolute imports.
   It also eliminates the need for ugly hacks in the PyPi
   package's __init__.py.
3. This changes Thrift generation to put it directly in
   $IMPALA_HOME/shell rather than $IMPALA_HOME/shell/gen-py.
   This means that the generated Thrift code is rooted in
   the same directory as the shell code.
4. This changes the PYTHONPATH to include $IMPALA_HOME/shell
   and not $IMPALA_HOME/shell/gen-py. This means that the
   test code is using the same import paths as the pypi
   package.

With all of these changes, the source code is very close
to the directory structure of the PyPi package. As long as
CMake has generated the thrift files and the Python version
file, only a few differences remain. This removes those
differences by moving the setup.py / MANIFEST.in and other
files from the packaging directory to the top-level
shell/ directory. This means that one can pip install
directly from the source code. i.e. pip install $IMPALA_HOME/shell

This also moves the shell tarball generation script to the
packaging directory and changes bin/impala-shell.sh to use
Python 3.

This sorts the imports using isort for the affected Python files.

Testing:
 - Ran a regular core job with Python 2
 - Ran a core job with Python 3 and verified that the absolute
   import issues are gone.

Change-Id: Ica75a24fa6bcb78999b9b6f4f4356951b81c3124
Reviewed-on: http://gerrit.cloudera.org:8080/22330
Reviewed-by: Riza Suminto <riza.suminto@cloudera.com>
Reviewed-by: Michael Smith <michael.smith@cloudera.com>
Tested-by: Riza Suminto <riza.suminto@cloudera.com>
2025-05-21 15:14:11 +00:00

307 lines
14 KiB
CMake

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Helper function to generate build rules. For each input thrift file, this function
# registers a rule that maps the input file to the generated C++ "_types" files.
# Thrift will generate multiple output files for each input (including java files) and
# ideally, we'd specify all of the outputs for dependency tracking.
# Unfortunately, it's not easy to figure out all the output files without parsing the
# thrift input. (TODO: can thrift tell us what the java output files will be?)
# The list of output files is used for build dependency tracking so it's not necessary to
# capture all the output files.
#
# Arguments:
#   VAR  - name of the variable, in the caller's scope, that receives the list of tracked
#          outputs (gen-cpp/<name>_types.cpp and gen-cpp/<name>_types.h for each input).
#   ARGN - the input thrift files. Calling without any is reported with SEND_ERROR.
#
# To call this function, pass it the output variable name followed by the input thrift
# files (arguments are whitespace separated, there is no comma):
#   THRIFT_GEN(OUTPUT_FILES ${THRIFT_FILES})
#
# The following variables are read when the function is called, so they must be set
# before the call: BE_OUTPUT_DIR, PYTHON_OUTPUT_DIR, TCLI_SERVICE_THRIFT,
# THRIFT_QUIET_WRAPPER, THRIFT_CPP_INCLUDE_DIR_OPTION, THRIFT_{CPP,JAVA,PY}_COMPILER,
# JAVA_FE_ARGS and PYTHON_ARGS.
#
# Steps run for every file: C++ generation, Python generation, removal of the stray
# top-level __init__.py. Additional per-file steps:
#   - beeswax.thrift:        C++ generation is recursive ("-r").
#   - ImpalaService.thrift:  add_override.sh is run on two generated headers.
#   - TCLIService / parquet: Java generation is skipped.
#
# NOTE (inherited, unverified): cmake seems to be case sensitive for some keywords.
# Changing the first IF check to lower case makes it not work. The upper-case guard below
# is kept as-is for that reason. TODO: investigate this
function(THRIFT_GEN VAR)
  IF (NOT ARGN)
    MESSAGE(SEND_ERROR "Error: THRIFT_GEN called without any src files")
    RETURN()
  ENDIF()

  set(${VAR})
  foreach(THRIFT_FILE ${ARGN})
    # Get full path
    get_filename_component(ABS_THRIFT_FILE ${THRIFT_FILE} ABSOLUTE)
    # Get basename without the file extension
    get_filename_component(THRIFT_FILE_WE ${THRIFT_FILE} NAME_WE)

    # All the output files we can determine based on filename.
    #  - Does not include .skeleton.cpp files
    #  - Does not include java output files
    set(OUTPUT_BE_FILE
      "${BE_OUTPUT_DIR}/gen-cpp/${THRIFT_FILE_WE}_types.cpp"
      "${BE_OUTPUT_DIR}/gen-cpp/${THRIFT_FILE_WE}_types.h")
    list(APPEND ${VAR} ${OUTPUT_BE_FILE})

    # BeeswaxService thrift generation
    # It depends on hive_meta_store, which in turn depends on fb303.
    # The java dependency is handled by maven.
    # We need to generate C++ src file for the parent dependencies using the "-r" option.
    set(CPP_ARGS ${THRIFT_CPP_INCLUDE_DIR_OPTION}
      --gen cpp:moveable_types,no_default_operators,templates -o ${BE_OUTPUT_DIR})
    if("${THRIFT_FILE}" STREQUAL "beeswax.thrift")
      set(CPP_ARGS -r ${CPP_ARGS})
    endif()

    # Optional steps. Each variable is either empty or a complete "COMMAND ..." argument
    # sequence that is spliced into the single add_custom_command() call below.
    set(OVERRIDE_FIXUP_STEP)
    set(JAVA_GEN_STEP
      COMMAND ${THRIFT_QUIET_WRAPPER} ${THRIFT_JAVA_COMPILER} ${JAVA_FE_ARGS} ${THRIFT_FILE})

    # Operands are quoted so that an empty or unusual value cannot break the condition or
    # be re-interpreted as a variable name.
    if("${THRIFT_FILE}" STREQUAL "${TCLI_SERVICE_THRIFT}"
        OR "${THRIFT_FILE}" STREQUAL "parquet.thrift")
      # Do not generate Java HiveServer2 and Parquet files because we should use the jar
      # from Hive or Parquet.
      set(JAVA_GEN_STEP)
    elseif("${THRIFT_FILE}" STREQUAL "ImpalaService.thrift")
      # Because of some CMake bug we can't just use
      #   sed -i.bak "'s|\\(dispatchCallTemplated.*\\));|\\1) override;|'"
      # here because for some reason CMake doesn't remove the quotes and generates
      # wrong Makefiles. Strange, because at global scope add_custom_command works fine.
      set(OVERRIDE_FIXUP_STEP
        COMMAND ${CMAKE_SOURCE_DIR}/bin/cmake_aux/add_override.sh
                ${BE_OUTPUT_DIR}/gen-cpp/ImpalaHiveServer2Service.h
                ${BE_OUTPUT_DIR}/gen-cpp/ImpalaService.h)
    endif()

    # Step order: C++, [override fix-up], [Java], Python, __init__.py cleanup.
    add_custom_command(
      OUTPUT ${OUTPUT_BE_FILE}
      COMMAND ${THRIFT_QUIET_WRAPPER} ${THRIFT_CPP_COMPILER} ${CPP_ARGS} ${THRIFT_FILE}
      ${OVERRIDE_FIXUP_STEP}
      ${JAVA_GEN_STEP}
      COMMAND ${THRIFT_QUIET_WRAPPER} ${THRIFT_PY_COMPILER} ${PYTHON_ARGS} ${THRIFT_FILE}
      # Ugly hack: Thrift incorrectly generates an unnecessary __init__.py at the top
      # level. Remove it until we can patch Thrift to avoid generating this.
      COMMAND rm -f ${PYTHON_OUTPUT_DIR}/__init__.py
      DEPENDS ${ABS_THRIFT_FILE}
      COMMENT "Running thrift compiler on ${THRIFT_FILE}"
      VERBATIM
    )
  endforeach()

  # Hand the accumulated output list back to the caller.
  set(${VAR} ${${VAR}} PARENT_SCOPE)
endfunction()
# Registers one build rule per input thrift file that runs the Thrift Java compiler with
# JAVA_EXT_DS_ARGS (the external data source API output location).
#
# Arguments:
#   VAR  - name of the variable, in the caller's scope, that receives the list of marker
#          files (one per input) that stand in for the generated Java sources.
#   ARGN - the input thrift files. Calling without any is reported with SEND_ERROR.
#
# Example: THRIFT_GEN_DS(MARKER_FILES ${THRIFT_FILES})
function(THRIFT_GEN_DS VAR)
  if(NOT ARGN)
    message(SEND_ERROR "Error: THRIFT_GEN_DS called without any src files")
    return()
  endif()

  set(${VAR})
  foreach(THRIFT_FILE ${ARGN})
    get_filename_component(THRIFT_ABS_PATH ${THRIFT_FILE} ABSOLUTE)
    get_filename_component(THRIFT_STEM ${THRIFT_FILE} NAME_WE)

    # A dummy marker file records whether Thrift compilation of the current file has been
    # run. It is written only after the compiler step succeeds (the steps are chained
    # with &&), and deleting it re-triggers compilation of this file.
    set(MARKER_DIR "${EXT_DS_OUTPUT_DIR}/../target/tmp/generated-sources/")
    set(MARKER_FILE "${MARKER_DIR}/${THRIFT_STEM}.txt")
    # Kept as three separate list elements: each one is passed to echo as its own
    # argument.
    set(MARKER_TEXT
      "CMake state file for Thrift compilation of"
      "${THRIFT_ABS_PATH}, deletion of this file will cause"
      "${THRIFT_FILE} to be recompiled during the next build")
    list(APPEND ${VAR} ${MARKER_FILE})

    add_custom_command(
      OUTPUT ${MARKER_FILE}
      COMMAND ${THRIFT_QUIET_WRAPPER} ${THRIFT_JAVA_COMPILER} ${JAVA_EXT_DS_ARGS} ${THRIFT_FILE}
              && mkdir -p ${MARKER_DIR}
              && echo ${MARKER_TEXT} > ${MARKER_FILE}
      DEPENDS ${THRIFT_ABS_PATH}
      COMMENT "Running thrift compiler for ext-data-source on ${THRIFT_FILE}"
      VERBATIM
    )
  endforeach()

  # Hand the accumulated marker list back to the caller.
  set(${VAR} ${${VAR}} PARENT_SCOPE)
endfunction()
# --- Thrift input locations (relative paths) ---------------------------------------------
# Directory for the third-party thrift files that this build rewrites
# (hive_metastore.thrift and fb303.thrift).
set(THIRDPARTY_THRIFT_DIR "thirdparty_thrift")
# Directory for the Hive API thrift files. The Hive major version comes from the
# environment at the time CMake configures the build.
set(HIVE_THRIFT_SOURCE_DIR "hive-$ENV{IMPALA_HIVE_MAJOR_VERSION}-api")
set(TCLI_SERVICE_THRIFT "${HIVE_THRIFT_SOURCE_DIR}/TCLIService.thrift")

# --- Tools ---------------------------------------------------------------------------------
message("Using Thrift CPP compiler: ${THRIFT_CPP_COMPILER}")
message("Using Thrift JAVA compiler: ${THRIFT_JAVA_COMPILER}")
message("Using Thrift PY compiler: ${THRIFT_PY_COMPILER}")
# Wrapper script that every thrift compiler invocation is routed through.
set(THRIFT_QUIET_WRAPPER "${CMAKE_SOURCE_DIR}/bin/thrift-quiet-wrapper.sh")

# --- Include paths handed to the thrift compiler, one variable per target language --------
set(THRIFT_CPP_INCLUDE_DIR_OPTION
  -I ${THIRDPARTY_THRIFT_DIR}
  -I ${HIVE_THRIFT_SOURCE_DIR})
set(THRIFT_JAVA_INCLUDE_DIR_OPTION
  -I ${THIRDPARTY_THRIFT_DIR}
  -I ${HIVE_THRIFT_SOURCE_DIR})
set(THRIFT_PY_INCLUDE_DIR_OPTION
  -I ${THIRDPARTY_THRIFT_DIR}
  -I ${HIVE_THRIFT_SOURCE_DIR})

# --- Output locations ------------------------------------------------------------------------
set(BE_OUTPUT_DIR "${CMAKE_SOURCE_DIR}/be/generated-sources")
set(FE_OUTPUT_DIR "${CMAKE_SOURCE_DIR}/fe/generated-sources")
# TODO: avoid duplicating generated java classes
set(EXT_DS_OUTPUT_DIR "${CMAKE_SOURCE_DIR}/java/ext-data-source/api/generated-sources")
# The trailing slash is part of the value and is kept as-is.
set(PYTHON_OUTPUT_DIR "${CMAKE_SOURCE_DIR}/shell/")
message("Found output dir: " ${PYTHON_OUTPUT_DIR})

# Create every directory the rules read from or write to, at configure time.
file(MAKE_DIRECTORY
  ${BE_OUTPUT_DIR}
  ${FE_OUTPUT_DIR}
  ${EXT_DS_OUTPUT_DIR}
  ${PYTHON_OUTPUT_DIR}
  ${HIVE_THRIFT_SOURCE_DIR}
  ${THIRDPARTY_THRIFT_DIR})

# --- Argument lists passed to the thrift compiler ---------------------------------------------
# Java generation for the frontend and for the external data source API.
set(JAVA_FE_ARGS ${THRIFT_JAVA_INCLUDE_DIR_OPTION} --gen java -o ${FE_OUTPUT_DIR})
set(JAVA_EXT_DS_ARGS ${THRIFT_JAVA_INCLUDE_DIR_OPTION} --gen java -o ${EXT_DS_OUTPUT_DIR})
# Python generation: recursive ("-r"), written directly into PYTHON_OUTPUT_DIR ("-out").
set(PYTHON_ARGS
  ${THRIFT_PY_INCLUDE_DIR_OPTION}
  -r --gen py:no_utf8strings
  -out ${PYTHON_OUTPUT_DIR})
# Thrift files compiled for the external data source API (passed to THRIFT_GEN_DS).
set(EXT_DATA_SRC_FILES
  ErrorCodes.thrift
  ExternalDataSource.thrift
  Data.thrift
  Status.thrift
  Types.thrift
)

# Every thrift file compiled by THRIFT_GEN. This includes the external data source files
# above, which is the only place Data.thrift, Status.thrift and Types.thrift come from.
set(SRC_FILES
  ErrorCodes.thrift
  beeswax.thrift
  BackendGflags.thrift
  CatalogInternalService.thrift
  CatalogObjects.thrift
  CatalogService.thrift
  DataSinks.thrift
  Descriptors.thrift
  ExecStats.thrift
  Frontend.thrift
  Exprs.thrift
  ExternalDataSource.thrift
  ImpalaInternalService.thrift
  ImpalaService.thrift
  JniCatalog.thrift
  LineageGraph.thrift
  Logging.thrift
  NetworkTest.thrift
  MetricDefs.thrift
  Metrics.thrift
  PlanNodes.thrift
  Planner.thrift
  Partitions.thrift
  parquet.thrift
  ResourceProfile.thrift
  Query.thrift
  Results.thrift
  RuntimeProfile.thrift
  SqlConstraints.thrift
  StatestoreService.thrift
  SystemTables.thrift
  Zip.thrift
  ${TCLI_SERVICE_THRIFT}
  ${EXT_DATA_SRC_FILES}
)
# ErrorCodes.thrift and ExternalDataSource.thrift are listed explicitly above and arrive a
# second time through EXT_DATA_SRC_FILES. Keep only the first occurrence of each entry
# (order is preserved) so that every file gets exactly one generation rule and the
# resulting output list carries no repeats.
list(REMOVE_DUPLICATES SRC_FILES)
# Declare, through the OBJECT_DEPENDS source-file property, which generated thrift files
# other thrift files are tied to.

# Status.thrift is tied to the generated ErrorCodes.thrift.
set_source_files_properties(Status.thrift
  PROPERTIES OBJECT_DEPENDS ErrorCodes.thrift)

# These files are all tied to the rewritten copy of hive_metastore.thrift.
set_source_files_properties(
  CatalogObjects.thrift
  CatalogService.thrift
  JniCatalog.thrift
  SqlConstraints.thrift
  beeswax.thrift
  PROPERTIES OBJECT_DEPENDS ${THIRDPARTY_THRIFT_DIR}/hive_metastore.thrift)
# Rules that produce thrift *input* files which are not checked in as-is.
#
# NOTE(review): none of the rules below sets WORKING_DIRECTORY or VERBATIM, and they use
# relative paths, shell redirection (">", "|") and single-quoted sed scripts. They appear
# to rely on being run by a shell from the directory that holds these files - confirm
# before changing quoting, adding VERBATIM or moving to an out-of-source build.

# ErrorCodes.thrift is written by generate_error_codes.py; the rule is re-run when the
# script changes.
add_custom_command(OUTPUT ErrorCodes.thrift
COMMAND python generate_error_codes.py
DEPENDS generate_error_codes.py)
# MetricDefs.thrift is written by generate_metrics.py; the rule is re-run when the script
# or metrics.json changes.
add_custom_command(OUTPUT MetricDefs.thrift
COMMAND python generate_metrics.py
DEPENDS generate_metrics.py metrics.json)
# The thrift-generated java classes defined in TCLIService are also pulled into our build
# in the Hive jars that are downloaded via Maven. Hive2 moved the classes from
# org.apache.hive.service.cli.thrift to org.apache.hive.service.rpc.thrift. Impala calls
# various Hive methods that have these classes in the interface and if the packages don't
# match it won't compile.
# The rule below therefore copies hive-1-api/TCLIService.thrift into the directory for the
# configured Hive major version, rewriting the java namespace with sed on the way.
# NOTE(review): with IMPALA_HIVE_MAJOR_VERSION=1 the sed input and the redirect target
# would be the same path - presumably that version is not expected here; confirm.
add_custom_command(OUTPUT hive-$ENV{IMPALA_HIVE_MAJOR_VERSION}-api/TCLIService.thrift
COMMAND sed
's/namespace java org.apache.hive.service.cli.thrift/namespace java org.apache.hive.service.rpc.thrift/'
hive-1-api/TCLIService.thrift > hive-$ENV{IMPALA_HIVE_MAJOR_VERSION}-api/TCLIService.thrift
DEPENDS hive-1-api/TCLIService.thrift
)
# This generates hive_metastore.thrift in the $THIRDPARTY_THRIFT_DIR. The two
# modifications are:
# 1. Set the impala_thrift_gen python namespace
# 2. Rearranges the fb303 reference so that it doesn't have the share/fb303/if
# directory structure
# Step 1 writes a ".tmp" file via add_thrift_python_namespace.sh, step 2 runs sed over it
# to produce the final file, and the ".tmp" file is removed afterwards. The source file
# is located through the HIVE_METASTORE_THRIFT_DIR environment variable.
add_custom_command(OUTPUT ${THIRDPARTY_THRIFT_DIR}/hive_metastore.thrift
COMMAND ${CMAKE_SOURCE_DIR}/bin/cmake_aux/add_thrift_python_namespace.sh
$ENV{HIVE_METASTORE_THRIFT_DIR}/hive_metastore.thrift
${THIRDPARTY_THRIFT_DIR}/hive_metastore.thrift.tmp
COMMAND cat ${THIRDPARTY_THRIFT_DIR}/hive_metastore.thrift.tmp |
sed 's|share/fb303/if/||' > ${THIRDPARTY_THRIFT_DIR}/hive_metastore.thrift
COMMAND rm ${THIRDPARTY_THRIFT_DIR}/hive_metastore.thrift.tmp
DEPENDS $ENV{HIVE_METASTORE_THRIFT_DIR}/hive_metastore.thrift
)
# Generate fb303.thrift in the $THIRDPARTY_THRIFT_DIR with the appropriate
# impala_thrift_gen python namespace. The source file is taken from
# ${THRIFT_PY_CONTRIB_DIR}/share/fb303/if.
add_custom_command(OUTPUT ${THIRDPARTY_THRIFT_DIR}/fb303.thrift
COMMAND ${CMAKE_SOURCE_DIR}/bin/cmake_aux/add_thrift_python_namespace.sh
${THRIFT_PY_CONTRIB_DIR}/share/fb303/if/fb303.thrift
${THIRDPARTY_THRIFT_DIR}/fb303.thrift
DEPENDS ${THRIFT_PY_CONTRIB_DIR}/share/fb303/if/fb303.thrift
)
# Tie the rewritten hive_metastore.thrift to the rewritten fb303.thrift through the
# OBJECT_DEPENDS source-file property.
SET_SOURCE_FILES_PROPERTIES(${THIRDPARTY_THRIFT_DIR}/hive_metastore.thrift
PROPERTIES OBJECT_DEPENDS ${THIRDPARTY_THRIFT_DIR}/fb303.thrift)
# Register one generation rule per thrift source file. Each call returns the list of
# files those rules are tracked by:
#   THRIFT_ALL_FILES      - outputs of the rules created for SRC_FILES
#   THRIFT_DATA_SRC_FILES - outputs of the rules created for EXT_DATA_SRC_FILES
THRIFT_GEN(THRIFT_ALL_FILES ${SRC_FILES})
THRIFT_GEN_DS(THRIFT_DATA_SRC_FILES ${EXT_DATA_SRC_FILES})

# One helper target per thrift input that is itself produced by a custom command.
add_custom_target(thrift-generated-files-error
  DEPENDS ErrorCodes.thrift)
add_custom_target(thrift-generated-files-metrics
  DEPENDS MetricDefs.thrift)
add_custom_target(thrift-generated-files-tcli-service
  DEPENDS ${TCLI_SERVICE_THRIFT})
add_custom_target(thrift-generated-files-hive-metastore
  DEPENDS ${THIRDPARTY_THRIFT_DIR}/hive_metastore.thrift)
add_custom_target(thrift-generated-files-fb303
  DEPENDS ${THIRDPARTY_THRIFT_DIR}/fb303.thrift)

# thrift-cpp is part of the default build. It drives every rule registered by THRIFT_GEN
# and is ordered after all of the helper targets above.
add_custom_target(thrift-cpp ALL
  DEPENDS ${THRIFT_ALL_FILES})
add_dependencies(thrift-cpp
  thrift-generated-files-metrics
  thrift-generated-files-error
  thrift-generated-files-tcli-service
  thrift-generated-files-hive-metastore
  thrift-generated-files-fb303)

# thrift-ext-data-src drives the rules registered by THRIFT_GEN_DS and is ordered after
# thrift-cpp.
add_custom_target(thrift-ext-data-src ALL
  DEPENDS ${THRIFT_DATA_SRC_FILES})
add_dependencies(thrift-ext-data-src thrift-cpp)

# Combined target for all thrift dependencies
add_custom_target(thrift-deps ALL)
add_dependencies(thrift-deps thrift-ext-data-src)