mirror of
https://github.com/apache/impala.git
synced 2025-12-22 11:28:09 -05:00
This change adds geospatial functions from Hive's ESRI library as builtin UDFs. Plain Hive UDFs are imported without changes, but the generic and varargs functions are handled differently: generic functions are added with all of the combinations of their parameters (cartesian product of the parameters), and varargs functions are unfolded into simple functions with n parameters. The varargs function wrappers are generated at build time and they can be configured in gen_geospatial_udf_wrappers.py. These additional steps are required because of the limitations in Impala's UDF Executor (lack of varargs support and only partial generics support), which could be further improved; in that case, the additional wrapping/mapping steps could be removed. Changes regarding function handling/creating are sourced from https://gerrit.cloudera.org/c/19177 A new backend flag, "geospatial_library", was added to turn this feature on/off. The default value is "NONE", which means no geospatial function gets registered as builtin; the "HIVE_ESRI" value enables this implementation. The ESRI geospatial implementation for Hive is currently only available in Hive 4, but CDP Hive backported it to Hive 3; therefore, for Apache Hive this feature is disabled regardless of the "geospatial_library" flag. 
Known limitations: - ST_MultiLineString, ST_MultiPolygon only work with the WKT overload - ST_Polygon supports a maximum of 6 pairs of coordinates - ST_MultiPoint, ST_LineString support a maximum of 7 pairs of coordinates - ST_ConvexHull, ST_Union support a maximum of 6 geoms. These limits can be increased in gen_geospatial_udf_wrappers.py. Tests: - test_geospatial_udfs.py added based on https://github.com/Esri/spatial-framework-for-hadoop Co-Authored-by: Csaba Ringhofer <csringhofer@cloudera.com> Change-Id: If0ca02a70b4ba244778c9db6d14df4423072b225 Reviewed-on: http://gerrit.cloudera.org:8080/19425 Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
71 lines
2.3 KiB
CMake
71 lines
2.3 KiB
CMake
# Licensed to the Apache Software Foundation (ASF) under one
|
|
# or more contributor license agreements. See the NOTICE file
|
|
# distributed with this work for additional information
|
|
# regarding copyright ownership. The ASF licenses this file
|
|
# to you under the Apache License, Version 2.0 (the
|
|
# "License"); you may not use this file except in compliance
|
|
# with the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing,
|
|
# software distributed under the License is distributed on an
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
# KIND, either express or implied. See the License for the
|
|
# specific language governing permissions and limitations
|
|
# under the License.
|
|
|
|
# Directory for generated backend sources, rooted at the top-level source tree.
# NOTE(review): not referenced by any command visible in this file.
set(BE_OUTPUT_DIR "${CMAKE_SOURCE_DIR}/be/generated-sources")
|
|
# Root of the generated frontend Java package (org.apache.impala).
# The trailing slash is kept on purpose so the variable's value is unchanged.
set(FE_OUTPUT_DIR
  "${CMAKE_SOURCE_DIR}/fe/generated-sources/gen-java/org/apache/impala/"
)
|
|
|
|
# Set output files for dependency tracking
|
|
# File declared as the OUTPUT of the builtins catalog generation rule; the
# function-registry target depends on it.
set(CODE_GEN_OUTPUT "${FE_OUTPUT_DIR}/builtins/ScalarBuiltins.java")
|
|
|
|
# Java wrapper sources declared as the OUTPUT of the geospatial wrapper
# generation rule: one <UDF>_Wrapper.java per wrapped function, listed in the
# same order as before.
set(GEOSPATIAL_CODE_GEN_OUTPUT "")
foreach(wrapped_udf
    ST_ConvexHull
    ST_LineString
    ST_MultiPoint
    ST_Polygon
    ST_Union)
  list(APPEND GEOSPATIAL_CODE_GEN_OUTPUT
    "${FE_OUTPUT_DIR}/builtins/${wrapped_udf}_Wrapper.java")
endforeach()
|
|
|
|
# Source python files
|
|
# Python sources whose modification re-triggers the builtins catalog rule
# (passed to that rule's DEPENDS). Paths are relative to this directory.
set(FUNCTION_REGISTRY_INPUT "gen_builtins_catalog.py" "impala_functions.py")
|
|
|
|
# Python sources whose modification re-triggers the geospatial wrapper rule
# (passed to that rule's DEPENDS). Paths are relative to this directory.
set(GEOSPATIAL_WRAPPER_INPUT "gen_builtins_catalog.py" "gen_geospatial_udf_wrappers.py")
|
|
|
|
# Run the python scripts
|
|
# Build rule for the builtins catalog: runs gen_builtins_catalog.py whenever
# one of the FUNCTION_REGISTRY_INPUT scripts changes. The files listed in
# CODE_GEN_OUTPUT are declared as its outputs.
add_custom_command(
  OUTPUT ${CODE_GEN_OUTPUT}
  COMMAND ./gen_builtins_catalog.py
  # The command is a "./"-relative path. Custom commands run in the current
  # binary directory by default, so pin the working directory to the directory
  # that holds the script. For in-source builds this is the same directory.
  WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
  DEPENDS ${FUNCTION_REGISTRY_INPUT}
  COMMENT "Generating files for builtins."
  VERBATIM
)
|
|
|
|
# Build rule for the geospatial UDF wrappers: runs
# gen_geospatial_udf_wrappers.py whenever one of the GEOSPATIAL_WRAPPER_INPUT
# scripts changes. The files listed in GEOSPATIAL_CODE_GEN_OUTPUT are declared
# as its outputs.
add_custom_command(
  OUTPUT ${GEOSPATIAL_CODE_GEN_OUTPUT}
  COMMAND ./gen_geospatial_udf_wrappers.py
  # The command is a "./"-relative path. Custom commands run in the current
  # binary directory by default, so pin the working directory to the directory
  # that holds the script. For in-source builds this is the same directory.
  WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
  DEPENDS ${GEOSPATIAL_WRAPPER_INPUT}
  COMMENT "Generating UDF wrappers for geospatial builtins."
  VERBATIM
)
|
|
|
|
# Part of the default build (ALL). Depending on CODE_GEN_OUTPUT is what
# triggers the builtins catalog generation rule.
add_custom_target(function-registry ALL
  DEPENDS ${CODE_GEN_OUTPUT}
)
|
|
|
|
# The geospatial wrappers are generated only when the USE_APACHE_HIVE
# environment variable is exactly "false" when CMake configures the build.
# The expansion is quoted so that an unset or empty variable takes the else
# branch; unquoted, it would collapse to `if( STREQUAL "false")`, which is a
# configure-time error.
if("$ENV{USE_APACHE_HIVE}" STREQUAL "false")
  # Part of the default build (ALL); ordered after function-registry.
  add_custom_target(geospatial-udf-wrappers ALL
    DEPENDS ${GEOSPATIAL_CODE_GEN_OUTPUT}
  )
  add_dependencies(geospatial-udf-wrappers function-registry)
else()
  # Empty target to skip wrapper generation; the target name still exists so
  # anything that refers to geospatial-udf-wrappers keeps working.
  add_custom_target(geospatial-udf-wrappers)
endif()
|