IMPALA-14573: port critical geospatial functions to c++ (part 1)

This commit contains the simpler parts from
https://gerrit.cloudera.org/#/c/20602

This mainly means accessors for the header of the binary
format and bounding box check (st_envIntersects).
New tests for not yet covered functions / overloads are also added.

For details of the binary format see be/src/exprs/geo/shape-format.h

Differences from the PR above:

Only a subset of functions is added. The criteria were:
1. the native function must be fully compatible with the Java version*
2. must not rely on (de)serializing the full geometry
3. the function must be tested

1 implies 2 because (de)serialization is not implemented yet in
the original patch for >2d geometries, which would break compatibility
with the Java version for XYZ/XYM/XYZM geometries.

*: there are 2 known differences:
 1. NULL handling: the Java functions return error instead of NULL
    when getting a NULL parameter
 2. st_envIntersects() doesn't check if the SRID matches - the Java
    library looks inconsistent about this

Because the native functions are fairly safe replacements for the Java
ones, they are always used when geospatial_library=HIVE_ESRI.

Change-Id: I0ff950a25320549290a83a3b1c31ce828dd68e3c
Reviewed-on: http://gerrit.cloudera.org:8080/23700
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
This commit is contained in:
Csaba Ringhofer
2025-11-20 20:57:25 +01:00
committed by Impala Public Jenkins
parent fe41448780
commit 780e6683a2
15 changed files with 1029 additions and 9 deletions

View File

@@ -531,6 +531,7 @@ set (IMPALA_LIBS
ExecPaimon
Exprs
ExprsIr
ExprsGeoIr
GlobalFlags
histogram_proto
ImpalaThrift

View File

@@ -77,7 +77,7 @@ function(COMPILE_TO_IR_C_ARRAY IR_C_FILE VARNAME)
${CLANG_INCLUDE_FLAGS} ${IR_INPUT_FILES} -o ${IR_TMP_OUTPUT_FILE}
COMMAND ${LLVM_OPT_EXECUTABLE} ${LLVM_OPT_IR_FLAGS} < ${IR_TMP_OUTPUT_FILE} > ${IR_OUTPUT_FILE}
COMMAND rm ${IR_TMP_OUTPUT_FILE}
DEPENDS ExecIr ExecAvroIr ExecKuduIr ExprsIr RuntimeIr UdfIr UtilIr ${IR_INPUT_FILES}
DEPENDS ExecIr ExecAvroIr ExprsGeoIr ExecKuduIr ExprsIr RuntimeIr UdfIr UtilIr ${IR_INPUT_FILES}
)
# Convert LLVM bytecode to C array.

View File

@@ -49,6 +49,7 @@
#include "exprs/date-functions-ir.cc"
#include "exprs/decimal-functions-ir.cc"
#include "exprs/decimal-operators-ir.cc"
#include "exprs/geo/geospatial-functions-ir.cc"
#include "exprs/hive-udf-call-ir.cc"
#include "exprs/iceberg-functions-ir.cc"
#include "exprs/in-predicate-ir.cc"

View File

@@ -15,6 +15,7 @@
# specific language governing permissions and limitations
# under the License.
add_subdirectory(geo)
# where to put generated libraries
set(LIBRARY_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}/exprs")

View File

@@ -0,0 +1,33 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# where to put generated libraries
set(LIBRARY_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}/exprs/geo")
# where to put generated binaries
set(EXECUTABLE_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}/exprs/geo")
# Library target built from the geospatial function implementations.
add_library(ExprsGeoIr
geospatial-functions-ir.cc
)
# Make sure the gen-deps target is built before ExprsGeoIr.
add_dependencies(ExprsGeoIr gen-deps)
# Stop processing this file here when tests are disabled, so that anything added below
# (test targets) is skipped.
if (BUILD_WITH_NO_TESTS)
return()
endif()
# Add tests here.

49
be/src/exprs/geo/common.h Normal file
View File

@@ -0,0 +1,49 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include "udf/udf.h"
#include "util/bit-util.h"
namespace impala::geo {
using impala_udf::FunctionContext;
using impala_udf::StringVal;
// Geometry type ids. The numeric values mirror the Java framework, see
// https://github.com/Esri/spatial-framework-for-hadoop/blob/v2.2.0/hive/src/main/java/com/esri/hadoop/hive/GeometryUtils.java#L21
enum OGCType {
  UNKNOWN = 0,
  ST_POINT,            // 1
  ST_LINESTRING,       // 2
  ST_POLYGON,          // 3
  ST_MULTIPOINT,       // 4
  ST_MULTILINESTRING,  // 5
  ST_MULTIPOLYGON      // 6, must stay the last enumerator: it sizes the lookup tables.
};

// Printable name of every OGCType, indexed by the numeric value of the enumerator.
constexpr std::array<const char*, ST_MULTIPOLYGON + 1> OGCTypeToStr = {{
    /* 0 */ "UNKNOWN",
    /* 1 */ "ST_POINT",
    /* 2 */ "ST_LINESTRING",
    /* 3 */ "ST_POLYGON",
    /* 4 */ "ST_MULTIPOINT",
    /* 5 */ "ST_MULTILINESTRING",
    /* 6 */ "ST_MULTIPOLYGON"
}};
} // namespace impala::geo

View File

@@ -0,0 +1,112 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "exprs/geo/geospatial-functions.h"
#include "exprs/geo/common.h"
#include "exprs/geo/shape-format.h"
#include "runtime/string-value.inline.h"
#include "udf/udf-internal.h"
#include "udf/udf.h"
#include "common/names.h"
namespace impala::geo {
// Accessors
// Returns the x coordinate of a point. The result is NULL if ParseHeader() rejects
// 'geom' or if the geometry is not an ST_POINT.
DoubleVal GeospatialFunctions::st_X(FunctionContext* ctx, const StringVal& geom) {
  OGCType type;
  const bool is_point = ParseHeader(ctx, geom, &type) && type == ST_POINT;
  // The x coordinate of a point is read from the same offset as the min x of other types.
  return is_point ? DoubleVal(getMinX(geom)) : DoubleVal::null();
}
// Returns the y coordinate of a point. The result is NULL if ParseHeader() rejects
// 'geom' or if the geometry is not an ST_POINT.
DoubleVal GeospatialFunctions::st_Y(FunctionContext* ctx, const StringVal& geom) {
  OGCType type;
  const bool is_point = ParseHeader(ctx, geom, &type) && type == ST_POINT;
  // The y coordinate of a point is read from the same offset as the min y of other types.
  return is_point ? DoubleVal(getMinY(geom)) : DoubleVal::null();
}
// Returns the smallest x coordinate stored in the header of 'geom', or NULL if
// ParseHeader() rejects 'geom'.
DoubleVal GeospatialFunctions::st_MinX(FunctionContext* ctx, const StringVal& geom) {
  OGCType type;  // Required by ParseHeader(); the same offset is read for every type.
  const bool header_ok = ParseHeader(ctx, geom, &type);
  if (header_ok) return DoubleVal(getMinX(geom));
  return DoubleVal::null();
}
// Returns the smallest y coordinate stored in the header of 'geom', or NULL if
// ParseHeader() rejects 'geom'.
DoubleVal GeospatialFunctions::st_MinY(FunctionContext* ctx, const StringVal& geom) {
  OGCType type;  // Required by ParseHeader(); the same offset is read for every type.
  const bool header_ok = ParseHeader(ctx, geom, &type);
  if (header_ok) return DoubleVal(getMinY(geom));
  return DoubleVal::null();
}
// Returns the largest x coordinate of 'geom', or NULL if ParseHeader() rejects 'geom'.
DoubleVal GeospatialFunctions::st_MaxX(FunctionContext* ctx, const StringVal& geom) {
  OGCType type;
  if (!ParseHeader(ctx, geom, &type)) return DoubleVal::null();
  // For a point the single x coordinate is used: it is both the min and the max.
  const double max_x = (type == ST_POINT) ? getMinX(geom) : getMaxX(geom);
  return DoubleVal(max_x);
}
// Returns the largest y coordinate of 'geom', or NULL if ParseHeader() rejects 'geom'.
DoubleVal GeospatialFunctions::st_MaxY(FunctionContext* ctx, const StringVal& geom) {
  OGCType type;
  if (!ParseHeader(ctx, geom, &type)) return DoubleVal::null();
  // For a point the single y coordinate is used: it is both the min and the max.
  const double max_y = (type == ST_POINT) ? getMinY(geom) : getMaxY(geom);
  return DoubleVal(max_y);
}
// Returns the name of the OGC type of 'geom' (e.g. "ST_POINT"), or NULL if ParseHeader()
// rejects 'geom'.
StringVal GeospatialFunctions::st_GeometryType(FunctionContext* ctx,
    const StringVal& geom) {
  OGCType type;
  if (ParseHeader(ctx, geom, &type)) {
    const char* type_name = getGeometryType(type);
    return StringVal(type_name);
  }
  return StringVal::null();
}
// Returns the SRID stored in the OGC header of 'geom', or NULL if ParseHeader() rejects
// 'geom'.
IntVal GeospatialFunctions::st_Srid(FunctionContext* ctx, const StringVal& geom) {
  OGCType type;  // Required by ParseHeader(); the SRID does not depend on the type.
  const bool header_ok = ParseHeader(ctx, geom, &type);
  if (!header_ok) return IntVal::null();
  const uint32_t srid = getSrid(geom);
  return srid;
}
// Returns a copy of 'geom' with the SRID in its OGC header replaced by 'srid'.
// - If 'srid' is NULL, 'geom' is returned as is (without parsing or copying it).
// - If ParseHeader() rejects 'geom', NULL is returned.
// - If the copy cannot be allocated, the NULL result of CopyFrom() is returned.
StringVal GeospatialFunctions::st_SetSrid(FunctionContext* ctx, const StringVal& geom,
    const IntVal& srid) {
  if (srid.is_null) return geom;
  OGCType ogc_type;
  if (!ParseHeader(ctx, geom, &ogc_type)) return StringVal::null();
  // The input must not be modified, so the SRID is patched in a copy.
  StringVal res = StringVal::CopyFrom(ctx, geom.ptr, geom.len);
  // CopyFrom() returns a NULL StringVal when the allocation fails; writing the SRID into
  // it would dereference a null buffer.
  if (UNLIKELY(res.is_null)) return res;
  setSrid(res, srid.val);
  return res;
}
// Predicates

// Returns true if the x/y bounding boxes of the two geometries intersect. The result is
// NULL if ParseHeader() rejects either argument; the right side is only parsed if the
// left side was accepted.
BooleanVal GeospatialFunctions::st_EnvIntersects(
    FunctionContext* ctx, const StringVal& lhs_geom, const StringVal& rhs_geom) {
  // TODO: compare srid? The ESRI UDF does it, but it is not done in other relations:
  // https://github.com/apache/hive/blob/rel/release-4.2.0/ql/src/java/org/apache/hadoop/hive/ql/udf/esri/ST_EnvIntersects.java#L63
  OGCType lhs_type;
  if (!ParseHeader(ctx, lhs_geom, &lhs_type)) return BooleanVal::null();
  OGCType rhs_type;
  if (!ParseHeader(ctx, rhs_geom, &rhs_type)) return BooleanVal::null();
  return BooleanVal(bBoxIntersects(lhs_geom, rhs_geom, lhs_type, rhs_type));
}
}

View File

@@ -0,0 +1,58 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <map>
#include "common/status.h"
#include "udf/udf.h"
namespace impala::geo {
using impala_udf::FunctionContext;
using impala_udf::BooleanVal;
using impala_udf::IntVal;
using impala_udf::BigIntVal;
using impala_udf::DoubleVal;
using impala_udf::StringVal;
class Expr;
class OpcodeRegistry;
struct StringValue;
class TupleRow;
// Native implementations of geospatial builtins that operate on geometries encoded as
// BINARY. All functions are static and take the geometry as a StringVal.
class GeospatialFunctions {
 public:
  // Accessors

  // Coordinates of a point geometry.
  static DoubleVal st_X(FunctionContext* ctx, const StringVal& geom);
  static DoubleVal st_Y(FunctionContext* ctx, const StringVal& geom);

  // Corners of the x/y bounding box of a geometry.
  static DoubleVal st_MinX(FunctionContext* ctx, const StringVal& geom);
  static DoubleVal st_MinY(FunctionContext* ctx, const StringVal& geom);
  static DoubleVal st_MaxX(FunctionContext* ctx, const StringVal& geom);
  static DoubleVal st_MaxY(FunctionContext* ctx, const StringVal& geom);

  // Name of the geometry type.
  static StringVal st_GeometryType(FunctionContext* ctx, const StringVal& geom);

  // Getter / setter of the spatial reference id.
  static IntVal st_Srid(FunctionContext* ctx, const StringVal& geom);
  static StringVal st_SetSrid(FunctionContext* ctx, const StringVal& geom,
      const IntVal& srid);

  // Predicates

  // Bounding box intersection check of two geometries.
  static BooleanVal st_EnvIntersects(
      FunctionContext* ctx, const StringVal& lhs, const StringVal& rhs);
};
} // namespace impala::geo

View File

@@ -0,0 +1,294 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <array>
#include <cstdint>
#include <cstring>
#include "exprs/geo/common.h"
namespace impala::geo {
// This file is responsible for handling the header of the "esri shape" format used for
// geometries encoded as BINARY. This format is fully compatible with Java framework
// https://github.com/Esri/spatial-framework-for-hadoop
// A 5 byte "OGC" header followed by the same format as the one used in shape files:
// https://www.esri.com/content/dam/esrisites/sitecore-archive/Files/Pdfs/library/whitepapers/pdfs/shapefile.pdf
//
// The OGC header contains:
// - 4 byte big endian SRID (reference system id)
// - 1 byte type id (OGCType)
// - no padding
// see https://github.com/Esri/spatial-framework-for-hadoop/blob/v2.2.0/hive/src/main/java/com/esri/hadoop/hive/GeometryUtils.java#L16
//
// The header of the shape file format ("ESRI header") contains:
// - 4 byte type id
// for POINT type store the coordinates:
// - dimension * 8 byte to store x/y/z/m as doubles
// for other types store the bounding box:
// - min coordinates: 2 * 8 byte to store x/y as doubles
// - max coordinates: 2 * 8 byte to store x/y as doubles
// - min/max z/m are stored later in the headers - this file doesn't access those
// - no padding
//
// For some types this is followed by a variable length part, which is not handled here.
// A POC example for handling a full type:
// https://gerrit.cloudera.org/#/c/20602/6/be/src/exprs/geo/poly-line-shape-format.cc
//
// Currently only 2 dimensions are handled (xy), min/max for z/m has to be accessed
// with Java functions. The xy bounding box has the same offset and format in
// xyz/xym/xyzm geometries so x/y accessors work in this case too.
//
// Functions are defined in the header to allow inlining bounding box check in codegen.
// OGC header: the SRID followed by the 1 byte type id.
constexpr int SRID_OFFSET = 0;
constexpr int SRID_SIZE = 4;
constexpr int OGC_TYPE_OFFSET = 4;
constexpr int OGC_TYPE_SIZE = 1;
static_assert(OGC_TYPE_OFFSET == SRID_SIZE);

// ESRI header: the 4 byte type id directly after the OGC header.
constexpr int ESRI_TYPE_OFFSET = 5;
constexpr int ESRI_TYPE_SIZE = 4;
static_assert(ESRI_TYPE_OFFSET == OGC_TYPE_OFFSET + OGC_TYPE_SIZE);

// Coordinates (doubles) directly after the ESRI type id, each following the previous.
constexpr int X1_OFFSET = 9;
constexpr int Y1_OFFSET = X1_OFFSET + sizeof(double);
constexpr int X2_OFFSET = Y1_OFFSET + sizeof(double);
constexpr int Y2_OFFSET = X2_OFFSET + sizeof(double);
static_assert(X1_OFFSET == ESRI_TYPE_OFFSET + ESRI_TYPE_SIZE);

// Minimum sizes: both headers alone, headers + 2 doubles, headers + 4 doubles.
constexpr int MIN_GEOM_SIZE = 9;
constexpr int MIN_POINT_SIZE = 25;
constexpr int MIN_NON_POINT_SIZE = 41;
static_assert(MIN_GEOM_SIZE == SRID_SIZE + OGC_TYPE_SIZE + ESRI_TYPE_SIZE);
static_assert(MIN_POINT_SIZE == MIN_GEOM_SIZE + 2 * sizeof(double));
static_assert(MIN_NON_POINT_SIZE == MIN_POINT_SIZE + 2 * sizeof(double));
// Type ids used in the 4 byte type field of the ESRI header, listed in numeric order.
// See https://github.com/Esri/geometry-api-java/blob/v2.2.4/src/main/java/com/esri/core/geometry/ShapeType.java#L27
enum EsriType: uint32_t {
  ShapeNull = 0,
  ShapePoint = 1,
  ShapePolyline = 3,
  ShapePolygon = 5,
  ShapeMultiPoint = 8,
  ShapePointZ = 9,
  ShapePolylineZ = 10,
  ShapePointZM = 11,
  ShapePolylineZM = 13,
  ShapePolygonZM = 15,
  ShapeMultiPointZM = 18,
  ShapePolygonZ = 19,
  ShapeMultiPointZ = 20,
  ShapePointM = 21,
  ShapePolylineM = 23,
  ShapePolygonM = 25,
  ShapeMultiPointM = 28,
  ShapeMultiPatchM = 31,
  ShapeMultiPatch = 32,
  ShapeGeneralPolyline = 50,
  ShapeGeneralPolygon = 51,
  ShapeGeneralPoint = 52,
  ShapeGeneralMultiPoint = 53,
  ShapeGeneralMultiPatch = 54,
  ShapeTypeLast = 55
};
// 2D EsriType that belongs to each OGCType, indexed by the numeric value of the OGCType.
// The single and multi variants of line strings and polygons map to the same EsriType.
constexpr std::array<EsriType, ST_MULTIPOLYGON + 1> OGCTypeToEsriType = {{
    /* UNKNOWN            */ ShapeNull,
    /* ST_POINT           */ ShapePoint,
    /* ST_LINESTRING      */ ShapePolyline,
    /* ST_POLYGON         */ ShapePolygon,
    /* ST_MULTIPOINT      */ ShapeMultiPoint,
    /* ST_MULTILINESTRING */ ShapePolyline,
    /* ST_MULTIPOLYGON    */ ShapePolygon
}};
// Returns the value of type T stored at byte 'offset' of 'geom'. The caller has to make
// sure that 'geom' holds at least offset + sizeof(T) bytes; this is only verified by the
// DCHECK.
// The fields of the format are not naturally aligned (e.g. X1_OFFSET is 9), so the bytes
// are copied with memcpy instead of dereferencing a casted pointer, which would be an
// unaligned access and undefined behavior.
template <class T>
T readFromGeom(const StringVal& geom, int offset) {
  DCHECK_GE(geom.len, offset + sizeof(T));
  T val;
  std::memcpy(&val, geom.ptr + offset, sizeof(T));
  return val;
}
// Stores 'val' at byte 'offset' of 'geom'. The caller has to make sure that 'geom' holds
// at least offset + sizeof(T) bytes; this is only verified by the DCHECK.
// The fields of the format are not naturally aligned (e.g. X1_OFFSET is 9), so the bytes
// are copied with memcpy instead of assigning through a casted pointer, which would be
// an unaligned access and undefined behavior.
template <class T>
void writeToGeom(const T& val, StringVal& geom, int offset) {
  DCHECK_GE(geom.len, offset + sizeof(T));
  std::memcpy(geom.ptr + offset, &val, sizeof(T));
}
// getters/setters for OGC header:

// Returns the SRID of 'geom' in host byte order.
inline uint32_t getSrid(const StringVal& geom) {
#ifndef IS_LITTLE_ENDIAN
  static_assert(false, "Only the little endian byte order is supported.");
#endif
  static_assert(SRID_SIZE == sizeof(uint32_t));
  // The SRID is stored in big endian format while Impala only supports little endian
  // hosts, so the bytes read from 'geom' always have to be swapped.
  return BitUtil::ByteSwap(readFromGeom<uint32_t>(geom, SRID_OFFSET));
}
// Returns the type byte of the OGC header converted to OGCType. The value is not
// validated here.
inline OGCType getOGCType(const StringVal& geom) {
  static_assert(OGC_TYPE_SIZE == sizeof(char));
  return static_cast<OGCType>(readFromGeom<char>(geom, OGC_TYPE_OFFSET));
}
// Returns the printable name of 'ogc_type'. The lookup is not bounds checked, so
// 'ogc_type' has to be one of the enumerators of OGCType.
inline constexpr const char* getGeometryType(OGCType ogc_type) {
  const char* const type_name = OGCTypeToStr[ogc_type];
  return type_name;
}
// Overwrites the SRID in the OGC header of 'geom' with 'srid' (given in host byte order).
inline void setSrid(StringVal& geom, uint32_t srid) {
#ifndef IS_LITTLE_ENDIAN
  static_assert(false, "Only the little endian byte order is supported.");
#endif
  static_assert(SRID_SIZE == sizeof(uint32_t));
  // The SRID is stored in big endian format while Impala only supports little endian
  // hosts, so the value always has to be swapped before it is written.
  const uint32_t big_endian_srid = BitUtil::ByteSwap(srid);
  writeToGeom<uint32_t>(big_endian_srid, geom, SRID_OFFSET);
}
// Overwrites the 1 byte type id in the OGC header of 'geom'.
inline void setOGCType(StringVal& geom, OGCType ogc_type) {
  const char type_byte = ogc_type;
  writeToGeom<char>(type_byte, geom, OGC_TYPE_OFFSET);
}
// getters/setters for ESRI header:

// Returns the type id stored in the ESRI header of 'geom'. The value is not validated.
inline EsriType getEsriType(const StringVal& geom) {
  static_assert(ESRI_TYPE_SIZE == sizeof(EsriType));
  const EsriType esri_type = readFromGeom<EsriType>(geom, ESRI_TYPE_OFFSET);
  return esri_type;
}
// Readers of the four doubles that follow the ESRI type id. None of them checks the type
// of 'geom'; the caller decides which of the offsets are meaningful for it.

// First double (X1_OFFSET).
inline double getMinX(const StringVal& geom) {
  const double x1 = readFromGeom<double>(geom, X1_OFFSET);
  return x1;
}

// Second double (Y1_OFFSET).
inline double getMinY(const StringVal& geom) {
  const double y1 = readFromGeom<double>(geom, Y1_OFFSET);
  return y1;
}

// Third double (X2_OFFSET).
inline double getMaxX(const StringVal& geom) {
  const double x2 = readFromGeom<double>(geom, X2_OFFSET);
  return x2;
}

// Fourth double (Y2_OFFSET).
inline double getMaxY(const StringVal& geom) {
  const double y2 = readFromGeom<double>(geom, Y2_OFFSET);
  return y2;
}
// Overwrites the type id in the ESRI header of 'geom'.
inline void setEsriType(StringVal& geom, EsriType esri_type) {
  static_assert(sizeof(EsriType) == ESRI_TYPE_SIZE);
  writeToGeom<EsriType>(esri_type, geom, ESRI_TYPE_OFFSET);
}
// Writers of the four doubles that follow the ESRI type id. None of them checks the type
// of 'geom'; the caller decides which of the offsets are meaningful for it.

// First double (X1_OFFSET).
inline void setMinX(StringVal& geom, double x) {
  const double& x1 = x;
  writeToGeom<double>(x1, geom, X1_OFFSET);
}

// Second double (Y1_OFFSET).
inline void setMinY(StringVal& geom, double y) {
  const double& y1 = y;
  writeToGeom<double>(y1, geom, Y1_OFFSET);
}

// Third double (X2_OFFSET).
inline void setMaxX(StringVal& geom, double x) {
  const double& x2 = x;
  writeToGeom<double>(x2, geom, X2_OFFSET);
}

// Fourth double (Y2_OFFSET).
inline void setMaxY(StringVal& geom, double y) {
  const double& y2 = y;
  writeToGeom<double>(y2, geom, Y2_OFFSET);
}
// Validates the header of 'geom' and returns its OGC type in 'ogc_type'.
// Returns true and sets 'ogc_type' if the header is valid. Returns false and leaves
// 'ogc_type' untouched otherwise:
// - 'geom' is NULL: no error is set on 'ctx'
// - 'geom' is shorter than the headers, the type byte is not a known OGCType or is
//   UNKNOWN, or 'geom' is too short to hold the coordinates of its type: an error is
//   set on 'ctx'
inline bool ParseHeader(FunctionContext* ctx, const StringVal& geom, OGCType* ogc_type) {
  DCHECK(ogc_type != nullptr);
  if (UNLIKELY(geom.is_null)) return false;
  if (UNLIKELY(geom.len < MIN_GEOM_SIZE)) {
    ctx->SetError("Geometry size too small.");
    return false;
  }
  // The type byte comes from untrusted data, so it is range checked as a raw unsigned
  // byte before it is converted to OGCType. Converting an out of range value to the enum
  // first would be undefined and would allow the compiler to drop the check.
  static_assert(OGC_TYPE_SIZE == sizeof(uint8_t));
  const uint8_t raw_ogc_type = geom.ptr[OGC_TYPE_OFFSET];
  if (UNLIKELY(raw_ogc_type > ST_MULTIPOLYGON)) {
    ctx->SetError("Invalid geometry type.");
    return false;
  }
  if (UNLIKELY(raw_ogc_type == UNKNOWN)) {
    ctx->SetError("Geometry type UNKNOWN.");
    return false;
  }
  // Points store a single coordinate pair while all other types store a bounding box, so
  // the minimum size depends on the type.
  if (raw_ogc_type == ST_POINT) {
    if (UNLIKELY(geom.len < MIN_POINT_SIZE)) {
      ctx->SetError("Geometry size too small for ST_POINT type.");
      return false;
    }
  } else {
    if (UNLIKELY(geom.len < MIN_NON_POINT_SIZE)) {
      ctx->SetError("Geometry size too small for non ST_POINT type.");
      return false;
    }
  }
  // TODO: fix Z/M/ZM types and move to a function called from DCHECK
  // ogc vs ESRI type checking can be useful during development, but it
  // is unnecessary overhead in production
  /*const EsriType esri_type = getEsriType(geom);
  DCHECK_LT(raw_ogc_type, OGCTypeToEsriType.size());
  const EsriType expected_esri_type = OGCTypeToEsriType[raw_ogc_type];
  if (expected_esri_type != esri_type) {
    // TODO: To test it we need to create a table with 3D types, we cannot create them
    // with native constructors.
    ctx->SetError(strings::Substitute(
      "Invalid geometry: OGCType and EsriType do not match. "
      "Because the OGCType is $0, expected EsriType $1, found $2.",
      OGCTypeToStr[raw_ogc_type], expected_esri_type, esri_type).c_str());
  }*/
  // The value was verified to be one of the enumerators above, so the cast is safe.
  *ogc_type = static_cast<OGCType>(raw_ogc_type);
  return true;
}
// Bounding box check for x/y coordinates of two geometries. z/m are ignored, which
// is consistent with the original Java functions.
// 'lhs_type' / 'rhs_type' must be the types of the geometries: for ST_POINT the single
// coordinate pair is used as both the min and the max corner, because no max
// coordinates are read for points.
// Boxes that only touch (e.g. xmax1 == xmin2) are treated as intersecting.
// NOTE(review): every comparison with a NaN coordinate is false, so a geometry with NaN
// coordinates intersects everything here - confirm that this matches the Java behavior
// for such (presumably empty) geometries.
inline bool bBoxIntersects(const StringVal& lhs_geom, const StringVal& rhs_geom,
    OGCType lhs_type, OGCType rhs_type) {
  const bool is_lhs_point = lhs_type == ST_POINT;
  const double xmin1 = getMinX(lhs_geom);
  const double ymin1 = getMinY(lhs_geom);
  const double xmax1 = is_lhs_point ? xmin1 : getMaxX(lhs_geom);
  const double ymax1 = is_lhs_point ? ymin1 : getMaxY(lhs_geom);
  const bool is_rhs_point = rhs_type == ST_POINT;
  const double xmin2 = getMinX(rhs_geom);
  const double ymin2 = getMinY(rhs_geom);
  const double xmax2 = is_rhs_point ? xmin2 : getMaxX(rhs_geom);
  const double ymax2 = is_rhs_point ? ymin2 : getMaxY(rhs_geom);
  // The boxes are disjoint if one of them lies entirely on one side of the other on
  // either axis.
  if (xmax1 < xmin2 || xmax2 < xmin1 || ymax1 < ymin2 || ymax2 < ymin1) return false;
  return true;
}
} // namespace impala::geo

View File

@@ -33,6 +33,7 @@
#include "exprs/date-functions.h"
#include "exprs/decimal-functions.h"
#include "exprs/decimal-operators.h"
#include "exprs/geo/geospatial-functions.h"
#include "exprs/hive-udf-call.h"
#include "exprs/iceberg-functions.h"
#include "exprs/in-predicate.h"
@@ -459,6 +460,7 @@ void ScalarExprEvaluator::InitBuiltinsDummy() {
DataSketchesFunctions::DsHllEstimate(nullptr, StringVal::null());
DecimalFunctions::Precision(nullptr, DecimalVal::null());
DecimalOperators::CastToDecimalVal(nullptr, DecimalVal::null());
geo::GeospatialFunctions::st_MaxX(nullptr, StringVal::null());
IcebergFunctions::TruncatePartitionTransform(nullptr, IntVal::null(), IntVal::null());
InPredicate::InIterate(nullptr, BigIntVal::null(), 0, nullptr);
IsNullPredicate::IsNull(nullptr, BooleanVal::null());

View File

@@ -48,42 +48,67 @@ import org.apache.impala.catalog.Type;
import org.apache.impala.hive.executor.BinaryToBinaryHiveLegacyFunctionExtractor;
import org.apache.impala.hive.executor.HiveJavaFunction;
import org.apache.impala.hive.executor.HiveLegacyJavaFunction;
import org.apache.impala.service.BackendConfig;
import com.google.common.base.Preconditions;
import org.apache.impala.analysis.FunctionName;
import org.apache.impala.thrift.TFunctionBinaryType;
import org.apache.impala.thrift.TGeospatialLibrary;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class HiveEsriGeospatialBuiltins {
private final static Logger LOG = LoggerFactory.getLogger(
HiveEsriGeospatialBuiltins.class);
/**
* Initializes Hive's ESRI geospatial UDFs as builtins.
* Registers the legacy, generic and varargs Java UDFs in 'db', and the native
* implementations when 'addNatives' is set.
*/
public static void initBuiltins(Db db) {
// NOTE(review): two addLegacyUDFs() calls are visible in this hunk; the one-argument
// form looks like the pre-change line of the diff - confirm.
addLegacyUDFs(db);
// NOTE(review): 'lib' is not read in the visible code.
TGeospatialLibrary lib = BackendConfig.INSTANCE.getGeospatialLibrary();
// Currently all native functions are expected to be 100% compatible with the Java
// version. This is not true for the full function set of
// https://gerrit.cloudera.org/#/c/20602/ , which sets based on a flag to allow
// full compatibility with Hive.
boolean addNatives = true;
addLegacyUDFs(db, addNatives);
addGenericUDFs(db);
addVarargsUDFs(db);
if(addNatives) {
addNatives(db);
}
}
private static void addLegacyUDFs(Db db) {
private static void addLegacyUDFs(Db db, boolean addNatives) {
List<UDF> legacyUDFs = Arrays.asList(new ST_Area(), new ST_AsBinary(),
new ST_AsGeoJson(), new ST_AsJson(), new ST_AsShape(), new ST_AsText(),
new ST_Boundary(), new ST_Buffer(), new ST_Centroid(), new ST_CoordDim(),
new ST_Difference(), new ST_Dimension(), new ST_Distance(), new ST_EndPoint(),
new ST_Envelope(), new ST_EnvIntersects(), new ST_ExteriorRing(),
new ST_Envelope(), new ST_ExteriorRing(),
new ST_GeodesicLengthWGS84(), new ST_GeomCollection(), new ST_GeometryN(),
new ST_GeometryType(), new ST_GeomFromShape(), new ST_GeomFromText(),
new ST_GeomFromShape(), new ST_GeomFromText(),
new ST_GeomFromWKB(), new ST_InteriorRingN(), new ST_Intersection(),
new ST_Is3D(), new ST_IsClosed(), new ST_IsEmpty(), new ST_IsMeasured(),
new ST_IsRing(), new ST_IsSimple(), new ST_Length(), new ST_LineFromWKB(),
new ST_M(), new ST_MaxM(), new ST_MaxX(), new ST_MaxY(), new ST_MaxZ(),
new ST_MinM(), new ST_MinX(), new ST_MinY(), new ST_MinZ(), new ST_MLineFromWKB(),
new ST_M(), new ST_MaxM(), new ST_MaxZ(),
new ST_MinM(), new ST_MinZ(), new ST_MLineFromWKB(),
new ST_MPointFromWKB(), new ST_MPolyFromWKB(), new ST_NumGeometries(),
new ST_NumInteriorRing(), new ST_NumPoints(), new ST_Point(),
new ST_PointFromWKB(), new ST_PointN(), new ST_PointZ(), new ST_PolyFromWKB(),
new ST_Relate(), new ST_SRID(), new ST_StartPoint(), new ST_SymmetricDiff(),
new ST_X(), new ST_Y(), new ST_Z(), new ST_SetSRID());
new ST_Relate(), new ST_StartPoint(), new ST_SymmetricDiff(),
new ST_Z());
List<UDF> legacyUDFsWithNativeImplementation = Arrays.asList(
new ST_EnvIntersects(), new ST_GeometryType(),
new ST_MaxX(), new ST_MaxY(),
new ST_MinX(), new ST_MinY(),
new ST_SRID(), new ST_SetSRID(),
new ST_X(), new ST_Y()
);
if (!addNatives) {
legacyUDFs.addAll(legacyUDFsWithNativeImplementation);
}
for (UDF udf : legacyUDFs) {
for (Function fn : extractFromLegacyHiveBuiltin(udf, db.getName())) {
@@ -206,4 +231,35 @@ public class HiveEsriGeospatialBuiltins {
})
.collect(Collectors.toList());
}
/**
 * Registers a native scalar builtin in 'db'.
 * The SQL name of the function is 'fnNameBase' in lower case, the C++ symbol is
 * impala::geo::GeospatialFunctions::<fnNameBase><fnNameSuffix>.
 */
private static void addNative(Db db, String fnNameBase, String fnNameSuffix,
    boolean varArgs, Type retType, Type... argTypes) {
  // Locale.ROOT makes the lower-casing independent of the default locale of the JVM:
  // e.g. with a Turkish default locale "I" would be lower-cased to a dotless i and
  // st_EnvIntersects would be registered under the wrong name.
  String udfName = fnNameBase.toLowerCase(java.util.Locale.ROOT);
  String geospatialFnPrefix = "impala::geo::GeospatialFunctions::";
  String cppSymbolName = geospatialFnPrefix + fnNameBase + fnNameSuffix;
  db.addScalarBuiltin(udfName, cppSymbolName, true, varArgs, retType, argTypes);
}
/**
 * Convenience overload for native functions whose C++ symbol has no suffix: the symbol
 * name is built from 'fnName' alone.
 */
private static void addNative(Db db, String fnName, boolean varArgs, Type retType,
    Type... argTypes) {
  final String noSuffix = "";
  addNative(db, fnName, noSuffix, varArgs, retType, argTypes);
}
/**
 * Registers the native implementations of the geospatial functions in 'db'. None of
 * them takes a variable number of arguments.
 */
private static void addNatives(Db db) {
  // Legacy UDFs.
  // Accessors that return a DOUBLE coordinate of a single BINARY geometry.
  String[] coordinateAccessors =
      {"st_MinX", "st_MaxX", "st_MinY", "st_MaxY", "st_X", "st_Y"};
  for (String accessor : coordinateAccessors) {
    addNative(db, accessor, false, Type.DOUBLE, Type.BINARY);
  }
  // Remaining accessors.
  addNative(db, "st_Srid", false, Type.INT, Type.BINARY);
  addNative(db, "st_SetSrid", false, Type.BINARY, Type.BINARY, Type.INT);
  addNative(db, "st_GeometryType", false, Type.STRING, Type.BINARY);
  // Predicates.
  addNative(db, "st_EnvIntersects", false, Type.BOOLEAN, Type.BINARY, Type.BINARY);
}
}

View File

@@ -0,0 +1,394 @@
=====
---- QUERY
select st_bin(1, "point empty")
---- TYPES
BIGINT
---- RESULTS
0
====
---- QUERY
select st_bin(1, "point(10 10)")
---- TYPES
BIGINT
---- RESULTS
4611685985093119520
====
---- QUERY
select st_bin(1.0, st_point(10, 10))
---- TYPES
BIGINT
---- RESULTS
4611685985093119520
====
---- QUERY
select st_bin(1.0, "point(-200 50)")
---- TYPES
BIGINT
---- RESULTS
4611685863613099350
====
---- QUERY
select st_bin(1, st_point(-200, 50))
---- TYPES
BIGINT
---- RESULTS
4611685863613099350
====
---- QUERY
select st_bin(1, "point(-500 -1000)")
---- TYPES
BIGINT
---- RESULTS
4611689052463623000
====
---- QUERY
select st_bin(1.0, st_point(-500, -1000))
---- TYPES
BIGINT
---- RESULTS
4611689052463623000
====
---- QUERY
select st_bin(1.0, "point(800 -5000)")
---- TYPES
BIGINT
---- RESULTS
4611701200465620300
====
---- QUERY
select st_bin(1, st_point(800, -5000))
---- TYPES
BIGINT
---- RESULTS
4611701200465620300
====
---- QUERY
select st_astext(st_binenvelope(1, 4611685985093119520));
---- TYPES
STRING
---- RESULTS
'POLYGON ((9.5 9.5, 10.5 9.5, 10.5 10.5, 9.5 10.5, 9.5 9.5))'
====
---- QUERY
select st_astext(st_binenvelope(1.0, 4611685985093119520));
---- TYPES
STRING
---- RESULTS
'POLYGON ((9.5 9.5, 10.5 9.5, 10.5 10.5, 9.5 10.5, 9.5 9.5))'
====
---- QUERY
select st_astext(st_binenvelope(1, 4611685863613099350));
---- TYPES
STRING
---- RESULTS
'POLYGON ((-200.5 49.5, -199.5 49.5, -199.5 50.5, -200.5 50.5, -200.5 49.5))'
====
---- QUERY
select st_astext(st_binenvelope(1.0, 4611685863613099350));
---- TYPES
STRING
---- RESULTS
'POLYGON ((-200.5 49.5, -199.5 49.5, -199.5 50.5, -200.5 50.5, -200.5 49.5))'
====
---- QUERY
select st_astext(st_binenvelope(1, 4611689052463623000));
---- TYPES
STRING
---- RESULTS
'POLYGON ((-500.5 -1000.5, -499.5 -1000.5, -499.5 -999.5, -500.5 -999.5, -500.5 -1000.5))'
====
---- QUERY
select st_astext(st_binenvelope(1.0, 4611689052463623000));
---- TYPES
STRING
---- RESULTS
'POLYGON ((-500.5 -1000.5, -499.5 -1000.5, -499.5 -999.5, -500.5 -999.5, -500.5 -1000.5))'
====
---- QUERY
select st_astext(st_binenvelope(1, 4611701200465620300));
---- TYPES
STRING
---- RESULTS
'POLYGON ((799.5 -5000.5, 800.5 -5000.5, 800.5 -4999.5, 799.5 -4999.5, 799.5 -5000.5))'
====
---- QUERY
select st_astext(st_binenvelope(1.0, 4611701200465620300));
---- TYPES
STRING
---- RESULTS
'POLYGON ((799.5 -5000.5, 800.5 -5000.5, 800.5 -4999.5, 799.5 -4999.5, 799.5 -5000.5))'
====
---- QUERY
select st_astext(st_binenvelope(1, st_point(1, 2)));
---- TYPES
STRING
---- RESULTS
'POLYGON ((1 1, 2 1, 2 2, 1 2, 1 1))'
====
---- QUERY
select st_astext(st_binenvelope(1.0, st_point(1, 2)));
---- TYPES
STRING
---- RESULTS
'POLYGON ((1 1, 2 1, 2 2, 1 2, 1 1))'
====
---- QUERY
select st_astext(st_binenvelope(1, "point(1 2)"));
---- TYPES
STRING
---- RESULTS
'POLYGON ((1 1, 2 1, 2 2, 1 2, 1 1))'
====
---- QUERY
select st_astext(st_binenvelope(1.0, "point(1 2)"));
---- TYPES
STRING
---- RESULTS
'POLYGON ((1 1, 2 1, 2 2, 1 2, 1 1))'
====
---- QUERY
# ST_Intersects(BINARY, STRING)
select ST_Intersects(st_polygon(2,0, 2,3, 3,0), "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
ST_Intersects(st_polygon(0,0, 0,1, 0.5,1), "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
ST_Intersects(st_polygon(0,0, 0,1, 1,1), "POLYGON ((1 1, 1 4, 4 4, 4 1))");
---- TYPES
BOOLEAN,BOOLEAN,BOOLEAN
---- RESULTS
true,false,true
====
---- QUERY
# ST_Intersects(STRING, BINARY)
select ST_Intersects("POLYGON ((2 0, 2 3, 3 0))", ST_Polygon(1,1, 1,4, 4,4, 4,1)),
ST_Intersects("POLYGON ((0 0, 0 1, 0.5 1))", ST_Polygon(1,1, 1,4, 4,4, 4,1)),
ST_Intersects("POLYGON ((0 0, 0 1, 1 1))", ST_Polygon(1,1, 1,4, 4,4, 4,1));
---- TYPES
BOOLEAN,BOOLEAN,BOOLEAN
---- RESULTS
true,false,true
====
---- QUERY
# ST_Intersects(STRING, STRING)
select ST_Intersects("POLYGON ((2 0, 2 3, 3 0))", "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
ST_Intersects("POLYGON ((0 0, 0 1, 0.5 1))", "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
ST_Intersects("POLYGON ((0 0, 0 1, 1 1))", "POLYGON ((1 1, 1 4, 4 4, 4 1))");
---- TYPES
BOOLEAN,BOOLEAN,BOOLEAN
---- RESULTS
true,false,true
====
---- QUERY
# ST_Overlaps(BINARY, STRING)
select ST_Overlaps(st_polygon(2,0, 2,3, 3,0), "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
ST_Overlaps(st_polygon(0,0, 0,1, 0.5,1), "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
ST_Overlaps(st_polygon(0,0, 0,1, 1,1), "POLYGON ((1 1, 1 4, 4 4, 4 1))");
---- TYPES
BOOLEAN,BOOLEAN,BOOLEAN
---- RESULTS
true,false,false
====
---- QUERY
# ST_Overlaps(STRING, BINARY)
select ST_Overlaps("POLYGON ((2 0, 2 3, 3 0))", ST_Polygon(1,1, 1,4, 4,4, 4,1)),
ST_Overlaps("POLYGON ((0 0, 0 1, 0.5 1))", ST_Polygon(1,1, 1,4, 4,4, 4,1)),
ST_Overlaps("POLYGON ((0 0, 0 1, 1 1))", ST_Polygon(1,1, 1,4, 4,4, 4,1));
---- TYPES
BOOLEAN,BOOLEAN,BOOLEAN
---- RESULTS
true,false,false
====
---- QUERY
# ST_Overlaps(STRING, STRING)
select ST_Overlaps("POLYGON ((2 0, 2 3, 3 0))", "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
ST_Overlaps("POLYGON ((0 0, 0 1, 0.5 1))", "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
ST_Overlaps("POLYGON ((0 0, 0 1, 1 1))", "POLYGON ((1 1, 1 4, 4 4, 4 1))");
---- TYPES
BOOLEAN,BOOLEAN,BOOLEAN
---- RESULTS
true,false,false
====
---- QUERY
# ST_Touches(BINARY, STRING)
# First argument is built with st_polygon(), second is WKT text. Each triangle is
# tested against the square with corners (1,1) and (4,4):
#   1. reaches into the square, so the interiors overlap -> false
#   2. lies entirely to the left of the square           -> false
#   3. meets the square only at the corner point (1,1)   -> true
select ST_Touches(st_polygon(2,0, 2,3, 3,0), "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
ST_Touches(st_polygon(0,0, 0,1, 0.5,1), "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
ST_Touches(st_polygon(0,0, 0,1, 1,1), "POLYGON ((1 1, 1 4, 4 4, 4 1))");
---- TYPES
BOOLEAN,BOOLEAN,BOOLEAN
---- RESULTS
false,false,true
====
---- QUERY
# ST_Touches(STRING, BINARY)
# First argument is WKT text, second is built with ST_Polygon(). Each triangle is
# tested against the square with corners (1,1) and (4,4):
#   1. reaches into the square, so the interiors overlap -> false
#   2. lies entirely to the left of the square           -> false
#   3. meets the square only at the corner point (1,1)   -> true
select ST_Touches("POLYGON ((2 0, 2 3, 3 0))", ST_Polygon(1,1, 1,4, 4,4, 4,1)),
ST_Touches("POLYGON ((0 0, 0 1, 0.5 1))", ST_Polygon(1,1, 1,4, 4,4, 4,1)),
ST_Touches("POLYGON ((0 0, 0 1, 1 1))", ST_Polygon(1,1, 1,4, 4,4, 4,1));
---- TYPES
BOOLEAN,BOOLEAN,BOOLEAN
---- RESULTS
false,false,true
====
---- QUERY
# ST_Touches(STRING, STRING)
# Both arguments are WKT text. Each triangle is tested against the square with
# corners (1,1) and (4,4):
#   1. reaches into the square, so the interiors overlap -> false
#   2. lies entirely to the left of the square           -> false
#   3. meets the square only at the corner point (1,1)   -> true
select ST_Touches("POLYGON ((2 0, 2 3, 3 0))", "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
ST_Touches("POLYGON ((0 0, 0 1, 0.5 1))", "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
ST_Touches("POLYGON ((0 0, 0 1, 1 1))", "POLYGON ((1 1, 1 4, 4 4, 4 1))");
---- TYPES
BOOLEAN,BOOLEAN,BOOLEAN
---- RESULTS
false,false,true
====
---- QUERY
# ST_Contains(BINARY, STRING)
# First argument is built with st_polygon(), second is WKT text. Checks whether each
# triangle contains the square with corners (1,1) and (4,4):
#   1. small triangle lying inside the square               -> false
#   2. triangle (0,0),(2,3),(3,2) does not cover the square -> false
#   3. large triangle (0,0),(10,0),(0,10) covers the square -> true
select ST_Contains(st_polygon(2,2, 2,3, 3,2), "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
ST_Contains(st_polygon(0,0, 2,3, 3,2), "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
ST_Contains(st_polygon(0,0, 10,0, 0, 10), "POLYGON ((1 1, 1 4, 4 4, 4 1))");
---- TYPES
BOOLEAN,BOOLEAN,BOOLEAN
---- RESULTS
false,false,true
====
---- QUERY
# ST_Contains(STRING, BINARY)
# First argument is WKT text, second is built with ST_Polygon(). Checks whether each
# triangle contains the square with corners (1,1) and (4,4):
#   1. triangle (2,0),(2,3),(3,2) does not cover the square -> false
#   2. triangle (0,0),(2,3),(3,2) does not cover the square -> false
#   3. large triangle (0,0),(10,0),(0,10) covers the square -> true
# NOTE(review): the first polygon starts at (2 0) here while the (BINARY, STRING)
# test uses (2,2); this looks like a typo. The expected result is false either way.
select ST_Contains("POLYGON ((2 0, 2 3, 3 2))", ST_Polygon(1,1, 1,4, 4,4, 4,1)),
ST_Contains("POLYGON ((0 0, 2 3, 3 2))", ST_Polygon(1,1, 1,4, 4,4, 4,1)),
ST_Contains("POLYGON ((0 0, 10 0, 0 10))", ST_Polygon(1,1, 1,4, 4,4, 4,1));
---- TYPES
BOOLEAN,BOOLEAN,BOOLEAN
---- RESULTS
false,false,true
====
---- QUERY
# ST_Contains(STRING, STRING)
# Both arguments are WKT text. Checks whether each triangle contains the square with
# corners (1,1) and (4,4):
#   1. triangle (2,0),(2,3),(3,2) does not cover the square -> false
#   2. triangle (0,0),(2,3),(3,2) does not cover the square -> false
#   3. large triangle (0,0),(10,0),(0,10) covers the square -> true
# NOTE(review): the first polygon starts at (2 0) here while the (BINARY, STRING)
# test uses (2,2); this looks like a typo. The expected result is false either way.
select ST_Contains("POLYGON ((2 0, 2 3, 3 2))", "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
ST_Contains("POLYGON ((0 0, 2 3, 3 2))", "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
ST_Contains("POLYGON ((0 0, 10 0, 0 10))", "POLYGON ((1 1, 1 4, 4 4, 4 1))");
---- TYPES
BOOLEAN,BOOLEAN,BOOLEAN
---- RESULTS
false,false,true
====
---- QUERY
# ST_Within(BINARY, STRING)
# First argument is built with st_polygon(), second is WKT text. Checks whether each
# triangle lies within the square with corners (1,1) and (4,4):
#   1. small triangle lying inside the square                 -> true
#   2. triangle with vertex (0,0) outside the square          -> false
#   3. large triangle (0,0),(10,0),(0,10) exceeds the square  -> false
select ST_Within(st_polygon(2,2, 2,3, 3,2), "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
ST_Within(st_polygon(0,0, 2,3, 3,2), "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
ST_Within(st_polygon(0,0, 10,0, 0, 10), "POLYGON ((1 1, 1 4, 4 4, 4 1))");
---- TYPES
BOOLEAN,BOOLEAN,BOOLEAN
---- RESULTS
true,false,false
====
---- QUERY
# ST_Within(STRING, BINARY)
# First argument is WKT text, second is built with ST_Polygon(). Checks whether each
# triangle lies within the square with corners (1,1) and (4,4):
#   1. small triangle lying inside the square                 -> true
#   2. triangle with vertex (0,0) outside the square          -> false
#   3. large triangle (0,0),(10,0),(0,10) exceeds the square  -> false
select ST_Within("POLYGON ((2 2, 2 3, 3 2))", ST_Polygon(1,1, 1,4, 4,4, 4,1)),
ST_Within("POLYGON ((0 0, 2 3, 3 2))", ST_Polygon(1,1, 1,4, 4,4, 4,1)),
ST_Within("POLYGON ((0 0, 10 0, 0 10))", ST_Polygon(1,1, 1,4, 4,4, 4,1));
---- TYPES
BOOLEAN,BOOLEAN,BOOLEAN
---- RESULTS
true,false,false
====
---- QUERY
# ST_Within(STRING, STRING)
# Both arguments are WKT text. Checks whether each triangle lies within the square
# with corners (1,1) and (4,4):
#   1. small triangle lying inside the square                 -> true
#   2. triangle with vertex (0,0) outside the square          -> false
#   3. large triangle (0,0),(10,0),(0,10) exceeds the square  -> false
select ST_Within("POLYGON ((2 2, 2 3, 3 2))", "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
ST_Within("POLYGON ((0 0, 2 3, 3 2))", "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
ST_Within("POLYGON ((0 0, 10 0, 0 10))", "POLYGON ((1 1, 1 4, 4 4, 4 1))");
---- TYPES
BOOLEAN,BOOLEAN,BOOLEAN
---- RESULTS
true,false,false
====
---- QUERY
# ST_Crosses(BINARY, STRING)
# First argument is built with st_linestring(), second is WKT text. Each line is
# tested against the square with corners (1,1) and (4,4):
#   1. (2,2)-(3,3) stays inside the square                -> false
#   2. (0,0)-(1,1) only reaches the corner point (1,1)    -> false
#   3. (0,0)-(3,3) starts outside and ends inside         -> true
select ST_Crosses(st_linestring(2,2, 3,3), "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
ST_Crosses(st_linestring(0,0, 1,1), "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
ST_Crosses(st_linestring(0,0, 3,3), "POLYGON ((1 1, 1 4, 4 4, 4 1))");
---- TYPES
BOOLEAN,BOOLEAN,BOOLEAN
---- RESULTS
false,false,true
====
---- QUERY
# ST_Crosses(STRING, BINARY)
# First argument is WKT text, second is built with ST_Polygon(). Each line is tested
# against the square with corners (1,1) and (4,4):
#   1. (2,2)-(3,3) stays inside the square                -> false
#   2. (0,0)-(1,1) only reaches the corner point (1,1)    -> false
#   3. (0,0)-(3,3) starts outside and ends inside         -> true
select ST_Crosses("LINESTRING (2 2, 3 3)", ST_Polygon(1,1, 1,4, 4,4, 4,1)),
ST_Crosses("LINESTRING (0 0, 1 1)", ST_Polygon(1,1, 1,4, 4,4, 4,1)),
ST_Crosses("LINESTRING (0 0, 3 3)", ST_Polygon(1,1, 1,4, 4,4, 4,1));
---- TYPES
BOOLEAN,BOOLEAN,BOOLEAN
---- RESULTS
false,false,true
====
---- QUERY
# ST_Crosses(STRING, STRING)
# Both arguments are WKT text. Each line is tested against the square with corners
# (1,1) and (4,4):
#   1. (2,2)-(3,3) stays inside the square                -> false
#   2. (0,0)-(1,1) only reaches the corner point (1,1)    -> false
#   3. (0,0)-(3,3) starts outside and ends inside         -> true
select ST_Crosses("LINESTRING (2 2, 3 3)", "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
ST_Crosses("LINESTRING (0 0, 1 1)", "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
ST_Crosses("LINESTRING (0 0, 3 3)", "POLYGON ((1 1, 1 4, 4 4, 4 1))");
---- TYPES
BOOLEAN,BOOLEAN,BOOLEAN
---- RESULTS
false,false,true
====
---- QUERY
# ST_Disjoint(BINARY, STRING)
# First argument is built with st_linestring(), second is WKT text. Each line is
# tested against the square with corners (1,1) and (4,4):
#   1. (2,2)-(3,3) lies inside the square               -> false
#   2. (1,0)-(0,1) has no point in common with it       -> true
#   3. (0,0)-(3,3) runs into the square                 -> false
select ST_Disjoint(st_linestring(2,2, 3,3), "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
ST_Disjoint(st_linestring(1,0, 0,1), "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
ST_Disjoint(st_linestring(0,0, 3,3), "POLYGON ((1 1, 1 4, 4 4, 4 1))");
---- TYPES
BOOLEAN,BOOLEAN,BOOLEAN
---- RESULTS
false,true,false
====
---- QUERY
# ST_Disjoint(STRING, BINARY)
# First argument is WKT text, second is built with ST_Polygon(). Each line is tested
# against the square with corners (1,1) and (4,4):
#   1. (2,2)-(3,3) lies inside the square               -> false
#   2. (1,0)-(0,1) has no point in common with it       -> true
#   3. (0,0)-(3,3) runs into the square                 -> false
select ST_Disjoint("LINESTRING (2 2, 3 3)", ST_Polygon(1,1, 1,4, 4,4, 4,1)),
ST_Disjoint("LINESTRING (1 0, 0 1)", ST_Polygon(1,1, 1,4, 4,4, 4,1)),
ST_Disjoint("LINESTRING (0 0, 3 3)", ST_Polygon(1,1, 1,4, 4,4, 4,1));
---- TYPES
BOOLEAN,BOOLEAN,BOOLEAN
---- RESULTS
false,true,false
====
---- QUERY
# ST_Disjoint(STRING, STRING)
# Both arguments are WKT text. Each line is tested against the square with corners
# (1,1) and (4,4):
#   1. (2,2)-(3,3) lies inside the square               -> false
#   2. (1,0)-(0,1) has no point in common with it       -> true
#   3. (0,0)-(3,3) runs into the square                 -> false
select ST_Disjoint("LINESTRING (2 2, 3 3)", "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
ST_Disjoint("LINESTRING (1 0, 0 1)", "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
ST_Disjoint("LINESTRING (0 0, 3 3)", "POLYGON ((1 1, 1 4, 4 4, 4 1))");
---- TYPES
BOOLEAN,BOOLEAN,BOOLEAN
---- RESULTS
false,true,false
====
---- QUERY
# ST_Equals(BINARY, STRING)
# First argument is built with st_polygon(), second is WKT text. Each triangle is
# compared with the triangle (1,1),(1,4),(4,1):
#   1. different vertices -> false
#   2. same vertices      -> true
#   3. different vertices -> false
select ST_Equals(st_polygon(2,0, 2,3, 3,0), "POLYGON ((1 1, 1 4, 4 1))"),
ST_Equals(st_polygon(1,1, 1,4, 4,1), "POLYGON ((1 1, 1 4, 4 1))"),
ST_Equals(st_polygon(0,0, 0,1, 1,0), "POLYGON ((1 1, 1 4, 4 1))");
---- TYPES
BOOLEAN,BOOLEAN,BOOLEAN
---- RESULTS
false,true,false
====
---- QUERY
# ST_Equals(STRING, BINARY)
# First argument is WKT text, second is built with ST_Polygon(). Each triangle is
# compared with the triangle (1,1),(1,4),(4,1):
#   1. different vertices -> false
#   2. same vertices      -> true
#   3. different vertices -> false
select ST_Equals("POLYGON ((2 0, 2 3, 3 0))", ST_Polygon(1,1, 1,4, 4,1)),
ST_Equals("POLYGON ((1 1, 1 4, 4 1))", ST_Polygon(1,1, 1,4, 4,1)),
ST_Equals("POLYGON ((0 0, 0 1, 1 0))", ST_Polygon(1,1, 1,4, 4,1));
---- TYPES
BOOLEAN,BOOLEAN,BOOLEAN
---- RESULTS
false,true,false
====
---- QUERY
# ST_Equals(STRING, STRING)
# Both arguments are WKT text. Each triangle is compared with the triangle
# (1,1),(1,4),(4,1):
#   1. different vertices -> false
#   2. same vertices      -> true
#   3. different vertices -> false
select ST_Equals("POLYGON ((2 0, 2 3, 3 0))", "POLYGON ((1 1, 1 4, 4 1))"),
ST_Equals("POLYGON ((1 1, 1 4, 4 1))", "POLYGON ((1 1, 1 4, 4 1))"),
ST_Equals("POLYGON ((0 0, 0 1, 1 0))", "POLYGON ((1 1, 1 4, 4 1))");
---- TYPES
BOOLEAN,BOOLEAN,BOOLEAN
---- RESULTS
false,true,false
====

View File

@@ -2717,6 +2717,7 @@ select ST_AsText(ST_SetSRID(ST_GeomFromText('MultiLineString((0 80, 0.03 80.04))
'MULTILINESTRING ((0 80, 0.03 80.04))'
====
---- QUERY
# TODO: move these new tests to geospatial-esri-extra?
# NOTE: Due to HIVE-29323 ESRI returns MULTIPOLYGON EMPTY for single point
# PostGIS would return: POINT (1 2)
select ST_AsText(ST_ConvexHull(ST_Point(1, 2)));

View File

@@ -22,6 +22,7 @@ from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
from tests.common.skip import SkipIfApacheHive
# Tab-separated row for the Java st_point(STRING) function, compared against the
# rows of the SHOW FUNCTIONS result.
# NOTE(review): the columns look like return type, signature, binary type and an
# "is persistent" flag -- confirm against the SHOW FUNCTIONS output format.
ST_POINT_SIGNATURE = "BINARY\tst_point(STRING)\tJAVA\ttrue"
# Same row layout for st_x(BINARY) listed as a BUILTIN function.
ST_X_SIGNATURE_BUILTIN = "DOUBLE\tst_x(BINARY)\tBUILTIN\ttrue"
# Statement used by the tests to list the functions of _impala_builtins.
SHOW_FUNCTIONS = "show functions in _impala_builtins"
@@ -34,9 +35,11 @@ class TestGeospatialLibrary(CustomClusterTestSuite):
def test_disabled(self):
  """Neither the Java st_point() row nor the builtin st_x() row may appear in the
  SHOW FUNCTIONS result."""
  listed_functions = self.execute_query(SHOW_FUNCTIONS).data
  for signature in (ST_POINT_SIGNATURE, ST_X_SIGNATURE_BUILTIN):
    assert signature not in listed_functions
@SkipIfApacheHive.feature_not_supported
@pytest.mark.execute_serially
def test_enabled(self):
  """Both the Java st_point() row and the builtin st_x() row must appear in the
  SHOW FUNCTIONS result."""
  listed_functions = self.execute_query(SHOW_FUNCTIONS).data
  for signature in (ST_POINT_SIGNATURE, ST_X_SIGNATURE_BUILTIN):
    assert signature in listed_functions

View File

@@ -18,14 +18,29 @@
from __future__ import absolute_import, division, print_function
from tests.common.impala_test_suite import ImpalaTestSuite
from tests.common.skip import SkipIfApacheHive
from tests.common.test_dimensions import create_single_exec_option_dimension
class TestGeospatialFuctions(ImpalaTestSuite):
  """Tests the geospatial builtin functions"""
  # The docstring has to be the first statement of the class body; placed after a
  # method it is only a no-op string expression and the class has no __doc__.

  @classmethod
  def add_test_dimensions(cls):
    """Adds the dimension returned by create_single_exec_option_dimension() to the
    test matrix and constrains the matrix to the 'parquet' file format."""
    super(TestGeospatialFuctions, cls).add_test_dimensions()
    cls.ImpalaTestMatrix.add_dimension(create_single_exec_option_dimension())
    # Tests do not use tables at the moment, skip other fileformats than Parquet.
    cls.ImpalaTestMatrix.add_constraint(lambda v:
        v.get_value('table_format').file_format == 'parquet')

@SkipIfApacheHive.feature_not_supported
def test_esri_geospatial_functions(self, vector):
  """Runs the ESRI geospatial .test files one after the other with 'vector'."""
  test_files = (
      # tests generated from
      # https://github.com/Esri/spatial-framework-for-hadoop/tree/master/hive/test
      'QueryTest/geospatial-esri',
      # manual tests added
      'QueryTest/geospatial-esri-extra',
  )
  for test_file in test_files:
    self.run_test_case(test_file, vector)
@SkipIfApacheHive.feature_not_supported
def test_esri_geospatial_planner(self, vector):
# These tests are not among planner tests because with default flags
# geospatial builtin functions are not loaded.