mirror of
https://github.com/apache/impala.git
synced 2025-12-19 18:12:08 -05:00
IMPALA-14573: port critical geospatial functions to c++ (part 1)
This commit contains the simpler parts from https://gerrit.cloudera.org/#/c/20602 This mainly means accessors for the header of the binary format and bounding box check (st_envIntersects). New tests for not yet covered functions / overloads are also added. For details of the binary format see be/src/exprs/geo/shape-format.h Differences from the PR above: Only a subset of functions are added. The criteria were: 1. the native function must be fully compatible with the Java version* 2. must not rely on (de)serializing the full geometry 3. the function must be tested 1 implies 2 because (de)serialization is not implemented yet in the original patch for >2d geometries, which would break compatibility for the Java version for XYZ/XYM/XYZM geometries. *: there are 2 known differences: 1. NULL handling: the Java functions return error instead of NULL when getting a NULL parameter 2. st_envIntersects() doesn't check if the SRID matches - the Java library looks inconsistent about this Because the native functions are fairly safe replacements for the Java ones, they are always used when geospatial_library=HIVE_ESRI. Change-Id: I0ff950a25320549290a83a3b1c31ce828dd68e3c Reviewed-on: http://gerrit.cloudera.org:8080/23700 Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
This commit is contained in:
committed by
Impala Public Jenkins
parent
fe41448780
commit
780e6683a2
@@ -531,6 +531,7 @@ set (IMPALA_LIBS
|
||||
ExecPaimon
|
||||
Exprs
|
||||
ExprsIr
|
||||
ExprsGeoIr
|
||||
GlobalFlags
|
||||
histogram_proto
|
||||
ImpalaThrift
|
||||
|
||||
@@ -77,7 +77,7 @@ function(COMPILE_TO_IR_C_ARRAY IR_C_FILE VARNAME)
|
||||
${CLANG_INCLUDE_FLAGS} ${IR_INPUT_FILES} -o ${IR_TMP_OUTPUT_FILE}
|
||||
COMMAND ${LLVM_OPT_EXECUTABLE} ${LLVM_OPT_IR_FLAGS} < ${IR_TMP_OUTPUT_FILE} > ${IR_OUTPUT_FILE}
|
||||
COMMAND rm ${IR_TMP_OUTPUT_FILE}
|
||||
DEPENDS ExecIr ExecAvroIr ExecKuduIr ExprsIr RuntimeIr UdfIr UtilIr ${IR_INPUT_FILES}
|
||||
DEPENDS ExecIr ExecAvroIr ExprsGeoIr ExecKuduIr ExprsIr RuntimeIr UdfIr UtilIr ${IR_INPUT_FILES}
|
||||
)
|
||||
|
||||
# Convert LLVM bytecode to C array.
|
||||
|
||||
@@ -49,6 +49,7 @@
|
||||
#include "exprs/date-functions-ir.cc"
|
||||
#include "exprs/decimal-functions-ir.cc"
|
||||
#include "exprs/decimal-operators-ir.cc"
|
||||
#include "exprs/geo/geospatial-functions-ir.cc"
|
||||
#include "exprs/hive-udf-call-ir.cc"
|
||||
#include "exprs/iceberg-functions-ir.cc"
|
||||
#include "exprs/in-predicate-ir.cc"
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
add_subdirectory(geo)
|
||||
|
||||
# where to put generated libraries
|
||||
set(LIBRARY_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}/exprs")
|
||||
|
||||
33
be/src/exprs/geo/CMakeLists.txt
Normal file
33
be/src/exprs/geo/CMakeLists.txt
Normal file
@@ -0,0 +1,33 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
# where to put generated libraries
|
||||
set(LIBRARY_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}/exprs/geo")
|
||||
|
||||
# where to put generated binaries
|
||||
set(EXECUTABLE_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}/exprs/geo")
|
||||
|
||||
# Library target holding the geospatial functions; its only source is
# geospatial-functions-ir.cc from this directory.
add_library(ExprsGeoIr
|
||||
geospatial-functions-ir.cc
|
||||
)
|
||||
# Build-order edge only (nothing is linked): the gen-deps target must be built before
# ExprsGeoIr. NOTE(review): gen-deps is defined outside this file, presumably the
# generated-sources target - confirm.
add_dependencies(ExprsGeoIr gen-deps)
|
||||
|
||||
# Stop processing this CMakeLists.txt here when tests are disabled; anything added
# below this guard is skipped in that configuration.
if (BUILD_WITH_NO_TESTS)
|
||||
return()
|
||||
endif()
|
||||
|
||||
# Add tests here.
|
||||
49
be/src/exprs/geo/common.h
Normal file
49
be/src/exprs/geo/common.h
Normal file
@@ -0,0 +1,49 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "udf/udf.h"
|
||||
#include "util/bit-util.h"
|
||||
|
||||
namespace impala::geo {
|
||||
|
||||
using impala_udf::FunctionContext;
|
||||
using impala_udf::StringVal;
|
||||
|
||||
// see https://github.com/Esri/spatial-framework-for-hadoop/blob/v2.2.0/hive/src/main/java/com/esri/hadoop/hive/GeometryUtils.java#L21
|
||||
enum OGCType {
|
||||
UNKNOWN = 0,
|
||||
ST_POINT = 1,
|
||||
ST_LINESTRING = 2,
|
||||
ST_POLYGON = 3,
|
||||
ST_MULTIPOINT = 4,
|
||||
ST_MULTILINESTRING = 5,
|
||||
ST_MULTIPOLYGON = 6
|
||||
};
|
||||
|
||||
constexpr std::array<const char*, ST_MULTIPOLYGON + 1> OGCTypeToStr = {{
|
||||
"UNKNOWN",
|
||||
"ST_POINT",
|
||||
"ST_LINESTRING",
|
||||
"ST_POLYGON",
|
||||
"ST_MULTIPOINT",
|
||||
"ST_MULTILINESTRING",
|
||||
"ST_MULTIPOLYGON"
|
||||
}};
|
||||
|
||||
} // namespace impala::geo
|
||||
112
be/src/exprs/geo/geospatial-functions-ir.cc
Normal file
112
be/src/exprs/geo/geospatial-functions-ir.cc
Normal file
@@ -0,0 +1,112 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#include "exprs/geo/geospatial-functions.h"
|
||||
|
||||
#include "exprs/geo/common.h"
|
||||
#include "exprs/geo/shape-format.h"
|
||||
#include "runtime/string-value.inline.h"
|
||||
#include "udf/udf-internal.h"
|
||||
#include "udf/udf.h"
|
||||
|
||||
#include "common/names.h"
|
||||
|
||||
namespace impala::geo {
|
||||
|
||||
// Accessors
|
||||
|
||||
// Returns the x coordinate of an ST_POINT. Any other geometry type, a NULL input or a
// header rejected by ParseHeader() yields NULL.
DoubleVal GeospatialFunctions::st_X(FunctionContext* ctx, const StringVal& geom) {
  OGCType type;
  // 'type' is only read when ParseHeader() succeeded (short-circuit evaluation).
  const bool is_valid_point = ParseHeader(ctx, geom, &type) && type == ST_POINT;
  return is_valid_point ? DoubleVal(getMinX(geom)) : DoubleVal::null();
}
|
||||
|
||||
// Returns the y coordinate of an ST_POINT. Any other geometry type, a NULL input or a
// header rejected by ParseHeader() yields NULL.
DoubleVal GeospatialFunctions::st_Y(FunctionContext* ctx, const StringVal& geom) {
  OGCType type;
  // 'type' is only read when ParseHeader() succeeded (short-circuit evaluation).
  const bool is_valid_point = ParseHeader(ctx, geom, &type) && type == ST_POINT;
  return is_valid_point ? DoubleVal(getMinY(geom)) : DoubleVal::null();
}
|
||||
|
||||
// Returns the first x value stored in the header: the x coordinate for points, the
// bounding box minimum for other types. NULL if ParseHeader() rejects 'geom'.
DoubleVal GeospatialFunctions::st_MinX(FunctionContext* ctx, const StringVal& geom) {
  OGCType type;
  const bool header_ok = ParseHeader(ctx, geom, &type);
  return header_ok ? DoubleVal(getMinX(geom)) : DoubleVal::null();
}
|
||||
|
||||
// Returns the first y value stored in the header: the y coordinate for points, the
// bounding box minimum for other types. NULL if ParseHeader() rejects 'geom'.
DoubleVal GeospatialFunctions::st_MinY(FunctionContext* ctx, const StringVal& geom) {
  OGCType type;
  const bool header_ok = ParseHeader(ctx, geom, &type);
  return header_ok ? DoubleVal(getMinY(geom)) : DoubleVal::null();
}
|
||||
|
||||
// Returns the maximum x of the geometry's bounding box, or NULL if ParseHeader()
// rejects 'geom'.
DoubleVal GeospatialFunctions::st_MaxX(FunctionContext* ctx, const StringVal& geom) {
  OGCType type;
  if (!ParseHeader(ctx, geom, &type)) return DoubleVal::null();
  // A point stores a single coordinate pair instead of a bounding box, so its maximum
  // is read from the same slot as its minimum.
  const double max_x = (type == ST_POINT) ? getMinX(geom) : getMaxX(geom);
  return DoubleVal(max_x);
}
|
||||
|
||||
// Returns the maximum y of the geometry's bounding box, or NULL if ParseHeader()
// rejects 'geom'.
DoubleVal GeospatialFunctions::st_MaxY(FunctionContext* ctx, const StringVal& geom) {
  OGCType type;
  if (!ParseHeader(ctx, geom, &type)) return DoubleVal::null();
  // A point stores a single coordinate pair instead of a bounding box, so its maximum
  // is read from the same slot as its minimum.
  const double max_y = (type == ST_POINT) ? getMinY(geom) : getMaxY(geom);
  return DoubleVal(max_y);
}
|
||||
|
||||
// Returns the OGC type name of 'geom' (e.g. "ST_POINT") as looked up by
// getGeometryType(), or NULL if ParseHeader() rejects 'geom'.
StringVal GeospatialFunctions::st_GeometryType(
    FunctionContext* ctx, const StringVal& geom) {
  OGCType type;
  const bool header_ok = ParseHeader(ctx, geom, &type);
  if (!header_ok) return StringVal::null();
  return StringVal(getGeometryType(type));
}
|
||||
|
||||
// Returns the SRID stored in the OGC header of 'geom', or NULL if ParseHeader()
// rejects 'geom'.
IntVal GeospatialFunctions::st_Srid(FunctionContext* ctx, const StringVal& geom) {
  OGCType type;
  const bool header_ok = ParseHeader(ctx, geom, &type);
  if (!header_ok) return IntVal::null();
  return IntVal(getSrid(geom));
}
|
||||
|
||||
// Returns a copy of 'geom' whose SRID header field is overwritten with 'srid'.
// - NULL 'srid': 'geom' is returned unchanged.
// - 'geom' rejected by ParseHeader() (NULL or malformed): NULL is returned.
// - The copy cannot be allocated: NULL is returned.
StringVal GeospatialFunctions::st_SetSrid(FunctionContext* ctx, const StringVal& geom,
    const IntVal& srid) {
  if (srid.is_null) return geom;
  OGCType ogc_type;
  if (!ParseHeader(ctx, geom, &ogc_type)) return StringVal::null();

  StringVal res = StringVal::CopyFrom(ctx, geom.ptr, geom.len);
  // CopyFrom() produces a NULL StringVal when the allocation fails; writing the SRID
  // into it would go through a null buffer, so bail out first.
  if (UNLIKELY(res.is_null)) return StringVal::null();
  setSrid(res, srid.val);
  return res;
}
|
||||
|
||||
// Predicates
|
||||
|
||||
// Returns whether the x/y bounding boxes of the two geometries intersect. NULL if
// ParseHeader() rejects either argument; the right-hand side is only parsed when the
// left-hand side was accepted.
BooleanVal GeospatialFunctions::st_EnvIntersects(
    FunctionContext* ctx, const StringVal& lhs_geom, const StringVal& rhs_geom) {
  // TODO: compare srid? The ESRI UDF does it, but it is not done in other relations:
  // https://github.com/apache/hive/blob/rel/release-4.2.0/ql/src/java/org/apache/hadoop/hive/ql/udf/esri/ST_EnvIntersects.java#L63
  OGCType lhs_type;
  if (!ParseHeader(ctx, lhs_geom, &lhs_type)) return BooleanVal::null();
  OGCType rhs_type;
  if (!ParseHeader(ctx, rhs_geom, &rhs_type)) return BooleanVal::null();
  return BooleanVal(bBoxIntersects(lhs_geom, rhs_geom, lhs_type, rhs_type));
}
|
||||
|
||||
}
|
||||
58
be/src/exprs/geo/geospatial-functions.h
Normal file
58
be/src/exprs/geo/geospatial-functions.h
Normal file
@@ -0,0 +1,58 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <map>
|
||||
|
||||
#include "common/status.h"
|
||||
#include "udf/udf.h"
|
||||
|
||||
namespace impala::geo {
|
||||
|
||||
using impala_udf::FunctionContext;
|
||||
using impala_udf::BooleanVal;
|
||||
using impala_udf::IntVal;
|
||||
using impala_udf::BigIntVal;
|
||||
using impala_udf::DoubleVal;
|
||||
using impala_udf::StringVal;
|
||||
|
||||
class Expr;
|
||||
class OpcodeRegistry;
|
||||
struct StringValue;
|
||||
class TupleRow;
|
||||
|
||||
// Native implementations of geospatial builtins that only need the header of the
// binary geometry format (see exprs/geo/shape-format.h). All functions are static and
// take the geometry as a StringVal.
class GeospatialFunctions {
|
||||
public:
|
||||
// Accessors
|
||||
// st_X / st_Y: coordinate of a point geometry.
static DoubleVal st_X(FunctionContext* ctx, const StringVal& geom);
|
||||
static DoubleVal st_Y(FunctionContext* ctx, const StringVal& geom);
|
||||
// st_Min* / st_Max*: x/y extent of the geometry's bounding box.
static DoubleVal st_MinX(FunctionContext* ctx, const StringVal& geom);
|
||||
static DoubleVal st_MinY(FunctionContext* ctx, const StringVal& geom);
|
||||
static DoubleVal st_MaxX(FunctionContext* ctx, const StringVal& geom);
|
||||
static DoubleVal st_MaxY(FunctionContext* ctx, const StringVal& geom);
|
||||
// Name of the geometry's OGC type.
static StringVal st_GeometryType(FunctionContext* ctx, const StringVal& geom);
|
||||
// Reads the SRID (reference system id) stored in the geometry header.
static IntVal st_Srid(FunctionContext* ctx, const StringVal& geom);
|
||||
// Returns a geometry whose header SRID is set to 'srid'.
static StringVal st_SetSrid(FunctionContext* ctx, const StringVal& geom,
|
||||
const IntVal& srid);
|
||||
|
||||
// Predicates
|
||||
// Bounding box (envelope) intersection check of two geometries.
static BooleanVal st_EnvIntersects(
|
||||
FunctionContext* ctx, const StringVal& lhs,const StringVal& rhs);
|
||||
};
|
||||
|
||||
} // namespace impala::geo
|
||||
294
be/src/exprs/geo/shape-format.h
Normal file
294
be/src/exprs/geo/shape-format.h
Normal file
@@ -0,0 +1,294 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
#include <cstdint>
#include <cstring>
|
||||
|
||||
#include "exprs/geo/common.h"
|
||||
|
||||
namespace impala::geo {
|
||||
|
||||
// This file is responsible for handling the header of the "esri shape" format used for
|
||||
// geometries encoded as BINARY. This format is fully compatible with Java framework
|
||||
// https://github.com/Esri/spatial-framework-for-hadoop
|
||||
// A 5 byte "OGC" header followed by the same format as the one used in shape files:
|
||||
// https://www.esri.com/content/dam/esrisites/sitecore-archive/Files/Pdfs/library/whitepapers/pdfs/shapefile.pdf
|
||||
//
|
||||
// The OGC header contains:
|
||||
// - 4 byte big endian SRID (reference system id)
|
||||
// - 1 byte type id (OGCType)
|
||||
// - no padding
|
||||
// see https://github.com/Esri/spatial-framework-for-hadoop/blob/v2.2.0/hive/src/main/java/com/esri/hadoop/hive/GeometryUtils.java#L16
|
||||
//
|
||||
// The header of the shape file format ("ESRI header") contains:
|
||||
// - 4 byte type id
|
||||
// for POINT type store the coordinates:
|
||||
// - dimension * 8 byte to store x/y/z/m as doubles
|
||||
// for other types store the bounding box:
|
||||
// - min coordinates: 2 * 8 byte to store x/y as doubles
|
||||
// - max coordinates: 2 * 8 byte to store x/y as doubles
|
||||
// - min/max z/m are stored later in the headers - this file doesn't access those
|
||||
// - no padding
|
||||
//
|
||||
// For some types this is followed by a variable length part, which is not handled here.
|
||||
// A POC example for handling a full type:
|
||||
// https://gerrit.cloudera.org/#/c/20602/6/be/src/exprs/geo/poly-line-shape-format.cc
|
||||
//
|
||||
// Currently only 2 dimensions are handled (xy), min/max for z/m has to be accessed
|
||||
// with Java functions. The xy bounding box has the same offset and format in
|
||||
// xyz/xym/xyzm geometries so x/y accessors work in this case too.
|
||||
//
|
||||
// Functions are defined in the header to allow inlining bounding box check in codegen.
|
||||
|
||||
constexpr int SRID_SIZE = 4;
|
||||
constexpr int OGC_TYPE_SIZE = 1;
|
||||
|
||||
constexpr int SRID_OFFSET = 0;
|
||||
constexpr int OGC_TYPE_OFFSET = 4;
|
||||
|
||||
static_assert(OGC_TYPE_OFFSET == SRID_SIZE);
|
||||
|
||||
|
||||
constexpr int ESRI_TYPE_SIZE = 4;
|
||||
constexpr int ESRI_TYPE_OFFSET = 5;
|
||||
|
||||
constexpr int X1_OFFSET = 9;
|
||||
constexpr int Y1_OFFSET = X1_OFFSET + sizeof(double);
|
||||
constexpr int X2_OFFSET = Y1_OFFSET + sizeof(double);
|
||||
constexpr int Y2_OFFSET = X2_OFFSET + sizeof(double);
|
||||
|
||||
constexpr int MIN_GEOM_SIZE = 9;
|
||||
constexpr int MIN_POINT_SIZE = 25;
|
||||
constexpr int MIN_NON_POINT_SIZE = 41;
|
||||
|
||||
static_assert(ESRI_TYPE_OFFSET == OGC_TYPE_OFFSET + OGC_TYPE_SIZE);
|
||||
static_assert(X1_OFFSET == ESRI_TYPE_OFFSET + ESRI_TYPE_SIZE);
|
||||
static_assert(MIN_GEOM_SIZE == SRID_SIZE + OGC_TYPE_SIZE + ESRI_TYPE_SIZE);
|
||||
static_assert(MIN_POINT_SIZE == MIN_GEOM_SIZE + 2 * sizeof(double));
|
||||
static_assert(MIN_NON_POINT_SIZE == MIN_POINT_SIZE + 2 * sizeof(double));
|
||||
|
||||
// See https://github.com/Esri/geometry-api-java/blob/v2.2.4/src/main/java/com/esri/core/geometry/ShapeType.java#L27
|
||||
enum EsriType: uint32_t {
|
||||
ShapeNull = 0,
|
||||
ShapePoint = 1,
|
||||
ShapePointM = 21,
|
||||
ShapePointZM = 11,
|
||||
ShapePointZ = 9,
|
||||
ShapeMultiPoint = 8,
|
||||
ShapeMultiPointM = 28,
|
||||
ShapeMultiPointZM = 18,
|
||||
ShapeMultiPointZ = 20,
|
||||
ShapePolyline = 3,
|
||||
ShapePolylineM = 23,
|
||||
ShapePolylineZM = 13,
|
||||
ShapePolylineZ = 10,
|
||||
ShapePolygon = 5,
|
||||
ShapePolygonM = 25,
|
||||
ShapePolygonZM = 15,
|
||||
ShapePolygonZ = 19,
|
||||
ShapeMultiPatchM = 31,
|
||||
ShapeMultiPatch = 32,
|
||||
ShapeGeneralPolyline = 50,
|
||||
ShapeGeneralPolygon = 51,
|
||||
ShapeGeneralPoint = 52,
|
||||
ShapeGeneralMultiPoint = 53,
|
||||
ShapeGeneralMultiPatch = 54,
|
||||
ShapeTypeLast = 55
|
||||
};
|
||||
|
||||
constexpr std::array<EsriType, ST_MULTIPOLYGON + 1> OGCTypeToEsriType = {{
|
||||
ShapeNull, // UNKNOWN
|
||||
ShapePoint, // ST_POINT
|
||||
ShapePolyline, // ST_LINESTRING
|
||||
ShapePolygon, // ST_POLYGON
|
||||
ShapeMultiPoint, // ST_MULTIPOINT
|
||||
ShapePolyline, // ST_MULTILINESTRING
|
||||
ShapePolygon // ST_MULTIPOLYGON
|
||||
}};
|
||||
|
||||
template <class T>
|
||||
T readFromGeom(const StringVal& geom, int offset) {
|
||||
DCHECK_GE(geom.len, offset + sizeof(T));
|
||||
return *reinterpret_cast<T*>(geom.ptr + offset);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void writeToGeom(const T& val, StringVal& geom, int offset) {
|
||||
DCHECK_GE(geom.len, offset + sizeof(T));
|
||||
T* ptr = reinterpret_cast<T*>(geom.ptr + offset);
|
||||
*ptr = val;
|
||||
}
|
||||
|
||||
// getters/setters for OGC header:
|
||||
|
||||
inline uint32_t getSrid(const StringVal& geom) {
|
||||
static_assert(SRID_SIZE == sizeof(uint32_t));
|
||||
|
||||
// SRID is in big endian format in 'geom', but Impala only supports little endian so we
|
||||
// have to convert it.
|
||||
#ifndef IS_LITTLE_ENDIAN
|
||||
static_assert(false, "Only the little endian byte order is supported.");
|
||||
#endif
|
||||
const uint32_t srid_bytes = readFromGeom<uint32_t>(geom, SRID_OFFSET);
|
||||
return BitUtil::ByteSwap(srid_bytes);
|
||||
}
|
||||
|
||||
// Returns the type byte of the OGC header converted to OGCType. The byte is not
// validated here.
inline OGCType getOGCType(const StringVal& geom) {
  static_assert(OGC_TYPE_SIZE == sizeof(char));
  return static_cast<OGCType>(readFromGeom<char>(geom, OGC_TYPE_OFFSET));
}
|
||||
|
||||
inline constexpr const char* getGeometryType(OGCType ogc_type) {
|
||||
return OGCTypeToStr[ogc_type];
|
||||
}
|
||||
|
||||
// Stores 'srid' in the OGC header of 'geom'. The header keeps it in big endian byte
// order, so the (little endian) host value is byte-swapped before it is written.
inline void setSrid(StringVal& geom, uint32_t srid) {
#ifndef IS_LITTLE_ENDIAN
  static_assert(false, "Only the little endian byte order is supported.");
#endif
  static_assert(SRID_SIZE == sizeof(uint32_t));
  const uint32_t big_endian_srid = BitUtil::ByteSwap(srid);
  writeToGeom<uint32_t>(big_endian_srid, geom, SRID_OFFSET);
}
|
||||
|
||||
inline void setOGCType(StringVal& geom, OGCType ogc_type) {
|
||||
writeToGeom<char>(ogc_type, geom, OGC_TYPE_OFFSET);
|
||||
}
|
||||
|
||||
// getters/setters for ESRI header:
|
||||
|
||||
inline EsriType getEsriType(const StringVal& geom) {
|
||||
static_assert(ESRI_TYPE_SIZE == sizeof(EsriType));
|
||||
return readFromGeom<EsriType>(geom, ESRI_TYPE_OFFSET);
|
||||
}
|
||||
|
||||
// Reads the double at X1_OFFSET: the x coordinate for points, the bounding box minimum
// x for other types.
inline double getMinX(const StringVal& geom) {
|
||||
return readFromGeom<double>(geom, X1_OFFSET);
|
||||
}
|
||||
|
||||
// Reads the double at Y1_OFFSET: the y coordinate for points, the bounding box minimum
// y for other types.
inline double getMinY(const StringVal& geom) {
|
||||
return readFromGeom<double>(geom, Y1_OFFSET);
|
||||
}
|
||||
|
||||
// Reads the double at X2_OFFSET: the bounding box maximum x of non-point types. Points
// do not store a bounding box, so this must not be used for them.
inline double getMaxX(const StringVal& geom) {
|
||||
return readFromGeom<double>(geom, X2_OFFSET);
|
||||
}
|
||||
|
||||
// Reads the double at Y2_OFFSET: the bounding box maximum y of non-point types. Points
// do not store a bounding box, so this must not be used for them.
inline double getMaxY(const StringVal& geom) {
|
||||
return readFromGeom<double>(geom, Y2_OFFSET);
|
||||
}
|
||||
|
||||
inline void setEsriType(StringVal& geom, EsriType esri_type) {
|
||||
static_assert(ESRI_TYPE_SIZE == sizeof(EsriType));
|
||||
writeToGeom<EsriType>(esri_type, geom, ESRI_TYPE_OFFSET);
|
||||
}
|
||||
|
||||
// Writes 'x' at X1_OFFSET (x coordinate for points, bounding box minimum x otherwise).
inline void setMinX(StringVal& geom, double x) {
|
||||
writeToGeom<double>(x, geom, X1_OFFSET);
|
||||
}
|
||||
|
||||
// Writes 'y' at Y1_OFFSET (y coordinate for points, bounding box minimum y otherwise).
inline void setMinY(StringVal& geom, double y) {
|
||||
writeToGeom<double>(y, geom, Y1_OFFSET);
|
||||
}
|
||||
|
||||
// Writes 'x' at X2_OFFSET (bounding box maximum x of non-point types).
inline void setMaxX(StringVal& geom, double x) {
|
||||
writeToGeom<double>(x, geom, X2_OFFSET);
|
||||
}
|
||||
|
||||
// Writes 'y' at Y2_OFFSET (bounding box maximum y of non-point types).
inline void setMaxY(StringVal& geom, double y) {
|
||||
writeToGeom<double>(y, geom, Y2_OFFSET);
|
||||
}
|
||||
|
||||
// Validate header and get type
|
||||
// Validates the header of 'geom' and stores its OGC type in '*ogc_type'.
// Returns true if the header is usable; '*ogc_type' is only written in that case.
// Returns false without setting an error if 'geom' is NULL, and false with an error
// set on 'ctx' if the buffer is too short for its type or the type byte is not a known,
// non-UNKNOWN OGCType. After a successful return the x/y header accessors for the
// returned type can be used safely.
inline bool ParseHeader(FunctionContext* ctx, const StringVal& geom, OGCType* ogc_type) {
  DCHECK(ogc_type != nullptr);

  if (UNLIKELY(geom.is_null)) return false;

  if (UNLIKELY(geom.len < MIN_GEOM_SIZE)) {
    ctx->SetError("Geometry size too small.");
    return false;
  }

  // Range-check the raw byte before converting it: casting a value outside the range
  // of OGCType to the enum is undefined behaviour, which would make a check on the
  // enum value itself unreliable.
  static_assert(OGC_TYPE_SIZE == sizeof(uint8_t));
  const uint8_t raw_ogc_type = readFromGeom<uint8_t>(geom, OGC_TYPE_OFFSET);
  if (UNLIKELY(raw_ogc_type > ST_MULTIPOLYGON)) {
    ctx->SetError("Invalid geometry type.");
    return false;
  }
  const OGCType header_ogc_type = static_cast<OGCType>(raw_ogc_type);

  if (UNLIKELY(header_ogc_type == UNKNOWN)) {
    ctx->SetError("Geometry type UNKNOWN.");
    return false;
  }

  // Points store only their coordinates, other types store a full x/y bounding box,
  // hence the different minimum sizes.
  if (UNLIKELY(header_ogc_type == ST_POINT)) {
    if (geom.len < MIN_POINT_SIZE) {
      ctx->SetError("Geometry size too small for ST_POINT type.");
      return false;
    }
  } else {
    if (UNLIKELY(geom.len < MIN_NON_POINT_SIZE)) {
      ctx->SetError("Geometry size too small for non ST_POINT type.");
      return false;
    }
  }

  // TODO: fix Z/M/ZM types and move to a function called from DCHECK
  // ogc vs ESRI type checking can be useful during development, but it
  // is unnecessary overhead in production
  /*const EsriType esri_type = getEsriType(geom);
  DCHECK_LT(header_ogc_type, OGCTypeToEsriType.size());
  const EsriType expected_esri_type = OGCTypeToEsriType[header_ogc_type];
  if (expected_esri_type != esri_type) {
    // TODO: To test it we need to create a table with 3D types, we cannot create them
    // with native constructors.
    ctx->SetError(strings::Substitute(
        "Invalid geometry: OGCType and EsriType do not match. "
        "Because the OGCType is $0, expected EsriType $1, found $2.",
        OGCTypeToStr[header_ogc_type], expected_esri_type, esri_type).c_str());
  }*/

  *ogc_type = header_ogc_type;
  return true;
}
|
||||
|
||||
// Bounding box check for x/y coordinates of two geometries. z/m are ignored, which
|
||||
// is consistent with the original Java functions.
|
||||
inline bool bBoxIntersects(const StringVal& lhs_geom, const StringVal rhs_geom,
|
||||
OGCType lhs_type, OGCType rhs_type) {
|
||||
bool is_lhs_point = lhs_type == ST_POINT;
|
||||
double xmin1 = getMinX(lhs_geom);
|
||||
double ymin1 = getMinY(lhs_geom);
|
||||
double xmax1 = is_lhs_point ? xmin1 : getMaxX(lhs_geom);
|
||||
double ymax1 = is_lhs_point ? ymin1 : getMaxY(lhs_geom);
|
||||
|
||||
bool is_rhs_point = rhs_type == ST_POINT;
|
||||
double xmin2 = getMinX(rhs_geom);
|
||||
double ymin2 = getMinY(rhs_geom);
|
||||
double xmax2 = is_rhs_point ? xmin2 : getMaxX(rhs_geom);
|
||||
double ymax2 = is_rhs_point ? ymin2 : getMaxY(rhs_geom);
|
||||
|
||||
if (xmax1 < xmin2 || xmax2 < xmin1 || ymax1 < ymin2 || ymax2 < ymin1 ) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace impala::geo
|
||||
@@ -33,6 +33,7 @@
|
||||
#include "exprs/date-functions.h"
|
||||
#include "exprs/decimal-functions.h"
|
||||
#include "exprs/decimal-operators.h"
|
||||
#include "exprs/geo/geospatial-functions.h"
|
||||
#include "exprs/hive-udf-call.h"
|
||||
#include "exprs/iceberg-functions.h"
|
||||
#include "exprs/in-predicate.h"
|
||||
@@ -459,6 +460,7 @@ void ScalarExprEvaluator::InitBuiltinsDummy() {
|
||||
DataSketchesFunctions::DsHllEstimate(nullptr, StringVal::null());
|
||||
DecimalFunctions::Precision(nullptr, DecimalVal::null());
|
||||
DecimalOperators::CastToDecimalVal(nullptr, DecimalVal::null());
|
||||
geo::GeospatialFunctions::st_MaxX(nullptr, StringVal::null());
|
||||
IcebergFunctions::TruncatePartitionTransform(nullptr, IntVal::null(), IntVal::null());
|
||||
InPredicate::InIterate(nullptr, BigIntVal::null(), 0, nullptr);
|
||||
IsNullPredicate::IsNull(nullptr, BooleanVal::null());
|
||||
|
||||
@@ -48,42 +48,67 @@ import org.apache.impala.catalog.Type;
|
||||
import org.apache.impala.hive.executor.BinaryToBinaryHiveLegacyFunctionExtractor;
|
||||
import org.apache.impala.hive.executor.HiveJavaFunction;
|
||||
import org.apache.impala.hive.executor.HiveLegacyJavaFunction;
|
||||
import org.apache.impala.service.BackendConfig;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
|
||||
import org.apache.impala.analysis.FunctionName;
|
||||
import org.apache.impala.thrift.TFunctionBinaryType;
|
||||
import org.apache.impala.thrift.TGeospatialLibrary;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
public class HiveEsriGeospatialBuiltins {
|
||||
private final static Logger LOG = LoggerFactory.getLogger(
|
||||
HiveEsriGeospatialBuiltins.class);
|
||||
|
||||
/**
|
||||
* Initializes Hive's ESRI geospatial UDFs as builtins.
|
||||
*/
|
||||
public static void initBuiltins(Db db) {
|
||||
addLegacyUDFs(db);
|
||||
TGeospatialLibrary lib = BackendConfig.INSTANCE.getGeospatialLibrary();
|
||||
// Currently all native functions are expected to be 100% compatible with the Java
|
||||
// version. This is not true for the full function set of
|
||||
// https://gerrit.cloudera.org/#/c/20602/ , which sets based on a flag to allow
|
||||
// full compatibility with Hive.
|
||||
boolean addNatives = true;
|
||||
addLegacyUDFs(db, addNatives);
|
||||
addGenericUDFs(db);
|
||||
addVarargsUDFs(db);
|
||||
if(addNatives) {
|
||||
addNatives(db);
|
||||
}
|
||||
}
|
||||
|
||||
private static void addLegacyUDFs(Db db) {
|
||||
private static void addLegacyUDFs(Db db, boolean addNatives) {
|
||||
List<UDF> legacyUDFs = Arrays.asList(new ST_Area(), new ST_AsBinary(),
|
||||
new ST_AsGeoJson(), new ST_AsJson(), new ST_AsShape(), new ST_AsText(),
|
||||
new ST_Boundary(), new ST_Buffer(), new ST_Centroid(), new ST_CoordDim(),
|
||||
new ST_Difference(), new ST_Dimension(), new ST_Distance(), new ST_EndPoint(),
|
||||
new ST_Envelope(), new ST_EnvIntersects(), new ST_ExteriorRing(),
|
||||
new ST_Envelope(), new ST_ExteriorRing(),
|
||||
new ST_GeodesicLengthWGS84(), new ST_GeomCollection(), new ST_GeometryN(),
|
||||
new ST_GeometryType(), new ST_GeomFromShape(), new ST_GeomFromText(),
|
||||
new ST_GeomFromShape(), new ST_GeomFromText(),
|
||||
new ST_GeomFromWKB(), new ST_InteriorRingN(), new ST_Intersection(),
|
||||
new ST_Is3D(), new ST_IsClosed(), new ST_IsEmpty(), new ST_IsMeasured(),
|
||||
new ST_IsRing(), new ST_IsSimple(), new ST_Length(), new ST_LineFromWKB(),
|
||||
new ST_M(), new ST_MaxM(), new ST_MaxX(), new ST_MaxY(), new ST_MaxZ(),
|
||||
new ST_MinM(), new ST_MinX(), new ST_MinY(), new ST_MinZ(), new ST_MLineFromWKB(),
|
||||
new ST_M(), new ST_MaxM(), new ST_MaxZ(),
|
||||
new ST_MinM(), new ST_MinZ(), new ST_MLineFromWKB(),
|
||||
new ST_MPointFromWKB(), new ST_MPolyFromWKB(), new ST_NumGeometries(),
|
||||
new ST_NumInteriorRing(), new ST_NumPoints(), new ST_Point(),
|
||||
new ST_PointFromWKB(), new ST_PointN(), new ST_PointZ(), new ST_PolyFromWKB(),
|
||||
new ST_Relate(), new ST_SRID(), new ST_StartPoint(), new ST_SymmetricDiff(),
|
||||
new ST_X(), new ST_Y(), new ST_Z(), new ST_SetSRID());
|
||||
new ST_Relate(), new ST_StartPoint(), new ST_SymmetricDiff(),
|
||||
new ST_Z());
|
||||
|
||||
List<UDF> legacyUDFsWithNativeImplementation = Arrays.asList(
|
||||
new ST_EnvIntersects(), new ST_GeometryType(),
|
||||
new ST_MaxX(), new ST_MaxY(),
|
||||
new ST_MinX(), new ST_MinY(),
|
||||
new ST_SRID(), new ST_SetSRID(),
|
||||
new ST_X(), new ST_Y()
|
||||
);
|
||||
if (!addNatives) {
  // 'legacyUDFs' was created with Arrays.asList(), which returns a fixed-size list:
  // calling addAll() on it throws UnsupportedOperationException. Copy it into a
  // growable list before appending the Java versions of the natively implemented UDFs.
  legacyUDFs = new java.util.ArrayList<>(legacyUDFs);
  legacyUDFs.addAll(legacyUDFsWithNativeImplementation);
}
|
||||
|
||||
for (UDF udf : legacyUDFs) {
|
||||
for (Function fn : extractFromLegacyHiveBuiltin(udf, db.getName())) {
|
||||
@@ -206,4 +231,35 @@ public class HiveEsriGeospatialBuiltins {
|
||||
})
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
private static void addNative(Db db, String fnNameBase, String fnNameSuffix,
|
||||
boolean varArgs, Type retType, Type... argTypes) {
|
||||
String udfName = fnNameBase.toLowerCase();
|
||||
String geospatialFnPrefix = "impala::geo::GeospatialFunctions::";
|
||||
String cppSymbolName = geospatialFnPrefix + fnNameBase + fnNameSuffix;
|
||||
|
||||
db.addScalarBuiltin(udfName, cppSymbolName, true, varArgs, retType, argTypes);
|
||||
}
|
||||
|
||||
// Convenience overload: registers a native function whose C++ symbol has no suffix,
// i.e. forwards to the overload above with an empty 'fnNameSuffix'.
private static void addNative(Db db, String fnName, boolean varArgs, Type retType,
|
||||
Type... argTypes) {
|
||||
addNative(db, fnName, "", varArgs, retType, argTypes);
|
||||
}
|
||||
|
||||
private static void addNatives(Db db) {
|
||||
// Legacy UDFs.
|
||||
// Accessors.
|
||||
addNative(db, "st_MinX", false, Type.DOUBLE, Type.BINARY);
|
||||
addNative(db, "st_MaxX", false, Type.DOUBLE, Type.BINARY);
|
||||
addNative(db, "st_MinY", false, Type.DOUBLE, Type.BINARY);
|
||||
addNative(db, "st_MaxY", false, Type.DOUBLE, Type.BINARY);
|
||||
addNative(db, "st_X", false, Type.DOUBLE, Type.BINARY);
|
||||
addNative(db, "st_Y", false, Type.DOUBLE, Type.BINARY);
|
||||
addNative(db, "st_Srid", false, Type.INT, Type.BINARY);
|
||||
addNative(db, "st_SetSrid", false, Type.BINARY, Type.BINARY, Type.INT);
|
||||
addNative(db, "st_GeometryType", false, Type.STRING, Type.BINARY);
|
||||
|
||||
// Predicates.
|
||||
addNative(db, "st_EnvIntersects", false, Type.BOOLEAN, Type.BINARY, Type.BINARY);
|
||||
}
|
||||
}
|
||||
|
||||
394
testdata/workloads/functional-query/queries/QueryTest/geospatial-esri-extra.test
vendored
Normal file
394
testdata/workloads/functional-query/queries/QueryTest/geospatial-esri-extra.test
vendored
Normal file
@@ -0,0 +1,394 @@
|
||||
=====
|
||||
---- QUERY
|
||||
select st_bin(1, "point empty")
|
||||
---- TYPES
|
||||
BIGINT
|
||||
---- RESULTS
|
||||
0
|
||||
====
|
||||
---- QUERY
|
||||
select st_bin(1, "point(10 10)")
|
||||
---- TYPES
|
||||
BIGINT
|
||||
---- RESULTS
|
||||
4611685985093119520
|
||||
====
|
||||
---- QUERY
|
||||
select st_bin(1.0, st_point(10, 10))
|
||||
---- TYPES
|
||||
BIGINT
|
||||
---- RESULTS
|
||||
4611685985093119520
|
||||
====
|
||||
---- QUERY
|
||||
select st_bin(1.0, "point(-200 50)")
|
||||
---- TYPES
|
||||
BIGINT
|
||||
---- RESULTS
|
||||
4611685863613099350
|
||||
====
|
||||
---- QUERY
|
||||
select st_bin(1, st_point(-200, 50))
|
||||
---- TYPES
|
||||
BIGINT
|
||||
---- RESULTS
|
||||
4611685863613099350
|
||||
====
|
||||
---- QUERY
|
||||
select st_bin(1, "point(-500 -1000)")
|
||||
---- TYPES
|
||||
BIGINT
|
||||
---- RESULTS
|
||||
4611689052463623000
|
||||
====
|
||||
---- QUERY
|
||||
select st_bin(1.0, st_point(-500, -1000))
|
||||
---- TYPES
|
||||
BIGINT
|
||||
---- RESULTS
|
||||
4611689052463623000
|
||||
====
|
||||
---- QUERY
|
||||
select st_bin(1.0, "point(800 -5000)")
|
||||
---- TYPES
|
||||
BIGINT
|
||||
---- RESULTS
|
||||
4611701200465620300
|
||||
====
|
||||
---- QUERY
|
||||
select st_bin(1, st_point(800, -5000))
|
||||
---- TYPES
|
||||
BIGINT
|
||||
---- RESULTS
|
||||
4611701200465620300
|
||||
====
|
||||
|
||||
---- QUERY
|
||||
select st_astext(st_binenvelope(1, 4611685985093119520));
|
||||
---- TYPES
|
||||
STRING
|
||||
---- RESULTS
|
||||
'POLYGON ((9.5 9.5, 10.5 9.5, 10.5 10.5, 9.5 10.5, 9.5 9.5))'
|
||||
====
|
||||
---- QUERY
|
||||
select st_astext(st_binenvelope(1.0, 4611685985093119520));
|
||||
---- TYPES
|
||||
STRING
|
||||
---- RESULTS
|
||||
'POLYGON ((9.5 9.5, 10.5 9.5, 10.5 10.5, 9.5 10.5, 9.5 9.5))'
|
||||
====
|
||||
|
||||
---- QUERY
|
||||
select st_astext(st_binenvelope(1, 4611685863613099350));
|
||||
---- TYPES
|
||||
STRING
|
||||
---- RESULTS
|
||||
'POLYGON ((-200.5 49.5, -199.5 49.5, -199.5 50.5, -200.5 50.5, -200.5 49.5))'
|
||||
====
|
||||
---- QUERY
|
||||
select st_astext(st_binenvelope(1.0, 4611685863613099350));
|
||||
---- TYPES
|
||||
STRING
|
||||
---- RESULTS
|
||||
'POLYGON ((-200.5 49.5, -199.5 49.5, -199.5 50.5, -200.5 50.5, -200.5 49.5))'
|
||||
====
|
||||
|
||||
---- QUERY
|
||||
select st_astext(st_binenvelope(1, 4611689052463623000));
|
||||
---- TYPES
|
||||
STRING
|
||||
---- RESULTS
|
||||
'POLYGON ((-500.5 -1000.5, -499.5 -1000.5, -499.5 -999.5, -500.5 -999.5, -500.5 -1000.5))'
|
||||
====
|
||||
---- QUERY
|
||||
select st_astext(st_binenvelope(1.0, 4611689052463623000));
|
||||
---- TYPES
|
||||
STRING
|
||||
---- RESULTS
|
||||
'POLYGON ((-500.5 -1000.5, -499.5 -1000.5, -499.5 -999.5, -500.5 -999.5, -500.5 -1000.5))'
|
||||
====
|
||||
|
||||
---- QUERY
|
||||
select st_astext(st_binenvelope(1, 4611701200465620300));
|
||||
---- TYPES
|
||||
STRING
|
||||
---- RESULTS
|
||||
'POLYGON ((799.5 -5000.5, 800.5 -5000.5, 800.5 -4999.5, 799.5 -4999.5, 799.5 -5000.5))'
|
||||
====
|
||||
---- QUERY
|
||||
select st_astext(st_binenvelope(1.0, 4611701200465620300));
|
||||
---- TYPES
|
||||
STRING
|
||||
---- RESULTS
|
||||
'POLYGON ((799.5 -5000.5, 800.5 -5000.5, 800.5 -4999.5, 799.5 -4999.5, 799.5 -5000.5))'
|
||||
====
|
||||
|
||||
---- QUERY
|
||||
select st_astext(st_binenvelope(1, st_point(1, 2)));
|
||||
---- TYPES
|
||||
STRING
|
||||
---- RESULTS
|
||||
'POLYGON ((1 1, 2 1, 2 2, 1 2, 1 1))'
|
||||
====
|
||||
---- QUERY
|
||||
select st_astext(st_binenvelope(1.0, st_point(1, 2)));
|
||||
---- TYPES
|
||||
STRING
|
||||
---- RESULTS
|
||||
'POLYGON ((1 1, 2 1, 2 2, 1 2, 1 1))'
|
||||
====
|
||||
|
||||
---- QUERY
|
||||
select st_astext(st_binenvelope(1, "point(1 2)"));
|
||||
---- TYPES
|
||||
STRING
|
||||
---- RESULTS
|
||||
'POLYGON ((1 1, 2 1, 2 2, 1 2, 1 1))'
|
||||
====
|
||||
---- QUERY
|
||||
select st_astext(st_binenvelope(1.0, "point(1 2)"));
|
||||
---- TYPES
|
||||
STRING
|
||||
---- RESULTS
|
||||
'POLYGON ((1 1, 2 1, 2 2, 1 2, 1 1))'
|
||||
====
|
||||
---- QUERY
|
||||
# ST_Intersects(BINARY, STRING)
|
||||
select ST_Intersects(st_polygon(2,0, 2,3, 3,0), "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
|
||||
ST_Intersects(st_polygon(0,0, 0,1, 0.5,1), "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
|
||||
ST_Intersects(st_polygon(0,0, 0,1, 1,1), "POLYGON ((1 1, 1 4, 4 4, 4 1))");
|
||||
---- TYPES
|
||||
BOOLEAN,BOOLEAN,BOOLEAN
|
||||
---- RESULTS
|
||||
true,false,true
|
||||
====
|
||||
---- QUERY
|
||||
# ST_Intersects(STRING, BINARY)
|
||||
select ST_Intersects("POLYGON ((2 0, 2 3, 3 0))", ST_Polygon(1,1, 1,4, 4,4, 4,1)),
|
||||
ST_Intersects("POLYGON ((0 0, 0 1, 0.5 1))", ST_Polygon(1,1, 1,4, 4,4, 4,1)),
|
||||
ST_Intersects("POLYGON ((0 0, 0 1, 1 1))", ST_Polygon(1,1, 1,4, 4,4, 4,1));
|
||||
---- TYPES
|
||||
BOOLEAN,BOOLEAN,BOOLEAN
|
||||
---- RESULTS
|
||||
true,false,true
|
||||
====
|
||||
---- QUERY
|
||||
# ST_Intersects(STRING, STRING)
|
||||
select ST_Intersects("POLYGON ((2 0, 2 3, 3 0))", "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
|
||||
ST_Intersects("POLYGON ((0 0, 0 1, 0.5 1))", "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
|
||||
ST_Intersects("POLYGON ((0 0, 0 1, 1 1))", "POLYGON ((1 1, 1 4, 4 4, 4 1))");
|
||||
---- TYPES
|
||||
BOOLEAN,BOOLEAN,BOOLEAN
|
||||
---- RESULTS
|
||||
true,false,true
|
||||
====
|
||||
---- QUERY
|
||||
# ST_Overlaps(BINARY, STRING)
|
||||
select ST_Overlaps(st_polygon(2,0, 2,3, 3,0), "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
|
||||
ST_Overlaps(st_polygon(0,0, 0,1, 0.5,1), "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
|
||||
ST_Overlaps(st_polygon(0,0, 0,1, 1,1), "POLYGON ((1 1, 1 4, 4 4, 4 1))");
|
||||
---- TYPES
|
||||
BOOLEAN,BOOLEAN,BOOLEAN
|
||||
---- RESULTS
|
||||
true,false,false
|
||||
====
|
||||
---- QUERY
|
||||
# ST_Overlaps(STRING, BINARY)
|
||||
select ST_Overlaps("POLYGON ((2 0, 2 3, 3 0))", ST_Polygon(1,1, 1,4, 4,4, 4,1)),
|
||||
ST_Overlaps("POLYGON ((0 0, 0 1, 0.5 1))", ST_Polygon(1,1, 1,4, 4,4, 4,1)),
|
||||
ST_Overlaps("POLYGON ((0 0, 0 1, 1 1))", ST_Polygon(1,1, 1,4, 4,4, 4,1));
|
||||
---- TYPES
|
||||
BOOLEAN,BOOLEAN,BOOLEAN
|
||||
---- RESULTS
|
||||
true,false,false
|
||||
====
|
||||
---- QUERY
|
||||
# ST_Overlaps(STRING, STRING)
|
||||
select ST_Overlaps("POLYGON ((2 0, 2 3, 3 0))", "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
|
||||
ST_Overlaps("POLYGON ((0 0, 0 1, 0.5 1))", "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
|
||||
ST_Overlaps("POLYGON ((0 0, 0 1, 1 1))", "POLYGON ((1 1, 1 4, 4 4, 4 1))");
|
||||
---- TYPES
|
||||
BOOLEAN,BOOLEAN,BOOLEAN
|
||||
---- RESULTS
|
||||
true,false,false
|
||||
====
|
||||
---- QUERY
|
||||
# ST_Touches(BINARY, STRING)
|
||||
select ST_Touches(st_polygon(2,0, 2,3, 3,0), "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
|
||||
ST_Touches(st_polygon(0,0, 0,1, 0.5,1), "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
|
||||
ST_Touches(st_polygon(0,0, 0,1, 1,1), "POLYGON ((1 1, 1 4, 4 4, 4 1))");
|
||||
---- TYPES
|
||||
BOOLEAN,BOOLEAN,BOOLEAN
|
||||
---- RESULTS
|
||||
false,false,true
|
||||
====
|
||||
---- QUERY
|
||||
# ST_Touches(STRING, BINARY)
|
||||
select ST_Touches("POLYGON ((2 0, 2 3, 3 0))", ST_Polygon(1,1, 1,4, 4,4, 4,1)),
|
||||
ST_Touches("POLYGON ((0 0, 0 1, 0.5 1))", ST_Polygon(1,1, 1,4, 4,4, 4,1)),
|
||||
ST_Touches("POLYGON ((0 0, 0 1, 1 1))", ST_Polygon(1,1, 1,4, 4,4, 4,1));
|
||||
---- TYPES
|
||||
BOOLEAN,BOOLEAN,BOOLEAN
|
||||
---- RESULTS
|
||||
false,false,true
|
||||
====
|
||||
---- QUERY
|
||||
# ST_Touches(STRING, STRING)
|
||||
select ST_Touches("POLYGON ((2 0, 2 3, 3 0))", "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
|
||||
ST_Touches("POLYGON ((0 0, 0 1, 0.5 1))", "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
|
||||
ST_Touches("POLYGON ((0 0, 0 1, 1 1))", "POLYGON ((1 1, 1 4, 4 4, 4 1))");
|
||||
---- TYPES
|
||||
BOOLEAN,BOOLEAN,BOOLEAN
|
||||
---- RESULTS
|
||||
false,false,true
|
||||
====
|
||||
---- QUERY
|
||||
# ST_Contains(BINARY, STRING)
|
||||
select ST_Contains(st_polygon(2,2, 2,3, 3,2), "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
|
||||
ST_Contains(st_polygon(0,0, 2,3, 3,2), "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
|
||||
ST_Contains(st_polygon(0,0, 10,0, 0, 10), "POLYGON ((1 1, 1 4, 4 4, 4 1))");
|
||||
---- TYPES
|
||||
BOOLEAN,BOOLEAN,BOOLEAN
|
||||
---- RESULTS
|
||||
false,false,true
|
||||
====
|
||||
---- QUERY
|
||||
# ST_Contains(STRING, BINARY)
|
||||
select ST_Contains("POLYGON ((2 0, 2 3, 3 2))", ST_Polygon(1,1, 1,4, 4,4, 4,1)),
|
||||
ST_Contains("POLYGON ((0 0, 2 3, 3 2))", ST_Polygon(1,1, 1,4, 4,4, 4,1)),
|
||||
ST_Contains("POLYGON ((0 0, 10 0, 0 10))", ST_Polygon(1,1, 1,4, 4,4, 4,1));
|
||||
---- TYPES
|
||||
BOOLEAN,BOOLEAN,BOOLEAN
|
||||
---- RESULTS
|
||||
false,false,true
|
||||
====
|
||||
---- QUERY
|
||||
# ST_Contains(STRING, STRING)
|
||||
select ST_Contains("POLYGON ((2 0, 2 3, 3 2))", "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
|
||||
ST_Contains("POLYGON ((0 0, 2 3, 3 2))", "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
|
||||
ST_Contains("POLYGON ((0 0, 10 0, 0 10))", "POLYGON ((1 1, 1 4, 4 4, 4 1))");
|
||||
---- TYPES
|
||||
BOOLEAN,BOOLEAN,BOOLEAN
|
||||
---- RESULTS
|
||||
false,false,true
|
||||
====
|
||||
---- QUERY
|
||||
# ST_Within(BINARY, STRING)
|
||||
select ST_Within(st_polygon(2,2, 2,3, 3,2), "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
|
||||
ST_Within(st_polygon(0,0, 2,3, 3,2), "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
|
||||
ST_Within(st_polygon(0,0, 10,0, 0, 10), "POLYGON ((1 1, 1 4, 4 4, 4 1))");
|
||||
---- TYPES
|
||||
BOOLEAN,BOOLEAN,BOOLEAN
|
||||
---- RESULTS
|
||||
true,false,false
|
||||
====
|
||||
---- QUERY
|
||||
# ST_Within(STRING, BINARY)
|
||||
select ST_Within("POLYGON ((2 2, 2 3, 3 2))", ST_Polygon(1,1, 1,4, 4,4, 4,1)),
|
||||
ST_Within("POLYGON ((0 0, 2 3, 3 2))", ST_Polygon(1,1, 1,4, 4,4, 4,1)),
|
||||
ST_Within("POLYGON ((0 0, 10 0, 0 10))", ST_Polygon(1,1, 1,4, 4,4, 4,1));
|
||||
---- TYPES
|
||||
BOOLEAN,BOOLEAN,BOOLEAN
|
||||
---- RESULTS
|
||||
true,false,false
|
||||
====
|
||||
---- QUERY
|
||||
# ST_Within(STRING, STRING)
|
||||
select ST_Within("POLYGON ((2 2, 2 3, 3 2))", "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
|
||||
ST_Within("POLYGON ((0 0, 2 3, 3 2))", "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
|
||||
ST_Within("POLYGON ((0 0, 10 0, 0 10))", "POLYGON ((1 1, 1 4, 4 4, 4 1))");
|
||||
---- TYPES
|
||||
BOOLEAN,BOOLEAN,BOOLEAN
|
||||
---- RESULTS
|
||||
true,false,false
|
||||
====
|
||||
---- QUERY
|
||||
# ST_Crosses(BINARY, STRING)
|
||||
select ST_Crosses(st_linestring(2,2, 3,3), "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
|
||||
ST_Crosses(st_linestring(0,0, 1,1), "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
|
||||
ST_Crosses(st_linestring(0,0, 3,3), "POLYGON ((1 1, 1 4, 4 4, 4 1))");
|
||||
---- TYPES
|
||||
BOOLEAN,BOOLEAN,BOOLEAN
|
||||
---- RESULTS
|
||||
false,false,true
|
||||
====
|
||||
---- QUERY
|
||||
# ST_Crosses(STRING, BINARY)
|
||||
select ST_Crosses("LINESTRING (2 2, 3 3)", ST_Polygon(1,1, 1,4, 4,4, 4,1)),
|
||||
ST_Crosses("LINESTRING (0 0, 1 1)", ST_Polygon(1,1, 1,4, 4,4, 4,1)),
|
||||
ST_Crosses("LINESTRING (0 0, 3 3)", ST_Polygon(1,1, 1,4, 4,4, 4,1));
|
||||
---- TYPES
|
||||
BOOLEAN,BOOLEAN,BOOLEAN
|
||||
---- RESULTS
|
||||
false,false,true
|
||||
====
|
||||
---- QUERY
|
||||
# ST_Crosses(STRING, STRING)
|
||||
select ST_Crosses("LINESTRING (2 2, 3 3)", "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
|
||||
ST_Crosses("LINESTRING (0 0, 1 1)", "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
|
||||
ST_Crosses("LINESTRING (0 0, 3 3)", "POLYGON ((1 1, 1 4, 4 4, 4 1))");
|
||||
---- TYPES
|
||||
BOOLEAN,BOOLEAN,BOOLEAN
|
||||
---- RESULTS
|
||||
false,false,true
|
||||
====
|
||||
---- QUERY
|
||||
# ST_Disjoint(BINARY, STRING)
|
||||
select ST_Disjoint(st_linestring(2,2, 3,3), "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
|
||||
ST_Disjoint(st_linestring(1,0, 0,1), "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
|
||||
ST_Disjoint(st_linestring(0,0, 3,3), "POLYGON ((1 1, 1 4, 4 4, 4 1))");
|
||||
---- TYPES
|
||||
BOOLEAN,BOOLEAN,BOOLEAN
|
||||
---- RESULTS
|
||||
false,true,false
|
||||
====
|
||||
---- QUERY
|
||||
# ST_Disjoint(STRING, BINARY)
|
||||
select ST_Disjoint("LINESTRING (2 2, 3 3)", ST_Polygon(1,1, 1,4, 4,4, 4,1)),
|
||||
ST_Disjoint("LINESTRING (1 0, 0 1)", ST_Polygon(1,1, 1,4, 4,4, 4,1)),
|
||||
ST_Disjoint("LINESTRING (0 0, 3 3)", ST_Polygon(1,1, 1,4, 4,4, 4,1));
|
||||
---- TYPES
|
||||
BOOLEAN,BOOLEAN,BOOLEAN
|
||||
---- RESULTS
|
||||
false,true,false
|
||||
====
|
||||
---- QUERY
|
||||
# ST_Disjoint(STRING, STRING)
|
||||
select ST_Disjoint("LINESTRING (2 2, 3 3)", "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
|
||||
ST_Disjoint("LINESTRING (1 0, 0 1)", "POLYGON ((1 1, 1 4, 4 4, 4 1))"),
|
||||
ST_Disjoint("LINESTRING (0 0, 3 3)", "POLYGON ((1 1, 1 4, 4 4, 4 1))");
|
||||
---- TYPES
|
||||
BOOLEAN,BOOLEAN,BOOLEAN
|
||||
---- RESULTS
|
||||
false,true,false
|
||||
====
|
||||
---- QUERY
|
||||
# ST_Equals(BINARY, STRING)
|
||||
select ST_Equals(st_polygon(2,0, 2,3, 3,0), "POLYGON ((1 1, 1 4, 4 1))"),
|
||||
ST_Equals(st_polygon(1,1, 1,4, 4,1), "POLYGON ((1 1, 1 4, 4 1))"),
|
||||
ST_Equals(st_polygon(0,0, 0,1, 1,0), "POLYGON ((1 1, 1 4, 4 1))");
|
||||
---- TYPES
|
||||
BOOLEAN,BOOLEAN,BOOLEAN
|
||||
---- RESULTS
|
||||
false,true,false
|
||||
====
|
||||
---- QUERY
|
||||
# ST_Equals(STRING, BINARY)
|
||||
select ST_Equals("POLYGON ((2 0, 2 3, 3 0))", ST_Polygon(1,1, 1,4, 4,1)),
|
||||
ST_Equals("POLYGON ((1 1, 1 4, 4 1))", ST_Polygon(1,1, 1,4, 4,1)),
|
||||
ST_Equals("POLYGON ((0 0, 0 1, 1 0))", ST_Polygon(1,1, 1,4, 4,1));
|
||||
---- TYPES
|
||||
BOOLEAN,BOOLEAN,BOOLEAN
|
||||
---- RESULTS
|
||||
false,true,false
|
||||
====
|
||||
---- QUERY
|
||||
# ST_Equals(STRING, STRING)
|
||||
select ST_Equals("POLYGON ((2 0, 2 3, 3 0))", "POLYGON ((1 1, 1 4, 4 1))"),
|
||||
ST_Equals("POLYGON ((1 1, 1 4, 4 1))", "POLYGON ((1 1, 1 4, 4 1))"),
|
||||
ST_Equals("POLYGON ((0 0, 0 1, 1 0))", "POLYGON ((1 1, 1 4, 4 1))");
|
||||
---- TYPES
|
||||
BOOLEAN,BOOLEAN,BOOLEAN
|
||||
---- RESULTS
|
||||
false,true,false
|
||||
====
|
||||
@@ -2717,6 +2717,7 @@ select ST_AsText(ST_SetSRID(ST_GeomFromText('MultiLineString((0 80, 0.03 80.04))
|
||||
'MULTILINESTRING ((0 80, 0.03 80.04))'
|
||||
====
|
||||
---- QUERY
|
||||
# TODO: move these new tests to geospatial-esri-extra?
|
||||
# NOTE: Due to HIVE-29323 ESRI returns MULTIPOLYGON EMPTY for single point
|
||||
# PostGIS would return: POINT (1 2)
|
||||
select ST_AsText(ST_ConvexHull(ST_Point(1, 2)));
|
||||
|
||||
@@ -22,6 +22,7 @@ from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
|
||||
from tests.common.skip import SkipIfApacheHive
|
||||
|
||||
ST_POINT_SIGNATURE = "BINARY\tst_point(STRING)\tJAVA\ttrue"
|
||||
ST_X_SIGNATURE_BUILTIN = "DOUBLE\tst_x(BINARY)\tBUILTIN\ttrue"
|
||||
SHOW_FUNCTIONS = "show functions in _impala_builtins"
|
||||
|
||||
|
||||
@@ -34,9 +35,11 @@ class TestGeospatialLibrary(CustomClusterTestSuite):
|
||||
def test_disabled(self):
  """Neither the Java st_point() signature nor the builtin st_x() signature may show
  up in the output of SHOW_FUNCTIONS."""
  listed_functions = self.execute_query(SHOW_FUNCTIONS).data
  assert ST_POINT_SIGNATURE not in listed_functions
  assert ST_X_SIGNATURE_BUILTIN not in listed_functions
|
||||
|
||||
@SkipIfApacheHive.feature_not_supported
@pytest.mark.execute_serially
def test_enabled(self):
  """Both the Java st_point() signature and the builtin st_x() signature must show up
  in the output of SHOW_FUNCTIONS."""
  listed_functions = self.execute_query(SHOW_FUNCTIONS).data
  assert ST_POINT_SIGNATURE in listed_functions
  assert ST_X_SIGNATURE_BUILTIN in listed_functions
|
||||
|
||||
@@ -18,14 +18,29 @@
|
||||
from __future__ import absolute_import, division, print_function
|
||||
from tests.common.impala_test_suite import ImpalaTestSuite
|
||||
from tests.common.skip import SkipIfApacheHive
|
||||
from tests.common.test_dimensions import create_single_exec_option_dimension
|
||||
|
||||
|
||||
class TestGeospatialFuctions(ImpalaTestSuite):
  """Tests the geospatial builtin functions"""

  @classmethod
  def add_test_dimensions(cls):
    """Restricts the test matrix to a single exec option combination and to the
    Parquet table format."""
    super(TestGeospatialFuctions, cls).add_test_dimensions()
    cls.ImpalaTestMatrix.add_dimension(create_single_exec_option_dimension())
    # Tests do not use tables at the moment, so skip file formats other than Parquet.
    cls.ImpalaTestMatrix.add_constraint(lambda v:
        v.get_value('table_format').file_format == 'parquet')
|
||||
@SkipIfApacheHive.feature_not_supported
def test_esri_geospatial_functions(self, vector):
  """Runs the ESRI geospatial query test files one after the other with 'vector'."""
  test_files = (
      # tests generated from
      # https://github.com/Esri/spatial-framework-for-hadoop/tree/master/hive/test
      'QueryTest/geospatial-esri',
      # manual tests added
      'QueryTest/geospatial-esri-extra',
  )
  for test_file in test_files:
    self.run_test_case(test_file, vector)
|
||||
|
||||
@SkipIfApacheHive.feature_not_supported
|
||||
def test_esri_geospatial_planner(self, vector):
|
||||
# These tests are not among planner tests because with default flags
|
||||
# geospatial builtin functions are not loaded.
|
||||
|
||||
Reference in New Issue
Block a user