IMPALA-14571: increase planner cost of java functions

The main motivation is to evaluate expensive geospatial
functions (which are Java functions) last in predicates.
Java functions have a major overhead anyway from the JNI
call, so bumping all Java function costs seems beneficial.

Note that currently geospatial functions are the only
built-in Java functions.

Change-Id: I11d1652d76092ec60af18a33502dacc25b284fcc
Reviewed-on: http://gerrit.cloudera.org:8080/22733
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
This commit is contained in:
Csaba Ringhofer
2025-04-04 16:22:24 +02:00
committed by Impala Public Jenkins
parent f12bb87d42
commit f6ceca2b4d
4 changed files with 20 additions and 1 deletions

View File

@@ -89,6 +89,7 @@ abstract public class Expr extends TreeNode<Expr> implements ParseNode, Cloneabl
public static final float VAR_LEN_BINARY_PREDICATE_COST = 5;
public static final float COMPOUND_PREDICATE_COST = 1;
public static final float FUNCTION_CALL_COST = 10;
// Per-call cost used for functions whose binary type is JAVA. It is set higher than
// FUNCTION_CALL_COST because Java functions pay a JNI call overhead; the intent is
// that expensive Java functions (e.g. geospatial ones) are evaluated last in
// predicates.
public static final float JAVA_FUNCTION_CALL_COST = 100;
public static final float IS_NOT_EMPTY_COST = 1;
public static final float IS_NULL_COST = 1;
public static final float LIKE_COST = 10;

View File

@@ -840,8 +840,11 @@ public class FunctionCallExpr extends Expr {
/**
 * Computes the estimated evaluation cost of this function call.
 *
 * <p>The cost is the sum of the child costs plus a per-call constant:
 * {@code JAVA_FUNCTION_CALL_COST} when the function's binary type is
 * {@code TFunctionBinaryType.JAVA}, otherwise {@code FUNCTION_CALL_COST}. Java
 * functions are charged more because of the JNI call overhead, which lets them be
 * ordered after cheaper expressions in predicates.
 *
 * <p>Requires {@code fn_} to be set; this is enforced with
 * {@code Preconditions.checkState}.
 *
 * @return the estimated cost, or {@code UNKNOWN_COST} if the child costs are not
 *     available
 */
@Override
protected float computeEvalCost() {
  Preconditions.checkState(fn_ != null);
  boolean isJava = fn_.getBinaryType() == TFunctionBinaryType.JAVA;
  float callCost = isJava ? JAVA_FUNCTION_CALL_COST : FUNCTION_CALL_COST;
  // TODO(tmarshall): Differentiate based on the specific function.
  // Only one return: a preceding return using FUNCTION_CALL_COST unconditionally
  // would make the Java-specific cost below unreachable.
  return hasChildCosts() ? getChildCosts() + callCost : UNKNOWN_COST;
}
/** Returns the current value of {@code mergeAggInputFn_}. */
public FunctionCallExpr getMergeAggInputFn() {
  return this.mergeAggInputFn_;
}

View File

@@ -0,0 +1,10 @@
====
---- QUERY
# Check that st_geomfromwkb(), which is a Java function, is moved after the other
# expression in the predicate.
select * from functional.binary_tbl
where st_geomfromwkb(binary_col) is not null and sqrt(id) = 1;
---- RUNTIME_PROFILE
predicates: sqrt(CAST(id AS DOUBLE)) = CAST(1 AS DOUBLE), st_geomfromwkb(binary_col) IS NOT NULL
====

View File

@@ -25,3 +25,8 @@ class TestGeospatialFuctions(ImpalaTestSuite):
# Runs the 'QueryTest/geospatial-esri' test case with the given test vector.
# NOTE(review): the decorator presumably skips this test on Apache Hive builds,
# where the feature is not supported -- confirm against SkipIfApacheHive.
@SkipIfApacheHive.feature_not_supported
def test_esri_geospatial_functions(self, vector):
self.run_test_case('QueryTest/geospatial-esri', vector)
@SkipIfApacheHive.feature_not_supported
def test_esri_geospatial_planner(self, vector):
  # These tests are not among planner tests because with default flags
  # geospatial builtin functions are not loaded.
  # Carries the same skip marker as test_esri_geospatial_functions because this
  # test also depends on the geospatial builtin functions.
  self.run_test_case('QueryTest/geospatial-esri-planner', vector)