IMPALA-4865: Reject Expr Rewrite When Appropriate

Avoided rewrite if the resulting string literal exceeds a defined limit.

Testing:
Added three statements in testFoldConstantsRule() to verify that the
expression rewrite is accepted only when the size of the rewritten
expression is below a specified threshold.

Change-Id: I8b078113ccc1aa49b0cea0c86dff2e02e1dd0e23
Reviewed-on: http://gerrit.cloudera.org:8080/12814
Reviewed-by: Tim Armstrong <tarmstrong@cloudera.com>
Tested-by: Tim Armstrong <tarmstrong@cloudera.com>
This commit is contained in:
Fang-Yu Rao
2019-03-18 16:52:38 -07:00
committed by Tim Armstrong
parent b51c82d296
commit 931a8f0ba7
14 changed files with 117 additions and 32 deletions

2
.gitignore vendored
View File

@@ -92,6 +92,8 @@ gdb.txt
.project
.classpath
.settings
eclipse-classes/
.pydevproject
# Debugging
ad-hoc.test

View File

@@ -167,11 +167,13 @@ static void SetTColumnValue(
// a predicate evaluation. It requires JniUtil::Init() to have been
// called. Throws a Java exception if an error or warning is encountered during
// the expr evaluation.
// We also reject the expression rewrite if the size of the returned rewritten result
// is too large.
extern "C"
JNIEXPORT jbyteArray JNICALL
Java_org_apache_impala_service_FeSupport_NativeEvalExprsWithoutRow(
JNIEnv* env, jclass fe_support_class, jbyteArray thrift_expr_batch,
jbyteArray thrift_query_ctx_bytes) {
JNIEnv* env, jclass caller_class, jbyteArray thrift_expr_batch,
jbyteArray thrift_query_ctx_bytes, jlong max_result_size) {
Status status;
jbyteArray result_bytes = NULL;
TQueryCtx query_ctx;
@@ -244,9 +246,22 @@ Java_org_apache_impala_service_FeSupport_NativeEvalExprsWithoutRow(
void* result = eval->GetValue(nullptr);
status = eval->GetError();
if (!status.ok()) goto error;
const ColumnType& type = eval->root().type();
// reject the expression rewrite if the returned string greater than
if (type.IsVarLenStringType()) {
const StringValue* string_val = reinterpret_cast<const StringValue*>(result);
if (string_val != nullptr) {
if (string_val->len > max_result_size) {
status = Status(TErrorCode::EXPR_REWRITE_RESULT_LIMIT_EXCEEDED,
string_val->len, max_result_size);
goto error;
}
}
}
// 'output_scale' should only be set for MathFunctions::RoundUpTo()
// with return type double.
const ColumnType& type = eval->root().type();
DCHECK(eval->output_scale() == -1 || type.type == TYPE_DOUBLE);
TColumnValue val;
SetTColumnValue(result, type, &val);
@@ -699,7 +714,7 @@ static JNINativeMethod native_methods[] = {
(void*)::Java_org_apache_impala_service_FeSupport_NativeFeTestInit
},
{
const_cast<char*>("NativeEvalExprsWithoutRow"), const_cast<char*>("([B[B)[B"),
const_cast<char*>("NativeEvalExprsWithoutRow"), const_cast<char*>("([B[BJ)[B"),
(void*)::Java_org_apache_impala_service_FeSupport_NativeEvalExprsWithoutRow
},
{

View File

@@ -397,6 +397,9 @@ error_codes = (
("ROWS_PRODUCED_LIMIT_EXCEEDED", 131,
"Query $0 terminated due to rows produced limit of $1. "
"Unset or increase NUM_ROWS_PRODUCED_LIMIT query option to produce more rows."),
("EXPR_REWRITE_RESULT_LIMIT_EXCEEDED", 132,
"Expression rewrite rejected due to result size ($0) exceeding the limit ($1).")
)
import sys

View File

@@ -30,6 +30,7 @@ import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.impala.catalog.Type;
import org.apache.impala.common.AnalysisException;
import org.apache.impala.compat.MetastoreShim;
import org.apache.impala.service.FeSupport;
import org.apache.impala.thrift.TColumn;
import org.apache.impala.util.KuduUtil;
import org.apache.impala.util.MetaStoreUtil;
@@ -251,8 +252,8 @@ public class ColumnDef {
throw new AnalysisException(String.format("Only constant values are allowed " +
"for default values: %s", defaultValue_.toSql()));
}
LiteralExpr defaultValLiteral = LiteralExpr.create(defaultValue_,
analyzer.getQueryCtx());
LiteralExpr defaultValLiteral = LiteralExpr.createBounded(defaultValue_,
analyzer.getQueryCtx(), StringLiteral.MAX_STRING_LEN);
if (defaultValLiteral == null) {
throw new AnalysisException(String.format("Only constant values are allowed " +
"for default values: %s", defaultValue_.toSql()));
@@ -285,7 +286,8 @@ public class ColumnDef {
if (!defaultValLiteral.getType().equals(type_)) {
Expr castLiteral = defaultValLiteral.uncheckedCastTo(type_);
Preconditions.checkNotNull(castLiteral);
defaultValLiteral = LiteralExpr.create(castLiteral, analyzer.getQueryCtx());
defaultValLiteral = LiteralExpr.createBounded(castLiteral,
analyzer.getQueryCtx(), StringLiteral.MAX_STRING_LEN);
}
Preconditions.checkNotNull(defaultValLiteral);
outputDefaultValue_ = defaultValLiteral;

View File

@@ -43,6 +43,7 @@ import com.google.common.base.Preconditions;
*/
public abstract class LiteralExpr extends Expr implements Comparable<LiteralExpr> {
private final static Logger LOG = LoggerFactory.getLogger(LiteralExpr.class);
public static final int MAX_STRING_LITERAL_SIZE = 64 * 1024;
public LiteralExpr() {
// Literals start analyzed: there is nothing more to check.
@@ -176,6 +177,17 @@ public abstract class LiteralExpr extends Expr implements Comparable<LiteralExpr
"literals");
}
/**
* If it is known that the size of the rewritten expression is fixed, e.g.,
* the size of an integer, then this method will be called to perform the rewrite.
* Otherwise, the method createBounded that takes an additional argument specifying
* the upper bound on the size of rewritten expression should be invoked.
*/
public static LiteralExpr create(Expr constExpr, TQueryCtx queryCtx)
throws AnalysisException {
return createBounded(constExpr, queryCtx, 0);
}
/**
* Evaluates the given constant expr and returns its result as a LiteralExpr.
* Assumes expr has been analyzed. Returns constExpr if is it already a LiteralExpr.
@@ -185,15 +197,15 @@ public abstract class LiteralExpr extends Expr implements Comparable<LiteralExpr
* or warnings in the BE.
* TODO: Support non-scalar types.
*/
public static LiteralExpr create(Expr constExpr, TQueryCtx queryCtx)
throws AnalysisException {
public static LiteralExpr createBounded(Expr constExpr, TQueryCtx queryCtx,
int maxResultSize) throws AnalysisException {
Preconditions.checkState(constExpr.isConstant());
Preconditions.checkState(constExpr.getType().isValid());
if (constExpr instanceof LiteralExpr) return (LiteralExpr) constExpr;
TColumnValue val = null;
try {
val = FeSupport.EvalExprWithoutRow(constExpr, queryCtx);
val = FeSupport.EvalExprWithoutRowBounded(constExpr, queryCtx, maxResultSize);
} catch (InternalException e) {
LOG.error(String.format("Failed to evaluate expr '%s': %s",
constExpr.toSql(), e.getMessage()));

View File

@@ -22,6 +22,7 @@ import java.util.Comparator;
import java.util.List;
import org.apache.impala.common.AnalysisException;
import org.apache.impala.service.FeSupport;
import com.google.common.base.Preconditions;
@@ -50,7 +51,8 @@ public class PartitionKeyValue {
"as static partition-key values in '%s'.", toString()));
}
value_.analyze(analyzer);
literalValue_ = LiteralExpr.create(value_, analyzer.getQueryCtx());
literalValue_ = LiteralExpr.createBounded(value_, analyzer.getQueryCtx(),
StringLiteral.MAX_STRING_LEN);
}
public String getColName() { return colName_; }

View File

@@ -17,6 +17,8 @@
package org.apache.impala.analysis;
import static org.apache.impala.analysis.ToSqlOptions.DEFAULT;
import java.math.BigInteger;
import java.util.List;
@@ -24,14 +26,13 @@ import org.apache.impala.catalog.Type;
import org.apache.impala.common.AnalysisException;
import org.apache.impala.common.InternalException;
import org.apache.impala.common.Pair;
import org.apache.impala.service.FeSupport;
import org.apache.impala.thrift.TRangePartition;
import org.apache.impala.util.KuduUtil;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import static org.apache.impala.analysis.ToSqlOptions.DEFAULT;
/**
* Represents a range partition of a Kudu table.
*
@@ -164,7 +165,8 @@ public class RangePartition extends StmtNode {
throw new AnalysisException(String.format("Only constant values are allowed " +
"for range-partition bounds: %s", value.toSql()));
}
LiteralExpr literal = LiteralExpr.create(value, analyzer.getQueryCtx());
LiteralExpr literal = LiteralExpr.createBounded(value, analyzer.getQueryCtx(),
StringLiteral.MAX_STRING_LEN);
if (literal == null) {
throw new AnalysisException(String.format("Only constant values are allowed " +
"for range-partition bounds: %s", value.toSql()));
@@ -181,7 +183,8 @@ public class RangePartition extends StmtNode {
// Add an explicit cast to TIMESTAMP
Expr e = new CastExpr(new TypeDef(Type.TIMESTAMP), literal);
e.analyze(analyzer);
literal = LiteralExpr.create(e, analyzer.getQueryCtx());
literal = LiteralExpr.createBounded(e, analyzer.getQueryCtx(),
StringLiteral.MAX_STRING_LEN);
Preconditions.checkNotNull(literal);
if (Expr.IS_NULL_VALUE.apply(literal)) {
throw new AnalysisException(String.format("Range partition value %s cannot be " +
@@ -199,7 +202,8 @@ public class RangePartition extends StmtNode {
if (!literalType.equals(colType)) {
Expr castLiteral = literal.uncheckedCastTo(colType);
Preconditions.checkNotNull(castLiteral);
literal = LiteralExpr.create(castLiteral, analyzer.getQueryCtx());
literal = LiteralExpr.createBounded(castLiteral, analyzer.getQueryCtx(),
StringLiteral.MAX_STRING_LEN);
}
Preconditions.checkNotNull(literal);

View File

@@ -36,6 +36,7 @@ import java_cup.runtime.Symbol;
public class StringLiteral extends LiteralExpr {
private final String value_;
public static int MAX_STRING_LEN = Integer.MAX_VALUE;
// Indicates whether this value needs to be unescaped in toThrift().
private final boolean needsUnescaping_;

View File

@@ -44,6 +44,7 @@ import org.apache.impala.catalog.PrimitiveType;
import org.apache.impala.catalog.Type;
import org.apache.impala.common.ImpalaException;
import org.apache.impala.common.Pair;
import org.apache.impala.service.FeSupport;
import org.apache.impala.thrift.TExplainLevel;
import org.apache.impala.thrift.THBaseFilter;
import org.apache.impala.thrift.THBaseKeyRange;
@@ -236,8 +237,8 @@ public class HBaseScanNode extends ScanNode {
Preconditions.checkState(rowRange.getLowerBound().isConstant());
Preconditions.checkState(
rowRange.getLowerBound().getType().equals(Type.STRING));
LiteralExpr val = LiteralExpr.create(rowRange.getLowerBound(),
analyzer.getQueryCtx());
LiteralExpr val = LiteralExpr.createBounded(rowRange.getLowerBound(),
analyzer.getQueryCtx(), StringLiteral.MAX_STRING_LEN);
// TODO: Make this a Preconditions.checkState(). If we get here,
// and the value is not a string literal, then we've got a predicate
// that we removed from the conjunct list, but which we won't evaluate
@@ -256,8 +257,8 @@ public class HBaseScanNode extends ScanNode {
Preconditions.checkState(rowRange.getUpperBound().isConstant());
Preconditions.checkState(
rowRange.getUpperBound().getType().equals(Type.STRING));
LiteralExpr val = LiteralExpr.create(rowRange.getUpperBound(),
analyzer.getQueryCtx());
LiteralExpr val = LiteralExpr.createBounded(rowRange.getUpperBound(),
analyzer.getQueryCtx(), StringLiteral.MAX_STRING_LEN);
if (val instanceof StringLiteral) {
StringLiteral litVal = (StringLiteral) val;
stopKey_ = convertToBytes(litVal.getUnescapedValue(),

View File

@@ -18,10 +18,9 @@
package org.apache.impala.rewrite;
import org.apache.impala.analysis.Analyzer;
import org.apache.impala.analysis.CastExpr;
import org.apache.impala.analysis.Expr;
import org.apache.impala.analysis.LiteralExpr;
import org.apache.impala.analysis.CastExpr;
import org.apache.impala.common.AnalysisException;
/**
@@ -64,7 +63,9 @@ public class FoldConstantsRule implements ExprRewriteRule {
expr.analyze(analyzer);
if (!expr.isConstant()) return expr;
}
Expr result = LiteralExpr.create(expr, analyzer.getQueryCtx());
Expr result = LiteralExpr.createBounded(expr, analyzer.getQueryCtx(),
LiteralExpr.MAX_STRING_LITERAL_SIZE);
// Preserve original type so parent Exprs do not need to be re-analyzed.
if (result != null) return result.castTo(expr.getType());
return expr;

View File

@@ -22,6 +22,7 @@ import org.apache.impala.analysis.BinaryPredicate;
import org.apache.impala.analysis.CastExpr;
import org.apache.impala.analysis.Expr;
import org.apache.impala.analysis.LiteralExpr;
import org.apache.impala.analysis.StringLiteral;
import org.apache.impala.analysis.TypeDef;
import org.apache.impala.common.AnalysisException;
@@ -77,8 +78,8 @@ public class RemoveRedundantStringCast implements ExprRewriteRule {
Expr castForRedundancyCheck = new CastExpr(new TypeDef(castExpr.getType()),
new CastExpr(new TypeDef(castExprChild.getType()), literalExpr));
castForRedundancyCheck.analyze(analyzer);
LiteralExpr resultOfReverseCast = LiteralExpr.create(castForRedundancyCheck,
analyzer.getQueryCtx());
LiteralExpr resultOfReverseCast = LiteralExpr.createBounded(castForRedundancyCheck,
analyzer.getQueryCtx(), StringLiteral.MAX_STRING_LEN);
// Need to trim() while comparing char(n) types as conversion might add trailing
// spaces to the 'resultOfReverseCast'.
if (resultOfReverseCast != null &&

View File

@@ -76,7 +76,7 @@ public class FeSupport {
// Returns a serialized TResultRow
public native static byte[] NativeEvalExprsWithoutRow(
byte[] thriftExprBatch, byte[] thriftQueryGlobals);
byte[] thriftExprBatch, byte[] thriftQueryGlobals, long maxResultSize);
// Returns a serialized TSymbolLookupResult
public native static byte[] NativeLookupSymbol(byte[] thriftSymbolLookup);
@@ -172,16 +172,27 @@ public class FeSupport {
return NativeCacheJar(thriftParams);
}
/**
* If it is known that the size of the evaluated expression is fixed, e.g.,
* the size of an integer, then this method will be called to perform the evaluation.
* Otherwise, the method EvalExprWithoutRowBounded that takes an additional argument
* specifying the upper bound on the size of evaluated expression should be invoked.
*/
public static TColumnValue EvalExprWithoutRow(Expr expr, TQueryCtx queryCtx)
throws InternalException {
throws InternalException {
return EvalExprWithoutRowBounded(expr, queryCtx, 0);
}
public static TColumnValue EvalExprWithoutRowBounded(Expr expr, TQueryCtx queryCtx,
int maxResultSize) throws InternalException {
Preconditions.checkState(!expr.contains(SlotRef.class));
TExprBatch exprBatch = new TExprBatch();
exprBatch.addToExprs(expr.treeToThrift());
TSerializer serializer = new TSerializer(new TBinaryProtocol.Factory());
byte[] result;
try {
result = EvalExprsWithoutRow(
serializer.serialize(exprBatch), serializer.serialize(queryCtx));
result = EvalExprsWithoutRowBounded(
serializer.serialize(exprBatch), serializer.serialize(queryCtx), maxResultSize);
Preconditions.checkNotNull(result);
TDeserializer deserializer = new TDeserializer(new TBinaryProtocol.Factory());
TResultRow val = new TResultRow();
@@ -222,14 +233,26 @@ public class FeSupport {
}
}
/**
* If it is known that the size of the evaluated expression is fixed, e.g.,
* the size of an integer, then this method will be called to perform the evaluation.
* Otherwise, the method EvalExprsWithoutRowBounded that takes an additional argument
* specifying the upper bound on the size of rewritten expression should be invoked.
*/
private static byte[] EvalExprsWithoutRow(
byte[] thriftExprBatch, byte[] thriftQueryContext) {
return EvalExprsWithoutRowBounded(thriftExprBatch, thriftQueryContext, 0);
}
private static byte[] EvalExprsWithoutRowBounded(
byte[] thriftExprBatch, byte[] thriftQueryContext, int maxResultSize) {
try {
return NativeEvalExprsWithoutRow(thriftExprBatch, thriftQueryContext);
return NativeEvalExprsWithoutRow(thriftExprBatch, thriftQueryContext,
maxResultSize);
} catch (UnsatisfiedLinkError e) {
loadLibrary();
}
return NativeEvalExprsWithoutRow(thriftExprBatch, thriftQueryContext);
return NativeEvalExprsWithoutRow(thriftExprBatch, thriftQueryContext, maxResultSize);
}
public static boolean EvalPredicate(Expr pred, TQueryCtx queryCtx)

View File

@@ -21,9 +21,9 @@ import static java.lang.String.format;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.concurrent.ConcurrentHashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.impala.analysis.Analyzer;
import org.apache.impala.analysis.DescriptorTable;

View File

@@ -205,6 +205,17 @@ public class ExprRewriteRulesTest extends FrontendTestBase {
return qf.verifyWhereRewrite(rules, expectedExprStr);
}
public String repeat(String givenStr, long numberOfRepetitions) {
StringBuilder resultStr = new StringBuilder();
resultStr.append("'");
for (long i = 0; i < numberOfRepetitions; i = i + 1) {
resultStr.append(givenStr);
}
resultStr.append("'");
System.out.println("resultStr.length(): " + resultStr.length());
return resultStr.toString();
}
@Test
public void testBetweenToCompoundRule() throws ImpalaException {
ExprRewriteRule rule = BetweenToCompoundRule.INSTANCE;
@@ -370,6 +381,13 @@ public class ExprRewriteRulesTest extends FrontendTestBase {
RewritesOk("null + 1", rule, "NULL");
RewritesOk("(1 + 1) is null", rule, "FALSE");
RewritesOk("(null + 1) is null", rule, "TRUE");
// Test if the rewrite would be rejected if the result size is larger than
// the predefined threshold, i.e., 65_536
RewritesOk("repeat('AZ', 2)", rule, "'AZAZ'");
RewritesOk("repeat('A', 65536)", rule, repeat("A", 65_536));
RewritesOk("repeat('A', 4294967296)", rule, null);
}
@Test