mirror of
https://github.com/apache/impala.git
synced 2025-12-19 18:12:08 -05:00
IMPALA-4865: Reject Expr Rewrite When Appropriate
Avoided rewrite if the resulting string literal exceeds a defined limit. Testing: Added three statements in testFoldConstantsRule() to verify that the expression rewrite is accepted only when the size of the rewritten expression is below a specified threshold. Change-Id: I8b078113ccc1aa49b0cea0c86dff2e02e1dd0e23 Reviewed-on: http://gerrit.cloudera.org:8080/12814 Reviewed-by: Tim Armstrong <tarmstrong@cloudera.com> Tested-by: Tim Armstrong <tarmstrong@cloudera.com>
This commit is contained in:
committed by
Tim Armstrong
parent
b51c82d296
commit
931a8f0ba7
2
.gitignore
vendored
2
.gitignore
vendored
@@ -92,6 +92,8 @@ gdb.txt
|
||||
.project
|
||||
.classpath
|
||||
.settings
|
||||
eclipse-classes/
|
||||
.pydevproject
|
||||
|
||||
# Debugging
|
||||
ad-hoc.test
|
||||
|
||||
@@ -167,11 +167,13 @@ static void SetTColumnValue(
|
||||
// a predicate evaluation. It requires JniUtil::Init() to have been
|
||||
// called. Throws a Java exception if an error or warning is encountered during
|
||||
// the expr evaluation.
|
||||
// We also reject the expression rewrite if the size of the returned rewritten result
|
||||
// is too large.
|
||||
extern "C"
|
||||
JNIEXPORT jbyteArray JNICALL
|
||||
Java_org_apache_impala_service_FeSupport_NativeEvalExprsWithoutRow(
|
||||
JNIEnv* env, jclass fe_support_class, jbyteArray thrift_expr_batch,
|
||||
jbyteArray thrift_query_ctx_bytes) {
|
||||
JNIEnv* env, jclass caller_class, jbyteArray thrift_expr_batch,
|
||||
jbyteArray thrift_query_ctx_bytes, jlong max_result_size) {
|
||||
Status status;
|
||||
jbyteArray result_bytes = NULL;
|
||||
TQueryCtx query_ctx;
|
||||
@@ -244,9 +246,22 @@ Java_org_apache_impala_service_FeSupport_NativeEvalExprsWithoutRow(
|
||||
void* result = eval->GetValue(nullptr);
|
||||
status = eval->GetError();
|
||||
if (!status.ok()) goto error;
|
||||
|
||||
const ColumnType& type = eval->root().type();
|
||||
// reject the expression rewrite if the returned string greater than
|
||||
if (type.IsVarLenStringType()) {
|
||||
const StringValue* string_val = reinterpret_cast<const StringValue*>(result);
|
||||
if (string_val != nullptr) {
|
||||
if (string_val->len > max_result_size) {
|
||||
status = Status(TErrorCode::EXPR_REWRITE_RESULT_LIMIT_EXCEEDED,
|
||||
string_val->len, max_result_size);
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 'output_scale' should only be set for MathFunctions::RoundUpTo()
|
||||
// with return type double.
|
||||
const ColumnType& type = eval->root().type();
|
||||
DCHECK(eval->output_scale() == -1 || type.type == TYPE_DOUBLE);
|
||||
TColumnValue val;
|
||||
SetTColumnValue(result, type, &val);
|
||||
@@ -699,7 +714,7 @@ static JNINativeMethod native_methods[] = {
|
||||
(void*)::Java_org_apache_impala_service_FeSupport_NativeFeTestInit
|
||||
},
|
||||
{
|
||||
const_cast<char*>("NativeEvalExprsWithoutRow"), const_cast<char*>("([B[B)[B"),
|
||||
const_cast<char*>("NativeEvalExprsWithoutRow"), const_cast<char*>("([B[BJ)[B"),
|
||||
(void*)::Java_org_apache_impala_service_FeSupport_NativeEvalExprsWithoutRow
|
||||
},
|
||||
{
|
||||
|
||||
@@ -397,6 +397,9 @@ error_codes = (
|
||||
("ROWS_PRODUCED_LIMIT_EXCEEDED", 131,
|
||||
"Query $0 terminated due to rows produced limit of $1. "
|
||||
"Unset or increase NUM_ROWS_PRODUCED_LIMIT query option to produce more rows."),
|
||||
|
||||
("EXPR_REWRITE_RESULT_LIMIT_EXCEEDED", 132,
|
||||
"Expression rewrite rejected due to result size ($0) exceeding the limit ($1).")
|
||||
)
|
||||
|
||||
import sys
|
||||
|
||||
@@ -30,6 +30,7 @@ import org.apache.hadoop.hive.metastore.api.FieldSchema;
|
||||
import org.apache.impala.catalog.Type;
|
||||
import org.apache.impala.common.AnalysisException;
|
||||
import org.apache.impala.compat.MetastoreShim;
|
||||
import org.apache.impala.service.FeSupport;
|
||||
import org.apache.impala.thrift.TColumn;
|
||||
import org.apache.impala.util.KuduUtil;
|
||||
import org.apache.impala.util.MetaStoreUtil;
|
||||
@@ -251,8 +252,8 @@ public class ColumnDef {
|
||||
throw new AnalysisException(String.format("Only constant values are allowed " +
|
||||
"for default values: %s", defaultValue_.toSql()));
|
||||
}
|
||||
LiteralExpr defaultValLiteral = LiteralExpr.create(defaultValue_,
|
||||
analyzer.getQueryCtx());
|
||||
LiteralExpr defaultValLiteral = LiteralExpr.createBounded(defaultValue_,
|
||||
analyzer.getQueryCtx(), StringLiteral.MAX_STRING_LEN);
|
||||
if (defaultValLiteral == null) {
|
||||
throw new AnalysisException(String.format("Only constant values are allowed " +
|
||||
"for default values: %s", defaultValue_.toSql()));
|
||||
@@ -285,7 +286,8 @@ public class ColumnDef {
|
||||
if (!defaultValLiteral.getType().equals(type_)) {
|
||||
Expr castLiteral = defaultValLiteral.uncheckedCastTo(type_);
|
||||
Preconditions.checkNotNull(castLiteral);
|
||||
defaultValLiteral = LiteralExpr.create(castLiteral, analyzer.getQueryCtx());
|
||||
defaultValLiteral = LiteralExpr.createBounded(castLiteral,
|
||||
analyzer.getQueryCtx(), StringLiteral.MAX_STRING_LEN);
|
||||
}
|
||||
Preconditions.checkNotNull(defaultValLiteral);
|
||||
outputDefaultValue_ = defaultValLiteral;
|
||||
|
||||
@@ -43,6 +43,7 @@ import com.google.common.base.Preconditions;
|
||||
*/
|
||||
public abstract class LiteralExpr extends Expr implements Comparable<LiteralExpr> {
|
||||
private final static Logger LOG = LoggerFactory.getLogger(LiteralExpr.class);
|
||||
public static final int MAX_STRING_LITERAL_SIZE = 64 * 1024;
|
||||
|
||||
public LiteralExpr() {
|
||||
// Literals start analyzed: there is nothing more to check.
|
||||
@@ -176,6 +177,17 @@ public abstract class LiteralExpr extends Expr implements Comparable<LiteralExpr
|
||||
"literals");
|
||||
}
|
||||
|
||||
/**
|
||||
* If it is known that the size of the rewritten expression is fixed, e.g.,
|
||||
* the size of an integer, then this method will be called to perform the rewrite.
|
||||
* Otherwise, the method createBounded that takes an additional argument specifying
|
||||
* the upper bound on the size of rewritten expression should be invoked.
|
||||
*/
|
||||
public static LiteralExpr create(Expr constExpr, TQueryCtx queryCtx)
|
||||
throws AnalysisException {
|
||||
return createBounded(constExpr, queryCtx, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Evaluates the given constant expr and returns its result as a LiteralExpr.
|
||||
* Assumes expr has been analyzed. Returns constExpr if is it already a LiteralExpr.
|
||||
@@ -185,15 +197,15 @@ public abstract class LiteralExpr extends Expr implements Comparable<LiteralExpr
|
||||
* or warnings in the BE.
|
||||
* TODO: Support non-scalar types.
|
||||
*/
|
||||
public static LiteralExpr create(Expr constExpr, TQueryCtx queryCtx)
|
||||
throws AnalysisException {
|
||||
public static LiteralExpr createBounded(Expr constExpr, TQueryCtx queryCtx,
|
||||
int maxResultSize) throws AnalysisException {
|
||||
Preconditions.checkState(constExpr.isConstant());
|
||||
Preconditions.checkState(constExpr.getType().isValid());
|
||||
if (constExpr instanceof LiteralExpr) return (LiteralExpr) constExpr;
|
||||
|
||||
TColumnValue val = null;
|
||||
try {
|
||||
val = FeSupport.EvalExprWithoutRow(constExpr, queryCtx);
|
||||
val = FeSupport.EvalExprWithoutRowBounded(constExpr, queryCtx, maxResultSize);
|
||||
} catch (InternalException e) {
|
||||
LOG.error(String.format("Failed to evaluate expr '%s': %s",
|
||||
constExpr.toSql(), e.getMessage()));
|
||||
|
||||
@@ -22,6 +22,7 @@ import java.util.Comparator;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.impala.common.AnalysisException;
|
||||
import org.apache.impala.service.FeSupport;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
|
||||
@@ -50,7 +51,8 @@ public class PartitionKeyValue {
|
||||
"as static partition-key values in '%s'.", toString()));
|
||||
}
|
||||
value_.analyze(analyzer);
|
||||
literalValue_ = LiteralExpr.create(value_, analyzer.getQueryCtx());
|
||||
literalValue_ = LiteralExpr.createBounded(value_, analyzer.getQueryCtx(),
|
||||
StringLiteral.MAX_STRING_LEN);
|
||||
}
|
||||
|
||||
public String getColName() { return colName_; }
|
||||
|
||||
@@ -17,6 +17,8 @@
|
||||
|
||||
package org.apache.impala.analysis;
|
||||
|
||||
import static org.apache.impala.analysis.ToSqlOptions.DEFAULT;
|
||||
|
||||
import java.math.BigInteger;
|
||||
import java.util.List;
|
||||
|
||||
@@ -24,14 +26,13 @@ import org.apache.impala.catalog.Type;
|
||||
import org.apache.impala.common.AnalysisException;
|
||||
import org.apache.impala.common.InternalException;
|
||||
import org.apache.impala.common.Pair;
|
||||
import org.apache.impala.service.FeSupport;
|
||||
import org.apache.impala.thrift.TRangePartition;
|
||||
import org.apache.impala.util.KuduUtil;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
import com.google.common.collect.Lists;
|
||||
|
||||
import static org.apache.impala.analysis.ToSqlOptions.DEFAULT;
|
||||
|
||||
/**
|
||||
* Represents a range partition of a Kudu table.
|
||||
*
|
||||
@@ -164,7 +165,8 @@ public class RangePartition extends StmtNode {
|
||||
throw new AnalysisException(String.format("Only constant values are allowed " +
|
||||
"for range-partition bounds: %s", value.toSql()));
|
||||
}
|
||||
LiteralExpr literal = LiteralExpr.create(value, analyzer.getQueryCtx());
|
||||
LiteralExpr literal = LiteralExpr.createBounded(value, analyzer.getQueryCtx(),
|
||||
StringLiteral.MAX_STRING_LEN);
|
||||
if (literal == null) {
|
||||
throw new AnalysisException(String.format("Only constant values are allowed " +
|
||||
"for range-partition bounds: %s", value.toSql()));
|
||||
@@ -181,7 +183,8 @@ public class RangePartition extends StmtNode {
|
||||
// Add an explicit cast to TIMESTAMP
|
||||
Expr e = new CastExpr(new TypeDef(Type.TIMESTAMP), literal);
|
||||
e.analyze(analyzer);
|
||||
literal = LiteralExpr.create(e, analyzer.getQueryCtx());
|
||||
literal = LiteralExpr.createBounded(e, analyzer.getQueryCtx(),
|
||||
StringLiteral.MAX_STRING_LEN);
|
||||
Preconditions.checkNotNull(literal);
|
||||
if (Expr.IS_NULL_VALUE.apply(literal)) {
|
||||
throw new AnalysisException(String.format("Range partition value %s cannot be " +
|
||||
@@ -199,7 +202,8 @@ public class RangePartition extends StmtNode {
|
||||
if (!literalType.equals(colType)) {
|
||||
Expr castLiteral = literal.uncheckedCastTo(colType);
|
||||
Preconditions.checkNotNull(castLiteral);
|
||||
literal = LiteralExpr.create(castLiteral, analyzer.getQueryCtx());
|
||||
literal = LiteralExpr.createBounded(castLiteral, analyzer.getQueryCtx(),
|
||||
StringLiteral.MAX_STRING_LEN);
|
||||
}
|
||||
Preconditions.checkNotNull(literal);
|
||||
|
||||
|
||||
@@ -36,6 +36,7 @@ import java_cup.runtime.Symbol;
|
||||
|
||||
public class StringLiteral extends LiteralExpr {
|
||||
private final String value_;
|
||||
public static int MAX_STRING_LEN = Integer.MAX_VALUE;
|
||||
|
||||
// Indicates whether this value needs to be unescaped in toThrift().
|
||||
private final boolean needsUnescaping_;
|
||||
|
||||
@@ -44,6 +44,7 @@ import org.apache.impala.catalog.PrimitiveType;
|
||||
import org.apache.impala.catalog.Type;
|
||||
import org.apache.impala.common.ImpalaException;
|
||||
import org.apache.impala.common.Pair;
|
||||
import org.apache.impala.service.FeSupport;
|
||||
import org.apache.impala.thrift.TExplainLevel;
|
||||
import org.apache.impala.thrift.THBaseFilter;
|
||||
import org.apache.impala.thrift.THBaseKeyRange;
|
||||
@@ -236,8 +237,8 @@ public class HBaseScanNode extends ScanNode {
|
||||
Preconditions.checkState(rowRange.getLowerBound().isConstant());
|
||||
Preconditions.checkState(
|
||||
rowRange.getLowerBound().getType().equals(Type.STRING));
|
||||
LiteralExpr val = LiteralExpr.create(rowRange.getLowerBound(),
|
||||
analyzer.getQueryCtx());
|
||||
LiteralExpr val = LiteralExpr.createBounded(rowRange.getLowerBound(),
|
||||
analyzer.getQueryCtx(), StringLiteral.MAX_STRING_LEN);
|
||||
// TODO: Make this a Preconditions.checkState(). If we get here,
|
||||
// and the value is not a string literal, then we've got a predicate
|
||||
// that we removed from the conjunct list, but which we won't evaluate
|
||||
@@ -256,8 +257,8 @@ public class HBaseScanNode extends ScanNode {
|
||||
Preconditions.checkState(rowRange.getUpperBound().isConstant());
|
||||
Preconditions.checkState(
|
||||
rowRange.getUpperBound().getType().equals(Type.STRING));
|
||||
LiteralExpr val = LiteralExpr.create(rowRange.getUpperBound(),
|
||||
analyzer.getQueryCtx());
|
||||
LiteralExpr val = LiteralExpr.createBounded(rowRange.getUpperBound(),
|
||||
analyzer.getQueryCtx(), StringLiteral.MAX_STRING_LEN);
|
||||
if (val instanceof StringLiteral) {
|
||||
StringLiteral litVal = (StringLiteral) val;
|
||||
stopKey_ = convertToBytes(litVal.getUnescapedValue(),
|
||||
|
||||
@@ -18,10 +18,9 @@
|
||||
package org.apache.impala.rewrite;
|
||||
|
||||
import org.apache.impala.analysis.Analyzer;
|
||||
import org.apache.impala.analysis.CastExpr;
|
||||
import org.apache.impala.analysis.Expr;
|
||||
import org.apache.impala.analysis.LiteralExpr;
|
||||
import org.apache.impala.analysis.CastExpr;
|
||||
|
||||
import org.apache.impala.common.AnalysisException;
|
||||
|
||||
/**
|
||||
@@ -64,7 +63,9 @@ public class FoldConstantsRule implements ExprRewriteRule {
|
||||
expr.analyze(analyzer);
|
||||
if (!expr.isConstant()) return expr;
|
||||
}
|
||||
Expr result = LiteralExpr.create(expr, analyzer.getQueryCtx());
|
||||
Expr result = LiteralExpr.createBounded(expr, analyzer.getQueryCtx(),
|
||||
LiteralExpr.MAX_STRING_LITERAL_SIZE);
|
||||
|
||||
// Preserve original type so parent Exprs do not need to be re-analyzed.
|
||||
if (result != null) return result.castTo(expr.getType());
|
||||
return expr;
|
||||
|
||||
@@ -22,6 +22,7 @@ import org.apache.impala.analysis.BinaryPredicate;
|
||||
import org.apache.impala.analysis.CastExpr;
|
||||
import org.apache.impala.analysis.Expr;
|
||||
import org.apache.impala.analysis.LiteralExpr;
|
||||
import org.apache.impala.analysis.StringLiteral;
|
||||
import org.apache.impala.analysis.TypeDef;
|
||||
import org.apache.impala.common.AnalysisException;
|
||||
|
||||
@@ -77,8 +78,8 @@ public class RemoveRedundantStringCast implements ExprRewriteRule {
|
||||
Expr castForRedundancyCheck = new CastExpr(new TypeDef(castExpr.getType()),
|
||||
new CastExpr(new TypeDef(castExprChild.getType()), literalExpr));
|
||||
castForRedundancyCheck.analyze(analyzer);
|
||||
LiteralExpr resultOfReverseCast = LiteralExpr.create(castForRedundancyCheck,
|
||||
analyzer.getQueryCtx());
|
||||
LiteralExpr resultOfReverseCast = LiteralExpr.createBounded(castForRedundancyCheck,
|
||||
analyzer.getQueryCtx(), StringLiteral.MAX_STRING_LEN);
|
||||
// Need to trim() while comparing char(n) types as conversion might add trailing
|
||||
// spaces to the 'resultOfReverseCast'.
|
||||
if (resultOfReverseCast != null &&
|
||||
|
||||
@@ -76,7 +76,7 @@ public class FeSupport {
|
||||
|
||||
// Returns a serialized TResultRow
|
||||
public native static byte[] NativeEvalExprsWithoutRow(
|
||||
byte[] thriftExprBatch, byte[] thriftQueryGlobals);
|
||||
byte[] thriftExprBatch, byte[] thriftQueryGlobals, long maxResultSize);
|
||||
|
||||
// Returns a serialized TSymbolLookupResult
|
||||
public native static byte[] NativeLookupSymbol(byte[] thriftSymbolLookup);
|
||||
@@ -172,16 +172,27 @@ public class FeSupport {
|
||||
return NativeCacheJar(thriftParams);
|
||||
}
|
||||
|
||||
/**
|
||||
* If it is known that the size of the evaluated expression is fixed, e.g.,
|
||||
* the size of an integer, then this method will be called to perform the evaluation.
|
||||
* Otherwise, the method EvalExprWithoutRowBounded that takes an additional argument
|
||||
* specifying the upper bound on the size of evaluated expression should be invoked.
|
||||
*/
|
||||
public static TColumnValue EvalExprWithoutRow(Expr expr, TQueryCtx queryCtx)
|
||||
throws InternalException {
|
||||
throws InternalException {
|
||||
return EvalExprWithoutRowBounded(expr, queryCtx, 0);
|
||||
}
|
||||
|
||||
public static TColumnValue EvalExprWithoutRowBounded(Expr expr, TQueryCtx queryCtx,
|
||||
int maxResultSize) throws InternalException {
|
||||
Preconditions.checkState(!expr.contains(SlotRef.class));
|
||||
TExprBatch exprBatch = new TExprBatch();
|
||||
exprBatch.addToExprs(expr.treeToThrift());
|
||||
TSerializer serializer = new TSerializer(new TBinaryProtocol.Factory());
|
||||
byte[] result;
|
||||
try {
|
||||
result = EvalExprsWithoutRow(
|
||||
serializer.serialize(exprBatch), serializer.serialize(queryCtx));
|
||||
result = EvalExprsWithoutRowBounded(
|
||||
serializer.serialize(exprBatch), serializer.serialize(queryCtx), maxResultSize);
|
||||
Preconditions.checkNotNull(result);
|
||||
TDeserializer deserializer = new TDeserializer(new TBinaryProtocol.Factory());
|
||||
TResultRow val = new TResultRow();
|
||||
@@ -222,14 +233,26 @@ public class FeSupport {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* If it is known that the size of the evaluated expression is fixed, e.g.,
|
||||
* the size of an integer, then this method will be called to perform the evaluation.
|
||||
* Otherwise, the method EvalExprsWithoutRowBounded that takes an additional argument
|
||||
* specifying the upper bound on the size of rewritten expression should be invoked.
|
||||
*/
|
||||
private static byte[] EvalExprsWithoutRow(
|
||||
byte[] thriftExprBatch, byte[] thriftQueryContext) {
|
||||
return EvalExprsWithoutRowBounded(thriftExprBatch, thriftQueryContext, 0);
|
||||
}
|
||||
|
||||
private static byte[] EvalExprsWithoutRowBounded(
|
||||
byte[] thriftExprBatch, byte[] thriftQueryContext, int maxResultSize) {
|
||||
try {
|
||||
return NativeEvalExprsWithoutRow(thriftExprBatch, thriftQueryContext);
|
||||
return NativeEvalExprsWithoutRow(thriftExprBatch, thriftQueryContext,
|
||||
maxResultSize);
|
||||
} catch (UnsatisfiedLinkError e) {
|
||||
loadLibrary();
|
||||
}
|
||||
return NativeEvalExprsWithoutRow(thriftExprBatch, thriftQueryContext);
|
||||
return NativeEvalExprsWithoutRow(thriftExprBatch, thriftQueryContext, maxResultSize);
|
||||
}
|
||||
|
||||
public static boolean EvalPredicate(Expr pred, TQueryCtx queryCtx)
|
||||
|
||||
@@ -21,9 +21,9 @@ import static java.lang.String.format;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.math.BigInteger;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
|
||||
import org.apache.impala.analysis.Analyzer;
|
||||
import org.apache.impala.analysis.DescriptorTable;
|
||||
|
||||
@@ -205,6 +205,17 @@ public class ExprRewriteRulesTest extends FrontendTestBase {
|
||||
return qf.verifyWhereRewrite(rules, expectedExprStr);
|
||||
}
|
||||
|
||||
public String repeat(String givenStr, long numberOfRepetitions) {
|
||||
StringBuilder resultStr = new StringBuilder();
|
||||
resultStr.append("'");
|
||||
for (long i = 0; i < numberOfRepetitions; i = i + 1) {
|
||||
resultStr.append(givenStr);
|
||||
}
|
||||
resultStr.append("'");
|
||||
System.out.println("resultStr.length(): " + resultStr.length());
|
||||
return resultStr.toString();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBetweenToCompoundRule() throws ImpalaException {
|
||||
ExprRewriteRule rule = BetweenToCompoundRule.INSTANCE;
|
||||
@@ -370,6 +381,13 @@ public class ExprRewriteRulesTest extends FrontendTestBase {
|
||||
RewritesOk("null + 1", rule, "NULL");
|
||||
RewritesOk("(1 + 1) is null", rule, "FALSE");
|
||||
RewritesOk("(null + 1) is null", rule, "TRUE");
|
||||
|
||||
// Test if the rewrite would be rejected if the result size is larger than
|
||||
// the predefined threshold, i.e., 65_536
|
||||
RewritesOk("repeat('AZ', 2)", rule, "'AZAZ'");
|
||||
RewritesOk("repeat('A', 65536)", rule, repeat("A", 65_536));
|
||||
RewritesOk("repeat('A', 4294967296)", rule, null);
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
|
||||
Reference in New Issue
Block a user