From 70ae2e38eb0c4f9be0084e057c70ba427bbbbcfc Mon Sep 17 00:00:00 2001 From: Marcel Kornacker Date: Mon, 9 Jan 2017 18:13:59 -0800 Subject: [PATCH] IMPALA-4739: ExprRewriter fails on HAVING clauses The bug was that expr rewrite rules such as ExtractCommonConjunctRule analyzed their own output, which doesn't work for syntactic elements that allow column aliases, such as the HAVING clause. The fix was to remove the analysis step (the re-analysis happens anyway in AnalysisCtx). Change-Id: Ife74c61f549f620c42f74928f6474e8a5a7b7f00 Reviewed-on: http://gerrit.cloudera.org:8080/5662 Reviewed-by: Marcel Kornacker Tested-by: Impala Public Jenkins --- .../java/org/apache/impala/analysis/Analyzer.java | 3 +++ .../impala/rewrite/BetweenToCompoundRule.java | 3 ++- .../impala/rewrite/ExtractCommonConjunctRule.java | 4 ++-- .../apache/impala/rewrite/FoldConstantsRule.java | 4 ++-- .../org/apache/impala/common/FrontendTestBase.java | 4 ++-- .../queries/PlannerTest/constant-folding.test | 6 ++++-- .../functional-query/queries/QueryTest/exprs.test | 14 +++++++++++++- 7 files changed, 28 insertions(+), 10 deletions(-) diff --git a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java index 8bea3aa7a..4d638a8f0 100644 --- a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java +++ b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java @@ -1060,6 +1060,9 @@ public class Analyzer { // analysis pass, so the conjunct may not have been rewritten yet. ExprRewriter rewriter = new ExprRewriter(BetweenToCompoundRule.INSTANCE); conjunct = rewriter.rewrite(conjunct, this); + // analyze this conjunct here: we know it can't contain references to select list + // aliases and having it analyzed is needed for the following EvalPredicate() call + conjunct.analyze(this);; } if (!FeSupport.EvalPredicate(conjunct, globalState_.queryCtx)) { if (fromHavingClause) { diff --git a/fe/src/main/java/org/apache/impala/rewrite/BetweenToCompoundRule.java b/fe/src/main/java/org/apache/impala/rewrite/BetweenToCompoundRule.java index 296780dcb..90110b8b3 100644 --- a/fe/src/main/java/org/apache/impala/rewrite/BetweenToCompoundRule.java +++ b/fe/src/main/java/org/apache/impala/rewrite/BetweenToCompoundRule.java @@ -28,6 +28,8 @@ import org.apache.impala.common.AnalysisException; /** * Rewrites BetweenPredicates into an equivalent conjunctive/disjunctive * CompoundPredicate. + * It can be applied to pre-analysis expr trees and therefore does not reanalyze + * the transformation output itself. * Examples: * A BETWEEN X AND Y ==> A >= X AND A <= Y * A NOT BETWEEN X AND Y ==> A < X OR A > Y @@ -55,7 +57,6 @@ public class BetweenToCompoundRule implements ExprRewriteRule { bp.getChild(0), bp.getChild(2)); result = new CompoundPredicate(CompoundPredicate.Operator.AND, lower, upper); } - result.analyze(analyzer); return result; } diff --git a/fe/src/main/java/org/apache/impala/rewrite/ExtractCommonConjunctRule.java b/fe/src/main/java/org/apache/impala/rewrite/ExtractCommonConjunctRule.java index e1515d396..34934b020 100644 --- a/fe/src/main/java/org/apache/impala/rewrite/ExtractCommonConjunctRule.java +++ b/fe/src/main/java/org/apache/impala/rewrite/ExtractCommonConjunctRule.java @@ -30,6 +30,8 @@ import com.google.common.collect.Lists; /** * This rule extracts common conjuncts from multiple disjunctions when it is applied * recursively bottom-up to a tree of CompoundPredicates. + * It can be applied to pre-analysis expr trees and therefore does not reanalyze + * the transformation output itself. * * Examples: * (a AND b AND c) OR (b AND d) ==> b AND ((a AND c) OR (d)) @@ -80,7 +82,6 @@ public class ExtractCommonConjunctRule implements ExprRewriteRule { if (child0Conjuncts.isEmpty() || child1Conjuncts.isEmpty()) { Preconditions.checkState(!commonConjuncts.isEmpty()); Expr result = CompoundPredicate.createConjunctivePredicate(commonConjuncts); - result.analyze(analyzer); return result; } @@ -94,7 +95,6 @@ public class ExtractCommonConjunctRule implements ExprRewriteRule { newDisjunction.setPrintSqlInParens(true); Expr result = CompoundPredicate.createConjunction(newDisjunction, CompoundPredicate.createConjunctivePredicate(commonConjuncts)); - result.analyze(analyzer); return result; } diff --git a/fe/src/main/java/org/apache/impala/rewrite/FoldConstantsRule.java b/fe/src/main/java/org/apache/impala/rewrite/FoldConstantsRule.java index f3eb9a81c..146dd83a2 100644 --- a/fe/src/main/java/org/apache/impala/rewrite/FoldConstantsRule.java +++ b/fe/src/main/java/org/apache/impala/rewrite/FoldConstantsRule.java @@ -29,8 +29,8 @@ import org.apache.impala.common.AnalysisException; * TODO: Expressions fed into this rule are currently not required to be analyzed * in order to support constant folding in expressions that contain unresolved * references to select-list aliases (such expressions cannot be analyzed). - * For sanity, we should restructure our analysis/rewriting to only allow analyzed exprs - * to be rewritten. + * The cross-dependencies between rule transformations and analysis are vague at the + * moment and make rule application overly complicated. * * Examples: * 1 + 1 + 1 --> 3 diff --git a/fe/src/test/java/org/apache/impala/common/FrontendTestBase.java b/fe/src/test/java/org/apache/impala/common/FrontendTestBase.java index 9dc08dff9..297666f82 100644 --- a/fe/src/test/java/org/apache/impala/common/FrontendTestBase.java +++ b/fe/src/test/java/org/apache/impala/common/FrontendTestBase.java @@ -244,8 +244,8 @@ public class FrontendTestBase { try { AnalysisContext analysisCtx = new AnalysisContext(catalog_, TestUtils.createQueryContext(Catalog.DEFAULT_DB, - System.getProperty("user.name")), - AuthorizationConfig.createAuthDisabledConfig()); + System.getProperty("user.name")), + AuthorizationConfig.createAuthDisabledConfig()); analysisCtx.analyze(stmt, analyzer); AnalysisContext.AnalysisResult analysisResult = analysisCtx.getAnalysisResult(); if (expectedWarning != null) { diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/constant-folding.test b/testdata/workloads/functional-planner/queries/PlannerTest/constant-folding.test index d76bffa5f..d19d86e26 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/constant-folding.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/constant-folding.test @@ -52,17 +52,19 @@ data source predicates: tinyint_col < 256, 2 > int_col predicates: float_col != 0 ==== # Test aggregation. -select sum(1 + 1 + id) +select sum(1 + 1 + id) sm from functional.alltypes group by timestamp_col = cast('2015-11-15' as timestamp) + interval 1 year having 1024 * 1024 * count(*) % 2 = 0 + and (sm > 1 or sm > 1) + and (sm between 5 and 10) ---- PLAN PLAN-ROOT SINK | 01:AGGREGATE [FINALIZE] | output: sum(2 + id), count(*) | group by: timestamp_col = TIMESTAMP '2016-11-15 00:00:00' -| having: 1048576 * count(*) % 2 = 0 +| having: sum(2 + id) <= 10, sum(2 + id) > 1, sum(2 + id) >= 5, 1048576 * count(*) % 2 = 0 | 00:SCAN HDFS [functional.alltypes] partitions=24/24 files=24 size=478.45KB diff --git a/testdata/workloads/functional-query/queries/QueryTest/exprs.test b/testdata/workloads/functional-query/queries/QueryTest/exprs.test index 78c3e09c0..4b1ba7690 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/exprs.test +++ b/testdata/workloads/functional-query/queries/QueryTest/exprs.test @@ -2547,7 +2547,7 @@ order by c, cast('2016-11-22 16:40:00.00' as timestamp) BIGINT, TIMESTAMP, TIMESTAMP ==== ---- QUERY -# Constant timestamp expresisons in a join condition / runtime filter as well +# Constant timestamp expressions in a join condition / runtime filter as well # as a select node. select count(*) from ( select a.timestamp_col from @@ -2564,6 +2564,18 @@ where timestamp_col < cast('2013-02-18 20:46:00.01' as timestamp) BIGINT ==== ---- QUERY +# IMPALA-4739: rewrites in HAVING clause +select tinyint_col, count(*) cnt +from functional_parquet.alltypesagg +group by 1 +having cnt > 1000 or cnt > 1000 + and cnt between 1500 and 2500 +---- TYPES +TINYINT, BIGINT +---- RESULTS +NULL,2000 +==== +---- QUERY # IMPALA-4550: Regression test for proper cast analysis after slot substitution within a # no-op explicit cast. select /* +straight_join */ a.id