From 2e83ba5796d42bf3a568bfae5823896dfa644cc2 Mon Sep 17 00:00:00 2001 From: Thomas Tauber-Marshall Date: Thu, 7 Dec 2017 14:39:31 -0800 Subject: [PATCH] IMPALA-6280: Materialize TupleIsNullPredicate for insert sorts When a sort is inserted into a plan for an INSERT due to either the target table being a Kudu table or the use of the 'clustered' hint, and a TupleIsNullPredicate is present in the output of the sort, the TupleIsNullPredicate may reference an incorrect tuple (i.e. not the materialized sort tuple), leading to errors. The solution is to materialize the TupleIsNullPredicate into the sort tuple and then perform the appropriate expr substitutions, as is already done for the case of analytic sorts. Testing: - Added an e2e test with a query that would previously fail. Change-Id: I6c0ca717aa4321a5cc84edd1d5857912f8c85583 Reviewed-on: http://gerrit.cloudera.org:8080/8791 Reviewed-by: Alex Behm Tested-by: Impala Public Jenkins --- .../org/apache/impala/analysis/SortInfo.java | 35 +++++++++++++++++-- .../impala/planner/AnalyticPlanner.java | 19 ++++------ .../queries/QueryTest/insert.test | 11 ++++++ .../queries/QueryTest/kudu_upsert.test | 10 ++++++ 4 files changed, 60 insertions(+), 15 deletions(-) diff --git a/fe/src/main/java/org/apache/impala/analysis/SortInfo.java b/fe/src/main/java/org/apache/impala/analysis/SortInfo.java index 88cb375d8..745de6d67 100644 --- a/fe/src/main/java/org/apache/impala/analysis/SortInfo.java +++ b/fe/src/main/java/org/apache/impala/analysis/SortInfo.java @@ -192,8 +192,9 @@ public class SortInfo { // materialized ordering exprs. Using a LinkedHashSet prevents the slots getting // reordered unnecessarily. Set sourceSlots = Sets.newLinkedHashSet(); - TreeNode.collect(Expr.substituteList(resultExprs, substOrderBy, analyzer, false), - Predicates.instanceOf(SlotRef.class), sourceSlots); + List substResultExprs = + Expr.substituteList(resultExprs, substOrderBy, analyzer, false); + TreeNode.collect(substResultExprs, Predicates.instanceOf(SlotRef.class), sourceSlots); TreeNode.collect(Expr.substituteList(orderingExprs_, substOrderBy, analyzer, false), Predicates.instanceOf(SlotRef.class), sourceSlots); for (SlotRef origSlotRef: sourceSlots) { @@ -208,6 +209,9 @@ public class SortInfo { } } + materializeTupleIsNullPredicates( + sortTupleDesc, substResultExprs, sortTupleExprs, substOrderBy, analyzer); + // The ordering exprs are evaluated against the sort tuple, so they must reflect the // materialization decision above. substituteOrderingExprs(substOrderBy, analyzer); @@ -247,4 +251,31 @@ public class SortInfo { } return substOrderBy; } + + /** + * Collects the unique TupleIsNullPredicates from 'exprs' and for each one: + * - Materializes it into a new slot in 'sortTupleDesc' + * - Adds it to 'sortSlotExprs' + * - Adds an entry in 'substOrderBy' mapping it to a SlotRef into the new slot + */ + public static void materializeTupleIsNullPredicates(TupleDescriptor sortTupleDesc, + List exprs, List sortSlotExprs, ExprSubstitutionMap substOrderBy, + Analyzer analyzer) { + List tupleIsNullPreds = Lists.newArrayList(); + TreeNode.collect( + exprs, Predicates.instanceOf(TupleIsNullPredicate.class), tupleIsNullPreds); + Expr.removeDuplicates(tupleIsNullPreds); + + // Materialize relevant unique TupleIsNullPredicates. + for (Expr tupleIsNullPred: tupleIsNullPreds) { + SlotDescriptor sortSlotDesc = analyzer.addSlotDescriptor(sortTupleDesc); + sortSlotDesc.setType(tupleIsNullPred.getType()); + sortSlotDesc.setIsMaterialized(true); + sortSlotDesc.setSourceExpr(tupleIsNullPred); + sortSlotDesc.setLabel(tupleIsNullPred.toSql()); + SlotRef cloneRef = new SlotRef(sortSlotDesc); + substOrderBy.put(tupleIsNullPred, cloneRef); + sortSlotExprs.add(tupleIsNullPred.clone()); + } + } } diff --git a/fe/src/main/java/org/apache/impala/planner/AnalyticPlanner.java b/fe/src/main/java/org/apache/impala/planner/AnalyticPlanner.java index 1140a7d0d..156bd5bd7 100644 --- a/fe/src/main/java/org/apache/impala/planner/AnalyticPlanner.java +++ b/fe/src/main/java/org/apache/impala/planner/AnalyticPlanner.java @@ -284,24 +284,17 @@ public class AnalyticPlanner { // by a SlotRef into the sort's tuple in ancestor nodes (IMPALA-1519). ExprSubstitutionMap inputSmap = input.getOutputSmap(); if (inputSmap != null) { - List tupleIsNullPredsToMaterialize = Lists.newArrayList(); + List relevantRhsExprs = Lists.newArrayList(); for (int i = 0; i < inputSmap.size(); ++i) { Expr rhsExpr = inputSmap.getRhs().get(i); // Ignore substitutions that are irrelevant at this plan node and its ancestors. - if (!rhsExpr.isBoundByTupleIds(input.getTupleIds())) continue; - rhsExpr.collect(TupleIsNullPredicate.class, tupleIsNullPredsToMaterialize); + if (rhsExpr.isBoundByTupleIds(input.getTupleIds())) { + relevantRhsExprs.add(rhsExpr); + } } - Expr.removeDuplicates(tupleIsNullPredsToMaterialize); - // Materialize relevant unique TupleIsNullPredicates. - for (Expr tupleIsNullPred: tupleIsNullPredsToMaterialize) { - SlotDescriptor sortSlotDesc = analyzer_.addSlotDescriptor(sortTupleDesc); - sortSlotDesc.setType(tupleIsNullPred.getType()); - sortSlotDesc.setIsMaterialized(true); - sortSlotDesc.setSourceExpr(tupleIsNullPred); - sortSlotDesc.setLabel(tupleIsNullPred.toSql()); - sortSlotExprs.add(tupleIsNullPred.clone()); - } + SortInfo.materializeTupleIsNullPredicates(sortTupleDesc, relevantRhsExprs, + sortSlotExprs, sortSmap, analyzer_); } SortInfo sortInfo = new SortInfo(sortExprs, isAsc, nullsFirst); diff --git a/testdata/workloads/functional-query/queries/QueryTest/insert.test b/testdata/workloads/functional-query/queries/QueryTest/insert.test index 5601b3513..07a665ab9 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/insert.test +++ b/testdata/workloads/functional-query/queries/QueryTest/insert.test @@ -945,3 +945,14 @@ RESET alltypesnopart_insert ---- RESULTS : 100 ==== +---- QUERY +# IMPALA-6280: clustered with outer join, inline view, and TupleisNullPredicate +insert into table alltypesinsert (int_col) +partition (year, month) /*+ clustered,shuffle */ +select v.id, t1.id, t1.month from +(select coalesce(id, 10) id from functional.alltypessmall) v +right outer join functional.alltypestiny t1 on t1.id = v.id +where v.id = 0 +---- RESULTS +year=0/month=1/: 1 +==== \ No newline at end of file diff --git a/testdata/workloads/functional-query/queries/QueryTest/kudu_upsert.test b/testdata/workloads/functional-query/queries/QueryTest/kudu_upsert.test index 8180075e7..57aaa4678 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/kudu_upsert.test +++ b/testdata/workloads/functional-query/queries/QueryTest/kudu_upsert.test @@ -388,6 +388,16 @@ NumRowErrors: 0 ---- RESULTS ==== ---- QUERY +# IMPALA-6280: check that TupleIsNullPredicate is materialized correctly +upsert into table tdata (id, vali) +select t1.id, v.id from functional.alltypestiny t1 +left outer join (select ifnull(id, 10) id from functional.alltypessmall) v +on t1.id = v.id limit 1 +---- RUNTIME_PROFILE +NumModifiedRows: 1 +NumRowErrors: 0 +==== +---- QUERY create table multiple_key_cols (string_col string, bigint_col bigint, tinyint_col tinyint, smallint_col smallint, bool_col boolean null, int_col int null, double_col double null,