mirror of
https://github.com/apache/impala.git
synced 2025-12-25 02:03:09 -05:00
IMPALA-10806: Create single node plan slowdown when hundreds of inline views are joined
Creating a single node plan for the following SQL sometime can slowdown, with about hundreds of inlineviews to join, and view1, view2... outputs hundreds of expressions. select c1 from (select c1, id from view1 where c1 > 10) t1 join (select c2, id from view2 where c1 > 10) t2 on t1.id = t2.id join ... The reasons for the slow generation of plans are as follows 1. Many auxiliary predicates are added to GlobalState.conjuncts causing performance degradation of Analyzer#getUnassignedConjuncts 2. In SingleNodePlanner#createInlineViewPlan the output smap is the composition of the inline view's smap and the output smap of the inline view's plan root. Multiple inline view joins cause ExprSubstitutionMap#compose performance to degrade. For 1, add GlobalState.conjunctsWithoutAuxExpr to save the registered conjuncts without auxiliary predicate. For 2, remove expressions from outputSmap that are not used according to baseSmap. Testing: Add test tests/query_test/test_query_compilation.py Repro query created single node plan went from 2.3 sec to 0.3 sec. Change-Id: Ifb4011b6167a0e61438a73c4dba6f1cd0a4e8c6a Reviewed-on: http://gerrit.cloudera.org:8080/17712 Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Reviewed-by: Qifan Chen <qchen@cloudera.com>
This commit is contained in:
@@ -385,6 +385,11 @@ public class Analyzer {
|
||||
// preserve the order in which conjuncts are added.
|
||||
public final Map<ExprId, Expr> conjuncts = new LinkedHashMap<>();
|
||||
|
||||
// all registered conjuncts without auxiliary predicate. This additional map is used
|
||||
// for performance reasons of getUnassignedConjuncts(List<TupleId> tupleIds,
|
||||
// boolean inclOjConjuncts).
|
||||
public final Map<ExprId, Expr> conjunctsFromQuery = new LinkedHashMap<>();
|
||||
|
||||
// all registered inferred conjuncts (map from tuple id to conjuncts). This map is
|
||||
// used to make sure that slot equivalences are not enforced multiple times (e.g.
|
||||
// duplicated to previously inferred conjuncts).
|
||||
@@ -1483,6 +1488,10 @@ public class Analyzer {
|
||||
e.setId(globalState_.conjunctIdGenerator.getNextId());
|
||||
globalState_.conjuncts.put(e.getId(), e);
|
||||
|
||||
if (!e.isAuxExpr()) {
|
||||
globalState_.conjunctsFromQuery.put(e.getId(), e);
|
||||
}
|
||||
|
||||
List<TupleId> tupleIds = new ArrayList<>();
|
||||
List<SlotId> slotIds = new ArrayList<>();
|
||||
e.getIds(tupleIds, slotIds);
|
||||
@@ -1574,9 +1583,8 @@ public class Analyzer {
|
||||
public List<Expr> getUnassignedConjuncts(
|
||||
List<TupleId> tupleIds, boolean inclOjConjuncts) {
|
||||
List<Expr> result = new ArrayList<>();
|
||||
for (Expr e: globalState_.conjuncts.values()) {
|
||||
for (Expr e: globalState_.conjunctsFromQuery.values()) {
|
||||
if (e.isBoundByTupleIds(tupleIds)
|
||||
&& !e.isAuxExpr()
|
||||
&& !globalState_.assignedConjuncts.contains(e.getId())
|
||||
&& ((inclOjConjuncts && !e.isConstant())
|
||||
|| !globalState_.ojClauseByConjunct.containsKey(e.getId()))) {
|
||||
|
||||
@@ -227,4 +227,22 @@ public final class ExprSubstitutionMap {
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove expressions that are not used according to baseTblSMap
|
||||
*/
|
||||
public void trim(ExprSubstitutionMap baseTblSMap, Analyzer analyzer) {
|
||||
Preconditions.checkState(size() == baseTblSMap.size());
|
||||
for (int i = size() - 1; i >= 0; --i) {
|
||||
List<SlotId> slotIds = new ArrayList<>();
|
||||
baseTblSMap.getRhs().get(i).getIds(null, slotIds);
|
||||
for (SlotId id: slotIds) {
|
||||
if (!analyzer.getSlotDesc(id).isMaterialized()) {
|
||||
lhs_.remove(i);
|
||||
rhs_.remove(i);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1205,8 +1205,12 @@ public class SingleNodePlanner {
|
||||
// of the inline view's plan root. This ensures that all downstream exprs referencing
|
||||
// the inline view are replaced with exprs referencing the physical output of the
|
||||
// inline view's plan.
|
||||
ExprSubstitutionMap outputSmap = ExprSubstitutionMap.compose(
|
||||
inlineViewRef.getSmap(), rootNode.getOutputSmap(), analyzer);
|
||||
ExprSubstitutionMap outputSmap = inlineViewRef.getSmap();
|
||||
if (outputSmap != null && !inlineViewRef.isTableMaskingView()) {
|
||||
outputSmap.trim(inlineViewRef.getBaseTblSmap(), analyzer);
|
||||
}
|
||||
outputSmap = ExprSubstitutionMap.compose(
|
||||
outputSmap, rootNode.getOutputSmap(), analyzer);
|
||||
if (analyzer.isOuterJoined(inlineViewRef.getId())) {
|
||||
// Exprs against non-matched rows of an outer join should always return NULL.
|
||||
// Make the rhs exprs of the output smap nullable, if necessary. This expr wrapping
|
||||
|
||||
40
tests/query_test/test_query_compilation.py
Normal file
40
tests/query_test/test_query_compilation.py
Normal file
@@ -0,0 +1,40 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
import pytest
|
||||
from tests.common.impala_test_suite import ImpalaTestSuite
|
||||
|
||||
|
||||
class TestSingleNodePlanCreated(ImpalaTestSuite):
|
||||
@pytest.mark.execute_serially
|
||||
def test_single_node_plan_created_time(self):
|
||||
"""Regression test for IMPALA-10806: Create single node plan slowdown when hundreds
|
||||
of inline views are joined"""
|
||||
query = "with aa as (select * from (select * from functional.widetable_1000_cols) t \
|
||||
where int_col1 = 10) select t1.int_col1 from aa t1 join aa t2 on \
|
||||
t1.int_col1 = t2.int_col2 join aa t3 on t1.int_col1 = t3.int_col1 \
|
||||
join aa t4 on t1.int_col1 = t4.int_col1"
|
||||
profile = self.execute_query(query).runtime_profile
|
||||
key = "Single node plan created"
|
||||
for line in profile.splitlines():
|
||||
if key in line:
|
||||
values = line.split('(')[1].strip('ms)').split('s')
|
||||
value = 0.0
|
||||
if len(values) == 2:
|
||||
value = float(values[0]) * 1000 + float(values[1])
|
||||
assert value < 1000, "Took too long to create single node plan"
|
||||
break
|
||||
Reference in New Issue
Block a user