mirror of
https://github.com/apache/impala.git
synced 2026-01-05 12:01:11 -05:00
IMPALA-3002: The shell prints an incorrect value for '#Rows' in the exec summary for broadcast nodes due to incorrect logic around whether to use max or agg stats. This patch makes the behavior consistent with the way the backend treats exec summaries in summary-util.cc. This incorrect logic was also duplicated in the impala_beeswax test framework. IMPALA-1473: When there is a merging exchange with a limit, we may copy rows into the output batch beyond the limit. In this case, we currently update the output batch's size to reflect the limit, but we also need to update ExecNode::num_rows_returned_ or the exec summary may show that the exchange node returned more rows than it really did. Additionally, PlanFragmentExecutor::GetNext does not update rows_produced_counter_ in some cases, leading the runtime profile to display an incorrect value for 'RowsProduced'. Change-Id: I386719370386c9cff09b8b35d15dc712dc6480aa Reviewed-on: http://gerrit.cloudera.org:8080/4679 Reviewed-by: Matthew Jacobs <mj@cloudera.com> Tested-by: Internal Jenkins
53 lines
2.4 KiB
Python
53 lines
2.4 KiB
Python
# Licensed to the Apache Software Foundation (ASF) under one
|
|
# or more contributor license agreements. See the NOTICE file
|
|
# distributed with this work for additional information
|
|
# regarding copyright ownership. The ASF licenses this file
|
|
# to you under the Apache License, Version 2.0 (the
|
|
# "License"); you may not use this file except in compliance
|
|
# with the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing,
|
|
# software distributed under the License is distributed on an
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
# KIND, either express or implied. See the License for the
|
|
# specific language governing permissions and limitations
|
|
# under the License.
|
|
|
|
from tests.common.impala_test_suite import ImpalaTestSuite
|
|
|
|
class TestObservability(ImpalaTestSuite):
  """Regression tests for the row counts reported in a query's exec summary and
  runtime profile."""

  @classmethod
  def get_workload(cls):
    """Return the workload name declared by this suite: 'functional-query'."""
    return 'functional-query'

  def test_merge_exchange_num_rows(self):
    """Regression test for IMPALA-1473 - checks that the exec summary for a merging
    exchange with a limit reports the number of rows returned as equal to the limit,
    and that the coordinator fragment portion of the runtime profile reports the number
    of rows returned correctly."""
    query = """select tinyint_col, count(*) from functional.alltypes
        group by tinyint_col order by tinyint_col limit 5"""
    result = self.execute_query(query)

    # The merging exchange is expected to be the first entry of the exec summary.
    merging_exchange = result.exec_summary[0]
    assert merging_exchange['operator'] == '05:MERGING-EXCHANGE'
    assert merging_exchange['num_rows'] == 5
    assert merging_exchange['est_num_rows'] == 5

    # The first 'RowsProduced' we find is for the coordinator fragment.
    rows_produced_line = next(
        (line for line in result.runtime_profile.split('\n') if 'RowsProduced' in line),
        None)
    # Fail explicitly if the counter is absent; otherwise the check below would be
    # skipped and the test would pass without verifying anything.
    assert rows_produced_line is not None, \
        "No 'RowsProduced' counter found in the runtime profile"
    assert '(5)' in rows_produced_line, rows_produced_line

  def test_broadcast_num_rows(self):
    """Regression test for IMPALA-3002 - checks that the num_rows for a broadcast node
    in the exec summary is correctly set as the max over all instances, not the sum."""
    query = """select distinct a.int_col, a.string_col from functional.alltypes a
        inner join functional.alltypessmall b on (a.id = b.id)
        where a.year = 2009 and b.month = 2"""
    result = self.execute_query(query)

    # The broadcast exchange is expected at index 5 of the exec summary.
    broadcast_exchange = result.exec_summary[5]
    assert broadcast_exchange['operator'] == '04:EXCHANGE'
    assert broadcast_exchange['num_rows'] == 25
    assert broadcast_exchange['est_num_rows'] == 25