mirror of
https://github.com/apache/impala.git
synced 2025-12-20 02:20:11 -05:00
Adds the counters RowsSent and RowsSentRate to the PLAN_ROOT_SINK
section of the profile:
PLAN_ROOT_SINK:
- PeakMemoryUsage: 4.01 MB (4202496)
- RowBatchGetWaitTime: 0.000ns
- RowBatchSendWaitTime: 0.000ns
- RowsSent: 10 (10)
- RowsSentRate: 416.00 /sec
RowsSent tracks the number of rows sent to the PlanRootSink via
PlanRootSink::Send. RowsSentRate tracks the rate at which rows are sent
to the PlanRootSink.
Adds the counters NumRowsFetched, NumRowsFetchedFromCache, and
RowMaterializationRate to the ImpalaServer section of the profile.
ImpalaServer:
- ClientFetchWaitTimer: 11.999ms
- NumRowsFetched: 10 (10)
- NumRowsFetchedFromCache: 10 (10)
- RowMaterializationRate: 9.00 /sec
- RowMaterializationTimer: 1s007ms
NumRowsFetched tracks the total number of rows fetched by the query,
but does not include rows fetched from the cache. NumRowsFetchedFromCache
tracks the total number of rows fetched from the query results cache.
RowMaterializationRate tracks the rate at which rows are materialized.
RowMaterializationTimer already existed and tracks how much time is
spent materializing rows.
Testing:
* Added tests to test_fetch_first.py and query_test/test_fetch.py
* Enabled some tests in test_fetch_first.py that were pending
the completion of IMPALA-8819
* Ran core tests
Change-Id: Id9e101e2f3e2bf8324e149c780d35825ceecc036
Reviewed-on: http://gerrit.cloudera.org:8080/14180
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Reviewed-by: Sahil Takiar <stakiar@cloudera.com>
87 lines
3.8 KiB
Python
87 lines
3.8 KiB
Python
# Licensed to the Apache Software Foundation (ASF) under one
|
|
# or more contributor license agreements. See the NOTICE file
|
|
# distributed with this work for additional information
|
|
# regarding copyright ownership. The ASF licenses this file
|
|
# to you under the Apache License, Version 2.0 (the
|
|
# "License"); you may not use this file except in compliance
|
|
# with the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing,
|
|
# software distributed under the License is distributed on an
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
# KIND, either express or implied. See the License for the
|
|
# specific language governing permissions and limitations
|
|
# under the License.
|
|
|
|
import pytest
|
|
import re
|
|
|
|
from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
|
|
from tests.common.test_dimensions import (
|
|
create_single_exec_option_dimension,
|
|
create_uncompressed_text_dimension)
|
|
from tests.util.web_pages_util import (
|
|
get_num_completed_backends,
|
|
get_mem_admitted_backends_debug_page)
|
|
|
|
|
|
class TestDedicatedCoordinator(CustomClusterTestSuite):
  """A custom cluster test that tests result spooling against a cluster with a dedicated
  coordinator."""

  @classmethod
  def get_workload(cls):
    """Return the name of the workload used by this suite."""
    return 'functional-query'

  @classmethod
  def add_test_dimensions(cls):
    """Add the single-exec-option and uncompressed-text dimensions to the test matrix,
    on top of whatever the parent suite registers."""
    super(TestDedicatedCoordinator, cls).add_test_dimensions()
    cls.ImpalaTestMatrix.add_dimension(create_single_exec_option_dimension())
    # There's no reason to test this on other file formats/compression codecs right now
    cls.ImpalaTestMatrix.add_dimension(
        create_uncompressed_text_dimension(cls.get_workload()))

  @pytest.mark.execute_serially
  @CustomClusterTestSuite.with_args(cluster_size=2, num_exclusive_coordinators=1)
  def test_dedicated_coordinator(self, vector):
    """Test the following when result spooling is enabled on a cluster with a dedicated
    coordinator when all results are spooled: (1) all backends are shutdown besides the
    coordinator and (2) all non-coordinator memory is released."""
    num_rows = 2000
    query = "select id from functional_parquet.alltypes order by id limit {0}".format(
        num_rows)
    vector.get_value('exec_option')['spool_query_results'] = 'true'

    # Amount of time to wait for the query to reach the finished state before throwing a
    # Timeout exception.
    timeout = 10

    # The profile prints the counter followed by its raw value in parentheses, e.g.
    # "RowsSent: 10 (10)". The parentheses must be escaped so that they match literally;
    # unescaped they form a regex group and the pattern would also accept any RowsSent
    # value that merely contains the digits of num_rows (e.g. 12000).
    rows_sent_pattern = r"RowsSent:.*\({0}\)".format(num_rows)

    handle = self.execute_query_async(query, vector.get_value('exec_option'))
    try:
      # Wait for the query to finish (all rows are spooled). Assert that the executor
      # has been shutdown and its memory has been released.
      self.wait_for_state(handle, self.client.QUERY_STATES['FINISHED'], timeout)
      self.assert_eventually(timeout, 0.5,
          lambda: re.search(rows_sent_pattern, self.client.get_runtime_profile(handle)))
      assert "NumCompletedBackends: 1 (1)" in self.client.get_runtime_profile(handle)
      mem_admitted = get_mem_admitted_backends_debug_page(self.cluster)
      assert mem_admitted['executor'][0] == 0
      assert mem_admitted['coordinator'] > 0
      # NOTE(review): impalads[0] is presumably the dedicated coordinator -- confirm.
      assert get_num_completed_backends(self.cluster.impalads[0].service,
          handle.get_handle().id) == 1

      # Fetch all results from the query and assert that the coordinator and the executor
      # have been shutdown and their memory has been released.
      self.client.fetch(query, handle)
      assert "NumCompletedBackends: 2 (2)" in self.client.get_runtime_profile(handle)
      mem_admitted = get_mem_admitted_backends_debug_page(self.cluster)
      assert mem_admitted['executor'][0] == 0
      assert mem_admitted['coordinator'] == 0
      assert get_num_completed_backends(self.cluster.impalads[0].service,
          handle.get_handle().id) == 2
    finally:
      # Always close the query so a failed assertion does not leave it running.
      self.client.close_query(handle)
|