Files
impala/tests/query_test/test_tuple_cache_tpc_queries.py
Joe McDonnell 14597c7e2f IMPALA-13964: Fix test_tuple_cache_tpc_queries.py flakiness
This makes two changes to deflake test_tuple_cache_tpc_queries.py.
First, it increases the runtime filter wait time from 60 seconds to
600 seconds. The correctness verification slows down the path
that produces the runtime filter. The slowdown is dependent on
the speed of storage, so this can get very slow on test machines.

Second, this skips correctness checking for locations that are just
after streaming aggregations. Streaming aggregations can produce
variable output that the correctness checking can't handle.
For example, a grouping aggregation computing a sum might have
a preaggregation produce any of: (A: 3); or (A: 2), (A: 1); or
(A: 1), (A: 1), (A: 1). The finalization sees these as equivalent.
This marks the nodes as variable starting with the preaggregation
and clears the mark at the finalize stage.

When skipping correctness checking, the tuple cache node does not
hit the cache normally. This guarantees that its children will run
and go through correctness checking.

Testing:
 - Ran test_tuple_cache_tpc_queries.py locally
 - Added a frontend test for this specific case

Change-Id: If5e1be287bdb489a89aea3b2d7bec416220feb9a
Reviewed-on: http://gerrit.cloudera.org:8080/23010
Reviewed-by: Michael Smith <michael.smith@cloudera.com>
Tested-by: Michael Smith <michael.smith@cloudera.com>
2025-06-12 15:48:32 +00:00

84 lines
3.4 KiB
Python

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Functional tests running the TPCH and TPCDS workload twice to test tuple cache.
from __future__ import absolute_import, division, print_function
import pytest
from tests.common.environ import IS_TUPLE_CACHE_CORRECT_CHECK
from tests.common.impala_test_suite import ImpalaTestSuite
from tests.common.skip import SkipIf
from tests.common.test_dimensions import create_single_exec_option_dimension
from tests.util.test_file_parser import load_tpc_queries_name_sorted
# Values of the 'mt_dop' query option that every TPC query below is run with.
MT_DOP_VALUES = [0, 4]
def run_tuple_cache_test(self, vector, query, mtdop):
    """Run one TPC query twice with the tuple cache enabled.

    The query is executed two times so that both writing and reading the
    tuple cache are exercised.

    Args:
        self: the test suite instance; must provide run_test_case(query, vector).
        vector: the test vector; its 'exec_option' dict is modified in place.
        query: the query (test case) handed to run_test_case.
        mtdop: the value assigned to the 'mt_dop' exec option.
    """
    # The options dict is mutated in place, so look it up once and reuse it.
    exec_options = vector.get_value('exec_option')
    exec_options['enable_tuple_cache'] = True
    if IS_TUPLE_CACHE_CORRECT_CHECK:
        # Use a long runtime filter wait time (600000 ms = 10 minutes) to ensure
        # filters arrive before generating the tuple cache for correctness check.
        exec_options['runtime_filter_wait_time_ms'] = 600000
        # NOTE(review): indentation was lost in the reviewed copy; verification is
        # assumed to be enabled only under the correctness check -- confirm.
        exec_options['enable_tuple_cache_verification'] = True
    exec_options['mt_dop'] = mtdop
    # Run twice to test writing and then reading the tuple cache.
    self.run_test_case(query, vector)
    self.run_test_case(query, vector)
@SkipIf.not_tuple_cache
class TestTupleCacheTpchQuery(ImpalaTestSuite):
    """Runs each TPC-H query through run_tuple_cache_test for every MT_DOP value."""

    @classmethod
    def get_workload(cls):
        """Return the name of the workload this suite runs ('tpch')."""
        return 'tpch'

    @classmethod
    def add_test_dimensions(cls):
        """Register the test dimensions and the table-format constraint."""
        super(TestTupleCacheTpchQuery, cls).add_test_dimensions()
        # Outside of the exhaustive strategy, add the single exec option dimension.
        if cls.exploration_strategy() != 'exhaustive':
            cls.ImpalaTestMatrix.add_dimension(create_single_exec_option_dimension())
        # Only keep test vectors whose table format is parquet with codec 'none'.
        # NOTE(review): indentation was lost in the reviewed copy; the constraint is
        # assumed to apply for every exploration strategy -- confirm.
        cls.ImpalaTestMatrix.add_constraint(
            lambda v: v.get_value('table_format').file_format == 'parquet'
            and v.get_value('table_format').compression_codec == 'none')

    @pytest.mark.parametrize("query", load_tpc_queries_name_sorted('tpch'))
    @pytest.mark.parametrize("mtdop", MT_DOP_VALUES)
    def test_tpch(self, vector, query, mtdop):
        """Run one 'tpch' query at one mt_dop value via run_tuple_cache_test."""
        run_tuple_cache_test(self, vector, query, mtdop)
@SkipIf.not_tuple_cache
class TestTupleCacheTpcdsQuery(ImpalaTestSuite):
    """Runs each TPC-DS query through run_tuple_cache_test for every MT_DOP value."""

    @classmethod
    def get_workload(cls):
        """Return the name of the workload this suite runs ('tpcds')."""
        return 'tpcds'

    @classmethod
    def add_test_dimensions(cls):
        """Register the test dimensions and the table-format constraint."""
        super(TestTupleCacheTpcdsQuery, cls).add_test_dimensions()
        # Outside of the exhaustive strategy, add the single exec option dimension.
        if cls.exploration_strategy() != 'exhaustive':
            cls.ImpalaTestMatrix.add_dimension(create_single_exec_option_dimension())
        # Only keep test vectors whose table format is parquet with codec 'none'.
        # NOTE(review): indentation was lost in the reviewed copy; the constraint is
        # assumed to apply for every exploration strategy -- confirm.
        cls.ImpalaTestMatrix.add_constraint(
            lambda v: v.get_value('table_format').file_format == 'parquet'
            and v.get_value('table_format').compression_codec == 'none')

    @pytest.mark.parametrize("query", load_tpc_queries_name_sorted('tpcds'))
    @pytest.mark.parametrize("mtdop", MT_DOP_VALUES)
    def test_tpcds(self, vector, query, mtdop):
        """Run one 'tpcds' query at one mt_dop value via run_tuple_cache_test."""
        run_tuple_cache_test(self, vector, query, mtdop)