IMPALA-12864: Deflake test_query_log_size_in_bytes.

test_query_log_size_in_bytes is flaky in exhaustive builds. The expected
QueryStateRecord is off by around 100KB per query, indicating that the
test query might be too complex and lead to variability in final query
profile that is being archived.

This patch simplifies the test query and relaxes the assertion. This
patch also adds QUERY_LOG_EST_TOTAL_BYTES metric (key name
"impala-server.query-log-est-total-bytes") that is validated as well in
test_query_log_size_in_bytes. query-state-record-test.cc is modified a
bit to resolve segmentation fault issue when building
unified-be-test-validated-executable target run_clang_tidy.sh.

Testing:
- Pass test_query_log_size_in_bytes in exhaustive build.

Change-Id: Ic9b543ffa931a01c11d58e45774ea46af78b3a25
Reviewed-on: http://gerrit.cloudera.org:8080/21100
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
This commit is contained in:
Riza Suminto
2024-03-01 13:27:34 -08:00
committed by Impala Public Jenkins
parent 784971c018
commit ff5cbadec4
10 changed files with 55 additions and 29 deletions

View File

@@ -142,5 +142,5 @@ ADD_BE_TEST(session-expiry-test session-expiry-test.cc) # TODO: this leaks thrif
ADD_UNIFIED_BE_LSAN_TEST(hs2-util-test "StitchNullsTest.*:PrintTColumnValueTest.*") ADD_UNIFIED_BE_LSAN_TEST(hs2-util-test "StitchNullsTest.*:PrintTColumnValueTest.*")
ADD_UNIFIED_BE_LSAN_TEST(query-options-test QueryOptions.*) ADD_UNIFIED_BE_LSAN_TEST(query-options-test QueryOptions.*)
ADD_UNIFIED_BE_LSAN_TEST(impala-server-test ImpalaServerTest.*) ADD_UNIFIED_BE_LSAN_TEST(impala-server-test ImpalaServerTest.*)
ADD_UNIFIED_BE_LSAN_TEST(query-state-record-test QueryStateRecordTest.*:PerHostStateTest.*) ADD_UNIFIED_BE_LSAN_TEST(query-state-record-test QueryStateRecordTest.*)
ADD_BE_LSAN_TEST(internal-server-test) ADD_BE_LSAN_TEST(internal-server-test)

View File

@@ -1164,7 +1164,7 @@ void ImpalaServer::ArchiveQuery(const QueryHandle& query_handle) {
{ {
lock_guard<mutex> l(query_log_lock_); lock_guard<mutex> l(query_log_lock_);
// Add record to the beginning of the log, and to the lookup index. // Add record to the beginning of the log, and to the lookup index.
query_log_est_total_size_ += record_size; ImpaladMetrics::QUERY_LOG_EST_TOTAL_BYTES->Increment(record_size);
query_log_est_sizes_.push_front(record_size); query_log_est_sizes_.push_front(record_size);
query_log_.push_front(move(record)); query_log_.push_front(move(record));
query_log_index_[query_handle->query_id()] = &query_log_.front(); query_log_index_[query_handle->query_id()] = &query_log_.front();
@@ -1172,12 +1172,13 @@ void ImpalaServer::ArchiveQuery(const QueryHandle& query_handle) {
while (!query_log_.empty() while (!query_log_.empty()
&& ((FLAGS_query_log_size > -1 && FLAGS_query_log_size < query_log_.size()) && ((FLAGS_query_log_size > -1 && FLAGS_query_log_size < query_log_.size())
|| (FLAGS_query_log_size_in_bytes > -1 || (FLAGS_query_log_size_in_bytes > -1
&& FLAGS_query_log_size_in_bytes < query_log_est_total_size_))) { && FLAGS_query_log_size_in_bytes
< ImpaladMetrics::QUERY_LOG_EST_TOTAL_BYTES->GetValue()))) {
query_log_index_.erase(query_log_.back()->id); query_log_index_.erase(query_log_.back()->id);
query_log_est_total_size_ -= query_log_est_sizes_.back(); ImpaladMetrics::QUERY_LOG_EST_TOTAL_BYTES->Increment(-query_log_est_sizes_.back());
query_log_est_sizes_.pop_back(); query_log_est_sizes_.pop_back();
query_log_.pop_back(); query_log_.pop_back();
DCHECK_GE(query_log_est_total_size_, 0); DCHECK_GE(ImpaladMetrics::QUERY_LOG_EST_TOTAL_BYTES->GetValue(), 0);
DCHECK_EQ(query_log_.size(), query_log_est_sizes_.size()); DCHECK_EQ(query_log_.size(), query_log_est_sizes_.size());
} }
} }

View File

@@ -1133,8 +1133,7 @@ class ImpalaServer : public ImpalaServiceIf,
/// Random number generator for use in this class, thread safe. /// Random number generator for use in this class, thread safe.
static ThreadSafeRandom rng_; static ThreadSafeRandom rng_;
/// Guards query_log_, query_log_index_, query_log_est_sizes_, /// Guards query_log_, query_log_index_, and query_log_est_sizes_.
/// and query_log_est_total_size_.
std::mutex query_log_lock_; std::mutex query_log_lock_;
/// FIFO list of query records, which are written after the query finishes executing. /// FIFO list of query records, which are written after the query finishes executing.
@@ -1153,9 +1152,8 @@ class ImpalaServer : public ImpalaServiceIf,
QueryLogIndex; QueryLogIndex;
QueryLogIndex query_log_index_; QueryLogIndex query_log_index_;
/// Estimated individual and total size of records in query_log_ in bytes. /// Estimated individual size of records in query_log_ in bytes.
std::list<int64_t> query_log_est_sizes_; std::list<int64_t> query_log_est_sizes_;
int64_t query_log_est_total_size_ = 0;
/// Sets the given query_record (and retried_query_record too if given) for the given /// Sets the given query_record (and retried_query_record too if given) for the given
/// query_id. Returns an error Status if the given query_id cannot be found in the /// query_id. Returns an error Status if the given query_id cannot be found in the

View File

@@ -125,7 +125,7 @@ TEST(QueryStateRecordTest, EventsTimelineIterator) {
} }
} }
TEST(PerHostStateTest, PeakMemoryComparatorLessThan) { TEST(QueryStateRecordTest, PerHostStatePeakMemoryComparatorLessThan) {
TNetworkAddress addr_a; TNetworkAddress addr_a;
PerHostState a; PerHostState a;
a.peak_memory_usage = std::numeric_limits<int64_t>::min(); a.peak_memory_usage = std::numeric_limits<int64_t>::min();
@@ -140,15 +140,13 @@ TEST(PerHostStateTest, PeakMemoryComparatorLessThan) {
EXPECT_FALSE(PerHostPeakMemoryComparator(pair_b, pair_a)); EXPECT_FALSE(PerHostPeakMemoryComparator(pair_b, pair_a));
} }
TEST(PerHostStateTest, PeakMemoryComparatorEqual) { TEST(QueryStateRecordTest, PerHostStatePeakMemoryComparatorEqual) {
TNetworkAddress addr_a; TNetworkAddress addr_a;
PerHostState a; PerHostState a;
a.peak_memory_usage = 0;
std::pair<TNetworkAddress, PerHostState> pair_a = std::make_pair(addr_a, a); std::pair<TNetworkAddress, PerHostState> pair_a = std::make_pair(addr_a, a);
TNetworkAddress addr_b; TNetworkAddress addr_b;
PerHostState b; PerHostState b;
b.peak_memory_usage = 0;
std::pair<TNetworkAddress, PerHostState> pair_b = std::make_pair(addr_b, b); std::pair<TNetworkAddress, PerHostState> pair_b = std::make_pair(addr_b, b);
EXPECT_FALSE(PerHostPeakMemoryComparator(pair_a, pair_b)); EXPECT_FALSE(PerHostPeakMemoryComparator(pair_a, pair_b));

View File

@@ -178,7 +178,7 @@ struct PerHostState {
int32_t fragment_instance_count = 0; int32_t fragment_instance_count = 0;
// Peak Memory Usage // Peak Memory Usage
int64_t peak_memory_usage; int64_t peak_memory_usage = 0;
}; // struct PerHostState }; // struct PerHostState
/// Comparator function that compares two PerHostState structs based on the /// Comparator function that compares two PerHostState structs based on the

View File

@@ -167,6 +167,8 @@ const char* ImpaladMetricKeys::HEDGED_READ_OPS =
const char* ImpaladMetricKeys::HEDGED_READ_OPS_WIN = const char* ImpaladMetricKeys::HEDGED_READ_OPS_WIN =
"impala-server.hedged-read-ops-win"; "impala-server.hedged-read-ops-win";
const char* ImpaladMetricKeys::DEBUG_ACTION_NUM_FAIL = "impala.debug_action.fail"; const char* ImpaladMetricKeys::DEBUG_ACTION_NUM_FAIL = "impala.debug_action.fail";
const char* ImpaladMetricKeys::QUERY_LOG_EST_TOTAL_BYTES =
"impala-server.query-log-est-total-bytes";
// These are created by impala-server during startup. // These are created by impala-server during startup.
// ======= // =======
@@ -237,6 +239,7 @@ IntGauge* ImpaladMetrics::NUM_FILES_OPEN_FOR_INSERT = nullptr;
IntGauge* ImpaladMetrics::NUM_QUERIES_REGISTERED = nullptr; IntGauge* ImpaladMetrics::NUM_QUERIES_REGISTERED = nullptr;
IntGauge* ImpaladMetrics::RESULTSET_CACHE_TOTAL_NUM_ROWS = nullptr; IntGauge* ImpaladMetrics::RESULTSET_CACHE_TOTAL_NUM_ROWS = nullptr;
IntGauge* ImpaladMetrics::RESULTSET_CACHE_TOTAL_BYTES = nullptr; IntGauge* ImpaladMetrics::RESULTSET_CACHE_TOTAL_BYTES = nullptr;
IntGauge* ImpaladMetrics::QUERY_LOG_EST_TOTAL_BYTES = nullptr;
DoubleGauge* ImpaladMetrics::CATALOG_CACHE_AVG_LOAD_TIME = nullptr; DoubleGauge* ImpaladMetrics::CATALOG_CACHE_AVG_LOAD_TIME = nullptr;
DoubleGauge* ImpaladMetrics::CATALOG_CACHE_HIT_RATE = nullptr; DoubleGauge* ImpaladMetrics::CATALOG_CACHE_HIT_RATE = nullptr;
DoubleGauge* ImpaladMetrics::CATALOG_CACHE_LOAD_EXCEPTION_RATE = nullptr; DoubleGauge* ImpaladMetrics::CATALOG_CACHE_LOAD_EXCEPTION_RATE = nullptr;
@@ -342,6 +345,8 @@ void ImpaladMetrics::CreateMetrics(MetricGroup* m) {
ImpaladMetricKeys::RESULTSET_CACHE_TOTAL_NUM_ROWS, 0); ImpaladMetricKeys::RESULTSET_CACHE_TOTAL_NUM_ROWS, 0);
RESULTSET_CACHE_TOTAL_BYTES = m->AddGauge( RESULTSET_CACHE_TOTAL_BYTES = m->AddGauge(
ImpaladMetricKeys::RESULTSET_CACHE_TOTAL_BYTES, 0); ImpaladMetricKeys::RESULTSET_CACHE_TOTAL_BYTES, 0);
QUERY_LOG_EST_TOTAL_BYTES =
m->AddGauge(ImpaladMetricKeys::QUERY_LOG_EST_TOTAL_BYTES, 0);
// Initialize scan node metrics // Initialize scan node metrics
NUM_RANGES_PROCESSED = m->AddCounter( NUM_RANGES_PROCESSED = m->AddCounter(

View File

@@ -261,6 +261,10 @@ class ImpaladMetricKeys {
/// --debug_actions is set. /// --debug_actions is set.
static const char* DEBUG_ACTION_NUM_FAIL; static const char* DEBUG_ACTION_NUM_FAIL;
// Estimated total size of query logs that are currently retained in memory.
// Associated metric is modified in ImpalaServer::ArchiveQuery and must hold
// ImpalaServer::query_log_lock_ on modification.
static const char* QUERY_LOG_EST_TOTAL_BYTES;
}; };
/// Global impalad-wide metrics. This is useful for objects that want to update metrics /// Global impalad-wide metrics. This is useful for objects that want to update metrics
@@ -344,6 +348,7 @@ class ImpaladMetrics {
static IntGauge* NUM_QUERIES_REGISTERED; static IntGauge* NUM_QUERIES_REGISTERED;
static IntGauge* RESULTSET_CACHE_TOTAL_NUM_ROWS; static IntGauge* RESULTSET_CACHE_TOTAL_NUM_ROWS;
static IntGauge* RESULTSET_CACHE_TOTAL_BYTES; static IntGauge* RESULTSET_CACHE_TOTAL_BYTES;
static IntGauge* QUERY_LOG_EST_TOTAL_BYTES;
// Properties // Properties
static BooleanProperty* CATALOG_READY; static BooleanProperty* CATALOG_READY;

View File

@@ -33,7 +33,9 @@ TMP_BUILDALL_LOG=$(mktemp)
echo "Compiling, for build logs see ${TMP_BUILDALL_LOG}" echo "Compiling, for build logs see ${TMP_BUILDALL_LOG}"
if ! ./buildall.sh -skiptests -tidy -so -noclean &> "${TMP_BUILDALL_LOG}" if ! ./buildall.sh -skiptests -tidy -so -noclean &> "${TMP_BUILDALL_LOG}"
then then
echo "buildall.sh failed, dumping output" >&2 echo "buildall.sh failed!" >&2
grep "^make.* Error " ${TMP_BUILDALL_LOG} >&2
echo "Dumping output of ./buildall.sh -skiptests -tidy -so -noclean" >&2
cat "${TMP_BUILDALL_LOG}" cat "${TMP_BUILDALL_LOG}"
exit 1 exit 1
fi fi

View File

@@ -2853,6 +2853,16 @@
"kind": "HISTOGRAM", "kind": "HISTOGRAM",
"key": "impala-server.query-durations-ms" "key": "impala-server.query-durations-ms"
}, },
{
"description": "Estimated total size of query logs that are currently retained in memory.",
"contexts": [
"IMPALAD"
],
"label": "Query log size in memory.",
"units": "BYTES",
"kind": "GAUGE",
"key": "impala-server.query-log-est-total-bytes"
},
{ {
"description": "Distribution of DDL operation latencies", "description": "Distribution of DDL operation latencies",
"contexts": [ "contexts": [

View File

@@ -30,6 +30,8 @@ from tests.common.custom_cluster_test_suite import (
CustomClusterTestSuite) CustomClusterTestSuite)
from tests.shell.util import run_impala_shell_cmd from tests.shell.util import run_impala_shell_cmd
SMALL_QUERY_LOG_SIZE_IN_BYTES = 40 * 1024
class TestWebPage(CustomClusterTestSuite): class TestWebPage(CustomClusterTestSuite):
@classmethod @classmethod
@@ -149,23 +151,28 @@ class TestWebPage(CustomClusterTestSuite):
@pytest.mark.execute_serially @pytest.mark.execute_serially
@CustomClusterTestSuite.with_args( @CustomClusterTestSuite.with_args(
impalad_args="--query_log_size_in_bytes=" + str(2 * 1024 * 1024) cluster_size=1,
impalad_args="--query_log_size_in_bytes=" + str(SMALL_QUERY_LOG_SIZE_IN_BYTES)
) )
def test_query_log_size_in_bytes(self): def test_query_log_size_in_bytes(self):
"""Check if query list is limited by query_log_size_in_bytes flag.""" """Check if query list is limited by query_log_size_in_bytes flag."""
# The input query will produce ~627228 bytes of QueryStateRecord in MT_DOP=8. # This simple test query will produce ~8520 bytes of QueryStateRecord.
query = ("with l1 as (select id id1 from functional.alltypes), " query = "select version()"
"l2 as (select a1.id1 id2 from l1 as a1 join l1 as b1 on a1.id1=-b1.id1), " num_queries = 10
"l3 as (select a2.id2 id3 from l2 as a2 join l2 as b2 on a2.id2=-b2.id2), " for i in range(0, num_queries):
"l4 as (select a3.id3 id4 from l3 as a3 join l3 as b3 on a3.id3=-b3.id3), " self.execute_query_expect_success(self.client, query)
"l5 as (select a4.id4 id5 from l4 as a4 join l4 as b4 on a4.id4=-b4.id4) "
"select * from l5 limit 100;") # Retrieve and verify the total size metrics.
self.execute_query("SET MT_DOP=8;") metric_key = "impala-server.query-log-est-total-bytes"
for i in range(0, 5): metric_value = self.cluster.impalads[0].service.get_metric_value(metric_key)
self.execute_query(query) assert metric_value > 0
response = requests.get("http://localhost:25000/queries?json") assert metric_value <= SMALL_QUERY_LOG_SIZE_IN_BYTES
queries_json = json.loads(response.text)
assert len(queries_json["completed_queries"]) == 3 # Verify that the query page only contains a subset of the test queries.
queries_response = requests.get("http://localhost:25000/queries?json")
queries_json = json.loads(queries_response.text)
assert len(queries_json["completed_queries"]) > 0
assert len(queries_json["completed_queries"]) < num_queries
# Checks if 'messages' exists/does not exist in 'result_stderr' based on the value of # Checks if 'messages' exists/does not exist in 'result_stderr' based on the value of
# 'should_exist' # 'should_exist'