From 336034debd3c620bef00d07658482cbe770f2a78 Mon Sep 17 00:00:00 2001 From: jasonmfehr Date: Wed, 17 Sep 2025 12:33:41 -0700 Subject: [PATCH] IMPALA-14480: Optional OpenTelemetry DCHECKs The code in span-manager.cc contains aggressive DCHECKS that rely on the query lifecycle to be deterministic. In reality, the query lifecycle is not completely deterministic due to multiple threads being involved in execution, result retrieval, query shutdown, etc. On debug builds only, a new flag named, otel_trace_exhaustive_dchecks will be available with a default of 'false'. If set to 'true', then optional DCHECKs will be enabled in the SpanManager class to enable identification of edge cases where the query lifecycle proceeds in an unexpected way. The DCHECKs that are controlled by the new flag are those that rely on a specific ordering of start/end child span and add child span event calls. Change-Id: Id6507f3f0e23ecf7c2bece9a6b6c2d86bfac1e57 Reviewed-on: http://gerrit.cloudera.org:8080/23518 Reviewed-by: Michael Smith Reviewed-by: Riza Suminto Tested-by: Impala Public Jenkins --- be/src/observe/otel-flags-trace.cc | 4 ++ be/src/observe/span-manager.cc | 72 ++++++++++++++++++++----- be/src/observe/span-manager.h | 7 ++- tests/custom_cluster/test_otel_trace.py | 2 +- 4 files changed, 68 insertions(+), 17 deletions(-) diff --git a/be/src/observe/otel-flags-trace.cc b/be/src/observe/otel-flags-trace.cc index e116548d1..5b007377d 100644 --- a/be/src/observe/otel-flags-trace.cc +++ b/be/src/observe/otel-flags-trace.cc @@ -61,6 +61,10 @@ DEFINE_validator(otel_trace_exporter, [](const char* flagname, const string& val return false; }); // flag otel_trace_exporter +DEFINE_bool_hidden(otel_trace_exhaustive_dchecks, false, "Specifies whether or not to " + "enable exhaustive DCHECKs in OpenTelemetry code. These DCHECKs fail if spans are " + "not properly started and ended."); + // // Start of HTTP related flags. // diff --git a/be/src/observe/span-manager.cc b/be/src/observe/span-manager.cc index c556fb3a9..0e90f87e9 100644 --- a/be/src/observe/span-manager.cc +++ b/be/src/observe/span-manager.cc @@ -49,6 +49,10 @@ DECLARE_string(cluster_id); DECLARE_int32(otel_trace_retry_policy_max_attempts); DECLARE_int32(otel_trace_retry_policy_max_backoff_s); +#ifndef NDEBUG +DECLARE_bool(otel_trace_exhaustive_dchecks); +#endif + // Names of attributes only on Root spans. static constexpr char const* ATTR_ERROR_MESSAGE = "ErrorMessage"; static constexpr char const* ATTR_QUERY_START_TIME = "QueryStartTime"; @@ -181,8 +185,12 @@ void SpanManager::AddChildSpanEvent(const nostd::string_view& name) { LOG(WARNING) << strings::Substitute("Attempted to add event '$0' with no active " "child span trace_id=\"$1\" span_id=\"$2\"\n$3", name.data(), root_->TraceId(), root_->SpanId(), GetStackTrace()); - DCHECK(current_child_) << strings::Substitute("Cannot add event '$0' when child span " - "is not active.", name.data()); +#ifndef NDEBUG + if (FLAGS_otel_trace_exhaustive_dchecks) { + DCHECK(current_child_) << strings::Substitute("Cannot add event '$0' when child " + "span is not active.", name.data()); + } +#endif } } // function AddChildSpanEvent @@ -219,7 +227,11 @@ void SpanManager::EndChildSpanInit() { } // function EndChildSpanInit inline void SpanManager::DoEndChildSpanInit(const Status* cause) { - DCHECK_CHILD_SPAN_TYPE(ChildSpanType::INIT); +#ifndef NDEBUG + if (FLAGS_otel_trace_exhaustive_dchecks) { + DCHECK_CHILD_SPAN_TYPE(ChildSpanType::INIT); + } +#endif EndChildSpan( cause, @@ -241,7 +253,11 @@ void SpanManager::EndChildSpanSubmitted() { } // function EndChildSpanSubmitted inline void SpanManager::DoEndChildSpanSubmitted(const Status* cause) { - DCHECK_CHILD_SPAN_TYPE(ChildSpanType::SUBMITTED); +#ifndef NDEBUG + if (FLAGS_otel_trace_exhaustive_dchecks) { + DCHECK_CHILD_SPAN_TYPE(ChildSpanType::SUBMITTED); + } +#endif EndChildSpan(cause); } // function DoEndChildSpanSubmitted @@ -257,7 +273,11 @@ void SpanManager::EndChildSpanPlanning() { } // function EndChildSpanPlanning inline void SpanManager::DoEndChildSpanPlanning(const Status* cause) { - DCHECK_CHILD_SPAN_TYPE(ChildSpanType::PLANNING); +#ifndef NDEBUG + if (FLAGS_otel_trace_exhaustive_dchecks) { + DCHECK_CHILD_SPAN_TYPE(ChildSpanType::PLANNING); + } +#endif EndChildSpan( cause, OtelAttributesMap{ @@ -283,7 +303,11 @@ inline void SpanManager::DoEndChildSpanAdmissionControl(const Status* cause) { return; // <-- EARLY RETURN } - DCHECK_CHILD_SPAN_TYPE(ChildSpanType::ADMISSION_CONTROL); +#ifndef NDEBUG + if (FLAGS_otel_trace_exhaustive_dchecks) { + DCHECK_CHILD_SPAN_TYPE(ChildSpanType::ADMISSION_CONTROL); + } +#endif bool queued = false; string adm_result; @@ -344,7 +368,11 @@ inline void SpanManager::DoEndChildSpanQueryExecution(const Status* cause) { return; // <-- EARLY RETURN } - DCHECK_CHILD_SPAN_TYPE(ChildSpanType::QUERY_EXEC); +#ifndef NDEBUG + if (FLAGS_otel_trace_exhaustive_dchecks) { + DCHECK_CHILD_SPAN_TYPE(ChildSpanType::QUERY_EXEC); + } +#endif OtelAttributesMap attrs; if (client_request_state_->exec_request().stmt_type == TStmtType::QUERY) { @@ -399,7 +427,11 @@ void SpanManager::StartChildSpanClose(const Status* cause) { } // function StartChildSpanClose void SpanManager::EndChildSpanClose() { - DCHECK_CHILD_SPAN_TYPE(ChildSpanType::CLOSE); +#ifndef NDEBUG + if (FLAGS_otel_trace_exhaustive_dchecks) { + DCHECK_CHILD_SPAN_TYPE(ChildSpanType::CLOSE); + } +#endif lock_guard l(child_span_mu_); @@ -459,9 +491,13 @@ inline void SpanManager::ChildSpanBuilder(const ChildSpanType& span_type, "another child span '$1' is still active trace_id=\"$2\" span_id=\"$3\"\n$4", to_string(span_type), to_string(child_span_type_), root_->TraceId(), root_->SpanId(), GetStackTrace()); - DCHECK(false) << strings::Substitute("Should not start child span '$0' when child " - "span '$1' is already active.", to_string(span_type), - to_string(child_span_type_)); +#ifndef NDEBUG + if (FLAGS_otel_trace_exhaustive_dchecks) { + DCHECK(false) << strings::Substitute("Should not start child span '$0' when " + "child span '$1' is already active.", to_string(span_type), + to_string(child_span_type_)); + } +#endif { lock_guard crs_lock(*(client_request_state_->lock())); @@ -529,7 +565,11 @@ inline void SpanManager::EndChildSpan(const Status* cause, LOG(WARNING) << strings::Substitute("Attempted to end a non-active child span " "trace_id=\"$0\" span_id=\"$1\"\n$2", root_->TraceId(), root_->SpanId(), GetStackTrace()); - DCHECK(current_child_) << "Cannot end child span when one is not active."; +#ifndef NDEBUG + if (FLAGS_otel_trace_exhaustive_dchecks) { + DCHECK(current_child_) << "Cannot end child span when one is not active."; + } +#endif } } // function EndChildSpan @@ -555,8 +595,12 @@ inline void SpanManager::EndActiveChildSpan(const Status* cause) { LOG(WARNING) << "Attempted to start Close child span while another Close child " "span is already active trace_id=\"$0\" span_id=\"$1\"\n$2", root_->TraceId(), current_child_->SpanId(), GetStackTrace(); - DCHECK(false) << "Cannot start a new Close child span while a Close child span is " - "already active."; +#ifndef NDEBUG + if (FLAGS_otel_trace_exhaustive_dchecks) { + DCHECK(false) << "Cannot start a new Close child span while a Close child span " + "is already active."; + } +#endif break; default: // No-op, no active child span to end. diff --git a/be/src/observe/span-manager.h b/be/src/observe/span-manager.h index 2d8d4ccd5..0dad247c4 100644 --- a/be/src/observe/span-manager.h +++ b/be/src/observe/span-manager.h @@ -67,7 +67,8 @@ public: void AddChildSpanEvent(const opentelemetry::nostd::string_view& name); // Functions to start child spans. If another child span is active, it will be ended, - // a warning will be logged, and a DCHECK failed. + // a warning will be logged, and, if the otel_trace_exhaustive_dchecks flag is true, + // a DCHECK will fail. void StartChildSpanInit(); void StartChildSpanSubmitted(); void StartChildSpanPlanning(); @@ -116,7 +117,9 @@ private: std::shared_ptr root_; // TimedSpan instance for the current child span and the mutex to protect it. - // To ensure no deadlocks, locks must be acquired in the following order: + // To ensure no deadlocks, locks must be acquired in the following order. Note that + // ClientRequestState::lock_ only needs to be held when interacting with the + // client_request_state_ variable. It should not be held otherwise. // 1. SpanManager::child_span_mu_ // 2. ClientRequestState::lock_ std::unique_ptr current_child_; diff --git a/tests/custom_cluster/test_otel_trace.py b/tests/custom_cluster/test_otel_trace.py index eb5b1acbd..b6d5a806a 100644 --- a/tests/custom_cluster/test_otel_trace.py +++ b/tests/custom_cluster/test_otel_trace.py @@ -33,7 +33,7 @@ from tests.util.retry import retry OUT_DIR = "out_dir_traces" TRACE_FILE = "export-trace.jsonl" TRACE_FLAGS = "--otel_trace_enabled=true --otel_trace_exporter=file " \ - "--otel_file_flush_interval_ms=500 " \ + "--otel_trace_exhaustive_dchecks --otel_file_flush_interval_ms=500 " \ "--otel_file_pattern={out_dir_traces}/" + TRACE_FILE