IMPALA-14493: Cap memory usage of global admission service

The global admission service can experience OOM errors under
high concurrency because its process memory tracker is inaccurate
and doesn't account for all memory allocations.

Ensuring the memory tracker accurately accounts for every allocation
could be difficult, so this patch uses a simpler solution: it
introduces a hard memory cap based on tcmalloc statistics, which
accurately reflect the true process memory usage. If a new query
is submitted while tcmalloc memory usage is over the process
limit, the query will be rejected immediately to protect from OOM.

Adds a new flag, enable_admission_service_mem_safeguard, that allows
this feature to be enabled or disabled. By default, this feature is
turned on.

Tests:
Added test test_admission_service_low_mem_limit.
Passed exhaustive tests.

Change-Id: I2ee2c942a73fcd69358851fc2fdc0fc4fe531c73
Reviewed-on: http://gerrit.cloudera.org:8080/23542
Reviewed-by: Abhishek Rawat <arawat@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
This commit is contained in:
Yida Wu
2025-10-13 16:36:28 -07:00
committed by Yida Wu
parent ff8bb33b91
commit 1bc7cdbff6
4 changed files with 57 additions and 2 deletions

View File

@@ -26,6 +26,7 @@
#include "runtime/bufferpool/reservation-util.h"
#include "runtime/exec-env.h"
#include "runtime/mem-tracker.h"
#include "scheduling/admissiond-env.h"
#include "scheduling/cluster-membership-mgr.h"
#include "scheduling/executor-group.h"
#include "scheduling/schedule-state.h"
@@ -36,6 +37,7 @@
#include "util/bit-util.h"
#include "util/collection-metrics.h"
#include "util/debug-util.h"
#include "util/memory-metrics.h"
#include "util/metrics.h"
#include "util/pretty-printer.h"
#include "util/runtime-profile-counters.h"
@@ -266,6 +268,9 @@ const string REASON_NO_EXECUTOR_GROUPS =
"Waiting for executors to start. Only DDL queries and queries scheduled only on the "
"coordinator (either NUM_NODES set to 1 or when small query optimization is "
"triggered) can currently run.";
// Rejection reason used when the tcmalloc in-use bytes exceed the admission service
// memory limit. $0 is substituted with the current in-use bytes and $1 with the limit.
const string REASON_EXCEED_MEMORY_LIMIT =
"Admission rejected due to memory pressure in admissiond. Current usage: $0 bytes, "
"limit: $1 bytes";
// The name of the root pool.
const string ROOT_POOL = "root";
@@ -1646,6 +1651,21 @@ Status AdmissionController::SubmitForAdmission(const AdmissionRequest& request,
return Status::Expected(rejected_msg);
}
// Memory safeguard: read the tcmalloc in-use byte count once so that the comparison
// below and the rejection message report the same value.
int64_t bytes_inuse = TcmallocMetric::BYTES_IN_USE->GetValue();
// Reject only when all of the following hold: the query is not trivial, an
// AdmissiondEnv instance exists, a positive memory limit is configured, and the
// in-use bytes are strictly above that limit.
// NOTE(review): presumably GetInstance() is null when admission control runs outside
// the admission service daemon -- confirm.
if (!is_trivial && AdmissiondEnv::GetInstance() != nullptr
&& AdmissiondEnv::GetInstance()->admission_service_mem_limit() > 0
&& bytes_inuse > AdmissiondEnv::GetInstance()->admission_service_mem_limit()) {
// Record the reason with the observed usage ($0) and the configured limit ($1).
queue_node->not_admitted_reason = Substitute(REASON_EXCEED_MEMORY_LIMIT,
bytes_inuse, AdmissiondEnv::GetInstance()->admission_service_mem_limit());
// Mark the query as rejected in its summary profile and bump the rejected counter.
request.summary_profile->AddInfoString(
PROFILE_INFO_KEY_ADMISSION_RESULT, PROFILE_INFO_VAL_REJECTED);
stats->metrics()->total_rejected->Increment(1);
const ErrorMsg& rejected_msg = ErrorMsg(TErrorCode::ADMISSION_REJECTED,
queue_node->pool_name, queue_node->not_admitted_reason);
VLOG_QUERY << "query_id=" << PrintId(request.query_id) << " " << rejected_msg.msg();
// Return immediately with an expected (non-internal-error) status.
return Status::Expected(rejected_msg);
}
string user;
RETURN_IF_ERROR(GetEffectiveShortUser(
queue_node->admission_request.request.query_ctx.session, &user));

View File

@@ -30,6 +30,7 @@
#include "util/mem-info.h"
#include "util/memory-metrics.h"
#include "util/metrics.h"
#include "util/pretty-printer.h"
#include "util/uid-util.h"
#include "common/names.h"
@@ -45,6 +46,10 @@ DEFINE_validator(
<< "' must be greater than 0 and less than or equal to 1000.";
return false;
});
// Gates the admission service memory safeguard. AdmissiondEnv::Init() assigns the
// admission memory limit only when this flag is true; otherwise the limit keeps its
// default of 0, which the admission-time check treats as "no limit".
DEFINE_bool(enable_admission_service_mem_safeguard, true,
"When true, enables a hard memory limit safeguard for the admission service. "
"This rejects new queries if the in-use process memory from tcmalloc exceeds "
"admission_service_mem_limit to prevent OOM.");
DECLARE_string(state_store_host);
DECLARE_int32(state_store_port);
@@ -107,6 +112,11 @@ Status AdmissiondEnv::Init() {
new MemTracker(AggregateMemoryMetrics::TOTAL_USED, bytes_limit, "Process"));
mem_tracker_->RegisterMetrics(
DaemonEnv::GetInstance()->metrics(), "mem-tracker.process");
// When the safeguard flag is on, reuse the byte limit given to the process MemTracker
// above as the admission service memory limit, and log the chosen value.
// NOTE(review): the admission-time check requires the limit to be > 0, so a
// non-positive bytes_limit would leave the safeguard inactive even though this
// message is logged -- confirm bytes_limit is always positive here.
if (FLAGS_enable_admission_service_mem_safeguard) {
admission_mem_limit_ = bytes_limit;
LOG(INFO) << "Set admission service memory limit to "
<< PrettyPrinter::Print(admission_mem_limit_, TUnit::BYTES);
}
http_handler_->RegisterHandlers(DaemonEnv::GetInstance()->webserver());
if (DaemonEnv::GetInstance()->metrics_webserver() != nullptr) {

View File

@@ -61,6 +61,7 @@ class AdmissiondEnv {
RpcMgr* rpc_mgr() { return rpc_mgr_.get(); }
Scheduler* scheduler() { return scheduler_.get(); }
StatestoreSubscriber* subscriber() { return statestore_subscriber_.get(); }
/// Returns the admission service memory limit in bytes. A value of 0 (the default)
/// means no limit is enforced.
int64_t admission_service_mem_limit() { return admission_mem_limit_; }
private:
static AdmissiondEnv* admissiond_env_;
@@ -80,6 +81,11 @@ class AdmissiondEnv {
std::unique_ptr<StatestoreSubscriber> statestore_subscriber_;
MetricGroup* rpc_metrics_ = nullptr;
/// Memory limit for the admission service. If admission_mem_limit_ is set to a value
/// over 0, new admission requests are rejected when the tcmalloc in-use bytes are over
/// this limit. Assigned in Init() when --enable_admission_service_mem_safeguard is
/// true; otherwise it keeps the default of 0.
int64_t admission_mem_limit_ = 0;
};
} // namespace impala