mirror of
https://github.com/apache/impala.git
synced 2026-01-16 09:00:38 -05:00
Google's headers define DCHECK_NOTNULL so that even in release builds the argument is evaluated. This is different from the usual DCHECK() macros. We redefined DCHECK_NOTNULL to be a no-op in release builds, but instead we should just be using DCHECK(X != NULL). One reason is that as we import code from other sources that had used these macros, they would reasonably expect the macro semantics to be the same, leading to subtle bugs that only manifest in release builds. Change-Id: Id8c08faea3099d52cd5604209384a7cca79160a1 Reviewed-on: http://gerrit.cloudera.org:8080/486 Reviewed-by: Marcel Kornacker <marcel@cloudera.com> Tested-by: Internal Jenkins
589 lines
21 KiB
C++
589 lines
21 KiB
C++
// Copyright 2012 Cloudera Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
#include "runtime/plan-fragment-executor.h"
|
|
|
|
#include <thrift/protocol/TDebugProtocol.h>
|
|
#include <boost/date_time/posix_time/posix_time_types.hpp>
|
|
#include <boost/unordered_map.hpp>
|
|
#include <boost/foreach.hpp>
|
|
#include <gutil/strings/substitute.h>
|
|
|
|
#include "codegen/llvm-codegen.h"
|
|
#include "common/logging.h"
|
|
#include "common/object-pool.h"
|
|
#include "exec/data-sink.h"
|
|
#include "exec/exec-node.h"
|
|
#include "exec/exchange-node.h"
|
|
#include "exec/scan-node.h"
|
|
#include "exec/hdfs-scan-node.h"
|
|
#include "exec/hbase-table-scanner.h"
|
|
#include "exprs/expr.h"
|
|
#include "runtime/descriptors.h"
|
|
#include "runtime/data-stream-mgr.h"
|
|
#include "runtime/row-batch.h"
|
|
#include "runtime/mem-tracker.h"
|
|
#include "util/cgroups-mgr.h"
|
|
#include "util/cpu-info.h"
|
|
#include "util/debug-util.h"
|
|
#include "util/container-util.h"
|
|
#include "util/parse-util.h"
|
|
#include "util/mem-info.h"
|
|
#include "util/periodic-counter-updater.h"
|
|
#include "util/llama-util.h"
|
|
#include "util/pretty-printer.h"
|
|
|
|
DEFINE_bool(serialize_batch, false, "serialize and deserialize each returned row batch");
|
|
DEFINE_int32(status_report_interval, 5, "interval between profile reports; in seconds");
|
|
DECLARE_bool(enable_rm);
|
|
|
|
#include "common/names.h"
|
|
|
|
namespace posix_time = boost::posix_time;
|
|
using boost::get_system_time;
|
|
using boost::system_time;
|
|
using namespace apache::thrift;
|
|
using namespace strings;
|
|
|
|
namespace impala {
|
|
|
|
const string PlanFragmentExecutor::PER_HOST_PEAK_MEM_COUNTER = "PerHostPeakMemUsage";
|
|
|
|
PlanFragmentExecutor::PlanFragmentExecutor(ExecEnv* exec_env,
|
|
const ReportStatusCallback& report_status_cb) :
|
|
exec_env_(exec_env), plan_(NULL), report_status_cb_(report_status_cb),
|
|
report_thread_active_(false), done_(false), prepared_(false), closed_(false),
|
|
has_thread_token_(false), average_thread_tokens_(NULL),
|
|
mem_usage_sampled_counter_(NULL), thread_usage_sampled_counter_(NULL) {
|
|
}
|
|
|
|
PlanFragmentExecutor::~PlanFragmentExecutor() {
|
|
Close();
|
|
if (runtime_state_->query_resource_mgr() != NULL) {
|
|
exec_env_->resource_broker()->UnregisterQueryResourceMgr(query_id_);
|
|
}
|
|
// at this point, the report thread should have been stopped
|
|
DCHECK(!report_thread_active_);
|
|
}
|
|
|
|
Status PlanFragmentExecutor::Prepare(const TExecPlanFragmentParams& request) {
|
|
fragment_sw_.Start();
|
|
const TPlanFragmentExecParams& params = request.params;
|
|
query_id_ = request.fragment_instance_ctx.query_ctx.query_id;
|
|
|
|
VLOG_QUERY << "Prepare(): query_id=" << PrintId(query_id_) << " instance_id="
|
|
<< PrintId(request.fragment_instance_ctx.fragment_instance_id);
|
|
VLOG(2) << "params:\n" << ThriftDebugString(params);
|
|
|
|
if (request.__isset.reserved_resource) {
|
|
VLOG_QUERY << "Executing fragment in reserved resource:\n"
|
|
<< request.reserved_resource;
|
|
}
|
|
|
|
string cgroup = "";
|
|
if (FLAGS_enable_rm && request.__isset.reserved_resource) {
|
|
cgroup = exec_env_->cgroups_mgr()->UniqueIdToCgroup(PrintId(query_id_, "_"));
|
|
}
|
|
|
|
runtime_state_.reset(
|
|
new RuntimeState(request, cgroup, exec_env_));
|
|
|
|
// total_time_counter() is in the runtime_state_ so start it up now.
|
|
SCOPED_TIMER(profile()->total_time_counter());
|
|
|
|
// Register after setting runtime_state_ to ensure proper cleanup.
|
|
if (FLAGS_enable_rm && !cgroup.empty() && request.__isset.reserved_resource) {
|
|
bool is_first;
|
|
RETURN_IF_ERROR(exec_env_->cgroups_mgr()->RegisterFragment(
|
|
request.fragment_instance_ctx.fragment_instance_id, cgroup, &is_first));
|
|
// The first fragment using cgroup sets the cgroup's CPU shares based on the reserved
|
|
// resource.
|
|
if (is_first) {
|
|
DCHECK(request.__isset.reserved_resource);
|
|
int32_t cpu_shares = exec_env_->cgroups_mgr()->VirtualCoresToCpuShares(
|
|
request.reserved_resource.v_cpu_cores);
|
|
RETURN_IF_ERROR(exec_env_->cgroups_mgr()->SetCpuShares(cgroup, cpu_shares));
|
|
}
|
|
}
|
|
|
|
// TODO: Find the reservation id when the resource request is not set
|
|
if (FLAGS_enable_rm && request.__isset.reserved_resource) {
|
|
TUniqueId reservation_id;
|
|
reservation_id << request.reserved_resource.reservation_id;
|
|
|
|
// TODO: Combine this with RegisterFragment() etc.
|
|
QueryResourceMgr* res_mgr;
|
|
bool is_first = exec_env_->resource_broker()->GetQueryResourceMgr(query_id_,
|
|
reservation_id, request.local_resource_address, &res_mgr);
|
|
DCHECK(res_mgr != NULL);
|
|
runtime_state_->SetQueryResourceMgr(res_mgr);
|
|
if (is_first) {
|
|
runtime_state_->query_resource_mgr()->InitVcoreAcquisition(
|
|
request.reserved_resource.v_cpu_cores);
|
|
}
|
|
}
|
|
|
|
// reservation or a query option.
|
|
int64_t bytes_limit = -1;
|
|
if (runtime_state_->query_options().__isset.mem_limit &&
|
|
runtime_state_->query_options().mem_limit > 0) {
|
|
bytes_limit = runtime_state_->query_options().mem_limit;
|
|
VLOG_QUERY << "Using query memory limit from query options: "
|
|
<< PrettyPrinter::Print(bytes_limit, TUnit::BYTES);
|
|
}
|
|
|
|
int64_t rm_reservation_size_bytes = -1;
|
|
if (request.__isset.reserved_resource && request.reserved_resource.memory_mb > 0) {
|
|
rm_reservation_size_bytes =
|
|
static_cast<int64_t>(request.reserved_resource.memory_mb) * 1024L * 1024L;
|
|
// Queries that use more than the hard limit will be killed, so it's not useful to
|
|
// have a reservation larger than the hard limit. Clamp reservation bytes limit to the
|
|
// hard limit (if it exists).
|
|
if (rm_reservation_size_bytes > bytes_limit && bytes_limit != -1) {
|
|
runtime_state_->LogError(ErrorMsg(TErrorCode::FRAGMENT_EXECUTOR,
|
|
PrettyPrinter::PrintBytes(rm_reservation_size_bytes),
|
|
PrettyPrinter::PrintBytes(bytes_limit)));
|
|
rm_reservation_size_bytes = bytes_limit;
|
|
}
|
|
VLOG_QUERY << "Using RM reservation memory limit from resource reservation: "
|
|
<< PrettyPrinter::Print(rm_reservation_size_bytes, TUnit::BYTES);
|
|
}
|
|
|
|
DCHECK(!params.request_pool.empty());
|
|
runtime_state_->InitMemTrackers(query_id_, ¶ms.request_pool,
|
|
bytes_limit, rm_reservation_size_bytes);
|
|
RETURN_IF_ERROR(runtime_state_->CreateBlockMgr());
|
|
|
|
// Reserve one main thread from the pool
|
|
runtime_state_->resource_pool()->AcquireThreadToken();
|
|
if (runtime_state_->query_resource_mgr() != NULL) {
|
|
runtime_state_->query_resource_mgr()->NotifyThreadUsageChange(1);
|
|
}
|
|
has_thread_token_ = true;
|
|
|
|
average_thread_tokens_ = profile()->AddSamplingCounter("AverageThreadTokens",
|
|
bind<int64_t>(mem_fn(&ThreadResourceMgr::ResourcePool::num_threads),
|
|
runtime_state_->resource_pool()));
|
|
mem_usage_sampled_counter_ = profile()->AddTimeSeriesCounter("MemoryUsage",
|
|
TUnit::BYTES,
|
|
bind<int64_t>(mem_fn(&MemTracker::consumption),
|
|
runtime_state_->instance_mem_tracker()));
|
|
thread_usage_sampled_counter_ = profile()->AddTimeSeriesCounter("ThreadUsage",
|
|
TUnit::UNIT,
|
|
bind<int64_t>(mem_fn(&ThreadResourceMgr::ResourcePool::num_threads),
|
|
runtime_state_->resource_pool()));
|
|
|
|
// set up desc tbl
|
|
DescriptorTbl* desc_tbl = NULL;
|
|
DCHECK(request.__isset.desc_tbl);
|
|
RETURN_IF_ERROR(
|
|
DescriptorTbl::Create(obj_pool(), request.desc_tbl, &desc_tbl));
|
|
runtime_state_->set_desc_tbl(desc_tbl);
|
|
VLOG_QUERY << "descriptor table for fragment="
|
|
<< request.fragment_instance_ctx.fragment_instance_id
|
|
<< "\n" << desc_tbl->DebugString();
|
|
|
|
// set up plan
|
|
DCHECK(request.__isset.fragment);
|
|
RETURN_IF_ERROR(
|
|
ExecNode::CreateTree(obj_pool(), request.fragment.plan, *desc_tbl, &plan_));
|
|
runtime_state_->set_fragment_root_id(plan_->id());
|
|
|
|
if (request.params.__isset.debug_node_id) {
|
|
DCHECK(request.params.__isset.debug_action);
|
|
DCHECK(request.params.__isset.debug_phase);
|
|
ExecNode::SetDebugOptions(request.params.debug_node_id,
|
|
request.params.debug_phase, request.params.debug_action, plan_);
|
|
}
|
|
|
|
// set #senders of exchange nodes before calling Prepare()
|
|
vector<ExecNode*> exch_nodes;
|
|
plan_->CollectNodes(TPlanNodeType::EXCHANGE_NODE, &exch_nodes);
|
|
BOOST_FOREACH(ExecNode* exch_node, exch_nodes)
|
|
{
|
|
DCHECK_EQ(exch_node->type(), TPlanNodeType::EXCHANGE_NODE);
|
|
int num_senders = FindWithDefault(params.per_exch_num_senders,
|
|
exch_node->id(), 0);
|
|
DCHECK_GT(num_senders, 0);
|
|
static_cast<ExchangeNode*>(exch_node)->set_num_senders(num_senders);
|
|
}
|
|
|
|
// set scan ranges
|
|
vector<ExecNode*> scan_nodes;
|
|
vector<TScanRangeParams> no_scan_ranges;
|
|
plan_->CollectScanNodes(&scan_nodes);
|
|
for (int i = 0; i < scan_nodes.size(); ++i) {
|
|
ScanNode* scan_node = static_cast<ScanNode*>(scan_nodes[i]);
|
|
const vector<TScanRangeParams>& scan_ranges = FindWithDefault(
|
|
params.per_node_scan_ranges, scan_node->id(), no_scan_ranges);
|
|
scan_node->SetScanRanges(scan_ranges);
|
|
}
|
|
|
|
RuntimeProfile::Counter* prepare_timer = ADD_TIMER(profile(), "PrepareTime");
|
|
{
|
|
SCOPED_TIMER(prepare_timer);
|
|
RETURN_IF_ERROR(plan_->Prepare(runtime_state_.get()));
|
|
}
|
|
|
|
PrintVolumeIds(params.per_node_scan_ranges);
|
|
|
|
// set up sink, if required
|
|
if (request.fragment.__isset.output_sink) {
|
|
RETURN_IF_ERROR(DataSink::CreateDataSink(
|
|
obj_pool(), request.fragment.output_sink, request.fragment.output_exprs,
|
|
params, row_desc(), &sink_));
|
|
RETURN_IF_ERROR(sink_->Prepare(runtime_state()));
|
|
|
|
RuntimeProfile* sink_profile = sink_->profile();
|
|
if (sink_profile != NULL) {
|
|
profile()->AddChild(sink_profile);
|
|
}
|
|
} else {
|
|
sink_.reset(NULL);
|
|
}
|
|
|
|
// set up profile counters
|
|
profile()->AddChild(plan_->runtime_profile());
|
|
rows_produced_counter_ =
|
|
ADD_COUNTER(profile(), "RowsProduced", TUnit::UNIT);
|
|
per_host_mem_usage_ =
|
|
ADD_COUNTER(profile(), PER_HOST_PEAK_MEM_COUNTER, TUnit::BYTES);
|
|
|
|
row_batch_.reset(new RowBatch(plan_->row_desc(), runtime_state_->batch_size(),
|
|
runtime_state_->instance_mem_tracker()));
|
|
VLOG(2) << "plan_root=\n" << plan_->DebugString();
|
|
prepared_ = true;
|
|
return Status::OK();
|
|
}
|
|
|
|
void PlanFragmentExecutor::OptimizeLlvmModule() {
|
|
if (!runtime_state_->codegen_created()) return;
|
|
LlvmCodeGen* codegen;
|
|
Status status = runtime_state_->GetCodegen(&codegen, /* initalize */ false);
|
|
DCHECK(status.ok());
|
|
DCHECK(codegen != NULL);
|
|
status = codegen->FinalizeModule();
|
|
if (!status.ok()) {
|
|
stringstream ss;
|
|
ss << "Error with codegen for this query: " << status.GetDetail();
|
|
runtime_state_->LogError(ErrorMsg(TErrorCode::GENERAL, ss.str()));
|
|
}
|
|
}
|
|
|
|
void PlanFragmentExecutor::PrintVolumeIds(
|
|
const PerNodeScanRanges& per_node_scan_ranges) {
|
|
if (per_node_scan_ranges.empty()) return;
|
|
|
|
HdfsScanNode::PerVolumnStats per_volume_stats;
|
|
BOOST_FOREACH(const PerNodeScanRanges::value_type& entry, per_node_scan_ranges) {
|
|
HdfsScanNode::UpdateHdfsSplitStats(entry.second, &per_volume_stats);
|
|
}
|
|
|
|
stringstream str;
|
|
|
|
HdfsScanNode::PrintHdfsSplitStats(per_volume_stats, &str);
|
|
profile()->AddInfoString(HdfsScanNode::HDFS_SPLIT_STATS_DESC, str.str());
|
|
VLOG_FILE
|
|
<< "Hdfs split stats (<volume id>:<# splits>/<split lengths>) for query="
|
|
<< query_id_ << ":\n" << str.str();
|
|
}
|
|
|
|
Status PlanFragmentExecutor::Open() {
|
|
VLOG_QUERY << "Open(): instance_id="
|
|
<< runtime_state_->fragment_instance_id();
|
|
// we need to start the profile-reporting thread before calling Open(), since it
|
|
// may block
|
|
if (!report_status_cb_.empty() && FLAGS_status_report_interval > 0) {
|
|
unique_lock<mutex> l(report_thread_lock_);
|
|
report_thread_.reset(
|
|
new Thread("plan-fragment-executor", "report-profile",
|
|
&PlanFragmentExecutor::ReportProfile, this));
|
|
// make sure the thread started up, otherwise ReportProfile() might get into a race
|
|
// with StopReportThread()
|
|
report_thread_started_cv_.wait(l);
|
|
report_thread_active_ = true;
|
|
}
|
|
|
|
OptimizeLlvmModule();
|
|
|
|
Status status = OpenInternal();
|
|
if (!status.ok() && !status.IsCancelled() && !status.IsMemLimitExceeded()) {
|
|
// Log error message in addition to returning in Status. Queries that do not
|
|
// fetch results (e.g. insert) may not receive the message directly and can
|
|
// only retrieve the log.
|
|
runtime_state_->LogError(status.msg());
|
|
}
|
|
UpdateStatus(status);
|
|
return status;
|
|
}
|
|
|
|
Status PlanFragmentExecutor::OpenInternal() {
|
|
{
|
|
SCOPED_TIMER(profile()->total_time_counter());
|
|
RETURN_IF_ERROR(plan_->Open(runtime_state_.get()));
|
|
}
|
|
|
|
if (sink_.get() == NULL) return Status::OK();
|
|
|
|
RETURN_IF_ERROR(sink_->Open(runtime_state_.get()));
|
|
|
|
// If there is a sink, do all the work of driving it here, so that
|
|
// when this returns the query has actually finished
|
|
while (!done_) {
|
|
RowBatch* batch;
|
|
RETURN_IF_ERROR(GetNextInternal(&batch));
|
|
if (batch == NULL) break;
|
|
if (VLOG_ROW_IS_ON) {
|
|
VLOG_ROW << "OpenInternal: #rows=" << batch->num_rows();
|
|
for (int i = 0; i < batch->num_rows(); ++i) {
|
|
VLOG_ROW << PrintRow(batch->GetRow(i), row_desc());
|
|
}
|
|
}
|
|
|
|
SCOPED_TIMER(profile()->total_time_counter());
|
|
RETURN_IF_ERROR(sink_->Send(runtime_state(), batch, done_));
|
|
}
|
|
|
|
// Close the sink *before* stopping the report thread. Close may
|
|
// need to add some important information to the last report that
|
|
// gets sent. (e.g. table sinks record the files they have written
|
|
// to in this method)
|
|
// The coordinator report channel waits until all backends are
|
|
// either in error or have returned a status report with done =
|
|
// true, so tearing down any data stream state (a separate
|
|
// channel) in Close is safe.
|
|
SCOPED_TIMER(profile()->total_time_counter());
|
|
sink_->Close(runtime_state());
|
|
done_ = true;
|
|
|
|
FragmentComplete();
|
|
return Status::OK();
|
|
}
|
|
|
|
void PlanFragmentExecutor::ReportProfile() {
|
|
VLOG_FILE << "ReportProfile(): instance_id="
|
|
<< runtime_state_->fragment_instance_id();
|
|
DCHECK(!report_status_cb_.empty());
|
|
unique_lock<mutex> l(report_thread_lock_);
|
|
// tell Open() that we started
|
|
report_thread_started_cv_.notify_one();
|
|
|
|
// Jitter the reporting time of remote fragments by a random amount between
|
|
// 0 and the report_interval. This way, the coordinator doesn't get all the
|
|
// updates at once so its better for contention as well as smoother progress
|
|
// reporting.
|
|
int report_fragment_offset = rand() % FLAGS_status_report_interval;
|
|
system_time timeout = get_system_time()
|
|
+ posix_time::seconds(report_fragment_offset);
|
|
// We don't want to wait longer than it takes to run the entire fragment.
|
|
stop_report_thread_cv_.timed_wait(l, timeout);
|
|
|
|
while (report_thread_active_) {
|
|
system_time timeout = get_system_time()
|
|
+ posix_time::seconds(FLAGS_status_report_interval);
|
|
|
|
// timed_wait can return because the timeout occurred or the condition variable
|
|
// was signaled. We can't rely on its return value to distinguish between the
|
|
// two cases (e.g. there is a race here where the wait timed out but before grabbing
|
|
// the lock, the condition variable was signaled). Instead, we will use an external
|
|
// flag, report_thread_active_, to coordinate this.
|
|
stop_report_thread_cv_.timed_wait(l, timeout);
|
|
|
|
if (VLOG_FILE_IS_ON) {
|
|
VLOG_FILE << "Reporting " << (!report_thread_active_ ? "final " : " ")
|
|
<< "profile for instance " << runtime_state_->fragment_instance_id();
|
|
stringstream ss;
|
|
profile()->PrettyPrint(&ss);
|
|
VLOG_FILE << ss.str();
|
|
}
|
|
|
|
if (!report_thread_active_) break;
|
|
|
|
if (completed_report_sent_.Read() == 0) {
|
|
// No complete fragment report has been sent.
|
|
SendReport(false);
|
|
}
|
|
}
|
|
|
|
VLOG_FILE << "exiting reporting thread: instance_id="
|
|
<< runtime_state_->fragment_instance_id();
|
|
}
|
|
|
|
void PlanFragmentExecutor::SendReport(bool done) {
|
|
if (report_status_cb_.empty()) return;
|
|
|
|
Status status;
|
|
{
|
|
lock_guard<mutex> l(status_lock_);
|
|
status = status_;
|
|
}
|
|
|
|
// Update the counter for the peak per host mem usage.
|
|
per_host_mem_usage_->Set(runtime_state()->query_mem_tracker()->peak_consumption());
|
|
|
|
// This will send a report even if we are cancelled. If the query completed correctly
|
|
// but fragments still need to be cancelled (e.g. limit reached), the coordinator will
|
|
// be waiting for a final report and profile.
|
|
report_status_cb_(status, profile(), done || !status.ok());
|
|
}
|
|
|
|
void PlanFragmentExecutor::StopReportThread() {
|
|
if (!report_thread_active_) return;
|
|
{
|
|
lock_guard<mutex> l(report_thread_lock_);
|
|
report_thread_active_ = false;
|
|
}
|
|
stop_report_thread_cv_.notify_one();
|
|
report_thread_->Join();
|
|
}
|
|
|
|
// TODO: why can't we just put the total_time_counter() at the
|
|
// beginning of Open() and GetNext(). This seems to really mess
|
|
// the timer here, presumably because the data stream sender is
|
|
// multithreaded and the timer we use gets confused.
|
|
Status PlanFragmentExecutor::GetNext(RowBatch** batch) {
|
|
VLOG_FILE << "GetNext(): instance_id="
|
|
<< runtime_state_->fragment_instance_id();
|
|
Status status = GetNextInternal(batch);
|
|
UpdateStatus(status);
|
|
if (done_) {
|
|
VLOG_QUERY << "Finished executing fragment query_id=" << PrintId(query_id_)
|
|
<< " instance_id=" << PrintId(runtime_state_->fragment_instance_id());
|
|
FragmentComplete();
|
|
// GetNext() uses *batch = NULL to signal the end.
|
|
if (*batch != NULL && (*batch)->num_rows() == 0) *batch = NULL;
|
|
}
|
|
|
|
return status;
|
|
}
|
|
|
|
Status PlanFragmentExecutor::GetNextInternal(RowBatch** batch) {
|
|
if (done_) {
|
|
*batch = NULL;
|
|
return Status::OK();
|
|
}
|
|
|
|
while (!done_) {
|
|
row_batch_->Reset();
|
|
SCOPED_TIMER(profile()->total_time_counter());
|
|
RETURN_IF_ERROR(
|
|
plan_->GetNext(runtime_state_.get(), row_batch_.get(), &done_));
|
|
*batch = row_batch_.get();
|
|
if (row_batch_->num_rows() > 0) {
|
|
COUNTER_ADD(rows_produced_counter_, row_batch_->num_rows());
|
|
break;
|
|
}
|
|
}
|
|
|
|
return Status::OK();
|
|
}
|
|
|
|
void PlanFragmentExecutor::FragmentComplete() {
|
|
// Check the atomic flag. If it is set, then a fragment complete report has already
|
|
// been sent.
|
|
bool send_report = completed_report_sent_.CompareAndSwap(0,1);
|
|
|
|
fragment_sw_.Stop();
|
|
int64_t cpu_and_wait_time = fragment_sw_.ElapsedTime();
|
|
fragment_sw_ = MonotonicStopWatch();
|
|
int64_t cpu_time = cpu_and_wait_time
|
|
- runtime_state_->total_storage_wait_timer()->value()
|
|
- runtime_state_->total_network_send_timer()->value()
|
|
- runtime_state_->total_network_receive_timer()->value();
|
|
// Timing is not perfect.
|
|
if (cpu_time < 0)
|
|
cpu_time = 0;
|
|
runtime_state_->total_cpu_timer()->Add(cpu_time);
|
|
|
|
ReleaseThreadToken();
|
|
StopReportThread();
|
|
if (send_report) SendReport(true);
|
|
}
|
|
|
|
void PlanFragmentExecutor::UpdateStatus(const Status& status) {
|
|
if (status.ok()) return;
|
|
|
|
bool send_report = completed_report_sent_.CompareAndSwap(0,1);
|
|
|
|
{
|
|
lock_guard<mutex> l(status_lock_);
|
|
if (status_.ok()) {
|
|
if (status.IsMemLimitExceeded()) runtime_state_->SetMemLimitExceeded();
|
|
status_ = status;
|
|
}
|
|
}
|
|
|
|
StopReportThread();
|
|
if (send_report) SendReport(true);
|
|
}
|
|
|
|
void PlanFragmentExecutor::Cancel() {
|
|
VLOG_QUERY << "Cancel(): instance_id="
|
|
<< runtime_state_->fragment_instance_id();
|
|
DCHECK(prepared_);
|
|
runtime_state_->set_is_cancelled(true);
|
|
runtime_state_->stream_mgr()->Cancel(runtime_state_->fragment_instance_id());
|
|
}
|
|
|
|
const RowDescriptor& PlanFragmentExecutor::row_desc() {
|
|
return plan_->row_desc();
|
|
}
|
|
|
|
RuntimeProfile* PlanFragmentExecutor::profile() {
|
|
return runtime_state_->runtime_profile();
|
|
}
|
|
|
|
bool PlanFragmentExecutor::ReachedLimit() {
|
|
return plan_->ReachedLimit();
|
|
}
|
|
|
|
void PlanFragmentExecutor::ReleaseThreadToken() {
|
|
if (has_thread_token_) {
|
|
has_thread_token_ = false;
|
|
runtime_state_->resource_pool()->ReleaseThreadToken(true);
|
|
if (runtime_state_->query_resource_mgr() != NULL) {
|
|
runtime_state_->query_resource_mgr()->NotifyThreadUsageChange(-1);
|
|
}
|
|
PeriodicCounterUpdater::StopSamplingCounter(average_thread_tokens_);
|
|
PeriodicCounterUpdater::StopTimeSeriesCounter(
|
|
thread_usage_sampled_counter_);
|
|
}
|
|
}
|
|
|
|
void PlanFragmentExecutor::Close() {
|
|
if (closed_) return;
|
|
row_batch_.reset();
|
|
// Prepare may not have been called, which sets runtime_state_
|
|
if (runtime_state_.get() != NULL) {
|
|
if (runtime_state_->query_resource_mgr() != NULL) {
|
|
exec_env_->cgroups_mgr()->UnregisterFragment(
|
|
runtime_state_->fragment_instance_id(), runtime_state_->cgroup());
|
|
}
|
|
if (plan_ != NULL) plan_->Close(runtime_state_.get());
|
|
if (sink_.get() != NULL) sink_->Close(runtime_state());
|
|
BOOST_FOREACH(DiskIoMgr::RequestContext* context,
|
|
*runtime_state_->reader_contexts()) {
|
|
runtime_state_->io_mgr()->UnregisterContext(context);
|
|
}
|
|
exec_env_->thread_mgr()->UnregisterPool(runtime_state_->resource_pool());
|
|
}
|
|
if (mem_usage_sampled_counter_ != NULL) {
|
|
PeriodicCounterUpdater::StopTimeSeriesCounter(mem_usage_sampled_counter_);
|
|
mem_usage_sampled_counter_ = NULL;
|
|
}
|
|
closed_ = true;
|
|
}
|
|
|
|
}
|