mirror of
https://github.com/apache/impala.git
synced 2026-01-01 00:00:20 -05:00
The runtime profile as we present it is not very useful, and I think its structure makes it hard to consume. This patch adds a new client-facing, schema-defined set of counters that are collected from the runtime profiles. For example, with this structure it would be easy to have the shell get the stats of a running query and print a useful progress report, or to check the most relevant metrics for diagnosing issues.

Here's an example of the output for one of the tpch queries:

Operator              #Hosts   Avg Time   Max Time    #Rows  Est. #Rows  Peak Mem  Est. Peak Mem  Detail
------------------------------------------------------------------------------------------------------------------------
09:MERGING-EXCHANGE        1   79.738us   79.738us        5           5         0        -1.00 B  UNPARTITIONED
05:TOP-N                   3   84.693us   88.810us        5           5  12.00 KB       120.00 B
04:AGGREGATE               3    5.263ms    6.432ms        5           5  44.00 KB       10.00 MB  MERGE FINALIZE
08:AGGREGATE               3   16.659ms   27.444ms   52.52K     600.12K   3.20 MB       15.11 MB  MERGE
07:EXCHANGE                3    2.644ms      5.1ms   52.52K     600.12K         0              0  HASH(o_orderpriority)
03:AGGREGATE               3  342.913ms  966.291ms   52.52K     600.12K  10.80 MB       15.11 MB
02:HASH JOIN               3    2s165ms    2s171ms  144.87K     600.12K  13.63 MB      941.01 KB  INNER JOIN, BROADCAST
|--06:EXCHANGE             3    8.296ms    8.692ms   57.22K      15.00K         0              0  BROADCAST
|  01:SCAN HDFS            2    1s412ms    1s978ms   57.22K      15.00K  24.21 MB      176.00 MB  tpch.orders o
00:SCAN HDFS               3    8s032ms    8s558ms    3.79M     600.12K  32.29 MB      264.00 MB  tpch.lineitem l

Change-Id: Iaad4b9dd577c375006313f19442bee6d3e27246a
Reviewed-on: http://gerrit.ent.cloudera.com:8080/2964
Reviewed-by: Nong Li <nong@cloudera.com>
Tested-by: jenkins
96 lines
3.0 KiB
Thrift
96 lines
3.0 KiB
Thrift
// Copyright 2012 Cloudera Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
namespace cpp impala
|
|
namespace java com.cloudera.impala.thrift
|
|
|
|
include "Status.thrift"
|
|
include "Types.thrift"
|
|
|
|
// State of a query, as reported to clients through TExecSummary.state.
//
// Thrift serializes enum members by their numeric value, so the explicit values
// below must stay stable: add new states with new numbers rather than renumbering.
//
// NOTE(review): the names suggest a forward progression (REGISTERED -> PLANNING ->
// QUEUED -> RUNNING -> FINISHED) plus two abnormal outcomes; the exact transition
// rules are not defined in this file -- confirm against the code that sets the state.
enum TExecState {
  REGISTERED = 0,
  PLANNING = 1,
  QUEUED = 2,
  RUNNING = 3,
  FINISHED = 4,

  CANCELLED = 5,
  // When a summary reports FAILED, TExecSummary.status contains the error.
  FAILED = 6,
}
|
|
|
|
// Execution stats for a single plan node. TPlanNodeExecSummary uses this struct both
// for the planner's estimates and for the values observed on each backend. Every field
// is optional, so a producer can leave any value it does not have unset.
struct TExecStats {
  // The wall clock time spent on the "main" thread. This is the user perceived
  // latency, and it indicates the current bottleneck.
  // Note: anywhere we have a queue between operators, this time can fluctuate
  // significantly without the overall query time changing much (i.e. the bottleneck
  // moved to another operator). This is unavoidable though.
  // NOTE(review): unit is presumably nanoseconds, going by the '_ns' suffix.
  1: optional i64 latency_ns

  // Total CPU time spent across all threads. For operators that have an async
  // component (e.g. multi-threaded) this will be >= latency_ns.
  2: optional i64 cpu_time_ns

  // Number of rows returned.
  3: optional i64 cardinality

  // Peak memory used (in bytes).
  4: optional i64 memory_used
}
|
|
|
|
// Summary for a single plan node. This includes the labels used to display the node
// as well as the stats of each instance executing it.
struct TPlanNodeExecSummary {
  // Id of the plan node this summary describes.
  1: required Types.TPlanNodeId node_id

  // NOTE(review): presumably the id of the plan fragment that contains this node --
  // confirm against the code that fills in the summary.
  2: required i32 fragment_id

  // Display label for the node.
  3: required string label

  // Optional extra display text that accompanies 'label'.
  4: optional string label_detail

  // Number of children of this node.
  // NOTE(review): presumably what lets a reader rebuild the tree shape from the
  // flattened TExecSummary.nodes list -- confirm the expected traversal order.
  5: required i32 num_children

  // Estimated stats generated by the planner.
  6: optional TExecStats estimated_stats

  // One entry for each backend (BE) executing this plan node.
  7: optional list<TExecStats> exec_stats

  // One entry for each backend (BE) executing this plan node. True if this plan node
  // is still running on that backend.
  // NOTE(review): presumably index-aligned with 'exec_stats' -- confirm.
  8: optional list<bool> is_active

  // If true, this plan node is an exchange node that is the receiver of a broadcast.
  9: optional bool is_broadcast
}
|
|
|
|
// Execution summary of an entire query: its state, an optional error, and the
// per-plan-node summaries.
struct TExecSummary {
  // State of the query.
  1: required TExecState state

  // Contains the error if state is FAILED.
  2: optional Status.TStatus status

  // Flattened execution summary of the plan tree.
  3: optional list<TPlanNodeExecSummary> nodes

  // For each exchange node in 'nodes', contains the index of the root node of the
  // fragment that sends to this exchange. Both the key and the value are indices
  // into 'nodes'.
  4: optional map<i32, i32> exch_to_sender_map

  // List of errors that were encountered during execution. This can be non-empty
  // even if status is okay, in which case it contains errors that impala skipped
  // over.
  5: optional list<string> error_logs
}
|
|
|