// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

namespace py impala_thrift_gen.RuntimeProfile
namespace cpp impala
namespace java org.apache.impala.thrift

include "ExecStats.thrift"
include "Metrics.thrift"
include "Types.thrift"

// NOTE: This file and the includes above define the format of Impala query profiles. As
// newer versions of Impala should be able to read profiles written by older versions,
// some best practices must be followed when making changes to the structures below:
//
// - Only append new values at the end of enums.
// - Only add new fields at the end of structures, and always make them optional.
// - Don't remove fields.
// - Don't change the numbering of fields.

// Represents the different formats a runtime profile can be represented in.
enum TRuntimeProfileFormat {
  // Pretty printed.
  STRING = 0

  // The thrift profile, serialized, compressed, and encoded. Used for the query log.
  // See RuntimeProfile::SerializeToArchiveString.
  BASE64 = 1

  // TRuntimeProfileTree.
  THRIFT = 2

  // JSON profile
  JSON = 3
}

// Counter data
struct TCounter {
  1: required string name
  2: required Metrics.TUnit unit
  3: required i64 value
}

// Aggregated version of TCounter, belonging to a TAggregatedRuntimeProfileNode.
// Lists have TAggregatedRuntimeProfileNode.num_instances entries.
struct TAggCounter {
  1: required string name
  2: required Metrics.TUnit unit
  // True if a value was set for this instance.
  3: required list<bool> has_value
  // The actual values. values[i] holds the value if has_value[i] == true, and is ignored
  // if has_value[i] == false.
  4: required list<i64> values
}

// Thrift version of RuntimeProfile::EventSequence - list of (label, timestamp) pairs
// which represent an ordered sequence of events.
struct TEventSequence {
  1: required string name
  2: required list<i64> timestamps
  3: required list<string> labels
}

// Aggregated version of TEventSequence.
struct TAggEventSequence {
  1: required string name
  // One entry per unique label.
  2: required list<string> label_dict

  // The below lists have one entry per instance.
  // The label of each event, represented as the index in label_dict.
  3: required list<list<i32>> label_idxs
  4: required list<list<i64>> timestamps
}

// Struct to contain data sampled at even time intervals (e.g. ram usage every
// N seconds).
// values[0] represents the value when the counter started (e.g. fragment started)
// values[1] is the value at period_ms (e.g. 500 ms later)
// values[2] is the value at 2 * period_ms (e.g. 1sec since start)
// This can be used to reconstruct a time line for a particular counter.
struct TTimeSeriesCounter {
  1: required string name
  2: required Metrics.TUnit unit

  // Period of intervals in ms
  3: required i32 period_ms

  // The sampled values.
  4: required list<i64> values

  // The index of the first value in this series (this is equal to the total number of
  // values contained in previous updates for this counter). Values > 0 mean that this
  // series contains an interval of a larger series. For values > 0, period_ms should be
  // ignored, as chunked counters don't resample their values.
  5: optional i64 start_index
}

// Aggregated version of TTimeSeriesCounter
struct TAggTimeSeriesCounter {
  1: required string name
  2: required Metrics.TUnit unit

  // The below lists have one entry per instance, with each list entry containing
  // the equivalent values to the similarly-named field in TTimeSeriesCounter.
  3: required list<i32> period_ms
  4: required list<list<i64>> values
  5: required list<i64> start_index
}

// Thrift version of RuntimeProfile::SummaryStatsCounter.
struct TSummaryStatsCounter {
  1: required string name
  2: required Metrics.TUnit unit
  3: required i64 sum
  4: required i64 total_num_values
  5: required i64 min_value
  6: required i64 max_value
}

// Aggregated version of TSummaryStatsCounter, belonging to a
// TAggregatedRuntimeProfileNode.
// Lists have TAggregatedRuntimeProfileNode.num_instances entries.
struct TAggSummaryStatsCounter {
  1: required string name
  2: required Metrics.TUnit unit
  // True if a value was set for this instance.
  3: required list<bool> has_value

  // The actual values of the summary stats counter, with each field stored in a separate
  // list. The ith element of each list holds a valid value if has_value[i] == true, and
  // is ignored if has_value[i] == false.
  4: required list<i64> sum
  5: required list<i64> total_num_values
  6: required list<i64> min_value
  7: required list<i64> max_value
}

// Metadata to help identify what entity the profile node corresponds to.
union TRuntimeProfileNodeMetadata {
  // Set if this node corresponds to a plan node.
  1: Types.TPlanNodeId plan_node_id

  // Set if this node corresponds to a data sink.
  2: Types.TDataSinkId data_sink_id
}

// Aggregated profile state for a node; referenced from TRuntimeProfileNode.aggregated.
struct TAggregatedRuntimeProfileNode {
  // Number of instances that were included in the stats in this profile.
  1: optional i32 num_instances

  // Names of the profiles that contributed to this aggregated profile. This is only
  // set at the root of the aggregated profile tree. Nodes below that root will have
  // the same number of instances but the names of the input profiles are not duplicated.
  // The indices of these instances are used in many of the aggregated profile elements.
  // E.g. indices in TAggCounter.values line up with these indices, and the indices used
  // as map values in info_strings correspond to the indices here.
  2: optional list<string> input_profiles

  // The nesting of these counters is determined by 'child_counters_map' in the parent
  // TRuntimeProfileNode.
  3: optional list<TAggCounter> counters

  // Info strings stored in a dense representation. The first map key is the info string
  // key. The second map key is a distinct value of that key. The value is a list of
  // input indices that had the value. This means that the common case, where many
  // instances have identical strings, can be represented very compactly.
  //
  // E.g. the "ExecOption" and "Table Name" fields for 10 HDFS_SCAN_NODE instances could
  // be represented as:
  // {
  //   "Table Name": {"tpch.lineitem": [0,1,2,3,4,5,6,7,8,9]},
  //   "ExecOption": {
  //     "ExecOption: TEXT Codegen Enabled, Codegen enabled: 2 out of 2":
  //         [0,1,2,3,4,6,7,8,9],
  //     "ExecOption: TEXT Codegen Enabled, Codegen enabled: 3 out of 3":
  //         [5]
  //   }
  // }
  4: optional map<string, map<string, list<i32>>> info_strings

  // List of summary stats counters.
  5: optional list<TAggSummaryStatsCounter> summary_stats_counters

  // List of event sequences.
  6: optional list<TAggEventSequence> event_sequences

  // List of time series counters.
  7: optional list<TAggTimeSeriesCounter> time_series_counters
}

// A single node in the runtime profile tree. This can either be an unaggregated node,
// which has the singular counter values from the original RuntimeProfile object, or
// it can be an aggregated node, which was merged together from one or more unaggregated
// input nodes.
struct TRuntimeProfileNode {
  1: required string name
  2: required i32 num_children

  // In an unaggregated profile, individual counters.
  // In an aggregated profile, these are the averaged counters only.
  // The nesting of these counters is determined by 'child_counters_map' in the parent.
  3: required list<TCounter> counters

  // Legacy field. May contain the node ID for plan nodes.
  // Replaced by node_metadata, which contains richer metadata.
  4: required i64 metadata

  // indicates whether the child will be printed with extra indentation;
  // corresponds to indent param of RuntimeProfile::AddChild()
  5: required bool indent

  // ======================================================================
  // BEGIN: unaggregated state.
  // These fields represent unaggregated profile state only.
  // These fields are not used in aggregated profile nodes, i.e. profile V2,
  // unless otherwise noted.
  // ======================================================================

  // map of key,value info strings that capture any kind of additional information
  // about the profiled object
  6: required map<string, string> info_strings

  // Auxiliary structure to capture the info strings display order when printed
  7: required list<string> info_strings_display_order

  // map from parent counter name to child counter name. This applies to both
  // 'counters' and 'aggregated.counters'.
  8: required map<string, set<string>> child_counters_map

  // List of event sequences that capture ordered events in a query's lifetime
  9: optional list<TEventSequence> event_sequences

  // List of time series counters
  10: optional list<TTimeSeriesCounter> time_series_counters

  // ======================================================================
  // END: unaggregated state.
  // ======================================================================

  // List of summary stats counters.
  // Used in both unaggregated profiles and aggregated profiles. In aggregated profiles,
  // summarizes stats from all input profiles.
  11: optional list<TSummaryStatsCounter> summary_stats_counters

  // Metadata about the entity that this node refers to.
  12: optional TRuntimeProfileNodeMetadata node_metadata

  // Aggregated profile counters - contains the same counters as above, except transposed
  // so that one TRuntimeProfileNode contains counters for all input profiles.
  13: optional TAggregatedRuntimeProfileNode aggregated
}

// A flattened tree of runtime profiles, obtained by a pre-order traversal.
// The contents of the profile vary between different versions.
//
// Version 1:
// The fully-expanded runtime profile tree generated by the query is included. For every
// fragment, each fragment instance has a separate profile subtree and an averaged profile
// for the fragment is also included with averaged counter values.
//
// Version 2 (experimental):
// Different from version 1, there is only a single aggregated profile tree for each
// fragment. All nodes in this tree use the aggregated profile representation. Otherwise
// the structure of the profile is the same.
// TODO: IMPALA-9382 - update when non-experimental
struct TRuntimeProfileTree {
  1: required list<TRuntimeProfileNode> nodes
  2: optional ExecStats.TExecSummary exec_summary

  // The version of the runtime profile representation. Different versions may have
  // different invariants or information. Thrift structures for new versions need to
  // remain readable by readers with old versions of the thrift file, but may remove
  // information.
  // Version 1: this field is unset
  // Version 2: this field is set to 2
  // TODO: IMPALA-9846: document which versions of Impala generate which version.
  3: optional i32 profile_version
}

// A list of TRuntimeProfileTree structures.
struct TRuntimeProfileForest {
  1: required list<TRuntimeProfileTree> profile_trees
  2: optional TRuntimeProfileTree host_profile
}