Files
impala/common/thrift/RuntimeProfile.thrift
Joe McDonnell c5a0ec8bdf IMPALA-11980 (part 1): Put all thrift-generated python code into the impala_thrift_gen package
This puts all of the thrift-generated python code into the
impala_thrift_gen package. This is similar to what Impyla
does for its thrift-generated python code, except that it
uses the impala_thrift_gen package rather than impala._thrift_gen.
This is a preparatory patch for fixing the absolute import
issues.

This patches all of the thrift files to add the python namespace.
This has code to apply the patching to the thirdparty thrift
files (hive_metastore.thrift, fb303.thrift) to do the same.

Putting all the generated python into a package makes it easier
to understand where the imports are getting code. When the
subsequent change rearranges the shell code, the thrift generated
code can stay in a separate directory.

This uses isort to sort the imports for the affected Python files
with the provided .isort.cfg file. This also adds an impala-isort
shell script to make it easy to run.

Testing:
 - Ran a core job

Change-Id: Ie2927f22c7257aa38a78084efe5bd76d566493c0
Reviewed-on: http://gerrit.cloudera.org:8080/20169
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Reviewed-by: Riza Suminto <riza.suminto@cloudera.com>
2025-04-15 17:03:02 +00:00

304 lines
12 KiB
Thrift

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
namespace py impala_thrift_gen.RuntimeProfile
namespace cpp impala
namespace java org.apache.impala.thrift
include "ExecStats.thrift"
include "Metrics.thrift"
include "Types.thrift"
// NOTE: This file and the includes above define the format of Impala query profiles. As
// newer versions of Impala should be able to read profiles written by older versions,
// some best practices must be followed when making changes to the structures below:
//
// - Only append new values at the end of enums.
// - Only add new fields at the end of structures, and always make them optional.
// - Don't remove fields.
// - Don't change the numbering of fields.
// The different formats in which a runtime profile can be represented.
// Per the compatibility note at the top of this file, new values may only be appended
// at the end of this enum.
enum TRuntimeProfileFormat {
// Pretty-printed text.
STRING = 0
// The thrift profile, serialized, compressed, and encoded. Used for the query log.
// See RuntimeProfile::SerializeToArchiveString.
BASE64 = 1
// The profile as a TRuntimeProfileTree thrift structure.
THRIFT = 2
// The profile in JSON form.
JSON = 3
}
// Counter data: a single named value together with its unit.
// Stored in TRuntimeProfileNode.counters.
struct TCounter {
// Name of the counter.
1: required string name
// Unit in which 'value' is expressed.
2: required Metrics.TUnit unit
// Value of the counter.
3: required i64 value
}
// Aggregated version of TCounter, belonging to a TAggregatedRuntimeProfileNode.
// Lists have TAggregatedRuntimeProfileNode.num_instances entries, i.e. one entry per
// instance.
struct TAggCounter {
// Name of the counter.
1: required string name
// Unit in which the entries of 'values' are expressed.
2: required Metrics.TUnit unit
// True if a value was set for this instance.
3: required list<bool> has_value
// The actual values. values[i] holds the value if has_value[i] == true, and is ignored
// if has_value[i] == false.
4: required list<i64> values
}
// Thrift version of RuntimeProfile::EventSequence - list of (label, timestamp) pairs
// which represent an ordered sequence of events. The pairs are stored as two separate
// lists, 'timestamps' and 'labels'.
struct TEventSequence {
// Name of the event sequence.
1: required string name
// Timestamps of the events.
// NOTE(review): presumably timestamps[i] pairs with labels[i] - confirm; the time unit
// is not specified in this file.
2: required list<i64> timestamps
// Labels of the events.
3: required list<string> labels
}
// Aggregated version of TEventSequence. Labels are stored once in a dictionary and
// referenced by index, instead of being repeated for every instance.
struct TAggEventSequence {
// Name of the event sequence.
1: required string name
// One entry per unique label.
2: required list<string> label_dict
// The below lists have one entry per instance.
// The label of each event, represented as the index in label_dict.
3: required list<list<i32>> label_idxs
// The timestamps of the events of each instance.
// NOTE(review): presumably timestamps[i][j] belongs to the event labelled by
// label_idxs[i][j] - confirm.
4: required list<list<i64>> timestamps
}
// Struct to contain data sampled at even time intervals (e.g. ram usage every
// N seconds).
// values[0] represents the value when the counter started (e.g. fragment started)
// values[1] is the value at period_ms (e.g. 500 ms later)
// values[2] is the value at 2 * period_ms (e.g. 1sec since start)
// This can be used to reconstruct a time line for a particular counter.
struct TTimeSeriesCounter {
// Name of the counter.
1: required string name
// Unit in which the entries of 'values' are expressed.
2: required Metrics.TUnit unit
// Period of intervals in ms
3: required i32 period_ms
// The sampled values.
4: required list<i64> values
// The index of the first value in this series (this is equal to the total number of
// values contained in previous updates for this counter). Values > 0 mean that this
// series contains an interval of a larger series. For values > 0, period_ms should be
// ignored, as chunked counters don't resample their values.
5: optional i64 start_index
}
// Aggregated version of TTimeSeriesCounter: holds the time series of all instances
// for a single counter.
struct TAggTimeSeriesCounter {
// Name of the counter.
1: required string name
// Unit in which the sampled values are expressed.
2: required Metrics.TUnit unit
// The below lists have one entry per instance, with each list entry containing
// the equivalent values to the similarly-named field in TTimeSeriesCounter.
3: required list<i32> period_ms
4: required list<list<i64>> values
5: required list<i64> start_index
}
// Thrift version of RuntimeProfile::SummaryStatsCounter.
// Stores summary statistics (sum, count, minimum and maximum) instead of the
// individual values.
struct TSummaryStatsCounter {
// Name of the counter.
1: required string name
// Unit in which the statistics are expressed.
2: required Metrics.TUnit unit
// Sum of the values.
3: required i64 sum
// Total number of values.
// NOTE(review): presumably an average can be derived as sum / total_num_values -
// confirm against RuntimeProfile::SummaryStatsCounter.
4: required i64 total_num_values
// Minimum value.
5: required i64 min_value
// Maximum value.
6: required i64 max_value
}
// Aggregated version of TSummaryStatsCounter, belonging to a
// TAggregatedRuntimeProfileNode.
// Lists have TAggregatedRuntimeProfileNode.num_instances entries, i.e. one entry per
// instance.
struct TAggSummaryStatsCounter {
// Name of the counter.
1: required string name
// Unit in which the statistics are expressed.
2: required Metrics.TUnit unit
// True if a value was set for this instance.
3: required list<bool> has_value
// The actual values of the summary stats counter, with each field stored in a separate
// list. The ith element of each list holds a valid value if has_value[i] == true, and
// is ignored if has_value[i] == false.
// Each list corresponds to the similarly-named field in TSummaryStatsCounter.
4: required list<i64> sum
5: required list<i64> total_num_values
6: required list<i64> min_value
7: required list<i64> max_value
}
// Metadata to help identify what entity the profile node corresponds to.
// This is a union, so at most one of the fields below is set.
// Referenced from TRuntimeProfileNode.node_metadata.
union TRuntimeProfileNodeMetadata {
// Set if this node corresponds to a plan node.
1: Types.TPlanNodeId plan_node_id
// Set if this node corresponds to a data sink.
2: Types.TDataSinkId data_sink_id
}
// Aggregated state of a profile node, stored in TRuntimeProfileNode.aggregated.
// Holds the counters, info strings, summary stats, event sequences and time series of
// all input profiles (instances) that were merged into the node. Per-instance lists in
// the nested structures are indexed by instance.
struct TAggregatedRuntimeProfileNode {
// Number of instances that were included in the stats in this profile.
1: optional i32 num_instances
// Names of the profiles that contributed to this aggregated profile. This is only
// set at the root of the aggregated profile tree. Nodes below that root will have
// the same number of instances but the names of the input profiles are not duplicated.
// The indices of these instances are used in many of the aggregated profile elements.
// E.g. indices in TAggCounter.values line up with these indices, and the indices used
// as map values in info_strings correspond to the indices here.
2: optional list<string> input_profiles
// The nesting of these counters is determined by 'child_counters_map' in the parent
// TRuntimeProfileNode.
3: optional list<TAggCounter> counters
// Info strings stored in a dense representation. The first map key is the info string
// key. The second map key is a distinct value of that key. The value is a list of
// input indices that had the value. This means that the common case, where many
// instances have identical strings, can be represented very compactly.
//
// E.g. the "ExecOption" and "Table Name" fields for 10 HDFS_SCAN_NODE instances could
// be represented as:
// {
// "Table Name": {"tpch.lineitem": [0,1,2,3,4,5,6,7,8,9]},
// "ExecOption": {
// "ExecOption: TEXT Codegen Enabled, Codegen enabled: 2 out of 2":
// [0,1,2,3,4,6,7,8,9],
// "ExecOption: TEXT Codegen Enabled, Codegen enabled: 3 out of 3":
// [5]
// }
// }
4: optional map<string, map<string, list<i32>>> info_strings
// List of summary stats counters.
5: optional list<TAggSummaryStatsCounter> summary_stats_counters
// List of event sequences.
6: optional list<TAggEventSequence> event_sequences
// List of time series counters.
7: optional list<TAggTimeSeriesCounter> time_series_counters
}
// A single node in the runtime profile tree. This can either be an unaggregated node,
// which has the singular counter values from the original RuntimeProfile object, or
// it can be an aggregated node, which was merged together from one or more unaggregated
// input nodes.
struct TRuntimeProfileNode {
// Name of this profile node.
1: required string name
// Number of children of this node.
// NOTE(review): presumably used to rebuild the tree structure from the flattened list
// in TRuntimeProfileTree.nodes - confirm against the deserialization code.
2: required i32 num_children
// In an unaggregated profile, individual counters.
// In an aggregated profile, these are the averaged counters only.
// The nesting of these counters is determined by 'child_counters_map' in the parent.
3: required list<TCounter> counters
// Legacy field. May contain the node ID for plan nodes.
// Replaced by node_metadata, which contains richer metadata.
4: required i64 metadata
// indicates whether the child will be printed with extra indentation;
// corresponds to indent param of RuntimeProfile::AddChild()
5: required bool indent
// ======================================================================
// BEGIN: unaggregated state.
// These fields represent unaggregated profile state only.
// These fields are not used in aggregated profile nodes, i.e. profile V2,
// unless otherwise noted.
// ======================================================================
// map of key,value info strings that capture any kind of additional information
// about the profiled object
6: required map<string, string> info_strings
// Auxiliary structure to capture the info strings display order when printed
7: required list<string> info_strings_display_order
// map from parent counter name to child counter name. This applies to both
// 'counters' and 'aggregated.counters'.
8: required map<string, set<string>> child_counters_map
// List of event sequences that capture ordered events in a query's lifetime
9: optional list<TEventSequence> event_sequences
// List of time series counters
10: optional list<TTimeSeriesCounter> time_series_counters
// ======================================================================
// END: unaggregated state.
// ======================================================================
// List of summary stats counters.
// Used in both unaggregated profiles and aggregated profiles. In aggregated profiles,
// summarizes stats from all input profiles.
11: optional list<TSummaryStatsCounter> summary_stats_counters
// Metadata about the entity that this node refers to.
12: optional TRuntimeProfileNodeMetadata node_metadata
// Aggregated profile counters - contains the same counters as above, except transposed
// so that one TRuntimeProfileNode contains counters for all input profiles.
13: optional TAggregatedRuntimeProfileNode aggregated
}
// A flattened tree of runtime profiles, obtained by a pre-order traversal. The contents
// of the profile vary between different versions.
//
// Version 1:
// The fully-expanded runtime profile tree generated by the query is included. For every
// fragment, each fragment instance has a separate profile subtree and an averaged profile
// for the fragment is also included with averaged counter values.
//
// Version 2 (experimental):
// Different from version 1, there is only a single aggregated profile tree for each
// fragment. All nodes in this tree use the aggregated profile representation. Otherwise
// the structure of the profile is the same.
// TODO: IMPALA-9382 - update when non-experimental
struct TRuntimeProfileTree {
// The nodes of the tree, flattened in pre-order.
1: required list<TRuntimeProfileNode> nodes
// Optional execution summary (see ExecStats.thrift for the definition).
2: optional ExecStats.TExecSummary exec_summary
// The version of the runtime profile representation. Different versions may have
// different invariants or information. Thrift structures for new versions need to
// remain readable by readers with old versions of the thrift file, but may remove
// information.
// Version 1: this field is unset
// Version 2: this field is set to 2
// TODO: IMPALA-9846: document which versions of Impala generate which version.
3: optional i32 profile_version
}
// A list of TRuntimeProfileTree structures, plus an optional host profile tree.
struct TRuntimeProfileForest {
// The profile trees contained in this forest.
1: required list<TRuntimeProfileTree> profile_trees
// NOTE(review): presumably a host-level profile that is not tied to a single entry of
// 'profile_trees' - confirm against the code that populates this field.
2: optional TRuntimeProfileTree host_profile
}