IMPALA-14076: Improve readability of workload management query

This patch improves the readability of workload management's insert DML
query profiles by:
1. Adding a newline between each entry in the VALUES clause.
2. Removing the analyzed query from the PLAN column in both tables.

For the second one, a new query option HIDE_ANALYZED_QUERY is added. If
this option is set to True, 'Analyzed query' will not be printed in the
Plan section of the runtime profile. This is helpful for long SQL such
as workload management's insert DML query.

Testing:
- Add explain test case for HIDE_ANALYZED_QUERY option.
- Manually ran some queries in a minicluster with workload management
  enabled. Confirmed that both improvements appear in the DML runtime
  profile.

Change-Id: I30576795dbc2af27a6879684f3757becfd8fc8d0
Reviewed-on: http://gerrit.cloudera.org:8080/23085
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
This commit is contained in:
Riza Suminto
2025-06-24 15:16:27 -07:00
committed by Impala Public Jenkins
parent d41d325b41
commit c2705fa480
8 changed files with 33 additions and 4 deletions

View File

@@ -1394,6 +1394,10 @@ Status impala::SetQueryOption(TImpalaQueryOptions::type option, const string& va
query_options->__set_mem_estimate_scale_for_spilling_operator(double_val);
break;
}
case TImpalaQueryOptions::HIDE_ANALYZED_QUERY: {
query_options->__set_hide_analyzed_query(IsTrue(value));
break;
}
default:
string key = to_string(option);
if (IsRemovedQueryOption(key)) {

View File

@@ -51,7 +51,7 @@ typedef std::unordered_map<string, beeswax::TQueryOptionLevel::type>
// plus one. Thus, the second argument to the DCHECK has to be updated every
// time we add or remove a query option to/from the enum TImpalaQueryOptions.
constexpr unsigned NUM_QUERY_OPTIONS =
TImpalaQueryOptions::JSON_BINARY_FORMAT + 1;
TImpalaQueryOptions::HIDE_ANALYZED_QUERY + 1;
#define QUERY_OPTS_TABLE \
DCHECK_EQ(_TImpalaQueryOptions_VALUES_TO_NAMES.size(), NUM_QUERY_OPTIONS); \
REMOVED_QUERY_OPT_FN(abort_on_default_limit_exceeded, ABORT_ON_DEFAULT_LIMIT_EXCEEDED) \
@@ -377,6 +377,7 @@ constexpr unsigned NUM_QUERY_OPTIONS =
QUERY_OPT_FN(use_calcite_planner, USE_CALCITE_PLANNER, \
TQueryOptionLevel::ADVANCED) \
QUERY_OPT_FN(json_binary_format, JSON_BINARY_FORMAT, TQueryOptionLevel::REGULAR) \
QUERY_OPT_FN(hide_analyzed_query, HIDE_ANALYZED_QUERY, TQueryOptionLevel::ADVANCED) \
;
/// Enforce practical limits on some query options to avoid undesired query state.

View File

@@ -465,7 +465,7 @@ static const string _queryStateToSql(
StringStreamPop sql;
FieldParserContext ctx(rec, FLAGS_cluster_id, sql);
sql << "(";
sql << "\n(";
for (const auto& field : FIELD_DEFINITIONS) {
if (field.second.Include(target_schema_version)) {
@@ -681,6 +681,8 @@ void ImpalaServer::WorkloadManagementWorker(const Version& target_schema_version
if (!FLAGS_debug_actions.empty()) {
insert_query_opts[TImpalaQueryOptions::DEBUG_ACTION] = FLAGS_debug_actions;
}
// Hide analyzed query since it can be prohibitively long.
insert_query_opts[TImpalaQueryOptions::HIDE_ANALYZED_QUERY] = "true";
while (true) {
// Exit this thread if a shutdown was initiated.

View File

@@ -1033,6 +1033,10 @@ enum TImpalaQueryOptions {
// BASE64 - the json binary data is read as base64 encoded string.
// RAWSTRING - the json binary data is read as raw string.
JSON_BINARY_FORMAT = 192
// Hide the analyzed query from the runtime profile. This is useful if the query
// text is very long, such as an INSERT INTO with hundreds of VALUES.
HIDE_ANALYZED_QUERY = 193
}
// The summary of a DML statement.

View File

@@ -782,6 +782,9 @@ struct TQueryOptions {
// See comment in ImpalaService.thrift
193: optional CatalogObjects.TJsonBinaryFormat json_binary_format =
TJsonBinaryFormat.NONE;
// See comment in ImpalaService.thrift
194: optional bool hide_analyzed_query = false
}
// Impala currently has three types of sessions: Beeswax, HiveServer2 and external

View File

@@ -456,8 +456,9 @@ public class Planner {
hasHeader = true;
}
if (explainLevel.ordinal() >= TExplainLevel.EXTENDED.ordinal() &&
queryStmt != null) {
if (explainLevel.ordinal() >= TExplainLevel.EXTENDED.ordinal()
&& !request.query_ctx.client_request.query_options.hide_analyzed_query
&& queryStmt != null) {
// In extended explain include the analyzed query text showing implicit casts
String queryText = queryStmt.toSql(SHOW_IMPLICIT_CASTS);
String wrappedText = PrintUtils.wrapString("Analyzed query: " + queryText, 80);

View File

@@ -102,3 +102,10 @@ explain select count(*) from tpch.region
row_regex:.* cache key: [0-9a-f][0-9a-f]*.*
row_regex:.*\[.*TPlanNode\(.*\]
====
---- QUERY
# Tests HIDE_ANALYZED_QUERY=TRUE
set HIDE_ANALYZED_QUERY=TRUE;
explain select count(*) from tpch.region
---- RESULTS: VERIFY_IS_NOT_IN
'Analyzed query:'
====

View File

@@ -143,3 +143,10 @@ order by a
''yyyy-MM-dd')) a FROM tpcds_parquet.date_dim GROUP BY'
'month(to_timestamp(CAST(d_date_sk AS STRING), 'yyyy-MM-dd')) ORDER BY a ASC'
====
---- QUERY
# Tests HIDE_ANALYZED_QUERY=TRUE
set HIDE_ANALYZED_QUERY=TRUE;
explain select count(*) from tpch.region
---- RESULTS: VERIFY_IS_NOT_IN
'Analyzed query:'
====