IMPALA-14076: Improve readability of workload management query

This patch improves the readability of workload management's insert DML query profiles by: 1. Adding a newline between each entry in the VALUES clause. 2. Removing the analyzed query from the PLAN column in both tables. For the second one, a new query option HIDE_ANALYZED_QUERY is added. If this option is set to True, 'Analyzed query' will not be printed in the Plan section of the runtime profile. This is helpful for long SQL such as workload management's insert DML query. Testing: - Added explain test cases for the HIDE_ANALYZED_QUERY option. - Manually ran some queries in a minicluster with workload management enabled. Confirmed that both improvements appear in the DML runtime profile. Change-Id: I30576795dbc2af27a6879684f3757becfd8fc8d0 Reviewed-on: http://gerrit.cloudera.org:8080/23085 Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
2025-12-19 18:12:08 -05:00 · 2025-06-24 15:16:27 -07:00
parent d41d325b41
commit c2705fa480
8 changed files with 33 additions and 4 deletions
--- a/be/src/service/query-options.cc
+++ b/be/src/service/query-options.cc
@@ -1394,6 +1394,10 @@ Status impala::SetQueryOption(TImpalaQueryOptions::type option, const string& va
        query_options->__set_mem_estimate_scale_for_spilling_operator(double_val);
        break;
      }
+      case TImpalaQueryOptions::HIDE_ANALYZED_QUERY: {
+        query_options->__set_hide_analyzed_query(IsTrue(value));
+        break;
+      }
      default:
        string key = to_string(option);
        if (IsRemovedQueryOption(key)) {
--- a/be/src/service/query-options.h
+++ b/be/src/service/query-options.h
@@ -51,7 +51,7 @@ typedef std::unordered_map<string, beeswax::TQueryOptionLevel::type>
 // plus one. Thus, the second argument to the DCHECK has to be updated every
 // time we add or remove a query option to/from the enum TImpalaQueryOptions.
 constexpr unsigned NUM_QUERY_OPTIONS =
-    TImpalaQueryOptions::JSON_BINARY_FORMAT + 1;
+    TImpalaQueryOptions::HIDE_ANALYZED_QUERY + 1;
 #define QUERY_OPTS_TABLE                                                                 \
  DCHECK_EQ(_TImpalaQueryOptions_VALUES_TO_NAMES.size(), NUM_QUERY_OPTIONS);             \
  REMOVED_QUERY_OPT_FN(abort_on_default_limit_exceeded, ABORT_ON_DEFAULT_LIMIT_EXCEEDED) \
@@ -377,6 +377,7 @@ constexpr unsigned NUM_QUERY_OPTIONS =
  QUERY_OPT_FN(use_calcite_planner, USE_CALCITE_PLANNER,                                 \
      TQueryOptionLevel::ADVANCED)                                                       \
  QUERY_OPT_FN(json_binary_format, JSON_BINARY_FORMAT, TQueryOptionLevel::REGULAR)       \
+  QUERY_OPT_FN(hide_analyzed_query, HIDE_ANALYZED_QUERY, TQueryOptionLevel::ADVANCED)    \
  ;

 /// Enforce practical limits on some query options to avoid undesired query state.
--- a/be/src/service/workload-management-worker.cc
+++ b/be/src/service/workload-management-worker.cc
@@ -465,7 +465,7 @@ static const string _queryStateToSql(
  StringStreamPop sql;
  FieldParserContext ctx(rec, FLAGS_cluster_id, sql);

-  sql << "(";
+  sql << "\n(";

  for (const auto& field : FIELD_DEFINITIONS) {
    if (field.second.Include(target_schema_version)) {
@@ -681,6 +681,8 @@ void ImpalaServer::WorkloadManagementWorker(const Version& target_schema_version
  if (!FLAGS_debug_actions.empty()) {
    insert_query_opts[TImpalaQueryOptions::DEBUG_ACTION] = FLAGS_debug_actions;
  }
+  // Hide analyzed query since it can be prohibitively long.
+  insert_query_opts[TImpalaQueryOptions::HIDE_ANALYZED_QUERY] = "true";

  while (true) {
    // Exit this thread if a shutdown was initiated.
--- a/common/thrift/ImpalaService.thrift
+++ b/common/thrift/ImpalaService.thrift
@@ -1033,6 +1033,10 @@ enum TImpalaQueryOptions {
  //   BASE64 - the json binary data is read as base64 encoded string.
  //   RAWSTRING - the json binary data is read as raw string.
  JSON_BINARY_FORMAT = 192
+
+  // Hide the analyzed query from the runtime profile. This is useful if the query is
+  // too large, such as an INSERT INTO with hundreds of VALUES.
+  HIDE_ANALYZED_QUERY = 193
 }

 // The summary of a DML statement.
--- a/common/thrift/Query.thrift
+++ b/common/thrift/Query.thrift
@@ -782,6 +782,9 @@ struct TQueryOptions {
  // See comment in ImpalaService.thrift
  193: optional CatalogObjects.TJsonBinaryFormat json_binary_format =
      TJsonBinaryFormat.NONE;
+
+  // See comment in ImpalaService.thrift
+  194: optional bool hide_analyzed_query = false
 }

 // Impala currently has three types of sessions: Beeswax, HiveServer2 and external
--- a/fe/src/main/java/org/apache/impala/planner/Planner.java
+++ b/fe/src/main/java/org/apache/impala/planner/Planner.java
@@ -456,8 +456,9 @@ public class Planner {
      hasHeader = true;
    }

-    if (explainLevel.ordinal() >= TExplainLevel.EXTENDED.ordinal() &&
-        queryStmt != null) {
+    if (explainLevel.ordinal() >= TExplainLevel.EXTENDED.ordinal()
+        && !request.query_ctx.client_request.query_options.hide_analyzed_query
+        && queryStmt != null) {
      // In extended explain include the analyzed query text showing implicit casts
      String queryText = queryStmt.toSql(SHOW_IMPLICIT_CASTS);
      String wrappedText = PrintUtils.wrapString("Analyzed query: " + queryText, 80);
--- a/testdata/workloads/functional-query/queries/QueryTest/explain-level2.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/explain-level2.test
@@ -102,3 +102,10 @@ explain select count(*) from tpch.region
 row_regex:.* cache key: [0-9a-f][0-9a-f]*.*
 row_regex:.*\[.*TPlanNode\(.*\]
 ====
+---- QUERY
+# Tests HIDE_ANALYZED_QUERY=TRUE
+set HIDE_ANALYZED_QUERY=TRUE;
+explain select count(*) from tpch.region
+---- RESULTS: VERIFY_IS_NOT_IN
+'Analyzed query:'
+====
--- a/testdata/workloads/functional-query/queries/QueryTest/explain-level3.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/explain-level3.test
@@ -143,3 +143,10 @@ order by a
 ''yyyy-MM-dd')) a FROM tpcds_parquet.date_dim GROUP BY'
 'month(to_timestamp(CAST(d_date_sk AS STRING), 'yyyy-MM-dd')) ORDER BY a ASC'
 ====
+---- QUERY
+# Tests HIDE_ANALYZED_QUERY=TRUE
+set HIDE_ANALYZED_QUERY=TRUE;
+explain select count(*) from tpch.region
+---- RESULTS: VERIFY_IS_NOT_IN
+'Analyzed query:'
+====