From 88cb68cfbeb399ee3bb561caa329ccf40ad836c0 Mon Sep 17 00:00:00 2001 From: Gabor Kaszab Date: Thu, 19 Oct 2017 12:52:46 +0200 Subject: [PATCH] IMPALA-2181: Add query option levels for display Four display levels are introduced for each query option: REGULAR, ADVANCED, DEVELOPMENT and DEPRECATED. When the query options are displayed in Impala shell using SET then only the REGULAR and ADVANCED options are shown. A new command called SET ALL shows all the options grouped by their option levels. When the query options are displayed through the SET SQL statement then the result set would contain an extra column indicating the level of each option. Similarly to Impala shell here the SET command only displays the REGULAR and ADVANCED options while SET ALL shows them all. If the Impala shell connects to an Impala daemon that predates this change then all the options would be displayed in the REGULAR group. Change-Id: I75720d0d454527e1a0ed19bb43cf9e4f018ce1d1 Reviewed-on: http://gerrit.cloudera.org:8080/8447 Reviewed-by: Dan Hecht Tested-by: Impala Public Jenkins --- be/src/service/child-query.cc | 2 +- be/src/service/client-request-state.cc | 54 ++++-- be/src/service/client-request-state.h | 7 + be/src/service/impala-server.cc | 14 +- be/src/service/impala-server.h | 10 ++ be/src/service/query-options.cc | 20 ++- be/src/service/query-options.h | 158 ++++++++++------- common/thrift/Frontend.thrift | 8 +- common/thrift/beeswax.thrift | 15 +- fe/src/main/cup/sql-parser.cup | 10 +- .../org/apache/impala/analysis/SetStmt.java | 11 +- .../org/apache/impala/service/Frontend.java | 3 +- shell/impala_client.py | 6 + shell/impala_shell.py | 104 ++++++++--- .../queries/QueryTest/set.test | 166 +++++++++--------- tests/custom_cluster/test_set_and_unset.py | 14 +- tests/hs2/test_hs2.py | 72 +++++--- tests/shell/test_shell_interactive.py | 38 +++- 18 files changed, 486 insertions(+), 226 deletions(-) diff --git a/be/src/service/child-query.cc b/be/src/service/child-query.cc index 
58a275be8..520a83442 100644 --- a/be/src/service/child-query.cc +++ b/be/src/service/child-query.cc @@ -106,7 +106,7 @@ Status ChildQuery::ExecAndFetch() { void ChildQuery::SetQueryOptions(const TQueryOptions& parent_options, TExecuteStatementReq* exec_stmt_req) { map conf; -#define QUERY_OPT_FN(NAME, ENUM)\ +#define QUERY_OPT_FN(NAME, ENUM, LEVEL)\ if (parent_options.__isset.NAME) {\ stringstream val;\ val << parent_options.NAME;\ diff --git a/be/src/service/client-request-state.cc b/be/src/service/client-request-state.cc index b925b5e2d..76e2557f5 100644 --- a/be/src/service/client-request-state.cc +++ b/be/src/service/client-request-state.cc @@ -205,19 +205,12 @@ Status ClientRequestState::Exec(TExecRequest* exec_request) { exec_request_.set_query_option_request.value, &session_->set_query_options, &session_->set_query_options_mask)); - SetResultSet({}, {}); + SetResultSet({}, {}, {}); } else { - // "SET" returns a table of all query options. - map config; - TQueryOptionsToMap( - session_->QueryOptions(), &config); - vector keys, values; - map::const_iterator itr = config.begin(); - for (; itr != config.end(); ++itr) { - keys.push_back(itr->first); - values.push_back(itr->second); - } - SetResultSet(keys, values); + // "SET" or "SET ALL" + bool is_set_all = exec_request_.set_query_option_request.__isset.is_set_all && + exec_request_.set_query_option_request.is_set_all; + PopulateResultForSet(is_set_all); } return Status::OK(); } @@ -228,6 +221,27 @@ Status ClientRequestState::Exec(TExecRequest* exec_request) { } } +void ClientRequestState::PopulateResultForSet(bool is_set_all) { + map config; + TQueryOptionsToMap(session_->QueryOptions(), &config); + vector keys, values, levels; + map::const_iterator itr = config.begin(); + for (; itr != config.end(); ++itr) { + const auto opt_level_id = + parent_server_->query_option_levels_[itr->first]; + if (!is_set_all && (opt_level_id == TQueryOptionLevel::DEVELOPMENT || + opt_level_id == TQueryOptionLevel::DEPRECATED)) { + 
continue; + } + keys.push_back(itr->first); + values.push_back(itr->second); + const auto opt_level = _TQueryOptionLevel_VALUES_TO_NAMES.find(opt_level_id); + DCHECK(opt_level !=_TQueryOptionLevel_VALUES_TO_NAMES.end()); + levels.push_back(opt_level->second); + } + SetResultSet(keys, values, levels); +} + Status ClientRequestState::ExecLocalCatalogOp( const TCatalogOpRequest& catalog_op) { switch (catalog_op.op_type) { @@ -970,6 +984,22 @@ void ClientRequestState::SetResultSet(const vector& col1, } } +void ClientRequestState::SetResultSet(const vector& col1, + const vector& col2, const vector& col3) { + DCHECK_EQ(col1.size(), col2.size()); + DCHECK_EQ(col1.size(), col3.size()); + + request_result_set_.reset(new vector); + request_result_set_->resize(col1.size()); + for (int i = 0; i < col1.size(); ++i) { + (*request_result_set_.get())[i].__isset.colVals = true; + (*request_result_set_.get())[i].colVals.resize(3); + (*request_result_set_.get())[i].colVals[0].__set_string_val(col1[i]); + (*request_result_set_.get())[i].colVals[1].__set_string_val(col2[i]); + (*request_result_set_.get())[i].colVals[2].__set_string_val(col3[i]); + } +} + void ClientRequestState::SetResultSet(const vector& col1, const vector& col2, const vector& col3, const vector& col4) { DCHECK_EQ(col1.size(), col2.size()); diff --git a/be/src/service/client-request-state.h b/be/src/service/client-request-state.h index 968ae041b..87f870a7c 100644 --- a/be/src/service/client-request-state.h +++ b/be/src/service/client-request-state.h @@ -391,6 +391,8 @@ class ClientRequestState { void SetResultSet(const std::vector& results); void SetResultSet(const std::vector& col1, const std::vector& col2); + void SetResultSet(const vector& col1, + const vector& col2, const vector& col3); void SetResultSet(const std::vector& col1, const std::vector& col2, const std::vector& col3, const std::vector& col4); @@ -417,6 +419,11 @@ class ClientRequestState { /// Does not take lock_, but requires it: caller must ensure 
lock_ /// is taken before calling UpdateQueryState. void UpdateQueryState(beeswax::QueryState::type query_state); + + /// Gets the query options, their values and levels and populates the result set + /// with them. It covers the subset of options for 'SET' and all of them for + /// 'SET ALL' + void PopulateResultForSet(bool is_set_all); }; } diff --git a/be/src/service/impala-server.cc b/be/src/service/impala-server.cc index 0aecece02..89ac35fc8 100644 --- a/be/src/service/impala-server.cc +++ b/be/src/service/impala-server.cc @@ -1215,17 +1215,23 @@ void ImpalaServer::InitializeConfigVariables() { map string_map; TQueryOptionsToMap(default_query_options_, &string_map); + string_map["SUPPORT_START_OVER"] = "false"; + PopulateQueryOptionLevels(&query_option_levels_); map::const_iterator itr = string_map.begin(); for (; itr != string_map.end(); ++itr) { ConfigVariable option; option.__set_key(itr->first); option.__set_value(itr->second); + AddOptionLevelToConfig(&option, itr->first); default_configs_.push_back(option); } - ConfigVariable support_start_over; - support_start_over.__set_key("support_start_over"); - support_start_over.__set_value("false"); - default_configs_.push_back(support_start_over); +} + +void ImpalaServer::AddOptionLevelToConfig(ConfigVariable* config, + const string& option_key) const { + const auto query_option_level = query_option_levels_.find(option_key); + DCHECK(query_option_level != query_option_levels_.end()); + config->__set_level(query_option_level->second); } void ImpalaServer::SessionState::ToThrift(const TUniqueId& session_id, diff --git a/be/src/service/impala-server.h b/be/src/service/impala-server.h index c808c35e5..9e6c83050 100644 --- a/be/src/service/impala-server.h +++ b/be/src/service/impala-server.h @@ -336,6 +336,9 @@ class ImpalaServer : public ImpalaServiceIf, typedef boost::unordered_map BackendDescriptorMap; const BackendDescriptorMap& GetKnownBackends(); + // Mapping between query option names and levels + 
QueryOptionLevels query_option_levels_; + /// The prefix of audit event log filename. static const string AUDIT_EVENT_LOG_FILE_PREFIX; @@ -535,6 +538,13 @@ class ImpalaServer : public ImpalaServiceIf, /// in the fetch call. void InitializeConfigVariables(); + /// Sets the option level for parameter 'option' based on the mapping stored in + /// 'query_option_levels_'. The option level is used by the Impala shell when it + /// displays the options. 'option_key' is the key for the 'query_option_levels_' + /// to get the level of the query option. + void AddOptionLevelToConfig(beeswax::ConfigVariable* option, + const string& option_key) const; + /// Checks settings for profile logging, including whether the output /// directory exists and is writeable, and initialises the first log file. /// Returns OK unless there is some problem preventing profile log files diff --git a/be/src/service/query-options.cc b/be/src/service/query-options.cc index 740654ace..49f1698ae 100644 --- a/be/src/service/query-options.cc +++ b/be/src/service/query-options.cc @@ -36,6 +36,7 @@ using boost::algorithm::token_compress_on; using boost::algorithm::split; using boost::algorithm::trim; using std::to_string; +using beeswax::TQueryOptionLevel; using namespace impala; using namespace strings; @@ -57,7 +58,7 @@ void impala::OverlayQueryOptions(const TQueryOptions& src, const QueryOptionsMas TQueryOptions* dst) { DCHECK_GT(mask.size(), _TImpalaQueryOptions_VALUES_TO_NAMES.size()) << "Size of QueryOptionsMask must be increased."; -#define QUERY_OPT_FN(NAME, ENUM)\ +#define QUERY_OPT_FN(NAME, ENUM, LEVEL)\ if (src.__isset.NAME && mask[TImpalaQueryOptions::ENUM]) dst->__set_##NAME(src.NAME); QUERY_OPTS_TABLE #undef QUERY_OPT_FN @@ -65,7 +66,7 @@ void impala::OverlayQueryOptions(const TQueryOptions& src, const QueryOptionsMas void impala::TQueryOptionsToMap(const TQueryOptions& query_options, map* configuration) { -#define QUERY_OPT_FN(NAME, ENUM)\ +#define QUERY_OPT_FN(NAME, ENUM, LEVEL)\ {\ if 
(query_options.__isset.NAME) { \ stringstream val;\ @@ -83,7 +84,7 @@ void impala::TQueryOptionsToMap(const TQueryOptions& query_options, static void ResetQueryOption(const int option, TQueryOptions* query_options) { const static TQueryOptions defaults; switch (option) { -#define QUERY_OPT_FN(NAME, ENUM)\ +#define QUERY_OPT_FN(NAME, ENUM, LEVEL)\ case TImpalaQueryOptions::ENUM:\ query_options->__isset.NAME = defaults.__isset.NAME;\ query_options->NAME = defaults.NAME;\ @@ -97,7 +98,7 @@ string impala::DebugQueryOptions(const TQueryOptions& query_options) { const static TQueryOptions defaults; int i = 0; stringstream ss; -#define QUERY_OPT_FN(NAME, ENUM)\ +#define QUERY_OPT_FN(NAME, ENUM, LEVEL)\ if (query_options.__isset.NAME &&\ (!defaults.__isset.NAME || query_options.NAME != defaults.NAME)) {\ if (i++ > 0) ss << ",";\ @@ -607,3 +608,14 @@ Status impala::ParseQueryOptions(const string& options, TQueryOptions* query_opt if (errorStatus.msg().details().size() > 0) return errorStatus; return Status::OK(); } + +void impala::PopulateQueryOptionLevels(QueryOptionLevels* query_option_levels) +{ +#define QUERY_OPT_FN(NAME, ENUM, LEVEL)\ + {\ + (*query_option_levels)[#ENUM] = LEVEL;\ + } + QUERY_OPTS_TABLE + QUERY_OPT_FN(support_start_over, SUPPORT_START_OVER, TQueryOptionLevel::ADVANCED) +#undef QUERY_OPT_FN +} diff --git a/be/src/service/query-options.h b/be/src/service/query-options.h index 5ca8c5fb8..64e0647cb 100644 --- a/be/src/service/query-options.h +++ b/be/src/service/query-options.h @@ -20,6 +20,7 @@ #include #include +#include #include #include "common/status.h" @@ -30,72 +31,104 @@ namespace impala { class TQueryOptions; +// Maps query option names to option levels used for displaying the query +// options via SET and SET ALL +typedef std::unordered_map + QueryOptionLevels; + // Macro to help generate functions that use or manipulate query options. // If the DCHECK is hit then handle the missing query option below and update // the DCHECK. 
#define QUERY_OPTS_TABLE\ DCHECK_EQ(_TImpalaQueryOptions_VALUES_TO_NAMES.size(),\ TImpalaQueryOptions::MAX_ROW_SIZE + 1);\ - QUERY_OPT_FN(abort_on_default_limit_exceeded, ABORT_ON_DEFAULT_LIMIT_EXCEEDED)\ - QUERY_OPT_FN(abort_on_error, ABORT_ON_ERROR)\ - QUERY_OPT_FN(allow_unsupported_formats, ALLOW_UNSUPPORTED_FORMATS)\ - QUERY_OPT_FN(batch_size, BATCH_SIZE)\ - QUERY_OPT_FN(debug_action, DEBUG_ACTION)\ - QUERY_OPT_FN(default_order_by_limit, DEFAULT_ORDER_BY_LIMIT)\ - QUERY_OPT_FN(disable_cached_reads, DISABLE_CACHED_READS)\ - QUERY_OPT_FN(disable_outermost_topn, DISABLE_OUTERMOST_TOPN)\ - QUERY_OPT_FN(disable_codegen, DISABLE_CODEGEN)\ - QUERY_OPT_FN(explain_level, EXPLAIN_LEVEL)\ - QUERY_OPT_FN(hbase_cache_blocks, HBASE_CACHE_BLOCKS)\ - QUERY_OPT_FN(hbase_caching, HBASE_CACHING)\ - QUERY_OPT_FN(max_errors, MAX_ERRORS)\ - QUERY_OPT_FN(max_io_buffers, MAX_IO_BUFFERS)\ - QUERY_OPT_FN(max_scan_range_length, MAX_SCAN_RANGE_LENGTH)\ - QUERY_OPT_FN(mem_limit, MEM_LIMIT)\ - QUERY_OPT_FN(num_nodes, NUM_NODES)\ - QUERY_OPT_FN(num_scanner_threads, NUM_SCANNER_THREADS)\ - QUERY_OPT_FN(compression_codec, COMPRESSION_CODEC)\ - QUERY_OPT_FN(parquet_file_size, PARQUET_FILE_SIZE)\ - QUERY_OPT_FN(request_pool, REQUEST_POOL)\ - QUERY_OPT_FN(reservation_request_timeout, RESERVATION_REQUEST_TIMEOUT)\ - QUERY_OPT_FN(sync_ddl, SYNC_DDL)\ - QUERY_OPT_FN(v_cpu_cores, V_CPU_CORES)\ - QUERY_OPT_FN(rm_initial_mem, RM_INITIAL_MEM)\ - QUERY_OPT_FN(query_timeout_s, QUERY_TIMEOUT_S)\ - QUERY_OPT_FN(buffer_pool_limit, BUFFER_POOL_LIMIT)\ - QUERY_OPT_FN(appx_count_distinct, APPX_COUNT_DISTINCT)\ - QUERY_OPT_FN(disable_unsafe_spills, DISABLE_UNSAFE_SPILLS)\ - QUERY_OPT_FN(seq_compression_mode, SEQ_COMPRESSION_MODE)\ - QUERY_OPT_FN(exec_single_node_rows_threshold, EXEC_SINGLE_NODE_ROWS_THRESHOLD)\ - QUERY_OPT_FN(optimize_partition_key_scans, OPTIMIZE_PARTITION_KEY_SCANS)\ - QUERY_OPT_FN(replica_preference, REPLICA_PREFERENCE)\ - QUERY_OPT_FN(schedule_random_replica, SCHEDULE_RANDOM_REPLICA)\ - 
QUERY_OPT_FN(scan_node_codegen_threshold, SCAN_NODE_CODEGEN_THRESHOLD)\ - QUERY_OPT_FN(disable_streaming_preaggregations, DISABLE_STREAMING_PREAGGREGATIONS)\ - QUERY_OPT_FN(runtime_filter_mode, RUNTIME_FILTER_MODE)\ - QUERY_OPT_FN(runtime_bloom_filter_size, RUNTIME_BLOOM_FILTER_SIZE)\ - QUERY_OPT_FN(runtime_filter_wait_time_ms, RUNTIME_FILTER_WAIT_TIME_MS)\ - QUERY_OPT_FN(disable_row_runtime_filtering, DISABLE_ROW_RUNTIME_FILTERING)\ - QUERY_OPT_FN(max_num_runtime_filters, MAX_NUM_RUNTIME_FILTERS)\ - QUERY_OPT_FN(parquet_annotate_strings_utf8, PARQUET_ANNOTATE_STRINGS_UTF8)\ - QUERY_OPT_FN(parquet_fallback_schema_resolution, PARQUET_FALLBACK_SCHEMA_RESOLUTION)\ - QUERY_OPT_FN(mt_dop, MT_DOP)\ - QUERY_OPT_FN(s3_skip_insert_staging, S3_SKIP_INSERT_STAGING)\ - QUERY_OPT_FN(runtime_filter_min_size, RUNTIME_FILTER_MIN_SIZE)\ - QUERY_OPT_FN(runtime_filter_max_size, RUNTIME_FILTER_MAX_SIZE)\ - QUERY_OPT_FN(prefetch_mode, PREFETCH_MODE)\ - QUERY_OPT_FN(strict_mode, STRICT_MODE)\ - QUERY_OPT_FN(scratch_limit, SCRATCH_LIMIT)\ - QUERY_OPT_FN(enable_expr_rewrites, ENABLE_EXPR_REWRITES)\ - QUERY_OPT_FN(decimal_v2, DECIMAL_V2)\ - QUERY_OPT_FN(parquet_dictionary_filtering, PARQUET_DICTIONARY_FILTERING)\ - QUERY_OPT_FN(parquet_array_resolution, PARQUET_ARRAY_RESOLUTION)\ - QUERY_OPT_FN(parquet_read_statistics, PARQUET_READ_STATISTICS)\ - QUERY_OPT_FN(default_join_distribution_mode, DEFAULT_JOIN_DISTRIBUTION_MODE)\ - QUERY_OPT_FN(disable_codegen_rows_threshold, DISABLE_CODEGEN_ROWS_THRESHOLD)\ - QUERY_OPT_FN(default_spillable_buffer_size, DEFAULT_SPILLABLE_BUFFER_SIZE)\ - QUERY_OPT_FN(min_spillable_buffer_size, MIN_SPILLABLE_BUFFER_SIZE)\ - QUERY_OPT_FN(max_row_size, MAX_ROW_SIZE)\ + QUERY_OPT_FN(abort_on_default_limit_exceeded, ABORT_ON_DEFAULT_LIMIT_EXCEEDED,\ + TQueryOptionLevel::DEPRECATED)\ + QUERY_OPT_FN(abort_on_error, ABORT_ON_ERROR, TQueryOptionLevel::REGULAR)\ + QUERY_OPT_FN(allow_unsupported_formats, ALLOW_UNSUPPORTED_FORMATS,\ + TQueryOptionLevel::DEPRECATED)\ + 
QUERY_OPT_FN(batch_size, BATCH_SIZE, TQueryOptionLevel::DEVELOPMENT)\ + QUERY_OPT_FN(debug_action, DEBUG_ACTION, TQueryOptionLevel::DEVELOPMENT)\ + QUERY_OPT_FN(default_order_by_limit, DEFAULT_ORDER_BY_LIMIT,\ + TQueryOptionLevel::DEPRECATED)\ + QUERY_OPT_FN(disable_cached_reads, DISABLE_CACHED_READS, TQueryOptionLevel::DEPRECATED)\ + QUERY_OPT_FN(disable_outermost_topn, DISABLE_OUTERMOST_TOPN,\ + TQueryOptionLevel::DEVELOPMENT)\ + QUERY_OPT_FN(disable_codegen, DISABLE_CODEGEN, TQueryOptionLevel::REGULAR)\ + QUERY_OPT_FN(explain_level, EXPLAIN_LEVEL, TQueryOptionLevel::REGULAR)\ + QUERY_OPT_FN(hbase_cache_blocks, HBASE_CACHE_BLOCKS, TQueryOptionLevel::ADVANCED)\ + QUERY_OPT_FN(hbase_caching, HBASE_CACHING, TQueryOptionLevel::ADVANCED)\ + QUERY_OPT_FN(max_errors, MAX_ERRORS, TQueryOptionLevel::ADVANCED)\ + QUERY_OPT_FN(max_io_buffers, MAX_IO_BUFFERS, TQueryOptionLevel::DEPRECATED)\ + QUERY_OPT_FN(max_scan_range_length, MAX_SCAN_RANGE_LENGTH,\ + TQueryOptionLevel::DEVELOPMENT)\ + QUERY_OPT_FN(mem_limit, MEM_LIMIT, TQueryOptionLevel::REGULAR)\ + QUERY_OPT_FN(num_nodes, NUM_NODES, TQueryOptionLevel::DEVELOPMENT)\ + QUERY_OPT_FN(num_scanner_threads, NUM_SCANNER_THREADS, TQueryOptionLevel::REGULAR)\ + QUERY_OPT_FN(compression_codec, COMPRESSION_CODEC, TQueryOptionLevel::REGULAR)\ + QUERY_OPT_FN(parquet_file_size, PARQUET_FILE_SIZE, TQueryOptionLevel::ADVANCED)\ + QUERY_OPT_FN(request_pool, REQUEST_POOL, TQueryOptionLevel::REGULAR)\ + QUERY_OPT_FN(reservation_request_timeout, RESERVATION_REQUEST_TIMEOUT,\ + TQueryOptionLevel::DEPRECATED)\ + QUERY_OPT_FN(sync_ddl, SYNC_DDL, TQueryOptionLevel::REGULAR)\ + QUERY_OPT_FN(v_cpu_cores, V_CPU_CORES, TQueryOptionLevel::DEPRECATED)\ + QUERY_OPT_FN(rm_initial_mem, RM_INITIAL_MEM, TQueryOptionLevel::DEPRECATED)\ + QUERY_OPT_FN(query_timeout_s, QUERY_TIMEOUT_S, TQueryOptionLevel::REGULAR)\ + QUERY_OPT_FN(buffer_pool_limit, BUFFER_POOL_LIMIT, TQueryOptionLevel::ADVANCED)\ + QUERY_OPT_FN(appx_count_distinct, APPX_COUNT_DISTINCT, 
TQueryOptionLevel::ADVANCED)\ + QUERY_OPT_FN(disable_unsafe_spills, DISABLE_UNSAFE_SPILLS, TQueryOptionLevel::REGULAR)\ + QUERY_OPT_FN(seq_compression_mode, SEQ_COMPRESSION_MODE, TQueryOptionLevel::REGULAR)\ + QUERY_OPT_FN(exec_single_node_rows_threshold, EXEC_SINGLE_NODE_ROWS_THRESHOLD,\ + TQueryOptionLevel::ADVANCED)\ + QUERY_OPT_FN(optimize_partition_key_scans, OPTIMIZE_PARTITION_KEY_SCANS,\ + TQueryOptionLevel::REGULAR)\ + QUERY_OPT_FN(replica_preference, REPLICA_PREFERENCE, TQueryOptionLevel::ADVANCED)\ + QUERY_OPT_FN(schedule_random_replica, SCHEDULE_RANDOM_REPLICA,\ + TQueryOptionLevel::ADVANCED)\ + QUERY_OPT_FN(scan_node_codegen_threshold, SCAN_NODE_CODEGEN_THRESHOLD,\ + TQueryOptionLevel::DEPRECATED)\ + QUERY_OPT_FN(disable_streaming_preaggregations, DISABLE_STREAMING_PREAGGREGATIONS,\ + TQueryOptionLevel::REGULAR)\ + QUERY_OPT_FN(runtime_filter_mode, RUNTIME_FILTER_MODE, TQueryOptionLevel::REGULAR)\ + QUERY_OPT_FN(runtime_bloom_filter_size, RUNTIME_BLOOM_FILTER_SIZE,\ + TQueryOptionLevel::ADVANCED)\ + QUERY_OPT_FN(runtime_filter_wait_time_ms, RUNTIME_FILTER_WAIT_TIME_MS,\ + TQueryOptionLevel::REGULAR)\ + QUERY_OPT_FN(disable_row_runtime_filtering, DISABLE_ROW_RUNTIME_FILTERING,\ + TQueryOptionLevel::REGULAR)\ + QUERY_OPT_FN(max_num_runtime_filters, MAX_NUM_RUNTIME_FILTERS,\ + TQueryOptionLevel::ADVANCED)\ + QUERY_OPT_FN(parquet_annotate_strings_utf8, PARQUET_ANNOTATE_STRINGS_UTF8,\ + TQueryOptionLevel::ADVANCED)\ + QUERY_OPT_FN(parquet_fallback_schema_resolution, PARQUET_FALLBACK_SCHEMA_RESOLUTION,\ + TQueryOptionLevel::REGULAR)\ + QUERY_OPT_FN(mt_dop, MT_DOP, TQueryOptionLevel::REGULAR)\ + QUERY_OPT_FN(s3_skip_insert_staging, S3_SKIP_INSERT_STAGING,\ + TQueryOptionLevel::REGULAR)\ + QUERY_OPT_FN(runtime_filter_min_size, RUNTIME_FILTER_MIN_SIZE,\ + TQueryOptionLevel::ADVANCED)\ + QUERY_OPT_FN(runtime_filter_max_size, RUNTIME_FILTER_MAX_SIZE,\ + TQueryOptionLevel::ADVANCED)\ + QUERY_OPT_FN(prefetch_mode, PREFETCH_MODE, TQueryOptionLevel::ADVANCED)\ + 
QUERY_OPT_FN(strict_mode, STRICT_MODE, TQueryOptionLevel::DEVELOPMENT)\ + QUERY_OPT_FN(scratch_limit, SCRATCH_LIMIT, TQueryOptionLevel::REGULAR)\ + QUERY_OPT_FN(enable_expr_rewrites, ENABLE_EXPR_REWRITES, TQueryOptionLevel::ADVANCED)\ + QUERY_OPT_FN(decimal_v2, DECIMAL_V2, TQueryOptionLevel::DEVELOPMENT)\ + QUERY_OPT_FN(parquet_dictionary_filtering, PARQUET_DICTIONARY_FILTERING,\ + TQueryOptionLevel::ADVANCED)\ + QUERY_OPT_FN(parquet_array_resolution, PARQUET_ARRAY_RESOLUTION,\ + TQueryOptionLevel::REGULAR)\ + QUERY_OPT_FN(parquet_read_statistics, PARQUET_READ_STATISTICS,\ + TQueryOptionLevel::ADVANCED)\ + QUERY_OPT_FN(default_join_distribution_mode, DEFAULT_JOIN_DISTRIBUTION_MODE,\ + TQueryOptionLevel::ADVANCED)\ + QUERY_OPT_FN(disable_codegen_rows_threshold, DISABLE_CODEGEN_ROWS_THRESHOLD,\ + TQueryOptionLevel::ADVANCED)\ + QUERY_OPT_FN(default_spillable_buffer_size, DEFAULT_SPILLABLE_BUFFER_SIZE,\ + TQueryOptionLevel::ADVANCED)\ + QUERY_OPT_FN(min_spillable_buffer_size, MIN_SPILLABLE_BUFFER_SIZE,\ + TQueryOptionLevel::ADVANCED)\ + QUERY_OPT_FN(max_row_size, MAX_ROW_SIZE, TQueryOptionLevel::REGULAR)\ ; @@ -136,6 +169,11 @@ Status SetQueryOption(const std::string& key, const std::string& value, Status ParseQueryOptions(const std::string& options, TQueryOptions* query_options, QueryOptionsMask* set_query_options_mask); +/// Based on the query option levels provided to QUERY_OPT_FN macro this function +/// populates the received QueryOptionLevels map with (option name -> option level) +/// entries. +void PopulateQueryOptionLevels(QueryOptionLevels* query_option_levels); + } #endif diff --git a/common/thrift/Frontend.thrift b/common/thrift/Frontend.thrift index d684265d6..fa315bd86 100644 --- a/common/thrift/Frontend.thrift +++ b/common/thrift/Frontend.thrift @@ -475,9 +475,11 @@ struct TCatalogOpRequest { // Parameters for the SET query option command struct TSetQueryOptionRequest { - // Set for "SET key=value", unset for "SET" statement. 
+ // Set for "SET key=value", unset for "SET" and "SET ALL" statements. 1: optional string key 2: optional string value + // Set true for "SET ALL" + 3: optional bool is_set_all } // HiveServer2 Metadata operations (JniFrontend.hiveServer2MetadataOperation) @@ -539,7 +541,7 @@ struct TExecRequest { // Set iff stmt_type is QUERY or DML 3: optional TQueryExecRequest query_exec_request - // Set iff stmt_type is DDL + // Set if stmt_type is DDL 4: optional TCatalogOpRequest catalog_op_request // Metadata of the query result set (not set for DML) @@ -558,7 +560,7 @@ struct TExecRequest { // List of warnings that were generated during analysis. May be empty. 9: required list analysis_warnings - // Set iff stmt_type is SET + // Set if stmt_type is SET 10: optional TSetQueryOptionRequest set_query_option_request // Timeline of planner's operation, for profiling diff --git a/common/thrift/beeswax.thrift b/common/thrift/beeswax.thrift index 6e0bb12c0..5d133dd3f 100644 --- a/common/thrift/beeswax.thrift +++ b/common/thrift/beeswax.thrift @@ -93,13 +93,24 @@ exception BeeswaxException { } exception QueryNotFoundException { -} +} + +// Impala extension: +// Levels to use when displaying query options from Impala shell +enum TQueryOptionLevel { + REGULAR, + ADVANCED, + DEVELOPMENT, + DEPRECATED +} /** Represents a Hadoop-style configuration variable. 
*/ struct ConfigVariable { 1: string key, 2: string value, - 3: string description + 3: string description, + // For displaying purposes in Impala shell + 4: optional TQueryOptionLevel level } service BeeswaxService { diff --git a/fe/src/main/cup/sql-parser.cup b/fe/src/main/cup/sql-parser.cup index 7c0a79406..f7be00cb8 100644 --- a/fe/src/main/cup/sql-parser.cup +++ b/fe/src/main/cup/sql-parser.cup @@ -2299,15 +2299,17 @@ select_clause ::= set_stmt ::= KW_SET ident_or_default:key EQUAL literal:l - {: RESULT = new SetStmt(key, l.getStringValue()); :} + {: RESULT = new SetStmt(key, l.getStringValue(), false); :} | KW_SET ident_or_default:key EQUAL SUBTRACT numeric_literal:l {: l.swapSign(); - RESULT = new SetStmt(key, l.getStringValue()); :} + RESULT = new SetStmt(key, l.getStringValue(), false); :} | KW_SET ident_or_default:key EQUAL ident_or_default:ident - {: RESULT = new SetStmt(key, ident); :} + {: RESULT = new SetStmt(key, ident, false); :} + | KW_SET KW_ALL + {: RESULT = new SetStmt(null, null, true); :} | KW_SET - {: RESULT = new SetStmt(null, null); :} + {: RESULT = new SetStmt(null, null, false); :} ; select_list ::= diff --git a/fe/src/main/java/org/apache/impala/analysis/SetStmt.java b/fe/src/main/java/org/apache/impala/analysis/SetStmt.java index d307ca708..a83efda55 100644 --- a/fe/src/main/java/org/apache/impala/analysis/SetStmt.java +++ b/fe/src/main/java/org/apache/impala/analysis/SetStmt.java @@ -26,6 +26,7 @@ import com.google.common.base.Preconditions; public class SetStmt extends StatementBase { private final String key_; private final String value_; + private final boolean isSetAll_; // This key is deprecated in Impala 2.0; COMPRESSION_CODEC_KEY replaces this private static final String DEPRECATED_PARQUET_CODEC_KEY = "PARQUET_COMPRESSION_CODEC"; @@ -39,16 +40,21 @@ public class SetStmt extends StatementBase { return key; } - public SetStmt(String key, String value) { + public SetStmt(String key, String value, boolean isSetAll) { 
Preconditions.checkArgument((key == null) == (value == null)); Preconditions.checkArgument(key == null || !key.isEmpty()); + Preconditions.checkArgument(!isSetAll || (key == null && value == null) ); key_ = key; value_ = value; + isSetAll_ = isSetAll; } @Override public String toSql() { - if (key_ == null) return "SET"; + if (key_ == null) { + if (isSetAll_) return "SET ALL"; + return "SET"; + } Preconditions.checkNotNull(value_); return "SET " + ToSqlUtils.getIdentSql(key_) + "='" + value_ + "'"; } @@ -64,6 +70,7 @@ public class SetStmt extends StatementBase { request.setKey(resolveThriftKey(key_)); request.setValue(value_); } + if (isSetAll_) request.setIs_set_all(true); return request; } } diff --git a/fe/src/main/java/org/apache/impala/service/Frontend.java b/fe/src/main/java/org/apache/impala/service/Frontend.java index 3f3a6d773..c62fc31b0 100644 --- a/fe/src/main/java/org/apache/impala/service/Frontend.java +++ b/fe/src/main/java/org/apache/impala/service/Frontend.java @@ -1105,7 +1105,8 @@ public class Frontend { result.stmt_type = TStmtType.SET; result.setResult_set_metadata(new TResultSetMetadata(Arrays.asList( new TColumn("option", Type.STRING.toThrift()), - new TColumn("value", Type.STRING.toThrift())))); + new TColumn("value", Type.STRING.toThrift()), + new TColumn("level", Type.STRING.toThrift())))); result.setSet_query_option_request(analysisResult.getSetStmt().toThrift()); return result; } diff --git a/shell/impala_client.py b/shell/impala_client.py index 9509098ef..868d898fa 100755 --- a/shell/impala_client.py +++ b/shell/impala_client.py @@ -71,6 +71,7 @@ class ImpalaClient(object): self.user, self.ldap_password = user, ldap_password self.use_ldap = use_ldap self.default_query_options = {} + self.query_option_levels = {} self.query_state = QueryState._NAMES_TO_VALUES self.fetch_batch_size = 1024 @@ -93,6 +94,11 @@ class ImpalaClient(object): raise RPCException("Unable to retrieve default query options") for option in options: 
self.default_query_options[option.key.upper()] = option.value + # If connected to an Impala that predates IMPALA-2181 then the received options + # wouldn't contain a level attribute. In this case the query_option_levels + # map is left empty. + if option.level is not None: + self.query_option_levels[option.key.upper()] = option.level def build_summary_table(self, summary, idx, is_fragment_root, indent_level, new_indent_level, output): diff --git a/shell/impala_shell.py b/shell/impala_shell.py index 069e8750e..a9a527aa3 100755 --- a/shell/impala_shell.py +++ b/shell/impala_shell.py @@ -78,6 +78,18 @@ class ImpalaPrettyTable(prettytable.PrettyTable): value = unicode(value, self.encoding, "replace") return value +class QueryOptionLevels: + """These are the levels used when displaying query options. + The values correspond to the ones in TQueryOptionLevel""" + REGULAR = 0 + ADVANCED = 1 + DEVELOPMENT = 2 + DEPRECATED = 3 + +class QueryOptionDisplayModes: + REGULAR_OPTIONS_ONLY = 1 + ALL_OPTIONS = 2 + class ImpalaShell(object, cmd.Cmd): """ Simple Impala Shell. @@ -214,20 +226,65 @@ class ImpalaShell(object, cmd.Cmd): """ self.readline = None - def _print_options(self, default_options, set_options): - # Prints the current query options - # with default values distinguished from set values by brackets [], followed by - # shell-local options. - if not default_options and not set_options: + def _print_options(self, print_mode): + """Prints the current query options with default values distinguished from set values + by brackets [], followed by shell-local options. + The options are displayed in groups based on option levels received in parameter. + Input parameter decides whether all groups or just the 'Regular' and 'Advanced' + options are displayed.""" + print "Query options (defaults shown in []):" + if not self.imp_client.default_query_options and not self.set_query_options: print '\tNo options available.' 
else: - for k in sorted(default_options): - if k in set_options and set_options[k] != default_options[k]: - print '\n'.join(["\t%s: %s" % (k, set_options[k])]) - else: - print '\n'.join(["\t%s: [%s]" % (k, default_options[k])]) + (regular_options, advanced_options, development_options, deprecated_options) = \ + self._get_query_option_grouping() + self._print_option_group(regular_options) + # If the shell is connected to an Impala that predates IMPALA-2181 then + # the advanced_options would be empty and only the regular options would + # be displayed. + if advanced_options: + print '\nAdvanced Query Options:' + self._print_option_group(advanced_options) + if print_mode == QueryOptionDisplayModes.ALL_OPTIONS: + if development_options: + print '\nDevelopment Query Options:' + self._print_option_group(development_options) + if deprecated_options: + print '\nDeprecated Query Options:' + self._print_option_group(deprecated_options) self._print_shell_options() + def _get_query_option_grouping(self): + """For all the query options received through rpc this function determines the + query option level for display purposes using the received query_option_levels + parameters. 
+ If the option level can't be determined then it defaults to 'REGULAR'""" + regular_options, advanced_options, development_options, deprecated_options = \ + {}, {}, {}, {} + for option_name, option_value in self.imp_client.default_query_options.iteritems(): + level = self.imp_client.query_option_levels.get(option_name, + QueryOptionLevels.REGULAR) + if level == QueryOptionLevels.REGULAR: + regular_options[option_name] = option_value + elif level == QueryOptionLevels.DEVELOPMENT: + development_options[option_name] = option_value + elif level == QueryOptionLevels.DEPRECATED: + deprecated_options[option_name] = option_value + else: + advanced_options[option_name] = option_value + return (regular_options, advanced_options, development_options, deprecated_options) + + def _print_option_group(self, query_options): + """Gets query options and prints them. Value is inside [] for the ones having + default values. + query_options parameter is a subset of the default_query_options map""" + for option_name in sorted(query_options): + if (option_name in self.set_query_options and + self.set_query_options[option_name] != query_options[option_name]): + print '\n'.join(["\t%s: %s" % (option_name, self.set_query_options[option_name])]) + else: + print '\n'.join(["\t%s: [%s]" % (option_name, query_options[option_name])]) + def _print_variables(self): # Prints the currently defined variables. if not self.set_variables: @@ -552,11 +609,17 @@ class ImpalaShell(object, cmd.Cmd): return var_name return None + def _print_with_set(self, print_level): + self._print_options(print_level) + print "\nVariables:" + self._print_variables() + def do_set(self, args): """Set or display query options. Display query options: - Usage: SET + Usage: SET (to display the Regular options) or + SET ALL (to display all the options) Set query options: Usage: SET