mirror of
https://github.com/apache/impala.git
synced 2026-01-03 06:00:52 -05:00
Also add support for "SET", which returns a table of query options and their respective values. The front-end parses the option into a (key, value) pair and then the existing backend logic is used to set the option, or return the result sets. Change-Id: I40dbd98537e2a73bdd5b27d8b2575a2fe6f8295b Reviewed-on: http://gerrit.ent.cloudera.com:8080/3582 Reviewed-by: Daniel Hecht <dhecht@cloudera.com> Tested-by: jenkins (cherry picked from commit aa0f6a2fc1d3fe21f22cc7bc56887e1fdb02250b) Reviewed-on: http://gerrit.ent.cloudera.com:8080/3614
605 lines
20 KiB
Thrift
605 lines
20 KiB
Thrift
// Copyright 2012 Cloudera Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
|
|
|
|
namespace cpp impala
|
|
namespace java com.cloudera.impala.thrift
|
|
|
|
include "Types.thrift"
|
|
include "ImpalaInternalService.thrift"
|
|
include "PlanNodes.thrift"
|
|
include "Planner.thrift"
|
|
include "Descriptors.thrift"
|
|
include "Data.thrift"
|
|
include "Exprs.thrift"
|
|
include "cli_service.thrift"
|
|
include "Status.thrift"
|
|
include "CatalogObjects.thrift"
|
|
include "CatalogService.thrift"
|
|
|
|
// These are supporting structs for JniFrontend.java, which serves as the glue
// between our C++ execution environment and the Java frontend.
|
|
|
|
// Struct for HiveUdf expr to create the proper execution object in the FE
// java side. See exprs/hive-udf-call.h for how hive Udfs are executed in general.
// TODO: this could be the UdfID, collapsing the first 3 arguments but synchronizing
// the id will not be possible without the catalog service.
struct THiveUdfExecutorCtorParams {
  // The UDF's function descriptor (defined in Types.thrift).
  1: required Types.TFunction fn

  // Local path to the UDF's jar file
  2: required string local_location

  // The byte offset for each argument in the input buffer. The BE will
  // call the Java executor with a buffer for all the inputs.
  // input_byte_offsets[0] is the byte offset in the buffer for the first
  // argument; input_byte_offsets[1] is the second, etc.
  3: required list<i32> input_byte_offsets

  // Native input buffer ptrs (cast as i64) for the inputs. The input arguments
  // are written to these buffers directly and read from java with no copies.
  // input_nulls_ptr[i] is true if the i-th input is null.
  // input_buffer_ptr[input_byte_offsets[i]] is the value of the i-th input.
  4: required i64 input_nulls_ptr
  5: required i64 input_buffer_ptr

  // Native output buffer ptr. For non-variable length types, the output is
  // written here and read from the native side with no copies.
  // The UDF should set *output_null_ptr to true, if the result of the UDF is
  // NULL.
  6: required i64 output_null_ptr
  7: required i64 output_buffer_ptr
}
|
|
|
|
// Arguments to getTableNames, which returns a list of tables that match an
// optional pattern.
struct TGetTablesParams {
  // If not set, match tables in all DBs
  1: optional string db

  // If not set, match every table
  2: optional string pattern

  // Session state for the user who initiated this request. If authorization is
  // enabled, only the tables this user has access to will be returned. If not
  // set, access checks will be skipped (used for internal Impala requests)
  3: optional ImpalaInternalService.TSessionState session
}
|
|
|
|
// Result of getTableNames: a list of unqualified table names.
struct TGetTablesResult {
  // Unqualified names of the matching tables.
  1: list<string> tables
}
|
|
|
|
// Arguments to getDbNames, which returns a list of dbs that match an optional
// pattern
struct TGetDbsParams {
  // If not set, match every database
  1: optional string pattern

  // Session state for the user who initiated this request. If authorization is
  // enabled, only the databases this user has access to will be returned. If not
  // set, access checks will be skipped (used for internal Impala requests)
  2: optional ImpalaInternalService.TSessionState session
}
|
|
|
|
// Result of getDbNames: a list of database names.
struct TGetDbsResult {
  // Names of the matching databases.
  1: list<string> dbs
}
|
|
|
|
// Arguments to getDataSrcsNames, which returns a list of data sources that match an
// optional pattern
struct TGetDataSrcsParams {
  // If not set, match every data source
  1: optional string pattern
}
|
|
|
|
// Result of getDataSrcsNames: the data source names plus, for each data source, its
// location, class name and API version.
// NOTE(review): the four lists look like parallel arrays (entry i of each list
// describing the same data source) - confirm against the code that fills them in.
struct TGetDataSrcsResult {
  1: required list<string> data_src_names
  2: required list<string> locations
  3: required list<string> class_names
  4: required list<string> api_versions
}
|
|
|
|
// Used by DESCRIBE <table> statements to control what information is returned and how to
// format the output.
enum TDescribeTableOutputStyle {
  // The default output style if no options are specified for DESCRIBE <table>.
  MINIMAL,

  // Output additional information on the table in formatted style.
  // Set for DESCRIBE FORMATTED statements.
  FORMATTED
}
|
|
|
|
// Arguments to DescribeTable, which returns a list of column descriptors and additional
// metadata for a given table. What information is returned is controlled by the
// given TDescribeTableOutputStyle.
// NOTE: This struct should only be used for intra-process communication.
struct TDescribeTableParams {
  // Database and name of the table to describe.
  1: required string db
  2: required string table_name

  // Controls the output style for this describe command.
  3: required TDescribeTableOutputStyle output_style
}
|
|
|
|
// Results of a call to describeTable()
// NOTE: This struct should only be used for intra-process communication.
struct TDescribeTableResult {
  // Output from a DESCRIBE TABLE command.
  1: required list<Data.TResultRow> results
}
|
|
|
|
// Parameters for SHOW DATA SOURCES commands
struct TShowDataSrcsParams {
  // Optional pattern to match data source names. If not set, all data sources are
  // returned.
  1: optional string show_pattern
}
|
|
|
|
// Parameters for SHOW DATABASES commands
struct TShowDbsParams {
  // Optional pattern to match database names. If not set, all databases are returned.
  1: optional string show_pattern
}
|
|
|
|
// Parameters for SHOW TABLE/COLUMN STATS commands
struct TShowStatsParams {
  // NOTE(review): presumably true for SHOW COLUMN STATS and false for
  // SHOW TABLE STATS - confirm against the frontend that populates it.
  1: required bool is_show_col_stats

  // Table whose stats are shown. Declared without an explicit required/optional
  // qualifier, so it uses Thrift's default requiredness.
  2: CatalogObjects.TTableName table_name
}
|
|
|
|
// Parameters for SHOW FUNCTIONS commands
struct TShowFunctionsParams {
  // Type of function to show.
  1: Types.TFunctionType type

  // Database to use for SHOW FUNCTIONS
  2: optional string db

  // Optional pattern to match function names. If not set, all functions are returned.
  3: optional string show_pattern
}
|
|
|
|
// Parameters for SHOW TABLES commands
struct TShowTablesParams {
  // Database to use for SHOW TABLES
  1: optional string db

  // Optional pattern to match table names. If not set, all tables from the given
  // database are returned.
  2: optional string show_pattern
}
|
|
|
|
// Arguments to getFunctions(), which returns a list of non-qualified function
// signatures that match an optional pattern. Parameters for SHOW FUNCTIONS.
struct TGetFunctionsParams {
  // Type of function to return.
  1: required Types.TFunctionType type

  // Database to use for SHOW FUNCTIONS
  2: optional string db

  // If not set, match every function
  3: optional string pattern

  // Session state for the user who initiated this request. If authorization is
  // enabled, only the functions this user has access to will be returned. If not
  // set, access checks will be skipped (used for internal Impala requests)
  4: optional ImpalaInternalService.TSessionState session
}
|
|
|
|
// Result of getFunctions(): a list of function signatures.
// NOTE(review): fn_ret_types presumably holds the return type of the signature at
// the same index in fn_signatures - confirm against the code that fills them in.
struct TGetFunctionsResult {
  1: list<string> fn_signatures
  2: list<string> fn_ret_types
}
|
|
|
|
// Parameters for the USE db command
struct TUseDbParams {
  // Name of the database named in the USE command.
  1: required string db
}
|
|
|
|
// Results of an EXPLAIN
struct TExplainResult {
  // Each line in the explain plan occupies an entry in the list
  1: required list<Data.TResultRow> results
}
|
|
|
|
// Metadata required to finalize a query - that is, to clean up after the query is done.
// Only relevant for INSERT queries.
struct TFinalizeParams {
  // True if the INSERT query was OVERWRITE, rather than INTO
  1: required bool is_overwrite

  // The base directory in hdfs of the table targeted by this INSERT
  2: required string hdfs_base_dir

  // The target table name
  3: required string table_name

  // The target table database
  4: required string table_db

  // The full path in HDFS of a directory under which temporary files may be written
  // during an INSERT. For a query with id a:b, files are written to <staging_dir>/.a_b/,
  // and that entire directory is removed after the INSERT completes.
  5: optional string staging_dir

  // Identifier for the target table in the query-wide descriptor table (see
  // TDescriptorTable and TTableDescriptor).
  6: optional i64 table_id
}
|
|
|
|
// Request for a LOAD DATA statement. LOAD DATA is only supported for HDFS backed tables.
struct TLoadDataReq {
  // Fully qualified table name to load data into.
  1: required CatalogObjects.TTableName table_name

  // The source data file or directory to load into the table.
  2: required string source_path

  // If true, loaded files will overwrite all data in the target table/partition's
  // directory. If false, new files will be added alongside existing files. If there are
  // any file name conflicts, the new files will be uniquified by appending a UUID to the
  // base file name preserving the extension if one exists.
  3: required bool overwrite

  // An optional partition spec. Set if this operation should apply to a specific
  // partition rather than the base table.
  4: optional list<CatalogObjects.TPartitionKeyValue> partition_spec
}
|
|
|
|
// Response of a LOAD DATA statement.
struct TLoadDataResp {
  // A result row that contains information on the result of the LOAD operation. This
  // includes details like the number of files moved as part of the request.
  1: required Data.TResultRow load_summary
}
|
|
|
|
// Result of call to ImpalaPlanService/JniFrontend.CreateQueryRequest()
struct TQueryExecRequest {
  // global descriptor tbl for all fragments
  1: optional Descriptors.TDescriptorTable desc_tbl

  // fragments[i] may consume the output of fragments[j > i];
  // fragments[0] is the root fragment and also the coordinator fragment, if
  // it is unpartitioned.
  2: required list<Planner.TPlanFragment> fragments

  // Specifies the destination fragment of the output of each fragment.
  // dest_fragment_idx.size() == fragments.size() - 1 and
  // fragments[i] sends its output to fragments[dest_fragment_idx[i-1]]
  3: optional list<i32> dest_fragment_idx

  // A map from scan node ids to a list of scan range locations.
  // The node ids refer to scan nodes in fragments[].plan_tree
  4: optional map<Types.TPlanNodeId, list<Planner.TScanRangeLocations>>
      per_node_scan_ranges

  // Metadata of the query result set (only for select)
  5: optional Data.TResultSetMetadata result_set_metadata

  // Set if the query needs finalization after it executes
  6: optional TFinalizeParams finalize_params

  // Query context for this request (see ImpalaInternalService.TQueryCtx).
  7: required ImpalaInternalService.TQueryCtx query_ctx

  // The same as the output of 'explain <query>'
  8: optional string query_plan

  // The statement type governs when the coordinator can judge a query to be finished.
  // DML queries are complete after Wait(), SELECTs may not be. Generally matches
  // the stmt_type of the parent TExecRequest, but in some cases (such as CREATE TABLE
  // AS SELECT), these may differ.
  9: required Types.TStmtType stmt_type

  // Estimated per-host peak memory consumption in bytes. Used for resource management.
  10: optional i64 per_host_mem_req

  // Estimated per-host CPU requirements in YARN virtual cores.
  // Used for resource management.
  11: optional i16 per_host_vcores
}
|
|
|
|
// Identifies which kind of catalog operation a TCatalogOpRequest carries
// (see TCatalogOpRequest.op_type).
enum TCatalogOpType {
  SHOW_TABLES,
  SHOW_DBS,
  SHOW_STATS,
  USE,
  DESCRIBE,
  SHOW_FUNCTIONS,
  RESET_METADATA,
  DDL,
  SHOW_CREATE_TABLE,
  SHOW_DATA_SRCS
}
|
|
|
|
// Request for a catalog operation. op_type selects the operation; the optional
// fields below carry the parameters for the individual operation types.
// TODO: Combine SHOW requests with a single struct that contains a field
// indicating which type of show request it is.
struct TCatalogOpRequest {
  1: required TCatalogOpType op_type

  // Parameters for USE commands
  2: optional TUseDbParams use_db_params

  // Parameters for DESCRIBE table commands
  3: optional TDescribeTableParams describe_table_params

  // Parameters for SHOW DATABASES
  4: optional TShowDbsParams show_dbs_params

  // Parameters for SHOW TABLES
  5: optional TShowTablesParams show_tables_params

  // Parameters for SHOW FUNCTIONS
  6: optional TShowFunctionsParams show_fns_params

  // Parameters for SHOW DATA SOURCES
  // Note: field id 11 appears out of numeric order here. Field ids are part of the
  // serialized format, so it must not be renumbered to fit its position.
  11: optional TShowDataSrcsParams show_data_srcs_params

  // Parameters for DDL requests executed using the CatalogServer
  // such as CREATE, ALTER, and DROP. See CatalogService.TDdlExecRequest
  // for details.
  7: optional CatalogService.TDdlExecRequest ddl_params

  // Parameters for RESET/INVALIDATE METADATA, executed using the CatalogServer.
  // See CatalogService.TResetMetadataRequest for more details.
  8: optional CatalogService.TResetMetadataRequest reset_metadata_params

  // Parameters for SHOW TABLE/COLUMN STATS
  9: optional TShowStatsParams show_stats_params

  // Parameters for SHOW CREATE TABLE
  10: optional CatalogObjects.TTableName show_create_table_params
}
|
|
|
|
// Parameters for the SET query option command
struct TSetQueryOptionRequest {
  // Set for "SET key=value", unset for "SET" statement.
  1: optional string key
  2: optional string value
}
|
|
|
|
// HiveServer2 Metadata operations (JniFrontend.hiveServer2MetadataOperation)
enum TMetadataOpcode {
  GET_TYPE_INFO,
  GET_CATALOGS,
  GET_SCHEMAS,
  GET_TABLES,
  GET_TABLE_TYPES,
  GET_COLUMNS,
  GET_FUNCTIONS
}
|
|
|
|
// Input parameter to JniFrontend.hiveServer2MetadataOperation
// Each request has an opcode and a corresponding TGet*Req input parameter
struct TMetadataOpRequest {
  // opcode
  1: required TMetadataOpcode opcode

  // input parameter
  2: optional cli_service.TGetInfoReq get_info_req
  3: optional cli_service.TGetTypeInfoReq get_type_info_req
  4: optional cli_service.TGetCatalogsReq get_catalogs_req
  5: optional cli_service.TGetSchemasReq get_schemas_req
  6: optional cli_service.TGetTablesReq get_tables_req
  7: optional cli_service.TGetTableTypesReq get_table_types_req
  8: optional cli_service.TGetColumnsReq get_columns_req
  9: optional cli_service.TGetFunctionsReq get_functions_req

  // Session state for the user who initiated this request. If authorization is
  // enabled, only the server objects this user has access to will be returned.
  // If not set, access checks will be skipped (used for internal Impala requests)
  10: optional ImpalaInternalService.TSessionState session
}
|
|
|
|
// Tracks accesses to Catalog objects for use during auditing. This information, paired
// with the current session information, provides a view into what objects a user's
// query accessed
struct TAccessEvent {
  // Fully qualified object name
  1: required string name

  // The object type (ex. DATABASE, VIEW, TABLE)
  2: required CatalogObjects.TCatalogObjectType object_type

  // The requested privilege on the object
  // TODO: Create an enum for this?
  3: required string privilege
}
|
|
|
|
// Result of call to createExecRequest()
struct TExecRequest {
  // Type of the statement; determines which of the optional fields below are set.
  1: required Types.TStmtType stmt_type

  // Copied from the corresponding TClientRequest
  2: required ImpalaInternalService.TQueryOptions query_options

  // TQueryExecRequest for the backend
  // Set iff stmt_type is QUERY or DML
  3: optional TQueryExecRequest query_exec_request

  // Set iff stmt_type is DDL
  4: optional TCatalogOpRequest catalog_op_request

  // Metadata of the query result set (not set for DML)
  5: optional Data.TResultSetMetadata result_set_metadata

  // Result of EXPLAIN. Set iff stmt_type is EXPLAIN
  6: optional TExplainResult explain_result

  // Request for LOAD DATA statements.
  7: optional TLoadDataReq load_data_request

  // List of catalog objects accessed by this request. May be empty in the
  // case that the query did not access any Catalog objects.
  8: optional list<TAccessEvent> access_events

  // List of warnings that were generated during analysis. May be empty.
  9: required list<string> analysis_warnings

  // Set iff stmt_type is SET
  10: optional TSetQueryOptionRequest set_query_option_request
}
|
|
|
|
// Parameters to FeSupport.cacheJar().
struct TCacheJarParams {
  // HDFS URI for the jar
  1: required string hdfs_location
}
|
|
|
|
// Result from FeSupport.cacheJar().
struct TCacheJarResult {
  // Status of the cacheJar() call.
  1: required Status.TStatus status

  // Local path for the jar. Set only if status is OK.
  2: optional string local_path
}
|
|
|
|
// A UDF may include optional prepare and close functions in addition to the main
// evaluation function. This enum distinguishes between these when doing a symbol lookup.
enum TSymbolType {
  UDF_EVALUATE,
  UDF_PREPARE,
  UDF_CLOSE
}
|
|
|
|
// Parameters to pass to validate that the binary contains the symbol. If the
// symbol is fully specified (i.e. full mangled name), we validate that the
// mangled name is correct. If only the function name is specified, we try
// to find the fully mangled name in the binary.
// The result is returned in TSymbolLookupResult.
struct TSymbolLookupParams {
  // HDFS path for the function binary. This binary must exist at the time the
  // function is created.
  1: required string location

  // This can be either a mangled symbol or the function name before mangling.
  2: required string symbol

  // Type of the udf. e.g. hive, native, ir
  3: required Types.TFunctionBinaryType fn_binary_type

  // The types of the arguments to the function
  4: required list<Types.TColumnType> arg_types

  // If true, this function takes var args.
  5: required bool has_var_args

  // If set, this function needs to have a return out argument of this type.
  6: optional Types.TColumnType ret_arg_type

  // Determines the signature of the mangled symbol
  7: required TSymbolType symbol_type
}
|
|
|
|
// Outcome of a symbol lookup; reported in TSymbolLookupResult.result_code.
enum TSymbolLookupResultCode {
  SYMBOL_FOUND,
  BINARY_NOT_FOUND,
  SYMBOL_NOT_FOUND
}
|
|
|
|
// Result of a symbol lookup requested with TSymbolLookupParams.
struct TSymbolLookupResult {
  // The result of the symbol lookup.
  1: required TSymbolLookupResultCode result_code

  // The symbol that was found. Set if result_code == SYMBOL_FOUND.
  2: optional string symbol

  // The error message if the symbol could not be found.
  3: optional string error_msg
}
|
|
|
|
// Sent from the impalad BE to FE with the results of each CatalogUpdate heartbeat.
// Contains details on all catalog objects that need to be updated.
struct TUpdateCatalogCacheRequest {
  // True if update only contains entries changed from the previous update. Otherwise,
  // contains the entire topic.
  1: required bool is_delta

  // The Catalog Service ID this update came from.
  2: required Types.TUniqueId catalog_service_id

  // New or modified items. Empty list if no items were updated.
  3: required list<CatalogObjects.TCatalogObject> updated_objects

  // Empty if no items were removed or is_delta is false.
  4: required list<CatalogObjects.TCatalogObject> removed_objects
}
|
|
|
|
// Response from a TUpdateCatalogCacheRequest.
struct TUpdateCatalogCacheResponse {
  // The catalog service id this version is from.
  1: required Types.TUniqueId catalog_service_id
}
|
|
|
|
// Contains all interesting statistics from a single 'memory pool' in the JVM.
// All numeric values are measured in bytes.
struct TJvmMemoryPool {
  // Memory committed by the operating system to this pool (i.e. not just virtual address
  // space)
  1: required i64 committed

  // The initial amount of memory committed to this pool
  2: required i64 init

  // The maximum amount of memory this pool will use.
  3: required i64 max

  // The amount of memory currently in use by this pool (will be <= committed).
  4: required i64 used

  // Maximum committed memory over time
  5: required i64 peak_committed

  // Should be always == init
  6: required i64 peak_init

  // Peak maximum memory over time (usually will not change)
  7: required i64 peak_max

  // Peak consumed memory over time
  8: required i64 peak_used

  // Name of this pool, defined by the JVM
  9: required string name
}
|
|
|
|
// Request to get one or all sets of memory pool metrics.
struct TGetJvmMetricsRequest {
  // If true, return all pools
  1: required bool get_all

  // If get_all is false, this must be set to the name of the memory pool to return.
  2: optional string memory_pool
}
|
|
|
|
// Response from JniUtil::GetJvmMetrics()
struct TGetJvmMetricsResponse {
  // One entry for every pool tracked by the Jvm, plus a synthetic aggregate pool called
  // 'total'
  1: required list<TJvmMemoryPool> memory_pools
}
|
|
|
|
// Request for the value of a single Hadoop configuration property, identified by name.
struct TGetHadoopConfigRequest {
  // The value of the <name> in the config <property>
  1: required string name
}
|
|
|
|
// Carries the value of a Hadoop configuration property.
// NOTE(review): presumably the response to TGetHadoopConfigRequest (pairing inferred
// from the names) - confirm against the caller.
struct TGetHadoopConfigResponse {
  // The corresponding value if one exists
  1: optional string value
}
|