// Copyright 2012 Cloudera Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

namespace cpp impala
namespace java com.cloudera.impala.thrift

include "Exprs.thrift"
include "Status.thrift"
include "Types.thrift"
include "hive_metastore.thrift"

// Types used to represent catalog objects.

// Type of Catalog object.
enum TCatalogObjectType {
  // UNKNOWN is used to indicate an error condition when converting
  // strings to their matching TCatalogObjectType.
  UNKNOWN,
  CATALOG,
  DATABASE,
  TABLE,
  VIEW,
  FUNCTION,
  DATA_SOURCE,
  ROLE,
  PRIVILEGE,
  HDFS_CACHE_POOL,
}

enum TTableType {
  HDFS_TABLE,
  HBASE_TABLE,
  VIEW,
  DATA_SOURCE_TABLE
}

enum THdfsFileFormat {
  TEXT,
  LZO_TEXT,
  RC_FILE,
  SEQUENCE_FILE,
  AVRO,
  PARQUET
}

enum THdfsCompression {
  NONE,
  DEFAULT,
  GZIP,
  DEFLATE,
  BZIP2,
  SNAPPY,
  SNAPPY_BLOCKED, // Used by sequence and rc files but not stored in the metadata.
  LZO,
  LZ4
}

// The table property type.
enum TTablePropertyType {
  TBL_PROPERTY,
  SERDE_PROPERTY
}

// The access level that is available to Impala on the Catalog object.
enum TAccessLevel {
  NONE,
  READ_WRITE,
  READ_ONLY,
  WRITE_ONLY,
}

// Mapping from names defined by Avro to values in the THdfsCompression enum.
const map<string, THdfsCompression> COMPRESSION_MAP = {
  "": THdfsCompression.NONE,
  "none": THdfsCompression.NONE,
  "deflate": THdfsCompression.DEFAULT,
  "gzip": THdfsCompression.GZIP,
  "bzip2": THdfsCompression.BZIP2,
  "snappy": THdfsCompression.SNAPPY
}
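
// For example (illustrative): an Avro data file whose metadata names its codec
// "snappy" maps to THdfsCompression.SNAPPY above. Note that Avro's "deflate"
// maps to DEFAULT rather than to DEFLATE.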

// Represents a single item in a partition spec (column name + value)
struct TPartitionKeyValue {
  // Partition column name
  1: required string name,
  // Partition value
  2: required string value
}
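
// For example (illustrative), the partition spec item year=2014 could be
// written as the Thrift constant:
//   const TPartitionKeyValue EXAMPLE_KEY_VALUE = {"name": "year", "value": "2014"}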

// Represents a fully qualified table name.
struct TTableName {
  // Name of the table's parent database.
  1: required string db_name
  // Name of the table
  2: required string table_name
}

struct TTableStats {
  // Estimated number of rows in the table or -1 if unknown
  1: required i64 num_rows;
}

// Column stats data that Impala uses.
struct TColumnStats {
  // Average size and max size, in bytes. Excludes serialization overhead.
  // For fixed-length types (those which don't need additional storage besides the slot
  // they occupy), sets avg_size and max_size to their slot size.
  1: required double avg_size
  2: required i64 max_size
  // Estimated number of distinct values.
  3: required i64 num_distinct_values
  // Estimated number of null values.
  4: required i64 num_nulls
}
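
// For example (illustrative): for a fixed-length INT column, avg_size and
// max_size are both 4 (the slot size); for a STRING column they reflect the
// actual lengths of the string values, excluding serialization overhead.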

struct TColumn {
  1: required string columnName
  2: required Types.TColumnType columnType
  3: optional string comment
  // Stats for this column, if any are available.
  4: optional TColumnStats col_stats
  // Ordinal position in the source table
  5: optional i32 position
  // Indicates whether this is an HBase column. If true, implies
  // all following HBase-specific fields are set.
  6: optional bool is_hbase_column
  7: optional string column_family
  8: optional string column_qualifier
  9: optional bool is_binary
}

// Represents a block in an HDFS file
struct THdfsFileBlock {
  // Offset of this block within the file
  1: required i64 offset
  // Total length of the block
  2: required i64 length
  // Hosts that contain replicas of this block. Each value in the list is an index
  // into the network_addresses list of THdfsTable.
  3: required list<i32> replica_host_idxs
  // The list of disk ids for the file block. May not be set if disk ids are not
  // supported.
  4: optional list<i32> disk_ids
  // For each replica, specifies if the block is cached in memory.
  5: optional list<bool> is_replica_cached
}
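
// For example (illustrative): a block with replica_host_idxs = [0, 3] has
// replicas on the datanodes at indexes 0 and 3 of THdfsTable.network_addresses.
// If is_replica_cached = [true, false], only the replica on the first of those
// hosts is cached in memory.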

// Represents an HDFS file in a partition.
struct THdfsFileDesc {
  // The name of the file (not the full path). The parent path is assumed to be the
  // 'location' of the THdfsPartition this file resides within.
  1: required string file_name
  // The total length of the file, in bytes.
  2: required i64 length
  // The type of compression used for this file.
  3: required THdfsCompression compression
  // The last modified time of the file.
  4: required i64 last_modification_time
  // List of THdfsFileBlocks that make up this file.
  5: required list<THdfsFileBlock> file_blocks
}
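
// For example (illustrative): if the enclosing THdfsPartition has
// location = "/user/hive/warehouse/t/year=2014" and this descriptor has
// file_name = "000000_0", the file's full path is
// "/user/hive/warehouse/t/year=2014/000000_0".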

// Represents an HDFS partition
struct THdfsPartition {
  1: required byte lineDelim
  2: required byte fieldDelim
  3: required byte collectionDelim
  4: required byte mapKeyDelim
  5: required byte escapeChar
  6: required THdfsFileFormat fileFormat
  7: list<Exprs.TExpr> partitionKeyExprs
  8: required i32 blockSize
  9: required THdfsCompression compression
  10: optional list<THdfsFileDesc> file_desc
  11: optional string location
  // The access level Impala has on this partition (READ_WRITE, READ_ONLY, etc).
  12: optional TAccessLevel access_level
  // Statistics on this partition, e.g., number of rows in this partition.
  13: optional TTableStats stats
  // True if this partition has been marked as cached (does not necessarily mean the
  // underlying data is cached).
  14: optional bool is_marked_cached
  // Unique (in this table) id of this partition. If -1, the partition does not
  // currently exist.
  15: optional i64 id
}

struct THdfsTable {
  1: required string hdfsBaseDir
  // Deprecated. Use TTableDescriptor.colNames.
  2: required list<string> colNames;
  // The string used to represent NULL partition keys.
  3: required string nullPartitionKeyValue
  // String to indicate a NULL column value in text files
  5: required string nullColumnValue
  // Set to the table's Avro schema if this is an Avro table
  6: optional string avroSchema
  // Map from partition id to partition metadata.
  4: required map<i64, THdfsPartition> partitions
  // Each TNetworkAddress is a datanode which contains blocks of a file in the table.
  // Used so that each THdfsFileBlock can just reference an index in this list rather
  // than duplicate the list of network addresses, which helps reduce memory usage.
  7: optional list<Types.TNetworkAddress> network_addresses
}

struct THBaseTable {
  1: required string tableName
  2: required list<string> families
  3: required list<string> qualifiers
  // Column i is binary encoded if binary_encoded[i] is true. Otherwise, column i is
  // text encoded.
  4: optional list<bool> binary_encoded
}

// Represents an external data source
struct TDataSource {
  // Name of the data source
  1: required string name
  // HDFS URI of the library
  2: required string hdfs_location
  // Class name of the data source implementing the ExternalDataSource interface.
  3: required string class_name
  // Version of the ExternalDataSource interface. Currently only 'V1' exists.
  4: required string api_version
}

// Represents a table scanned by an external data source.
struct TDataSourceTable {
  // The data source that will scan this table.
  1: required TDataSource data_source
  // Init string for the table passed to the data source. May be an empty string.
  2: required string init_string
}

// Represents a table or view.
struct TTable {
  // Name of the parent database. Case insensitive, expected to be stored as lowercase.
  1: required string db_name
  // Unqualified table name. Case insensitive, expected to be stored as lowercase.
  2: required string tbl_name
  // Set if there were any errors loading the Table metadata. The remaining fields in
  // the struct may not be set if there were problems loading the table metadata.
  // By convention, the final error message in the Status should contain the call stack
  // string pointing to where the metadata loading error occurred.
  3: optional Status.TStatus load_status
  // Table identifier.
  4: optional Types.TTableId id
  // The access level Impala has on this table (READ_WRITE, READ_ONLY, etc).
  5: optional TAccessLevel access_level
  // List of columns (excludes clustering columns)
  6: optional list<TColumn> columns
  // List of clustering columns (empty list if table has no clustering columns)
  7: optional list<TColumn> clustering_columns
  // Table stats data for the table.
  8: optional TTableStats table_stats
  // Determines the table type - either HDFS, HBASE, VIEW, or DATA_SOURCE_TABLE.
  9: optional TTableType table_type
  // Set iff this is an HDFS table
  10: optional THdfsTable hdfs_table
  // Set iff this is an HBase table
  11: optional THBaseTable hbase_table
  // The Hive Metastore representation of this table. May not be set if there were
  // errors loading the table metadata.
  12: optional hive_metastore.Table metastore_table
  // Set iff this is a table from an external data source
  13: optional TDataSourceTable data_source_table
}

// Represents a database.
struct TDatabase {
  // Name of the database. Case insensitive, expected to be stored as lowercase.
  1: required string db_name
  // The HDFS location new tables will default their base directory to.
  2: optional string location
}

// Represents a role in an authorization policy.
struct TRole {
  // Case-insensitive role name
  1: required string role_name
  // Unique ID of this role, generated by the Catalog Server.
  2: required i32 role_id
  // List of groups this role has been granted to (group names are case sensitive).
  // TODO: Keep a list of grant groups globally (in TCatalog?) and reference by ID since
  // the same groups will likely be shared across multiple roles.
  3: required list<string> grant_groups
}

// The scope a TPrivilege applies to.
enum TPrivilegeScope {
  SERVER,
  URI,
  DATABASE,
  TABLE,
}

// Represents a privilege granted to a role in an authorization policy.
struct TPrivilege {
  // The Sentry defined name of this privilege. Will be in the form of:
  // [ServerName]->[DbName]->[TableName]->[Action Granted] and may contain wildcard/"*"
  // characters. The combination of role_id + privilege_name is guaranteed to be unique.
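  // For example (illustrative): a privilege granting SELECT on table
  // "functional.alltypes" of server "server1" might be named
  // "server1->functional->alltypes->select".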
  1: required string privilege_name
  // The scope of the privilege: SERVER, DATABASE, URI, or TABLE
  2: required TPrivilegeScope scope
  // The ID of the role this privilege belongs to.
  3: required i32 role_id
}

// Thrift representation of an HdfsCachePool.
struct THdfsCachePool {
  // Name of the cache pool
  1: required string pool_name

  // In the future we may want to include additional info on the pool such as
  // the pool limits, pool owner, etc.
}

// Represents state associated with the overall catalog.
struct TCatalog {
  // The CatalogService service ID.
  1: required Types.TUniqueId catalog_service_id
}

// Union of all Thrift Catalog objects
struct TCatalogObject {
  // The object type (CATALOG, DATABASE, TABLE, FUNCTION, etc).
  1: required TCatalogObjectType type
  // The Catalog version this object is from
  2: required i64 catalog_version
  // Set iff object type is CATALOG
  3: optional TCatalog catalog
  // Set iff object type is DATABASE
  4: optional TDatabase db
  // Set iff object type is TABLE or VIEW
  5: optional TTable table
  // Set iff object type is FUNCTION
  6: optional Types.TFunction fn
  // Set iff object type is DATA_SOURCE
  7: optional TDataSource data_source
  // Set iff object type is ROLE
  8: optional TRole role
  // Set iff object type is PRIVILEGE
  9: optional TPrivilege privilege
  // Set iff object type is HDFS_CACHE_POOL
  10: optional THdfsCachePool cache_pool
}
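
// For example (illustrative): a TCatalogObject describing a table sets
// type = TCatalogObjectType.TABLE, a catalog_version, and the 'table' field;
// the other optional fields are left unset.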