// Copyright 2012 Cloudera Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

namespace cpp impala
namespace java com.cloudera.impala.thrift

include "Exprs.thrift"
include "Status.thrift"
include "Types.thrift"
include "hive_metastore.thrift"

// Types used to represent catalog objects.

// Type of Catalog object.
enum TCatalogObjectType {
  // UNKNOWN is used to indicate an error condition when converting
  // strings to their matching TCatalogObjectType.
  UNKNOWN,
  CATALOG,
  DATABASE,
  TABLE,
  VIEW,
  FUNCTION,
  DATA_SOURCE,
  ROLE,
  PRIVILEGE,
  HDFS_CACHE_POOL,
}

enum TTableType {
  HDFS_TABLE,
  HBASE_TABLE,
  VIEW,
  DATA_SOURCE_TABLE
}

enum THdfsFileFormat {
  TEXT,
  LZO_TEXT,
  RC_FILE,
  SEQUENCE_FILE,
  AVRO,
  PARQUET
}

enum THdfsCompression {
  NONE,
  DEFAULT,
  GZIP,
  DEFLATE,
  BZIP2,
  SNAPPY,
  SNAPPY_BLOCKED, // Used by sequence and rc files but not stored in the metadata.
  LZO,
  LZ4
}

// The table property type.
enum TTablePropertyType {
  TBL_PROPERTY,
  SERDE_PROPERTY
}

// The access level that is available to Impala on the Catalog object.
enum TAccessLevel {
  NONE,
  READ_WRITE,
  READ_ONLY,
  WRITE_ONLY,
}

// Mapping from names defined by Avro to values in the THdfsCompression enum.
const map<string, THdfsCompression> COMPRESSION_MAP = {
  "": THdfsCompression.NONE,
  "none": THdfsCompression.NONE,
  "deflate": THdfsCompression.DEFAULT,
  "gzip": THdfsCompression.GZIP,
  "bzip2": THdfsCompression.BZIP2,
  "snappy": THdfsCompression.SNAPPY
}

// Represents a single item in a partition spec (column name + value).
struct TPartitionKeyValue {
  // Partition column name
  1: required string name,
  // Partition value
  2: required string value
}

// Represents a fully qualified table name.
struct TTableName {
  // Name of the table's parent database.
  1: required string db_name

  // Name of the table.
  2: required string table_name
}

struct TTableStats {
  // Estimated number of rows in the table, or -1 if unknown.
  1: required i64 num_rows;
}

// Column stats data that Impala uses.
struct TColumnStats {
  // Average size and max size, in bytes. Excludes serialization overhead.
  // For fixed-length types (those which don't need additional storage besides the slot
  // they occupy), sets avg_size and max_size to their slot size.
  1: required double avg_size
  2: required i64 max_size

  // Estimated number of distinct values.
  3: required i64 num_distinct_values

  // Estimated number of null values.
  4: required i64 num_nulls
}

struct TColumn {
  1: required string columnName
  2: required Types.TColumnType columnType
  3: optional string comment

  // Stats for this column, if any are available.
  4: optional TColumnStats col_stats

  // Ordinal position in the source table.
  5: optional i32 position

  // Indicates whether this is an HBase column. If true, implies
  // all following HBase-specific fields are set.
  6: optional bool is_hbase_column
  7: optional string column_family
  8: optional string column_qualifier
  9: optional bool is_binary
}
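
// Illustrative sketch, not part of the schema: with the C++ bindings the Thrift
// compiler generates from this file (per 'namespace cpp impala' above), an
// HBase-backed column could be populated as below. The column, family, and
// qualifier values are hypothetical.
//
//   impala::TColumn col;
//   col.columnName = "event_count";
//   col.__set_is_hbase_column(true);      // optional field; setter marks it as set
//   col.__set_column_family("stats");
//   col.__set_column_qualifier("count");
//   col.__set_is_binary(false);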

// Represents a block in an HDFS file.
struct THdfsFileBlock {
  // Offset of this block within the file.
  1: required i64 offset

  // Total length of the block.
  2: required i64 length

  // Hosts that contain replicas of this block. Each value in the list is an index into
  // the network_addresses list of THdfsTable.
  3: required list<i32> replica_host_idxs

  // The list of disk ids for the file block. May not be set if disk ids are not
  // supported.
  4: optional list<i32> disk_ids

  // For each replica, specifies if the block is cached in memory.
  5: optional list<bool> is_replica_cached
}

// Represents an HDFS file in a partition.
struct THdfsFileDesc {
  // The name of the file (not the full path). The parent path is assumed to be the
  // 'location' of the THdfsPartition this file resides within.
  1: required string file_name

  // The total length of the file, in bytes.
  2: required i64 length

  // The type of compression used for this file.
  3: required THdfsCompression compression

  // The last modified time of the file.
  4: required i64 last_modification_time

  // List of THdfsFileBlocks that make up this file.
  5: required list<THdfsFileBlock> file_blocks
}

// Represents an HDFS partition.
struct THdfsPartition {
  1: required byte lineDelim
  2: required byte fieldDelim
  3: required byte collectionDelim
  4: required byte mapKeyDelim
  5: required byte escapeChar
  6: required THdfsFileFormat fileFormat
  7: list<Exprs.TExpr> partitionKeyExprs
  8: required i32 blockSize
  9: required THdfsCompression compression
  10: optional list<THdfsFileDesc> file_desc
  11: optional string location

  // The access level Impala has on this partition (READ_WRITE, READ_ONLY, etc).
  12: optional TAccessLevel access_level

  // Statistics on this partition, e.g., number of rows in this partition.
  13: optional TTableStats stats

  // True if this partition has been marked as cached (does not necessarily mean the
  // underlying data is cached).
  14: optional bool is_marked_cached

  // Unique (in this table) id of this partition. If -1, the partition does not
  // currently exist.
  15: optional i64 id
}

struct THdfsTable {
  1: required string hdfsBaseDir

  // Deprecated. Use TTableDescriptor.colNames.
  2: required list<string> colNames;

  // The string used to represent NULL partition keys.
  3: required string nullPartitionKeyValue

  // String to indicate a NULL column value in text files.
  5: required string nullColumnValue

  // Set to the table's Avro schema if this is an Avro table.
  6: optional string avroSchema

  // Map from partition id to partition metadata.
  4: required map<i64, THdfsPartition> partitions

  // Each TNetworkAddress is a datanode that contains blocks of a file in the table.
  // Used so that each THdfsFileBlock can just reference an index in this list rather
  // than duplicate the list of network addresses, which helps reduce memory usage.
  7: optional list<Types.TNetworkAddress> network_addresses
}

struct THBaseTable {
  1: required string tableName
  2: required list<string> families
  3: required list<string> qualifiers

  // Column i is binary encoded if binary_encoded[i] is true. Otherwise, column i is
  // text encoded.
  4: optional list<bool> binary_encoded
}

// Represents an external data source.
struct TDataSource {
  // Name of the data source.
  1: required string name

  // HDFS URI of the library.
  2: required string hdfs_location

  // Class name of the data source implementing the ExternalDataSource interface.
  3: required string class_name

  // Version of the ExternalDataSource interface. Currently only 'V1' exists.
  4: required string api_version
}
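
// Illustrative sketch, not part of the schema: a TDataSource entry for a
// hypothetical external source might carry values like the following (the name,
// jar path, and class name are made up for illustration).
//
//   impala::TDataSource ds;
//   ds.name = "example_source";
//   ds.hdfs_location = "hdfs://nn:8020/user/impala/lib/example-data-source.jar";
//   ds.class_name = "com.example.ExampleDataSource";
//   ds.api_version = "V1";   // only 'V1' exists today, per the comment above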

// Represents a table scanned by an external data source.
struct TDataSourceTable {
  // The data source that will scan this table.
  1: required TDataSource data_source

  // Init string for the table passed to the data source. May be an empty string.
  2: required string init_string
}

// Represents a table or view.
struct TTable {
  // Name of the parent database. Case insensitive, expected to be stored as lowercase.
  1: required string db_name

  // Unqualified table name. Case insensitive, expected to be stored as lowercase.
  2: required string tbl_name

  // Set if there were any errors loading the table metadata. The remaining fields in
  // the struct may not be set if there were problems loading the table metadata.
  // By convention, the final error message in the Status should contain the call stack
  // string pointing to where the metadata loading error occurred.
  3: optional Status.TStatus load_status

  // Table identifier.
  4: optional Types.TTableId id

  // The access level Impala has on this table (READ_WRITE, READ_ONLY, etc).
  5: optional TAccessLevel access_level

  // List of columns (excludes clustering columns).
  6: optional list<TColumn> columns

  // List of clustering columns (empty list if the table has no clustering columns).
  7: optional list<TColumn> clustering_columns

  // Table stats data for the table.
  8: optional TTableStats table_stats

  // Determines the table type: HDFS_TABLE, HBASE_TABLE, VIEW, or DATA_SOURCE_TABLE.
  9: optional TTableType table_type

  // Set iff this is an HDFS table.
  10: optional THdfsTable hdfs_table

  // Set iff this is an HBase table.
  11: optional THBaseTable hbase_table

  // The Hive Metastore representation of this table. May not be set if there were
  // errors loading the table metadata.
  12: optional hive_metastore.Table metastore_table

  // Set iff this is a table from an external data source.
  13: optional TDataSourceTable data_source_table
}

// Represents a database.
struct TDatabase {
  // Name of the database. Case insensitive, expected to be stored as lowercase.
  1: required string db_name

  // The HDFS location new tables will default their base directory to.
  2: optional string location
}

// Represents a role in an authorization policy.
struct TRole {
  // Case-insensitive role name.
  1: required string role_name

  // Unique ID of this role, generated by the Catalog Server.
  2: required i32 role_id

  // List of groups this role has been granted to (group names are case sensitive).
  // TODO: Keep a list of grant groups globally (in TCatalog?) and reference by ID since
  // the same groups will likely be shared across multiple roles.
  3: required list<string> grant_groups
}

// The scope a TPrivilege applies to.
enum TPrivilegeScope {
  SERVER,
  URI,
  DATABASE,
  TABLE,
}

// Represents a privilege granted to a role in an authorization policy.
struct TPrivilege {
  // The Sentry-defined name of this privilege. Will be in the form of:
  // [ServerName]->[DbName]->[TableName]->[Action Granted] and may contain wildcard/"*"
  // characters. The combination of role_id + privilege_name is guaranteed to be unique.
  1: required string privilege_name

  // The scope of the privilege: SERVER, URI, DATABASE, or TABLE.
  2: required TPrivilegeScope scope

  // The ID of the role this privilege belongs to.
  3: required i32 role_id
}

// Thrift representation of an HdfsCachePool.
struct THdfsCachePool {
  // Name of the cache pool.
  1: required string pool_name

  // In the future we may want to include additional info on the pool such as
  // the pool limits, pool owner, etc.
}
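
// Illustrative sketch, not part of the schema: following the privilege-name
// format documented on TPrivilege above, a table-scoped SELECT grant might be
// encoded as below (server, database, table, and role id are hypothetical).
//
//   impala::TPrivilege priv;
//   priv.privilege_name = "server1->functional->alltypes->select";
//   priv.scope = impala::TPrivilegeScope::TABLE;
//   priv.role_id = 42;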

// Represents state associated with the overall catalog.
struct TCatalog {
  // The CatalogService service ID.
  1: required Types.TUniqueId catalog_service_id
}

// Union of all Thrift Catalog objects.
struct TCatalogObject {
  // The object type (CATALOG, DATABASE, TABLE, VIEW, FUNCTION, etc.).
  1: required TCatalogObjectType type

  // The Catalog version this object is from.
  2: required i64 catalog_version

  // Set iff object type is CATALOG.
  3: optional TCatalog catalog

  // Set iff object type is DATABASE.
  4: optional TDatabase db

  // Set iff object type is TABLE or VIEW.
  5: optional TTable table

  // Set iff object type is FUNCTION.
  6: optional Types.TFunction fn

  // Set iff object type is DATA_SOURCE.
  7: optional TDataSource data_source

  // Set iff object type is ROLE.
  8: optional TRole role

  // Set iff object type is PRIVILEGE.
  9: optional TPrivilege privilege

  // Set iff object type is HDFS_CACHE_POOL.
  10: optional THdfsCachePool cache_pool
}
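
// Illustrative sketch, not part of the schema: a catalog update entry describing
// a table would set 'type', 'catalog_version', and exactly one of the optional
// payload fields (the db/table names and version number are hypothetical).
//
//   impala::TCatalogObject obj;
//   obj.type = impala::TCatalogObjectType::TABLE;
//   obj.catalog_version = 17;
//   impala::TTable tbl;
//   tbl.db_name = "functional";
//   tbl.tbl_name = "alltypes";
//   obj.__set_table(tbl);   // setter marks the optional 'table' field as set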