mirror of
https://github.com/apache/impala.git
synced 2025-12-19 18:12:08 -05:00
IMPALA-14081: Support create/drop paimon table for impala
This patch mainly implements creating and dropping Paimon tables
through Impala.
Supported impala data types:
- BOOLEAN
- TINYINT
- SMALLINT
- INTEGER
- BIGINT
- FLOAT
- DOUBLE
- STRING
- DECIMAL(P,S)
- TIMESTAMP
- CHAR(N)
- VARCHAR(N)
- BINARY
- DATE
Syntax for creating paimon table:
CREATE [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name
(
[col_name data_type ,...]
[PRIMARY KEY (col1,col2)]
)
[PARTITIONED BY (col_name data_type [COMMENT 'col_comment'], ...)]
STORED AS PAIMON
[LOCATION 'hdfs_path']
[TBLPROPERTIES (
'primary-key'='col1,col2',
'file.format' = 'orc/parquet',
'bucket' = '2',
'bucket-key' = 'col3'
)];
Two types of paimon catalogs are supported.
(1) Create table with hive catalog:
CREATE TABLE paimon_hive_cat(userid INT,movieId INT)
STORED AS PAIMON;
(2) Create table with hadoop catalog:
CREATE [EXTERNAL] TABLE paimon_hadoop_cat
STORED AS PAIMON
TBLPROPERTIES('paimon.catalog'='hadoop',
'paimon.catalog_location'='/path/to/paimon_hadoop_catalog',
'paimon.table_identifier'='paimondb.paimontable');
SHOW TABLE STATS/SHOW COLUMN STATS/SHOW PARTITIONS/SHOW FILES
statements are also supported.
TODO:
- Patches pending submission:
- Query support for paimon data files.
- Partition pruning and predicate push down.
- Query support with time travel.
- Query support for paimon meta tables.
- WIP:
- Complex type query support.
- Virtual Column query support for querying
paimon data table.
- Native paimon table scanner, instead of the
JNI-based one.
Testing:
- Add unit test for paimon impala type conversion.
- Add unit test for ToSqlTest.java.
- Add unit test for AnalyzeDDLTest.java.
- Update default_file_format TestEnumCase in
be/src/service/query-options-test.cc.
- Update test case in
testdata/workloads/functional-query/queries/QueryTest/set.test.
- Add test cases in metadata/test_show_create_table.py.
- Add custom test test_paimon.py.
Change-Id: I57e77f28151e4a91353ef77050f9f0cd7d9d05ef
Reviewed-on: http://gerrit.cloudera.org:8080/22914
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Reviewed-by: Riza Suminto <riza.suminto@cloudera.com>
This commit is contained in:
@@ -65,6 +65,7 @@ enum TTableType {
|
||||
MATERIALIZED_VIEW = 7
|
||||
// Represents a system table reflecting backend internal state.
|
||||
SYSTEM_TABLE = 8
|
||||
PAIMON_TABLE = 9
|
||||
}
|
||||
|
||||
// TODO: Separate the storage engines (e.g. Kudu) from the file formats.
|
||||
@@ -82,6 +83,7 @@ enum THdfsFileFormat {
|
||||
ICEBERG = 8
|
||||
JSON = 9
|
||||
JDBC = 10
|
||||
PAIMON = 11
|
||||
}
|
||||
|
||||
enum TVirtualColumnType {
|
||||
@@ -90,7 +92,9 @@ enum TVirtualColumnType {
|
||||
FILE_POSITION,
|
||||
PARTITION_SPEC_ID,
|
||||
ICEBERG_PARTITION_SERIALIZED,
|
||||
ICEBERG_DATA_SEQUENCE_NUMBER
|
||||
ICEBERG_DATA_SEQUENCE_NUMBER,
|
||||
PARTITION_VALUE_SERIALIZED,
|
||||
BUCKET_ID
|
||||
}
|
||||
|
||||
// TODO: Since compression is also enabled for Kudu columns, we should
|
||||
@@ -698,6 +702,25 @@ struct TSystemTable {
|
||||
1: required TSystemTableName table_name
|
||||
}
|
||||
|
||||
enum TPaimonCatalog {
|
||||
HADOOP_CATALOG = 0
|
||||
HIVE_CATALOG = 1
|
||||
}
|
||||
|
||||
// Paimon Table kind
|
||||
enum TPaimonTableKind {
|
||||
JNI = 0
|
||||
NATIVE = 1
|
||||
}
|
||||
|
||||
// Represents a Paimon Table
|
||||
struct TPaimonTable {
|
||||
// Paimon table kind.
|
||||
1: required TPaimonTableKind kind
|
||||
// Jni table object.
|
||||
2: optional binary jni_tbl_obj
|
||||
}
|
||||
|
||||
// Represents a table or view.
|
||||
struct TTable {
|
||||
// Name of the parent database. Case insensitive, expected to be stored as lowercase.
|
||||
@@ -759,6 +782,9 @@ struct TTable {
|
||||
|
||||
// Set if this is a system table
|
||||
19: optional TSystemTable system_table
|
||||
|
||||
// Set if this is a paimon table
|
||||
20: optional TPaimonTable paimon_table
|
||||
}
|
||||
|
||||
// Represents a database.
|
||||
|
||||
@@ -91,6 +91,7 @@ struct TTableDescriptor {
|
||||
10: optional CatalogObjects.TKuduTable kuduTable
|
||||
11: optional CatalogObjects.TIcebergTable icebergTable
|
||||
12: optional CatalogObjects.TSystemTable systemTable
|
||||
13: optional CatalogObjects.TPaimonTable paimonTable
|
||||
|
||||
// Unqualified name of table
|
||||
7: required string tableName
|
||||
|
||||
Reference in New Issue
Block a user