IMPALA-14081: Support create/drop paimon table for impala

This patch implements creating and dropping Paimon tables
through Impala.

Supported Impala data types (see the example after the list):
- BOOLEAN
- TINYINT
- SMALLINT
- INTEGER
- BIGINT
- FLOAT
- DOUBLE
- STRING
- DECIMAL(P,S)
- TIMESTAMP
- CHAR(N)
- VARCHAR(N)
- BINARY
- DATE
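
For illustration, a table using several of the supported types could be
declared as follows (a sketch; table and column names are arbitrary):

CREATE TABLE paimon_types_demo (
  id BIGINT,
  flag BOOLEAN,
  small_val SMALLINT,
  price DECIMAL(10,2),
  ratio DOUBLE,
  name STRING,
  code CHAR(4),
  tag VARCHAR(32),
  payload BINARY,
  created_at TIMESTAMP,
  dt DATE
)
STORED AS PAIMON;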

Syntax for creating a Paimon table:

CREATE [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name
(
[col_name data_type ,...]
[PRIMARY KEY (col1,col2)]
)
[PARTITIONED BY (col_name data_type [COMMENT 'col_comment'], ...)]
STORED AS PAIMON
[LOCATION 'hdfs_path']
[TBLPROPERTIES (
'primary-key'='col1,col2',
'file.format' = 'orc/parquet',
'bucket' = '2',
'bucket-key' = 'col3'
)];
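
For example, a primary-key table can be declared with the PRIMARY KEY
clause, an alternative to the 'primary-key' table property (a minimal
sketch; table and column names are illustrative):

CREATE TABLE paimon_pk_demo (
  user_id BIGINT,
  item_id BIGINT,
  behavior STRING,
  PRIMARY KEY (user_id, item_id)
)
STORED AS PAIMON;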

Two types of paimon catalogs are supported.

(1) Create table with hive catalog:

CREATE TABLE paimon_hive_cat(userid INT,movieId INT)
STORED AS PAIMON;

(2) Create table with hadoop catalog:

CREATE [EXTERNAL] TABLE paimon_hadoop_cat
STORED AS PAIMON
TBLPROPERTIES('paimon.catalog'='hadoop',
'paimon.catalog_location'='/path/to/paimon_hadoop_catalog',
'paimon.table_identifier'='paimondb.paimontable');

SHOW TABLE STATS / SHOW COLUMN STATS / SHOW PARTITIONS / SHOW FILES
statements are also supported.
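
For example, using the table created above (a sketch; SHOW PARTITIONS
applies only to partitioned Paimon tables):

SHOW TABLE STATS paimon_hive_cat;
SHOW COLUMN STATS paimon_hive_cat;
SHOW FILES IN paimon_hive_cat;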

TODO:
    - Patches pending submission:
        - Query support for paimon data files.
        - Partition pruning and predicate push down.
        - Query support with time travel.
        - Query support for paimon meta tables.
    - WIP:
        - Complex type query support.
        - Virtual column query support for querying
          Paimon data tables.
        - Native Paimon table scanner, instead of a
          JNI-based one.
Testing:
    - Add unit tests for Paimon/Impala type conversion.
    - Add unit tests to ToSqlTest.java.
    - Add unit tests to AnalyzeDDLTest.java.
    - Update default_file_format TestEnumCase in
      be/src/service/query-options-test.cc.
    - Update test case in
      testdata/workloads/functional-query/queries/QueryTest/set.test.
    - Add test cases in metadata/test_show_create_table.py.
    - Add custom test test_paimon.py.

Change-Id: I57e77f28151e4a91353ef77050f9f0cd7d9d05ef
Reviewed-on: http://gerrit.cloudera.org:8080/22914
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Reviewed-by: Riza Suminto <riza.suminto@cloudera.com>

@@ -233,7 +233,7 @@ TEST(QueryOptions, SetEnumOptions) {
(THREE_LEVEL, TWO_LEVEL, TWO_LEVEL_THEN_THREE_LEVEL)), true);
TestEnumCase(options, CASE(default_file_format, THdfsFileFormat,
(TEXT, RC_FILE, SEQUENCE_FILE, AVRO, PARQUET, KUDU, ORC, HUDI_PARQUET, ICEBERG,
JSON, JDBC)), true);
JSON, JDBC, PAIMON)), true);
TestEnumCase(options, CASE(runtime_filter_mode, TRuntimeFilterMode,
(OFF, LOCAL, GLOBAL)), true);
TestEnumCase(options, CASE(kudu_read_mode, TKuduReadMode,

@@ -296,7 +296,7 @@ export IMPALA_DBCP2_VERSION=2.9.0
export IMPALA_DROPWIZARD_METRICS_VERSION=4.2.26
export IMPALA_AIRCOMPRESSOR_VERSION=0.27
export IMPALA_DATASKETCHES_VERSION=6.0.0
export IMPALA_PAIMON_VERSION=1.1.1
# When Impala is building docker images on Redhat-based distributions,
# it is useful to be able to customize the base image. Some users will
# want to use open source / free distributions like Centos/Rocky/Alma/etc.
@@ -1242,6 +1242,7 @@ echo "IMPALA_HUDI_VERSION = $IMPALA_HUDI_VERSION"
echo "IMPALA_KUDU_VERSION = $IMPALA_KUDU_VERSION"
echo "IMPALA_RANGER_VERSION = $IMPALA_RANGER_VERSION"
echo "IMPALA_ICEBERG_VERSION = $IMPALA_ICEBERG_VERSION"
echo "IMPALA_PAIMON_VERSION = $IMPALA_PAIMON_VERSION"
echo "IMPALA_COS_VERSION = $IMPALA_COS_VERSION"
echo "IMPALA_OBS_VERSION = $IMPALA_OBS_VERSION"
echo "IMPALA_SYSTEM_PYTHON2 = $IMPALA_SYSTEM_PYTHON2"

@@ -170,6 +170,7 @@ testdata/data/widerow.txt
testdata/data/local_tbl/00000.txt
testdata/data/hudi_parquet/*
testdata/data/iceberg_test/*
testdata/data/paimon_test/*
testdata/data/json_test/*
testdata/data/sfs_d2.txt
testdata/data/sfs_d4.txt

@@ -65,6 +65,7 @@ enum TTableType {
MATERIALIZED_VIEW = 7
// Represents a system table reflecting backend internal state.
SYSTEM_TABLE = 8
PAIMON_TABLE = 9
}
// TODO: Separate the storage engines (e.g. Kudu) from the file formats.
@@ -82,6 +83,7 @@ enum THdfsFileFormat {
ICEBERG = 8
JSON = 9
JDBC = 10
PAIMON = 11
}
enum TVirtualColumnType {
@@ -90,7 +92,9 @@ enum TVirtualColumnType {
FILE_POSITION,
PARTITION_SPEC_ID,
ICEBERG_PARTITION_SERIALIZED,
ICEBERG_DATA_SEQUENCE_NUMBER
ICEBERG_DATA_SEQUENCE_NUMBER,
PARTITION_VALUE_SERIALIZED,
BUCKET_ID
}
// TODO: Since compression is also enabled for Kudu columns, we should
@@ -698,6 +702,25 @@ struct TSystemTable {
1: required TSystemTableName table_name
}
enum TPaimonCatalog {
HADOOP_CATALOG = 0
HIVE_CATALOG = 1
}
// Paimon Table kind
enum TPaimonTableKind {
JNI = 0
NATIVE = 1
}
// Represents a Paimon Table
struct TPaimonTable {
// Paimon table kind.
1: required TPaimonTableKind kind
// Jni table object.
2: optional binary jni_tbl_obj
}
// Represents a table or view.
struct TTable {
// Name of the parent database. Case insensitive, expected to be stored as lowercase.
@@ -759,6 +782,9 @@ struct TTable {
// Set if this is a system table
19: optional TSystemTable system_table
// Set if this is a paimon table
20: optional TPaimonTable paimon_table
}
// Represents a database.

@@ -91,6 +91,7 @@ struct TTableDescriptor {
10: optional CatalogObjects.TKuduTable kuduTable
11: optional CatalogObjects.TIcebergTable icebergTable
12: optional CatalogObjects.TSystemTable systemTable
13: optional CatalogObjects.TPaimonTable paimonTable
// Unqualified name of table
7: required string tableName

@@ -347,6 +347,7 @@ under the License.
<topicref href="topics/impala_kudu.xml"/>
<topicref href="topics/impala_hbase.xml"/>
<topicref rev="4.1.0" href="topics/impala_iceberg.xml"/>
<topicref rev="5.0.0" href="topics/impala_paimon.xml"/>
<topicref href="topics/impala_s3.xml"/>
<topicref rev="2.9.0" href="topics/impala_adls.xml"/>
<topicref href="topics/impala_isilon.xml"/>

@@ -61,6 +61,10 @@ under the License.
<topicmeta><linktext>the Apache Iceberg Puffin site</linktext></topicmeta>
</keydef>
<keydef href="https://https://paimon.apache.org" scope="external" format="html" keys="upstream_paimon_site">
<topicmeta><linktext>the Apache Paimon site</linktext></topicmeta>
</keydef>
<keydef href="https://ozone.apache.org" scope="external" format="html" keys="upstream_ozone_site">
<topicmeta><linktext>the Apache Ozone site</linktext></topicmeta>
</keydef>

@@ -0,0 +1,352 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
<concept id="impala_paimon">
<title id="paimon">Using Impala with Paimon Tables</title>
<titlealts audience="PDF"><navtitle>Paimon Tables</navtitle></titlealts>
<prolog>
<metadata>
<data name="Category" value="Impala"/>
<data name="Category" value="Paimon"/>
<data name="Category" value="Querying"/>
<data name="Category" value="Data Analysts"/>
<data name="Category" value="Developers"/>
<data name="Category" value="Tables"/>
</metadata>
</prolog>
<conbody>
<p>
<indexterm audience="hidden">Paimon</indexterm>
Impala adds experimental support for Apache Paimon, an open table format for building a real-time lakehouse.
With this functionality, you can access existing Paimon tables using SQL and perform
analytics over them. Both the Hive catalog and the Hadoop catalog are supported.
</p>
<p>
For more information on Paimon, see <xref keyref="upstream_paimon_site"/>.
</p>
<p outputclass="toc inpage"/>
</conbody>
<concept id="paimon_features">
<title>Overview of Paimon features</title>
<prolog>
<metadata>
<data name="Category" value="Concepts"/>
</metadata>
</prolog>
<conbody>
<ul>
<li>
<b>Real time updates:</b>
<ul>
<li>
Primary key tables support large-scale update writes with very high update performance,
typically ingested through Flink streaming.
</li>
<li>
Supports configurable merge engines that control how records are updated:
deduplicate to keep the last row, partial-update, aggregate records, or first-row.
</li>
</ul>
</li>
<li>
<b>Data Lake Capabilities:</b>
<ul>
<li>
Scalable metadata: supports petabyte-scale datasets and very large
numbers of partitions.
</li>
<li>
Supports ACID Transactions &amp; Time Travel &amp; Schema Evolution.
</li>
</ul>
</li>
</ul>
</conbody>
</concept>
<concept id="paimon_create">
<title>Creating Paimon tables with Impala</title>
<prolog>
<metadata>
<data name="Category" value="Concepts"/>
</metadata>
</prolog>
<conbody>
<p>
When you have an existing Paimon table that is not yet present in the Hive Metastore,
you can use the <codeph>CREATE EXTERNAL TABLE</codeph> command in Impala to add the table to the Hive
Metastore so that Impala can interact with it. Impala currently supports
HadoopCatalog and HiveCatalog. If you have an existing table in HiveCatalog
and you are using the same Hive Metastore, no further action is needed.
</p>
<ul>
<li>
<b>HadoopCatalog</b>. A table in HadoopCatalog means that there is a catalog location
in the file system under which Paimon tables are stored. Use the following command
to add a table in a HadoopCatalog to Impala:
<codeblock>
CREATE EXTERNAL TABLE paimon_hadoop_cat
STORED AS PAIMON
TBLPROPERTIES('paimon.catalog'='hadoop',
'paimon.catalog_location'='/path/to/paimon_hadoop_catalog',
'paimon.table_identifier'='paimondb.paimontable');
</codeblock>
</li>
<li>
<b>HiveCatalog</b>. You can create a managed Paimon table in HMS as shown below:
<codeblock>
CREATE TABLE paimon_hive_cat(userid INT,movieId INT)
STORED AS PAIMON;
</codeblock>
</li>
</ul>
<p>
<b>CREATE TABLE syntax for Paimon tables</b>
<codeblock>
CREATE [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name
(
[col_name data_type ,...]
[PRIMARY KEY (col1,col2)]
)
[PARTITIONED BY (col_name data_type [COMMENT 'col_comment'], ...)]
STORED AS PAIMON
[LOCATION 'hdfs_path']
[TBLPROPERTIES (
'primary-key'='col1,col2',
'file.format' = 'orc/parquet',
'bucket' = '2',
'bucket-key' = 'col3'
)]
</codeblock>
</p>
<ul>
<li>
<b>Create partitioned Paimon table example:</b>
<codeblock>
CREATE TABLE support_partitioned_by_table2 (
user_id BIGINT COMMENT 'The user_id field',
item_id BIGINT COMMENT 'The item_id field',
behavior STRING COMMENT 'The behavior field'
)
PARTITIONED BY (
dt STRING COMMENT 'The dt field',
hh STRING COMMENT 'The hh field'
)
STORED AS PAIMON;
</codeblock>
</li>
<li>
<b>Create partitioned Paimon table example with primary key:</b>
<codeblock>
CREATE TABLE test_create_managed_part_pk_paimon_table (
user_id BIGINT COMMENT 'The user_id field',
item_id BIGINT COMMENT 'The item_id field',
behavior STRING COMMENT 'The behavior field'
)
PARTITIONED BY (
dt STRING COMMENT 'The dt field',
hh STRING COMMENT 'The hh field'
)
STORED AS PAIMON
TBLPROPERTIES (
'primary-key'='user_id'
);
</codeblock>
</li>
<li>
<b>Create partitioned Paimon table example with bucket:</b>
<codeblock>
CREATE TABLE test_create_managed_bucket_paimon_table (
user_id BIGINT COMMENT 'The user_id field',
item_id BIGINT COMMENT 'The item_id field',
behavior STRING COMMENT 'The behavior field'
)
STORED AS PAIMON
TBLPROPERTIES (
'bucket' = '4',
'bucket-key'='behavior'
);
</codeblock>
</li>
<li>
<b>Create external Paimon table example with no column definitions:</b>
<p>For external tables, you can omit the column definitions; Impala infers the schema from the underlying Paimon
table. For example:</p>
<codeblock>
CREATE EXTERNAL TABLE ext_paimon_table
STORED AS PAIMON
[LOCATION 'underlying_paimon_table_location']
</codeblock>
</li>
</ul>
</conbody>
</concept>
<concept id="paimon_drop">
<title>Dropping Paimon tables</title>
<conbody>
<p>
You can use the <codeph>DROP TABLE</codeph> statement to remove a Paimon table:
<codeblock>
DROP TABLE test_create_managed_bucket_paimon_table;
</codeblock>
</p>
<p>
When the <codeph>external.table.purge</codeph> table property is set to true, the
<codeph>DROP TABLE</codeph> statement also deletes the data files. This property
is set to true when Impala creates the Paimon table via <codeph>CREATE TABLE</codeph>.
When <codeph>CREATE EXTERNAL TABLE</codeph> is used (the table already exists in some
catalog), <codeph>external.table.purge</codeph> is set to false, so
<codeph>DROP TABLE</codeph> removes only the table definition in HMS and leaves the
data files in place.
</p>
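<p>
For example (a sketch; the table name and catalog location are placeholders), dropping an
external Paimon table removes only the table definition from HMS and leaves the
underlying Paimon data files in place:
<codeblock>
CREATE EXTERNAL TABLE ext_paimon_drop_demo
STORED AS PAIMON
TBLPROPERTIES('paimon.catalog'='hadoop',
'paimon.catalog_location'='/path/to/paimon_hadoop_catalog',
'paimon.table_identifier'='paimondb.paimontable');

-- Removes only the table definition in HMS; the data files remain.
DROP TABLE ext_paimon_drop_demo;
</codeblock>
</p>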
</conbody>
</concept>
<concept id="paimon_types">
<title>Supported Data Types for Paimon Columns</title>
<conbody>
<p>
You can get information about the supported Paimon data types in
<xref href="https://paimon.apache.org/docs/1.1/concepts/data-types/" scope="external" format="html">
the Paimon spec</xref>.
</p>
<p>
The Paimon data types can be mapped to the following SQL types in Impala:
<table rowsep="1" colsep="1" id="paimon_types_sql_types">
<tgroup cols="2">
<colspec colname="c1" colnum="1"/>
<colspec colname="c2" colnum="2"/>
<thead>
<row>
<entry>Paimon type</entry>
<entry>SQL type in Impala</entry>
</row>
</thead>
<tbody>
<row>
<entry>BOOLEAN</entry>
<entry>BOOLEAN</entry>
</row>
<row>
<entry>TINYINT</entry>
<entry>TINYINT</entry>
</row>
<row>
<entry>SMALLINT</entry>
<entry>SMALLINT</entry>
</row>
<row>
<entry>INT</entry>
<entry>INTEGER</entry>
</row>
<row>
<entry>BIGINT</entry>
<entry>BIGINT</entry>
</row>
<row>
<entry>FLOAT</entry>
<entry>FLOAT</entry>
</row>
<row>
<entry>DOUBLE</entry>
<entry>DOUBLE</entry>
</row>
<row>
<entry>STRING</entry>
<entry>STRING</entry>
</row>
<row>
<entry>DECIMAL(P,S)</entry>
<entry>DECIMAL(P,S)</entry>
</row>
<row>
<entry>TIMESTAMP</entry>
<entry>TIMESTAMP</entry>
</row>
<row>
<entry>TIMESTAMP WITH TIME ZONE</entry>
<entry>Not Supported</entry>
</row>
<row>
<entry>CHAR(N)</entry>
<entry>CHAR(N)</entry>
</row>
<row>
<entry>VARCHAR(N)</entry>
<entry>VARCHAR(N)</entry>
</row>
<row>
<entry>BINARY(N)</entry>
<entry>BINARY(N)</entry>
</row>
<row>
<entry>VARBINARY(N)</entry>
<entry>BINARY(N)</entry>
</row>
<row>
<entry>DATE</entry>
<entry>DATE</entry>
</row>
<row>
<entry>TIME</entry>
<entry>Not Supported</entry>
</row>
<row>
<entry>Not Supported</entry>
<entry>DATETIME</entry>
</row>
<row>
<entry>MULTISET&lt;t&gt;</entry>
<entry>Not Supported</entry>
</row>
<row>
<entry>ARRAY&lt;t&gt;</entry>
<entry>Not Supported For Now</entry>
</row>
<row>
<entry>MAP&lt;t&gt;</entry>
<entry>Not Supported For Now</entry>
</row>
<row>
<entry>ROW&lt;n1 t1,n2 t2&gt;</entry>
<entry>Not Supported For Now</entry>
</row>
</tbody>
</tgroup>
</table>
</p>
<p>
Note: type combinations without a mapping are marked "Not Supported".
Items marked "Not Supported For Now" are planned to be supported later.
</conbody>
</concept>
</concept>

@@ -1588,6 +1588,13 @@ See the history, any recent changes, here:
<entry>X<fn>Impala 4.0 and higher</fn></entry>
<entry/>
</row>
<row>
<entry><codeph>paimon</codeph></entry>
<entry/>
<entry/>
<entry>X<fn>Impala 5.0 and higher</fn></entry>
<entry/>
</row>
<row>
<entry><codeph>identity</codeph></entry>
<entry>X</entry>

@@ -642,6 +642,22 @@ under the License.
<version>2.0.3</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.paimon</groupId>
<artifactId>paimon-hive-connector-3.1</artifactId>
<version>${paimon.version}</version>
<exclusions>
<exclusion>
<artifactId>lz4-java</artifactId>
<groupId>org.lz4</groupId>
</exclusion>
<exclusion>
<artifactId>caffeine</artifactId>
<groupId>com.github.ben-manes.caffeine</groupId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
<reporting>

@@ -341,7 +341,7 @@ terminal
KW_NOVALIDATE, KW_NULL, KW_NULLS, KW_OF, KW_OFFSET, KW_ON, KW_OPTIMIZE, KW_OR,
KW_ORC, KW_ORDER, KW_OUTER,
KW_OVER, KW_OVERWRITE, KW_PARQUET, KW_PARQUETFILE, KW_PARTITION, KW_PARTITIONED,
KW_PARTITIONS, KW_PRECEDING, KW_PREPARE_FN, KW_PRIMARY, KW_PRODUCED, KW_PURGE, KW_QUERY,
KW_PARTITIONS, KW_PRECEDING, KW_PREPARE_FN, KW_PAIMON, KW_PRIMARY, KW_PRODUCED, KW_PURGE, KW_QUERY,
KW_RANGE, KW_RCFILE, KW_RECOVER, KW_REFERENCES, KW_REFRESH, KW_REGEXP, KW_RELY,
KW_RENAME, KW_REPEATABLE, KW_REPLACE, KW_REPLICATION, KW_RESTRICT, KW_RETURNS,
KW_REVOKE, KW_RIGHT, KW_RLIKE, KW_ROLE, KW_ROLES, KW_ROLLUP, KW_ROW, KW_ROWS, KW_RWSTORAGE,
@@ -2330,6 +2330,8 @@ file_format_val ::=
{: RESULT = THdfsFileFormat.JSON; :}
| KW_JDBC
{: RESULT = THdfsFileFormat.JDBC; :}
| KW_PAIMON
{: RESULT = THdfsFileFormat.PAIMON; :}
;
storage_engine_val ::=
@@ -2339,6 +2341,8 @@ storage_engine_val ::=
{: RESULT = THdfsFileFormat.ICEBERG; :}
| KW_JDBC
{: RESULT = THdfsFileFormat.JDBC; :}
| KW_PAIMON
{: RESULT = THdfsFileFormat.PAIMON; :}
;
tbl_properties ::=
@@ -4598,6 +4602,8 @@ word ::=
{: RESULT = r.toString(); :}
| KW_ICEBERG:r
{: RESULT = r.toString(); :}
| KW_PAIMON:r
{: RESULT = r.toString(); :}
| KW_IF:r
{: RESULT = r.toString(); :}
| KW_IGNORE:r

@@ -21,28 +21,27 @@ import org.apache.impala.common.AnalysisException;
import java.util.Collection;
class AnalysisUtils {
public class AnalysisUtils {
static <T> void throwIfNotNull(T o, String message) throws AnalysisException {
public static <T> void throwIfNotNull(T o, String message) throws AnalysisException {
if (o != null) throw new AnalysisException(message);
}
static void throwIfNotEmpty(Collection<?> c, String message)
public static void throwIfNotEmpty(Collection<?> c, String message)
throws AnalysisException {
if (c != null && !c.isEmpty()) throw new AnalysisException(message);
}
static <T> void throwIfNull(T o, String message) throws AnalysisException {
public static <T> void throwIfNull(T o, String message) throws AnalysisException {
if (o == null) throw new AnalysisException(message);
}
static void throwIfNullOrEmpty(Collection<?> c, String message)
public static void throwIfNullOrEmpty(Collection<?> c, String message)
throws AnalysisException {
if (c == null || c.isEmpty()) throw new AnalysisException(message);
}
static void throwIfTrue(boolean cond, String message)
throws AnalysisException {
public static void throwIfTrue(boolean cond, String message) throws AnalysisException {
if (cond) throw new AnalysisException(message);
}
}

@@ -79,6 +79,7 @@ import org.apache.impala.catalog.VirtualColumn;
import org.apache.impala.catalog.VirtualTable;
import org.apache.impala.catalog.iceberg.IcebergMetadataTable;
import org.apache.impala.catalog.local.LocalKuduTable;
import org.apache.impala.catalog.paimon.FePaimonTable;
import org.apache.impala.common.AnalysisException;
import org.apache.impala.common.IdGenerator;
import org.apache.impala.common.ImpalaException;
@@ -1136,7 +1137,8 @@ public class Analyzer {
Preconditions.checkState(table instanceof FeFsTable ||
table instanceof FeKuduTable ||
table instanceof FeHBaseTable ||
table instanceof FeDataSourceTable);
table instanceof FeDataSourceTable ||
table instanceof FePaimonTable);
return new BaseTableRef(tableRef, resolvedPath);
} else {
return new CollectionTableRef(tableRef, resolvedPath, false);

@@ -29,6 +29,7 @@ import org.apache.hadoop.hive.metastore.api.SQLForeignKey;
import org.apache.hadoop.hive.metastore.api.SQLPrimaryKey;
import org.apache.iceberg.TableProperties;
import org.apache.iceberg.mr.Catalogs;
import org.apache.impala.analysis.paimon.PaimonAnalyzer;
import org.apache.impala.authorization.AuthorizationConfig;
import org.apache.impala.catalog.DataSourceTable;
import org.apache.impala.catalog.HdfsStorageDescriptor;
@@ -135,12 +136,12 @@ public class CreateTableStmt extends StatementBase implements SingleTableStmt {
public TSortingOrder getSortingOrder() { return tableDef_.getSortingOrder(); }
public String getComment() { return tableDef_.getComment(); }
public Map<String, String> getTblProperties() { return tableDef_.getTblProperties(); }
private HdfsCachingOp getCachingOp() { return tableDef_.getCachingOp(); }
public HdfsCachingOp getCachingOp() { return tableDef_.getCachingOp(); }
public HdfsUri getLocation() { return tableDef_.getLocation(); }
Map<String, String> getSerdeProperties() { return tableDef_.getSerdeProperties(); }
public THdfsFileFormat getFileFormat() { return tableDef_.getFileFormat(); }
RowFormat getRowFormat() { return tableDef_.getRowFormat(); }
private void putGeneratedProperty(String key, String value) {
public void putGeneratedProperty(String key, String value) {
tableDef_.putGeneratedProperty(key, value);
}
public Map<String, String> getGeneratedKuduProperties() {
@@ -272,8 +273,9 @@ public class CreateTableStmt extends StatementBase implements SingleTableStmt {
// Avro tables can have empty column defs because they can infer them from the Avro
// schema. Likewise for external Kudu tables, the schema can be read from Kudu.
if (getColumnDefs().isEmpty() && getFileFormat() != THdfsFileFormat.AVRO
&& getFileFormat() != THdfsFileFormat.KUDU && getFileFormat() !=
THdfsFileFormat.ICEBERG) {
&& getFileFormat() != THdfsFileFormat.KUDU
&& getFileFormat() != THdfsFileFormat.ICEBERG
&& getFileFormat() != THdfsFileFormat.PAIMON) {
throw new AnalysisException("Table requires at least 1 column");
}
if (getRowFormat() != null) {
@@ -320,6 +322,10 @@ public class CreateTableStmt extends StatementBase implements SingleTableStmt {
analyzeJdbcSchema(analyzer);
}
if (getFileFormat() == THdfsFileFormat.PAIMON) {
PaimonAnalyzer.analyzeCreateTableStmt(this, analyzer);
}
// If lineage logging is enabled, compute minimal lineage graph.
if (BackendConfig.INSTANCE.getComputeLineage() || RuntimeEnv.INSTANCE.isTestEnv()) {
computeLineageGraph(analyzer);
@@ -932,7 +938,7 @@ public class CreateTableStmt extends StatementBase implements SingleTableStmt {
/**
* @return true for external tables that don't have "external.table.purge" set to true.
*/
private boolean isExternalWithNoPurge() {
public boolean isExternalWithNoPurge() {
return isExternal() && !Boolean.parseBoolean(getTblProperties().get(
Table.TBL_PROP_EXTERNAL_TABLE_PURGE));
}

@@ -22,6 +22,7 @@ import java.util.List;
import org.apache.impala.authorization.Privilege;
import org.apache.impala.catalog.FeFsTable;
import org.apache.impala.catalog.FeTable;
import org.apache.impala.catalog.paimon.FeShowFileStmtSupport;
import org.apache.impala.common.AnalysisException;
import org.apache.impala.thrift.TShowFilesParams;
import org.apache.impala.thrift.TTableName;
@@ -78,13 +79,23 @@ public class ShowFilesStmt extends StatementBase implements SingleTableStmt {
}
table_ = tableRef.getTable();
Preconditions.checkNotNull(table_);
if (!(table_ instanceof FeFsTable)) {
throw new AnalysisException("SHOW FILES is applicable only to a HDFS table");
if (!(table_ instanceof FeFsTable) && !(table_ instanceof FeShowFileStmtSupport)) {
throw new AnalysisException("SHOW FILES is applicable only to file-based tables.");
}
tableRef.analyze(analyzer);
// Analyze the partition spec, if one was specified.
if (partitionSet_ != null) {
if (table_ instanceof FeShowFileStmtSupport) {
FeShowFileStmtSupport showFileStmtSupport = (FeShowFileStmtSupport) table_;
if (!showFileStmtSupport.supportPartitionFilter()) {
throw new AnalysisException(
"SHOW FILES with partition filter is not applicable to" +
" table type:" +
showFileStmtSupport.getTableFormat().name()
);
}
}
partitionSet_.setTableName(table_.getTableName());
partitionSet_.setPartitionShouldExist();
partitionSet_.setPrivilegeRequirement(Privilege.VIEW_METADATA);

@@ -19,12 +19,14 @@ package org.apache.impala.analysis;
import java.util.List;
import org.apache.impala.analysis.paimon.PaimonAnalyzer;
import org.apache.impala.authorization.Privilege;
import org.apache.impala.catalog.FeFsTable;
import org.apache.impala.catalog.FeIcebergTable;
import org.apache.impala.catalog.FeKuduTable;
import org.apache.impala.catalog.FeTable;
import org.apache.impala.catalog.FeView;
import org.apache.impala.catalog.paimon.FePaimonTable;
import org.apache.impala.common.AnalysisException;
import org.apache.impala.thrift.TShowStatsOp;
import org.apache.impala.thrift.TShowStatsParams;
@@ -114,6 +116,8 @@ public class ShowStatsStmt extends StatementBase implements SingleTableStmt {
throw new AnalysisException(getSqlPrefix() + " must target a Kudu table: " +
table_.getFullName());
}
} else if (table_ instanceof FePaimonTable) {
PaimonAnalyzer.analyzeShowStatStmt(this, (FePaimonTable) table_, analyzer);
} else if (table_ instanceof FeKuduTable) {
FeKuduTable kuduTable = (FeKuduTable) table_;
if ((op_ == TShowStatsOp.RANGE_PARTITIONS || op_ == TShowStatsOp.HASH_SCHEMA) &&

@@ -24,6 +24,7 @@ import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.stream.Collectors;
import org.antlr.runtime.ANTLRStringStream;
import org.antlr.runtime.RecognitionException;
@@ -52,7 +53,10 @@ import org.apache.impala.catalog.KuduColumn;
import org.apache.impala.catalog.KuduTable;
import org.apache.impala.catalog.RowFormat;
import org.apache.impala.catalog.Table;
import org.apache.impala.catalog.paimon.FePaimonTable;
import org.apache.impala.catalog.paimon.PaimonUtil;
import org.apache.impala.common.Pair;
import org.apache.impala.service.CatalogOpExecutor;
import org.apache.impala.thrift.TBucketInfo;
import org.apache.impala.thrift.TBucketType;
import org.apache.impala.thrift.TIcebergCatalog;
@@ -61,6 +65,7 @@ import org.apache.impala.util.AcidUtils;
import org.apache.impala.util.BucketUtils;
import org.apache.impala.util.IcebergUtil;
import org.apache.impala.util.KuduUtil;
import org.apache.paimon.CoreOptions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -470,6 +475,50 @@ public class ToSqlUtils {
} catch (Exception e) {
throw new CatalogException("Could not get primary key/foreign keys sql.", e);
}
} else if (table instanceof FePaimonTable) {
// format
String inputFormat = msTable.getSd().getInputFormat();
String serDeLib = msTable.getSd().getSerdeInfo().getSerializationLib();
format = HdfsFileFormat.fromHdfsInputFormatClass(inputFormat, serDeLib);
storageHandlerClassName = null;
isPrimaryKeyUnique = true;
properties.remove(CoreOptions.PRIMARY_KEY.key());
properties.remove(CoreOptions.PARTITION.key());
properties.remove(PaimonUtil.STORAGE_HANDLER);
properties.remove(CatalogOpExecutor.CAPABILITIES_KEY);
// for synchronized table, show sql like a managed table
if (PaimonUtil.isSynchronizedTable(msTable)) {
properties.remove("TRANSLATED_TO_EXTERNAL");
properties.remove(Table.TBL_PROP_EXTERNAL_TABLE_PURGE);
if ((location != null)
&& location.toLowerCase().endsWith(table.getName().toLowerCase())) {
location = null;
}
isExternal = false;
}
try {
FePaimonTable fePaimonTable = (FePaimonTable) table;
primaryKeySql = fePaimonTable.getPaimonApiTable()
.primaryKeys()
.stream()
.map(String::toLowerCase)
.collect(Collectors.toList());
partitionColsSql = new ArrayList<>();
for (int i = 0; i < table.getNumClusteringCols(); i++) {
Column col = table.getColumns().get(i);
partitionColsSql.add(columnToSql(col));
}
colsSql = new ArrayList<>();
for (int i = table.getNumClusteringCols(); i < table.getColumns().size(); i++) {
Column col = table.getColumns().get(i);
colsSql.add(columnToSql(col));
}
} catch (Exception e) {
throw new CatalogException("Could not get primary key/foreign keys sql.", e);
}
} else if (table instanceof FeDataSourceTable) {
// Mask sensitive table properties for external JDBC table.
Set<String> keysToBeMasked = DataSourceTable.getJdbcTblPropertyMaskKeys();
@@ -641,8 +690,16 @@ public class ToSqlUtils {
return sb.toString();
}
private static String encodeColumnName(String name) {
if (impalaNeedsQuotes(name)) {
return String.format("`%s`", name);
} else {
return name;
}
}
private static String columnToSql(Column col) {
StringBuilder sb = new StringBuilder(col.getName());
StringBuilder sb = new StringBuilder(encodeColumnName(col.getName()));
if (col.getType() != null) sb.append(" " + col.getType().toSql());
if (col instanceof KuduColumn) {
KuduColumn kuduCol = (KuduColumn) col;

@@ -0,0 +1,290 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.impala.analysis.paimon;
import com.google.common.collect.Sets;
import org.apache.impala.analysis.AnalysisUtils;
import org.apache.impala.analysis.Analyzer;
import org.apache.impala.analysis.ColumnDef;
import org.apache.impala.analysis.CreateTableStmt;
import org.apache.impala.analysis.ShowStatsStmt;
import org.apache.impala.catalog.HdfsFileFormat;
import org.apache.impala.catalog.Table;
import org.apache.impala.catalog.paimon.FePaimonTable;
import org.apache.impala.catalog.paimon.ImpalaTypeUtils;
import org.apache.impala.catalog.paimon.PaimonUtil;
import org.apache.impala.common.AnalysisException;
import org.apache.impala.thrift.TBucketType;
import org.apache.impala.thrift.TPaimonCatalog;
import org.apache.impala.thrift.TShowStatsParams;
import org.apache.paimon.CoreOptions;
import org.apache.paimon.options.OptionsUtils;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
* Paimon analyzer utils to perform paimon-related analyze tasks.
*
* This class is used to reduce coupling of paimon-related implementation
* with {@link Analyzer}.
*/
public class PaimonAnalyzer {
/**
* Check Paimon Create Table statement.
* @param stmt
* @param analyzer
*
*/
public static void analyzeCreateTableStmt(CreateTableStmt stmt, Analyzer analyzer)
throws AnalysisException {
// TODO: Not supported for now, try to enable later
AnalysisUtils.throwIfNotNull(
stmt.getCachingOp(), "A Paimon table cannot be cached in HDFS.");
AnalysisUtils.throwIfTrue(
stmt.getTblProperties().containsKey(CoreOptions.PARTITION.key()),
"Can't specify 'partition' table property in Paimon DDL, " +
"use PARTITIONED BY clause instead.");
AnalysisUtils.throwIfTrue(
stmt.geTBucketInfo().bucket_type != TBucketType.NONE,
"CLUSTERED BY clause is not support by PAIMON now, " +
"use property bucket-key instead.");
// The 'primary-key' property and the PRIMARY KEY clause should not both be specified
AnalysisUtils.throwIfTrue(
stmt.getTblProperties().containsKey(CoreOptions.PRIMARY_KEY.key())
&& !stmt.getPrimaryKeys().isEmpty(),
"Can't specify both PRIMARY KEY clause and 'primary-key' table property " +
"in Paimon DDL.");
analyzePaimonColumns(stmt, analyzer);
analyzePaimonFormat(stmt, analyzer);
}
/**
* Check Paimon Show Table Stats statement.
* @param statsOp
* @param analyzer
*
*/
public static void analyzeShowStatStmt(ShowStatsStmt statsOp, FePaimonTable table,
Analyzer analyzer) throws AnalysisException {
TShowStatsParams params = statsOp.toThrift();
switch (params.getOp()) {
case TABLE_STATS:
case COLUMN_STATS: return;
case PARTITIONS:
if (!PaimonUtil.hasPartition(table.getPaimonApiTable())) {
throw new AnalysisException("Table is not partitioned: " + table.getFullName());
}
break;
default:
throw new AnalysisException(
statsOp.toSql() + " is not supported for Paimon Table");
}
}
/**
* Setup paimon related property.
*/
private static void putPaimonProperty(CreateTableStmt stmt, String key, String value) {
stmt.putGeneratedProperty(key, value);
}
/**
* Check paimon format related setting and update stmt object.
*/
private static void analyzePaimonFormat(CreateTableStmt stmt, Analyzer analyzer)
throws AnalysisException {
Map<String, String> tblProperties = stmt.getTblProperties();
Map<String, String> paimonTblProperties = OptionsUtils.convertToPropertiesPrefixKey(
tblProperties, PaimonUtil.PAIMON_PROPERTY_PREFIX);
boolean isExternal = stmt.isExternal();
// A managed table cannot have 'external.table.purge' property set
if (!isExternal
&& Boolean.parseBoolean(tblProperties.get(Table.TBL_PROP_EXTERNAL_TABLE_PURGE))) {
throw new AnalysisException(String.format("Table property '%s' cannot be set to "
+ "true with a managed paimon table.",
Table.TBL_PROP_EXTERNAL_TABLE_PURGE));
}
// External table with purging is not supported.
if (stmt.isExternal()
&& Boolean.parseBoolean(
tblProperties.getOrDefault(Table.TBL_PROP_EXTERNAL_TABLE_PURGE, "false"))) {
throw new AnalysisException(" External table with purge is not supported.");
}
// check storage handler, add storage handler for compatibility.
String handler = tblProperties.get(PaimonUtil.STORAGE_HANDLER);
if (handler != null && !handler.equals(PaimonUtil.PAIMON_STORAGE_HANDLER)) {
throw new AnalysisException("Invalid storage handler "
+ "specified for Paimon format: " + handler);
}
stmt.putGeneratedProperty(
PaimonUtil.STORAGE_HANDLER, PaimonUtil.PAIMON_STORAGE_HANDLER);
// enable the deletion-vector mode by default if not specified
if (!paimonTblProperties.containsKey(CoreOptions.DELETION_VECTORS_ENABLED.key())
&& !paimonTblProperties.containsKey(
CoreOptions.FULL_COMPACTION_DELTA_COMMITS.key())) {
putPaimonProperty(stmt, CoreOptions.DELETION_VECTORS_ENABLED.key(), "true");
}
// check format support
String fileformat = paimonTblProperties.get(CoreOptions.FILE_FORMAT.key());
HdfsFileFormat hdfsFileFormat = PaimonUtil.getPaimonFileFormat(fileformat);
if (fileformat != null && hdfsFileFormat == null) {
throw new AnalysisException(
"Unsupported fileformat for Paimon table: " + fileformat);
}
// Determine the Paimon catalog being used. The default catalog is HiveCatalog.
TPaimonCatalog catalog = PaimonUtil.getTPaimonCatalog(tblProperties);
switch (catalog) {
case HIVE_CATALOG: validateTableInHiveCatalog(stmt, tblProperties); break;
case HADOOP_CATALOG: validateTableInHadoopCatalog(stmt, tblProperties); break;
default:
throw new AnalysisException(
String.format("Unknown Paimon catalog type: %s", catalog));
}
// HMS will override 'external.table.purge' to 'TRUE' when 'paimon.catalog' is not
// the Hive Catalog for managed tables.
if (!isExternal && !catalog.equals(TPaimonCatalog.HIVE_CATALOG)
&& "false".equalsIgnoreCase(
tblProperties.get(Table.TBL_PROP_EXTERNAL_TABLE_PURGE))) {
analyzer.addWarning("The table property 'external.table.purge' will be set "
+ "to 'TRUE' on newly created managed Paimon tables.");
}
}
/**
* Validate for hive catalog.
* @param stmt
* @param tblProperties
*/
private static void validateTableInHiveCatalog(
CreateTableStmt stmt, Map<String, String> tblProperties) throws AnalysisException {
// Check if hadoop catalog related properties are defined in the hive catalog
if (tblProperties.get(PaimonUtil.PAIMON_HADOOP_CATALOG_LOCATION) != null) {
throw new AnalysisException(String.format("%s cannot be set for Paimon table "
+ "stored in hive.catalog",
PaimonUtil.PAIMON_HADOOP_CATALOG_LOCATION));
}
if (tblProperties.get(PaimonUtil.PAIMON_TABLE_IDENTIFIER) != null) {
throw new AnalysisException(String.format("%s cannot be set for Paimon table "
+ "stored in hive.catalog",
PaimonUtil.PAIMON_TABLE_IDENTIFIER));
}
AnalysisUtils.throwIfTrue(stmt.isExternal() && stmt.getLocation() == null,
"Location must be set for external Paimon table stored in hive catalog");
}
/**
* Validate for hadoop catalog.
* @param stmt
* @param tblProperties
*/
private static void validateTableInHadoopCatalog(
CreateTableStmt stmt, Map<String, String> tblProperties) throws AnalysisException {
String catalogLoc = tblProperties.get(PaimonUtil.PAIMON_HADOOP_CATALOG_LOCATION);
if (catalogLoc == null || catalogLoc.isEmpty()) {
throw new AnalysisException(String.format("Table property '%s' is necessary "
+ "for Paimon table with 'hadoop.catalog'.",
PaimonUtil.PAIMON_HADOOP_CATALOG_LOCATION));
}
// Table identifier should be specified for external table
AnalysisUtils.throwIfTrue(stmt.isExternal()
&& tblProperties.get(PaimonUtil.PAIMON_TABLE_IDENTIFIER) == null,
String.format(
"Table property '%s' is necessary for Paimon table with 'hadoop.catalog'.",
PaimonUtil.PAIMON_TABLE_IDENTIFIER));
}
/**
* Check column type support for the column definitions.
* @param columnDef
*/
private static void throwIfColumnTypeIsNotSupported(ColumnDef columnDef)
throws AnalysisException {
if (!ImpalaTypeUtils.isSupportedColumnType(columnDef.getType())) {
throw new AnalysisException("Tables stored by Paimon do not support the column "
+ columnDef.getColName() + " type: " + columnDef.getType().toSql());
}
}
/**
* Check column definitions of paimon table.
* @param stmt
* @param analyzer
*/
private static void analyzePaimonColumns(CreateTableStmt stmt, Analyzer analyzer)
throws AnalysisException {
Set<String> colSets = Sets.newHashSet();
// Check if the columns definitions are supported
for (ColumnDef col : stmt.getColumnDefs()) {
throwIfColumnTypeIsNotSupported(col);
colSets.add(col.getColName().toLowerCase());
}
// Check if the partition columns definitions are supported
for (ColumnDef col : stmt.getPartitionColumnDefs()) {
throwIfColumnTypeIsNotSupported(col);
colSets.add(col.getColName().toLowerCase());
}
// Check if primary keys are in the column definitions
if (stmt.getTblProperties().containsKey(CoreOptions.PRIMARY_KEY.key())) {
List<String> colNames = PaimonUtil.extractColumnNames(
stmt.getTblProperties().get(CoreOptions.PRIMARY_KEY.key()));
for (String col : colNames) {
AnalysisUtils.throwIfTrue(!colSets.contains(col),
String.format(
"Invalid col name %s specified in 'primary-key' table properties.", col));
}
}
// Check if bucket keys are in the column definitions
if (stmt.getTblProperties().containsKey(CoreOptions.BUCKET_KEY.key())) {
List<String> parts = PaimonUtil.extractColumnNames(
stmt.getTblProperties().get(CoreOptions.BUCKET_KEY.key()));
for (String col : parts) {
AnalysisUtils.throwIfTrue(!colSets.contains(col),
String.format(
"Invalid col name %s specified in 'bucket-key' table properties.", col));
}
}
// Check rule: Managed table does not support inferring schema from underlying paimon
// table.
if (stmt.getColumnDefs().isEmpty()) {
AnalysisUtils.throwIfTrue(!stmt.isExternal(),
"Managed table does not support inferring schema from underlying paimon table");
}
}
}

@@ -85,8 +85,10 @@ public enum HdfsFileFormat {
"org.apache.iceberg.mr.hive.HiveIcebergSerDe", false, false, false),
JDBC("org.apache.hadoop.hive.jdbc.JdbcInputFormat",
"org.apache.hadoop.hive.jdbc.JdbcOutputFormat",
"org.apache.hadoop.hive.jdbc.JdbcSerDe", false, false, true);
"org.apache.hadoop.hive.jdbc.JdbcSerDe", false, false, true),
PAIMON("org.apache.paimon.hive.mapred.PaimonInputFormat",
"org.apache.paimon.hive.mapred.PaimonOutputFormat",
"org.apache.paimon.hive.PaimonSerDe", true, false, true);
private final String inputFormat_;
private final String outputFormat_;
@@ -143,6 +145,7 @@ public enum HdfsFileFormat {
.put(ORC.inputFormat(), ORC)
.put(HUDI_PARQUET.inputFormat(), HUDI_PARQUET)
.put(ICEBERG.inputFormat(), ICEBERG)
.put(PAIMON.inputFormat(), PAIMON)
.build();
/**
@@ -197,6 +200,7 @@ public enum HdfsFileFormat {
case ICEBERG: return HdfsFileFormat.ICEBERG;
case JSON: return HdfsFileFormat.JSON;
case JDBC: return HdfsFileFormat.JDBC;
case PAIMON: return HdfsFileFormat.PAIMON;
default:
throw new RuntimeException("Unknown THdfsFileFormat: "
+ thriftFormat + " - should never happen!");
@@ -216,6 +220,7 @@ public enum HdfsFileFormat {
case ICEBERG: return THdfsFileFormat.ICEBERG;
case JSON: return THdfsFileFormat.JSON;
case JDBC: return THdfsFileFormat.JDBC;
case PAIMON: return THdfsFileFormat.PAIMON;
default:
throw new RuntimeException("Unknown HdfsFormat: "
+ this + " - should never happen!");
@@ -243,6 +248,8 @@ public enum HdfsFileFormat {
case ICEBERG: return "ICEBERG";
case JSON: return "JSONFILE";
case JDBC: return "JDBC";
case PAIMON: return "PAIMON";
default:
throw new RuntimeException("Unknown HdfsFormat: "
+ this + " - should never happen!");
@@ -308,6 +315,7 @@ public enum HdfsFileFormat {
* Returns true if the format is Parquet, false otherwise.
*/
public boolean isParquetBased() {
return this == HdfsFileFormat.PARQUET || this == HdfsFileFormat.HUDI_PARQUET;
return this == HdfsFileFormat.PARQUET || this == HdfsFileFormat.HUDI_PARQUET
|| this == HdfsFileFormat.PAIMON;
}
}

@@ -79,7 +79,8 @@ public class HdfsStorageDescriptor {
// support for the new input/output format classes. See IMPALA-4214.
"org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe", // (parquet)
"org.apache.iceberg.mr.hive.HiveIcebergSerDe", // (iceberg)
"org.apache.hadoop.hive.serde2.JsonSerDe");// (json)
"org.apache.hadoop.hive.serde2.JsonSerDe", // (json)
"org.apache.paimon.hive.PaimonSerDe"); // (paimon)
private final static Logger LOG = LoggerFactory.getLogger(HdfsStorageDescriptor.class);

@@ -40,11 +40,13 @@ import org.apache.hadoop.hive.metastore.api.PrincipalType;
import org.apache.impala.analysis.TableName;
import org.apache.impala.catalog.events.InFlightEvents;
import org.apache.impala.catalog.monitor.CatalogMonitor;
import org.apache.impala.compat.MetastoreShim;
import org.apache.impala.catalog.paimon.PaimonTable;
import org.apache.impala.catalog.paimon.PaimonUtil;
import org.apache.impala.common.ImpalaRuntimeException;
import org.apache.impala.common.Metrics;
import org.apache.impala.common.Pair;
import org.apache.impala.common.RuntimeEnv;
import org.apache.impala.compat.MetastoreShim;
import org.apache.impala.service.MetadataOp;
import org.apache.impala.thrift.TAccessLevel;
import org.apache.impala.thrift.TCatalogObject;
@@ -558,6 +560,8 @@ public abstract class Table extends CatalogObjectImpl implements FeTable {
table = new KuduTable(msTbl, db, msTbl.getTableName(), msTbl.getOwner());
} else if (IcebergTable.isIcebergTable(msTbl)) {
table = new IcebergTable(msTbl, db, msTbl.getTableName(), msTbl.getOwner());
} else if (PaimonUtil.isPaimonTable(msTbl)) {
table = new PaimonTable(msTbl, db, msTbl.getTableName(), msTbl.getOwner());
} else if (DataSourceTable.isDataSourceTable(msTbl)) {
// It's important to check if this is a DataSourceTable before HdfsTable because
// DataSourceTables are still represented by HDFS tables in the metastore but

@@ -50,6 +50,14 @@ public class VirtualColumn extends Column {
Type.BIGINT,
TVirtualColumnType.ICEBERG_DATA_SEQUENCE_NUMBER);
// Paimon-related virtual columns.
public static VirtualColumn PARTITION_VALUE_SERIALIZED = new
VirtualColumn("PARTITION__VALUE__SERIALIZED", Type.BINARY,
TVirtualColumnType.PARTITION_VALUE_SERIALIZED);
public static VirtualColumn BUCKET_ID = new VirtualColumn("BUCKET_ID",
Type.INT, TVirtualColumnType.BUCKET_ID);
public static VirtualColumn getVirtualColumn(TVirtualColumnType virtColType) {
switch (virtColType) {
case INPUT_FILE_NAME: return INPUT_FILE_NAME;
@@ -57,6 +65,8 @@ public class VirtualColumn extends Column {
case PARTITION_SPEC_ID: return PARTITION_SPEC_ID;
case ICEBERG_PARTITION_SERIALIZED: return ICEBERG_PARTITION_SERIALIZED;
case ICEBERG_DATA_SEQUENCE_NUMBER: return ICEBERG_DATA_SEQUENCE_NUMBER;
case PARTITION_VALUE_SERIALIZED: return PARTITION_VALUE_SERIALIZED;
case BUCKET_ID: return BUCKET_ID;
default: break;
}
return null;

@@ -0,0 +1,77 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.impala.catalog.local;
import com.google.common.base.Preconditions;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.impala.catalog.TableLoadingException;
import org.apache.impala.catalog.paimon.FePaimonTable;
import org.apache.impala.catalog.paimon.PaimonUtil;
import org.apache.impala.thrift.TTableDescriptor;
import org.apache.impala.thrift.TTableType;
import org.apache.log4j.Logger;
import org.apache.paimon.table.Table;
import java.io.IOException;
import java.util.Set;
/**
* Paimon table for LocalCatalog
*/
public class LocalPaimonTable extends LocalTable implements FePaimonTable {
private static final Logger LOG = Logger.getLogger(LocalPaimonTable.class);
private Table table_;
public static LocalPaimonTable load(LocalDb db,
org.apache.hadoop.hive.metastore.api.Table msTbl, MetaProvider.TableMetaRef ref)
throws TableLoadingException {
Preconditions.checkNotNull(db);
Preconditions.checkNotNull(msTbl);
Preconditions.checkNotNull(ref);
try {
LocalPaimonTable localPaimonTable = new LocalPaimonTable(db, msTbl, ref);
return localPaimonTable;
} catch (MetaException ex) {
throw new TableLoadingException("Failed to load table " + msTbl.getTableName(), ex);
}
}
protected LocalPaimonTable(LocalDb db, org.apache.hadoop.hive.metastore.api.Table msTbl,
MetaProvider.TableMetaRef ref) throws MetaException {
super(db, msTbl, ref);
table_ = PaimonUtil.createFileStoreTable(msTbl);
applyPaimonTableStatsIfPresent();
}
@Override
public Table getPaimonApiTable() {
return table_;
}
@Override
public TTableDescriptor toThriftDescriptor(
int tableId, Set<Long> referencedPartitions) {
TTableDescriptor tableDescriptor = new TTableDescriptor(tableId,
TTableType.PAIMON_TABLE, getTColumnDescriptors(), 0, name_, db_.getName());
try {
tableDescriptor.setPaimonTable(PaimonUtil.getTPaimonTable(this));
} catch (IOException e) { throw new RuntimeException(e); }
return tableDescriptor;
}
}

@@ -49,6 +49,7 @@ import org.apache.impala.catalog.SystemTable;
import org.apache.impala.catalog.TableLoadingException;
import org.apache.impala.catalog.VirtualColumn;
import org.apache.impala.catalog.local.MetaProvider.TableMetaRef;
import org.apache.impala.catalog.paimon.PaimonUtil;
import org.apache.impala.common.Pair;
import org.apache.impala.common.RuntimeEnv;
import org.apache.impala.compat.MetastoreShim;
@@ -129,12 +130,13 @@ abstract class LocalTable implements FeTable {
t = LocalKuduTable.loadFromKudu(db, msTbl, ref);
} else if (IcebergTable.isIcebergTable(msTbl)) {
t = LocalIcebergTable.loadIcebergTableViaMetaProvider(db, msTbl, ref);
} else if (PaimonUtil.isPaimonTable(msTbl)) {
t = LocalPaimonTable.load(db, msTbl, ref);
} else if (DataSourceTable.isDataSourceTable(msTbl)) {
t = LocalDataSourceTable.load(db, msTbl, ref);
} else if (SystemTable.isSystemTable(msTbl)) {
t = LocalSystemTable.load(db, msTbl, ref);
} else if (HdfsFileFormat.isHdfsInputFormatClass(
msTbl.getSd().getInputFormat())) {
} else if (HdfsFileFormat.isHdfsInputFormatClass(msTbl.getSd().getInputFormat())) {
t = LocalFsTable.load(db, msTbl, ref);
}

@@ -0,0 +1,132 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.impala.catalog.paimon;
import com.google.common.base.Preconditions;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
import org.apache.hadoop.mapred.JobConf;
import org.apache.impala.catalog.Column;
import org.apache.impala.catalog.ColumnStats;
import org.apache.impala.catalog.FeTable;
import org.apache.impala.catalog.HdfsFileFormat;
import org.apache.impala.thrift.TResultSet;
import org.apache.impala.thrift.TShowFilesParams;
import org.apache.impala.thrift.TShowStatsOp;
import org.apache.paimon.stats.ColStats;
import org.apache.paimon.stats.Statistics;
import org.apache.paimon.table.FileStoreTable;
import org.apache.paimon.table.Table;
import org.apache.paimon.types.DataField;
import org.apache.paimon.types.RowType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Map;
import java.util.Optional;
/**
* Frontend interface for interacting with a Paimon-backed table.
*/
public interface FePaimonTable extends FeTable, FeShowFileStmtSupport {
final static Logger LOG = LoggerFactory.getLogger(FePaimonTable.class);
public static JobConf jobConf = new JobConf();
/**
* Returns the cached paimon Table object that stores the metadata loaded by Paimon.
*/
Table getPaimonApiTable();
default boolean hasSnapshotMetaTable() {
Table table = getPaimonApiTable();
return table instanceof FileStoreTable;
}
/**
* @return the Paimon schema.
*/
default RowType getPaimonSchema() { return getPaimonApiTable().rowType(); }
default void applyPaimonTableStatsIfPresent() {
Preconditions.checkState(getTTableStats() != null);
Table table = getPaimonApiTable();
Optional<Statistics> stats = table.statistics();
if (!stats.isPresent()) return;
if (getTTableStats().getNum_rows() < 0) {
getTTableStats().setNum_rows(stats.get().mergedRecordCount().orElse(-1));
}
getTTableStats().setTotal_file_bytes(stats.get().mergedRecordSize().orElse(-1));
}
default void applyPaimonColumnStatsIfPresent() {
Preconditions.checkState(getTTableStats() != null);
Table table = getPaimonApiTable();
Optional<Statistics> stats = table.statistics();
if (!stats.isPresent()) return;
Map<String, ColStats<?>> colStatsMap = stats.get().colStats();
for (String colName : colStatsMap.keySet()) {
Column col = this.getColumn(colName.toLowerCase());
if (null == col) { continue; }
if (!ColumnStats.isSupportedColType(col.getType())) {
LOG.warn(String.format("Statistics for %s, column %s are not supported as " +
"column has type %s",
getFullName(), col.getName(), col.getType()));
continue;
}
ColStats<?> colStats = colStatsMap.get(colName);
DataField dataField = table.rowType().getField(colName);
Optional<ColumnStatisticsData> colStatsData =
PaimonUtil.convertColStats(colStats, dataField);
if (colStatsData.isPresent()) {
if (!col.updateStats(colStatsData.get())) {
LOG.warn(String.format(
"Failed to load column stats for %s, column %s. Stats may be " +
"incompatible with column type %s. Consider regenerating " +
"statistics for %s.",
getFullName(), col.getName(), col.getType(), getFullName()));
}
}
}
}
default TResultSet getTableStats(TShowStatsOp op) {
if (TShowStatsOp.TABLE_STATS == op) {
return PaimonUtil.doGetTableStats(this);
} else if (TShowStatsOp.PARTITIONS == op) {
return PaimonUtil.doGetPartitionStats(this);
} else {
throw new UnsupportedOperationException(
"paimon table doesn't support Show Stats Op" + op.name());
}
}
default TResultSet doGetTableFiles(TShowFilesParams request) {
return PaimonUtil.doGetTableFiles(this, request);
}
default HdfsFileFormat getTableFormat() {
return HdfsFileFormat.PAIMON;
}
default boolean supportPartitionFilter() {
return false;
}
}

@@ -0,0 +1,41 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.impala.catalog.paimon;
import org.apache.impala.analysis.Analyzer;
import org.apache.impala.catalog.HdfsFileFormat;
import org.apache.impala.thrift.TResultSet;
import org.apache.impala.thrift.TShowFilesParams;
/**
* Interface for show file statement support.
*
* This class is used to support Show Files statement
* for paimon format.
*/
public interface FeShowFileStmtSupport {
// Get Table files.
TResultSet doGetTableFiles(TShowFilesParams request);
// Current Table format.
HdfsFileFormat getTableFormat();
// Support partition filter.
boolean supportPartitionFilter();
}

@@ -0,0 +1,283 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.impala.catalog.paimon;
import com.google.common.base.Preconditions;
import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.common.type.HiveVarchar;
import org.apache.impala.catalog.PrimitiveType;
import org.apache.impala.catalog.ScalarType;
import org.apache.impala.catalog.StructField;
import org.apache.impala.catalog.StructType;
import org.apache.impala.catalog.Type;
import org.apache.paimon.types.ArrayType;
import org.apache.paimon.types.BigIntType;
import org.apache.paimon.types.BinaryType;
import org.apache.paimon.types.BooleanType;
import org.apache.paimon.types.CharType;
import org.apache.paimon.types.DataType;
import org.apache.paimon.types.DataTypeDefaultVisitor;
import org.apache.paimon.types.DataTypes;
import org.apache.paimon.types.DateType;
import org.apache.paimon.types.DecimalType;
import org.apache.paimon.types.DoubleType;
import org.apache.paimon.types.FloatType;
import org.apache.paimon.types.IntType;
import org.apache.paimon.types.LocalZonedTimestampType;
import org.apache.paimon.types.MapType;
import org.apache.paimon.types.RowType;
import org.apache.paimon.types.SmallIntType;
import org.apache.paimon.types.TimestampType;
import org.apache.paimon.types.TinyIntType;
import org.apache.paimon.types.VarBinaryType;
import org.apache.paimon.types.VarCharType;
import java.util.List;
import java.util.stream.Collectors;
/** Utils for converting types related classes between Paimon and Impala. */
public class ImpalaTypeUtils {
/**
* Convert paimon data type {@link DataType} to impala data type {@link Type}.
*
* @param logicalType paimon data type.
* @return impala type info.
*/
public static Type toImpalaType(DataType logicalType) {
return logicalType.accept(PaimonToImpalaTypeVisitor.INSTANCE);
}
/**
* Convert impala data type {@link Type} to paimon data type {@link DataType}.
*
* @param Type impala type info
* @return paimon data type
*/
public static DataType fromImpalaType(Type Type) {
return ImpalaToPaimonTypeVisitor.visit(Type);
}
private static class PaimonToImpalaTypeVisitor extends DataTypeDefaultVisitor<Type> {
private static final PaimonToImpalaTypeVisitor INSTANCE =
new PaimonToImpalaTypeVisitor();
@Override
public Type visit(BooleanType booleanType) {
return Type.BOOLEAN;
}
@Override
public Type visit(TinyIntType tinyIntType) {
return Type.TINYINT;
}
@Override
public Type visit(SmallIntType smallIntType) {
return Type.SMALLINT;
}
@Override
public Type visit(IntType intType) {
return Type.INT;
}
@Override
public Type visit(BigIntType bigIntType) {
return Type.BIGINT;
}
@Override
public Type visit(FloatType floatType) {
return Type.FLOAT;
}
@Override
public Type visit(DoubleType doubleType) {
return Type.DOUBLE;
}
@Override
public Type visit(DecimalType decimalType) {
return ScalarType.createDecimalType(
decimalType.getPrecision(), decimalType.getScale());
}
@Override
public Type visit(CharType charType) {
if (charType.getLength() > HiveChar.MAX_CHAR_LENGTH) {
return Type.STRING;
} else {
return ScalarType.createCharType(charType.getLength());
}
}
@Override
public Type visit(VarCharType varCharType) {
if (varCharType.getLength() > HiveVarchar.MAX_VARCHAR_LENGTH) {
return Type.STRING;
} else {
return ScalarType.createVarcharType(varCharType.getLength());
}
}
@Override
public Type visit(BinaryType binaryType) {
return Type.BINARY;
}
@Override
public Type visit(VarBinaryType varBinaryType) {
return Type.BINARY;
}
@Override
public Type visit(DateType dateType) {
return Type.DATE;
}
@Override
public Type visit(TimestampType timestampType) {
return Type.TIMESTAMP;
}
@Override
public Type visit(LocalZonedTimestampType localZonedTimestampType) {
return Type.TIMESTAMP;
}
@Override
public Type visit(ArrayType arrayType) {
DataType elementType = arrayType.getElementType();
return new org.apache.impala.catalog.ArrayType(elementType.accept(this));
}
@Override
public Type visit(MapType mapType) {
return new org.apache.impala.catalog.MapType(
mapType.getKeyType().accept(this), mapType.getValueType().accept(this));
}
@Override
public Type visit(RowType rowType) {
List<StructField> structFields =
rowType.getFields()
.stream()
.map(dataField
-> new StructField(
dataField.name().toLowerCase(), dataField.type().accept(this)))
.collect(Collectors.toList());
return new StructType(structFields);
}
@Override
protected Type defaultMethod(org.apache.paimon.types.DataType dataType) {
throw new UnsupportedOperationException("Unsupported type: " + dataType);
}
}
private static class ImpalaToPaimonTypeVisitor {
static DataType visit(Type type) {
return visit(type, new ImpalaToPaimonTypeVisitor());
}
static DataType visit(Type type, ImpalaToPaimonTypeVisitor visitor) {
if (type.isStructType()) {
StructType structTypeInfo = (StructType) type;
List<StructField> fields = structTypeInfo.getFields();
RowType.Builder builder = RowType.builder();
for (StructField field : fields) {
builder.field(field.getName(), visit(field.getType(), visitor));
}
return builder.build();
} else if (type.isMapType()) {
org.apache.impala.catalog.MapType mapTypeInfo =
(org.apache.impala.catalog.MapType) type;
return DataTypes.MAP(visit(mapTypeInfo.getKeyType(), visitor),
visit(mapTypeInfo.getValueType(), visitor));
} else if (type.isArrayType()) {
org.apache.impala.catalog.ArrayType listTypeInfo =
(org.apache.impala.catalog.ArrayType) type;
return DataTypes.ARRAY(visit(listTypeInfo.getItemType(), visitor));
} else {
return visitor.atomic(type);
}
}
public DataType atomic(Type atomic) {
PrimitiveType primitiveType = atomic.getPrimitiveType();
switch (primitiveType) {
case BOOLEAN: return DataTypes.BOOLEAN();
case TINYINT: return DataTypes.TINYINT();
case SMALLINT: return DataTypes.SMALLINT();
case INT: return DataTypes.INT();
case BIGINT: return DataTypes.BIGINT();
case FLOAT: return DataTypes.FLOAT();
case DOUBLE: return DataTypes.DOUBLE();
case DECIMAL:
ScalarType scalarType = (ScalarType) atomic;
return DataTypes.DECIMAL(
scalarType.decimalPrecision(), scalarType.decimalScale());
case CHAR: return DataTypes.CHAR(((ScalarType) atomic).getLength());
case VARCHAR: return DataTypes.VARCHAR(((ScalarType) atomic).getLength());
case STRING: return DataTypes.STRING();
case DATE: return DataTypes.DATE();
case TIMESTAMP: return DataTypes.TIMESTAMP();
case BINARY: return DataTypes.BINARY(BinaryType.MAX_LENGTH);
default:
throw new UnsupportedOperationException(
"Not a supported type: " + atomic);
}
}
}
/**
* Returns true if the primitive type is supported.
*/
public static boolean isSupportedPrimitiveType(PrimitiveType primitiveType) {
Preconditions.checkNotNull(primitiveType);
switch (primitiveType) {
case BIGINT:
case INT:
case SMALLINT:
case TINYINT:
case DOUBLE:
case FLOAT:
case BOOLEAN:
case STRING:
case TIMESTAMP:
case DECIMAL:
case DATE:
case BINARY:
case CHAR:
case DATETIME:
case VARCHAR: return true;
default: return false;
}
}
/**
* Returns true if the column type is supported.
*/
public static boolean isSupportedColumnType(Type colType) {
Preconditions.checkNotNull(colType);
return isSupportedPrimitiveType(colType.getPrimitiveType());
}
}
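
A minimal usage sketch of the conversions above (not part of the patch; the wrapper
class and main method are illustrative only).

import org.apache.impala.catalog.ScalarType;
import org.apache.impala.catalog.Type;
import org.apache.impala.catalog.paimon.ImpalaTypeUtils;
import org.apache.paimon.types.DataType;
import org.apache.paimon.types.DataTypes;

public class ImpalaTypeUtilsSketch {
  public static void main(String[] args) {
    // Impala DECIMAL(10,2) -> Paimon DECIMAL(10,2).
    DataType paimonDecimal =
        ImpalaTypeUtils.fromImpalaType(ScalarType.createDecimalType(10, 2));
    // Paimon VARCHAR(32) -> Impala VARCHAR(32); lengths above the Hive maximum
    // fall back to STRING, as implemented in the visitor above.
    Type impalaVarchar = ImpalaTypeUtils.toImpalaType(DataTypes.VARCHAR(32));
    System.out.println(paimonDecimal + " / " + impalaVarchar);
  }
}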

View File

@@ -0,0 +1,212 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.impala.catalog.paimon;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.impala.catalog.TableLoadingException;
import org.apache.impala.common.ImpalaRuntimeException;
import org.apache.impala.thrift.TColumn;
import org.apache.impala.thrift.TCreateTableParams;
import org.apache.impala.thrift.TDropTableOrViewParams;
import org.apache.impala.util.EventSequence;
import org.apache.paimon.CoreOptions;
import org.apache.paimon.catalog.CatalogContext;
import org.apache.paimon.catalog.Identifier;
import org.apache.paimon.fs.FileIO;
import org.apache.paimon.fs.Path;
import org.apache.paimon.schema.Schema;
import org.apache.paimon.schema.SchemaManager;
import org.apache.paimon.schema.TableSchema;
import org.apache.paimon.table.Table;
import org.apache.paimon.types.DataField;
import org.apache.paimon.utils.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
/**
* This is a helper for the CatalogOpExecutor to provide Paimon related DDL functionality
* such as creating and dropping tables from Paimon.
*/
public class PaimonCatalogOpExecutor {
public static final Logger LOG = LoggerFactory.getLogger(PaimonCatalogOpExecutor.class);
public static final String LOADED_PAIMON_TABLE = "Loaded paimon table";
public static final String CREATED_PAIMON_TABLE =
"Created table using Paimon Catalog ";
/**
* Creates the Paimon table through the Paimon API.
* Returns the location of the created table.
*/
public static String createTable(Identifier identifier, String location,
TCreateTableParams params, org.apache.hadoop.hive.metastore.api.Table newTable)
throws ImpalaRuntimeException {
try {
List<TColumn> all_columns = Lists.newArrayList(params.getColumns());
List<String> partitionKeys = Lists.newArrayList();
List<String> primaryKeys = Lists.newArrayList();
// handle partition columns
if (params.getPartition_columnsSize() > 0) {
Preconditions.checkArgument(
!params.getTable_properties().containsKey(CoreOptions.PARTITION.key()));
all_columns.addAll(params.getPartition_columns());
partitionKeys.addAll(params.getPartition_columns()
.stream()
.map(c -> c.getColumnName().toLowerCase())
.collect(Collectors.toList()));
params.getTable_properties().remove(CoreOptions.PARTITION.key());
}
if (params.getTable_properties().containsKey(CoreOptions.PARTITION.key())) {
Preconditions.checkArgument(params.getPartition_columnsSize() <= 0);
partitionKeys.clear();
List<String> newPartKeys = Arrays
.stream(params.getTable_properties()
.get(CoreOptions.PARTITION.key())
.split(","))
.collect(Collectors.toList());
partitionKeys.addAll(newPartKeys);
params.getTable_properties().remove(CoreOptions.PARTITION.key());
}
// handle primary keys
if (params.getPrimary_key_column_namesSize() > 0) {
primaryKeys.addAll(params.getPrimary_key_column_names());
Preconditions.checkArgument(
!params.getTable_properties().containsKey(CoreOptions.PRIMARY_KEY.key()));
}
if (params.getTable_properties().containsKey(CoreOptions.PRIMARY_KEY.key())) {
primaryKeys.clear();
List<String> exist = Arrays
.stream(params.getTable_properties()
.get(CoreOptions.PRIMARY_KEY.key())
.split(","))
.collect(Collectors.toList());
primaryKeys.addAll(exist);
}
params.getTable_properties().put(
CoreOptions.PRIMARY_KEY.key(), StringUtils.join(primaryKeys, ","));
Path path = new Path(location);
CatalogContext context = PaimonUtil.catalogContext(newTable, location);
FileIO fileIO;
try {
fileIO = FileIO.get(path, context);
} catch (IOException e) {
throw new ImpalaRuntimeException("Failed to get file IO for paimon table", e);
}
SchemaManager schemaManager = new SchemaManager(fileIO, path);
Optional<TableSchema> tableSchema = schemaManager.latest();
if (!tableSchema.isPresent()) {
Schema schema = PaimonUtil.genPaimonSchema(
all_columns, partitionKeys, params.getTable_properties());
schemaManager.createTable(schema);
LOG.info("Create paimon table successful.");
} else {
throw new AlreadyExistsException(
"Can't create paimon table, since the table location is not clean.");
}
return location;
} catch (Exception ex) {
throw new ImpalaRuntimeException("Failed to create paimon table", ex);
}
}
/**
* Populates HMS table schema based on the Paimon table's schema.
*/
public static void populateExternalTableSchemaFromPaimonTable(
org.apache.hadoop.hive.metastore.api.Table msTbl, Table tbl)
throws TableLoadingException {
try {
List<FieldSchema> cols = Lists.newArrayList();
List<FieldSchema> partCols = Lists.newArrayList();
Set<String> partSet = tbl.partitionKeys()
.stream()
.map(String::toLowerCase)
.collect(Collectors.toSet());
List<String> primaryKeys = tbl.primaryKeys()
.stream()
.map(String::toLowerCase)
.collect(Collectors.toList());
List<FieldSchema> hiveFields = PaimonUtil.convertToHiveSchema(tbl.rowType());
for (int i = 0; i < tbl.rowType().getFields().size(); i++) {
DataField dataField = tbl.rowType().getFields().get(i);
if (partSet.contains(dataField.name().toLowerCase())) {
partCols.add(hiveFields.get(i));
} else {
cols.add(hiveFields.get(i));
}
}
msTbl.getSd().setCols(cols);
msTbl.setPartitionKeys(partCols);
Map<String, String> parameters = msTbl.getParameters();
// Update primary key
if (!primaryKeys.isEmpty()) {
parameters.put(CoreOptions.PRIMARY_KEY.key(), StringUtils.join(primaryKeys, ","));
}
} catch (ImpalaRuntimeException e) {
throw new TableLoadingException("Error while infer schema from underlying" +
" paimon table", e);
}
}
/**
* Drops the Paimon table through the Paimon API.
* Returns whether the HMS table still needs to be dropped.
*/
public static boolean dropTable(org.apache.hadoop.hive.metastore.api.Table msTbl,
org.apache.impala.catalog.Table existingTbl, EventSequence catalogTimeline,
TDropTableOrViewParams param) throws ImpalaRuntimeException {
if (msTbl == null) { return false; }
Preconditions.checkArgument(existingTbl instanceof PaimonTable);
boolean isSynchronizedPaimonTable =
PaimonUtil.isSynchronizedTable(msTbl) || param.isPurge();
if (isSynchronizedPaimonTable) {
String location = msTbl.getSd().getLocation();
Path path = new Path(location);
CatalogContext context = PaimonUtil.catalogContext(msTbl, location);
try {
FileIO fileIO = FileIO.get(path, context);
if (fileIO.exists(path)) {
// clear the data
fileIO.deleteDirectoryQuietly(path);
fileIO.mkdirs(path);
}
} catch (IOException e) {
LOG.warn("Delete directory '{}' fail for the paimon table.", path, e);
}
catalogTimeline.markEvent("Dropped table using Paimon");
}
return true;
}
}
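
A hedged sketch of how the helper above might be driven for a synchronized table.
The wrapper class, method name and the 'msClient'/'msTbl'/'params' variables are
assumptions; the real wiring is shown in the CatalogOpExecutor changes further below.

import org.apache.impala.catalog.MetaStoreClientPool.MetaStoreClient;
import org.apache.impala.catalog.paimon.PaimonCatalogOpExecutor;
import org.apache.impala.catalog.paimon.PaimonUtil;
import org.apache.impala.thrift.TCreateTableParams;

public class PaimonCreateTableSketch {
  static void createSynchronizedPaimonTable(MetaStoreClient msClient,
      org.apache.hadoop.hive.metastore.api.Table msTbl, TCreateTableParams params)
      throws Exception {
    // Resolve the table location (catalog location for a hadoop catalog,
    // the HMS default path otherwise) ...
    String location = PaimonUtil.getPaimonCatalogLocation(msClient, msTbl);
    // ... create the Paimon schema at that location ...
    String tableLoc = PaimonCatalogOpExecutor.createTable(
        PaimonUtil.getTableIdentifier(msTbl), location, params, msTbl);
    // ... and record the resolved location in the HMS table before creating it in HMS.
    msTbl.getSd().setLocation(tableLoc);
  }
}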

View File

@@ -0,0 +1,266 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.impala.catalog.paimon;
import com.codahale.metrics.Timer;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.impala.catalog.Column;
import org.apache.impala.catalog.Db;
import org.apache.impala.catalog.Table;
import org.apache.impala.catalog.TableLoadingException;
import org.apache.impala.catalog.VirtualColumn;
import org.apache.impala.common.ImpalaRuntimeException;
import org.apache.impala.thrift.TCatalogObjectType;
import org.apache.impala.thrift.TPaimonCatalog;
import org.apache.impala.thrift.TPaimonTable;
import org.apache.impala.thrift.TPaimonTableKind;
import org.apache.impala.thrift.TTable;
import org.apache.impala.thrift.TTableDescriptor;
import org.apache.impala.thrift.TTableType;
import org.apache.impala.util.EventSequence;
import org.apache.paimon.CoreOptions;
import org.apache.paimon.types.DataField;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
/**
* Representation of a Paimon table in the catalog cache.
*/
public class PaimonTable extends Table implements FePaimonTable {
private static final Set<String> PAIMON_EXCLUDED_PROPERTIES = Sets.newHashSet();
static {
PAIMON_EXCLUDED_PROPERTIES.add(CoreOptions.PATH.key());
PAIMON_EXCLUDED_PROPERTIES.add("owner");
}
// Paimon api table.
private org.apache.paimon.table.Table table_;
public PaimonTable(org.apache.hadoop.hive.metastore.api.Table msTable, Db db,
String name, String owner) {
super(msTable, db, name, owner);
}
@Override
public TTableDescriptor toThriftDescriptor(
int tableId, Set<Long> referencedPartitions) {
TTableDescriptor tableDescriptor =
new TTableDescriptor(tableId, TTableType.PAIMON_TABLE, getTColumnDescriptors(),
numClusteringCols_, name_, db_.getName());
try {
tableDescriptor.setPaimonTable(PaimonUtil.getTPaimonTable(this));
} catch (IOException e) { throw new RuntimeException(e); }
return tableDescriptor;
}
@Override
public TTable toThrift() {
TTable tTable = super.toThrift();
try {
tTable.setPaimon_table(PaimonUtil.getTPaimonTable(this));
} catch (IOException e) { throw new RuntimeException(e); }
return tTable;
}
@Override
protected void loadFromThrift(TTable thriftTable) throws TableLoadingException {
super.loadFromThrift(thriftTable);
TPaimonTable tpaimon = thriftTable.getPaimon_table();
try {
Preconditions.checkArgument(tpaimon.getKind() == TPaimonTableKind.JNI);
table_ = PaimonUtil.deserialize(ByteBuffer.wrap(tpaimon.getJni_tbl_obj()));
} catch (Exception e) {
throw new TableLoadingException("Failed to load paimon table from" +
" thrift data.",e);
}
}
@Override
public TCatalogObjectType getCatalogObjectType() {
return TCatalogObjectType.TABLE;
}
/**
* Verify the table metadata.
* @throws TableLoadingException when it is unsafe to load the table.
*/
private void verifyTable(org.apache.hadoop.hive.metastore.api.Table msTbl)
throws TableLoadingException {
TPaimonCatalog catalog = PaimonUtil.getTPaimonCatalog(msTbl);
if (catalog == TPaimonCatalog.HADOOP_CATALOG) {
if (!msTbl.getParameters()
.containsKey(PaimonUtil.PAIMON_HADOOP_CATALOG_LOCATION)) {
throw new TableLoadingException(
String.format(
"%s is required for paimon hadoop catalog table.",
PaimonUtil.PAIMON_HADOOP_CATALOG_LOCATION)
);
}
}
}
/**
* Load schema and partitioning schemes directly from Paimon.
*/
public void loadSchemaFromPaimon()
throws TableLoadingException, ImpalaRuntimeException {
loadSchema();
addVirtualColumns();
}
/**
* Loads the HMS schema from the Paimon schema.
*/
private void loadSchema() throws TableLoadingException {
clearColumns();
try {
List<DataField> dataFields = getPaimonSchema().getFields();
List<String> partitionKeys = getPaimonApiTable()
.partitionKeys()
.stream()
.map(String::toLowerCase)
.collect(Collectors.toList());
List<FieldSchema> hiveFields = PaimonUtil.convertToHiveSchema(getPaimonSchema());
List<Column> impalaFields = PaimonUtil.convertToImpalaSchema(getPaimonSchema());
List<FieldSchema> hivePartitionedFields = Lists.newArrayList();
List<FieldSchema> hiveNonPartitionedFields = Lists.newArrayList();
List<Column> impalaNonPartitionedFields = Lists.newArrayList();
List<Column> impalaPartitionedFields = Lists.newArrayList();
// lookup the clustering columns
for (String name : partitionKeys) {
int colIndex = PaimonUtil.getFieldIndexByNameIgnoreCase(getPaimonSchema(), name);
Preconditions.checkArgument(colIndex >= 0);
hivePartitionedFields.add(hiveFields.get(colIndex));
impalaPartitionedFields.add(impalaFields.get(colIndex));
}
// put non-clustering columns in natural order
for (int i = 0; i < dataFields.size(); i++) {
if (!partitionKeys.contains(dataFields.get(i).name().toLowerCase())) {
hiveNonPartitionedFields.add(hiveFields.get(i));
impalaNonPartitionedFields.add(impalaFields.get(i));
}
}
// update hive ms table metadata
if (!hivePartitionedFields.isEmpty()) {
msTable_.setPartitionKeys(hivePartitionedFields);
}
msTable_.getSd().setCols(hiveNonPartitionedFields);
// update impala table metadata
int colPos = 0;
for (Column col : impalaPartitionedFields) {
col.setPosition(colPos++);
addColumn(col);
}
for (Column col : impalaNonPartitionedFields) {
col.setPosition(colPos++);
addColumn(col);
}
numClusteringCols_ = impalaPartitionedFields.size();
// sync table properties from underlying paimon table
final Map<String, String> paimonProps =
Maps.newHashMap(getPaimonApiTable().options());
for (String key : PAIMON_EXCLUDED_PROPERTIES) { paimonProps.remove(key); }
for (String key : paimonProps.keySet()) {
msTable_.getParameters().put(key, paimonProps.get(key));
}
} catch (ImpalaRuntimeException e) {
throw new TableLoadingException(e.getMessage(), e);
}
}
/**
* Adds virtual columns for the Paimon table. A Paimon table exposes four metadata
* columns: paimon_file_path, paimon_row_index, __paimon_partition and __paimon_bucket.
* paimon_file_path and paimon_row_index are mapped to the existing virtual columns
* INPUT_FILE_NAME and FILE_POSITION; __paimon_partition and __paimon_bucket are
* mapped to the newly added PARTITION_VALUE_SERIALIZED and BUCKET_ID, respectively.
*/
private void addVirtualColumns() {
addVirtualColumn(VirtualColumn.INPUT_FILE_NAME);
addVirtualColumn(VirtualColumn.FILE_POSITION);
addVirtualColumn(VirtualColumn.PARTITION_VALUE_SERIALIZED);
addVirtualColumn(VirtualColumn.BUCKET_ID);
}
/**
* Loads the metadata of a Paimon table.
* <p>
* Schema and partitioning schemes are loaded directly from Paimon. Column stats are
* loaded from HMS first; if they are absent, they are loaded from the Paimon table.
* The function also updates the table schema in HMS in order to
* propagate alterations made to the Paimon table to HMS.
* </p>
*/
@Override
public void load(boolean reuseMetadata, IMetaStoreClient msClient,
org.apache.hadoop.hive.metastore.api.Table msTbl, String reason,
EventSequence catalogTimeline) throws TableLoadingException {
final Timer.Context context =
getMetrics().getTimer(Table.LOAD_DURATION_METRIC).time();
verifyTable(msTbl);
try {
// Copy the table to check later if anything has changed.
msTable_ = msTbl;
setTableStats(msTable_);
// Load metadata from Paimon
final Timer.Context ctxStorageLdTime =
getMetrics().getTimer(Table.LOAD_DURATION_STORAGE_METADATA).time();
try {
table_ = PaimonUtil.createFileStoreTable(msTbl);
catalogTimeline.markEvent("Loaded Paimon API table");
loadSchemaFromPaimon();
catalogTimeline.markEvent("Loaded schema from Paimon");
applyPaimonTableStatsIfPresent();
loadAllColumnStats(msClient, catalogTimeline);
applyPaimonColumnStatsIfPresent();
catalogTimeline.markEvent("Loaded stats from Paimon");
} catch (Exception e) {
throw new TableLoadingException(
"Error loading metadata for Paimon table " + msTbl.getTableName(), e);
} finally {
catalogTimeline.markEvent("Loaded all from Paimon");
storageMetadataLoadTime_ = ctxStorageLdTime.stop();
}
refreshLastUsedTime();
} finally {
context.stop();
}
}
@Override
public org.apache.paimon.table.Table getPaimonApiTable() {
return table_;
}
}
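
A small sketch of the serialization round trip that toThrift()/loadFromThrift() rely
on (illustrative only; 'feTable' is assumed to be an already loaded FePaimonTable and
the wrapper class is hypothetical).

import java.nio.ByteBuffer;
import org.apache.impala.catalog.paimon.FePaimonTable;
import org.apache.impala.catalog.paimon.PaimonUtil;
import org.apache.impala.thrift.TPaimonTable;

public class PaimonThriftRoundTripSketch {
  static org.apache.paimon.table.Table roundTrip(FePaimonTable feTable) throws Exception {
    // The catalog side serializes the Paimon API table object into the thrift struct ...
    TPaimonTable t = PaimonUtil.getTPaimonTable(feTable);
    // ... and the receiving side deserializes it back when loading the table from thrift.
    return PaimonUtil.deserialize(ByteBuffer.wrap(t.getJni_tbl_obj()));
  }
}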

View File

@@ -0,0 +1,25 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.impala.catalog.paimon;
import org.apache.impala.common.ImpalaException;
public class PaimonTableLoadingException extends ImpalaException {
public PaimonTableLoadingException(String msg, Throwable cause) { super(msg, cause); }
}

View File

@@ -0,0 +1,861 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.impala.catalog.paimon;
import static org.apache.impala.catalog.Table.isExternalPurgeTable;
import static org.apache.paimon.CoreOptions.PARTITION_DEFAULT_NAME;
import static org.apache.paimon.CoreOptions.PARTITION_GENERATE_LEGCY_NAME;
import static org.apache.paimon.utils.HadoopUtils.HADOOP_LOAD_DEFAULT_CONFIG;
import com.google.common.collect.Lists;
import org.apache.commons.lang3.SerializationUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData;
import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
import org.apache.impala.catalog.Column;
import org.apache.impala.catalog.DatabaseNotFoundException;
import org.apache.impala.catalog.HdfsFileFormat;
import org.apache.impala.catalog.MetaStoreClientPool;
import org.apache.impala.catalog.Type;
import org.apache.impala.common.FileSystemUtil;
import org.apache.impala.common.ImpalaRuntimeException;
import org.apache.impala.common.PrintUtils;
import org.apache.impala.compat.MetastoreShim;
import org.apache.impala.thrift.TColumn;
import org.apache.impala.thrift.TDescribeHistoryParams;
import org.apache.impala.thrift.TGetTableHistoryResult;
import org.apache.impala.thrift.TGetTableHistoryResultItem;
import org.apache.impala.thrift.TPaimonCatalog;
import org.apache.impala.thrift.TPaimonTable;
import org.apache.impala.thrift.TPaimonTableKind;
import org.apache.impala.thrift.TResultSet;
import org.apache.impala.thrift.TResultSetMetadata;
import org.apache.impala.thrift.TShowFilesParams;
import org.apache.impala.thrift.TTableStats;
import org.apache.impala.util.TResultRowBuilder;
import org.apache.paimon.CoreOptions;
import org.apache.paimon.catalog.AbstractCatalog;
import org.apache.paimon.catalog.Catalog;
import org.apache.paimon.catalog.CatalogContext;
import org.apache.paimon.catalog.CatalogFactory;
import org.apache.paimon.catalog.CatalogUtils;
import org.apache.paimon.catalog.Identifier;
import org.apache.paimon.data.InternalRow;
import org.apache.paimon.data.Timestamp;
import org.apache.paimon.fs.FileIO;
import org.apache.paimon.fs.Path;
import org.apache.paimon.hive.HiveCatalog;
import org.apache.paimon.hive.HiveTypeUtils;
import org.apache.paimon.hive.LocationKeyExtractor;
import org.apache.paimon.hive.utils.HiveUtils;
import org.apache.paimon.options.CatalogOptions;
import org.apache.paimon.options.Options;
import org.apache.paimon.partition.Partition;
import org.apache.paimon.predicate.Predicate;
import org.apache.paimon.predicate.PredicateBuilder;
import org.apache.paimon.reader.RecordReader;
import org.apache.paimon.schema.Schema;
import org.apache.paimon.stats.ColStats;
import org.apache.paimon.table.FileStoreTable;
import org.apache.paimon.table.FileStoreTableFactory;
import org.apache.paimon.table.Table;
import org.apache.paimon.table.source.DataSplit;
import org.apache.paimon.table.source.DeletionFile;
import org.apache.paimon.table.source.RawFile;
import org.apache.paimon.table.source.ReadBuilder;
import org.apache.paimon.table.source.Split;
import org.apache.paimon.table.source.TableRead;
import org.apache.paimon.table.source.TableScan;
import org.apache.paimon.table.system.SystemTableLoader;
import org.apache.paimon.types.BigIntType;
import org.apache.paimon.types.BooleanType;
import org.apache.paimon.types.DataField;
import org.apache.paimon.types.DataTypeFamily;
import org.apache.paimon.types.DoubleType;
import org.apache.paimon.types.FloatType;
import org.apache.paimon.types.IntType;
import org.apache.paimon.types.RowType;
import org.apache.paimon.types.SmallIntType;
import org.apache.paimon.types.TinyIntType;
import org.apache.paimon.utils.InternalRowPartitionComputer;
import org.apache.thrift.TException;
import com.google.common.base.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.time.Instant;
import java.time.ZoneId;
import java.time.ZoneOffset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.function.Consumer;
import java.util.stream.Collectors;
public class PaimonUtil {
final static Logger LOG = LoggerFactory.getLogger(PaimonUtil.class);
public static final String PAIMON_STORAGE_HANDLER =
"org.apache.paimon.hive.PaimonStorageHandler";
public static final String STORAGE_HANDLER = "storage_handler";
public static final String PAIMON_CATALOG = "paimon.catalog";
public static final String HIVE_CATALOG = "hive";
public static final String PAIMON_PROPERTY_PREFIX = "";
public static final String PAIMON_HADOOP_CATALOG_LOCATION = "paimon.catalog_location";
public static final String PAIMON_TABLE_LOCATION = "paimon_location";
public static final String PAIMON_TABLE_IDENTIFIER = "paimon.table_identifier";
private static final HiveConf hiveConf_ = new HiveConf();
public static Catalog catalog_ = null;
private static final String metastoreClientClass_ =
"org.apache.hadoop.hive.metastore.HiveMetaStoreClient";
/**
* Returns true if the given Metastore Table represents a Paimon table.
* Versions of Hive/Paimon are inconsistent in which Paimon-related fields are set
* (e.g., HIVE-6548 changed the input format to null).
* For maximum compatibility, consider all known fields that indicate a Paimon table.
*/
public static boolean isPaimonTable(org.apache.hadoop.hive.metastore.api.Table msTbl) {
if (msTbl.getParameters() != null
&& PAIMON_STORAGE_HANDLER.equals(
msTbl.getParameters().getOrDefault(STORAGE_HANDLER, ""))) {
return true;
}
StorageDescriptor sd = msTbl.getSd();
if (sd == null) return false;
if (sd.getInputFormat() != null
&& sd.getInputFormat().equals(HdfsFileFormat.PAIMON.inputFormat())) {
return true;
} else
return sd.getSerdeInfo() != null && sd.getSerdeInfo().getSerializationLib() != null
&& sd.getSerdeInfo().getSerializationLib().equals(
HdfsFileFormat.PAIMON.serializationLib());
}
public static ByteBuffer serialize(FePaimonTable paimonTable) throws IOException {
return ByteBuffer.wrap(SerializationUtils.serialize(paimonTable.getPaimonApiTable()));
}
public static Table deserialize(ByteBuffer b) throws Exception {
return SerializationUtils.deserialize(b.array());
}
/**
* Get Thrift object for paimon table.
*/
public static TPaimonTable getTPaimonTable(FePaimonTable paimonTable)
throws IOException {
TPaimonTable t_ = new TPaimonTable();
t_.setKind(TPaimonTableKind.JNI);
t_.setJni_tbl_obj(serialize(paimonTable));
return t_;
}
/**
* Converts Paimon schema to a Hive schema.
*/
public static List<FieldSchema> convertToHiveSchema(RowType schema)
throws ImpalaRuntimeException {
List<FieldSchema> ret = new ArrayList<>();
for (DataField dataField : schema.getFields()) {
ret.add(new FieldSchema(dataField.name().toLowerCase(),
HiveTypeUtils.toTypeInfo(dataField.type()).getTypeName(),
dataField.description()));
}
return ret;
}
/**
* Converts Paimon schema to an Impala schema.
*/
public static List<Column> convertToImpalaSchema(RowType schema)
throws ImpalaRuntimeException {
List<Column> ret = new ArrayList<>();
int pos = 0;
for (DataField dataField : schema.getFields()) {
Type colType = ImpalaTypeUtils.toImpalaType(dataField.type());
ret.add(new Column(dataField.name().toLowerCase(), colType, pos++));
}
return ret;
}
/**
* Generates Paimon schema from given columns.
*/
public static Schema genPaimonSchema(List<TColumn> columns, List<String> partitionKeys,
Map<String, String> options) {
Schema.Builder schemaBuilder = Schema.newBuilder();
for (TColumn column : columns) {
schemaBuilder.column(column.getColumnName().toLowerCase(),
ImpalaTypeUtils.fromImpalaType(Type.fromThrift(column.getColumnType())));
}
if (!partitionKeys.isEmpty()) { schemaBuilder.partitionKeys(partitionKeys); }
if (!options.isEmpty()) { schemaBuilder.options(options); }
return schemaBuilder.build();
}
/**
* Returns the corresponding paimon catalog implementation.
* TODO:
*/
public static Catalog getPaimonCatalog(TPaimonCatalog catalog, boolean isExternal,
String warehouse_location) throws ImpalaRuntimeException {
switch (catalog) {
case HADOOP_CATALOG: {
Preconditions.checkNotNull(
warehouse_location, "warehouse location should not be null");
CatalogContext context = CatalogContext.create(new Path(warehouse_location));
return CatalogFactory.createCatalog(context);
}
case HIVE_CATALOG: {
try {
String location = isExternal ?
hiveConf_.get(HiveConf.ConfVars.HIVE_METASTORE_WAREHOUSE_EXTERNAL.varname) :
hiveConf_.get(HiveConf.ConfVars.METASTOREWAREHOUSE.varname);
Path path = new Path(location);
Options catalogOptions = new Options();
catalogOptions.set(CatalogOptions.WAREHOUSE, location);
CatalogContext catalogContext = CatalogContext.create(catalogOptions);
FileIO fileIO = FileIO.get(path, catalogContext);
HiveCatalog externalWarehouseCatalog =
new HiveCatalog(fileIO, hiveConf_, metastoreClientClass_, location);
return externalWarehouseCatalog;
} catch (Exception ex) {
throw new ImpalaRuntimeException("failed to create hive catalog : ", ex);
}
}
default: throw new ImpalaRuntimeException("Unexpected catalog type: " + catalog);
}
}
/**
* get Paimon Identifier object.
*/
public static Identifier getTableIdentifier(String dbName, String tableName) {
return new Identifier(dbName, tableName);
}
/**
* Generates Paimon table identifier from HMS table object.
*/
public static Identifier getTableIdentifier(
org.apache.hadoop.hive.metastore.api.Table msTable) {
String name = msTable.getParameters().get(PAIMON_TABLE_IDENTIFIER);
if (name == null || name.isEmpty()) {
return getTableIdentifier(
msTable.getDbName().toLowerCase(), msTable.getTableName().toLowerCase());
} else {
if (!name.contains(".")) {
return getTableIdentifier(msTable.getDbName(), name);
} else {
String[] names = name.split("\\.");
return getTableIdentifier(names[0], names[1]);
}
}
}
/**
* Convert paimon column stats to HMS ColumnStatisticsData.
*/
public static Optional<ColumnStatisticsData> convertColStats(
ColStats<?> colStats, DataField dataField) {
ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData();
colStats.deserializeFieldsFromString(dataField.type());
Set<DataTypeFamily> fieldFamilySet = dataField.type().getTypeRoot().getFamilies();
if (fieldFamilySet.contains(DataTypeFamily.NUMERIC)) {
if (fieldFamilySet.contains(DataTypeFamily.INTEGER_NUMERIC)) {
// LONG_STATS
LongColumnStatsData longColumnStatsData = new LongColumnStatsData();
if (colStats.nullCount().isPresent()) {
longColumnStatsData.setNumNulls(colStats.nullCount().getAsLong());
}
if (dataField.type() instanceof BigIntType) {
if (colStats.min().isPresent()) {
longColumnStatsData.setLowValue((Long) colStats.min().get());
}
if (colStats.max().isPresent()) {
longColumnStatsData.setHighValue((Long) colStats.max().get());
}
} else if (dataField.type() instanceof IntType) {
if (colStats.min().isPresent()) {
longColumnStatsData.setLowValue((Integer) colStats.min().get());
}
if (colStats.max().isPresent()) {
longColumnStatsData.setHighValue((Integer) colStats.max().get());
}
} else if (dataField.type() instanceof SmallIntType) {
if (colStats.min().isPresent()) {
longColumnStatsData.setLowValue((Short) colStats.min().get());
}
if (colStats.max().isPresent()) {
longColumnStatsData.setHighValue((Short) colStats.max().get());
}
} else if (dataField.type() instanceof TinyIntType) {
if (colStats.min().isPresent()) {
longColumnStatsData.setLowValue((Byte) colStats.min().get());
}
if (colStats.max().isPresent()) {
longColumnStatsData.setHighValue((Byte) colStats.max().get());
}
} else {
LOG.warn(String.format("Column stats doesn't support data type %s",
dataField.type().asSQLString()));
return Optional.empty();
}
if (colStats.distinctCount().isPresent()) {
longColumnStatsData.setNumDVs(colStats.distinctCount().getAsLong());
}
columnStatisticsData.setLongStats(longColumnStatsData);
return Optional.of(columnStatisticsData);
} else if (fieldFamilySet.contains(DataTypeFamily.APPROXIMATE_NUMERIC)) {
// DOUBLE_STATS
DoubleColumnStatsData doubleColumnStatsData = new DoubleColumnStatsData();
if (colStats.nullCount().isPresent()) {
doubleColumnStatsData.setNumNulls(colStats.nullCount().getAsLong());
}
if (dataField.type() instanceof DoubleType) {
if (colStats.min().isPresent()) {
doubleColumnStatsData.setLowValue((Double) colStats.min().get());
}
if (colStats.max().isPresent()) {
doubleColumnStatsData.setHighValue((Double) colStats.max().get());
}
} else if (dataField.type() instanceof FloatType) {
if (colStats.min().isPresent()) {
doubleColumnStatsData.setLowValue((Float) colStats.min().get());
}
if (colStats.max().isPresent()) {
doubleColumnStatsData.setHighValue((Float) colStats.max().get());
}
} else {
LOG.warn(String.format("Column stats doesn't support data type %s",
dataField.type().asSQLString()));
return Optional.empty();
}
if (colStats.distinctCount().isPresent()) {
doubleColumnStatsData.setNumDVs(colStats.distinctCount().getAsLong());
}
columnStatisticsData.setDoubleStats(doubleColumnStatsData);
return Optional.of(columnStatisticsData);
} else {
LOG.warn(String.format("Column stats doesn't support data type %s",
dataField.type().asSQLString()));
return Optional.empty();
}
} else if (fieldFamilySet.contains(DataTypeFamily.CHARACTER_STRING)) {
// STRING_STATS
StringColumnStatsData stringColumnStatsData = new StringColumnStatsData();
if (colStats.nullCount().isPresent()) {
stringColumnStatsData.setNumNulls(colStats.nullCount().getAsLong());
}
if (colStats.avgLen().isPresent()) {
stringColumnStatsData.setAvgColLen(colStats.avgLen().getAsLong());
}
if (colStats.maxLen().isPresent()) {
stringColumnStatsData.setMaxColLen(colStats.maxLen().getAsLong());
}
columnStatisticsData.setStringStats(stringColumnStatsData);
return Optional.of(columnStatisticsData);
} else if (fieldFamilySet.contains(DataTypeFamily.BINARY_STRING)) {
// BINARY_STATS
BinaryColumnStatsData binaryColumnStatsData = new BinaryColumnStatsData();
if (colStats.nullCount().isPresent()) {
binaryColumnStatsData.setNumNulls(colStats.nullCount().getAsLong());
}
if (colStats.avgLen().isPresent()) {
binaryColumnStatsData.setAvgColLen(colStats.avgLen().getAsLong());
}
if (colStats.maxLen().isPresent()) {
binaryColumnStatsData.setMaxColLen(colStats.maxLen().getAsLong());
}
columnStatisticsData.setBinaryStats(binaryColumnStatsData);
return Optional.of(columnStatisticsData);
} else if (dataField.type() instanceof BooleanType) {
// BOOLEAN_STATS
BooleanColumnStatsData booleanColumnStatsData = new BooleanColumnStatsData();
if (colStats.nullCount().isPresent()) {
booleanColumnStatsData.setNumNulls(colStats.nullCount().getAsLong());
}
columnStatisticsData.setBooleanStats(booleanColumnStatsData);
return Optional.of(columnStatisticsData);
} else {
LOG.warn(String.format("Column stats doesn't support data type %s",
dataField.type().asSQLString()));
return Optional.empty();
}
}
/**
* Get FileStore Object for paimon table in HMS.
*/
public static FileStoreTable createFileStoreTable(
org.apache.hadoop.hive.metastore.api.Table table) throws MetaException {
Options options = HiveUtils.extractCatalogConfig(FePaimonTable.jobConf);
options.set(CoreOptions.PATH,
LocationKeyExtractor.getPaimonLocation(FePaimonTable.jobConf, table));
CatalogContext catalogContext;
if (options.get(HADOOP_LOAD_DEFAULT_CONFIG)) {
catalogContext = CatalogContext.create(options, FePaimonTable.jobConf);
} else {
catalogContext = CatalogContext.create(options);
}
return FileStoreTableFactory.create(catalogContext);
}
/**
* Get FileStore Object for specified location
*/
public static FileStoreTable createFileStoreTable(String tableLocation)
throws MetaException {
Options options = HiveUtils.extractCatalogConfig(FePaimonTable.jobConf);
options.set(CoreOptions.PATH, tableLocation);
CatalogContext catalogContext;
if (options.get(HADOOP_LOAD_DEFAULT_CONFIG)) {
catalogContext = CatalogContext.create(options, FePaimonTable.jobConf);
} else {
catalogContext = CatalogContext.create(options);
}
return FileStoreTableFactory.create(catalogContext);
}
/**
* Returns the lower-cased field names of the given row type.
*/
public static List<String> fieldNames(RowType rowType) {
return rowType.getFields()
.stream()
.map(DataField::name)
.map(String::toLowerCase)
.collect(Collectors.toList());
}
/**
* check if the given table has primary keys
*/
public static boolean hasPrimaryKey(Table table) {
return !table.primaryKeys().isEmpty();
}
/**
* check if the given table is partitioned
*/
public static boolean hasPartition(Table table) {
return !table.partitionKeys().isEmpty();
}
/**
* lookup result for given table, max 1000 rows.
*/
public static List<InternalRow> lookupInTable(Table table, List<Predicate> predicates) {
return lookupInTable(table, predicates, 1000);
}
/**
* lookup result for given table.
*/
protected static List<InternalRow> lookupInTable(
Table table, List<Predicate> predicates, int maxcount) {
ReadBuilder readBuilder = table.newReadBuilder().withFilter(predicates);
List<Split> splits = readBuilder.newScan().plan().splits();
TableRead read = readBuilder.newRead();
final List<InternalRow> internalRows = Lists.newArrayList();
try {
try (RecordReader<InternalRow> recordReader = read.createReader(splits)) {
RecordReader.RecordIterator<InternalRow> recordIterator =
recordReader.readBatch();
InternalRow internalRow = null;
while ((internalRow = recordIterator.next()) != null) {
internalRows.add(internalRow);
if (internalRows.size() >= maxcount) { break; }
}
}
} catch (IOException ex) {
LOG.warn("failed to read table", ex);
return Lists.newArrayList();
}
return internalRows;
}
/**
* Checks whether the given table supports predicate pushdown.
*/
public static boolean canApplyPredicatePushDown(Table table) {
return table instanceof FileStoreTable;
}
/**
* Queries the snapshot history of the paimon table.
*/
public static TGetTableHistoryResult getPaimonTableHistory(FePaimonTable feTable,
TDescribeHistoryParams params) throws DatabaseNotFoundException {
try {
// Column indexes for paimon snapshot table.
// Used to select required columns from snapshot table.
// 5: commit_time
// 0: snapshot id
// 1: schema_id
final int[] SNAPSHOT_TABLE_PROJECTION = {5, 0, 1};
TGetTableHistoryResult historyResult = new TGetTableHistoryResult();
FileStoreTable table = (FileStoreTable) feTable.getPaimonApiTable();
org.apache.paimon.table.Table snapshotTable =
SystemTableLoader.load("snapshots", table);
PredicateBuilder predicateBuilder = new PredicateBuilder(snapshotTable.rowType());
Optional<Predicate> predicataOpt = Optional.empty();
if (params.isSetFrom_time()) {
// DESCRIBE HISTORY <table> FROM <ts>
// check if commit-time >= from_time
predicataOpt = Optional.of(predicateBuilder.greaterOrEqual(
0, Timestamp.fromEpochMillis(params.getFrom_time())));
} else if (params.isSetBetween_start_time() && params.isSetBetween_end_time()) {
predicataOpt = Optional.of(PredicateBuilder.and(
predicateBuilder.greaterOrEqual(
0, Timestamp.fromEpochMillis(params.getBetween_start_time())),
predicateBuilder.lessOrEqual(
0, Timestamp.fromEpochMillis(params.getBetween_end_time()))));
}
ReadBuilder readBuilder =
snapshotTable.newReadBuilder().withProjection(SNAPSHOT_TABLE_PROJECTION);
predicataOpt.ifPresent(readBuilder::withFilter);
List<Split> splits = readBuilder.newScan().plan().splits();
RecordReader<InternalRow> internalRowRecordReader =
readBuilder.newRead().createReader(splits);
List<TGetTableHistoryResultItem> result =
com.google.common.collect.Lists.newArrayList();
internalRowRecordReader.forEachRemaining(new Consumer<InternalRow>() {
@Override
public void accept(InternalRow internalRow) {
TGetTableHistoryResultItem resultItem = new TGetTableHistoryResultItem();
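// With SNAPSHOT_TABLE_PROJECTION applied, column 0 is commit_time and
// column 1 is the snapshot id.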
long snapshotId = internalRow.getLong(1);
Timestamp timestamp = internalRow.getTimestamp(0, 9);
resultItem.setCreation_time(timestamp.getMillisecond());
resultItem.setSnapshot_id(snapshotId);
// note: parent id and ancestor id are always null
result.add(resultItem);
}
});
historyResult.setResult(result);
return historyResult;
} catch (Exception ex) {
throw new DatabaseNotFoundException("Failed to get snapshot: " + ex.getMessage());
}
}
/**
* Get HdfsFileFormat from a string, usually from table properties.
* Returns PARQUET when 'format' is null. Returns null for invalid formats.
*/
public static HdfsFileFormat getPaimonFileFormat(String format) {
if ("PARQUET".equalsIgnoreCase(format) || format == null) {
return HdfsFileFormat.PARQUET;
} else if ("ORC".equalsIgnoreCase(format)) {
return HdfsFileFormat.ORC;
}
return null;
}
/**
* A table is a synchronized table if it is a managed table, or if it is an external
* table with the <code>external.table.purge</code> property set to true.
* We need to create/drop/etc. synchronized tables through the Paimon APIs as well.
*/
public static boolean isSynchronizedTable(
org.apache.hadoop.hive.metastore.api.Table msTbl) {
com.google.common.base.Preconditions.checkState(isPaimonTable(msTbl));
return isManagedTable(msTbl) || isExternalPurgeTable(msTbl);
}
/**
* Returns true if this metastore table has the managed table type.
*/
public static boolean isManagedTable(org.apache.hadoop.hive.metastore.api.Table msTbl) {
return msTbl.getTableType().equalsIgnoreCase(TableType.MANAGED_TABLE.toString());
}
/**
* get the location for the newly created paimon table.
*/
public static String getPaimonCatalogLocation(
MetaStoreClientPool.MetaStoreClient msClient,
org.apache.hadoop.hive.metastore.api.Table msTable) throws TException {
TPaimonCatalog catalog = getTPaimonCatalog(msTable);
if (catalog == TPaimonCatalog.HADOOP_CATALOG) {
// Using catalog location to create table
// We cannot set location for 'hadoop.catalog' table in SQL
String location = msTable.getParameters().get(PAIMON_HADOOP_CATALOG_LOCATION);
Identifier table_identifier = getTableIdentifier(msTable);
return AbstractCatalog.newTableLocation(location, table_identifier).toString();
} else {
// For the hive catalog, use the default HMS path for the new table.
return MetastoreShim.getPathForNewTable(
msClient.getHiveClient().getDatabase(msTable.getDbName()), msTable);
}
}
/**
* Get Paimon table catalog location with 'paimon.catalog_location' when using
* 'hadoop.catalog'
*/
public static String getPaimonCatalogLocation(
org.apache.hadoop.hive.metastore.api.Table msTable) {
return msTable.getParameters().get(PAIMON_HADOOP_CATALOG_LOCATION);
}
/**
* Get Paimon table catalog type from hms table properties
* use HiveCatalog as default
*/
public static TPaimonCatalog getTPaimonCatalog(
org.apache.hadoop.hive.metastore.api.Table msTable) {
return getTPaimonCatalog(msTable.getParameters());
}
/**
* Get Paimon table catalog type from properties
* use HiveCatalog as default
*/
public static TPaimonCatalog getTPaimonCatalog(Map<String, String> props) {
return getTPaimonCatalog(props.get(PAIMON_CATALOG));
}
/**
* Get Paimon table catalog type string
* use HiveCatalog as default
*/
public static TPaimonCatalog getTPaimonCatalog(String catalog) {
if ("hadoop".equalsIgnoreCase(catalog)) {
return TPaimonCatalog.HADOOP_CATALOG;
} else if (HIVE_CATALOG.equalsIgnoreCase(catalog) || catalog == null) {
return TPaimonCatalog.HIVE_CATALOG;
} else {
return TPaimonCatalog.HIVE_CATALOG;
}
}
/**
* Extract column names from string
*/
public static List<String> extractColumnNames(String value) {
return Arrays.stream(value.split(",")).
map(String::toLowerCase).collect(Collectors.toList());
}
/**
* Create catalog context from HMS table and location
*/
public static CatalogContext catalogContext(
org.apache.hadoop.hive.metastore.api.Table table, String location) {
Options options = HiveUtils.extractCatalogConfig(hiveConf_);
options.set(CoreOptions.PATH, location);
table.getParameters().forEach(options::set);
return CatalogContext.create(options, hiveConf_);
}
/**
* Converts a partition spec map to a 'key1=value1/key2=value2' string.
*/
public static String partitionSpecToString(final Map<String, String> spec) {
List<String> speclist = spec.keySet()
.stream()
.map(k -> String.join("=", k, spec.get(k)))
.collect(Collectors.toList());
return String.join("/", speclist);
}
/**
* Get table stats for the given fe paimon table.
*/
public static TResultSet doGetTableStats(FePaimonTable table) {
TResultSet result = new TResultSet();
TResultSetMetadata resultSchema = new TResultSetMetadata();
result.setSchema(resultSchema);
result.setRows(new ArrayList<>());
resultSchema.addToColumns(new TColumn("Number Of Rows", Type.BIGINT.toThrift()));
resultSchema.addToColumns(new TColumn("Number Of Bytes", Type.BIGINT.toThrift()));
TTableStats stats = table.getTTableStats();
{
TResultRowBuilder builder = new TResultRowBuilder();
builder.add(stats.getNum_rows());
builder.add(stats.getTotal_file_bytes());
result.addToRows(builder.get());
}
return result;
}
public static long localMillisToUTCMillis(long epochMillis) {
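// Shifts the epoch millis reported by the paimon api by the default zone's
// current offset; see the comment in doGetPartitionStats() for why.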
ZoneId zone = ZoneId.systemDefault();
ZoneOffset offset = zone.getRules().getOffset(Instant.now());
return epochMillis + offset.getTotalSeconds() * 1000L;
}
/**
* Get partition stats for the given fe paimon table.
*/
public static TResultSet doGetPartitionStats(FePaimonTable table) {
TResultSet result = new TResultSet();
TResultSetMetadata resultSchema = new TResultSetMetadata();
result.setSchema(resultSchema);
result.setRows(new ArrayList<>());
resultSchema.addToColumns(new TColumn("Partition", Type.STRING.toThrift()));
resultSchema.addToColumns(new TColumn("Number Of Rows", Type.BIGINT.toThrift()));
resultSchema.addToColumns(new TColumn("Number Of Files", Type.BIGINT.toThrift()));
resultSchema.addToColumns(new TColumn("Number Of Bytes", Type.BIGINT.toThrift()));
resultSchema.addToColumns(new TColumn("Last Creation Time", Type.BIGINT.toThrift()));
List<Partition> partitions =
CatalogUtils.listPartitionsFromFileSystem(table.getPaimonApiTable());
for (Partition partition : partitions) {
TResultRowBuilder builder = new TResultRowBuilder();
builder.add(partitionSpecToString(partition.spec()));
builder.add(partition.recordCount());
builder.add(partition.fileCount());
builder.add(partition.fileSizeInBytes());
// The epoch millis obtained via the paimon api are derived from a LocalDateTime
// in the default timezone, so different time zones would yield different values.
// Normalize by removing the default timezone offset from the epoch millis
// obtained via the paimon api.
builder.add(localMillisToUTCMillis(partition.lastFileCreationTime()));
result.addToRows(builder.get());
}
return result;
}
public static int getFieldIndexByNameIgnoreCase(RowType rowType, String fieldName) {
for (int i = 0; i < rowType.getFields().size(); ++i) {
if (rowType.getFields().get(i).name().equalsIgnoreCase(fieldName)) { return i; }
}
return -1;
}
private static Optional<Predicate> extractPartitionFilter(
Table table, TShowFilesParams request) {
Iterator<java.util.List<org.apache.impala.thrift.TPartitionKeyValue>> iter =
request.getPartition_setIterator();
PredicateBuilder predicateBuilder = new PredicateBuilder(table.rowType());
List<Predicate> predicates =
request.getPartition_set()
.parallelStream()
.map(l
-> PredicateBuilder.and(
l.stream()
.map(kv
-> predicateBuilder.equal(getFieldIndexByNameIgnoreCase(
table.rowType(), kv.getName()),
kv.getValue()))
.collect(Collectors.toList())))
.collect(Collectors.toList());
if (!predicates.isEmpty()) {
return Optional.of(PredicateBuilder.or(predicates));
} else {
return Optional.empty();
}
}
/**
* Get files for the given fe paimon table.
*/
public static TResultSet doGetTableFiles(
FePaimonTable paimon_table, TShowFilesParams request) {
Table table = paimon_table.getPaimonApiTable();
TResultSet result = new TResultSet();
TResultSetMetadata resultSchema = new TResultSetMetadata();
result.setSchema(resultSchema);
resultSchema.addToColumns(new TColumn("Path", Type.STRING.toThrift()));
resultSchema.addToColumns(new TColumn("Size", Type.STRING.toThrift()));
resultSchema.addToColumns(new TColumn("Partition", Type.STRING.toThrift()));
resultSchema.addToColumns(new TColumn("EC Policy", Type.STRING.toThrift()));
resultSchema.addToColumns(new TColumn("Type", Type.STRING.toThrift()));
ReadBuilder readBuilder = table.newReadBuilder();
result.setRows(new ArrayList<>());
if (request.isSetPartition_set()) {
Optional<Predicate> predicate = extractPartitionFilter(table, request);
if (predicate.isPresent()) {
readBuilder = readBuilder.withFilter(predicate.get());
}
}
TableScan.Plan plan = readBuilder.newScan().plan();
Options options = Options.fromMap(table.options());
InternalRowPartitionComputer computer =
new InternalRowPartitionComputer(options.get(PARTITION_DEFAULT_NAME),
table.rowType(), table.partitionKeys().toArray(new String[0]),
options.get(PARTITION_GENERATE_LEGCY_NAME));
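// Each data split contributes its raw data files and, if present, its
// deletion-vector files; both are listed with their partition spec.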
for (Split split : plan.splits()) {
if (!(split instanceof DataSplit)) continue;
DataSplit dataSplit = (DataSplit) split;
Optional<List<RawFile>> rawFiles = dataSplit.convertToRawFiles();
if (rawFiles.isPresent()) {
for (RawFile rawFile : rawFiles.get()) {
TResultRowBuilder builder = new TResultRowBuilder();
builder.add(rawFile.path());
builder.add(PrintUtils.printBytes(rawFile.fileSize()));
builder.add(
partitionSpecToString(computer.generatePartValues(dataSplit.partition())));
builder.add(FileSystemUtil.getErasureCodingPolicy(
new org.apache.hadoop.fs.Path(rawFile.path())));
builder.add("DATA");
result.addToRows(builder.get());
}
}
if (split.deletionFiles().isPresent()) {
for (DeletionFile deletionFile : split.deletionFiles().get()) {
if (deletionFile == null) break;
TResultRowBuilder builder = new TResultRowBuilder();
builder.add(deletionFile.path());
builder.add(PrintUtils.printBytes(deletionFile.length()));
builder.add(
partitionSpecToString(computer.generatePartValues(dataSplit.partition())));
builder.add(FileSystemUtil.getErasureCodingPolicy(
new org.apache.hadoop.fs.Path(deletionFile.path())));
builder.add("DELETE");
result.addToRows(builder.get());
}
}
}
return result;
}
}
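
A short sketch of a few of the small helpers above (illustrative only, not part of
the patch; the wrapper class and main method are hypothetical).

import java.util.LinkedHashMap;
import java.util.Map;
import org.apache.impala.catalog.paimon.PaimonUtil;

public class PaimonUtilSketch {
  public static void main(String[] args) {
    // The file format defaults to parquet when unset; unknown values yield null.
    System.out.println(PaimonUtil.getPaimonFileFormat(null));        // PARQUET
    System.out.println(PaimonUtil.getPaimonFileFormat("orc"));       // ORC
    // The catalog type defaults to the hive catalog.
    System.out.println(PaimonUtil.getTPaimonCatalog("hadoop"));      // HADOOP_CATALOG
    System.out.println(PaimonUtil.getTPaimonCatalog((String) null)); // HIVE_CATALOG
    // Partition specs are rendered as 'k1=v1/k2=v2'.
    Map<String, String> spec = new LinkedHashMap<>();
    spec.put("year", "2024");
    spec.put("month", "05");
    System.out.println(PaimonUtil.partitionSpecToString(spec));      // year=2024/month=05
    // Column lists from table properties are split and lower-cased.
    System.out.println(PaimonUtil.extractColumnNames("C1,C2"));      // [c1, c2]
  }
}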

View File

@@ -157,6 +157,10 @@ import org.apache.impala.catalog.events.MetastoreEventsProcessor;
import org.apache.impala.catalog.events.MetastoreNotificationException;
import org.apache.impala.catalog.monitor.CatalogMonitor;
import org.apache.impala.catalog.monitor.CatalogOperationTracker;
import org.apache.impala.catalog.paimon.FePaimonTable;
import org.apache.impala.catalog.paimon.PaimonCatalogOpExecutor;
import org.apache.impala.catalog.paimon.PaimonTableLoadingException;
import org.apache.impala.catalog.paimon.PaimonUtil;
import org.apache.impala.common.FileSystemUtil;
import org.apache.impala.common.ImpalaException;
import org.apache.impala.common.ImpalaRuntimeException;
@@ -231,6 +235,7 @@ import org.apache.impala.thrift.TIcebergCatalog;
import org.apache.impala.thrift.TImpalaTableType;
import org.apache.impala.thrift.TIcebergPartitionSpec;
import org.apache.impala.thrift.TKuduPartitionParam;
import org.apache.impala.thrift.TPaimonCatalog;
import org.apache.impala.thrift.TPartitionDef;
import org.apache.impala.thrift.TPartitionKeyValue;
import org.apache.impala.thrift.TPartitionStats;
@@ -267,6 +272,7 @@ import org.apache.impala.util.MetaStoreUtil;
import org.apache.impala.util.MetaStoreUtil.TableInsertEventInfo;
import org.apache.impala.util.NoOpEventSequence;
import org.apache.impala.util.ThreadNameAnnotator;
import org.apache.paimon.table.DataTable;
import org.apache.thrift.TException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -3322,7 +3328,10 @@ public class CatalogOpExecutor {
tableName.getTbl()), e);
}
}
if (existingTbl instanceof FePaimonTable) {
needsHmsDropTable = PaimonCatalogOpExecutor.dropTable(
msTbl, existingTbl, catalogTimeline, params);
}
if (needsHmsDropTable) {
try (MetaStoreClient msClient = catalog_.getMetaStoreClient(catalogTimeline)) {
@@ -3866,6 +3875,9 @@ public class CatalogOpExecutor {
params.if_not_exists, params.getColumns(), params.getPartition_spec(),
params.getPrimary_key_column_names(), params.getTable_properties(),
params.getComment(), debugAction);
} else if (PaimonUtil.isPaimonTable(tbl)) {
return createPaimonTable(
tbl, wantMinimalResult, response, catalogTimeline, params, debugAction);
}
Preconditions.checkState(params.getColumns().size() > 0,
"Empty column list given as argument to Catalog.createTable");
@@ -4354,6 +4366,134 @@ public class CatalogOpExecutor {
return true;
}
/**
* Creates a new Paimon table.
*/
private boolean createPaimonTable(org.apache.hadoop.hive.metastore.api.Table newTable,
boolean wantMinimalResult, TDdlExecResponse response, EventSequence catalogTimeline,
TCreateTableParams params, @Nullable String debugAction) throws ImpalaException {
Preconditions.checkState(PaimonUtil.isPaimonTable(newTable));
acquireMetastoreDdlLock(catalogTimeline);
try {
// Add the table to the HMS and the catalog cache. Acquire metastoreDdlLock_ to
// ensure the atomicity of these operations.
List<NotificationEvent> events;
try (MetaStoreClient msClient = catalog_.getMetaStoreClient(catalogTimeline)) {
boolean tableInMetastore = msClient.getHiveClient().tableExists(
newTable.getDbName(), newTable.getTableName());
catalogTimeline.markEvent(CHECKED_HMS_TABLE_EXISTENCE);
if (!tableInMetastore) {
long eventId = getCurrentEventId(msClient, catalogTimeline);
String location = newTable.getSd().getLocation();
// Create table in paimon if necessary
if (PaimonUtil.isSynchronizedTable(newTable)) {
// Set the location here if it was not specified in the SQL statement.
if (location == null) {
location = PaimonUtil.getPaimonCatalogLocation(msClient, newTable);
}
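// Let tests inject failures at this point via the 'catalogd_paimon_create'
// debug action.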
if (debugAction != null) {
DebugUtils.executeDebugAction(debugAction, DebugUtils.PAIMON_CREATE);
}
String tableLoc = PaimonCatalogOpExecutor.createTable(
PaimonUtil.getTableIdentifier(newTable), location, params, newTable);
newTable.getSd().setLocation(tableLoc);
catalogTimeline.markEvent(PaimonCatalogOpExecutor.CREATED_PAIMON_TABLE +
newTable.getTableName());
} else {
// If this is not a synchronized table, we assume that the table already
// exists in a Paimon catalog.
TPaimonCatalog underlyingCatalog = PaimonUtil.getTPaimonCatalog(newTable);
if (underlyingCatalog != TPaimonCatalog.HADOOP_CATALOG &&
underlyingCatalog != TPaimonCatalog.HIVE_CATALOG) {
throw new TableLoadingException(
"Paimon table only support hadoop catalog and hive catalog.");
}
String locationToLoadFrom;
if (underlyingCatalog == TPaimonCatalog.HIVE_CATALOG) {
if (location == null) {
addSummary(response,
"Location is necessary for external paimon table with hive catalog.");
return false;
}
locationToLoadFrom = location;
} else {
// For HadoopCatalog tables 'locationToLoadFrom' is the location of the
// Hadoop catalog; for HiveCatalog tables it is the table location handled above.
locationToLoadFrom =
PaimonUtil.getPaimonCatalogLocation(msClient, newTable);
}
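// Load the existing Paimon table from that location and derive the HMS
// schema from its metadata.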
try {
org.apache.paimon.table.Table paimonTable =
PaimonUtil.createFileStoreTable(locationToLoadFrom);
// Populate the HMS table schema based on the Paimon table's schema because
// the Paimon metadata is the source of truth. This also avoids an
// unnecessary ALTER TABLE.
PaimonCatalogOpExecutor.populateExternalTableSchemaFromPaimonTable(
newTable, paimonTable);
catalogTimeline.markEvent(PaimonCatalogOpExecutor.LOADED_PAIMON_TABLE);
if (location == null) {
// Using the location of the loaded Paimon table we can also get the
// correct location for tables stored in nested namespaces.
newTable.getSd().setLocation(
((DataTable) paimonTable).location().toString());
}
} catch (Exception ex) {
// Failed to load the Paimon table.
if (newTable.getSd().getCols().isEmpty()) {
// The user did not specify a schema, so it must be loaded from the
// underlying Paimon table; since that failed, propagate the exception.
throw new PaimonTableLoadingException(
"Failed to extract paimon schema from underlying paimon table", ex);
} else {
// The user specified a schema in the table DDL, so try to create a new
// Paimon table instead.
String tableLoc = PaimonCatalogOpExecutor.createTable(
PaimonUtil.getTableIdentifier(newTable), location, params, newTable);
newTable.getSd().setLocation(tableLoc);
}
}
}
msClient.getHiveClient().createTable(newTable);
catalogTimeline.markEvent(CREATED_HMS_TABLE);
events = getNextMetastoreEventsForTableIfEnabled(catalogTimeline, eventId,
newTable.getDbName(), newTable.getTableName(), CreateTableEvent.EVENT_TYPE);
} else {
addSummary(response, "Table already exists.");
return false;
}
}
Pair<Long, org.apache.hadoop.hive.metastore.api.Table> eventTblPair =
getTableFromEvents(events, params.if_not_exists);
long createEventId = eventTblPair == null ? -1 : eventTblPair.first;
org.apache.hadoop.hive.metastore.api.Table msTable =
eventTblPair == null ? null : eventTblPair.second;
setTableNameAndCreateTimeInResponse(msTable, newTable.getDbName(),
newTable.getTableName(), response, catalogTimeline);
// Add the table to the catalog cache
Table newTbl =
catalog_.addIncompleteTable(newTable.getDbName(), newTable.getTableName(),
TImpalaTableType.TABLE, params.getComment(), createEventId);
catalogTimeline.markEvent(CREATED_CATALOG_TABLE);
LOG.debug("Created an paimon table {} in catalog with create event Id {} ",
newTbl.getFullName(), createEventId);
addTableToCatalogUpdate(newTbl, wantMinimalResult, response.result);
} catch (Exception e) {
if (params.if_not_exists
&& (e instanceof AlreadyExistsException
|| e instanceof org.apache.iceberg.exceptions.AlreadyExistsException)) {
addSummary(response, "Table already exists.");
return false;
}
throw new ImpalaRuntimeException(
String.format(HMS_RPC_ERROR_FORMAT_STR, "createTable"), e);
} finally { getMetastoreDdlLock().unlock(); }
addSummary(response, "Table has been created.");
return true;
}
/**
* Creates a new table in the metastore based on the definition of an existing table.
* No data is copied as part of this process, it is a metadata only operation. If the


@@ -147,6 +147,8 @@ import org.apache.impala.catalog.TableLoadingException;
import org.apache.impala.catalog.Type;
import org.apache.impala.catalog.local.InconsistentMetadataFetchException;
import org.apache.impala.catalog.iceberg.IcebergMetadataTable;
import org.apache.impala.catalog.paimon.FePaimonTable;
import org.apache.impala.catalog.paimon.FeShowFileStmtSupport;
import org.apache.impala.common.AnalysisException;
import org.apache.impala.common.UserCancelledException;
import org.apache.impala.common.FileSystemUtil;
@@ -1755,6 +1757,8 @@ public class Frontend {
return ((FeHBaseTable) table).getTableStats();
} else if (table instanceof FeDataSourceTable) {
return ((FeDataSourceTable) table).getTableStats();
} else if (table instanceof FePaimonTable) {
return ((FePaimonTable) table).getTableStats(op);
} else if (table instanceof FeKuduTable) {
if (op == TShowStatsOp.RANGE_PARTITIONS) {
return FeKuduTable.Utils.getRangePartitions((FeKuduTable) table, false);
@@ -3456,7 +3460,9 @@ public class Frontend {
throws ImpalaException{
FeTable table = getCatalog().getTable(request.getTable_name().getDb_name(),
request.getTable_name().getTable_name());
if (table instanceof FeFsTable) {
if (table instanceof FeShowFileStmtSupport) {
return ((FeShowFileStmtSupport) table).doGetTableFiles(request);
} else if (table instanceof FeFsTable) {
return FeFsTable.Utils.getFiles((FeFsTable)table, request.getPartition_set());
} else {
throw new InternalException("SHOW FILES only supports Hdfs table. " +


@@ -66,6 +66,9 @@ public class DebugUtils {
// debug action label for Iceberg create table.
public static final String ICEBERG_CREATE = "catalogd_iceberg_create";
// debug action label for Paimon create table.
public static final String PAIMON_CREATE = "catalogd_paimon_create";
// debug action label for throwing an exception during loadFileMetadataForPartitions.
public static final String LOAD_FILE_METADATA_THROW_EXCEPTION =
"catalogd_load_file_metadata_throw_exception";


@@ -158,6 +158,7 @@ import org.apache.impala.thrift.TReservedWordsVersion;
keywordMap.put("having", SqlParserSymbols.KW_HAVING);
keywordMap.put("hudiparquet", SqlParserSymbols.KW_HUDIPARQUET);
keywordMap.put("iceberg", SqlParserSymbols.KW_ICEBERG);
keywordMap.put("paimon", SqlParserSymbols.KW_PAIMON);
keywordMap.put("if", SqlParserSymbols.KW_IF);
keywordMap.put("ignore", SqlParserSymbols.KW_IGNORE);
keywordMap.put("ilike", SqlParserSymbols.KW_ILIKE);


@@ -1084,7 +1084,7 @@ public class AnalyzeDDLTest extends FrontendTestBase {
"only supported on TEXT file format");
}
String[] unsupportedFileFormats = {
"parquet", "rcfile", "avro", "iceberg"};
"parquet", "rcfile", "avro", "iceberg", "paimon"};
for (String format: unsupportedFileFormats) {
AnalysisError("create table " + tmpTableName + " (id int) with serdeproperties(" +
"'serialization.encoding'='GBK') stored as " + format,
@@ -2446,7 +2446,9 @@ public class AnalyzeDDLTest extends FrontendTestBase {
AnalysisError("create table foo stored as RCFILE as select 1",
"CREATE TABLE AS SELECT does not support the (RCFILE) file format. " +
"Supported formats are: (PARQUET, TEXTFILE, KUDU, ICEBERG)");
AnalysisError("create table foo stored as PAIMON as select 1",
"CREATE TABLE AS SELECT does not support the (PAIMON) file format. " +
"Supported formats are: (PARQUET, TEXTFILE, KUDU, ICEBERG)");
// CTAS with a WITH clause and inline view (IMPALA-1100)
AnalyzesOk("create table test_with as with with_1 as (select 1 as int_col from " +
"functional.alltypes as t1 right join (select 1 as int_col from " +
@@ -3050,7 +3052,55 @@ public class AnalyzeDDLTest extends FrontendTestBase {
AnalysisError("CREATE EXTERNAL TABLE Foo (i int) PARTITIONED BY (d decimal) " +
"STORED BY JDBC ",
"PARTITIONED BY cannot be used in a JDBC table.");
// Create Paimon tables.
AnalyzesOk("CREATE TABLE IF NOT EXISTS foo (user_id BIGINT, item_id BIGINT, " +
"behavior STRING) STORED AS PAIMON");
AnalyzesOk("CREATE TABLE IF NOT EXISTS foo (user_id BIGINT, item_id BIGINT, " +
"behavior STRING) PARTITIONED BY (dt STRING, hh STRING) STORED AS PAIMON");
AnalyzesOk("CREATE TABLE foo (user_id BIGINT, item_id BIGINT, behavior STRING) " +
"PARTITIONED BY (dt STRING, hh STRING) STORED AS PAIMON TBLPROPERTIES " +
"('primary-key'='user_id')");
AnalyzesOk("CREATE TABLE foo (user_id BIGINT, item_id BIGINT, behavior STRING, " +
"PRIMARY KEY(user_id)) PARTITIONED BY (dt STRING, hh STRING) STORED AS PAIMON");
AnalyzesOk("CREATE TABLE foo (user_id BIGINT, item_id BIGINT, behavior STRING)" +
" STORED AS PAIMON TBLPROPERTIES ('bucket' = '4', 'bucket-key'='behavior')");
AnalyzesOk("CREATE TABLE foo (user_id BIGINT, item_id BIGINT, behavior STRING, " +
"PRIMARY KEY(user_id)) PARTITIONED BY (dt STRING, hh STRING) STORED AS PAIMON " +
"LOCATION 'hdfs:///test-warehouse/test_create_managed_location_paimon_table'");
AnalyzesOk("CREATE TABLE foo (`USER_ID` BIGINT, `ITEM_ID` BIGINT, BEHAVIOR STRING, " +
"PRIMARY KEY(`USER_ID`)) PARTITIONED BY (`DT` STRING, `HH` STRING) " +
"STORED AS PAIMON");
AnalyzesOk("CREATE EXTERNAL TABLE foo STORED AS PAIMON LOCATION " +
"'hdfs:///test-warehouse/paimon_test/paimon_catalog/warehouse/functional.db/" +
"paimon_non_partitioned'");
AnalyzesOk("CREATE EXTERNAL TABLE foo STORED AS PAIMON LOCATION " +
"'hdfs:///test-warehouse/paimon_test/paimon_catalog/warehouse/functional.db/" +
"paimon_partitioned'");
AnalyzesOk("CREATE EXTERNAL TABLE foo STORED AS PAIMON TBLPROPERTIES" +
"('paimon.catalog'='hadoop', 'paimon.catalog_location'='hdfs:///test-warehouse" +
"/paimon_test/paimon_catalog/warehouse', 'paimon.table_identifier'=" +
"'functional.paimon_non_partitioned')");
AnalyzesOk("CREATE EXTERNAL TABLE foo STORED AS PAIMON TBLPROPERTIES(" +
"'paimon.catalog'='hadoop', 'paimon.catalog_location'='hdfs:///test-warehouse" +
"/paimon_test/paimon_catalog/warehouse', 'paimon.table_identifier'=" +
"'functional.paimon_partitioned')");
AnalyzesOk("CREATE TABLE foo (col_boolean BOOLEAN, col_tinyint TINYINT, " +
"col_smallint SMALLINT, col_int INT, col_integer INTEGER, col_bigint BIGINT, " +
"col_float FLOAT, col_double DOUBLE, col_decimal DECIMAL(10,2), " +
"col_string STRING, col_char CHAR(20), col_varchar VARCHAR(100), " +
"col_timestamp TIMESTAMP, col_date DATE, col_binary BINARY) STORED AS PAIMON");
AnalysisError("CREATE TABLE impala_paimon_complex_types_array " +
"(col_array_string ARRAY<STRING>) STORED AS PAIMON;",
"Tables stored by Paimon do not support the " +
"column col_array_string type: ARRAY<STRING>");
AnalysisError("CREATE TABLE impala_paimon_complex_types_map " +
"(col_map_string MAP<STRING,STRING>) STORED AS PAIMON;",
"Tables stored by Paimon do not support the" +
" column col_map_string type: MAP<STRING,STRING>");
AnalysisError("CREATE TABLE impala_paimon_complex_types_struct " +
"(col_struct_string STRUCT<f1:STRING, f2:STRING>) STORED AS PAIMON;",
"Tables stored by Paimon do not support the " +
"column col_struct_string type: STRUCT<f1:STRING,f2:STRING>");
// Create table PRODUCED BY DATA SOURCE
final String DATA_SOURCE_NAME = "TestDataSource1";
catalog_.addDataSource(new DataSource(DATA_SOURCE_NAME, "/foo.jar",
@@ -3131,7 +3181,7 @@ public class AnalyzeDDLTest extends FrontendTestBase {
AnalyzesOk("CREATE TABLE functional.bucket (i int COMMENT 'hello', s string) " +
"CLUSTERED BY(i) SORT BY (s) INTO 24 BUCKETS");
// Bucketed table not supported for Kudu, ICEBERG and JDBC table
// Bucketed table not supported for Kudu, ICEBERG, PAIMON and JDBC table
AnalysisError("CREATE TABLE functional.bucket (i int COMMENT 'hello', s string) " +
"CLUSTERED BY (i) INTO 24 BUCKETS STORED BY KUDU", "CLUSTERED BY not " +
"support fileformat: 'KUDU'");
@@ -3141,6 +3191,10 @@ public class AnalyzeDDLTest extends FrontendTestBase {
AnalysisError("CREATE TABLE functional.bucket (i int COMMENT 'hello', s string) " +
"CLUSTERED BY (i) INTO 24 BUCKETS STORED BY JDBC",
"CLUSTERED BY not support fileformat: 'JDBC'");
AnalysisError("CREATE TABLE functional.bucket (i int COMMENT 'hello', s string) " +
"CLUSTERED BY (i) INTO 24 BUCKETS STORED BY PAIMON",
"CLUSTERED BY clause is not support by PAIMON now, use property " +
"bucket-key instead.");
// Bucketed columns must not contain partition column and don't duplicate
AnalysisError("CREATE TABLE functional.bucket (i int COMMENT 'hello', s string) " +
"PARTITIONED BY(dt string) CLUSTERED BY (dt) INTO 24 BUCKETS",


@@ -2894,8 +2894,8 @@ public class ParserTest extends FrontendTestBase {
ParsesOk("CREATE TABLE T (i int COMMENT 'hi') PARTITIONED BY (j int COMMENT 'bye')");
// Supported file formats
String [] supportedFileFormats =
{"TEXTFILE", "SEQUENCEFILE", "PARQUET", "PARQUETFILE", "RCFILE", "AVRO"};
String[] supportedFileFormats = {
"TEXTFILE", "SEQUENCEFILE", "PARQUET", "PARQUETFILE", "RCFILE", "AVRO", "PAIMON"};
for (String format: supportedFileFormats) {
ParsesOk("CREATE TABLE Foo (i int, s string) STORED AS " + format);
ParsesOk("CREATE EXTERNAL TABLE Foo (i int, s string) STORED AS " + format);


@@ -382,6 +382,48 @@ public class ToSqlTest extends FrontendTestBase {
+ "test_pk_fk.fk ( seq INT, id INT, year STRING, a INT, FOREIGN KEY(id, year) "
+ "REFERENCES functional.parent_table(id, year), FOREIGN KEY(a) REFERENCES "
+ "functional.parent_table_2(a) ) STORED AS TEXTFILE", true);
// Test create table for paimon table.
testToSql("CREATE TABLE test_create_managed_paimon_table ("
+ "user_id BIGINT, item_id BIGINT, behavior STRING) STORED AS PAIMON",
"default",
"CREATE TABLE default.test_create_managed_paimon_table ("
+ " user_id BIGINT, item_id BIGINT, behavior STRING )"
+ " STORED AS PAIMON TBLPROPERTIES ('deletion-vectors.enabled'='true', "
+ "'storage_handler'='org.apache.paimon.hive.PaimonStorageHandler')",
true);
testToSql("CREATE TABLE test_create_managed_part_paimon_table ("
+ " user_id BIGINT, item_id BIGINT, behavior STRING) PARTITIONED BY ("
+ "dt STRING, hh STRING ) STORED AS PAIMON", "default",
"CREATE TABLE default.test_create_managed_part_paimon_table ("
+ " user_id BIGINT, item_id BIGINT, behavior STRING ) "
+ "PARTITIONED BY ( dt STRING, hh STRING ) STORED AS PAIMON "
+ "TBLPROPERTIES ('deletion-vectors.enabled'='true', "
+ "'storage_handler'='org.apache.paimon.hive.PaimonStorageHandler')",
true);
testToSql("CREATE TABLE test_create_managed_part_pk_paimon_table ("
+ "user_id BIGINT, item_id BIGINT, behavior STRING) PARTITIONED BY ("
+ "dt STRING, hh STRING) STORED AS PAIMON TBLPROPERTIES ("
+ "'primary-key'='user_id')", "default",
"CREATE TABLE default.test_create_managed_part_pk_paimon_table ( "
+ "user_id BIGINT, item_id BIGINT, behavior STRING ) "
+ "PARTITIONED BY ( dt STRING, hh STRING ) "
+ "STORED AS PAIMON TBLPROPERTIES ('deletion-vectors.enabled'='true',"
+ " 'primary-key'='user_id', "
+ "'storage_handler'='org.apache.paimon.hive.PaimonStorageHandler')",
true);
testToSql("CREATE TABLE test_create_managed_bucket_paimon_table ("
+ "user_id BIGINT, item_id BIGINT, behavior STRING) STORED AS PAIMON "
+ "TBLPROPERTIES ('bucket' = '4', 'bucket-key'='behavior')", "default",
"CREATE TABLE default.test_create_managed_bucket_paimon_table ("
+ " user_id BIGINT, item_id BIGINT, behavior STRING ) "
+ "STORED AS PAIMON TBLPROPERTIES "
+ "('bucket'='4', 'bucket-key'='behavior', "
+ "'deletion-vectors.enabled'='true', "
+ "'storage_handler'='org.apache.paimon.hive.PaimonStorageHandler')",
true);
}
@Test


@@ -0,0 +1,161 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.impala.catalog.paimon;
import static org.junit.Assert.assertEquals;
import org.apache.impala.catalog.ScalarType;
import org.apache.impala.catalog.StructField;
import org.apache.impala.catalog.StructType;
import org.apache.impala.catalog.Type;
import org.apache.paimon.types.ArrayType;
import org.apache.paimon.types.BigIntType;
import org.apache.paimon.types.BooleanType;
import org.apache.paimon.types.CharType;
import org.apache.paimon.types.DataField;
import org.apache.paimon.types.DataTypes;
import org.apache.paimon.types.DecimalType;
import org.apache.paimon.types.DoubleType;
import org.apache.paimon.types.FloatType;
import org.apache.paimon.types.IntType;
import org.apache.paimon.types.MapType;
import org.apache.paimon.types.RowType;
import org.apache.paimon.types.SmallIntType;
import org.apache.paimon.types.TimestampType;
import org.apache.paimon.types.TinyIntType;
import org.apache.paimon.types.VarCharType;
import org.junit.Assert;
import org.junit.Test;
import java.util.Arrays;
public class ImpalaTypeUtilsTest {
private interface ConvFunc { public void convertFun(); }
private static void assertThrowUnexpected(ConvFunc convFunc) {
try {
convFunc.convertFun();
Assert.fail("Expected UnsupportedOperationException was not thrown.");
} catch (UnsupportedOperationException ex) {
// Expected: the conversion is unsupported.
}
}
@Test
public void testToImpalaType() {
// Test primitive types
assertEquals(Type.BOOLEAN, ImpalaTypeUtils.toImpalaType(new BooleanType()));
assertEquals(Type.TINYINT, ImpalaTypeUtils.toImpalaType(new TinyIntType()));
assertEquals(Type.SMALLINT, ImpalaTypeUtils.toImpalaType(new SmallIntType()));
assertEquals(Type.INT, ImpalaTypeUtils.toImpalaType(new IntType()));
assertEquals(Type.BIGINT, ImpalaTypeUtils.toImpalaType(new BigIntType()));
assertEquals(Type.FLOAT, ImpalaTypeUtils.toImpalaType(new FloatType()));
assertEquals(Type.DOUBLE, ImpalaTypeUtils.toImpalaType(new DoubleType()));
assertEquals(Type.DATE, ImpalaTypeUtils.toImpalaType(DataTypes.DATE()));
assertEquals(Type.TIMESTAMP, ImpalaTypeUtils.toImpalaType(new TimestampType()));
// Test decimal type
DecimalType decimalType = new DecimalType(10, 2);
assertEquals(
ScalarType.createDecimalType(10, 2), ImpalaTypeUtils.toImpalaType(decimalType));
// Test char and varchar types
assertEquals(
ScalarType.createCharType(10), ImpalaTypeUtils.toImpalaType(new CharType(10)));
assertEquals(ScalarType.createVarcharType(10),
ImpalaTypeUtils.toImpalaType(new VarCharType(10)));
assertEquals(Type.STRING, ImpalaTypeUtils.toImpalaType(new CharType(256)));
assertEquals(Type.STRING, ImpalaTypeUtils.toImpalaType(new VarCharType(65536)));
// Test array type
ArrayType arrayType = new ArrayType(new IntType());
assertEquals(new org.apache.impala.catalog.ArrayType(Type.INT),
ImpalaTypeUtils.toImpalaType(arrayType));
// Test map type
MapType mapType = new MapType(new IntType(), DataTypes.STRING());
assertEquals(new org.apache.impala.catalog.MapType(Type.INT, Type.STRING),
ImpalaTypeUtils.toImpalaType(mapType));
// Test row type
RowType rowType = new RowType(Arrays.asList(new DataField(0, "id", new IntType()),
new DataField(1, "name", DataTypes.STRING())));
StructType expectedStructType = new StructType(Arrays.asList(
new StructField("id", Type.INT), new StructField("name", Type.STRING)));
assertEquals(expectedStructType, ImpalaTypeUtils.toImpalaType(rowType));
// doesn't support time
assertThrowUnexpected(() -> ImpalaTypeUtils.toImpalaType(DataTypes.TIME()));
// doesn't support multiset
assertThrowUnexpected(
() -> ImpalaTypeUtils.toImpalaType(DataTypes.MULTISET(DataTypes.INT())));
// doesn't support timestamp with local timezone
assertThrowUnexpected(
() -> ImpalaTypeUtils.toImpalaType(DataTypes.TIMESTAMP_WITH_LOCAL_TIME_ZONE()));
}
@Test
public void testFromImpalaType() {
// Test primitive types
assertEquals(DataTypes.BOOLEAN(), ImpalaTypeUtils.fromImpalaType(Type.BOOLEAN));
assertEquals(DataTypes.TINYINT(), ImpalaTypeUtils.fromImpalaType(Type.TINYINT));
assertEquals(DataTypes.SMALLINT(), ImpalaTypeUtils.fromImpalaType(Type.SMALLINT));
assertEquals(DataTypes.INT(), ImpalaTypeUtils.fromImpalaType(Type.INT));
assertEquals(DataTypes.BIGINT(), ImpalaTypeUtils.fromImpalaType(Type.BIGINT));
assertEquals(DataTypes.FLOAT(), ImpalaTypeUtils.fromImpalaType(Type.FLOAT));
assertEquals(DataTypes.DOUBLE(), ImpalaTypeUtils.fromImpalaType(Type.DOUBLE));
assertEquals(DataTypes.DATE(), ImpalaTypeUtils.fromImpalaType(Type.DATE));
assertEquals(DataTypes.TIMESTAMP(), ImpalaTypeUtils.fromImpalaType(Type.TIMESTAMP));
// Test decimal type
ScalarType decimalType = ScalarType.createDecimalType(10, 2);
assertEquals(DataTypes.DECIMAL(10, 2), ImpalaTypeUtils.fromImpalaType(decimalType));
// Test char and varchar types
assertEquals(DataTypes.CHAR(10),
ImpalaTypeUtils.fromImpalaType(ScalarType.createCharType(10)));
assertEquals(DataTypes.VARCHAR(255),
ImpalaTypeUtils.fromImpalaType(ScalarType.createVarcharType(255)));
// Test array type
org.apache.impala.catalog.ArrayType arrayType =
new org.apache.impala.catalog.ArrayType(Type.INT);
assertEquals(
DataTypes.ARRAY(DataTypes.INT()), ImpalaTypeUtils.fromImpalaType(arrayType));
// Test map type
org.apache.impala.catalog.MapType mapType =
new org.apache.impala.catalog.MapType(Type.INT, Type.STRING);
assertEquals(DataTypes.MAP(DataTypes.INT(), DataTypes.STRING()),
ImpalaTypeUtils.fromImpalaType(mapType));
// Test struct type
StructType structType = new StructType(Arrays.asList(
new StructField("id", Type.INT), new StructField("name", Type.STRING)));
RowType expectedRowType = RowType.of(new DataField(0, "id", DataTypes.INT()),
new DataField(1, "name", DataTypes.STRING()));
assertEquals(expectedRowType, ImpalaTypeUtils.fromImpalaType(structType));
// doesn't support datetime
assertThrowUnexpected(() -> ImpalaTypeUtils.fromImpalaType(Type.DATETIME));
// doesn't support NULL TYPE
assertThrowUnexpected(() -> ImpalaTypeUtils.fromImpalaType(Type.NULL));
// doesn't support INVALID
assertThrowUnexpected(() -> ImpalaTypeUtils.fromImpalaType(Type.INVALID));
// doesn't support FIXED_UDA_INTERMEDIATE
assertThrowUnexpected(
() -> ImpalaTypeUtils.fromImpalaType(Type.FIXED_UDA_INTERMEDIATE));
}
}
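For reference, a round-trip check against the same conversion API could look like the sketch below; it is illustrative only, not part of this patch, and it reuses the imports of the test class above.

// Illustrative sketch (not part of the patch): DECIMAL(10,2) survives a round
// trip through both conversion directions.
@Test
public void testDecimalRoundTripSketch() {
org.apache.paimon.types.DataType paimonType =
ImpalaTypeUtils.fromImpalaType(ScalarType.createDecimalType(10, 2));
assertEquals(ScalarType.createDecimalType(10, 2),
ImpalaTypeUtils.toImpalaType(paimonType));
}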


@@ -41,6 +41,7 @@ under the License.
<hive.major.version>${env.IMPALA_HIVE_MAJOR_VERSION}</hive.major.version>
<hive.dist.type>${env.IMPALA_HIVE_DIST_TYPE}</hive.dist.type>
<hudi.version>${env.IMPALA_HUDI_VERSION}</hudi.version>
<paimon.version>${env.IMPALA_PAIMON_VERSION}</paimon.version>
<ranger.version>${env.IMPALA_RANGER_VERSION}</ranger.version>
<postgres.jdbc.version>${env.IMPALA_POSTGRES_JDBC_DRIVER_VERSION}</postgres.jdbc.version>
<hbase.version>${env.IMPALA_HBASE_VERSION}</hbase.version>


@@ -0,0 +1,28 @@
{
"version" : 3,
"id" : 0,
"fields" : [ {
"id" : 0,
"name" : "userId",
"type" : "INT"
}, {
"id" : 1,
"name" : "movieId",
"type" : "INT"
}, {
"id" : 2,
"name" : "rating",
"type" : "FLOAT"
}, {
"id" : 3,
"name" : "timestamp",
"type" : "BIGINT"
} ],
"highestFieldId" : 3,
"partitionKeys" : [ ],
"primaryKeys" : [ ],
"options" : {
"owner" : "jichen"
},
"timeMillis" : 1737733553684
}


@@ -0,0 +1,29 @@
{
"version" : 3,
"id" : 1,
"fields" : [ {
"id" : 0,
"name" : "userId",
"type" : "INT"
}, {
"id" : 1,
"name" : "movieId",
"type" : "INT"
}, {
"id" : 2,
"name" : "rating",
"type" : "FLOAT"
}, {
"id" : 3,
"name" : "timestamp",
"type" : "BIGINT"
} ],
"highestFieldId" : 3,
"partitionKeys" : [ ],
"primaryKeys" : [ ],
"options" : {
"owner" : "jichen",
"deletion-vectors.enabled" : "true"
},
"timeMillis" : 1737734737038
}


@@ -0,0 +1,19 @@
{
"version" : 3,
"id" : 10,
"schemaId" : 1,
"baseManifestList" : "manifest-list-5cd761f7-4c95-4181-841e-aa8b405e71be-0",
"baseManifestListSize" : 884,
"deltaManifestList" : "manifest-list-5cd761f7-4c95-4181-841e-aa8b405e71be-1",
"deltaManifestListSize" : 884,
"changelogManifestList" : null,
"indexManifest" : "index-manifest-1eecc41b-3dcd-4388-afdb-54527b51be2a-0",
"commitUser" : "ea690e49-679c-46bb-8307-a6016b7ce27c",
"commitIdentifier" : 9223372036854775807,
"commitKind" : "COMPACT",
"timeMillis" : 1748965436933,
"logOffsets" : { },
"totalRecordCount" : 0,
"deltaRecordCount" : 0,
"changelogRecordCount" : 0
}


@@ -0,0 +1,19 @@
{
"version" : 3,
"id" : 11,
"schemaId" : 1,
"baseManifestList" : "manifest-list-6e11688a-4b7c-4bfd-9118-5e49eac61016-0",
"baseManifestListSize" : 884,
"deltaManifestList" : "manifest-list-6e11688a-4b7c-4bfd-9118-5e49eac61016-1",
"deltaManifestListSize" : 989,
"changelogManifestList" : null,
"indexManifest" : "index-manifest-1eecc41b-3dcd-4388-afdb-54527b51be2a-0",
"commitUser" : "b2d1075a-036e-4b86-b864-b13b797fe6bc",
"commitIdentifier" : 9223372036854775807,
"commitKind" : "OVERWRITE",
"timeMillis" : 1748965464523,
"logOffsets" : { },
"totalRecordCount" : 452,
"deltaRecordCount" : 452,
"changelogRecordCount" : 0
}


@@ -0,0 +1,20 @@
{
"version" : 3,
"id" : 12,
"schemaId" : 1,
"baseManifestList" : "manifest-list-d86f1254-05df-4560-8885-693e94b46cbf-0",
"baseManifestListSize" : 989,
"deltaManifestList" : "manifest-list-d86f1254-05df-4560-8885-693e94b46cbf-1",
"deltaManifestListSize" : 884,
"changelogManifestList" : null,
"indexManifest" : "index-manifest-1eecc41b-3dcd-4388-afdb-54527b51be2a-0",
"commitUser" : "2e57810a-5006-4dec-8b0f-40166fbec40f",
"commitIdentifier" : 9223372036854775807,
"commitKind" : "ANALYZE",
"timeMillis" : 1748965940065,
"logOffsets" : { },
"totalRecordCount" : 452,
"deltaRecordCount" : 0,
"changelogRecordCount" : 0,
"statistics" : "stat-eaea66dc-9657-4d89-823a-98315aa27329-0"
}


@@ -0,0 +1,19 @@
{
"version" : 3,
"id" : 9,
"schemaId" : 1,
"baseManifestList" : "manifest-list-025dfcce-8e34-42c0-a8f7-def795d489df-0",
"baseManifestListSize" : 1050,
"deltaManifestList" : "manifest-list-025dfcce-8e34-42c0-a8f7-def795d489df-1",
"deltaManifestListSize" : 989,
"changelogManifestList" : null,
"indexManifest" : "index-manifest-1eecc41b-3dcd-4388-afdb-54527b51be2a-0",
"commitUser" : "e2925e8d-4a2e-47ef-94a0-06a72d8886cb",
"commitIdentifier" : 9223372036854775807,
"commitKind" : "OVERWRITE",
"timeMillis" : 1748965369869,
"logOffsets" : { },
"totalRecordCount" : 0,
"deltaRecordCount" : -452,
"changelogRecordCount" : 0
}


@@ -0,0 +1,44 @@
{
"snapshotId" : 11,
"schemaId" : 1,
"mergedRecordCount" : 452,
"mergedRecordSize" : 7352,
"colStats" : {
"rating" : {
"colId" : 2,
"distinctCount" : 5,
"min" : "1.0",
"max" : "5.0",
"nullCount" : 0,
"avgLen" : 4,
"maxLen" : 4
},
"movieId" : {
"colId" : 1,
"distinctCount" : 386,
"min" : "1",
"max" : "3893",
"nullCount" : 0,
"avgLen" : 4,
"maxLen" : 4
},
"userId" : {
"colId" : 0,
"distinctCount" : 5,
"min" : "1",
"max" : "5",
"nullCount" : 0,
"avgLen" : 4,
"maxLen" : 4
},
"timestamp" : {
"colId" : 3,
"distinctCount" : 228,
"min" : "978241072",
"max" : "978824351",
"nullCount" : 0,
"avgLen" : 8,
"maxLen" : 8
}
}
}


@@ -0,0 +1,28 @@
{
"version" : 3,
"id" : 0,
"fields" : [ {
"id" : 0,
"name" : "userId",
"type" : "INT"
}, {
"id" : 1,
"name" : "movieId",
"type" : "INT"
}, {
"id" : 2,
"name" : "rating",
"type" : "FLOAT"
}, {
"id" : 3,
"name" : "ts",
"type" : "BIGINT"
} ],
"highestFieldId" : 3,
"partitionKeys" : [ "userId" ],
"primaryKeys" : [ ],
"options" : {
"owner" : "jichen"
},
"timeMillis" : 1741358917111
}


@@ -0,0 +1,29 @@
{
"version" : 3,
"id" : 1,
"fields" : [ {
"id" : 0,
"name" : "userId",
"type" : "INT"
}, {
"id" : 1,
"name" : "movieId",
"type" : "INT"
}, {
"id" : 2,
"name" : "rating",
"type" : "FLOAT"
}, {
"id" : 3,
"name" : "ts",
"type" : "BIGINT"
} ],
"highestFieldId" : 3,
"partitionKeys" : [ "userId" ],
"primaryKeys" : [ ],
"options" : {
"owner" : "jichen",
"snapshot.num-retained" : "1"
},
"timeMillis" : 1747666219242
}


@@ -0,0 +1,31 @@
{
"version" : 3,
"id" : 2,
"fields" : [ {
"id" : 0,
"name" : "userId",
"type" : "INT"
}, {
"id" : 1,
"name" : "movieId",
"type" : "INT"
}, {
"id" : 2,
"name" : "rating",
"type" : "FLOAT"
}, {
"id" : 3,
"name" : "ts",
"type" : "BIGINT"
} ],
"highestFieldId" : 3,
"partitionKeys" : [ "userId" ],
"primaryKeys" : [ ],
"options" : {
"owner" : "jichen",
"snapshot.num-retained.max" : "1",
"snapshot.num-retained" : "1",
"snapshot.num-retained.min" : "1"
},
"timeMillis" : 1747666351718
}


@@ -0,0 +1,30 @@
{
"version" : 3,
"id" : 3,
"fields" : [ {
"id" : 0,
"name" : "userId",
"type" : "INT"
}, {
"id" : 1,
"name" : "movieId",
"type" : "INT"
}, {
"id" : 2,
"name" : "rating",
"type" : "FLOAT"
}, {
"id" : 3,
"name" : "ts",
"type" : "BIGINT"
} ],
"highestFieldId" : 3,
"partitionKeys" : [ "userId" ],
"primaryKeys" : [ ],
"options" : {
"owner" : "jichen",
"snapshot.num-retained.max" : "1",
"snapshot.num-retained.min" : "1"
},
"timeMillis" : 1747717105767
}


@@ -0,0 +1,29 @@
{
"version" : 3,
"id" : 4,
"fields" : [ {
"id" : 0,
"name" : "userId",
"type" : "INT"
}, {
"id" : 1,
"name" : "movieId",
"type" : "INT"
}, {
"id" : 2,
"name" : "rating",
"type" : "FLOAT"
}, {
"id" : 3,
"name" : "ts",
"type" : "BIGINT"
} ],
"highestFieldId" : 3,
"partitionKeys" : [ "userId" ],
"primaryKeys" : [ ],
"options" : {
"owner" : "jichen",
"snapshot.num-retained.min" : "1"
},
"timeMillis" : 1747717647085
}


@@ -0,0 +1,28 @@
{
"version" : 3,
"id" : 5,
"fields" : [ {
"id" : 0,
"name" : "userId",
"type" : "INT"
}, {
"id" : 1,
"name" : "movieId",
"type" : "INT"
}, {
"id" : 2,
"name" : "rating",
"type" : "FLOAT"
}, {
"id" : 3,
"name" : "ts",
"type" : "BIGINT"
} ],
"highestFieldId" : 3,
"partitionKeys" : [ "userId" ],
"primaryKeys" : [ ],
"options" : {
"owner" : "jichen"
},
"timeMillis" : 1747717651365
}


@@ -0,0 +1,18 @@
{
"version" : 3,
"id" : 5,
"schemaId" : 5,
"baseManifestList" : "manifest-list-fececc90-17e5-4cde-bc69-84a98da91b79-0",
"baseManifestListSize" : 1028,
"deltaManifestList" : "manifest-list-fececc90-17e5-4cde-bc69-84a98da91b79-1",
"deltaManifestListSize" : 992,
"changelogManifestList" : null,
"commitUser" : "0bd4c93e-9d0c-466c-9cd6-68cec360df29",
"commitIdentifier" : 9223372036854775807,
"commitKind" : "OVERWRITE",
"timeMillis" : 1748963555332,
"logOffsets" : { },
"totalRecordCount" : 452,
"deltaRecordCount" : 452,
"changelogRecordCount" : 0
}


@@ -0,0 +1,19 @@
{
"version" : 3,
"id" : 6,
"schemaId" : 5,
"baseManifestList" : "manifest-list-22716ac0-0c0e-4f03-a48c-31c807cbf8d9-0",
"baseManifestListSize" : 1064,
"deltaManifestList" : "manifest-list-22716ac0-0c0e-4f03-a48c-31c807cbf8d9-1",
"deltaManifestListSize" : 884,
"changelogManifestList" : null,
"commitUser" : "d9cb06ed-8061-4a5b-af9c-daffa4e2337e",
"commitIdentifier" : 9223372036854775807,
"commitKind" : "ANALYZE",
"timeMillis" : 1748965930673,
"logOffsets" : { },
"totalRecordCount" : 452,
"deltaRecordCount" : 0,
"changelogRecordCount" : 0,
"statistics" : "stat-7061d17d-5a4d-4532-b98f-714d66103721-0"
}


@@ -0,0 +1,44 @@
{
"snapshotId" : 5,
"schemaId" : 5,
"mergedRecordCount" : 452,
"mergedRecordSize" : 7282,
"colStats" : {
"rating" : {
"colId" : 2,
"distinctCount" : 5,
"min" : "1.0",
"max" : "5.0",
"nullCount" : 0,
"avgLen" : 4,
"maxLen" : 4
},
"movieId" : {
"colId" : 1,
"distinctCount" : 386,
"min" : "1",
"max" : "3893",
"nullCount" : 0,
"avgLen" : 4,
"maxLen" : 4
},
"userId" : {
"colId" : 0,
"distinctCount" : 5,
"min" : "1",
"max" : "5",
"nullCount" : 0,
"avgLen" : 4,
"maxLen" : 4
},
"ts" : {
"colId" : 3,
"distinctCount" : 228,
"min" : "978241072",
"max" : "978824351",
"nullCount" : 0,
"avgLen" : 8,
"maxLen" : 8
}
}
}


@@ -4800,3 +4800,28 @@ delimited fields terminated by ','
insert overwrite table {db_name}{db_suffix}.{table_name}
select id, concat(cast(timestamp_col as string format 'YYYY-MM-DD HH24:MM:SS'), ' +08:00') from functional.alltypestiny;
====
---- DATASET
functional
---- BASE_TABLE_NAME
paimon_partitioned
---- CREATE
CREATE EXTERNAL TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name}
STORED AS PAIMON
LOCATION '/test-warehouse/paimon_test/paimon_catalog/warehouse/functional.db/paimon_partitioned';
---- DEPENDENT_LOAD
`hadoop fs -mkdir -p /test-warehouse/paimon_test/paimon_catalog/warehouse/functional.db && \
hadoop fs -put -f ${IMPALA_HOME}/testdata/data/paimon_test/paimon_catalog/warehouse/functional.db/paimon_partitioned /test-warehouse/paimon_test/paimon_catalog/warehouse/functional.db
====
---- DATASET
functional
---- BASE_TABLE_NAME
paimon_non_partitioned
---- CREATE
CREATE EXTERNAL TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name}
STORED AS PAIMON
LOCATION '/test-warehouse/paimon_test/paimon_catalog/warehouse/functional.db/paimon_non_partitioned'
---- DEPENDENT_LOAD
`hadoop fs -mkdir -p /test-warehouse/paimon_test/paimon_catalog/warehouse/functional.db && \
hadoop fs -put -f ${IMPALA_HOME}/testdata/data/paimon_test/paimon_catalog/warehouse/functional.db/paimon_non_partitioned /test-warehouse/paimon_test/paimon_catalog/warehouse/functional.db
====


@@ -440,3 +440,7 @@ table_name:unique_with_nulls, constraint:restrict_to, table_format:parquet/none/
# The table is used to test casting string values to date type. Don't need to test on
# all formats.
table_name:timestamp_with_tz_str, constraint:restrict_to, table_format:parquet/none/none
# Paimon tests are executed in the PARQUET file format dimension
table_name:paimon_partitioned, constraint:restrict_to, table_format:parquet/none/none
table_name:paimon_non_partitioned, constraint:restrict_to, table_format:parquet/none/none


@@ -0,0 +1,24 @@
====
---- QUERY
CREATE TABLE impala_paimon_complex_types_array (
col_array_string ARRAY<STRING>
)
STORED AS PAIMON;
---- CATCH
AnalysisException: Tables stored by Paimon do not support the column col_array_string type: ARRAY<STRING>
====
---- QUERY
CREATE TABLE impala_paimon_complex_types_map (
col_map_string MAP<STRING,STRING>
)
STORED AS PAIMON;
---- CATCH
AnalysisException: Tables stored by Paimon do not support the column col_map_string type: MAP<STRING,STRING>
====
---- QUERY
CREATE TABLE impala_paimon_complex_types_struct (
col_struct_string STRUCT<f1:STRING, f2:STRING>
)
STORED AS PAIMON;
---- CATCH
AnalysisException: Tables stored by Paimon do not support the column col_struct_string type: STRUCT<f1:STRING,f2:STRING>


@@ -0,0 +1,211 @@
====
---- QUERY
CREATE TABLE IF NOT EXISTS test_create_managed_paimon_table (
user_id BIGINT COMMENT 'The user_id field',
item_id BIGINT COMMENT 'The item_id field',
behavior STRING COMMENT 'The behavior field'
)
STORED AS PAIMON;
---- RESULTS
'Table has been created.'
====
---- QUERY
CREATE TABLE IF NOT EXISTS test_create_managed_part_paimon_table (
user_id BIGINT COMMENT 'The user_id field',
item_id BIGINT COMMENT 'The item_id field',
behavior STRING COMMENT 'The behavior field'
)
PARTITIONED BY (
dt STRING COMMENT 'The dt field',
hh STRING COMMENT 'The hh field'
)
STORED AS PAIMON;
---- RESULTS
'Table has been created.'
====
---- QUERY
CREATE TABLE test_create_managed_part_pk_paimon_table (
user_id BIGINT COMMENT 'The user_id field',
item_id BIGINT COMMENT 'The item_id field',
behavior STRING COMMENT 'The behavior field'
)
PARTITIONED BY (
dt STRING COMMENT 'The dt field',
hh STRING COMMENT 'The hh field'
)
STORED AS PAIMON
TBLPROPERTIES (
'primary-key'='user_id'
);
---- RESULTS
'Table has been created.'
====
---- QUERY
CREATE TABLE test_create_managed_part_pkstmt_paimon_table (
user_id BIGINT COMMENT 'The user_id field',
item_id BIGINT COMMENT 'The item_id field',
behavior STRING COMMENT 'The behavior field',
PRIMARY KEY(user_id)
)
PARTITIONED BY (
dt STRING COMMENT 'The dt field',
hh STRING COMMENT 'The hh field'
)
STORED AS PAIMON;
---- RESULTS
'Table has been created.'
====
---- QUERY
CREATE TABLE test_create_managed_bucket_paimon_table (
user_id BIGINT COMMENT 'The user_id field',
item_id BIGINT COMMENT 'The item_id field',
behavior STRING COMMENT 'The behavior field'
)
STORED AS PAIMON
TBLPROPERTIES (
'bucket' = '4',
'bucket-key'='behavior'
);
---- RESULTS
'Table has been created.'
====
---- QUERY
CREATE TABLE test_create_managed_location_paimon_table (
user_id BIGINT COMMENT 'The user_id field',
item_id BIGINT COMMENT 'The item_id field',
behavior STRING COMMENT 'The behavior field',
PRIMARY KEY(user_id)
)
PARTITIONED BY (
dt STRING COMMENT 'The dt field',
hh STRING COMMENT 'The hh field'
)
STORED AS PAIMON
LOCATION 'hdfs:///test-warehouse/test_create_managed_location_paimon_table';
---- RESULTS
'Table has been created.'
====
---- QUERY
CREATE TABLE TEST_CREATE_MANAGED_UPPERCASE_PAIMON_TABLE (
`USER_ID` BIGINT COMMENT 'The user_id field',
`ITEM_ID` BIGINT COMMENT 'The item_id field',
BEHAVIOR STRING COMMENT 'The behavior field',
PRIMARY KEY(`USER_ID`)
)
PARTITIONED BY (
`DT` STRING COMMENT 'The dt field',
`HH` STRING COMMENT 'The hh field'
)
STORED AS PAIMON;
---- RESULTS
'Table has been created.'
====
---- QUERY
CREATE EXTERNAL TABLE test_create_external_hivecat_nocol_paimon_table
STORED AS PAIMON
LOCATION 'hdfs:///test-warehouse/paimon_test/paimon_catalog/warehouse/functional.db/paimon_non_partitioned';
---- RESULTS
'Table has been created.'
====
---- QUERY
CREATE EXTERNAL TABLE test_create_external_hivecat_nocol_paimon_part_table
STORED AS PAIMON
LOCATION 'hdfs:///test-warehouse/paimon_test/paimon_catalog/warehouse/functional.db/paimon_partitioned';
---- RESULTS
'Table has been created.'
====
---- QUERY
CREATE EXTERNAL TABLE test_create_external_hadoopcat_nocol_paimon_table
STORED AS PAIMON
TBLPROPERTIES('paimon.catalog'='hadoop',
'paimon.catalog_location'='hdfs:///test-warehouse/paimon_test/paimon_catalog/warehouse',
'paimon.table_identifier'='functional.paimon_non_partitioned');
---- RESULTS
'Table has been created.'
====
---- QUERY
CREATE EXTERNAL TABLE test_create_external_hadoopcat_nocol_paimon_part_table
STORED AS PAIMON
TBLPROPERTIES('paimon.catalog'='hadoop',
'paimon.catalog_location'='hdfs:///test-warehouse/paimon_test/paimon_catalog/warehouse',
'paimon.table_identifier'='functional.paimon_partitioned');
---- RESULTS
'Table has been created.'
====
---- QUERY
CREATE TABLE impala_paimon_data_types_test (
col_boolean BOOLEAN,
col_tinyint TINYINT,
col_smallint SMALLINT,
col_int INT,
col_integer INTEGER,
col_bigint BIGINT,
col_float FLOAT,
col_double DOUBLE,
col_decimal DECIMAL(10,2),
col_string STRING,
col_char CHAR(20),
col_varchar VARCHAR(100),
col_timestamp TIMESTAMP,
col_date DATE,
col_binary BINARY
)
COMMENT 'Table to test all Impala paimon-supported data types'
STORED AS PAIMON;
---- RESULTS
'Table has been created.'
====
---- QUERY
DROP TABLE IF EXISTS test_create_managed_paimon_table;
---- RESULTS
'Table has been dropped.'
====
---- QUERY
DROP TABLE IF EXISTS test_create_managed_part_paimon_table;
---- RESULTS
'Table has been dropped.'
====
---- QUERY
DROP TABLE IF EXISTS test_create_managed_part_pk_paimon_table;
---- RESULTS
'Table has been dropped.'
====
---- QUERY
DROP TABLE IF EXISTS test_create_managed_part_pkstmt_paimon_table;
---- RESULTS
'Table has been dropped.'
====
---- QUERY
DROP TABLE IF EXISTS test_create_managed_bucket_paimon_table;
---- RESULTS
'Table has been dropped.'
====
---- QUERY
DROP TABLE IF EXISTS test_create_managed_location_paimon_table;
---- RESULTS
'Table has been dropped.'
====
---- QUERY
DROP TABLE IF EXISTS TEST_CREATE_MANAGED_UPPERCASE_PAIMON_TABLE;
---- RESULTS
'Table has been dropped.'
====
---- QUERY
DROP TABLE IF EXISTS test_create_external_hivecat_nocol_paimon_part_table;
---- RESULTS
'Table has been dropped.'
====
---- QUERY
DROP TABLE IF EXISTS test_create_external_hadoopcat_nocol_paimon_table;
---- RESULTS
'Table has been dropped.'
====
---- QUERY
DROP TABLE IF EXISTS test_create_external_hadoopcat_nocol_paimon_part_table;
---- RESULTS
'Table has been dropped.'
====
---- QUERY
DROP TABLE IF EXISTS impala_paimon_data_types_test;
---- RESULTS
'Table has been dropped.'


@@ -0,0 +1,49 @@
====
---- QUERY
SHOW TABLE STATS functional_parquet.paimon_partitioned;
---- LABELS
Number Of Rows,Number Of Bytes
---- RESULTS
452,7282
---- TYPES
BIGINT, BIGINT
====
---- QUERY
SHOW COLUMN STATS functional_parquet.paimon_partitioned;
---- LABELS
Column,Type,#Distinct Values,#Nulls,Max Size,Avg Size,#Trues,#Falses
---- RESULTS
'movieid','INT',386,0,4,4.0,-1,-1
'rating','FLOAT',5,0,4,4.0,-1,-1
'ts','BIGINT',228,0,8,8.0,-1,-1
'userid','INT',5,0,4,4.0,-1,-1
---- TYPES
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
====
---- QUERY
SHOW PARTITIONS functional_parquet.paimon_partitioned;
---- LABELS
Partition,Number Of Rows,Number Of Files,Number Of Bytes,Last Creation Time
---- RESULTS
'userId=3',51,1,1301,1748992355131
'userId=2',129,1,1657,1748992355155
'userId=5',198,1,1922,1748992355177
'userId=1',53,1,1301,1748992355201
'userId=4',21,1,1101,1748992355224
---- TYPES
STRING, BIGINT, BIGINT, BIGINT, BIGINT
====
---- QUERY
SHOW PARTITIONS functional_parquet.paimon_non_partitioned;
---- CATCH
AnalysisException: Table is not partitioned: functional_parquet.paimon_non_partitioned
====
---- QUERY
SHOW RANGE PARTITIONS functional_parquet.paimon_non_partitioned;
---- CATCH
AnalysisException: SHOW RANGE PARTITIONS functional_parquet.paimon_non_partitioned is not supported for Paimon Table
====
---- QUERY
SHOW FILES IN functional_parquet.paimon_partitioned PARTITION(userid=1403);
---- CATCH
AnalysisException: SHOW FILES with partition filter is not applicable to table type:PAIMON


@@ -147,7 +147,7 @@ Invalid Kudu read mode: 'bar'. Valid values are DEFAULT(0), READ_LATEST(1), READ
---- QUERY
set default_file_format=bar
---- CATCH
Invalid default file format: 'bar'. Valid values are TEXT(0), RC_FILE(1), SEQUENCE_FILE(2), AVRO(3), PARQUET(4), KUDU(5), ORC(6), HUDI_PARQUET(7), ICEBERG(8), JSON(9), JDBC(10).
Invalid default file format: 'bar'. Valid values are TEXT(0), RC_FILE(1), SEQUENCE_FILE(2), AVRO(3), PARQUET(4), KUDU(5), ORC(6), HUDI_PARQUET(7), ICEBERG(8), JSON(9), JDBC(10), PAIMON(11).
====
---- QUERY
set default_transactional_type=bar


@@ -0,0 +1,201 @@
====
---- CREATE_TABLE
CREATE TABLE test_create_managed_paimon_table (
user_id BIGINT COMMENT 'The user_id field',
item_id BIGINT COMMENT 'The item_id field',
behavior STRING COMMENT 'The behavior field'
)
STORED AS PAIMON;
---- RESULTS-HIVE
CREATE TABLE show_create_table_test_db.test_create_managed_paimon_table (
user_id BIGINT,
item_id BIGINT,
behavior STRING
)
STORED AS PAIMON
TBLPROPERTIES ('deletion-vectors.enabled'='true')
====
---- CREATE_TABLE
CREATE TABLE test_create_managed_paimon_table (
user_id BIGINT COMMENT 'The user_id field',
item_id BIGINT COMMENT 'The item_id field',
behavior STRING COMMENT 'The behavior field'
)
STORED AS PAIMON;
---- RESULTS-HIVE
CREATE TABLE show_create_table_test_db.test_create_managed_paimon_table (
user_id BIGINT,
item_id BIGINT,
behavior STRING
)
STORED AS PAIMON
TBLPROPERTIES ('deletion-vectors.enabled'='true')
====
---- CREATE_TABLE
CREATE TABLE test_create_managed_part_paimon_table (
user_id BIGINT COMMENT 'The user_id field',
item_id BIGINT COMMENT 'The item_id field',
behavior STRING COMMENT 'The behavior field'
)
PARTITIONED BY (
dt STRING COMMENT 'The dt field',
hh STRING COMMENT 'The hh field'
)
STORED AS PAIMON;
---- RESULTS-HIVE
CREATE TABLE show_create_table_test_db.test_create_managed_part_paimon_table (
user_id BIGINT,
item_id BIGINT,
behavior STRING
)
PARTITIONED BY (
dt STRING,
hh STRING
)
STORED AS PAIMON
TBLPROPERTIES ('deletion-vectors.enabled'='true')
====
---- CREATE_TABLE
CREATE TABLE test_create_managed_part_pk_paimon_table (
user_id BIGINT COMMENT 'The user_id field',
item_id BIGINT COMMENT 'The item_id field',
behavior STRING COMMENT 'The behavior field'
)
PARTITIONED BY (
dt STRING COMMENT 'The dt field',
hh STRING COMMENT 'The hh field'
)
STORED AS PAIMON
TBLPROPERTIES (
'primary-key'='user_id'
);
---- RESULTS-HIVE
CREATE TABLE show_create_table_test_db.test_create_managed_part_pk_paimon_table (
user_id BIGINT,
item_id BIGINT,
behavior STRING,
PRIMARY KEY (user_id)
)
PARTITIONED BY (
dt STRING,
hh STRING
)
STORED AS PAIMON
TBLPROPERTIES ('deletion-vectors.enabled'='true')
====
---- CREATE_TABLE
CREATE TABLE test_create_managed_part_pkstmt_paimon_table (
user_id BIGINT COMMENT 'The user_id field',
item_id BIGINT COMMENT 'The item_id field',
behavior STRING COMMENT 'The behavior field',
PRIMARY KEY (user_id)
)
PARTITIONED BY (
dt STRING COMMENT 'The dt field',
hh STRING COMMENT 'The hh field'
)
STORED AS PAIMON;
---- RESULTS-HIVE
CREATE TABLE show_create_table_test_db.test_create_managed_part_pkstmt_paimon_table (
user_id BIGINT,
item_id BIGINT,
behavior STRING,
PRIMARY KEY (user_id)
)
PARTITIONED BY (
dt STRING,
hh STRING
)
STORED AS PAIMON
TBLPROPERTIES ('deletion-vectors.enabled'='true')
====
---- CREATE_TABLE
CREATE TABLE test_create_managed_bucket_paimon_table (
user_id BIGINT COMMENT 'The user_id field',
item_id BIGINT COMMENT 'The item_id field',
behavior STRING COMMENT 'The behavior field'
)
STORED AS PAIMON
TBLPROPERTIES (
'bucket' = '4',
'bucket-key'='behavior'
);
---- RESULTS-HIVE
CREATE TABLE show_create_table_test_db.test_create_managed_bucket_paimon_table (
user_id BIGINT,
item_id BIGINT,
behavior STRING
)
STORED AS PAIMON
TBLPROPERTIES ('bucket'='4', 'bucket-key'='behavior', 'deletion-vectors.enabled'='true')
====
---- CREATE_TABLE
CREATE EXTERNAL TABLE test_create_external_hivecat_nocol_paimon_table
STORED AS PAIMON
LOCATION 'hdfs:///test-warehouse/paimon_test/paimon_catalog/warehouse/functional.db/paimon_non_partitioned';
---- RESULTS-HIVE
CREATE EXTERNAL TABLE show_create_table_test_db.test_create_external_hivecat_nocol_paimon_table (
userid INT,
movieid INT,
rating FLOAT,
`timestamp` BIGINT
)
STORED AS PAIMON
LOCATION '$$location_uri$$'
TBLPROPERTIES ('deletion-vectors.enabled'='true')
====
---- CREATE_TABLE
CREATE EXTERNAL TABLE test_create_external_hadoopcat_nocol_paimon_table
STORED AS PAIMON
TBLPROPERTIES('paimon.catalog'='hadoop',
'paimon.catalog_location'='hdfs:///test-warehouse/paimon_test/paimon_catalog/warehouse',
'paimon.table_identifier'='functional.paimon_non_partitioned');
---- RESULTS-HIVE
CREATE EXTERNAL TABLE show_create_table_test_db.test_create_external_hadoopcat_nocol_paimon_table (
userid INT,
movieid INT,
rating FLOAT,
`timestamp` BIGINT
)
STORED AS PAIMON
LOCATION '$$location_uri$$'
TBLPROPERTIES ('deletion-vectors.enabled'='true', 'paimon.catalog'='hadoop', 'paimon.catalog_location'='hdfs:///test-warehouse/paimon_test/paimon_catalog/warehouse', 'paimon.table_identifier'='functional.paimon_non_partitioned')
====
---- CREATE_TABLE
CREATE EXTERNAL TABLE test_create_external_hivecat_nocol_paimon_part_table
STORED AS PAIMON
LOCATION 'hdfs:///test-warehouse/paimon_test/paimon_catalog/warehouse/functional.db/paimon_partitioned';
---- RESULTS-HIVE
CREATE EXTERNAL TABLE show_create_table_test_db.test_create_external_hivecat_nocol_paimon_part_table (
movieid INT,
rating FLOAT,
ts BIGINT
)
PARTITIONED BY (
userid INT
)
STORED AS PAIMON
LOCATION '$$location_uri$$'
TBLPROPERTIES ('deletion-vectors.enabled'='true')
====
---- CREATE_TABLE
CREATE EXTERNAL TABLE test_create_external_hadoopcat_nocol_paimon_part_table
STORED AS PAIMON
TBLPROPERTIES('paimon.catalog'='hadoop',
'paimon.catalog_location'='hdfs:///test-warehouse/paimon_test/paimon_catalog/warehouse',
'paimon.table_identifier'='functional.paimon_partitioned')
---- RESULTS-HIVE
CREATE EXTERNAL TABLE show_create_table_test_db.test_create_external_hadoopcat_nocol_paimon_part_table (
movieid INT,
rating FLOAT,
ts BIGINT
)
PARTITIONED BY (
userid INT
)
STORED AS PAIMON
LOCATION '$$location_uri$$'
TBLPROPERTIES ('deletion-vectors.enabled'='true', 'paimon.catalog'='hadoop', 'paimon.catalog_location'='hdfs:///test-warehouse/paimon_test/paimon_catalog/warehouse', 'paimon.table_identifier'='functional.paimon_partitioned')


@@ -21,7 +21,7 @@ import re
import shlex
from tests.common.impala_test_suite import ImpalaTestSuite
from tests.common.skip import SkipIfFS, SkipIfHive2
from tests.common.skip import SkipIf, SkipIfFS, SkipIfHive2
from tests.common.test_dimensions import create_uncompressed_text_dimension
from tests.util.test_file_parser import QueryTestSectionReader, remove_comments
from tests.common.environ import HIVE_MAJOR_VERSION
@@ -88,6 +88,12 @@ class TestShowCreateTable(ImpalaTestSuite):
vector,
unique_database)
@SkipIf.not_hdfs
def test_show_create_table_paimon(self, vector, unique_database):
self.__run_show_create_table_test_case('QueryTest/show-create-table-paimon',
vector,
unique_database)
def __run_show_create_table_test_case(self, test_file_name, vector, unique_db_name):
"""
Runs a show-create-table test file, containing the following sections:


@@ -0,0 +1,50 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import absolute_import, division, print_function
from tests.common.impala_test_suite import ImpalaTestSuite
from tests.common.skip import SkipIf
@SkipIf.not_hdfs
class TestCreatingPaimonTable(ImpalaTestSuite):
@classmethod
def get_workload(cls):
return 'functional-query'
@classmethod
def add_test_dimensions(cls):
super(TestCreatingPaimonTable, cls).add_test_dimensions()
cls.ImpalaTestMatrix.add_constraint(
lambda v: v.get_value('table_format').file_format == 'parquet')
def test_paimon_ddl(self, vector, unique_database):
self.run_test_case('QueryTest/paimon-ddl', vector, unique_database)
def test_paimon_show_stats(self, vector, unique_database):
self.run_test_case('QueryTest/paimon-show-stats', vector, unique_database)
def test_paimon_show_file_stats(self):
tables = ["paimon_non_partitioned", "paimon_partitioned"]
for table in tables:
result = self.client.execute("show files in functional_parquet." + table)
assert len(result.data) == 5, "Expected 5 files"
def test_create_paimon_ddl_negative(self, vector, unique_database):
self.run_test_case('QueryTest/paimon-ddl-negative',
vector, unique_database)