diff --git a/fe/src/main/cup/sql-parser.cup b/fe/src/main/cup/sql-parser.cup index 2115c89d2..5827b7dc9 100644 --- a/fe/src/main/cup/sql-parser.cup +++ b/fe/src/main/cup/sql-parser.cup @@ -1776,7 +1776,7 @@ iceberg_partition_spec_def ::= ; iceberg_partition_field_list ::= - KW_PARTITION KW_BY KW_SPEC LPAREN iceberg_partition_field_defs:cols RPAREN + KW_PARTITIONED KW_BY KW_SPEC LPAREN iceberg_partition_field_defs:cols RPAREN {: RESULT = cols; :} ; @@ -1795,16 +1795,26 @@ iceberg_partition_field_defs ::= ; iceberg_partition_field_def ::= - IDENT:col_name iceberg_partition_transform:partition_transform - {: RESULT = new IcebergPartitionField(col_name, partition_transform); :} - ; - -iceberg_partition_transform ::= - iceberg_partition_transform_type:transfrom_type INTEGER_LITERAL:transform_param - {: RESULT = IcebergUtil.getPartitionTransform(transfrom_type, - transform_param.intValue()); :} - | IDENT:transfrom_type - {: RESULT = IcebergUtil.getPartitionTransform(transfrom_type); :} + IDENT:col_name + {: + RESULT = new IcebergPartitionField(col_name, + IcebergUtil.getPartitionTransform("IDENTITY")); + :} + | + iceberg_partition_transform_type:partition_transform + LPAREN IDENT:col_name RPAREN + {: + RESULT = new IcebergPartitionField(col_name, IcebergUtil.getPartitionTransform( + partition_transform)); + :} + | iceberg_partition_transform_type:partition_transform + LPAREN + INTEGER_LITERAL:transform_param COMMA IDENT:col_name + RPAREN + {: + RESULT = new IcebergPartitionField(col_name, IcebergUtil.getPartitionTransform( + partition_transform, transform_param.intValue())); + :} ; iceberg_partition_transform_type ::= diff --git a/fe/src/main/java/org/apache/impala/analysis/CreateTableStmt.java b/fe/src/main/java/org/apache/impala/analysis/CreateTableStmt.java index a4f4ec012..cdc7fade2 100644 --- a/fe/src/main/java/org/apache/impala/analysis/CreateTableStmt.java +++ b/fe/src/main/java/org/apache/impala/analysis/CreateTableStmt.java @@ -298,7 +298,7 @@ public class 
CreateTableStmt extends StatementBase { List iceSpec = tableDef_.getIcebergPartitionSpecs(); if (iceSpec != null && !iceSpec.isEmpty()) { throw new AnalysisException( - "PARTITION BY SPEC is only valid for Iceberg tables."); + "PARTITIONED BY SPEC is only valid for Iceberg tables."); } } diff --git a/fe/src/main/java/org/apache/impala/analysis/IcebergPartitionField.java b/fe/src/main/java/org/apache/impala/analysis/IcebergPartitionField.java index 659530af9..bb77c3699 100644 --- a/fe/src/main/java/org/apache/impala/analysis/IcebergPartitionField.java +++ b/fe/src/main/java/org/apache/impala/analysis/IcebergPartitionField.java @@ -85,9 +85,7 @@ public class IcebergPartitionField extends StmtNode { @Override public String toSql(ToSqlOptions options) { - StringBuilder builder = new StringBuilder(); - builder.append(origFieldName_+ " " + transform_.toSql()); - return builder.toString(); + return transform_.toSql(origFieldName_); } public TIcebergPartitionField toThrift() { diff --git a/fe/src/main/java/org/apache/impala/analysis/IcebergPartitionSpec.java b/fe/src/main/java/org/apache/impala/analysis/IcebergPartitionSpec.java index 1e96401cc..4153f2e7f 100644 --- a/fe/src/main/java/org/apache/impala/analysis/IcebergPartitionSpec.java +++ b/fe/src/main/java/org/apache/impala/analysis/IcebergPartitionSpec.java @@ -27,15 +27,15 @@ import org.apache.impala.compat.MetastoreShim; import org.apache.impala.thrift.TIcebergPartitionSpec; /** - * Represents the partitioning of a Iceberg table as defined in the PARTITION BY SPEC + * Represents the partitioning of an Iceberg table as defined in the PARTITIONED BY SPEC * clause of a CREATE TABLE statement. Iceberg supported kinds of partition. 
* Examples: - * PARTITION BY SPEC + * PARTITIONED BY SPEC * ( - * dt identity, - * event_time hour, - * event_time day, - * event_time month + * dt, + * hour(event_time), + * day(event_time), + * month(event_time) * ) */ public class IcebergPartitionSpec extends StmtNode { diff --git a/fe/src/main/java/org/apache/impala/analysis/IcebergPartitionTransform.java b/fe/src/main/java/org/apache/impala/analysis/IcebergPartitionTransform.java index 8877fa1d4..a20509e0f 100644 --- a/fe/src/main/java/org/apache/impala/analysis/IcebergPartitionTransform.java +++ b/fe/src/main/java/org/apache/impala/analysis/IcebergPartitionTransform.java @@ -87,6 +87,25 @@ public class IcebergPartitionTransform extends StmtNode { return builder.toString(); } + public final String toSql(String colName) { + return toSql(colName, ToSqlOptions.DEFAULT); + } + + public String toSql(String colName, ToSqlOptions options) { + StringBuilder builder = new StringBuilder(); + if (transformType_ != TIcebergPartitionTransformType.IDENTITY) { + builder.append(transformType_.toString()).append ("("); + if (transformParam_ != null) { + builder.append(transformParam_.toString()).append(", "); + } + } + builder.append(colName); + if (transformType_ != TIcebergPartitionTransformType.IDENTITY) { + builder.append(")"); + } + return builder.toString(); + } + public TIcebergPartitionTransform toThrift() { TIcebergPartitionTransform transform = new TIcebergPartitionTransform(); transform.setTransform_type(transformType_); diff --git a/fe/src/main/java/org/apache/impala/analysis/TableDataLayout.java b/fe/src/main/java/org/apache/impala/analysis/TableDataLayout.java index d11c2c19c..fd1005bfd 100644 --- a/fe/src/main/java/org/apache/impala/analysis/TableDataLayout.java +++ b/fe/src/main/java/org/apache/impala/analysis/TableDataLayout.java @@ -22,7 +22,7 @@ import java.util.List; /** * Represents the PARTITION BY and PARTITIONED BY clauses of a DDL statement. 
- * We can use PARTITION BY SPEC clause to create iceberg table partitions. + * We can use PARTITIONED BY SPEC clause to create iceberg table partitions. */ class TableDataLayout { diff --git a/fe/src/main/java/org/apache/impala/analysis/ToSqlUtils.java b/fe/src/main/java/org/apache/impala/analysis/ToSqlUtils.java index 0aa540456..6b4bf87bb 100644 --- a/fe/src/main/java/org/apache/impala/analysis/ToSqlUtils.java +++ b/fe/src/main/java/org/apache/impala/analysis/ToSqlUtils.java @@ -419,7 +419,7 @@ public class ToSqlUtils { properties.remove(StatsSetupConst.DO_NOT_UPDATE_STATS); properties.remove(IcebergTable.METADATA_LOCATION); - // Fill "PARTITION BY SPEC" part if the Iceberg table is partitioned. + // Fill "PARTITIONED BY SPEC" part if the Iceberg table is partitioned. FeIcebergTable feIcebergTable= (FeIcebergTable)table; if (!feIcebergTable.getPartitionSpecs().isEmpty()) { IcebergPartitionSpec latestPartitionSpec = @@ -503,7 +503,7 @@ public class ToSqlUtils { Joiner.on(", \n ").join(sortProperties.first))); } if (icebergPartitions != null && !icebergPartitions.isEmpty()) { - sb.append("PARTITION BY SPEC\n"); + sb.append("PARTITIONED BY SPEC\n"); sb.append(icebergPartitions); sb.append("\n"); } diff --git a/fe/src/main/java/org/apache/impala/util/IcebergUtil.java b/fe/src/main/java/org/apache/impala/util/IcebergUtil.java index 5be0a9f51..48c67890a 100644 --- a/fe/src/main/java/org/apache/impala/util/IcebergUtil.java +++ b/fe/src/main/java/org/apache/impala/util/IcebergUtil.java @@ -284,24 +284,23 @@ public class IcebergUtil { public static TIcebergPartitionTransformType getPartitionTransformType( String transformType) throws TableLoadingException { + Preconditions.checkNotNull(transformType); transformType = transformType.toUpperCase(); if ("IDENTITY".equals(transformType)) { return TIcebergPartitionTransformType.IDENTITY; - } else if ("HOUR".equals(transformType)) { - return TIcebergPartitionTransformType.HOUR; - } else if ("DAY".equals(transformType)) { - 
return TIcebergPartitionTransformType.DAY; - } else if ("MONTH".equals(transformType)) { - return TIcebergPartitionTransformType.MONTH; - } else if ("YEAR".equals(transformType)) { - return TIcebergPartitionTransformType.YEAR; } else if (transformType != null && transformType.startsWith("BUCKET")) { return TIcebergPartitionTransformType.BUCKET; } else if (transformType != null && transformType.startsWith("TRUNCATE")) { return TIcebergPartitionTransformType.TRUNCATE; - } else { - throw new TableLoadingException("Unsupported iceberg partition type: " + - transformType); + } + switch (transformType) { + case "HOUR": case "HOURS": return TIcebergPartitionTransformType.HOUR; + case "DAY": case "DAYS": return TIcebergPartitionTransformType.DAY; + case "MONTH": case "MONTHS": return TIcebergPartitionTransformType.MONTH; + case "YEAR": case "YEARS": return TIcebergPartitionTransformType.YEAR; + default: + throw new TableLoadingException("Unsupported iceberg partition type: " + + transformType); } } diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java index 12aa6bd96..23b63453e 100644 --- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java +++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java @@ -4858,21 +4858,30 @@ public class AnalyzeStmtsTest extends AnalyzerTest { public void testCreatePartitionedIcebergTable() throws ImpalaException { String tblProperties = " TBLPROPERTIES ('iceberg.catalog'='hadoop.tables')"; AnalyzesOk("CREATE TABLE tbl1 (i int, p1 int, p2 timestamp) " + - "PARTITION BY SPEC (p1 BUCKET 10, p1 TRUNCATE 5, p2 DAY) STORED AS ICEBERG" + - tblProperties); + "PARTITIONED BY SPEC (BUCKET(10, p1), TRUNCATE(5, p1), DAY(p2)) " + + "STORED AS ICEBERG" + tblProperties); + AnalyzesOk("CREATE TABLE tbl1 (ts timestamp) " + + "PARTITIONED BY SPEC (YEAR(ts), MONTH(ts), DAY(ts), HOUR(ts)) " + + "STORED AS ICEBERG" + tblProperties); + 
AnalyzesOk("CREATE TABLE tbl1 (ts timestamp) " + + "PARTITIONED BY SPEC (YEARS(ts), MONTHS(ts), DAYS(ts), HOURS(ts)) " + + "STORED AS ICEBERG" + tblProperties); AnalysisError("CREATE TABLE tbl1 (i int, p1 int, p2 timestamp) " + - "PARTITION BY SPEC (p1 BUCKET, p2 DAY) STORED AS ICEBERG" + tblProperties, + "PARTITIONED BY SPEC (BUCKET(p1), DAY(p2)) STORED AS ICEBERG" + tblProperties, + "BUCKET and TRUNCATE partition transforms should have a parameter."); + AnalysisError("CREATE TABLE tbl1 (i int, p1 int) " + + "PARTITIONED BY SPEC (TRUNCATE(p1)) STORED AS ICEBERG", "BUCKET and TRUNCATE partition transforms should have a parameter."); AnalysisError("CREATE TABLE tbl1 (i int, p1 int, p2 timestamp) " + - "PARTITION BY SPEC (p1 BUCKET 0, p2 DAY) STORED AS ICEBERG" + tblProperties, + "PARTITIONED BY SPEC (BUCKET(0, p1), DAY(p2)) STORED AS ICEBERG" + tblProperties, "The parameter of a partition transform should be greater than zero."); AnalysisError("CREATE TABLE tbl1 (i int, p1 int, p2 timestamp) " + - "PARTITION BY SPEC (p1 TRUNCATE 0, p2 DAY) STORED AS ICEBERG" + tblProperties, + "PARTITIONED BY SPEC (TRUNCATE(0, p1), DAY(p2)) STORED AS ICEBERG" + + tblProperties, "The parameter of a partition transform should be greater than zero."); - AnalysisError("CREATE TABLE tbl1 (i int, p1 int, p2 timestamp) " + - "PARTITION BY SPEC (p1 BUCKET 10, p2 DAY 10) STORED AS ICEBERG" + tblProperties, + "PARTITIONED BY SPEC (BUCKET(10, p1), DAY(10, p2)) STORED AS ICEBERG" + + tblProperties, "Only BUCKET and TRUNCATE partition transforms accept a parameter."); } - } diff --git a/fe/src/test/java/org/apache/impala/analysis/ParserTest.java b/fe/src/test/java/org/apache/impala/analysis/ParserTest.java index 88b34be57..b17fcad30 100644 --- a/fe/src/test/java/org/apache/impala/analysis/ParserTest.java +++ b/fe/src/test/java/org/apache/impala/analysis/ParserTest.java @@ -2997,10 +2997,6 @@ public class ParserTest extends FrontendTestBase { ParserError("CREATE TABLE Foo(a int PRIMARY KEY, b int 
BLOCK_SIZE 1+1) " + "STORED AS KUDU"); ParserError("CREATE TABLE Foo(a int PRIMARY KEY BLOCK_SIZE -1) STORED AS KUDU"); - - // Iceberg TRUNCATE partition transform without parameter results a parse error. - ParserError("CREATE TABLE tbl1 (i int, p1 int) PARTITION BY SPEC (p1 TRUNCATE) " + - "STORED AS ICEBERG TBLPROPERTIES ('iceberg.catalog'='hadoop.tables')"); } @Test diff --git a/testdata/datasets/functional/functional_schema_template.sql b/testdata/datasets/functional/functional_schema_template.sql index 56989474d..3c0e90074 100644 --- a/testdata/datasets/functional/functional_schema_template.sql +++ b/testdata/datasets/functional/functional_schema_template.sql @@ -3050,7 +3050,7 @@ functional iceberg_int_partitioned ---- CREATE CREATE TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name} (i INT, j INT, k INT) -PARTITION BY SPEC (i identity, j identity) +PARTITIONED BY SPEC (i, j) STORED AS ICEBERG; ==== ---- DATASET @@ -3060,7 +3060,7 @@ iceberg_partition_transforms_zorder ---- CREATE CREATE TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name} (ts timestamp, s string, i int, j int) -PARTITION BY SPEC (ts year, s bucket 5) +PARTITIONED BY SPEC (year(ts), bucket(5, s)) SORT BY ZORDER (i, j) STORED AS ICEBERG; ==== diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-create.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-create.test index eba73d866..5486554e4 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-create.test +++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-create.test @@ -9,14 +9,14 @@ CREATE TABLE iceberg_hadoop_tables( map_test MAP >, struct_test STRUCT ) -PARTITION BY SPEC +PARTITIONED BY SPEC ( - level IDENTITY, - level TRUNCATE 10, - event_time IDENTITY, - event_time HOUR, - event_time BUCKET 1000, - register_time DAY + level, + TRUNCATE(10, level), + event_time, + HOURS(event_time), + BUCKET(1000, event_time), + DAY(register_time) ) STORED AS ICEBERG 
TBLPROPERTIES('iceberg.catalog'='hadoop.tables'); @@ -78,11 +78,11 @@ TBLPROPERTIES('iceberg.catalog'='hadoop.tables'); CREATE TABLE iceberg_hadoop_tbls_with_loc( level STRING ) -PARTITION BY SPEC +PARTITIONED BY SPEC ( - level IDENTITY, - level BUCKET 12345, - level TRUNCATE 15 + level, + BUCKET(12345, level), + TRUNCATE(15, level) ) STORED AS ICEBERG LOCATION '/$DATABASE.iceberg_test_with_location' @@ -153,14 +153,14 @@ CREATE TABLE iceberg_hadoop_catalog( map_test MAP >, struct_test STRUCT ) -PARTITION BY SPEC +PARTITIONED BY SPEC ( - level IDENTITY, - level TRUNCATE 10, - event_time IDENTITY, - event_time HOUR, - event_time BUCKET 1000, - register_time DAY + level, + TRUNCATE(10, level), + event_time, + HOUR(event_time), + BUCKET(1000, event_time), + DAYS(register_time) ) STORED AS ICEBERG TBLPROPERTIES('iceberg.catalog'='hadoop.catalog', @@ -202,11 +202,11 @@ DROP TABLE iceberg_hadoop_catalog; CREATE TABLE iceberg_hadoop_catalog( level STRING ) -PARTITION BY SPEC +PARTITIONED BY SPEC ( - level IDENTITY, - level BUCKET 12345, - level TRUNCATE 10 + level, + BUCKET(12345, level), + TRUNCATE(10, level) ) STORED AS ICEBERG TBLPROPERTIES('iceberg.catalog'='hadoop.catalog', @@ -327,12 +327,12 @@ CREATE TABLE iceberg_hadoop_cat_with_orc( map_test MAP >, struct_test STRUCT ) -PARTITION BY SPEC +PARTITIONED BY SPEC ( - level IDENTITY, - event_time IDENTITY, - event_time HOUR, - register_time DAY + level, + event_time, + HOUR(event_time), + DAY(register_time) ) STORED AS ICEBERG TBLPROPERTIES('iceberg.file_format'='orc','iceberg.catalog'='hadoop.catalog', @@ -395,12 +395,12 @@ CREATE TABLE iceberg_part_hive_cat( map_test MAP >, struct_test STRUCT ) -PARTITION BY SPEC +PARTITIONED BY SPEC ( - level IDENTITY, - event_time IDENTITY, - event_time HOUR, - register_time DAY + level, + event_time, + HOUR(event_time), + DAY(register_time) ) STORED AS ICEBERG TBLPROPERTIES('iceberg.catalog'='hive.catalog') diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/iceberg-ctas.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-ctas.test index eddac9553..6cc17b26f 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-ctas.test +++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-ctas.test @@ -57,8 +57,8 @@ STRING, STRING, STRING ---- QUERY # Test CTAS in Iceberg HadoopTables catalog. # Set table location to custom location. -# Use PARTITION BY SPEC -CREATE TABLE ice_ctas_hadoop_tables_part PARTITION BY SPEC (d month) +# Use PARTITIONED BY SPEC +CREATE TABLE ice_ctas_hadoop_tables_part PARTITIONED BY SPEC (month(d)) STORED AS ICEBERG LOCATION '/test-warehouse/$DATABASE.db/loc_test' TBLPROPERTIES ('iceberg.catalog'='hadoop.tables') AS SELECT s, ts, d FROM value_tbl; @@ -78,10 +78,10 @@ STRING, STRING, STRING ---- QUERY # Test CTAS in Iceberg HadoopCatalog catalog. # Set 'iceberg.catalog_location' and 'iceberg.table_identifier' -# Partition by TRUNCATE +# Partitioned by TRUNCATE # Cast TINYINT to INT. # INSERT additional row. -CREATE TABLE ice_ctas_hadoop_catalog_part PARTITION BY SPEC (s truncate 3) +CREATE TABLE ice_ctas_hadoop_catalog_part PARTITIONED BY SPEC (truncate(3, s)) STORED AS ICEBERG TBLPROPERTIES ('iceberg.catalog'='hadoop.catalog', 'iceberg.catalog_location'='/test-warehouse/$DATABASE.db/cat_loc', diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-negative.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-negative.test index 2752e8f7b..d287c7873 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-negative.test +++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-negative.test @@ -9,20 +9,16 @@ AnalysisException: Table requires at least 1 column for managed iceberg table. 
CREATE TABLE iceberg_test( level STRING ) -PARTITION BY SPEC -( - level IDENTITY, - event_time HOUR -) +PARTITIONED BY SPEC(level, hour(event_time)) STORED AS ICEBERG; ---- CATCH AnalysisException: Cannot find source column: event_time ==== ---- QUERY CREATE TABLE non_iceberg_table_with_spec (i INT) -PARTITION BY SPEC (i identity); +PARTITIONED BY SPEC (i); ---- CATCH -AnalysisException: PARTITION BY SPEC is only valid for Iceberg tables. +AnalysisException: PARTITIONED BY SPEC is only valid for Iceberg tables. ==== ---- QUERY CREATE TABLE iceberg_table_hadoop_tables( @@ -94,7 +90,7 @@ row_regex:.*CAUSED BY: TableLoadingException: Table does not exist: fake_db.fake ==== ---- QUERY CREATE TABLE iceberg_overwrite_bucket (i int) -PARTITION BY SPEC (i bucket 3) +PARTITIONED BY SPEC (bucket(3, i)) STORED AS ICEBERG TBLPROPERTIES('iceberg.catalog'='hadoop.tables'); INSERT OVERWRITE iceberg_overwrite_bucket VALUES (1), (2), (3); @@ -139,10 +135,7 @@ CREATE TABLE iceberg_partitioned_insert( level STRING, event_time TIMESTAMP ) -PARTITION BY SPEC -( - level IDENTITY -) +PARTITIONED BY SPEC(level) STORED AS ICEBERG TBLPROPERTIES('iceberg.catalog'='hadoop.tables'); ---- RESULTS @@ -155,8 +148,8 @@ Static partitioning is not supported for Iceberg tables. ==== ---- QUERY CREATE TABLE all_colss_needed_for_insert (i int, j int, k int) -partition by spec (j identity, k identity) -stored as iceberg; +PARTITIONED BY SPEC (j, k) +STORED AS ICEBERG; ---- RESULTS 'Table has been created.' ==== @@ -316,18 +309,18 @@ STORED AS ICEBERG AnalysisException: Unsupported column options for file format 'ICEBERG': 'id INT PRIMARY KEY' ==== ---- QUERY -# PARTITIONED BY and PARTITION BY SPEC is not allowed in same statement. +# PARTITIONED BY and PARTITIONED BY SPEC is not allowed in same statement. 
CREATE TABLE iceberg_part_spec_part (i INT) PARTITIONED BY (p INT) -PARTITION BY SPEC (i TRUNCATE 10) +PARTITIONED BY SPEC (TRUNCATE(10, i)) STORED AS ICEBERG; ---- CATCH Syntax error in line ==== ---- QUERY -# PARTITION BY SPEC and PARTITIONED BY is not allowed in same statement. +# PARTITIONED BY SPEC and PARTITIONED BY is not allowed in same statement. CREATE TABLE iceberg_part_part_spec (i INT) -PARTITION BY SPEC (i TRUNCATE 10) +PARTITIONED BY SPEC (TRUNCATE(10, i)) PARTITIONED BY (p INT) STORED AS ICEBERG; ---- CATCH @@ -347,3 +340,10 @@ ALTER TABLE iceberg_set_wrong_fileformat SET TBLPROPERTIES ('iceberg.file_format ---- CATCH Invalid fileformat for Iceberg table: parq ==== +---- QUERY +CREATE TABLE iceberg_wrong_partition (i int) +PARTITIONED BY SPEC (wrong(i)) +STORED AS ICEBERG; +---- CATCH +Unsupported iceberg partition type: WRONG +==== diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-overwrite.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-overwrite.test index 3ad743bc7..4b12d3b12 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-overwrite.test +++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-overwrite.test @@ -85,7 +85,7 @@ INT,INT ---- QUERY # Create DAY-partitioned table for INSERT OVERWRITE create table ice_day (ts timestamp) -partition by spec (ts DAY) +partitioned by spec (DAY(ts)) stored as iceberg; ---- RESULTS 'Table has been created.' @@ -131,7 +131,7 @@ TIMESTAMP ---- QUERY # Create TRUNCATE-partitioned table for INSERT OVERWRITE create table ice_trunc (d decimal(10, 2)) -partition by spec (d TRUNCATE 100) +partitioned by spec (TRUNCATE(100, d)) stored as iceberg; ---- RESULTS 'Table has been created.' 
diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-partition-transform-insert.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-partition-transform-insert.test index 0c9c62365..1d7572936 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-partition-transform-insert.test +++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-partition-transform-insert.test @@ -3,7 +3,7 @@ # Test partitioned INSERTs with single column that is also # the partitioned column. Partition transform is BUCKET. create table single_col_bucket (s string) -partition by spec (s bucket 7) +partitioned by spec (bucket(7, s)) stored as iceberg; ==== ---- QUERY @@ -56,7 +56,7 @@ aggregation(SUM, NumRowGroups): 1 ---- QUERY # Bucket transform for multiple columns. create table multi_col_bucket (i int, s string, d date, t timestamp) -partition by spec (i bucket 3, s bucket 5, d bucket 5, t bucket 5) +partitioned by spec (bucket(3, i), bucket(5, s), bucket(5, d), bucket(5, t)) stored as iceberg; ==== ---- QUERY @@ -147,7 +147,7 @@ aggregation(SUM, NumRowGroups): 2 ---- QUERY # Test single col TRUNCATE create table single_col_truncate (d decimal(10, 4)) -partition by spec (d truncate 100) +partitioned by spec (truncate(100, d)) stored as iceberg; ==== ---- QUERY @@ -227,7 +227,7 @@ aggregation(SUM, NumRowGroups): 3 ---- QUERY # Test single col TRUNCATE create table multi_col_truncate (i int, b bigint, d decimal(16, 6), s string) -partition by spec (s truncate 15, i truncate 5, b truncate 11, d truncate 100000) +partitioned by spec (truncate(15, s), truncate(5, i), truncate(11, b), truncate(100000, d)) stored as iceberg; ==== ---- QUERY @@ -337,7 +337,7 @@ aggregation(SUM, NumRowGroups): 1 ---- QUERY # Create table with YEAR partition transform create table year_transform(t timestamp, d date) -partition by spec (t year, d year) +partitioned by spec (year(t), year(d)) stored as iceberg; ==== ---- QUERY @@ -457,7 +457,7 @@ 
aggregation(SUM, NumRowGroups): 2 ---- QUERY # Create table with MONTH partition transform create table month_transform(t timestamp, d date) -partition by spec (t month, d month) +partitioned by spec (month(t), month(d)) stored as iceberg; ==== ---- QUERY @@ -565,7 +565,7 @@ aggregation(SUM, NumRowGroups): 2 ---- QUERY # Create table with DAY partition transform create table day_transform(t timestamp, d date) -partition by spec (t day, d day) +partitioned by spec (day(t), day(d)) stored as iceberg; ==== ---- QUERY @@ -689,7 +689,7 @@ aggregation(SUM, NumRowGroups): 1 ---- QUERY # Create table with HOUR partition transform create table hour_transform(t timestamp) -partition by spec (t hour) +partitioned by spec (hour(t)) stored as iceberg; ==== ---- QUERY @@ -786,7 +786,7 @@ aggregation(SUM, NumRowGroups): 3 ==== ---- QUERY create table mixed_and_shuffled (s string, b bigint, de decimal(6, 2), t timestamp, da date) -partition by spec (t day, da year, s truncate 5, b bucket 3, de truncate 100) +partitioned by spec (day(t), year(da), truncate(5, s), bucket(3, b), truncate(100, de)) stored as iceberg; ==== ---- QUERY diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-partitioned-insert.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-partitioned-insert.test index 85534fca7..4a114dcba 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-partitioned-insert.test +++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-partitioned-insert.test @@ -3,7 +3,7 @@ # Test partitioned INSERTs with single column that is also # the partitioned column. create table ice_only_part (i int) -partition by spec (i identity) +partitioned by spec (i) stored as iceberg; ---- RESULTS 'Table has been created.' @@ -44,7 +44,7 @@ aggregation(SUM, RowsRead): 1 ---- QUERY # Test inserts with multple partition columns. 
create table ice_multi_part (i int, d date, s string) -partition by spec(i identity, d identity) +partitioned by spec(i, d) stored as iceberg; ---- RESULTS 'Table has been created.' @@ -72,7 +72,7 @@ aggregation(SUM, RowsRead): 1 ---- QUERY # Test that Impala only writes one file per partitions. create table ice_bigints (i BIGINT, j BIGINT, k BIGINT) -partition by spec (i identity, j identity) +partitioned by spec (i, j) stored as iceberg; ---- RESULTS 'Table has been created.' @@ -110,15 +110,15 @@ create table alltypes_part ( date_col DATE, string_col STRING, timestamp_col TIMESTAMP) -partition by spec ( - id identity, - bool_col identity, - int_col identity, - bigint_col identity, - float_col identity, - double_col identity, - date_col identity, - string_col identity) +partitioned by spec ( + id, + bool_col, + int_col, + bigint_col, + float_col, + double_col, + date_col, + string_col) stored as iceberg; ---- RESULTS 'Table has been created.' @@ -226,7 +226,7 @@ aggregation(SUM, NumRowGroups): 8 ---- QUERY # Test inserts with multple partition columns. create table ice_part_non_order (i int, d date, s string) -partition by spec(s identity, d identity) +partitioned by spec(s, d) stored as iceberg; ---- RESULTS 'Table has been created.' 
diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-truncate.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-truncate.test index b64f67f49..1385d15f2 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-truncate.test +++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-truncate.test @@ -38,7 +38,7 @@ STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT ---- QUERY # Create partitioned Iceberg table create table ice_part (i int, s string, t timestamp) -partition by spec (t year, i bucket 10) +partitioned by spec (year(t), bucket(10, i)) stored as iceberg; insert into ice_part values (1, 'ice', '2021-01-27 18:57:25.155746000'), diff --git a/testdata/workloads/functional-query/queries/QueryTest/show-create-table.test b/testdata/workloads/functional-query/queries/QueryTest/show-create-table.test index 6b2591adb..8dcc4b686 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/show-create-table.test +++ b/testdata/workloads/functional-query/queries/QueryTest/show-create-table.test @@ -633,36 +633,14 @@ CREATE TABLE iceberg_test1_partitioned ( p3 TIMESTAMP, p4 INT ) -PARTITION BY SPEC ( - p1 YEAR, - p2 IDENTITY, - p2 BUCKET 500, - p2 TRUNCATE 15, - p3 HOUR, - p4 BUCKET 10, - p4 TRUNCATE 5 -) -STORED AS ICEBERG -TBLPROPERTIES('iceberg.file_format'='parquet', - 'iceberg.catalog'='hadoop.catalog', - 'iceberg.catalog_location'='/test-warehouse/hadoop_catalog_test') ----- RESULTS-HIVE -CREATE TABLE show_create_table_test_db.iceberg_test1_partitioned ( - level STRING NULL, - p1 DATE NULL, - p2 STRING NULL, - p3 TIMESTAMP NULL, - p4 INT NULL -) -PARTITION BY SPEC -( - p1 YEAR, - p2 IDENTITY, - p2 BUCKET 500, - p2 TRUNCATE 15, - p3 HOUR, - p4 BUCKET 10, - p4 TRUNCATE 5 +PARTITIONED BY SPEC ( + YEAR(p1), + p2, + BUCKET(500, p2), + TRUNCATE(15, p2), + HOUR(p3), + BUCKET(10, p4), + TRUNCATE(5, p4) ) STORED AS ICEBERG TBLPROPERTIES('iceberg.file_format'='parquet', @@ -676,15 +654,14 
@@ CREATE EXTERNAL TABLE show_create_table_test_db.iceberg_test1_partitioned ( p3 TIMESTAMP NULL, p4 INT NULL ) -PARTITION BY SPEC -( - p1 YEAR, - p2 IDENTITY, - p2 BUCKET 500, - p2 TRUNCATE 15, - p3 HOUR, - p4 BUCKET 10, - p4 TRUNCATE 5, +PARTITIONED BY SPEC ( + YEAR(p1), + p2, + BUCKET(500, p2), + TRUNCATE(15, p2), + HOUR(p3), + BUCKET(10, p4), + TRUNCATE(5, p4) ) STORED AS ICEBERG TBLPROPERTIES ('external.table.purge'='TRUE', 'iceberg.catalog'='hadoop.catalog', @@ -799,10 +776,7 @@ CREATE EXTERNAL TABLE show_create_table_test_db.iceberg_old_style_partitions ( p STRING NULL, d DATE NULL ) -PARTITION BY SPEC ( - p IDENTITY, - d IDENTITY -) +PARTITIONED BY SPEC (p, d) STORED AS ICEBERG LOCATION '$$location_uri$$' TBLPROPERTIES ('external.table.purge'='TRUE', 'iceberg.file_format'='parquet', @@ -810,7 +784,7 @@ TBLPROPERTIES ('external.table.purge'='TRUE', 'iceberg.file_format'='parquet', ==== ---- CREATE_TABLE CREATE TABLE iceberg_ctas -PARTITION BY SPEC(id bucket 5) +PARTITIONED BY SPEC (bucket(5, id)) STORED AS ICEBERG AS SELECT id, bool_col, bigint_col FROM functional.alltypes; ---- RESULTS-HIVE-3 @@ -819,9 +793,7 @@ CREATE EXTERNAL TABLE show_create_table_test_db.iceberg_ctas ( bool_col BOOLEAN NULL, bigint_col BIGINT NULL ) -PARTITION BY SPEC ( - id BUCKET 5 -) +PARTITIONED BY SPEC (BUCKET(5, id)) STORED AS ICEBERG LOCATION '$$location_uri$$' TBLPROPERTIES ('external.table.purge'='TRUE', 'iceberg.file_format'='parquet', @@ -829,7 +801,7 @@ TBLPROPERTIES ('external.table.purge'='TRUE', 'iceberg.file_format'='parquet', ==== ---- CREATE_TABLE CREATE TABLE iceberg_ctas_ht -PARTITION BY SPEC(id bucket 5) +PARTITIONED BY SPEC (bucket(5, id)) STORED AS ICEBERG TBLPROPERTIES ('iceberg.catalog'='hadoop.tables') AS SELECT id, bool_col, bigint_col FROM functional.alltypes; @@ -839,9 +811,7 @@ CREATE EXTERNAL TABLE show_create_table_test_db.iceberg_ctas_ht ( bool_col BOOLEAN NULL, bigint_col BIGINT NULL ) -PARTITION BY SPEC ( - id BUCKET 5 -) +PARTITIONED BY SPEC 
(BUCKET(5, id)) STORED AS ICEBERG LOCATION '$$location_uri$$' TBLPROPERTIES ('external.table.purge'='TRUE', 'iceberg.file_format'='parquet', diff --git a/tests/custom_cluster/test_event_processing.py b/tests/custom_cluster/test_event_processing.py index e5ed34d8a..f3272ba9c 100644 --- a/tests/custom_cluster/test_event_processing.py +++ b/tests/custom_cluster/test_event_processing.py @@ -224,7 +224,7 @@ class TestEventProcessing(CustomClusterTestSuite): self.execute_query("create database if not exists {0}".format(db_name)) self.execute_query(""" create table {0}.{1} (i int) - partition by spec (i bucket 5) + partitioned by spec (bucket(5, i)) stored as iceberg;""".format(db_name, tbl_name)) self.execute_query("insert into {0}.{1} values (1)".format(db_name, tbl_name)) data = self.execute_scalar("select * from {0}.{1}".format(db_name, tbl_name))