From 65639f16b973f6d82948a303823b2079dee2e4a9 Mon Sep 17 00:00:00 2001 From: Nandor Kollar Date: Thu, 27 Nov 2025 15:49:55 +0000 Subject: [PATCH] IMPALA-12330: Allow setting format-version in ALTER TABLE CONVERT TO This change allows modifying the format version table property in ALTER TABLE CONVERT TO statements. It adds verification for the property value too: only 1 or 2 is supported as of now. Change-Id: Iaed207feb83a277a1c2f81dcf58c42f0721c0865 Reviewed-on: http://gerrit.cloudera.org:8080/23721 Reviewed-by: Impala Public Jenkins Tested-by: Peter Rozsa --- .../analysis/ConvertTableToIcebergStmt.java | 46 +++++++++++++------ .../catalog/iceberg/IcebergCatalog.java | 4 +- .../impala/analysis/AnalyzeStmtsTest.java | 12 ++++- ...erg-migrate-from-external-hdfs-tables.test | 22 +++++++-- 4 files changed, 64 insertions(+), 20 deletions(-) diff --git a/fe/src/main/java/org/apache/impala/analysis/ConvertTableToIcebergStmt.java b/fe/src/main/java/org/apache/impala/analysis/ConvertTableToIcebergStmt.java index 7f91f1248..6048c9367 100644 --- a/fe/src/main/java/org/apache/impala/analysis/ConvertTableToIcebergStmt.java +++ b/fe/src/main/java/org/apache/impala/analysis/ConvertTableToIcebergStmt.java @@ -111,22 +111,42 @@ public class ConvertTableToIcebergStmt extends StatementBase implements SingleTa checkColumnTypeCompatibility(table); - if (properties_.size() > 1 || - properties_.keySet().stream().anyMatch( - key -> !key.equalsIgnoreCase(IcebergTable.ICEBERG_CATALOG)) ) { - throw new AnalysisException(String.format( - "CONVERT TO ICEBERG only accepts '%s' as TBLPROPERTY.", - IcebergTable.ICEBERG_CATALOG)); - } - - if (TIcebergCatalog.HADOOP_CATALOG == IcebergUtil.getTIcebergCatalog(properties_)) { - throw new AnalysisException("The Hadoop Catalog is not supported because the " + - "location may change"); - } + checkProperties(); createSubQueryStrings((FeFsTable) table); } + private void checkProperties() throws AnalysisException { + for (Map.Entry entry : properties_.entrySet()) { + String key = entry.getKey(); + String value = entry.getValue(); + if (IcebergTable.ICEBERG_CATALOG.equalsIgnoreCase(key)) { + if (TIcebergCatalog.HADOOP_CATALOG == IcebergUtil.getTIcebergCatalog(value)) { + throw new AnalysisException("The Hadoop Catalog is not supported " + + "because the location may change"); + } + continue; + } + + if (IcebergTable.FORMAT_VERSION.equalsIgnoreCase(key)) { + try { + int formatVersion = Integer.parseInt(value); + if (formatVersion == IcebergTable.ICEBERG_FORMAT_V1 + || formatVersion == IcebergTable.ICEBERG_FORMAT_V2) { + continue; + } + throw new AnalysisException( + String.format("Unsupported Iceberg format version '%s'.", formatVersion)); + } catch (NumberFormatException e) { + throw new AnalysisException( + String.format("Invalid Iceberg format version '%s'.", value)); + } + } + throw new AnalysisException( + String.format("CONVERT TO ICEBERG doesn't accept '%s' as TBLPROPERTY.", key)); + } + } + private void checkColumnTypeCompatibility(FeTable table) throws AnalysisException { try { IcebergSchemaConverter.convertToIcebergSchema(table.getMetaStoreTable()); @@ -173,7 +193,7 @@ public class ConvertTableToIcebergStmt extends StatementBase implements SingleTa .property(Table.TBL_PROP_EXTERNAL_TABLE_PURGE, "true").build(); } else { // In HiveCatalog we invoke an IM after creating the table to immediately propagate - // the existance of the new Iceberg table and avoid timing issues. + // the existence of the new Iceberg table and avoid timing issues. invalidateMetadataQuery_ = Invalidate.builder() .table(tableName_.toString()) .build(); diff --git a/fe/src/main/java/org/apache/impala/catalog/iceberg/IcebergCatalog.java b/fe/src/main/java/org/apache/impala/catalog/iceberg/IcebergCatalog.java index 6b6b04891..f1470f5aa 100644 --- a/fe/src/main/java/org/apache/impala/catalog/iceberg/IcebergCatalog.java +++ b/fe/src/main/java/org/apache/impala/catalog/iceberg/IcebergCatalog.java @@ -79,13 +79,13 @@ public interface IcebergCatalog { /** * Renames Iceberg table. - * For HadoopTables, Iceberg does not supported 'renameTable' method + * For HadoopTables, Iceberg does not support 'renameTable' method * For HadoopCatalog, Iceberg implement 'renameTable' method with Exception threw */ void renameTable(FeIcebergTable feTable, TableIdentifier newTableId); /** - * Some of the implemetation methods might be running on native threads as they might + * Some of the implementation methods might be running on native threads as they might * be invoked via JNI. In that case the context class loader for those threads are * null. 'Catalogs' dynamically loads catalog implementations, e.g. HadoopCatalog or * HiveCatalog. It uses the context class loader, but as it is null it falls back diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java index 4d9811bc3..f7df74938 100644 --- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java +++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java @@ -5221,8 +5221,18 @@ public class AnalyzeStmtsTest extends AnalyzerTest { AnalysisError("alter table functional.tinytable convert to iceberg", "CONVERT TO ICEBERG is not supported for " + "org.apache.hadoop.mapred.TextInputFormat"); + AnalyzesOk("alter table functional_parquet.tinytable convert to iceberg" + + " tblproperties('format-version'='1')"); + AnalyzesOk("alter table functional_parquet.tinytable convert to iceberg" + + " tblproperties('format-version'='2')"); + AnalysisError("alter table functional_parquet.tinytable convert to iceberg" + + " tblproperties('format-version'='3')", + "Unsupported Iceberg format version '3'"); + AnalysisError("alter table functional_parquet.tinytable convert to iceberg" + + " tblproperties('format-version'='unknown')", + "Invalid Iceberg format version 'unknown'"); AnalysisError("alter table functional_parquet.tinytable convert to iceberg" + " tblproperties('metadata.generator.threads'='a1')", - "CONVERT TO ICEBERG only accepts 'iceberg.catalog' as TBLPROPERTY."); + "CONVERT TO ICEBERG doesn't accept 'metadata.generator.threads' as TBLPROPERTY."); } } diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-migrate-from-external-hdfs-tables.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-migrate-from-external-hdfs-tables.test index 396ea0bbd..ed41307eb 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-migrate-from-external-hdfs-tables.test +++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-migrate-from-external-hdfs-tables.test @@ -304,11 +304,25 @@ select * from table_at_random_location; int, string ==== ---- QUERY -# Currently not feasible to convert directly into a V2 Iceberg table. -create table converted_into_v2 (i int) partitioned by (s string) stored as parquet; -alter table converted_into_v2 convert to iceberg tblproperties ('format-version'='2'); +# Convert to non-default format-version 1, it should work. +create table converted_into_v1 (i int) partitioned by (s string) stored as parquet; +alter table converted_into_v1 convert to iceberg tblproperties ('format-version'='1'); +---- RESULTS +'Table has been migrated.' +==== +---- QUERY +describe formatted converted_into_v1; +---- RESULTS: VERIFY_IS_SUBSET +'','format-version ','1 ' +---- TYPES +string, string, string +==== +---- QUERY +# Currently not feasible to convert directly into a V3 Iceberg table. +create table converted_into_v3 (i int) partitioned by (s string) stored as parquet; +alter table converted_into_v3 convert to iceberg tblproperties ('format-version'='3'); ---- CATCH -AnalysisException: CONVERT TO ICEBERG only accepts 'iceberg.catalog' as TBLPROPERTY. +AnalysisException: Unsupported Iceberg format version '3'. ==== ---- QUERY create table simple_tbl (i int) stored as parquet;