IMPALA-12330: Allow setting format-version in ALTER TABLE CONVERT TO

This change allows modifying the format-version table property
in ALTER TABLE CONVERT TO statements. It also adds validation of
the property value: currently only versions 1 and 2 are supported.

Change-Id: Iaed207feb83a277a1c2f81dcf58c42f0721c0865
Reviewed-on: http://gerrit.cloudera.org:8080/23721
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Peter Rozsa <prozsa@cloudera.com>
This commit is contained in:
Nandor Kollar
2025-11-27 15:49:55 +00:00
committed by Peter Rozsa
parent b0ef1d843e
commit 65639f16b9
4 changed files with 64 additions and 20 deletions

View File

@@ -111,22 +111,42 @@ public class ConvertTableToIcebergStmt extends StatementBase implements SingleTa
checkColumnTypeCompatibility(table);
if (properties_.size() > 1 ||
properties_.keySet().stream().anyMatch(
key -> !key.equalsIgnoreCase(IcebergTable.ICEBERG_CATALOG)) ) {
throw new AnalysisException(String.format(
"CONVERT TO ICEBERG only accepts '%s' as TBLPROPERTY.",
IcebergTable.ICEBERG_CATALOG));
}
if (TIcebergCatalog.HADOOP_CATALOG == IcebergUtil.getTIcebergCatalog(properties_)) {
throw new AnalysisException("The Hadoop Catalog is not supported because the " +
"location may change");
}
checkProperties();
createSubQueryStrings((FeFsTable) table);
}
/**
 * Validates the TBLPROPERTIES supplied to a CONVERT TO ICEBERG statement.
 * Accepted keys (case-insensitive):
 *   - 'iceberg.catalog': any catalog except the Hadoop Catalog, which is
 *     rejected because the table location may change during conversion.
 *   - 'format-version': must parse as an integer equal to 1 or 2.
 * Any other key, a non-numeric format version, or an out-of-range format
 * version raises an AnalysisException.
 */
private void checkProperties() throws AnalysisException {
  for (Map.Entry<String, String> property : properties_.entrySet()) {
    String propKey = property.getKey();
    String propValue = property.getValue();
    if (propKey.equalsIgnoreCase(IcebergTable.ICEBERG_CATALOG)) {
      if (IcebergUtil.getTIcebergCatalog(propValue) == TIcebergCatalog.HADOOP_CATALOG) {
        throw new AnalysisException("The Hadoop Catalog is not supported " +
            "because the location may change");
      }
    } else if (propKey.equalsIgnoreCase(IcebergTable.FORMAT_VERSION)) {
      int version;
      try {
        version = Integer.parseInt(propValue);
      } catch (NumberFormatException e) {
        // Value is not an integer at all, e.g. 'unknown'.
        throw new AnalysisException(
            String.format("Invalid Iceberg format version '%s'.", propValue));
      }
      // Parsed, but outside the supported set {1, 2}, e.g. '3'.
      if (version != IcebergTable.ICEBERG_FORMAT_V1
          && version != IcebergTable.ICEBERG_FORMAT_V2) {
        throw new AnalysisException(
            String.format("Unsupported Iceberg format version '%s'.", version));
      }
    } else {
      // Unknown property key.
      throw new AnalysisException(
          String.format("CONVERT TO ICEBERG doesn't accept '%s' as TBLPROPERTY.", propKey));
    }
  }
}
private void checkColumnTypeCompatibility(FeTable table) throws AnalysisException {
try {
IcebergSchemaConverter.convertToIcebergSchema(table.getMetaStoreTable());
@@ -173,7 +193,7 @@ public class ConvertTableToIcebergStmt extends StatementBase implements SingleTa
.property(Table.TBL_PROP_EXTERNAL_TABLE_PURGE, "true").build();
} else {
// In HiveCatalog we invoke an IM after creating the table to immediately propagate
// the existance of the new Iceberg table and avoid timing issues.
// the existence of the new Iceberg table and avoid timing issues.
invalidateMetadataQuery_ = Invalidate.builder()
.table(tableName_.toString())
.build();

View File

@@ -79,13 +79,13 @@ public interface IcebergCatalog {
/**
* Renames Iceberg table.
* For HadoopTables, Iceberg does not supported 'renameTable' method
* For HadoopTables, Iceberg does not support 'renameTable' method
* For HadoopCatalog, Iceberg implements the 'renameTable' method, but it throws an exception
*/
void renameTable(FeIcebergTable feTable, TableIdentifier newTableId);
/**
* Some of the implemetation methods might be running on native threads as they might
* Some of the implementation methods might be running on native threads as they might
* be invoked via JNI. In that case the context class loader for those threads are
* null. 'Catalogs' dynamically loads catalog implementations, e.g. HadoopCatalog or
* HiveCatalog. It uses the context class loader, but as it is null it falls back

View File

@@ -5221,8 +5221,18 @@ public class AnalyzeStmtsTest extends AnalyzerTest {
AnalysisError("alter table functional.tinytable convert to iceberg",
"CONVERT TO ICEBERG is not supported for " +
"org.apache.hadoop.mapred.TextInputFormat");
AnalyzesOk("alter table functional_parquet.tinytable convert to iceberg"
+ " tblproperties('format-version'='1')");
AnalyzesOk("alter table functional_parquet.tinytable convert to iceberg"
+ " tblproperties('format-version'='2')");
AnalysisError("alter table functional_parquet.tinytable convert to iceberg"
+ " tblproperties('format-version'='3')",
"Unsupported Iceberg format version '3'");
AnalysisError("alter table functional_parquet.tinytable convert to iceberg"
+ " tblproperties('format-version'='unknown')",
"Invalid Iceberg format version 'unknown'");
AnalysisError("alter table functional_parquet.tinytable convert to iceberg"
+ " tblproperties('metadata.generator.threads'='a1')",
"CONVERT TO ICEBERG only accepts 'iceberg.catalog' as TBLPROPERTY.");
"CONVERT TO ICEBERG doesn't accept 'metadata.generator.threads' as TBLPROPERTY.");
}
}

View File

@@ -304,11 +304,25 @@ select * from table_at_random_location;
int, string
====
---- QUERY
# Currently not feasible to convert directly into a V2 Iceberg table.
create table converted_into_v2 (i int) partitioned by (s string) stored as parquet;
alter table converted_into_v2 convert to iceberg tblproperties ('format-version'='2');
# Convert to non-default format-version 1, it should work.
create table converted_into_v1 (i int) partitioned by (s string) stored as parquet;
alter table converted_into_v1 convert to iceberg tblproperties ('format-version'='1');
---- RESULTS
'Table has been migrated.'
====
---- QUERY
describe formatted converted_into_v1;
---- RESULTS: VERIFY_IS_SUBSET
'','format-version ','1 '
---- TYPES
string, string, string
====
---- QUERY
# Currently not feasible to convert directly into a V3 Iceberg table.
create table converted_into_v3 (i int) partitioned by (s string) stored as parquet;
alter table converted_into_v3 convert to iceberg tblproperties ('format-version'='3');
---- CATCH
AnalysisException: CONVERT TO ICEBERG only accepts 'iceberg.catalog' as TBLPROPERTY.
AnalysisException: Unsupported Iceberg format version '3'.
====
---- QUERY
create table simple_tbl (i int) stored as parquet;