IMPALA-12893: (part 2): Upgrade Iceberg to version 1.5.2

This patch updates CDP_BUILD_NUMBER to 71942734 in order to
upgrade Iceberg to 1.5.2.

This patch updates some tests so they pass with Iceberg 1.5.2. The
behavior changes of Iceberg 1.5.2 are (compared to 1.3.1):
 * Iceberg V2 tables are created by default
 * Metadata tables have different schema
 * Parquet compression is explicitly set for new tables (even for ORC
   tables)
 * Sequence numbers are assigned a bit differently

Updated the tests where needed.

Code changes to accommodate the above behavior changes:
 * SHOW CREATE TABLE adds 'format-version'='1' for Iceberg V1 tables
 * CREATE TABLE statements don't throw errors when Parquet compression
   is set for ORC tables

Change-Id: Ic4f9ed3f7ee9f686044023be938d6b1d18c8842e
Reviewed-on: http://gerrit.cloudera.org:8080/23670
Reviewed-by: Riza Suminto <riza.suminto@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
This commit is contained in:
Zoltan Borok-Nagy
2024-03-28 20:26:55 +01:00
committed by Impala Public Jenkins
parent e4a508529c
commit 275f03f10d
10 changed files with 112 additions and 173 deletions

View File

@@ -699,7 +699,7 @@ public class CreateTableStmt extends StatementBase implements SingleTableStmt {
private void addMergeOnReadPropertiesIfNeeded() {
Map<String, String> tblProps = getTblProperties();
String formatVersion = tblProps.get(TableProperties.FORMAT_VERSION);
if (formatVersion == null ||
if (formatVersion != null &&
Integer.valueOf(formatVersion) < IcebergTable.ICEBERG_FORMAT_V2) {
return;
}
@@ -715,16 +715,7 @@ public class CreateTableStmt extends StatementBase implements SingleTableStmt {
private void validateIcebergParquetCompressionCodec(
TIcebergFileFormat icebergFileFormat) throws AnalysisException {
if (icebergFileFormat != TIcebergFileFormat.PARQUET) {
if (getTblProperties().containsKey(IcebergTable.PARQUET_COMPRESSION_CODEC)) {
throw new AnalysisException(IcebergTable.PARQUET_COMPRESSION_CODEC +
" should be set only for parquet file format");
}
if (getTblProperties().containsKey(IcebergTable.PARQUET_COMPRESSION_LEVEL)) {
throw new AnalysisException(IcebergTable.PARQUET_COMPRESSION_LEVEL +
" should be set only for parquet file format");
}
} else {
if (icebergFileFormat == TIcebergFileFormat.PARQUET) {
StringBuilder errMsg = new StringBuilder();
if (IcebergUtil.parseParquetCompressionCodec(true, getTblProperties(), errMsg)
== null) {

View File

@@ -35,6 +35,7 @@ import org.apache.commons.lang3.ObjectUtils;
import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.ql.parse.HiveLexer;
import org.apache.iceberg.TableProperties;
import org.apache.impala.catalog.CatalogException;
import org.apache.impala.catalog.Column;
import org.apache.impala.catalog.ColumnStats;
@@ -187,6 +188,11 @@ public class ToSqlUtils {
}
commonProps.remove(KuduTable.KEY_TABLE_ID);
} else if (table instanceof FeIcebergTable) {
FeIcebergTable feIcebergTable = (FeIcebergTable) table;
if (feIcebergTable.getFormatVersion() == IcebergTable.ICEBERG_FORMAT_V1) {
commonProps.put(TableProperties.FORMAT_VERSION,
String.valueOf(IcebergTable.ICEBERG_FORMAT_V1));
}
// Hide Iceberg internal metadata properties
removeHiddenIcebergTableProperties(commonProps);
} else if (table instanceof FePaimonTable) {
@@ -551,8 +557,8 @@ public class ToSqlUtils {
if (table instanceof FeIcebergTable) {
storageHandlerClassName = null;
FeIcebergTable feIcebergTable = (FeIcebergTable)table;
// Fill "PARTITIONED BY SPEC" part if the Iceberg table is partitioned.
FeIcebergTable feIcebergTable= (FeIcebergTable)table;
if (!feIcebergTable.getPartitionSpecs().isEmpty()) {
IcebergPartitionSpec latestPartitionSpec =
feIcebergTable.getDefaultPartitionSpec();