diff --git a/fe/src/main/java/org/apache/impala/catalog/IcebergColumn.java b/fe/src/main/java/org/apache/impala/catalog/IcebergColumn.java index a9be0d934..1b6f953d3 100644 --- a/fe/src/main/java/org/apache/impala/catalog/IcebergColumn.java +++ b/fe/src/main/java/org/apache/impala/catalog/IcebergColumn.java @@ -41,7 +41,7 @@ public class IcebergColumn extends Column { public IcebergColumn(String name, Type type, String comment, int position, int fieldId, int fieldMapKeyId, int fieldMapValueId, boolean isNullable) { - super(name, type, comment, position); + super(name.toLowerCase(), type, comment, position); fieldId_ = fieldId; fieldMapKeyId_ = fieldMapKeyId; fieldMapValueId_ = fieldMapValueId; diff --git a/fe/src/main/java/org/apache/impala/util/IcebergSchemaConverter.java b/fe/src/main/java/org/apache/impala/util/IcebergSchemaConverter.java index 131a7f0cb..9757210f5 100644 --- a/fe/src/main/java/org/apache/impala/util/IcebergSchemaConverter.java +++ b/fe/src/main/java/org/apache/impala/util/IcebergSchemaConverter.java @@ -115,7 +115,7 @@ public class IcebergSchemaConverter { for (Types.NestedField column : schema.columns()) { Type colType = toImpalaType(column.type()); // Update sd cols by iceberg NestedField - ret.add(new FieldSchema(column.name(), colType.toSql().toLowerCase(), + ret.add(new FieldSchema(column.name().toLowerCase(), colType.toSql().toLowerCase(), column.doc())); } return ret; diff --git a/testdata/data/README b/testdata/data/README index 804c5a1e9..af49e9327 100644 --- a/testdata/data/README +++ b/testdata/data/README @@ -715,3 +715,6 @@ The tables that have the following schema changes since table migration: * Partition FLOAT column to DOUBLE * Partition DECIMAL(5,3) column to DECIMAL(8,3) * Non-partition column has been moved to end of the schema + +iceberg_test/hadoop_catalog/ice/iceberg_uppercase_col: +Generated by Impala, then modified the metadata.json file to contain uppercase characters. 
diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_uppercase_col/metadata/v1.metadata.json b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_uppercase_col/metadata/v1.metadata.json new file mode 100644 index 000000000..5de147621 --- /dev/null +++ b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_uppercase_col/metadata/v1.metadata.json @@ -0,0 +1,237 @@ +{ + "format-version" : 1, + "table-uuid" : "3a93e4c0-5357-4203-a7e1-242168207af8", + "location" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_uppercase_col", + "last-updated-ms" : 1648649057966, + "last-column-id" : 1, + "schema" : { + "type" : "struct", + "schema-id" : 0, + "fields" : [ { + "id" : 1, + "name" : "Region", + "required" : false, + "type" : "string", + "doc" : "from deserializer" + }, { + "id" : 2, + "name" : "Nested_Struct", + "required" : false, + "type" : { + "type" : "struct", + "fields" : [ { + "id" : 3, + "name" : "A", + "required" : true, + "type" : "int" + }, { + "id" : 4, + "name" : "B", + "required" : true, + "type" : { + "type" : "list", + "element-id" : 7, + "element" : "int", + "element-required" : true + } + }, { + "id" : 5, + "name" : "C", + "required" : true, + "type" : { + "type" : "struct", + "fields" : [ { + "id" : 8, + "name" : "D", + "required" : true, + "type" : { + "type" : "list", + "element-id" : 9, + "element" : { + "type" : "list", + "element-id" : 10, + "element" : { + "type" : "struct", + "fields" : [ { + "id" : 11, + "name" : "E", + "required" : true, + "type" : "int" + }, { + "id" : 12, + "name" : "F", + "required" : true, + "type" : "string" + } ] + }, + "element-required" : true + }, + "element-required" : true + } + } ] + } + }, { + "id" : 6, + "name" : "G", + "required" : true, + "type" : { + "type" : "map", + "key-id" : 13, + "key" : "string", + "value-id" : 14, + "value" : { + "type" : "struct", + "fields" : [ { + "id" : 15, + "name" : "H", + "required" : true, + "type" : { + "type" : "struct", + "fields" : [ { + "id" : 16, + 
"name" : "I", + "required" : true, + "type" : { + "type" : "list", + "element-id" : 17, + "element" : "double", + "element-required" : true + } + } ] + } + } ] + }, + "value-required" : true + } + } ] + } + } ] + }, + "current-schema-id" : 0, + "schemas" : [ { + "type" : "struct", + "schema-id" : 0, + "fields" : [ { + "id" : 1, + "name" : "region", + "required" : false, + "type" : "string", + "doc" : "from deserializer" + }, { + "id" : 2, + "name" : "Nested_Struct", + "required" : false, + "type" : { + "type" : "struct", + "fields" : [ { + "id" : 3, + "name" : "A", + "required" : true, + "type" : "int" + }, { + "id" : 4, + "name" : "B", + "required" : true, + "type" : { + "type" : "list", + "element-id" : 7, + "element" : "int", + "element-required" : true + } + }, { + "id" : 5, + "name" : "C", + "required" : true, + "type" : { + "type" : "struct", + "fields" : [ { + "id" : 8, + "name" : "D", + "required" : true, + "type" : { + "type" : "list", + "element-id" : 9, + "element" : { + "type" : "list", + "element-id" : 10, + "element" : { + "type" : "struct", + "fields" : [ { + "id" : 11, + "name" : "E", + "required" : true, + "type" : "int" + }, { + "id" : 12, + "name" : "F", + "required" : true, + "type" : "string" + } ] + }, + "element-required" : true + }, + "element-required" : true + } + } ] + } + }, { + "id" : 6, + "name" : "G", + "required" : true, + "type" : { + "type" : "map", + "key-id" : 13, + "key" : "string", + "value-id" : 14, + "value" : { + "type" : "struct", + "fields" : [ { + "id" : 15, + "name" : "H", + "required" : true, + "type" : { + "type" : "struct", + "fields" : [ { + "id" : 16, + "name" : "I", + "required" : true, + "type" : { + "type" : "list", + "element-id" : 17, + "element" : "double", + "element-required" : true + } + } ] + } + } ] + }, + "value-required" : true + } + } ] + } + } ] + } ], + "partition-spec" : [ ], + "default-spec-id" : 0, + "partition-specs" : [ { + "spec-id" : 0, + "fields" : [ ] + } ], + "last-partition-id" : 999, + 
"default-sort-order-id" : 0, + "sort-orders" : [ { + "order-id" : 0, + "fields" : [ ] + } ], + "properties" : { + "engine.hive.enabled" : "true", + "external.table.purge" : "TRUE", + "write.format.default" : "parquet", + "OBJCAPABILITIES" : "EXTREAD,EXTWRITE", + "storage_handler" : "org.apache.iceberg.mr.hive.HiveIcebergStorageHandler" + }, + "current-snapshot-id" : -1, + "snapshots" : [ ], + "snapshot-log" : [ ], + "metadata-log" : [ ] +} diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_uppercase_col/metadata/version-hint.txt b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_uppercase_col/metadata/version-hint.txt new file mode 100644 index 000000000..d00491fd7 --- /dev/null +++ b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_uppercase_col/metadata/version-hint.txt @@ -0,0 +1 @@ +1 diff --git a/testdata/datasets/functional/functional_schema_template.sql b/testdata/datasets/functional/functional_schema_template.sql index 21d5c16a6..a81f76da9 100644 --- a/testdata/datasets/functional/functional_schema_template.sql +++ b/testdata/datasets/functional/functional_schema_template.sql @@ -3297,6 +3297,20 @@ STORED AS ICEBERG; ---- DATASET functional ---- BASE_TABLE_NAME +iceberg_uppercase_col +---- CREATE +CREATE EXTERNAL TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name} +STORED AS ICEBERG +TBLPROPERTIES('write.format.default'='parquet', 'iceberg.catalog'='hadoop.catalog', + 'iceberg.catalog_location'='/test-warehouse/iceberg_test/hadoop_catalog', + 'iceberg.table_identifier'='ice.iceberg_uppercase_col'); +---- DEPENDENT_LOAD +`hadoop fs -mkdir -p /test-warehouse/iceberg_test/hadoop_catalog/ice && \ +hadoop fs -put -f ${IMPALA_HOME}/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_uppercase_col /test-warehouse/iceberg_test/hadoop_catalog/ice +==== +---- DATASET +functional +---- BASE_TABLE_NAME alltypes_date_partition_2 ---- PARTITION_COLUMNS date_col date diff --git a/testdata/datasets/functional/schema_constraints.csv 
b/testdata/datasets/functional/schema_constraints.csv index 0bb7711a6..c462d4aed 100644 --- a/testdata/datasets/functional/schema_constraints.csv +++ b/testdata/datasets/functional/schema_constraints.csv @@ -77,6 +77,7 @@ table_name:iceberg_alltypes_part, constraint:restrict_to, table_format:parquet/n table_name:iceberg_alltypes_part_orc, constraint:restrict_to, table_format:parquet/none/none table_name:iceberg_legacy_partition_schema_evolution, constraint:restrict_to, table_format:parquet/none/none table_name:iceberg_legacy_partition_schema_evolution_orc, constraint:restrict_to, table_format:parquet/none/none +table_name:iceberg_uppercase_col, constraint:restrict_to, table_format:parquet/none/none # TODO: Support Avro. Data loading currently fails for Avro because complex types # cannot be converted to the corresponding Avro types yet. diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-query.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-query.test index 4d2b5900f..8adb1d8c1 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-query.test +++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-query.test @@ -507,3 +507,17 @@ Path,Size,Partition ---- TYPES STRING,STRING,STRING ==== +---- QUERY +describe formatted iceberg_uppercase_col; +---- RESULTS: VERIFY_IS_SUBSET +'region','string','from deserializer' +'nested_struct','struct<a:int,b:array<int>,c:struct<d:array<array<struct<e:int,f:string>>>>,g:map<string,struct<h:struct<i:array<double>>>>>','NULL' +---- TYPES +string, string, string +==== +---- QUERY +SELECT * FROM iceberg_uppercase_col; +---- RESULTS +---- TYPES +STRING +====