Files
impala/testdata/workloads/functional-query/queries/QueryTest/iceberg-migrated-table-field-id-resolution.test
Zoltan Borok-Nagy a49ff618f1 IMPALA-13853: Don't adjust Iceberg field IDs for data files that don't have complex types
In migrated Iceberg tables we can have data files with missing field
IDs. We assume that their schema corresponds to the table schema at the
point when the table migration happened. This means we can generate the
field IDs at runtime. The logic is more complicated when there are
complex types in the table and the table is partitioned. In such cases
we need to make some adjustments during field ID generation, and when we
do so we verify that the file schema corresponds to the table schema.

These adjustments are not needed when the table doesn't have complex
types, hence we can be a bit more relaxed and skip schema verification,
because field ID generation for top-level columns is not affected.
This means Impala is still able to read the table if there were
trivial schema changes before migration.

With this change we allow all data files that have a compatible schema
with the table schema, which was the case before IMPALA-13364. This
behavior is also aligned with Hive.

Testing:
 * e2e tests added for both Parquet and ORC files

Change-Id: Ib1f1d0cf36792d0400de346c83e999fa50c0fa67
Reviewed-on: http://gerrit.cloudera.org:8080/22610
Reviewed-by: Daniel Becker <daniel.becker@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
2025-03-12 12:51:16 +00:00

247 lines
6.3 KiB
Plaintext

====
---- QUERY
# Baseline read of iceberg_migrated_alter_test before any ALTER in this file.
# NOTE: the cases below are stateful; each one relies on the schema left behind by
# the previous case, so their order must not change.
select * from iceberg_migrated_alter_test;
---- RESULTS
0,'A',0.5
1,'B',1.5
2,'C',2.5
---- TYPES
INT, STRING, DOUBLE
====
---- QUERY
# Drop the middle column. The remaining columns must keep their original values.
alter table iceberg_migrated_alter_test drop column string_col;
select * from iceberg_migrated_alter_test;
---- RESULTS
0,0.5
1,1.5
2,2.5
---- TYPES
INT, DOUBLE
====
---- QUERY
# Re-add a column with the same name as the dropped one. The existing rows are
# expected to read as NULL, i.e. the old 'A'/'B'/'C' values must not reappear.
alter table iceberg_migrated_alter_test add column string_col string;
select * from iceberg_migrated_alter_test;
---- RESULTS
0,0.5,'NULL'
1,1.5,'NULL'
2,2.5,'NULL'
---- TYPES
INT, DOUBLE, STRING
====
---- QUERY
# Rename double_col. The renamed column must still return the original values.
alter table iceberg_migrated_alter_test change column double_col renamed_double_col double;
select * from iceberg_migrated_alter_test;
---- RESULTS
0,0.5,'NULL'
1,1.5,'NULL'
2,2.5,'NULL'
---- TYPES
INT, DOUBLE, STRING
====
---- QUERY
# Add a new column that reuses the old name 'double_col'. It is expected to be NULL
# for the existing rows, while renamed_double_col keeps the original values.
alter table iceberg_migrated_alter_test add column double_col double;
select * from iceberg_migrated_alter_test;
---- RESULTS
0,0.5,'NULL',NULL
1,1.5,'NULL',NULL
2,2.5,'NULL',NULL
---- TYPES
INT, DOUBLE, STRING, DOUBLE
====
---- QUERY
# Insert a row under the altered schema. The new row has all four columns set; the
# pre-existing rows are expected to be unchanged.
insert into iceberg_migrated_alter_test values (3,3.5,"D",3.8);
select * from iceberg_migrated_alter_test;
---- RESULTS
3,3.5,'D',3.8
0,0.5,'NULL',NULL
1,1.5,'NULL',NULL
2,2.5,'NULL',NULL
---- TYPES
INT, DOUBLE, STRING, DOUBLE
====
---- QUERY
# Baseline read of iceberg_migrated_alter_test_orc before any ALTER in this file.
# NOTE(review): presumably the ORC-backed counterpart of iceberg_migrated_alter_test;
# confirm against the table definition. The cases below are stateful and order-dependent.
select * from iceberg_migrated_alter_test_orc;
---- RESULTS
0,'A',0.5
1,'B',1.5
2,'C',2.5
---- TYPES
INT, STRING, DOUBLE
====
---- QUERY
# Drop the middle column. The remaining columns must keep their original values.
alter table iceberg_migrated_alter_test_orc drop column string_col;
select * from iceberg_migrated_alter_test_orc;
---- RESULTS
0,0.5
1,1.5
2,2.5
---- TYPES
INT, DOUBLE
====
---- QUERY
# Re-add a column with the same name as the dropped one. The existing rows are
# expected to read as NULL, i.e. the old 'A'/'B'/'C' values must not reappear.
alter table iceberg_migrated_alter_test_orc add column string_col string;
select * from iceberg_migrated_alter_test_orc;
---- RESULTS
0,0.5,'NULL'
1,1.5,'NULL'
2,2.5,'NULL'
---- TYPES
INT, DOUBLE, STRING
====
---- QUERY
# Rename double_col. The renamed column must still return the original values.
alter table iceberg_migrated_alter_test_orc change column double_col renamed_double_col double;
select * from iceberg_migrated_alter_test_orc;
---- RESULTS
0,0.5,'NULL'
1,1.5,'NULL'
2,2.5,'NULL'
---- TYPES
INT, DOUBLE, STRING
====
---- QUERY
# Add a new column that reuses the old name 'double_col'. It is expected to be NULL
# for the existing rows, while renamed_double_col keeps the original values.
alter table iceberg_migrated_alter_test_orc add column double_col double;
select * from iceberg_migrated_alter_test_orc;
---- RESULTS
0,0.5,'NULL',NULL
1,1.5,'NULL',NULL
2,2.5,'NULL',NULL
---- TYPES
INT, DOUBLE, STRING, DOUBLE
====
---- QUERY
# Read scalar fields nested in structs: one level deep (struct_1_col) and two levels
# deep (struct_2_col.struct_3_col).
select struct_1_col.string_col, struct_2_col.struct_3_col.float_col from iceberg_migrated_complex_test;
---- RESULTS
'A',0.5
---- TYPES
STRING, FLOAT
====
---- QUERY
# Read an array that is nested one struct level deep.
select my_array_1.pos, my_array_1.item from iceberg_migrated_complex_test, iceberg_migrated_complex_test.struct_1_col.int_array_col as my_array_1;
---- RESULTS
0,0
---- TYPES
BIGINT, INT
====
---- QUERY
# Read an array that is nested two struct levels deep.
select my_array_2.pos, my_array_2.item from iceberg_migrated_complex_test, iceberg_migrated_complex_test.struct_2_col.struct_3_col.bigint_array_col as my_array_2;
---- RESULTS
0,4
---- TYPES
BIGINT, BIGINT
====
---- QUERY
# Read a map that is nested one struct level deep (inside struct_1_col).
select my_map_1.key, my_map_1.value from iceberg_migrated_complex_test, iceberg_migrated_complex_test.struct_1_col.bool_int_map_col as my_map_1;
---- RESULTS
true,1
---- TYPES
BOOLEAN, INT
====
---- QUERY
# Read a top-level map column.
select my_map_2.key, my_map_2.value from iceberg_migrated_complex_test, iceberg_migrated_complex_test.int_bigint_map_col as my_map_2;
---- RESULTS
2,3
---- TYPES
INT, BIGINT
====
---- QUERY
# Read a map that is nested two struct levels deep.
select my_map_3.key, my_map_3.value from iceberg_migrated_complex_test, iceberg_migrated_complex_test.struct_2_col.struct_3_col.string_double_map_col as my_map_3;
---- RESULTS
'B',1.5
---- TYPES
STRING, DOUBLE
====
---- QUERY
# Read a map that is nested one struct level deep (inside struct_2_col).
select my_map_4.key, my_map_4.value from iceberg_migrated_complex_test, iceberg_migrated_complex_test.struct_2_col.int_int_map_col as my_map_4;
---- RESULTS
5,6
---- TYPES
INT, INT
====
---- QUERY
# Same nested reads as for iceberg_migrated_complex_test, against the '_orc' table.
# NOTE(review): presumably the ORC-backed counterpart; confirm against the table
# definition.
# Read scalar fields nested in structs, one and two levels deep.
select struct_1_col.string_col, struct_2_col.struct_3_col.float_col from iceberg_migrated_complex_test_orc;
---- RESULTS
'A',0.5
---- TYPES
STRING, FLOAT
====
---- QUERY
# Read an array that is nested one struct level deep.
select my_array_1.pos, my_array_1.item from iceberg_migrated_complex_test_orc, iceberg_migrated_complex_test_orc.struct_1_col.int_array_col as my_array_1;
---- RESULTS
0,0
---- TYPES
BIGINT, INT
====
---- QUERY
# Read an array that is nested two struct levels deep.
select my_array_2.pos, my_array_2.item from iceberg_migrated_complex_test_orc, iceberg_migrated_complex_test_orc.struct_2_col.struct_3_col.bigint_array_col as my_array_2;
---- RESULTS
0,4
---- TYPES
BIGINT, BIGINT
====
---- QUERY
# Read a map that is nested one struct level deep (inside struct_1_col).
select my_map_1.key, my_map_1.value from iceberg_migrated_complex_test_orc, iceberg_migrated_complex_test_orc.struct_1_col.bool_int_map_col as my_map_1;
---- RESULTS
true,1
---- TYPES
BOOLEAN, INT
====
---- QUERY
# Read a top-level map column.
select my_map_2.key, my_map_2.value from iceberg_migrated_complex_test_orc, iceberg_migrated_complex_test_orc.int_bigint_map_col as my_map_2;
---- RESULTS
2,3
---- TYPES
INT, BIGINT
====
---- QUERY
# Read a map that is nested two struct levels deep.
select my_map_3.key, my_map_3.value from iceberg_migrated_complex_test_orc, iceberg_migrated_complex_test_orc.struct_2_col.struct_3_col.string_double_map_col as my_map_3;
---- RESULTS
'B',1.5
---- TYPES
STRING, DOUBLE
====
---- QUERY
# Read a map that is nested one struct level deep (inside struct_2_col).
select my_map_4.key, my_map_4.value from iceberg_migrated_complex_test_orc, iceberg_migrated_complex_test_orc.struct_2_col.int_int_map_col as my_map_4;
---- RESULTS
5,6
---- TYPES
INT, INT
====
---- QUERY
# Regression test for IMPALA-13853. We should still be able to read the Parquet file
# if its leading columns match the table schema.
# Scenario: the row is inserted before column 'j' is added, and the table is converted
# to Iceberg afterwards. 'j' is expected to read as NULL for that row.
create table mig_add_column(i int) partitioned by (p int) stored as parquet;
insert into mig_add_column partition(p) values (1, 2);
alter table mig_add_column add column j int;
alter table mig_add_column convert to iceberg;
select * from mig_add_column;
---- RESULTS
1,NULL,2
---- TYPES
INT,INT,INT
====
---- QUERY
# Regression test for IMPALA-13853. We should still be able to read the Parquet file
# if its leading columns match the table schema.
# Scenario: the row is inserted with columns (i, j), then trailing column 'j' is
# dropped before the table is converted to Iceberg. Only 'i' and 'p' are returned.
create table mig_drop_column(i int, j int) partitioned by (p int) stored as parquet;
insert into mig_drop_column partition(p) values (1, 100, 2);
alter table mig_drop_column drop column j;
alter table mig_drop_column convert to iceberg;
select * from mig_drop_column;
---- RESULTS
1,2
---- TYPES
INT,INT
====
---- QUERY
# Regression test for IMPALA-13853. We should reject Parquet files that have a
# mismatching schema.
# Scenario: the row is inserted with columns (i INT, s STRING); 's' is then dropped
# and 'j INT' is added before the table is converted to Iceberg. The table's second
# column is now INT while the row was written with a STRING there, so the read is
# expected to fail with the error below.
create table mig_wrong_type(i int, s string) partitioned by (p int) stored as parquet;
insert into mig_wrong_type partition(p) values (1, 'IMPALA', 2);
alter table mig_wrong_type drop column s;
alter table mig_wrong_type add column j int;
alter table mig_wrong_type convert to iceberg;
select * from mig_wrong_type;
---- CATCH
has an incompatible Parquet schema for column
====