mirror of
https://github.com/apache/impala.git
synced 2025-12-23 21:08:39 -05:00
In migrated Iceberg tables we can have data files with missing field IDs. We assume that their schema corresponds to the table schema at the point when the table migration happened. This means during runtime we can generate the field ids. The logic is more complicated when there are complex types in the table and the table is partitioned. In such cases we need to do some adjustments during field ID generation, in which case we verify that the file schema corresponds to the table schema. These adjustments are not needed when the table doesn't have complex types, hence we can be a bit more relaxed and skip schema verification, because field ID generation for top-level columns is not affected. This means Impala would still be able to read the table if there were trivial schema changes before migration. With this change we allow all data files that have a compatible schema with the table schema, which was the case before IMPALA-13364. This behavior is also aligned with Hive. Testing: * e2e tests added for both Parquet and ORC files Change-Id: Ib1f1d0cf36792d0400de346c83e999fa50c0fa67 Reviewed-on: http://gerrit.cloudera.org:8080/22610 Reviewed-by: Daniel Becker <daniel.becker@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
247 lines
6.3 KiB
Plaintext
247 lines
6.3 KiB
Plaintext
====
---- QUERY
# Baseline: read iceberg_migrated_alter_test before any schema change.
select * from iceberg_migrated_alter_test;
---- RESULTS
0,'A',0.5
1,'B',1.5
2,'C',2.5
---- TYPES
INT, STRING, DOUBLE
====
---- QUERY
# Dropping string_col must remove it from the result and leave the remaining
# columns' values untouched.
alter table iceberg_migrated_alter_test drop column string_col;
select * from iceberg_migrated_alter_test;
---- RESULTS
0,0.5
1,1.5
2,2.5
---- TYPES
INT, DOUBLE
====
---- QUERY
# Re-adding a column named string_col: the existing rows are expected to read NULL
# for it, not the values of the column that was dropped above.
alter table iceberg_migrated_alter_test add column string_col string;
select * from iceberg_migrated_alter_test;
---- RESULTS
0,0.5,'NULL'
1,1.5,'NULL'
2,2.5,'NULL'
---- TYPES
INT, DOUBLE, STRING
====
---- QUERY
# Renaming double_col must not change the values read for that column.
alter table iceberg_migrated_alter_test change column double_col renamed_double_col double;
select * from iceberg_migrated_alter_test;
---- RESULTS
0,0.5,'NULL'
1,1.5,'NULL'
2,2.5,'NULL'
---- TYPES
INT, DOUBLE, STRING
====
---- QUERY
# Adding a new column that reuses the old name double_col: the existing rows are
# expected to read NULL for it, while the renamed column keeps its values.
alter table iceberg_migrated_alter_test add column double_col double;
select * from iceberg_migrated_alter_test;
---- RESULTS
0,0.5,'NULL',NULL
1,1.5,'NULL',NULL
2,2.5,'NULL',NULL
---- TYPES
INT, DOUBLE, STRING, DOUBLE
====
---- QUERY
# A row inserted after the schema changes carries values for every column; the
# pre-existing rows keep reading NULL for the columns added above.
insert into iceberg_migrated_alter_test values (3,3.5,"D",3.8);
select * from iceberg_migrated_alter_test;
---- RESULTS
3,3.5,'D',3.8
0,0.5,'NULL',NULL
1,1.5,'NULL',NULL
2,2.5,'NULL',NULL
---- TYPES
INT, DOUBLE, STRING, DOUBLE
====
---- QUERY
# Baseline: read iceberg_migrated_alter_test_orc before any schema change.
select * from iceberg_migrated_alter_test_orc;
---- RESULTS
0,'A',0.5
1,'B',1.5
2,'C',2.5
---- TYPES
INT, STRING, DOUBLE
====
---- QUERY
# Dropping string_col must remove it from the result and leave the remaining
# columns' values untouched.
alter table iceberg_migrated_alter_test_orc drop column string_col;
select * from iceberg_migrated_alter_test_orc;
---- RESULTS
0,0.5
1,1.5
2,2.5
---- TYPES
INT, DOUBLE
====
---- QUERY
# Re-adding a column named string_col: the existing rows are expected to read NULL
# for it, not the values of the column that was dropped above.
alter table iceberg_migrated_alter_test_orc add column string_col string;
select * from iceberg_migrated_alter_test_orc;
---- RESULTS
0,0.5,'NULL'
1,1.5,'NULL'
2,2.5,'NULL'
---- TYPES
INT, DOUBLE, STRING
====
---- QUERY
# Renaming double_col must not change the values read for that column.
alter table iceberg_migrated_alter_test_orc change column double_col renamed_double_col double;
select * from iceberg_migrated_alter_test_orc;
---- RESULTS
0,0.5,'NULL'
1,1.5,'NULL'
2,2.5,'NULL'
---- TYPES
INT, DOUBLE, STRING
====
---- QUERY
# Adding a new column that reuses the old name double_col: the existing rows are
# expected to read NULL for it, while the renamed column keeps its values.
alter table iceberg_migrated_alter_test_orc add column double_col double;
select * from iceberg_migrated_alter_test_orc;
---- RESULTS
0,0.5,'NULL',NULL
1,1.5,'NULL',NULL
2,2.5,'NULL',NULL
---- TYPES
INT, DOUBLE, STRING, DOUBLE
====
---- QUERY
# Scalar fields inside structs, including one nested two struct levels deep.
select struct_1_col.string_col, struct_2_col.struct_3_col.float_col from iceberg_migrated_complex_test;
---- RESULTS
'A',0.5
---- TYPES
STRING, FLOAT
====
---- QUERY
# Array nested inside a struct, read through its pos/item columns.
select my_array_1.pos, my_array_1.item from iceberg_migrated_complex_test, iceberg_migrated_complex_test.struct_1_col.int_array_col as my_array_1;
---- RESULTS
0,0
---- TYPES
BIGINT, INT
====
---- QUERY
# Array nested two struct levels deep.
select my_array_2.pos, my_array_2.item from iceberg_migrated_complex_test, iceberg_migrated_complex_test.struct_2_col.struct_3_col.bigint_array_col as my_array_2;
---- RESULTS
0,4
---- TYPES
BIGINT, BIGINT
====
---- QUERY
# Map nested inside a struct, read through its key/value columns.
select my_map_1.key, my_map_1.value from iceberg_migrated_complex_test, iceberg_migrated_complex_test.struct_1_col.bool_int_map_col as my_map_1;
---- RESULTS
true,1
---- TYPES
BOOLEAN, INT
====
---- QUERY
# Top-level map column.
select my_map_2.key, my_map_2.value from iceberg_migrated_complex_test, iceberg_migrated_complex_test.int_bigint_map_col as my_map_2;
---- RESULTS
2,3
---- TYPES
INT, BIGINT
====
---- QUERY
# Map nested two struct levels deep.
select my_map_3.key, my_map_3.value from iceberg_migrated_complex_test, iceberg_migrated_complex_test.struct_2_col.struct_3_col.string_double_map_col as my_map_3;
---- RESULTS
'B',1.5
---- TYPES
STRING, DOUBLE
====
---- QUERY
# Map that is a direct member of struct_2_col.
select my_map_4.key, my_map_4.value from iceberg_migrated_complex_test, iceberg_migrated_complex_test.struct_2_col.int_int_map_col as my_map_4;
---- RESULTS
5,6
---- TYPES
INT, INT
====
---- QUERY
# Scalar fields inside structs, including one nested two struct levels deep.
select struct_1_col.string_col, struct_2_col.struct_3_col.float_col from iceberg_migrated_complex_test_orc;
---- RESULTS
'A',0.5
---- TYPES
STRING, FLOAT
====
---- QUERY
# Array nested inside a struct, read through its pos/item columns.
select my_array_1.pos, my_array_1.item from iceberg_migrated_complex_test_orc, iceberg_migrated_complex_test_orc.struct_1_col.int_array_col as my_array_1;
---- RESULTS
0,0
---- TYPES
BIGINT, INT
====
---- QUERY
# Array nested two struct levels deep.
select my_array_2.pos, my_array_2.item from iceberg_migrated_complex_test_orc, iceberg_migrated_complex_test_orc.struct_2_col.struct_3_col.bigint_array_col as my_array_2;
---- RESULTS
0,4
---- TYPES
BIGINT, BIGINT
====
---- QUERY
# Map nested inside a struct, read through its key/value columns.
select my_map_1.key, my_map_1.value from iceberg_migrated_complex_test_orc, iceberg_migrated_complex_test_orc.struct_1_col.bool_int_map_col as my_map_1;
---- RESULTS
true,1
---- TYPES
BOOLEAN, INT
====
---- QUERY
# Top-level map column.
select my_map_2.key, my_map_2.value from iceberg_migrated_complex_test_orc, iceberg_migrated_complex_test_orc.int_bigint_map_col as my_map_2;
---- RESULTS
2,3
---- TYPES
INT, BIGINT
====
---- QUERY
# Map nested two struct levels deep.
select my_map_3.key, my_map_3.value from iceberg_migrated_complex_test_orc, iceberg_migrated_complex_test_orc.struct_2_col.struct_3_col.string_double_map_col as my_map_3;
---- RESULTS
'B',1.5
---- TYPES
STRING, DOUBLE
====
---- QUERY
# Map that is a direct member of struct_2_col.
select my_map_4.key, my_map_4.value from iceberg_migrated_complex_test_orc, iceberg_migrated_complex_test_orc.struct_2_col.int_int_map_col as my_map_4;
---- RESULTS
5,6
---- TYPES
INT, INT
====
---- QUERY
# Regression test for IMPALA-13853. Column 'j' is added after the Parquet file was
# written and before migration, so the file only contains the leading column 'i'.
# The file must still be readable, with 'j' expected to read as NULL.
create table mig_add_column(i int) partitioned by (p int) stored as parquet;
insert into mig_add_column partition(p) values (1, 2);
alter table mig_add_column add column j int;
alter table mig_add_column convert to iceberg;
select * from mig_add_column;
---- RESULTS
1,NULL,2
---- TYPES
INT,INT,INT
====
---- QUERY
# Regression test for IMPALA-13853. Trailing column 'j' is dropped after the Parquet
# file was written and before migration, so the file has one more column than the
# table. The file must still be readable because its starting columns match the
# schema, and 'j' must not appear in the result.
create table mig_drop_column(i int, j int) partitioned by (p int) stored as parquet;
insert into mig_drop_column partition(p) values (1, 100, 2);
alter table mig_drop_column drop column j;
alter table mig_drop_column convert to iceberg;
select * from mig_drop_column;
---- RESULTS
1,2
---- TYPES
INT,INT
====
---- QUERY
# Regression test for IMPALA-13853. Parquet files with a mismatching schema must be
# rejected. The file is written with (i int, s string); before migration 's' is
# dropped and int column 'j' is added, so the table's second column no longer has
# the type stored in the file.
create table mig_wrong_type(i int, s string) partitioned by (p int) stored as parquet;
insert into mig_wrong_type partition(p) values (1, 'IMPALA', 2);
alter table mig_wrong_type drop column s;
alter table mig_wrong_type add column j int;
alter table mig_wrong_type convert to iceberg;
select * from mig_wrong_type;
---- CATCH
has an incompatible Parquet schema for column
====