IMPALA-11807: Fix TestIcebergTable.test_avro_file_format and test_mixed_file_format

Iceberg hardcodes absolute URIs in its metadata files. If a table is
written to one storage location and its files are later moved to
another file system, the hardcoded URIs still point to the old
location instead of the current one, so Impala cannot read the table.
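
For illustration, a minimal sketch (not part of this change) of where
such URIs surface; the metadata file path below is hypothetical:

  import json

  # Hypothetical path: the current metadata JSON of a moved Iceberg table.
  METADATA_FILE = '/tmp/iceberg_table/metadata/v2.metadata.json'

  with open(METADATA_FILE) as f:
    metadata = json.load(f)

  # The table location and each snapshot's manifest list are stored as
  # absolute URIs, so they keep referring to the original file system.
  print(metadata['location'])
  for snapshot in metadata.get('snapshots', []):
    print(snapshot['manifest-list'])

The manifest files themselves carry further absolute data file paths,
so pointing the table at a new location requires rewriting more than
the top-level metadata JSON.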

TestIcebergTable.test_avro_file_format and test_mixed_file_format
use Hive, invoked from Impala's test framework, to write their
tables. If the tables are created on a different file system than the
one they are later read from, the tests fail because of the stale
URIs. These two tests are now skipped whenever the tests do not run
against HDFS; a sketch of the skip-marker pattern follows.
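
A minimal, self-contained sketch of that pattern; reading the
TARGET_FILESYSTEM environment variable is an assumption about how the
test framework selects the target file system:

  import os
  import pytest

  # Stand-in for the IS_HDFS flag used by the real SkipIf class.
  IS_HDFS = (os.getenv('TARGET_FILESYSTEM') or 'hdfs') == 'hdfs'

  class SkipIf:
    hardcoded_uris = pytest.mark.skipif(not IS_HDFS,
        reason="Iceberg hardcodes the full URI in delete and metadata files")

  @SkipIf.hardcoded_uris
  def test_avro_file_format():
    # The real test writes an Iceberg table via Hive and reads it with Impala.
    pass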

The data load schema of the two Hive-written test tables is also
updated: their LOCATION is now set to the same path the earlier
versions of these test tables used (see the schema diff below). If
this later makes it possible to rewrite the URIs in the metadata, so
that the tables become readable from other file systems as well, the
tests can be re-enabled.

Testing:
 - Ran the tests locally on an HDFS minicluster
 - Triggered an Ozone build to verify that the tests are skipped on a
   non-HDFS file system

Change-Id: Ie2f126de80c6e7f825d02f6814fcf69ae320a781
Reviewed-on: http://gerrit.cloudera.org:8080/19387
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Author: noemi
Date: 2022-12-21 15:07:42 +01:00
Committed-by: Impala Public Jenkins
Commit: 4a05eaf988 (parent: 6ff99431a6)
3 changed files with 7 additions and 3 deletions


@@ -3616,7 +3616,8 @@ CREATE EXTERNAL TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name} (
   double_col double,
   bool_col boolean
 )
-STORED BY ICEBERG STORED AS AVRO;
+STORED BY ICEBERG STORED AS AVRO
+LOCATION '/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_avro_format';
 INSERT INTO TABLE {db_name}{db_suffix}.{table_name} values(1, 'A', 0.5, true),(2, 'B', 1.5, true),(3, 'C', 2.5, false);
 ====
 ---- DATASET
@@ -3631,10 +3632,11 @@ CREATE EXTERNAL TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name} (
   bool_col boolean
 )
 STORED BY ICEBERG
-TBLPROPERTIES('write.format.default'='avro');
+LOCATION '/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_mixed_file_format';
 ---- DEPENDENT_LOAD_HIVE
 -- This INSERT must run in Hive, because Impala doesn't support inserting into tables
 -- with avro and orc file formats.
+ALTER TABLE {db_name}{db_suffix}.{table_name} SET TBLPROPERTIES('write.format.default'='avro');
 INSERT INTO TABLE {db_name}{db_suffix}.{table_name} values(1, 'avro', 0.5, true);
 ALTER TABLE {db_name}{db_suffix}.{table_name} SET TBLPROPERTIES('write.format.default'='orc');
 INSERT INTO TABLE {db_name}{db_suffix}.{table_name} values(2, 'orc', 1.5, false);


@@ -104,7 +104,7 @@ class SkipIf:
   sfs_unsupported = pytest.mark.skipif(not (IS_HDFS or IS_S3 or IS_ABFS or IS_ADLS
       or IS_GCS), reason="Hive support for sfs+ is limited, HIVE-26757")
   hardcoded_uris = pytest.mark.skipif(not IS_HDFS,
-      reason="Iceberg delete files hardcode the full URI in parquet files")
+      reason="Iceberg hardcodes the full URI in parquet delete files and metadata files")
   not_ec = pytest.mark.skipif(not IS_EC, reason="Erasure Coding needed")
   no_secondary_fs = pytest.mark.skipif(not SECONDARY_FILESYSTEM,
       reason="Secondary filesystem needed")


@@ -786,6 +786,7 @@ class TestIcebergTable(IcebergTestSuite):
     self.run_test_case('QueryTest/iceberg-multiple-storage-locations-table',
                        vector, unique_database)
 
+  @SkipIf.hardcoded_uris
   def test_mixed_file_format(self, vector, unique_database):
     self.run_test_case('QueryTest/iceberg-mixed-file-format', vector,
                        unique_database)
@@ -901,6 +902,7 @@ class TestIcebergTable(IcebergTestSuite):
 
   def test_virtual_columns(self, vector, unique_database):
     self.run_test_case('QueryTest/iceberg-virtual-columns', vector, unique_database)
 
+  @SkipIf.hardcoded_uris
   def test_avro_file_format(self, vector, unique_database):
     self.run_test_case('QueryTest/iceberg-avro', vector, unique_database)