mirror of
https://github.com/apache/impala.git
synced 2025-12-25 02:03:09 -05:00
IMPALA-11807: Fix TestIcebergTable.test_avro_file_format and test_mixed_file_format
Iceberg hardcodes URIs in metadata files. If a table was written in one storage location and then moved to another file system, the hardcoded URIs still point to the old location instead of the current one, so Impala is unable to read the table.

TestIcebergTable.test_avro_file_format and test_mixed_file_format use Hive from Impala to write tables. If the tables are created in a different file system than the one they will be read from, the tests fail due to the invalid URIs.

This change skips these 2 tests when testing is not done on HDFS. It also updates the data load schema of the 2 test tables created by Hive and sets their LOCATION the same way as for the previous test tables. If this later makes it possible to rewrite the URIs in the metadata, and thereby makes the tables accessible from another file system as well, the tests can be enabled again.

Testing:
- Tested locally on an HDFS minicluster
- Triggered an Ozone build to verify that the tests are skipped on a different file system

Change-Id: Ie2f126de80c6e7f825d02f6814fcf69ae320a781
Reviewed-on: http://gerrit.cloudera.org:8080/19387
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
This commit is contained in:
committed by
Impala Public Jenkins
parent
6ff99431a6
commit
4a05eaf988
@@ -3616,7 +3616,8 @@ CREATE EXTERNAL TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name} (
|
||||
double_col double,
|
||||
bool_col boolean
|
||||
)
|
||||
STORED BY ICEBERG STORED AS AVRO;
|
||||
STORED BY ICEBERG STORED AS AVRO
|
||||
LOCATION '/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_avro_format';
|
||||
INSERT INTO TABLE {db_name}{db_suffix}.{table_name} values(1, 'A', 0.5, true),(2, 'B', 1.5, true),(3, 'C', 2.5, false);
|
||||
====
|
||||
---- DATASET
|
||||
@@ -3631,10 +3632,11 @@ CREATE EXTERNAL TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name} (
|
||||
bool_col boolean
|
||||
)
|
||||
STORED BY ICEBERG
|
||||
TBLPROPERTIES('write.format.default'='avro');
|
||||
LOCATION '/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_mixed_file_format';
|
||||
---- DEPENDENT_LOAD_HIVE
|
||||
-- This INSERT must run in Hive, because Impala doesn't support inserting into tables
|
||||
-- with avro and orc file formats.
|
||||
ALTER TABLE {db_name}{db_suffix}.{table_name} SET TBLPROPERTIES('write.format.default'='avro');
|
||||
INSERT INTO TABLE {db_name}{db_suffix}.{table_name} values(1, 'avro', 0.5, true);
|
||||
ALTER TABLE {db_name}{db_suffix}.{table_name} SET TBLPROPERTIES('write.format.default'='orc');
|
||||
INSERT INTO TABLE {db_name}{db_suffix}.{table_name} values(2, 'orc', 1.5, false);
|
||||
|
||||
@@ -104,7 +104,7 @@ class SkipIf:
|
||||
sfs_unsupported = pytest.mark.skipif(not (IS_HDFS or IS_S3 or IS_ABFS or IS_ADLS
|
||||
or IS_GCS), reason="Hive support for sfs+ is limited, HIVE-26757")
|
||||
hardcoded_uris = pytest.mark.skipif(not IS_HDFS,
|
||||
reason="Iceberg delete files hardcode the full URI in parquet files")
|
||||
reason="Iceberg hardcodes the full URI in parquet delete files and metadata files")
|
||||
not_ec = pytest.mark.skipif(not IS_EC, reason="Erasure Coding needed")
|
||||
no_secondary_fs = pytest.mark.skipif(not SECONDARY_FILESYSTEM,
|
||||
reason="Secondary filesystem needed")
|
||||
|
||||
@@ -786,6 +786,7 @@ class TestIcebergTable(IcebergTestSuite):
|
||||
self.run_test_case('QueryTest/iceberg-multiple-storage-locations-table',
|
||||
vector, unique_database)
|
||||
|
||||
@SkipIf.hardcoded_uris
|
||||
def test_mixed_file_format(self, vector, unique_database):
|
||||
self.run_test_case('QueryTest/iceberg-mixed-file-format', vector,
|
||||
unique_database)
|
||||
@@ -901,6 +902,7 @@ class TestIcebergTable(IcebergTestSuite):
|
||||
def test_virtual_columns(self, vector, unique_database):
|
||||
self.run_test_case('QueryTest/iceberg-virtual-columns', vector, unique_database)
|
||||
|
||||
@SkipIf.hardcoded_uris
|
||||
def test_avro_file_format(self, vector, unique_database):
|
||||
self.run_test_case('QueryTest/iceberg-avro', vector, unique_database)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user