From f8443d982891e3fd81e84c58394777580c9a5ea2 Mon Sep 17 00:00:00 2001
From: Michael Smith
Date: Wed, 16 Nov 2022 14:13:54 -0800
Subject: [PATCH] IMPALA-11697: Enable SkipIf.not_hdfs tests for Ozone

Converts SkipIf.not_hdfs to SkipIf.not_dfs for tests that require
filesystem semantics, adding more feature test coverage with Ozone.
Creates a separate not_scratch_fs flag for scratch dir tests, as they
are not yet supported with Ozone. Filed IMPALA-11730 to address this.

Preserves not_hdfs for a specific test that uses the dfsadmin CLI to
put HDFS in safemode.

Adds sfs_unsupported for SingleFileSystem (sfs) tests. This should work
for many of our filesystems based on
https://github.com/apache/hive/blob/ebb1e2fa9914bcccecad261d53338933b699ccb1/ql/src/java/org/apache/hadoop/hive/ql/io/SingleFileSystem.java#L62-L87.
Makes sfs tests work on S3.

Adds hardcoded_uris for IcebergV2 tests where deletes are implemented
as hardcoded URIs in parquet files. Adding a parquet read/write library
for Python is beyond the scope of this patch.

Change-Id: Iafc1dac52d013e74a459fdc4336c26891a256ef1
Reviewed-on: http://gerrit.cloudera.org:8080/19254
Tested-by: Impala Public Jenkins
Reviewed-by: Joe McDonnell
---
 .../queries/QueryTest/sfs.test                | 42 ++++++++++---------
 tests/common/skip.py                          | 10 ++++-
 tests/custom_cluster/test_disable_features.py |  4 +-
 tests/custom_cluster/test_hedged_reads.py     |  3 +-
 tests/custom_cluster/test_scratch_disk.py     | 12 +++---
 tests/data_errors/test_data_errors.py         |  9 +++-
 tests/metadata/test_ddl.py                    |  2 +-
 tests/query_test/test_acid.py                 |  4 +-
 tests/query_test/test_iceberg.py              | 18 ++++----
 tests/query_test/test_sfs.py                  |  7 ++--
 tests/stress/test_acid_stress.py              |  2 +-
 tests/stress/test_insert_stress.py            |  5 ++-
 12 files changed, 68 insertions(+), 50 deletions(-)

diff --git a/testdata/workloads/functional-query/queries/QueryTest/sfs.test b/testdata/workloads/functional-query/queries/QueryTest/sfs.test
index ec03e5d26..03cf028e2 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/sfs.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/sfs.test
@@ -3,7 +3,7 @@
 # We do not hardcode the host name to something like "localhost" since the host name may
 # be an IP address in a test environment.
 CREATE EXTERNAL TABLE test_tbl_01 (s STRING, i INT) STORED AS PARQUET
-LOCATION 'sfs+hdfs://$INTERNAL_LISTEN_HOST:20500/test-warehouse/$DATABASE.db/sfs_d1.parq/#SINGLEFILE#'
+LOCATION 'sfs+$NAMENODE/test-warehouse/$DATABASE.db/sfs_d1.parq/#SINGLEFILE#'
 ---- RESULTS
 'Table has been created.'
 ====
@@ -11,7 +11,7 @@ LOCATION 'sfs+hdfs://$INTERNAL_LISTEN_HOST:20500/test-warehouse/$DATABASE.db/sfs
 CREATE EXTERNAL TABLE test_tbl_02 (s STRING, i INT)
 ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
 STORED AS TEXTFILE
-LOCATION 'sfs+hdfs://$INTERNAL_LISTEN_HOST:20500/test-warehouse/$DATABASE.db/sfs_d2.txt/#SINGLEFILE#'
+LOCATION 'sfs+$NAMENODE/test-warehouse/$DATABASE.db/sfs_d2.txt/#SINGLEFILE#'
 ---- RESULTS
 'Table has been created.'
 ====
@@ -48,22 +48,22 @@ INSERT INTO TABLE test_tbl_02 VALUES ('x', 100);
 row_regex: .*Unable to INSERT into target table .+ because .+ is not a supported filesystem.*
 ====
 ---- QUERY
-LOAD DATA INPATH 'hdfs://$INTERNAL_LISTEN_HOST:20500/test-warehouse/$DATABASE.db/sfs_d3.parq' INTO TABLE test_tbl_01
+LOAD DATA INPATH '$NAMENODE/test-warehouse/$DATABASE.db/sfs_d3.parq' INTO TABLE test_tbl_01
 ---- CATCH
 Unsupported SFS filesystem operation!
==== ---- QUERY -LOAD DATA INPATH 'hdfs://$INTERNAL_LISTEN_HOST:20500/test-warehouse/$DATABASE.db/sfs_d4.txt' INTO TABLE test_tbl_02 +LOAD DATA INPATH '$NAMENODE/test-warehouse/$DATABASE.db/sfs_d4.txt' INTO TABLE test_tbl_02 ---- CATCH Unsupported SFS filesystem operation! ==== ---- QUERY -LOAD DATA INPATH 'sfs+hdfs://$INTERNAL_LISTEN_HOST:20500/test-warehouse/$DATABASE.db/sfs_d3.parq/#SINGLEFILE#' INTO TABLE test_tbl_01 +LOAD DATA INPATH 'sfs+$NAMENODE/test-warehouse/$DATABASE.db/sfs_d3.parq/#SINGLEFILE#' INTO TABLE test_tbl_01 ---- CATCH row_regex: .*INPATH location .+ must point to one of the supported filesystem URI scheme.* ==== ---- QUERY -LOAD DATA INPATH 'sfs+hdfs://$INTERNAL_LISTEN_HOST:20500/test-warehouse/$DATABASE.db/sfs_d4.txt/#SINGLEFILE#' INTO TABLE test_tbl_02 +LOAD DATA INPATH 'sfs+$NAMENODE/test-warehouse/$DATABASE.db/sfs_d4.txt/#SINGLEFILE#' INTO TABLE test_tbl_02 ---- CATCH row_regex: .*INPATH location .+ must point to one of the supported filesystem URI scheme.* ==== @@ -89,7 +89,7 @@ COMPUTE STATS $DATABASE.test_tbl_02 ==== ---- QUERY CREATE EXTERNAL TABLE test_tbl_03_ext (s STRING, i INT) STORED AS PARQUET -LOCATION 'sfs+hdfs://$INTERNAL_LISTEN_HOST:20500/test-warehouse/managed/$DATABASE.db/sfs_d3.parq/#SINGLEFILE#' +LOCATION 'sfs+$NAMENODE/test-warehouse/managed/$DATABASE.db/sfs_d3.parq/#SINGLEFILE#' ---- RESULTS 'Table has been created.' ==== @@ -97,7 +97,7 @@ LOCATION 'sfs+hdfs://$INTERNAL_LISTEN_HOST:20500/test-warehouse/managed/$DATABAS CREATE EXTERNAL TABLE test_tbl_04_ext (s STRING, i INT) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE -LOCATION 'sfs+hdfs://$INTERNAL_LISTEN_HOST:20500/test-warehouse/managed/$DATABASE.db/sfs_d4.txt/#SINGLEFILE#' +LOCATION 'sfs+$NAMENODE/test-warehouse/managed/$DATABASE.db/sfs_d4.txt/#SINGLEFILE#' ---- RESULTS 'Table has been created.' ==== @@ -163,7 +163,7 @@ DROP TABLE test_tbl_04_ext; # The table can actually be created. CREATE TABLE test_tbl_03 (s STRING, i INT) STORED AS PARQUET -LOCATION 'sfs+hdfs://$INTERNAL_LISTEN_HOST:20500/test-warehouse/managed/$DATABASE.db/sfs_d3.parq/#SINGLEFILE#' +LOCATION 'sfs+$NAMENODE/test-warehouse/managed/$DATABASE.db/sfs_d3.parq/#SINGLEFILE#' ---- RESULTS 'Table has been created.' ==== @@ -172,7 +172,7 @@ LOCATION 'sfs+hdfs://$INTERNAL_LISTEN_HOST:20500/test-warehouse/managed/$DATABAS CREATE TABLE test_tbl_04 (s STRING, i INT) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE -LOCATION 'sfs+hdfs://$INTERNAL_LISTEN_HOST:20500/test-warehouse/managed/$DATABASE.db/sfs_d4.txt/#SINGLEFILE#' +LOCATION 'sfs+$NAMENODE/test-warehouse/managed/$DATABASE.db/sfs_d4.txt/#SINGLEFILE#' ---- RESULTS 'Table has been created.' ==== @@ -242,7 +242,7 @@ DROP TABLE test_tbl_04 # test_tbl_05 can be created, which shows that sfs_d3.parq has not been deleted after # test_tbl_03 was dropped. CREATE TABLE test_tbl_05 (s STRING, i INT) STORED AS PARQUET -LOCATION 'sfs+hdfs://$INTERNAL_LISTEN_HOST:20500/test-warehouse/managed/$DATABASE.db/sfs_d3.parq/#SINGLEFILE#' +LOCATION 'sfs+$NAMENODE/test-warehouse/managed/$DATABASE.db/sfs_d3.parq/#SINGLEFILE#' ---- RESULTS 'Table has been created.' 
==== @@ -252,7 +252,7 @@ LOCATION 'sfs+hdfs://$INTERNAL_LISTEN_HOST:20500/test-warehouse/managed/$DATABAS CREATE TABLE test_tbl_06 (s STRING, i INT) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE -LOCATION 'sfs+hdfs://$INTERNAL_LISTEN_HOST:20500/test-warehouse/managed/$DATABASE.db/sfs_d4.txt/#SINGLEFILE#' +LOCATION 'sfs+$NAMENODE/test-warehouse/managed/$DATABASE.db/sfs_d4.txt/#SINGLEFILE#' ---- RESULTS 'Table has been created.' ==== @@ -286,7 +286,7 @@ STRING, INT SET DEFAULT_TRANSACTIONAL_TYPE=INSERT_ONLY; CREATE TABLE test_tbl_03 (s STRING, i INT) STORED AS PARQUET -LOCATION 'sfs+hdfs://$INTERNAL_LISTEN_HOST:20500/test-warehouse/managed/$DATABASE.db/sfs_d3.parq/#SINGLEFILE#' +LOCATION 'sfs+$NAMENODE/test-warehouse/managed/$DATABASE.db/sfs_d3.parq/#SINGLEFILE#' ---- CATCH A managed table's location should be located within managed warehouse root directory or within its database's managedLocationUri. ==== @@ -297,7 +297,7 @@ SET DEFAULT_TRANSACTIONAL_TYPE=INSERT_ONLY; CREATE TABLE test_tbl_04 (s STRING, i INT) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE -LOCATION 'sfs+hdfs://$INTERNAL_LISTEN_HOST:20500/test-warehouse/managed/$DATABASE.db/sfs_d4.txt/#SINGLEFILE#' +LOCATION 'sfs+$NAMENODE/test-warehouse/managed/$DATABASE.db/sfs_d4.txt/#SINGLEFILE#' ---- CATCH A managed table's location should be located within managed warehouse root directory or within its database's managedLocationUri. ==== @@ -305,23 +305,25 @@ A managed table's location should be located within managed warehouse root direc SET DEFAULT_TRANSACTIONAL_TYPE=INSERT_ONLY; CREATE TABLE test_tbl_03 (s STRING, i INT) STORED AS PARQUET -LOCATION 'hdfs://$INTERNAL_LISTEN_HOST:20500/test-warehouse/managed/$DATABASE.db/sfs_d3.parq' ----- CATCH +LOCATION '$NAMENODE/test-warehouse/managed/$DATABASE.db/sfs_d3.parq' +---- CATCH: ANY_OF Path is not a directory +Path is a file ==== ---- QUERY SET DEFAULT_TRANSACTIONAL_TYPE=INSERT_ONLY; CREATE TABLE test_tbl_04 (s STRING, i INT) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE -LOCATION 'hdfs://$INTERNAL_LISTEN_HOST:20500/test-warehouse/managed/$DATABASE.db/sfs_d4.txt' ----- CATCH +LOCATION '$NAMENODE/test-warehouse/managed/$DATABASE.db/sfs_d4.txt' +---- CATCH: ANY_OF Path is not a directory +Path is a file ==== ---- QUERY # The table can actually be created but the contents of the table cannot be retrieved. CREATE EXTERNAL TABLE test_tbl_03 (s STRING) PARTITIONED BY (i INT) STORED AS PARQUET -LOCATION 'sfs+hdfs://$INTERNAL_LISTEN_HOST:20500/test-warehouse/$DATABASE.db/sfs_d3.parq/#SINGLEFILE#' +LOCATION 'sfs+$NAMENODE/test-warehouse/$DATABASE.db/sfs_d3.parq/#SINGLEFILE#' ---- RESULTS 'Table has been created.' ==== @@ -330,7 +332,7 @@ LOCATION 'sfs+hdfs://$INTERNAL_LISTEN_HOST:20500/test-warehouse/$DATABASE.db/sfs CREATE EXTERNAL TABLE test_tbl_04 (s STRING) PARTITIONED BY (i INT) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE -LOCATION 'sfs+hdfs://$INTERNAL_LISTEN_HOST:20500/test-warehouse/$DATABASE.db/sfs_d4.txt/#SINGLEFILE#' +LOCATION 'sfs+$NAMENODE/test-warehouse/$DATABASE.db/sfs_d4.txt/#SINGLEFILE#' ---- RESULTS 'Table has been created.' 
==== diff --git a/tests/common/skip.py b/tests/common/skip.py index 6ed5b477d..e5b856485 100644 --- a/tests/common/skip.py +++ b/tests/common/skip.py @@ -98,7 +98,15 @@ class SkipIf: skip_hbase = pytest.mark.skipif(pytest.config.option.skip_hbase, reason="--skip_hbase argument specified") not_s3 = pytest.mark.skipif(not IS_S3, reason="S3 Filesystem needed") - not_hdfs = pytest.mark.skipif(not IS_HDFS, reason="HDFS Filesystem needed") + not_hdfs = pytest.mark.skipif(not IS_HDFS, reason="HDFS admin needed") + not_dfs = pytest.mark.skipif(not (IS_HDFS or IS_OZONE), + reason="HDFS/Ozone Filesystem needed") + not_scratch_fs = pytest.mark.skipif(not IS_HDFS, + reason="Scratch dirs for temporary file spilling not supported") + sfs_unsupported = pytest.mark.skipif(not (IS_HDFS or IS_S3 or IS_ABFS or IS_ADLS + or IS_GCS), reason="Hive support for sfs+ is limited, HIVE-26757") + hardcoded_uris = pytest.mark.skipif(not IS_HDFS, + reason="Iceberg delete files hardcode the full URI in parquet files") not_ec = pytest.mark.skipif(not IS_EC, reason="Erasure Coding needed") no_secondary_fs = pytest.mark.skipif(not SECONDARY_FILESYSTEM, reason="Secondary filesystem needed") diff --git a/tests/custom_cluster/test_disable_features.py b/tests/custom_cluster/test_disable_features.py index 5ce270125..632322301 100644 --- a/tests/custom_cluster/test_disable_features.py +++ b/tests/custom_cluster/test_disable_features.py @@ -19,7 +19,7 @@ import pytest from tests.common.custom_cluster_test_suite import CustomClusterTestSuite from tests.common.parametrize import UniqueDatabase -from tests.common.skip import SkipIf +from tests.common.skip import SkipIfFS class TestDisableFeatures(CustomClusterTestSuite): @@ -29,7 +29,7 @@ class TestDisableFeatures(CustomClusterTestSuite): def get_workload(self): return 'functional-query' - @SkipIf.not_hdfs + @SkipIfFS.hdfs_caching @pytest.mark.execute_serially @UniqueDatabase.parametrize(sync_ddl=True) @CustomClusterTestSuite.with_args( diff --git a/tests/custom_cluster/test_hedged_reads.py b/tests/custom_cluster/test_hedged_reads.py index b24fd924a..e1d36e73b 100644 --- a/tests/custom_cluster/test_hedged_reads.py +++ b/tests/custom_cluster/test_hedged_reads.py @@ -19,7 +19,8 @@ import pytest from tests.common.custom_cluster_test_suite import CustomClusterTestSuite from tests.common.skip import SkipIf -@SkipIf.not_hdfs + +@SkipIf.not_dfs class TestHedgedReads(CustomClusterTestSuite): """ Exercises the hedged reads code path. NOTE: We unfortunately cannot force hedged reads on a minicluster, but we enable diff --git a/tests/custom_cluster/test_scratch_disk.py b/tests/custom_cluster/test_scratch_disk.py index a5ca75bbe..66492bcf7 100644 --- a/tests/custom_cluster/test_scratch_disk.py +++ b/tests/custom_cluster/test_scratch_disk.py @@ -277,7 +277,7 @@ class TestScratchDir(CustomClusterTestSuite): client.close() @pytest.mark.execute_serially - @SkipIf.not_hdfs + @SkipIf.not_scratch_fs def test_scratch_dirs_remote_spill(self, vector): # Test one remote directory with one its local buffer directory. normal_dirs = self.generate_dirs(1) @@ -305,7 +305,7 @@ class TestScratchDir(CustomClusterTestSuite): client.close() @pytest.mark.execute_serially - @SkipIf.not_hdfs + @SkipIf.not_scratch_fs def test_scratch_dirs_mix_local_and_remote_dir_spill_local_only(self, vector): '''Two local directories, the first one is always used as local buffer for remote directories. 
Set the second directory big enough so that only spills @@ -338,7 +338,7 @@ class TestScratchDir(CustomClusterTestSuite): client.close() @pytest.mark.execute_serially - @SkipIf.not_hdfs + @SkipIf.not_scratch_fs def test_scratch_dirs_mix_local_and_remote_dir_spill_both(self, vector): '''Two local directories, the first one is always used as local buffer for remote directories. Set the second directory small enough so that it spills @@ -372,7 +372,7 @@ class TestScratchDir(CustomClusterTestSuite): client.close() @pytest.mark.execute_serially - @SkipIf.not_hdfs + @SkipIf.not_scratch_fs def test_scratch_dirs_remote_spill_with_options(self, vector): # One local buffer directory and one remote directory. normal_dirs = self.generate_dirs(1) @@ -402,7 +402,7 @@ class TestScratchDir(CustomClusterTestSuite): client.close() @pytest.mark.execute_serially - @SkipIf.not_hdfs + @SkipIf.not_scratch_fs def test_scratch_dirs_remote_spill_concurrent(self, vector): '''Concurrently execute multiple queries that trigger the spilling to the remote directory to test if there is a deadlock issue.''' @@ -449,7 +449,7 @@ class TestScratchDir(CustomClusterTestSuite): assert (total_size > 0 and total_size % (8 * 1024 * 1024) == 0) @pytest.mark.execute_serially - @SkipIf.not_hdfs + @SkipIf.not_scratch_fs def test_scratch_dirs_batch_reading(self, vector): # Set the buffer directory small enough to spill to the remote one. normal_dirs = self.generate_dirs(1) diff --git a/tests/data_errors/test_data_errors.py b/tests/data_errors/test_data_errors.py index 562eef2bf..d098dfed0 100644 --- a/tests/data_errors/test_data_errors.py +++ b/tests/data_errors/test_data_errors.py @@ -26,6 +26,8 @@ from tests.beeswax.impala_beeswax import ImpalaBeeswaxException from tests.common.impala_test_suite import ImpalaTestSuite from tests.common.skip import SkipIf, SkipIfFS from tests.common.test_dimensions import create_exec_option_dimension +from tests.util.filesystem_utils import get_fs_path + class TestDataErrors(ImpalaTestSuite): # batch_size of 1 can expose some interesting corner cases at row batch boundaries. @@ -42,12 +44,13 @@ class TestDataErrors(ImpalaTestSuite): def get_workload(self): return 'functional-query' + # Regression test for IMP-633. Added as a part of IMPALA-5198. -@SkipIf.not_hdfs +@SkipIf.not_dfs class TestHdfsFileOpenFailErrors(ImpalaTestSuite): @pytest.mark.execute_serially def test_hdfs_file_open_fail(self): - absolute_location = "/test-warehouse/file_open_fail" + absolute_location = get_fs_path("/test-warehouse/file_open_fail") create_stmt = \ "create table file_open_fail (x int) location '" + absolute_location + "'" insert_stmt = "insert into file_open_fail values(1)" @@ -64,6 +67,7 @@ class TestHdfsFileOpenFailErrors(ImpalaTestSuite): assert "Failed to open HDFS file" in str(e) self.client.execute(drop_stmt) + # Test for IMPALA-5331 to verify that the libHDFS API hdfsGetLastExceptionRootCause() # works. 
@SkipIf.not_hdfs @@ -161,6 +165,7 @@ class TestAvroErrors(TestDataErrors): vector.get_value('exec_option')['abort_on_error'] = 0 self.run_test_case('DataErrorsTest/avro-errors', vector) + class TestHBaseDataErrors(TestDataErrors): @classmethod def add_test_dimensions(cls): diff --git a/tests/metadata/test_ddl.py b/tests/metadata/test_ddl.py index ba590f8b5..303a258c8 100644 --- a/tests/metadata/test_ddl.py +++ b/tests/metadata/test_ddl.py @@ -455,7 +455,7 @@ class TestDdlStatements(TestDdlBase): self.run_test_case('QueryTest/alter-table', vector, use_db=unique_database, multiple_impalad=self._use_multiple_impalad(vector)) - @SkipIf.not_hdfs + @SkipIfFS.hdfs_caching @SkipIfLocal.hdfs_client @UniqueDatabase.parametrize(sync_ddl=True, num_dbs=2) def test_alter_table_hdfs_caching(self, vector, unique_database): diff --git a/tests/query_test/test_acid.py b/tests/query_test/test_acid.py index d3d3ed17f..2b09d830a 100644 --- a/tests/query_test/test_acid.py +++ b/tests/query_test/test_acid.py @@ -304,7 +304,7 @@ class TestAcid(ImpalaTestSuite): assert len(self.execute_query("select * from {}".format(tbl_name)).data) == 0 @SkipIfHive2.acid - @SkipIf.not_hdfs + @SkipIf.not_dfs def test_full_acid_schema_without_file_metadata_tag(self, vector, unique_database): """IMPALA-10115: Some files have full ACID schema without having 'hive.acid.version' set. We still need to identify such files as full ACID""" @@ -315,7 +315,7 @@ class TestAcid(ImpalaTestSuite): table_uri = self._get_table_location(fq_table_name, vector) acid_file = (os.environ['IMPALA_HOME'] + "/testdata/data/full_acid_schema_but_no_acid_version.orc") - self.hdfs_client.copy_from_local(acid_file, table_uri + "/bucket_00000") + self.filesystem_client.copy_from_local(acid_file, table_uri + "/bucket_00000") self.execute_query("refresh {}".format(fq_table_name)) result = self.execute_query("select count(*) from {0}".format(fq_table_name)) assert "3" in result.data diff --git a/tests/query_test/test_iceberg.py b/tests/query_test/test_iceberg.py index f264fd5e5..c737b9fde 100644 --- a/tests/query_test/test_iceberg.py +++ b/tests/query_test/test_iceberg.py @@ -126,17 +126,17 @@ class TestIcebergTable(IcebergTestSuite): # trigger a known bug: IMPALA-11509. Hence, turning this test off until there is a fix # for this issue. Note, we could add a sleep righ after table creation that could # workaround the above mentioned bug but then we would hit another issue: IMPALA-11502. - @SkipIf.not_hdfs + @SkipIf.not_dfs def test_drop_incomplete_table(self, vector, unique_database): """Test DROP TABLE when the underlying directory is deleted. 
In that case table loading fails, but we should be still able to drop the table from Impala.""" - pytest.skip() + pytest.skip("Gets into a metadata update loop") tbl_name = unique_database + ".synchronized_iceberg_tbl" - cat_location = "/test-warehouse/" + unique_database + cat_location = get_fs_path("/test-warehouse/" + unique_database) self.client.execute("""create table {0} (i int) stored as iceberg tblproperties('iceberg.catalog'='hadoop.catalog', 'iceberg.catalog_location'='{1}')""".format(tbl_name, cat_location)) - self.hdfs_client.delete_file_dir(cat_location, True) + self.filesystem_client.delete_file_dir(cat_location, True) self.execute_query_expect_success(self.client, """drop table {0}""".format(tbl_name)) def test_insert(self, vector, unique_database): @@ -455,7 +455,7 @@ class TestIcebergTable(IcebergTestSuite): except Exception as e: assert "Cannot find a snapshot older than" in str(e) - @SkipIf.not_hdfs + @SkipIf.not_dfs def test_strings_utf8(self, vector, unique_database): # Create table table_name = "ice_str_utf8" @@ -542,7 +542,7 @@ class TestIcebergTable(IcebergTestSuite): os.remove(local_path) return datafiles - @SkipIf.not_hdfs + @SkipIf.not_dfs def test_writing_metrics_to_metadata(self, vector, unique_database): # Create table table_name = "ice_stats" @@ -872,17 +872,17 @@ class TestIcebergV2Table(IcebergTestSuite): # the data files via full URI, i.e. they start with 'hdfs://localhost:2050/...'. In the # dockerised environment the namenode is accessible on a different hostname/port. @SkipIfDockerizedCluster.internal_hostname - @SkipIf.not_hdfs + @SkipIf.hardcoded_uris def test_read_position_deletes(self, vector): self.run_test_case('QueryTest/iceberg-v2-read-position-deletes', vector) @SkipIfDockerizedCluster.internal_hostname - @SkipIf.not_hdfs + @SkipIf.hardcoded_uris def test_read_position_deletes_orc(self, vector): self.run_test_case('QueryTest/iceberg-v2-read-position-deletes-orc', vector) @SkipIfDockerizedCluster.internal_hostname - @SkipIf.not_hdfs + @SkipIf.hardcoded_uris def test_table_sampling_v2(self, vector): self.run_test_case('QueryTest/iceberg-tablesample-v2', vector, use_db="functional_parquet") diff --git a/tests/query_test/test_sfs.py b/tests/query_test/test_sfs.py index 3973a1c4c..5db318e21 100644 --- a/tests/query_test/test_sfs.py +++ b/tests/query_test/test_sfs.py @@ -21,8 +21,10 @@ from tests.common.file_utils import copy_files_to_hdfs_dir from tests.common.impala_test_suite import ImpalaTestSuite from tests.common.skip import SkipIf +from tests.util.filesystem_utils import WAREHOUSE +@SkipIf.sfs_unsupported class TestSFS(ImpalaTestSuite): @classmethod def get_workload(cls): @@ -37,14 +39,13 @@ class TestSFS(ImpalaTestSuite): cls.ImpalaTestMatrix.add_constraint(lambda v: v.get_value('exec_option')['disable_codegen'] is False) - @SkipIf.not_hdfs def test_sfs(self, vector, unique_database): files_for_external_tables = ["testdata/data/sfs_d1.parq", "testdata/data/sfs_d2.txt", "testdata/data/sfs_d3.parq", "testdata/data/sfs_d4.txt"] files_for_managed_tables = ["testdata/data/sfs_d3.parq", "testdata/data/sfs_d4.txt"] - hdfs_dir_for_external_tables = "/test-warehouse/{0}.db/".format(unique_database) + hdfs_dir_for_external_tables = "{0}/{1}.db/".format(WAREHOUSE, unique_database) hdfs_dir_for_managed_tables =\ - "/test-warehouse/managed/{0}.db/".format(unique_database) + "{0}/managed/{1}.db/".format(WAREHOUSE, unique_database) copy_files_to_hdfs_dir(files_for_external_tables, hdfs_dir_for_external_tables) 
copy_files_to_hdfs_dir(files_for_managed_tables, hdfs_dir_for_managed_tables) diff --git a/tests/stress/test_acid_stress.py b/tests/stress/test_acid_stress.py index 96a61854d..6ba33d65f 100644 --- a/tests/stress/test_acid_stress.py +++ b/tests/stress/test_acid_stress.py @@ -190,7 +190,7 @@ class TestAcidInsertsBasic(TestAcidStress): @pytest.mark.execute_serially @pytest.mark.stress - @SkipIf.not_hdfs + @SkipIf.not_dfs @UniqueDatabase.parametrize(sync_ddl=True) def test_partitioned_inserts(self, unique_database): """Check that the different ACID write operations take appropriate locks. diff --git a/tests/stress/test_insert_stress.py b/tests/stress/test_insert_stress.py index fe43a38c9..3b5af90d5 100644 --- a/tests/stress/test_insert_stress.py +++ b/tests/stress/test_insert_stress.py @@ -25,6 +25,7 @@ from tests.common.impala_test_suite import ImpalaTestSuite from tests.common.parametrize import UniqueDatabase from tests.common.skip import SkipIf from tests.stress.stress_util import run_tasks, Task +from tests.util.filesystem_utils import WAREHOUSE # Stress test for concurrent INSERT operations. @@ -103,7 +104,7 @@ class TestInsertStress(ImpalaTestSuite): @pytest.mark.execute_serially @pytest.mark.stress - @SkipIf.not_hdfs + @SkipIf.not_dfs @UniqueDatabase.parametrize(sync_ddl=True) def test_iceberg_inserts(self, unique_database): """Issues INSERT statements against multiple impalads in a way that some @@ -114,7 +115,7 @@ class TestInsertStress(ImpalaTestSuite): self.client.execute("""create table {0} (wid int, i int) stored as iceberg tblproperties('iceberg.catalog'='hadoop.catalog', 'iceberg.catalog_location'='{1}')""".format( - tbl_name, '/test-warehouse/' + unique_database)) + tbl_name, "{0}/{1}".format(WAREHOUSE, unique_database))) counter = Value('i', 0) num_writers = 4
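Usage note: the markers added to tests/common/skip.py follow the existing
pytest.mark.skipif pattern and can decorate either a test class or an
individual test, as the hunks above show. A minimal sketch follows; the class
and test names below are hypothetical, and only the imports and marker names
come from this patch:

    from tests.common.impala_test_suite import ImpalaTestSuite
    from tests.common.skip import SkipIf


    @SkipIf.not_dfs  # needs HDFS or Ozone filesystem semantics
    class TestDfsOnlyExample(ImpalaTestSuite):
      """Hypothetical suite illustrating the markers added above."""

      @SkipIf.not_scratch_fs  # remote scratch spilling is HDFS-only until IMPALA-11730
      def test_remote_spill_example(self, vector):
        pass

      @SkipIf.hardcoded_uris  # Iceberg delete files hardcode full hdfs:// URIs
      def test_read_position_deletes_example(self, vector):
        pass

Tests that genuinely need HDFS admin tooling (such as putting HDFS in
safemode via dfsadmin) keep the narrower SkipIf.not_hdfs marker.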