IMPALA-13959: Fix TestHmsIntegration.test_change_parquet_column_type

TestHmsIntegration.test_change_parquet_column_type fails in exhaustive
mode due to a missing int parsing introduced by IMPALA-13920.

This patch adds the missing int parsing. It also fixes flake8 issues
in test_hms_integration.py, including the unused vector fixture.

Testing:
Ran test_hms_integration.py in exhaustive mode and it passed.

Change-Id: If5fb9f96b4087e86b0ebaac7135e14b7a14936ea
Reviewed-on: http://gerrit.cloudera.org:8080/22774
Reviewed-by: Csaba Ringhofer <csringhofer@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
This commit is contained in:
Riza Suminto
2025-04-12 22:29:07 -07:00
committed by Impala Public Jenkins
parent bfa4402c13
commit 50a98dce46

View File

@@ -51,7 +51,7 @@ class TestHmsIntegrationSanity(ImpalaTestSuite):
create_uncompressed_text_dimension(cls.get_workload()))
@pytest.mark.execute_serially
def test_sanity(self, vector, cluster_properties):
def test_sanity(self, cluster_properties):
"""Verifies that creating a catalog entity (database, table) in Impala using
'IF NOT EXISTS' while the entity exists in HMS, does not throw an error."""
# Create a database in Hive
@@ -129,6 +129,7 @@ class TestHmsIntegrationSanity(ImpalaTestSuite):
finally:
self.run_stmt_in_hive("drop database %s cascade" % db)
@SkipIfFS.hive
class TestHmsIntegration(ImpalaTestSuite):
@@ -159,7 +160,7 @@ class TestHmsIntegration(ImpalaTestSuite):
'create database if not exists ' + self.db_name)
return self.db_name
def __exit__(self, typ, value, traceback):
def __exit__(self, typ, value, traceback): # noqa: U100
self.impala.cleanup_db(self.db_name)
class ImpalaTableWrapper(object):
@@ -179,7 +180,7 @@ class TestHmsIntegration(ImpalaTestSuite):
(self.table_name, self.table_spec))
return self.table_name
def __exit__(self, typ, value, traceback):
def __exit__(self, typ, value, traceback): # noqa: U100
self.impala.client.execute('drop table if exists %s' % self.table_name)
def impala_table_stats(self, table):
@@ -297,26 +298,24 @@ class TestHmsIntegration(ImpalaTestSuite):
command, strs_in_error)
@pytest.mark.execute_serially
def test_hive_db_hive_table_add_partition(self, vector):
self.add_hive_partition_helper(vector, HiveDbWrapper, HiveTableWrapper)
def test_hive_db_hive_table_add_partition(self):
self.add_hive_partition_helper(HiveDbWrapper, HiveTableWrapper)
@pytest.mark.execute_serially
def test_hive_db_impala_table_add_partition(self, vector):
self.add_hive_partition_helper(vector, HiveDbWrapper, self.ImpalaTableWrapper)
def test_hive_db_impala_table_add_partition(self):
self.add_hive_partition_helper(HiveDbWrapper, self.ImpalaTableWrapper)
@pytest.mark.execute_serially
def test_impala_db_impala_table_add_partition(self, vector):
self.add_hive_partition_helper(vector, self.ImpalaDbWrapper,
self.ImpalaTableWrapper)
def test_impala_db_impala_table_add_partition(self):
self.add_hive_partition_helper(self.ImpalaDbWrapper, self.ImpalaTableWrapper)
@pytest.mark.execute_serially
def test_impala_db_hive_table_add_partition(self, vector):
self.add_hive_partition_helper(vector, self.ImpalaDbWrapper,
HiveTableWrapper)
def test_impala_db_hive_table_add_partition(self):
self.add_hive_partition_helper(self.ImpalaDbWrapper, HiveTableWrapper)
@pytest.mark.xfail(run=False, reason="This is a bug: IMPALA-2426")
@pytest.mark.execute_serially
def test_incremental_stats_new_partition(self, vector):
def test_incremental_stats_new_partition(self):
with self.ImpalaDbWrapper(self, self.unique_string()) as db_name:
with self.ImpalaTableWrapper(self, db_name + '.' + self.unique_string(),
'(x int) partitioned by (y int)') as table_name:
@@ -330,7 +329,7 @@ class TestHmsIntegration(ImpalaTestSuite):
assert '0' == table_stats[('333',)]['#rows']
assert '0' == table_stats[('333',)]['#files']
def add_hive_partition_helper(self, vector, DbWrapper, TableWrapper):
def add_hive_partition_helper(self, DbWrapper, TableWrapper):
"""
Partitions added in Hive can be viewed in Impala after computing stats.
"""
@@ -348,10 +347,9 @@ class TestHmsIntegration(ImpalaTestSuite):
assert [('333', '5309')] == self.get_impala_partition_info(table_name, 'y', 'z')
# Impala's compute stats didn't alter Hive's knowledge of the partition
assert ['y=333/z=5309'] == self.hive_partition_names(table_name)
self.add_hive_partition_table_stats_helper(vector, DbWrapper, TableWrapper)
self.add_hive_partition_table_stats_helper(DbWrapper, TableWrapper)
def add_hive_partition_table_stats_helper(
self, vector, DbWrapper, TableWrapper):
def add_hive_partition_table_stats_helper(self, DbWrapper, TableWrapper):
"""
Partitions added in Hive don't make Impala's table stats incorrect.
"""
@@ -376,7 +374,7 @@ class TestHmsIntegration(ImpalaTestSuite):
('42', '867')]
@pytest.mark.execute_serially
def test_add_impala_partition(self, vector):
def test_add_impala_partition(self):
"""
Partitions added in Impala can be viewed in Hive immediately
"""
@@ -390,7 +388,7 @@ class TestHmsIntegration(ImpalaTestSuite):
assert ['y=42/z=867'] == self.hive_partition_names(table_name)
@pytest.mark.execute_serially
def test_drop_column_maintains_stats(self, vector):
def test_drop_column_maintains_stats(self):
"""
Dropping a column in Impala doesn't alter the stats of other columns in Hive
or Impala.
@@ -424,7 +422,7 @@ class TestHmsIntegration(ImpalaTestSuite):
'x']
@pytest.mark.execute_serially
def test_select_without_compute_stats(self, vector):
def test_select_without_compute_stats(self):
"""
Data added in Hive shows up in Impala 'select *', and if the table is not
partitioned, 'compute incremental stats' is not required.
@@ -453,7 +451,7 @@ class TestHmsIntegration(ImpalaTestSuite):
@pytest.mark.xfail(run=False, reason="This is a bug: IMPALA-2458")
@pytest.mark.execute_serially
def test_overwrite_added_column(self, vector):
def test_overwrite_added_column(self):
"""
Impala can't overwrite Hive's column types, and vice versa.
"""
@@ -488,7 +486,7 @@ class TestHmsIntegration(ImpalaTestSuite):
@SkipIfHive3.col_stat_separated_by_engine
@pytest.mark.execute_serially
def test_compute_stats_get_to_hive(self, vector):
def test_compute_stats_get_to_hive(self):
"""Stats computed in Impala are also visible in Hive."""
with self.ImpalaDbWrapper(self, self.unique_string()) as db_name:
with self.ImpalaTableWrapper(self, db_name + '.' + self.unique_string(),
@@ -504,7 +502,7 @@ class TestHmsIntegration(ImpalaTestSuite):
@SkipIfHive3.col_stat_separated_by_engine
@pytest.mark.execute_serially
def test_compute_stats_get_to_impala(self, vector):
def test_compute_stats_get_to_impala(self):
"""Column stats computed in Hive are also visible in Impala."""
with HiveDbWrapper(self, self.unique_string()) as db_name:
with HiveTableWrapper(self, db_name + '.' + self.unique_string(),
@@ -529,7 +527,7 @@ class TestHmsIntegration(ImpalaTestSuite):
assert '0' == new_impala_stats['x']['#nulls']
@SkipIfHive2.col_stat_not_separated_by_engine
def test_engine_separates_col_stats(self, vector):
def test_engine_separates_col_stats(self):
"""
The 'engine' column in TAB_COL_STATS and PART_COL_STATS HMS tables is used to
differentiate among column stats computed by different engines.
@@ -577,7 +575,7 @@ class TestHmsIntegration(ImpalaTestSuite):
assert '0' == hive_x_stats['num_nulls']
@SkipIfHive2.col_stat_not_separated_by_engine
def test_engine_separates_partitioned_col_stats(self, vector):
def test_engine_separates_partitioned_col_stats(self):
"""
The 'engine' column in TAB_COL_STATS and PART_COL_STATS HMS tables is used to
differentiate among column stats computed by different engines.
@@ -628,7 +626,7 @@ class TestHmsIntegration(ImpalaTestSuite):
assert '0' == hive_x_stats['num_nulls']
@pytest.mark.execute_serially
def test_drop_partition(self, vector):
def test_drop_partition(self):
"""
Impala can see that a partitions was dropped by Hive by invalidating
metadata.
@@ -647,7 +645,7 @@ class TestHmsIntegration(ImpalaTestSuite):
'select * from %s' % table_name).get_data()
@pytest.mark.execute_serially
def test_drop_column_with_data(self, vector):
def test_drop_column_with_data(self):
"""Columns dropped by Hive are ignored in Impala 'select *'."""
with self.ImpalaDbWrapper(self, self.unique_string()) as db_name:
with self.ImpalaTableWrapper(self, db_name + '.' + self.unique_string(),
@@ -660,7 +658,7 @@ class TestHmsIntegration(ImpalaTestSuite):
'select * from %s' % table_name).get_data()
@pytest.mark.execute_serially
def test_add_column(self, vector):
def test_add_column(self):
"""Columns added in one engine are visible in the other via DESCRIBE."""
with self.ImpalaDbWrapper(self, self.unique_string()) as db_name:
with self.ImpalaTableWrapper(self, db_name + '.' + self.unique_string(),
@@ -682,14 +680,14 @@ class TestHmsIntegration(ImpalaTestSuite):
assert expected == self.impala_columns(table_name)
@pytest.mark.execute_serially
def test_drop_database(self, vector):
def test_drop_database(self):
"""
If a DB is created, then dropped, in Hive, Impala can create one with the
same name without invalidating metadata.
"""
test_db = self.unique_string()
with HiveDbWrapper(self, test_db) as db_name:
with HiveDbWrapper(self, test_db):
pass
# if events processing is turned on we should make sure that the drop
# database event above is processed to avoid flakiness
@@ -701,11 +699,11 @@ class TestHmsIntegration(ImpalaTestSuite):
self.unique_string()),
'Database does not exist: %s' %
test_db)
with self.ImpalaDbWrapper(self, test_db) as db_name:
with self.ImpalaDbWrapper(self, test_db):
pass
@pytest.mark.execute_serially
def test_table_format_change(self, vector):
def test_table_format_change(self):
"""
Hive storage format changes propagate to Impala.
"""
@@ -724,7 +722,7 @@ class TestHmsIntegration(ImpalaTestSuite):
assert 'AVRO' == self.impala_table_stats(table_name)[()]['format']
@pytest.mark.execute_serially
def test_change_column_type(self, vector):
def test_change_column_type(self):
"""Hive column type changes propagate to Impala."""
with HiveDbWrapper(self, self.unique_string()) as db_name:
@@ -741,7 +739,7 @@ class TestHmsIntegration(ImpalaTestSuite):
assert 'string' == self.impala_columns(table_name)['y']['type']
@pytest.mark.execute_serially
def test_change_parquet_column_type(self, vector):
def test_change_parquet_column_type(self):
"""
Changing column types in Parquet doesn't always work in Hive and it causes
'select *' to fail in Impala as well, after invalidating metadata. This is a
@@ -786,7 +784,7 @@ class TestHmsIntegration(ImpalaTestSuite):
# The error message is different in newer Javas than in 17
# TODO: find out which version changed it exactly
err_msg = err_msg.format(
"class " if os.environ.get('IMPALA_JDK_VERSION_NUM') >= 17 else "")
"class " if int(os.environ.get('IMPALA_JDK_VERSION_NUM')) >= 17 else "")
self.assert_sql_error(
self.run_stmt_in_hive, 'select * from %s' % table_name, err_msg)
self.client.execute('invalidate metadata %s' % table_name)
@@ -795,7 +793,7 @@ class TestHmsIntegration(ImpalaTestSuite):
"Column type: INT, Parquet schema:")
@SkipIfHive2.acid
def test_acid_inserts(self, vector, unique_database):
def test_acid_inserts(self, unique_database):
"""
Insert data to insert-only ACID table from Impala and checks that Hive is able to
see the data.
@@ -858,7 +856,7 @@ class TestHmsIntegration(ImpalaTestSuite):
assert '4,41' == hive_result[4]
@SkipIfHive2.acid
def test_drop_acid_table(self, vector, unique_database):
def test_drop_acid_table(self, unique_database):
"""
Tests that a transactional table dropped by Impala is also dropped if we check from
Hive.
@@ -876,7 +874,7 @@ class TestHmsIntegration(ImpalaTestSuite):
assert "acid_insert" not in show_tables_result_after_drop
@SkipIfHive2.acid
def test_truncate_acid_table(self, vector, unique_database):
def test_truncate_acid_table(self, unique_database):
"""
Tests that a transactional table truncated by Impala shows no rows when
queried by Hive.
@@ -896,7 +894,7 @@ class TestHmsIntegration(ImpalaTestSuite):
assert "0" == query_result_after_truncate.split('\n')[1]
@pytest.mark.execute_serially
def test_change_table_name(self, vector):
def test_change_table_name(self):
"""
Changing the table name in Hive propagates to Impala after 'invalidate
metadata'.
@@ -919,7 +917,7 @@ class TestHmsIntegration(ImpalaTestSuite):
'Could not resolve path')
@pytest.mark.execute_serially
def test_impala_partitions_accessible_in_hive(self, vector):
def test_impala_partitions_accessible_in_hive(self):
"""
IMPALA-1670, IMPALA-4141: Partitions added in Impala are accessible through Hive
"""