IMPALA-9071: Fix wrong table path of transactional tables created by CTAS

The previous patch for IMPALA-9071 assumes that all tables created by
CTAS statements are non-transactional tables. This is wrong, since a
CTAS statement can also specify tblproperties and can therefore create
a transactional table.

This patch fixes the hard-coded external-table check. Instead, we decide
based on whether the table is transactional. If it is not, it will be
translated to an external table by HMS.

Tests:
 - Add coverage for creating transactional tables by CTAS.

Change-Id: I4b585216e33e4f7962b19ae2351165288691eaf2
Reviewed-on: http://gerrit.cloudera.org:8080/14546
Reviewed-by: Joe McDonnell <joemcdonnell@cloudera.com>
Reviewed-by: Zoltan Borok-Nagy <boroknagyz@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
This commit is contained in:
stiga-huang
2019-10-25 16:45:08 +08:00
committed by Joe McDonnell
parent 9100a98273
commit 0f70ade0d7
4 changed files with 49 additions and 27 deletions

View File

@@ -15,11 +15,12 @@
# specific language governing permissions and limitations
# under the License.
from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
import pytest
from os import getenv
from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
from tests.common.skip import SkipIfHive2
HIVE_SITE_EXT_DIR = getenv('IMPALA_HOME') + '/fe/src/test/resources/hive-site-ext'
@@ -33,6 +34,7 @@ class TestCustomHiveConfigs(CustomClusterTestSuite):
super(TestCustomHiveConfigs, cls).setup_class()
# TODO: Remove the xfail marker after bumping CDP_BUILD_NUMBER to contain HIVE-22158
@SkipIfHive2.acid
@pytest.mark.xfail(run=True, reason="May fail on Hive3 versions without HIVE-22158")
@pytest.mark.execute_serially
@CustomClusterTestSuite.with_args(hive_conf_dir=HIVE_SITE_EXT_DIR)
@@ -42,25 +44,37 @@ class TestCustomHiveConfigs(CustomClusterTestSuite):
'metastore.warehouse.external.dir' is different from 'metastore.warehouse.dir'
in Hive.
"""
self.execute_query_expect_success(
self.client, 'create table %s.ctas_tbl as select 1, 2, "name"' %
unique_database)
res = self.execute_query_expect_success(
self.client, 'select * from %s.ctas_tbl' % unique_database)
assert '1\t2\tname' == res.get_data()
# Test creating non-ACID managed table by CTAS. The HMS transformer will translate it
# into an external table. But we should still be able to read/write it correctly.
self.__check_query_results(
unique_database + '.ctas_tbl', '1\t2\tname',
'create table %s as select 1, 2, "name"')
self.execute_query_expect_success(
self.client, 'create external table %s.ctas_ext_tbl as select 1, 2, "name"' %
unique_database)
# Test creating non-ACID external table by CTAS.
self.__check_query_results(
unique_database + '.ctas_ext_tbl', '1\t2\tname',
'create external table %s as select 1, 2, "name"')
# Set "external.table.purge"="true" so we can clean files of the external table
# finally.
self.execute_query_expect_success(
self.client, 'alter table %s.ctas_ext_tbl set tblproperties'
'("external.table.purge"="true")' % unique_database)
res = self.execute_query_expect_success(
self.client, 'select * from %s.ctas_ext_tbl' % unique_database)
assert '1\t2\tname' == res.get_data()
# Explicitly drop the database with CASCADE to clean files of the external table
self.execute_query_expect_success(
self.client, 'drop database if exists cascade' + unique_database)
# Test creating insert-only ACID managed table by CTAS.
self.__check_query_results(
unique_database + '.insertonly_acid_ctas', '1\t2\tname',
'create table %s '
'tblproperties("transactional"="true", "transactional_properties"="insert_only") '
'as select 1, 2, "name"')
# Test creating insert-only ACID external table by CTAS. Should not be allowed.
self.execute_query_expect_failure(
self.client,
'create external table %s.insertonly_acid_ext_ctas '
'tblproperties("transactional"="true", "transactional_properties"="insert_only") '
'as select 1, 2, "name"' % unique_database)
def __check_query_results(self, table, expected_results, query_format):
  """Run a table-creating statement, then verify the table's contents.

  'query_format' is a statement template containing a single '%s' placeholder
  that is filled in with 'table'. After the statement succeeds, all rows are
  read back with a 'select *' and the data returned by the result's
  get_data() is compared against 'expected_results'.
  """
  create_stmt = query_format % table
  self.execute_query_expect_success(self.client, create_stmt)
  # Read everything back from the table that the statement above populated.
  select_stmt = "select * from " + table
  select_result = self.execute_query_expect_success(self.client, select_stmt)
  actual_results = select_result.get_data()
  assert expected_results == actual_results