This change adds get_workload() to ImpalaTestSuite and removes it from all
test suites that already returned 'functional-query'. get_workload() is also
removed from CustomClusterTestSuite, which used to return 'tpch'. All other
changes besides impala_test_suite.py and custom_cluster_test_suite.py are
just mass removals of get_workload() functions.

The behavior only changes in custom cluster tests that didn't override
get_workload(). By returning 'functional-query' instead of 'tpch',
exploration_strategy() will no longer return 'core' in 'exhaustive' test
runs. See IMPALA-3947 on why the workload affected exploration_strategy().
An example of an affected test is TestCatalogHMSFailures, which was skipped
in both core and exhaustive runs before this change.

get_workload() functions that return a workload other than
'functional-query' are not changed - it is possible that some of these also
don't handle exploration_strategy() as expected, but individually checking
those tests is out of scope for this patch.

Change-Id: I9ec6c41ffb3a30e1ea2de773626d1485c69fe115
Reviewed-on: http://gerrit.cloudera.org:8080/22726
Reviewed-by: Riza Suminto <riza.suminto@cloudera.com>
Reviewed-by: Daniel Becker <daniel.becker@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
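For context, a minimal sketch of the pattern the commit describes: a shared
default in the base suite that subclasses override only when they genuinely
run a different workload. The class and method names match the real test
framework, but the bodies and the '_strategy_per_workload' mapping are
illustrative assumptions, not the actual Impala implementation:

class ImpalaTestSuite(object):
  # Hypothetical stand-in: the real framework derives the strategy from
  # command-line options, per workload (see IMPALA-3947).
  _strategy_per_workload = {'functional-query': 'exhaustive'}

  @classmethod
  def get_workload(cls):
    # Shared default added by this change; suites that genuinely run a
    # different workload (e.g. 'tpch') still override this.
    return 'functional-query'

  @classmethod
  def exploration_strategy(cls):
    # Because the strategy is resolved per workload, a suite that reports
    # 'tpch' can fall back to 'core' even in an exhaustive run.
    return cls._strategy_per_workload.get(cls.get_workload(), 'core')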
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

from __future__ import absolute_import, division, print_function
import getpass
import pytest

from tests.common.impala_test_suite import ImpalaTestSuite
from tests.common.skip import SkipIfFS
from tests.common.test_dimensions import (
    create_single_exec_option_dimension,
    create_uncompressed_text_dimension)
from tests.common.test_vector import ImpalaTestDimension
from tests.util.shell_util import exec_process

TEST_DB = 'test_encryption_db'

# PyWebHdfs takes absolute paths without a leading slash
PYWEBHDFS_TMP_DIR = 'tmp/test_encryption_load_data'
TMP_DIR = '/%s' % (PYWEBHDFS_TMP_DIR)


@SkipIfFS.hdfs_encryption
@pytest.mark.execute_serially
class TestHdfsEncryption(ImpalaTestSuite):
  ''' Tests LOAD DATA commands work between HDFS encryption zones.

      A test directory is created with data to be loaded into a test table.
      The test directory and/or the table directory are created as encryption
      zones with different keys, and the LOAD DATA command is executed. The
      tests operate on both partitioned and non-partitioned tables.
  '''
  @classmethod
  def add_test_dimensions(cls):
    super(TestHdfsEncryption, cls).add_test_dimensions()
    cls.ImpalaTestMatrix.add_dimension(create_single_exec_option_dimension())
    cls.ImpalaTestMatrix.add_dimension(
        create_uncompressed_text_dimension(cls.get_workload()))

    PARTITIONED = [True, False]
    # For 'core', just test loading from a directory that is encrypted.
    KEY_LOAD_DIR = ["testkey1"]
    KEY_TBL_DIR = [None]

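    # In exhaustive runs, test the full cross product of encryption keys.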
    if cls.exploration_strategy() == 'exhaustive':
      KEY_LOAD_DIR = [None, "testkey1", "testkey2"]
      KEY_TBL_DIR = [None, "testkey1", "testkey2"]

    cls.ImpalaTestMatrix.add_dimension(
        ImpalaTestDimension('partitioned', *PARTITIONED))
    cls.ImpalaTestMatrix.add_dimension(
        ImpalaTestDimension('key_load_dir', *KEY_LOAD_DIR))
    cls.ImpalaTestMatrix.add_dimension(
        ImpalaTestDimension('key_tbl_dir', *KEY_TBL_DIR))
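    # Only keep vectors where at least one of the two directories is encrypted.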
    cls.ImpalaTestMatrix.add_constraint(lambda v:
        v.get_value('key_load_dir') is not None
        or v.get_value('key_tbl_dir') is not None)

  def setup_method(self, method):
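    """Creates a fresh test database and staging directory, and re-creates the
    user's HDFS trash hierarchy if it is missing."""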
    self.__cleanup()
    self.client.execute('create database if not exists %s' % TEST_DB)
    self.client.execute('use %s' % TEST_DB)
    self.hdfs_client.make_dir(PYWEBHDFS_TMP_DIR)
    # A few tests depend on the .Trash directory being present. In case it
    # doesn't exist, create a random text file and delete it so that hdfs
    # recreates the trash hierarchy.
    if not self.hdfs_client.exists("/user/{0}/.Trash/".format(getpass.getuser())):
      self.hdfs_client.create_file("test-warehouse/random", file_data="random")
      rc, stdout, stderr = exec_process("hadoop fs -rm /test-warehouse/random")
      assert rc == 0, 'Error re-creating trash: %s %s' % (stdout, stderr)

  def teardown_method(self, method):
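    """Drops the test database and tables, then empties the user's trash so
    that later tests aren't affected."""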
    self.__cleanup()
    # Clean up the trash directory so that further tests aren't affected
    rc, stdout, stderr = exec_process(
        "hadoop fs -rmr /user/{0}/.Trash/".format(getpass.getuser()))
    assert rc == 0, 'Error deleting Trash: %s %s' % (stdout, stderr)

  def create_encryption_zone(self, key, path):
    """Creates an encryption zone using key 'key' on path 'path'"""
    rc, stdout, stderr = exec_process(
        "hdfs crypto -createZone -keyName %s -path %s" % (key, path))
    assert rc == 0, 'Error creating encryption zone: %s %s' % (stdout, stderr)

  def __cleanup(self):
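    """Drops the test tables and database and removes the staging directory."""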
    self.client.execute('use default')
    self.client.execute('drop table if exists %s.tbl purge' % TEST_DB)
    self.client.execute('drop table if exists %s.t1 purge' % TEST_DB)
    self.cleanup_db(TEST_DB)
    self.hdfs_client.delete_file_dir(PYWEBHDFS_TMP_DIR, recursive=True)

  def test_load_data(self, vector):
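    """Runs LOAD DATA into a test table where the staging directory and/or the
    table directory are encryption zones, as chosen by the test vector."""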
    key_tbl_dir = vector.get_value('key_tbl_dir')
    key_load_dir = vector.get_value('key_load_dir')

    if vector.get_value('partitioned'):
      src_file = "/test-warehouse/alltypes/year=2010/month=1/100101.txt"
      src_tbl_schema = "functional.alltypes"
    else:
      src_file = "/test-warehouse/tinytable/data.csv"
      src_tbl_schema = "functional.tinytable"

    if key_load_dir is not None:
      rc, stdout, stderr = exec_process(
          'hdfs crypto -createZone -keyName %s -path %s' % (key_load_dir, TMP_DIR))
      assert rc == 0, 'Error executing hdfs crypto: %s %s' % (stdout, stderr)

    # hdfs_client doesn't support copy
    rc, stdout, stderr = exec_process('hdfs dfs -cp %s %s' % (src_file, TMP_DIR))
    assert rc == 0, 'Error executing hdfs cp: %s %s' % (stdout, stderr)

    self.client.execute('create table tbl like %s' % (src_tbl_schema))

    if key_tbl_dir is not None:
      rc, stdout, stderr = exec_process(
          'hdfs crypto -createZone -keyName %s -path /test-warehouse/%s.db/tbl' %
          (key_tbl_dir, TEST_DB))
      assert rc == 0, 'Error executing hdfs crypto: %s %s' % (stdout, stderr)

    if vector.get_value('partitioned'):
      # insert a single value to create the partition spec
      self.client.execute('insert into tbl partition (year=2010, month=1) '
          'values (0,true,0,0,0,0,0,0,NULL,NULL,NULL)')
      self.client.execute('load data inpath \'%s\' into table tbl '
          'partition(year=2010, month=1)' % (TMP_DIR))
    else:
      self.client.execute('load data inpath \'%s\' into table tbl ' % (TMP_DIR))

  @pytest.mark.execute_serially
  def test_drop_partition_encrypt(self):
    """Verifies that 'alter <tbl> drop partition purge' works in the case where
    the trash directory and the partition directory are in different encryption
    zones. See IMPALA-2310 for details."""
    self.client.execute("create table {0}.t1(i int) partitioned by (j int)"
        .format(TEST_DB))
    # Add three partitions (j=1), (j=2), (j=3) to table t1
    self.client.execute("alter table {0}.t1 add partition(j=1)".format(TEST_DB))
    self.client.execute("alter table {0}.t1 add partition(j=2)".format(TEST_DB))
    self.client.execute("alter table {0}.t1 add partition(j=3)".format(TEST_DB))
    # Empty the trash directory so that encryption zones can be created on it
    rc, stdout, stderr = exec_process(
        "hadoop fs -rm -r /user/{0}/.Trash/*".format(getpass.getuser()))
    assert rc == 0, 'Error deleting Trash: %s %s' % (stdout, stderr)
    # Create the necessary encryption zones
    self.create_encryption_zone("testkey1", "/test-warehouse/{0}.db/t1/j=1"
        .format(TEST_DB))
    self.create_encryption_zone("testkey2", "/test-warehouse/{0}.db/t1/j=2"
        .format(TEST_DB))
    self.create_encryption_zone("testkey1", "/test-warehouse/{0}.db/t1/j=3"
        .format(TEST_DB))

    # HDFS 2.8+ behavior is to create individual trash per encryption zone;
    # don't create an encryption zone on .Trash in that case, otherwise
    # recursive trash is created.
    has_own_trash = self.hdfs_client.exists(
        "/test-warehouse/{0}.db/t1/j=1/.Trash".format(TEST_DB))
    if not has_own_trash:
      self.create_encryption_zone("testkey2", "/user/{0}/.Trash/".format(
          getpass.getuser()))

    # Load sample data into the partition directories
    self.hdfs_client.create_file("test-warehouse/{0}.db/t1/j=1/j1.txt"
        .format(TEST_DB), file_data='j1')
    self.hdfs_client.create_file("test-warehouse/{0}.db/t1/j=2/j2.txt"
        .format(TEST_DB), file_data='j2')
    self.hdfs_client.create_file("test-warehouse/{0}.db/t1/j=3/j3.txt"
        .format(TEST_DB), file_data='j3')

    # Drop the partition (j=1) without purge and make sure the partition
    # directory still exists. This is expected because the .Trash directory and
    # the warehouse directory are in different encryption zones (prior to
    # HDFS 2.8).
    if not has_own_trash:
      self.execute_query_expect_failure(self.client,
          "alter table {0}.t1 drop partition(j=1)".format(TEST_DB))
      assert self.hdfs_client.exists("test-warehouse/{0}.db/t1/j=1/j1.txt".format(TEST_DB))
      assert self.hdfs_client.exists("test-warehouse/{0}.db/t1/j=1".format(TEST_DB))
    else:
      # On HDFS 2.8+ the query succeeds: the partition directory first moves to
      # its own per-zone trash and, once the directory is destroyed, ends up in
      # the user's trash.
      self.client.execute("alter table {0}.t1 drop partition(j=1)".format(TEST_DB))
      assert self.hdfs_client.exists(
          "/user/{0}/.Trash/Current/test-warehouse/{1}.db/t1/j=1/j1.txt"
          .format(getpass.getuser(), TEST_DB))
      assert not self.hdfs_client.exists("test-warehouse/{0}.db/t1/j=1/j1.txt".format(TEST_DB))
      assert not self.hdfs_client.exists("test-warehouse/{0}.db/t1/j=1".format(TEST_DB))

    # Drop the partition j=2 (with purge) and make sure the partition directory
    # is deleted
    self.client.execute("alter table {0}.t1 drop partition(j=2) purge".format(TEST_DB))
    assert not self.hdfs_client.exists("test-warehouse/{0}.db/t1/j=2/j2.txt".format(TEST_DB))
    assert not self.hdfs_client.exists("test-warehouse/{0}.db/t1/j=2".format(TEST_DB))
    # Drop the partition j=3 (with purge) and make sure the partition is
    # deleted. This is the case where the trash directory and the partition
    # data directory are in different encryption zones. Using purge should
    # delete the partition data permanently by skipping the trash.
    self.client.execute("alter table {0}.t1 drop partition(j=3) purge".format(TEST_DB))
    assert not self.hdfs_client.exists("test-warehouse/{0}.db/t1/j=3/j3.txt".format(TEST_DB))
    assert not self.hdfs_client.exists("test-warehouse/{0}.db/t1/j=3".format(TEST_DB))

  @pytest.mark.execute_serially
  def test_drop_table_encrypt(self):
    """Verifies that 'drop <table> purge' works in the case where the trash
    directory and the table directory are in different encryption zones."""
    self.client.execute("create table {0}.t3(i int)".format(TEST_DB))

    # Empty the trash directory so that an encryption zone can be created on it
    rc, stdout, stderr = exec_process(
        "hadoop fs -rmr /user/{0}/.Trash/*".format(getpass.getuser()))
    assert rc == 0, 'Error deleting Trash: %s %s' % (stdout, stderr)
    # Create the table directory and the trash directory in different
    # encryption zones
    self.create_encryption_zone("testkey1", "/test-warehouse/{0}.db/t3".format(TEST_DB))
    self.create_encryption_zone("testkey2", "/user/{0}/.Trash/".format(getpass.getuser()))
    self.client.execute("drop table {0}.t3 purge".format(TEST_DB))
    assert not self.hdfs_client.exists("test-warehouse/{0}.db/t3".format(TEST_DB))