Some tests have constraints that were there only to help reduce runtime, which
reduces coverage when running in exhaustive mode. The majority of the
constraints exist because it adds no value to run the test across additional
dimensions (or it is invalid to run with those dimensions). Updates the tests
that have legitimate constraints to use two new helper methods for
constraining the table format dimension:

  create_uncompressed_text_dimension()
  create_parquet_dimension()

Each creates a dimension that produces a single test vector: uncompressed
text or Parquet, respectively.

Change-Id: Id85387c1efd5d192f8059ef89934933389bfe247
Reviewed-on: http://gerrit.ent.cloudera.com:8080/2149
Reviewed-by: Lenni Kuff <lskuff@cloudera.com>
Tested-by: jenkins
(cherry picked from commit e02acbd469bc48c684b2089405b4a20552802481)
Reviewed-on: http://gerrit.ent.cloudera.com:8080/2290
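
The commit message does not show the helpers themselves. For reference, a
minimal sketch of what they could look like, assuming the TestDimension and
TableFormatInfo types used by Impala's Python test framework; the helper
get_dataset_from_workload and the 'text/none' / 'parquet/none' format strings
are illustrative assumptions here, not taken from this change:

  def create_uncompressed_text_dimension(workload):
    # Constrain the 'table_format' dimension to a single value so the test
    # matrix produces exactly one test vector (uncompressed text).
    dataset = get_dataset_from_workload(workload)  # assumed helper
    return TestDimension('table_format',
        TableFormatInfo.create_from_string(dataset, 'text/none'))

  def create_parquet_dimension(workload):
    # Same idea, pinned to Parquet instead of text.
    dataset = get_dataset_from_workload(workload)  # assumed helper
    return TestDimension('table_format',
        TableFormatInfo.create_from_string(dataset, 'parquet/none'))
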
#!/usr/bin/env python
# Copyright (c) 2012 Cloudera, Inc. All rights reserved.
#
# Impala tests for DDL statements
import logging
import pytest
import re
import shlex
import time
from subprocess import call
from tests.common.test_result_verifier import *
from tests.common.test_vector import *
from tests.common.test_dimensions import ALL_NODES_ONLY, create_uncompressed_text_dimension
from tests.common.impala_test_suite import *

# Validates DDL statements (create, drop)
class TestDdlStatements(ImpalaTestSuite):
  TEST_DBS = ['ddl_test_db', 'alter_table_test_db', 'alter_table_test_db2',
              'function_ddl_test', 'udf_test']

  @classmethod
  def get_workload(cls):
    return 'functional-query'

  @classmethod
  def add_test_dimensions(cls):
    super(TestDdlStatements, cls).add_test_dimensions()
    cls.TestMatrix.add_dimension(create_exec_option_dimension(
        cluster_sizes=ALL_NODES_ONLY,
        disable_codegen_options=[False],
        batch_sizes=[0],
        sync_ddl=[0, 1]))

    # There is no reason to run these tests using all dimensions.
    cls.TestMatrix.add_dimension(create_uncompressed_text_dimension(cls.get_workload()))

  def setup_method(self, method):
    self.cleanup()

    # Record the current number of queries that are in the 'EXCEPTION' state. Used for
    # verification after running each test case.
    self.start_exception_count = self.query_exception_count()
    self.cleanup_hdfs_dirs()

  def teardown_method(self, method):
    end_exception_count = self.query_exception_count()
    # The number of exceptions may be less than what was seen in setup if the queries
    # in the EXCEPTION state were bumped out of the FINISHED list. We should never see
    # an increase in the number of queries in the exception state.
    assert end_exception_count <= self.start_exception_count

  def query_exception_count(self):
    """Returns the number of occurrences of 'EXCEPTION' on the debug /queries page"""
    return len(re.findall('EXCEPTION',
        self.impalad_test_service.read_debug_webpage('queries')))

  def cleanup(self):
    """Drops all test databases and removes the test tables' HDFS directories."""
    map(self.cleanup_db, self.TEST_DBS)
    self.cleanup_hdfs_dirs()

  def cleanup_hdfs_dirs(self):
    # Clean up the test table HDFS dirs between test runs so there are no errors the
    # next time a table is created with the same location. This also helps remove any
    # stale data from the last test run.
    self.hdfs_client.delete_file_dir("test-warehouse/part_data/", recursive=True)
    self.hdfs_client.delete_file_dir("test-warehouse/t1_tmp1/", recursive=True)
    self.hdfs_client.delete_file_dir("test-warehouse/t_part_tmp/", recursive=True)

  @pytest.mark.execute_serially
  def test_drop_cleans_hdfs_dirs(self):
    self.hdfs_client.delete_file_dir("test-warehouse/ddl_test_db.db/", recursive=True)
    assert not self.hdfs_client.exists("test-warehouse/ddl_test_db.db/")

    self.client.execute('use default')
    self.client.execute('create database ddl_test_db')
    # Verify the db directory exists
    assert self.hdfs_client.exists("test-warehouse/ddl_test_db.db/")

    self.client.execute("create table ddl_test_db.t1(i int)")
    # Verify the table directory exists
    assert self.hdfs_client.exists("test-warehouse/ddl_test_db.db/t1/")

    # Dropping the table removes the table's directory and preserves the db's directory
    self.client.execute("drop table ddl_test_db.t1")
    assert not self.hdfs_client.exists("test-warehouse/ddl_test_db.db/t1/")
    assert self.hdfs_client.exists("test-warehouse/ddl_test_db.db/")

    # Dropping the db removes the db's directory
    self.client.execute("drop database ddl_test_db")
    assert not self.hdfs_client.exists("test-warehouse/ddl_test_db.db/")

  @pytest.mark.execute_serially
  def test_create(self, vector):
    vector.get_value('exec_option')['abort_on_error'] = False
    self.__create_db_synced('ddl_test_db', vector)
    self.run_test_case('QueryTest/create', vector, use_db='ddl_test_db',
        multiple_impalad=self.__use_multiple_impalad(vector))

  @pytest.mark.execute_serially
  def test_alter_table(self, vector):
    vector.get_value('exec_option')['abort_on_error'] = False
    # Create a directory for partition data that does not use the (key=value) format.
    self.hdfs_client.make_dir("test-warehouse/part_data/", permission=777)
    self.hdfs_client.create_file("test-warehouse/part_data/data.txt", file_data='1984')

    # Create test databases
    self.__create_db_synced('alter_table_test_db', vector)
    self.__create_db_synced('alter_table_test_db2', vector)
    self.run_test_case('QueryTest/alter-table', vector, use_db='alter_table_test_db',
        multiple_impalad=self.__use_multiple_impalad(vector))

  @pytest.mark.execute_serially
  def test_views_ddl(self, vector):
    vector.get_value('exec_option')['abort_on_error'] = False
    self.__create_db_synced('ddl_test_db', vector)
    self.run_test_case('QueryTest/views-ddl', vector, use_db='ddl_test_db',
        multiple_impalad=self.__use_multiple_impalad(vector))

  @pytest.mark.execute_serially
  def test_functions_ddl(self, vector):
    self.__create_db_synced('function_ddl_test', vector)
    self.run_test_case('QueryTest/functions-ddl', vector, use_db='function_ddl_test',
        multiple_impalad=self.__use_multiple_impalad(vector))

  @pytest.mark.execute_serially
  def test_create_drop_function(self, vector):
    # This will create, run, and drop the same function repeatedly, exercising the
    # lib cache mechanism.
    # TODO: it's hard to tell that the cache is working (i.e. if it did
    # nothing to drop the cache, these tests would still pass). Testing
    # that is a bit harder and requires us to update the udf binary in
    # the middle.
    create_fn_stmt = """create function f() returns int
        location '/test-warehouse/libTestUdfs.so' symbol='NoArgs'"""
    select_stmt = """select f() from functional.alltypes limit 10"""
    drop_fn_stmt = "drop function f()"
    self.__create_db_synced('udf_test', vector)
    self.client.set_configuration(vector.get_value('exec_option'))

    self.client.execute("use udf_test")
    self.client.execute("drop function if exists f()")
    for i in xrange(1, 10):
      self.client.execute(create_fn_stmt)
      self.client.execute(select_stmt)
      self.client.execute(drop_fn_stmt)

  @pytest.mark.execute_serially
  def test_create_alter_bulk_partition(self, vector):
    # Only run with the exhaustive exploration strategy; this doesn't add much extra
    # coverage to the existing test cases and takes a couple of minutes to execute.
    if self.exploration_strategy() != 'exhaustive': pytest.skip()

    self.client.execute("use default")
    self.client.execute("drop table if exists foo_part")
    self.client.execute("create table foo_part(i int) partitioned by(j int, s string)")

    # Add some partitions
    for i in xrange(10):
      start = time.time()
      self.client.execute("alter table foo_part add partition(j=%d, s='%s')" % (i, i))
      print 'ADD PARTITION #%d exec time: %s' % (i, time.time() - start)

    # Modify one of the partitions
    self.client.execute("""alter table foo_part partition(j=5, s='5')
        set fileformat parquetfile""")

    # Add some more partitions
    for i in xrange(10, 50):
      start = time.time()
      self.client.execute("alter table foo_part add partition(j=%d, s='%s')" % (i, i))
      print 'ADD PARTITION #%d exec time: %s' % (i, time.time() - start)

    # Insert data and verify it shows up.
    self.client.execute("insert into table foo_part partition(j=5, s='5') select 1")
    assert '1' == self.execute_scalar("select count(*) from foo_part")
    self.client.execute("drop table foo_part")

  @pytest.mark.execute_serially
  def test_create_alter_tbl_properties(self, vector):
    self.__create_db_synced('alter_table_test_db', vector)
    self.client.execute("use alter_table_test_db")

    # Specify TBLPROPERTIES and SERDEPROPERTIES at CREATE time
    self.client.execute("""create table test_alter_tbl (i int)
        with serdeproperties ('s1'='s2', 's3'='s4')
        tblproperties ('p1'='v0', 'p1'='v1')""")
    properties = self.__get_tbl_properties('test_alter_tbl')

    assert len(properties) == 2
    # The transient_lastDdlTime is variable, so don't verify the value.
    assert 'transient_lastDdlTime' in properties
    del properties['transient_lastDdlTime']
    assert {'p1': 'v1'} == properties

    properties = self.__get_serde_properties('test_alter_tbl')
    assert {'s1': 's2', 's3': 's4'} == properties

    # Modify the SERDEPROPERTIES using ALTER TABLE SET.
    self.client.execute("alter table test_alter_tbl set serdeproperties "\
        "('s1'='new', 's5'='s6')")
    properties = self.__get_serde_properties('test_alter_tbl')
    assert {'s1': 'new', 's3': 's4', 's5': 's6'} == properties

    # Modify the TBLPROPERTIES using ALTER TABLE SET.
    self.client.execute("alter table test_alter_tbl set tblproperties "\
        "('prop1'='val1', 'p2'='val2', 'p2'='val3', ''='')")
    properties = self.__get_tbl_properties('test_alter_tbl')

    assert len(properties) == 5
    assert 'transient_lastDdlTime' in properties
    del properties['transient_lastDdlTime']
    assert {'p1': 'v1', 'prop1': 'val1', 'p2': 'val3', '': ''} == properties

  @classmethod
  def __use_multiple_impalad(cls, vector):
    return vector.get_value('exec_option')['sync_ddl'] == 1

  @classmethod
  def __create_db_synced(cls, db_name, vector):
    """Creates a database using synchronized DDL to ensure all nodes have the test
    database available for use before executing the .test file(s).
    """
    cls.client.execute('use default')
    cls.client.set_configuration({'sync_ddl': 1})
    cls.client.execute('create database %s' % db_name)
    cls.client.set_configuration(vector.get_value('exec_option'))

  def __get_tbl_properties(self, table_name):
    """Extracts the table properties mapping from the output of DESCRIBE FORMATTED"""
    return self.__get_properties('Table Parameters:', table_name)

  def __get_serde_properties(self, table_name):
    """Extracts the serde properties mapping from the output of DESCRIBE FORMATTED"""
    return self.__get_properties('Storage Desc Params:', table_name)

  def __get_properties(self, section_name, table_name):
    """Extracts the properties of the given section from DESCRIBE FORMATTED output"""
    result = self.client.execute("describe formatted " + table_name)
    match = False
    properties = {}
    for row in result.data:
      if section_name in row:
        match = True
      elif match:
        # Rows in the section are tab-separated, with the key in the second column
        # and the value in the third; a row with 'NULL' in the second column marks
        # the end of the section.
        row = row.split('\t')
        if row[1] == 'NULL':
          break
        properties[row[1].rstrip()] = row[2].rstrip()
    return properties