mirror of
https://github.com/apache/impala.git
synced 2026-01-05 12:01:11 -05:00
I looked at the latest run from master and took the test suites that had long execution times. This cleans those test suites up to either completely disable them on 'core' or add constraints to limit the number of test vectors. It shouldn't impact nightly coverage since we still run the same tests exhaustively. Change-Id: I10c78c35155b00de0c36d9fc0923b2b1fc6b44de Reviewed-on: http://gerrit.ent.cloudera.com:8080/3119 Reviewed-by: Marcel Kornacker <marcel@cloudera.com> Tested-by: jenkins Reviewed-on: http://gerrit.ent.cloudera.com:8080/3125 Reviewed-by: Lenni Kuff <lskuff@cloudera.com>
200 lines
8.0 KiB
Python
200 lines
8.0 KiB
Python
#!/usr/bin/env python
|
|
# Copyright (c) 2012 Cloudera, Inc. All rights reserved.
|
|
#
|
|
# This test suite validates the scanners by running queries against ALL file formats and
|
|
# their permutations (e.g. compression codec/compression type). This works by exhaustively
|
|
# generating the table format test vectors for this specific test suite. This way, other
|
|
# tests can run with the normal exploration strategy and the overall test runtime doesn't
|
|
# explode.
|
|
|
|
import logging
|
|
import pytest
|
|
from copy import deepcopy
|
|
|
|
from testdata.common import widetable
|
|
from tests.common.test_vector import *
|
|
from tests.common.impala_test_suite import *
|
|
from tests.util.test_file_parser import *
|
|
from tests.common.test_dimensions import create_single_exec_option_dimension
|
|
|
|
class TestScannersAllTableFormats(ImpalaTestSuite):
    # Values of the 'batch_size' test dimension. test_scanners copies the value chosen
    # for the current vector into the query's 'exec_option' settings.
    BATCH_SIZES = [0, 1, 16]

    @classmethod
    def get_workload(cls):
        """Return the name of the workload this suite runs against."""
        return 'functional-query'

    @classmethod
    def add_test_dimensions(cls):
        """Add the table-format and batch-size dimensions on top of the defaults."""
        super(TestScannersAllTableFormats, cls).add_test_dimensions()

        # The table format vectors are generated exhaustively for this suite. The
        # --table_formats flag can still be used to override them.
        table_formats = cls.create_table_info_dimension('exhaustive')
        cls.TestMatrix.add_dimension(table_formats)

        batch_sizes = TestDimension(
            'batch_size', *TestScannersAllTableFormats.BATCH_SIZES)
        cls.TestMatrix.add_dimension(batch_sizes)

    def test_scanners(self, vector):
        """Run the 'QueryTest/scanners' test case using the vector's batch size."""
        # Modify a copy so the vector passed in is left untouched.
        scan_vector = deepcopy(vector)
        exec_options = scan_vector.get_value('exec_option')
        exec_options['batch_size'] = vector.get_value('batch_size')
        self.run_test_case('QueryTest/scanners', scan_vector)
|
|
|
|
# Test all the scanners with a simple limit clause. The limit clause triggers
# cancellation in the scanner code paths.
class TestScannersAllTableFormatsWithLimit(ImpalaTestSuite):
    @classmethod
    def get_workload(cls):
        """Return the name of the workload this suite runs against."""
        return 'functional-query'

    @classmethod
    def add_test_dimensions(cls):
        """Add an exhaustive table-format dimension on top of the defaults."""
        super(TestScannersAllTableFormatsWithLimit, cls).add_test_dimensions()
        # Exhaustively generate all table format vectors. This can still be overridden
        # using the --table_formats flag.
        cls.TestMatrix.add_dimension(cls.create_table_info_dimension('exhaustive'))

    def test_limit(self, vector):
        """Run 'select * from alltypes limit N' repeatedly with a changing N."""
        # Modify a copy of the vector, as test_scanners and test_wide_row do, so the
        # vector passed in is not changed by this test.
        new_vector = deepcopy(vector)
        exec_option = new_vector.get_value('exec_option')
        # Use a small batch size so changing the limit affects the timing of cancellation
        exec_option['batch_size'] = 100
        table_format = new_vector.get_value('table_format')

        iterations = 50
        query_template = "select * from alltypes limit %s"
        for i in range(1, iterations):
            # Vary the limit to vary the timing of cancellation. The limit has to be
            # derived from the loop index: deriving it from 'iterations' gives
            # (50 * 100) % 1000 + 1 == 1 on every pass. With 'i' it cycles through
            # 101, 201, ..., 901, 1.
            limit = (i * 100) % 1000 + 1
            self.execute_query(query_template % limit, exec_option,
                               table_format=table_format)
|
|
|
|
# Test case to verify the scanners work properly when the table metadata (specifically the
# number of columns in the table) does not match the number of columns in the data file.
class TestUnmatchedSchema(ImpalaTestSuite):
    @classmethod
    def get_workload(cls):
        """Return the name of the workload this suite runs against."""
        return 'functional-query'

    @classmethod
    def add_test_dimensions(cls):
        """Add table-format and single exec-option dimensions, excluding avro."""
        super(TestUnmatchedSchema, cls).add_test_dimensions()
        # TODO: Does it add anything to enumerate all the supported compression codecs
        # for each table format?
        cls.TestMatrix.add_dimension(cls.create_table_info_dimension('exhaustive'))
        cls.TestMatrix.add_dimension(create_single_exec_option_dimension())
        # Avro has a more advanced schema evolution process which is covered in more depth
        # in the test_avro_schema_evolution test suite.
        cls.TestMatrix.add_constraint(
            lambda v: v.get_value('table_format').file_format != 'avro')

    def __get_table_location(self, table_name, vector):
        """
        Returns the location of 'table_name'

        Runs "describe formatted" on the table and returns the second tab-separated
        field of the first result row that contains 'Location:'. Fails with an
        assertion if no such row is found.
        """
        result = self.execute_query_using_client(
            self.client, "describe formatted %s" % table_name, vector)
        for row in result.data:
            if 'Location:' in row:
                return row.split('\t')[1]
        # This should never happen.
        assert 0, 'Unable to get location for table: ' + table_name

    def __create_test_table(self, vector):
        """
        Creates the test table

        Cannot be done in a setup method because we need access to the current test vector
        """
        # Start from a clean slate in case an earlier run left the table behind.
        self.__drop_test_table(vector)
        self.execute_query_using_client(
            self.client, "create external table jointbl_test like jointbl", vector)

        # Update the location of the new table to point the same location as the old table
        location = self.__get_table_location('jointbl', vector)
        self.execute_query_using_client(
            self.client, "alter table jointbl_test set location '%s'" % location, vector)

    def __drop_test_table(self, vector):
        """Drops the test table if it exists"""
        self.execute_query_using_client(
            self.client, "drop table if exists jointbl_test", vector)

    def test_unmatched_schema(self, vector):
        """Run 'QueryTest/test-unmatched-schema' against the jointbl_test table."""
        table_format = vector.get_value('table_format')
        # jointbl has no columns with unique values. When loaded in hbase, the table looks
        # different, as hbase collapses duplicates.
        if table_format.file_format == 'hbase':
            pytest.skip("jointbl looks different in hbase, which collapses duplicates")
        try:
            self.__create_test_table(vector)
            self.run_test_case('QueryTest/test-unmatched-schema', vector)
        finally:
            # Always clean up, even when table creation or the test case fails. The
            # drop uses "if exists", so it is safe when the table was never created.
            self.__drop_test_table(vector)
|
|
|
|
|
|
# Tests that scanners can read a single-column, single-row, 10MB table
class TestWideRow(ImpalaTestSuite):
    @classmethod
    def get_workload(cls):
        """Return the name of the workload this suite runs against."""
        return 'functional-query'

    @classmethod
    def add_test_dimensions(cls):
        """Use the default dimensions, with hbase table formats excluded."""
        super(TestWideRow, cls).add_test_dimensions()

        # I can't figure out how to load a huge row into hbase
        def not_hbase(v):
            return v.get_value('table_format').file_format != 'hbase'

        cls.TestMatrix.add_constraint(not_hbase)

    def test_wide_row(self, vector):
        """Run 'QueryTest/wide-row' with a 5MB scan range and an 80MB memory limit."""
        megabyte = 1024 * 1024
        # Modify a copy so the vector passed in is left untouched.
        wide_vector = deepcopy(vector)
        exec_options = wide_vector.get_value('exec_option')

        # Use a 5MB scan range, so we will have to perform 5MB of sync reads
        exec_options['max_scan_range_length'] = 5 * megabyte

        # We need > 10 MB of memory because we're creating extra buffers:
        # - 10 MB table / 5 MB scan range = 2 scan ranges, each of which may allocate
        #   ~20MB
        # - Sync reads will allocate ~5MB of space
        # The 80MB value used here was determined empirically by raising the limit until
        # the query succeeded for all file formats -- I don't know exactly why we need
        # this much.
        # TODO: figure out exact breakdown of memory usage (IMPALA-681)
        exec_options['mem_limit'] = 80 * megabyte

        self.run_test_case('QueryTest/wide-row', wide_vector)
|
|
|
|
class TestWideTable(ImpalaTestSuite):
    # Values of the 'num_cols' test dimension; test_wide_table queries the table named
    # widetable_<num_cols>_cols.
    # TODO: expand this to more rows when we have the capability
    NUM_COLS = [250, 500, 1000]

    @classmethod
    def get_workload(cls):
        """Return the name of the workload this suite runs against."""
        return 'functional-query'

    @classmethod
    def add_test_dimensions(cls):
        """Exclude hbase, add the 'num_cols' dimension, and restrict to exhaustive runs."""
        super(TestWideTable, cls).add_test_dimensions()

        def not_hbase(v):
            return v.get_value('table_format').file_format != 'hbase'

        cls.TestMatrix.add_constraint(not_hbase)
        cls.TestMatrix.add_dimension(TestDimension("num_cols", *cls.NUM_COLS))

        # To cut down on test execution time, only run in exhaustive.
        if cls.exploration_strategy() == 'exhaustive':
            return
        # A constraint that rejects every vector.
        cls.TestMatrix.add_constraint(lambda v: False)

    def test_wide_table(self, vector):
        """Check the row count and full contents of a wide table against widetable data."""
        num_cols = vector.get_value('num_cols')
        num_rows = 10
        db_name = QueryTestSectionReader.get_db_name(vector.get_value('table_format'))
        table_name = "%s.widetable_%s_cols" % (db_name, num_cols)

        # First check that the table holds the expected number of rows.
        count_result = self.client.execute("select count(*) from %s " % table_name)
        assert count_result.data == [str(num_rows)]

        # Then compare every row with the data produced by the widetable module.
        expected_rows = widetable.get_data(num_cols, num_rows, quote_strings=True)
        select_result = self.client.execute("select * from %s" % table_name)

        col_types = parse_column_types(select_result.schema)
        col_labels = parse_column_labels(select_result.schema)
        expected = QueryTestResult(
            expected_rows, col_types, col_labels, order_matters=False)
        actual = QueryTestResult(
            parse_result_rows(select_result), col_types, col_labels,
            order_matters=False)
        assert expected == actual
|
|
|
|
class TestParquet(ImpalaTestSuite):
    @classmethod
    def get_workload(cls):
        """Return the name of the workload this suite runs against."""
        return 'functional-query'

    @classmethod
    def add_test_dimensions(cls):
        """Use the default dimensions, restricted to the parquet file format."""
        super(TestParquet, cls).add_test_dimensions()

        def is_parquet(v):
            return v.get_value('table_format').file_format == 'parquet'

        cls.TestMatrix.add_constraint(is_parquet)

    def test_parquet(self, vector):
        """Run the 'QueryTest/parquet' test case with the given vector."""
        self.run_test_case('QueryTest/parquet', vector)
|