Some tests have constraints that were there only to help reduce runtime, which
reduces coverage when running in exhaustive mode. The majority of the
constraints exist because it adds no value to run the test across additional
dimensions (or it is invalid to run with those dimensions). Updates the tests
that have legitimate constraints to use two new helper methods for constraining
the table format dimension:

  create_uncompressed_text_dimension()
  create_parquet_dimension()

These create a dimension that produces a single test vector, uncompressed text
or parquet respectively.

Change-Id: Id85387c1efd5d192f8059ef89934933389bfe247
Reviewed-on: http://gerrit.ent.cloudera.com:8080/2149
Reviewed-by: Lenni Kuff <lskuff@cloudera.com>
Tested-by: jenkins
(cherry picked from commit e02acbd469bc48c684b2089405b4a20552802481)
Reviewed-on: http://gerrit.ent.cloudera.com:8080/2290
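A minimal sketch of what one of the helpers named above could look like, built on the TestDimension API used in the test file below. The helper name comes from the commit message; the body, the TableFormatInfo class, and the create_from_string() call are assumptions for illustration, not the actual Impala implementation.

# Hypothetical sketch only: constrain the 'table_format' dimension so the test
# matrix yields exactly one vector (uncompressed text) for the given workload.
def create_uncompressed_text_dimension(workload):
  # 'text/none' is assumed shorthand for text files with no compression.
  return TestDimension('table_format',
      TableFormatInfo.create_from_string(workload, 'text/none'))

# Illustrative use inside a test class's add_test_dimensions():
#   cls.TestMatrix.add_dimension(create_uncompressed_text_dimension(cls.get_workload()))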
174 lines
7.5 KiB
Python
#!/usr/bin/env python
# Copyright (c) 2012 Cloudera, Inc. All rights reserved.
# Validates limit on scan nodes
#
import logging
import pytest
from copy import copy
from tests.beeswax.impala_beeswax import ImpalaBeeswaxException
from tests.common.impala_test_suite import ImpalaTestSuite
from tests.common.test_vector import *
from tests.util.test_file_parser import QueryTestSectionReader

class TestLimit(ImpalaTestSuite):
  LIMIT_VALUES = [1, 2, 3, 4, 5, 10, 100, 5000]
  QUERIES = ["select * from lineitem limit %d"]

  # TODO: we should be able to run count(*) in setup rather than hardcoding the values
  # but I have no idea how to do this with this framework.
  TOTAL_ROWS = 6001215

  @classmethod
  def get_workload(self):
    return 'tpch'

  @classmethod
  def add_test_dimensions(cls):
    super(TestLimit, cls).add_test_dimensions()

    # Add two more dimensions
    cls.TestMatrix.add_dimension(
        TestDimension('limit_value', *TestLimit.LIMIT_VALUES))
    cls.TestMatrix.add_dimension(TestDimension('query', *TestLimit.QUERIES))

    # Don't run with large limits and tiny batch sizes. This generates excessive
    # network traffic and makes the machine run very slowly.
    cls.TestMatrix.add_constraint(lambda v:\
        v.get_value('limit_value') < 100 or v.get_value('exec_option')['batch_size'] == 0)
    # TPCH is not generated in hbase format.
    # TODO: Add test coverage for hbase.
    cls.TestMatrix.add_constraint(lambda v:\
        v.get_value('table_format').file_format != "hbase")

  def test_limit(self, vector):
    # We can't validate the rows that are returned since that is non-deterministic.
    # This is why this is a python test rather than a .test.
    limit = vector.get_value('limit_value')
    expected_num_rows = min(limit, TestLimit.TOTAL_ROWS)
    query_string = vector.get_value('query') % limit
    result = self.execute_query(query_string, vector.get_value('exec_option'),
                                table_format=vector.get_value('table_format'))
    assert(len(result.data) == expected_num_rows)

# Base class shared by the limit-related tests below.
class TestLimitBase(ImpalaTestSuite):
  def exec_query_validate(self, query, exec_options, should_succeed, expected_rows,
                          expected_error):
    """Executes a query and validates the results"""
    try:
      result = self.execute_query(query, exec_options)
      assert should_succeed, 'Query was expected to fail'
      assert len(result.data) == expected_rows,\
          'Wrong number of rows returned %d' % len(result.data)
    except ImpalaBeeswaxException as e:
      assert not should_succeed, 'Query was not expected to fail: %s' % e
      if (expected_error not in str(e)):
        print str(e)
      assert expected_error in str(e)

# Validates the default order by limit query option functionality
class TestDefaultOrderByLimitValue(TestLimitBase):
  # Interesting default limit values. TODO: What about value of -2?
  DEFAULT_ORDER_BY_LIMIT_VALUES = [None, -1, 0, 1, 10, 100]

  @classmethod
  def get_workload(self):
    return 'functional-query'

  @classmethod
  def add_test_dimensions(cls):
    super(TestDefaultOrderByLimitValue, cls).add_test_dimensions()
    # Not interested in exploring different file formats
    cls.TestMatrix.clear_dimension('table_format')

    cls.TestMatrix.add_dimension(TestDimension('default_order_by_limit_value',
        *TestDefaultOrderByLimitValue.DEFAULT_ORDER_BY_LIMIT_VALUES))
    cls.TestMatrix.add_constraint(lambda v:\
        v.get_value('exec_option')['batch_size'] == 0)

  def test_default_order_by_limit_values(self, vector):
    limit_value = vector.get_value('default_order_by_limit_value')
    exec_options = copy(vector.get_value('exec_option'))
    if limit_value is not None:
      exec_options['default_order_by_limit'] = limit_value

    expected_error = 'ORDER BY without LIMIT currently not supported'

    # Unless the default order by limit is -1 or None (not specified), we expect SELECT
    # ORDER BY without any limit specified to work properly.
    no_limit_should_succeed = limit_value not in [None, -1]

    # Validate that the default order by limit option kicks in when no limit is specified.
    query_no_limit = "select * from functional.alltypes order by int_col"
    self.exec_query_validate(query_no_limit, exec_options,
        no_limit_should_succeed, limit_value, expected_error)

    # Validate that user specified limits override the default limit value.
    query_with_limit = "select * from functional.alltypes order by int_col limit 20"
    self.exec_query_validate(query_with_limit, exec_options, True, 20, expected_error)

    # A query without an ORDER BY is unaffected by the default order by limit.
    query_no_orderby = "select * from functional.alltypes limit 25"
    self.exec_query_validate(query_no_orderby, exec_options, True, 25, expected_error)

# Validates the abort_on_default_limit_exceeded query option functionality
class TestAbortOnDefaultLimitExceeded(TestLimitBase):
  # The test vector is a list of tuples of:
  #   default_order_by_limit (number)
  #   abort_on_default_limit_exceeded (boolean)
  #   query_limit (number or None)
  #   expected rows, only applicable if the query should succeed (number)
  TEST_VECTOR = [# This will hit the default order by limit and should fail
                 (1, True, None, 100),
                 # This will hit the default order by limit but won't fail
                 (1, False, None, 1),
                 # This will not use the default order by limit and won't fail
                 (1, True, 2, 2),
                 # This will use the default order by limit but won't go beyond the limit
                 (100, True, None, 100),
                 # This will use the default order by limit but will hit the limit
                 (99, True, None, 100)]

  @classmethod
  def get_workload(self):
    return 'functional-query'

  @classmethod
  def add_test_dimensions(cls):
    super(TestAbortOnDefaultLimitExceeded, cls).add_test_dimensions()
    # Not interested in exploring different file formats
    cls.TestMatrix.clear_dimension('table_format')

    cls.TestMatrix.add_dimension(TestDimension('abort_on_default_limit_exceeded',
        *TestAbortOnDefaultLimitExceeded.TEST_VECTOR))

  def test_abort_on_default_limit_exceeded(self, vector):
    # TODO: do not check the error message because it might not have reached the
    # coordinator yet.
    # expected_error = 'DEFAULT_ORDER_BY_LIMIT has been exceeded.'
    expected_error = ''
    exec_options = copy(vector.get_value('exec_option'))
    test_param = vector.get_value('abort_on_default_limit_exceeded')
    default_order_by_limit, abort_on_default_limit_exceeded, user_limit, expected_rows = \
        test_param
    exec_options['default_order_by_limit'] = default_order_by_limit
    exec_options['abort_on_default_limit_exceeded'] = abort_on_default_limit_exceeded
    query_limit = ''
    if (user_limit is not None):
      query_limit = " limit " + str(user_limit)
    query = "select * from functional.alltypessmall order by int_col" + query_limit

    # The query should fail if:
    #   1. abort_on_default_limit_exceeded is set, and
    #   2. default_order_by_limit is set, and
    #   3. no user specified limit is given, and
    #   4. the number of rows returned exceeds the default limit.
    num_rows = 100
    if (user_limit is not None):
      num_rows = min(num_rows, user_limit)
    should_fail = (abort_on_default_limit_exceeded and \
        (default_order_by_limit is not None) and (default_order_by_limit > 0) and \
        user_limit is None and \
        num_rows > default_order_by_limit)

    self.exec_query_validate(query, exec_options, not should_fail, expected_rows,
        expected_error)