#!/usr/bin/env python
# Copyright (c) 2012 Cloudera, Inc. All rights reserved.
# Validates limit on scan nodes
#
import logging
import pytest
from copy import copy
from tests.beeswax.impala_beeswax import ImpalaBeeswaxException
from tests.common.impala_test_suite import ImpalaTestSuite
from tests.common.test_vector import *
from tests.util.test_file_parser import QueryTestSectionReader


class TestLimit(ImpalaTestSuite):
    """Checks that a LIMIT clause caps the number of rows a scan returns.

    Each query template in QUERIES is run once per value in LIMIT_VALUES, and
    only the number of returned rows is verified.
    """

    LIMIT_VALUES = [1, 2, 3, 4, 5, 10, 100, 5000]
    QUERIES = ["select * from lineitem limit %d"]

    # TODO: we should be able to run count(*) in setup rather than hardcoding the values
    # but I have no idea how to do this with this framework.
    TOTAL_ROWS = 6001215

    @classmethod
    def get_workload(cls):
        """Return the name of the workload these tests run against."""
        return 'tpch'

    @classmethod
    def add_test_dimensions(cls):
        """Extend the inherited test matrix with limit/query dimensions and constraints."""
        super(TestLimit, cls).add_test_dimensions()

        # Add two more dimensions
        cls.TestMatrix.add_dimension(
            TestDimension('limit_value', *TestLimit.LIMIT_VALUES))
        cls.TestMatrix.add_dimension(TestDimension('query', *TestLimit.QUERIES))

        # Don't run with large limits and tiny batch sizes. This generates excessive
        # network traffic and makes the machine run very slowly.
        cls.TestMatrix.add_constraint(
            lambda v: (v.get_value('limit_value') < 100 or
                       v.get_value('exec_option')['batch_size'] == 0))

        # TPCH is not generated in hbase format.
        # TODO: Add test coverage for hbase.
        cls.TestMatrix.add_constraint(
            lambda v: v.get_value('table_format').file_format != "hbase")

    def test_limit(self, vector):
        """Run the limit query for this vector and check the returned row count."""
        # We can't validate the rows that are returned since that is non-deterministic.
        # This is why this is a python test rather than a .test.
        limit = vector.get_value('limit_value')
        # The query can never return more rows than the table holds.
        expected_num_rows = min(limit, TestLimit.TOTAL_ROWS)
        query_string = vector.get_value('query') % limit
        result = self.execute_query(
            query_string,
            vector.get_value('exec_option'),
            table_format=vector.get_value('table_format'))
        assert len(result.data) == expected_num_rows


# Base class offering a query-validation helper for limit tests.
# NOTE(review): TestLimit above does not derive from this class in the visible
# code - confirm which suites are expected to use it.
class TestLimitBase(ImpalaTestSuite):
    def exec_query_validate(self, query, exec_options, should_succeed,
                            expected_rows, expected_error):
        """Executes a query and validates the results.

        When the query runs without raising ImpalaBeeswaxException, asserts that
        success was expected and that `expected_rows` rows came back. When it
        raises ImpalaBeeswaxException, asserts that failure was expected and
        that `expected_error` occurs in the exception text.
        """
        try:
            result = self.execute_query(query, exec_options)
        except ImpalaBeeswaxException as e:
            assert not should_succeed, 'Query was not expected to fail: %s' % e
            error_text = str(e)
            if expected_error not in error_text:
                # Show the actual error before the assertion below fails.
                # Parenthesised so it prints the same on Python 2 and Python 3.
                print(error_text)
            assert expected_error in error_text
        else:
            assert should_succeed, 'Query was expected to fail'
            assert len(result.data) == expected_rows, \
                'Wrong number of rows returned %d' % len(result.data)