From 5f9cd044eedc7ad44a5feb4aca28ddf79d53414f Mon Sep 17 00:00:00 2001 From: Lenni Kuff Date: Tue, 5 Mar 2013 17:22:33 -0800 Subject: [PATCH] Add scanner test suite that runs across all file format/compression permutations --- .../queries/QueryTest/scanners.test | 52 +++++++++++++++++++ tests/common/impala_test_suite.py | 7 +-- .../test_scanners_all_table_formats.py | 28 ++++++++++ 3 files changed, 84 insertions(+), 3 deletions(-) create mode 100644 testdata/workloads/functional-query/queries/QueryTest/scanners.test create mode 100644 tests/query_test/test_scanners_all_table_formats.py diff --git a/testdata/workloads/functional-query/queries/QueryTest/scanners.test b/testdata/workloads/functional-query/queries/QueryTest/scanners.test new file mode 100644 index 000000000..2912ec894 --- /dev/null +++ b/testdata/workloads/functional-query/queries/QueryTest/scanners.test @@ -0,0 +1,52 @@ +# These tests are run over all file formats and their permutations (e.g. compression). +# Be careful about adding test cases here since they can blow up the time to run tests +==== +---- QUERY +# This query will do a full table scan, doing a simple aggregation on all cols with +# a simple predicate +select count(*), + sum(id), count(bool_col), sum(tinyint_col), sum(smallint_col), + sum(int_col), sum(bigint_col), max(float_col), max(double_col), + max(date_string_col), max(string_col), max(timestamp_col) +from alltypesagg +where id % 2 = 0 +---- RESULTS +5000,24995000,5000,20000,245000,2495000,24950000,1097.800048828125,10079.8,'01/10/10','998',2010-01-10 18:00:55.300000000 +---- TYPES +BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, BIGINT, FLOAT, DOUBLE, STRING, STRING, TIMESTAMP +==== +---- QUERY +# This query will do a join, projecting one string col from each table. +# This is interesting because the join contains string cols which causes the scanners +# to do different memory handling. 
+select sum(t1.id), sum(t1.int_col),max(t1.date_string_col), max(t2.string_col) +from alltypesagg t1 +inner join alltypesagg t2 + on t1.id = t2.id +---- RESULTS +49995000,4995000,'01/10/10','999' +---- TYPES +BIGINT, BIGINT, STRING, STRING +==== +---- QUERY +# This query does a top-n on non-string cols. This is different because without +# string cols, scanners will handle io buffers differently. They don't need to +# be passed up the execution tree. +select id, bool_col, int_col +from alltypesagg +order by 1 desc, 2 desc, 3 desc +limit 10 +---- RESULTS +9999,false,999 +9998,true,998 +9997,false,997 +9996,true,996 +9995,false,995 +9994,true,994 +9993,false,993 +9992,true,992 +9991,false,991 +9990,true,990 +---- TYPES +INT, BOOLEAN, INT +==== diff --git a/tests/common/impala_test_suite.py b/tests/common/impala_test_suite.py index abcdccd4c..075a01bd3 100644 --- a/tests/common/impala_test_suite.py +++ b/tests/common/impala_test_suite.py @@ -36,7 +36,8 @@ class ImpalaTestSuite(BaseTestSuite): add more dimensions or different dimensions they can override this function. """ super(ImpalaTestSuite, cls).add_test_dimensions() - cls.TestMatrix.add_dimension(cls.__create_table_info_dimension()) + cls.TestMatrix.add_dimension( + cls.create_table_info_dimension(cls.exploration_strategy())) cls.TestMatrix.add_dimension(cls.__create_exec_option_dimension()) @classmethod @@ -241,7 +242,7 @@ class ImpalaTestSuite(BaseTestSuite): return test_section @classmethod - def __create_table_info_dimension(cls): + def create_table_info_dimension(cls, exploration_strategy): # If the user has specified a specific set of table formats to run against, then # use those. Otherwise, load from the workload test vectors. 
if pytest.config.option.table_formats: @@ -251,7 +252,7 @@ class ImpalaTestSuite(BaseTestSuite): table_formats.append(TableFormatInfo.create_from_string(dataset, tf)) return TestDimension('table_format', *table_formats) else: - return load_table_info_dimension(cls.get_workload(), cls.exploration_strategy()) + return load_table_info_dimension(cls.get_workload(), exploration_strategy) @classmethod def __create_exec_option_dimension(cls): diff --git a/tests/query_test/test_scanners_all_table_formats.py b/tests/query_test/test_scanners_all_table_formats.py new file mode 100644 index 000000000..04f0ca6d3 --- /dev/null +++ b/tests/query_test/test_scanners_all_table_formats.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python +# Copyright (c) 2012 Cloudera, Inc. All rights reserved. +# +# This test suite validates the scanners by running queries against ALL file formats and +# their permutations (e.g. compression codec/compression type). This works by exhaustively +# generating the table format test vectors for this specific test suite. This way, other +# tests can run with the normal exploration strategy and the overall test runtime doesn't +# explode. + +import logging +import pytest +from tests.common.test_vector import * +from tests.common.impala_test_suite import * + +class TestScannersAllTableFormats(ImpalaTestSuite): + @classmethod + def get_workload(cls): + return 'functional-query' + + @classmethod + def add_test_dimensions(cls): + super(TestScannersAllTableFormats, cls).add_test_dimensions() + # Exhaustively generate all table format vectors. This can still be overridden + # using the --table_formats flag. + cls.TestMatrix.add_dimension(cls.create_table_info_dimension('exhaustive')) + + def test_scanners(self, vector): + self.run_test_case('QueryTest/scanners', vector)