Files
impala/tests/query_test/test_col_stats.py
Lenni Kuff bb09b5270f IMPALA-839: Update tests to be more thorough when run exhaustively
Some tests have constraints that were there only to help reduce runtime which
reduces coverage when running in exhaustive mode. The majority of the constraints
are because it adds no value to run the test across additional dimensions (or
it is invalid to run with those dimensions). Updates the tests that have
legitimate constraints to use two new helper methods for constraining the table format
dimension:
create_uncompressed_text_dimension()
create_parquet_dimension()

These will create a dimension that will produce a single test vector, either
uncompressed text or parquet respectively.

Change-Id: Id85387c1efd5d192f8059ef89934933389bfe247
Reviewed-on: http://gerrit.ent.cloudera.com:8080/2149
Reviewed-by: Lenni Kuff <lskuff@cloudera.com>
Tested-by: jenkins
(cherry picked from commit e02acbd469bc48c684b2089405b4a20552802481)
Reviewed-on: http://gerrit.ent.cloudera.com:8080/2290
2014-04-18 20:11:31 -07:00

90 lines
3.5 KiB
Python

#!/usr/bin/env python
# Copyright (c) 2012 Cloudera, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Impala tests for column statistics
import logging
import pytest
import shlex
from tests.common.test_result_verifier import *
from subprocess import call
from tests.common.test_vector import *
from tests.common.impala_test_suite import *
TEST_DB = 'colstats_test_db'
# End-to-end validation of Impala column stats usage.
class TestColStats(ImpalaTestSuite):
@classmethod
def get_workload(self):
return 'functional-query'
@classmethod
def add_test_dimensions(cls):
super(TestColStats, cls).add_test_dimensions()
# There is no reason to run these tests using all dimensions.
cls.TestMatrix.add_dimension(create_single_exec_option_dimension())
cls.TestMatrix.add_dimension(create_uncompressed_text_dimension(cls.get_workload()))
def setup_method(self, method):
self.__cleanup()
def teardown_method(self, method):
self.__cleanup()
def __cleanup(self):
self.cleanup_db(TEST_DB)
def test_incompatible_col_stats(self, vector):
"""Tests Impala is able to use tables when the column stats data is not compatible
with the column type. Regression test for IMPALA-588."""
# Create a test database.
self.client.execute("create database " + TEST_DB);
self.client.execute("use " + TEST_DB)
# Create a table with a string column and populate it with some data.
self.client.execute("create table badstats(s string)")
self.client.execute("insert into table badstats select cast(int_col as string) "\
"from functional.alltypes limit 10")
# Compute stats for this table, they will be for the string column type.
self.__compute_table_stats(TEST_DB, 'badstats')
self.client.execute("refresh badstats")
# Change the column type to int which will cause a mismatch between the column
# stats data and the column type metadata.
self.client.execute("alter table badstats change s s int")
# Should still be able to query the table
result = self.client.execute("select s from badstats")
assert len(result.data) == 10
# Recompute stats with the new column type. Impala should now have stats for this
# column and should be able to access the table.
# TODO: Currently this just verifies Impala can query the table, it does not
# verify the stats are there or correct. Expand the verification once Impala has a
# mechanism to expose this metadata.
self.__compute_table_stats(TEST_DB, 'badstats')
self.client.execute("refresh badstats")
result = self.client.execute("select s from badstats")
assert len(result.data) == 10
def __compute_table_stats(self, db_name, table_name):
compute_stats_script =\
os.path.join(os.environ['IMPALA_HOME'],'tests/util/compute_table_stats.py')
rval = call([compute_stats_script,
'--db_names=' + db_name, '--table_names=' + table_name])
assert rval == 0, 'Compute table stats failed on: %s.%s' % (db_name, table_name)