mirror of
https://github.com/apache/impala.git
synced 2026-01-28 00:04:45 -05:00
This patch does a few things: 1) Move the metadata tests into their own folder under tests/. I think it's useful to loosely categorize them so it's easier to run a subset of the tests that are most useful for the changes you are making. 2) Reduce the test vectors for query_tests. We should have identical coverage in the daily exhaustive runs but the normal runs should be much better. In particular, deemphasizing scanner tests since that code is more stable now. 3) Misc test cleanup/consolidate python test files/etc. Change-Id: I03c2f34877aed192c2a50665bd5e15fa85e12f1e Reviewed-on: http://gerrit.sjc.cloudera.com:8080/3831 Tested-by: jenkins Reviewed-by: Nong Li <nong@cloudera.com>
88 lines
3.9 KiB
Python
88 lines
3.9 KiB
Python
#!/usr/bin/env python
|
|
# Copyright (c) 2012 Cloudera, Inc. All rights reserved.
|
|
# Functional tests running EXPLAIN statements.
|
|
#
|
|
import logging
|
|
import pytest
|
|
import re
|
|
from tests.common.test_vector import *
|
|
from tests.common.impala_test_suite import *
|
|
|
|
# Tests the different explain levels [0-3] on a few queries.
|
|
# TODO: Clean up this test to use an explain level test dimension and appropriate
|
|
# result sub-sections for the expected explain plans.
|
|
class TestExplain(ImpalaTestSuite):
|
|
# Value for the num_scanner_threads query option to ensure that the memory estimates of
|
|
# scan nodes are consistent even when run on machines with different numbers of cores.
|
|
NUM_SCANNER_THREADS = 1
|
|
|
|
@classmethod
|
|
def get_workload(self):
|
|
return 'functional-query'
|
|
|
|
@classmethod
|
|
def add_test_dimensions(cls):
|
|
super(TestExplain, cls).add_test_dimensions()
|
|
cls.TestMatrix.add_constraint(lambda v:\
|
|
v.get_value('table_format').file_format == 'text' and\
|
|
v.get_value('table_format').compression_codec == 'none' and\
|
|
v.get_value('exec_option')['batch_size'] == 0 and\
|
|
v.get_value('exec_option')['disable_codegen'] == False and\
|
|
v.get_value('exec_option')['num_nodes'] != 1)
|
|
|
|
@pytest.mark.xfail(run=False, reason="Expected per-host mem requirements inconsistent")
|
|
def test_explain_level0(self, vector):
|
|
vector.get_value('exec_option')['num_scanner_threads'] = self.NUM_SCANNER_THREADS
|
|
vector.get_value('exec_option')['explain_level'] = 0
|
|
self.run_test_case('QueryTest/explain-level0', vector)
|
|
|
|
@pytest.mark.xfail(run=False, reason="Expected per-host mem requirements inconsistent")
|
|
def test_explain_level1(self, vector):
|
|
vector.get_value('exec_option')['num_scanner_threads'] = self.NUM_SCANNER_THREADS
|
|
vector.get_value('exec_option')['explain_level'] = 1
|
|
self.run_test_case('QueryTest/explain-level1', vector)
|
|
|
|
@pytest.mark.xfail(run=False, reason="The test for missing table stats fails for avro")
|
|
def test_explain_level2(self, vector):
|
|
vector.get_value('exec_option')['num_scanner_threads'] = self.NUM_SCANNER_THREADS
|
|
vector.get_value('exec_option')['explain_level'] = 2
|
|
self.run_test_case('QueryTest/explain-level2', vector)
|
|
|
|
@pytest.mark.xfail(run=False, reason="The test for missing table stats fails for avro")
|
|
def test_explain_level3(self, vector):
|
|
vector.get_value('exec_option')['num_scanner_threads'] = self.NUM_SCANNER_THREADS
|
|
vector.get_value('exec_option')['explain_level'] = 3
|
|
self.run_test_case('QueryTest/explain-level3', vector)
|
|
|
|
def test_explain_validate_cardinality_estimates(self, vector):
|
|
# Tests that the cardinality estimates are correct for partitioned tables.
|
|
# TODO Cardinality estimation tests should eventually be part of the planner tests.
|
|
# TODO Remove this test
|
|
db_name = 'functional'
|
|
tbl_name = 'alltypes'
|
|
|
|
def check_cardinality(query_result, expected_cardinality):
|
|
regex = re.compile('tuple-ids=\d+ row-size=\d+B cardinality=(\d+)')
|
|
for res in query_result:
|
|
m = regex.match(res.strip())
|
|
if m:
|
|
assert len(m.groups()) == 1
|
|
# The cardinality should be zero.
|
|
assert m.groups()[0] == expected_cardinality
|
|
|
|
# All partitions are filtered out, cardinality should be 0.
|
|
result = self.execute_query("explain select * from %s.%s where year = 1900" % (
|
|
db_name, tbl_name), query_options={'explain_level':3})
|
|
check_cardinality(result.data, '0')
|
|
|
|
# Half of the partitions are filtered out, cardinality should be 3650.
|
|
result = self.execute_query("explain select * from %s.%s where year = 2010" % (
|
|
db_name, tbl_name), query_options={'explain_level':3})
|
|
check_cardinality(result.data, '3650')
|
|
|
|
# None of the partitions are filtered out, cardinality should be 7300.
|
|
result = self.execute_query("explain select * from %s.%s" % (db_name, tbl_name),
|
|
query_options={'explain_level':3})
|
|
check_cardinality(result.data, '7300')
|
|
|