Files
impala/tests/failure/test_failpoints.py
Taras Bobrovytsky 7faaa65996 Added order by query tests
- Added static order by tests to test_queries.py and QueryTest/sort.test
- test_order_by.py also contains tests with static queries that are run with
  multiple memory limits.
- Added stress, scratch disk and failpoints tests
- Incorporated Srinath's change that copied all order by with limit tests into
  the top-n.test file

Extra time required:

Serial:
scratch disk: 42 seconds
test queries sort : 77 seconds
test sort: 56 seconds
sort stress: 142 seconds
TOTAL: 5 min 17 seconds

Parallel(8 threads):
scratch disk: 40 seconds
test queries sort: 42 seconds
test sort: 49 seconds
sort stress: 93 seconds
TOTAL: 3 min 44 sec

Change-Id: Ic5716bcfabb5bb3053c6b9cebc9bfbbb9dc64a7c
Reviewed-on: http://gerrit.ent.cloudera.com:8080/2820
Reviewed-by: Taras Bobrovytsky <tbobrovytsky@cloudera.com>
Tested-by: jenkins
Reviewed-on: http://gerrit.ent.cloudera.com:8080/3205
2014-06-20 13:35:10 -07:00

126 lines
5.1 KiB
Python

#!/usr/bin/env python
# Copyright (c) 2012 Cloudera, Inc. All rights reserved.
# Injects failures at specific locations in each of the plan nodes. Currently supports
# two types of failures - cancellation of the query and a failure test hook.
#
import pytest
import re
from copy import copy
from collections import defaultdict
from tests.beeswax.impala_beeswax import ImpalaBeeswaxException
from tests.common.impala_test_suite import ImpalaTestSuite, ALL_NODES_ONLY, LOG
from tests.common.test_vector import TestDimension
from tests.common.test_dimensions import create_exec_option_dimension
from tests.util.test_file_parser import QueryTestSectionReader
from time import sleep
# Kinds of failure a test case can inject ('action' test dimension). In test_failpoints,
# 'FAIL' becomes a FAIL debug action, while 'CANCEL' becomes a WAIT debug action after
# which the client cancels the query.
FAILPOINT_ACTION = ['FAIL', 'CANCEL']
# Locations within a plan node at which the debug action is applied ('location' test
# dimension). The value is the middle field of the '<node id>:<location>:<action>' string
# that test_failpoints stores in the 'debug_action' exec option.
FAILPOINT_LOCATION = ['PREPARE', 'OPEN', 'GETNEXT', 'CLOSE']
# The goal of this query is to use all of the node types.
# TODO: This query could be simplified a bit...
QUERY = """
select a.int_col, count(b.int_col) int_sum from functional_hbase.alltypesagg a
join
(select * from alltypes
where year=2009 and month=1 order by int_col limit 2500
union all
select * from alltypes
where year=2009 and month=2 limit 3000) b
on (a.int_col = b.int_col)
group by a.int_col
order by int_sum
"""
# TODO: Update to include INSERT when we support failpoints in the HDFS/Hbase sinks using
# a similar pattern as test_cancellation.py
# Statement types covered by the test matrix ('query_type' test dimension).
QUERY_TYPE = ["SELECT"]
class TestFailpoints(ImpalaTestSuite):
    """Runs QUERY while injecting a failure or a cancellation into its plan nodes.

    The test matrix combines failpoint location, target plan node (as reported by
    EXPLAIN), action and query type, minus the combinations excluded by the constraints
    registered in add_test_dimensions().
    """

    @classmethod
    def get_workload(cls):
        """Returns the name of the workload used by this suite."""
        return 'functional-query'

    @classmethod
    def parse_plan_nodes_from_explain_output(cls, query, use_db="default"):
        """Parses the EXPLAIN <query> output and returns a map of node_name->list(node_id)

        query: the statement to run EXPLAIN on.
        use_db: database selected with "use" before EXPLAIN is issued.

        Returns a defaultdict(list) keyed by plan node name; each value holds the integer
        ids of every plan node with that name, in the order they appear in the output.
        """
        # NOTE(review): this client is never explicitly closed - confirm whether the
        # object returned by create_impala_client() needs cleanup.
        client = cls.create_impala_client()
        client.execute("use %s" % use_db)
        # Explain the statement that was passed in, not the module-level QUERY.
        explain_result = client.execute("explain " + query)
        # Maps plan node names to their respective node ids. Expects format of <ID>:<NAME>
        node_pattern = re.compile(r'\s*(?P<node_id>\d+)\:(?P<node_type>\S+\s*\S+)')
        node_id_map = defaultdict(list)
        for row in explain_result.data:
            match = node_pattern.search(row)
            if match is not None:
                node_id_map[match.group('node_type')].append(int(match.group('node_id')))
        return node_id_map

    @classmethod
    def add_test_dimensions(cls):
        """Registers the failpoint dimensions and constraints on the test matrix.

        The 'target_node' dimension values are (node_name, [node_id, ...]) pairs taken
        from the EXPLAIN plan of QUERY in the "functional" database.
        """
        super(TestFailpoints, cls).add_test_dimensions()
        node_id_map = cls.parse_plan_nodes_from_explain_output(QUERY, "functional")
        assert node_id_map, 'No plan nodes were parsed from the EXPLAIN output'
        cls.TestMatrix.add_dimension(TestDimension('location', *FAILPOINT_LOCATION))
        cls.TestMatrix.add_dimension(TestDimension('target_node', *(node_id_map.items())))
        cls.TestMatrix.add_dimension(TestDimension('action', *FAILPOINT_ACTION))
        cls.TestMatrix.add_dimension(TestDimension('query_type', *QUERY_TYPE))
        cls.TestMatrix.add_dimension(create_exec_option_dimension([0], [False], [0]))

        # These are invalid test cases.
        # For more info see IMPALA-55 and IMPALA-56.
        cls.TestMatrix.add_constraint(
            lambda v: not (v.get_value('action') == 'FAIL' and
                           v.get_value('location') in ['CLOSE'] and
                           v.get_value('target_node')[0] in ['AGGREGATE', 'HASH JOIN'])
            and not (v.get_value('location') in ['PREPARE'] and
                     v.get_value('action') == 'CANCEL'))

        # Don't create CLOSE:WAIT debug actions to avoid leaking plan fragments (there's
        # no way to cancel a plan fragment once Close() has been called)
        cls.TestMatrix.add_constraint(
            lambda v: not (v.get_value('action') == 'CANCEL'
                           and v.get_value('location') == 'CLOSE'))

    def test_failpoints(self, vector):
        """Injects the vector's action at every node id of the target node type.

        For each node id a '<node id>:<location>:<action>' debug action is stored in the
        'debug_action' exec option and QUERY is run with it. Afterwards the debug action
        is removed and QUERY is run once more without any injected failure.
        """
        query = QUERY
        _, node_ids = vector.get_value('target_node')
        action = vector.get_value('action')
        location = vector.get_value('location')
        exec_option = vector.get_value('exec_option')

        try:
            for node_id in node_ids:
                # A cancellation is tested by making the node wait and then cancelling.
                debug_action = '%d:%s:%s' % (node_id, location,
                                             'WAIT' if action == 'CANCEL' else 'FAIL')
                LOG.info('Current dubug action: SET DEBUG_ACTION=%s' % debug_action)
                exec_option['debug_action'] = debug_action

                if action == 'CANCEL':
                    self.__execute_cancel_action(query, vector)
                elif action == 'FAIL':
                    self.__execute_fail_action(query, vector)
                else:
                    assert False, 'Unknown action: %s' % action
        finally:
            # Always strip the injected action so it cannot linger in the exec options
            # when an action raises; pop() also tolerates the key never being set.
            exec_option.pop('debug_action', None)

        # We should be able to execute the same query successfully when no failures are
        # injected.
        self.execute_query(query, exec_option)

    def __execute_fail_action(self, query, vector):
        """Runs the query with the FAIL debug action set and tolerates a query error.

        An ImpalaBeeswaxException from the query is logged at debug level and swallowed.
        """
        try:
            self.execute_query(query, vector.get_value('exec_option'),
                               table_format=vector.get_value('table_format'))
        except ImpalaBeeswaxException as e:
            LOG.debug(e)
        else:
            # NOTE(review): this spot used to hold `assert 'Expected Failure'`, which
            # can never fire because a non-empty string is always truthy. Whether every
            # remaining failpoint combination really fails the query is not verified
            # here, so an unexpected success is only logged. Tighten this into a real
            # assertion once that has been confirmed.
            LOG.info('Query succeeded although a failure was injected: debug_action=%s'
                     % vector.get_value('exec_option').get('debug_action'))

    def __execute_cancel_action(self, query, vector):
        """Starts the query asynchronously, waits, cancels it and checks the result.

        The query handle is closed even if the cancel request itself raises.
        """
        LOG.info('Starting async query execution')
        handle = self.execute_query_async(query, vector.get_value('exec_option'),
                                          table_format=vector.get_value('table_format'))
        LOG.info('Sleeping')
        sleep(3)
        try:
            cancel_result = self.client.cancel(handle)
        finally:
            self.client.close_query(handle)
        assert cancel_result.status_code == 0, \
            'Unexpected status code from cancel request: %s' % cancel_result