mirror of
https://github.com/apache/impala.git
synced 2026-01-07 00:02:28 -05:00
Many python files had a hashbang and the executable bit set though they were not intended to be run as a standalone script. That makes determining which python files are actually scripts very difficult. A future patch will update the hashbang in real python scripts so they use $IMPALA_HOME/bin/impala-python. Change-Id: I04eafdc73201feefe65b85817a00474e182ec2ba Reviewed-on: http://gerrit.cloudera.org:8080/599 Reviewed-by: Casey Ching <casey@cloudera.com> Reviewed-by: Taras Bobrovytsky <tbobrovytsky@cloudera.com> Tested-by: Internal Jenkins
91 lines
3.7 KiB
Python
91 lines
3.7 KiB
Python
# Copyright (c) 2012 Cloudera, Inc. All rights reserved.
|
|
#
|
|
# Failure injection module for the Impala service. There are two main ways this module
|
|
# can be used - the first is to initialize the failure injector and then call start()
|
|
# which will kick off a timer that chooses a random impalad/state store process
|
|
# to fail each time timer fires.
|
|
# The second way this module can be used is to initialize it and call the actions
|
|
# directly (ex. kill_random_impalad()). This provides a bit more control over exactly
|
|
# when a failure will happen and is useful for targeted test scenarios.
|
|
import logging
|
|
import os
|
|
import sys
|
|
import time
|
|
from tests.common.impala_cluster import *
|
|
from random import choice
|
|
from threading import Timer
|
|
|
|
# Configure logging at import time so that every message is prefixed with the name of
# the thread that emitted it; timer-driven failure actions run on threading.Timer
# threads, so the prefix shows which timer firing produced a given log line.
logging.basicConfig(level=logging.INFO, format='%(threadName)s: %(message)s')
# Module-wide logger used by the failure injector.
LOG = logging.getLogger('failure-injector')
|
|
|
|
# This class is used for injecting failures for the Impala service.
|
|
class FailureInjector(object):
  """
  Injects process failures into the Impala service of a cluster.

  Failures can be driven by a repeating timer (start() / cancel()), which picks a
  weighted-random action each time it fires, or triggered directly by calling the
  kill_* methods.
  """

  def __init__(self, impala_cluster, failure_frequency, impalad_exclude_list=None):
    """
    Initializes the FailureInjector object.

    impala_cluster - An ImpalaCluster object (see the impala_cluster module)
    failure_frequency - Interval to fire timer (in seconds)
    impalad_exclude_list - A list of impalad host:port names to not inject failures
                           on. Useful to filter out the coordinator. None (the
                           default) excludes nothing.
    """
    self.cluster = impala_cluster
    self.cluster.get_impala_service().set_process_auto_restart_config(value=True)
    # TODO: Do we need to restart the impala service to apply this?
    # self.cluster.get_impala_service().restart()
    self.failure_frequency = failure_frequency
    num_impalad_procs = len(self.cluster.get_impala_service().get_all_impalad_processes())

    self.impalad_exclude_list = impalad_exclude_list

    # No timer exists until start() is called. This must be set here because cancel()
    # (and therefore start(), which cancels any previous timer first) reads it.
    self.__timer = None

    # Build a weighted list of possible actions. This is done using a trivial approach
    # where we just add the item multiple times (weight value) into the action list.
    # TODO: Provide a way to dynamically configure the weights
    actions_with_weights = ((self.kill_random_impalad, num_impalad_procs * 2),
                            (self.kill_state_store, 1))

    self.possible_actions = list()
    for action, weight in actions_with_weights:
      self.possible_actions.extend([action] * weight)

  def start(self):
    """ Starts the timer, triggering failures for the specified interval """
    self.__start_timer()

  def cancel(self):
    """
    Stops the timer, canceling any additional failures from occurring. Safe to call
    when no timer has been started.
    """
    if self.__timer is not None:
      self.__timer.cancel()

  def kill_random_impalad(self):
    """
    Kills a randomly selected impalad instance not in the exclude list.

    Raises IndexError (from random.choice) if every impalad is excluded.
    """
    # The exclude list is optional; treat None as "exclude nothing".
    excluded = self.impalad_exclude_list or ()
    # Build a real list (not a lazy filter object) because choice() needs a sequence.
    candidates = [impalad for impalad in
                  self.cluster.get_impala_service().get_all_impalad_processes()
                  if '%s:%d' % (impalad.hostname, impalad.be_port) not in excluded]
    self.kill_impalad(choice(candidates))

  def kill_impalad(self, impalad):
    """ Kills the specified impalad instance """
    LOG.info('Chose impalad on "%s" to kill', impalad.hostname)
    impalad.kill()

  def kill_state_store(self):
    """ Kills the statestore process """
    state_store = self.cluster.get_impala_service().get_state_store_process()
    LOG.info('Chose statestore on "%s" to kill', state_store.hostname)
    state_store.kill()

  def __start_timer(self):
    """ Starts a new timer, cancelling the previous timer if it is running """
    self.cancel()
    self.__timer = Timer(self.failure_frequency, self.__choose_action)
    self.__timer.start()

  def __choose_action(self):
    """ Chooses a failure action to perform, then re-arms the timer """
    action = choice(self.possible_actions)
    LOG.info('Executing action: %s', action)
    action()
    self.__start_timer()
|