impala/bin/collect_minidumps.py

#!/usr/bin/env python3
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# This script is to be called by Cloudera Manager to collect Breakpad minidump files up to
# a specified date/time. A compressed tarball is created in the user specified location.
# We try to fit as many files as possible into the tarball until a size limit is reached.
# Example invocation by CM:
#     ./collect_minidumps.py --conf_dir=/var/run/.../5555-impala-STATESTORE/impala-conf \
#       --role_name=statestored --max_output_size=50000000 --end_time=1463033495000 \
#       --output_file_path=/tmp/minidump_package.tar.gz

from __future__ import absolute_import, division, print_function
import os
import re
import sys
import tarfile

from contextlib import closing
from optparse import OptionParser

class FileArchiver(object):
  '''Generic helper that packs the regular files found in source_dir into a
  gzip-compressed tarball written to output_file_path. If a file with that name already
  exists, it is deleted and re-created. max_output_size is the maximum allowed size (in
  bytes) of the resulting tarball. If not all eligible files fit into the allowed size,
  the most recently modified files are preferred. The start_time and end_time parameters
  (in epoch milliseconds UTC) restrict the files to those whose modification time lies in
  that interval; a bound that is None or 0 is treated as "no bound".
  '''

  def __init__(self,
      source_dir,
      output_file_path,
      max_output_size,
      start_time=None,
      end_time=None):
    self.source_dir = source_dir
    self.max_output_size = max_output_size
    self.start_time = start_time
    self.end_time = end_time
    self.output_file_path = output_file_path
    # Maps the number of files in the tarball to the resulting size (in bytes).
    self.resulting_sizes = {}
    # Eligible files, most recently modified first. Filled by _compute_file_list().
    self.file_list = []

  def _remove_output_file(self):
    '''Best-effort removal of the output file. An OSError (for example because the file
    does not exist) is deliberately ignored.
    '''
    try:
      os.remove(self.output_file_path)
    except OSError:
      pass

  def _tar_files(self, num_files=None):
    '''Make a tarball with the num_files most recent files in file_list and return its
    size in bytes. The size is also recorded in the resulting_sizes map. If num_files is
    None, all files in file_list are archived. If num_files is 0, any existing output
    file is removed, no tarball is written and the recorded size is 0.
    '''
    # Compare against None explicitly: a plain truthiness test would turn an explicit
    # request for 0 files into "archive everything".
    if num_files is None:
      num_files = len(self.file_list)
    self._remove_output_file()
    if num_files == 0:
      size = 0
    else:
      with closing(tarfile.open(self.output_file_path, mode='w:gz')) as out:
        for path in self.file_list[:num_files]:
          out.add(path)
      size = os.stat(self.output_file_path).st_size
    self.resulting_sizes[num_files] = size
    return size

  def _compute_file_list(self):
    '''Computes the list of eligible files in the source directory, sorted by
    modification time with the most recent file first, and stores it in file_list. Files
    with a modification time outside of the desired time range are filtered out.
    Directories and other non-files are ignored, and so is the output file itself.
    '''
    output_path = None
    if self.output_file_path:
      output_path = os.path.abspath(self.output_file_path)
    candidates = []
    for name in os.listdir(self.source_dir):
      full_path = os.path.join(self.source_dir, name)
      if not os.path.isfile(full_path):
        continue
      # A previous tarball located in source_dir must not be listed: the output file is
      # removed before every archiving attempt.
      if os.path.abspath(full_path) == output_path:
        continue
      try:
        # Stat only once per file and reuse the value for both filtering and sorting.
        mtime = os.stat(full_path).st_mtime
      except OSError:
        # The file disappeared (or became unreadable) after it was listed; skip it.
        continue
      # st_mtime is in seconds UTC, so we need to multiply by 1000 to get milliseconds.
      time_modified = mtime * 1000
      if self.start_time and self.start_time > time_modified:
        continue
      if self.end_time and self.end_time < time_modified:
        continue
      candidates.append((mtime, full_path))
    candidates.sort(key=lambda entry: entry[0], reverse=True)
    self.file_list = [path for _, path in candidates]

  def _binary_search(self):
    '''Calculates the maximum number of files that can be collected, such that the
    tarball size is less than or equal to max_output_size. Every probe writes a tarball
    to output_file_path, so the file left behind does not necessarily contain the
    returned number of files. Returns 0 if not even the most recent file fits.
    '''
    # make_tarball() only searches after archiving all files exceeded the limit, so
    # max_num is known not to fit; min_num is the largest count known to fit (or 0).
    min_num = 0
    max_num = len(self.file_list)
    while max_num - min_num > 1:
      mid = (min_num + max_num) // 2
      if self._tar_files(mid) <= self.max_output_size:
        min_num = mid
      else:
        max_num = mid
    return min_num

  def make_tarball(self):
    '''Make a tarball with the maximum number of files such that the size of the tarball
    is less than or equal to max_output_size. Returns a pair (status (int), message
    (str)). status represents the result of the operation and follows the unix convention
    where 0 equals success. message provides additional information. A status of 1 is
    returned if source_dir is not empty and no files were able to fit into the tarball.
    '''
    self._compute_file_list()
    total = len(self.file_list)
    if total == 0:
      return 0, 'No files found in "{0}".'.format(self.source_dir)

    # Optimistic attempt first: archive everything.
    if self._tar_files() <= self.max_output_size:
      msg = 'Success, archived all {0} files in "{1}".'
      return 0, msg.format(total, self.source_dir)

    max_num_files = self._binary_search()
    if max_num_files == 0:
      self._remove_output_file()
      msg = ('Unable to archive any files in "{0}". '
          'Increase max_output_size to at least {1} bytes.')
      # If max_num_files is 0, a tarball with exactly 1 file has been attempted: either
      # by the attempt above (only one eligible file) or by the binary search.
      return 1, msg.format(self.source_dir, self.resulting_sizes[1])

    # The last probe of the binary search may have used a different number of files, so
    # write the final tarball again.
    self._tar_files(max_num_files)
    msg = 'Success. Archived {0} out of {1} files in "{2}".'
    return 0, msg.format(max_num_files, total, self.source_dir)

def get_config_parameter_value(conf_dir, role_name, config_parameter_name):
  '''Extract a single config parameter from the configuration file of a particular
  daemon.

  conf_dir is the directory containing the flag file, role_name selects the flag file
  ('impalad', 'statestored' or 'catalogd') and config_parameter_name is the flag name
  without leading dashes. Lines of the form '-name=value' and '--name=value' are
  recognised. Returns the value as a string, or None if the flag is not set in the file.
  If the flag occurs more than once, the last occurrence wins. Prints an error and exits
  with status 1 if the flag file cannot be read. Raises KeyError for an unknown
  role_name.
  '''
  ROLE_FLAGFILE_MAP = {
      'impalad': 'impalad_flags',
      'statestored': 'state_store_flags',
      'catalogd': 'catalogserver_flags'}
  file_path = os.path.join(conf_dir, ROLE_FLAGFILE_MAP[role_name])
  # Escape the name so that regex metacharacters in it are matched literally, and build
  # the pattern once instead of once per line.
  flag_re = re.compile(r'--?{0}=(.*)'.format(re.escape(config_parameter_name)))
  config_parameter_value = None
  try:
    with open(file_path, 'r') as f:
      for line in f:
        m = flag_re.match(line)
        if m:
          config_parameter_value = m.group(1)
  except IOError:
    print('Error: Unable to open "{0}".'.format(file_path), file=sys.stderr)
    sys.exit(1)
  return config_parameter_value

def get_minidump_dir(conf_dir, role_name):
  '''Return the directory holding the minidumps of role_name, as configured in the
  role's flag file under conf_dir. The 'minidump_path' flag defaults to 'minidumps'; a
  relative path is resolved against the 'log_dir' flag, which defaults to '/tmp'. The
  role name is appended as the last path component. Prints an error and exits with
  status 1 if the resulting directory does not exist.
  '''
  configured_path = get_config_parameter_value(conf_dir, role_name, 'minidump_path')
  base_dir = configured_path if configured_path else 'minidumps'
  if not os.path.isabs(base_dir):
    # Only look up log_dir when it is actually needed to anchor a relative path.
    configured_log_dir = get_config_parameter_value(conf_dir, role_name, 'log_dir')
    log_root = configured_log_dir if configured_log_dir else '/tmp'
    base_dir = os.path.join(log_root, base_dir)
  role_dir = os.path.join(base_dir, role_name)
  if os.path.isdir(role_dir):
    return role_dir
  print('Error: minidump directory does not exist.', file=sys.stderr)
  sys.exit(1)

def main():
  '''Command line entry point: parse the options, locate the minidump directory of the
  requested role and archive its files. The message returned by the archiver is printed
  to stderr and its status becomes the exit code. Exits with status 1 if conf_dir or
  output_file_path is not given.
  '''
  opt_parser = OptionParser()
  opt_parser.add_option('--conf_dir',
      help='Directory in which to look for the config file with startup flags')
  opt_parser.add_option('--role_name', type='choice',
      choices=['impalad', 'statestored', 'catalogd'], default='impalad',
      help='For which role to collect the minidumps.')
  opt_parser.add_option('--max_output_size', default=40*1024*1024, type='int',
      help='The maximum file size of the result tarball to be written given in bytes. '
           'If the total size exceeds this value, most recent files will be preferred')
  opt_parser.add_option('--start_time', default=None, type='int',
      help='Interval start time (in epoch milliseconds UTC).')
  opt_parser.add_option('--end_time', default=None, type='int',
      help='Interval end time, until when to collect the minidump files '
           '(in epoch milliseconds UTC).')
  opt_parser.add_option('--output_file_path', help='The full path of the output file.')
  options, _ = opt_parser.parse_args()

  # Mandatory options, checked in this order; the first missing one aborts the script.
  required = (
      (options.conf_dir, 'Error: conf_dir is not specified.'),
      (options.output_file_path, 'Error: output_file_path is not specified.'))
  for value, error in required:
    if not value:
      print(error, file=sys.stderr)
      sys.exit(1)

  source_dir = get_minidump_dir(options.conf_dir, options.role_name)
  archiver = FileArchiver(
      source_dir=source_dir,
      output_file_path=options.output_file_path,
      max_output_size=options.max_output_size,
      start_time=options.start_time,
      end_time=options.end_time)
  exit_status, message = archiver.make_tarball()
  print(message, file=sys.stderr)
  sys.exit(exit_status)

# Run main() only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
  main()